WIP refactor logging

This commit is contained in:
msramalho
2025-06-21 15:54:51 +01:00
parent ade7feb5a0
commit ce4d7ac649
54 changed files with 298 additions and 207 deletions

View File

@@ -7,7 +7,7 @@ from tempfile import TemporaryDirectory
from auto_archiver.utils import url as UrlUtil
from auto_archiver.core.consts import MODULE_TYPES as CONF_MODULE_TYPES
from loguru import logger
from auto_archiver.utils.custom_logger import logger
if TYPE_CHECKING:
from .module import ModuleFactory

View File

@@ -10,7 +10,7 @@ from ruamel.yaml import YAML, CommentedMap
import json
import os
from loguru import logger
from auto_archiver.utils.custom_logger import logger
from copy import deepcopy
from auto_archiver.core.consts import MODULE_TYPES
@@ -118,8 +118,7 @@ class DefaultValidatingParser(argparse.ArgumentParser):
"""
Override of error to format a nicer looking error message using logger
"""
logger.error("Problem with configuration file (tip: use --help to see the available options):")
logger.error(message)
logger.error(f"Problem with configuration file (tip: use --help to see the available options): \n{message}")
self.exit(2)
def parse_known_args(self, args=None, namespace=None):
@@ -136,8 +135,7 @@ class DefaultValidatingParser(argparse.ArgumentParser):
try:
self._check_value(action, action.default)
except argparse.ArgumentError as e:
logger.error(f"You have an invalid setting in your configuration file ({action.dest}):")
logger.error(e)
logger.error(f"You have an invalid setting in your configuration file ({action.dest}):\n {e}")
exit()
return super().parse_known_args(args, namespace)

View File

@@ -12,7 +12,7 @@ from contextlib import suppress
import mimetypes
import os
import requests
from loguru import logger
from auto_archiver.utils.custom_logger import logger
from retrying import retry
import re
@@ -94,7 +94,7 @@ class Extractor(BaseModule):
to_filename = to_filename[-64:]
to_filename = os.path.join(self.tmp_dir, to_filename)
if verbose:
logger.debug(f"downloading {url[0:50]=} {to_filename=}")
logger.debug(f"downloading {to_filename=}")
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
}
@@ -117,7 +117,7 @@ class Extractor(BaseModule):
return to_filename
except requests.RequestException as e:
logger.warning(f"Failed to fetch the Media URL: {str(e)[:250]}")
logger.warning(f"Failed to fetch the Media URL: {e}")
if try_best_quality:
return None, url

View File

@@ -11,7 +11,7 @@ from dataclasses import dataclass, field
from dataclasses_json import dataclass_json, config
import mimetypes
from loguru import logger
from auto_archiver.utils.custom_logger import logger
@dataclass_json # annotation order matters
@@ -121,8 +121,7 @@ class Media:
except Error:
return False # ffmpeg errors when reading bad files
except Exception as e:
logger.error(e)
logger.error(traceback.format_exc())
logger.error(f"{e}: {traceback.format_exc()}")
try:
fsize = os.path.getsize(self.filename)
return fsize > 20_000

View File

@@ -17,7 +17,7 @@ from dataclasses_json import dataclass_json
import datetime
from urllib.parse import urlparse
from dateutil.parser import parse as parse_dt
from loguru import logger
from auto_archiver.utils.custom_logger import logger
from .media import Media

View File

@@ -16,7 +16,7 @@ import sys
from importlib.util import find_spec
import os
from os.path import join
from loguru import logger
from auto_archiver.utils.custom_logger import logger
import auto_archiver
from auto_archiver.core.consts import DEFAULT_MANIFEST, MANIFEST_FILE, SetupError

View File

@@ -15,9 +15,11 @@ import traceback
from copy import copy
from rich_argparse import RichHelpFormatter
from loguru import logger
from auto_archiver.utils.custom_logger import logger
import requests
from auto_archiver.utils.misc import random_str
from .metadata import Metadata, Media
from auto_archiver.version import __version__
from .config import (
@@ -342,7 +344,12 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_
# add other logging info
if self.logger_id is None: # note - need direct comparison to None since need to consider falsy value 0
use_level = logging_config["level"]
self.logger_id = logger.add(sys.stderr, level=use_level)
self.logger_id = logger.add(
sys.stderr,
level=use_level,
catch=True,
format="<level>{level}</level>: <fg #64FFDA>{message}</fg #64FFDA> {extra[serialize_no_message]}",
)
rotation = logging_config["rotation"]
log_file = logging_config["file"]
@@ -356,9 +363,10 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_
f"{log_file}.{i}_{level.lower()}",
filter=lambda rec, lvl=level: rec["level"].name == lvl,
rotation=rotation,
format="{extra[serialized]}",
)
elif log_file:
logger.add(log_file, rotation=rotation, level=use_level)
logger.add(log_file, rotation=rotation, level=use_level, format="{extra[serialized]}")
def install_modules(self, modules_by_type):
"""
@@ -466,13 +474,9 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_
update_cmd = "`docker pull bellingcat/auto-archiver:latest`"
else:
update_cmd = "`pip install --upgrade auto-archiver`"
logger.warning("")
logger.warning("********* IMPORTANT: UPDATE AVAILABLE ********")
logger.warning(
f"A new version of auto-archiver is available (v{latest_version}, you have v{current_version})"
f"\n********* IMPORTANT: UPDATE AVAILABLE ********\nA new version of auto-archiver is available (v{latest_version}, you have v{current_version})\nMake sure to update to the latest version using: {update_cmd}\n"
)
logger.warning(f"Make sure to update to the latest version using: {update_cmd}")
logger.warning("")
def setup(self, args: list):
"""
@@ -522,7 +526,7 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_
self.setup(args)
return self.feed()
except Exception as e:
logger.error(e)
logger.error(f"{e}: {traceback.format_exc()}")
exit(1)
def cleanup(self) -> None:
@@ -534,10 +538,12 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_
url_count = 0
for feeder in self.feeders:
for item in feeder:
yield self.feed_item(item)
url_count += 1
with logger.contextualize(url=item.get_url(), trace=random_str(12)):
logger.info("started processing")
yield self.feed_item(item)
url_count += 1
logger.info(f"Processed {url_count} URL(s)")
logger.info(f"processed {url_count} URL(s)")
self.cleanup()
def feed_item(self, item: Metadata) -> Metadata:
@@ -555,13 +561,13 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_
return self.archive(item)
except KeyboardInterrupt:
# catches keyboard interruptions to do a clean exit
logger.warning(f"caught interrupt on {item=}")
logger.warning("caught interrupt")
for d in self.databases:
d.aborted(item)
self.cleanup()
exit()
except Exception as e:
logger.error(f"Got unexpected error on item {item}: {e}\n{traceback.format_exc()}")
logger.error(f"Got unexpected error: {e}\n{traceback.format_exc()}")
for d in self.databases:
if isinstance(e, AssertionError):
d.failed(item, str(e))
@@ -589,7 +595,7 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_
try:
check_url_or_raise(original_url)
except ValueError as e:
logger.error(f"Error archiving URL {original_url}: {e}")
logger.error(f"Error archiving: {e}")
raise e
# 1 - sanitize - each archiver is responsible for cleaning/expanding its own URLs
@@ -599,7 +605,7 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_
result.set_url(url)
if original_url != url:
logger.debug(f"Sanitized URL from {original_url} to {url}")
logger.debug(f"Sanitized URL to {url}")
result.set("original_url", original_url)
# 2 - notify start to DBs, propagate already archived if feature enabled in DBs
@@ -614,25 +620,25 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_
try:
d.done(cached_result, cached=True)
except Exception as e:
logger.error(f"ERROR database {d.name}: {e}: {traceback.format_exc()}")
logger.error(f"database {d.name}: {e}: {traceback.format_exc()}")
return cached_result
# 3 - call extractors until one succeeds
for a in self.extractors:
logger.info(f"Trying extractor {a.name} for {url}")
logger.info(f"trying extractor {a.name}")
try:
result.merge(a.download(result))
if result.is_success():
break
except Exception as e:
logger.error(f"ERROR archiver {a.name}: {e}: {traceback.format_exc()}")
logger.error(f"archiver {a.name}: {e}: {traceback.format_exc()}")
# 4 - call enrichers to work with archived content
for e in self.enrichers:
try:
e.enrich(result)
except Exception as exc:
logger.error(f"ERROR enricher {e.name}: {exc}: {traceback.format_exc()}")
logger.error(f"enricher {e.name}: {exc}: {traceback.format_exc()}")
# 5 - store all downloaded/generated media
result.store(storages=self.storages)
@@ -651,7 +657,7 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_
try:
d.done(result)
except Exception as e:
logger.error(f"ERROR database {d.name}: {e}: {traceback.format_exc()}")
logger.error(f"database {d.name}: {e}: {traceback.format_exc()}")
return result

View File

@@ -24,7 +24,7 @@ from abc import abstractmethod
from typing import IO
import os
from loguru import logger
from auto_archiver.utils.custom_logger import logger
from slugify import slugify
from auto_archiver.utils.misc import random_str