mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-12 21:28:29 +03:00
WIP refactor logging
This commit is contained in:
@@ -7,7 +7,7 @@ from tempfile import TemporaryDirectory
 from auto_archiver.utils import url as UrlUtil
 from auto_archiver.core.consts import MODULE_TYPES as CONF_MODULE_TYPES

-from loguru import logger
+from auto_archiver.utils.custom_logger import logger

 if TYPE_CHECKING:
 from .module import ModuleFactory
@@ -10,7 +10,7 @@ from ruamel.yaml import YAML, CommentedMap
 import json
 import os

-from loguru import logger
+from auto_archiver.utils.custom_logger import logger

 from copy import deepcopy
 from auto_archiver.core.consts import MODULE_TYPES
@@ -118,8 +118,7 @@ class DefaultValidatingParser(argparse.ArgumentParser):
 """
 Override of error to format a nicer looking error message using logger
 """
-logger.error("Problem with configuration file (tip: use --help to see the available options):")
-logger.error(message)
+logger.error(f"Problem with configuration file (tip: use --help to see the available options): \n{message}")
 self.exit(2)

 def parse_known_args(self, args=None, namespace=None):
@@ -136,8 +135,7 @@ class DefaultValidatingParser(argparse.ArgumentParser):
 try:
 self._check_value(action, action.default)
 except argparse.ArgumentError as e:
-logger.error(f"You have an invalid setting in your configuration file ({action.dest}):")
-logger.error(e)
+logger.error(f"You have an invalid setting in your configuration file ({action.dest}):\n {e}")
 exit()

 return super().parse_known_args(args, namespace)
@@ -12,7 +12,7 @@ from contextlib import suppress
 import mimetypes
 import os
 import requests
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger
 from retrying import retry
 import re

@@ -94,7 +94,7 @@ class Extractor(BaseModule):
 to_filename = to_filename[-64:]
 to_filename = os.path.join(self.tmp_dir, to_filename)
 if verbose:
-logger.debug(f"downloading {url[0:50]=} {to_filename=}")
+logger.debug(f"downloading {to_filename=}")
 headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
 }
@@ -117,7 +117,7 @@ class Extractor(BaseModule):
 return to_filename

 except requests.RequestException as e:
-logger.warning(f"Failed to fetch the Media URL: {str(e)[:250]}")
+logger.warning(f"Failed to fetch the Media URL: {e}")
 if try_best_quality:
 return None, url

@@ -11,7 +11,7 @@ from dataclasses import dataclass, field
 from dataclasses_json import dataclass_json, config
 import mimetypes

-from loguru import logger
+from auto_archiver.utils.custom_logger import logger


 @dataclass_json # annotation order matters
@@ -121,8 +121,7 @@ class Media:
 except Error:
 return False # ffmpeg errors when reading bad files
 except Exception as e:
-logger.error(e)
-logger.error(traceback.format_exc())
+logger.error(f"{e}: {traceback.format_exc()}")
 try:
 fsize = os.path.getsize(self.filename)
 return fsize > 20_000
@@ -17,7 +17,7 @@ from dataclasses_json import dataclass_json
 import datetime
 from urllib.parse import urlparse
 from dateutil.parser import parse as parse_dt
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger

 from .media import Media

@@ -16,7 +16,7 @@ import sys
 from importlib.util import find_spec
 import os
 from os.path import join
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger
 import auto_archiver
 from auto_archiver.core.consts import DEFAULT_MANIFEST, MANIFEST_FILE, SetupError

@@ -15,9 +15,11 @@ import traceback
 from copy import copy

 from rich_argparse import RichHelpFormatter
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger
 import requests

+from auto_archiver.utils.misc import random_str
+
 from .metadata import Metadata, Media
 from auto_archiver.version import __version__
 from .config import (
@@ -342,7 +344,12 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_
 # add other logging info
 if self.logger_id is None: # note - need direct comparison to None since need to consider falsy value 0
 use_level = logging_config["level"]
-self.logger_id = logger.add(sys.stderr, level=use_level)
+self.logger_id = logger.add(
+sys.stderr,
+level=use_level,
+catch=True,
+format="<level>{level}</level>: <fg #64FFDA>{message}</fg #64FFDA> {extra[serialize_no_message]}",
+)

 rotation = logging_config["rotation"]
 log_file = logging_config["file"]
@@ -356,9 +363,10 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_
 f"{log_file}.{i}_{level.lower()}",
 filter=lambda rec, lvl=level: rec["level"].name == lvl,
 rotation=rotation,
+format="{extra[serialized]}",
 )
 elif log_file:
-logger.add(log_file, rotation=rotation, level=use_level)
+logger.add(log_file, rotation=rotation, level=use_level, format="{extra[serialized]}")

 def install_modules(self, modules_by_type):
 """
@@ -466,13 +474,9 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_
 update_cmd = "`docker pull bellingcat/auto-archiver:latest`"
 else:
 update_cmd = "`pip install --upgrade auto-archiver`"
-logger.warning("")
-logger.warning("********* IMPORTANT: UPDATE AVAILABLE ********")
 logger.warning(
-f"A new version of auto-archiver is available (v{latest_version}, you have v{current_version})"
+f"\n********* IMPORTANT: UPDATE AVAILABLE ********\nA new version of auto-archiver is available (v{latest_version}, you have v{current_version})\nMake sure to update to the latest version using: {update_cmd}\n"
 )
-logger.warning(f"Make sure to update to the latest version using: {update_cmd}")
-logger.warning("")

 def setup(self, args: list):
 """
@@ -522,7 +526,7 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_
 self.setup(args)
 return self.feed()
 except Exception as e:
-logger.error(e)
+logger.error(f"{e}: {traceback.format_exc()}")
 exit(1)

 def cleanup(self) -> None:
@@ -534,10 +538,12 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_
 url_count = 0
 for feeder in self.feeders:
 for item in feeder:
-yield self.feed_item(item)
-url_count += 1
+with logger.contextualize(url=item.get_url(), trace=random_str(12)):
+logger.info("started processing")
+yield self.feed_item(item)
+url_count += 1

-logger.info(f"Processed {url_count} URL(s)")
+logger.info(f"processed {url_count} URL(s)")
 self.cleanup()

 def feed_item(self, item: Metadata) -> Metadata:
@@ -555,13 +561,13 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_
 return self.archive(item)
 except KeyboardInterrupt:
 # catches keyboard interruptions to do a clean exit
-logger.warning(f"caught interrupt on {item=}")
+logger.warning("caught interrupt")
 for d in self.databases:
 d.aborted(item)
 self.cleanup()
 exit()
 except Exception as e:
-logger.error(f"Got unexpected error on item {item}: {e}\n{traceback.format_exc()}")
+logger.error(f"Got unexpected error: {e}\n{traceback.format_exc()}")
 for d in self.databases:
 if isinstance(e, AssertionError):
 d.failed(item, str(e))
@@ -589,7 +595,7 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_
 try:
 check_url_or_raise(original_url)
 except ValueError as e:
-logger.error(f"Error archiving URL {original_url}: {e}")
+logger.error(f"Error archiving: {e}")
 raise e

 # 1 - sanitize - each archiver is responsible for cleaning/expanding its own URLs
@@ -599,7 +605,7 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_

 result.set_url(url)
 if original_url != url:
-logger.debug(f"Sanitized URL from {original_url} to {url}")
+logger.debug(f"Sanitized URL to {url}")
 result.set("original_url", original_url)

 # 2 - notify start to DBs, propagate already archived if feature enabled in DBs
@@ -614,25 +620,25 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_
 try:
 d.done(cached_result, cached=True)
 except Exception as e:
-logger.error(f"ERROR database {d.name}: {e}: {traceback.format_exc()}")
+logger.error(f"database {d.name}: {e}: {traceback.format_exc()}")
 return cached_result

 # 3 - call extractors until one succeeds
 for a in self.extractors:
-logger.info(f"Trying extractor {a.name} for {url}")
+logger.info(f"trying extractor {a.name}")
 try:
 result.merge(a.download(result))
 if result.is_success():
 break
 except Exception as e:
-logger.error(f"ERROR archiver {a.name}: {e}: {traceback.format_exc()}")
+logger.error(f"archiver {a.name}: {e}: {traceback.format_exc()}")

 # 4 - call enrichers to work with archived content
 for e in self.enrichers:
 try:
 e.enrich(result)
 except Exception as exc:
-logger.error(f"ERROR enricher {e.name}: {exc}: {traceback.format_exc()}")
+logger.error(f"enricher {e.name}: {exc}: {traceback.format_exc()}")

 # 5 - store all downloaded/generated media
 result.store(storages=self.storages)
@@ -651,7 +657,7 @@ Here's how that would look: \n\nsteps:\n extractors:\n - [your_extractor_name_
 try:
 d.done(result)
 except Exception as e:
-logger.error(f"ERROR database {d.name}: {e}: {traceback.format_exc()}")
+logger.error(f"database {d.name}: {e}: {traceback.format_exc()}")

 return result

@@ -24,7 +24,7 @@ from abc import abstractmethod
 from typing import IO
 import os

-from loguru import logger
+from auto_archiver.utils.custom_logger import logger
 from slugify import slugify

 from auto_archiver.utils.misc import random_str
Reference in New Issue
Block a user