WIP refactor logging

2026-06-07 19:08:30 +03:00 · 2025-06-21 15:54:51 +01:00
parent ade7feb5a0
commit ce4d7ac649
54 changed files with 298 additions and 207 deletions
--- a/scripts/telegram_setup.py
+++ b/scripts/telegram_setup.py
@@ -14,7 +14,7 @@ You will need to provide your phone number and a 2FA code the first time you run

 import os
 from telethon.sync import TelegramClient
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger


 # Create a
--- a/src/auto_archiver/core/base_module.py
+++ b/src/auto_archiver/core/base_module.py
@@ -7,7 +7,7 @@ from tempfile import TemporaryDirectory
 from auto_archiver.utils import url as UrlUtil
 from auto_archiver.core.consts import MODULE_TYPES as CONF_MODULE_TYPES

-from loguru import logger
+from auto_archiver.utils.custom_logger import logger

 if TYPE_CHECKING:
    from .module import ModuleFactory
--- a/src/auto_archiver/core/config.py
+++ b/src/auto_archiver/core/config.py
@@ -10,7 +10,7 @@ from ruamel.yaml import YAML, CommentedMap
 import json
 import os

-from loguru import logger
+from auto_archiver.utils.custom_logger import logger

 from copy import deepcopy
 from auto_archiver.core.consts import MODULE_TYPES
@@ -118,8 +118,7 @@ class DefaultValidatingParser(argparse.ArgumentParser):
        """
        Override of error to format a nicer looking error message using logger
        """
-        logger.error("Problem with configuration file (tip: use --help to see the available options):")
-        logger.error(message)
+        logger.error(f"Problem with configuration file (tip: use --help to see the available options): \n{message}")
        self.exit(2)

    def parse_known_args(self, args=None, namespace=None):
@@ -136,8 +135,7 @@ class DefaultValidatingParser(argparse.ArgumentParser):
                    try:
                        self._check_value(action, action.default)
                    except argparse.ArgumentError as e:
-                        logger.error(f"You have an invalid setting in your configuration file ({action.dest}):")
-                        logger.error(e)
+                        logger.error(f"You have an invalid setting in your configuration file ({action.dest}):\n {e}")
                        exit()

        return super().parse_known_args(args, namespace)
--- a/src/auto_archiver/core/extractor.py
+++ b/src/auto_archiver/core/extractor.py
@@ -12,7 +12,7 @@ from contextlib import suppress
 import mimetypes
 import os
 import requests
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger
 from retrying import retry
 import re

@@ -94,7 +94,7 @@ class Extractor(BaseModule):
                to_filename = to_filename[-64:]
        to_filename = os.path.join(self.tmp_dir, to_filename)
        if verbose:
-            logger.debug(f"downloading {url[0:50]=} {to_filename=}")
+            logger.debug(f"downloading {to_filename=}")
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
        }
@@ -117,7 +117,7 @@ class Extractor(BaseModule):
            return to_filename

        except requests.RequestException as e:
-            logger.warning(f"Failed to fetch the Media URL: {str(e)[:250]}")
+            logger.warning(f"Failed to fetch the Media URL: {e}")
        if try_best_quality:
            return None, url

--- a/src/auto_archiver/core/media.py
+++ b/src/auto_archiver/core/media.py
@@ -11,7 +11,7 @@ from dataclasses import dataclass, field
 from dataclasses_json import dataclass_json, config
 import mimetypes

-from loguru import logger
+from auto_archiver.utils.custom_logger import logger


@dataclass_json  # annotation order matters
@@ -121,8 +121,7 @@ class Media:
        except Error:
            return False  # ffmpeg errors when reading bad files
        except Exception as e:
-            logger.error(e)
-            logger.error(traceback.format_exc())
+            logger.error(f"{e}: {traceback.format_exc()}")
            try:
                fsize = os.path.getsize(self.filename)
                return fsize > 20_000
--- a/src/auto_archiver/core/metadata.py
+++ b/src/auto_archiver/core/metadata.py
@@ -17,7 +17,7 @@ from dataclasses_json import dataclass_json
 import datetime
 from urllib.parse import urlparse
 from dateutil.parser import parse as parse_dt
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger

 from .media import Media

--- a/src/auto_archiver/core/module.py
+++ b/src/auto_archiver/core/module.py
@@ -16,7 +16,7 @@ import sys
 from importlib.util import find_spec
 import os
 from os.path import join
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger
 import auto_archiver
 from auto_archiver.core.consts import DEFAULT_MANIFEST, MANIFEST_FILE, SetupError

--- a/src/auto_archiver/core/orchestrator.py
+++ b/src/auto_archiver/core/orchestrator.py
@@ -15,9 +15,11 @@ import traceback
 from copy import copy

 from rich_argparse import RichHelpFormatter
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger
 import requests

+from auto_archiver.utils.misc import random_str
+
 from .metadata import Metadata, Media
 from auto_archiver.version import __version__
 from .config import (
@@ -342,7 +344,12 @@ Here's how that would look: \n\nsteps:\n  extractors:\n  - [your_extractor_name_
        # add other logging info
        if self.logger_id is None:  # note - need direct comparison to None since need to consider falsy value 0
            use_level = logging_config["level"]
-            self.logger_id = logger.add(sys.stderr, level=use_level)
+            self.logger_id = logger.add(
+                sys.stderr,
+                level=use_level,
+                catch=True,
+                format="<level>{level}</level>: <fg #64FFDA>{message}</fg #64FFDA> {extra[serialize_no_message]}",
+            )

            rotation = logging_config["rotation"]
            log_file = logging_config["file"]
@@ -356,9 +363,10 @@ Here's how that would look: \n\nsteps:\n  extractors:\n  - [your_extractor_name_
                        f"{log_file}.{i}_{level.lower()}",
                        filter=lambda rec, lvl=level: rec["level"].name == lvl,
                        rotation=rotation,
+                        format="{extra[serialized]}",
                    )
            elif log_file:
-                logger.add(log_file, rotation=rotation, level=use_level)
+                logger.add(log_file, rotation=rotation, level=use_level, format="{extra[serialized]}")

    def install_modules(self, modules_by_type):
        """
@@ -466,13 +474,9 @@ Here's how that would look: \n\nsteps:\n  extractors:\n  - [your_extractor_name_
                update_cmd = "`docker pull bellingcat/auto-archiver:latest`"
            else:
                update_cmd = "`pip install --upgrade auto-archiver`"
-            logger.warning("")
-            logger.warning("********* IMPORTANT: UPDATE AVAILABLE ********")
            logger.warning(
-                f"A new version of auto-archiver is available (v{latest_version}, you have v{current_version})"
+                f"\n********* IMPORTANT: UPDATE AVAILABLE ********\nA new version of auto-archiver is available (v{latest_version}, you have v{current_version})\nMake sure to update to the latest version using: {update_cmd}\n"
            )
-            logger.warning(f"Make sure to update to the latest version using: {update_cmd}")
-            logger.warning("")

    def setup(self, args: list):
        """
@@ -522,7 +526,7 @@ Here's how that would look: \n\nsteps:\n  extractors:\n  - [your_extractor_name_
            self.setup(args)
            return self.feed()
        except Exception as e:
-            logger.error(e)
+            logger.error(f"{e}: {traceback.format_exc()}")
            exit(1)

    def cleanup(self) -> None:
@@ -534,10 +538,12 @@ Here's how that would look: \n\nsteps:\n  extractors:\n  - [your_extractor_name_
        url_count = 0
        for feeder in self.feeders:
            for item in feeder:
-                yield self.feed_item(item)
-                url_count += 1
+                with logger.contextualize(url=item.get_url(), trace=random_str(12)):
+                    logger.info("started processing")
+                    yield self.feed_item(item)
+                    url_count += 1

-        logger.info(f"Processed {url_count} URL(s)")
+        logger.info(f"processed {url_count} URL(s)")
        self.cleanup()

    def feed_item(self, item: Metadata) -> Metadata:
@@ -555,13 +561,13 @@ Here's how that would look: \n\nsteps:\n  extractors:\n  - [your_extractor_name_
            return self.archive(item)
        except KeyboardInterrupt:
            # catches keyboard interruptions to do a clean exit
-            logger.warning(f"caught interrupt on {item=}")
+            logger.warning("caught interrupt")
            for d in self.databases:
                d.aborted(item)
            self.cleanup()
            exit()
        except Exception as e:
-            logger.error(f"Got unexpected error on item {item}: {e}\n{traceback.format_exc()}")
+            logger.error(f"Got unexpected error: {e}\n{traceback.format_exc()}")
            for d in self.databases:
                if isinstance(e, AssertionError):
                    d.failed(item, str(e))
@@ -589,7 +595,7 @@ Here's how that would look: \n\nsteps:\n  extractors:\n  - [your_extractor_name_
        try:
            check_url_or_raise(original_url)
        except ValueError as e:
-            logger.error(f"Error archiving URL {original_url}: {e}")
+            logger.error(f"Error archiving: {e}")
            raise e

        # 1 - sanitize - each archiver is responsible for cleaning/expanding its own URLs
@@ -599,7 +605,7 @@ Here's how that would look: \n\nsteps:\n  extractors:\n  - [your_extractor_name_

        result.set_url(url)
        if original_url != url:
-            logger.debug(f"Sanitized URL from {original_url} to {url}")
+            logger.debug(f"Sanitized URL to {url}")
            result.set("original_url", original_url)

        # 2 - notify start to DBs, propagate already archived if feature enabled in DBs
@@ -614,25 +620,25 @@ Here's how that would look: \n\nsteps:\n  extractors:\n  - [your_extractor_name_
                try:
                    d.done(cached_result, cached=True)
                except Exception as e:
-                    logger.error(f"ERROR database {d.name}: {e}: {traceback.format_exc()}")
+                    logger.error(f"database {d.name}: {e}: {traceback.format_exc()}")
            return cached_result

        # 3 - call extractors until one succeeds
        for a in self.extractors:
-            logger.info(f"Trying extractor {a.name} for {url}")
+            logger.info(f"trying extractor {a.name}")
            try:
                result.merge(a.download(result))
                if result.is_success():
                    break
            except Exception as e:
-                logger.error(f"ERROR archiver {a.name}: {e}: {traceback.format_exc()}")
+                logger.error(f"archiver {a.name}: {e}: {traceback.format_exc()}")

        # 4 - call enrichers to work with archived content
        for e in self.enrichers:
            try:
                e.enrich(result)
            except Exception as exc:
-                logger.error(f"ERROR enricher {e.name}: {exc}: {traceback.format_exc()}")
+                logger.error(f"enricher {e.name}: {exc}: {traceback.format_exc()}")

        # 5 - store all downloaded/generated media
        result.store(storages=self.storages)
@@ -651,7 +657,7 @@ Here's how that would look: \n\nsteps:\n  extractors:\n  - [your_extractor_name_
            try:
                d.done(result)
            except Exception as e:
-                logger.error(f"ERROR database {d.name}: {e}: {traceback.format_exc()}")
+                logger.error(f"database {d.name}: {e}: {traceback.format_exc()}")

        return result

--- a/src/auto_archiver/core/storage.py
+++ b/src/auto_archiver/core/storage.py
@@ -24,7 +24,7 @@ from abc import abstractmethod
 from typing import IO
 import os

-from loguru import logger
+from auto_archiver.utils.custom_logger import logger
 from slugify import slugify

 from auto_archiver.utils.misc import random_str
--- a/src/auto_archiver/modules/antibot_extractor_enricher/antibot_extractor_enricher.py
+++ b/src/auto_archiver/modules/antibot_extractor_enricher/antibot_extractor_enricher.py
@@ -7,7 +7,7 @@ from urllib.parse import urljoin
 import glob
 import importlib.util

-from loguru import logger
+from auto_archiver.utils.custom_logger import logger
 import selenium
 from seleniumbase import SB

@@ -57,7 +57,7 @@ class AntibotExtractorEnricher(Extractor, Enricher):
                    continue  # Skip imported modules/classes/functions
                if isinstance(obj, type) and issubclass(obj, Dropin):
                    dropins.append(obj)
-        logger.debug(f"ANTIBOT loaded drop-in classes: {', '.join([d.__name__ for d in dropins])}")
+        logger.debug(f"loaded drop-in classes: {', '.join([d.__name__ for d in dropins])}")
        return dropins

    def sanitize_url(self, url: str) -> str:
@@ -83,14 +83,13 @@ class AntibotExtractorEnricher(Extractor, Enricher):
    def enrich(self, to_enrich: Metadata, custom_data_dir: bool = True) -> bool:
        using_user_data_dir = self.user_data_dir if custom_data_dir else None
        url = to_enrich.get_url()
-        url_sample = url[:75]

        try:
            with SB(uc=True, agent=self.agent, headed=None, user_data_dir=using_user_data_dir, proxy=self.proxy) as sb:
-                logger.info(f"ANTIBOT selenium browser is up with agent {self.agent}, opening {url_sample}...")
+                logger.info(f"selenium browser is up with agent {self.agent}, opening url...")
                sb.uc_open_with_reconnect(url, 4)

-                logger.debug(f"ANTIBOT handling CAPTCHAs for {url_sample}...")
+                logger.debug("handling CAPTCHAs...")
                sb.uc_gui_handle_cf()
                sb.uc_gui_click_rc()  # NB: using handle instead of click breaks some sites like reddit, for now we separate here but can have dropins deciding this in the future

@@ -98,7 +97,7 @@ class AntibotExtractorEnricher(Extractor, Enricher):
                dropin.open_page(url)

                if self.detect_auth_wall and self._hit_auth_wall(sb):
-                    logger.warning(f"ANTIBOT SKIP since auth wall or CAPTCHA was detected for {url_sample}")
+                    logger.warning("skipping since auth wall or CAPTCHA was detected")
                    return False

                sb.wait_for_ready_state_complete()
@@ -125,18 +124,18 @@ class AntibotExtractorEnricher(Extractor, Enricher):
                    js_css_selector=dropin.js_for_video_css_selectors(),
                    max_media=self.max_download_videos - downloaded_videos,
                )
-                logger.info(f"ANTIBOT completed for {url_sample}")
+                logger.info("completed")

            return to_enrich
        except selenium.common.exceptions.SessionNotCreatedException as e:
            if custom_data_dir:  # the retry logic only works once
                logger.error(
-                    f"ANTIBOT session not created error: {e}. Please remove the user_data_dir {self.user_data_dir} and try again, will retry without user data dir though."
+                    f"session not created error: {e}. Please remove the user_data_dir {self.user_data_dir} and try again, will retry without user data dir though."
                )
                return self.enrich(to_enrich, custom_data_dir=False)
            raise e  # re-raise
        except Exception as e:
-            logger.error(f"ANTIBOT runtime error: {e}: {traceback.format_exc()}")
+            logger.error(f"runtime error: {e}: {traceback.format_exc()}")
            return False

    def _get_suitable_dropin(self, url: str, sb: SB):
@@ -146,7 +145,7 @@ class AntibotExtractorEnricher(Extractor, Enricher):
        """
        for dropin in self.dropins:
            if dropin.suitable(url):
-                logger.debug(f"ANTIBOT using drop-in {dropin.__name__} for {url}")
+                logger.debug(f"using drop-in {dropin.__name__}")
                return dropin(sb, self)

        return DefaultDropin(sb, self)
@@ -241,7 +240,7 @@ class AntibotExtractorEnricher(Extractor, Enricher):

        x = max(sb.execute_script("return document.documentElement.scrollWidth"), w)
        y = min(max(sb.execute_script("return document.documentElement.scrollHeight"), h), 25_000)
-        logger.debug(f"Setting window size to {x}x{y} for full page screenshot.")
+        logger.debug(f"setting window size to {x}x{y} for full page screenshot.")
        sb.set_window_size(x, y)

        screen_filename = os.path.join(self.tmp_dir, f"screenshot{random_str(6)}.png")
@@ -280,7 +279,7 @@ class AntibotExtractorEnricher(Extractor, Enricher):
        # js_for_css_selectors
        for src in sources:
            if len(all_urls) >= max_media:
-                logger.debug(f"Reached max download limit of {max_media} images/videos.")
+                logger.debug(f"reached max download limit of {max_media} images/videos.")
                break
            if not is_relevant_url(src):
                continue
--- a/src/auto_archiver/modules/antibot_extractor_enricher/captcha_services/anti_captcha.py
+++ b/src/auto_archiver/modules/antibot_extractor_enricher/captcha_services/anti_captcha.py
@@ -0,0 +1,60 @@
+# def solve_captcha(image_url):
+#     # Download image
+#     img_data = requests.get(image_url).content
+#     encoded_image = base64.b64encode(img_data).decode()
+
+#     # Submit to AntiCaptcha
+#     task = {
+#         "clientKey": ANTI_CAPTCHA_KEY,
+#         "task": {
+#             "type": "ImageToTextTask",
+#             "body": encoded_image
+#         }
+#     }
+#     print("[*] Sending captcha request to anti-captcha...")
+
+#     task_response = requests.post("https://api.anti-captcha.com/createTask", json=task).json()
+#     task_id = task_response["taskId"]
+#     print(f"[*] Anti-captcha response: {task_response}")
+
+#     # Poll for result
+#     while True:
+#         time.sleep(5)
+#         res = requests.post("https://api.anti-captcha.com/getTaskResult", json={
+#             "clientKey": ANTI_CAPTCHA_KEY,
+#             "taskId": task_id
+#         }).json()
+#         if res["status"] == "ready":
+#             print(f"[*] Captcha solved: {res}")
+#             return res["solution"]["text"]
+#         print(f"[*] Polling for captcha solution: {res['status']}")
+
+
+# def solve_recaptcha(site_key, page_url):
+# 	print("[*] Sending captcha request to anti-captcha...")
+# 	# Step 1: Send captcha request
+# 	task_payload = {
+# 		"clientKey": ANTI_CAPTCHA_KEY,
+# 		"task": {
+# 			"type": "NoCaptchaTaskProxyless",
+# 			"websiteURL": page_url,
+# 			"websiteKey": site_key
+# 		}
+# 	}
+# 	response = requests.post("https://api.anti-captcha.com/createTask", json=task_payload).json()
+# 	print(f"[*] Anti-captcha response: {response}")
+# 	task_id = response["taskId"]
+
+# 	# Step 2: Poll for solution
+# 	print("[*] Polling for captcha solution...")
+# 	for i in range(40):  # ~80 seconds
+# 		time.sleep(2)
+# 		result = requests.post("https://api.anti-captcha.com/getTaskResult", json={
+# 			"clientKey": ANTI_CAPTCHA_KEY,
+# 			"taskId": task_id
+# 		}).json()
+# 		print(f"    Poll {i+1}: status={result['status']}")
+# 		if result["status"] == "ready":
+# 			print("[*] Captcha solved!")
+# 			return result["solution"]["gRecaptchaResponse"]
+# 	raise TimeoutError("AntiCaptcha took too long")
--- a/src/auto_archiver/modules/antibot_extractor_enricher/dropin.py
+++ b/src/auto_archiver/modules/antibot_extractor_enricher/dropin.py
@@ -1,6 +1,7 @@
 import os
+import traceback
 from typing import Mapping
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger
 from seleniumbase import SB
 import yt_dlp

@@ -143,7 +144,7 @@ class Dropin:
        with yt_dlp.YoutubeDL(validated_options) as ydl:
            for url in video_urls:
                try:
-                    logger.debug(f"Downloading video from URL: {url}")
+                    logger.debug("downloading video from url")
                    info = ydl.extract_info(url, download=True)
                    filename = ydl_entry_to_filename(ydl, info)
                    if not filename:  # Failed to download video.
@@ -155,5 +156,5 @@ class Dropin:
                    to_enrich.add_media(media)
                    downloaded += 1
                except Exception as e:
-                    logger.error(f"Error downloading {url}: {e}")
+                    logger.error(f"download failed: {e} {traceback.format_exc()}")
        return downloaded
--- a/src/auto_archiver/modules/antibot_extractor_enricher/dropins/linkedin.py
+++ b/src/auto_archiver/modules/antibot_extractor_enricher/dropins/linkedin.py
@@ -1,5 +1,5 @@
 from typing import Mapping
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger
 from auto_archiver.modules.antibot_extractor_enricher.dropin import Dropin


@@ -62,7 +62,7 @@ class LinkedinDropin(Dropin):
            self.sb.wait_for_ready_state_complete()

        username, password = self._get_username_password("linkedin.com")
-        logger.debug("LinkedinDropin Logging in to Linkedin with username: {}", username)
+        logger.debug("logging in to Linkedin with username: {}", username)
        self.sb.type("#username", username)
        self.sb.type("#password", password)
        self.sb.click_if_visible("#password-visibility-toggle", timeout=0.5)
--- a/src/auto_archiver/modules/antibot_extractor_enricher/dropins/reddit.py
+++ b/src/auto_archiver/modules/antibot_extractor_enricher/dropins/reddit.py
@@ -3,7 +3,7 @@ from typing import Mapping
 from auto_archiver.core.metadata import Metadata
 from auto_archiver.modules.antibot_extractor_enricher.dropin import Dropin

-from loguru import logger
+from auto_archiver.utils.custom_logger import logger


 class RedditDropin(Dropin):
@@ -50,7 +50,7 @@ class RedditDropin(Dropin):
        self._close_cookies_banner()

        username, password = self._get_username_password("reddit.com")
-        logger.debug("RedditDropin Logging in to Reddit with username: {}", username)
+        logger.debug("logging in to Reddit with username: {}", username)

        self.sb.type("#login-username", username)
        self.sb.type("#login-password", password)
@@ -68,7 +68,7 @@ class RedditDropin(Dropin):
            self.sb.click_link_text("Log in")
            self.sb.wait_for_ready_state_complete()
            if self.sb.is_text_visible("Welcome back"):
-                logger.debug("RedditDropin Login successful")
+                logger.debug("login successful")
                self.sb.click_if_visible("this link")

    def _close_cookies_banner(self):
@@ -88,5 +88,5 @@ class RedditDropin(Dropin):
            .map(el => el.src || el.href)
            .filter(url => url && /\.(m3u8|mpd|ism)$/.test(url));
        """)
-        logger.debug("RedditDropin Found {} video URLs", len(filtered_urls))
+        logger.debug("found {} video URLs", len(filtered_urls))
        return 0, self._download_videos_with_ytdlp(filtered_urls, to_enrich)
--- a/src/auto_archiver/modules/antibot_extractor_enricher/dropins/vk.py
+++ b/src/auto_archiver/modules/antibot_extractor_enricher/dropins/vk.py
@@ -4,7 +4,7 @@ from typing import Mapping
 from auto_archiver.core.metadata import Metadata
 from auto_archiver.modules.antibot_extractor_enricher.dropin import Dropin

-from loguru import logger
+from auto_archiver.utils.custom_logger import logger


 class VkDropin(Dropin):
@@ -57,12 +57,12 @@ class VkDropin(Dropin):
        self.sb.open("https://vk.com")
        self.sb.wait_for_ready_state_complete()
        if "/feed" in self.sb.get_current_url():
-            logger.debug("Already logged in to VK.")
+            logger.debug("already logged in to VK.")
            return True

        # need to login
        username, password = self._get_username_password("vk.com")
-        logger.debug("Logging in to VK with username: {}", username)
+        logger.debug("logging in to VK with username: {}", username)

        self.sb.click('[data-testid="enter-another-way"]', timeout=10)
        self.sb.clear('input[name="login"][type="tel"]', by="css selector", timeout=10)
--- a/src/auto_archiver/modules/api_db/api_db.py
+++ b/src/auto_archiver/modules/api_db/api_db.py
@@ -2,7 +2,7 @@ from typing import Union

 import os
 import requests
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger

 from auto_archiver.core import Database
 from auto_archiver.core import Metadata
@@ -36,9 +36,9 @@ class AAApiDb(Database):
        if not self.store_results:
            return
        if cached:
-            logger.debug(f"skipping saving archive of {item.get_url()} to the AA API because it was cached")
+            logger.debug("skipping saving archive to AA API because it was cached")
            return
-        logger.debug(f"saving archive of {item.get_url()} to the AA API.")
+        logger.debug("saving archive to the AA API.")

        payload = {
            "author_id": self.author_id,
--- a/src/auto_archiver/modules/atlos_feeder_db_storage/atlos_feeder_db_storage.py
+++ b/src/auto_archiver/modules/atlos_feeder_db_storage/atlos_feeder_db_storage.py
@@ -3,7 +3,7 @@ import os
 from typing import IO, Iterator, Optional, Union

 import requests
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger

 from auto_archiver.core import Database, Feeder, Media, Metadata, Storage
 from auto_archiver.utils import calculate_file_hash
@@ -66,13 +66,13 @@ class AtlosFeederDbStorage(Feeder, Database, Storage):
        """Mark an item as failed in Atlos, if the ID exists."""
        atlos_id = item.metadata.get("atlos_id")
        if not atlos_id:
-            logger.info(f"Item {item.get_url()} has no Atlos ID, skipping")
+            logger.info("item has no Atlos ID, skipping")
            return
        self._post(
            f"/api/v2/source_material/metadata/{atlos_id}/auto_archiver",
            json={"metadata": {"processed": True, "status": "error", "error": reason}},
        )
-        logger.info(f"Stored failure for {item.get_url()} (ID {atlos_id}) on Atlos: {reason}")
+        logger.info(f"stored failure ID {atlos_id} on Atlos: {reason}")

    def fetch(self, item: Metadata) -> Union[Metadata, bool]:
        """check and fetch if the given item has been archived already, each
@@ -88,7 +88,7 @@ class AtlosFeederDbStorage(Feeder, Database, Storage):
        """Mark an item as successfully archived in Atlos."""
        atlos_id = item.metadata.get("atlos_id")
        if not atlos_id:
-            logger.info(f"Item {item.get_url()} has no Atlos ID, skipping")
+            logger.info("item has no Atlos ID, skipping")
            return
        self._post(
            f"/api/v2/source_material/metadata/{atlos_id}/auto_archiver",
@@ -100,7 +100,7 @@ class AtlosFeederDbStorage(Feeder, Database, Storage):
                }
            },
        )
-        logger.info(f"Stored success for {item.get_url()} (ID {atlos_id}) on Atlos")
+        logger.info(f"stored success ID {atlos_id} on Atlos")

    # ! Atlos Module - Storage Methods

@@ -111,12 +111,12 @@ class AtlosFeederDbStorage(Feeder, Database, Storage):
    def upload(self, media: Media, metadata: Optional[Metadata] = None, **_kwargs) -> bool:
        """Upload a media file to Atlos if it has not been uploaded already."""
        if metadata is None:
-            logger.error(f"No metadata provided for {media.filename}")
+            logger.error(f"no metadata provided for {media.filename}")
            return False

        atlos_id = metadata.get("atlos_id")
        if not atlos_id:
-            logger.error(f"No Atlos ID found in metadata; can't store {media.filename} in Atlos.")
+            logger.error(f"no Atlos ID found in metadata; can't store {media.filename} in Atlos.")
            return False

        media_hash = calculate_file_hash(media.filename, hash_algo=hashlib.sha256, chunksize=4096)
@@ -135,7 +135,7 @@ class AtlosFeederDbStorage(Feeder, Database, Storage):
                params={"title": media.properties},
                files={"file": (os.path.basename(media.filename), file_obj)},
            )
-        logger.info(f"Uploaded {media.filename} to Atlos with ID {atlos_id} and title {media.key}")
+        logger.info(f"uploaded {media.filename} to Atlos with ID {atlos_id} and title {media.key}")
        return True

    def uploadf(self, file: IO[bytes], key: str, **kwargs: dict) -> bool:
--- a/src/auto_archiver/modules/cli_feeder/cli_feeder.py
+++ b/src/auto_archiver/modules/cli_feeder/cli_feeder.py
@@ -1,5 +1,3 @@
-from loguru import logger
-
 from auto_archiver.core.feeder import Feeder
 from auto_archiver.core.metadata import Metadata
 from auto_archiver.core.consts import SetupError
@@ -16,8 +14,5 @@ class CLIFeeder(Feeder):
    def __iter__(self) -> Metadata:
        urls = self.config["urls"]
        for url in urls:
-            logger.debug(f"Processing {url}")
            m = Metadata().set_url(url)
            yield m
-
-        logger.success(f"Processed {len(urls)} URL(s)")
--- a/src/auto_archiver/modules/console_db/console_db.py
+++ b/src/auto_archiver/modules/console_db/console_db.py
@@ -1,4 +1,4 @@
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger

 from auto_archiver.core import Database
 from auto_archiver.core import Metadata
--- a/src/auto_archiver/modules/csv_db/csv_db.py
+++ b/src/auto_archiver/modules/csv_db/csv_db.py
@@ -1,5 +1,5 @@
 import os
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger
 from csv import DictWriter
 from dataclasses import asdict

--- a/src/auto_archiver/modules/csv_feeder/csv_feeder.py
+++ b/src/auto_archiver/modules/csv_feeder/csv_feeder.py
@@ -1,4 +1,4 @@
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger
 import csv

 from auto_archiver.core import Feeder
@@ -20,20 +20,19 @@ class CSVFeeder(Feeder):
                        url_column = first_row.index(url_column)
                    except ValueError:
                        logger.error(
-                            f"Column {url_column} not found in header row: {first_row}. Did you set the 'column' config correctly?"
+                            f"column {url_column} not found in header row: {first_row}. Did you set the 'column' config correctly?"
                        )
                        return
                elif not (url_or_none(first_row[url_column])):
                    # it's a header row, but we've been given a column number already
-                    logger.debug(f"Skipping header row: {first_row}")
+                    logger.debug(f"skipping header row: {first_row}")
                else:
                    # first row isn't a header row, rewind the file
                    f.seek(0)

                for row in reader:
                    if not url_or_none(row[url_column]):
-                        logger.warning(f"Not a valid URL in row: {row}, skipping")
+                        logger.warning(f"not a valid URL in row: {row}, skipping")
                        continue
                    url = row[url_column]
-                    logger.debug(f"Processing {url}")
                    yield Metadata().set_url(url)
--- a/src/auto_archiver/modules/gdrive_storage/gdrive_storage.py
+++ b/src/auto_archiver/modules/gdrive_storage/gdrive_storage.py
@@ -8,7 +8,7 @@ from google.oauth2 import service_account
 from google.oauth2.credentials import Credentials
 from googleapiclient.discovery import build
 from googleapiclient.http import MediaFileUpload
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger

 from auto_archiver.core import Media
 from auto_archiver.core import Storage
@@ -23,10 +23,10 @@ class GDriveStorage(Storage):
    def _setup_google_drive_service(self):
        """Initialize Google Drive service based on provided credentials."""
        if self.oauth_token:
-            logger.debug(f"Using Google Drive OAuth token: {self.oauth_token}")
+            logger.debug(f"using Google Drive OAuth token: {self.oauth_token}")
            self.service = self._initialize_with_oauth_token()
        elif self.service_account:
-            logger.debug(f"Using Google Drive service account: {self.service_account}")
+            logger.debug(f"using Google Drive service account: {self.service_account}")
            self.service = self._initialize_with_service_account()
        else:
            raise ValueError("Missing credentials: either `oauth_token` or `service_account` must be provided.")
@@ -41,7 +41,7 @@ class GDriveStorage(Storage):
        if not creds.valid and creds.expired and creds.refresh_token:
            creds.refresh(Request())
            with open(self.oauth_token, "w") as token_file:
-                logger.debug("Saving refreshed OAuth token.")
+                logger.debug("saving refreshed OAuth token.")
                token_file.write(creds.to_json())
        elif not creds.valid:
            raise ValueError("Invalid OAuth token. Please regenerate the token.")
@@ -180,7 +180,7 @@ class GDriveStorage(Storage):
        Creates a new GDrive folder @name inside folder @parent_id
        Returns id of the created folder
        """
-        logger.debug(f"Creating new folder with {name=} inside {parent_id=}")
+        logger.debug(f"creating new folder with {name=} inside {parent_id=}")
        file_metadata = {"name": [name], "mimeType": "application/vnd.google-apps.folder", "parents": [parent_id]}
        gd_folder = self.service.files().create(supportsAllDrives=True, body=file_metadata, fields="id").execute()
        return gd_folder.get("id")
--- a/src/auto_archiver/modules/generic_extractor/bluesky.py
+++ b/src/auto_archiver/modules/generic_extractor/bluesky.py
@@ -1,4 +1,4 @@
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger

 from auto_archiver.core.extractor import Extractor
 from auto_archiver.core.metadata import Metadata, Media
@@ -18,7 +18,7 @@ class Bluesky(GenericDropin):
        # download if embeds present (1 video XOR >=1 images)
        for media in self._download_bsky_embeds(post, archiver):
            result.add_media(media)
-        logger.debug(f"Downloaded {len(result.media)} media files")
+        logger.debug(f"downloaded {len(result.media)} media files")

        return result

--- a/src/auto_archiver/modules/generic_extractor/generic_extractor.py
+++ b/src/auto_archiver/modules/generic_extractor/generic_extractor.py
@@ -14,7 +14,7 @@ from yt_dlp.extractor.common import InfoExtractor
 from yt_dlp.utils import MaxDownloadsReached
 import pysubs2

-from loguru import logger
+from auto_archiver.utils.custom_logger import logger

 from auto_archiver.core.extractor import Extractor
 from auto_archiver.core import Metadata, Media
@@ -63,12 +63,11 @@ class GenericExtractor(Extractor):
            if os.environ.get("AUTO_ARCHIVER_ALLOW_RESTART", "1") != "1":
                logger.warning("yt-dlp or plugin was updated — please restart auto-archiver manually")
            else:
-                logger.warning("yt-dlp or plugin was updated — restarting auto-archiver")
-                logger.warning(" ======= RESTARTING ======= ")
+                logger.warning("yt-dlp or plugin was updated — restarting auto-archiver\n ======= RESTARTING ======= ")
                os.execv(sys.executable, [sys.executable] + sys.argv)

    def update_package(self, package_name: str) -> bool:
-        logger.info(f"Checking and updating {package_name}...")
+        logger.info(f"checking and updating {package_name}...")
        from importlib.metadata import version as get_version

        old_version = get_version(package_name)
@@ -80,7 +79,7 @@ class GenericExtractor(Extractor):
                return True
            logger.info(f"{package_name} already up to date")
        except Exception as e:
-            logger.error(f"Error updating {package_name}: {e}")
+            logger.error(f"failed to update {package_name}: {e}")
        return False

    def setup_po_tokens(self) -> None:
@@ -111,7 +110,7 @@ class GenericExtractor(Extractor):
        missing_tools = [tool for tool in ("node", "yarn", "npx") if shutil.which(tool) is None]
        if missing_tools:
            logger.error(
-                f"Cannot set up PO Token script; missing required tools: {', '.join(missing_tools)}. "
+                f"cannot set up PO Token script; missing required tools: {', '.join(missing_tools)}. "
                "Install these tools or run bgutils via Docker. "
                "See: https://github.com/Brainicism/bgutil-ytdlp-pot-provider"
            )
@@ -140,7 +139,7 @@ class GenericExtractor(Extractor):
                    f"https://github.com/Brainicism/bgutil-ytdlp-pot-provider/archive/refs/tags/{plugin_version}.zip"
                )
                zip_path = os.path.join(base_dir, f"{plugin_version}.zip")
-                logger.info(f"Downloading bgutils release zip for version {plugin_version}...")
+                logger.info(f"downloading bgutils release zip for version {plugin_version}...")
                urlretrieve(zip_url, zip_path)
                with zipfile.ZipFile(zip_path, "r") as z:
                    z.extractall(base_dir)
@@ -149,7 +148,7 @@ class GenericExtractor(Extractor):
                extracted_root = os.path.join(base_dir, f"bgutil-ytdlp-pot-provider-{plugin_version}")
                shutil.move(os.path.join(extracted_root, "server"), server_dir)
                shutil.rmtree(extracted_root)
-                logger.info("Installing dependencies and transpiling PoT Generator script...")
+                logger.info("installing dependencies and transpiling PoT Generator script...")
                subprocess.run(["yarn", "install", "--frozen-lockfile"], cwd=server_dir, check=True)
                subprocess.run(["npx", "tsc"], cwd=server_dir, check=True)

@@ -165,7 +164,7 @@ class GenericExtractor(Extractor):
            logger.info(f"PO Token script configured at: {script_path}")

        except Exception as e:
-            logger.error(f"Failed to set up PO Token script: {e}")
+            logger.error(f"failed to set up PO Token script: {e}")

    def suitable_extractors(self, url: str) -> Generator[str, None, None]:
        """
@@ -206,7 +205,7 @@ class GenericExtractor(Extractor):
                media = Media(cover_image_path)
                metadata.add_media(media, id="cover")
            except Exception as e:
-                logger.error(f"Error downloading cover image {thumbnail_url}: {e}")
+                logger.error(f"could not download cover image {thumbnail_url}: {e}")

        dropin = self.dropin_for_name(info_extractor.ie_key())
        if dropin:
@@ -353,7 +352,7 @@ class GenericExtractor(Extractor):

        if not dropin:
            # TODO: add a proper link to 'how to create your own dropin'
-            logger.debug(f"""Could not find valid dropin for {info_extractor.ie_key()}.
+            logger.debug(f"""could not find valid dropin for {info_extractor.ie_key()}.
                     Why not try creating your own, and make sure it has a valid function called 'create_metadata'. Learn more: https://auto-archiver.readthedocs.io/en/latest/user_guidelines.html#""")
            return False

@@ -389,7 +388,7 @@ class GenericExtractor(Extractor):
                    # file was not downloaded or could not be retrieved, example: sensitive videos on YT without using cookies.
                    continue

-                logger.debug(f"Using filename {filename} for entry {entry.get('id', 'unknown')}")
+                logger.debug(f"using filename {filename} for entry {entry.get('id', 'unknown')}")

                new_media = Media(filename)
                for x in ["duration", "original_url", "fulltitle", "description", "upload_date"]:
@@ -404,12 +403,12 @@ class GenericExtractor(Extractor):
                            text = " ".join([line.text for line in subs])
                            new_media.set(f"subtitles_{lang}", text)
                        except Exception as e:
-                            logger.error(f"Error loading subtitle file {val.get('filepath')}: {e}")
+                            logger.error(f"error loading subtitle file {val.get('filepath')}: {e}")
                result.add_media(new_media)
            except Exception as e:
-                logger.error(f"Error processing entry {entry}: {e}")
+                logger.error(f"error processing entry {entry}: {e}")
        if not len(result.media):
-            logger.info(f"No media found for entry {entry}, skipping.")
+            logger.info(f"no media found for entry {entry}, skipping.")
            return False

        return self.add_metadata(data, info_extractor, url, result)
@@ -471,14 +470,14 @@ class GenericExtractor(Extractor):

        def _helper_for_successful_extract_info(data, info_extractor, url, ydl):
            if data.get("is_live", False) and not self.livestreams:
-                logger.warning("Livestream detected, skipping due to 'livestreams' configuration setting")
+                logger.warning("livestream detected, skipping due to 'livestreams' configuration setting")
                return False
            # it's a valid video, that the youtubdedl can download out of the box
            return self.get_metadata_for_video(data, info_extractor, url, ydl)

        try:
            if dropin_submodule and dropin_submodule.skip_ytdlp_download(url, info_extractor):
-                logger.debug(f"Skipping using ytdlp to download files for {info_extractor.ie_key()}")
+                logger.debug(f"skipping using ytdlp to download files for {info_extractor.ie_key()}")
                raise SkipYtdlp()

            # don't download since it can be a live stream
@@ -497,17 +496,17 @@ class GenericExtractor(Extractor):

            if not isinstance(e, SkipYtdlp):
                logger.debug(
-                    f'Issue using "{info_extractor.IE_NAME}" extractor to download video (error: {repr(e)}), attempting to use dropin to get post data instead'
+                    f'issue using "{info_extractor.IE_NAME}" extractor to download video (error: {repr(e)}), attempting to use dropin to get post data instead'
                )

            try:
                result = self.get_metadata_for_post(info_extractor, url, ydl)
            except (yt_dlp.utils.DownloadError, yt_dlp.utils.ExtractorError) as post_e:
-                logger.error("Error downloading metadata for post: {error}", error=str(post_e))
+                logger.error("error downloading metadata for post: {error}", error=str(post_e))
                return False
            except Exception as generic_e:
                logger.debug(
-                    'Attempt to extract using ytdlp extractor "{name}" failed:  \n  {error}',
+                    'attempt to extract using ytdlp extractor "{name}" failed:  \n  {error}',
                    name=info_extractor.IE_NAME,
                    error=str(generic_e),
                    exc_info=True,
@@ -560,17 +559,17 @@ class GenericExtractor(Extractor):
        # order of importance: username/password -> api_key -> cookie -> cookies_from_browser -> cookies_file
        if auth:
            if "username" in auth and "password" in auth:
-                logger.debug(f"Using provided auth username and password for {url}")
+                logger.debug("using provided auth username and password")
                ydl_options.extend(("--username", auth["username"]))
                ydl_options.extend(("--password", auth["password"]))
            elif "cookie" in auth:
-                logger.debug(f"Using provided auth cookie for {url}")
+                logger.debug("using provided auth cookie")
                yt_dlp.utils.std_headers["cookie"] = auth["cookie"]
            elif "cookies_from_browser" in auth:
-                logger.debug(f"Using extracted cookies from browser {auth['cookies_from_browser']} for {url}")
+                logger.debug(f"using extracted cookies from browser {auth['cookies_from_browser']}")
                ydl_options.extend(("--cookies-from-browser", auth["cookies_from_browser"]))
            elif "cookies_file" in auth:
-                logger.debug(f"Using cookies from file {auth['cookies_file']} for {url}")
+                logger.debug(f"using cookies from file {auth['cookies_file']}")
                ydl_options.extend(("--cookies", auth["cookies_file"]))

        # Applying user-defined extractor_args
@@ -580,11 +579,11 @@ class GenericExtractor(Extractor):
                    arg_str = ";".join(f"{k}={v}" for k, v in args.items())
                else:
                    arg_str = str(args)
-                logger.debug(f"Setting extractor_args: {key}:{arg_str}")
+                logger.debug(f"setting extractor_args: {key}:{arg_str}")
                ydl_options.extend(["--extractor-args", f"{key}:{arg_str}"])

        if self.ytdlp_args:
-            logger.debug("Adding additional ytdlp arguments: {self.ytdlp_args}")
+            logger.debug(f"adding additional ytdlp arguments: {self.ytdlp_args}")
            ydl_options += self.ytdlp_args.split(" ")

        *_, validated_options = yt_dlp.parse_options(ydl_options)
--- a/src/auto_archiver/modules/generic_extractor/tiktok.py
+++ b/src/auto_archiver/modules/generic_extractor/tiktok.py
@@ -1,5 +1,5 @@
 import requests
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger

 from yt_dlp.extractor.tiktok import TikTokIE, TikTokLiveIE, TikTokVMIE, TikTokUserIE

@@ -22,7 +22,7 @@ class Tiktok(GenericDropin):
        return any(extractor().suitable(url) for extractor in (TikTokIE, TikTokLiveIE, TikTokVMIE, TikTokUserIE))

    def extract_post(self, url: str, ie_instance):
-        logger.debug(f"Using Tikwm API to attempt to download tiktok video from {url=}")
+        logger.debug(f"using Tikwm API to attempt to download tiktok video from {url=}")

        endpoint = self.TIKWM_ENDPOINT.format(url=url)

--- a/src/auto_archiver/modules/generic_extractor/twitter.py
+++ b/src/auto_archiver/modules/generic_extractor/twitter.py
@@ -1,7 +1,7 @@
 import re
 import mimetypes

-from loguru import logger
+from auto_archiver.utils.custom_logger import logger
 from slugify import slugify

 from auto_archiver.core.metadata import Metadata, Media
@@ -40,7 +40,7 @@ class Twitter(GenericDropin):
                raise ValueError("Error retreiving post. Are you sure it exists?")
            timestamp = get_datetime_from_str(tweet["created_at"], "%a %b %d %H:%M:%S %z %Y")
        except (ValueError, KeyError) as ex:
-            logger.warning(f"Unable to parse tweet: {str(ex)}\nRetreived tweet data: {tweet}")
+            logger.warning(f"unable to parse tweet: {str(ex)}\nRetrieved tweet data: {tweet}")
            return False

        full_text = tweet.pop("full_text", "")
@@ -49,7 +49,7 @@ class Twitter(GenericDropin):

        result.set_title(f"{author} - {full_text}").set_content(full_text).set_timestamp(timestamp)
        if not tweet.get("entities", {}).get("media"):
-            logger.debug("No media found, archiving tweet text only")
+            logger.debug("no media found, archiving tweet text only")
            result.status = "twitter-ytdl"
            return result
        for i, tw_media in enumerate(tweet["entities"]["media"]):
--- a/src/auto_archiver/modules/gsheet_feeder_db/gsheet_feeder_db.py
+++ b/src/auto_archiver/modules/gsheet_feeder_db/gsheet_feeder_db.py
@@ -10,11 +10,12 @@ The filtered rows are processed into `Metadata` objects.
 """

 import os
+import traceback
 from typing import Tuple, Union, Iterator
 from urllib.parse import quote

 import gspread
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger
 from slugify import slugify
 from retrying import retry

@@ -41,19 +42,19 @@ class GsheetsFeederDB(Feeder, Database):
        sh = self.open_sheet()
        for ii, worksheet in enumerate(sh.worksheets()):
            if not self.should_process_sheet(worksheet.title):
-                logger.debug(f"SKIPPED worksheet '{worksheet.title}' due to allow/block rules")
+                logger.debug(f"skipped worksheet '{worksheet.title}' due to allow/block rules")
                continue
-            logger.info(f"Opening worksheet {ii=}: {worksheet.title=} header={self.header}")
+            logger.info(f"opening worksheet {ii=}: {worksheet.title=} header={self.header}")
            gw = GWorksheet(worksheet, header_row=self.header, columns=self.columns)
            if len(missing_cols := self.missing_required_columns(gw)):
                logger.debug(
-                    f"SKIPPED worksheet '{worksheet.title}' due to missing required column(s) for {missing_cols}"
+                    f"skipped worksheet '{worksheet.title}' due to missing required column(s) for {missing_cols}"
                )
                continue
-
-            # process and yield metadata here:
-            yield from self._process_rows(gw)
-            logger.info(f"Finished worksheet {worksheet.title}")
+            with logger.contextualize(worksheet=f"{sh.title}:{worksheet.title}"):
+                # process and yield metadata here:
+                yield from self._process_rows(gw)
+            logger.info(f"finished worksheet {worksheet.title}")

    def _process_rows(self, gw: GWorksheet):
        for row in range(1 + self.header, gw.count_rows() + 1):
@@ -69,7 +70,9 @@ class GsheetsFeederDB(Feeder, Database):
            # All checks done - archival process starts here
            m = Metadata().set_url(url)
            self._set_context(m, gw, row)
-            yield m
+
+            with logger.contextualize(row=row):
+                yield m

    def _set_context(self, m: Metadata, gw: GWorksheet, row: int) -> Metadata:
        # TODO: Check folder value not being recognised
@@ -99,16 +102,16 @@ class GsheetsFeederDB(Feeder, Database):
        return missing

    def started(self, item: Metadata) -> None:
-        logger.info(f"STARTED {item}")
+        logger.info("STARTED")
        gw, row = self._retrieve_gsheet(item)
        gw.set_cell(row, "status", "Archive in progress")

    def failed(self, item: Metadata, reason: str) -> None:
-        logger.error(f"FAILED {item}")
+        logger.error("FAILED")
        self._safe_status_update(item, f"Archive failed {reason}")

    def aborted(self, item: Metadata) -> None:
-        logger.warning(f"ABORTED {item}")
+        logger.warning("ABORTED")
        self._safe_status_update(item, "")

    def fetch(self, item: Metadata) -> Union[Metadata, bool]:
@@ -122,9 +125,7 @@ class GsheetsFeederDB(Feeder, Database):
        cell_updates = []
        row_values = gw.get_row(row)

-        spreadsheet = gw.wks.spreadsheet.title
-        worksheet = gw.wks.title
-        logger.info(f"DONE url='{item.get_url()}' {row=} on {spreadsheet=} : {worksheet=}")
+        logger.info("DONE")

        def batch_if_valid(col, val, final_value=None):
            final_value = final_value or val
@@ -132,7 +133,7 @@ class GsheetsFeederDB(Feeder, Database):
                if val and gw.col_exists(col) and gw.get_cell(row_values, col) == "":
                    cell_updates.append((row, col, final_value))
            except Exception as e:
-                logger.error(f"Unable to batch {col}={final_value} due to {e}")
+                logger.error(f"unable to batch {col}={final_value} due to {e}")

        status_message = item.status
        if cached:
@@ -192,15 +193,13 @@ class GsheetsFeederDB(Feeder, Database):
            gw, row = self._retrieve_gsheet(item)
            gw.set_cell(row, "status", new_status)
        except Exception as e:
-            logger.debug(f"Unable to update sheet: {e}")
+            logger.debug(f"unable to update sheet: {e}: {traceback.format_exc()}")

    def _retrieve_gsheet(self, item: Metadata) -> Tuple[GWorksheet, int]:
        if gsheet := item.get_context("gsheet"):
            gw: GWorksheet = gsheet.get("worksheet")
            row: int = gsheet.get("row")
        elif self.sheet_id:
-            logger.error(
-                f"Unable to retrieve Gsheet for {item.get_url()}, GsheetDB must be used alongside GsheetFeeder."
-            )
+            logger.error("unable to retrieve Gsheet, GsheetDB must be used alongside GsheetFeeder.")

        return gw, row
--- a/src/auto_archiver/modules/hash_enricher/hash_enricher.py
+++ b/src/auto_archiver/modules/hash_enricher/hash_enricher.py
@@ -9,7 +9,7 @@ making it suitable for handling large files efficiently.
 """

 import hashlib
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger

 from auto_archiver.core import Enricher
 from auto_archiver.core import Metadata
@@ -22,8 +22,7 @@ class HashEnricher(Enricher):
    """

    def enrich(self, to_enrich: Metadata) -> None:
-        url = to_enrich.get_url()
-        logger.debug(f"calculating media hashes for {url=} (using {self.algorithm})")
+        logger.debug(f"calculating media hashes with algo={self.algorithm}")

        for i, m in enumerate(to_enrich.media):
            if len(hd := self.calculate_hash(m.filename)):
--- a/src/auto_archiver/modules/html_formatter/html_formatter.py
+++ b/src/auto_archiver/modules/html_formatter/html_formatter.py
@@ -4,7 +4,7 @@ import os
 import pathlib
 from jinja2 import Environment, FileSystemLoader
 from urllib.parse import quote
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger
 import json
 import base64

@@ -35,7 +35,7 @@ class HtmlFormatter(Formatter):
    def format(self, item: Metadata) -> Media:
        url = item.get_url()
        if item.is_empty():
-            logger.debug(f"[SKIP] FORMAT there is no media or metadata to format: {url=}")
+            logger.debug("nothing to format, skipping")
            return

        content = self.template.render(
--- a/src/auto_archiver/modules/instagram_api_extractor/instagram_api_extractor.py
+++ b/src/auto_archiver/modules/instagram_api_extractor/instagram_api_extractor.py
@@ -14,7 +14,7 @@ from datetime import datetime
 import traceback

 import requests
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger
 from retrying import retry
 from tqdm import tqdm

@@ -45,11 +45,11 @@ class InstagramAPIExtractor(Extractor):
        url = item.get_url()
        url.replace("instagr.com", "instagram.com").replace("instagr.am", "instagram.com")
        insta_matches = self.valid_url.findall(url)
-        logger.info(f"{insta_matches=}")
+
        if not len(insta_matches) or len(insta_matches[0]) != 3:
            return
        if len(insta_matches) > 1:
-            logger.warning(f"Multiple instagram matches found in {url=}, using the first one")
+            logger.debug("multiple instagram matches found, using the first one")
            return
        g1, g2, g3 = insta_matches[0][0], insta_matches[0][1], insta_matches[0][2]
        if g1 == "":
@@ -65,7 +65,7 @@ class InstagramAPIExtractor(Extractor):
                return self.download_post(item, id=g3, context="story")
            return self.download_stories(item, g2)
        else:
-            logger.warning(f"Unknown instagram regex group match {g1=} found in {url=}")
+            logger.warning(f"unknown instagram regex group match {g1=}")
            return

    @retry(wait_random_min=1000, wait_random_max=3000, stop_max_attempt_number=5)
@@ -112,8 +112,8 @@ class InstagramAPIExtractor(Extractor):
                count_posts += len(stories)
                result.set("#stories", len(stories))
            except Exception as e:
-                result.append("errors", f"Error downloading stories for {username}")
-                logger.error(f"Error downloading stories for {username}: {e} {traceback.format_exc()}")
+                result.append("errors", f"error downloading stories for {username}")
+                logger.error(f"error downloading stories for {username}: {e} {traceback.format_exc()}")

            # download all posts
            try:
@@ -122,8 +122,8 @@ class InstagramAPIExtractor(Extractor):
                        result, user_id, max_to_download=self.full_profile_max_posts - count_posts
                    )
            except Exception as e:
-                result.append("errors", f"Error downloading posts for {username}")
-                logger.error(f"Error downloading posts for {username}: {e} {traceback.format_exc()}")
+                result.append("errors", f"error downloading posts for {username}")
+                logger.error(f"error downloading posts for {username}: {e} {traceback.format_exc()}")

            # download all tagged
            try:
@@ -132,8 +132,8 @@ class InstagramAPIExtractor(Extractor):
                        result, user_id, max_to_download=self.full_profile_max_posts - count_posts
                    )
            except Exception as e:
-                result.append("errors", f"Error downloading tagged posts for {username}")
-                logger.error(f"Error downloading tagged posts for {username}: {e} {traceback.format_exc()}")
+                result.append("errors", f"error downloading tagged posts for {username}")
+                logger.error(f"error downloading tagged posts for {username}: {e} {traceback.format_exc()}")

            # download all highlights
            try:
@@ -159,10 +159,10 @@ class InstagramAPIExtractor(Extractor):
            except Exception as e:
                result.append(
                    "errors",
-                    f"Error downloading highlight id{h.get('pk')} for {username}",
+                    f"error downloading highlight id{h.get('pk')} for {username}",
                )
                logger.error(
-                    f"Error downloading highlight id{h.get('pk')} for {username}: {e} {traceback.format_exc()}"
+                    f"error downloading highlight id{h.get('pk')} for {username}: {e} {traceback.format_exc()}"
                )
            if count_highlights >= max_to_download:
                logger.debug(f"HIGHLIGHTS reached max_to_download={self.full_profile_max_posts}")
@@ -208,8 +208,8 @@ class InstagramAPIExtractor(Extractor):
            try:
                self.scrape_item(result, h, "highlight")
            except Exception as e:
-                result.append("errors", f"Error downloading highlight {h.get('id')}")
-                logger.error(f"Error downloading highlight, skipping {h.get('id')}: {e} {traceback.format_exc()}")
+                result.append("errors", f"error downloading highlight {h.get('id')}")
+                logger.error(f"error downloading highlight, skipping {h.get('id')}: {e} {traceback.format_exc()}")

        return h_info

@@ -251,8 +251,8 @@ class InstagramAPIExtractor(Extractor):
                try:
                    self.scrape_item(result, p, "post")
                except Exception as e:
-                    result.append("errors", f"Error downloading post {p.get('id')}")
-                    logger.error(f"Error downloading post, skipping {p.get('id')}: {e} {traceback.format_exc()}")
+                    result.append("errors", f"error downloading post {p.get('id')}")
+                    logger.error(f"error downloading post, skipping {p.get('id')}: {e} {traceback.format_exc()}")
                pbar.update(1)
                post_count += 1
            if post_count >= max_to_download:
@@ -279,8 +279,8 @@ class InstagramAPIExtractor(Extractor):
                try:
                    self.scrape_item(result, p, "tagged")
                except Exception as e:
-                    result.append("errors", f"Error downloading tagged post {p.get('id')}")
-                    logger.error(f"Error downloading tagged post, skipping {p.get('id')}: {e} {traceback.format_exc()}")
+                    result.append("errors", f"error downloading tagged post {p.get('id')}")
+                    logger.error(f"error downloading tagged post, skipping {p.get('id')}: {e} {traceback.format_exc()}")
                pbar.update(1)
                tagged_count += 1
            if tagged_count >= max_to_download:
--- a/src/auto_archiver/modules/instagram_extractor/instagram_extractor.py
+++ b/src/auto_archiver/modules/instagram_extractor/instagram_extractor.py
@@ -8,7 +8,7 @@ import re
 import os
 import shutil
 import instaloader
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger

 from auto_archiver.core import Extractor
 from auto_archiver.core import Metadata
@@ -29,8 +29,9 @@ class InstagramExtractor(Extractor):
    # TODO: links to stories

    def setup(self) -> None:
-        logger.warning("Instagram Extractor is not actively maintained, and may not work as expected.")
-        logger.warning("Please consider using the Instagram Tbot Extractor or Instagram API Extractor instead.")
+        logger.warning(
+            "Instagram Extractor is not actively maintained, and may not work as expected.\nPlease consider using the Instagram Tbot Extractor or Instagram API Extractor instead."
+        )

        self.insta = instaloader.Instaloader(
            download_geotags=True,
@@ -43,12 +44,11 @@ class InstagramExtractor(Extractor):
            self.insta.load_session_from_file(self.username, self.session_file)
        except Exception:
            try:
-                logger.debug("Session file failed", exc_info=True)
-                logger.info("No valid session file found - Attempting login with use and password.")
+                logger.info("no valid session file found - Attempting login with user and password.")
                self.insta.login(self.username, self.password)
                self.insta.save_session_to_file(self.session_file)
            except Exception as e:
-                logger.error(f"Failed to setup Instagram Extractor with Instagrapi. {e}")
+                logger.error(f"failed to setup Instagram Extractor with Instagrapi. {e}")

    def download(self, item: Metadata) -> Metadata:
        url = item.get_url()
@@ -72,14 +72,14 @@ class InstagramExtractor(Extractor):
                result = self.download_profile(url, profile_matches[0])
        except Exception as e:
            logger.error(
-                f"Failed to download with instagram extractor due to: {e}, make sure your account credentials are valid."
+                f"failed to download with instagram extractor due to: {e}, make sure your account credentials are valid."
            )
        finally:
            shutil.rmtree(self.download_folder, ignore_errors=True)
        return result

    def download_post(self, url: str, post_id: str) -> Metadata:
-        logger.debug(f"Instagram {post_id=} detected in {url=}")
+        logger.debug(f"Instagram {post_id=} detected")

        post = instaloader.Post.from_shortcode(self.insta.context, post_id)
        if self.insta.download_post(post, target=post.owner_username):
@@ -87,7 +87,7 @@ class InstagramExtractor(Extractor):

    def download_profile(self, url: str, username: str) -> Metadata:
        # gets posts, posts where username is tagged, igtv postss, stories, and highlights
-        logger.debug(f"Instagram {username=} detected in {url=}")
+        logger.debug(f"Instagram {username=} detected")

        profile = instaloader.Profile.from_username(self.insta.context, username)
        try:
@@ -95,27 +95,27 @@ class InstagramExtractor(Extractor):
                try:
                    self.insta.download_post(post, target=f"profile_post_{post.owner_username}")
                except Exception as e:
-                    logger.error(f"Failed to download post: {post.shortcode}: {e}")
+                    logger.error(f"failed to download post: {post.shortcode}: {e}")
        except Exception as e:
-            logger.error(f"Failed profile.get_posts: {e}")
+            logger.error(f"failed profile.get_posts: {e}")

        try:
            for post in profile.get_tagged_posts():
                try:
                    self.insta.download_post(post, target=f"tagged_post_{post.owner_username}")
                except Exception as e:
-                    logger.error(f"Failed to download tagged post: {post.shortcode}: {e}")
+                    logger.error(f"failed to download tagged post: {post.shortcode}: {e}")
        except Exception as e:
-            logger.error(f"Failed profile.get_tagged_posts: {e}")
+            logger.error(f"failed profile.get_tagged_posts: {e}")

        try:
            for post in profile.get_igtv_posts():
                try:
                    self.insta.download_post(post, target=f"igtv_post_{post.owner_username}")
                except Exception as e:
-                    logger.error(f"Failed to download igtv post: {post.shortcode}: {e}")
+                    logger.error(f"failed to download igtv post: {post.shortcode}: {e}")
        except Exception as e:
-            logger.error(f"Failed profile.get_igtv_posts: {e}")
+            logger.error(f"failed profile.get_igtv_posts: {e}")

        try:
            for story in self.insta.get_stories([profile.userid]):
@@ -123,9 +123,9 @@ class InstagramExtractor(Extractor):
                    try:
                        self.insta.download_storyitem(item, target=f"story_item_{story.owner_username}")
                    except Exception as e:
-                        logger.error(f"Failed to download story item: {item}: {e}")
+                        logger.error(f"failed to download story item: {item}: {e}")
        except Exception as e:
-            logger.error(f"Failed get_stories: {e}")
+            logger.error(f"failed get_stories: {e}")

        try:
            for highlight in self.insta.get_highlights(profile.userid):
@@ -133,9 +133,9 @@ class InstagramExtractor(Extractor):
                    try:
                        self.insta.download_storyitem(item, target=f"highlight_item_{highlight.owner_username}")
                    except Exception as e:
-                        logger.error(f"Failed to download highlight item: {item}: {e}")
+                        logger.error(f"failed to download highlight item: {item}: {e}")
        except Exception as e:
-            logger.error(f"Failed get_highlights: {e}")
+            logger.error(f"failed get_highlights: {e}")

        return self.process_downloads(url, f"@{username}", profile._asdict(), None)

@@ -158,4 +158,4 @@ class InstagramExtractor(Extractor):

            return result.success("instagram")
        except Exception as e:
-            logger.error(f"Could not fetch instagram post {url} due to: {e}")
+            logger.error(f"could not fetch instagram post due to: {e}")
--- a/src/auto_archiver/modules/instagram_tbot_extractor/instagram_tbot_extractor.py
+++ b/src/auto_archiver/modules/instagram_tbot_extractor/instagram_tbot_extractor.py
@@ -12,7 +12,7 @@ import shutil
 import time
 from sqlite3 import OperationalError

-from loguru import logger
+from auto_archiver.utils.custom_logger import logger
 from telethon.sync import TelegramClient

 from auto_archiver.core import Extractor
--- a/src/auto_archiver/modules/json_enricher/json_enricher.py
+++ b/src/auto_archiver/modules/json_enricher/json_enricher.py
@@ -1,5 +1,5 @@
 import json
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger
 import os

 from auto_archiver.core import Enricher
--- a/src/auto_archiver/modules/local_storage/local_storage.py
+++ b/src/auto_archiver/modules/local_storage/local_storage.py
@@ -1,7 +1,7 @@
 import shutil
 from typing import IO
 import os
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger

 from auto_archiver.core import Media
 from auto_archiver.core import Storage
--- a/src/auto_archiver/modules/meta_enricher/meta_enricher.py
+++ b/src/auto_archiver/modules/meta_enricher/meta_enricher.py
@@ -1,6 +1,6 @@
 import datetime
 import os
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger

 from auto_archiver.core import Enricher
 from auto_archiver.core import Metadata
--- a/src/auto_archiver/modules/metadata_enricher/metadata_enricher.py
+++ b/src/auto_archiver/modules/metadata_enricher/metadata_enricher.py
@@ -1,6 +1,6 @@
 import subprocess
 import traceback
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger

 from auto_archiver.core import Enricher
 from auto_archiver.core import Metadata
--- a/src/auto_archiver/modules/opentimestamps_enricher/opentimestamps_enricher.py
+++ b/src/auto_archiver/modules/opentimestamps_enricher/opentimestamps_enricher.py
@@ -1,6 +1,6 @@
 import os

-from loguru import logger
+from auto_archiver.utils.custom_logger import logger
 import opentimestamps
 from opentimestamps.calendar import RemoteCalendar, DEFAULT_CALENDAR_WHITELIST
 from opentimestamps.core.timestamp import Timestamp, DetachedTimestampFile
--- a/src/auto_archiver/modules/pdq_hash_enricher/pdq_hash_enricher.py
+++ b/src/auto_archiver/modules/pdq_hash_enricher/pdq_hash_enricher.py
@@ -15,7 +15,7 @@ import traceback
 import pdqhash
 import numpy as np
 from PIL import Image, UnidentifiedImageError
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger

 from auto_archiver.core import Enricher
 from auto_archiver.core import Metadata
--- a/src/auto_archiver/modules/s3_storage/s3_storage.py
+++ b/src/auto_archiver/modules/s3_storage/s3_storage.py
@@ -2,7 +2,7 @@ from typing import IO

 import boto3
 import os
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger

 from auto_archiver.core import Media
 from auto_archiver.core import Storage
--- a/src/auto_archiver/modules/ssl_enricher/ssl_enricher.py
+++ b/src/auto_archiver/modules/ssl_enricher/ssl_enricher.py
@@ -2,7 +2,7 @@ import ssl
 import os
 from slugify import slugify
 from urllib.parse import urlparse
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger

 from auto_archiver.core import Enricher
 from auto_archiver.core import Metadata, Media
--- a/src/auto_archiver/modules/telegram_extractor/telegram_extractor.py
+++ b/src/auto_archiver/modules/telegram_extractor/telegram_extractor.py
@@ -2,7 +2,7 @@ import requests
 import re
 import html
 from bs4 import BeautifulSoup
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger

 from auto_archiver.core import Extractor
 from auto_archiver.core import Metadata, Media
--- a/src/auto_archiver/modules/telethon_extractor/telethon_extractor.py
+++ b/src/auto_archiver/modules/telethon_extractor/telethon_extractor.py
@@ -17,7 +17,7 @@ from telethon.errors.rpcerrorlist import (
 )

 from tqdm import tqdm
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger

 from auto_archiver.core import Extractor
 from auto_archiver.core import Metadata, Media
--- a/src/auto_archiver/modules/thumbnail_enricher/thumbnail_enricher.py
+++ b/src/auto_archiver/modules/thumbnail_enricher/thumbnail_enricher.py
@@ -9,7 +9,7 @@ and identify important moments without watching the entire video.

 import ffmpeg
 import os
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger

 from auto_archiver.core import Enricher
 from auto_archiver.core import Media, Metadata
--- a/src/auto_archiver/modules/timestamping_enricher/timestamping_enricher.py
+++ b/src/auto_archiver/modules/timestamping_enricher/timestamping_enricher.py
@@ -5,7 +5,7 @@ import hashlib

 from slugify import slugify
 import requests
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger

 from rfc3161_client import (decode_timestamp_response,TimestampRequestBuilder,TimeStampResponse, VerifierBuilder)
 from rfc3161_client import VerificationError as Rfc3161VerificationError
--- a/src/auto_archiver/modules/twitter_api_extractor/twitter_api_extractor.py
+++ b/src/auto_archiver/modules/twitter_api_extractor/twitter_api_extractor.py
@@ -4,7 +4,7 @@ import re
 import mimetypes
 import requests

-from loguru import logger
+from auto_archiver.utils.custom_logger import logger
 from pytwitter import Api
 from slugify import slugify

--- a/src/auto_archiver/modules/wacz_extractor_enricher/wacz_extractor_enricher.py
+++ b/src/auto_archiver/modules/wacz_extractor_enricher/wacz_extractor_enricher.py
@@ -4,7 +4,7 @@ import os
 import shutil
 import subprocess
 from zipfile import ZipFile
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger
 from warcio.archiveiterator import ArchiveIterator

 from auto_archiver.core import Media, Metadata
--- a/src/auto_archiver/modules/wayback_extractor_enricher/wayback_extractor_enricher.py
+++ b/src/auto_archiver/modules/wayback_extractor_enricher/wayback_extractor_enricher.py
@@ -1,5 +1,5 @@
 import json
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger
 import time
 import requests

--- a/src/auto_archiver/modules/whisper_enricher/whisper_enricher.py
+++ b/src/auto_archiver/modules/whisper_enricher/whisper_enricher.py
@@ -1,7 +1,7 @@
 import traceback
 import requests
 import time
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger

 from auto_archiver.core import Enricher
 from auto_archiver.core import Metadata, Media
--- a/src/auto_archiver/utils/custom_logger.py
+++ b/src/auto_archiver/utils/custom_logger.py
@@ -0,0 +1,69 @@
+"""Project-wide loguru logger that attaches JSON renderings of each record.
+
+Import ``logger`` from this module instead of from ``loguru`` so every record
+carries ``extra["serialized"]`` and ``extra["serialize_no_message"]``.
+"""
+
+import json
+
+from loguru import logger
+
+
+def _dumps(data):
+    """Dump *data* to a JSON string without raising on non-JSON values."""
+    # default=str is deliberate: a value bound into ``extra`` may not be JSON
+    # serializable, and a logging patcher must not raise into the caller.
+    return json.dumps(data, ensure_ascii=False, default=str)
+
+
+def extract_log_data(record):
+    """Return the JSON-friendly subset of a loguru *record*.
+
+    Always includes ``level``, ``time`` (ISO format, second precision), ``loc``
+    (``file:function:line``) and ``message``; the extras ``trace``, ``url``,
+    ``worksheet`` and ``row`` are copied only when present and truthy.
+    ``exception`` is included as ``"TypeName: message"`` when one is attached.
+    """
+    subset = {
+        "level": record["level"].name,
+        "time": record["time"].isoformat(timespec="seconds"),
+        "loc": f"{record['file'].name}:{record['function']}:{record['line']}",
+    }
+
+    extra = record.get("extra", {})
+    for extra_key in ("trace", "url", "worksheet", "row"):
+        if extra_val := extra.get(extra_key):
+            subset[extra_key] = extra_val
+
+    subset["message"] = record["message"]
+    if exception := record.get("exception"):
+        # loguru stores a (type, value, traceback) tuple here, which json
+        # cannot encode; keep a short readable string instead.
+        exc_type, exc_value = exception[0], exception[1]
+        if exc_type is not None:
+            subset["exception"] = f"{exc_type.__name__}: {exc_value}"
+    return subset
+
+
+def serialize_no_message(record):
+    """Return *record* as a JSON string, omitting the ``message`` field."""
+    subset = extract_log_data(record)
+    subset.pop("message", None)
+    return _dumps(subset)
+
+
+def serialize(record):
+    """Return *record* as a JSON string, including the ``message`` field."""
+    return _dumps(extract_log_data(record))
+
+
+def patching(record):
+    """Loguru patcher: store both JSON renderings in ``record["extra"]``."""
+    # Extract once and reuse; the second rendering only drops "message".
+    data = extract_log_data(record)
+    record["extra"]["serialized"] = _dumps(data)
+    data.pop("message", None)
+    record["extra"]["serialize_no_message"] = _dumps(data)
+
+
+logger = logger.patch(patching)
--- a/src/auto_archiver/utils/misc.py
+++ b/src/auto_archiver/utils/misc.py
@@ -7,7 +7,7 @@ from datetime import datetime, timezone
 from dateutil.parser import parse as parse_dt

 import requests
-from loguru import logger
+from auto_archiver.utils.custom_logger import logger


 def mkdir_if_not_exists(folder):
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -9,7 +9,7 @@ from tempfile import TemporaryDirectory
 from typing import Dict, Tuple
 import hashlib

-from loguru import logger
+from auto_archiver.utils.custom_logger import logger
 import pytest
 from auto_archiver.core.metadata import Metadata, Media
 from auto_archiver.core.module import ModuleFactory
--- a/tests/data/test_modules/example_extractor/example_extractor.py
+++ b/tests/data/test_modules/example_extractor/example_extractor.py
@@ -1,6 +1,6 @@
 from auto_archiver.core import Extractor

-from loguru import logger
+from auto_archiver.utils.custom_logger import logger


 class ExampleExtractor(Extractor):
--- a/tests/data/test_modules/example_module/example_module.py
+++ b/tests/data/test_modules/example_module/example_module.py
@@ -1,6 +1,6 @@
 from auto_archiver.core import Extractor, Enricher, Feeder, Database, Storage, Formatter, Metadata

-from loguru import logger
+from auto_archiver.utils.custom_logger import logger


 class ExampleModule(Extractor, Enricher, Feeder, Database, Storage, Formatter):
--- a/tests/test_implementation.py
+++ b/tests/test_implementation.py
@@ -25,7 +25,7 @@ def orchestration_file(orchestration_file_path):
 def autoarchiver(tmp_path, monkeypatch, request):
    def _autoarchiver(args=[]):
        def cleanup():
-            from loguru import logger
+            from auto_archiver.utils.custom_logger import logger

            if not logger._core.handlers.get(0):
                logger._core.handlers_count = 0