concludes logging standardization refactor

2026-06-07 19:08:30 +03:00 · 2025-06-26 17:20:04 +01:00
parent ad29cb4447
commit afd9090a4c
48 changed files with 207 additions and 252 deletions
--- a/scripts/telegram_setup.py
+++ b/scripts/telegram_setup.py
@@ -24,4 +24,4 @@ SESSION_FILE = "secrets/anon-insta"
 os.makedirs("secrets", exist_ok=True)
 with TelegramClient(SESSION_FILE, API_ID, API_HASH) as client:
-    logger.success(f"New session file created: {SESSION_FILE}.session")
+    logger.success(f"new session file created: {SESSION_FILE}.session")
--- a/src/auto_archiver/core/extractor.py
+++ b/src/auto_archiver/core/extractor.py
@@ -94,7 +94,7 @@ class Extractor(BaseModule):
                to_filename = to_filename[-64:]
        to_filename = os.path.join(self.tmp_dir, to_filename)
        if verbose:
-            logger.debug(f"downloading {to_filename=}")
+            logger.debug(f"Downloading {to_filename=}")
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
        }
--- a/src/auto_archiver/core/media.py
+++ b/src/auto_archiver/core/media.py
@@ -86,7 +86,7 @@ class Media:
    @property  # getter .mimetype
    def mimetype(self) -> str:
        if not self.filename or len(self.filename) == 0:
-            logger.warning(f"cannot get mimetype from media without filename: {self}")
+            logger.warning(f"Cannot get mimetype from media without filename: {self}")
            return ""
        if not self._mimetype:
            self._mimetype = mimetypes.guess_type(self.filename)[0]
@@ -116,7 +116,7 @@ class Media:
        # self.is_video() should be used together with this method
        try:
            streams = ffmpeg.probe(self.filename, select_streams="v")["streams"]
-            logger.debug(f"STREAMS FOR {self.filename} {streams}")
+            logger.debug(f"Streams for {self.filename}: {streams}")
            return any(s.get("duration_ts", 0) > 0 for s in streams)
        except Error:
            return False  # ffmpeg errors when reading bad files
--- a/src/auto_archiver/core/orchestrator.py
+++ b/src/auto_archiver/core/orchestrator.py
@@ -539,11 +539,11 @@ Here's how that would look: \n\nsteps:\n  extractors:\n  - [your_extractor_name_
        for feeder in self.feeders:
            for item in feeder:
                with logger.contextualize(url=item.get_url(), trace=random_str(12)):
-                    logger.info("started processing")
+                    logger.info("Started processing")
                    yield self.feed_item(item)
                    url_count += 1
-        logger.info(f"processed {url_count} URL(s)")
+        logger.info(f"Processed {url_count} URL(s)")
        self.cleanup()
    def feed_item(self, item: Metadata) -> Metadata:
@@ -561,7 +561,7 @@ Here's how that would look: \n\nsteps:\n  extractors:\n  - [your_extractor_name_
            return self.archive(item)
        except KeyboardInterrupt:
            # catches keyboard interruptions to do a clean exit
-            logger.warning("caught interrupt")
+            logger.warning("Caught interrupt")
            for d in self.databases:
                d.aborted(item)
            self.cleanup()
@@ -620,25 +620,25 @@ Here's how that would look: \n\nsteps:\n  extractors:\n  - [your_extractor_name_
                try:
                    d.done(cached_result, cached=True)
                except Exception as e:
-                    logger.error(f"database {d.name}: {e}: {traceback.format_exc()}")
+                    logger.error(f"Database {d.name}: {e}: {traceback.format_exc()}")
            return cached_result
        # 3 - call extractors until one succeeds
        for a in self.extractors:
-            logger.info(f"trying extractor {a.name}")
+            logger.info(f"Trying extractor {a.name}")
            try:
                result.merge(a.download(result))
                if result.is_success():
                    break
            except Exception as e:
-                logger.error(f"archiver {a.name}: {e}: {traceback.format_exc()}")
+                logger.error(f"Extractor {a.name}: {e}: {traceback.format_exc()}")
        # 4 - call enrichers to work with archived content
        for e in self.enrichers:
            try:
                e.enrich(result)
            except Exception as exc:
-                logger.error(f"enricher {e.name}: {exc}: {traceback.format_exc()}")
+                logger.error(f"Enricher {e.name}: {exc}: {traceback.format_exc()}")
        # 5 - store all downloaded/generated media
        result.store(storages=self.storages)
@@ -657,7 +657,7 @@ Here's how that would look: \n\nsteps:\n  extractors:\n  - [your_extractor_name_
            try:
                d.done(result)
            except Exception as e:
-                logger.error(f"database {d.name}: {e}: {traceback.format_exc()}")
+                logger.error(f"Database {d.name}: {e}: {traceback.format_exc()}")
        return result
--- a/src/auto_archiver/modules/antibot_extractor_enricher/antibot_extractor_enricher.py
+++ b/src/auto_archiver/modules/antibot_extractor_enricher/antibot_extractor_enricher.py
@@ -57,7 +57,7 @@ class AntibotExtractorEnricher(Extractor, Enricher):
                    continue  # Skip imported modules/classes/functions
                if isinstance(obj, type) and issubclass(obj, Dropin):
                    dropins.append(obj)
-        logger.debug(f"loaded drop-in classes: {', '.join([d.__name__ for d in dropins])}")
+        logger.debug(f"Loaded drop-in classes: {', '.join([d.__name__ for d in dropins])}")
        return dropins
    def sanitize_url(self, url: str) -> str:
@@ -86,10 +86,10 @@ class AntibotExtractorEnricher(Extractor, Enricher):
        try:
            with SB(uc=True, agent=self.agent, headed=None, user_data_dir=using_user_data_dir, proxy=self.proxy) as sb:
-                logger.info(f"selenium browser is up with agent {self.agent}, opening url...")
+                logger.info(f"Selenium browser is up with agent {self.agent}, opening url...")
                sb.uc_open_with_reconnect(url, 4)
-                logger.debug("handling CAPTCHAs for...")
+                logger.debug("Handling CAPTCHAs for...")
                sb.uc_gui_handle_cf()
                sb.uc_gui_click_rc()  # NB: using handle instead of click breaks some sites like reddit, for now we separate here but can have dropins deciding this in the future
@@ -97,7 +97,7 @@ class AntibotExtractorEnricher(Extractor, Enricher):
                dropin.open_page(url)
                if self.detect_auth_wall and self._hit_auth_wall(sb):
-                    logger.warning("skipping since auth wall or CAPTCHA was detected")
+                    logger.warning("Skipping since auth wall or CAPTCHA was detected")
                    return False
                sb.wait_for_ready_state_complete()
@@ -124,18 +124,18 @@ class AntibotExtractorEnricher(Extractor, Enricher):
                    js_css_selector=dropin.js_for_video_css_selectors(),
                    max_media=self.max_download_videos - downloaded_videos,
                )
-                logger.info("completed")
+                logger.info("Completed")
            return to_enrich
        except selenium.common.exceptions.SessionNotCreatedException as e:
            if custom_data_dir:  # the retry logic only works once
                logger.error(
-                    f"session not created error: {e}. Please remove the user_data_dir {self.user_data_dir} and try again, will retry without user data dir though."
+                    f"Session not created error: {e}. Please remove the user_data_dir {self.user_data_dir} and try again, will retry without user data dir though."
                )
                return self.enrich(to_enrich, custom_data_dir=False)
            raise e  # re-raise
        except Exception as e:
-            logger.error(f"runtime error: {e}: {traceback.format_exc()}")
+            logger.error(f"Runtime error: {e}: {traceback.format_exc()}")
            return False
    def _get_suitable_dropin(self, url: str, sb: SB):
@@ -145,7 +145,7 @@ class AntibotExtractorEnricher(Extractor, Enricher):
        """
        for dropin in self.dropins:
            if dropin.suitable(url):
-                logger.debug(f"using drop-in {dropin.__name__}")
+                logger.debug(f"Using drop-in {dropin.__name__}")
                return dropin(sb, self)
        return DefaultDropin(sb, self)
@@ -240,7 +240,7 @@ class AntibotExtractorEnricher(Extractor, Enricher):
        x = max(sb.execute_script("return document.documentElement.scrollWidth"), w)
        y = min(max(sb.execute_script("return document.documentElement.scrollHeight"), h), 25_000)
-        logger.debug(f"setting window size to {x}x{y} for full page screenshot.")
+        logger.debug(f"Setting window size to {x}x{y} for full page screenshot.")
        sb.set_window_size(x, y)
        screen_filename = os.path.join(self.tmp_dir, f"screenshot{random_str(6)}.png")
@@ -279,7 +279,7 @@ class AntibotExtractorEnricher(Extractor, Enricher):
        # js_for_css_selectors
        for src in sources:
            if len(all_urls) >= max_media:
-                logger.debug(f"reached max download limit of {max_media} images/videos.")
+                logger.debug(f"Reached max download limit of {max_media} images/videos.")
                break
            if not is_relevant_url(src):
                continue
--- a/src/auto_archiver/modules/antibot_extractor_enricher/dropin.py
+++ b/src/auto_archiver/modules/antibot_extractor_enricher/dropin.py
@@ -144,7 +144,7 @@ class Dropin:
        with yt_dlp.YoutubeDL(validated_options) as ydl:
            for url in video_urls:
                try:
-                    logger.debug("downloading video from url")
+                    logger.debug(f"Downloading video from url: {url}")
                    info = ydl.extract_info(url, download=True)
                    filename = ydl_entry_to_filename(ydl, info)
                    if not filename:  # Failed to download video.
@@ -156,5 +156,5 @@ class Dropin:
                    to_enrich.add_media(media)
                    downloaded += 1
                except Exception as e:
-                    logger.error(f"download failed: {e} {traceback.format_exc()}")
+                    logger.error(f"Download failed: {e} {traceback.format_exc()}")
        return downloaded
--- a/src/auto_archiver/modules/antibot_extractor_enricher/dropins/linkedin.py
+++ b/src/auto_archiver/modules/antibot_extractor_enricher/dropins/linkedin.py
@@ -62,7 +62,7 @@ class LinkedinDropin(Dropin):
            self.sb.wait_for_ready_state_complete()
        username, password = self._get_username_password("linkedin.com")
-        logger.debug("logging in to Linkedin with username: {}", username)
+        logger.debug("Logging in to Linkedin with username: {}", username)
        self.sb.type("#username", username)
        self.sb.type("#password", password)
        self.sb.click_if_visible("#password-visibility-toggle", timeout=0.5)
--- a/src/auto_archiver/modules/antibot_extractor_enricher/dropins/reddit.py
+++ b/src/auto_archiver/modules/antibot_extractor_enricher/dropins/reddit.py
@@ -50,7 +50,7 @@ class RedditDropin(Dropin):
        self._close_cookies_banner()
        username, password = self._get_username_password("reddit.com")
-        logger.debug("logging in to Reddit with username: {}", username)
+        logger.debug("Logging in to Reddit with username: {}", username)
        self.sb.type("#login-username", username)
        self.sb.type("#login-password", password)
@@ -68,7 +68,7 @@ class RedditDropin(Dropin):
            self.sb.click_link_text("Log in")
            self.sb.wait_for_ready_state_complete()
            if self.sb.is_text_visible("Welcome back"):
-                logger.debug("login successful")
+                logger.debug("Login successful")
                self.sb.click_if_visible("this link")
    def _close_cookies_banner(self):
@@ -88,5 +88,5 @@ class RedditDropin(Dropin):
            .map(el => el.src || el.href)
            .filter(url => url && /\.(m3u8|mpd|ism)$/.test(url));
        """)
-        logger.debug("found {} video URLs", len(filtered_urls))
+        logger.debug("Found {} video URLs", len(filtered_urls))
        return 0, self._download_videos_with_ytdlp(filtered_urls, to_enrich)
--- a/src/auto_archiver/modules/antibot_extractor_enricher/dropins/vk.py
+++ b/src/auto_archiver/modules/antibot_extractor_enricher/dropins/vk.py
@@ -57,12 +57,12 @@ class VkDropin(Dropin):
        self.sb.open("https://vk.com")
        self.sb.wait_for_ready_state_complete()
        if "/feed" in self.sb.get_current_url():
-            logger.debug("already logged in to VK.")
+            logger.debug("Already logged in to VK.")
            return True
        # need to login
        username, password = self._get_username_password("vk.com")
-        logger.debug("logging in to VK with username: {}", username)
+        logger.debug("Logging in to VK with username: {}", username)
        self.sb.click('[data-testid="enter-another-way"]', timeout=10)
        self.sb.clear('input[name="login"][type="tel"]', by="css selector", timeout=10)
--- a/src/auto_archiver/modules/api_db/api_db.py
+++ b/src/auto_archiver/modules/api_db/api_db.py
@@ -36,9 +36,9 @@ class AAApiDb(Database):
        if not self.store_results:
            return
        if cached:
-            logger.debug("skipping saving archive to AA API because it was cached")
+            logger.debug("Skipping saving archive to AA API because it was cached")
            return
-        logger.debug("saving archive to the AA API.")
+        logger.debug("Saving archive to the AA API.")
        payload = {
            "author_id": self.author_id,
--- a/src/auto_archiver/modules/atlos_feeder_db_storage/atlos_feeder_db_storage.py
+++ b/src/auto_archiver/modules/atlos_feeder_db_storage/atlos_feeder_db_storage.py
@@ -72,7 +72,7 @@ class AtlosFeederDbStorage(Feeder, Database, Storage):
            f"/api/v2/source_material/metadata/{atlos_id}/auto_archiver",
            json={"metadata": {"processed": True, "status": "error", "error": reason}},
        )
-        logger.info(f"stored failure ID {atlos_id} on Atlos: {reason}")
+        logger.info(f"Stored failure ID {atlos_id} on Atlos: {reason}")
    def fetch(self, item: Metadata) -> Union[Metadata, bool]:
        """check and fetch if the given item has been archived already, each
@@ -88,7 +88,7 @@ class AtlosFeederDbStorage(Feeder, Database, Storage):
        """Mark an item as successfully archived in Atlos."""
        atlos_id = item.metadata.get("atlos_id")
        if not atlos_id:
-            logger.info("item has no Atlos ID, skipping")
+            logger.info("Item has no Atlos ID, skipping")
            return
        self._post(
            f"/api/v2/source_material/metadata/{atlos_id}/auto_archiver",
@@ -100,7 +100,7 @@ class AtlosFeederDbStorage(Feeder, Database, Storage):
                }
            },
        )
-        logger.info(f"stored success  ID {atlos_id} on Atlos")
+        logger.info(f"Stored success ID {atlos_id} on Atlos")
    # ! Atlos Module - Storage Methods
@@ -111,12 +111,12 @@ class AtlosFeederDbStorage(Feeder, Database, Storage):
    def upload(self, media: Media, metadata: Optional[Metadata] = None, **_kwargs) -> bool:
        """Upload a media file to Atlos if it has not been uploaded already."""
        if metadata is None:
-            logger.error(f"no metadata provided for {media.filename}")
+            logger.error(f"No metadata provided for {media.filename}")
            return False
        atlos_id = metadata.get("atlos_id")
        if not atlos_id:
-            logger.error(f"no Atlos ID found in metadata; can't store {media.filename} in Atlos.")
+            logger.error(f"No Atlos ID found in metadata; can't store {media.filename} in Atlos.")
            return False
        media_hash = calculate_file_hash(media.filename, hash_algo=hashlib.sha256, chunksize=4096)
@@ -135,7 +135,7 @@ class AtlosFeederDbStorage(Feeder, Database, Storage):
                params={"title": media.properties},
                files={"file": (os.path.basename(media.filename), file_obj)},
            )
-        logger.info(f"uploaded {media.filename} to Atlos with ID {atlos_id} and title {media.key}")
+        logger.info(f"Uploaded {media.filename} to Atlos with ID {atlos_id} and title {media.key}")
        return True
    def uploadf(self, file: IO[bytes], key: str, **kwargs: dict) -> bool:
--- a/src/auto_archiver/modules/csv_feeder/csv_feeder.py
+++ b/src/auto_archiver/modules/csv_feeder/csv_feeder.py
@@ -20,19 +20,19 @@ class CSVFeeder(Feeder):
                        url_column = first_row.index(url_column)
                    except ValueError:
                        logger.error(
-                            f"column {url_column} not found in header row: {first_row}. Did you set the 'column' config correctly?"
+                            f"Column {url_column} not found in header row: {first_row}. Did you set the 'column' config correctly?"
                        )
                        return
                elif not (url_or_none(first_row[url_column])):
                    # it's a header row, but we've been given a column number already
-                    logger.debug(f"skipping header row: {first_row}")
+                    logger.debug(f"Skipping header row: {first_row}")
                else:
                    # first row isn't a header row, rewind the file
                    f.seek(0)
                for row in reader:
                    if not url_or_none(row[url_column]):
-                        logger.warning(f"not a valid URL in row: {row}, skipping")
+                        logger.warning(f"Not a valid URL in row: {row}, skipping")
                        continue
                    url = row[url_column]
                    yield Metadata().set_url(url)
--- a/src/auto_archiver/modules/gdrive_storage/gdrive_storage.py
+++ b/src/auto_archiver/modules/gdrive_storage/gdrive_storage.py
@@ -23,10 +23,10 @@ class GDriveStorage(Storage):
    def _setup_google_drive_service(self):
        """Initialize Google Drive service based on provided credentials."""
        if self.oauth_token:
-            logger.debug(f"using Google Drive OAuth token: {self.oauth_token}")
+            logger.debug(f"Using Google Drive OAuth token: {self.oauth_token}")
            self.service = self._initialize_with_oauth_token()
        elif self.service_account:
-            logger.debug(f"using Google Drive service account: {self.service_account}")
+            logger.debug(f"Using Google Drive service account: {self.service_account}")
            self.service = self._initialize_with_service_account()
        else:
            raise ValueError("Missing credentials: either `oauth_token` or `service_account` must be provided.")
@@ -41,7 +41,7 @@ class GDriveStorage(Storage):
        if not creds.valid and creds.expired and creds.refresh_token:
            creds.refresh(Request())
            with open(self.oauth_token, "w") as token_file:
-                logger.debug("saving refreshed OAuth token.")
+                logger.debug("Saving refreshed OAuth token.")
                token_file.write(creds.to_json())
        elif not creds.valid:
            raise ValueError("Invalid OAuth token. Please regenerate the token.")
@@ -62,7 +62,7 @@ class GDriveStorage(Storage):
        parent_id, folder_id = self.root_folder_id, None
        path_parts = media.key.split(os.path.sep)
        filename = path_parts[-1]
-        logger.info(f"looking for folders for {path_parts[0:-1]} before getting url for {filename=}")
+        logger.info(f"Looking for folders for {path_parts[0:-1]} before getting url for {filename=}")
        for folder in path_parts[0:-1]:
            folder_id = self._get_id_from_parent_and_name(parent_id, folder, use_mime_type=True, raise_on_missing=True)
            parent_id = folder_id
@@ -70,7 +70,7 @@ class GDriveStorage(Storage):
        file_id = self._get_id_from_parent_and_name(folder_id, filename, raise_on_missing=True)
        if not file_id:
            #
-            logger.info(f"file {filename} not found in folder {folder_id}")
+            logger.info(f"File {filename} not found in folder {folder_id}")
            return None
        return f"https://drive.google.com/file/d/{file_id}/view?usp=sharing"
@@ -83,7 +83,7 @@ class GDriveStorage(Storage):
        parent_id, upload_to = self.root_folder_id, None
        path_parts = media.key.split(os.path.sep)
        filename = path_parts[-1]
-        logger.info(f"checking folders {path_parts[0:-1]} exist (or creating) before uploading {filename=}")
+        logger.info(f"Checking folders {path_parts[0:-1]} exist (or creating) before uploading {filename=}")
        for folder in path_parts[0:-1]:
            upload_to = self._get_id_from_parent_and_name(parent_id, folder, use_mime_type=True, raise_on_missing=False)
            if upload_to is None:
@@ -91,7 +91,7 @@ class GDriveStorage(Storage):
            parent_id = upload_to
        # upload file to gd
-        logger.debug(f"uploading {filename=} to folder id {upload_to}")
+        logger.debug(f"Uploading {filename=} to folder id {upload_to}")
        file_metadata = {"name": [filename], "parents": [upload_to]}
        try:
            media = MediaFileUpload(media.filename, resumable=True)
@@ -100,11 +100,11 @@ class GDriveStorage(Storage):
                .create(supportsAllDrives=True, body=file_metadata, media_body=media, fields="id")
                .execute()
            )
-            logger.debug(f"uploadf: uploaded file {gd_file['id']} successfully in folder={upload_to}")
+            logger.debug(f"Uploadf: uploaded file {gd_file['id']} successfully in folder={upload_to}")
        except FileNotFoundError as e:
-            logger.error(f"gd uploadf: file not found {media.filename=} - {e}")
+            logger.error(f"GD uploadf: file not found {media.filename=} - {e}")
        except Exception as e:
-            logger.error(f"gd uploadf: error uploading {media.filename=} to {upload_to} - {e}")
+            logger.error(f"GD uploadf: error uploading {media.filename=} to {upload_to} - {e}")
    # must be implemented even if unused
    def uploadf(self, file: IO[bytes], key: str, **kwargs: dict) -> bool:
@@ -133,7 +133,7 @@ class GDriveStorage(Storage):
            self.api_cache = getattr(self, "api_cache", {})
            cache_key = f"{parent_id}_{name}_{use_mime_type}"
            if cache_key in self.api_cache:
-                logger.debug(f"cache hit for {cache_key=}")
+                logger.debug(f"Cache hit for {cache_key=}")
                return self.api_cache[cache_key]
        # API logic
@@ -168,7 +168,7 @@ class GDriveStorage(Storage):
            else:
                logger.debug(f"{debug_header} not found, attempt {attempt + 1}/{retries}.")
                if attempt < retries - 1:
-                    logger.debug(f"sleeping for {sleep_seconds} second(s)")
+                    logger.debug(f"Sleeping for {sleep_seconds} second(s)")
                    time.sleep(sleep_seconds)
        if raise_on_missing:
@@ -180,7 +180,7 @@ class GDriveStorage(Storage):
        Creates a new GDrive folder @name inside folder @parent_id
        Returns id of the created folder
        """
-        logger.debug(f"creating new folder with {name=} inside {parent_id=}")
+        logger.debug(f"Creating new folder with {name=} inside {parent_id=}")
        file_metadata = {"name": [name], "mimeType": "application/vnd.google-apps.folder", "parents": [parent_id]}
        gd_folder = self.service.files().create(supportsAllDrives=True, body=file_metadata, fields="id").execute()
        return gd_folder.get("id")
--- a/src/auto_archiver/modules/generic_extractor/bluesky.py
+++ b/src/auto_archiver/modules/generic_extractor/bluesky.py
@@ -18,7 +18,7 @@ class Bluesky(GenericDropin):
        # download if embeds present (1 video XOR >=1 images)
        for media in self._download_bsky_embeds(post, archiver):
            result.add_media(media)
-        logger.debug(f"downloaded {len(result.media)} media files")
+        logger.debug(f"Downloaded {len(result.media)} media files")
        return result
--- a/src/auto_archiver/modules/generic_extractor/generic_extractor.py
+++ b/src/auto_archiver/modules/generic_extractor/generic_extractor.py
@@ -67,7 +67,7 @@ class GenericExtractor(Extractor):
                os.execv(sys.executable, [sys.executable] + sys.argv)
    def update_package(self, package_name: str) -> bool:
-        logger.info(f"checking and updating {package_name}...")
+        logger.info(f"Checking and updating {package_name}...")
        from importlib.metadata import version as get_version
        old_version = get_version(package_name)
@@ -79,7 +79,7 @@ class GenericExtractor(Extractor):
                return True
            logger.info(f"{package_name} already up to date")
        except Exception as e:
-            logger.error(f"failed to update {package_name}: {e}")
+            logger.error(f"Failed to update {package_name}: {e}")
        return False
    def setup_po_tokens(self) -> None:
@@ -110,7 +110,7 @@ class GenericExtractor(Extractor):
        missing_tools = [tool for tool in ("node", "yarn", "npx") if shutil.which(tool) is None]
        if missing_tools:
            logger.error(
-                f"cannot set up PO Token script; missing required tools: {', '.join(missing_tools)}. "
+                f"Cannot set up PO Token script; missing required tools: {', '.join(missing_tools)}. "
                "Install these tools or run bgutils via Docker. "
                "See: https://github.com/Brainicism/bgutil-ytdlp-pot-provider"
            )
@@ -139,7 +139,7 @@ class GenericExtractor(Extractor):
                    f"https://github.com/Brainicism/bgutil-ytdlp-pot-provider/archive/refs/tags/{plugin_version}.zip"
                )
                zip_path = os.path.join(base_dir, f"{plugin_version}.zip")
-                logger.info(f"downloading bgutils release zip for version {plugin_version}...")
+                logger.info(f"Downloading bgutils release zip for version {plugin_version}...")
                urlretrieve(zip_url, zip_path)
                with zipfile.ZipFile(zip_path, "r") as z:
                    z.extractall(base_dir)
@@ -148,7 +148,7 @@ class GenericExtractor(Extractor):
                extracted_root = os.path.join(base_dir, f"bgutil-ytdlp-pot-provider-{plugin_version}")
                shutil.move(os.path.join(extracted_root, "server"), server_dir)
                shutil.rmtree(extracted_root)
-                logger.info("installing dependencies and transpiling PoT Generator script...")
+                logger.info("Installing dependencies and transpiling PoT Generator script...")
                subprocess.run(["yarn", "install", "--frozen-lockfile"], cwd=server_dir, check=True)
                subprocess.run(["npx", "tsc"], cwd=server_dir, check=True)
@@ -164,7 +164,7 @@ class GenericExtractor(Extractor):
            logger.info(f"PO Token script configured at: {script_path}")
        except Exception as e:
-            logger.error(f"failed to set up PO Token script: {e}")
+            logger.error(f"Failed to set up PO Token script: {e}")
    def suitable_extractors(self, url: str) -> Generator[str, None, None]:
        """
@@ -205,7 +205,7 @@ class GenericExtractor(Extractor):
                media = Media(cover_image_path)
                metadata.add_media(media, id="cover")
            except Exception as e:
-                logger.error(f"could not download cover image {thumbnail_url}: {e}")
+                logger.error(f"Could not download cover image {thumbnail_url}: {e}")
        dropin = self.dropin_for_name(info_extractor.ie_key())
        if dropin:
@@ -352,7 +352,7 @@ class GenericExtractor(Extractor):
        if not dropin:
            # TODO: add a proper link to 'how to create your own dropin'
-            logger.debug(f"""could not find valid dropin for {info_extractor.ie_key()}.
+            logger.debug(f"""Could not find valid dropin for {info_extractor.ie_key()}.
                     Why not try creating your own, and make sure it has a valid function called 'create_metadata'. Learn more: https://auto-archiver.readthedocs.io/en/latest/user_guidelines.html#""")
            return False
@@ -374,7 +374,7 @@ class GenericExtractor(Extractor):
        if "entries" in data:
            entries = data.get("entries", [])
            if not len(entries):
-                logger.info("YoutubeDLArchiver could not find any video")
+                logger.info("GenericExtractor could not find any video")
                return False
        else:
            entries = [data]
@@ -388,7 +388,7 @@ class GenericExtractor(Extractor):
                    # file was not downloaded or could not be retrieved, example: sensitive videos on YT without using cookies.
                    continue
-                logger.debug(f"using filename {filename} for entry {entry.get('id', 'unknown')}")
+                logger.debug(f"Using filename {filename} for entry {entry.get('id', 'unknown')}")
                new_media = Media(filename)
                for x in ["duration", "original_url", "fulltitle", "description", "upload_date"]:
@@ -403,12 +403,12 @@ class GenericExtractor(Extractor):
                            text = " ".join([line.text for line in subs])
                            new_media.set(f"subtitles_{lang}", text)
                        except Exception as e:
-                            logger.error(f"error loading subtitle file {val.get('filepath')}: {e}")
+                            logger.error(f"Error loading subtitle file {val.get('filepath')}: {e}")
                result.add_media(new_media)
            except Exception as e:
-                logger.error(f"error processing entry {entry}: {e}")
+                logger.error(f"Error processing entry {entry}: {e}")
        if not len(result.media):
-            logger.info(f"no media found for entry {entry}, skipping.")
+            logger.info(f"No media found for entry {entry}, skipping.")
            return False
        return self.add_metadata(data, info_extractor, url, result)
@@ -470,14 +470,14 @@ class GenericExtractor(Extractor):
        def _helper_for_successful_extract_info(data, info_extractor, url, ydl):
            if data.get("is_live", False) and not self.livestreams:
-                logger.warning("livestream detected, skipping due to 'livestreams' configuration setting")
+                logger.warning("Livestream detected, skipping due to 'livestreams' configuration setting")
                return False
            # it's a valid video, that the youtubdedl can download out of the box
            return self.get_metadata_for_video(data, info_extractor, url, ydl)
        try:
            if dropin_submodule and dropin_submodule.skip_ytdlp_download(url, info_extractor):
-                logger.debug(f"skipping using ytdlp to download files for {info_extractor.ie_key()}")
+                logger.debug(f"Skipping using ytdlp to download files for {info_extractor.ie_key()}")
                raise SkipYtdlp()
            # don't download since it can be a live stream
@@ -496,17 +496,17 @@ class GenericExtractor(Extractor):
            if not isinstance(e, SkipYtdlp):
                logger.debug(
-                    f'issue using "{info_extractor.IE_NAME}" extractor to download video (error: {repr(e)}), attempting to use dropin to get post data instead'
+                    f'Issue using "{info_extractor.IE_NAME}" extractor to download video (error: {repr(e)}), attempting to use dropin to get post data instead'
                )
            try:
                result = self.get_metadata_for_post(info_extractor, url, ydl)
            except (yt_dlp.utils.DownloadError, yt_dlp.utils.ExtractorError) as post_e:
-                logger.error("error downloading metadata for post: {error}", error=str(post_e))
+                logger.error("Error downloading metadata for post: {error}", error=str(post_e))
                return False
            except Exception as generic_e:
                logger.debug(
-                    'attempt to extract using ytdlp extractor "{name}" failed:  \n  {error}',
+                    'Attempt to extract using ytdlp extractor "{name}" failed:  \n  {error}',
                    name=info_extractor.IE_NAME,
                    error=str(generic_e),
                    exc_info=True,
@@ -559,17 +559,17 @@ class GenericExtractor(Extractor):
        # order of importance: username/password -> api_key -> cookie -> cookies_from_browser -> cookies_file
        if auth:
            if "username" in auth and "password" in auth:
-                logger.debug("using provided auth username and password")
+                logger.debug("Using provided auth username and password")
                ydl_options.extend(("--username", auth["username"]))
                ydl_options.extend(("--password", auth["password"]))
            elif "cookie" in auth:
-                logger.debug("using provided auth cookie")
+                logger.debug("Using provided auth cookie")
                yt_dlp.utils.std_headers["cookie"] = auth["cookie"]
            elif "cookies_from_browser" in auth:
-                logger.debug(f"using extracted cookies from browser {auth['cookies_from_browser']}")
+                logger.debug(f"Using extracted cookies from browser {auth['cookies_from_browser']}")
                ydl_options.extend(("--cookies-from-browser", auth["cookies_from_browser"]))
            elif "cookies_file" in auth:
-                logger.debug(f"using cookies from file {auth['cookies_file']}")
+                logger.debug(f"Using cookies from file {auth['cookies_file']}")
                ydl_options.extend(("--cookies", auth["cookies_file"]))
        # Applying user-defined extractor_args
@@ -579,11 +579,11 @@ class GenericExtractor(Extractor):
                    arg_str = ";".join(f"{k}={v}" for k, v in args.items())
                else:
                    arg_str = str(args)
-                logger.debug(f"setting extractor_args: {key}:{arg_str}")
+                logger.debug(f"Setting extractor_args: {key}:{arg_str}")
                ydl_options.extend(["--extractor-args", f"{key}:{arg_str}"])
        if self.ytdlp_args:
-            logger.debug(f"adding additional ytdlp arguments: {self.ytdlp_args}")
+            logger.debug(f"Adding additional ytdlp arguments: {self.ytdlp_args}")
            ydl_options += self.ytdlp_args.split(" ")
        *_, validated_options = yt_dlp.parse_options(ydl_options)
--- a/src/auto_archiver/modules/generic_extractor/tiktok.py
+++ b/src/auto_archiver/modules/generic_extractor/tiktok.py
@@ -22,7 +22,7 @@ class Tiktok(GenericDropin):
        return any(extractor().suitable(url) for extractor in (TikTokIE, TikTokLiveIE, TikTokVMIE, TikTokUserIE))
    def extract_post(self, url: str, ie_instance):
-        logger.debug(f"using Tikwm API to attempt to download tiktok video from {url=}")
+        logger.debug("Using Tikwm API to attempt to download tiktok video")
        endpoint = self.TIKWM_ENDPOINT.format(url=url)
@@ -62,7 +62,7 @@ class Tiktok(GenericDropin):
        # get the video or fail
        video_downloaded = archiver.download_from_url(video_url, f"vid_{post.get('id', '')}")
        if not video_downloaded:
-            logger.error(f"failed to download video from {video_url}")
+            logger.error("Failed to download video")
            return False
        video_media = Media(video_downloaded)
        if duration := post.get("duration", None):
--- a/src/auto_archiver/modules/generic_extractor/twitter.py
+++ b/src/auto_archiver/modules/generic_extractor/twitter.py
@@ -40,7 +40,7 @@ class Twitter(GenericDropin):
                raise ValueError("Error retreiving post. Are you sure it exists?")
            timestamp = get_datetime_from_str(tweet["created_at"], "%a %b %d %H:%M:%S %z %Y")
        except (ValueError, KeyError) as ex:
-            logger.warning(f"unable to parse tweet: {str(ex)}\nRetreived tweet data: {tweet}")
+            logger.warning(f"Unable to parse tweet: {str(ex)}\nRetreived tweet data: {tweet}")
            return False
        full_text = tweet.pop("full_text", "")
@@ -49,7 +49,7 @@ class Twitter(GenericDropin):
        result.set_title(f"{author} - {full_text}").set_content(full_text).set_timestamp(timestamp)
        if not tweet.get("entities", {}).get("media"):
-            logger.debug("no media found, archiving tweet text only")
+            logger.debug("No media found, archiving tweet text only")
            result.status = "twitter-ytdl"
            return result
        for i, tw_media in enumerate(tweet["entities"]["media"]):
--- a/src/auto_archiver/modules/gsheet_feeder_db/gsheet_feeder_db.py
+++ b/src/auto_archiver/modules/gsheet_feeder_db/gsheet_feeder_db.py
@@ -42,19 +42,19 @@ class GsheetsFeederDB(Feeder, Database):
        sh = self.open_sheet()
        for ii, worksheet in enumerate(sh.worksheets()):
            if not self.should_process_sheet(worksheet.title):
-                logger.debug(f"skipped worksheet '{worksheet.title}' due to allow/block rules")
+                logger.debug(f"Skipped worksheet '{worksheet.title}' due to allow/block rules")
                continue
-            logger.info(f"opening worksheet {ii=}: {worksheet.title=} header={self.header}")
+            logger.info(f"Opening worksheet {ii=}: {worksheet.title=} header={self.header}")
            gw = GWorksheet(worksheet, header_row=self.header, columns=self.columns)
            if len(missing_cols := self.missing_required_columns(gw)):
                logger.debug(
-                    f"skipped worksheet '{worksheet.title}' due to missing required column(s) for {missing_cols}"
+                    f"Skipped worksheet '{worksheet.title}' due to missing required column(s) for {missing_cols}"
                )
                continue
            with logger.contextualize(worksheet=f"{sh.title}:{worksheet.title}"):
                # process and yield metadata here:
                yield from self._process_rows(gw)
-            logger.info(f"finished worksheet {worksheet.title}")
+            logger.info(f"Finished worksheet {worksheet.title}")
    def _process_rows(self, gw: GWorksheet):
        for row in range(1 + self.header, gw.count_rows() + 1):
@@ -133,7 +133,7 @@ class GsheetsFeederDB(Feeder, Database):
                if val and gw.col_exists(col) and gw.get_cell(row_values, col) == "":
                    cell_updates.append((row, col, final_value))
            except Exception as e:
-                logger.error(f"unable to batch {col}={final_value} due to {e}")
+                logger.error(f"Unable to batch {col}={final_value} due to {e}")
        status_message = item.status
        if cached:
@@ -193,13 +193,13 @@ class GsheetsFeederDB(Feeder, Database):
            gw, row = self._retrieve_gsheet(item)
            gw.set_cell(row, "status", new_status)
        except Exception as e:
-            logger.debug(f"unable to update sheet: {e}: {traceback.format_exc()}")
+            logger.debug(f"Unable to update sheet: {e}: {traceback.format_exc()}")
    def _retrieve_gsheet(self, item: Metadata) -> Tuple[GWorksheet, int]:
        if gsheet := item.get_context("gsheet"):
            gw: GWorksheet = gsheet.get("worksheet")
            row: int = gsheet.get("row")
        elif self.sheet_id:
-            logger.error("unable to retrieve Gsheet, GsheetDB must be used alongside GsheetFeeder.")
+            logger.error("Unable to retrieve Gsheet, GsheetDB must be used alongside GsheetFeeder.")
        return gw, row
--- a/src/auto_archiver/modules/hash_enricher/hash_enricher.py
+++ b/src/auto_archiver/modules/hash_enricher/hash_enricher.py
@@ -22,7 +22,7 @@ class HashEnricher(Enricher):
    """
    def enrich(self, to_enrich: Metadata) -> None:
-        logger.debug(f"calculating media hashes with algo={self.algorithm}")
+        logger.debug(f"Calculating media hashes with algo={self.algorithm}")
        for i, m in enumerate(to_enrich.media):
            if len(hd := self.calculate_hash(m.filename)):
--- a/src/auto_archiver/modules/html_formatter/html_formatter.py
+++ b/src/auto_archiver/modules/html_formatter/html_formatter.py
@@ -35,7 +35,7 @@ class HtmlFormatter(Formatter):
    def format(self, item: Metadata) -> Media:
        url = item.get_url()
        if item.is_empty():
-            logger.debug("nothing to format, skipping")
+            logger.debug("Nothing to format, skipping")
            return
        content = self.template.render(
--- a/src/auto_archiver/modules/instagram_api_extractor/instagram_api_extractor.py
+++ b/src/auto_archiver/modules/instagram_api_extractor/instagram_api_extractor.py
@@ -49,7 +49,7 @@ class InstagramAPIExtractor(Extractor):
        if not len(insta_matches) or len(insta_matches[0]) != 3:
            return
        if len(insta_matches) > 1:
-            logger.debug("multiple instagram matches found, using the first one")
+            logger.debug("Multiple instagram matches found, using the first one")
            return
        g1, g2, g3 = insta_matches[0][0], insta_matches[0][1], insta_matches[0][2]
        if g1 == "":
@@ -65,13 +65,13 @@ class InstagramAPIExtractor(Extractor):
                return self.download_post(item, id=g3, context="story")
            return self.download_stories(item, g2)
        else:
-            logger.warning(f"unknown instagram regex group match {g1=}")
+            logger.warning(f"Unknown instagram regex group match {g1=}")
            return
    @retry(wait_random_min=1000, wait_random_max=3000, stop_max_attempt_number=5)
    def call_api(self, path: str, params: dict) -> dict:
        headers = {"accept": "application/json", "x-access-key": self.access_token}
-        logger.debug(f"calling {self.api_endpoint}/{path} with {params=}")
+        logger.debug(f"Calling {self.api_endpoint}/{path} with {params=}")
        return requests.get(f"{self.api_endpoint}/{path}", headers=headers, params=params).json()
    def cleanup_dict(self, d: dict | list) -> dict:
@@ -112,8 +112,8 @@ class InstagramAPIExtractor(Extractor):
                count_posts += len(stories)
                result.set("#stories", len(stories))
            except Exception as e:
-                result.append("errors", f"error downloading stories for {username}")
+                result.append("errors", f"Error downloading stories for {username}")
-                logger.error(f"error downloading stories for {username}: {e} {traceback.format_exc()}")
+                logger.error(f"Error downloading stories for {username}: {e} {traceback.format_exc()}")
            # download all posts
            try:
@@ -122,8 +122,8 @@ class InstagramAPIExtractor(Extractor):
                        result, user_id, max_to_download=self.full_profile_max_posts - count_posts
                    )
            except Exception as e:
-                result.append("errors", f"error downloading posts for {username}")
+                result.append("errors", f"Error downloading posts for {username}")
-                logger.error(f"error downloading posts for {username}: {e} {traceback.format_exc()}")
+                logger.error(f"Error downloading posts for {username}: {e} {traceback.format_exc()}")
            # download all tagged
            try:
@@ -132,8 +132,8 @@ class InstagramAPIExtractor(Extractor):
                        result, user_id, max_to_download=self.full_profile_max_posts - count_posts
                    )
            except Exception as e:
-                result.append("errors", f"error downloading tagged posts for {username}")
+                result.append("errors", f"Error downloading tagged posts for {username}")
-                logger.error(f"error downloading tagged posts for {username}: {e} {traceback.format_exc()}")
+                logger.error(f"Error downloading tagged posts for {username}: {e} {traceback.format_exc()}")
            # download all highlights
            try:
@@ -159,10 +159,10 @@ class InstagramAPIExtractor(Extractor):
            except Exception as e:
                result.append(
                    "errors",
-                    f"error downloading highlight id{h.get('pk')} for {username}",
+                    f"Error downloading highlight id{h.get('pk')} for {username}",
                )
                logger.error(
-                    f"error downloading highlight id{h.get('pk')} for {username}: {e} {traceback.format_exc()}"
+                    f"Error downloading highlight id{h.get('pk')} for {username}: {e} {traceback.format_exc()}"
                )
            if count_highlights >= max_to_download:
                logger.debug(f"HIGHLIGHTS reached max_to_download={self.full_profile_max_posts}")
@@ -210,8 +210,8 @@ class InstagramAPIExtractor(Extractor):
            try:
                self.scrape_item(result, h, "highlight")
            except Exception as e:
-                result.append("errors", f"error downloading highlight {h.get('id')}")
+                result.append("errors", f"Error downloading highlight {h.get('id')}")
-                logger.error(f"error downloading highlight, skipping {h.get('id')}: {e} {traceback.format_exc()}")
+                logger.error(f"Error downloading highlight, skipping {h.get('id')}: {e} {traceback.format_exc()}")
        return h_info
@@ -248,13 +248,13 @@ class InstagramAPIExtractor(Extractor):
                break
            posts, end_cursor = posts[0], posts[1]
            posts = posts[: min(max_to_download, len(posts))]
-            logger.info(f"parsing {len(posts)} posts, next {end_cursor=} {post_count=} {max_to_download=}")
+            logger.info(f"Parsing {len(posts)} posts, next {end_cursor=} {post_count=} {max_to_download=}")
            for p in posts:
                try:
                    self.scrape_item(result, p, "post")
                except Exception as e:
-                    result.append("errors", f"error downloading post {p.get('id')}")
+                    result.append("errors", f"Error downloading post {p.get('id')}")
-                    logger.error(f"error downloading post, skipping {p.get('id')}: {e} {traceback.format_exc()}")
+                    logger.error(f"Error downloading post, skipping {p.get('id')}: {e} {traceback.format_exc()}")
                pbar.update(1)
                post_count += 1
            if post_count >= max_to_download:
@@ -275,14 +275,14 @@ class InstagramAPIExtractor(Extractor):
                break
            next_page_id = resp.get("next_page_id")
-            logger.info(f"parsing {len(posts)} tagged posts, next {next_page_id=}")
+            logger.info(f"Parsing {len(posts)} tagged posts, next {next_page_id=}")
            posts = posts[: min(max_to_download, len(posts))]
            for p in posts:
                try:
                    self.scrape_item(result, p, "tagged")
                except Exception as e:
-                    result.append("errors", f"error downloading tagged post {p.get('id')}")
+                    result.append("errors", f"Error downloading tagged post {p.get('id')}")
-                    logger.error(f"error downloading tagged post, skipping {p.get('id')}: {e} {traceback.format_exc()}")
+                    logger.error(f"Error downloading tagged post, skipping {p.get('id')}: {e} {traceback.format_exc()}")
                pbar.update(1)
                tagged_count += 1
            if tagged_count >= max_to_download:
--- a/src/auto_archiver/modules/instagram_extractor/instagram_extractor.py
+++ b/src/auto_archiver/modules/instagram_extractor/instagram_extractor.py
@@ -7,6 +7,7 @@ highlights, and tagged posts. Authentication is required via username/password o
 import re
 import os
 import shutil
 import traceback
 import instaloader
 from auto_archiver.utils.custom_logger import logger
@@ -44,11 +45,11 @@ class InstagramExtractor(Extractor):
            self.insta.load_session_from_file(self.username, self.session_file)
        except Exception:
            try:
-                logger.info("no valid session file found - Attempting login with use and password.")
+                logger.info("No valid session file found - Attempting login with username and password.")
                self.insta.login(self.username, self.password)
                self.insta.save_session_to_file(self.session_file)
            except Exception as e:
-                logger.error(f"failed to setup Instagram Extractor with Instagrapi. {e}")
+                logger.error(f"Failed to setup Instagram Extractor with Instagrapi. {e}")
    def download(self, item: Metadata) -> Metadata:
        url = item.get_url()
@@ -72,7 +73,7 @@ class InstagramExtractor(Extractor):
                result = self.download_profile(url, profile_matches[0])
        except Exception as e:
            logger.error(
-                f"failed to download with instagram extractor due to: {e}, make sure your account credentials are valid."
+                f"Failed to download with instagram extractor due to: {e}, make sure your account credentials are valid."
            )
        finally:
            shutil.rmtree(self.download_folder, ignore_errors=True)
@@ -95,27 +96,27 @@ class InstagramExtractor(Extractor):
                try:
                    self.insta.download_post(post, target=f"profile_post_{post.owner_username}")
                except Exception as e:
-                    logger.error(f"failed to download post: {post.shortcode}: {e}")
+                    logger.error(f"Failed to download post: {post.shortcode}: {e} {traceback.format_exc()}")
        except Exception as e:
-            logger.error(f"failed profile.get_posts: {e}")
+            logger.error(f"Failed profile.get_posts: {e}: {traceback.format_exc()}")
        try:
            for post in profile.get_tagged_posts():
                try:
                    self.insta.download_post(post, target=f"tagged_post_{post.owner_username}")
                except Exception as e:
-                    logger.error(f"failed to download tagged post: {post.shortcode}: {e}")
+                    logger.error(f"Failed to download tagged post: {post.shortcode}: {e} {traceback.format_exc()}")
        except Exception as e:
-            logger.error(f"failed profile.get_tagged_posts: {e}")
+            logger.error(f"Failed profile.get_tagged_posts: {e} {traceback.format_exc()}")
        try:
            for post in profile.get_igtv_posts():
                try:
                    self.insta.download_post(post, target=f"igtv_post_{post.owner_username}")
                except Exception as e:
-                    logger.error(f"failed to download igtv post: {post.shortcode}: {e}")
+                    logger.error(f"Failed to download igtv post: {post.shortcode}: {e} {traceback.format_exc()}")
        except Exception as e:
-            logger.error(f"failed profile.get_igtv_posts: {e}")
+            logger.error(f"Failed profile.get_igtv_posts: {e} {traceback.format_exc()}")
        try:
            for story in self.insta.get_stories([profile.userid]):
@@ -123,9 +124,9 @@ class InstagramExtractor(Extractor):
                    try:
                        self.insta.download_storyitem(item, target=f"story_item_{story.owner_username}")
                    except Exception as e:
-                        logger.error(f"failed to download story item: {item}: {e}")
+                        logger.error(f"Failed to download story item: {item}: {e} {traceback.format_exc()}")
        except Exception as e:
-            logger.error(f"failed get_stories: {e}")
+            logger.error(f"Failed get_stories: {e} {traceback.format_exc()}")
        try:
            for highlight in self.insta.get_highlights(profile.userid):
@@ -133,9 +134,9 @@ class InstagramExtractor(Extractor):
                    try:
                        self.insta.download_storyitem(item, target=f"highlight_item_{highlight.owner_username}")
                    except Exception as e:
-                        logger.error(f"failed to download highlight item: {item}: {e}")
+                        logger.error(f"Failed to download highlight item: {item}: {e} {traceback.format_exc()}")
        except Exception as e:
-            logger.error(f"failed get_highlights: {e}")
+            logger.error(f"Failed get_highlights: {e} {traceback.format_exc()}")
        return self.process_downloads(url, f"@{username}", profile._asdict(), None)
@@ -158,4 +159,4 @@ class InstagramExtractor(Extractor):
            return result.success("instagram")
        except Exception as e:
-            logger.error(f"could not fetch instagram post due to: {e}")
+            logger.error(f"Could not fetch instagram post due to: {e} {traceback.format_exc()}")
--- a/src/auto_archiver/modules/instagram_tbot_extractor/instagram_tbot_extractor.py
+++ b/src/auto_archiver/modules/instagram_tbot_extractor/instagram_tbot_extractor.py
@@ -32,7 +32,7 @@ class InstagramTbotExtractor(Extractor):
        1. makes a copy of session_file that is removed in cleanup
        2. checks if the session file is valid
        """
-        logger.info(f"SETUP {self.name} checking login...")
+        logger.debug(f"SETUP {self.name} checking login...")
        self._prepare_session_file()
        self._initialize_telegram_client()
@@ -58,10 +58,10 @@ class InstagramTbotExtractor(Extractor):
                "If you do, disable at least one of the archivers for the first-time setup of the telethon session: {e}"
            )
        with self.client.start():
-            logger.info(f"SETUP {self.name} login works.")
+            logger.debug(f"SETUP {self.name} login works.")
    def cleanup(self) -> None:
-        logger.info(f"CLEANUP {self.name}.")
+        logger.debug(f"CLEANUP {self.name}.")
        session_file_name = self.session_file + ".session"
        if os.path.exists(session_file_name):
            os.remove(session_file_name)
@@ -79,17 +79,17 @@ class InstagramTbotExtractor(Extractor):
            # This may be outdated and replaced by the below message, but keeping until confirmed
            if "You must enter a URL to a post" in message:
-                logger.debug(f"invalid link {url=} for {self.name}: {message}")
+                logger.debug(f"Invalid link for {self.name}: {message}")
                return False
            if "Media not found or unavailable" in message:
-                logger.debug(f"No media found for link {url=} for {self.name}: {message}")
+                logger.debug(f"No media found for {self.name}: {message}")
                return False
            if message:
                result.set_content(message).set_title(message[:128])
            elif result.is_empty():
-                logger.debug(f"No media found for link {url=} for {self.name}: {message}")
+                logger.debug(f"No media found for {self.name}: {message}")
                return False
            return result.success("insta-via-bot")
--- a/src/auto_archiver/modules/json_enricher/json_enricher.py
+++ b/src/auto_archiver/modules/json_enricher/json_enricher.py
@@ -8,9 +8,7 @@ from auto_archiver.core import Media, Metadata
 class JsonEnricher(Enricher):
    def enrich(self, to_enrich: Metadata) -> None:
-        url = to_enrich.get_url()
+        logger.debug("Enriching as JSON")
        logger.debug(f"JSON Enricher for {url=}")
        item_path = os.path.join(self.tmp_dir, "metadata.json")
        with open(item_path, mode="w", encoding="utf-8") as outf:
--- a/src/auto_archiver/modules/local_storage/local_storage.py
+++ b/src/auto_archiver/modules/local_storage/local_storage.py
@@ -38,8 +38,7 @@ class LocalStorage(Storage):
        os.makedirs(os.path.dirname(dest), exist_ok=True)
        logger.debug(f"[{self.__class__.__name__}] storing file {media.filename} with key {media.key} to {dest}")
-        res = shutil.copy2(media.filename, dest)
+        shutil.copy2(media.filename, dest)
        logger.info(res)
        return True
    # must be implemented even if unused
--- a/src/auto_archiver/modules/meta_enricher/meta_enricher.py
+++ b/src/auto_archiver/modules/meta_enricher/meta_enricher.py
@@ -12,20 +12,17 @@ class MetaEnricher(Enricher):
    """
    def enrich(self, to_enrich: Metadata) -> None:
        url = to_enrich.get_url()
        if to_enrich.is_empty():
-            logger.debug(f"[SKIP] META_ENRICHER there is no media or metadata to enrich: {url=}")
+            logger.debug("[SKIP] META_ENRICHER there is no media or metadata to enrich")
            return
-        logger.debug(f"calculating archive metadata information for {url=}")
+        logger.debug("Calculating archive metadata information")
        self.enrich_file_sizes(to_enrich)
        self.enrich_archive_duration(to_enrich)
    def enrich_file_sizes(self, to_enrich: Metadata):
-        logger.debug(
+        logger.debug(f"Calculating archive file sizes for {len(to_enrich.media)} media files")
            f"calculating archive file sizes for url={to_enrich.get_url()} ({len(to_enrich.media)} media files)"
        )
        total_size = 0
        for media in to_enrich.get_all_media():
            file_stats = os.stat(media.filename)
@@ -44,7 +41,7 @@ class MetaEnricher(Enricher):
            size /= 1024
    def enrich_archive_duration(self, to_enrich):
-        logger.debug(f"calculating archive duration for url={to_enrich.get_url()} ")
+        logger.debug("Calculating archive duration")
        archive_duration = datetime.datetime.now(datetime.timezone.utc) - to_enrich.get("_processed_at")
        to_enrich.set("archive_duration_seconds", archive_duration.seconds)
--- a/src/auto_archiver/modules/metadata_enricher/metadata_enricher.py
+++ b/src/auto_archiver/modules/metadata_enricher/metadata_enricher.py
@@ -12,8 +12,7 @@ class MetadataEnricher(Enricher):
    """
    def enrich(self, to_enrich: Metadata) -> None:
-        url = to_enrich.get_url()
+        logger.debug("Extracting EXIF metadata")
        logger.debug(f"extracting EXIF metadata for {url=}")
        for i, m in enumerate(to_enrich.media):
            if len(md := self.get_metadata(m.filename)):
@@ -31,8 +30,8 @@ class MetadataEnricher(Enricher):
                field, value = line.strip().split(":", 1)
                metadata[field.strip()] = value.strip()
            return metadata
-        except FileNotFoundError:
+        except FileNotFoundError as e:
-            logger.error("[exif_enricher] ExifTool not found. Make sure ExifTool is installed and added to PATH.")
+            logger.error(f"ExifTool not found. Make sure ExifTool is installed and added to PATH. {e}")
        except Exception as e:
            logger.error(f"Error occurred: {e}: {traceback.format_exc()}")
        return {}
--- a/src/auto_archiver/modules/opentimestamps_enricher/opentimestamps_enricher.py
+++ b/src/auto_archiver/modules/opentimestamps_enricher/opentimestamps_enricher.py
@@ -1,4 +1,5 @@
 import os
 import traceback
 from auto_archiver.utils.custom_logger import logger
 import opentimestamps
@@ -14,13 +15,12 @@ from auto_archiver.utils.misc import get_current_timestamp
 class OpentimestampsEnricher(Enricher):
    def enrich(self, to_enrich: Metadata) -> None:
-        url = to_enrich.get_url()
+        logger.debug("OpenTimestamps timestamping files")
        logger.debug(f"OpenTimestamps timestamping files for {url=}")
        # Get the media files to timestamp
        media_files = [m for m in to_enrich.media if m.filename and not m.get("opentimestamps")]
        if not media_files:
-            logger.debug(f"No files found to timestamp in {url=}")
+            logger.debug("No files found to timestamp")
            return
        timestamp_files = []
@@ -94,7 +94,7 @@ class OpentimestampsEnricher(Enricher):
                        detached_timestamp.serialize(ctx)
                        f.write(ctx.getbytes())
                except Exception as e:
-                    logger.warning(f"Failed to serialize timestamp file: {e}")
+                    logger.warning(f"Failed to serialize timestamp file: {e} {traceback.format_exc()}")
                    continue
                # Create media for the timestamp file
@@ -113,16 +113,16 @@ class OpentimestampsEnricher(Enricher):
                media.set("opentimestamps", True)
            except Exception as e:
-                logger.warning(f"Error while timestamping {media.filename}: {e}")
+                logger.warning(f"Error while timestamping {media.filename}: {e} {traceback.format_exc()}")
        # Add timestamp files to the metadata
        if timestamp_files:
            to_enrich.set("opentimestamped", True)
            to_enrich.set("opentimestamps_count", len(timestamp_files))
-            logger.info(f"{len(timestamp_files)} OpenTimestamps proofs created for {url=}")
+            logger.info(f"{len(timestamp_files)} OpenTimestamps proofs created")
        else:
            to_enrich.set("opentimestamped", False)
-            logger.warning(f"No successful timestamps created for {url=}")
+            logger.warning("No successful timestamps created")
    def verify_timestamp(self, detached_timestamp):
        """
--- a/src/auto_archiver/modules/pdq_hash_enricher/pdq_hash_enricher.py
+++ b/src/auto_archiver/modules/pdq_hash_enricher/pdq_hash_enricher.py
@@ -28,8 +28,7 @@ class PdqHashEnricher(Enricher):
    """
    def enrich(self, to_enrich: Metadata) -> None:
-        url = to_enrich.get_url()
+        logger.debug("Calculating perceptual hashes")
        logger.debug(f"calculating perceptual hashes for {url=}")
        media_with_hashes = []
        for m in to_enrich.media:
@@ -44,7 +43,7 @@ class PdqHashEnricher(Enricher):
                    media.set("pdq_hash", hd)
                    media_with_hashes.append(media.filename)
-        logger.debug(f"calculated '{len(media_with_hashes)}' perceptual hashes for {url=}: {media_with_hashes}")
+        logger.debug(f"Calculated '{len(media_with_hashes)}' perceptual hashes: {media_with_hashes}")
    def calculate_pdq_hash(self, filename):
        # returns a hexadecimal string with the perceptual hash for the given filename
--- a/src/auto_archiver/modules/s3_storage/s3_storage.py
+++ b/src/auto_archiver/modules/s3_storage/s3_storage.py
@@ -56,7 +56,7 @@ class S3Storage(Storage):
            if existing_key := self.file_in_folder(path):
                media._key = existing_key
                media.set("previously archived", True)
-                logger.debug(f"skipping upload of {media.filename} because it already exists in {media.key}")
+                logger.debug(f"Skipping upload of {media.filename} because it already exists in {media.key}")
                return False
            _, ext = os.path.splitext(media.key)
--- a/src/auto_archiver/modules/ssl_enricher/ssl_enricher.py
+++ b/src/auto_archiver/modules/ssl_enricher/ssl_enricher.py
@@ -19,10 +19,10 @@ class SSLEnricher(Enricher):
        url = to_enrich.get_url()
        parsed = urlparse(url)
-        assert parsed.scheme in ["https"], f"Invalid URL scheme {url=}"
+        assert parsed.scheme in ["https"], "Invalid URL scheme"
        domain = parsed.netloc
-        logger.debug(f"fetching SSL certificate for {domain=} in {url=}")
+        logger.debug(f"Fetching SSL certificate for {domain=}")
        cert = ssl.get_server_certificate((domain, 443))
        cert_fn = os.path.join(self.tmp_dir, f"{slugify(domain)}.pem")
--- a/src/auto_archiver/modules/telegram_extractor/telegram_extractor.py
+++ b/src/auto_archiver/modules/telegram_extractor/telegram_extractor.py
@@ -38,7 +38,7 @@ class TelegramExtractor(Extractor):
        video = s.find("video")
        if video is None:
-            logger.warning("could not find video")
+            logger.warning("Could not find video")
            image_tags = s.find_all(class_="tgme_widget_message_photo_wrap")
            image_urls = []
--- a/src/auto_archiver/modules/telethon_extractor/telethon_extractor.py
+++ b/src/auto_archiver/modules/telethon_extractor/telethon_extractor.py
@@ -65,7 +65,7 @@ class TelethonExtractor(Extractor):
                # get currently joined channels
                # https://docs.telethon.dev/en/stable/modules/custom.html#module-telethon.tl.custom.dialog
                joined_channel_ids = [c.id for c in self.client.get_dialogs() if c.is_channel]
-                logger.info(f"already part of {len(joined_channel_ids)} channels")
+                logger.info(f"Already part of {len(joined_channel_ids)} channels")
                i = 0
                pbar = tqdm(desc=f"joining {len(self.channel_invites)} invite links", total=len(self.channel_invites))
@@ -80,22 +80,22 @@ class TelethonExtractor(Extractor):
                            else:
                                ent = self.client.get_entity(invite)  # fails if not a member
                                logger.warning(
-                                    f"please add the property id='{ent.id}' to the 'channel_invites' configuration where {invite=}, not doing so can lead to a minutes-long setup time due to telegram's rate limiting."
+                                    f"Please add the property id='{ent.id}' to the 'channel_invites' configuration where {invite=}, not doing so can lead to a minutes-long setup time due to telegram's rate limiting."
                                )
                        except ValueError:
-                            logger.info(f"joining new channel {invite=}")
+                            logger.info(f"Joining new channel {invite=}")
                            try:
                                self.client(ImportChatInviteRequest(match.group(2)))
                            except UserAlreadyParticipantError:
-                                logger.info(f"already joined {invite=}")
+                                logger.info(f"Already joined {invite=}")
                            except InviteRequestSentError:
-                                logger.warning(f"already sent a join request with {invite} still no answer")
+                                logger.warning(f"Already sent a join request with {invite} still no answer")
                            except InviteHashExpiredError:
                                logger.warning(f"{invite=} has expired please find a more recent one")
                            except Exception as e:
-                                logger.error(f"could not join channel with {invite=} due to {e}")
+                                logger.error(f"Could not join channel with {invite=} due to {e}")
                        except FloodWaitError as e:
-                            logger.warning(f"got a flood error, need to wait {e.seconds} seconds")
+                            logger.warning(f"Got a flood error, need to wait {e.seconds} seconds")
                            time.sleep(e.seconds)
                            continue
                    else:
@@ -117,7 +117,7 @@ class TelethonExtractor(Extractor):
        url = item.get_url()
        # detect URLs that we definitely cannot handle
        match = self.valid_url.search(url)
-        logger.debug(f"TELETHON: {match=}")
+        logger.debug(f"Found telethon url {match=}")
        if not match:
            return False
@@ -135,10 +135,10 @@ class TelethonExtractor(Extractor):
                try:
                    stories = self.client(functions.stories.GetStoriesByIDRequest(peer=chat, id=[post_id]))
                    if not stories.stories:
-                        logger.info(f"No stories found for {url}, possibly it's private or the story has expired.")
+                        logger.info("No stories found, possibly it's private or the story has expired.")
                        return False
                    story = stories.stories[0]
-                    logger.debug(f"TELETHON got story {story.id=} {story.date=} {story.expire_date=}")
+                    logger.debug(f"Got story {story.id=} {story.date=} {story.expire_date=}")
                    result.set_timestamp(story.date).set("views", story.views.to_dict()).set(
                        "expire_date", story.expire_date
                    )
@@ -154,20 +154,20 @@ class TelethonExtractor(Extractor):
                try:
                    post = self.client.get_messages(chat, ids=post_id)
                except ValueError as e:
-                    logger.error(f"Could not fetch telegram {url} possibly it's private: {e}")
+                    logger.error(f"Could not fetch telegram URL possibly it's private: {e}")
                    return False
                except ChannelInvalidError as e:
                    logger.error(
-                        f"Could not fetch telegram {url}. This error may be fixed if you setup a bot_token in addition to api_id and api_hash (but then private channels will not be archived, we need to update this logic to handle both): {e}"
+                        f"Could not fetch telegram URL. This error may be fixed if you setup a bot_token in addition to api_id and api_hash (but then private channels will not be archived, we need to update this logic to handle both): {e}"
                    )
                    return False
-                logger.debug(f"TELETHON got post {post=}")
+                logger.debug(f"Got post {post=}")
                if post is None:
                    return False
                media_posts = self._get_media_posts_in_group(chat, post)
-                logger.debug(f"got {len(media_posts)=} for {url=}")
+                logger.debug(f"Got {len(media_posts)=}")
                group_id = post.grouped_id if post.grouped_id is not None else post.id
                title = post.message
--- a/src/auto_archiver/modules/thumbnail_enricher/thumbnail_enricher.py
+++ b/src/auto_archiver/modules/thumbnail_enricher/thumbnail_enricher.py
@@ -27,12 +27,12 @@ class ThumbnailEnricher(Enricher):
        Calculates how many thumbnails to generate and at which timestamps based on the video duration, the number of thumbnails per minute and the max number of thumbnails.
        Thumbnails are equally distributed across the video duration.
        """
-        logger.debug(f"generating thumbnails for {to_enrich.get_url()}")
+        logger.debug("Generating thumbnails")
        for m_id, m in enumerate(to_enrich.media[::]):
            if m.is_video():
                folder = os.path.join(self.tmp_dir, random_str(24))
                os.makedirs(folder, exist_ok=True)
-                logger.debug(f"generating thumbnails for {m.filename}")
+                logger.debug(f"Generating thumbnails for {m.filename}")
                duration = m.get("duration")
                try:
@@ -42,10 +42,10 @@ class ThumbnailEnricher(Enricher):
                    )
                    to_enrich.media[m_id].set("duration", duration)
                except Exception as e:
-                    logger.warning(f"failed to get duration with FFMPEG from {m.filename}: {e}")
+                    logger.warning(f"Failed to get duration with FFMPEG from {m.filename}: {e}")
                if not duration or type(duration) not in [float, int] or duration <= 0:
-                    logger.warning(f"cannot generate thumbnails for {m.filename} without valid duration")
+                    logger.warning(f"Cannot generate thumbnails for {m.filename} without valid duration")
                    continue
                num_thumbs = int(min(max(1, (duration / 60) * self.thumbnails_per_minute), self.max_thumbnails))
--- a/src/auto_archiver/modules/timestamping_enricher/timestamping_enricher.py
+++ b/src/auto_archiver/modules/timestamping_enricher/timestamping_enricher.py
@@ -49,8 +49,7 @@ class TimestampingEnricher(Enricher):
            self.session.close()
    def enrich(self, to_enrich: Metadata) -> None:
-        url = to_enrich.get_url()
+        logger.debug(f"RFC3161 timestamping existing files")
        logger.debug(f"RFC3161 timestamping existing files for {url=}")
        # create a new text file with the existing media hashes
        hashes = [
@@ -58,7 +57,7 @@ class TimestampingEnricher(Enricher):
        ]
        if not len(hashes):
-            logger.debug(f"No hashes found in {url=}")
+            logger.debug(f"No hashes found")
            return
@@ -74,7 +73,7 @@ class TimestampingEnricher(Enricher):
            try:
                message = bytes(data_to_sign, encoding='utf8')
-                logger.debug(f"Timestamping {url=} with {tsa_url=}")
+                logger.debug(f"Timestamping with {tsa_url=}")
                signed: TimeStampResponse = self.sign_data(tsa_url, message)
                # fail if there's any issue with the certificates, uses certifi list of trusted CAs or the user-defined `cert_authorities`
@@ -92,7 +91,7 @@ class TimestampingEnricher(Enricher):
                timestamp_token_path = self.save_timestamp_token(signed.time_stamp_token(), tsa_url)
                timestamp_tokens.append(Media(filename=timestamp_token_path).set("tsa", tsa_url).set("cert_chain", cert_chain))
            except Exception as e:
-                logger.warning(f"Error while timestamping {url=} with {tsa_url=}: {e}")
+                logger.warning(f"Error while timestamping with {tsa_url=}: {e}")
        if len(timestamp_tokens):
            hashes_media.set("timestamp_authority_files", timestamp_tokens)
@@ -101,9 +100,9 @@ class TimestampingEnricher(Enricher):
            hashes_media.set("cryptography v", version("cryptography"))
            to_enrich.add_media(hashes_media, id="timestamped_hashes")
            to_enrich.set("timestamped", True)
-            logger.info(f"{len(timestamp_tokens)} timestamp tokens created for {url=}")
+            logger.info(f"{len(timestamp_tokens)} timestamp tokens created")
        else:
-            logger.warning(f"No successful timestamps for {url=}")
+            logger.warning(f"No successful timestamps found")
    def save_timestamp_token(self, timestamp_token: bytes, tsa_url: str) -> str:
        """
--- a/src/auto_archiver/modules/twitter_api_extractor/twitter_api_extractor.py
+++ b/src/auto_archiver/modules/twitter_api_extractor/twitter_api_extractor.py
@@ -45,10 +45,9 @@ class TwitterApiExtractor(Extractor):
        if "https://t.co/" in url:
            try:
                r = requests.get(url, timeout=30)
                logger.debug(f"Expanded url {url} to {r.url}")
                url = r.url
-            except Exception:
+            except Exception as e:
-                logger.error(f"Failed to expand url {url}")
+                logger.error(f"Failed to expand Twitter URL: {e}")
        return url
    def download(self, item: Metadata) -> Metadata:
@@ -67,7 +66,7 @@ class TwitterApiExtractor(Extractor):
            return False, False
        username, tweet_id = matches[0]  # only one URL supported
-        logger.debug(f"Found {username=} and {tweet_id=} in {url=}")
+        logger.debug(f"Found {username=} and {tweet_id=}")
        return username, tweet_id
@@ -85,7 +84,7 @@ class TwitterApiExtractor(Extractor):
                media_fields=["type", "duration_ms", "url", "variants"],
                tweet_fields=["attachments", "author_id", "created_at", "entities", "id", "text", "possibly_sensitive"],
            )
-            logger.debug(tweet)
+            logger.debug(f"Got {tweet=}")
        except Exception as e:
            logger.error(f"Could not get tweet: {e}")
            return False
--- a/src/auto_archiver/modules/wacz_extractor_enricher/wacz_extractor_enricher.py
+++ b/src/auto_archiver/modules/wacz_extractor_enricher/wacz_extractor_enricher.py
@@ -94,7 +94,7 @@ class WaczExtractorEnricher(Enricher, Extractor):
        # call docker if explicitly enabled or we are running on the host (not in docker)
        if self.use_docker:
-            logger.debug(f"generating WACZ in Docker for {url=}")
+            logger.debug("Generating WACZ in Docker")
            logger.debug(f"{browsertrix_home_host=} {browsertrix_home_container=}")
            if self.docker_commands:
                cmd = self.docker_commands + cmd
@@ -111,12 +111,12 @@ class WaczExtractorEnricher(Enricher, Extractor):
            if self.profile:
                profile_file = f"profile-{self.crawl_id}.tar.gz"
                profile_fn = os.path.join(browsertrix_home_container, profile_file)
-                logger.debug(f"copying {self.profile} to {profile_fn}")
+                logger.debug(f"Copying {self.profile} to {profile_fn}")
                shutil.copyfile(self.profile, profile_fn)
                cmd.extend(["--profile", os.path.join("/crawls", profile_file)])
        else:
-            logger.debug(f"generating WACZ without Docker for {url=}")
+            logger.debug("Generating WACZ without Docker")
            if self.profile:
                cmd.extend(["--profile", os.path.join("/app", str(self.profile))])
--- a/src/auto_archiver/modules/wayback_extractor_enricher/wayback_extractor_enricher.py
+++ b/src/auto_archiver/modules/wayback_extractor_enricher/wayback_extractor_enricher.py
@@ -31,15 +31,15 @@ class WaybackExtractorEnricher(Enricher, Extractor):
        url = to_enrich.get_url()
        if UrlUtil.is_auth_wall(url):
-            logger.debug(f"[SKIP] WAYBACK since url is behind AUTH WALL: {url=}")
+            logger.debug("[SKIP] WAYBACK since url is behind AUTH WALL")
            return
        logger.debug(f"calling wayback for {url=}")
        if to_enrich.get("wayback"):
            logger.info(f"Wayback enricher had already been executed: {to_enrich.get('wayback')}")
            return True
        logger.debug("Calling Wayback")
        ia_headers = {"Accept": "application/json", "Authorization": f"LOW {self.key}:{self.secret}"}
        post_data = {"url": url}
        if self.if_not_archived_within:
@@ -68,7 +68,7 @@ class WaybackExtractorEnricher(Enricher, Extractor):
        attempt = 1
        while not wayback_url and time.time() - start_time <= self.timeout:
            try:
-                logger.debug(f"GETting status for {job_id=} on {url=} ({attempt=})")
+                logger.debug(f"GETting status for {job_id=} ({attempt=})")
                r_status = requests.get(
                    f"https://web.archive.org/save/status/{job_id}", headers=ia_headers, proxies=proxies
                )
@@ -79,13 +79,13 @@ class WaybackExtractorEnricher(Enricher, Extractor):
                    logger.error(f"Wayback failed with {r_json}")
                    return False
            except requests.exceptions.RequestException as e:
-                logger.warning(f"RequestException: fetching status for {url=} due to: {e}")
+                logger.warning(f"RequestException: fetching status due to: {e}")
                break
            except json.decoder.JSONDecodeError:
-                logger.error(f"Expected a JSON from Wayback and got {r.text} for {url=}")
+                logger.error(f"Expected a JSON from Wayback and got {r.text}")
                break
            except Exception as e:
-                logger.warning(f"error fetching status for {url=} due to: {e}")
+                logger.warning(f"error fetching status due to: {e}")
            if not wayback_url:
                attempt += 1
                time.sleep(1)  # TODO: can be improved with exponential backoff
--- a/src/auto_archiver/modules/whisper_enricher/whisper_enricher.py
+++ b/src/auto_archiver/modules/whisper_enricher/whisper_enricher.py
@@ -25,7 +25,7 @@ class WhisperEnricher(Enricher):
    def enrich(self, to_enrich: Metadata) -> None:
        url = to_enrich.get_url()
-        logger.debug(f"WHISPER[{self.action}]: iterating media items for {url=}.")
+        logger.debug(f"WHISPER[{self.action}]: iterating media items")
        job_results = {}
        for i, m in enumerate(to_enrich.media):
@@ -35,7 +35,7 @@ class WhisperEnricher(Enricher):
                try:
                    job_id = self.submit_job(m)
                    job_results[job_id] = False
-                    logger.debug(f"JOB SUBMITTED: {job_id=} for {m.key=}")
+                    logger.debug(f"Job submitted: {job_id=} for {m.key=}")
                    to_enrich.media[i].set("whisper_model", {"job_id": job_id})
                except Exception as e:
                    logger.error(
@@ -72,14 +72,14 @@ class WhisperEnricher(Enricher):
            "type": self.action,
            # "language": "string" # may be a config
        }
-        logger.debug(f"calling API with {payload=}")
+        logger.debug(f"Calling API with {payload=}")
        response = requests.post(
            f"{self.api_endpoint}/jobs", json=payload, headers={"Authorization": f"Bearer {self.api_key}"}
        )
        assert response.status_code == 201, (
            f"calling the whisper api {self.api_endpoint} returned a non-success code: {response.status_code}"
        )
-        logger.debug(response.json())
+        logger.debug(f"Response from whisper API: {response.json()}")
        return response.json()["id"]
    def check_jobs(self, job_results: dict):
@@ -115,7 +115,7 @@ class WhisperEnricher(Enricher):
            assert r_res.status_code == 200, (
                f"Job artifacts did not respond with 200, instead with: {r_res.status_code}"
            )
-            logger.success(r_res.json())
+            logger.info(f"Job {job_id} completed successfully:{r_res.json()}")
            result = {}
            for art_id, artifact in enumerate(r_res.json()):
                subtitle = []
--- a/src/auto_archiver/utils/misc.py
+++ b/src/auto_archiver/utils/misc.py
@@ -6,7 +6,6 @@ import uuid
 from datetime import datetime, timezone
 from dateutil.parser import parse as parse_dt
 import requests
 from auto_archiver.utils.custom_logger import logger
@@ -15,18 +14,6 @@ def mkdir_if_not_exists(folder):
        os.makedirs(folder)
 def expand_url(url):
    # expand short URL links
    if "https://t.co/" in url:
        try:
            r = requests.get(url)
            logger.debug(f"Expanded url {url} to {r.url}")
            return r.url
        except Exception:
            logger.error(f"Failed to expand url {url}")
    return url
 def getattr_or(o: object, prop: str, default=None):
    try:
        res = getattr(o, prop)
--- a/tests/databases/test_api_db.py
+++ b/tests/databases/test_api_db.py
@@ -29,7 +29,7 @@ def test_fetch_fail_status(api_db, metadata, mocker):
    mock_get = mocker.patch("auto_archiver.modules.api_db.api_db.requests.get")
    mock_get.return_value.status_code = 400
    mock_get.return_value.json.return_value = {}
-    mock_error = mocker.patch("loguru.logger.error")
+    mock_error = mocker.patch("auto_archiver.utils.custom_logger.logger.error")
    assert api_db.fetch(metadata) is False
    mock_error.assert_called_once_with("AA API FAIL (400): {}")
--- a/tests/enrichers/test_meta_enricher.py
+++ b/tests/enrichers/test_meta_enricher.py
@@ -33,7 +33,6 @@ def test_enrich_skips_empty_metadata(meta_enricher, mock_metadata):
    """Test that enrich() does nothing when Metadata is empty."""
    mock_metadata.is_empty.return_value = True
    meta_enricher.enrich(mock_metadata)
    mock_metadata.get_url.assert_called_once()
 def test_enrich_file_sizes(meta_enricher, metadata, tmp_path):
--- a/tests/enrichers/test_metadata_enricher.py
+++ b/tests/enrichers/test_metadata_enricher.py
@@ -65,7 +65,7 @@ def test_enrich_empty_media(enricher, mocker):
 def test_get_metadata_error_handling(enricher, mocker):
    mocker.patch("subprocess.run", side_effect=Exception("Test error"))
-    mock_log = mocker.patch("loguru.logger.error")
+    mock_log = mocker.patch("auto_archiver.utils.custom_logger.logger.error")
    result = enricher.get_metadata("test.jpg")
    assert result == {}
    assert "Error occurred: " in mock_log.call_args[0][0]
--- a/tests/enrichers/test_pdq_hash_enricher.py
+++ b/tests/enrichers/test_pdq_hash_enricher.py
@@ -43,7 +43,7 @@ def test_enrich_skip_non_image(metadata_with_images, mocker):
 def test_enrich_handles_corrupted_image(metadata_with_images, mocker):
    mocker.patch("PIL.Image.open", side_effect=UnidentifiedImageError("Corrupted image"))
    mock_pdq = mocker.patch("pdqhash.compute")
-    mock_logger = mocker.patch("loguru.logger.error")
+    mock_logger = mocker.patch("auto_archiver.utils.custom_logger.logger.error")
    enricher = PdqHashEnricher()
    enricher.enrich(metadata_with_images)
--- a/tests/enrichers/test_thumbnail_enricher.py
+++ b/tests/enrichers/test_thumbnail_enricher.py
@@ -75,12 +75,12 @@ def test_enrich_thumbnail_limits(
 def test_enrich_handles_probe_failure(thumbnail_enricher, metadata_with_video, mocker):
    mocker.patch("ffmpeg.probe", side_effect=Exception("Probe error"))
    mocker.patch("os.makedirs")
-    mock_logger = mocker.patch("loguru.logger.warning")
+    mock_logger = mocker.patch("auto_archiver.utils.custom_logger.logger.warning")
    mocker.patch.object(Media, "is_video", return_value=True)
    thumbnail_enricher.enrich(metadata_with_video)
    # Ensure error was logged
-    mock_logger.assert_called_with("cannot generate thumbnails for video.mp4 without valid duration")
+    mock_logger.assert_called_with("Cannot generate thumbnails for video.mp4 without valid duration")
    # Ensure no thumbnails were created
    thumbnails = metadata_with_video.media[0].get("thumbnails")
    assert thumbnails is None
@@ -128,12 +128,12 @@ def test_enrich_handles_short_video(
 def test_uses_existing_duration_on_exception(thumbnail_enricher, metadata_with_video, mock_ffmpeg_environment, mocker):
-    mock_logger = mocker.patch("loguru.logger.warning")
+    mock_logger = mocker.patch("auto_archiver.utils.custom_logger.logger.warning")
    mock_probe = mocker.patch("ffmpeg.probe", side_effect=Exception("New probe error"))
    metadata_with_video.media[0].set("duration", 3)
    thumbnail_enricher.enrich(metadata_with_video)
    mock_probe.assert_called_once()
-    mock_logger.assert_called_with("failed to get duration with FFMPEG from video.mp4: New probe error")
+    mock_logger.assert_called_with("Failed to get duration with FFMPEG from video.mp4: New probe error")
    assert mock_ffmpeg_environment["mock_output"].run.call_count == 3
--- a/tests/enrichers/test_wacz_enricher.py
+++ b/tests/enrichers/test_wacz_enricher.py
@@ -46,7 +46,7 @@ def test_setup_with_docker(wacz_enricher, mocker):
 def test_already_ran(wacz_enricher, metadata, mocker):
    metadata.add_media(Media("test.wacz"), id="browsertrix")
-    mock_log = mocker.patch("loguru.logger.info")
+    mock_log = mocker.patch("auto_archiver.utils.custom_logger.logger.info")
    assert wacz_enricher.enrich(metadata) is True
    assert "WACZ enricher had already been executed" in mock_log.call_args[0][0]
@@ -73,7 +73,7 @@ def test_download_success(wacz_enricher, mocker) -> None:
 def test_enrich_already_executed(wacz_enricher, mocker) -> None:
    """Test enrich  if already executed."""
-    mock_log = mocker.patch("loguru.logger.info")
+    mock_log = mocker.patch("auto_archiver.utils.custom_logger.logger.info")
    metadata = Metadata().set_url("https://example.com")
    media = Media(filename="some_file.wacz")
    metadata.add_media(media, id="browsertrix")
--- a/tests/test_orchestrator.py
+++ b/tests/test_orchestrator.py
@@ -118,8 +118,7 @@ def test_check_required_values(orchestrator, caplog, test_args):
    with pytest.raises(SystemExit):
        orchestrator.setup_config(test_args)
-
+    assert "the following arguments are required: --example_module.required_field" in caplog.records[0].message
    assert caplog.records[1].message == "the following arguments are required: --example_module.required_field"
 def test_get_required_values_from_config(orchestrator, test_args, tmp_path):
--- a/tests/utils/test_misc.py
+++ b/tests/utils/test_misc.py
@@ -6,7 +6,6 @@ import pytest
 from auto_archiver.utils.misc import (
    mkdir_if_not_exists,
    expand_url,
    getattr_or,
    DateTimeEncoder,
    dump_payload,
@@ -39,26 +38,6 @@ class TestDirectoryUtils:
        assert existing_dir.exists()
 class TestURLExpansion:
    @pytest.mark.parametrize(
        "input_url,expected",
        [("https://example.com", "https://example.com"), ("https://t.co/test", "https://expanded.url")],
    )
    def test_expand_url(self, input_url, expected, mocker):
        mock_response = mocker.Mock()
        mock_response.url = "https://expanded.url"
        mocker.patch("requests.get", return_value=mock_response)
        result = expand_url(input_url)
        assert result == expected
    def test_expand_url_handles_errors(self, caplog, mocker):
        mocker.patch("requests.get", side_effect=Exception("Connection error"))
        url = "https://t.co/error"
        result = expand_url(url)
        assert result == url
        assert f"Failed to expand url {url}" in caplog.text
 class TestAttributeHandling:
    class Sample:
        exists = "value"