Remove ArchivingContext completely

Context for a specific URL/item is now passed around via the metadata (metadata.set_context('key', 'val') and metadata.get_context('key', default='something')). The only other thing that was passed around in ArchivingContext was the storage info, which is already accessible via self.config.
2026-06-13 05:38:29 +03:00 · 2025-01-30 17:50:54 +01:00
parent d76063c3f3
commit c25d5cae84
19 changed files with 59 additions and 122 deletions
--- a/src/auto_archiver/modules/csv_feeder/csv_feeder.py
+++ b/src/auto_archiver/modules/csv_feeder/csv_feeder.py
@@ -2,7 +2,7 @@ from loguru import logger
 import csv

 from auto_archiver.core import Feeder
-from auto_archiver.core import Metadata, ArchivingContext
+from auto_archiver.core import Metadata
 from auto_archiver.utils import url_or_none

 class CSVFeeder(Feeder):
@@ -19,5 +19,4 @@ class CSVFeeder(Feeder):
                for row in reader:
                    url = row[0]
                    logger.debug(f"Processing {url}")
-                    yield Metadata().set_url(url)
-            ArchivingContext.set("folder", "cli")
+                    yield Metadata().set_url(url)
--- a/src/auto_archiver/modules/generic_extractor/generic_extractor.py
+++ b/src/auto_archiver/modules/generic_extractor/generic_extractor.py
@@ -6,7 +6,7 @@ from yt_dlp.extractor.common import InfoExtractor
 from loguru import logger

 from auto_archiver.core.extractor import Extractor
-from ...core import Metadata, Media, ArchivingContext
+from ...core import Metadata, Media

 class GenericExtractor(Extractor):
    _dropins = {}
--- a/src/auto_archiver/modules/gsheet_db/gsheet_db.py
+++ b/src/auto_archiver/modules/gsheet_db/gsheet_db.py
@@ -6,7 +6,7 @@ from urllib.parse import quote
 from loguru import logger

 from auto_archiver.core import Database
-from auto_archiver.core import Metadata, Media, ArchivingContext
+from auto_archiver.core import Metadata, Media
 from auto_archiver.modules.gsheet_feeder import GWorksheet


@@ -93,8 +93,7 @@ class GsheetsDb(Database):
            logger.debug(f"Unable to update sheet: {e}")

    def _retrieve_gsheet(self, item: Metadata) -> Tuple[GWorksheet, int]:
-        # TODO: to make gsheet_db less coupled with gsheet_feeder's "gsheet" parameter, this method could 1st try to fetch "gsheet" from ArchivingContext and, if missing, manage its own singleton - not needed for now
-        if gsheet := ArchivingContext.get("gsheet"):
+        if gsheet := item.get_context("gsheet"):
            gw: GWorksheet = gsheet.get("worksheet")
            row: int = gsheet.get("row")
        elif self.sheet_id:
--- a/src/auto_archiver/modules/gsheet_feeder/gsheet_feeder.py
+++ b/src/auto_archiver/modules/gsheet_feeder/gsheet_feeder.py
@@ -15,7 +15,7 @@ from loguru import logger
 from slugify import slugify

 from auto_archiver.core import Feeder
-from auto_archiver.core import Metadata, ArchivingContext
+from auto_archiver.core import Metadata
 from . import GWorksheet


@@ -60,17 +60,15 @@ class GsheetsFeeder(Feeder):

                # All checks done - archival process starts here
                m = Metadata().set_url(url)
-                ArchivingContext.set("gsheet", {"row": row, "worksheet": gw}, keep_on_reset=True)
                if gw.get_cell_or_default(row, 'folder', "") is None:
                    folder = ''
                else:
                    folder = slugify(gw.get_cell_or_default(row, 'folder', "").strip())
-                if len(folder):
-                    if self.use_sheet_names_in_stored_paths:
-                        ArchivingContext.set("folder", os.path.join(folder, slugify(self.sheet), slugify(wks.title)), True)
-                    else:
-                        ArchivingContext.set("folder", folder, True)
+                if len(folder) and self.use_sheet_names_in_stored_paths:
+                    folder = os.path.join(folder, slugify(self.sheet), slugify(wks.title))

+                m.set_context('folder', folder)
+                m.set_context('worksheet', {"row": row, "worksheet": gw})
                yield m

            logger.success(f'Finished worksheet {wks.title}')
--- a/src/auto_archiver/modules/hash_enricher/hash_enricher.py
+++ b/src/auto_archiver/modules/hash_enricher/hash_enricher.py
@@ -11,7 +11,7 @@ import hashlib
 from loguru import logger

 from auto_archiver.core import Enricher
-from auto_archiver.core import Metadata, ArchivingContext
+from auto_archiver.core import Metadata


 class HashEnricher(Enricher):
--- a/src/auto_archiver/modules/instagram_tbot_extractor/instagram_tbot_extractor.py
+++ b/src/auto_archiver/modules/instagram_tbot_extractor/instagram_tbot_extractor.py
@@ -16,7 +16,7 @@ from loguru import logger
 from telethon.sync import TelegramClient

 from auto_archiver.core import Extractor
-from auto_archiver.core import Metadata, Media, ArchivingContext
+from auto_archiver.core import Metadata, Media
 from auto_archiver.utils import random_str


@@ -61,7 +61,7 @@ class InstagramTbotExtractor(Extractor):
        if not "instagram.com" in url: return False

        result = Metadata()
-        tmp_dir = ArchivingContext.get_tmp_dir()
+        tmp_dir = self.tmp_dir
        with self.client.start():
            chat = self.client.get_entity("instagram_load_bot")
            since_id = self.client.send_message(entity=chat, message=url).id
--- a/src/auto_archiver/modules/ssl_enricher/ssl_enricher.py
+++ b/src/auto_archiver/modules/ssl_enricher/ssl_enricher.py
@@ -4,7 +4,7 @@ from urllib.parse import urlparse
 from loguru import logger

 from auto_archiver.core import Enricher
-from auto_archiver.core import Metadata, ArchivingContext, Media
+from auto_archiver.core import Metadata, Media


 class SSLEnricher(Enricher):
--- a/src/auto_archiver/modules/whisper_enricher/whisper_enricher.py
+++ b/src/auto_archiver/modules/whisper_enricher/whisper_enricher.py
@@ -3,7 +3,7 @@ import requests, time
 from loguru import logger

 from auto_archiver.core import Enricher
-from auto_archiver.core import Metadata, Media, ArchivingContext
+from auto_archiver.core import Metadata, Media
 from auto_archiver.modules.s3_storage import S3Storage
 from auto_archiver.core.module import get_module

@@ -25,7 +25,7 @@ class WhisperEnricher(Enricher):
        job_results = {}
        for i, m in enumerate(to_enrich.media):
            if m.is_video() or m.is_audio():
-                m.store(url=url, metadata=to_enrich)
+                m.store(url=url, metadata=to_enrich, storages=self.storages)
                try:
                    job_id = self.submit_job(m)
                    job_results[job_id] = False
@@ -110,7 +110,7 @@ class WhisperEnricher(Enricher):

    def _get_s3_storage(self) -> S3Storage:
        try:
-            return next(s for s in ArchivingContext.get("storages") if s.__class__ == S3Storage)
+            return next(s for s in self.storages if s.__class__ == S3Storage)
        except:
            logger.warning("No S3Storage instance found in storages")
            return