Remove ArchivingContext completely

Context for a specific URL/item is now passed around via the metadata, using metadata.set_context('key', 'val') and metadata.get_context('key', default='something').
The only other thing that was passed around in ArchivingContext was the storage info, which is now already accessible via self.config.
This commit is contained in:
Patrick Robertson
2025-01-30 17:50:54 +01:00
parent d76063c3f3
commit c25d5cae84
19 changed files with 59 additions and 122 deletions

View File

@@ -2,7 +2,7 @@ from loguru import logger
import csv
from auto_archiver.core import Feeder
from auto_archiver.core import Metadata, ArchivingContext
from auto_archiver.core import Metadata
from auto_archiver.utils import url_or_none
class CSVFeeder(Feeder):
@@ -19,5 +19,4 @@ class CSVFeeder(Feeder):
for row in reader:
url = row[0]
logger.debug(f"Processing {url}")
yield Metadata().set_url(url)
ArchivingContext.set("folder", "cli")
yield Metadata().set_url(url)

View File

@@ -6,7 +6,7 @@ from yt_dlp.extractor.common import InfoExtractor
from loguru import logger
from auto_archiver.core.extractor import Extractor
from ...core import Metadata, Media, ArchivingContext
from ...core import Metadata, Media
class GenericExtractor(Extractor):
_dropins = {}

View File

@@ -6,7 +6,7 @@ from urllib.parse import quote
from loguru import logger
from auto_archiver.core import Database
from auto_archiver.core import Metadata, Media, ArchivingContext
from auto_archiver.core import Metadata, Media
from auto_archiver.modules.gsheet_feeder import GWorksheet
@@ -93,8 +93,7 @@ class GsheetsDb(Database):
logger.debug(f"Unable to update sheet: {e}")
def _retrieve_gsheet(self, item: Metadata) -> Tuple[GWorksheet, int]:
# TODO: to make gsheet_db less coupled with gsheet_feeder's "gsheet" parameter, this method could 1st try to fetch "gsheet" from ArchivingContext and, if missing, manage its own singleton - not needed for now
if gsheet := ArchivingContext.get("gsheet"):
if gsheet := item.get_context("gsheet"):
gw: GWorksheet = gsheet.get("worksheet")
row: int = gsheet.get("row")
elif self.sheet_id:

View File

@@ -15,7 +15,7 @@ from loguru import logger
from slugify import slugify
from auto_archiver.core import Feeder
from auto_archiver.core import Metadata, ArchivingContext
from auto_archiver.core import Metadata
from . import GWorksheet
@@ -60,17 +60,15 @@ class GsheetsFeeder(Feeder):
# All checks done - archival process starts here
m = Metadata().set_url(url)
ArchivingContext.set("gsheet", {"row": row, "worksheet": gw}, keep_on_reset=True)
if gw.get_cell_or_default(row, 'folder', "") is None:
folder = ''
else:
folder = slugify(gw.get_cell_or_default(row, 'folder', "").strip())
if len(folder):
if self.use_sheet_names_in_stored_paths:
ArchivingContext.set("folder", os.path.join(folder, slugify(self.sheet), slugify(wks.title)), True)
else:
ArchivingContext.set("folder", folder, True)
if len(folder) and self.use_sheet_names_in_stored_paths:
folder = os.path.join(folder, slugify(self.sheet), slugify(wks.title))
m.set_context('folder', folder)
m.set_context('worksheet', {"row": row, "worksheet": gw})
yield m
logger.success(f'Finished worksheet {wks.title}')

View File

@@ -11,7 +11,7 @@ import hashlib
from loguru import logger
from auto_archiver.core import Enricher
from auto_archiver.core import Metadata, ArchivingContext
from auto_archiver.core import Metadata
class HashEnricher(Enricher):

View File

@@ -16,7 +16,7 @@ from loguru import logger
from telethon.sync import TelegramClient
from auto_archiver.core import Extractor
from auto_archiver.core import Metadata, Media, ArchivingContext
from auto_archiver.core import Metadata, Media
from auto_archiver.utils import random_str
@@ -61,7 +61,7 @@ class InstagramTbotExtractor(Extractor):
if not "instagram.com" in url: return False
result = Metadata()
tmp_dir = ArchivingContext.get_tmp_dir()
tmp_dir = self.tmp_dir
with self.client.start():
chat = self.client.get_entity("instagram_load_bot")
since_id = self.client.send_message(entity=chat, message=url).id

View File

@@ -4,7 +4,7 @@ from urllib.parse import urlparse
from loguru import logger
from auto_archiver.core import Enricher
from auto_archiver.core import Metadata, ArchivingContext, Media
from auto_archiver.core import Metadata, Media
class SSLEnricher(Enricher):

View File

@@ -3,7 +3,7 @@ import requests, time
from loguru import logger
from auto_archiver.core import Enricher
from auto_archiver.core import Metadata, Media, ArchivingContext
from auto_archiver.core import Metadata, Media
from auto_archiver.modules.s3_storage import S3Storage
from auto_archiver.core.module import get_module
@@ -25,7 +25,7 @@ class WhisperEnricher(Enricher):
job_results = {}
for i, m in enumerate(to_enrich.media):
if m.is_video() or m.is_audio():
m.store(url=url, metadata=to_enrich)
m.store(url=url, metadata=to_enrich, storages=self.storages)
try:
job_id = self.submit_job(m)
job_results[job_id] = False
@@ -110,7 +110,7 @@ class WhisperEnricher(Enricher):
def _get_s3_storage(self) -> S3Storage:
try:
return next(s for s in ArchivingContext.get("storages") if s.__class__ == S3Storage)
return next(s for s in self.storages if s.__class__ == S3Storage)
except:
logger.warning("No S3Storage instance found in storages")
return