mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-13 05:38:29 +03:00
Remove ArchivingContext completely
Context for a specific url/item is now passed around via the metadata (metadata.set_context('key', 'val') and metadata.get_context('key', default='something')
The only other thing that was passed around in ArchivingContext was the storage info, which is already accessible now via self.config
This commit is contained in:
@@ -2,7 +2,7 @@ from loguru import logger
|
||||
import csv
|
||||
|
||||
from auto_archiver.core import Feeder
|
||||
from auto_archiver.core import Metadata, ArchivingContext
|
||||
from auto_archiver.core import Metadata
|
||||
from auto_archiver.utils import url_or_none
|
||||
|
||||
class CSVFeeder(Feeder):
|
||||
@@ -19,5 +19,4 @@ class CSVFeeder(Feeder):
|
||||
for row in reader:
|
||||
url = row[0]
|
||||
logger.debug(f"Processing {url}")
|
||||
yield Metadata().set_url(url)
|
||||
ArchivingContext.set("folder", "cli")
|
||||
yield Metadata().set_url(url)
|
||||
@@ -6,7 +6,7 @@ from yt_dlp.extractor.common import InfoExtractor
|
||||
from loguru import logger
|
||||
|
||||
from auto_archiver.core.extractor import Extractor
|
||||
from ...core import Metadata, Media, ArchivingContext
|
||||
from ...core import Metadata, Media
|
||||
|
||||
class GenericExtractor(Extractor):
|
||||
_dropins = {}
|
||||
|
||||
@@ -6,7 +6,7 @@ from urllib.parse import quote
|
||||
from loguru import logger
|
||||
|
||||
from auto_archiver.core import Database
|
||||
from auto_archiver.core import Metadata, Media, ArchivingContext
|
||||
from auto_archiver.core import Metadata, Media
|
||||
from auto_archiver.modules.gsheet_feeder import GWorksheet
|
||||
|
||||
|
||||
@@ -93,8 +93,7 @@ class GsheetsDb(Database):
|
||||
logger.debug(f"Unable to update sheet: {e}")
|
||||
|
||||
def _retrieve_gsheet(self, item: Metadata) -> Tuple[GWorksheet, int]:
|
||||
# TODO: to make gsheet_db less coupled with gsheet_feeder's "gsheet" parameter, this method could 1st try to fetch "gsheet" from ArchivingContext and, if missing, manage its own singleton - not needed for now
|
||||
if gsheet := ArchivingContext.get("gsheet"):
|
||||
if gsheet := item.get_context("gsheet"):
|
||||
gw: GWorksheet = gsheet.get("worksheet")
|
||||
row: int = gsheet.get("row")
|
||||
elif self.sheet_id:
|
||||
|
||||
@@ -15,7 +15,7 @@ from loguru import logger
|
||||
from slugify import slugify
|
||||
|
||||
from auto_archiver.core import Feeder
|
||||
from auto_archiver.core import Metadata, ArchivingContext
|
||||
from auto_archiver.core import Metadata
|
||||
from . import GWorksheet
|
||||
|
||||
|
||||
@@ -60,17 +60,15 @@ class GsheetsFeeder(Feeder):
|
||||
|
||||
# All checks done - archival process starts here
|
||||
m = Metadata().set_url(url)
|
||||
ArchivingContext.set("gsheet", {"row": row, "worksheet": gw}, keep_on_reset=True)
|
||||
if gw.get_cell_or_default(row, 'folder', "") is None:
|
||||
folder = ''
|
||||
else:
|
||||
folder = slugify(gw.get_cell_or_default(row, 'folder', "").strip())
|
||||
if len(folder):
|
||||
if self.use_sheet_names_in_stored_paths:
|
||||
ArchivingContext.set("folder", os.path.join(folder, slugify(self.sheet), slugify(wks.title)), True)
|
||||
else:
|
||||
ArchivingContext.set("folder", folder, True)
|
||||
if len(folder) and self.use_sheet_names_in_stored_paths:
|
||||
folder = os.path.join(folder, slugify(self.sheet), slugify(wks.title))
|
||||
|
||||
m.set_context('folder', folder)
|
||||
m.set_context('worksheet', {"row": row, "worksheet": gw})
|
||||
yield m
|
||||
|
||||
logger.success(f'Finished worksheet {wks.title}')
|
||||
|
||||
@@ -11,7 +11,7 @@ import hashlib
|
||||
from loguru import logger
|
||||
|
||||
from auto_archiver.core import Enricher
|
||||
from auto_archiver.core import Metadata, ArchivingContext
|
||||
from auto_archiver.core import Metadata
|
||||
|
||||
|
||||
class HashEnricher(Enricher):
|
||||
|
||||
@@ -16,7 +16,7 @@ from loguru import logger
|
||||
from telethon.sync import TelegramClient
|
||||
|
||||
from auto_archiver.core import Extractor
|
||||
from auto_archiver.core import Metadata, Media, ArchivingContext
|
||||
from auto_archiver.core import Metadata, Media
|
||||
from auto_archiver.utils import random_str
|
||||
|
||||
|
||||
@@ -61,7 +61,7 @@ class InstagramTbotExtractor(Extractor):
|
||||
if not "instagram.com" in url: return False
|
||||
|
||||
result = Metadata()
|
||||
tmp_dir = ArchivingContext.get_tmp_dir()
|
||||
tmp_dir = self.tmp_dir
|
||||
with self.client.start():
|
||||
chat = self.client.get_entity("instagram_load_bot")
|
||||
since_id = self.client.send_message(entity=chat, message=url).id
|
||||
|
||||
@@ -4,7 +4,7 @@ from urllib.parse import urlparse
|
||||
from loguru import logger
|
||||
|
||||
from auto_archiver.core import Enricher
|
||||
from auto_archiver.core import Metadata, ArchivingContext, Media
|
||||
from auto_archiver.core import Metadata, Media
|
||||
|
||||
|
||||
class SSLEnricher(Enricher):
|
||||
|
||||
@@ -3,7 +3,7 @@ import requests, time
|
||||
from loguru import logger
|
||||
|
||||
from auto_archiver.core import Enricher
|
||||
from auto_archiver.core import Metadata, Media, ArchivingContext
|
||||
from auto_archiver.core import Metadata, Media
|
||||
from auto_archiver.modules.s3_storage import S3Storage
|
||||
from auto_archiver.core.module import get_module
|
||||
|
||||
@@ -25,7 +25,7 @@ class WhisperEnricher(Enricher):
|
||||
job_results = {}
|
||||
for i, m in enumerate(to_enrich.media):
|
||||
if m.is_video() or m.is_audio():
|
||||
m.store(url=url, metadata=to_enrich)
|
||||
m.store(url=url, metadata=to_enrich, storages=self.storages)
|
||||
try:
|
||||
job_id = self.submit_job(m)
|
||||
job_results[job_id] = False
|
||||
@@ -110,7 +110,7 @@ class WhisperEnricher(Enricher):
|
||||
|
||||
def _get_s3_storage(self) -> S3Storage:
|
||||
try:
|
||||
return next(s for s in ArchivingContext.get("storages") if s.__class__ == S3Storage)
|
||||
return next(s for s in self.storages if s.__class__ == S3Storage)
|
||||
except:
|
||||
logger.warning("No S3Storage instance found in storages")
|
||||
return
|
||||
|
||||
Reference in New Issue
Block a user