mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-12 21:28:29 +03:00
creating global context and refactoring tmp_dir logic
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
from .media import Media
|
||||
from .metadata import Metadata
|
||||
from .step import Step
|
||||
from .context import ArchivingContext
|
||||
|
||||
# cannot import ArchivingOrchestrator/Config to avoid circular dep
|
||||
# from .orchestrator import ArchivingOrchestrator
|
||||
|
||||
38
src/auto_archiver/core/context.py
Normal file
38
src/auto_archiver/core/context.py
Normal file
@@ -0,0 +1,38 @@
|
||||
|
||||
class ArchivingContext:
    """
    Singleton context class: a single shared key/value store.

    Use ArchivingContext.get_instance() to retrieve the singleton if needed,
    otherwise just call
        ArchivingContext.set(key, value)
    and
        ArchivingContext.get(key, default)
    """
    _instance = None  # created lazily by get_instance()

    def __init__(self):
        # backing dict for every value placed in the context
        self.configs = {}

    @staticmethod
    def get_instance():
        """Return the shared instance, creating it on first use."""
        if ArchivingContext._instance is None:
            ArchivingContext._instance = ArchivingContext()
        return ArchivingContext._instance

    @staticmethod
    def set(key, value):
        """Store `value` under `key`, replacing any previous value."""
        ArchivingContext.get_instance().configs[key] = value

    @staticmethod
    def get(key: str, default=None):
        """Return the value stored under `key`, or `default` if it is absent."""
        return ArchivingContext.get_instance().configs.get(key, default)

    # ---- custom getters/setters for widely used context values

    @staticmethod
    def set_tmp_dir(tmp_dir: str):
        """Record `tmp_dir` under the "tmp_dir" key."""
        # delegate to the generic setter so there is one write path
        ArchivingContext.set("tmp_dir", tmp_dir)

    @staticmethod
    def get_tmp_dir() -> str:
        """Return the value stored under "tmp_dir" (None if it was never set)."""
        return ArchivingContext.get("tmp_dir")
|
||||
@@ -6,8 +6,9 @@ from dataclasses import dataclass, field
|
||||
from dataclasses_json import dataclass_json
|
||||
import mimetypes
|
||||
|
||||
# annotation order matters
|
||||
@dataclass_json
|
||||
|
||||
|
||||
@dataclass_json # annotation order matters
|
||||
@dataclass
|
||||
class Media:
|
||||
filename: str
|
||||
@@ -40,3 +41,13 @@ class Media:
|
||||
|
||||
def is_video(self) -> bool:
    """Report whether this media's mimetype string begins with "video"."""
    video_prefix = "video"
    return self.mimetype.startswith(video_prefix)
|
||||
|
||||
def is_audio(self) -> bool:
    """Report whether this media's mimetype string begins with "audio"."""
    audio_prefix = "audio"
    return self.mimetype.startswith(audio_prefix)
|
||||
|
||||
def store(self):
    """
    Intended to store this media entry together with all of its media
    descendants, or to return right away when that has already been done.

    Not implemented yet: the body is a no-op and returns None.
    """
    return None
|
||||
|
||||
@@ -3,13 +3,12 @@ from __future__ import annotations
|
||||
from ast import List, Set
|
||||
from typing import Any, Union, Dict
|
||||
from dataclasses import dataclass, field
|
||||
from dataclasses_json import dataclass_json
|
||||
from dataclasses_json import dataclass_json, config
|
||||
import datetime
|
||||
from urllib.parse import urlparse
|
||||
from dateutil.parser import parse as parse_dt
|
||||
from .media import Media
|
||||
|
||||
|
||||
# annotation order matters
|
||||
@dataclass_json
|
||||
@dataclass
|
||||
@@ -17,10 +16,14 @@ class Metadata:
|
||||
status: str = "no archiver"
|
||||
_processed_at: datetime = field(default_factory=datetime.datetime.utcnow)
|
||||
metadata: Dict[str, Any] = field(default_factory=dict)
|
||||
tmp_keys: Set[str] = field(default_factory=set, repr=False, metadata={"exclude": True}) # keys that are not to be saved in DBs
|
||||
media: List[Media] = field(default_factory=list)
|
||||
rearchivable: bool = True # defaults to true, archivers can overwrite
|
||||
|
||||
# properties below are excluded from JSON representation
|
||||
tmp_keys: Set[str] = field(default_factory=set, repr=False, metadata=config(exclude=True))
|
||||
# tmp_metadata: Dict[str, Any] = field(default_factory=dict, repr=False, metadata=config(exclude=True)) # contains internal properties not to be leaked when .to_json/repr/str is called
|
||||
|
||||
|
||||
def merge(self: Metadata, right: Metadata, overwrite_left=True) -> Metadata:
|
||||
"""
|
||||
merges two Metadata instances, will overwrite according to overwrite_left flag
|
||||
@@ -93,12 +96,6 @@ class Metadata:
|
||||
def get_title(self) -> str:
|
||||
return self.get("title")
|
||||
|
||||
def set_tmp_dir(self, tmp_dir: str) -> Metadata:
|
||||
return self.set("tmp_dir", tmp_dir, True)
|
||||
|
||||
def get_tmp_dir(self) -> str:
|
||||
return self.get("tmp_dir")
|
||||
|
||||
def set_timestamp(self, timestamp: datetime.datetime) -> Metadata:
|
||||
if type(timestamp) == str:
|
||||
timestamp = parse_dt(timestamp)
|
||||
@@ -144,3 +141,7 @@ class Metadata:
|
||||
{k: v for k, v in self.metadata.items() if k not in self.tmp_keys},
|
||||
**{"processed_at": self._processed_at}
|
||||
)
|
||||
|
||||
def __str__(self) -> str:
|
||||
return self.__repr__()
|
||||
|
||||
@@ -2,6 +2,8 @@ from __future__ import annotations
|
||||
from ast import List
|
||||
from typing import Union
|
||||
|
||||
from .context import ArchivingContext
|
||||
|
||||
from ..archivers import Archiver
|
||||
from ..feeders import Feeder
|
||||
from ..formatters import Formatter
|
||||
@@ -23,6 +25,7 @@ class ArchivingOrchestrator:
|
||||
self.archivers: List[Archiver] = config.archivers
|
||||
self.databases: List[Database] = config.databases
|
||||
self.storages: List[Storage] = config.storages
|
||||
ArchivingContext.set("storages", self.storages)
|
||||
|
||||
for a in self.archivers: a.setup()
|
||||
|
||||
@@ -33,7 +36,7 @@ class ArchivingOrchestrator:
|
||||
def feed_item(self, item: Metadata) -> Metadata:
|
||||
try:
|
||||
with tempfile.TemporaryDirectory(dir="./") as tmp_dir:
|
||||
item.set_tmp_dir(tmp_dir)
|
||||
ArchivingContext.set_tmp_dir(tmp_dir)
|
||||
return self.archive(item)
|
||||
except KeyboardInterrupt:
|
||||
# catches keyboard interruptions to do a clean exit
|
||||
|
||||
Reference in New Issue
Block a user