Archiving Context refactor complete

This commit is contained in:
msramalho
2023-03-23 14:28:45 +00:00
parent 906ed0f6e0
commit 6f6eb2db7a
11 changed files with 96 additions and 55 deletions

View File

@@ -1,5 +1,5 @@
from .media import Media
from .metadata import Metadata
from .media import Media
from .step import Step
from .context import ArchivingContext

View File

@@ -1,3 +1,5 @@
from loguru import logger
class ArchivingContext:
"""
@@ -7,11 +9,15 @@ class ArchivingContext:
ArchivingContext.set(key, value)
and
ArchivingContext.get(key, default)
When reset is called, all values are cleared EXCEPT if they were .set(keep_on_reset=True)
reset(full_reset=True) will recreate everything including the keep_on_reset status
"""
_instance = None
def __init__(self):
self.configs = {}
self.keep_on_reset = set()
@staticmethod
def get_instance():
@@ -20,13 +26,22 @@ class ArchivingContext:
return ArchivingContext._instance
@staticmethod
def set(key, value):
ArchivingContext.get_instance().configs[key] = value
def set(key, value, keep_on_reset: bool = False):
    """
    Store ``value`` under ``key`` in the shared ArchivingContext instance.

    When ``keep_on_reset`` is True the key is also added to the instance's
    ``keep_on_reset`` set, which ``reset()`` uses to decide which values to
    keep. Passing ``keep_on_reset=False`` never removes a key that was
    added to that set by an earlier call.
    """
    # trace-level information: logged at DEBUG so that ordinary writes to
    # the context are not reported as errors
    logger.debug(f"SET [{key}]={value}")
    ac = ArchivingContext.get_instance()
    ac.configs[key] = value
    if keep_on_reset:
        ac.keep_on_reset.add(key)
@staticmethod
def get(key: str, default=None):
    """Return the value stored under ``key`` in the shared context, or ``default`` when the key is absent."""
    stored_values = ArchivingContext.get_instance().configs
    return stored_values.get(key, default)
@staticmethod
def reset(full_reset: bool = False):
    """
    Drop every stored value whose key is not in ``keep_on_reset``.

    With ``full_reset=True`` the ``keep_on_reset`` set is emptied first,
    so no value survives the reset.
    """
    context = ArchivingContext.get_instance()
    if full_reset:
        context.keep_on_reset = set()
    # rebuild the store from the protected keys only
    survivors = {}
    for name, stored in context.configs.items():
        if name in context.keep_on_reset:
            survivors[name] = stored
    context.configs = survivors
# ---- custom getters/setters for widely used context values
@staticmethod

View File

@@ -3,19 +3,43 @@ from __future__ import annotations
from ast import List
from typing import Any
from dataclasses import dataclass, field
from dataclasses_json import dataclass_json
from dataclasses_json import dataclass_json, config
import mimetypes
from .context import ArchivingContext
from loguru import logger
@dataclass_json # annotation order matters
@dataclass_json # annotation order matters
@dataclass
class Media:
filename: str
key: str = None
urls: List[str] = field(default_factory=list)
_mimetype: str = None # eg: image/jpeg
properties: dict = field(default_factory=dict)
_mimetype: str = None # eg: image/jpeg
_stored: bool = field(default=False, repr=False, metadata=config(exclude=True))
def store(self: Media, override_storages: List = None, url: str = "url-not-available"):
    """
    Store this media in every available storage.

    Storages are taken from ``override_storages`` when it is non-empty,
    otherwise from the "storages" entry of the ArchivingContext. For each
    storage, ``storage.store(media, url)`` is called for this media and
    then for any Media found directly in ``self.properties`` values, either
    as a value itself or as an item of a list value (for example
    transformations of the original media). Only one level of nesting is
    visited.

    If no storages are available a warning is logged and nothing is stored.
    """
    storages = override_storages or ArchivingContext.get("storages")
    # truthiness check instead of len(): ArchivingContext.get returns None
    # when "storages" was never set, and len(None) would raise TypeError
    if not storages:
        logger.warning(f"No storages found in local context or provided directly for {self.filename}.")
        return

    for s in storages:
        s.store(self, url)
        # Media can be inside media properties, examples include transformations on original media
        for prop in self.properties.values():
            if isinstance(prop, Media):
                s.store(prop, url)
            elif isinstance(prop, list):
                for prop_media in prop:
                    if isinstance(prop_media, Media):
                        s.store(prop_media, url)
def set(self, key: str, value: Any) -> Media:
self.properties[key] = value
@@ -44,10 +68,3 @@ class Media:
def is_audio(self) -> bool:
    """Tell whether this media's mimetype begins with ``audio``."""
    audio_prefix = "audio"
    return self.mimetype.startswith(audio_prefix)
def store(self):
    """
    Either store this media entry and all its media descendants,
    or return if that process is already completed.

    The body is a no-op: nothing is stored by this method.
    """
    return None

View File

@@ -8,9 +8,10 @@ import datetime
from urllib.parse import urlparse
from dateutil.parser import parse as parse_dt
from .media import Media
from .context import ArchivingContext
# annotation order matters
@dataclass_json
@dataclass_json # annotation order matters
@dataclass
class Metadata:
status: str = "no archiver"
@@ -23,7 +24,6 @@ class Metadata:
tmp_keys: Set[str] = field(default_factory=set, repr=False, metadata=config(exclude=True))
# tmp_metadata: Dict[str, Any] = field(default_factory=dict, repr=False, metadata=config(exclude=True)) # contains internal properties not to be leaked when .to_json/repr/str is called
def merge(self: Metadata, right: Metadata, overwrite_left=True) -> Metadata:
"""
merges two Metadata instances, will overwrite according to overwrite_left flag
@@ -46,6 +46,12 @@ class Metadata:
return right.merge(self)
return self
def store(self: Metadata, override_storages: List = None):
    """
    Call ``.store`` on every media item in ``self.media``.

    The storages handed to each item are ``override_storages`` when it is
    truthy, otherwise the "storages" entry of the ArchivingContext.
    """
    target_storages = override_storages or ArchivingContext.get("storages")
    for item in self.media:
        item.store(override_storages=target_storages)
def set(self, key: str, val: Any, is_tmp=False) -> Metadata:
# if not self.metadata: self.metadata = {}
self.metadata[key] = val
@@ -144,4 +150,3 @@ class Metadata:
def __str__(self) -> str:
return self.__repr__()

View File

@@ -25,7 +25,7 @@ class ArchivingOrchestrator:
self.archivers: List[Archiver] = config.archivers
self.databases: List[Database] = config.databases
self.storages: List[Storage] = config.storages
ArchivingContext.set("storages", self.storages)
ArchivingContext.set("storages", self.storages, keep_on_reset=True)
for a in self.archivers: a.setup()
@@ -35,6 +35,7 @@ class ArchivingOrchestrator:
def feed_item(self, item: Metadata) -> Metadata:
try:
ArchivingContext.reset()
with tempfile.TemporaryDirectory(dir="./") as tmp_dir:
ArchivingContext.set_tmp_dir(tmp_dir)
return self.archive(item)
@@ -108,22 +109,12 @@ class ArchivingOrchestrator:
# 5 - store media
# looks for Media in result.media and also result.media[x].properties (as list or dict values)
for s in self.storages:
for m in result.media:
s.store(m, result) # modifies media
# Media can be inside media properties, examples include transformations on original media
for prop in m.properties.values():
if isinstance(prop, Media):
s.store(prop, result)
if isinstance(prop, list) and len(prop) > 0 and isinstance(prop[0], Media):
for prop_media in prop:
s.store(prop_media, result)
result.store()
# 6 - format and store formatted if needed
# enrichers typically need access to already stored URLs etc
if (final_media := self.formatter.format(result)):
for s in self.storages:
s.store(final_media, result)
final_media.store()
result.set_final_media(final_media)
if result.is_empty():