mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-11 12:48:28 +03:00
removes rearchiving logic
This commit is contained in:
@@ -16,7 +16,6 @@ class Metadata:
|
||||
status: str = "no archiver"
|
||||
metadata: Dict[str, Any] = field(default_factory=dict)
|
||||
media: List[Media] = field(default_factory=list)
|
||||
rearchivable: bool = True # defaults to true, archivers can overwrite
|
||||
|
||||
def __post_init__(self):
|
||||
self.set("_processed_at", datetime.datetime.utcnow())
|
||||
@@ -29,7 +28,6 @@ class Metadata:
|
||||
if overwrite_left:
|
||||
if right.status and len(right.status):
|
||||
self.status = right.status
|
||||
self.rearchivable |= right.rearchivable
|
||||
for k, v in right.metadata.items():
|
||||
assert k not in self.metadata or type(v) == type(self.get(k))
|
||||
if type(v) not in [dict, list, set] or k not in self.metadata:
|
||||
|
||||
@@ -62,11 +62,7 @@ class ArchivingOrchestrator:
|
||||
result.set_url(url)
|
||||
if original_url != url: result.set("original_url", original_url)
|
||||
|
||||
# 2 - rearchiving logic + notify start to DB
|
||||
# archivers can signal whether the content is rearchivable: eg: tweet vs webpage
|
||||
for a in self.archivers: result.rearchivable |= a.is_rearchivable(url)
|
||||
logger.debug(f"{result.rearchivable=} for {url=}")
|
||||
|
||||
# 2 - notify start to DB
|
||||
# signal to DB that archiving has started
|
||||
# and propagate already archived if it exists
|
||||
cached_result = None
|
||||
@@ -78,7 +74,7 @@ class ArchivingOrchestrator:
|
||||
d.started(result)
|
||||
if (local_result := d.fetch(result)):
|
||||
cached_result = (cached_result or Metadata()).merge(local_result)
|
||||
if cached_result and not cached_result.rearchivable:
|
||||
if cached_result:
|
||||
logger.debug("Found previously archived entry")
|
||||
for d in self.databases:
|
||||
d.done(cached_result)
|
||||
|
||||
Reference in New Issue
Block a user