mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-12 13:18:28 +03:00
merge logic started
This commit is contained in:
@@ -4,6 +4,7 @@ from ast import List
|
||||
from typing import Any, Union, Dict
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
import json
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -21,12 +22,25 @@ class Metadata:
|
||||
self.status = status
|
||||
self.metadata = {}
|
||||
|
||||
# @staticmethod
|
||||
def merge(self: Metadata, right: Metadata, overwrite_left=True) -> Metadata:
    """
    Merge two Metadata instances and return the result as a new Metadata.

    Intended to work for both archived() and enriched() results.

    Keys present on only one side are carried over unchanged. When both
    sides hold the same key, only one value can remain:

    - two dicts or two sets are combined with `|` (for dicts, the entries
      of the winning side replace clashing inner keys);
    - two lists are concatenated, losing side first;
    - anything else (including mismatched container types) takes the
      value of the winning side.

    The winning side is `right` when `overwrite_left` is True, otherwise
    `self`. The returned instance also takes its `status` from the
    winning side. Values are not deep-copied: entries that are carried
    over unchanged are shared with the instance they came from.
    """
    if not overwrite_left:
        # invert the operands and reuse the same logic so that `self` wins
        return right.merge(self)

    res = Metadata()
    res.status = right.status
    res.metadata = dict(self.metadata)  # shallow copy, `self` stays untouched
    for k, v in right.metadata.items():
        if k not in res.metadata:
            res.set(k, v)
            continue

        # key conflict: combine containers of the same kind, otherwise overwrite
        left_val = self.get(k)
        if isinstance(v, dict) and isinstance(left_val, dict):
            res.set(k, left_val | v)
        elif isinstance(v, set) and isinstance(left_val, set):
            res.set(k, left_val | v)
        elif isinstance(v, list) and isinstance(left_val, list):
            res.set(k, left_val + v)
        else:
            # scalars or mismatched types cannot be combined: `right` wins
            res.set(k, v)
    return res
|
||||
|
||||
# TODO: setters?
|
||||
def set(self, key: str, val: Any) -> Metadata:
|
||||
@@ -34,8 +48,10 @@ class Metadata:
|
||||
self.metadata[key] = val
|
||||
return self
|
||||
|
||||
def get(self, key: str, default: Any = None, create_if_missing=False) -> Union[Metadata, str]:
    """
    Return the value stored under `key` in this instance's metadata dict.

    If the key is absent, `default` is returned. With
    `create_if_missing=True` the default is also stored under `key`
    first, so the caller receives the very object that now lives in the
    metadata dict (useful for mutable defaults such as a list).
    """
    if create_if_missing:
        # setdefault only stores `default` when the key is absent and
        # returns whichever value ends up stored under the key
        return self.metadata.setdefault(key, default)
    return self.metadata.get(key, default)
|
||||
|
||||
# custom getter/setters
|
||||
@@ -50,7 +66,11 @@ class Metadata:
|
||||
return url
|
||||
|
||||
def get_media(self) -> List:
    """
    Return the list stored under the "media" metadata key.

    The lookup uses create_if_missing=True, so when no media list exists
    yet an empty one is stored and returned; in-place changes made by the
    caller (e.g. append) are therefore kept on this instance.
    """
    return self.get("media", [], create_if_missing=True)
|
||||
|
||||
def set_content(self, content: str) -> Metadata:
    """
    Store the main textual content/information from a social media post,
    webpage, ... under the "content" key and return the result of set().
    """
    return self.set(key="content", val=content)
|
||||
|
||||
def set_title(self, title: str) -> Metadata:
    """Store `title` under the "title" key and return the result of set()."""
    return self.set(key="title", val=title)
|
||||
@@ -59,8 +79,10 @@ class Metadata:
|
||||
return self.set("title", title)
|
||||
|
||||
def add_media(self, filename: str) -> Metadata:
    """
    Append `filename` to the list returned by get_media() and return
    self, so calls can be chained like the other setters.
    """
    # list.append() returns None, so append first and return self explicitly
    self.get_media().append(filename)
    return self
|
||||
|
||||
def as_json(self) -> str:
    """
    Serialize this instance's `metadata` dict to a JSON string.

    Only the `metadata` dict is serialized. json.dumps raises TypeError
    when a stored value is not JSON-serializable (for example a set).
    """
    return json.dumps(self.metadata)
|
||||
|
||||
Reference in New Issue
Block a user