mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-11 20:58:29 +03:00
feat: re-enable HASH on gsheet
This commit is contained in:
@@ -64,6 +64,7 @@ class GsheetsDb(Database):
|
||||
batch_if_valid('title', item.get_title())
|
||||
batch_if_valid('text', item.get("content", ""))
|
||||
batch_if_valid('timestamp', item.get_timestamp())
|
||||
batch_if_valid('hash', media.get("hash", "not-calculated"))
|
||||
if (screenshot := item.get_media_by_id("screenshot")) and hasattr(screenshot, "urls"):
|
||||
batch_if_valid('screenshot', "\n".join(screenshot.urls))
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@ import hashlib
|
||||
from loguru import logger
|
||||
|
||||
from . import Enricher
|
||||
from ..core import Metadata
|
||||
from ..core import Metadata, ArchivingContext
|
||||
|
||||
|
||||
class HashEnricher(Enricher):
|
||||
@@ -17,6 +17,7 @@ class HashEnricher(Enricher):
|
||||
algo_choices = self.configs()["algorithm"]["choices"]
|
||||
assert self.algorithm in algo_choices, f"Invalid hash algorithm selected, must be one of {algo_choices} (you selected {self.algorithm})."
|
||||
self.chunksize = int(self.chunksize)
|
||||
ArchivingContext.set("hash_enricher.algorithm", self.algorithm, keep_on_reset=True)
|
||||
|
||||
@staticmethod
|
||||
def configs() -> dict:
|
||||
|
||||
@@ -8,6 +8,7 @@ from loguru import logger
|
||||
from ..version import __version__
|
||||
from ..core import Metadata, Media, ArchivingContext
|
||||
from . import Formatter
|
||||
from ..enrichers import HashEnricher
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -46,11 +47,16 @@ class HtmlFormatter(Formatter):
|
||||
html_path = os.path.join(ArchivingContext.get_tmp_dir(), f"formatted{str(uuid.uuid4())}.html")
|
||||
with open(html_path, mode="w", encoding="utf-8") as outf:
|
||||
outf.write(content)
|
||||
return Media(filename=html_path)
|
||||
final_media = Media(filename=html_path)
|
||||
|
||||
he = HashEnricher({"hash_enricher": {"algorithm": ArchivingContext.get("hash_enricher.algorithm"), "chunksize": 1.6e7}})
|
||||
if len(hd := he.calculate_hash(final_media.filename)):
|
||||
final_media.set("hash", f"{he.algorithm}:{hd}")
|
||||
|
||||
return final_media
|
||||
|
||||
|
||||
# JINJA helper filters
|
||||
|
||||
class JinjaHelpers:
|
||||
@staticmethod
|
||||
def is_list(v) -> bool:
|
||||
|
||||
@@ -77,7 +77,7 @@ class Storage(Step):
|
||||
# filename_generator logic
|
||||
if self.filename_generator == "random": filename = str(uuid.uuid4())[:16]
|
||||
elif self.filename_generator == "static":
|
||||
he = HashEnricher({"hash_enricher": {"algorithm": "SHA-256", "chunksize": 1.6e7}})
|
||||
he = HashEnricher({"hash_enricher": {"algorithm": ArchivingContext.get("hash_enricher.algorithm"), "chunksize": 1.6e7}})
|
||||
hd = he.calculate_hash(media.filename)
|
||||
filename = hd[:24]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user