mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-12 21:28:29 +03:00
Tidy ups + unit tests:
1. Allow loading modules from --module_paths=/extra/path/here
2. Improved unit tests for module loading
3. Further small tidy-ups/clean-ups
This commit is contained in:
@@ -19,16 +19,6 @@ class HashEnricher(Enricher):
|
||||
Calculates hashes for Media instances
|
||||
"""
|
||||
|
||||
def __init__(self, config: dict = None):
|
||||
"""
|
||||
Initialize the HashEnricher with a configuration dictionary.
|
||||
"""
|
||||
super().__init__()
|
||||
# TODO set these from the manifest?
|
||||
# Set default values
|
||||
self.algorithm = config.get("algorithm", "SHA-256") if config else "SHA-256"
|
||||
self.chunksize = config.get("chunksize", int(1.6e7)) if config else int(1.6e7)
|
||||
|
||||
|
||||
def enrich(self, to_enrich: Metadata) -> None:
|
||||
url = to_enrich.get_url()
|
||||
|
||||
@@ -12,7 +12,7 @@ from auto_archiver.core import Metadata, Media, ArchivingContext
|
||||
from auto_archiver.core import Formatter
|
||||
from auto_archiver.modules.hash_enricher import HashEnricher
|
||||
from auto_archiver.utils.misc import random_str
|
||||
|
||||
from auto_archiver.core.module import get_module
|
||||
|
||||
@dataclass
|
||||
class HtmlFormatter(Formatter):
|
||||
@@ -53,7 +53,7 @@ class HtmlFormatter(Formatter):
|
||||
outf.write(content)
|
||||
final_media = Media(filename=html_path, _mimetype="text/html")
|
||||
|
||||
he = HashEnricher({"hash_enricher": {"algorithm": ArchivingContext.get("hash_enricher.algorithm"), "chunksize": 1.6e7}})
|
||||
he = get_module('hash_enricher', self.config)
|
||||
if len(hd := he.calculate_hash(final_media.filename)):
|
||||
final_media.set("hash", f"{he.algorithm}:{hd}")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user