Tidy-ups + unit tests:

1. Allow loading modules from extra directories passed via --module_paths=/extra/path/here
2. Improved unit tests for module loading
3. Further small tidy-ups/clean-ups
This commit is contained in:
Patrick Robertson
2025-01-29 18:42:12 +01:00
parent dcd5576f29
commit 3d37c494aa
13 changed files with 216 additions and 81 deletions

View File

@@ -19,16 +19,6 @@ class HashEnricher(Enricher):
Calculates hashes for Media instances
"""
def __init__(self, config: dict = None):
"""
Initialize the HashEnricher with a configuration dictionary.
"""
super().__init__()
# TODO set these from the manifest?
# Set default values
self.algorithm = config.get("algorithm", "SHA-256") if config else "SHA-256"
self.chunksize = config.get("chunksize", int(1.6e7)) if config else int(1.6e7)
def enrich(self, to_enrich: Metadata) -> None:
url = to_enrich.get_url()

View File

@@ -12,7 +12,7 @@ from auto_archiver.core import Metadata, Media, ArchivingContext
from auto_archiver.core import Formatter
from auto_archiver.modules.hash_enricher import HashEnricher
from auto_archiver.utils.misc import random_str
from auto_archiver.core.module import get_module
@dataclass
class HtmlFormatter(Formatter):
@@ -53,7 +53,7 @@ class HtmlFormatter(Formatter):
outf.write(content)
final_media = Media(filename=html_path, _mimetype="text/html")
he = HashEnricher({"hash_enricher": {"algorithm": ArchivingContext.get("hash_enricher.algorithm"), "chunksize": 1.6e7}})
he = get_module('hash_enricher', self.config)
if len(hd := he.calculate_hash(final_media.filename)):
final_media.set("hash", f"{he.algorithm}:{hd}")