mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-12 21:28:29 +03:00
renaming and code improvements to json_enricher
This commit is contained in:
1
src/auto_archiver/modules/json_enricher/__init__.py
Normal file
1
src/auto_archiver/modules/json_enricher/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
from .json_enricher import JsonEnricher
|
||||
16
src/auto_archiver/modules/json_enricher/__manifest__.py
Normal file
16
src/auto_archiver/modules/json_enricher/__manifest__.py
Normal file
@@ -0,0 +1,16 @@
|
||||
# Manifest for the JSON Enricher module: a single dict literal declaring the
# module's display name, type, Python dependencies, configs and description.
# NOTE(review): how the framework loads this literal is not visible here.
{
    "name": "JSON Enricher",
    "type": ["enricher"],
    "requires_setup": True,
    "dependencies": {
        "python": ["loguru"],
    },
    # This module declares no configuration options.
    "configs": {},
    "description": """

Writes all archiving process metadata to a JSON file so it can be parsed by other tools. As this is an Enricher, it will not contain the final stored URLs.

WARNING: The resulting JSON may reveal sensitive information about the computer and settings in which the archiving process was run.

""",
}
|
||||
19
src/auto_archiver/modules/json_enricher/json_enricher.py
Normal file
19
src/auto_archiver/modules/json_enricher/json_enricher.py
Normal file
@@ -0,0 +1,19 @@
|
||||
import json
|
||||
from loguru import logger
|
||||
import os
|
||||
|
||||
from auto_archiver.core import Enricher
|
||||
from auto_archiver.core import Media, Metadata
|
||||
|
||||
|
||||
class JsonEnricher(Enricher):
    """Enricher that dumps an item's metadata to a JSON file and attaches it as media."""

    def enrich(self, to_enrich: Metadata) -> None:
        """Write ``to_enrich`` as JSON and register the resulting file on it.

        The dictionary returned by ``to_enrich.to_dict()`` is serialised to
        ``metadata.json`` inside ``self.tmp_dir``; that file is then added to
        ``to_enrich`` as a ``Media`` entry with the id ``metadata_json``.
        """
        # The local must stay named `url`: the `{url=}` specifier embeds the
        # variable name in the emitted log line.
        url = to_enrich.get_url()
        logger.debug(f"JSON Enricher for {url=}")

        json_path = os.path.join(self.tmp_dir, "metadata.json")
        with open(json_path, mode="w", encoding="utf-8") as handle:
            # default=str stringifies any value json cannot encode natively;
            # ensure_ascii=False keeps non-ASCII text unescaped in the file.
            json.dump(
                to_enrich.to_dict(),
                handle,
                indent=4,
                default=str,
                ensure_ascii=False,
            )

        to_enrich.add_media(Media(filename=json_path), id="metadata_json")
|
||||
Reference in New Issue
Block a user