renaming and code improvements to json_enricher

2026-06-07 19:08:30 +03:00 · 2025-06-17 16:06:04 +01:00
parent 664ee8d037
commit 2f1a07abbf
5 changed files with 23 additions and 46 deletions
--- a/src/auto_archiver/modules/json_enricher/__init__.py
+++ b/src/auto_archiver/modules/json_enricher/__init__.py
@@ -0,0 +1 @@
+from .json_enricher import JsonEnricher
--- a/src/auto_archiver/modules/json_enricher/manifest.py
+++ b/src/auto_archiver/modules/json_enricher/manifest.py
@@ -0,0 +1,16 @@
+{
+    "name": "JSON Enricher",
+    "type": ["enricher"],
+    "requires_setup": True,
+    "dependencies": {
+        "python": ["loguru"],
+    },
+    "configs": {},
+    "description": """
+
+    Writes all archiving process metadata to a JSON file so it can be parsed by other tools. As this is an Enricher, it will not contain the final stored URLs. 
+	
+	WARNING: The resulting JSON may reveal sensitive information about the computer and settings in which the archiving process was run. 
+
+    """,
+}
--- a/src/auto_archiver/modules/metadata_json_enricher/metadata_json_enricher.py
+++ b/src/auto_archiver/modules/metadata_json_enricher/metadata_json_enricher.py
@@ -5,17 +5,15 @@ import os
 from auto_archiver.core import Enricher
 from auto_archiver.core import Media, Metadata

-class MetadataJsonEnricher(Enricher):
-    def __init__(self):
-        super().__init__()

+class JsonEnricher(Enricher):
    def enrich(self, to_enrich: Metadata) -> None:
        url = to_enrich.get_url()

-        logger.debug(f"Metadata JSON Enricher for {url=}")
+        logger.debug(f"JSON Enricher for {url=}")

-        item_path = os.path.join(self.tmp_dir, f"metadata.json")
+        item_path = os.path.join(self.tmp_dir, "metadata.json")
        with open(item_path, mode="w", encoding="utf-8") as outf:
-            json.dump(to_enrich.to_dict(), outf, indent=4, default=str)
-        
-        to_enrich.add_media(Media(filename=item_path), id="metadata_json")
+            json.dump(to_enrich.to_dict(), outf, indent=4, default=str, ensure_ascii=False)
+
+        to_enrich.add_media(Media(filename=item_path), id="metadata_json")
--- a/src/auto_archiver/modules/metadata_json_enricher/__init__.py
+++ b/src/auto_archiver/modules/metadata_json_enricher/__init__.py
@@ -1 +0,0 @@
-from .metadata_json_enricher import MetadataJsonEnricher
--- a/src/auto_archiver/modules/metadata_json_enricher/manifest.py
+++ b/src/auto_archiver/modules/metadata_json_enricher/manifest.py
@@ -1,37 +0,0 @@
-{
-    "name": "Metadata JSON Enricher",
-    "type": ["enricher"],
-    "requires_setup": True,
-    "dependencies": {
-        "python": ["loguru"],
-    },
-    "configs": {
-        # "width": {"default": 1280, "type": "int", "help": "width of the screenshots"},
-        # "height": {"default": 1024, "type": "int", "help": "height of the screenshots"},
-        # "timeout": {"default": 60, "type": "int", "help": "timeout for taking the screenshot"},
-        # "sleep_before_screenshot": {
-        #     "default": 4,
-        #     "type": "int",
-        #     "help": "seconds to wait for the pages to load before taking screenshot",
-        # },
-        # "http_proxy": {
-        #     "default": "",
-        #     "help": "http proxy to use for the webdriver, eg http://proxy-user:password@proxy-ip:port",
-        # },
-        # "save_to_pdf": {
-        #     "default": False,
-        #     "type": "bool",
-        #     "help": "save the page as pdf along with the screenshot. PDF saving options can be adjusted with the 'print_options' parameter",
-        # },
-        # "print_options": {
-        #     "default": {},
-        #     "help": "options to pass to the pdf printer, in JSON format. See https://www.selenium.dev/documentation/webdriver/interactions/print_page/ for more information",
-        #     "type": "json_loader",
-        # },
-    },
-    "description": """
-
-    Writes all the metadata to a json file so can be parsed by other tools.
-
-    """,
-}
				`@@ -1 +0,0 @@`
				`from .metadata_json_enricher import MetadataJsonEnricher`