Set up feeder manifests (not merged by source yet)

2026-06-13 05:38:29 +03:00 · 2025-01-23 09:16:42 +00:00
parent c517d35bdf
commit 79684f8348
82 changed files with 721 additions and 730 deletions
--- a/src/auto_archiver/core/init.py
+++ b/src/auto_archiver/core/init.py
@@ -4,4 +4,9 @@

 # cannot import ArchivingOrchestrator/Config to avoid circular dep
 # from .orchestrator import ArchivingOrchestrator
-# from .config import Config
+# from .config import Config
+
+from .media import Media
+from .step import Step
+from .context import ArchivingContext
+from .metadata import Metadata
--- a/src/auto_archiver/core/orchestrator.py
+++ b/src/auto_archiver/core/orchestrator.py
@@ -190,7 +190,6 @@ class ArchivingOrchestrator:

        yaml_config = read_yaml(basic_config.config_file)
            
-        breakpoint()
        self.setup_complete_parser(basic_config, yaml_config, unused_args)

        self.install_modules()
--- a/src/auto_archiver/databases/init.py
+++ b/src/auto_archiver/databases/init.py
@@ -3,8 +3,3 @@

 """
 from .database import Database
-from .gsheet_db.gsheet_db import GsheetsDb
-from .console_db.console_db import ConsoleDb
-from .csv_db.csv_db import CSVDb
-from .api_db.api_db import AAApiDb
-from .atlos_db.atlos_db import AtlosDb
--- a/src/auto_archiver/databases/api_db/api_db.py
+++ b/src/auto_archiver/databases/api_db/api_db.py
@@ -1,70 +0,0 @@
-from typing import Union
-import requests, os
-from loguru import logger
-
-from .. import Database
-from ...core import Metadata
-
-
-class AAApiDb(Database):
-    """
-        Connects to auto-archiver-api instance
-    """
-    name = "auto_archiver_api_db"
-
-    def __init__(self, config: dict) -> None:
-        # without this STEP.__init__ is not called
-        super().__init__(config)
-        self.allow_rearchive = bool(self.allow_rearchive)
-        self.store_results = bool(self.store_results)
-        self.assert_valid_string("api_endpoint")
-
-    @staticmethod
-    def configs() -> dict:
-        return {
-            "api_endpoint": {"default": None, "help": "API endpoint where calls are made to"},
-            "api_token": {"default": None, "help": "API Bearer token."},
-            "public": {"default": False, "help": "whether the URL should be publicly available via the API"},
-            "author_id": {"default": None, "help": "which email to assign as author"},
-            "group_id": {"default": None, "help": "which group of users have access to the archive in case public=false as author"},
-            "allow_rearchive": {"default": True, "help": "if False then the API database will be queried prior to any archiving operations and stop if the link has already been archived"},
-            "store_results": {"default": True, "help": "when set, will send the results to the API database."},
-            "tags": {"default": [], "help": "what tags to add to the archived URL", "cli_set": lambda cli_val, cur_val: set(cli_val.split(","))},
-        }
-    def fetch(self, item: Metadata) -> Union[Metadata, bool]:
-        """ query the database for the existence of this item.
-            Helps avoid re-archiving the same URL multiple times.
-        """
-        if not self.allow_rearchive: return
-        
-        params = {"url": item.get_url(), "limit": 15}
-        headers = {"Authorization": f"Bearer {self.api_token}", "accept": "application/json"}
-        response = requests.get(os.path.join(self.api_endpoint, "tasks/search-url"), params=params, headers=headers)
-
-        if response.status_code == 200:
-            if len(response.json()):
-                logger.success(f"API returned {len(response.json())} previously archived instance(s)")
-                fetched_metadata = [Metadata.from_dict(r["result"]) for r in response.json()]
-                return Metadata.choose_most_complete(fetched_metadata)
-        else:
-            logger.error(f"AA API FAIL ({response.status_code}): {response.json()}")
-        return False
-
-
-    def done(self, item: Metadata, cached: bool=False) -> None:
-        """archival result ready - should be saved to DB"""
-        if not self.store_results: return
-        if cached: 
-            logger.debug(f"skipping saving archive of {item.get_url()} to the AA API because it was cached")
-            return
-        logger.debug(f"saving archive of {item.get_url()} to the AA API.")
-
-        payload = {'result': item.to_json(), 'public': self.public, 'author_id': self.author_id, 'group_id': self.group_id, 'tags': list(self.tags)}
-        headers = {"Authorization": f"Bearer {self.api_token}"}
-        response = requests.post(os.path.join(self.api_endpoint, "submit-archive"), json=payload, headers=headers)
-
-        if response.status_code == 200:
-            logger.success(f"AA API: {response.json()}")
-        else:
-            logger.error(f"AA API FAIL ({response.status_code}): {response.json()}")
-
--- a/src/auto_archiver/databases/atlos_db/atlos_db.py
+++ b/src/auto_archiver/databases/atlos_db/atlos_db.py
@@ -1,79 +0,0 @@
-import os
-from typing import Union
-from loguru import logger
-from csv import DictWriter
-from dataclasses import asdict
-import requests
-
-from .. import Database
-from ...core import Metadata
-from ...utils import get_atlos_config_options
-
-
-class AtlosDb(Database):
-    """
-    Outputs results to Atlos
-    """
-
-    name = "atlos_db"
-
-    def __init__(self, config: dict) -> None:
-        # without this STEP.__init__ is not called
-        super().__init__(config)
-
-    @staticmethod
-    def configs() -> dict:
-        return get_atlos_config_options()
-
-    def failed(self, item: Metadata, reason: str) -> None:
-        """Update DB accordingly for failure"""
-        # If the item has no Atlos ID, there's nothing for us to do
-        if not item.metadata.get("atlos_id"):
-            logger.info(f"Item {item.get_url()} has no Atlos ID, skipping")
-            return
-
-        requests.post(
-            f"{self.atlos_url}/api/v2/source_material/metadata/{item.metadata['atlos_id']}/auto_archiver",
-            headers={"Authorization": f"Bearer {self.api_token}"},
-            json={"metadata": {"processed": True, "status": "error", "error": reason}},
-        ).raise_for_status()
-        logger.info(
-            f"Stored failure for {item.get_url()} (ID {item.metadata['atlos_id']}) on Atlos: {reason}"
-        )
-
-    def fetch(self, item: Metadata) -> Union[Metadata, bool]:
-        """check and fetch if the given item has been archived already, each
-        database should handle its own caching, and configuration mechanisms"""
-        return False
-
-    def _process_metadata(self, item: Metadata) -> dict:
-        """Process metadata for storage on Atlos. Will convert any datetime
-        objects to ISO format."""
-
-        return {
-            k: v.isoformat() if hasattr(v, "isoformat") else v
-            for k, v in item.metadata.items()
-        }
-
-    def done(self, item: Metadata, cached: bool = False) -> None:
-        """archival result ready - should be saved to DB"""
-
-        if not item.metadata.get("atlos_id"):
-            logger.info(f"Item {item.get_url()} has no Atlos ID, skipping")
-            return
-
-        requests.post(
-            f"{self.atlos_url}/api/v2/source_material/metadata/{item.metadata['atlos_id']}/auto_archiver",
-            headers={"Authorization": f"Bearer {self.api_token}"},
-            json={
-                "metadata": dict(
-                    processed=True,
-                    status="success",
-                    results=self._process_metadata(item),
-                )
-            },
-        ).raise_for_status()
-
-        logger.info(
-            f"Stored success for {item.get_url()} (ID {item.metadata['atlos_id']}) on Atlos"
-        )
--- a/src/auto_archiver/databases/console_db/console_db.py
+++ b/src/auto_archiver/databases/console_db/console_db.py
@@ -1,32 +0,0 @@
-from loguru import logger
-
-from .. import Database
-from ...core import Metadata
-
-
-class ConsoleDb(Database):
-    """
-        Outputs results to the console
-    """
-    name = "console_db"
-
-    def __init__(self, config: dict) -> None:
-        # without this STEP.__init__ is not called
-        super().__init__(config)
-
-    @staticmethod
-    def configs() -> dict:
-        return {}
-
-    def started(self, item: Metadata) -> None:
-        logger.warning(f"STARTED {item}")
-
-    def failed(self, item: Metadata, reason:str) -> None:
-        logger.error(f"FAILED {item}: {reason}")
-
-    def aborted(self, item: Metadata) -> None:
-        logger.warning(f"ABORTED {item}")
-
-    def done(self, item: Metadata, cached: bool=False) -> None:
-        """archival result ready - should be saved to DB"""
-        logger.success(f"DONE {item}")
--- a/src/auto_archiver/databases/csv_db/csv_db.py
+++ b/src/auto_archiver/databases/csv_db/csv_db.py
@@ -1,34 +0,0 @@
-import os
-from loguru import logger
-from csv import DictWriter
-from dataclasses import asdict
-
-from .. import Database
-from ...core import Metadata
-
-
-class CSVDb(Database):
-    """
-        Outputs results to a CSV file
-    """
-    name = "csv_db"
-
-    def __init__(self, config: dict) -> None:
-        # without this STEP.__init__ is not called
-        super().__init__(config)
-        self.assert_valid_string("csv_file")
-
-    @staticmethod
-    def configs() -> dict:
-        return {
-            "csv_file": {"default": "db.csv", "help": "CSV file name"}
-        }
-
-    def done(self, item: Metadata, cached: bool=False) -> None:
-        """archival result ready - should be saved to DB"""
-        logger.success(f"DONE {item}")
-        is_empty = not os.path.isfile(self.csv_file) or os.path.getsize(self.csv_file) == 0
-        with open(self.csv_file, "a", encoding="utf-8") as outf:
-            writer = DictWriter(outf, fieldnames=asdict(Metadata()))
-            if is_empty: writer.writeheader()
-            writer.writerow(asdict(item))
--- a/src/auto_archiver/databases/gsheet_db/gsheet_db.py
+++ b/src/auto_archiver/databases/gsheet_db/gsheet_db.py
@@ -1,112 +0,0 @@
-from typing import Union, Tuple
-import datetime
-from urllib.parse import quote
-
-from loguru import logger
-
-from .. import Database
-from ...core import Metadata, Media, ArchivingContext
-from ...utils import GWorksheet
-
-
-class GsheetsDb(Database):
-    """
-        NB: only works if GsheetFeeder is used. 
-        could be updated in the future to support non-GsheetFeeder metadata 
-    """
-    name = "gsheet_db"
-
-    def __init__(self, config: dict) -> None:
-        # without this STEP.__init__ is not called
-        super().__init__(config)
-
-    @staticmethod
-    def configs() -> dict:
-        return {}
-
-    def started(self, item: Metadata) -> None:
-        logger.warning(f"STARTED {item}")
-        gw, row = self._retrieve_gsheet(item)
-        gw.set_cell(row, 'status', 'Archive in progress')
-
-    def failed(self, item: Metadata, reason:str) -> None:
-        logger.error(f"FAILED {item}")
-        self._safe_status_update(item, f'Archive failed {reason}')
-
-    def aborted(self, item: Metadata) -> None:
-        logger.warning(f"ABORTED {item}")
-        self._safe_status_update(item, '')
-
-    def fetch(self, item: Metadata) -> Union[Metadata, bool]:
-        """check if the given item has been archived already"""
-        return False
-
-    def done(self, item: Metadata, cached: bool=False) -> None:
-        """archival result ready - should be saved to DB"""
-        logger.success(f"DONE {item.get_url()}")
-        gw, row = self._retrieve_gsheet(item)
-        # self._safe_status_update(item, 'done')
-
-        cell_updates = []
-        row_values = gw.get_row(row)
-
-        def batch_if_valid(col, val, final_value=None):
-            final_value = final_value or val
-            try:
-                if val and gw.col_exists(col) and gw.get_cell(row_values, col) == '':
-                    cell_updates.append((row, col, final_value))
-            except Exception as e:
-                logger.error(f"Unable to batch {col}={final_value} due to {e}")
-        status_message = item.status
-        if cached:
-            status_message = f"[cached] {status_message}"
-        cell_updates.append((row, 'status', status_message))
-
-        media: Media = item.get_final_media()
-        if hasattr(media, "urls"):
-            batch_if_valid('archive', "\n".join(media.urls))
-        batch_if_valid('date', True, datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=datetime.timezone.utc).isoformat())
-        batch_if_valid('title', item.get_title())
-        batch_if_valid('text', item.get("content", ""))
-        batch_if_valid('timestamp', item.get_timestamp())
-        if media: batch_if_valid('hash', media.get("hash", "not-calculated"))
-
-        # merge all pdq hashes into a single string, if present
-        pdq_hashes = []
-        all_media = item.get_all_media()
-        for m in all_media:
-            if pdq := m.get("pdq_hash"):
-                pdq_hashes.append(pdq)
-        if len(pdq_hashes):
-            batch_if_valid('pdq_hash', ",".join(pdq_hashes))
-
-        if (screenshot := item.get_media_by_id("screenshot")) and hasattr(screenshot, "urls"):
-            batch_if_valid('screenshot', "\n".join(screenshot.urls))
-
-        if (thumbnail := item.get_first_image("thumbnail")):
-            if hasattr(thumbnail, "urls"):
-                batch_if_valid('thumbnail', f'=IMAGE("{thumbnail.urls[0]}")')
-
-        if (browsertrix := item.get_media_by_id("browsertrix")):
-            batch_if_valid('wacz', "\n".join(browsertrix.urls))
-            batch_if_valid('replaywebpage', "\n".join([f'https://replayweb.page/?source={quote(wacz)}#view=pages&url={quote(item.get_url())}' for wacz in browsertrix.urls]))
-
-        gw.batch_set_cell(cell_updates)
-
-    def _safe_status_update(self, item: Metadata, new_status: str) -> None:
-        try:
-            gw, row = self._retrieve_gsheet(item)
-            gw.set_cell(row, 'status', new_status)
-        except Exception as e:
-            logger.debug(f"Unable to update sheet: {e}")
-
-    def _retrieve_gsheet(self, item: Metadata) -> Tuple[GWorksheet, int]:
-        # TODO: to make gsheet_db less coupled with gsheet_feeder's "gsheet" parameter, this method could 1st try to fetch "gsheet" from ArchivingContext and, if missing, manage its own singleton - not needed for now
-        if gsheet := ArchivingContext.get("gsheet"):
-            gw: GWorksheet = gsheet.get("worksheet")
-            row: int = gsheet.get("row")
-        elif self.sheet_id:
-            print(self.sheet_id)
-
-
-        return gw, row
--- a/src/auto_archiver/databases/api_db/init.py
+++ b/src/auto_archiver/databases/api_db/init.py
--- a/src/auto_archiver/databases/api_db/manifest.py
+++ b/src/auto_archiver/databases/api_db/manifest.py
--- a/src/auto_archiver/modules/api_db/api_db.py
+++ b/src/auto_archiver/modules/api_db/api_db.py
@@ -2,8 +2,8 @@ from typing import Union
 import requests, os
 from loguru import logger

-from . import Database
-from ..core import Metadata
+from auto_archiver.databases import Database
+from auto_archiver.core import Metadata


 class AAApiDb(Database):
@@ -19,18 +19,7 @@ class AAApiDb(Database):
        self.store_results = bool(self.store_results)
        self.assert_valid_string("api_endpoint")

-    @staticmethod
-    def configs() -> dict:
-        return {
-            "api_endpoint": {"default": None, "help": "API endpoint where calls are made to"},
-            "api_token": {"default": None, "help": "API Bearer token."},
-            "public": {"default": False, "help": "whether the URL should be publicly available via the API"},
-            "author_id": {"default": None, "help": "which email to assign as author"},
-            "group_id": {"default": None, "help": "which group of users have access to the archive in case public=false as author"},
-            "allow_rearchive": {"default": True, "help": "if False then the API database will be queried prior to any archiving operations and stop if the link has already been archived"},
-            "store_results": {"default": True, "help": "when set, will send the results to the API database."},
-            "tags": {"default": [], "help": "what tags to add to the archived URL", "cli_set": lambda cli_val, cur_val: set(cli_val.split(","))},
-        }
+
    def fetch(self, item: Metadata) -> Union[Metadata, bool]:
        """ query the database for the existence of this item.
            Helps avoid re-archiving the same URL multiple times.
--- a/src/auto_archiver/databases/atlos_db/init.py
+++ b/src/auto_archiver/databases/atlos_db/init.py
--- a/src/auto_archiver/databases/atlos_db/manifest.py
+++ b/src/auto_archiver/databases/atlos_db/manifest.py
@@ -7,7 +7,18 @@
        {"python": ["loguru",
                    ""],
         "bin": [""]},
-    "configs": {},
+    "configs": {
+        "api_token": {
+            "default": None,
+            "help": "An Atlos API token. For more information, see https://docs.atlos.org/technical/api/",
+            "cli_set": lambda cli_val, _: cli_val
+        },
+        "atlos_url": {
+            "default": "https://platform.atlos.org",
+            "help": "The URL of your Atlos instance (e.g., https://platform.atlos.org), without a trailing slash.",
+            "cli_set": lambda cli_val, _: cli_val
+        },
+    },
    "description": """
 Handles integration with the Atlos platform for managing archival results.

--- a/src/auto_archiver/modules/atlos_db/atlos_db.py
+++ b/src/auto_archiver/modules/atlos_db/atlos_db.py
@@ -5,9 +5,9 @@ from csv import DictWriter
 from dataclasses import asdict
 import requests

-from . import Database
-from ..core import Metadata
-from ..utils import get_atlos_config_options
+from auto_archiver.databases import Database
+from auto_archiver.core import Metadata
+from auto_archiver.utils import get_atlos_config_options


 class AtlosDb(Database):
@@ -21,6 +21,7 @@ class AtlosDb(Database):
        # without this STEP.__init__ is not called
        super().__init__(config)

+    # TODO: presumably drop configs() once these options are read from this module's manifest.py - confirm
    @staticmethod
    def configs() -> dict:
        return get_atlos_config_options()
--- a/src/auto_archiver/modules/atlos_feeder/init.py
+++ b/src/auto_archiver/modules/atlos_feeder/init.py
--- a/src/auto_archiver/modules/atlos_feeder/manifest.py
+++ b/src/auto_archiver/modules/atlos_feeder/manifest.py
@@ -0,0 +1,34 @@
+{
+    "name": "Atlos Feeder",
+    "type": ["feeder"],
+    "requires_setup": True,
+    "external_dependencies": {
+        "python": ["loguru", "requests"],
+    },
+    "configs": {
+        "api_token": {
+            "default": None,
+            "help": "An Atlos API token. For more information, see https://docs.atlos.org/technical/api/",
+            "cli_set": lambda cli_val, _: cli_val
+        },
+        "atlos_url": {
+            "default": "https://platform.atlos.org",
+            "help": "The URL of your Atlos instance (e.g., https://platform.atlos.org), without a trailing slash.",
+            "cli_set": lambda cli_val, _: cli_val
+        },
+    },
+    "description": """
+    AtlosFeeder: A feeder module that integrates with the Atlos API to fetch source material URLs for archival.
+
+    ### Features
+    - Connects to the Atlos API to retrieve a list of source material URLs.
+    - Filters source materials based on visibility, processing status, and metadata.
+    - Converts filtered source materials into `Metadata` objects with the relevant `atlos_id` and URL.
+    - Iterates through paginated results using a cursor for efficient API interaction.
+
+    ### Notes
+    - Requires an Atlos API endpoint and a valid API token for authentication.
+    - Ensures only unprocessed, visible, and ready-to-archive URLs are returned.
+    - Handles pagination transparently when retrieving data from the Atlos API.
+    """
+}
--- a/src/auto_archiver/modules/atlos_feeder/atlos_feeder.py
+++ b/src/auto_archiver/modules/atlos_feeder/atlos_feeder.py
@@ -1,9 +1,9 @@
 from loguru import logger
 import requests

-from . import Feeder
-from ..core import Metadata, ArchivingContext
-from ..utils import get_atlos_config_options
+from auto_archiver.feeders import Feeder
+from auto_archiver.core import Metadata, ArchivingContext
+from auto_archiver.utils import get_atlos_config_options


 class AtlosFeeder(Feeder):
@@ -15,6 +15,7 @@ class AtlosFeeder(Feeder):
        if type(self.api_token) != str:
            raise Exception("Atlos Feeder did not receive an Atlos API token")

+    # TODO: presumably drop configs() once these options are read from this module's manifest.py - confirm
    @staticmethod
    def configs() -> dict:
        return get_atlos_config_options()
--- a/src/auto_archiver/modules/cli_feeder/init.py
+++ b/src/auto_archiver/modules/cli_feeder/init.py
--- a/src/auto_archiver/modules/cli_feeder/manifest.py
+++ b/src/auto_archiver/modules/cli_feeder/manifest.py
@@ -0,0 +1,24 @@
+{
+    "name": "CLI Feeder",
+    "type": ["feeder"],
+    "requires_setup": False,
+    "external_dependencies": {
+        "python": ["loguru"],
+    },
+    "configs": {
+        "urls": {
+            "default": None,
+            "help": "URL(s) to archive, either a single URL or a list of urls, should not come from config.yaml",
+            "cli_set": lambda cli_val, cur_val: list(set(cli_val.split(",")))
+        },
+    },
+    "description": """
+    Processes URLs to archive passed via the command line and feeds them into the archiving pipeline.
+
+    ### Features
+    - Takes a single URL or a list of URLs provided via the command line.
+    - Converts each URL into a `Metadata` object and yields it for processing.
+    - Ensures URLs are processed only if they are explicitly provided.
+
+    """
+}
--- a/src/auto_archiver/modules/cli_feeder/cli_feeder.py
+++ b/src/auto_archiver/modules/cli_feeder/cli_feeder.py
@@ -1,7 +1,7 @@
 from loguru import logger

-from . import Feeder
-from ..core import Metadata, ArchivingContext
+from auto_archiver.feeders import Feeder
+from auto_archiver.core import Metadata, ArchivingContext


 class CLIFeeder(Feeder):
@@ -13,15 +13,15 @@ class CLIFeeder(Feeder):
        if type(self.urls) != list or len(self.urls) == 0:
            raise Exception("CLI Feeder did not receive any URL to process")

-    @staticmethod
-    def configs() -> dict:
-        return {
-            "urls": {
-                "default": None,
-                "help": "URL(s) to archive, either a single URL or a list of urls, should not come from config.yaml",
-                "cli_set": lambda cli_val, cur_val: list(set(cli_val.split(",")))
-            },
-        }
+    # @staticmethod
+    # def configs() -> dict:
+    #     return {
+    #         "urls": {
+    #             "default": None,
+    #             "help": "URL(s) to archive, either a single URL or a list of urls, should not come from config.yaml",
+    #             "cli_set": lambda cli_val, cur_val: list(set(cli_val.split(",")))
+    #         },
+    #     }

    def __iter__(self) -> Metadata:
        for url in self.urls:
--- a/src/auto_archiver/databases/gsheet_db/init.py
+++ b/src/auto_archiver/databases/gsheet_db/init.py
--- a/src/auto_archiver/databases/console_db/manifest.py
+++ b/src/auto_archiver/databases/console_db/manifest.py
--- a/src/auto_archiver/modules/console_db/console_db.py
+++ b/src/auto_archiver/modules/console_db/console_db.py
@@ -1,7 +1,7 @@
 from loguru import logger

-from . import Database
-from ..core import Metadata
+from auto_archiver.databases import Database
+from auto_archiver.core import Metadata


 class ConsoleDb(Database):
@@ -14,10 +14,6 @@ class ConsoleDb(Database):
        # without this STEP.__init__ is not called
        super().__init__(config)

-    @staticmethod
-    def configs() -> dict:
-        return {}
-
    def started(self, item: Metadata) -> None:
        logger.warning(f"STARTED {item}")

--- a/src/auto_archiver/modules/csv_db/init.py
+++ b/src/auto_archiver/modules/csv_db/init.py
--- a/src/auto_archiver/databases/csv_db/manifest.py
+++ b/src/auto_archiver/databases/csv_db/manifest.py
--- a/src/auto_archiver/modules/csv_db/csv_db.py
+++ b/src/auto_archiver/modules/csv_db/csv_db.py
@@ -3,8 +3,8 @@ from loguru import logger
 from csv import DictWriter
 from dataclasses import asdict

-from . import Database
-from ..core import Metadata
+from auto_archiver.databases import Database
+from auto_archiver.core import Metadata


 class CSVDb(Database):
@@ -18,11 +18,6 @@ class CSVDb(Database):
        super().__init__(config)
        self.assert_valid_string("csv_file")

-    @staticmethod
-    def configs() -> dict:
-        return {
-            "csv_file": {"default": "db.csv", "help": "CSV file name"}
-        }

    def done(self, item: Metadata, cached: bool=False) -> None:
        """archival result ready - should be saved to DB"""
--- a/src/auto_archiver/modules/csv_feeder/init.py
+++ b/src/auto_archiver/modules/csv_feeder/init.py
--- a/src/auto_archiver/modules/csv_feeder/manifest.py
+++ b/src/auto_archiver/modules/csv_feeder/manifest.py
@@ -0,0 +1,33 @@
+{
+    "name": "CSV Feeder",
+    "type": ["feeder"],
+    "requires_setup": False,
+    "external_dependencies": {
+        "python": ["loguru"],
+        "bin": [""]
+    },
+    "configs": {
+            "files": {
+                "default": None,
+                "help": "Path to the input file(s) to read the URLs from, comma separated. \
+                        Input files should be formatted with one URL per line",
+                "cli_set": lambda cli_val, cur_val: list(set(cli_val.split(",")))
+            },
+            "column": {
+                "default": None,
+                "help": "Column number or name to read the URLs from, 0-indexed",
+            }
+        },
+    "description": """
+    Reads URLs from CSV files and feeds them into the archiving process.
+
+    ### Features
+    - Supports reading URLs from multiple input files, specified as a comma-separated list.
+    - Allows specifying the column number or name to extract URLs from.
+    - Skips header rows if the first value is not a valid URL.
+    - Integrates with the `ArchivingContext` to manage URL feeding.
+
+    ### Setup
+    - Input files should be formatted with one URL per line.
+    """
+}
--- a/src/auto_archiver/modules/csv_feeder/csv_feeder.py
+++ b/src/auto_archiver/modules/csv_feeder/csv_feeder.py
@@ -1,12 +1,15 @@
 from loguru import logger
 import csv

-from . import Feeder
-from ..core import Metadata, ArchivingContext
-from ..utils import url_or_none
+from auto_archiver.feeders import Feeder
+from auto_archiver.core import Metadata, ArchivingContext
+from auto_archiver.utils import url_or_none

 class CSVFeeder(Feeder):

+    name = "csv_feeder"
+
+
    @staticmethod
    def configs() -> dict:
        return {
--- a/src/auto_archiver/modules/gsheet_db/init.py
+++ b/src/auto_archiver/modules/gsheet_db/init.py
--- a/src/auto_archiver/databases/gsheet_db/manifest.py
+++ b/src/auto_archiver/databases/gsheet_db/manifest.py
--- a/src/auto_archiver/modules/gsheet_db/gsheet_db.py
+++ b/src/auto_archiver/modules/gsheet_db/gsheet_db.py
@@ -4,9 +4,9 @@ from urllib.parse import quote

 from loguru import logger

-from . import Database
-from ..core import Metadata, Media, ArchivingContext
-from ..utils import GWorksheet
+from auto_archiver.databases import Database
+from auto_archiver.core import Metadata, Media, ArchivingContext
+from auto_archiver.utils import GWorksheet


 class GsheetsDb(Database):
@@ -20,10 +20,6 @@ class GsheetsDb(Database):
        # without this STEP.__init__ is not called
        super().__init__(config)

-    @staticmethod
-    def configs() -> dict:
-        return {}
-
    def started(self, item: Metadata) -> None:
        logger.warning(f"STARTED {item}")
        gw, row = self._retrieve_gsheet(item)
--- a/src/auto_archiver/modules/gsheet_feeder/init.py
+++ b/src/auto_archiver/modules/gsheet_feeder/init.py
--- a/src/auto_archiver/modules/gsheet_feeder/manifest.py
+++ b/src/auto_archiver/modules/gsheet_feeder/manifest.py
@@ -0,0 +1,40 @@
+{
+    "name": "Google Sheets Feeder",
+    "type": ["feeder"],
+    "requires_setup": True,
+    "external_dependencies": {
+        "python": ["loguru", "gspread", "python-slugify"],
+    },
+    "configs": {
+        "allow_worksheets": {
+            "default": set(),
+            "help": "(CSV) only worksheets whose name is included in allow are included (overrides worksheet_block), leave empty so all are allowed",
+            "cli_set": lambda cli_val, cur_val: set(cli_val.split(","))
+        },
+        "block_worksheets": {
+            "default": set(),
+            "help": "(CSV) explicitly block some worksheets from being processed",
+            "cli_set": lambda cli_val, cur_val: set(cli_val.split(","))
+        },
+        "use_sheet_names_in_stored_paths": {
+            "default": True,
+            "help": "if True the stored files path will include 'workbook_name/worksheet_name/...'",
+        }
+    },
+    "description": """
+    GsheetsFeeder: A Google Sheets-based feeder for the Auto Archiver.
+
+    This reads data from Google Sheets and filters rows based on user-defined rules.
+    The filtered rows are processed into `Metadata` objects.
+
+    ### Features
+    - Validates the sheet structure and filters rows based on input configurations.
+    - Processes only worksheets allowed by the `allow_worksheets` and `block_worksheets` configurations.
+    - Ensures only rows with valid URLs and unprocessed statuses are included for archival.
+    - Supports organizing stored files into folder paths based on sheet and worksheet names.
+
+    ### Notes
+    - Requires a Google Service Account JSON file for authentication. Suggested location is `secrets/gsheets_service_account.json`.
+    - Create the sheet using the template provided in the docs.
+    """
+}
--- a/src/auto_archiver/modules/gsheet_feeder/gsheet_feeder.py
+++ b/src/auto_archiver/modules/gsheet_feeder/gsheet_feeder.py
@@ -14,9 +14,9 @@ from loguru import logger
 from slugify import slugify

 # from . import Enricher
-from . import Feeder
-from ..core import Metadata, ArchivingContext
-from ..utils import Gsheets, GWorksheet
+from auto_archiver.feeders import Feeder
+from auto_archiver.core import Metadata, ArchivingContext
+from auto_archiver.utils import Gsheets, GWorksheet


 class GsheetsFeeder(Gsheets, Feeder):
@@ -27,26 +27,26 @@ class GsheetsFeeder(Gsheets, Feeder):
        super().__init__(config)
        self.gsheets_client = gspread.service_account(filename=self.service_account)

-    @staticmethod
-    def configs() -> dict:
-        return dict(
-            Gsheets.configs(),
-            ** {
-                "allow_worksheets": {
-                    "default": set(),
-                    "help": "(CSV) only worksheets whose name is included in allow are included (overrides worksheet_block), leave empty so all are allowed",
-                    "cli_set": lambda cli_val, cur_val: set(cli_val.split(","))
-                },
-                "block_worksheets": {
-                    "default": set(),
-                    "help": "(CSV) explicitly block some worksheets from being processed",
-                    "cli_set": lambda cli_val, cur_val: set(cli_val.split(","))
-                },
-                "use_sheet_names_in_stored_paths": {
-                    "default": True,
-                    "help": "if True the stored files path will include 'workbook_name/worksheet_name/...'",
-                }
-            })
+    # @staticmethod
+    # def configs() -> dict:
+    #     return dict(
+    #         Gsheets.configs(),
+    #         ** {
+    #             "allow_worksheets": {
+    #                 "default": set(),
+    #                 "help": "(CSV) only worksheets whose name is included in allow are included (overrides worksheet_block), leave empty so all are allowed",
+    #                 "cli_set": lambda cli_val, cur_val: set(cli_val.split(","))
+    #             },
+    #             "block_worksheets": {
+    #                 "default": set(),
+    #                 "help": "(CSV) explicitly block some worksheets from being processed",
+    #                 "cli_set": lambda cli_val, cur_val: set(cli_val.split(","))
+    #             },
+    #             "use_sheet_names_in_stored_paths": {
+    #                 "default": True,
+    #                 "help": "if True the stored files path will include 'workbook_name/worksheet_name/...'",
+    #             }
+    #         })

    def __iter__(self) -> Metadata:
        sh = self.open_sheet()
--- a/src/auto_archiver/modules/hash_enricher/init.py
+++ b/src/auto_archiver/modules/hash_enricher/init.py
--- a/src/auto_archiver/modules/hash_enricher/manifest.py
+++ b/src/auto_archiver/modules/hash_enricher/manifest.py
@@ -0,0 +1,27 @@
+{
+    "name": "Hash Enricher",
+    "type": ["enricher"],
+    "requires_setup": False,
+    "external_dependencies": {
+                          "python": ["loguru"],
+    },
+    "configs": {
+            "algorithm": {"default": "SHA-256", "help": "hash algorithm to use", "choices": ["SHA-256", "SHA3-512"]},
+            "chunksize": {"default": int(1.6e7), "help": "number of bytes to use when reading files in chunks (if this value is too large you will run out of RAM), default is 16MB"},
+        },
+    "description": """
+Generates cryptographic hashes for media files to ensure data integrity and authenticity.
+
+### Features
+- Calculates cryptographic hashes (SHA-256 or SHA3-512) for media files stored in `Metadata` objects.
+- Ensures content authenticity, integrity validation, and duplicate identification.
+- Efficiently processes large files by reading file bytes in configurable chunk sizes.
+- Supports dynamic configuration of hash algorithms and chunk sizes.
+- Updates media metadata with the computed hash value in the format `<algorithm>:<hash>`.
+
+### Notes
+- Default hash algorithm is SHA-256, but SHA3-512 is also supported.
+- Chunk size defaults to 16 MB but can be adjusted based on memory requirements.
+- Useful for workflows requiring hash-based content validation or deduplication.
+""",
+}
--- a/src/auto_archiver/modules/hash_enricher/hash_enricher.py
+++ b/src/auto_archiver/modules/hash_enricher/hash_enricher.py
@@ -10,8 +10,8 @@ making it suitable for handling large files efficiently.
 import hashlib
 from loguru import logger

-from . import Enricher
-from ..core import Metadata, ArchivingContext
+from auto_archiver.enrichers import Enricher
+from auto_archiver.core import Metadata, ArchivingContext


 class HashEnricher(Enricher):
@@ -45,13 +45,6 @@ class HashEnricher(Enricher):

        ArchivingContext.set("hash_enricher.algorithm", self.algorithm, keep_on_reset=True)

-    @staticmethod
-    def configs() -> dict:
-        return {
-            "algorithm": {"default": "SHA-256", "help": "hash algorithm to use", "choices": ["SHA-256", "SHA3-512"]},
-            "chunksize": {"default": int(1.6e7), "help": "number of bytes to use when reading files in chunks (if this value is too large you will run out of RAM), default is 16MB"},
-        }
-
    def enrich(self, to_enrich: Metadata) -> None:
        url = to_enrich.get_url()
        logger.debug(f"calculating media hashes for {url=} (using {self.algorithm})")
--- a/src/auto_archiver/modules/instagram_api_archiver/manifest.py
+++ b/src/auto_archiver/modules/instagram_api_archiver/manifest.py
@@ -8,7 +8,7 @@
                    "retrying",
                    "tqdm",],
         },
-    "no_setup_required": False,
+    "requires_setup": True,
    "configs": {
        "access_token": {"default": None, "help": "a valid instagrapi-api token"},
        "api_endpoint": {"default": None, "help": "API endpoint to use"},
@@ -25,5 +25,22 @@
            "help": "if true, will remove empty values from the json output",
        },
    },
-    "description": "",
+    "description": """
+Archives various types of Instagram content using the Instagrapi API.
+
+### Features
+- Connects to an Instagrapi API deployment to fetch Instagram profiles, posts, stories, highlights, reels, and tagged content.
+- Supports advanced configuration options, including:
+  - Full profile download (all posts, stories, highlights, and tagged content).
+  - Limiting the number of posts to fetch for large profiles.
+  - Minimising JSON output to remove empty fields and redundant data.
+- Provides robust error handling and retries for API calls.
+- Ensures efficient media scraping, including handling nested or carousel media items.
+- Adds downloaded media and metadata to the result for further processing.
+
+### Notes
+- Requires a valid Instagrapi API token (`access_token`) and API endpoint (`api_endpoint`).
+- Full-profile downloads can be limited by setting `full_profile_max_posts`.
+- Designed to fetch content in batches for large profiles, minimising API load.
+""",
 }
--- a/src/auto_archiver/modules/instagram_api_archiver/instagram_api_archiver.py
+++ b/src/auto_archiver/modules/instagram_api_archiver/instagram_api_archiver.py
@@ -45,25 +45,6 @@ class InstagramAPIArchiver(Archiver):
        self.full_profile = bool(self.full_profile)
        self.minimize_json_output = bool(self.minimize_json_output)

-    @staticmethod
-    def configs() -> dict:
-        return {
-            "access_token": {"default": None, "help": "a valid instagrapi-api token"},
-            "api_endpoint": {"default": None, "help": "API endpoint to use"},
-            "full_profile": {
-                "default": False,
-                "help": "if true, will download all posts, tagged posts, stories, and highlights for a profile, if false, will only download the profile pic and information.",
-            },
-            "full_profile_max_posts": {
-                "default": 0,
-                "help": "Use to limit the number of posts to download when full_profile is true. 0 means no limit. limit is applied softly since posts are fetched in batch, once to: posts, tagged posts, and highlights",
-            },
-            "minimize_json_output": {
-                "default": True,
-                "help": "if true, will remove empty values from the json output",
-            },
-        }
-
    def download(self, item: Metadata) -> Metadata:
        url = item.get_url()

--- a/src/auto_archiver/modules/instagram_archiver/manifest.py
+++ b/src/auto_archiver/modules/instagram_archiver/manifest.py
@@ -3,10 +3,12 @@
    "type": ["extractor"],
    "entry_point": "instagram_archiver:InstagramArchiver",
    "external_dependencies": {
-        "python": ["instaloader",
-                   "loguru",],
+        "python": [
+            "instaloader",
+            "loguru",
+        ],
    },
-    "no_setup_required": False,
+    "requires_setup": True,
    "configs": {
        "username": {"default": None, "help": "a valid Instagram username"},
        "password": {
--- a/src/auto_archiver/modules/instagram_archiver/instagram_archiver.py
+++ b/src/auto_archiver/modules/instagram_archiver/instagram_archiver.py
@@ -45,16 +45,7 @@ class InstagramArchiver(Archiver):
            except Exception as e2:
                logger.error(f"Unable to finish login (retrying from file): {e2}\n{traceback.format_exc()}")

-    @staticmethod
-    def configs() -> dict:
-        return {
-            "username": {"default": None, "help": "a valid Instagram username"},
-            "password": {"default": None, "help": "the corresponding Instagram account password"},
-            "download_folder": {"default": "instaloader", "help": "name of a folder to temporarily download content to"},
-            "session_file": {"default": "secrets/instaloader.session", "help": "path to the instagram session which saves session credentials"},
-            #TODO: fine-grain
-            # "download_stories": {"default": True, "help": "if the link is to a user profile: whether to get stories information"},
-        }
+

    def download(self, item: Metadata) -> Metadata:
        url = item.get_url()
--- a/src/auto_archiver/modules/instagram_tbot_archiver/instagram_tbot_archiver.py
+++ b/src/auto_archiver/modules/instagram_tbot_archiver/instagram_tbot_archiver.py
@@ -34,15 +34,6 @@ class InstagramTbotArchiver(Archiver):
        self.assert_valid_string("api_hash")
        self.timeout = int(self.timeout)

-    @staticmethod
-    def configs() -> dict:
-        return {
-            "api_id": {"default": None, "help": "telegram API_ID value, go to https://my.telegram.org/apps"},
-            "api_hash": {"default": None, "help": "telegram API_HASH value, go to https://my.telegram.org/apps"},
-            "session_file": {"default": "secrets/anon-insta", "help": "optional, records the telegram login session for future usage, '.session' will be appended to the provided value."},
-            "timeout": {"default": 45, "help": "timeout to fetch the instagram content in seconds."},
-        }
-
    def setup(self) -> None:
        """
        1. makes a copy of session_file that is removed in cleanup
--- a/src/auto_archiver/modules/meta_enricher/init.py
+++ b/src/auto_archiver/modules/meta_enricher/init.py
--- a/src/auto_archiver/modules/meta_enricher/manifest.py
+++ b/src/auto_archiver/modules/meta_enricher/manifest.py
@@ -0,0 +1,22 @@
+{
+    "name": "Archive Metadata Enricher",
+    "type": ["enricher"],
+    "requires_setup": False,
+    "external_dependencies": {
+                          "python": ["loguru"],
+    },
+    "description": """
+Adds metadata about archive operations, including file sizes and archive duration.
+To be included at the end of all enrichments.
+
+### Features
+- Calculates the total size of all archived media files, storing the result in human-readable and byte formats.
+- Computes the duration of the archival process, storing the elapsed time in seconds.
+- Ensures all enrichments are performed only if the `Metadata` object contains valid data.
+- Adds detailed metadata to provide insights into file sizes and archival performance.
+
+### Notes
+- Skips enrichment if no media or metadata is available in the `Metadata` object.
+- File sizes are calculated using the `os.stat` module, ensuring accurate byte-level reporting.
+""",
+}
--- a/src/auto_archiver/modules/meta_enricher/meta_enricher.py
+++ b/src/auto_archiver/modules/meta_enricher/meta_enricher.py
@@ -2,8 +2,8 @@ import datetime
 import os
 from loguru import logger

-from . import Enricher
-from ..core import Metadata
+from auto_archiver.enrichers import Enricher
+from auto_archiver.core import Metadata


 class MetaEnricher(Enricher):
@@ -17,10 +17,6 @@ class MetaEnricher(Enricher):
        # without this STEP.__init__ is not called
        super().__init__(config)

-    @staticmethod
-    def configs() -> dict:
-        return {}
-
    def enrich(self, to_enrich: Metadata) -> None:
        url = to_enrich.get_url()
        if to_enrich.is_empty():
@@ -28,7 +24,7 @@ class MetaEnricher(Enricher):
            return

        logger.debug(f"calculating archive metadata information for {url=}")
-        
+
        self.enrich_file_sizes(to_enrich)
        self.enrich_archive_duration(to_enrich)

@@ -40,10 +36,10 @@ class MetaEnricher(Enricher):
            media.set("bytes", file_stats.st_size)
            media.set("size", self.human_readable_bytes(file_stats.st_size))
            total_size += file_stats.st_size
-        
+
        to_enrich.set("total_bytes", total_size)
        to_enrich.set("total_size", self.human_readable_bytes(total_size))
-        
+

    def human_readable_bytes(self, size: int) -> str:
        # receives number of bytes and returns human readble size
--- a/src/auto_archiver/modules/metadata_enricher/init.py
+++ b/src/auto_archiver/modules/metadata_enricher/init.py
--- a/src/auto_archiver/modules/metadata_enricher/manifest.py
+++ b/src/auto_archiver/modules/metadata_enricher/manifest.py
@@ -0,0 +1,22 @@
+{
+    "name": "Media Metadata Enricher",
+    "type": ["enricher"],
+    "requires_setup": False,
+    "external_dependencies": {
+        "python": ["loguru"], 
+        "bin": ["exiftool"]
+
+    },
+    "description": """
+    Extracts metadata information from files using ExifTool.
+
+    ### Features
+    - Uses ExifTool to extract detailed metadata from media files.
+    - Processes file-specific data like camera settings, geolocation, timestamps, and other embedded metadata.
+    - Adds extracted metadata to the corresponding `Media` object within the `Metadata`.
+
+    ### Notes
+    - Requires ExifTool to be installed and accessible via the system's PATH.
+    - Skips enrichment for files where metadata extraction fails.
+    """
+}
--- a/src/auto_archiver/modules/metadata_enricher/metadata_enricher.py
+++ b/src/auto_archiver/modules/metadata_enricher/metadata_enricher.py
@@ -2,8 +2,8 @@ import subprocess
 import traceback
 from loguru import logger

-from . import Enricher
-from ..core import Metadata
+from auto_archiver.enrichers import Enricher
+from auto_archiver.core import Metadata


 class MetadataEnricher(Enricher):
@@ -16,9 +16,6 @@ class MetadataEnricher(Enricher):
        # without this STEP.__init__ is not called
        super().__init__(config)

-    @staticmethod
-    def configs() -> dict:
-        return {}

    def enrich(self, to_enrich: Metadata) -> None:
        url = to_enrich.get_url()
--- a/src/auto_archiver/modules/pdq_hash_enricher/init.py
+++ b/src/auto_archiver/modules/pdq_hash_enricher/init.py
--- a/src/auto_archiver/modules/pdq_hash_enricher/manifest.py
+++ b/src/auto_archiver/modules/pdq_hash_enricher/manifest.py
@@ -0,0 +1,21 @@
+{
+    "name": "PDQ Hash Enricher",
+    "type": ["enricher"],
+    "requires_setup": False,
+    "external_dependencies": {
+        "python": ["loguru", "pdqhash", "numpy", "Pillow"],
+    },
+    "description": """
+    PDQ Hash Enricher for generating perceptual hashes of media files.
+
+    ### Features
+    - Calculates perceptual hashes for image files using the PDQ hashing algorithm.
+    - Enables detection of duplicate or near-duplicate visual content.
+    - Processes images stored in `Metadata` objects, adding computed hashes to the corresponding `Media` entries.
+    - Skips non-image media or files unsuitable for hashing (e.g., corrupted or unsupported formats).
+
+    ### Notes
+    - Best used after enrichers like `thumbnail_enricher` or `screenshot_enricher` to ensure images are available.
+    - Uses the `pdqhash` library to compute 256-bit perceptual hashes, which are stored as hexadecimal strings.
+    """
+}
--- a/src/auto_archiver/modules/pdq_hash_enricher/pdq_hash_enricher.py
+++ b/src/auto_archiver/modules/pdq_hash_enricher/pdq_hash_enricher.py
@@ -16,8 +16,8 @@ import numpy as np
 from PIL import Image, UnidentifiedImageError
 from loguru import logger

-from . import Enricher
-from ..core import Metadata
+from auto_archiver.enrichers import Enricher
+from auto_archiver.core import Metadata


 class PdqHashEnricher(Enricher):
@@ -31,10 +31,6 @@ class PdqHashEnricher(Enricher):
        # Without this STEP.__init__ is not called
        super().__init__(config)

-    @staticmethod
-    def configs() -> dict:
-        return {}
-
    def enrich(self, to_enrich: Metadata) -> None:
        url = to_enrich.get_url()
        logger.debug(f"calculating perceptual hashes for {url=}")
--- a/src/auto_archiver/modules/screenshot_enricher/init.py
+++ b/src/auto_archiver/modules/screenshot_enricher/init.py
--- a/src/auto_archiver/modules/screenshot_enricher/manifest.py
+++ b/src/auto_archiver/modules/screenshot_enricher/manifest.py
@@ -0,0 +1,30 @@
+{
+    "name": "Screenshot Enricher",
+    "type": ["enricher"],
+    "requires_setup": True,
+    "external_dependencies": {
+        "python": ["loguru", "selenium"],
+        "bin": ["chromedriver"]
+    },
+    "configs": {
+            "width": {"default": 1280, "help": "width of the screenshots"},
+            "height": {"default": 720, "help": "height of the screenshots"},
+            "timeout": {"default": 60, "help": "timeout for taking the screenshot"},
+            "sleep_before_screenshot": {"default": 4, "help": "seconds to wait for the pages to load before taking screenshot"},
+            "http_proxy": {"default": "", "help": "http proxy to use for the webdriver, eg http://proxy-user:password@proxy-ip:port"},
+            "save_to_pdf": {"default": False, "help": "save the page as pdf along with the screenshot. PDF saving options can be adjusted with the 'print_options' parameter"},
+            "print_options": {"default": {}, "help": "options to pass to the pdf printer"}
+        },
+    "description": """
+    Captures screenshots and optionally saves web pages as PDFs using a WebDriver.
+
+    ### Features
+    - Takes screenshots of web pages, with configurable width, height, and timeout settings.
+    - Optionally saves pages as PDFs, with additional configuration for PDF printing options.
+    - Bypasses URLs detected as authentication walls.
+    - Integrates seamlessly with the metadata enrichment pipeline, adding screenshots and PDFs as media.
+
+    ### Notes
+    - Requires a WebDriver (e.g., ChromeDriver) installed and accessible via the system's PATH.
+    """
+}
--- a/src/auto_archiver/modules/screenshot_enricher/screenshot_enricher.py
+++ b/src/auto_archiver/modules/screenshot_enricher/screenshot_enricher.py
@@ -5,24 +5,30 @@ import base64
 from selenium.common.exceptions import TimeoutException


-from . import Enricher
-from ..utils import Webdriver, UrlUtil, random_str  
-from ..core import Media, Metadata, ArchivingContext
+from auto_archiver.enrichers import Enricher
+from auto_archiver.utils import Webdriver, UrlUtil, random_str
+from auto_archiver.core import Media, Metadata, ArchivingContext

 class ScreenshotEnricher(Enricher):
    name = "screenshot_enricher"

-    @staticmethod
-    def configs() -> dict:
-        return {
-            "width": {"default": 1280, "help": "width of the screenshots"},
-            "height": {"default": 720, "help": "height of the screenshots"},
-            "timeout": {"default": 60, "help": "timeout for taking the screenshot"},
-            "sleep_before_screenshot": {"default": 4, "help": "seconds to wait for the pages to load before taking screenshot"},
-            "http_proxy": {"default": "", "help": "http proxy to use for the webdriver, eg http://proxy-user:password@proxy-ip:port"},
-            "save_to_pdf": {"default": False, "help": "save the page as pdf along with the screenshot. PDF saving options can be adjusted with the 'print_options' parameter"},
-            "print_options": {"default": {}, "help": "options to pass to the pdf printer"}
-        }
+    def __init__(self, config: dict) -> None:
+        super().__init__(config)
+    #     TODO?
+
+
+
+    # @staticmethod
+    # def configs() -> dict:
+    #     return {
+    #         "width": {"default": 1280, "help": "width of the screenshots"},
+    #         "height": {"default": 720, "help": "height of the screenshots"},
+    #         "timeout": {"default": 60, "help": "timeout for taking the screenshot"},
+    #         "sleep_before_screenshot": {"default": 4, "help": "seconds to wait for the pages to load before taking screenshot"},
+    #         "http_proxy": {"default": "", "help": "http proxy to use for the webdriver, eg http://proxy-user:password@proxy-ip:port"},
+    #         "save_to_pdf": {"default": False, "help": "save the page as pdf along with the screenshot. PDF saving options can be adjusted with the 'print_options' parameter"},
+    #         "print_options": {"default": {}, "help": "options to pass to the pdf printer"}
+    #     }

    def enrich(self, to_enrich: Metadata) -> None:
        url = to_enrich.get_url()
--- a/src/auto_archiver/modules/ssl_enricher/init.py
+++ b/src/auto_archiver/modules/ssl_enricher/init.py
--- a/src/auto_archiver/modules/ssl_enricher/manifest.py
+++ b/src/auto_archiver/modules/ssl_enricher/manifest.py
@@ -0,0 +1,22 @@
+{
+    "name": "SSL Certificate Enricher",
+    "type": ["enricher"],
+    "requires_setup": False,
+    "external_dependencies": {
+        "python": ["loguru", "python-slugify"],
+    },
+    "configs": {
+        "skip_when_nothing_archived": {"default": True, "help": "if true, will skip enriching when no media is archived"},
+    },
+    "description": """
+    Retrieves SSL certificate information for a domain and stores it as a file.
+
+    ### Features
+    - Fetches SSL certificates for domains using the HTTPS protocol.
+    - Stores certificates in PEM format and adds them as media to the metadata.
+    - Skips enrichment if no media has been archived, based on the `skip_when_nothing_archived` configuration.
+
+    ### Notes
+    - Requires the target URL to use the HTTPS scheme; other schemes are not supported.
+    """
+}
--- a/src/auto_archiver/modules/ssl_enricher/ssl_enricher.py
+++ b/src/auto_archiver/modules/ssl_enricher/ssl_enricher.py
@@ -3,8 +3,8 @@ from slugify import slugify
 from urllib.parse import urlparse
 from loguru import logger

-from . import Enricher
-from ..core import Metadata, ArchivingContext, Media
+from auto_archiver.enrichers import Enricher
+from auto_archiver.core import Metadata, ArchivingContext, Media


 class SSLEnricher(Enricher):
@@ -15,13 +15,7 @@ class SSLEnricher(Enricher):

    def __init__(self, config: dict) -> None:
        super().__init__(config)
-        self. skip_when_nothing_archived = bool(self.skip_when_nothing_archived)
-
-    @staticmethod
-    def configs() -> dict:
-        return {
-            "skip_when_nothing_archived": {"default": True, "help": "if true, will skip enriching when no media is archived"},
-        }
+        self.skip_when_nothing_archived = bool(self.skip_when_nothing_archived)

    def enrich(self, to_enrich: Metadata) -> None:
        if not to_enrich.media and self.skip_when_nothing_archived: return
--- a/src/auto_archiver/modules/telegram_archiver/telegram_archiver.py
+++ b/src/auto_archiver/modules/telegram_archiver/telegram_archiver.py
@@ -16,9 +16,6 @@ class TelegramArchiver(Archiver):
    def __init__(self, config: dict) -> None:
        super().__init__(config)

-    @staticmethod
-    def configs() -> dict:
-        return {}

    def download(self, item: Metadata) -> Metadata:
        url = item.get_url()
--- a/src/auto_archiver/modules/telethon_archiver/manifest.py
+++ b/src/auto_archiver/modules/telethon_archiver/manifest.py
@@ -21,7 +21,7 @@
                "default": {},
                "help": "(JSON string) private channel invite links (format: t.me/joinchat/HASH OR t.me/+HASH) and (optional but important to avoid hanging for minutes on startup) channel id (format: CHANNEL_ID taken from a post url like https://t.me/c/CHANNEL_ID/1), the telegram account will join any new channels on setup",
                # TODO
-                #"cli_set": lambda cli_val, cur_val: dict(cur_val, **json.loads(cli_val))
+                "cli_set": lambda cli_val, cur_val: dict(cur_val, **json.loads(cli_val))
            }
        },
    "description": """
--- a/src/auto_archiver/modules/telethon_archiver/telethon_archiver.py
+++ b/src/auto_archiver/modules/telethon_archiver/telethon_archiver.py
@@ -23,20 +23,6 @@ class TelethonArchiver(Archiver):
        self.assert_valid_string("api_id")
        self.assert_valid_string("api_hash")

-    @staticmethod
-    def configs() -> dict:
-        return {
-            "api_id": {"default": None, "help": "telegram API_ID value, go to https://my.telegram.org/apps"},
-            "api_hash": {"default": None, "help": "telegram API_HASH value, go to https://my.telegram.org/apps"},
-            "bot_token": {"default": None, "help": "optional, but allows access to more content such as large videos, talk to @botfather"},
-            "session_file": {"default": "secrets/anon", "help": "optional, records the telegram login session for future usage, '.session' will be appended to the provided value."},
-            "join_channels": {"default": True, "help": "disables the initial setup with channel_invites config, useful if you have a lot and get stuck"},
-            "channel_invites": {
-                "default": {},
-                "help": "(JSON string) private channel invite links (format: t.me/joinchat/HASH OR t.me/+HASH) and (optional but important to avoid hanging for minutes on startup) channel id (format: CHANNEL_ID taken from a post url like https://t.me/c/CHANNEL_ID/1), the telegram account will join any new channels on setup",
-                "cli_set": lambda cli_val, cur_val: dict(cur_val, **json.loads(cli_val))
-            }
-        }

    def setup(self) -> None:
        """
--- a/src/auto_archiver/modules/thumbnail_enricher/init.py
+++ b/src/auto_archiver/modules/thumbnail_enricher/init.py
--- a/src/auto_archiver/modules/thumbnail_enricher/manifest.py
+++ b/src/auto_archiver/modules/thumbnail_enricher/manifest.py
@@ -0,0 +1,27 @@
+{
+    "name": "Thumbnail Enricher",
+    "type": ["enricher"],
+    "requires_setup": False,
+    "external_dependencies": {
+        "python": ["loguru", "ffmpeg-python"],
+        "bin": ["ffmpeg"]
+    },
+    "configs": {
+            "thumbnails_per_minute": {"default": 60, "help": "how many thumbnails to generate per minute of video, can be limited by max_thumbnails"},
+            "max_thumbnails": {"default": 16, "help": "limit the number of thumbnails to generate per video, 0 means no limit"},
+        },
+    "description": """
+    Generates thumbnails for video files to provide visual previews.
+
+    ### Features
+    - Processes video files and generates evenly distributed thumbnails.
+    - Calculates the number of thumbnails based on video duration, `thumbnails_per_minute`, and `max_thumbnails`.
+    - Distributes thumbnails equally across the video's duration and stores them as media objects.
+    - Adds metadata for each thumbnail, including timestamps and IDs.
+
+    ### Notes
+    - Requires `ffmpeg` to be installed and accessible via the system's PATH.
+    - Handles videos without pre-existing duration metadata by probing with `ffmpeg`.
+    - Skips enrichment for non-video media files.
+    """
+}
--- a/src/auto_archiver/modules/thumbnail_enricher/thumbnail_enricher.py
+++ b/src/auto_archiver/modules/thumbnail_enricher/thumbnail_enricher.py
@@ -9,9 +9,9 @@ and identify important moments without watching the entire video.
 import ffmpeg, os
 from loguru import logger

-from . import Enricher
-from ..core import Media, Metadata, ArchivingContext
-from ..utils.misc import random_str
+from auto_archiver.enrichers import Enricher
+from auto_archiver.core import Media, Metadata, ArchivingContext
+from auto_archiver.utils.misc import random_str


 class ThumbnailEnricher(Enricher):
@@ -25,13 +25,6 @@ class ThumbnailEnricher(Enricher):
        super().__init__(config)
        self.thumbnails_per_second = int(self.thumbnails_per_minute) / 60
        self.max_thumbnails = int(self.max_thumbnails)
-
-    @staticmethod
-    def configs() -> dict:
-        return {
-            "thumbnails_per_minute": {"default": 60, "help": "how many thumbnails to generate per minute of video, can be limited by max_thumbnails"},
-            "max_thumbnails": {"default": 16, "help": "limit the number of thumbnails to generate per video, 0 means no limit"},
-        }
    
    def enrich(self, to_enrich: Metadata) -> None:
        """
--- a/src/auto_archiver/modules/timestamping_enricher/init.py
+++ b/src/auto_archiver/modules/timestamping_enricher/init.py
--- a/src/auto_archiver/modules/timestamping_enricher/manifest.py
+++ b/src/auto_archiver/modules/timestamping_enricher/manifest.py
@@ -0,0 +1,40 @@
+{
+    "name": "Timestamping Enricher",
+    "type": ["enricher"],
+    "requires_setup": True,
+    "external_dependencies": {
+        "python": [
+            "loguru",
+            "slugify",
+            "tsp_client",
+            "asn1crypto",
+            "certvalidator",
+            "certifi"
+        ],
+    },
+    "configs": {
+        "tsa_urls": {
+            "default": [
+                "http://timestamp.digicert.com",
+                "http://timestamp.identrust.com",
+                "http://timestamp.globalsign.com/tsa/r6advanced1",
+                "http://tss.accv.es:8318/tsa"
+            ],
+            "help": "List of RFC3161 Time Stamp Authorities to use, separate with commas if passed via the command line.",
+            "cli_set": lambda cli_val, cur_val: set(cli_val.split(","))
+        }
+    },
+    "description": """
+    Generates RFC3161-compliant timestamp tokens using Time Stamp Authorities (TSA) for archived files.
+
+    ### Features
+    - Creates timestamp tokens to prove the existence of files at a specific time, useful for legal and authenticity purposes.
+    - Aggregates file hashes into a text file and timestamps the concatenated data.
+    - Uses multiple Time Stamp Authorities (TSAs) to ensure reliability and redundancy.
+    - Validates timestamping certificates against trusted Certificate Authorities (CAs) using the `certifi` trust store.
+
+    ### Notes
+    - Should be run after the `hash_enricher` to ensure file hashes are available.
+    - Requires internet access to interact with the configured TSAs.
+    """
+}
--- a/src/auto_archiver/modules/timestamping_enricher/timestamping_enricher.py
+++ b/src/auto_archiver/modules/timestamping_enricher/timestamping_enricher.py
@@ -8,9 +8,9 @@ from certvalidator import CertificateValidator, ValidationContext
 from asn1crypto import pem
 import certifi

-from . import Enricher
-from ..core import Metadata, ArchivingContext, Media
-from ..archivers import Archiver
+from auto_archiver.enrichers import Enricher
+from auto_archiver.core import Metadata, ArchivingContext, Media
+from auto_archiver.archivers import Archiver


 class TimestampingEnricher(Enricher):
@@ -26,36 +26,36 @@ class TimestampingEnricher(Enricher):
    def __init__(self, config: dict) -> None:
        super().__init__(config)

-    @staticmethod
-    def configs() -> dict:
-        return {
-            "tsa_urls": { 
-                "default": [
-                    # [Adobe Approved Trust List] and [Windows Cert Store]
-                    "http://timestamp.digicert.com", 
-                    "http://timestamp.identrust.com",
-                    # "https://timestamp.entrust.net/TSS/RFC3161sha2TS", # not valid for timestamping
-                    # "https://timestamp.sectigo.com", # wait 15 seconds between each request.
-
-                    # [Adobe: European Union Trusted Lists].
-                    # "https://timestamp.sectigo.com/qualified", # wait 15 seconds between each request.
-                    
-                    # [Windows Cert Store]
-                    "http://timestamp.globalsign.com/tsa/r6advanced1",
-                    
-                    # [Adobe: European Union Trusted Lists] and [Windows Cert Store]
-                    # "http://ts.quovadisglobal.com/eu", # not valid for timestamping
-                    # "http://tsa.belgium.be/connect", # self-signed certificate in certificate chain
-                    # "https://timestamp.aped.gov.gr/qtss", # self-signed certificate in certificate chain
-                    # "http://tsa.sep.bg", # self-signed certificate in certificate chain
-                    # "http://tsa.izenpe.com", #unable to get local issuer certificate
-                    # "http://kstamp.keynectis.com/KSign", # unable to get local issuer certificate
-                    "http://tss.accv.es:8318/tsa",
-                ], 
-                "help": "List of RFC3161 Time Stamp Authorities to use, separate with commas if passed via the command line.", 
-                "cli_set": lambda cli_val, cur_val: set(cli_val.split(",")) 
-            }
-        }
+    # @staticmethod
+    # def configs() -> dict:
+    #     return {
+    #         "tsa_urls": {
+    #             "default": [
+    #                 # [Adobe Approved Trust List] and [Windows Cert Store]
+    #                 "http://timestamp.digicert.com",
+    #                 "http://timestamp.identrust.com",
+    #                 # "https://timestamp.entrust.net/TSS/RFC3161sha2TS", # not valid for timestamping
+    #                 # "https://timestamp.sectigo.com", # wait 15 seconds between each request.
+    #
+    #                 # [Adobe: European Union Trusted Lists].
+    #                 # "https://timestamp.sectigo.com/qualified", # wait 15 seconds between each request.
+    #
+    #                 # [Windows Cert Store]
+    #                 "http://timestamp.globalsign.com/tsa/r6advanced1",
+    #
+    #                 # [Adobe: European Union Trusted Lists] and [Windows Cert Store]
+    #                 # "http://ts.quovadisglobal.com/eu", # not valid for timestamping
+    #                 # "http://tsa.belgium.be/connect", # self-signed certificate in certificate chain
+    #                 # "https://timestamp.aped.gov.gr/qtss", # self-signed certificate in certificate chain
+    #                 # "http://tsa.sep.bg", # self-signed certificate in certificate chain
+    #                 # "http://tsa.izenpe.com", #unable to get local issuer certificate
+    #                 # "http://kstamp.keynectis.com/KSign", # unable to get local issuer certificate
+    #                 "http://tss.accv.es:8318/tsa",
+    #             ],
+    #             "help": "List of RFC3161 Time Stamp Authorities to use, separate with commas if passed via the command line.",
+    #             "cli_set": lambda cli_val, cur_val: set(cli_val.split(","))
+    #         }
+    #     }

    def enrich(self, to_enrich: Metadata) -> None:
        url = to_enrich.get_url()
--- a/src/auto_archiver/modules/twitter_api_archiver/manifest.py
+++ b/src/auto_archiver/modules/twitter_api_archiver/manifest.py
@@ -12,7 +12,8 @@
    },
    "configs": {
            "bearer_token": {"default": None, "help": "[deprecated: see bearer_tokens] twitter API bearer_token which is enough for archiving, if not provided you will need consumer_key, consumer_secret, access_token, access_secret"},
-            "bearer_tokens": {"default": [], "help": " a list of twitter API bearer_token which is enough for archiving, if not provided you will need consumer_key, consumer_secret, access_token, access_secret, if provided you can still add those for better rate limits. CSV of bearer tokens if provided via the command line"},
+            "bearer_tokens": {"default": [], "help": " a list of twitter API bearer_token which is enough for archiving, if not provided you will need consumer_key, consumer_secret, access_token, access_secret, if provided you can still add those for better rate limits. CSV of bearer tokens if provided via the command line",
+                              "cli_set": lambda cli_val, cur_val: list(set(cli_val.split(",")))},
            "consumer_key": {"default": None, "help": "twitter API consumer_key"},
            "consumer_secret": {"default": None, "help": "twitter API consumer_secret"},
            "access_token": {"default": None, "help": "twitter API access_token"},
--- a/src/auto_archiver/modules/twitter_api_archiver/twitter_api_archiver.py
+++ b/src/auto_archiver/modules/twitter_api_archiver/twitter_api_archiver.py
@@ -34,17 +34,6 @@ class TwitterApiArchiver(Archiver):
                             access_token=self.access_token, access_secret=self.access_secret))
        assert self.api_client is not None, "Missing Twitter API configurations, please provide either AND/OR (consumer_key, consumer_secret, access_token, access_secret) to use this archiver, you can provide both for better rate-limit results."

-    @staticmethod
-    def configs() -> dict:
-        return {
-            "bearer_token": {"default": None, "help": "[deprecated: see bearer_tokens] twitter API bearer_token which is enough for archiving, if not provided you will need consumer_key, consumer_secret, access_token, access_secret"},
-            "bearer_tokens": {"default": [], "help": " a list of twitter API bearer_token which is enough for archiving, if not provided you will need consumer_key, consumer_secret, access_token, access_secret, if provided you can still add those for better rate limits. CSV of bearer tokens if provided via the command line", "cli_set": lambda cli_val, cur_val: list(set(cli_val.split(",")))},
-            "consumer_key": {"default": None, "help": "twitter API consumer_key"},
-            "consumer_secret": {"default": None, "help": "twitter API consumer_secret"},
-            "access_token": {"default": None, "help": "twitter API access_token"},
-            "access_secret": {"default": None, "help": "twitter API access_secret"},
-        }
-    
    @property  # getter .mimetype
    def api_client(self) -> str:
        return self.apis[self.api_index]
--- a/src/auto_archiver/modules/vk_archiver/vk_archiver.py
+++ b/src/auto_archiver/modules/vk_archiver/vk_archiver.py
@@ -19,14 +19,6 @@ class VkArchiver(Archiver):
        self.assert_valid_string("password")
        self.vks = VkScraper(self.username, self.password, session_file=self.session_file)

-    @staticmethod
-    def configs() -> dict:
-        return {
-            "username": {"default": None, "help": "valid VKontakte username"},
-            "password": {"default": None, "help": "valid VKontakte password"},
-            "session_file": {"default": "secrets/vk_config.v2.json", "help": "valid VKontakte password"},
-        }
-
    def download(self, item: Metadata) -> Metadata:
        url = item.get_url()

--- a/src/auto_archiver/modules/wacz_enricher/init.py
+++ b/src/auto_archiver/modules/wacz_enricher/init.py
--- a/src/auto_archiver/modules/wacz_enricher/manifest.py
+++ b/src/auto_archiver/modules/wacz_enricher/manifest.py
@@ -0,0 +1,39 @@
+{
+    "name": "WACZ Enricher",
+    "type": ["enricher", "archiver"],
+    "requires_setup": True,
+    "external_dependencies": {
+        "python": [
+            "loguru",
+            "jsonlines",
+            "warcio"
+        ],
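+        # TODO: confirm docker belongs here; setup() only uses it depending on WACZ_ENABLE_DOCKER / RUNNING_IN_DOCKER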
+        "bin": [
+            "docker"
+        ]
+    },
+    "configs": {
+            "profile": {"default": None, "help": "browsertrix-profile (for profile generation see https://github.com/webrecorder/browsertrix-crawler#creating-and-using-browser-profiles)."},
+            "docker_commands": {"default": None, "help":"if a custom docker invocation is needed"},
+            "timeout": {"default": 120, "help": "timeout for WACZ generation in seconds"},
+            "extract_media": {"default": False, "help": "If enabled all the images/videos/audio present in the WACZ archive will be extracted into separate Media and appear in the html report. The .wacz file will be kept untouched."},
+            "extract_screenshot": {"default": True, "help": "If enabled the screenshot captured by browsertrix will be extracted into separate Media and appear in the html report. The .wacz file will be kept untouched."},
+            "socks_proxy_host": {"default": None, "help": "SOCKS proxy host for browsertrix-crawler, use in combination with socks_proxy_port. eg: user:password@host"},
+            "socks_proxy_port": {"default": None, "help": "SOCKS proxy port for browsertrix-crawler, use in combination with socks_proxy_host. eg 1234"},
+            "proxy_server": {"default": None, "help": "SOCKS server proxy URL, in development"},
+        },
+    "description": """
+    Creates .WACZ archives of web pages using the `browsertrix-crawler` tool, with options for media extraction and screenshot saving.
+
+    ### Features
+    - Archives web pages into .WACZ format using Docker or direct invocation of `browsertrix-crawler`.
+    - Supports custom profiles for archiving private or dynamic content.
+    - Extracts media (images, videos, audio) and screenshots from the archive, optionally adding them to the enrichment pipeline.
+    - Generates metadata from the archived page's content and structure (e.g., titles, text).
+
+    ### Notes
+    - Runs `browsertrix-crawler` through Docker unless the `RUNNING_IN_DOCKER` environment variable is set and `WACZ_ENABLE_DOCKER` is not.
+    - Configurable via parameters for timeout, media extraction, screenshots, and proxy settings.
+    """
+}
--- a/src/auto_archiver/modules/wacz_enricher/wacz_enricher.py
+++ b/src/auto_archiver/modules/wacz_enricher/wacz_enricher.py
@@ -5,10 +5,10 @@ from zipfile import ZipFile
 from loguru import logger
 from warcio.archiveiterator import ArchiveIterator

-from ..core import Media, Metadata, ArchivingContext
-from . import Enricher
-from ..archivers import Archiver
-from ..utils import UrlUtil, random_str
+from auto_archiver.core import Media, Metadata, ArchivingContext
+from auto_archiver.enrichers import Enricher
+from auto_archiver.archivers import Archiver
+from auto_archiver.utils import UrlUtil, random_str


 class WaczArchiverEnricher(Enricher, Archiver):
@@ -24,19 +24,6 @@ class WaczArchiverEnricher(Enricher, Archiver):
        # without this STEP.__init__ is not called
        super().__init__(config)

-    @staticmethod
-    def configs() -> dict:
-        return {
-            "profile": {"default": None, "help": "browsertrix-profile (for profile generation see https://github.com/webrecorder/browsertrix-crawler#creating-and-using-browser-profiles)."},
-            "docker_commands": {"default": None, "help":"if a custom docker invocation is needed"},
-            "timeout": {"default": 120, "help": "timeout for WACZ generation in seconds"},
-            "extract_media": {"default": False, "help": "If enabled all the images/videos/audio present in the WACZ archive will be extracted into separate Media and appear in the html report. The .wacz file will be kept untouched."},
-            "extract_screenshot": {"default": True, "help": "If enabled the screenshot captured by browsertrix will be extracted into separate Media and appear in the html report. The .wacz file will be kept untouched."},
-            "socks_proxy_host": {"default": None, "help": "SOCKS proxy host for browsertrix-crawler, use in combination with socks_proxy_port. eg: user:password@host"},
-            "socks_proxy_port": {"default": None, "help": "SOCKS proxy port for browsertrix-crawler, use in combination with socks_proxy_host. eg 1234"},
-            "proxy_server": {"default": None, "help": "SOCKS server proxy URL, in development"},
-        }
-    
    def setup(self) -> None:
        self.use_docker = os.environ.get('WACZ_ENABLE_DOCKER') or not os.environ.get('RUNNING_IN_DOCKER')
        self.docker_in_docker = os.environ.get('WACZ_ENABLE_DOCKER') and os.environ.get('RUNNING_IN_DOCKER')
--- a/src/auto_archiver/modules/wayback_enricher/init.py
+++ b/src/auto_archiver/modules/wayback_enricher/init.py
--- a/src/auto_archiver/modules/wayback_enricher/manifest.py
+++ b/src/auto_archiver/modules/wayback_enricher/manifest.py
@@ -0,0 +1,29 @@
+{
+    "name": "Wayback Machine Enricher",
+    "type": ["enricher", "archiver"],
+    "requires_setup": True,
+    "external_dependencies": {
+        "python": ["loguru", "requests"],
+    },
+    "configs": {
+        "timeout": {"default": 15, "help": "seconds to wait for successful archive confirmation from wayback, if more than this passes the result contains the job_id so the status can later be checked manually."},
+        "if_not_archived_within": {"default": None, "help": "only tell wayback to archive if no archive is available before the number of seconds specified, use None to ignore this option. For more information: https://docs.google.com/document/d/1Nsv52MvSjbLb2PCpHlat0gkzw0EvtSgpKHu4mk0MnrA"},
+        "key": {"default": None, "help": "wayback API key. to get credentials visit https://archive.org/account/s3.php"},
+        "secret": {"default": None, "help": "wayback API secret. to get credentials visit https://archive.org/account/s3.php"},
+        "proxy_http": {"default": None, "help": "http proxy to use for wayback requests, eg http://proxy-user:password@proxy-ip:port"},
+        "proxy_https": {"default": None, "help": "https proxy to use for wayback requests, eg https://proxy-user:password@proxy-ip:port"},
+    },
+    "description": """
+    Submits the current URL to the Wayback Machine for archiving and returns either a job ID or the completed archive URL.
+
+    ### Features
+    - Archives URLs using the Internet Archive's Wayback Machine API.
+    - Supports conditional archiving based on the existence of prior archives within a specified time range.
+    - Provides proxies for HTTP and HTTPS requests.
+    - Fetches and confirms the archive URL or provides a job ID for later status checks.
+
+    ### Notes
+    - Requires a valid Wayback Machine API key and secret.
+    - Handles rate-limiting by Wayback Machine and retries status checks with exponential backoff.
+    """
+}
--- a/src/auto_archiver/modules/wayback_enricher/wayback_enricher.py
+++ b/src/auto_archiver/modules/wayback_enricher/wayback_enricher.py
@@ -2,10 +2,10 @@ import json
 from loguru import logger
 import time, requests

-from . import Enricher
-from ..archivers import Archiver
-from ..utils import UrlUtil
-from ..core import Metadata
+from auto_archiver.enrichers import Enricher
+from auto_archiver.archivers import Archiver
+from auto_archiver.utils import UrlUtil
+from auto_archiver.core import Metadata

 class WaybackArchiverEnricher(Enricher, Archiver):
    """
@@ -21,17 +21,6 @@ class WaybackArchiverEnricher(Enricher, Archiver):
        assert type(self.secret) == str and len(self.secret) > 0, "please provide a value for the wayback_enricher API key"
        assert type(self.secret) == str and len(self.secret) > 0, "please provide a value for the wayback_enricher API secret"

-    @staticmethod
-    def configs() -> dict:
-        return {
-            "timeout": {"default": 15, "help": "seconds to wait for successful archive confirmation from wayback, if more than this passes the result contains the job_id so the status can later be checked manually."},
-            "if_not_archived_within": {"default": None, "help": "only tell wayback to archive if no archive is available before the number of seconds specified, use None to ignore this option. For more information: https://docs.google.com/document/d/1Nsv52MvSjbLb2PCpHlat0gkzw0EvtSgpKHu4mk0MnrA"},
-            "key": {"default": None, "help": "wayback API key. to get credentials visit https://archive.org/account/s3.php"},
-            "secret": {"default": None, "help": "wayback API secret. to get credentials visit https://archive.org/account/s3.php"},
-            "proxy_http": {"default": None, "help": "http proxy to use for wayback requests, eg http://proxy-user:password@proxy-ip:port"},
-            "proxy_https": {"default": None, "help": "https proxy to use for wayback requests, eg https://proxy-user:password@proxy-ip:port"},
-        }
-
    def download(self, item: Metadata) -> Metadata:
        # this new Metadata object is required to avoid duplication
        result = Metadata()
--- a/src/auto_archiver/modules/whisper_enricher/init.py
+++ b/src/auto_archiver/modules/whisper_enricher/init.py
--- a/src/auto_archiver/modules/whisper_enricher/manifest.py
+++ b/src/auto_archiver/modules/whisper_enricher/manifest.py
@@ -0,0 +1,30 @@
+{
+    "name": "Whisper Enricher",
+    "type": ["enricher"],
+    "requires_setup": True,
+    "external_dependencies": {
+        "python": ["loguru", "requests"],
+    },
+    "configs": {
+        "api_endpoint": {"default": None, "help": "WhisperApi api endpoint, eg: https://whisperbox-api.com/api/v1, a deployment of https://github.com/bellingcat/whisperbox-transcribe."},
+        "api_key": {"default": None, "help": "WhisperApi api key for authentication"},
+        "include_srt": {"default": False, "help": "Whether to include a subtitle SRT (SubRip Subtitle file) for the video (can be used in video players)."},
+        "timeout": {"default": 90, "help": "How many seconds to wait at most for a successful job completion."},
+        "action": {"default": "translate", "help": "which Whisper operation to execute", "choices": ["transcribe", "translate", "language_detection"]},
+    },
+    "description": """
+    Integrates with a Whisper API service to transcribe, translate, or detect the language of audio and video files.
+
+    ### Features
+    - Submits audio or video files to a Whisper API deployment for processing.
+    - Supports operations such as transcription, translation, and language detection.
+    - Optionally generates SRT subtitle files for video content.
+    - Integrates with S3-compatible storage systems to make files publicly accessible for processing.
+    - Handles job submission, status checking, artifact retrieval, and cleanup.
+
+    ### Notes
+    - Requires a Whisper API endpoint and API key for authentication.
+    - Only compatible with S3-compatible storage systems for media file accessibility.
+    - Handles multiple jobs and retries for failed or incomplete processing.
+    """
+}
--- a/src/auto_archiver/modules/whisper_enricher/whisper_enricher.py
+++ b/src/auto_archiver/modules/whisper_enricher/whisper_enricher.py
@@ -2,9 +2,9 @@ import traceback
 import requests, time
 from loguru import logger

-from . import Enricher
-from ..core import Metadata, Media, ArchivingContext
-from ..storages import S3Storage
+from auto_archiver.enrichers import Enricher
+from auto_archiver.core import Metadata, Media, ArchivingContext
+from auto_archiver.storages import S3Storage


 class WhisperEnricher(Enricher):
@@ -22,17 +22,6 @@ class WhisperEnricher(Enricher):
        assert type(self.api_key) == str and len(self.api_key) > 0, "please provide a value for the whisper_enricher api_key"
        self.timeout = int(self.timeout)

-    @staticmethod
-    def configs() -> dict:
-        return {
-            "api_endpoint": {"default": None, "help": "WhisperApi api endpoint, eg: https://whisperbox-api.com/api/v1, a deployment of https://github.com/bellingcat/whisperbox-transcribe."},
-            "api_key": {"default": None, "help": "WhisperApi api key for authentication"},
-            "include_srt": {"default": False, "help": "Whether to include a subtitle SRT (SubRip Subtitle file) for the video (can be used in video players)."},
-            "timeout": {"default": 90, "help": "How many seconds to wait at most for a successful job completion."},
-            "action": {"default": "translate", "help": "which Whisper operation to execute", "choices": ["transcribe", "translate", "language_detection"]},
-
-        }
-
    def enrich(self, to_enrich: Metadata) -> None:
        if not self._get_s3_storage():
            logger.error("WhisperEnricher: To use the WhisperEnricher you need to use S3Storage so files are accessible publicly to the whisper service being called.")