From e1a937333666862217ddda1e9baea869535d3377 Mon Sep 17 00:00:00 2001
From: erinhmclark <erinhannahmary.clark@gmail.com>
Date: Mon, 27 Jan 2025 19:03:02 +0000
Subject: [PATCH] Refactor for new config setup: move base_processors into core

---
 poetry.lock                                   | 55 ++++++-------
 src/auto_archiver/base_processors/__init__.py |  6 --
 src/auto_archiver/core/__init__.py            |  9 ++-
 .../{base_processors => core}/database.py     |  0
 .../{base_processors => core}/enricher.py     |  0
 .../{base_processors => core}/extractor.py    |  8 +-
 .../{base_processors => core}/feeder.py       |  0
 .../{base_processors => core}/formatter.py    |  0
 src/auto_archiver/core/module.py              | 69 ++++++++--------
 src/auto_archiver/core/orchestrator.py        | 27 ++++---
 .../{base_processors => core}/storage.py      |  0
 src/auto_archiver/modules/api_db/api_db.py    |  3 +-
 src/auto_archiver/modules/atlos/atlos.py      |  6 +-
 .../modules/atlos_db/atlos_db.py              |  8 +-
 .../modules/atlos_feeder/atlos_feeder.py      |  3 +-
 .../modules/cli_feeder/cli_feeder.py          |  2 +-
 .../modules/console_db/console_db.py          |  2 +-
 src/auto_archiver/modules/csv_db/csv_db.py    |  2 +-
 .../modules/csv_feeder/csv_feeder.py          |  4 +-
 .../modules/gdrive_storage/gdrive_storage.py  |  3 +-
 .../modules/generic_extractor/__manifest__.py | 80 ++++++++++++++-----
 .../modules/generic_extractor/bluesky.py      |  2 +-
 .../modules/generic_extractor/dropin.py       |  2 +-
 .../generic_extractor/generic_extractor.py    |  7 +-
 .../modules/generic_extractor/truth.py        |  2 +-
 .../modules/generic_extractor/twitter.py      |  2 +-
 .../modules/gsheet_db/gsheet_db.py            |  7 +-
 .../modules/gsheet_feeder/gsheet_feeder.py    |  3 +-
 .../modules/hash_enricher/__manifest__.py     |  4 +-
 .../modules/hash_enricher/hash_enricher.py    | 13 ++-
 .../modules/html_formatter/html_formatter.py  | 28 ++++---
 .../instagram_api_extractor.py                |  4 +-
 .../instagram_extractor.py                    |  4 +-
 .../instagram_tbot_extractor.py               |  9 +--
 .../modules/local_storage/local_storage.py    |  7 +-
 .../modules/meta_enricher/meta_enricher.py    |  2 +-
 .../metadata_enricher/metadata_enricher.py    |  2 +-
 .../modules/mute_formatter/mute_formatter.py  |  3 +-
 .../pdq_hash_enricher/pdq_hash_enricher.py    |  2 +-
 src/auto_archiver/modules/s3_storage/s3.py    |  5 +-
 .../screenshot_enricher.py                    |  6 +-
 .../modules/ssl_enricher/ssl_enricher.py      |  2 +-
 .../telegram_extractor/telegram_extractor.py  |  2 +-
 .../telethon_extractor/telethon_extractor.py  |  8 +-
 .../thumbnail_enricher/thumbnail_enricher.py  |  4 +-
 .../timestamping_enricher.py                  |  8 +-
 .../twitter_api_extractor.py                  |  3 +-
 .../modules/vk_extractor/vk_extractor.py      |  3 +-
 .../modules/wacz_enricher/wacz_enricher.py    |  7 +-
 .../wayback_enricher/wayback_enricher.py      |  9 +--
 .../whisper_enricher/whisper_enricher.py      | 10 +--
 src/auto_archiver/utils/gsheet.py             |  4 +-
 52 files changed, 219 insertions(+), 242 deletions(-)
 delete mode 100644 src/auto_archiver/base_processors/__init__.py
 rename src/auto_archiver/{base_processors => core}/database.py (100%)
 rename src/auto_archiver/{base_processors => core}/enricher.py (100%)
 rename src/auto_archiver/{base_processors => core}/extractor.py (94%)
 rename src/auto_archiver/{base_processors => core}/feeder.py (100%)
 rename src/auto_archiver/{base_processors => core}/formatter.py (100%)
 rename src/auto_archiver/{base_processors => core}/storage.py (100%)

diff --git a/poetry.lock b/poetry.lock
index 128ede2..6d6ad8c 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -64,14 +64,14 @@ typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""}
 
 [[package]]
 name = "attrs"
-version = "24.3.0"
+version = "25.1.0"
 description = "Classes Without Boilerplate"
 optional = false
 python-versions = ">=3.8"
 groups = ["main"]
 files = [
-    {file = "attrs-24.3.0-py3-none-any.whl", hash = "sha256:ac96cd038792094f438ad1f6ff80837353805ac950cd2aa0e0625ef19850c308"},
-    {file = "attrs-24.3.0.tar.gz", hash = "sha256:8f5c07333d543103541ba7be0e2ce16eeee8130cb0b3f9238ab904ce1e85baff"},
+    {file = "attrs-25.1.0-py3-none-any.whl", hash = "sha256:c75a69e28a550a7e93789579c22aa26b0f5b83b75dc4e08fe092980051e1090a"},
+    {file = "attrs-25.1.0.tar.gz", hash = "sha256:1c97078a80c814273a76b2a298a932eb681c87415c11dee0a6921de7f1b02c3e"},
 ]
 
 [package.extras]
@@ -152,18 +152,18 @@ lxml = ["lxml"]
 
 [[package]]
 name = "boto3"
-version = "1.36.3"
+version = "1.36.6"
 description = "The AWS SDK for Python"
 optional = false
 python-versions = ">=3.8"
 groups = ["main"]
 files = [
-    {file = "boto3-1.36.3-py3-none-any.whl", hash = "sha256:f9843a5d06f501d66ada06f5a5417f671823af2cf319e36ceefa1bafaaaaa953"},
-    {file = "boto3-1.36.3.tar.gz", hash = "sha256:53a5307f6a3526ee2f8590e3c45efa504a3ea4532c1bfe4926c0c19bf188d141"},
+    {file = "boto3-1.36.6-py3-none-any.whl", hash = "sha256:6d473f0f340d02b4e9ad5b8e68786a09728101a8b950231b89ebdaf72b6dca21"},
+    {file = "boto3-1.36.6.tar.gz", hash = "sha256:b36feae061dc0793cf311468956a0a9e99215ce38bc99a1a4e55a5b105f16297"},
 ]
 
 [package.dependencies]
-botocore = ">=1.36.3,<1.37.0"
+botocore = ">=1.36.6,<1.37.0"
 jmespath = ">=0.7.1,<2.0.0"
 s3transfer = ">=0.11.0,<0.12.0"
 
@@ -172,14 +172,14 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]
 
 [[package]]
 name = "botocore"
-version = "1.36.3"
+version = "1.36.6"
 description = "Low-level, data-driven core of boto 3."
 optional = false
 python-versions = ">=3.8"
 groups = ["main"]
 files = [
-    {file = "botocore-1.36.3-py3-none-any.whl", hash = "sha256:536ab828e6f90dbb000e3702ac45fd76642113ae2db1b7b1373ad24104e89255"},
-    {file = "botocore-1.36.3.tar.gz", hash = "sha256:775b835e979da5c96548ed1a0b798101a145aec3cd46541d62e27dda5a94d7f8"},
+    {file = "botocore-1.36.6-py3-none-any.whl", hash = "sha256:f77bbbb03fb420e260174650fb5c0cc142ec20a96967734eed2b0ef24334ef34"},
+    {file = "botocore-1.36.6.tar.gz", hash = "sha256:4864c53d638da191a34daf3ede3ff1371a3719d952cc0c6bd24ce2836a38dd77"},
 ]
 
 [package.dependencies]
@@ -798,14 +798,14 @@ uritemplate = ">=3.0.1,<5"
 
 [[package]]
 name = "google-auth"
-version = "2.37.0"
+version = "2.38.0"
 description = "Google Authentication Library"
 optional = false
 python-versions = ">=3.7"
 groups = ["main"]
 files = [
-    {file = "google_auth-2.37.0-py2.py3-none-any.whl", hash = "sha256:42664f18290a6be591be5329a96fe30184be1a1badb7292a7f686a9659de9ca0"},
-    {file = "google_auth-2.37.0.tar.gz", hash = "sha256:0054623abf1f9c83492c63d3f47e77f0a544caa3d40b2d98e099a611c2dd5d00"},
+    {file = "google_auth-2.38.0-py2.py3-none-any.whl", hash = "sha256:e7dae6694313f434a2727bf2906f27ad259bae090d7aa896590d86feec3d9d4a"},
+    {file = "google_auth-2.38.0.tar.gz", hash = "sha256:8285113607d3b80a3f1543b75962447ba8a09fe85783432a784fdeef6ac094c4"},
 ]
 
 [package.dependencies]
@@ -958,13 +958,14 @@ files = [
 
 [[package]]
 name = "instaloader"
-version = "4.14"
+version = "4.14.1"
 description = "Download pictures (or videos) along with their captions and other metadata from Instagram."
 optional = false
 python-versions = ">=3.9"
 groups = ["main"]
 files = [
-    {file = "instaloader-4.14.tar.gz", hash = "sha256:754425eb17af44ce4bb6056e4eacd044a518d13b5efc11b9d80eb229bb96c652"},
+    {file = "instaloader-4.14.1-py3-none-any.whl", hash = "sha256:43356f696231621ea5a93354f9a4578124fe131940ee9aa1e83c20f57e18f26d"},
+    {file = "instaloader-4.14.1.tar.gz", hash = "sha256:a41a7372a18fb096b3ed545469479884de9cf768e12020c0e0e67c488d9d599c"},
 ]
 
 [package.dependencies]
@@ -1135,14 +1136,14 @@ files = [
 
 [[package]]
 name = "marshmallow"
-version = "3.25.1"
+version = "3.26.0"
 description = "A lightweight library for converting complex datatypes to and from native Python datatypes."
 optional = false
 python-versions = ">=3.9"
 groups = ["main"]
 files = [
-    {file = "marshmallow-3.25.1-py3-none-any.whl", hash = "sha256:ec5d00d873ce473b7f2ffcb7104286a376c354cab0c2fa12f5573dab03e87210"},
-    {file = "marshmallow-3.25.1.tar.gz", hash = "sha256:f4debda3bb11153d81ac34b0d582bf23053055ee11e791b54b4b35493468040a"},
+    {file = "marshmallow-3.26.0-py3-none-any.whl", hash = "sha256:1287bca04e6a5f4094822ac153c03da5e214a0a60bcd557b140f3e66991b8ca1"},
+    {file = "marshmallow-3.26.0.tar.gz", hash = "sha256:eb36762a1cc76d7abf831e18a3a1b26d3d481bbc74581b8e532a3d3a8115e1cb"},
 ]
 
 [package.dependencies]
@@ -2087,14 +2088,14 @@ pyasn1 = ">=0.1.3"
 
 [[package]]
 name = "s3transfer"
-version = "0.11.1"
+version = "0.11.2"
 description = "An Amazon S3 Transfer Manager"
 optional = false
 python-versions = ">=3.8"
 groups = ["main"]
 files = [
-    {file = "s3transfer-0.11.1-py3-none-any.whl", hash = "sha256:8fa0aa48177be1f3425176dfe1ab85dcd3d962df603c3dbfc585e6bf857ef0ff"},
-    {file = "s3transfer-0.11.1.tar.gz", hash = "sha256:3f25c900a367c8b7f7d8f9c34edc87e300bde424f779dc9f0a8ae4f9df9264f6"},
+    {file = "s3transfer-0.11.2-py3-none-any.whl", hash = "sha256:be6ecb39fadd986ef1701097771f87e4d2f821f27f6071c872143884d2950fbc"},
+    {file = "s3transfer-0.11.2.tar.gz", hash = "sha256:3b39185cb72f5acc77db1a58b6e25b977f28d20496b6e58d6813d75f464d632f"},
 ]
 
 [package.dependencies]
@@ -2105,14 +2106,14 @@ crt = ["botocore[crt] (>=1.36.0,<2.0a.0)"]
 
 [[package]]
 name = "selenium"
-version = "4.28.0"
+version = "4.28.1"
 description = "Official Python bindings for Selenium WebDriver"
 optional = false
 python-versions = ">=3.9"
 groups = ["main"]
 files = [
-    {file = "selenium-4.28.0-py3-none-any.whl", hash = "sha256:3d6a2e8e1b850a1078884ea19f4e011ecdc12263434d87a0b78769836fb82dd8"},
-    {file = "selenium-4.28.0.tar.gz", hash = "sha256:a9fae6eef48d470a1b0c6e45185d96f0dafb025e8da4b346cc41e4da3ac54fa0"},
+    {file = "selenium-4.28.1-py3-none-any.whl", hash = "sha256:4238847e45e24e4472cfcf3554427512c7aab9443396435b1623ef406fff1cc1"},
+    {file = "selenium-4.28.1.tar.gz", hash = "sha256:0072d08670d7ec32db901bd0107695a330cecac9f196e3afb3fa8163026e022a"},
 ]
 
 [package.dependencies]
@@ -2421,14 +2422,14 @@ test = ["pytest"]
 
 [[package]]
 name = "starlette"
-version = "0.45.2"
+version = "0.45.3"
 description = "The little ASGI library that shines."
 optional = false
 python-versions = ">=3.9"
 groups = ["docs"]
 files = [
-    {file = "starlette-0.45.2-py3-none-any.whl", hash = "sha256:4daec3356fb0cb1e723a5235e5beaf375d2259af27532958e2d79df549dad9da"},
-    {file = "starlette-0.45.2.tar.gz", hash = "sha256:bba1831d15ae5212b22feab2f218bab6ed3cd0fc2dc1d4442443bb1ee52260e0"},
+    {file = "starlette-0.45.3-py3-none-any.whl", hash = "sha256:dfb6d332576f136ec740296c7e8bb8c8a7125044e7c6da30744718880cdd059d"},
+    {file = "starlette-0.45.3.tar.gz", hash = "sha256:2cbcba2a75806f8a41c722141486f37c28e30a0921c5f6fe4346cb0dcee1302f"},
 ]
 
 [package.dependencies]
diff --git a/src/auto_archiver/base_processors/__init__.py b/src/auto_archiver/base_processors/__init__.py
deleted file mode 100644
index 4995457..0000000
--- a/src/auto_archiver/base_processors/__init__.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from .database import Database
-from .enricher import Enricher
-from .feeder import Feeder
-from .storage import Storage
-from .extractor import Extractor
-from .formatter import Formatter
\ No newline at end of file
diff --git a/src/auto_archiver/core/__init__.py b/src/auto_archiver/core/__init__.py
index 10213b2..858bdfd 100644
--- a/src/auto_archiver/core/__init__.py
+++ b/src/auto_archiver/core/__init__.py
@@ -8,4 +8,11 @@ from .context import ArchivingContext
 
 # cannot import ArchivingOrchestrator/Config to avoid circular dep
 # from .orchestrator import ArchivingOrchestrator
-# from .config import Config
\ No newline at end of file
+# from .config import Config
+
+from .database import Database
+from .enricher import Enricher
+from .feeder import Feeder
+from .storage import Storage
+from .extractor import Extractor
+from .formatter import Formatter
\ No newline at end of file
diff --git a/src/auto_archiver/base_processors/database.py b/src/auto_archiver/core/database.py
similarity index 100%
rename from src/auto_archiver/base_processors/database.py
rename to src/auto_archiver/core/database.py
diff --git a/src/auto_archiver/base_processors/enricher.py b/src/auto_archiver/core/enricher.py
similarity index 100%
rename from src/auto_archiver/base_processors/enricher.py
rename to src/auto_archiver/core/enricher.py
diff --git a/src/auto_archiver/base_processors/extractor.py b/src/auto_archiver/core/extractor.py
similarity index 94%
rename from src/auto_archiver/base_processors/extractor.py
rename to src/auto_archiver/core/extractor.py
index 321b053..8d509ec 100644
--- a/src/auto_archiver/base_processors/extractor.py
+++ b/src/auto_archiver/core/extractor.py
@@ -15,20 +15,16 @@ import mimetypes, requests
 from loguru import logger
 from retrying import retry
 
-from ..core import Metadata, ArchivingContext
+from ..core import Metadata, ArchivingContext, BaseModule
 
 
 @dataclass
-class Extractor:
+class Extractor(BaseModule):
     """
     Base class for implementing extractors in the media archiving framework.
     Subclasses must implement the `download` method to define platform-specific behavior.
     """
 
-    def setup(self, *args, **kwargs) -> None:
-        # used when extractors need to login or do other one-time setup
-        pass
-
     def cleanup(self) -> None:
         # called when extractors are done, or upon errors, cleanup any resources
         pass
diff --git a/src/auto_archiver/base_processors/feeder.py b/src/auto_archiver/core/feeder.py
similarity index 100%
rename from src/auto_archiver/base_processors/feeder.py
rename to src/auto_archiver/core/feeder.py
diff --git a/src/auto_archiver/base_processors/formatter.py b/src/auto_archiver/core/formatter.py
similarity index 100%
rename from src/auto_archiver/base_processors/formatter.py
rename to src/auto_archiver/core/formatter.py
diff --git a/src/auto_archiver/core/module.py b/src/auto_archiver/core/module.py
index 29f9769..3ef43e5 100644
--- a/src/auto_archiver/core/module.py
+++ b/src/auto_archiver/core/module.py
@@ -153,46 +153,47 @@ class LazyBaseModule:
         return manifest
 
     def load(self):
-            if self._instance:
-                return self._instance
 
-            # check external dependencies are installed
-            def check_deps(deps, check):
-                for dep in deps:
-                    if not check(dep):
-                        logger.error(f"Module '{self.name}' requires external dependency '{dep}' which is not available. Have you installed the required dependencies for the '{self.name}' module? See the README for more information.")
-                        exit(1)
-            
-            check_deps(self.dependencies.get('python', []), lambda dep: find_spec(dep))
-            check_deps(self.dependencies.get('bin', []), lambda dep: shutil.which(dep))
-            
+        if self._instance:
+            return self._instance
 
-            logger.debug(f"Loading module '{self.display_name}'...")
+        # check external dependencies are installed
+        def check_deps(deps, check):
+            for dep in deps:
+                if not check(dep):
+                    logger.error(f"Module '{self.name}' requires external dependency '{dep}' which is not available. Have you installed the required dependencies for the '{self.name}' module? See the README for more information.")
+                    exit(1)
 
-            for qualname in [self.name, f'auto_archiver.modules.{self.name}']:
-                try:
-                    # first import the whole module, to make sure it's working properly
-                    __import__(qualname)
-                    break
-                except ImportError:
-                    pass
+        check_deps(self.dependencies.get('python', []), lambda dep: find_spec(dep))
+        check_deps(self.dependencies.get('bin', []), lambda dep: shutil.which(dep))
 
-            # then import the file for the entry point
-            file_name, class_name = self.entry_point.split('::')
-            sub_qualname = f'{qualname}.{file_name}'
 
-            __import__(f'{qualname}.{file_name}', fromlist=[self.entry_point])
-            
-            # finally, get the class instance
-            instance = getattr(sys.modules[sub_qualname], class_name)()
-            if not getattr(instance, 'name', None):
-                instance.name = self.name
-            
-            if not getattr(instance, 'display_name', None):
-                instance.display_name = self.display_name
+        logger.debug(f"Loading module '{self.display_name}'...")
 
-            self._instance = instance
-            return instance
+        for qualname in [self.name, f'auto_archiver.modules.{self.name}']:
+            try:
+                # first import the whole module, to make sure it's working properly
+                __import__(qualname)
+                break
+            except ImportError:
+                pass
+
+        # then import the file for the entry point
+        file_name, class_name = self.entry_point.split('::')
+        sub_qualname = f'{qualname}.{file_name}'
+
+        __import__(f'{qualname}.{file_name}', fromlist=[self.entry_point])
+
+        # finally, get the class instance
+        instance = getattr(sys.modules[sub_qualname], class_name)()
+        if not getattr(instance, 'name', None):
+            instance.name = self.name
+
+        if not getattr(instance, 'display_name', None):
+            instance.display_name = self.display_name
+
+        self._instance = instance
+        return instance
 
     def __repr__(self):
         return f"Module<'{self.display_name}' ({self.name})>"
\ No newline at end of file
diff --git a/src/auto_archiver/core/orchestrator.py b/src/auto_archiver/core/orchestrator.py
index 967f652..4f155db 100644
--- a/src/auto_archiver/core/orchestrator.py
+++ b/src/auto_archiver/core/orchestrator.py
@@ -227,6 +227,10 @@ class ArchivingOrchestrator:
                     continue
                 if loaded_module:
                     step_items.append(loaded_module)
+                    # TODO: temporary solution - register the loaded storage modules in ArchivingContext
+                    if module_type == "storage":
+                        ArchivingContext.set("storages", step_items, keep_on_reset=True)
+
             check_steps_ok()
             self.config['steps'][f"{module_type}s"] = step_items
             
@@ -256,10 +260,7 @@ class ArchivingOrchestrator:
             exit()
 
         yaml_config = read_yaml(basic_config.config_file)
-
-
         self.setup_complete_parser(basic_config, yaml_config, unused_args)
-        
         self.install_modules()
 
         # log out the modules that were loaded
@@ -301,7 +302,7 @@ class ArchivingOrchestrator:
             logger.error(f'Got unexpected error on item {item}: {e}\n{traceback.format_exc()}')
             for d in self.config['steps']['databases']:
                 if type(e) == AssertionError: d.failed(item, str(e))
-                else: d.failed(item)
+                else: d.failed(item, reason="unexpected error")
 
 
     def archive(self, result: Metadata) -> Union[Metadata, None]:
@@ -319,27 +320,27 @@ class ArchivingOrchestrator:
 
         # 1 - sanitize - each archiver is responsible for cleaning/expanding its own URLs
         url = original_url
-        for a in self.archivers: url = a.sanitize_url(url)
+        for a in self.config["steps"]["extractors"]: url = a.sanitize_url(url)
         result.set_url(url)
         if original_url != url: result.set("original_url", original_url)
 
         # 2 - notify start to DBs, propagate already archived if feature enabled in DBs
         cached_result = None
-        for d in self.databases:
+        for d in self.config["steps"]["databases"]:
             d.started(result)
             if (local_result := d.fetch(result)):
                 cached_result = (cached_result or Metadata()).merge(local_result)
         if cached_result:
             logger.debug("Found previously archived entry")
-            for d in self.databases:
+            for d in self.config["steps"]["databases"]:
                 try: d.done(cached_result, cached=True)
                 except Exception as e:
                     logger.error(f"ERROR database {d.name}: {e}: {traceback.format_exc()}")
             return cached_result
 
-        # 3 - call archivers until one succeeds
-        for a in self.archivers:
-            logger.info(f"Trying archiver {a.name} for {url}")
+        # 3 - call extractors until one succeeds
+        for a in self.config["steps"]["extractors"]:
+            logger.info(f"Trying extractor {a.name} for {url}")
             try:
                 result.merge(a.download(result))
                 if result.is_success(): break
@@ -347,7 +348,7 @@ class ArchivingOrchestrator:
                 logger.error(f"ERROR archiver {a.name}: {e}: {traceback.format_exc()}")
 
         # 4 - call enrichers to work with archived content
-        for e in self.enrichers:
+        for e in self.config["steps"]["enrichers"]:
             try: e.enrich(result)
             except Exception as exc: 
                 logger.error(f"ERROR enricher {e.name}: {exc}: {traceback.format_exc()}")
@@ -356,7 +357,7 @@ class ArchivingOrchestrator:
         result.store()
 
         # 6 - format and store formatted if needed
-        if (final_media := self.formatter.format(result)):
+        if final_media := self.config["steps"]["formatters"][0].format(result):
             final_media.store(url=url, metadata=result)
             result.set_final_media(final_media)
 
@@ -364,7 +365,7 @@ class ArchivingOrchestrator:
             result.status = "nothing archived"
 
         # signal completion to databases and archivers
-        for d in self.databases:
+        for d in self.config["steps"]["databases"]:
             try: d.done(result)
             except Exception as e:
                 logger.error(f"ERROR database {d.name}: {e}: {traceback.format_exc()}")
diff --git a/src/auto_archiver/base_processors/storage.py b/src/auto_archiver/core/storage.py
similarity index 100%
rename from src/auto_archiver/base_processors/storage.py
rename to src/auto_archiver/core/storage.py
diff --git a/src/auto_archiver/modules/api_db/api_db.py b/src/auto_archiver/modules/api_db/api_db.py
index d2b43b7..a893aee 100644
--- a/src/auto_archiver/modules/api_db/api_db.py
+++ b/src/auto_archiver/modules/api_db/api_db.py
@@ -2,7 +2,7 @@ from typing import Union
 import requests, os
 from loguru import logger
 
-from auto_archiver.base_processors import Database
+from auto_archiver.core import Database
 from auto_archiver.core import Metadata
 
 
@@ -10,7 +10,6 @@ class AAApiDb(Database):
     """
         Connects to auto-archiver-api instance
     """
-    name = "auto_archiver_api_db"
 
     def __init__(self, config: dict) -> None:
         # without this STEP.__init__ is not called
diff --git a/src/auto_archiver/modules/atlos/atlos.py b/src/auto_archiver/modules/atlos/atlos.py
index 6a175d3..abc8a1a 100644
--- a/src/auto_archiver/modules/atlos/atlos.py
+++ b/src/auto_archiver/modules/atlos/atlos.py
@@ -5,15 +5,11 @@ import requests
 import hashlib
 
 from auto_archiver.core import Media, Metadata
-from auto_archiver.base_processors import Storage
+from auto_archiver.core import Storage
 from auto_archiver.utils import get_atlos_config_options
 
 
 class AtlosStorage(Storage):
-    name = "atlos_storage"
-
-    def __init__(self, config: dict) -> None:
-        super().__init__(config)
 
     def get_cdn_url(self, _media: Media) -> str:
         # It's not always possible to provide an exact URL, because it's
diff --git a/src/auto_archiver/modules/atlos_db/atlos_db.py b/src/auto_archiver/modules/atlos_db/atlos_db.py
index 2e24491..c45e215 100644
--- a/src/auto_archiver/modules/atlos_db/atlos_db.py
+++ b/src/auto_archiver/modules/atlos_db/atlos_db.py
@@ -6,7 +6,7 @@ from csv import DictWriter
 from dataclasses import asdict
 import requests
 
-from auto_archiver.base_processors import Database
+from auto_archiver.core import Database
 from auto_archiver.core import Metadata
 from auto_archiver.utils import get_atlos_config_options
 
@@ -16,12 +16,6 @@ class AtlosDb(Database):
     Outputs results to Atlos
     """
 
-    name = "atlos_db"
-
-    def __init__(self, config: dict) -> None:
-        # without this STEP.__init__ is not called
-        super().__init__(config)
-
     def failed(self, item: Metadata, reason: str) -> None:
         """Update DB accordingly for failure"""
         # If the item has no Atlos ID, there's nothing for us to do
diff --git a/src/auto_archiver/modules/atlos_feeder/atlos_feeder.py b/src/auto_archiver/modules/atlos_feeder/atlos_feeder.py
index 262f21b..9811a82 100644
--- a/src/auto_archiver/modules/atlos_feeder/atlos_feeder.py
+++ b/src/auto_archiver/modules/atlos_feeder/atlos_feeder.py
@@ -1,13 +1,12 @@
 from loguru import logger
 import requests
 
-from auto_archiver.base_processors import Feeder
+from auto_archiver.core import Feeder
 from auto_archiver.core import Metadata, ArchivingContext
 from auto_archiver.utils import get_atlos_config_options
 
 
 class AtlosFeeder(Feeder):
-    name = "atlos_feeder"
 
     def __init__(self, config: dict) -> None:
         # without this STEP.__init__ is not called
diff --git a/src/auto_archiver/modules/cli_feeder/cli_feeder.py b/src/auto_archiver/modules/cli_feeder/cli_feeder.py
index 09c46d4..62cb659 100644
--- a/src/auto_archiver/modules/cli_feeder/cli_feeder.py
+++ b/src/auto_archiver/modules/cli_feeder/cli_feeder.py
@@ -1,6 +1,6 @@
 from loguru import logger
 
-from auto_archiver.base_processors import Feeder
+from auto_archiver.core import Feeder
 from auto_archiver.core import Metadata, ArchivingContext
 
 
diff --git a/src/auto_archiver/modules/console_db/console_db.py b/src/auto_archiver/modules/console_db/console_db.py
index c581552..48609b0 100644
--- a/src/auto_archiver/modules/console_db/console_db.py
+++ b/src/auto_archiver/modules/console_db/console_db.py
@@ -1,6 +1,6 @@
 from loguru import logger
 
-from auto_archiver.base_processors import Database
+from auto_archiver.core import Database
 from auto_archiver.core import Metadata
 
 
diff --git a/src/auto_archiver/modules/csv_db/csv_db.py b/src/auto_archiver/modules/csv_db/csv_db.py
index 189b137..b5985e2 100644
--- a/src/auto_archiver/modules/csv_db/csv_db.py
+++ b/src/auto_archiver/modules/csv_db/csv_db.py
@@ -3,7 +3,7 @@ from loguru import logger
 from csv import DictWriter
 from dataclasses import asdict
 
-from auto_archiver.base_processors import Database
+from auto_archiver.core import Database
 from auto_archiver.core import Metadata
 
 
diff --git a/src/auto_archiver/modules/csv_feeder/csv_feeder.py b/src/auto_archiver/modules/csv_feeder/csv_feeder.py
index 7bff16e..ad0a035 100644
--- a/src/auto_archiver/modules/csv_feeder/csv_feeder.py
+++ b/src/auto_archiver/modules/csv_feeder/csv_feeder.py
@@ -1,14 +1,12 @@
 from loguru import logger
 import csv
 
-from auto_archiver.base_processors import Feeder
+from auto_archiver.core import Feeder
 from auto_archiver.core import Metadata, ArchivingContext
 from auto_archiver.utils import url_or_none
 
 class CSVFeeder(Feeder):
 
-    name = "csv_feeder"
-
     def __iter__(self) -> Metadata:
         url_column = self.column or 0
         for file in self.files:
diff --git a/src/auto_archiver/modules/gdrive_storage/gdrive_storage.py b/src/auto_archiver/modules/gdrive_storage/gdrive_storage.py
index 4bcdb90..c2d326d 100644
--- a/src/auto_archiver/modules/gdrive_storage/gdrive_storage.py
+++ b/src/auto_archiver/modules/gdrive_storage/gdrive_storage.py
@@ -10,11 +10,10 @@ from google.oauth2.credentials import Credentials
 from google.auth.transport.requests import Request
 
 from auto_archiver.core import Media
-from auto_archiver.base_processors import Storage
+from auto_archiver.core import Storage
 
 
 class GDriveStorage(Storage):
-    name = "gdrive_storage"
 
     def __init__(self, config: dict) -> None:
         super().__init__(config)
diff --git a/src/auto_archiver/modules/generic_extractor/__manifest__.py b/src/auto_archiver/modules/generic_extractor/__manifest__.py
index 73c264d..d5f363f 100644
--- a/src/auto_archiver/modules/generic_extractor/__manifest__.py
+++ b/src/auto_archiver/modules/generic_extractor/__manifest__.py
@@ -1,13 +1,13 @@
 {
-    'name': 'Generic Extractor',
-    'version': '0.1.0',
-    'author': 'Bellingcat',
-    'type': ['extractor'],
-    'requires_setup': False,
-    'dependencies': {
-        'python': ['yt_dlp', 'requests', 'loguru', 'slugify'],
+    "name": "Generic Extractor",
+    "version": "0.1.0",
+    "author": "Bellingcat",
+    "type": ["extractor"],
+    "requires_setup": False,
+    "dependencies": {
+        "python": ["yt_dlp", "requests", "loguru", "slugify"],
     },
-    'description': """
+    "description": """
 This is the generic extractor used by auto-archiver, which uses `yt-dlp` under the hood.
 
 This module is responsible for downloading and processing media content from platforms
@@ -28,17 +28,53 @@ the broader archiving framework.
 custom dropins can be created to handle additional websites and passed to the archiver
 via the command line using the `--dropins` option (TODO!).
 """,
-    'configs': {
-            "facebook_cookie": {"default": None, "help": "optional facebook cookie to have more access to content, from browser, looks like 'cookie: datr= xxxx'"},
-            "subtitles": {"default": True, "help": "download subtitles if available"},
-            "comments": {"default": False, "help": "download all comments if available, may lead to large metadata"},
-            "livestreams": {"default": False, "help": "if set, will download live streams, otherwise will skip them; see --max-filesize for more control"},
-            "live_from_start": {"default": False, "help": "if set, will download live streams from their earliest available moment, otherwise starts now."},
-            "proxy": {"default": "", "help": "http/socks (https seems to not work atm) proxy to use for the webdriver, eg https://proxy-user:password@proxy-ip:port"},
-            "end_means_success": {"default": True, "help": "if True, any archived content will mean a 'success', if False this archiver will not return a 'success' stage; this is useful for cases when the yt-dlp will archive a video but ignore other types of content like images or text only pages that the subsequent archivers can retrieve."},
-            'allow_playlist': {"default": False, "help": "If True will also download playlists, set to False if the expectation is to download a single video."},
-            "max_downloads": {"default": "inf", "help": "Use to limit the number of videos to download when a channel or long page is being extracted. 'inf' means no limit."},
-            "cookies_from_browser": {"default": None, 'type': 'str', "help": "optional browser for ytdl to extract cookies from, can be one of: brave, chrome, chromium, edge, firefox, opera, safari, vivaldi, whale"},
-            "cookie_file": {"default": None, "help": "optional cookie file to use for Youtube, see instructions here on how to export from your browser: https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp"},
-        }
-}
\ No newline at end of file
+    "configs": {
+        "facebook_cookie": {
+            "default": None,
+            "help": "optional facebook cookie to have more access to content, from browser, looks like 'cookie: datr= xxxx'",
+        },
+        "subtitles": {"default": True, "help": "download subtitles if available", "type": "bool"},
+        "comments": {
+            "default": False,
+            "help": "download all comments if available, may lead to large metadata",
+            "type": "bool",
+        },
+        "livestreams": {
+            "default": False,
+            "help": "if set, will download live streams, otherwise will skip them; see --max-filesize for more control",
+            "type": "bool",
+        },
+        "live_from_start": {
+            "default": False,
+            "help": "if set, will download live streams from their earliest available moment, otherwise starts now.",
+            "type": "bool",
+        },
+        "proxy": {
+            "default": "",
+            "help": "http/socks (https seems to not work atm) proxy to use for the webdriver, eg https://proxy-user:password@proxy-ip:port",
+        },
+        "end_means_success": {
+            "default": True,
+            "help": "if True, any archived content will mean a 'success', if False this archiver will not return a 'success' stage; this is useful for cases when the yt-dlp will archive a video but ignore other types of content like images or text only pages that the subsequent archivers can retrieve.",
+            "type": "bool",
+        },
+        "allow_playlist": {
+            "default": False,
+            "help": "If True will also download playlists, set to False if the expectation is to download a single video.",
+            "type": "bool",
+        },
+        "max_downloads": {
+            "default": "inf",
+            "help": "Use to limit the number of videos to download when a channel or long page is being extracted. 'inf' means no limit.",
+        },
+        "cookies_from_browser": {
+            "default": None,
+            "type": "str",
+            "help": "optional browser for ytdl to extract cookies from, can be one of: brave, chrome, chromium, edge, firefox, opera, safari, vivaldi, whale",
+        },
+        "cookie_file": {
+            "default": None,
+            "help": "optional cookie file to use for Youtube, see instructions here on how to export from your browser: https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp",
+        },
+    },
+}
diff --git a/src/auto_archiver/modules/generic_extractor/bluesky.py b/src/auto_archiver/modules/generic_extractor/bluesky.py
index c75c373..1f92fd8 100644
--- a/src/auto_archiver/modules/generic_extractor/bluesky.py
+++ b/src/auto_archiver/modules/generic_extractor/bluesky.py
@@ -1,6 +1,6 @@
 from loguru import logger
 
-from auto_archiver.base_processors.extractor import Extractor
+from auto_archiver.core.extractor import Extractor
 from auto_archiver.core.metadata import Metadata, Media
 from .dropin import GenericDropin, InfoExtractor
 
diff --git a/src/auto_archiver/modules/generic_extractor/dropin.py b/src/auto_archiver/modules/generic_extractor/dropin.py
index 99cd71b..c5749ff 100644
--- a/src/auto_archiver/modules/generic_extractor/dropin.py
+++ b/src/auto_archiver/modules/generic_extractor/dropin.py
@@ -1,6 +1,6 @@
 from yt_dlp.extractor.common import InfoExtractor
 from auto_archiver.core.metadata import Metadata
-from auto_archiver.base_processors.extractor import Extractor
+from auto_archiver.core.extractor import Extractor
 
 class GenericDropin:
     """Base class for dropins for the generic extractor.
diff --git a/src/auto_archiver/modules/generic_extractor/generic_extractor.py b/src/auto_archiver/modules/generic_extractor/generic_extractor.py
index 8ceaabc..57924d9 100644
--- a/src/auto_archiver/modules/generic_extractor/generic_extractor.py
+++ b/src/auto_archiver/modules/generic_extractor/generic_extractor.py
@@ -5,11 +5,10 @@ from yt_dlp.extractor.common import InfoExtractor
 
 from loguru import logger
 
-from auto_archiver.base_processors.extractor import Extractor
+from auto_archiver.core.extractor import Extractor
 from ...core import Metadata, Media, ArchivingContext
 
 class GenericExtractor(Extractor):
-    name = "youtubedl_archiver" #left as is for backwards compat
     _dropins = {}
 
     def suitable_extractors(self, url: str) -> list[str]:
@@ -268,7 +267,7 @@ class GenericExtractor(Extractor):
         if item.netloc in ['facebook.com', 'www.facebook.com'] and self.facebook_cookie:
             logger.debug('Using Facebook cookie')
             yt_dlp.utils.std_headers['cookie'] = self.facebook_cookie
-        
+
         ydl_options = {'outtmpl': os.path.join(ArchivingContext.get_tmp_dir(), f'%(id)s.%(ext)s'), 'quiet': False, 'noplaylist': not self.allow_playlist , 'writesubtitles': self.subtitles, 'writeautomaticsub': self.subtitles, "live_from_start": self.live_from_start, "proxy": self.proxy, "max_downloads": self.max_downloads, "playlistend": self.max_downloads}
 
         if item.netloc in ['youtube.com', 'www.youtube.com']:
@@ -285,6 +284,6 @@ class GenericExtractor(Extractor):
             result = self.download_for_extractor(info_extractor, url, ydl)
             if result:
                 return result
-       
+
 
         return False
diff --git a/src/auto_archiver/modules/generic_extractor/truth.py b/src/auto_archiver/modules/generic_extractor/truth.py
index f52a748..e65b4b1 100644
--- a/src/auto_archiver/modules/generic_extractor/truth.py
+++ b/src/auto_archiver/modules/generic_extractor/truth.py
@@ -2,7 +2,7 @@ from typing import Type
 
 from auto_archiver.utils import traverse_obj
 from auto_archiver.core.metadata import Metadata, Media
-from auto_archiver.base_processors.extractor import Extractor
+from auto_archiver.core.extractor import Extractor
 from yt_dlp.extractor.common import InfoExtractor
 
 from dateutil.parser import parse as parse_dt
diff --git a/src/auto_archiver/modules/generic_extractor/twitter.py b/src/auto_archiver/modules/generic_extractor/twitter.py
index 11399d4..83c1f4f 100644
--- a/src/auto_archiver/modules/generic_extractor/twitter.py
+++ b/src/auto_archiver/modules/generic_extractor/twitter.py
@@ -6,7 +6,7 @@ from slugify import slugify
 
 from auto_archiver.core.metadata import Metadata, Media
 from auto_archiver.utils import UrlUtil
-from auto_archiver.base_processors.extractor import Extractor
+from auto_archiver.core.extractor import Extractor
 
 from .dropin import GenericDropin, InfoExtractor
 
diff --git a/src/auto_archiver/modules/gsheet_db/gsheet_db.py b/src/auto_archiver/modules/gsheet_db/gsheet_db.py
index 239bc06..e7e8e5c 100644
--- a/src/auto_archiver/modules/gsheet_db/gsheet_db.py
+++ b/src/auto_archiver/modules/gsheet_db/gsheet_db.py
@@ -5,7 +5,7 @@ from urllib.parse import quote
 
 from loguru import logger
 
-from auto_archiver.base_processors import Database
+from auto_archiver.core import Database
 from auto_archiver.core import Metadata, Media, ArchivingContext
 from auto_archiver.modules.gsheet_feeder import GWorksheet
 
@@ -15,11 +15,6 @@ class GsheetsDb(Database):
         NB: only works if GsheetFeeder is used. 
         could be updated in the future to support non-GsheetFeeder metadata 
     """
-    name = "gsheet_db"
-
-    def __init__(self, config: dict) -> None:
-        # without this STEP.__init__ is not called
-        super().__init__(config)
 
     def started(self, item: Metadata) -> None:
         logger.warning(f"STARTED {item}")
diff --git a/src/auto_archiver/modules/gsheet_feeder/gsheet_feeder.py b/src/auto_archiver/modules/gsheet_feeder/gsheet_feeder.py
index b57174f..66dd014 100644
--- a/src/auto_archiver/modules/gsheet_feeder/gsheet_feeder.py
+++ b/src/auto_archiver/modules/gsheet_feeder/gsheet_feeder.py
@@ -14,13 +14,12 @@ import gspread
 from loguru import logger
 from slugify import slugify
 
-from auto_archiver.base_processors import Feeder
+from auto_archiver.core import Feeder
 from auto_archiver.core import Metadata, ArchivingContext
 from . import GWorksheet
 
 
 class GsheetsFeeder(Feeder):
-    name = "gsheet_feeder"
 
     def __init__(self) -> None:
         """
diff --git a/src/auto_archiver/modules/hash_enricher/__manifest__.py b/src/auto_archiver/modules/hash_enricher/__manifest__.py
index 6e3cde3..f306808 100644
--- a/src/auto_archiver/modules/hash_enricher/__manifest__.py
+++ b/src/auto_archiver/modules/hash_enricher/__manifest__.py
@@ -8,9 +8,9 @@
     "configs": {
             "algorithm": {"default": "SHA-256", "help": "hash algorithm to use", "choices": ["SHA-256", "SHA3-512"]},
             # TODO add non-negative requirement to match previous implementation?
-            "chunksize": {"default": 1.6e7,
+            "chunksize": {"default": 16000000,
                           "help": "number of bytes to use when reading files in chunks (if this value is too large you will run out of RAM), default is 16MB",
-                          'type': 'positive_number',
+                          'type': 'int',
                           },
         },
     "description": """
diff --git a/src/auto_archiver/modules/hash_enricher/hash_enricher.py b/src/auto_archiver/modules/hash_enricher/hash_enricher.py
index 39ec75c..827b65f 100644
--- a/src/auto_archiver/modules/hash_enricher/hash_enricher.py
+++ b/src/auto_archiver/modules/hash_enricher/hash_enricher.py
@@ -10,7 +10,7 @@ making it suitable for handling large files efficiently.
 import hashlib
 from loguru import logger
 
-from auto_archiver.base_processors import Enricher
+from auto_archiver.core import Enricher
 from auto_archiver.core import Metadata, ArchivingContext
 
 
@@ -19,6 +19,17 @@ class HashEnricher(Enricher):
     Calculates hashes for Media instances
     """
 
+    def __init__(self, config: dict = None):
+        """Create the enricher; a missing or empty `config` falls back to the defaults below."""
+        super().__init__()
+        # TODO set these from the manifest?
+        # a falsy config (None or {}) is treated as an empty dict so .get() yields the defaults
+        settings = config if config else {}
+        default_chunksize = int(1.6e7)  # 16,000,000
+        self.algorithm = settings.get("algorithm", "SHA-256")
+        self.chunksize = settings.get("chunksize", default_chunksize)
+
+
     def enrich(self, to_enrich: Metadata) -> None:
         url = to_enrich.get_url()
         logger.debug(f"calculating media hashes for {url=} (using {self.algorithm})")
diff --git a/src/auto_archiver/modules/html_formatter/html_formatter.py b/src/auto_archiver/modules/html_formatter/html_formatter.py
index afa367b..e6e5e58 100644
--- a/src/auto_archiver/modules/html_formatter/html_formatter.py
+++ b/src/auto_archiver/modules/html_formatter/html_formatter.py
@@ -9,24 +9,30 @@ import base64
 
 from auto_archiver.version import __version__
 from auto_archiver.core import Metadata, Media, ArchivingContext
-from auto_archiver.base_processors import Formatter
+from auto_archiver.core import Formatter
 from auto_archiver.modules.hash_enricher import HashEnricher
 from auto_archiver.utils.misc import random_str
 
 
 @dataclass
 class HtmlFormatter(Formatter):
+    environment: Environment = None
+    template: any = None
 
-    # TODO: fix setting up template with new config method
-    # def __init__(self, config: dict) -> None:
-    #     # without this STEP.__init__ is not called
-    #     super().__init__(config)
-    #     self.environment = Environment(loader=FileSystemLoader(os.path.join(pathlib.Path(__file__).parent.resolve(), "templates/")), autoescape=True)
-    #     # JinjaHelper class static methods are added as filters
-    #     self.environment.filters.update({
-    #         k: v.__func__ for k, v in JinjaHelpers.__dict__.items() if isinstance(v, staticmethod)
-    #     })
-    #     self.template = self.environment.get_template("html_template.html")
+    def setup(self, config: dict) -> None:
+        """Prepare the Jinja2 environment, register the helper filters and load the template."""
+        super().setup(config)  # base class setup runs first
+        # templates are loaded from the "templates/" folder located next to this module
+        module_dir = pathlib.Path(__file__).parent.resolve()
+        loader = FileSystemLoader(os.path.join(module_dir, "templates/"))
+        self.environment = Environment(loader=loader, autoescape=True)
+
+        # each staticmethod declared on JinjaHelpers is exposed as a filter under its own name
+        helpers = {name: fn.__func__ for name, fn in JinjaHelpers.__dict__.items() if isinstance(fn, staticmethod)}
+        self.environment.filters.update(helpers)
+
+        # "template_name" is looked up on self.config; "html_template.html" is the fallback
+        self.template = self.environment.get_template(self.config.get("template_name", "html_template.html"))
 
     def format(self, item: Metadata) -> Media:
         url = item.get_url()
diff --git a/src/auto_archiver/modules/instagram_api_extractor/instagram_api_extractor.py b/src/auto_archiver/modules/instagram_api_extractor/instagram_api_extractor.py
index c1271fc..3d7f9e5 100644
--- a/src/auto_archiver/modules/instagram_api_extractor/instagram_api_extractor.py
+++ b/src/auto_archiver/modules/instagram_api_extractor/instagram_api_extractor.py
@@ -16,7 +16,7 @@ from loguru import logger
 from retrying import retry
 from tqdm import tqdm
 
-from auto_archiver.base_processors import Extractor
+from auto_archiver.core import Extractor
 from auto_archiver.core import Media
 from auto_archiver.core import Metadata
 
@@ -28,8 +28,6 @@ class InstagramAPIExtractor(Extractor):
     # TODO: improvement collect aggregates of locations[0].location and mentions for all posts
     """
 
-    name = "instagram_api_extractor"
-
     global_pattern = re.compile(
         r"(?:(?:http|https):\/\/)?(?:www.)?(?:instagram.com)\/(stories(?:\/highlights)?|p|reel)?\/?([^\/\?]*)\/?(\d+)?"
     )
diff --git a/src/auto_archiver/modules/instagram_extractor/instagram_extractor.py b/src/auto_archiver/modules/instagram_extractor/instagram_extractor.py
index 2b9bece..1a246fb 100644
--- a/src/auto_archiver/modules/instagram_extractor/instagram_extractor.py
+++ b/src/auto_archiver/modules/instagram_extractor/instagram_extractor.py
@@ -7,7 +7,7 @@ import re, os, shutil, traceback
 import instaloader  # https://instaloader.github.io/as-module.html
 from loguru import logger
 
-from auto_archiver.base_processors import Extractor
+from auto_archiver.core import Extractor
 from auto_archiver.core import Metadata
 from auto_archiver.core import Media
 
@@ -15,8 +15,6 @@ class InstagramExtractor(Extractor):
     """
     Uses Instaloader to download either a post (inc images, videos, text) or as much as possible from a profile (posts, stories, highlights, ...)
     """
-    name = "instagram_extractor"
-
     # NB: post regex should be tested before profile
     # https://regex101.com/r/MGPquX/1
     post_pattern = re.compile(r"(?:(?:http|https):\/\/)?(?:www.)?(?:instagram.com|instagr.am|instagr.com)\/(?:p|reel)\/(\w+)")
diff --git a/src/auto_archiver/modules/instagram_tbot_extractor/instagram_tbot_extractor.py b/src/auto_archiver/modules/instagram_tbot_extractor/instagram_tbot_extractor.py
index 36c8a06..60fa397 100644
--- a/src/auto_archiver/modules/instagram_tbot_extractor/instagram_tbot_extractor.py
+++ b/src/auto_archiver/modules/instagram_tbot_extractor/instagram_tbot_extractor.py
@@ -15,7 +15,7 @@ from sqlite3 import OperationalError
 from loguru import logger
 from telethon.sync import TelegramClient
 
-from auto_archiver.base_processors import Extractor
+from auto_archiver.core import Extractor
 from auto_archiver.core import Metadata, Media, ArchivingContext
 from auto_archiver.utils import random_str
 
@@ -26,13 +26,6 @@ class InstagramTbotExtractor(Extractor):
     https://github.com/adw0rd/instagrapi
     https://t.me/instagram_load_bot
     """
-    name = "instagram_tbot_extractor"
-
-    def __init__(self, config: dict) -> None:
-        super().__init__(config)
-        self.assert_valid_string("api_id")
-        self.assert_valid_string("api_hash")
-        self.timeout = int(self.timeout)
 
     def setup(self) -> None:
         """
diff --git a/src/auto_archiver/modules/local_storage/local_storage.py b/src/auto_archiver/modules/local_storage/local_storage.py
index 5d65414..4c44e9c 100644
--- a/src/auto_archiver/modules/local_storage/local_storage.py
+++ b/src/auto_archiver/modules/local_storage/local_storage.py
@@ -5,17 +5,12 @@ import os
 from loguru import logger
 
 from auto_archiver.core import Media
-from auto_archiver.base_processors import Storage
+from auto_archiver.core import Storage
 
 
 class LocalStorage(Storage):
     name = "local_storage"
 
-    def __init__(self) -> None:
-        super().__init__()
-        # TODO: fix up passing config values to 'steps'
-        # os.makedirs(self.save_to, exist_ok=True)
-
     def get_cdn_url(self, media: Media) -> str:
         # TODO: is this viable with Storage.configs on path/filename?
         dest = os.path.join(self.save_to, media.key)
diff --git a/src/auto_archiver/modules/meta_enricher/meta_enricher.py b/src/auto_archiver/modules/meta_enricher/meta_enricher.py
index fa86818..03fb01e 100644
--- a/src/auto_archiver/modules/meta_enricher/meta_enricher.py
+++ b/src/auto_archiver/modules/meta_enricher/meta_enricher.py
@@ -2,7 +2,7 @@ import datetime
 import os
 from loguru import logger
 
-from auto_archiver.base_processors import Enricher
+from auto_archiver.core import Enricher
 from auto_archiver.core import Metadata
 
 
diff --git a/src/auto_archiver/modules/metadata_enricher/metadata_enricher.py b/src/auto_archiver/modules/metadata_enricher/metadata_enricher.py
index 20a278f..c052d0a 100644
--- a/src/auto_archiver/modules/metadata_enricher/metadata_enricher.py
+++ b/src/auto_archiver/modules/metadata_enricher/metadata_enricher.py
@@ -2,7 +2,7 @@ import subprocess
 import traceback
 from loguru import logger
 
-from auto_archiver.base_processors import Enricher
+from auto_archiver.core import Enricher
 from auto_archiver.core import Metadata
 
 
diff --git a/src/auto_archiver/modules/mute_formatter/mute_formatter.py b/src/auto_archiver/modules/mute_formatter/mute_formatter.py
index addb454..1c7cca2 100644
--- a/src/auto_archiver/modules/mute_formatter/mute_formatter.py
+++ b/src/auto_archiver/modules/mute_formatter/mute_formatter.py
@@ -2,11 +2,10 @@ from __future__ import annotations
 from dataclasses import dataclass
 
 from auto_archiver.core import Metadata, Media
-from auto_archiver.base_processors import Formatter
+from auto_archiver.core import Formatter
 
 
 @dataclass
 class MuteFormatter(Formatter):
-    name = "mute_formatter"
 
     def format(self, item: Metadata) -> Media: return None
diff --git a/src/auto_archiver/modules/pdq_hash_enricher/pdq_hash_enricher.py b/src/auto_archiver/modules/pdq_hash_enricher/pdq_hash_enricher.py
index 65b0e59..e812e8b 100644
--- a/src/auto_archiver/modules/pdq_hash_enricher/pdq_hash_enricher.py
+++ b/src/auto_archiver/modules/pdq_hash_enricher/pdq_hash_enricher.py
@@ -16,7 +16,7 @@ import numpy as np
 from PIL import Image, UnidentifiedImageError
 from loguru import logger
 
-from auto_archiver.base_processors import Enricher
+from auto_archiver.core import Enricher
 from auto_archiver.core import Metadata
 
 
diff --git a/src/auto_archiver/modules/s3_storage/s3.py b/src/auto_archiver/modules/s3_storage/s3.py
index a637259..10d5f61 100644
--- a/src/auto_archiver/modules/s3_storage/s3.py
+++ b/src/auto_archiver/modules/s3_storage/s3.py
@@ -4,14 +4,13 @@ import boto3, os
 
 from auto_archiver.utils.misc import random_str
 from auto_archiver.core import Media
-from auto_archiver.base_processors import Storage
-# TODO
+from auto_archiver.core import Storage
+
 from auto_archiver.modules.hash_enricher import HashEnricher
 from loguru import logger
 
 NO_DUPLICATES_FOLDER = "no-dups/"
 class S3Storage(Storage):
-    name = "s3_storage"
 
     def __init__(self, config: dict) -> None:
         super().__init__(config)
diff --git a/src/auto_archiver/modules/screenshot_enricher/screenshot_enricher.py b/src/auto_archiver/modules/screenshot_enricher/screenshot_enricher.py
index 0140875..be775ce 100644
--- a/src/auto_archiver/modules/screenshot_enricher/screenshot_enricher.py
+++ b/src/auto_archiver/modules/screenshot_enricher/screenshot_enricher.py
@@ -5,15 +5,11 @@ import base64
 from selenium.common.exceptions import TimeoutException
 
 
-from auto_archiver.base_processors import Enricher
+from auto_archiver.core import Enricher
 from auto_archiver.utils import Webdriver, UrlUtil, random_str
 from auto_archiver.core import Media, Metadata, ArchivingContext
 
 class ScreenshotEnricher(Enricher):
-    name = "screenshot_enricher"
-
-    def __init__(self, config: dict) -> None:
-        super().__init__(config)
 
     def enrich(self, to_enrich: Metadata) -> None:
         url = to_enrich.get_url()
diff --git a/src/auto_archiver/modules/ssl_enricher/ssl_enricher.py b/src/auto_archiver/modules/ssl_enricher/ssl_enricher.py
index d15ee95..52237ee 100644
--- a/src/auto_archiver/modules/ssl_enricher/ssl_enricher.py
+++ b/src/auto_archiver/modules/ssl_enricher/ssl_enricher.py
@@ -3,7 +3,7 @@ from slugify import slugify
 from urllib.parse import urlparse
 from loguru import logger
 
-from auto_archiver.base_processors import Enricher
+from auto_archiver.core import Enricher
 from auto_archiver.core import Metadata, ArchivingContext, Media
 
 
diff --git a/src/auto_archiver/modules/telegram_extractor/telegram_extractor.py b/src/auto_archiver/modules/telegram_extractor/telegram_extractor.py
index aa7e46f..d612e24 100644
--- a/src/auto_archiver/modules/telegram_extractor/telegram_extractor.py
+++ b/src/auto_archiver/modules/telegram_extractor/telegram_extractor.py
@@ -2,7 +2,7 @@ import requests, re, html
 from bs4 import BeautifulSoup
 from loguru import logger
 
-from auto_archiver.base_processors import Extractor
+from auto_archiver.core import Extractor
 from auto_archiver.core import Metadata, Media
 
 
diff --git a/src/auto_archiver/modules/telethon_extractor/telethon_extractor.py b/src/auto_archiver/modules/telethon_extractor/telethon_extractor.py
index 8b49a10..f378e7e 100644
--- a/src/auto_archiver/modules/telethon_extractor/telethon_extractor.py
+++ b/src/auto_archiver/modules/telethon_extractor/telethon_extractor.py
@@ -8,21 +8,15 @@ from loguru import logger
 from tqdm import tqdm
 import re, time, json, os
 
-from auto_archiver.base_processors import Extractor
+from auto_archiver.core import Extractor
 from auto_archiver.core import Metadata, Media, ArchivingContext
 from auto_archiver.utils import random_str
 
 
 class TelethonArchiver(Extractor):
-    name = "telethon_extractor"
     link_pattern = re.compile(r"https:\/\/t\.me(\/c){0,1}\/(.+)\/(\d+)")
     invite_pattern = re.compile(r"t.me(\/joinchat){0,1}\/\+?(.+)")
 
-    def __init__(self, config: dict) -> None:
-        super().__init__(config)
-        self.assert_valid_string("api_id")
-        self.assert_valid_string("api_hash")
-
 
     def setup(self) -> None:
         """
diff --git a/src/auto_archiver/modules/thumbnail_enricher/thumbnail_enricher.py b/src/auto_archiver/modules/thumbnail_enricher/thumbnail_enricher.py
index 4a5a1db..b27243b 100644
--- a/src/auto_archiver/modules/thumbnail_enricher/thumbnail_enricher.py
+++ b/src/auto_archiver/modules/thumbnail_enricher/thumbnail_enricher.py
@@ -9,7 +9,7 @@ and identify important moments without watching the entire video.
 import ffmpeg, os
 from loguru import logger
 
-from auto_archiver.base_processors import Enricher
+from auto_archiver.core import Enricher
 from auto_archiver.core import Media, Metadata, ArchivingContext
 from auto_archiver.utils.misc import random_str
 
@@ -42,7 +42,7 @@ class ThumbnailEnricher(Enricher):
                         logger.error(f"error getting duration of video {m.filename}: {e}")
                         return
 
-                num_thumbs = int(min(max(1, duration * self.thumbnails_per_second), self.max_thumbnails))
+                num_thumbs = int(min(max(1, (duration / 60) * self.thumbnails_per_minute), self.max_thumbnails))  # per-minute rate: scale duration (presumably seconds, from the probe above - confirm) to minutes
                 timestamps = [duration / (num_thumbs + 1) * i for i in range(1, num_thumbs + 1)]
 
                 thumbnails_media = []
diff --git a/src/auto_archiver/modules/timestamping_enricher/timestamping_enricher.py b/src/auto_archiver/modules/timestamping_enricher/timestamping_enricher.py
index c90d42c..a7a0aee 100644
--- a/src/auto_archiver/modules/timestamping_enricher/timestamping_enricher.py
+++ b/src/auto_archiver/modules/timestamping_enricher/timestamping_enricher.py
@@ -8,9 +8,9 @@ from certvalidator import CertificateValidator, ValidationContext
 from asn1crypto import pem
 import certifi
 
-from auto_archiver.base_processors import Enricher
+from auto_archiver.core import Enricher
 from auto_archiver.core import Metadata, ArchivingContext, Media
-from auto_archiver.base_processors import Extractor
+from auto_archiver.core import Extractor
 
 
 class TimestampingEnricher(Enricher):
@@ -21,10 +21,6 @@ class TimestampingEnricher(Enricher):
 
     See https://gist.github.com/Manouchehri/fd754e402d98430243455713efada710 for list of timestamp authorities.
     """
-    name = "timestamping_enricher"
-
-    def __init__(self, config: dict) -> None:
-        super().__init__(config)
 
     def enrich(self, to_enrich: Metadata) -> None:
         url = to_enrich.get_url()
diff --git a/src/auto_archiver/modules/twitter_api_extractor/twitter_api_extractor.py b/src/auto_archiver/modules/twitter_api_extractor/twitter_api_extractor.py
index ea669b4..6a4930a 100644
--- a/src/auto_archiver/modules/twitter_api_extractor/twitter_api_extractor.py
+++ b/src/auto_archiver/modules/twitter_api_extractor/twitter_api_extractor.py
@@ -8,11 +8,10 @@ from loguru import logger
 from pytwitter import Api
 from slugify import slugify
 
-from auto_archiver.base_processors import Extractor
+from auto_archiver.core import Extractor
 from auto_archiver.core import Metadata,Media
 
 class TwitterApiExtractor(Extractor):
-    name = "twitter_api_extractor"
     link_pattern = re.compile(r"(?:twitter|x).com\/(?:\#!\/)?(\w+)\/status(?:es)?\/(\d+)")
 
     def __init__(self, config: dict) -> None:
diff --git a/src/auto_archiver/modules/vk_extractor/vk_extractor.py b/src/auto_archiver/modules/vk_extractor/vk_extractor.py
index eb4c171..1bce167 100644
--- a/src/auto_archiver/modules/vk_extractor/vk_extractor.py
+++ b/src/auto_archiver/modules/vk_extractor/vk_extractor.py
@@ -2,7 +2,7 @@ from loguru import logger
 from vk_url_scraper import VkScraper
 
 from auto_archiver.utils.misc import dump_payload
-from auto_archiver.base_processors import Extractor
+from auto_archiver.core import Extractor
 from auto_archiver.core import Metadata, Media, ArchivingContext
 
 
@@ -11,7 +11,6 @@ class VkExtractor(Extractor):
     VK videos are handled by YTDownloader, this archiver gets posts text and images.
     Currently only works for /wall posts
     """
-    name = "vk_extractor"
 
     def __init__(self, config: dict) -> None:
         super().__init__(config)
diff --git a/src/auto_archiver/modules/wacz_enricher/wacz_enricher.py b/src/auto_archiver/modules/wacz_enricher/wacz_enricher.py
index 9ba43ae..1eb7398 100644
--- a/src/auto_archiver/modules/wacz_enricher/wacz_enricher.py
+++ b/src/auto_archiver/modules/wacz_enricher/wacz_enricher.py
@@ -6,7 +6,7 @@ from loguru import logger
 from warcio.archiveiterator import ArchiveIterator
 
 from auto_archiver.core import Media, Metadata, ArchivingContext
-from auto_archiver.base_processors import Extractor, Enricher
+from auto_archiver.core import Extractor, Enricher
 from auto_archiver.utils import UrlUtil, random_str
 
 
@@ -17,11 +17,6 @@ class WaczExtractorEnricher(Enricher, Extractor):
     it can become quite powerful for archiving private content.
     When used as an archiver it will extract the media from the .WACZ archive so it can be enriched.
     """
-    name = "wacz_archiver_enricher"
-
-    def __init__(self, config: dict) -> None:
-        # without this STEP.__init__ is not called
-        super().__init__(config)
 
     def setup(self) -> None:
         self.use_docker = os.environ.get('WACZ_ENABLE_DOCKER') or not os.environ.get('RUNNING_IN_DOCKER')
diff --git a/src/auto_archiver/modules/wayback_enricher/wayback_enricher.py b/src/auto_archiver/modules/wayback_enricher/wayback_enricher.py
index 6942727..0e25440 100644
--- a/src/auto_archiver/modules/wayback_enricher/wayback_enricher.py
+++ b/src/auto_archiver/modules/wayback_enricher/wayback_enricher.py
@@ -2,7 +2,7 @@ import json
 from loguru import logger
 import time, requests
 
-from auto_archiver.base_processors import Extractor, Enricher
+from auto_archiver.core import Extractor, Enricher
 from auto_archiver.utils import UrlUtil
 from auto_archiver.core import Metadata
 
@@ -12,13 +12,6 @@ class WaybackExtractorEnricher(Enricher, Extractor):
 
     The Wayback machine will rate-limit IP heavy usage. 
     """
-    name = "wayback_archiver_enricher"
-
-    def __init__(self, config: dict) -> None:
-        # without this STEP.__init__ is not called
-        super().__init__(config)
-        assert type(self.secret) == str and len(self.secret) > 0, "please provide a value for the wayback_enricher API key"
-        assert type(self.secret) == str and len(self.secret) > 0, "please provide a value for the wayback_enricher API secret"
 
     def download(self, item: Metadata) -> Metadata:
         # this new Metadata object is required to avoid duplication
diff --git a/src/auto_archiver/modules/whisper_enricher/whisper_enricher.py b/src/auto_archiver/modules/whisper_enricher/whisper_enricher.py
index d14c537..09eb3db 100644
--- a/src/auto_archiver/modules/whisper_enricher/whisper_enricher.py
+++ b/src/auto_archiver/modules/whisper_enricher/whisper_enricher.py
@@ -2,7 +2,7 @@ import traceback
 import requests, time
 from loguru import logger
 
-from auto_archiver.base_processors import Enricher
+from auto_archiver.core import Enricher
 from auto_archiver.core import Metadata, Media, ArchivingContext
 from auto_archiver.modules.s3_storage import S3Storage
 
@@ -13,14 +13,6 @@ class WhisperEnricher(Enricher):
     whisper API repository: https://github.com/bellingcat/whisperbox-transcribe/
     Only works if an S3 compatible storage is used
     """
-    name = "whisper_enricher"
-
-    def __init__(self, config: dict) -> None:
-        # without this STEP.__init__ is not called
-        super().__init__(config)
-        assert type(self.api_endpoint) == str and len(self.api_endpoint) > 0, "please provide a value for the whisper_enricher api_endpoint"
-        assert type(self.api_key) == str and len(self.api_key) > 0, "please provide a value for the whisper_enricher api_key"
-        self.timeout = int(self.timeout)
 
     def enrich(self, to_enrich: Metadata) -> None:
         if not self._get_s3_storage():
diff --git a/src/auto_archiver/utils/gsheet.py b/src/auto_archiver/utils/gsheet.py
index 485344f..7a8862f 100644
--- a/src/auto_archiver/utils/gsheet.py
+++ b/src/auto_archiver/utils/gsheet.py
@@ -1,9 +1,9 @@
 import json, gspread
 
-from ..core import Step
+from ..core import BaseModule
 
 
-class Gsheets(Step):
+class Gsheets(BaseModule):
     name = "gsheets"
 
     def __init__(self, config: dict) -> None: