Merge pull request #210 from bellingcat/logger_fix

Fix issue #200 + Refactor _LAZY_LOADED_MODULES
Merge pull request #194 from bellingcat/tests/add_module_tests
2026-06-10 20:28:28 +03:00 · 2025-02-19 15:11:42 +00:00 · 2025-02-19 13:51:43 +00:00 · 2025-02-19 13:36:50 +00:00 · 2025-02-19 13:14:08 +00:00 · 2025-02-19 12:25:35 +00:00
62 changed files with 2724 additions and 775 deletions
--- a/docs/scripts/scripts.py
+++ b/docs/scripts/scripts.py
@@ -1,6 +1,6 @@
 # iterate through all the modules in auto_archiver.modules and turn the __manifest__.py file into a markdown table
 from pathlib import Path
-from auto_archiver.core.module import available_modules
+from auto_archiver.core.module import ModuleFactory
 from auto_archiver.core.base_module import BaseModule
 from ruamel.yaml import YAML
 import io
@@ -19,6 +19,19 @@ type_color = {

 TABLE_HEADER = ("Option", "Description", "Default", "Type")

+EXAMPLE_YAML = """
+# steps configuration
+steps:
+...
+{steps_str}
+...
+
+# module configuration
+...
+
+{config_string}
+"""
+
 def generate_module_docs():
    yaml = YAML()
    SAVE_FOLDER.mkdir(exist_ok=True)
@@ -28,7 +41,7 @@ def generate_module_docs():
    configs_cheatsheet = "\n## Configuration Options\n"
    configs_cheatsheet += header_row

-    for module in sorted(available_modules(with_manifest=True), key=lambda x: (x.requires_setup, x.name)):
+    for module in sorted(ModuleFactory().available_modules(), key=lambda x: (x.requires_setup, x.name)):
        # generate the markdown file from the __manifest__.py file.

        manifest = module.manifest
@@ -45,11 +58,14 @@ def generate_module_docs():
 ```
 {description}
 """     
+        steps_str = "\n".join(f"  {t}s:\n  - {module.name}" for t in manifest['type'])
+
        if not manifest['configs']:
-            readme_str += "\n*This module has no configuration options.*\n"
+            config_string = f"# No configuration options for {module.name}.*\n"
        else:
-            config_yaml = {}
+
            config_table = header_row
+            config_yaml = {}
            for key, value in manifest['configs'].items():
                type = value.get('type', 'string')
                if type == 'auto_archiver.utils.json_loader':
@@ -65,11 +81,14 @@ def generate_module_docs():
                configs_cheatsheet += f"| `{module.name}.{key}` | {help} | {default} | {type} |\n"
            readme_str += "\n## Configuration Options\n"
            readme_str += "\n### YAML\n"
-            yaml_string = io.BytesIO()
-            yaml.dump({module.name: config_yaml}, yaml_string)
-            
-            readme_str += f"```{{code}} yaml\n{yaml_string.getvalue().decode('utf-8')}\n```\n"

+            config_string = io.BytesIO()
+            yaml.dump({module.name: config_yaml}, config_string)
+            config_string = config_string.getvalue().decode('utf-8')
+        yaml_string = EXAMPLE_YAML.format(steps_str=steps_str, config_string=config_string)
+        readme_str += f"```{{code}} yaml\n{yaml_string}\n```\n"
+
+        if manifest['configs']:
            readme_str += "\n### Command Line:\n"
            readme_str += config_table

@@ -103,3 +122,7 @@ def generate_index(modules_by_type):
    with open(SAVE_FOLDER / "module_list.md", "w") as f:
        print("writing", SAVE_FOLDER / "module_list.md")
        f.write(readme_str)
+
+
+if __name__ == "__main__":
+    generate_module_docs()
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -77,3 +77,6 @@ html_theme = 'sphinx_book_theme'
 html_static_path = ["../_static"]
 html_css_files = ["custom.css"]

+copybutton_prompt_text = r">>> |\.\.\."
+copybutton_prompt_is_regexp = True
+copybutton_only_copy_prompt_lines = False
--- a/docs/source/core_modules.md
+++ b/docs/source/core_modules.md
@@ -24,4 +24,5 @@ modules/extractor
 modules/enricher
 modules/storage
 modules/database
+modules/formatter
 ```
--- a/docs/source/how_to.md
+++ b/docs/source/how_to.md
@@ -45,3 +45,10 @@ The "archive location" link contains the path of the archived file, in local sto
 ![The archive result for a link in the demo sheet.](../demo-archive.png)

 ---
+
+```{toctree}
+:maxdepth: 1
+:glob:
+
+how_to/*
+```
--- a/docs/source/how_to/authentication.md
+++ b/docs/source/how_to/authentication.md
@@ -0,0 +1,57 @@
+# Authentication
+
+The Authentication framework for auto-archiver allows you to add login details for various websites in a flexible way, directly from the configuration file.
+
+There are two main use cases for authentication:
+* Some websites require some kind of authentication in order to view the content. Examples include Facebook, Telegram etc.
+* Some websites use anti-bot systems to block bot-like tools from accessing the website. Adding real login information to auto-archiver can sometimes bypass this.
+
+## The Authentication Config
+
+You can save your authentication information directly inside your orchestration config file, or as a separate file (for security/multi-deploy purposes). Whether storing your settings inside the orchestration file, or as a separate file, the configuration format is the same.
+
+```{code} yaml
+authentication:
+   # optional file to load authentication information from, for security or multi-system deploy purposes
+   load_from_file: path/to/authentication/file.txt
+   # optional setting to load cookies from the named browser on the system.
+   cookies_from_browser: firefox
+   # optional setting to load cookies from a cookies.txt/cookies.jar file. See note below on extracting these
+   cookies_file: path/to/cookies.jar
+
+   twitter.com,x.com:
+      username: myusername
+      password: 123
+    
+   facebook.com:
+      cookie: single_cookie
+
+   othersite.com:
+      api_key: 123
+      api_secret: 1234
+
+# All available options:
+  # - username: str - the username to use for login
+  # - password: str - the password to use for login
+  # - api_key: str - the API key to use for login
+  # - api_secret: str - the API secret to use for login
+  # - cookie: str - a cookie string to use for login (specific to this site)
+```
+
+### Recommendations for authentication
+
+1. **Store authentication information separately:**
+The authentication part of your configuration contains sensitive information. You should make efforts not to share this with others. For extra security, use the `load_from_file` option to keep your authentication settings out of your configuration file, ideally in a different folder.
+
+2. **Don't use your own personal credentials:**
+Depending on the website you are extracting information from, there may be rules (Terms of Service) that prohibit you from scraping or extracting information using a bot. If you use your own personal account, there's a possibility it might get blocked/disabled. It's recommended to set up a separate, 'throwaway' account. In that way, if it gets blocked you can easily create another one to continue your archiving.
+
+
+### How to create a cookies.jar or pass cookies directly to auto-archiver
+
+auto-archiver uses yt-dlp's powerful cookies features under the hood. For instructions on how to extract a cookies.jar (or cookies.txt) file directly from your browser, see the FAQ in the [yt-dlp documentation](https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp).
+
+```{note} For developers:
+
+For information on how to access and use authentication settings from within your module, see the `{generic_extractor}` for an example, or view the [`auth_for_site()` function in BaseModule](../autoapi/core/base_module/index.rst)
+```
--- a/poetry.lock
+++ b/poetry.lock
@@ -103,14 +103,14 @@ tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"]

 [[package]]
 name = "authlib"
-version = "1.4.0"
+version = "1.4.1"
 description = "The ultimate Python library in building OAuth and OpenID Connect servers and clients."
 optional = false
 python-versions = ">=3.9"
 groups = ["main"]
 files = [
-    {file = "Authlib-1.4.0-py2.py3-none-any.whl", hash = "sha256:4bb20b978c8b636222b549317c1815e1fe62234fc1c5efe8855d84aebf3a74e3"},
-    {file = "authlib-1.4.0.tar.gz", hash = "sha256:1c1e6608b5ed3624aeeee136ca7f8c120d6f51f731aa152b153d54741840e1f2"},
+    {file = "Authlib-1.4.1-py2.py3-none-any.whl", hash = "sha256:edc29c3f6a3e72cd9e9f45fff67fc663a2c364022eb0371c003f22d5405915c1"},
+    {file = "authlib-1.4.1.tar.gz", hash = "sha256:30ead9ea4993cdbab821dc6e01e818362f92da290c04c7f6a1940f86507a790d"},
 ]

 [package.dependencies]
@@ -134,33 +134,34 @@ tomli = {version = "*", markers = "python_version < \"3.11\""}

 [[package]]
 name = "babel"
-version = "2.16.0"
+version = "2.17.0"
 description = "Internationalization utilities"
 optional = false
 python-versions = ">=3.8"
 groups = ["docs"]
 files = [
-    {file = "babel-2.16.0-py3-none-any.whl", hash = "sha256:368b5b98b37c06b7daf6696391c3240c938b37767d4584413e8438c5c435fa8b"},
-    {file = "babel-2.16.0.tar.gz", hash = "sha256:d1f3554ca26605fe173f3de0c65f750f5a42f924499bf134de6423582298e316"},
+    {file = "babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2"},
+    {file = "babel-2.17.0.tar.gz", hash = "sha256:0c54cffb19f690cdcc52a3b50bcbf71e07a808d1c80d549f2459b9d2cf0afb9d"},
 ]

 [package.extras]
-dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"]
+dev = ["backports.zoneinfo", "freezegun (>=1.0,<2.0)", "jinja2 (>=3.0)", "pytest (>=6.0)", "pytest-cov", "pytz", "setuptools", "tzdata"]

 [[package]]
 name = "beautifulsoup4"
-version = "4.12.3"
+version = "4.13.3"
 description = "Screen-scraping library"
 optional = false
-python-versions = ">=3.6.0"
+python-versions = ">=3.7.0"
 groups = ["main", "docs"]
 files = [
-    {file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"},
-    {file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"},
+    {file = "beautifulsoup4-4.13.3-py3-none-any.whl", hash = "sha256:99045d7d3f08f91f0d656bc9b7efbae189426cd913d830294a15eefa0ea4df16"},
+    {file = "beautifulsoup4-4.13.3.tar.gz", hash = "sha256:1bd32405dacc920b42b83ba01644747ed77456a65760e285fbc47633ceddaf8b"},
 ]

 [package.dependencies]
 soupsieve = ">1.2"
+typing-extensions = ">=4.0.0"

 [package.extras]
 cchardet = ["cchardet"]
@@ -171,18 +172,18 @@ lxml = ["lxml"]

 [[package]]
 name = "boto3"
-version = "1.36.6"
+version = "1.36.22"
 description = "The AWS SDK for Python"
 optional = false
 python-versions = ">=3.8"
 groups = ["main"]
 files = [
-    {file = "boto3-1.36.6-py3-none-any.whl", hash = "sha256:6d473f0f340d02b4e9ad5b8e68786a09728101a8b950231b89ebdaf72b6dca21"},
-    {file = "boto3-1.36.6.tar.gz", hash = "sha256:b36feae061dc0793cf311468956a0a9e99215ce38bc99a1a4e55a5b105f16297"},
+    {file = "boto3-1.36.22-py3-none-any.whl", hash = "sha256:39957eabdce009353d72d131046489fbbfa15891865d5f069f1e8bfa414e6b81"},
+    {file = "boto3-1.36.22.tar.gz", hash = "sha256:768c8a4d4a6227fe2258105efa086f1424cba5ca915a5eb2305b2cd979306ad1"},
 ]

 [package.dependencies]
-botocore = ">=1.36.6,<1.37.0"
+botocore = ">=1.36.22,<1.37.0"
 jmespath = ">=0.7.1,<2.0.0"
 s3transfer = ">=0.11.0,<0.12.0"

@@ -191,14 +192,14 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]

 [[package]]
 name = "botocore"
-version = "1.36.6"
+version = "1.36.22"
 description = "Low-level, data-driven core of boto 3."
 optional = false
 python-versions = ">=3.8"
 groups = ["main"]
 files = [
-    {file = "botocore-1.36.6-py3-none-any.whl", hash = "sha256:f77bbbb03fb420e260174650fb5c0cc142ec20a96967734eed2b0ef24334ef34"},
-    {file = "botocore-1.36.6.tar.gz", hash = "sha256:4864c53d638da191a34daf3ede3ff1371a3719d952cc0c6bd24ce2836a38dd77"},
+    {file = "botocore-1.36.22-py3-none-any.whl", hash = "sha256:75d6b34acb0686ee4d54ff6eb285e78ccfe318407428769d1e3e13351714d890"},
+    {file = "botocore-1.36.22.tar.gz", hash = "sha256:59520247d5a479731724f97c995d5a1c2aae3b303b324f39d99efcfad1d3019e"},
 ]

 [package.dependencies]
@@ -207,7 +208,7 @@ python-dateutil = ">=2.1,<3.0.0"
 urllib3 = {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""}

 [package.extras]
-crt = ["awscrt (==0.23.4)"]
+crt = ["awscrt (==0.23.8)"]

 [[package]]
 name = "brotli"
@@ -674,26 +675,26 @@ typing-inspect = ">=0.4.0,<1"

 [[package]]
 name = "dateparser"
-version = "1.2.0"
+version = "1.2.1"
 description = "Date parsing library designed to parse dates from HTML pages"
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
 groups = ["main"]
 files = [
-    {file = "dateparser-1.2.0-py2.py3-none-any.whl", hash = "sha256:0b21ad96534e562920a0083e97fd45fa959882d4162acc358705144520a35830"},
-    {file = "dateparser-1.2.0.tar.gz", hash = "sha256:7975b43a4222283e0ae15be7b4999d08c9a70e2d378ac87385b1ccf2cffbbb30"},
+    {file = "dateparser-1.2.1-py3-none-any.whl", hash = "sha256:bdcac262a467e6260030040748ad7c10d6bacd4f3b9cdb4cfd2251939174508c"},
+    {file = "dateparser-1.2.1.tar.gz", hash = "sha256:7e4919aeb48481dbfc01ac9683c8e20bfe95bb715a38c1e9f6af889f4f30ccc3"},
 ]

 [package.dependencies]
-python-dateutil = "*"
-pytz = "*"
-regex = "<2019.02.19 || >2019.02.19,<2021.8.27 || >2021.8.27"
-tzlocal = "*"
+python-dateutil = ">=2.7.0"
+pytz = ">=2024.2"
+regex = ">=2015.06.24,<2019.02.19 || >2019.02.19,<2021.8.27 || >2021.8.27"
+tzlocal = ">=0.2"

 [package.extras]
-calendars = ["convertdate", "hijri-converter"]
-fasttext = ["fasttext"]
-langdetect = ["langdetect"]
+calendars = ["convertdate (>=2.2.1)", "hijridate"]
+fasttext = ["fasttext (>=0.9.1)", "numpy (>=1.19.3,<2)"]
+langdetect = ["langdetect (>=1.0.0)"]

 [[package]]
 name = "docutils"
@@ -755,14 +756,14 @@ files = [

 [[package]]
 name = "google-api-core"
-version = "2.24.0"
+version = "2.24.1"
 description = "Google API client core library"
 optional = false
 python-versions = ">=3.7"
 groups = ["main"]
 files = [
-    {file = "google_api_core-2.24.0-py3-none-any.whl", hash = "sha256:10d82ac0fca69c82a25b3efdeefccf6f28e02ebb97925a8cce8edbfe379929d9"},
-    {file = "google_api_core-2.24.0.tar.gz", hash = "sha256:e255640547a597a4da010876d333208ddac417d60add22b6851a0c66a831fcaf"},
+    {file = "google_api_core-2.24.1-py3-none-any.whl", hash = "sha256:bc78d608f5a5bf853b80bd70a795f703294de656c096c0968320830a4bc280f1"},
+    {file = "google_api_core-2.24.1.tar.gz", hash = "sha256:f8b36f5456ab0dd99a1b693a40a31d1e7757beea380ad1b38faaf8941eae9d8a"},
 ]

 [package.dependencies]
@@ -780,14 +781,14 @@ grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"]

 [[package]]
 name = "google-api-python-client"
-version = "2.159.0"
+version = "2.161.0"
 description = "Google API Client Library for Python"
 optional = false
 python-versions = ">=3.7"
 groups = ["main"]
 files = [
-    {file = "google_api_python_client-2.159.0-py2.py3-none-any.whl", hash = "sha256:baef0bb631a60a0bd7c0bf12a5499e3a40cd4388484de7ee55c1950bf820a0cf"},
-    {file = "google_api_python_client-2.159.0.tar.gz", hash = "sha256:55197f430f25c907394b44fa078545ffef89d33fd4dca501b7db9f0d8e224bd6"},
+    {file = "google_api_python_client-2.161.0-py2.py3-none-any.whl", hash = "sha256:9476a5a4f200bae368140453df40f9cda36be53fa7d0e9a9aac4cdb859a26448"},
+    {file = "google_api_python_client-2.161.0.tar.gz", hash = "sha256:324c0cce73e9ea0a0d2afd5937e01b7c2d6a4d7e2579cdb6c384f9699d6c9f37"},
 ]

 [package.dependencies]
@@ -859,14 +860,14 @@ tool = ["click (>=6.0.0)"]

 [[package]]
 name = "googleapis-common-protos"
-version = "1.66.0"
+version = "1.67.0"
 description = "Common protobufs used in Google APIs"
 optional = false
 python-versions = ">=3.7"
 groups = ["main"]
 files = [
-    {file = "googleapis_common_protos-1.66.0-py2.py3-none-any.whl", hash = "sha256:d7abcd75fabb2e0ec9f74466401f6c119a0b498e27370e9be4c94cb7e382b8ed"},
-    {file = "googleapis_common_protos-1.66.0.tar.gz", hash = "sha256:c3e7b33d15fdca5374cc0a7346dd92ffa847425cc4ea941d970f13680052ec8c"},
+    {file = "googleapis_common_protos-1.67.0-py2.py3-none-any.whl", hash = "sha256:579de760800d13616f51cf8be00c876f00a9f146d3e6510e19d1f4111758b741"},
+    {file = "googleapis_common_protos-1.67.0.tar.gz", hash = "sha256:21398025365f138be356d5923e9168737d94d46a72aefee4a6110a1f23463c86"},
 ]

 [package.dependencies]
@@ -1158,14 +1159,14 @@ files = [

 [[package]]
 name = "marshmallow"
-version = "3.26.0"
+version = "3.26.1"
 description = "A lightweight library for converting complex datatypes to and from native Python datatypes."
 optional = false
 python-versions = ">=3.9"
 groups = ["main"]
 files = [
-    {file = "marshmallow-3.26.0-py3-none-any.whl", hash = "sha256:1287bca04e6a5f4094822ac153c03da5e214a0a60bcd557b140f3e66991b8ca1"},
-    {file = "marshmallow-3.26.0.tar.gz", hash = "sha256:eb36762a1cc76d7abf831e18a3a1b26d3d481bbc74581b8e532a3d3a8115e1cb"},
+    {file = "marshmallow-3.26.1-py3-none-any.whl", hash = "sha256:3350409f20a70a7e4e11a27661187b77cdcaeb20abca41c1454fe33636bea09c"},
+    {file = "marshmallow-3.26.1.tar.gz", hash = "sha256:e6d8affb6cb61d39d26402096dc0aee12d5a26d490a121f118d2e81dc0719dc6"},
 ]

 [package.dependencies]
@@ -1234,14 +1235,14 @@ files = [

 [[package]]
 name = "myst-parser"
-version = "4.0.0"
+version = "4.0.1"
 description = "An extended [CommonMark](https://spec.commonmark.org/) compliant parser,"
 optional = false
 python-versions = ">=3.10"
 groups = ["docs"]
 files = [
-    {file = "myst_parser-4.0.0-py3-none-any.whl", hash = "sha256:b9317997552424448c6096c2558872fdb6f81d3ecb3a40ce84a7518798f3f28d"},
-    {file = "myst_parser-4.0.0.tar.gz", hash = "sha256:851c9dfb44e36e56d15d05e72f02b80da21a9e0d07cba96baf5e2d476bb91531"},
+    {file = "myst_parser-4.0.1-py3-none-any.whl", hash = "sha256:9134e88959ec3b5780aedf8a99680ea242869d012e8821db3126d427edc9c95d"},
+    {file = "myst_parser-4.0.1.tar.gz", hash = "sha256:5cfea715e4f3574138aecbf7d54132296bfd72bb614d31168f48c477a830a7c4"},
 ]

 [package.dependencies]
@@ -1253,10 +1254,10 @@ pyyaml = "*"
 sphinx = ">=7,<9"

 [package.extras]
-code-style = ["pre-commit (>=3.0,<4.0)"]
+code-style = ["pre-commit (>=4.0,<5.0)"]
 linkify = ["linkify-it-py (>=2.0,<3.0)"]
 rtd = ["ipython", "sphinx (>=7)", "sphinx-autodoc2 (>=0.5.0,<0.6.0)", "sphinx-book-theme (>=1.1,<2.0)", "sphinx-copybutton", "sphinx-design", "sphinx-pyscript", "sphinx-tippy (>=0.4.3)", "sphinx-togglebutton", "sphinxext-opengraph (>=0.9.0,<0.10.0)", "sphinxext-rediraffe (>=0.2.7,<0.3.0)"]
-testing = ["beautifulsoup4", "coverage[toml]", "defusedxml", "pytest (>=8,<9)", "pytest-cov", "pytest-param-files (>=0.6.0,<0.7.0)", "pytest-regressions", "sphinx-pytest"]
+testing = ["beautifulsoup4", "coverage[toml]", "defusedxml", "pygments (<2.19)", "pytest (>=8,<9)", "pytest-cov", "pytest-param-files (>=0.6.0,<0.7.0)", "pytest-regressions", "sphinx-pytest"]
 testing-docutils = ["pygments", "pytest (>=8,<9)", "pytest-param-files (>=0.6.0,<0.7.0)"]

 [[package]]
@@ -1530,14 +1531,14 @@ testing = ["pytest", "pytest-benchmark"]

 [[package]]
 name = "proto-plus"
-version = "1.25.0"
-description = "Beautiful, Pythonic protocol buffers."
+version = "1.26.0"
+description = "Beautiful, Pythonic protocol buffers"
 optional = false
 python-versions = ">=3.7"
 groups = ["main"]
 files = [
-    {file = "proto_plus-1.25.0-py3-none-any.whl", hash = "sha256:c91fc4a65074ade8e458e95ef8bac34d4008daa7cce4a12d6707066fca648961"},
-    {file = "proto_plus-1.25.0.tar.gz", hash = "sha256:fbb17f57f7bd05a68b7707e745e26528b0b3c34e378db91eef93912c54982d91"},
+    {file = "proto_plus-1.26.0-py3-none-any.whl", hash = "sha256:bf2dfaa3da281fc3187d12d224c707cb57214fb2c22ba854eb0c105a3fb2d4d7"},
+    {file = "proto_plus-1.26.0.tar.gz", hash = "sha256:6e93d5f5ca267b54300880fff156b6a3386b3fa3f43b1da62e680fc0c586ef22"},
 ]

 [package.dependencies]
@@ -1814,6 +1815,24 @@ loguru = "*"
 [package.extras]
 test = ["pytest", "pytest-cov"]

+[[package]]
+name = "pytest-mock"
+version = "3.14.0"
+description = "Thin-wrapper around the mock package for easier use with pytest"
+optional = false
+python-versions = ">=3.8"
+groups = ["dev"]
+files = [
+    {file = "pytest-mock-3.14.0.tar.gz", hash = "sha256:2719255a1efeceadbc056d6bf3df3d1c5015530fb40cf347c0f9afac88410bd0"},
+    {file = "pytest_mock-3.14.0-py3-none-any.whl", hash = "sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f"},
+]
+
+[package.dependencies]
+pytest = ">=6.2.5"
+
+[package.extras]
+dev = ["pre-commit", "pytest-asyncio", "tox"]
+
 [[package]]
 name = "python-dateutil"
 version = "2.9.0.post0"
@@ -1866,14 +1885,14 @@ requests = ">=2.28"

 [[package]]
 name = "pytz"
-version = "2024.2"
+version = "2025.1"
 description = "World timezone definitions, modern and historical"
 optional = false
 python-versions = "*"
 groups = ["main"]
 files = [
-    {file = "pytz-2024.2-py2.py3-none-any.whl", hash = "sha256:31c7c1817eb7fae7ca4b8c7ee50c72f93aa2dd863de768e1ef4245d426aa0725"},
-    {file = "pytz-2024.2.tar.gz", hash = "sha256:2aa355083c50a0f93fa581709deac0c9ad65cca8a9e9beac660adcbd493c798a"},
+    {file = "pytz-2025.1-py2.py3-none-any.whl", hash = "sha256:89dd22dca55b46eac6eda23b2d72721bf1bdfef212645d81513ef5d03038de57"},
+    {file = "pytz-2025.1.tar.gz", hash = "sha256:c2db42be2a2518b28e65f9207c4d05e6ff547d1efa4086469ef855e4ab70178e"},
 ]

 [[package]]
@@ -2122,14 +2141,14 @@ jupyter = ["ipywidgets (>=7.5.1,<9)"]

 [[package]]
 name = "rich-argparse"
-version = "1.6.0"
+version = "1.7.0"
 description = "Rich help formatters for argparse and optparse"
 optional = false
 python-versions = ">=3.8"
 groups = ["main"]
 files = [
-    {file = "rich_argparse-1.6.0-py3-none-any.whl", hash = "sha256:fbe70a1d821b3f2fa8958cddf0cae131870a6e9faa04ab52b409cb1eda809bd7"},
-    {file = "rich_argparse-1.6.0.tar.gz", hash = "sha256:092083c30da186f25bcdff8b1d47fdfb571288510fb051e0488a72cc3128de13"},
+    {file = "rich_argparse-1.7.0-py3-none-any.whl", hash = "sha256:b8ec8943588e9731967f4f97b735b03dc127c416f480a083060433a97baf2fd3"},
+    {file = "rich_argparse-1.7.0.tar.gz", hash = "sha256:f31d809c465ee43f367d599ccaf88b73bc2c4d75d74ed43f2d538838c53544ba"},
 ]

 [package.dependencies]
@@ -2362,24 +2381,24 @@ test = ["cython (>=3.0)", "defusedxml (>=0.7.1)", "pytest (>=8.0)", "setuptools

 [[package]]
 name = "sphinx-autoapi"
-version = "3.4.0"
+version = "3.6.0"
 description = "Sphinx API documentation generator"
 optional = false
-python-versions = ">=3.8"
+python-versions = ">=3.9"
 groups = ["docs"]
 files = [
-    {file = "sphinx_autoapi-3.4.0-py3-none-any.whl", hash = "sha256:4027fef2875a22c5f2a57107c71641d82f6166bf55beb407a47aaf3ef14e7b92"},
-    {file = "sphinx_autoapi-3.4.0.tar.gz", hash = "sha256:e6d5371f9411bbb9fca358c00a9e57aef3ac94cbfc5df4bab285946462f69e0c"},
+    {file = "sphinx_autoapi-3.6.0-py3-none-any.whl", hash = "sha256:f3b66714493cab140b0e896d33ce7137654a16ac1edb6563edcbd47bf975f711"},
+    {file = "sphinx_autoapi-3.6.0.tar.gz", hash = "sha256:c685f274e41d0842ae7e199460c322c4bd7fec816ccc2da8d806094b4f64af06"},
 ]

 [package.dependencies]
 astroid = [
    {version = ">=2.7", markers = "python_version < \"3.12\""},
-    {version = ">=3.0.0a1", markers = "python_version >= \"3.12\""},
+    {version = ">=3", markers = "python_version >= \"3.12\""},
 ]
 Jinja2 = "*"
 PyYAML = "*"
-sphinx = ">=6.1.0"
+sphinx = ">=7.4.0"

 [[package]]
 name = "sphinx-autobuild"
@@ -2679,14 +2698,14 @@ telegram = ["requests"]

 [[package]]
 name = "trio"
-version = "0.28.0"
+version = "0.29.0"
 description = "A friendly Python library for async concurrency and I/O"
 optional = false
 python-versions = ">=3.9"
 groups = ["main"]
 files = [
-    {file = "trio-0.28.0-py3-none-any.whl", hash = "sha256:56d58977acc1635735a96581ec70513cc781b8b6decd299c487d3be2a721cd94"},
-    {file = "trio-0.28.0.tar.gz", hash = "sha256:4e547896fe9e8a5658e54e4c7c5fa1db748cbbbaa7c965e7d40505b928c73c05"},
+    {file = "trio-0.29.0-py3-none-any.whl", hash = "sha256:d8c463f1a9cc776ff63e331aba44c125f423a5a13c684307e828d930e625ba66"},
+    {file = "trio-0.29.0.tar.gz", hash = "sha256:ea0d3967159fc130acb6939a0be0e558e364fee26b5deeecc893a6b08c361bdf"},
 ]

 [package.dependencies]
@@ -2700,18 +2719,19 @@ sortedcontainers = "*"

 [[package]]
 name = "trio-websocket"
-version = "0.11.1"
+version = "0.12.1"
 description = "WebSocket library for Trio"
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
 groups = ["main"]
 files = [
-    {file = "trio-websocket-0.11.1.tar.gz", hash = "sha256:18c11793647703c158b1f6e62de638acada927344d534e3c7628eedcb746839f"},
-    {file = "trio_websocket-0.11.1-py3-none-any.whl", hash = "sha256:520d046b0d030cf970b8b2b2e00c4c2245b3807853ecd44214acd33d74581638"},
+    {file = "trio_websocket-0.12.1-py3-none-any.whl", hash = "sha256:608ec746bb287e5d5a66baf483e41194193c5cf05ffaad6240e7d1fcd80d1e6f"},
+    {file = "trio_websocket-0.12.1.tar.gz", hash = "sha256:d55ccd4d3eae27c494f3fdae14823317839bdcb8214d1173eacc4d42c69fc91b"},
 ]

 [package.dependencies]
 exceptiongroup = {version = "*", markers = "python_version < \"3.11\""}
+outcome = ">=1.2.0"
 trio = ">=0.11"
 wsproto = ">=0.14"

@@ -2778,14 +2798,14 @@ files = [

 [[package]]
 name = "tzlocal"
-version = "5.2"
+version = "5.3"
 description = "tzinfo object for the local timezone"
 optional = false
-python-versions = ">=3.8"
+python-versions = ">=3.9"
 groups = ["main"]
 files = [
-    {file = "tzlocal-5.2-py3-none-any.whl", hash = "sha256:49816ef2fe65ea8ac19d19aa7a1ae0551c834303d5014c6d5a62e4cbda8047b8"},
-    {file = "tzlocal-5.2.tar.gz", hash = "sha256:8d399205578f1a9342816409cc1e46a93ebd5755e39ea2d85334bea911bf0e6e"},
+    {file = "tzlocal-5.3-py3-none-any.whl", hash = "sha256:3814135a1bb29763c6e4f08fd6e41dbb435c7a60bfbb03270211bcc537187d8c"},
+    {file = "tzlocal-5.3.tar.gz", hash = "sha256:2fafbfc07e9d8b49ade18f898d6bcd37ae88ce3ad6486842a2e4f03af68323d2"},
 ]

 [package.dependencies]
@@ -3031,81 +3051,81 @@ test = ["websockets"]

 [[package]]
 name = "websockets"
-version = "14.2"
+version = "15.0"
 description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)"
 optional = false
 python-versions = ">=3.9"
 groups = ["main", "docs"]
 files = [
-    {file = "websockets-14.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e8179f95323b9ab1c11723e5d91a89403903f7b001828161b480a7810b334885"},
-    {file = "websockets-14.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0d8c3e2cdb38f31d8bd7d9d28908005f6fa9def3324edb9bf336d7e4266fd397"},
-    {file = "websockets-14.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:714a9b682deb4339d39ffa674f7b674230227d981a37d5d174a4a83e3978a610"},
-    {file = "websockets-14.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2e53c72052f2596fb792a7acd9704cbc549bf70fcde8a99e899311455974ca3"},
-    {file = "websockets-14.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e3fbd68850c837e57373d95c8fe352203a512b6e49eaae4c2f4088ef8cf21980"},
-    {file = "websockets-14.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b27ece32f63150c268593d5fdb82819584831a83a3f5809b7521df0685cd5d8"},
-    {file = "websockets-14.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4daa0faea5424d8713142b33825fff03c736f781690d90652d2c8b053345b0e7"},
-    {file = "websockets-14.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:bc63cee8596a6ec84d9753fd0fcfa0452ee12f317afe4beae6b157f0070c6c7f"},
-    {file = "websockets-14.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7a570862c325af2111343cc9b0257b7119b904823c675b22d4ac547163088d0d"},
-    {file = "websockets-14.2-cp310-cp310-win32.whl", hash = "sha256:75862126b3d2d505e895893e3deac0a9339ce750bd27b4ba515f008b5acf832d"},
-    {file = "websockets-14.2-cp310-cp310-win_amd64.whl", hash = "sha256:cc45afb9c9b2dc0852d5c8b5321759cf825f82a31bfaf506b65bf4668c96f8b2"},
-    {file = "websockets-14.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3bdc8c692c866ce5fefcaf07d2b55c91d6922ac397e031ef9b774e5b9ea42166"},
-    {file = "websockets-14.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c93215fac5dadc63e51bcc6dceca72e72267c11def401d6668622b47675b097f"},
-    {file = "websockets-14.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1c9b6535c0e2cf8a6bf938064fb754aaceb1e6a4a51a80d884cd5db569886910"},
-    {file = "websockets-14.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a52a6d7cf6938e04e9dceb949d35fbdf58ac14deea26e685ab6368e73744e4c"},
-    {file = "websockets-14.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9f05702e93203a6ff5226e21d9b40c037761b2cfb637187c9802c10f58e40473"},
-    {file = "websockets-14.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22441c81a6748a53bfcb98951d58d1af0661ab47a536af08920d129b4d1c3473"},
-    {file = "websockets-14.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:efd9b868d78b194790e6236d9cbc46d68aba4b75b22497eb4ab64fa640c3af56"},
-    {file = "websockets-14.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1a5a20d5843886d34ff8c57424cc65a1deda4375729cbca4cb6b3353f3ce4142"},
-    {file = "websockets-14.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:34277a29f5303d54ec6468fb525d99c99938607bc96b8d72d675dee2b9f5bf1d"},
-    {file = "websockets-14.2-cp311-cp311-win32.whl", hash = "sha256:02687db35dbc7d25fd541a602b5f8e451a238ffa033030b172ff86a93cb5dc2a"},
-    {file = "websockets-14.2-cp311-cp311-win_amd64.whl", hash = "sha256:862e9967b46c07d4dcd2532e9e8e3c2825e004ffbf91a5ef9dde519ee2effb0b"},
-    {file = "websockets-14.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1f20522e624d7ffbdbe259c6b6a65d73c895045f76a93719aa10cd93b3de100c"},
-    {file = "websockets-14.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:647b573f7d3ada919fd60e64d533409a79dcf1ea21daeb4542d1d996519ca967"},
-    {file = "websockets-14.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6af99a38e49f66be5a64b1e890208ad026cda49355661549c507152113049990"},
-    {file = "websockets-14.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:091ab63dfc8cea748cc22c1db2814eadb77ccbf82829bac6b2fbe3401d548eda"},
-    {file = "websockets-14.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b374e8953ad477d17e4851cdc66d83fdc2db88d9e73abf755c94510ebddceb95"},
-    {file = "websockets-14.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a39d7eceeea35db85b85e1169011bb4321c32e673920ae9c1b6e0978590012a3"},
-    {file = "websockets-14.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0a6f3efd47ffd0d12080594f434faf1cd2549b31e54870b8470b28cc1d3817d9"},
-    {file = "websockets-14.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:065ce275e7c4ffb42cb738dd6b20726ac26ac9ad0a2a48e33ca632351a737267"},
-    {file = "websockets-14.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e9d0e53530ba7b8b5e389c02282f9d2aa47581514bd6049d3a7cffe1385cf5fe"},
-    {file = "websockets-14.2-cp312-cp312-win32.whl", hash = "sha256:20e6dd0984d7ca3037afcb4494e48c74ffb51e8013cac71cf607fffe11df7205"},
-    {file = "websockets-14.2-cp312-cp312-win_amd64.whl", hash = "sha256:44bba1a956c2c9d268bdcdf234d5e5ff4c9b6dc3e300545cbe99af59dda9dcce"},
-    {file = "websockets-14.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6f1372e511c7409a542291bce92d6c83320e02c9cf392223272287ce55bc224e"},
-    {file = "websockets-14.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4da98b72009836179bb596a92297b1a61bb5a830c0e483a7d0766d45070a08ad"},
-    {file = "websockets-14.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f8a86a269759026d2bde227652b87be79f8a734e582debf64c9d302faa1e9f03"},
-    {file = "websockets-14.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:86cf1aaeca909bf6815ea714d5c5736c8d6dd3a13770e885aafe062ecbd04f1f"},
-    {file = "websockets-14.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9b0f6c3ba3b1240f602ebb3971d45b02cc12bd1845466dd783496b3b05783a5"},
-    {file = "websockets-14.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:669c3e101c246aa85bc8534e495952e2ca208bd87994650b90a23d745902db9a"},
-    {file = "websockets-14.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eabdb28b972f3729348e632ab08f2a7b616c7e53d5414c12108c29972e655b20"},
-    {file = "websockets-14.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2066dc4cbcc19f32c12a5a0e8cc1b7ac734e5b64ac0a325ff8353451c4b15ef2"},
-    {file = "websockets-14.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ab95d357cd471df61873dadf66dd05dd4709cae001dd6342edafc8dc6382f307"},
-    {file = "websockets-14.2-cp313-cp313-win32.whl", hash = "sha256:a9e72fb63e5f3feacdcf5b4ff53199ec8c18d66e325c34ee4c551ca748623bbc"},
-    {file = "websockets-14.2-cp313-cp313-win_amd64.whl", hash = "sha256:b439ea828c4ba99bb3176dc8d9b933392a2413c0f6b149fdcba48393f573377f"},
-    {file = "websockets-14.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7cd5706caec1686c5d233bc76243ff64b1c0dc445339bd538f30547e787c11fe"},
-    {file = "websockets-14.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ec607328ce95a2f12b595f7ae4c5d71bf502212bddcea528290b35c286932b12"},
-    {file = "websockets-14.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:da85651270c6bfb630136423037dd4975199e5d4114cae6d3066641adcc9d1c7"},
-    {file = "websockets-14.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3ecadc7ce90accf39903815697917643f5b7cfb73c96702318a096c00aa71f5"},
-    {file = "websockets-14.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1979bee04af6a78608024bad6dfcc0cc930ce819f9e10342a29a05b5320355d0"},
-    {file = "websockets-14.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2dddacad58e2614a24938a50b85969d56f88e620e3f897b7d80ac0d8a5800258"},
-    {file = "websockets-14.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:89a71173caaf75fa71a09a5f614f450ba3ec84ad9fca47cb2422a860676716f0"},
-    {file = "websockets-14.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:6af6a4b26eea4fc06c6818a6b962a952441e0e39548b44773502761ded8cc1d4"},
-    {file = "websockets-14.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:80c8efa38957f20bba0117b48737993643204645e9ec45512579132508477cfc"},
-    {file = "websockets-14.2-cp39-cp39-win32.whl", hash = "sha256:2e20c5f517e2163d76e2729104abc42639c41cf91f7b1839295be43302713661"},
-    {file = "websockets-14.2-cp39-cp39-win_amd64.whl", hash = "sha256:b4c8cef610e8d7c70dea92e62b6814a8cd24fbd01d7103cc89308d2bfe1659ef"},
-    {file = "websockets-14.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:d7d9cafbccba46e768be8a8ad4635fa3eae1ffac4c6e7cb4eb276ba41297ed29"},
-    {file = "websockets-14.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:c76193c1c044bd1e9b3316dcc34b174bbf9664598791e6fb606d8d29000e070c"},
-    {file = "websockets-14.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd475a974d5352390baf865309fe37dec6831aafc3014ffac1eea99e84e83fc2"},
-    {file = "websockets-14.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2c6c0097a41968b2e2b54ed3424739aab0b762ca92af2379f152c1aef0187e1c"},
-    {file = "websockets-14.2-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d7ff794c8b36bc402f2e07c0b2ceb4a2424147ed4785ff03e2a7af03711d60a"},
-    {file = "websockets-14.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:dec254fcabc7bd488dab64846f588fc5b6fe0d78f641180030f8ea27b76d72c3"},
-    {file = "websockets-14.2-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:bbe03eb853e17fd5b15448328b4ec7fb2407d45fb0245036d06a3af251f8e48f"},
-    {file = "websockets-14.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a3c4aa3428b904d5404a0ed85f3644d37e2cb25996b7f096d77caeb0e96a3b42"},
-    {file = "websockets-14.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:577a4cebf1ceaf0b65ffc42c54856214165fb8ceeba3935852fc33f6b0c55e7f"},
-    {file = "websockets-14.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ad1c1d02357b7665e700eca43a31d52814ad9ad9b89b58118bdabc365454b574"},
-    {file = "websockets-14.2-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f390024a47d904613577df83ba700bd189eedc09c57af0a904e5c39624621270"},
-    {file = "websockets-14.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:3c1426c021c38cf92b453cdf371228d3430acd775edee6bac5a4d577efc72365"},
-    {file = "websockets-14.2-py3-none-any.whl", hash = "sha256:7a6ceec4ea84469f15cf15807a747e9efe57e369c384fa86e022b3bea679b79b"},
-    {file = "websockets-14.2.tar.gz", hash = "sha256:5059ed9c54945efb321f097084b4c7e52c246f2c869815876a69d1efc4ad6eb5"},
+    {file = "websockets-15.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5e6ee18a53dd5743e6155b8ff7e8e477c25b29b440f87f65be8165275c87fef0"},
+    {file = "websockets-15.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ee06405ea2e67366a661ed313e14cf2a86e84142a3462852eb96348f7219cee3"},
+    {file = "websockets-15.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8711682a629bbcaf492f5e0af72d378e976ea1d127a2d47584fa1c2c080b436b"},
+    {file = "websockets-15.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94c4a9b01eede952442c088d415861b0cf2053cbd696b863f6d5022d4e4e2453"},
+    {file = "websockets-15.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:45535fead66e873f411c1d3cf0d3e175e66f4dd83c4f59d707d5b3e4c56541c4"},
+    {file = "websockets-15.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e389efe46ccb25a1f93d08c7a74e8123a2517f7b7458f043bd7529d1a63ffeb"},
+    {file = "websockets-15.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:67a04754d121ea5ca39ddedc3f77071651fb5b0bc6b973c71c515415b44ed9c5"},
+    {file = "websockets-15.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:bd66b4865c8b853b8cca7379afb692fc7f52cf898786537dfb5e5e2d64f0a47f"},
+    {file = "websockets-15.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a4cc73a6ae0a6751b76e69cece9d0311f054da9b22df6a12f2c53111735657c8"},
+    {file = "websockets-15.0-cp310-cp310-win32.whl", hash = "sha256:89da58e4005e153b03fe8b8794330e3f6a9774ee9e1c3bd5bc52eb098c3b0c4f"},
+    {file = "websockets-15.0-cp310-cp310-win_amd64.whl", hash = "sha256:4ff380aabd7a74a42a760ee76c68826a8f417ceb6ea415bd574a035a111fd133"},
+    {file = "websockets-15.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:dd24c4d256558429aeeb8d6c24ebad4e982ac52c50bc3670ae8646c181263965"},
+    {file = "websockets-15.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f83eca8cbfd168e424dfa3b3b5c955d6c281e8fc09feb9d870886ff8d03683c7"},
+    {file = "websockets-15.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4095a1f2093002c2208becf6f9a178b336b7572512ee0a1179731acb7788e8ad"},
+    {file = "websockets-15.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb915101dfbf318486364ce85662bb7b020840f68138014972c08331458d41f3"},
+    {file = "websockets-15.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:45d464622314973d78f364689d5dbb9144e559f93dca11b11af3f2480b5034e1"},
+    {file = "websockets-15.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ace960769d60037ca9625b4c578a6f28a14301bd2a1ff13bb00e824ac9f73e55"},
+    {file = "websockets-15.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c7cd4b1015d2f60dfe539ee6c95bc968d5d5fad92ab01bb5501a77393da4f596"},
+    {file = "websockets-15.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:4f7290295794b5dec470867c7baa4a14182b9732603fd0caf2a5bf1dc3ccabf3"},
+    {file = "websockets-15.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3abd670ca7ce230d5a624fd3d55e055215d8d9b723adee0a348352f5d8d12ff4"},
+    {file = "websockets-15.0-cp311-cp311-win32.whl", hash = "sha256:110a847085246ab8d4d119632145224d6b49e406c64f1bbeed45c6f05097b680"},
+    {file = "websockets-15.0-cp311-cp311-win_amd64.whl", hash = "sha256:8d7bbbe2cd6ed80aceef2a14e9f1c1b61683194c216472ed5ff33b700e784e37"},
+    {file = "websockets-15.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:cccc18077acd34c8072578394ec79563664b1c205f7a86a62e94fafc7b59001f"},
+    {file = "websockets-15.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d4c22992e24f12de340ca5f824121a5b3e1a37ad4360b4e1aaf15e9d1c42582d"},
+    {file = "websockets-15.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1206432cc6c644f6fc03374b264c5ff805d980311563202ed7fef91a38906276"},
+    {file = "websockets-15.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d3cc75ef3e17490042c47e0523aee1bcc4eacd2482796107fd59dd1100a44bc"},
+    {file = "websockets-15.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b89504227a5311610e4be16071465885a0a3d6b0e82e305ef46d9b064ce5fb72"},
+    {file = "websockets-15.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56e3efe356416bc67a8e093607315951d76910f03d2b3ad49c4ade9207bf710d"},
+    {file = "websockets-15.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0f2205cdb444a42a7919690238fb5979a05439b9dbb73dd47c863d39640d85ab"},
+    {file = "websockets-15.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:aea01f40995fa0945c020228ab919b8dfc93fc8a9f2d3d705ab5b793f32d9e99"},
+    {file = "websockets-15.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a9f8e33747b1332db11cf7fcf4a9512bef9748cb5eb4d3f7fbc8c30d75dc6ffc"},
+    {file = "websockets-15.0-cp312-cp312-win32.whl", hash = "sha256:32e02a2d83f4954aa8c17e03fe8ec6962432c39aca4be7e8ee346b05a3476904"},
+    {file = "websockets-15.0-cp312-cp312-win_amd64.whl", hash = "sha256:ffc02b159b65c05f2ed9ec176b715b66918a674bd4daed48a9a7a590dd4be1aa"},
+    {file = "websockets-15.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:d2244d8ab24374bed366f9ff206e2619345f9cd7fe79aad5225f53faac28b6b1"},
+    {file = "websockets-15.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3a302241fbe825a3e4fe07666a2ab513edfdc6d43ce24b79691b45115273b5e7"},
+    {file = "websockets-15.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:10552fed076757a70ba2c18edcbc601c7637b30cdfe8c24b65171e824c7d6081"},
+    {file = "websockets-15.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c53f97032b87a406044a1c33d1e9290cc38b117a8062e8a8b285175d7e2f99c9"},
+    {file = "websockets-15.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1caf951110ca757b8ad9c4974f5cac7b8413004d2f29707e4d03a65d54cedf2b"},
+    {file = "websockets-15.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8bf1ab71f9f23b0a1d52ec1682a3907e0c208c12fef9c3e99d2b80166b17905f"},
+    {file = "websockets-15.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bfcd3acc1a81f106abac6afd42327d2cf1e77ec905ae11dc1d9142a006a496b6"},
+    {file = "websockets-15.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:c8c5c8e1bac05ef3c23722e591ef4f688f528235e2480f157a9cfe0a19081375"},
+    {file = "websockets-15.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:86bfb52a9cfbcc09aba2b71388b0a20ea5c52b6517c0b2e316222435a8cdab72"},
+    {file = "websockets-15.0-cp313-cp313-win32.whl", hash = "sha256:26ba70fed190708551c19a360f9d7eca8e8c0f615d19a574292b7229e0ae324c"},
+    {file = "websockets-15.0-cp313-cp313-win_amd64.whl", hash = "sha256:ae721bcc8e69846af00b7a77a220614d9b2ec57d25017a6bbde3a99473e41ce8"},
+    {file = "websockets-15.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c348abc5924caa02a62896300e32ea80a81521f91d6db2e853e6b1994017c9f6"},
+    {file = "websockets-15.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5294fcb410ed0a45d5d1cdedc4e51a60aab5b2b3193999028ea94afc2f554b05"},
+    {file = "websockets-15.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c24ba103ecf45861e2e1f933d40b2d93f5d52d8228870c3e7bf1299cd1cb8ff1"},
+    {file = "websockets-15.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc8821a03bcfb36e4e4705316f6b66af28450357af8a575dc8f4b09bf02a3dee"},
+    {file = "websockets-15.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffc5ae23ada6515f31604f700009e2df90b091b67d463a8401c1d8a37f76c1d7"},
+    {file = "websockets-15.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ac67b542505186b3bbdaffbc303292e1ee9c8729e5d5df243c1f20f4bb9057e"},
+    {file = "websockets-15.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:c86dc2068f1c5ca2065aca34f257bbf4f78caf566eb230f692ad347da191f0a1"},
+    {file = "websockets-15.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:30cff3ef329682b6182c01c568f551481774c476722020b8f7d0daacbed07a17"},
+    {file = "websockets-15.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:98dcf978d4c6048965d1762abd534c9d53bae981a035bfe486690ba11f49bbbb"},
+    {file = "websockets-15.0-cp39-cp39-win32.whl", hash = "sha256:37d66646f929ae7c22c79bc73ec4074d6db45e6384500ee3e0d476daf55482a9"},
+    {file = "websockets-15.0-cp39-cp39-win_amd64.whl", hash = "sha256:24d5333a9b2343330f0f4eb88546e2c32a7f5c280f8dd7d3cc079beb0901781b"},
+    {file = "websockets-15.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b499caef4bca9cbd0bd23cd3386f5113ee7378094a3cb613a2fa543260fe9506"},
+    {file = "websockets-15.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:17f2854c6bd9ee008c4b270f7010fe2da6c16eac5724a175e75010aacd905b31"},
+    {file = "websockets-15.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89f72524033abbfde880ad338fd3c2c16e31ae232323ebdfbc745cbb1b3dcc03"},
+    {file = "websockets-15.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1657a9eecb29d7838e3b415458cc494e6d1b194f7ac73a34aa55c6fb6c72d1f3"},
+    {file = "websockets-15.0-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e413352a921f5ad5d66f9e2869b977e88d5103fc528b6deb8423028a2befd842"},
+    {file = "websockets-15.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:8561c48b0090993e3b2a54db480cab1d23eb2c5735067213bb90f402806339f5"},
+    {file = "websockets-15.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:190bc6ef8690cd88232a038d1b15714c258f79653abad62f7048249b09438af3"},
+    {file = "websockets-15.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:327adab7671f3726b0ba69be9e865bba23b37a605b585e65895c428f6e47e766"},
+    {file = "websockets-15.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2bd8ef197c87afe0a9009f7a28b5dc613bfc585d329f80b7af404e766aa9e8c7"},
+    {file = "websockets-15.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:789c43bf4a10cd067c24c321238e800b8b2716c863ddb2294d2fed886fa5a689"},
+    {file = "websockets-15.0-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7394c0b7d460569c9285fa089a429f58465db930012566c03046f9e3ab0ed181"},
+    {file = "websockets-15.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2ea4f210422b912ebe58ef0ad33088bc8e5c5ff9655a8822500690abc3b1232d"},
+    {file = "websockets-15.0-py3-none-any.whl", hash = "sha256:51ffd53c53c4442415b613497a34ba0aa7b99ac07f1e4a62db5dcd640ae6c3c3"},
+    {file = "websockets-15.0.tar.gz", hash = "sha256:ca36151289a15b39d8d683fd8b7abbe26fc50be311066c5f8dcf3cb8cee107ab"},
 ]

 [[package]]
@@ -3164,4 +3184,4 @@ test = ["pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.10,<3.13"
-content-hash = "b3a6142d6495bc4c8741e9411d29352af219851e4b84b263f991e1bb6db1614e"
+content-hash = "2d0a953383901fe12e97f6f56a76a9d8008788695425792eedbf739a18585188"
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

 [project]
 name = "auto-archiver"
-version = "0.13.1"
+version = "0.13.4"
 description = "Automatically archive links to videos, images, and social media content from Google Sheets (and more)."

 requires-python = ">=3.10,<3.13"
@@ -63,6 +63,7 @@ dependencies = [
 pytest = "^8.3.4"
 autopep8 = "^2.3.1"
 pytest-loguru = "^0.4.0"
+pytest-mock = "^3.14.0"

 [tool.poetry.group.docs.dependencies]
 sphinx = "^8.1.3"
--- a/src/auto_archiver/main.py
+++ b/src/auto_archiver/main.py
@@ -3,7 +3,7 @@ from auto_archiver.core.orchestrator import ArchivingOrchestrator
 import sys

 def main():
-    ArchivingOrchestrator().run(sys.argv[1:])
+    for _ in ArchivingOrchestrator()._command_line_run(sys.argv[1:]): pass

 if __name__ == "__main__":
    main()
--- a/src/auto_archiver/core/init.py
+++ b/src/auto_archiver/core/init.py
@@ -3,7 +3,7 @@
 """
 from .metadata import Metadata
 from .media import Media
-from .module import BaseModule
+from .base_module import BaseModule

 # cannot import ArchivingOrchestrator/Config to avoid circular dep
 # from .orchestrator import ArchivingOrchestrator
--- a/src/auto_archiver/core/base_module.py
+++ b/src/auto_archiver/core/base_module.py
@@ -1,13 +1,18 @@

-from urllib.parse import urlparse
-from typing import  Mapping, Any
+from __future__ import annotations
+
+from typing import  Mapping, Any, Type, TYPE_CHECKING
 from abc import ABC
 from copy import deepcopy, copy
 from tempfile import TemporaryDirectory
 from auto_archiver.utils import url as UrlUtil
+from auto_archiver.core.consts import MODULE_TYPES as CONF_MODULE_TYPES

 from loguru import logger

+if TYPE_CHECKING:
+    from .module import ModuleFactory
+
 class BaseModule(ABC):

    """
@@ -17,41 +22,24 @@ class BaseModule(ABC):
    however modules can have a .setup() method to run any setup code
    (e.g. logging in to a site, spinning up a browser etc.)

-    See BaseModule.MODULE_TYPES for the types of modules you can create, noting that
+    See consts.MODULE_TYPES for the types of modules you can create, noting that
    a subclass can be of multiple types. For example, a module that extracts data from
    a website and stores it in a database would be both an 'extractor' and a 'database' module.

    Each module is a python package, and should have a __manifest__.py file in the
    same directory as the module file. The __manifest__.py specifies the module information
-    like name, author, version, dependencies etc. See BaseModule._DEFAULT_MANIFEST for the
+    like name, author, version, dependencies etc. See DEFAULT_MANIFEST for the
    default manifest structure.

    """

-    MODULE_TYPES = [
-        'feeder',
-        'extractor',
-        'enricher',
-        'database',
-        'storage',
-        'formatter'
-    ]
-
-    _DEFAULT_MANIFEST = {
-    'name': '', # the display name of the module
-    'author': 'Bellingcat', # creator of the module, leave this as Bellingcat or set your own name!
-    'type': [], # the type of the module, can be one or more of BaseModule.MODULE_TYPES
-    'requires_setup': True, # whether or not this module requires additional setup such as setting API Keys or installing additional softare
-    'description': '', # a description of the module
-    'dependencies': {}, # external dependencies, e.g. python packages or binaries, in dictionary format
-    'entry_point': '', # the entry point for the module, in the format 'module_name::ClassName'. This can be left blank to use the default entry point of module_name::ModuleName
-    'version': '1.0', # the version of the module
-    'configs': {} # any configuration options this module has, these will be exposed to the user in the config file or via the command line
-}
+    MODULE_TYPES = CONF_MODULE_TYPES

+    # NOTE: these are declared as class variables, but they are overridden by the instance variables in the __init__ method
    config: Mapping[str, Any]
    authentication: Mapping[str, Mapping[str, str]]
    name: str
+    module_factory: ModuleFactory

    # this is set by the orchestrator prior to archiving
    tmp_dir: TemporaryDirectory = None
@@ -63,12 +51,6 @@ class BaseModule(ABC):
    def config_setup(self, config: dict):

        authentication = config.get('authentication', {})
-        # extract out concatenated sites
-        for key, val in copy(authentication).items():
-            if "," in key:
-                for site in key.split(","):
-                    authentication[site] = val
-                del authentication[key]

        # this is important. Each instance is given its own deepcopied config, so modules cannot
        # change values to affect other modules
@@ -89,16 +71,21 @@ class BaseModule(ABC):
        Returns the authentication information for a given site. This is used to authenticate
        with a site before extracting data. The site should be the domain of the site, e.g. 'twitter.com'
        
-        extract_cookies: bool - whether or not to extract cookies from the given browser and return the 
-        cookie jar (disabling can speed up) processing if you don't actually need the cookies jar
+        :param site: the domain of the site to get authentication information for
+        :param extract_cookies: whether or not to extract cookies from the given browser/file and return the cookie jar (disabling can speed up processing if you don't actually need the cookies jar).

-        Currently, the dict can have keys of the following types:
-        - username: str - the username to use for login
-        - password: str - the password to use for login
-        - api_key: str - the API key to use for login
-        - api_secret: str - the API secret to use for login
-        - cookie: str - a cookie string to use for login (specific to this site)
-        - cookies_jar: YoutubeDLCookieJar | http.cookiejar.MozillaCookieJar - a cookie jar compatible with requests (e.g. `requests.get(cookies=cookie_jar)`)
+        :returns: authdict dict of login information for the given site
+
+        **Global options:**\n
+        * cookies_from_browser: str - the name of the browser to extract cookies from (e.g. 'chrome', 'firefox') - uses ytdlp under the hood to extract them\n
+        * cookies_file: str - the path to a cookies file to use for login\n
+
+        **Currently, the sites dict can have keys of the following types:**\n
+        * username: str - the username to use for login\n
+        * password: str - the password to use for login\n
+        * api_key: str - the API key to use for login\n
+        * api_secret: str - the API secret to use for login\n
+        * cookie: str - a cookie string to use for login (specific to this site)\n
        """
        # TODO: think about if/how we can deal with sites that have multiple domains (main one is x.com/twitter.com)
        # for now the user must enter them both, like "x.com,twitter.com" in their config. Maybe we just hard-code?
--- a/src/auto_archiver/core/config.py
+++ b/src/auto_archiver/core/config.py
@@ -11,7 +11,7 @@ from ruamel.yaml import YAML, CommentedMap, add_representer
 from loguru import logger

 from copy import deepcopy
-from .module import BaseModule
+from auto_archiver.core.consts import MODULE_TYPES

 from typing import Any, List, Type, Tuple

@@ -21,7 +21,7 @@ EMPTY_CONFIG = _yaml.load("""
 # Auto Archiver Configuration
 # Steps are the modules that will be run in the order they are defined

-steps:""" + "".join([f"\n   {module}s: []" for module in BaseModule.MODULE_TYPES]) + \
+steps:""" + "".join([f"\n   {module}s: []" for module in MODULE_TYPES]) + \
 """

 # Global configuration
@@ -129,6 +129,11 @@ def merge_dicts(dotdict: dict, yaml_dict: CommentedMap) -> CommentedMap:
                yaml_subdict[key] = value
                continue

+            if key == 'steps':
+                for module_type, modules in value.items():
+                    # overwrite the 'steps' from the config file with the ones from the CLI
+                    yaml_subdict[key][module_type] = modules
+
            if is_dict_type(value):
                update_dict(value, yaml_subdict[key])
            elif is_list_type(value):
@@ -137,7 +142,6 @@ def merge_dicts(dotdict: dict, yaml_dict: CommentedMap) -> CommentedMap:
                yaml_subdict[key] = value

    update_dict(from_dot_notation(dotdict), yaml_dict)
-
    return yaml_dict

 def read_yaml(yaml_filename: str) -> CommentedMap:
@@ -159,6 +163,11 @@ def read_yaml(yaml_filename: str) -> CommentedMap:
 def store_yaml(config: CommentedMap, yaml_filename: str) -> None:
    config_to_save = deepcopy(config)

+    auth_dict = config_to_save.get("authentication", {})
+    if auth_dict and auth_dict.get('load_from_file'):
+        # keep only the file reference in the copy being saved, so credentials loaded from that file are never written to the config file
+        config_to_save["authentication"] = {"load_from_file": auth_dict["load_from_file"]}
+
    config_to_save.pop('urls', None)
    with open(yaml_filename, "w", encoding="utf-8") as outf:
        _yaml.dump(config_to_save, outf)
--- a/src/auto_archiver/core/consts.py
+++ b/src/auto_archiver/core/consts.py
@@ -0,0 +1,23 @@
+
+MODULE_TYPES = [
+    'feeder',
+    'extractor',
+    'enricher',
+    'database',
+    'storage',
+    'formatter'
+]
+
+MANIFEST_FILE = "__manifest__.py"
+
+DEFAULT_MANIFEST = {
+    'name': '', # the display name of the module
+    'author': 'Bellingcat', # creator of the module, leave this as Bellingcat or set your own name!
+    'type': [], # the type of the module, can be one or more of MODULE_TYPES
+    'requires_setup': True, # whether or not this module requires additional setup such as setting API Keys or installing additional software
+    'description': '', # a description of the module
+    'dependencies': {}, # external dependencies, e.g. python packages or binaries, in dictionary format
+    'entry_point': '', # the entry point for the module, in the format 'module_name::ClassName'. This can be left blank to use the default entry point of module_name::ModuleName
+    'version': '1.0', # the version of the module
+    'configs': {} # any configuration options this module has, these will be exposed to the user in the config file or via the command line
+}
--- a/src/auto_archiver/core/module.py
+++ b/src/auto_archiver/core/module.py
@@ -6,7 +6,7 @@ by handling user configuration, validating the steps properties, and implementin
 from __future__ import annotations

 from dataclasses import dataclass
-from typing import List
+from typing import List, TYPE_CHECKING
 import shutil
 import ast
 import copy
@@ -16,99 +16,113 @@ import os
 from os.path import join
 from loguru import logger
 import auto_archiver
-from .base_module import BaseModule
+from auto_archiver.core.consts import DEFAULT_MANIFEST, MANIFEST_FILE

-_LAZY_LOADED_MODULES = {}
-
-MANIFEST_FILE = "__manifest__.py"
+if TYPE_CHECKING:
+    from .base_module import BaseModule


-def setup_paths(paths: list[str]) -> None:
-    """
-    Sets up the paths for the modules to be loaded from
-    
-    This is necessary for the modules to be imported correctly
-    
-    """
-    for path in paths:
-        # check path exists, if it doesn't, log a warning
-        if not os.path.exists(path):
-            logger.warning(f"Path '{path}' does not exist. Skipping...")
-            continue
+HAS_SETUP_PATHS = False

-        # see odoo/module/module.py -> initialize_sys_path
-        if path not in auto_archiver.modules.__path__:
-                auto_archiver.modules.__path__.append(path)
+class ModuleFactory:

-    # sort based on the length of the path, so that the longest path is last in the list
-    auto_archiver.modules.__path__ = sorted(auto_archiver.modules.__path__, key=len, reverse=True)
+    def __init__(self):
+        self._lazy_modules = {}

-def get_module(module_name: str, config: dict) -> BaseModule:
-    """
-    Gets and sets up a module using the provided config
-    
-    This will actually load and instantiate the module, and load all its dependencies (i.e. not lazy)
-    
-    """
-    return get_module_lazy(module_name).load(config)
+    def setup_paths(self, paths: list[str]) -> None:
+        """
+        Sets up the paths for the modules to be loaded from
+        
+        This is necessary for the modules to be imported correctly
+        
+        """
+        global HAS_SETUP_PATHS

-def get_module_lazy(module_name: str, suppress_warnings: bool = False) -> LazyBaseModule:
-    """
-    Lazily loads a module, returning a LazyBaseModule
-    
-    This has all the information about the module, but does not load the module itself or its dependencies
-    
-    To load an actual module, call .setup() on a lazy module
-    
-    """
-    if module_name in _LAZY_LOADED_MODULES:
-        return _LAZY_LOADED_MODULES[module_name]
-
-    available = available_modules(limit_to_modules=[module_name], suppress_warnings=suppress_warnings)
-    if not available:
-        raise IndexError(f"Module '{module_name}' not found. Are you sure it's installed/exists?")
-    return available[0]
-
-def available_modules(with_manifest: bool=False, limit_to_modules: List[str]= [], suppress_warnings: bool = False) -> List[LazyBaseModule]:
-    
-    # search through all valid 'modules' paths. Default is 'modules' in the current directory
-
-    # see odoo/modules/module.py -> get_modules
-    def is_really_module(module_path):
-        if os.path.isfile(join(module_path, MANIFEST_FILE)):
-            return True
-
-    all_modules = []
-
-    for module_folder in auto_archiver.modules.__path__:
-        # walk through each module in module_folder and check if it has a valid manifest
-        try:
-            possible_modules = os.listdir(module_folder)
-        except FileNotFoundError:
-            logger.warning(f"Module folder {module_folder} does not exist")
-            continue
-
-        for possible_module in possible_modules:
-            if limit_to_modules and possible_module not in limit_to_modules:
+        for path in paths:
+            # check path exists, if it doesn't, log a warning
+            if not os.path.exists(path):
+                logger.warning(f"Path '{path}' does not exist. Skipping...")
                continue

-            possible_module_path = join(module_folder, possible_module)
-            if not is_really_module(possible_module_path):
+            # see odoo/module/module.py -> initialize_sys_path
+            if path not in auto_archiver.modules.__path__:
+                    if HAS_SETUP_PATHS == True:
+                        logger.warning(f"You are attempting to re-initialise the module paths with: '{path}' for a 2nd time. \
+                                       This could lead to unexpected behaviour. It is recommended to only use a single modules path. \
+                                       If you wish to load modules from different paths then load a 2nd python interpreter (e.g. using multiprocessing).")
+                    auto_archiver.modules.__path__.append(path)
+
+        # sort based on the length of the path, so that the longest path is first in the list
+        auto_archiver.modules.__path__ = sorted(auto_archiver.modules.__path__, key=len, reverse=True)
+
+        HAS_SETUP_PATHS = True
+
+    def get_module(self, module_name: str, config: dict) -> BaseModule:
+        """
+        Gets and sets up a module using the provided config
+        
+        This will actually load and instantiate the module, and load all its dependencies (i.e. not lazy)
+        
+        """
+        return self.get_module_lazy(module_name).load(config)
+
+    def get_module_lazy(self, module_name: str, suppress_warnings: bool = False) -> LazyBaseModule:
+        """
+        Lazily loads a module, returning a LazyBaseModule
+        
+        This has all the information about the module, but does not load the module itself or its dependencies
+        
+        To load an actual module, call .load(config) on a lazy module
+        
+        """
+        if module_name in self._lazy_modules:
+            return self._lazy_modules[module_name]
+
+        available = self.available_modules(limit_to_modules=[module_name], suppress_warnings=suppress_warnings)
+        if not available:
+            raise IndexError(f"Module '{module_name}' not found. Are you sure it's installed/exists?")
+        return available[0]
+
+    def available_modules(self, limit_to_modules: List[str]= [], suppress_warnings: bool = False) -> List[LazyBaseModule]:
+        
+        # search through all valid 'modules' paths. Default is 'modules' in the current directory
+
+        # see odoo/modules/module.py -> get_modules
+        def is_really_module(module_path):
+            if os.path.isfile(join(module_path, MANIFEST_FILE)):
+                return True
+
+        all_modules = []
+
+        for module_folder in auto_archiver.modules.__path__:
+            # walk through each module in module_folder and check if it has a valid manifest
+            try:
+                possible_modules = os.listdir(module_folder)
+            except FileNotFoundError:
+                logger.warning(f"Module folder {module_folder} does not exist")
                continue
-            if _LAZY_LOADED_MODULES.get(possible_module):
-                continue
-            lazy_module = LazyBaseModule(possible_module, possible_module_path)

-            _LAZY_LOADED_MODULES[possible_module] = lazy_module
+            for possible_module in possible_modules:
+                if limit_to_modules and possible_module not in limit_to_modules:
+                    continue

-            all_modules.append(lazy_module)
-    
-    if not suppress_warnings:
-        for module in limit_to_modules:
-            if not any(module == m.name for m in all_modules):
-                logger.warning(f"Module '{module}' not found. Are you sure it's installed?")
+                possible_module_path = join(module_folder, possible_module)
+                if not is_really_module(possible_module_path):
+                    continue
+                if self._lazy_modules.get(possible_module):
+                    continue
+                lazy_module = LazyBaseModule(possible_module, possible_module_path, factory=self)

-    return all_modules
+                self._lazy_modules[possible_module] = lazy_module
+
+                all_modules.append(lazy_module)
+        
+        if not suppress_warnings:
+            for module in limit_to_modules:
+                if not any(module == m.name for m in all_modules):
+                    logger.warning(f"Module '{module}' not found. Are you sure it's installed?")
+
+        return all_modules

@dataclass
 class LazyBaseModule:
@@ -123,14 +137,16 @@ class LazyBaseModule:
    type: list
    description: str
    path: str
+    module_factory: ModuleFactory

    _manifest: dict = None
    _instance: BaseModule = None
    _entry_point: str = None

-    def __init__(self, module_name, path):
+    def __init__(self, module_name, path, factory: ModuleFactory):
        self.name = module_name
        self.path = path
+        self.module_factory = factory

    @property
    def entry_point(self):
@@ -161,7 +177,7 @@ class LazyBaseModule:
            return self._manifest
        # print(f"Loading manifest for module {module_path}")
        # load the manifest file
-        manifest = copy.deepcopy(BaseModule._DEFAULT_MANIFEST)
+        manifest = copy.deepcopy(DEFAULT_MANIFEST)

        with open(join(self.path, MANIFEST_FILE)) as f:
            try:
@@ -189,13 +205,14 @@ class LazyBaseModule:
                    # clear out any empty strings that a user may have erroneously added
                    continue
                if not check(dep):
-                    logger.error(f"Module '{self.name}' requires external dependency '{dep}' which is not available/setup. Have you installed the required dependencies for the '{self.name}' module? See the README for more information.")
+                    logger.error(f"Module '{self.name}' requires external dependency '{dep}' which is not available/setup. \
+                                 Have you installed the required dependencies for the '{self.name}' module? See the README for more information.")
                    exit(1)

        def check_python_dep(dep):
            # first check if it's a module:
            try:
-                m = get_module_lazy(dep, suppress_warnings=True)
+                m = self.module_factory.get_module_lazy(dep, suppress_warnings=True)
                try:
                # we must now load this module and set it up with the config
                    m.load(config)
@@ -230,19 +247,21 @@ class LazyBaseModule:
        __import__(f'{qualname}.{file_name}', fromlist=[self.entry_point])
        # finally, get the class instance
        instance: BaseModule = getattr(sys.modules[sub_qualname], class_name)()
-        if not getattr(instance, 'name', None):
-            instance.name = self.name
-
-        if not getattr(instance, 'display_name', None):
-            instance.display_name = self.display_name
-
-        self._instance = instance

+        # set the name, display name and module factory
+        instance.name = self.name
+        instance.display_name = self.display_name
+        instance.module_factory = self.module_factory
+        
        # merge the default config with the user config
        default_config = dict((k, v['default']) for k, v in self.configs.items() if v.get('default'))
+
        config[self.name] = default_config  | config.get(self.name, {})
        instance.config_setup(config)
        instance.setup()
+
+        # save the instance for future easy loading
+        self._instance = instance
        return instance

    def __repr__(self):
--- a/src/auto_archiver/core/orchestrator.py
+++ b/src/auto_archiver/core/orchestrator.py
@@ -5,9 +5,10 @@
 """

 from __future__ import annotations
-from typing import Generator, Union, List, Type
+from typing import Generator, Union, List, Type, TYPE_CHECKING
 from urllib.parse import urlparse
 from ipaddress import ip_address
+from copy import copy
 import argparse
 import os
 import sys
@@ -21,15 +22,18 @@ from rich_argparse import RichHelpFormatter
 from .metadata import Metadata, Media
 from auto_archiver.version import __version__
 from .config import _yaml, read_yaml, store_yaml, to_dot_notation, merge_dicts, EMPTY_CONFIG, DefaultValidatingParser
-from .module import available_modules, LazyBaseModule, get_module, setup_paths
+from .module import ModuleFactory, LazyBaseModule
 from . import validators, Feeder, Extractor, Database, Storage, Formatter, Enricher
-from .module import BaseModule
-
+from .consts import MODULE_TYPES
 from loguru import logger

+if TYPE_CHECKING:
+    from .base_module import BaseModule
+    from .module import LazyBaseModule

 DEFAULT_CONFIG_FILE = "orchestration.yaml"

+
 class JsonParseAction(argparse.Action):
    def __call__(self, parser, namespace, values, option_string=None):
        try:
@@ -42,51 +46,85 @@ class AuthenticationJsonParseAction(JsonParseAction):
    def __call__(self, parser, namespace, values, option_string=None):
        super().__call__(parser, namespace, values, option_string)
        auth_dict = getattr(namespace, self.dest)
-        if isinstance(auth_dict, str):
-            # if it's a string
+
+        def load_from_file(path):
            try:
-                with open(auth_dict, 'r') as f:
+                with open(path, 'r') as f:
                    try:
                        auth_dict = json.load(f)
                    except json.JSONDecodeError:
+                        f.seek(0)
                        # maybe it's yaml, try that
                        auth_dict = _yaml.load(f)
+                    if auth_dict.get('authentication'):
+                        auth_dict = auth_dict['authentication']
+                    auth_dict['load_from_file']  = path
+                    return auth_dict
            except:
-                pass
+                return None

+        if isinstance(auth_dict, dict) and auth_dict.get('from_file'):
+            auth_dict = load_from_file(auth_dict['from_file'])
+        elif isinstance(auth_dict, str):
+            # if it's a string
+            auth_dict = load_from_file(auth_dict)
+        
        if not isinstance(auth_dict, dict):
            raise argparse.ArgumentTypeError("Authentication must be a dictionary of site names and their authentication methods")
-        for site, auth in auth_dict.items():
-            if not isinstance(site, str) or not isinstance(auth, dict):
-                raise argparse.ArgumentTypeError("Authentication must be a dictionary of site names and their authentication methods")
+        global_options = ['cookies_from_browser', 'cookies_file', 'load_from_file']
+        for key, auth in auth_dict.items():
+            if key in global_options:
+                continue
+            if not isinstance(key, str) or not isinstance(auth, dict):
+                raise argparse.ArgumentTypeError(f"Authentication must be a dictionary of site names and their authentication methods. Valid global configs are {global_options}")
+        
+        # extract out concatenated sites
+        for key, val in copy(auth_dict).items():
+            if "," in key:
+                for site in key.split(","):
+                    auth_dict[site] = val
+                del auth_dict[key]
+
        setattr(namespace, self.dest, auth_dict)
+
+
 class UniqueAppendAction(argparse.Action):
    def __call__(self, parser, namespace, values, option_string=None):
-        if not hasattr(namespace, self.dest):
-            setattr(namespace, self.dest, [])
        for value in values:
            if value not in getattr(namespace, self.dest):
                getattr(namespace, self.dest).append(value)

+
 class ArchivingOrchestrator:

+    # instance variables
+    module_factory: ModuleFactory
+    setup_finished: bool
+    logger_id: int
+
+    # instance variables, used for convenience to access modules by step
    feeders: List[Type[Feeder]]
    extractors: List[Type[Extractor]]
    enrichers: List[Type[Enricher]]
    databases: List[Type[Database]]
    storages: List[Type[Storage]]
    formatters: List[Type[Formatter]]
-    
+
+    def __init__(self):
+        self.module_factory = ModuleFactory()
+        self.setup_finished = False
+        self.logger_id = None
+
    def setup_basic_parser(self):
        parser = argparse.ArgumentParser(
-                prog="auto-archiver",
-                add_help=False,
-                description="""
+            prog="auto-archiver",
+            add_help=False,
+            description="""
                Auto Archiver is a CLI tool to archive media/metadata from online URLs;
                it can read URLs from many sources (Google Sheets, Command Line, ...); and write results to many destinations too (CSV, Google Sheets, MongoDB, ...)!
                """,
-                epilog="Check the code at https://github.com/bellingcat/auto-archiver",
-                formatter_class=RichHelpFormatter,
+            epilog="Check the code at https://github.com/bellingcat/auto-archiver",
+            formatter_class=RichHelpFormatter,
        )
        parser.add_argument('--help', '-h', action='store_true', dest='help', help='show a full help message and exit')
        parser.add_argument('--version', action='version', version=__version__)
@@ -100,101 +138,115 @@ class ArchivingOrchestrator:
        return parser

    def setup_complete_parser(self, basic_config: dict, yaml_config: dict, unused_args: list[str]) -> None:
+
+
+        # modules parser to get the overridden 'steps' values
+        modules_parser = argparse.ArgumentParser(
+            add_help=False,
+        )
+        self.add_modules_args(modules_parser)
+        cli_modules, unused_args = modules_parser.parse_known_args(unused_args)
+        for module_type in MODULE_TYPES:
+            yaml_config['steps'][f"{module_type}s"] = getattr(cli_modules, f"{module_type}s", []) or yaml_config['steps'].get(f"{module_type}s", [])
+
        parser = DefaultValidatingParser(
            add_help=False,
        )
        self.add_additional_args(parser)

+        # merge command line module args (--feeders, --enrichers etc.) and add them to the config
+
        # check what mode we're in
        # if we have a config file, use that to decide which modules to load
        # if simple, we'll load just the modules that has requires_setup = False
        # if full, we'll load all modules
        # TODO: BUG** - basic_config won't have steps in it, since these args aren't added to 'basic_parser'
        # but should we add them? Or should we just add them to the 'complete' parser?
+
        if yaml_config != EMPTY_CONFIG:
            # only load the modules enabled in config
            # TODO: if some steps are empty (e.g. 'feeders' is empty), should we default to the 'simple' ones? Or only if they are ALL empty?
            enabled_modules = []
            # first loads the modules from the config file, then from the command line
-            for config in [yaml_config['steps'], basic_config.__dict__]:
-                for module_type in BaseModule.MODULE_TYPES:
-                    enabled_modules.extend(config.get(f"{module_type}s", []))
+            for module_type in MODULE_TYPES:
+                enabled_modules.extend(yaml_config['steps'].get(f"{module_type}s", []))

            # clear out duplicates, but keep the order
            enabled_modules = list(dict.fromkeys(enabled_modules))
-            avail_modules = available_modules(with_manifest=True, limit_to_modules=enabled_modules, suppress_warnings=True)
-            self.add_module_args(avail_modules, parser)
+            avail_modules = self.module_factory.available_modules(limit_to_modules=enabled_modules, suppress_warnings=True)
+            self.add_individual_module_args(avail_modules, parser)
        elif basic_config.mode == 'simple':
-            simple_modules = [module for module in available_modules(with_manifest=True) if not module.requires_setup]
-            self.add_module_args(simple_modules, parser)
+            simple_modules = [module for module in self.module_factory.available_modules() if not module.requires_setup]
+            self.add_individual_module_args(simple_modules, parser)

            # for simple mode, we use the cli_feeder and any modules that don't require setup
-            yaml_config['steps']['feeders'] = ['cli_feeder']
-            
+            if not yaml_config['steps']['feeders']:
+                yaml_config['steps']['feeders'] = ['cli_feeder']
+
            # add them to the config
            for module in simple_modules:
                for module_type in module.type:
                    yaml_config['steps'].setdefault(f"{module_type}s", []).append(module.name)
        else:
            # load all modules, they're not using the 'simple' mode
-            self.add_module_args(available_modules(with_manifest=True), parser)
-
+            self.add_individual_module_args(self.module_factory.available_modules(), parser)
+        
        parser.set_defaults(**to_dot_notation(yaml_config))

        # reload the parser with the new arguments, now that we have them
        parsed, unknown = parser.parse_known_args(unused_args)
-
        # merge the new config with the old one
-        self.config = merge_dicts(vars(parsed), yaml_config)
+        config = merge_dicts(vars(parsed), yaml_config)
+
        # clean out args from the base_parser that we don't want in the config
        for key in vars(basic_config):
-            self.config.pop(key, None)
+            config.pop(key, None)

        # setup the logging
-        self.setup_logging()
+        self.setup_logging(config)

        if unknown:
            logger.warning(f"Ignoring unknown/unused arguments: {unknown}\nPerhaps you don't have this module enabled?")
-        
-        if (self.config != yaml_config and basic_config.store) or not os.path.isfile(basic_config.config_file):
+
+        if (config != yaml_config and basic_config.store) or not os.path.isfile(basic_config.config_file):
            logger.info(f"Storing configuration file to {basic_config.config_file}")
-            store_yaml(self.config, basic_config.config_file)
-        
-        return self.config
+            store_yaml(config, basic_config.config_file)
+
+        return config
    
+    def add_modules_args(self, parser: argparse.ArgumentParser = None):
+        if not parser:
+            parser = self.parser
+
+        # Module loading from the command line
+        for module_type in MODULE_TYPES:
+            parser.add_argument(f'--{module_type}s', dest=f'{module_type}s', nargs='+', help=f'the {module_type}s to use', default=[], action=UniqueAppendAction)
+
    def add_additional_args(self, parser: argparse.ArgumentParser = None):
        if not parser:
            parser = self.parser

-
        # allow passing URLs directly on the command line
        parser.add_argument('urls', nargs='*', default=[], help='URL(s) to archive, either a single URL or a list of urls, should not come from config.yaml')

-        parser.add_argument('--feeders', dest='steps.feeders', nargs='+', default=['cli_feeder'], help='the feeders to use', action=UniqueAppendAction)
-        parser.add_argument('--enrichers', dest='steps.enrichers',  nargs='+', help='the enrichers to use', action=UniqueAppendAction)
-        parser.add_argument('--extractors', dest='steps.extractors', nargs='+', help='the extractors to use', action=UniqueAppendAction)
-        parser.add_argument('--databases', dest='steps.databases', nargs='+', help='the databases to use', action=UniqueAppendAction)
-        parser.add_argument('--storages', dest='steps.storages', nargs='+', help='the storages to use', action=UniqueAppendAction)
-        parser.add_argument('--formatters', dest='steps.formatters', nargs='+', help='the formatter to use', action=UniqueAppendAction)
-
        parser.add_argument('--authentication', dest='authentication', help='A dictionary of sites and their authentication methods \
                                                                            (token, username etc.) that extractors can use to log into \
                                                                            a website. If passing this on the command line, use a JSON string. \
-                                                                            You may also pass a path to a valid JSON/YAML file which will be parsed.',\
+                                                                            You may also pass a path to a valid JSON/YAML file which will be parsed.',
                                                                            default={},
+                                                                            nargs="?",
                                                                            action=AuthenticationJsonParseAction)
+
        # logging arguments
        parser.add_argument('--logging.level', action='store', dest='logging.level', choices=['INFO', 'DEBUG', 'ERROR', 'WARNING'], help='the logging level to use', default='INFO', type=str.upper)
        parser.add_argument('--logging.file', action='store', dest='logging.file', help='the logging file to write to', default=None)
        parser.add_argument('--logging.rotation', action='store', dest='logging.rotation', help='the logging rotation to use', default=None)

-
-    def add_module_args(self, modules: list[LazyBaseModule] = None, parser: argparse.ArgumentParser = None) -> None:
+    def add_individual_module_args(self, modules: list[LazyBaseModule] = None, parser: argparse.ArgumentParser = None) -> None:

        if not modules:
-            modules = available_modules(with_manifest=True)
-
-        module: LazyBaseModule
+            modules = self.module_factory.available_modules()
+        
        for module in modules:

            if not module.configs:
@@ -224,21 +276,29 @@ class ArchivingOrchestrator:
                arg.should_store = should_store

    def show_help(self, basic_config: dict):
-        # for the help message, we want to load *all* possible modules and show the help
-            # add configs as arg parser arguments
-        
+        # for the help message, we want to load manifests from *all* possible modules and show their help/settings
+        # add configs as arg parser arguments
+
+        self.add_modules_args(self.basic_parser)
        self.add_additional_args(self.basic_parser)
-        self.add_module_args(parser=self.basic_parser)
+        self.add_individual_module_args(parser=self.basic_parser)
        self.basic_parser.print_help()
        self.basic_parser.exit()
-    
-    def setup_logging(self):
+
+    def setup_logging(self, config):
        # setup loguru logging
-        logger.remove(0) # remove the default logger
-        logging_config = self.config['logging']
-        logger.add(sys.stderr, level=logging_config['level'])
-        if log_file := logging_config['file']:
-            logger.add(log_file) if not logging_config['rotation'] else logger.add(log_file, rotation=logging_config['rotation'])
+        try:
+            logger.remove(0)  # remove the default logger
+        except ValueError:
+            pass
+
+        logging_config = config['logging']
+
+        # add other logging info
+        if self.logger_id is None: # note - need direct comparison to None since need to consider falsy value 0
+            self.logger_id = logger.add(sys.stderr, level=logging_config['level'])
+            if log_file := logging_config['file']:
+                logger.add(log_file) if not logging_config['rotation'] else logger.add(log_file, rotation=logging_config['rotation'])

    def install_modules(self, modules_by_type):
        """
@@ -246,9 +306,9 @@ class ArchivingOrchestrator:
        orchestrator's attributes (self.feeders, self.extractors etc.). If no modules of a certain type
        are loaded, the program will exit with an error message.
        """
-        
+
        invalid_modules = []
-        for module_type in BaseModule.MODULE_TYPES:
+        for module_type in MODULE_TYPES:

            step_items = []
            modules_to_load = modules_by_type[f"{module_type}s"]
@@ -273,6 +333,7 @@ class ArchivingOrchestrator:
                        logger.error("No URLs provided. Please provide at least one URL via the command line, or set up an alternative feeder. Use --help for more information.")
                        exit()
                    # cli_feeder is a pseudo module, it just takes the command line args
+
                    def feed(self) -> Generator[Metadata]:
                        for url in urls:
                            logger.debug(f"Processing URL: '{url}'")
@@ -284,7 +345,6 @@ class ArchivingOrchestrator:
                        '__iter__': feed

                    })()
-  

                    pseudo_module.__iter__ = feed
                    step_items.append(pseudo_module)
@@ -293,7 +353,7 @@ class ArchivingOrchestrator:
                if module in invalid_modules:
                    continue
                try:
-                    loaded_module: BaseModule = get_module(module, self.config)
+                    loaded_module: BaseModule = self.module_factory.get_module(module, self.config)
                except (KeyboardInterrupt, Exception) as e:
                    logger.error(f"Error during setup of modules: {e}\n{traceback.format_exc()}")
                    if module_type == 'extractor' and loaded_module.name == module:
@@ -308,48 +368,85 @@ class ArchivingOrchestrator:

            check_steps_ok()
            setattr(self, f"{module_type}s", step_items)
-    
+
    def load_config(self, config_file: str) -> dict:
        if not os.path.exists(config_file) and config_file != DEFAULT_CONFIG_FILE:
            logger.error(f"The configuration file {config_file} was  not found. Make sure the file exists and try again, or run without the --config file to use the default settings.")
            exit()

        return read_yaml(config_file)
+    
+    def setup_config(self, args: list) -> dict:
+        """
+        Sets up the configuration file, merging the default config with the user's config
+
+        This function should only ever be run once.
+        """

-    def run(self, args: list) -> None:
-        
        self.setup_basic_parser()

        # parse the known arguments for now (basically, we want the config file)
        basic_config, unused_args = self.basic_parser.parse_known_args(args)

        # setup any custom module paths, so they'll show in the help and for arg parsing
-        setup_paths(basic_config.module_paths)
+        self.module_factory.setup_paths(basic_config.module_paths)

        # if help flag was called, then show the help
        if basic_config.help:
            self.show_help(basic_config)
-
+        # merge command line --feeder etc. args with what's in the yaml config
        yaml_config = self.load_config(basic_config.config_file)
-        self.setup_complete_parser(basic_config, yaml_config, unused_args)
+
+        return self.setup_complete_parser(basic_config, yaml_config, unused_args)
+
+    def setup(self, args: list):
+        """
+        Function to configure all setup of the orchestrator: setup configs and load modules.
+        
+        This method should only ever be called once
+        """
+
+        if self.setup_finished:
+            logger.warning("The `setup_config()` function should only ever be run once. \
+                           If you need to re-run the setup, please re-instantiate a new instance of the orchestrator. \
+                           For code implementatations, you should call .setup_config() once then you may call .feed() \
+                           multiple times to archive multiple URLs.")
+            return
+
+        self.setup_basic_parser()
+        self.config = self.setup_config(args)

        logger.info(f"======== Welcome to the AUTO ARCHIVER ({__version__}) ==========")
        self.install_modules(self.config['steps'])

        # log out the modules that were loaded
-        for module_type in BaseModule.MODULE_TYPES:
+        for module_type in MODULE_TYPES:
            logger.info(f"{module_type.upper()}S: " + ", ".join(m.display_name for m in getattr(self, f"{module_type}s")))
+        
+        self.setup_finished = True

-        for _ in self.feed():
-            pass
+    def _command_line_run(self, args: list) -> Generator[Metadata]:
+        """
+        This is the main entry point for the orchestrator, when run from the command line.

-    def cleanup(self)->None:
+        :param args: list of arguments to pass to the orchestrator - these are the command line args
+        
+        You should not call this method from code implementations.
+          
+        This method sets up the configuration, loads the modules, and runs the feed.
+        If you wish to make code invocations yourself, you should use the 'setup' and 'feed' methods separately.
+        To test configurations without loading any modules, you can also first call 'setup_config'.
+        """
+        self.setup(args)
+        return self.feed()
+
+    def cleanup(self) -> None:
        logger.info("Cleaning up")
        for e in self.extractors:
            e.cleanup()

    def feed(self) -> Generator[Metadata]:
-
+        
        url_count = 0
        for feeder in self.feeders:
            for item in feeder:
@@ -393,7 +490,6 @@ class ArchivingOrchestrator:
                    m.tmp_dir = None
                tmp_dir.cleanup()

-
    def archive(self, result: Metadata) -> Union[Metadata, None]:
        """
            Runs the archiving process for a single URL
@@ -440,13 +536,13 @@ class ArchivingOrchestrator:
            try:
                result.merge(a.download(result))
                if result.is_success(): break
-            except Exception as e: 
+            except Exception as e:
                logger.error(f"ERROR archiver {a.name}: {e}: {traceback.format_exc()}")

        # 4 - call enrichers to work with archived content
        for e in self.enrichers:
            try: e.enrich(result)
-            except Exception as exc: 
+            except Exception as exc:
                logger.error(f"ERROR enricher {e.name}: {exc}: {traceback.format_exc()}")

        # 5 - store all downloaded/generated media
@@ -474,13 +570,13 @@ class ArchivingOrchestrator:
        Blocks localhost, private, reserved, and link-local IPs and all non-http/https schemes.
        """
        assert url.startswith("http://") or url.startswith("https://"), f"Invalid URL scheme"
-        
+
        parsed = urlparse(url)
        assert parsed.scheme in ["http", "https"], f"Invalid URL scheme"
        assert parsed.hostname, f"Invalid URL hostname"
        assert parsed.hostname != "localhost", f"Invalid URL"

-        try: # special rules for IP addresses
+        try:  # special rules for IP addresses
            ip = ip_address(parsed.hostname)
        except ValueError: pass
        else:
@@ -489,9 +585,8 @@ class ArchivingOrchestrator:
            assert not ip.is_link_local, f"Invalid IP used"
            assert not ip.is_private, f"Invalid IP used"

-
    # Helper Properties
-    
+
    @property
    def all_modules(self) -> List[Type[BaseModule]]:
-        return self.feeders + self.extractors + self.enrichers + self.databases + self.storages + self.formatters
+        return self.feeders + self.extractors + self.enrichers + self.databases + self.storages + self.formatters
--- a/src/auto_archiver/core/storage.py
+++ b/src/auto_archiver/core/storage.py
@@ -14,7 +14,7 @@ from auto_archiver.utils.misc import random_str

 from auto_archiver.core import Media, BaseModule, Metadata
 from auto_archiver.modules.hash_enricher.hash_enricher import HashEnricher
-from auto_archiver.core.module import get_module
+
 class Storage(BaseModule):
    
    """
@@ -74,7 +74,7 @@ class Storage(BaseModule):
            filename = random_str(24)
        elif filename_generator == "static":
            # load the hash_enricher module
-            he = get_module(HashEnricher, self.config)
+            he = self.module_factory.get_module(HashEnricher, self.config)
            hd = he.calculate_hash(media.filename)
            filename = hd[:24]
        else:
--- a/src/auto_archiver/modules/atlos_db/init.py
+++ b/src/auto_archiver/modules/atlos_db/init.py
@@ -1 +1 @@
-from atlos_db import AtlosDb
+from .atlos_db import AtlosDb
--- a/src/auto_archiver/modules/atlos_storage/init.py
+++ b/src/auto_archiver/modules/atlos_storage/init.py
@@ -0,0 +1 @@
+from .atlos_storage import AtlosStorage
--- a/src/auto_archiver/modules/generic_extractor/generic_extractor.py
+++ b/src/auto_archiver/modules/generic_extractor/generic_extractor.py
@@ -280,7 +280,7 @@ class GenericExtractor(Extractor):
        
        # set up auth
        auth = self.auth_for_site(url, extract_cookies=False)
-        # order of importance: username/pasword -> api_key -> cookie -> cookie_from_browser -> cookies_file
+        # order of importance: username/password -> api_key -> cookie -> cookies_from_browser -> cookies_file
        if auth:
            if 'username' in auth and 'password' in auth:
                logger.debug(f'Using provided auth username and password for {url}')
@@ -289,7 +289,7 @@ class GenericExtractor(Extractor):
            elif 'cookie' in auth:
                logger.debug(f'Using provided auth cookie for {url}')
                yt_dlp.utils.std_headers['cookie'] = auth['cookie']
-            elif 'cookie_from_browser' in auth:
+            elif 'cookies_from_browser' in auth:
                logger.debug(f'Using extracted cookies from browser {self.cookies_from_browser} for {url}')
                ydl_options['cookiesfrombrowser'] = auth['cookies_from_browser']
            elif 'cookies_file' in auth:
--- a/src/auto_archiver/modules/gsheet_feeder/manifest.py
+++ b/src/auto_archiver/modules/gsheet_feeder/manifest.py
@@ -10,7 +10,7 @@
        "sheet": {"default": None, "help": "name of the sheet to archive"},
        "sheet_id": {
            "default": None,
-            "help": "(alternative to sheet name) the id of the sheet to archive",
+            "help": "the id of the sheet to archive (alternative to 'sheet' config)",
        },
        "header": {"default": 1, "help": "index of the header row (starts at 1)", "type": "int"},
        "service_account": {
--- a/src/auto_archiver/modules/html_formatter/html_formatter.py
+++ b/src/auto_archiver/modules/html_formatter/html_formatter.py
@@ -10,7 +10,6 @@ from auto_archiver.version import __version__
 from auto_archiver.core import Metadata, Media
 from auto_archiver.core import Formatter
 from auto_archiver.utils.misc import random_str
-from auto_archiver.core.module import get_module

 class HtmlFormatter(Formatter):
    environment: Environment = None
@@ -50,7 +49,7 @@ class HtmlFormatter(Formatter):
        final_media = Media(filename=html_path, _mimetype="text/html")

        # get the already instantiated hash_enricher module
-        he = get_module('hash_enricher', self.config)
+        he = self.module_factory.get_module('hash_enricher', self.config)
        if len(hd := he.calculate_hash(final_media.filename)):
            final_media.set("hash", f"{he.algorithm}:{hd}")

--- a/src/auto_archiver/modules/html_formatter/templates/html_template.html
+++ b/src/auto_archiver/modules/html_formatter/templates/html_template.html
@@ -200,7 +200,7 @@
                el.innerHTML = decodeCertificate(certificate);

                let cyberChefUrl =
-                    `https://gchq.github.io/CyberChef/#recipe=Parse_X.509_certificate('PEM')&input=${btoa(certificate)}`;
+                    `https://gchq.github.io/CyberChef/#recipe=Parse_X.509_certificate('PEM')&input=${btoa(certificate).replace(/=+$/, '')}`;
                // create a new anchor with this url and append after the code
                let a = document.createElement("a");
                a.href = cyberChefUrl;
--- a/src/auto_archiver/modules/instagram_tbot_extractor/instagram_tbot_extractor.py
+++ b/src/auto_archiver/modules/instagram_tbot_extractor/instagram_tbot_extractor.py
@@ -77,13 +77,14 @@ class InstagramTbotExtractor(Extractor):
            chat, since_id = self._send_url_to_bot(url)
            message = self._process_messages(chat, since_id, tmp_dir, result)

+            # This may be outdated and replaced by the below message, but keeping until confirmed
            if "You must enter a URL to a post" in message:
                logger.debug(f"invalid link {url=} for {self.name}: {message}")
                return False
-            # # TODO: It currently returns this as a success - is that intentional?
-            # if "Media not found or unavailable" in message:
-            #     logger.debug(f"invalid link {url=} for {self.name}: {message}")
-            #     return False
+
+            if "Media not found or unavailable" in message:
+                logger.debug(f"No media found for link {url=} for {self.name}: {message}")
+                return False

            if message:
                result.set_content(message).set_title(message[:128])
--- a/src/auto_archiver/modules/screenshot_enricher/manifest.py
+++ b/src/auto_archiver/modules/screenshot_enricher/manifest.py
@@ -4,7 +4,6 @@
    "requires_setup": True,
    "dependencies": {
        "python": ["loguru", "selenium"],
-        "bin": ["geckodriver"]
    },
    "configs": {
            "width": {"default": 1280, "help": "width of the screenshots"},
--- a/src/auto_archiver/modules/screenshot_enricher/screenshot_enricher.py
+++ b/src/auto_archiver/modules/screenshot_enricher/screenshot_enricher.py
@@ -11,6 +11,10 @@ from auto_archiver.core import Media, Metadata

 class ScreenshotEnricher(Enricher):

+    def __init__(self, webdriver_factory=None):
+        super().__init__()
+        self.webdriver_factory = webdriver_factory or Webdriver
+
    def enrich(self, to_enrich: Metadata) -> None:
        url = to_enrich.get_url()

@@ -20,7 +24,8 @@ class ScreenshotEnricher(Enricher):

        logger.debug(f"Enriching screenshot for {url=}")
        auth = self.auth_for_site(url)
-        with Webdriver(self.width, self.height, self.timeout, facebook_accept_cookies='facebook.com' in url,
+        with self.webdriver_factory(
+                self.width, self.height, self.timeout, facebook_accept_cookies='facebook.com' in url,
                       http_proxy=self.http_proxy, print_options=self.print_options, auth=auth) as driver:
            try:
                driver.get(url)
@@ -38,3 +43,4 @@ class ScreenshotEnricher(Enricher):
                logger.info("TimeoutException loading page for screenshot")
            except Exception as e:
                logger.error(f"Got error while loading webdriver for screenshot enricher: {e}")
+
--- a/src/auto_archiver/modules/thumbnail_enricher/manifest.py
+++ b/src/auto_archiver/modules/thumbnail_enricher/manifest.py
@@ -7,8 +7,12 @@
        "bin": ["ffmpeg"]
    },
    "configs": {
-            "thumbnails_per_minute": {"default": 60, "help": "how many thumbnails to generate per minute of video, can be limited by max_thumbnails"},
-            "max_thumbnails": {"default": 16, "help": "limit the number of thumbnails to generate per video, 0 means no limit"},
+            "thumbnails_per_minute": {"default": 60,
+                                      "type": "int",
+                                      "help": "how many thumbnails to generate per minute of video, can be limited by max_thumbnails"},
+            "max_thumbnails": {"default": 16,
+                               "type": "int",
+                               "help": "limit the number of thumbnails to generate per video, 0 means no limit"},
        },
    "description": """
    Generates thumbnails for video files to provide visual previews.
--- a/src/auto_archiver/modules/thumbnail_enricher/thumbnail_enricher.py
+++ b/src/auto_archiver/modules/thumbnail_enricher/thumbnail_enricher.py
@@ -42,7 +42,7 @@ class ThumbnailEnricher(Enricher):
                        logger.error(f"error getting duration of video {m.filename}: {e}")
                        return

-                num_thumbs = int(min(max(1, duration * self.thumbnails_per_minute), self.max_thumbnails))
+                num_thumbs = int(min(max(1, (duration / 60) * self.thumbnails_per_minute), self.max_thumbnails))
                timestamps = [duration / (num_thumbs + 1) * i for i in range(1, num_thumbs + 1)]

                thumbnails_media = []
--- a/src/auto_archiver/modules/whisper_enricher/whisper_enricher.py
+++ b/src/auto_archiver/modules/whisper_enricher/whisper_enricher.py
@@ -4,7 +4,6 @@ from loguru import logger

 from auto_archiver.core import Enricher
 from auto_archiver.core import Metadata, Media
-from auto_archiver.core.module import get_module

 class WhisperEnricher(Enricher):
    """
@@ -15,7 +14,7 @@ class WhisperEnricher(Enricher):

    def setup(self) -> None:
        self.stores = self.config['steps']['storages']
-        self.s3 = get_module("s3_storage", self.config)
+        self.s3 = self.module_factory.get_module("s3_storage", self.config)
        if not "s3_storage" in self.stores:
            logger.error("WhisperEnricher: To use the WhisperEnricher you need to use S3Storage so files are accessible publicly to the whisper service being called.")
            return
@@ -29,8 +28,7 @@ class WhisperEnricher(Enricher):
        job_results = {}
        for i, m in enumerate(to_enrich.media):
            if m.is_video() or m.is_audio():
-                # TODO: this used to pass all storage items to store now
-                # Now only passing S3, the rest will get added later in the usual order (?)
+                # Only storing S3, the rest will get added later in the usual order (?)
                m.store(url=url, metadata=to_enrich, storages=[self.s3])
                try:
                    job_id = self.submit_job(m)
--- a/src/auto_archiver/utils/misc.py
+++ b/src/auto_archiver/utils/misc.py
@@ -46,7 +46,7 @@ def dump_payload(p):


 def update_nested_dict(dictionary, update_dict):
-    # takes 2 dicts and overwrites the first with the second only on the changed balues
+    # takes 2 dicts and overwrites the first with the second only on the changed values
    for key, value in update_dict.items():
        if key in dictionary and isinstance(value, dict) and isinstance(dictionary[key], dict):
            update_nested_dict(dictionary[key], value)
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -3,12 +3,14 @@ pytest conftest file, for shared fixtures and configuration
 """
 import os
 import pickle
+from datetime import datetime, timezone
 from tempfile import TemporaryDirectory
 from typing import Dict, Tuple
 import hashlib
+
 import pytest
 from auto_archiver.core.metadata import Metadata
-from auto_archiver.core.module import get_module, _LAZY_LOADED_MODULES
+from auto_archiver.core.module import ModuleFactory

 # Test names inserted into this list will be run last. This is useful for expensive/costly tests
 # that you only want to run if everything else succeeds (e.g. API calls). The order here is important
@@ -20,19 +22,19 @@ TESTS_TO_RUN_LAST = ['test_twitter_api_archiver']
 def setup_module(request):
    def _setup_module(module_name, config={}):

+        module_factory = ModuleFactory()
+
        if isinstance(module_name, type):
            # get the module name:
            # if the class does not have a .name, use the name of the parent folder
            module_name = module_name.__module__.rsplit(".",2)[-2]

-        m = get_module(module_name, {module_name: config})
-
+        m = module_factory.get_module(module_name, {module_name: config})
        # add the tmp_dir to the module
        tmp_dir = TemporaryDirectory()
        m.tmp_dir = tmp_dir.name
-
+        
        def cleanup():
-            _LAZY_LOADED_MODULES.pop(module_name)
            tmp_dir.cleanup()
        request.addfinalizer(cleanup)

@@ -122,10 +124,36 @@ def pytest_runtest_setup(item):
 def unpickle():
    """
    Returns a helper function that unpickles a file
-    ** gets the file from the test_files directory: tests/data/test_files **
+    ** gets the file from the test data directory: tests/data/ **
    """
    def _unpickle(path):
-        test_data_dir = os.path.join(os.path.dirname(__file__), "data", "test_files")
-        with open(os.path.join(test_data_dir, path), "rb") as f:
+        with open(os.path.join("tests/data", path), "rb") as f:
            return pickle.load(f)
-    return _unpickle
+    return _unpickle
+
+
+@pytest.fixture
+def mock_binary_dependencies(mocker):
+    mock_shutil_which = mocker.patch("shutil.which")
+    # Mock all binary dependencies as available
+    mock_shutil_which.return_value = "/usr/bin/fake_binary"
+    return mock_shutil_which
+
+
+@pytest.fixture
+def sample_datetime():
+    return datetime(2023, 1, 1, 12, 0, tzinfo=timezone.utc)
+
+
+@pytest.fixture(autouse=True)
+def mock_sleep(mocker):
+    """Globally mock time.sleep to avoid delays."""
+    return mocker.patch("time.sleep")
+
+
+@pytest.fixture
+def metadata():
+    metadata = Metadata()
+    metadata.set("_processed_at", "2021-01-01T00:00:00")
+    metadata.set_url("https://example.com")
+    return metadata
--- a/tests/data/metadata_enricher_exif.pickle
+++ b/tests/data/metadata_enricher_exif.pickle
--- a/tests/data/metadata_enricher_ytshort_expected.pickle
+++ b/tests/data/metadata_enricher_ytshort_expected.pickle
--- a/tests/data/metadata_enricher_ytshort_input.pickle
+++ b/tests/data/metadata_enricher_ytshort_input.pickle
--- a/tests/data/test_service_account.json
+++ b/tests/data/test_service_account.json
@@ -0,0 +1,14 @@
+{
+    "type": "service_account",
+    "project_id": "some-project-id",
+    "private_key_id": "some-private-key-id",
+    "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQDPlcaFJgt7HzoC\n4z0b18PzI2R5c892mLnNwRO8DOKid5INt6z5RAWKDPdnIyHjRBx74qNZl6768pia\nztQNgnud7mKcmvOvGrpUbFx2BdAw8xTyAlRVMalOBhUS9RKvjP5WgSwR5EKwfvzy\nrGioC6ml/segz5EchSaIzgASwB17ir0w6IrymBxUeNelfzCGJpCRhqG5nG+eEjct\nUYU0QIyihRD1Lq0f3Z3D0xfTLLZ630iFBj/Wr0BCJHkl6hdVuGhnyn4S98sMX1Bd\ntaJF/lWi4jdt7SoXD3+FWv66kHPpFfINMpReuB9u0ogfYkORgiRBOMhYBkGGQjUG\nOnBTxEc3AgMBAAECgf9bKiK8DdSz0ALzQbRLhgj2B9485jHI49wjgINOyceZ23uS\nQYXaO+DFLcgLqBkVSGanuHMpU0+qCpeM0v9yXSTIW8RguWMnFd8ID/yLRktxfQa1\n1FAQh+NlF4/gnuUoM8N/FYSy6R5grfaxwU8Qfg66IQXUB52OezSVu5lxNO4G5Rwv\nJ2e/+XYBUv/H26BnQSmjFCzbJkdbtrOeThpaLwLexKcollvoHKGyus0jpWg4C9Ez\n9EJaE+on4nd+cM1Vd+dWaHXoZ9Db9IvxPBqFJE8fynap7RDBeZK678OuCvQntrp4\nrTsE9hW8073Jhl/LbhfbDC0lhFR0JUHygVGE01ECgYEA+g+ddpGGY90yhhM76bTr\nkU6WwislMmfS0WDdLPemNgzLwCtkC2vsQgzg/egxqkVF5dJ9upiFhVgpYxY7ap9U\nSGFemb6T1ASl/1yeNhd0yc4PZFsJ29k+kNgSIlJYm9KDCIMqS1wPoXvFQhbMitOf\n/gLCPugxl67c+qg6nfuODTkCgYEA1IPngESOJnV8oa2WReWrO6+u6xb/OhqdmBzI\n5yq1z3f5gb98XESZR/rCH2vAOmHIJPn3XdZHsznOuxhZwGr1oztiRIurLmBlxQoL\n7tq0jDOUVSD2yeyQwKt5LaBH94P598FiauGxXM4raREWKtcNBGoOX1u1+kEBsoL4\ntf10Z+8CgYEA3QFkB+ECR8y91KW3NAzEjj5JG/8J9wyv1IGpuQ5/hhG1Gni/CSEv\nRAkh6QaIrpZe+ooYuQwIJhwPKBYEGW4MDZSRCYzYFnCtTY5L/j6o55sJG4cipX3R\nwC5XiKIC0mUxjhpvDP+miPBdHNYNnT0AkH1btEF/YzIW+Coq9GnZ2HECgYAOOpax\ne+WYpZ0mphy9qVcBtA2eJ/gGx+ltWeAJuk5aCcpm6Y9GDkHFFAETYX+JaSqhbysk\n2UgLs/8nf8XioEa6GyvFMyTPAh1OSBHseDBGgt2XpZFgi7pVbCW87FJlPCzsbcJN\nLbdWY2d8rWwyihuRBBjaQaW5j8ixTxuf88xreQKBgQCST4Fr8C5CkpakTA+KOost\nLOlziUBm0534mTg7dTcOE1H1+gxtqpXlXcJylpGz1lUXRlHCIutN5iPJcN5cxFES\nsP7wBd7BhficsMKDiWPm9XbP2zXVZu0ldUxA1mONMsS1P4p7i3Dh4uzrRDmSkTUL\njUpppYDumg3oM7wSJ6sTQA==\n-----END PRIVATE KEY-----",
+    "client_email": "some-email",
+    "client_id": "some-client-email",
+    "auth_uri": "https://example.com/o/oauth2/auth",
+    "token_uri": "https://oauth2.example.com/token",
+    "auth_provider_x509_cert_url": "https://www.example.com/oauth2/v1/certs",
+    "client_x509_cert_url": "https://www.example.com/robot/v1/metadata/x509/some-email",
+    "universe_domain": "example.com"
+  }
+  
--- a/tests/databases/test_api_db.py
+++ b/tests/databases/test_api_db.py
@@ -0,0 +1,59 @@
+import pytest
+
+from auto_archiver.core import Metadata
+from auto_archiver.modules.api_db import AAApiDb
+
+
+@pytest.fixture
+def api_db(setup_module):
+    configs: dict = {
+        "api_endpoint": "https://api.example.com",
+        "api_token": "test-token",
+        "public": False,
+        "author_id": "Someone",
+        "group_id": "123",
+        "use_api_cache": True,
+        "store_results": True,
+        "tags": "[]",
+    }
+    return setup_module(AAApiDb, configs)
+
+
+def test_fetch_no_cache(api_db, metadata):
+    # Test fetch
+    api_db.use_api_cache = False
+    assert api_db.fetch(metadata) is None
+
+
+def test_fetch_fail_status(api_db, metadata, mocker):
+    # Test response fail in fetch method
+    mock_get = mocker.patch("auto_archiver.modules.api_db.api_db.requests.get")
+    mock_get.return_value.status_code = 400
+    mock_get.return_value.json.return_value = {}
+    mock_error = mocker.patch("loguru.logger.error")
+    assert api_db.fetch(metadata) is False
+    mock_error.assert_called_once_with("AA API FAIL (400): {}")
+
+
+def test_fetch(api_db, metadata, mocker):
+    # Test successful fetch method
+    mock_get = mocker.patch("auto_archiver.modules.api_db.api_db.requests.get")
+    mock_datetime = mocker.patch("auto_archiver.core.metadata.datetime.datetime")
+    mock_datetime.now.return_value = "2021-01-01T00:00:00"
+    mock_get.return_value.status_code = 200
+    mock_get.return_value.json.return_value = [{"result": {}}, {"result":
+        {'media': [], 'metadata': {'_processed_at': '2021-01-01T00:00:00', 'url': 'https://example.com'},
+         'status': 'no archiver'}}]
+    assert api_db.fetch(metadata) == metadata
+
+
+def test_done_success(api_db, metadata, mocker):
+    mock_post = mocker.patch("auto_archiver.modules.api_db.api_db.requests.post")
+    mock_post.return_value.status_code = 201
+    api_db.done(metadata)
+    mock_post.assert_called_once()
+    mock_post.assert_called_once_with("https://api.example.com/interop/submit-archive",
+                                      json={'author_id': 'Someone', 'url': 'https://example.com',
+                                            'public': False, 'group_id': '123', 'tags': ['[', ']'], 'result': '{"status": "no archiver", "metadata": {"_processed_at": "2021-01-01T00:00:00", "url": "https://example.com"}, "media": []}'},
+                                      headers={'Authorization': 'Bearer test-token'})
+
--- a/tests/databases/test_atlos_db.py
+++ b/tests/databases/test_atlos_db.py
@@ -0,0 +1,110 @@
+import pytest
+from datetime import datetime
+
+from auto_archiver.core import Metadata
+from auto_archiver.modules.atlos_db import AtlosDb
+
+
+class FakeAPIResponse:
+    """Simulate a response object."""
+
+    def __init__(self, data: dict, raise_error: bool = False) -> None:
+        self._data = data
+        self.raise_error = raise_error
+
+    def raise_for_status(self) -> None:
+        if self.raise_error:
+            raise Exception("HTTP error")
+
+
+@pytest.fixture
+def atlos_db(setup_module) -> AtlosDb:
+    """Fixture for AtlosDb."""
+    configs: dict = {
+        "api_token": "abc123",
+        "atlos_url": "https://platform.atlos.org",
+    }
+    return setup_module("atlos_db", configs)
+
+
+def test_failed_no_atlos_id(atlos_db, metadata, mocker):
+    """Test failed() skips posting when no atlos_id present."""
+    post_mock = mocker.patch("requests.post")
+    atlos_db.failed(metadata, "failure reason")
+    post_mock.assert_not_called()
+
+
+def test_failed_with_atlos_id(atlos_db, metadata, mocker):
+    """Test failed() posts failure when atlos_id is present."""
+    metadata.set("atlos_id", 42)
+    fake_resp = FakeAPIResponse({}, raise_error=False)
+    post_mock = mocker.patch("requests.post", return_value=fake_resp)
+    atlos_db.failed(metadata, "failure reason")
+    expected_url = (
+        f"{atlos_db.atlos_url}/api/v2/source_material/metadata/42/auto_archiver"
+    )
+    expected_headers = {"Authorization": f"Bearer {atlos_db.api_token}"}
+    expected_json = {
+        "metadata": {"processed": True, "status": "error", "error": "failure reason"}
+    }
+    post_mock.assert_called_once_with(
+        expected_url, headers=expected_headers, json=expected_json
+    )
+
+
+def test_failed_http_error(atlos_db, metadata, mocker):
+    """Test failed() raises exception on HTTP error."""
+    metadata.set("atlos_id", 42)
+    fake_resp = FakeAPIResponse({}, raise_error=True)
+    mocker.patch("requests.post", return_value=fake_resp)
+    with pytest.raises(Exception, match="HTTP error"):
+        atlos_db.failed(metadata, "failure reason")
+
+
+def test_fetch_returns_false(atlos_db):
+    """Test fetch() always returns False."""
+    item = Metadata()
+    assert atlos_db.fetch(item) is False
+
+
+def test_done_no_atlos_id(atlos_db, mocker):
+    """Test done() skips posting when no atlos_id present."""
+    item = Metadata().set_url("http://example.com")
+    post_mock = mocker.patch("requests.post")
+    atlos_db.done(item)
+    post_mock.assert_not_called()
+
+
+def test_done_with_atlos_id(atlos_db, metadata, mocker):
+    """Test done() posts success when atlos_id is present."""
+    metadata.set("atlos_id", 99)
+    now = datetime.now()
+    metadata.set("timestamp", now)
+    fake_resp = FakeAPIResponse({}, raise_error=False)
+    post_mock = mocker.patch("requests.post", return_value=fake_resp)
+    atlos_db.done(metadata)
+    expected_url = (
+        f"{atlos_db.atlos_url}/api/v2/source_material/metadata/99/auto_archiver"
+    )
+    expected_headers = {"Authorization": f"Bearer {atlos_db.api_token}"}
+    expected_results = metadata.metadata.copy()
+    expected_results["timestamp"] = now.isoformat()
+    expected_json = {
+        "metadata": {
+            "processed": True,
+            "status": "success",
+            "results": expected_results,
+        }
+    }
+    post_mock.assert_called_once_with(
+        expected_url, headers=expected_headers, json=expected_json
+    )
+
+
+def test_done_http_error(atlos_db, metadata, mocker):
+    """Test done() raises exception on HTTP error."""
+    metadata.set("atlos_id", 123)
+    fake_resp = FakeAPIResponse({}, raise_error=True)
+    mocker.patch("requests.post", return_value=fake_resp)
+    with pytest.raises(Exception, match="HTTP error"):
+        atlos_db.done(metadata)
--- a/tests/databases/test_gsheet_db.py
+++ b/tests/databases/test_gsheet_db.py
@@ -1,6 +1,4 @@
 from datetime import datetime, timezone
-from unittest.mock import MagicMock, patch
-
 import pytest

 from auto_archiver.core import Metadata, Media
@@ -9,8 +7,8 @@ from auto_archiver.modules.gsheet_feeder import GWorksheet


@pytest.fixture
-def mock_gworksheet():
-    mock_gworksheet = MagicMock(spec=GWorksheet)
+def mock_gworksheet(mocker):
+    mock_gworksheet = mocker.MagicMock(spec=GWorksheet)
    mock_gworksheet.col_exists.return_value = True
    mock_gworksheet.get_cell.return_value = ""
    mock_gworksheet.get_row.return_value = {}
@@ -18,14 +16,14 @@ def mock_gworksheet():


@pytest.fixture
-def mock_metadata():
-    metadata: Metadata = MagicMock(spec=Metadata)
+def mock_metadata(mocker):
+    metadata: Metadata = mocker.MagicMock(spec=Metadata)
    metadata.get_url.return_value = "http://example.com"
    metadata.status = "done"
    metadata.get_title.return_value = "Example Title"
    metadata.get.return_value = "Example Content"
    metadata.get_timestamp.return_value = "2025-01-01T00:00:00"
-    metadata.get_final_media.return_value = MagicMock(spec=Media)
+    metadata.get_final_media.return_value = mocker.MagicMock(spec=Media)
    metadata.get_all_media.return_value = []
    metadata.get_media_by_id.return_value = None
    metadata.get_first_image.return_value = None
@@ -47,21 +45,21 @@ def metadata():


@pytest.fixture
-def mock_media():
+def mock_media(mocker):
    """Fixture for a mock Media object."""
-    mock_media = MagicMock(spec=Media)
+    mock_media = mocker.MagicMock(spec=Media)
    mock_media.urls = ["http://example.com/media"]
    mock_media.get.return_value = "not-calculated"
    return mock_media

@pytest.fixture
-def gsheets_db(mock_gworksheet, setup_module):
+def gsheets_db(mock_gworksheet, setup_module, mocker):
    db = setup_module("gsheet_db", {
        "allow_worksheets": "set()",
        "block_worksheets": "set()",
        "use_sheet_names_in_stored_paths": "True",
    })
-    db._retrieve_gsheet = MagicMock(return_value=(mock_gworksheet, 1))
+    db._retrieve_gsheet = mocker.MagicMock(return_value=(mock_gworksheet, 1))
    return db


@@ -109,27 +107,26 @@ def test_aborted(gsheets_db, mock_metadata, mock_gworksheet):
    mock_gworksheet.set_cell.assert_called_once_with(1, 'status', '')


-def test_done(gsheets_db, metadata, mock_gworksheet, expected_calls):
-    with patch("auto_archiver.modules.gsheet_db.gsheet_db.get_current_timestamp", return_value='2025-02-01T00:00:00+00:00'):
-        gsheets_db.done(metadata)
+def test_done(gsheets_db, metadata, mock_gworksheet, expected_calls, mocker):
+    mocker.patch("auto_archiver.modules.gsheet_db.gsheet_db.get_current_timestamp", return_value='2025-02-01T00:00:00+00:00')
+    gsheets_db.done(metadata)  # runs with get_current_timestamp pinned to the fixed value patched above
    mock_gworksheet.batch_set_cell.assert_called_once_with(expected_calls)


-def test_done_cached(gsheets_db, metadata, mock_gworksheet):
-    with patch("auto_archiver.modules.gsheet_db.gsheet_db.get_current_timestamp", return_value='2025-02-01T00:00:00+00:00'):
-        gsheets_db.done(metadata, cached=True)
+def test_done_cached(gsheets_db, metadata, mock_gworksheet, mocker):
+    mocker.patch("auto_archiver.modules.gsheet_db.gsheet_db.get_current_timestamp", return_value='2025-02-01T00:00:00+00:00')
+    gsheets_db.done(metadata, cached=True)  # cached=True is the only difference from test_done's call

    # Verify the status message includes "[cached]"
    call_args = mock_gworksheet.batch_set_cell.call_args[0][0]
    assert any(call[2].startswith("[cached]") for call in call_args)


-def test_done_missing_media(gsheets_db, metadata, mock_gworksheet):
+def test_done_missing_media(gsheets_db, metadata, mock_gworksheet, mocker):
    # clear media from metadata
    metadata.media = []
-    with patch("auto_archiver.modules.gsheet_db.gsheet_db.get_current_timestamp",
-               return_value='2025-02-01T00:00:00+00:00'):
-        gsheets_db.done(metadata)
+    mocker.patch("auto_archiver.modules.gsheet_db.gsheet_db.get_current_timestamp", return_value='2025-02-01T00:00:00+00:00')
+    gsheets_db.done(metadata)
    # Verify nothing media-related gets updated
    call_args = mock_gworksheet.batch_set_cell.call_args[0][0]
    media_fields = {'archive', 'screenshot', 'thumbnail', 'wacz', 'replaywebpage'}
--- a/tests/enrichers/test_hash_enricher.py
+++ b/tests/enrichers/test_hash_enricher.py
@@ -2,7 +2,7 @@ import pytest

 from auto_archiver.modules.hash_enricher import HashEnricher
 from auto_archiver.core import Metadata, Media
-from auto_archiver.core.module import get_module_lazy
+from auto_archiver.core.module import ModuleFactory

@pytest.mark.parametrize("algorithm, filename, expected_hash", [
    ("SHA-256", "tests/data/testfile_1.txt", "1b4f0e9851971998e732078544c96b36c3d01cedf7caa332359d6f1d83567014"),
@@ -22,7 +22,7 @@ def test_default_config_values(setup_module):

 def test_config():
    # test default config
-    c = get_module_lazy('hash_enricher').configs
+    c = ModuleFactory().get_module_lazy('hash_enricher').configs
    assert c["algorithm"]["default"] == "SHA-256"
    assert c["chunksize"]["default"] == 16000000
    assert c["algorithm"]["choices"] == ["SHA-256", "SHA3-512"]
--- a/tests/enrichers/test_meta_enricher.py
+++ b/tests/enrichers/test_meta_enricher.py
@@ -1,6 +1,5 @@
 import datetime
 from datetime import datetime, timedelta, timezone
-from unittest.mock import MagicMock, patch

 import pytest

@@ -9,29 +8,21 @@ from auto_archiver.modules.meta_enricher import MetaEnricher


@pytest.fixture
-def mock_metadata():
+def mock_metadata(mocker):  # mocker: presumably the pytest-mock fixture; it supplies MagicMock below
    """Creates a mock Metadata object."""
-    mock: Metadata = MagicMock(spec=Metadata)
+    mock: Metadata = mocker.MagicMock(spec=Metadata)  # spec=Metadata limits the mock to Metadata's attribute names
    mock.get_url.return_value = "https://example.com"
    mock.is_empty.return_value = False  # Default to not empty
    mock.get_all_media.return_value = []
    return mock

@pytest.fixture
-def mock_media():
+def mock_media(mocker):  # mocker: presumably the pytest-mock fixture; it supplies MagicMock below
    """Creates a mock Media object."""
-    mock: Media = MagicMock(spec=Media)
+    mock: Media = mocker.MagicMock(spec=Media)  # spec=Media limits the mock to Media's attribute names
    mock.filename = "mock_file.txt"
    return mock

-@pytest.fixture
-def metadata():
-    m = Metadata()
-    m.set_url("https://example.com")
-    m.set_title("Test Title")
-    m.set_content("Test Content")
-    return m
-

@pytest.fixture(autouse=True)
 def meta_enricher(setup_module):
@@ -90,14 +81,14 @@ def test_enrich_file_sizes_no_media(meta_enricher, metadata):
    assert metadata.get("total_size") == "0.0 bytes"


-def test_enrich_archive_duration(meta_enricher, metadata):
+def test_enrich_archive_duration(meta_enricher, metadata, mocker):
    # Set fixed "processed at" time in the past
    processed_at = datetime.now(timezone.utc) - timedelta(minutes=10, seconds=30)
    metadata.set("_processed_at", processed_at)
    # patch datetime
-    with patch("datetime.datetime") as mock_datetime:
-        mock_now = datetime.now(timezone.utc)
-        mock_datetime.now.return_value = mock_now
-        meta_enricher.enrich_archive_duration(metadata)
+    mock_datetime = mocker.patch("datetime.datetime")  # swaps the `datetime` attribute of the datetime module for a mock
+    mock_now = datetime.now(timezone.utc)  # `datetime` here is the class this file imported by name, so this is a real timestamp
+    mock_datetime.now.return_value = mock_now
+    meta_enricher.enrich_archive_duration(metadata)  # no `with` scope any more: the patch stays active until test teardown

    assert metadata.get("archive_duration_seconds") == 630
--- a/tests/enrichers/test_metadata_enricher.py
+++ b/tests/enrichers/test_metadata_enricher.py
@@ -0,0 +1,88 @@
+
+import pytest
+
+from auto_archiver.core import Media
+
+
+@pytest.fixture
+def mock_media(mocker):
+    """Build a ``Media``-spec'd mock that carries a fixed filename."""
+    media_double: Media = mocker.MagicMock(spec=Media)
+    media_double.filename = "mock_file.txt"
+    return media_double
+
+
+@pytest.fixture
+def enricher(setup_module, mock_binary_dependencies):  # mock_binary_dependencies is requested but not referenced in the body
+    return setup_module("metadata_enricher", {})  # empty dict: no config values are passed
+
+
+@pytest.mark.parametrize(
+    "output,expected",
+    [
+        ("Key1: Value1\nKey2: Value2", {"Key1": "Value1", "Key2": "Value2"}),
+        ("InvalidLine", {}),
+        ("", {}),
+    ],
+)
+def test_get_metadata(enricher, output, expected, mocker):
+    """exiftool stdout is parsed into a dict; a line without a key/value pair or empty output gives ``{}``."""
+    exiftool_run = mocker.patch("subprocess.run")
+    completed = exiftool_run.return_value  # the object the patched subprocess.run hands back
+    completed.stdout, completed.stderr, completed.returncode = output, "", 0
+
+    parsed = enricher.get_metadata("test.jpg")
+
+    assert parsed == expected
+    # exactly one exiftool invocation, with these exact arguments
+    exiftool_run.assert_called_once_with(["exiftool", "test.jpg"], capture_output=True, text=True)
+
+
+def test_get_metadata_exiftool_not_found(enricher, mocker):
+    """A FileNotFoundError from subprocess.run yields an empty dict."""
+    mocker.patch("subprocess.run", side_effect=FileNotFoundError)
+    # get_metadata is expected to absorb the error rather than propagate it
+    assert enricher.get_metadata("test.jpg") == {}
+
+
+def test_enrich_sets_metadata(enricher, mocker):
+    """Only media whose metadata lookup is non-empty gets a "metadata" property set."""
+    with_exif, without_exif = (mocker.Mock(filename=n) for n in ("img1.jpg", "img2.jpg"))
+    item = mocker.Mock()
+    item.media = [with_exif, without_exif]
+    # stub the lookup: only img1.jpg reports any data
+    enricher.get_metadata = lambda path: {} if path != "img1.jpg" else {"key": "value"}
+    enricher.enrich(item)
+
+    with_exif.set.assert_called_once_with("metadata", {"key": "value"})
+    without_exif.set.assert_not_called()
+    assert item.media == [with_exif, without_exif]
+
+
+def test_enrich_empty_media(enricher, mocker):
+    """Enriching an item that has no media must complete without raising."""
+    no_media_item = mocker.Mock()
+    no_media_item.media = []
+    enricher.enrich(no_media_item)
+
+
+def test_get_metadata_error_handling(enricher, mocker):
+    """An unexpected subprocess failure is logged as an error and ``{}`` is returned."""
+    mocker.patch("subprocess.run", side_effect=Exception("Test error"))
+    error_log = mocker.patch("loguru.logger.error")
+    assert enricher.get_metadata("test.jpg") == {}
+    assert "Error occurred: " in error_log.call_args[0][0]
+
+
+def test_metadata_pickle(enricher, unpickle, mocker):
+    """Feed a pickled subprocess result to enrich() and compare with the pickled expectation."""
+    # every input below is loaded through the `unpickle` fixture
+    mocker.patch("subprocess.run", return_value=unpickle("metadata_enricher_exif.pickle"))
+    item = unpickle("metadata_enricher_ytshort_input.pickle")
+    wanted = unpickle("metadata_enricher_ytshort_expected.pickle")
+
+    enricher.enrich(item)
+
+    assert len(wanted.media) == len(item.media)
+    assert item.media[0].properties.get("metadata") == wanted.media[0].properties.get("metadata")
+
--- a/tests/enrichers/test_pdq_hash_enricher.py
+++ b/tests/enrichers/test_pdq_hash_enricher.py
@@ -0,0 +1,78 @@
+import pytest
+from PIL import UnidentifiedImageError
+
+from auto_archiver.core import Metadata, Media
+from auto_archiver.modules.pdq_hash_enricher import PdqHashEnricher
+
+
+@pytest.fixture
+def enricher(setup_module):  # NOTE(review): no test in this file requests this fixture; each builds PdqHashEnricher() itself
+    return setup_module("pdq_hash_enricher", {})  # empty dict: no config values are passed
+
+
+@pytest.fixture
+def metadata_with_images():  # Metadata for https://example.com holding two .jpg media entries
+    item = Metadata()
+    item.set_url("https://example.com")
+    for stem in ("image1", "image2"):
+        item.add_media(Media(filename=f"{stem}.jpg", key=stem))
+    return item
+
+
+def test_successful_enrich(metadata_with_images, mocker):
+    """Every media item receives a ``pdq_hash`` once hashing and image loading are stubbed."""
+    # pytest-mock undoes patches at test teardown, so no ``with`` block is needed;
+    # entering the returned mocks as context managers only enters MagicMocks (pytest-mock warns about it).
+    mocker.patch("pdqhash.compute", return_value=([1, 0, 1, 0] * 64, 100))
+    mocker.patch("PIL.Image.open")
+    mocker.patch.object(Media, "is_image", return_value=True)
+    PdqHashEnricher().enrich(metadata_with_images)
+
+    # Ensure the hash is set for image media
+    for media in metadata_with_images.media:
+        assert media.get("pdq_hash") is not None
+
+
+def test_enrich_skip_non_image(metadata_with_images, mocker):
+    """pdqhash.compute is never called when ``Media.is_image`` reports False."""
+    mocker.patch.object(Media, "is_image", return_value=False)
+    hash_fn = mocker.patch("pdqhash.compute")
+
+    PdqHashEnricher().enrich(metadata_with_images)
+    hash_fn.assert_not_called()
+
+
+def test_enrich_handles_corrupted_image(metadata_with_images, mocker):
+    """An unreadable image is logged once per media item and never hashed."""
+    mocker.patch("PIL.Image.open", side_effect=UnidentifiedImageError("Corrupted image"))
+    hash_fn = mocker.patch("pdqhash.compute")
+    error_log = mocker.patch("loguru.logger.error")
+
+    PdqHashEnricher().enrich(metadata_with_images)
+    hash_fn.assert_not_called()
+    assert error_log.call_count == len(metadata_with_images.media)
+
+
+@pytest.mark.parametrize(
+    "media_id, should_have_hash",
+    [
+        ("screenshot", False),
+        ("warc-file-123", False),
+        ("regular-image", True),
+    ]
+)
+def test_enrich_excludes_by_filetype(media_id, should_have_hash, mocker):
+    """Whether a hash is stored depends on the media ``id``; see the cases above."""
+    item = Metadata()
+    item.set_url("https://example.com")
+    item.add_media(Media(filename="image.jpg").set("id", media_id))
+
+    mocker.patch("pdqhash.compute", return_value=([1, 0, 1, 0] * 64, 100))
+    mocker.patch("PIL.Image.open")
+    mocker.patch.object(Media, "is_image", return_value=True)
+
+    PdqHashEnricher().enrich(item)
+
+    got_hash = item.media[0].get("pdq_hash") is not None
+    assert got_hash == should_have_hash
+
--- a/tests/enrichers/test_screenshot_enricher.py
+++ b/tests/enrichers/test_screenshot_enricher.py
@@ -0,0 +1,195 @@
+import base64
+
+import pytest
+from selenium.common.exceptions import TimeoutException
+
+from auto_archiver.core import Metadata, Media
+from auto_archiver.modules.screenshot_enricher import ScreenshotEnricher
+
+
+@pytest.fixture
+def mock_selenium_env(mocker):
+    """Patch Selenium and driver checks in one place; yields (driver, driver class, options) mocks."""
+
+    # Patch external dependencies
+    mock_which = mocker.patch("shutil.which")
+    mock_driver_class = mocker.patch("auto_archiver.utils.webdriver.CookieSettingDriver")
+    mock_binary_paths = mocker.patch("selenium.webdriver.common.selenium_manager.SeleniumManager.binary_paths")
+    mocker.patch("pathlib.Path.is_file", return_value=True)  # patched for effect only, no handle kept
+    mock_popen = mocker.patch("subprocess.Popen")
+    mocker.patch("selenium.webdriver.common.service.Service.is_connectable", return_value=True)
+    mock_firefox_options = mocker.patch("selenium.webdriver.FirefoxOptions")
+    # Define side effect for `shutil.which`: only geckodriver resolves to a path
+    def mock_which_side_effect(dep):
+        return "/mock/geckodriver" if dep == "geckodriver" else None
+    mock_which.side_effect = mock_which_side_effect
+
+    # Mock binary paths
+    mock_binary_paths.return_value = {
+        "driver_path": "/mock/driver",
+        "browser_path": "/mock/browser",
+    }
+    # Mock `subprocess.Popen`; poll() is stubbed to return None
+    mock_proc = mocker.MagicMock()
+    mock_proc.poll.return_value = None
+    mock_popen.return_value = mock_proc
+    # Mock `CookieSettingDriver`
+    mock_driver = mocker.MagicMock()
+    mock_driver_class.return_value = mock_driver
+    # Mock `FirefoxOptions`
+    mock_options_instance = mocker.MagicMock()
+    mock_firefox_options.return_value = mock_options_instance
+    yield mock_driver, mock_driver_class, mock_options_instance
+
+
+@pytest.fixture
+def common_patches(tmp_path, mocker):
+    """Stub sleeping, the auth-wall check and ``os.path.join`` for the screenshot tests."""
+    mocker.patch("time.sleep")
+    mocker.patch("auto_archiver.utils.url.is_auth_wall", return_value=False)
+    mocker.patch("os.path.join", return_value=str(tmp_path / "test.png"))  # kept last: every join now returns this one path
+    yield
+
+
+@pytest.fixture
+def screenshot_enricher(setup_module, mock_binary_dependencies) -> ScreenshotEnricher:
+    """screenshot_enricher module set up with a 1280x720 size and ``save_to_pdf`` passed as "False"."""
+    return setup_module("screenshot_enricher", {
+        "width": 1280,
+        "height": 720,
+        "timeout": 60,
+        "sleep_before_screenshot": 4,
+        "http_proxy": "",
+        "save_to_pdf": "False",
+        "print_options": {},
+    })
+
+
+@pytest.fixture
+def metadata_with_video():
+    """Metadata for https://example.com with one media item whose id is "video1"."""
+    item = Metadata().set_url("https://example.com")
+    item.add_media(Media(filename="video.mp4").set("id", "video1"))
+    return item
+
+
+def test_enrich_adds_screenshot(
+    screenshot_enricher,
+    metadata_with_video,
+    mock_selenium_env,
+    common_patches,
+    tmp_path,
+):
+    """A successful run builds the driver, loads the URL, saves a screenshot and appends it as media."""
+    driver, driver_cls, firefox_opts = mock_selenium_env
+    expected_ctor_kwargs = dict(
+        cookies=None, cookiejar=None, facebook_accept_cookies=False, options=firefox_opts,
+    )
+
+    screenshot_enricher.enrich(metadata_with_video)
+
+    driver_cls.assert_called_once_with(**expected_ctor_kwargs)
+    driver.get.assert_called_once_with("https://example.com")
+    driver.save_screenshot.assert_called_once_with(str(tmp_path / "test.png"))
+    # two media entries now: the original video plus the screenshot
+    assert len(metadata_with_video.media) == 2
+    assert metadata_with_video.media[1].properties.get("id") == "screenshot"
+
+
+@pytest.mark.parametrize(
+    "url,is_auth",
+    [
+        ("https://example.com", False),
+        ("https://private.com", True),
+    ],
+)
+def test_enrich_auth_wall(
+    screenshot_enricher,
+    metadata_with_video,
+    mock_selenium_env,
+    common_patches,
+    url,
+    is_auth,
+    mocker
+):
+    """Auth-walled URLs are never loaded; other URLs get a screenshot appended."""
+    driver, _driver_cls, _opts = mock_selenium_env
+    # overrides the is_auth_wall=False patch installed by common_patches
+    mocker.patch("auto_archiver.utils.url.is_auth_wall", return_value=is_auth)
+    metadata_with_video.set_url(url)
+    screenshot_enricher.enrich(metadata_with_video)
+
+    media_ids = [m.properties.get("id") for m in metadata_with_video.media]
+    if not is_auth:
+        driver.get.assert_called_once_with(url)
+        assert len(media_ids) == 2 and media_ids[1] == "screenshot"
+    else:
+        driver.get.assert_not_called()
+        assert media_ids == ["video1"]
+
+
+def test_handle_timeout_exception(
+    screenshot_enricher, metadata_with_video, mock_selenium_env, mocker
+):
+    """A TimeoutException from driver.get is logged at info level and adds no media."""
+    driver = mock_selenium_env[0]
+    driver.get.side_effect = TimeoutException
+    info_log = mocker.patch("loguru.logger.info")
+    screenshot_enricher.enrich(metadata_with_video)
+    info_log.assert_called_once_with("TimeoutException loading page for screenshot")
+    assert len(metadata_with_video.media) == 1
+
+
+def test_handle_general_exception(
+    screenshot_enricher, metadata_with_video, mock_selenium_env, mocker
+):
+    """An unexpected error while saving the screenshot is logged and adds no media."""
+    driver, _driver_cls, _opts = mock_selenium_env
+    # the page load succeeds, but save_screenshot raises
+    driver.get.return_value = None
+    driver.save_screenshot.side_effect = Exception("Unexpected Error")
+    error_log = mocker.patch("loguru.logger.error")
+
+    screenshot_enricher.enrich(metadata_with_video)
+
+    # reported exactly once, with the exception text appended
+    expected_message = "Got error while loading webdriver for screenshot enricher: Unexpected Error"
+    error_log.assert_called_once_with(expected_message)
+    # still just the original video
+    assert len(metadata_with_video.media) == 1
+
+
+def test_pdf_creation(mocker, screenshot_enricher, metadata_with_video, mock_selenium_env):
+    """Test PDF creation when save_to_pdf is enabled"""
+    mock_driver, _, _ = mock_selenium_env
+    # Override the save_to_pdf option
+    screenshot_enricher.save_to_pdf = True
+    # Mock the print_page method to return base64-encoded content
+    mock_driver.print_page.return_value = base64.b64encode(b"fake_pdf_content").decode("utf-8")
+    # Patch functions with mocker; only the `open` mock is inspected afterwards
+    mocker.patch("os.path.join", side_effect=lambda *args: f"{args[-1]}")
+    mocker.patch(
+        "auto_archiver.modules.screenshot_enricher.screenshot_enricher.random_str",
+        return_value="fixed123",
+    )
+    mock_open = mocker.patch("builtins.open", new_callable=mocker.mock_open)
+    mocker.patch("loguru.logger.error")
+
+    screenshot_enricher.enrich(metadata_with_video)
+    # Verify screenshot and PDF creation
+    mock_driver.save_screenshot.assert_called_once()
+    mock_driver.print_page.assert_called_once_with(mock_driver.print_options)
+    # Check that the PDF file was opened for binary writing (join is stubbed to return its last argument)
+    mock_open.assert_any_call("pdf_fixed123.pdf", "wb")
+
+    # Ensure both screenshot and PDF were added as media
+    assert len(metadata_with_video.media) == 3
+    assert metadata_with_video.media[1].properties.get("id") == "screenshot"
+    assert metadata_with_video.media[2].properties.get("id") == "pdf"
+
+
+@pytest.fixture(autouse=True)
+def cleanup_files(tmp_path):
+    yield  # everything below runs as teardown, after each test in this file
+    for leftover in tmp_path.iterdir():
+        leftover.unlink()
--- a/tests/enrichers/test_ssl_enricher.py
+++ b/tests/enrichers/test_ssl_enricher.py
@@ -0,0 +1,54 @@
+import ssl
+import pytest
+
+from auto_archiver.core import Metadata, Media
+
+
+@pytest.fixture
+def enricher(setup_module):
+    """ssl_enricher module set up with ``skip_when_nothing_archived`` passed as "True"."""
+    return setup_module(
+        "ssl_enricher", {"skip_when_nothing_archived": "True"}
+    )
+
+
+@pytest.fixture
+def metadata():
+    """Metadata for https://example.com carrying two text-file media entries."""
+    item = Metadata().set_url("https://example.com")
+    for path in ("tests/data/testfile_1.txt", "tests/data/testfile_2.txt"):
+        item.add_media(Media(path))
+    return item
+
+
+def test_http_raises(metadata, enricher):
+    """A plain-http URL is rejected with an "Invalid URL scheme" AssertionError."""
+    metadata.set_url("http://example.com")
+    with pytest.raises(AssertionError, match="Invalid URL scheme"):
+        enricher.enrich(metadata)
+
+
+def test_empty_metadata(metadata, enricher):  # with the media list emptied, enrich() must return None
+    metadata.media = []
+    assert enricher.enrich(metadata) is None
+
+
+def test_ssl_enrich(metadata, enricher, mocker):
+    """The fetched certificate is written to a .pem file and attached as one extra media entry."""
+    fetch_cert = mocker.patch("ssl.get_server_certificate", return_value="TEST_CERT")
+    opened = mocker.patch("builtins.open", mocker.mock_open())
+    media_count_before = len(metadata.media)
+    enricher.enrich(metadata)
+
+    fetch_cert.assert_called_once_with(("example.com", 443))
+    opened.assert_called_once_with(f"{enricher.tmp_dir}/example-com.pem", "w")
+    opened.return_value.write.assert_called_once_with("TEST_CERT")
+    assert len(metadata.media) == media_count_before + 1
+    assert any(media.filename.endswith("example-com.pem") for media in metadata.media)
+
+
+def test_ssl_error_handling(enricher, metadata, mocker):  # an SSLError from the certificate fetch must propagate out of enrich()
+    mocker.patch("ssl.get_server_certificate", side_effect=ssl.SSLError("SSL error"))
+    with pytest.raises(ssl.SSLError, match="SSL error"):
+        enricher.enrich(metadata)
+
--- a/tests/enrichers/test_thumbnail_enricher.py
+++ b/tests/enrichers/test_thumbnail_enricher.py
@@ -0,0 +1,148 @@
+import pytest
+from auto_archiver.core import Metadata, Media
+from auto_archiver.modules.thumbnail_enricher import ThumbnailEnricher
+
+
+@pytest.fixture
+def thumbnail_enricher(setup_module, mock_binary_dependencies) -> ThumbnailEnricher:
+    """thumbnail_enricher module set up with thumbnails_per_minute=60 and max_thumbnails=4."""
+    return setup_module("thumbnail_enricher", {
+        "thumbnails_per_minute": 60,
+        "max_thumbnails": 4,
+    })
+
+
+@pytest.fixture
+def metadata_with_video():
+    """Metadata for https://example.com with one media item whose id is "video1"."""
+    item = Metadata().set_url("https://example.com")
+    item.add_media(Media(filename="video.mp4").set("id", "video1"))
+    return item
+
+
+@pytest.fixture
+def mock_ffmpeg_environment(mocker):
+    """Mock all the ffmpeg calls in one place and return the mocks in a dict keyed by name."""
+    mock_ffmpeg_input = mocker.patch("ffmpeg.input")
+    mock_makedirs = mocker.patch("os.makedirs")
+    mocker.patch.object(Media, "is_video", return_value=True)
+    mock_probe = mocker.patch(
+        "ffmpeg.probe",
+        return_value={
+            "streams": [
+                {"codec_type": "video", "duration": "120"}
+            ]  # Default 2-minute duration, but can override in tests
+        },
+    )
+    mock_output = mocker.MagicMock()
+    mock_ffmpeg_input.return_value.filter.return_value.output.return_value = (
+        mock_output
+    )
+
+    return {
+        "mock_ffmpeg_input": mock_ffmpeg_input,
+        "mock_makedirs": mock_makedirs,
+        "mock_output": mock_output,
+        "mock_probe": mock_probe,
+    }
+
+
+@pytest.mark.parametrize("thumbnails_per_minute, max_thumbnails, expected_count", [
+    (10, 5, 5),  # Capped at max_thumbnails
+    (1, 10, 2),  # Less than max_thumbnails
+    (60, 7, 7),  # Ends up exactly at max_thumbnails
+])
+def test_enrich_thumbnail_limits(
+    thumbnail_enricher, metadata_with_video, mock_ffmpeg_environment,
+    thumbnails_per_minute, max_thumbnails, expected_count
+):
+    """Both the ffmpeg run count and the stored thumbnail count match the expected limit."""
+    thumbnail_enricher.max_thumbnails = max_thumbnails
+    thumbnail_enricher.thumbnails_per_minute = thumbnails_per_minute
+    thumbnail_enricher.enrich(metadata_with_video)
+
+    assert mock_ffmpeg_environment["mock_output"].run.call_count == expected_count
+    produced = metadata_with_video.media[0].get("thumbnails")
+    assert len(produced) == expected_count
+
+def test_enrich_handles_probe_failure(thumbnail_enricher, metadata_with_video, mocker):
+    """A failing ffmpeg.probe is logged and leaves the media without thumbnails."""
+    mocker.patch("ffmpeg.probe", side_effect=Exception("Probe error"))
+    mocker.patch("os.makedirs")
+    mock_logger = mocker.patch("loguru.logger.error")
+    mocker.patch.object(Media, "is_video", return_value=True)
+
+    thumbnail_enricher.enrich(metadata_with_video)
+    # Ensure error was logged
+    mock_logger.assert_called_with(
+        "error getting duration of video video.mp4: Probe error"
+    )
+    # Ensure no thumbnails were created
+    thumbnails = metadata_with_video.media[0].get("thumbnails")
+    assert thumbnails is None
+
+
+def test_enrich_skips_non_video_files(thumbnail_enricher, metadata_with_video, mocker):
+    mocker.patch.object(Media, "is_video", return_value=False)  # no media reports as video
+    mock_ffmpeg = mocker.patch("ffmpeg.input")
+    thumbnail_enricher.enrich(metadata_with_video)
+    mock_ffmpeg.assert_not_called()  # so ffmpeg.input must never be reached
+
+
+@pytest.mark.parametrize("thumbnails_per_minute,max_thumbnails,expected_count", [
+    (60, 5, 5),  # limited by max_thumbnails
+    (60, 20, 10),  # limited by thumbnails_per_minute
+    (0, 20, 1),  # hits the minimum (1)
+    (11, 20, 1),  # hits the minimum (1)
+    (12, 20, 2),  # limited by thumbnails_per_minute
+])
+def test_enrich_handles_short_video(
+    thumbnail_enricher, metadata_with_video, mock_ffmpeg_environment, thumbnails_per_minute, max_thumbnails, expected_count, mocker
+):
+    """Thumbnail counts for a video whose probed duration is 10 seconds."""
+    short_probe = {"streams": [{"codec_type": "video", "duration": "10"}]}
+    # replaces the 120-second probe result installed by mock_ffmpeg_environment
+    mocker.patch("ffmpeg.probe", return_value=short_probe)
+    thumbnail_enricher.max_thumbnails = max_thumbnails
+    thumbnail_enricher.thumbnails_per_minute = thumbnails_per_minute
+
+    thumbnail_enricher.enrich(metadata_with_video)
+
+    run_count = mock_ffmpeg_environment["mock_output"].run.call_count
+    assert run_count == expected_count
+    produced = metadata_with_video.media[0].get("thumbnails")
+    assert len(produced) == expected_count
+
+
+def test_uses_existing_duration(thumbnail_enricher, metadata_with_video, mock_ffmpeg_environment):
+    """With a duration already stored on the media, ffmpeg.probe is not called."""
+    ffmpeg_mocks = mock_ffmpeg_environment
+    metadata_with_video.media[0].set("duration", 60)
+    thumbnail_enricher.enrich(metadata_with_video)
+    ffmpeg_mocks["mock_probe"].assert_not_called()
+    assert ffmpeg_mocks["mock_output"].run.call_count == 4
+
+
+def test_enrich_metadata_structure(thumbnail_enricher, metadata_with_video, mock_ffmpeg_environment, mocker):
+    """Thumbnails get sequential ids and evenly spaced timestamps; the video keeps its id and gains a duration."""
+    duration_s = 120
+    probe_result = {'streams': [{'codec_type': 'video', 'duration': str(duration_s)}]}
+    mocker.patch("ffmpeg.probe", return_value=probe_result)
+    thumbnail_enricher.max_thumbnails = 4
+    thumbnail_enricher.thumbnails_per_minute = 2
+
+    thumbnail_enricher.enrich(metadata_with_video)
+
+    video = metadata_with_video.media[0]
+    # the video's own properties
+    assert video.get("id") == "video1"
+    assert video.get("duration") == duration_s
+
+    produced = video.get("thumbnails")
+    assert produced is not None
+    assert len(produced) == 4
+    # four thumbnails, 24 seconds apart
+    for position, (thumb, stamp) in enumerate(zip(produced, ["24.000s", "48.000s", "72.000s", "96.000s"])):
+        assert thumb.filename is not None
+        assert thumb.properties.get("id") == f"thumbnail_{position}"
+        assert thumb.properties.get("timestamp") == stamp
--- a/tests/enrichers/test_wacz_enricher.py
+++ b/tests/enrichers/test_wacz_enricher.py
@@ -0,0 +1,112 @@
+import os
+from zipfile import ZipFile
+
+import pytest
+
+from auto_archiver.core import Metadata, Media
+
+
+@pytest.fixture
+def wacz_enricher(setup_module, mock_binary_dependencies):
+    """wacz_enricher module: extract_screenshot on, extract_media off, no profile or proxy values."""
+    settings: dict = {
+        "profile": None,
+        "docker_commands": None,
+        "timeout": 120,
+        "extract_media": False,
+        "extract_screenshot": True,
+        "socks_proxy_host": None,
+        "socks_proxy_port": None,
+        "proxy_server": None,
+    }
+    return setup_module("wacz_enricher", settings)
+
+
+def test_setup_without_docker(wacz_enricher, mocker):
+    mocker.patch.dict(os.environ, {"RUNNING_IN_DOCKER": "1"}, clear=True)  # clear=True: os.environ holds only this key during the test
+    wacz_enricher.setup()  # invoked here so it runs under the patched environment
+    assert not wacz_enricher.docker_in_docker
+
+
+def test_setup_with_docker(wacz_enricher, mocker):
+    mocker.patch.dict(os.environ, {"WACZ_ENABLE_DOCKER": "1"}, clear=True)  # clear=True: os.environ holds only this key during the test
+    wacz_enricher.setup()  # invoked here so it runs under the patched environment
+    assert wacz_enricher.use_docker
+
+
+def test_already_ran(wacz_enricher, metadata, mocker):  # NOTE(review): checks the same path as test_enrich_already_executed below
+    metadata.add_media(Media("test.wacz"), id="browsertrix")  # pre-existing media tagged with id "browsertrix"
+    mock_log = mocker.patch("loguru.logger.info")
+    assert wacz_enricher.enrich(metadata) is True
+    assert "WACZ enricher had already been executed" in mock_log.call_args[0][0]  # first positional arg of the last info() call
+
+
+def test_basic_call_execution(wacz_enricher, mocker):
+    """enrich() calls subprocess.run with the target URL in its command list."""
+    completed = mocker.Mock(returncode=0)
+    run_mock = mocker.patch("subprocess.run", return_value=completed)
+    wacz_enricher.enrich(Metadata().set_url("https://example.com"))
+    assert run_mock.called
+    command_line = " ".join(run_mock.call_args[0][0])
+    assert "--url https://example.com" in command_line
+
+
+def test_download_success(wacz_enricher, mocker) -> None:
+    """download() returns a Metadata with status "wacz: success" when enrich() returns True."""
+    item = Metadata().set_url("https://example.com")
+    mocker.patch.object(wacz_enricher, "enrich", return_value=True)
+    outcome = wacz_enricher.download(item)
+    assert outcome is not None
+    assert isinstance(outcome, Metadata)
+    assert outcome.status == "wacz: success"
+
+
+def test_enrich_already_executed(wacz_enricher, mocker) -> None:
+    """enrich() returns True and logs when a media entry with id "browsertrix" already exists."""
+    info_log = mocker.patch("loguru.logger.info")
+    item = Metadata().set_url("https://example.com")
+    item.add_media(Media(filename="some_file.wacz"), id="browsertrix")
+
+    already_done = wacz_enricher.enrich(item)
+    assert already_done is True
+    assert "WACZ enricher had already been executed:" in info_log.call_args[0][0]
+
+
+def test_enrich_subprocess_exception(wacz_enricher, mocker, tmp_path) -> None:
+    """enrich() returns False when subprocess.run raises."""
+    wacz_enricher.extract_screenshot = True
+    wacz_enricher.extract_media = False
+    wacz_enricher.tmp_dir = str(tmp_path)
+    mocker.patch("auto_archiver.utils.misc.random_str", return_value="TESTCOL")
+    mocker.patch("subprocess.run", side_effect=Exception("fail"))
+
+    item = Metadata().set_url("https://example.com")
+    assert wacz_enricher.enrich(item) is False
+
+
+def test_extract_media(wacz_enricher, metadata, tmp_path, mocker) -> None:
+    """Test extract_media_from_wacz extracts screenshot media."""
+    wacz_enricher.tmp_dir = str(tmp_path)
+
+    # Create a *real* zip file so ZipFile won't fail.
+    wacz_file = tmp_path / "dummy.wacz"
+    with ZipFile(wacz_file, "w") as zf:
+        zf.writestr("dummy.txt", "test content")
+
+    mocker.patch("os.listdir", return_value=[])  # every directory listing comes back empty
+    warc_data = (  # one hand-written WARC "resource" record holding a PNG payload
+        b"WARC/1.0\r\n"
+        b"WARC-Type: resource\r\n"
+        b"Content-Type: image/png\r\n"
+        b"WARC-Target-URI: http://example.com/image.png\r\n"
+        b"Content-Length: 12\r\n"
+        b"\r\n"
+        b"image-bytes"
+        b"\r\n\r\nWARC/1.0\r\n\r\n"
+    )
+    mock_file = mocker.mock_open(read_data=warc_data)
+    mocker.patch("builtins.open", mock_file)  # from here on builtins.open serves warc_data
+    metadata.add_media(Media("something.wacz"), "browsertrix")  # second argument is presumably the media id (passed as id= elsewhere in this file)
+    wacz_enricher.extract_media_from_wacz(metadata, str(wacz_file))
+    assert len(metadata.media) == 2
+    assert metadata.media[1].properties.get("id") == "browsertrix-screenshot"
--- a/tests/enrichers/test_wayback_enricher.py
+++ b/tests/enrichers/test_wayback_enricher.py
@@ -0,0 +1,168 @@
+import json
+import requests
+import pytest
+from auto_archiver.modules.wayback_extractor_enricher import WaybackExtractorEnricher
+from auto_archiver.core import Metadata
+
+
+@pytest.fixture
+def mock_is_auth_wall(mocker):
+    """Factory fixture: call it with a bool to patch ``is_auth_wall`` to return that value."""
+    def _patch_auth_wall(return_value: bool):
+        return mocker.patch("auto_archiver.utils.url.is_auth_wall", return_value=return_value)
+    return _patch_auth_wall
+
+@pytest.fixture
+def mock_post_success(mocker):
+    """Factory fixture: patch ``requests.post`` to return a canned JSON response (default: a job id)."""
+    def _mock_post(json_data: dict = None, status_code: int = 200):
+        # Only substitute the default when nothing was passed: an explicit {} must stay empty.
+        resp = mocker.Mock(status_code=status_code)
+        resp.json.return_value = {"job_id": "job123"} if json_data is None else json_data
+        return mocker.patch("requests.post", return_value=resp)
+    return _mock_post
+
+@pytest.fixture
+def mock_get_success(mocker):
+    """Factory fixture: patch ``requests.get``; the default payload is used only when json_data is None."""
+    def _mock_get(json_data: dict = None, status_code: int = 200):
+        default_status = {
+            "status": "success",
+            "timestamp": "20250101010101",
+            "original_url": "https://example.com",
+        }
+        resp = mocker.Mock(status_code=status_code)
+        resp.json.return_value = default_status if json_data is None else json_data
+        return mocker.patch("requests.get", return_value=resp)
+    return _mock_get
+
+@pytest.fixture
+def wayback_extractor_enricher(setup_module) -> WaybackExtractorEnricher:
+    """Build the ``wayback_extractor_enricher`` module via ``setup_module`` with dummy credentials."""
+    settings = dict(
+        timeout=5,
+        if_not_archived_within=None,
+        key="somekey",
+        secret="secret",
+        proxy_http=None, proxy_https=None,
+    )
+    return setup_module("wayback_extractor_enricher", settings)
+
+
+def test_download_success(
+    wayback_extractor_enricher, mock_is_auth_wall, mock_post_success, mock_get_success
+):
+    """``download`` stores the archived snapshot URL under the ``wayback`` key."""
+    # Happy path: no auth wall, POST returns a job id, status GET reports success.
+    mock_is_auth_wall(False)
+    mock_post_success()
+    mock_get_success()
+    # Minimal metadata (URL only) to allow merge
+    item = Metadata().set_url("https://example.com")
+    expected = "https://web.archive.org/web/20250101010101/https://example.com"
+    archived = wayback_extractor_enricher.download(item)
+    assert archived.get("wayback") == expected
+
+def test_enrich_auth_wall(wayback_extractor_enricher, metadata, mock_is_auth_wall):
+    """When ``is_auth_wall`` reports True, ``enrich`` returns None."""
+    mock_is_auth_wall(True)
+    assert wayback_extractor_enricher.enrich(metadata) is None
+
+def test_enrich_already_enriched(wayback_extractor_enricher, metadata):
+    """With a ``wayback`` value already set on the metadata, ``enrich`` returns True."""
+    metadata.set("wayback", "existing")
+    assert wayback_extractor_enricher.enrich(metadata) is True
+
+def test_enrich_post_failure(
+    wayback_extractor_enricher, metadata, mock_is_auth_wall, mock_post_success
+):
+    """A 500 answer to the POST makes ``enrich`` fail and records the error under ``wayback``."""
+    mock_is_auth_wall(False)
+    mock_post_success(json_data={"error": "server error"}, status_code=500)
+    outcome = wayback_extractor_enricher.enrich(metadata)
+    assert outcome is False
+    # The failure message is stored on the metadata itself.
+    recorded = metadata.get("wayback")
+    assert "Internet archive failed with status of 500" in recorded
+
+def test_enrich_post_json_decode_error(
+    wayback_extractor_enricher, metadata, mock_is_auth_wall, mocker
+):
+    """``enrich`` returns False when the POST response body cannot be decoded as JSON."""
+    mock_is_auth_wall(False)
+    # Status is 200, but decoding the body raises.
+    broken = mocker.Mock(status_code=200)
+    broken.json.side_effect = json.decoder.JSONDecodeError("msg", "doc", 0)
+    broken.text = "invalid json"
+    mocker.patch("requests.post", return_value=broken)
+    outcome = wayback_extractor_enricher.enrich(metadata)
+    assert outcome is False
+
+def test_enrich_no_job_id(
+    wayback_extractor_enricher, metadata, mock_is_auth_wall, mock_post_success
+):
+    """``enrich`` returns False when the POST response JSON carries no ``job_id``."""
+    mock_is_auth_wall(False)
+    # Use a non-empty payload without a job_id: a falsy {} can be mistaken for
+    # "no payload given" and replaced by a default response that does contain a job_id.
+    mock_post_success(json_data={"message": "no job id in this response"})
+    assert wayback_extractor_enricher.enrich(metadata) is False
+
+def test_enrich_get_success(
+    wayback_extractor_enricher, metadata, mock_is_auth_wall, mock_post_success, mock_get_success
+):
+    """A successful POST and status GET store both the snapshot URL and the lookup URL."""
+    mock_is_auth_wall(False)
+    mock_post_success()
+    mock_get_success()
+    snapshot_url = "https://web.archive.org/web/20250101010101/https://example.com"
+    lookup_url = "https://web.archive.org/web/*/https://example.com"
+    outcome = wayback_extractor_enricher.enrich(metadata)
+    assert outcome is True
+    assert metadata.get("wayback") == snapshot_url
+    assert metadata.get("check wayback") == lookup_url
+
+def test_enrich_get_failure(
+    wayback_extractor_enricher, metadata, mock_is_auth_wall, mock_post_success, mock_get_success
+):
+    """``enrich`` returns False when the status GET answers 400 with a failed status."""
+    mock_is_auth_wall(False)
+    # The POST succeeds with the default job id ...
+    mock_post_success()
+    # ... but the follow-up status request reports a failure.
+    mock_get_success(json_data={"status": "failed"}, status_code=400)
+    outcome = wayback_extractor_enricher.enrich(metadata)
+    assert outcome is False
+
+def test_enrich_get_request_exception(
+    wayback_extractor_enricher, metadata, mock_is_auth_wall, mock_post_success, mocker
+):
+    """If every status GET raises, ``enrich`` still returns True and keeps the job id."""
+    mock_is_auth_wall(False)
+    mock_post_success()
+    # Every status request raises; time.sleep is stubbed out as well.
+    network_error = requests.exceptions.RequestException("error")
+    mocker.patch("requests.get", side_effect=network_error)
+    mocker.patch("time.sleep", return_value=None)
+    outcome = wayback_extractor_enricher.enrich(metadata)
+    # check it still enriches the job_id information
+    assert outcome is True
+    assert metadata.get("wayback").get("job_id") == "job123"
+
+def test_enrich_get_json_decode_error(
+    wayback_extractor_enricher, metadata, mock_is_auth_wall, mock_post_success, mocker
+):
+    """If the status GET body is not valid JSON, ``enrich`` still keeps the job id."""
+    mock_is_auth_wall(False)
+    mock_post_success()
+    # Status response whose .json() raises a decode error.
+    undecodable = mocker.Mock()
+    undecodable.text = "invalid json"
+    decode_error = json.decoder.JSONDecodeError("msg", "doc", 0)
+    undecodable.json.side_effect = decode_error
+    mocker.patch("time.sleep", return_value=None)
+    mocker.patch("requests.get", return_value=undecodable)
+    outcome = wayback_extractor_enricher.enrich(metadata)
+    # check it still enriches the job_id information
+    assert outcome is True
+    assert metadata.get("wayback").get("job_id") == "job123"
--- a/tests/enrichers/test_whisper_enricher.py
+++ b/tests/enrichers/test_whisper_enricher.py
@@ -0,0 +1,133 @@
+import pytest
+
+from auto_archiver.core import Metadata, Media
+from auto_archiver.modules.s3_storage import S3Storage
+from auto_archiver.modules.whisper_enricher import WhisperEnricher
+
+TEST_S3_URL = "http://cdn.example.com/test.mp4"
+
+
+@pytest.fixture
+def enricher(mocker):
+    """Yield ``(WhisperEnricher, mocked S3Storage)`` configured against a fake API endpoint."""
+    config = {
+        "api_endpoint": "http://testapi",
+        "api_key": "whisper-key",
+        "include_srt": False,
+        "timeout": 5,
+        "action": "translate",
+        "steps": {"storages": ["s3_storage"]}
+    }
+    mock_s3 = mocker.MagicMock(spec=S3Storage)  # spec limits the mock to S3Storage's attributes
+    mock_s3.get_cdn_url.return_value = TEST_S3_URL
+    instance = WhisperEnricher()
+    instance.name = "whisper_enricher"
+    instance.display_name = "Whisper Enricher"
+    instance.config_setup({instance.name: config})  # config is passed keyed by the module name
+    # bypassing the setup method and mocking S3 setup
+    instance.stores = config['steps']['storages']
+    instance.s3 = mock_s3
+    yield instance, mock_s3
+
+
+@pytest.fixture
+def metadata():
+    """Provide a Metadata item pre-populated with a test URL and title."""
+    item = Metadata().set_url("http://test.url")
+    item.set_title("test title")
+    return item
+
+
+@pytest.fixture
+def mock_requests(mocker):
+    """Patch the enricher module's ``requests``; POST answers 201 with job id ``job123``."""
+    created = mocker.MagicMock(status_code=201)
+    created.json.return_value = {"id": "job123"}
+    patched = mocker.patch("auto_archiver.modules.whisper_enricher.whisper_enricher.requests")
+    patched.post.return_value = created
+    yield patched
+
+
+def test_successful_job_submission(enricher, metadata, mock_requests, mocker):
+    """Run ``enrich`` end to end against mocked POST/GET calls and check the stored ``whisper_model`` data."""
+    whisper, mock_s3 = enricher
+    # Configure mock S3 URL to match test expectation
+    mock_s3.get_cdn_url.return_value = TEST_S3_URL
+
+    # Create test media with matching CDN URL
+    m = Media("test.mp4")
+    m.mimetype = "video/mp4"
+    m.add_url(mock_s3.get_cdn_url.return_value)
+    metadata.media = [m]
+
+    # Mock the two GET responses: job status first, then the job artifacts
+    mock_status_response = mocker.MagicMock()
+    mock_status_response.status_code = 200
+    mock_status_response.json.return_value = {
+        "status": "success",
+        "meta": {}
+    }
+    mock_artifacts_response = mocker.MagicMock()
+    mock_artifacts_response.status_code = 200
+    mock_artifacts_response.json.return_value = [{
+        "data": [{"start": 0, "end": 5, "text": "test transcript"}]
+    }]
+    # Set up mock response sequence (side_effect hands them out one per call)
+    mock_requests.get.side_effect = [
+        mock_status_response,  # First call: status check
+        mock_artifacts_response  # Second call: artifacts check
+    ]
+
+    # Run enrichment (without opening file)
+    whisper.enrich(metadata)
+    # Check API interactions: exactly one job submission with the CDN URL and configured action
+    mock_requests.post.assert_called_once_with(
+        "http://testapi/jobs",
+        json={"url": "http://cdn.example.com/test.mp4", "type": "translate"},
+        headers={"Authorization": "Bearer whisper-key"}
+    )
+    # Verify job status checks: both queued GET responses were consumed
+    assert mock_requests.get.call_count == 2
+    assert "artifact_0_text" in metadata.media[0].get("whisper_model")
+    assert metadata.media[0].get("whisper_model") == {'artifact_0_text': 'test transcript',
+                                                      'job_artifacts_check': 'http://testapi/jobs/job123/artifacts',
+                                                      'job_id': 'job123',
+                                                      'job_status_check': 'http://testapi/jobs/job123'}
+
+
+def test_submit_job(enricher, mocker):
+    """``submit_job`` returns the job id carried by a 201 response."""
+    whisper, _ = enricher
+    # Media whose URL equals the CDN URL reported by the mocked S3 storage.
+    media = Media("test.mp4")
+    media.add_url(TEST_S3_URL)
+    # The API answers 201 with a job id.
+    created = mocker.MagicMock(status_code=201)
+    created.json.return_value = {"id": "job123"}
+    api = mocker.patch("auto_archiver.modules.whisper_enricher.whisper_enricher.requests")
+    api.post.return_value = created
+    assert whisper.submit_job(media) == "job123"
+
+
+def test_submit_raises_status(enricher, mocker):
+    """A 400 answer from the API makes ``submit_job`` raise AssertionError with the status code."""
+    whisper, _ = enricher
+    media = Media("test.mp4")
+    media.add_url(TEST_S3_URL)
+    rejected = mocker.MagicMock(status_code=400)
+    rejected.json.return_value = {"id": "job123"}
+    api = mocker.patch("auto_archiver.modules.whisper_enricher.whisper_enricher.requests")
+    api.post.return_value = rejected
+    with pytest.raises(AssertionError) as exc_info:
+        whisper.submit_job(media)
+    assert str(exc_info.value) == "calling the whisper api http://testapi returned a non-success code: 400"
+
+
+def test_submit_job_fails(enricher):
+    """``submit_job`` raises AssertionError for media whose URL is not the S3 CDN URL."""
+    whisper, _ = enricher
+    # The fixture's S3 mock reports TEST_S3_URL as its CDN URL; this URL deliberately differs.
+    media = Media("test.mp4")
+    media.add_url("http://cdn.wrongurl.com/test.mp4")
+    with pytest.raises(AssertionError):
+        whisper.submit_job(media)
--- a/tests/extractors/test_generic_extractor.py
+++ b/tests/extractors/test_generic_extractor.py
@@ -9,6 +9,7 @@ import pytest
 from auto_archiver.modules.generic_extractor.generic_extractor import GenericExtractor
 from .test_extractor_base import TestExtractorBase

+CI = os.getenv("GITHUB_ACTIONS", "") == "true"  # True only when GITHUB_ACTIONS is set to "true"
 class TestGenericExtractor(TestExtractorBase):
    """Tests Generic Extractor
    """
@@ -77,10 +78,11 @@ class TestGenericExtractor(TestExtractorBase):
        result = self.extractor.download(item)
        assert not result

-
+    @pytest.mark.skipif(CI, reason="Currently no way to authenticate when on CI. Youtube (yt-dlp) doesn't support logging in with username/password.")
    @pytest.mark.download
    def test_youtube_download(self, make_item):
        # url https://www.youtube.com/watch?v=5qap5aO4i9A
+
        item = make_item("https://www.youtube.com/watch?v=J---aiyznGQ")
        result = self.extractor.download(item)
        assert result.get_url() == "https://www.youtube.com/watch?v=J---aiyznGQ"
@@ -114,6 +116,7 @@ class TestGenericExtractor(TestExtractorBase):
        result = self.extractor.download(item)
        assert result is not False
    
+    @pytest.mark.skipif(CI, reason="Truth social blocks GH actions.")
    @pytest.mark.download
    def test_truthsocial_download_video(self, make_item):
        item = make_item("https://truthsocial.com/@DaynaTrueman/posts/110602446619561579")
@@ -121,18 +124,21 @@ class TestGenericExtractor(TestExtractorBase):
        assert len(result.media) == 1
        assert result is not False

+    @pytest.mark.skipif(CI, reason="Truth social blocks GH actions.")
    @pytest.mark.download
    def test_truthsocial_download_no_media(self, make_item):
        item = make_item("https://truthsocial.com/@bbcnewa/posts/109598702184774628")
        result = self.extractor.download(item)
        assert result is not False
    
+    @pytest.mark.skipif(CI, reason="Truth social blocks GH actions.")
    @pytest.mark.download
    def test_truthsocial_download_poll(self, make_item):
        item = make_item("https://truthsocial.com/@CNN_US/posts/113724326568555098")
        result = self.extractor.download(item)
        assert result is not False
    
+    @pytest.mark.skipif(CI, reason="Truth social blocks GH actions.")
    @pytest.mark.download
    def test_truthsocial_download_single_image(self, make_item):
        item = make_item("https://truthsocial.com/@mariabartiromo/posts/113861116433335006")
@@ -140,6 +146,7 @@ class TestGenericExtractor(TestExtractorBase):
        assert len(result.media) == 1
        assert result is not False

+    @pytest.mark.skipif(CI, reason="Truth social blocks GH actions.")
    @pytest.mark.download
    def test_truthsocial_download_multiple_images(self, make_item):
        item = make_item("https://truthsocial.com/@trrth/posts/113861302149349135")
--- a/tests/extractors/test_instagram_api_extractor.py
+++ b/tests/extractors/test_instagram_api_extractor.py
@@ -1,15 +1,12 @@
 from datetime import datetime
-from typing import Type

 import pytest
-from unittest.mock import patch, MagicMock

 from auto_archiver.core import Metadata
 from auto_archiver.modules.instagram_api_extractor.instagram_api_extractor import InstagramAPIExtractor
 from .test_extractor_base import TestExtractorBase


-
@pytest.fixture
 def mock_user_response():
    return {
@@ -115,74 +112,74 @@ class TestInstagramAPIExtractor(TestExtractorBase):
        # test gets text (metadata title)
        pass

-    def test_download_profile_basic(self, metadata, mock_user_response):
+    def test_download_profile_basic(self, metadata, mock_user_response, mocker):
        """Test basic profile download without full_profile"""
-        with patch.object(self.extractor, 'call_api') as mock_call, \
-                patch.object(self.extractor, 'download_from_url') as mock_download:
-            # Mock API responses
-            mock_call.return_value = mock_user_response
-            mock_download.return_value = "profile.jpg"
+        mock_call = mocker.patch.object(self.extractor, 'call_api')
+        mock_download = mocker.patch.object(self.extractor, 'download_from_url')
+        # Mock API responses
+        mock_call.return_value = mock_user_response
+        mock_download.return_value = "profile.jpg"

-            result = self.extractor.download_profile(metadata, "test_user")
-            assert result.status == "insta profile: success"
-            assert result.get_title() == "Test User"
-            assert result.get("data") == self.extractor.cleanup_dict(mock_user_response["user"])
-            # Verify profile picture download
-            mock_call.assert_called_once_with("v2/user/by/username", {"username": "test_user"})
-            mock_download.assert_called_once_with("http://example.com/profile.jpg")
-            assert len(result.media) == 1
-            assert result.media[0].filename == "profile.jpg"
+        result = self.extractor.download_profile(metadata, "test_user")
+        assert result.status == "insta profile: success"
+        assert result.get_title() == "Test User"
+        assert result.get("data") == self.extractor.cleanup_dict(mock_user_response["user"])
+        # Verify profile picture download
+        mock_call.assert_called_once_with("v2/user/by/username", {"username": "test_user"})
+        mock_download.assert_called_once_with("http://example.com/profile.jpg")
+        assert len(result.media) == 1
+        assert result.media[0].filename == "profile.jpg"

-    def test_download_profile_full(self, metadata, mock_user_response, mock_story_response):
+    def test_download_profile_full(self, metadata, mock_user_response, mock_story_response, mocker):
        """Test full profile download with stories/posts"""
-        with patch.object(self.extractor, 'call_api') as mock_call, \
-             patch.object(self.extractor, 'download_all_posts') as mock_posts, \
-             patch.object(self.extractor, 'download_all_highlights') as mock_highlights, \
-             patch.object(self.extractor, 'download_all_tagged') as mock_tagged, \
-             patch.object(self.extractor, '_download_stories_reusable') as mock_stories:
+        mock_call = mocker.patch.object(self.extractor, 'call_api')
+        mock_posts = mocker.patch.object(self.extractor, 'download_all_posts')
+        mock_highlights = mocker.patch.object(self.extractor, 'download_all_highlights')
+        mock_tagged = mocker.patch.object(self.extractor, 'download_all_tagged')
+        mock_stories = mocker.patch.object(self.extractor, '_download_stories_reusable')

-            self.extractor.full_profile = True
-            mock_call.side_effect = [
-                mock_user_response,
-                mock_story_response
-            ]
-            mock_highlights.return_value = None
-            mock_stories.return_value = mock_story_response
-            mock_posts.return_value = None
-            mock_tagged.return_value = None
+        self.extractor.full_profile = True
+        mock_call.side_effect = [
+            mock_user_response,
+            mock_story_response
+        ]
+        mock_highlights.return_value = None
+        mock_stories.return_value = mock_story_response
+        mock_posts.return_value = None
+        mock_tagged.return_value = None

-            result = self.extractor.download_profile(metadata, "test_user")
-            assert result.get("#stories") == len(mock_story_response)
-            mock_posts.assert_called_once_with(result, "123")
-            assert "errors" not in result.metadata
+        result = self.extractor.download_profile(metadata, "test_user")
+        assert result.get("#stories") == len(mock_story_response)
+        mock_posts.assert_called_once_with(result, "123")
+        assert "errors" not in result.metadata

-    def test_download_profile_not_found(self, metadata):
+    def test_download_profile_not_found(self, metadata, mocker):
        """Test profile not found error"""
-        with patch.object(self.extractor, 'call_api') as mock_call:
-            mock_call.return_value = {"user": None}
-            with pytest.raises(AssertionError) as exc_info:
-                self.extractor.download_profile(metadata, "invalid_user")
-            assert "User invalid_user not found" in str(exc_info.value)
+        mock_call = mocker.patch.object(self.extractor, 'call_api')
+        mock_call.return_value = {"user": None}
+        with pytest.raises(AssertionError) as exc_info:
+            self.extractor.download_profile(metadata, "invalid_user")
+        assert "User invalid_user not found" in str(exc_info.value)

-    def test_download_profile_error_handling(self, metadata, mock_user_response):
+    def test_download_profile_error_handling(self, metadata, mock_user_response, mocker):
        """Test error handling in full profile mode"""
-        with (patch.object(self.extractor, 'call_api') as mock_call, \
-                patch.object(self.extractor, 'download_all_highlights') as mock_highlights, \
-                patch.object(self.extractor, 'download_all_tagged') as mock_tagged, \
-                patch.object(self.extractor, '_download_stories_reusable') as stories_tagged, \
-                patch.object(self.extractor, 'download_all_posts') as mock_posts
-              ):
-            self.extractor.full_profile = True
-            mock_call.side_effect = [
-                mock_user_response,
-                Exception("Stories API failed"),
-                Exception("Posts API failed")
-            ]
-            mock_highlights.return_value = None
-            mock_tagged.return_value = None
-            stories_tagged.return_value = None
-            mock_posts.return_value = None
-            result = self.extractor.download_profile(metadata, "test_user")
+        mock_call = mocker.patch.object(self.extractor, 'call_api')
+        mock_highlights = mocker.patch.object(self.extractor, 'download_all_highlights')
+        mock_tagged = mocker.patch.object(self.extractor, 'download_all_tagged')
+        stories_tagged = mocker.patch.object(self.extractor, '_download_stories_reusable')
+        mock_posts = mocker.patch.object(self.extractor, 'download_all_posts')

-            assert result.is_success()
-            assert "Error downloading stories for test_user" in result.metadata["errors"]
+        self.extractor.full_profile = True
+        mock_call.side_effect = [
+            mock_user_response,
+            Exception("Stories API failed"),
+            Exception("Posts API failed")
+        ]
+        mock_highlights.return_value = None
+        mock_tagged.return_value = None
+        stories_tagged.return_value = None
+        mock_posts.return_value = None
+        result = self.extractor.download_profile(metadata, "test_user")
+
+        assert result.is_success()
+        assert "Error downloading stories for test_user" in result.metadata["errors"]
--- a/tests/extractors/test_instagram_tbot_extractor.py
+++ b/tests/extractors/test_instagram_tbot_extractor.py
@@ -1,94 +1,108 @@
 import os
-from typing import Type
-from unittest.mock import patch, MagicMock

 import pytest

 from auto_archiver.core import Metadata
-from auto_archiver.core.extractor import Extractor
 from auto_archiver.modules.instagram_tbot_extractor import InstagramTbotExtractor
 from tests.extractors.test_extractor_base import TestExtractorBase

-TESTFILES = os.path.join(os.path.dirname(__file__), "testfiles")
-

@pytest.fixture
-def session_file(tmpdir):
-    """Fixture to create a test session file."""
-    session_file = os.path.join(tmpdir, "test_session.session")
-    with open(session_file, "w") as f:
-        f.write("mock_session_data")
-    return session_file.replace(".session", "")
+def patch_extractor_methods(request, setup_module, mocker):
+    mocker.patch.object(InstagramTbotExtractor, '_prepare_session_file', return_value=None)
+    mocker.patch.object(InstagramTbotExtractor, '_initialize_telegram_client', return_value=None)
+    yield


-@pytest.fixture(autouse=True)
-def patch_extractor_methods(request, setup_module):
-    with patch.object(InstagramTbotExtractor, '_prepare_session_file', return_value=None), \
-            patch.object(InstagramTbotExtractor, '_initialize_telegram_client', return_value=None):
-        if hasattr(request, 'cls') and hasattr(request.cls, 'config'):
-            request.cls.extractor = setup_module("instagram_tbot_extractor", request.cls.config)
-
-        yield
-
@pytest.fixture
 def metadata_sample():
    m = Metadata()
    m.set_title("Test Title")
-    m.set_timestamp("2021-01-01T00:00:00Z")
+    m.set_timestamp("2021-01-01T00:00:00")
    m.set_url("https://www.instagram.com/p/1234567890")
    return m


-class TestInstagramTbotExtractor:
+@pytest.fixture
+def mock_telegram_client(mocker):
+    """Fixture to mock TelegramClient interactions."""
+    mock_client = mocker.patch("auto_archiver.modules.instagram_tbot_extractor.client")  # NOTE(review): confirm this patch target exists
+    instance = mocker.MagicMock()
+    mock_client.return_value = instance  # calling the patched name hands back this MagicMock
+    return instance  # NOTE(review): no test visible in this diff requests this fixture

+
+@pytest.fixture
+def extractor(setup_module, patch_extractor_methods, mocker):
    extractor_module = "instagram_tbot_extractor"
-    extractor: InstagramTbotExtractor
    config = {
        "api_id": 12345,
        "api_hash": "test_api_hash",
        "session_file": "test_session",
+        "timeout": 4
+    }
+    extractor = setup_module(extractor_module, config)
+    extractor.client = mocker.MagicMock()
+    extractor.session_file = "test_session"
+    return extractor
+
+
+def test_non_instagram_url(extractor, metadata_sample):
+    """``download`` returns False when the item's URL is a YouTube one."""
+    assert extractor.download(metadata_sample.set_url("https://www.youtube.com")) is False
+
+
+def test_download_success(extractor, metadata_sample, mocker):
+    """With both bot helpers stubbed, ``download`` succeeds and uses the caption as title."""
+    mocker.patch.object(extractor, "_send_url_to_bot", return_value=(mocker.MagicMock(), 101))
+    mocker.patch.object(extractor, "_process_messages", return_value="Sample Instagram post caption")
+    archived = extractor.download(metadata_sample)
+    assert archived.is_success() and archived.status == "insta-via-bot: success"
+    assert archived.metadata.get("title") == "Sample Instagram post caption"
+
+
+def test_download_invalid(extractor, metadata_sample, mocker):
+    mocker.patch.object(extractor, "_send_url_to_bot", return_value=(mocker.MagicMock(), 101))  # stubbed send step
+    mocker.patch.object(extractor, "_process_messages", return_value="You must enter a URL to a post")
+    assert extractor.download(metadata_sample) is False  # with this stubbed reply text, download() must report failure
+
+
+@pytest.mark.skip(reason="Requires authentication.")
+class TestInstagramTbotExtractorReal(TestExtractorBase):
+    # To run these tests set the TELEGRAM_API_ID and TELEGRAM_API_HASH environment variables, and ensure the session file exists.
+    # Note these are true at this point in time, but changes to source media could be reason for failure.
+    extractor_module = "instagram_tbot_extractor"
+    extractor: InstagramTbotExtractor
+    config = {
+        "api_id": os.environ.get("TELEGRAM_API_ID"),
+        "api_hash": os.environ.get("TELEGRAM_API_HASH"),
+        "session_file": "secrets/anon-insta",
    }

-    @pytest.fixture
-    def mock_telegram_client(self):
-        """Fixture to mock TelegramClient interactions."""
-        with patch("auto_archiver.modules.instagram_tbot_extractor._initialize_telegram_client") as mock_client:
-            instance = MagicMock()
-            mock_client.return_value = instance
-            yield instance
-
-    def test_extractor_is_initialized(self):
-        assert self.extractor is not None
-
-
-    @patch("time.sleep")
-    @pytest.mark.parametrize("url, expected_status, bot_responses", [
-        ("https://www.instagram.com/p/C4QgLbrIKXG", "insta-via-bot: success", [MagicMock(id=101, media=None, message="Are you new to Bellingcat? - The way we share our investigations is different. 💭\nWe want you to read our story but also learn ou")]),
-        ("https://www.instagram.com/reel/DEVLK8qoIbg/", "insta-via-bot: success", [MagicMock(id=101, media=None, message="Our volunteer community is at the centre of many incredible Bellingcat investigations and tools. Stephanie Ladel is one such vol")]),
-        # todo tbot not working for stories :(
-        ("https://www.instagram.com/stories/bellingcatofficial/3556336382743057476/", False, [MagicMock(id=101, media=None, message="Media not found or unavailable")]),
-        ("https://www.youtube.com/watch?v=ymCMy8OffHM", False, []),
-        ("https://www.instagram.com/p/INVALID", False, [MagicMock(id=101, media=None, message="You must enter a URL to a post")]),
+    @pytest.mark.parametrize("url, expected_status, message, len_media", [
+        ("https://www.instagram.com/p/C4QgLbrIKXG", "insta-via-bot: success",
+         "Are you new to Bellingcat? - The way we share our investigations is different. 💭\nWe want you to read our story but also learn ou",
+         6),
+        ("https://www.instagram.com/reel/DEVLK8qoIbg/", "insta-via-bot: success",
+         "Our volunteer community is at the centre of many incredible Bellingcat investigations and tools. Stephanie Ladel is one such vol",
+         3),
+        # instagram tbot not working (potentially intermittently?) for stories - replace with a live story to retest
+        # ("https://www.instagram.com/stories/bellingcatofficial/3556336382743057476/", False, "Media not found or unavailable"),
+        # Seems to be working intermittently for highlights
+        # ("https://www.instagram.com/stories/highlights/17868810693068139/", "insta-via-bot: success", None, 50),
+        # Marking invalid url as success
+        ("https://www.instagram.com/p/INVALID", "insta-via-bot: success", "Media not found or unavailable", 0),
+        ("https://www.youtube.com/watch?v=ymCMy8OffHM", False, None, 0),
    ])
-    def test_download(self, mock_sleep, url, expected_status, bot_responses, metadata_sample):
+    def test_download(self, url, expected_status, message, len_media, metadata_sample):
        """Test the `download()` method with various Instagram URLs."""
        metadata_sample.set_url(url)
-        self.extractor.client = MagicMock()
+
        result = self.extractor.download(metadata_sample)
-        pass
-        # TODO fully mock or use as authenticated test
-        # if expected_status:
-        #     assert result.is_success()
-        #     assert result.status == expected_status
-        #     assert result.metadata.get("title") in [msg.message[:128] for msg in bot_responses if msg.message]
-        # else:
-        #     assert result is False
-
-
-
-
-        # Test story
-# Test expired story
-# Test requires login/ access (?)
-# Test post
-# Test multiple images?
+        if expected_status:
+            assert result.is_success()
+            assert result.status == expected_status
+            assert result.metadata.get("title") == message
+            assert len(result.media) == len_media
+        else:
+            assert result is False
--- a/tests/extractors/test_twitter_api_extractor.py
+++ b/tests/extractors/test_twitter_api_extractor.py
@@ -23,7 +23,6 @@ class TestTwitterApiExtractor(TestExtractorBase):
    }

    @pytest.mark.parametrize("url, expected", [
-        ("https://t.co/yl3oOJatFp", "https://www.bellingcat.com/category/resources/"),  # t.co URL
        ("https://x.com/bellingcat/status/1874097816571961839", "https://x.com/bellingcat/status/1874097816571961839"), # x.com urls unchanged
        ("https://twitter.com/bellingcat/status/1874097816571961839", "https://twitter.com/bellingcat/status/1874097816571961839"), # twitter urls unchanged
        ("https://twitter.com/bellingcat/status/1874097816571961839?s=20&t=3d0g4ZQis7dCbSDg-mE7-w", "https://twitter.com/bellingcat/status/1874097816571961839?s=20&t=3d0g4ZQis7dCbSDg-mE7-w"), # don't strip params from twitter urls (changed Jan 2025)
@@ -32,7 +31,11 @@ class TestTwitterApiExtractor(TestExtractorBase):
    ])
    def test_sanitize_url(self, url, expected):
        assert expected == self.extractor.sanitize_url(url)
-    
+
+    @pytest.mark.download
+    def test_sanitize_url_download(self):
+        assert "https://www.bellingcat.com/category/resources/" == self.extractor.sanitize_url("https://t.co/yl3oOJatFp")
+
    @pytest.mark.parametrize("url, exptected_username, exptected_tweetid", [
        ("https://twitter.com/bellingcat/status/1874097816571961839", "bellingcat", "1874097816571961839"),
        ("https://x.com/bellingcat/status/1874097816571961839", "bellingcat", "1874097816571961839"),
--- a/tests/feeders/test_atlos_feeder.py
+++ b/tests/feeders/test_atlos_feeder.py
@@ -0,0 +1,108 @@
+import pytest
+from auto_archiver.modules.atlos_feeder import AtlosFeeder
+
+
+class FakeAPIResponse:
+    """Minimal stand-in for an HTTP response: json() returns the given dict, raise_for_status() optionally raises."""
+
+    def __init__(self, data: dict, raise_error: bool = False) -> None:
+        self._data = data  # payload handed back verbatim by json()
+        self.raise_error = raise_error  # when True, raise_for_status() raises
+
+    def json(self) -> dict:
+        return self._data
+
+    def raise_for_status(self) -> None:
+        if self.raise_error:
+            raise Exception("HTTP error")
+
+
+@pytest.fixture
+def atlos_feeder(setup_module) -> AtlosFeeder:
+    """Return the "atlos_feeder" module built by setup_module with a dummy API token and the Atlos URL."""
+    configs: dict = {
+        "api_token": "abc123",
+        "atlos_url": "https://platform.atlos.org",
+    }
+    return setup_module("atlos_feeder", configs)
+
+
+@pytest.fixture
+def mock_atlos_api(mocker):
+    """Return a helper that patches requests.get to serve one FakeAPIResponse per given payload, in order."""
+    def _mock_responses(responses):
+        mocker.patch(
+            "requests.get",
+            side_effect=[FakeAPIResponse(data) for data in responses],  # one fake response per successive call
+        )
+    return _mock_responses
+
+
+def test_atlos_feeder_iter_yields_valid_metadata(atlos_feeder, mock_atlos_api):
+    """Only the unprocessed item with a non-empty source_url is yielded; the other two are skipped."""
+    mock_atlos_api([
+        {
+            "next": None,
+            "results": [
+                {"source_url": "http://example.com", "id": 1,  # the one item expected to be yielded
+                 "metadata": {"auto_archiver": {"processed": False}},
+                 "visibility": "visible", "status": "complete"},
+                {"source_url": "", "id": 2,  # empty source_url: expected to be skipped
+                 "metadata": {"auto_archiver": {"processed": False}},
+                 "visibility": "visible", "status": "complete"},
+                {"source_url": "http://example.org", "id": 3,  # already processed: expected to be skipped
+                 "metadata": {"auto_archiver": {"processed": True}},
+                 "visibility": "visible", "status": "complete"},
+            ],
+        }
+    ])
+
+    items = list(atlos_feeder)
+    assert len(items) == 1
+    assert items[0].get_url() == "http://example.com"
+    assert items[0].get("atlos_id") == 1
+
+
+def test_atlos_feeder_multiple_pages(atlos_feeder, mock_atlos_api):
+    """Items from both pages are yielded, in order, when the first response carries a "next" cursor."""
+    mock_atlos_api([
+        {
+            "next": "cursor2",  # non-None cursor: a second request is expected
+            "results": [
+                {"source_url": "http://example1.com", "id": 10,
+                 "metadata": {"auto_archiver": {"processed": False}},
+                 "visibility": "visible", "status": "complete"},
+            ],
+        },
+        {
+            "next": None,  # last page
+            "results": [
+                {"source_url": "http://example2.com", "id": 20,
+                 "metadata": {"auto_archiver": {"processed": False}},
+                 "visibility": "visible", "status": "complete"},
+            ],
+        },
+    ])
+
+    items = list(atlos_feeder)
+    assert len(items) == 2
+    assert items[0].get_url() == "http://example1.com"
+    assert items[0].get("atlos_id") == 10
+    assert items[1].get_url() == "http://example2.com"
+    assert items[1].get("atlos_id") == 20
+
+
+def test_atlos_feeder_no_results(atlos_feeder, mock_atlos_api):
+    """A single page with an empty "results" list and no next cursor yields no items."""
+    mock_atlos_api([{"next": None, "results": []}])
+    assert list(atlos_feeder) == []
+
+
+def test_atlos_feeder_http_error(atlos_feeder, mocker):
+    """Iterating the feeder lets the "HTTP error" raised by the fake response's raise_for_status() propagate."""
+    mocker.patch(
+        "requests.get",
+        return_value=FakeAPIResponse({"next": None, "results": []}, raise_error=True),  # every GET yields a failing response
+    )
+    with pytest.raises(Exception, match="HTTP error"):
+        list(atlos_feeder)
--- a/tests/feeders/test_gsheet_feeder.py
+++ b/tests/feeders/test_gsheet_feeder.py
@@ -2,27 +2,23 @@ from typing import Type

 import gspread
 import pytest
-from unittest.mock import patch, MagicMock
 from auto_archiver.modules.gsheet_feeder import GsheetsFeeder
 from auto_archiver.core import Metadata, Feeder


-def test_setup_without_sheet_and_sheet_id(setup_module):
+def test_setup_without_sheet_and_sheet_id(setup_module, mocker):
    # Ensure setup() raises AssertionError if neither sheet nor sheet_id is set.
-    with patch("gspread.service_account"):
-        with pytest.raises(AssertionError):
-            setup_module(
-                "gsheet_feeder",
-                {"service_account": "dummy.json", "sheet": None, "sheet_id": None},
-            )
+    mocker.patch("gspread.service_account")
+    with pytest.raises(AssertionError):
+        setup_module(
+            "gsheet_feeder",
+            {"service_account": "dummy.json", "sheet": None, "sheet_id": None},
+        )


@pytest.fixture
-def gsheet_feeder(setup_module) -> GsheetsFeeder:
-    with patch("gspread.service_account"):
-        feeder = setup_module(
-            "gsheet_feeder",
-            {
+def gsheet_feeder(setup_module, mocker) -> GsheetsFeeder:
+    config: dict = {
                "service_account": "dummy.json",
                "sheet": "test-auto-archiver",
                "sheet_id": None,
@@ -46,9 +42,13 @@ def gsheet_feeder(setup_module) -> GsheetsFeeder:
                "allow_worksheets": set(),
                "block_worksheets": set(),
                "use_sheet_names_in_stored_paths": True,
-            },
-        )
-    feeder.gsheets_client = MagicMock()
+            }
+    mocker.patch("gspread.service_account")
+    feeder = setup_module(
+        "gsheet_feeder",
+        config
+    )
+    feeder.gsheets_client = mocker.MagicMock()
    return feeder


@@ -129,56 +129,56 @@ def test__set_metadata_with_folder(gsheet_feeder: GsheetsFeeder):
    ],
 )
 def test_open_sheet_with_name_or_id(
-    setup_module, sheet, sheet_id, expected_method, expected_arg, description
+    setup_module, sheet, sheet_id, expected_method, expected_arg, description, mocker
 ):
    """Ensure open_sheet() correctly opens by name or ID based on configuration."""
-    with patch("gspread.service_account") as mock_service_account:
-        mock_client = MagicMock()
-        mock_service_account.return_value = mock_client
-        mock_client.open.return_value = "MockSheet"
-        mock_client.open_by_key.return_value = "MockSheet"
+    mock_service_account = mocker.patch("gspread.service_account")
+    mock_client = mocker.MagicMock()
+    mock_service_account.return_value = mock_client
+    mock_client.open.return_value = "MockSheet"
+    mock_client.open_by_key.return_value = "MockSheet"

-        # Setup module with parameterized values
-        feeder = setup_module(
-            "gsheet_feeder",
-            {"service_account": "dummy.json", "sheet": sheet, "sheet_id": sheet_id},
-        )
-        sheet_result = feeder.open_sheet()
-        # Validate the correct method was called
-        getattr(mock_client, expected_method).assert_called_once_with(
-            expected_arg
-        ), f"Failed: {description}"
-        assert sheet_result == "MockSheet", f"Failed: {description}"
+    # Setup module with parameterized values
+    feeder = setup_module(
+        "gsheet_feeder",
+        {"service_account": "dummy.json", "sheet": sheet, "sheet_id": sheet_id},
+    )
+    sheet_result = feeder.open_sheet()
+    # Validate the correct method was called
+    getattr(mock_client, expected_method).assert_called_once_with(
+        expected_arg
+    ), f"Failed: {description}"
+    assert sheet_result == "MockSheet", f"Failed: {description}"


@pytest.mark.usefixtures("setup_module")
-def test_open_sheet_with_sheet_id(setup_module):
+def test_open_sheet_with_sheet_id(setup_module, mocker):
    """Ensure open_sheet() correctly opens a sheet by ID."""
-    with patch("gspread.service_account") as mock_service_account:
-        mock_client = MagicMock()
-        mock_service_account.return_value = mock_client
-        mock_client.open_by_key.return_value = "MockSheet"
-        feeder = setup_module(
-            "gsheet_feeder",
-            {"service_account": "dummy.json", "sheet": None, "sheet_id": "ABC123"},
-        )
-        sheet = feeder.open_sheet()
-        mock_client.open_by_key.assert_called_once_with("ABC123")
-        assert sheet == "MockSheet"
+    mock_service_account = mocker.patch("gspread.service_account")
+    mock_client = mocker.MagicMock()
+    mock_service_account.return_value = mock_client
+    mock_client.open_by_key.return_value = "MockSheet"
+    feeder = setup_module(
+        "gsheet_feeder",
+        {"service_account": "dummy.json", "sheet": None, "sheet_id": "ABC123"},
+    )
+    sheet = feeder.open_sheet()
+    mock_client.open_by_key.assert_called_once_with("ABC123")
+    assert sheet == "MockSheet"


-def test_should_process_sheet(setup_module):
-    with patch("gspread.service_account"):
-        gdb = setup_module(
-            "gsheet_feeder",
-            {
-                "service_account": "dummy.json",
-                "sheet": "TestSheet",
-                "sheet_id": None,
-                "allow_worksheets": {"TestSheet", "Sheet2"},
-                "block_worksheets": {"Sheet3"},
-            },
-        )
+def test_should_process_sheet(setup_module, mocker):
+    mocker.patch("gspread.service_account")
+    gdb = setup_module(
+        "gsheet_feeder",
+        {
+            "service_account": "dummy.json",
+            "sheet": "TestSheet",
+            "sheet_id": None,
+            "allow_worksheets": {"TestSheet", "Sheet2"},
+            "block_worksheets": {"Sheet3"},
+        },
+    )
    assert gdb.should_process_sheet("TestSheet") == True
    assert gdb.should_process_sheet("Sheet3") == False
    # False if allow_worksheets is set
--- a/tests/feeders/test_gworksheet.py
+++ b/tests/feeders/test_gworksheet.py
@@ -1,13 +1,13 @@
+# Note: this isn't a feeder itself; GWorksheet is a utility contained in the gsheet feeder module
 import pytest
-from unittest.mock import MagicMock

 from auto_archiver.modules.gsheet_feeder import GWorksheet


 class TestGWorksheet:
    @pytest.fixture
-    def mock_worksheet(self):
-        mock_ws = MagicMock()
+    def mock_worksheet(self, mocker):
+        mock_ws = mocker.MagicMock()
        mock_ws.get_values.return_value = [
            ["Link", "Archive Status", "Archive Location", "Archive Date"],
            ["url1", "archived", "filepath1", "2023-01-01"],
@@ -136,8 +136,8 @@ class TestGWorksheet:
        assert gworksheet.to_a1(row, col) == expected

    # Test empty worksheet
-    def test_empty_worksheet_initialization(self):
-        mock_ws = MagicMock()
+    def test_empty_worksheet_initialization(self, mocker):
+        mock_ws = mocker.MagicMock()
        mock_ws.get_values.return_value = []
        g = GWorksheet(mock_ws)
        assert g.headers == []
--- a/tests/storages/test_S3_storage.py
+++ b/tests/storages/test_S3_storage.py
@@ -1,6 +1,5 @@
 from typing import Type
 import pytest
-from unittest.mock import MagicMock, patch
 from auto_archiver.core import Media
 from auto_archiver.modules.s3_storage import S3Storage

@@ -11,7 +10,6 @@ class TestS3Storage:
    """
    module_name: str = "s3_storage"
    storage: Type[S3Storage]
-    s3: MagicMock
    config: dict = {
        "path_generator": "flat",
        "filename_generator": "static",
@@ -25,13 +23,14 @@ class TestS3Storage:
        "private": False,
    }

-    @patch('boto3.client')
    @pytest.fixture(autouse=True)
-    def setup_storage(self, setup_module):
+    def setup_storage(self, setup_module, mocker):
+        self.s3 = S3Storage()
        self.storage = setup_module(self.module_name, self.config)

    def test_client_initialization(self):
        """Test that S3 client is initialized with correct parameters"""
+
        assert self.storage.s3 is not None
        assert self.storage.s3.meta.region_name == 'test-region'

@@ -44,81 +43,63 @@ class TestS3Storage:
        media.key = "another/path.jpg"
        assert self.storage.get_cdn_url(media) == "https://cdn.example.com/another/path.jpg"

-    def test_uploadf_sets_acl_public(self):
+    def test_uploadf_sets_acl_public(self, mocker):
        media = Media("test.txt")
-        mock_file = MagicMock()
-        with patch.object(self.storage.s3, 'upload_fileobj') as mock_s3_upload,  \
-            patch.object(self.storage, 'is_upload_needed', return_value=True):
-            self.storage.uploadf(mock_file, media)
-            mock_s3_upload.assert_called_once_with(
-                mock_file,
-                Bucket='test-bucket',
-                Key=media.key,
-                ExtraArgs={'ACL': 'public-read', 'ContentType': 'text/plain'}
-            )
+        mock_file = mocker.MagicMock()
+        mock_s3_upload = mocker.patch.object(self.storage.s3, 'upload_fileobj')
+        mocker.patch.object(self.storage, 'is_upload_needed', return_value=True)
+        self.storage.uploadf(mock_file, media)
+        mock_s3_upload.assert_called_once_with(
+            mock_file,
+            Bucket='test-bucket',
+            Key=media.key,
+            ExtraArgs={'ACL': 'public-read', 'ContentType': 'text/plain'}
+        )

-    def test_upload_decision_logic(self):
+    def test_upload_decision_logic(self, mocker):
        """Test is_upload_needed under different conditions"""
        media = Media("test.txt")
-        # Test default state (random_no_duplicate=False)
        assert self.storage.is_upload_needed(media) is True
-        # Set duplicate checking config to true:
-
        self.storage.random_no_duplicate = True
-        with patch('auto_archiver.modules.s3_storage.s3_storage.calculate_file_hash') as mock_calc_hash, \
-                patch.object(self.storage, 'file_in_folder') as mock_file_in_folder:
-            mock_calc_hash.return_value = 'beepboop123beepboop123beepboop123'
-            mock_file_in_folder.return_value = 'existing_key.txt'
-            # Test duplicate result
-            assert self.storage.is_upload_needed(media) is False
-            assert media.key == 'existing_key.txt'
-            mock_file_in_folder.assert_called_with(
-                # (first 24 chars of hash)
-                'no-dups/beepboop123beepboop123be'
-            )
+        mock_calc_hash = mocker.patch('auto_archiver.modules.s3_storage.s3_storage.calculate_file_hash', return_value='beepboop123beepboop123beepboop123')
+        mock_file_in_folder = mocker.patch.object(self.storage, 'file_in_folder', return_value='existing_key.txt')
+        assert self.storage.is_upload_needed(media) is False
+        assert media.key == 'existing_key.txt'
+        mock_file_in_folder.assert_called_with('no-dups/beepboop123beepboop123be')

-
-    @patch.object(S3Storage, 'file_in_folder')
-    def test_skips_upload_when_duplicate_exists(self, mock_file_in_folder):
+    def test_skips_upload_when_duplicate_exists(self, mocker):
        """Test that upload skips when file_in_folder finds existing object"""
        self.storage.random_no_duplicate = True
-        mock_file_in_folder.return_value = "existing_folder/existing_file.txt"
-        # Create test media with calculated hash
+        mock_file_in_folder = mocker.patch.object(S3Storage, 'file_in_folder', return_value="existing_folder/existing_file.txt")
        media = Media("test.txt")
        media.key = "original_path.txt"
-        with patch('auto_archiver.modules.s3_storage.s3_storage.calculate_file_hash') as mock_calculate_hash:
-            mock_calculate_hash.return_value = "beepboop123beepboop123beepboop123"
-            # Verify upload
-            assert self.storage.is_upload_needed(media) is False
-            assert media.key == "existing_folder/existing_file.txt"
-            assert media.get("previously archived") is True
-            with patch.object(self.storage.s3, 'upload_fileobj') as mock_upload:
-                result = self.storage.uploadf(None, media)
-                mock_upload.assert_not_called()
-                assert result is True
+        mock_calculate_hash = mocker.patch('auto_archiver.modules.s3_storage.s3_storage.calculate_file_hash', return_value="beepboop123beepboop123beepboop123")
+        assert self.storage.is_upload_needed(media) is False
+        assert media.key == "existing_folder/existing_file.txt"
+        assert media.get("previously archived") is True
+        mock_upload = mocker.patch.object(self.storage.s3, 'upload_fileobj')
+        result = self.storage.uploadf(None, media)
+        mock_upload.assert_not_called()
+        assert result is True

-    @patch.object(S3Storage, 'is_upload_needed')
-    def test_uploads_with_correct_parameters(self, mock_upload_needed):
+    def test_uploads_with_correct_parameters(self, mocker):
        media = Media("test.txt")
        media.key = "original_key.txt"
-        mock_upload_needed.return_value = True
+        mocker.patch.object(S3Storage, 'is_upload_needed', return_value=True)
        media.mimetype = 'image/png'
-        mock_file = MagicMock()
+        mock_file = mocker.MagicMock()
+        mock_upload = mocker.patch.object(self.storage.s3, 'upload_fileobj')
+        self.storage.uploadf(mock_file, media)
+        mock_upload.assert_called_once_with(
+            mock_file,
+            Bucket='test-bucket',
+            Key='original_key.txt',
+            ExtraArgs={
+                'ACL': 'public-read',
+                'ContentType': 'image/png'
+            }
+        )

-        with patch.object(self.storage.s3, 'upload_fileobj') as mock_upload:
-            self.storage.uploadf(mock_file, media)
-            # verify call occured with these params
-            mock_upload.assert_called_once_with(
-                mock_file,
-                Bucket='test-bucket',
-                Key='original_key.txt',
-                ExtraArgs={
-                    'ACL': 'public-read',
-                    'ContentType': 'image/png'
-                }
-            )
-
-    def test_file_in_folder_exists(self):
-        with patch.object(self.storage.s3, 'list_objects') as mock_list_objects:
-            mock_list_objects.return_value = {'Contents': [{'Key': 'path/to/file.txt'}]}
-            assert self.storage.file_in_folder('path/to/') == 'path/to/file.txt'
+    def test_file_in_folder_exists(self, mocker):
+        mock_list_objects = mocker.patch.object(self.storage.s3, 'list_objects', return_value={'Contents': [{'Key': 'path/to/file.txt'}]})
+        assert self.storage.file_in_folder('path/to/') == 'path/to/file.txt'
--- a/tests/storages/test_atlos_storage.py
+++ b/tests/storages/test_atlos_storage.py
@@ -0,0 +1,142 @@
+import os
+import hashlib
+import pytest
+from auto_archiver.core import Media, Metadata
+from auto_archiver.modules.atlos_storage import AtlosStorage
+
+
+class FakeAPIResponse:
+    """Minimal stand-in for an HTTP response: json() returns the given dict, raise_for_status() optionally raises."""
+
+    def __init__(self, data: dict, raise_error: bool = False) -> None:
+        self._data = data  # payload handed back verbatim by json()
+        self.raise_error = raise_error  # when True, raise_for_status() raises
+
+    def json(self) -> dict:
+        return self._data
+
+    def raise_for_status(self) -> None:
+        if self.raise_error:
+            raise Exception("HTTP error")
+
+
+@pytest.fixture
+def atlos_storage(setup_module) -> AtlosStorage:
+    """Return the "atlos_storage" module built by setup_module with a dummy API token and the Atlos URL."""
+    configs: dict = {
+        "api_token": "abc123",
+        "atlos_url": "https://platform.atlos.org",
+    }
+    return setup_module("atlos_storage", configs)
+
+
+@pytest.fixture
+def media(tmp_path) -> Media:
+    """Media backed by a temp file containing b"media content", with key "key" and a sample properties dict."""
+    content = b"media content"
+    file_path = tmp_path / "media.txt"
+    file_path.write_bytes(content)
+    media = Media(filename=str(file_path))
+    media.properties = {"something": "Title"}
+    media.key = "key"
+    return media
+
+
+def test_get_cdn_url(atlos_storage: AtlosStorage) -> None:
+    """Test get_cdn_url() returns the storage's atlos_url attribute."""
+    media = Media(filename="dummy.mp4")  # placeholder filename; no file is created
+    url = atlos_storage.get_cdn_url(media)
+    assert url == atlos_storage.atlos_url
+
+
+def test_hash(tmp_path, atlos_storage: AtlosStorage) -> None:
+    """Test _hash() returns the SHA-256 hex digest of the file that media.filename points to."""
+    content = b"hello world"
+    file_path = tmp_path / "test.txt"
+    file_path.write_bytes(content)
+    media = Media(filename="dummy.mp4")  # placeholder filename, overwritten on the next line
+    media.filename = str(file_path)
+    expected_hash = hashlib.sha256(content).hexdigest()  # reference digest computed straight from the bytes
+    assert atlos_storage._hash(media) == expected_hash
+
+
+def test_upload_no_atlos_id(tmp_path, atlos_storage: AtlosStorage, media: Media, mocker) -> None:  # NOTE(review): tmp_path is unused in this test
+    """Test upload() returns False when metadata lacks atlos_id."""
+    metadata = Metadata()  # atlos_id not set
+    post_mock = mocker.patch("requests.post")
+    result = atlos_storage.upload(media, metadata)
+    assert result is False
+    post_mock.assert_not_called()  # nothing may be POSTed without an atlos_id
+
+
+def test_upload_already_uploaded(atlos_storage: AtlosStorage,
+                                 metadata: Metadata,
+                                 media: Media,
+                                 tmp_path,  # NOTE(review): unused in this test
+                                 mocker) -> None:
+    """Test upload() returns True without POSTing when the media's SHA-256 is already among the item's artifacts."""
+    content = b"media content"  # must equal the bytes written by the media fixture so the hashes match
+    metadata.set("atlos_id", 101)
+    media_hash = hashlib.sha256(content).hexdigest()
+    fake_get = FakeAPIResponse({
+        "result": {"artifacts": [{"file_hash_sha256": media_hash}]}
+    })
+    get_mock = mocker.patch("requests.get", return_value=fake_get)
+    post_mock = mocker.patch("requests.post")
+    result = atlos_storage.upload(media, metadata)
+    assert result is True
+    get_mock.assert_called_once()
+    post_mock.assert_not_called()
+
+
+def test_upload_not_uploaded(tmp_path, atlos_storage: AtlosStorage,  # NOTE(review): tmp_path is unused in this test
+                             metadata: Metadata,
+                             media: Media,
+                             mocker) -> None:
+    """Test upload() POSTs the media when no existing artifact hash matches, and check the POST arguments."""
+    metadata.set("atlos_id", 202)
+    fake_get = FakeAPIResponse({
+        "result": {"artifacts": [{"file_hash_sha256": "different_hash"}]}
+    })
+    get_mock = mocker.patch("requests.get", return_value=fake_get)
+    fake_post = FakeAPIResponse({}, raise_error=False)
+    post_mock = mocker.patch("requests.post", return_value=fake_post)
+    result = atlos_storage.upload(media, metadata)
+    assert result is True
+    get_mock.assert_called_once()
+    post_mock.assert_called_once()
+    expected_url = f"{atlos_storage.atlos_url}/api/v2/source_material/upload/202"
+    expected_headers = {"Authorization": f"Bearer {atlos_storage.api_token}"}
+    expected_params = {"title": media.properties}
+    call_kwargs = post_mock.call_args.kwargs
+    assert call_kwargs["headers"] == expected_headers
+    assert call_kwargs["params"] == expected_params
+    # Verify the URL passed to requests.post, whether given as the "url" keyword or as the first positional argument.
+    posted_url = call_kwargs.get("url") or post_mock.call_args.args[0]
+    assert posted_url == expected_url
+    # Verify the first element of the "file" entry in files is the media file's base name.
+    file_tuple = call_kwargs["files"]["file"]
+    assert file_tuple[0] == os.path.basename(media.filename)
+
+
+def test_upload_post_http_error(tmp_path,  # NOTE(review): tmp_path is unused in this test
+                                atlos_storage: AtlosStorage,
+                                metadata: Metadata,
+                                media: Media,
+                                mocker) -> None:
+    """Test upload() lets the "HTTP error" raised for the failing POST response propagate."""
+    metadata.set("atlos_id", 303)
+    fake_get = FakeAPIResponse({
+        "result": {"artifacts": []}
+    })
+    mocker.patch("requests.get", return_value=fake_get)
+    fake_post = FakeAPIResponse({}, raise_error=True)  # only the POST response is set to fail
+    mocker.patch("requests.post", return_value=fake_post)
+    with pytest.raises(Exception, match="HTTP error"):
+        atlos_storage.upload(media, metadata)
+
+
+def test_uploadf_not_implemented(atlos_storage: AtlosStorage) -> None:
+    """Test uploadf() returns None (not implemented)."""
+    result = atlos_storage.uploadf(None, "dummy")  # placeholder arguments
+    assert result is None
--- a/tests/storages/test_gdrive_storage.py
+++ b/tests/storages/test_gdrive_storage.py
@@ -1,44 +1,57 @@
 from typing import Type
 import pytest
-from unittest.mock import MagicMock, patch
+from oauth2client import service_account
+
 from auto_archiver.core import Media
 from auto_archiver.modules.gdrive_storage import GDriveStorage
 from auto_archiver.core.metadata import Metadata
 from tests.storages.test_storage_base import TestStorageBase


-class TestGDriveStorage:
-    """
-    Test suite for GDriveStorage.
-    """
-
+@pytest.fixture
+def gdrive_storage(setup_module, mocker):
    module_name: str = "gdrive_storage"
-    storage: Type[GDriveStorage]
+    storage: GDriveStorage
    config: dict = {'path_generator': 'url',
            'filename_generator': 'static',
            'root_folder_id': "fake_root_folder_id",
            'oauth_token': None,
            'service_account': 'fake_service_account.json'
                    }
-
-    @pytest.fixture(autouse=True)
-    def gdrive(self, setup_module):
-        with patch('google.oauth2.service_account.Credentials.from_service_account_file') as mock_creds:
-            self.storage = setup_module(self.module_name, self.config)
-
-    def test_initialize_fails_with_non_existent_creds(self):
-        """
-        Test that the Google Drive service raises a FileNotFoundError when the service account file does not exist.
-        """
-        # Act and Assert
-        with pytest.raises(FileNotFoundError) as exc_info:
-            self.storage.setup()
-        assert "No such file or directory" in str(exc_info.value)
+    mocker.patch('google.oauth2.service_account.Credentials.from_service_account_file')
+    return setup_module(module_name, config)


-    def test_path_parts(self):
-        media = Media(filename="test.jpg")
-        media.key = "folder1/folder2/test.jpg"
+def test_initialize_fails_with_non_existent_creds(setup_module):
+    """Test that setting up gdrive_storage raises FileNotFoundError when the service account file does not exist
+        and the credentials loader is not mocked.
+    """
+    config: dict = {'path_generator': 'url',
+                    'filename_generator': 'static',
+                    'root_folder_id': "fake_root_folder_id",
+                    'oauth_token': None,
+                    'service_account': 'fake_service_account.json'
+                    }
+    with pytest.raises(FileNotFoundError) as exc_info:
+        setup_module("gdrive_storage", config)
+    assert "No such file or directory" in str(exc_info.value)
+
+
+def test_get_id_from_parent_and_name(gdrive_storage, mocker):
+    """Test _get_id_from_parent_and_name returns the id of the file in the mocked files().list().execute() result."""
+    fake_list = mocker.MagicMock()
+    fake_list.execute.return_value = {"files": [{"id": "123", "name": "testname"}]}
+    fake_service = mocker.MagicMock()
+    # make service.files().list(...) return fake_list, whatever arguments it is called with
+    fake_service.files.return_value.list.return_value = fake_list
+    gdrive_storage.service = fake_service
+    result = gdrive_storage._get_id_from_parent_and_name("parent", "mock", retries=1, use_mime_type=False)
+    assert result == "123"
+
+def test_path_parts():
+    media = Media(filename="test.jpg")
+    media.key = "folder1/folder2/test.jpg"
+    # NOTE(review): no assertions follow, so this test currently verifies nothing -- TODO add path checks


@pytest.mark.skip(reason="Requires real credentials")
--- a/tests/storages/test_local_storage.py
+++ b/tests/storages/test_local_storage.py
@@ -0,0 +1,54 @@
+
+import os
+from pathlib import Path
+
+import pytest
+
+from auto_archiver.core import Media
+from auto_archiver.modules.local_storage import LocalStorage
+
+
+@pytest.fixture
+def local_storage(setup_module, tmp_path) -> LocalStorage:
+    """LocalStorage module saving under pytest's tmp_path, so test runs leave no files in the working directory."""
+    return setup_module("local_storage", {
+        "path_generator": "flat",
+        "filename_generator": "static",
+        "save_to": str(tmp_path / "local_archive"),
+        "save_absolute": False,
+    })
+
+
+@pytest.fixture
+def sample_media(tmp_path) -> Media:
+    """Media with key "subdir/test.txt" whose source is a temp file containing "test content"."""
+    src_file = tmp_path / "source.txt"
+    src_file.write_text("test content")
+    return Media(key="subdir/test.txt", filename=str(src_file))
+
+
+def test_get_cdn_url_relative(local_storage):  # with save_absolute False, the URL is save_to joined with the media key
+    media = Media(key="test.txt", filename="dummy.txt")
+    expected = os.path.join(local_storage.save_to, media.key)
+    assert local_storage.get_cdn_url(media) == expected
+
+
+
+def test_get_cdn_url_absolute(local_storage):  # with save_absolute True, the URL is the absolute form of save_to joined with the key
+    media = Media(key="test.txt", filename="dummy.txt")
+    local_storage.save_absolute = True  # flip the flag on the already-built module
+    expected = os.path.abspath(os.path.join(local_storage.save_to, media.key))
+    assert local_storage.get_cdn_url(media) == expected
+
+def test_upload_file_contents_and_metadata(local_storage, sample_media):  # NOTE(review): despite the name, only file contents are compared; no metadata is checked
+    dest = os.path.join(local_storage.save_to, sample_media.key)  # where the uploaded copy is expected to land
+    assert local_storage.upload(sample_media) is True
+    assert Path(sample_media.filename).read_text() == Path(dest).read_text()
+
+
+def test_upload_nonexistent_source(local_storage):  # uploading a Media whose source file does not exist should raise FileNotFoundError
+    media = Media(key="missing.txt", filename="nonexistent.txt")
+    with pytest.raises(FileNotFoundError):
+        local_storage.upload(media)
+
+
--- a/tests/test_implementation.py
+++ b/tests/test_implementation.py
@@ -60,3 +60,15 @@ def test_run_auto_archiver_empty_file(caplog, autoarchiver, orchestration_file):

    # should treat an empty file as if there is no file at all
    assert " No URLs provided. Please provide at least one URL via the com" in caplog.text
+
+def test_call_autoarchiver_main(caplog, monkeypatch, tmp_path):  # main() with no URL arguments: expect SystemExit and the "No URLs provided" log line
+    from auto_archiver.__main__ import main
+
+    # monkey patch to change the current working directory, so that we don't use the user's real config file
+    monkeypatch.chdir(tmp_path)
+    with monkeypatch.context() as m:
+        m.setattr(sys, "argv", ["auto-archiver"])  # argv holds only the program name, i.e. no URLs
+        with pytest.raises(SystemExit):
+            main()
+
+    assert "No URLs provided. Please provide at least one" in caplog.text
--- a/tests/test_metadata.py
+++ b/tests/test_metadata.py
@@ -162,4 +162,25 @@ def test_get_context():


 def test_choose_most_complete():
-    pass
+    m_more = Metadata()
+    m_more.set_title("Title 1")
+    m_more.set_content("Content 1")
+    m_more.set_url("https://example.com")
+
+    m_less = Metadata()
+    m_less.set_title("Title 2")
+    m_less.set_content("Content 2")
+    m_less.set_url("https://example.com")
+    m_less.set_context("key", "value")
+
+    res = Metadata.choose_most_complete([m_more, m_less])
+    assert res.metadata.get("title") == "Title 1"
+
+def test_choose_most_complete_from_pickles(unpickle):
+    # Test choose_most_complete with pickled metadata captured before and after an enricher has run.
+    # (choose_most_complete only compares the length of media, not the actual media)
+    m_before_enriching = unpickle("metadata_enricher_ytshort_input.pickle")
+    m_after_enriching = unpickle("metadata_enricher_ytshort_expected.pickle")
+    # An empty Metadata() goes first because the implementation iterates `for r in results[1:]:` (presumably comparing each against results[0])
+    res = Metadata.choose_most_complete([Metadata(), m_after_enriching, m_before_enriching])
+    assert res.media == m_after_enriching.media
--- a/tests/test_modules.py
+++ b/tests/test_modules.py
@@ -1,24 +1,18 @@
 import sys
 import pytest
-from auto_archiver.core.module import get_module_lazy, BaseModule, LazyBaseModule, _LAZY_LOADED_MODULES
+from auto_archiver.core.module import ModuleFactory, LazyBaseModule
+from auto_archiver.core.base_module import BaseModule

@pytest.fixture
 def example_module():
    import auto_archiver

+    module_factory = ModuleFactory()
+
    previous_path = auto_archiver.modules.__path__
    auto_archiver.modules.__path__.append("tests/data/test_modules/")

-    module = get_module_lazy("example_module")
-    yield module
-    # cleanup
-    try:
-        del module._manifest
-    except AttributeError:
-        pass
-    del _LAZY_LOADED_MODULES["example_module"]
-    sys.modules.pop("auto_archiver.modules.example_module.example_module", None)
-    auto_archiver.modules.__path__ = previous_path
+    return module_factory.get_module_lazy("example_module")

 def test_get_module_lazy(example_module):
    assert example_module.name == "example_module"
@@ -46,12 +40,14 @@ def test_module_dependency_check_loads_module(example_module):
    # monkey patch the manifest to include a nonexistnet dependency
    example_module.manifest["dependencies"]["python"] = ["hash_enricher"]

+    module_factory = example_module.module_factory
+
    loaded_module = example_module.load({})
    assert loaded_module is not None

    # check the dependency is loaded
-    assert _LAZY_LOADED_MODULES["hash_enricher"] is not None
-    assert _LAZY_LOADED_MODULES["hash_enricher"]._instance is not None
+    assert module_factory._lazy_modules["hash_enricher"] is not None
+    assert module_factory._lazy_modules["hash_enricher"]._instance is not None

 def test_load_module(example_module):

@@ -69,7 +65,7 @@ def test_load_module(example_module):
@pytest.mark.parametrize("module_name", ["local_storage", "generic_extractor", "html_formatter", "csv_db"])
 def test_load_modules(module_name):
    # test that specific modules can be loaded
-    module = get_module_lazy(module_name)
+    module = ModuleFactory().get_module_lazy(module_name)
    assert module is not None
    assert isinstance(module, LazyBaseModule)
    assert module.name == module_name
@@ -86,7 +82,7 @@ def test_load_modules(module_name):

@pytest.mark.parametrize("module_name", ["local_storage", "generic_extractor", "html_formatter", "csv_db"])
 def test_lazy_base_module(module_name):
-    lazy_module = get_module_lazy(module_name)
+    lazy_module = ModuleFactory().get_module_lazy(module_name)

    assert lazy_module is not None
    assert isinstance(lazy_module, LazyBaseModule)
--- a/tests/test_orchestrator.py
+++ b/tests/test_orchestrator.py
@@ -4,7 +4,7 @@ from argparse import ArgumentParser, ArgumentTypeError
 from auto_archiver.core.orchestrator import ArchivingOrchestrator
 from auto_archiver.version import __version__
 from auto_archiver.core.config import read_yaml, store_yaml
-from auto_archiver.core.module import _LAZY_LOADED_MODULES
+

 TEST_ORCHESTRATION = "tests/data/test_orchestration.yaml"
 TEST_MODULES = "tests/data/test_modules/"
@@ -17,22 +17,7 @@ def test_args():

@pytest.fixture
 def orchestrator():
-    yield ArchivingOrchestrator()
-    # hack - the loguru logger starts with one logger, but if orchestrator has run before
-    # it'll remove the default logger, add it back in:
-
-    from loguru import logger
-
-    if not logger._core.handlers.get(0):
-        logger._core.handlers_count = 0
-        logger.add(sys.stderr)
-    # and remove the custom logger
-    if logger._core.handlers.get(1):
-        logger.remove(1)
-
-    # delete out any loaded modules
-    _LAZY_LOADED_MODULES.clear()
-
+    return ArchivingOrchestrator()

@pytest.fixture
 def basic_parser(orchestrator) -> ArgumentParser:
@@ -75,18 +60,36 @@ def test_help(orchestrator, basic_parser, capsys):
        orchestrator.show_help(args)

    assert exit_error.value.code == 0
-    assert "Usage: auto-archiver [--help] [--version] [--config CONFIG_FILE]" in capsys.readouterr().out
+
+    logs = capsys.readouterr().out
+    assert "Usage: auto-archiver [--help] [--version] [--config CONFIG_FILE]" in logs
+
+    # basic config options
+    assert "--version" in logs
+
+    # setting modules options
+    assert "--feeders" in logs
+    assert "--extractors" in logs
+
+    # authentication options
+    assert "--authentication" in logs
+
+    # logging options
+    assert "--logging.level" in logs
+
+    # individual module configs
+    assert "--gsheet_feeder.sheet_id" in logs


 def test_add_custom_modules_path(orchestrator, test_args):
-    orchestrator.run(test_args)
+    orchestrator.setup_config(test_args)
    
    import auto_archiver
    assert "tests/data/test_modules/" in auto_archiver.modules.__path__

 def test_add_custom_modules_path_invalid(orchestrator, caplog, test_args):

-    orchestrator.run(test_args +  # we still need to load the real path to get the example_module 
+    orchestrator.setup_config(test_args +  # we still need to load the real path to get the example_module 
                          ["--module_paths", "tests/data/invalid_test_modules/"])

    assert caplog.records[0].message == "Path 'tests/data/invalid_test_modules/' does not exist. Skipping..."
@@ -97,7 +100,7 @@ def test_check_required_values(orchestrator, caplog, test_args):
    test_args = test_args[:-2]

    with pytest.raises(SystemExit) as exit_error:
-        orchestrator.run(test_args)
+        config = orchestrator.setup_config(test_args)

    assert caplog.records[1].message == "the following arguments are required: --example_module.required_field"

@@ -111,24 +114,50 @@ def test_get_required_values_from_config(orchestrator, test_args, tmp_path):
    store_yaml(test_yaml, tmp_file)

    # run the orchestrator
-    orchestrator.run(["--config", tmp_file, "--module_paths", TEST_MODULES])
-    assert orchestrator.config is not None
+    config = orchestrator.setup_config(["--config", tmp_file, "--module_paths", TEST_MODULES])
+    assert config is not None

 def test_load_authentication_string(orchestrator, test_args):

-    orchestrator.run(test_args + ["--authentication", '{"facebook.com": {"username": "my_username", "password": "my_password"}}'])
-    assert orchestrator.config['authentication'] == {"facebook.com": {"username": "my_username", "password": "my_password"}}
+    config = orchestrator.setup_config(test_args + ["--authentication", '{"facebook.com": {"username": "my_username", "password": "my_password"}}'])
+    assert config['authentication'] == {"facebook.com": {"username": "my_username", "password": "my_password"}}

 def test_load_authentication_string_concat_site(orchestrator, test_args):
    
-    orchestrator.run(test_args + ["--authentication", '{"x.com,twitter.com": {"api_key": "my_key"}}'])
-    assert orchestrator.config['authentication'] == {"x.com": {"api_key": "my_key"},
+    config = orchestrator.setup_config(test_args + ["--authentication", '{"x.com,twitter.com": {"api_key": "my_key"}}'])
+    assert config['authentication'] == {"x.com": {"api_key": "my_key"},
                                                     "twitter.com": {"api_key": "my_key"}}

 def test_load_invalid_authentication_string(orchestrator, test_args):
    with pytest.raises(ArgumentTypeError):
-        orchestrator.run(test_args + ["--authentication", "{\''invalid_json"])
+        orchestrator.setup_config(test_args + ["--authentication", "{\''invalid_json"])

 def test_load_authentication_invalid_dict(orchestrator, test_args):
    with pytest.raises(ArgumentTypeError):
-        orchestrator.run(test_args + ["--authentication", "[true, false]"])
+        orchestrator.setup_config(test_args + ["--authentication", "[true, false]"])
+
+def test_load_modules_from_commandline(orchestrator, test_args):
+    # every step type is pointed at the same module through its own command line flag
+    step_types = ("feeders", "extractors", "databases", "enrichers", "formatters")
+    module_flags = []
+    for step_type in step_types:
+        module_flags += [f"--{step_type}", "example_module"]
+
+    orchestrator.setup(test_args + module_flags)
+
+    # exactly one module must have been loaded per step type ...
+    for step_type in step_types:
+        assert len(getattr(orchestrator, step_type)) == 1
+
+    # ... and it must be the one that was requested
+    for step_type in step_types:
+        assert getattr(orchestrator, step_type)[0].name == "example_module"
+
+def test_load_settings_for_module_from_commandline(orchestrator, test_args):
+    # options of a single module are passed as `--<module>.<option>` flags
+    feeder_options = ["--gsheet_feeder.sheet_id", "123", "--gsheet_feeder.service_account", "tests/data/test_service_account.json"]
+    orchestrator.setup(test_args + ["--feeders", "gsheet_feeder"] + feeder_options)
+
+    assert len(orchestrator.feeders) == 1
+    assert orchestrator.feeders[0].name == "gsheet_feeder"
+    assert orchestrator.config['gsheet_feeder']['sheet_id'] == "123"
--- a/tests/utils/test_misc.py
+++ b/tests/utils/test_misc.py
@@ -0,0 +1,144 @@
+import hashlib
+import json
+from datetime import datetime, timezone
+
+import pytest
+
+from auto_archiver.utils.misc import (
+    mkdir_if_not_exists,
+    expand_url,
+    getattr_or,
+    DateTimeEncoder,
+    dump_payload,
+    get_datetime_from_str,
+    update_nested_dict,
+    calculate_file_hash,
+    random_str,
+    get_timestamp
+)
+
+
+@pytest.fixture
+def sample_file(tmp_path):  # text file with known content, created under pytest's tmp_path
+    file_path = tmp_path / "test.txt"
+    file_path.write_text("test content")
+    return file_path
+
+
+class TestDirectoryUtils:
+    def test_mkdir_creates_new_directory(self, tmp_path):
+        # a directory that does not exist yet must be created
+        target = tmp_path / "new_folder"
+        mkdir_if_not_exists(target)
+        assert target.exists() and target.is_dir()
+
+    def test_mkdir_exists_quietly(self, tmp_path):
+        target = tmp_path / "existing"
+        target.mkdir()  # pre-create it: the helper must not raise on an existing directory
+        mkdir_if_not_exists(target)
+        assert target.exists()
+
+class TestURLExpansion:
+    @pytest.mark.parametrize("input_url,expected", [
+        ("https://example.com", "https://example.com"),
+        ("https://t.co/test", "https://expanded.url")
+    ])
+    def test_expand_url(self, input_url, expected, mocker):
+        # requests.get is stubbed: the first case expects its input back, the t.co case the stubbed url
+        stub_response = mocker.Mock(url="https://expanded.url")
+        mocker.patch('requests.get', return_value=stub_response)
+        expanded = expand_url(input_url)
+        assert expanded == expected
+
+    def test_expand_url_handles_errors(self, caplog, mocker):
+        # a failing request must return the url unchanged and log the failure
+        short_url = "https://t.co/error"
+        mocker.patch('requests.get', side_effect=Exception("Connection error"))
+        assert expand_url(short_url) == short_url
+        assert f"Failed to expand url {short_url}" in caplog.text
+
+class TestAttributeHandling:
+    class Sample:
+        exists = "value"
+        none = None
+
+    @pytest.mark.parametrize("obj,attr,default,expected", [
+        (Sample(), "exists", "default", "value"),  # attribute is set
+        (Sample(), "none", "default", "default"),  # attribute is None
+        (Sample(), "missing", "default", "default"),  # attribute does not exist
+        (None, "anything", "fallback", "fallback"),  # object itself is None
+    ])
+    def test_getattr_or(self, obj, attr, default, expected):
+        # getattr_or must yield the attribute's value, or the default in every other case listed above
+        assert getattr_or(obj, attr, default) == expected
+
+class TestDateTimeHandling:
+    def test_datetime_encoder(self, sample_datetime):
+        # DateTimeEncoder must serialise a datetime as its str() form
+        encoded = json.dumps({"dt": sample_datetime}, cls=DateTimeEncoder)
+        assert json.loads(encoded)["dt"] == str(sample_datetime)
+
+    def test_dump_payload(self, sample_datetime):
+        # the dumped payload must contain the datetime's str() form
+        assert str(sample_datetime) in dump_payload({"timestamp": sample_datetime})
+
+    @pytest.mark.parametrize("dt_str,fmt,expected", [
+        ("2023-01-01 12:00:00+00:00", None, datetime(2023, 1, 1, 12, 0, tzinfo=timezone.utc)),
+        ("20230101 120000", "%Y%m%d %H%M%S", datetime(2023, 1, 1, 12, 0)),
+        ("invalid", None, None),
+    ])
+    def test_datetime_from_string(self, dt_str, fmt, expected):
+        parsed = get_datetime_from_str(dt_str, fmt)
+        if expected is None:
+            assert parsed is None
+            return
+        # the expected value adopts the parsed tzinfo, so only the date and time fields are compared
+        assert parsed == expected.replace(tzinfo=parsed.tzinfo)
+
+class TestDictUtils:
+    @pytest.mark.parametrize("original,update,expected", [
+        ({"a": 1}, {"b": 2}, {"a": 1, "b": 2}),  # new top-level key is added
+        ({"nested": {"a": 1}}, {"nested": {"b": 2}}, {"nested": {"a": 1, "b": 2}}),  # nested dicts are merged
+        ({"a": {"b": {"c": 1}}}, {"a": {"b": {"c": 2}}}, {"a": {"b": {"c": 2}}}),  # deep value is overwritten
+    ])
+    def test_update_nested_dict(self, original, update, expected):
+        update_nested_dict(original, update)  # return value is ignored: `original` is expected to change in place
+        assert original == expected
+
+class TestHashingUtils:
+    def test_file_hashing(self, sample_file):
+        # the sample_file fixture writes "test content", so the digest can be computed independently
+        assert calculate_file_hash(str(sample_file)) == hashlib.sha256(b"test content").hexdigest()
+
+    def test_large_file_hashing(self, tmp_path):
+        # 32MB payload
+        payload = b"0" * 16_000_000 * 2
+        large_file = tmp_path / "large.bin"
+        large_file.write_bytes(payload)
+        digest = hashlib.sha256(payload).hexdigest()
+        assert calculate_file_hash(str(large_file)) == digest
+
+class TestMiscUtils:
+    def test_random_str_length(self):
+        for length in [8, 16, 32]:
+            assert len(random_str(length)) == length
+
+    def test_random_str_raises_too_long(self):
+        # `match` verifies the message; an assert placed after the raising call inside the block never runs
+        with pytest.raises(AssertionError, match="length must be less than 32 as UUID4 is used"):
+            random_str(64)
+
+    def test_random_str_uniqueness(self):
+        assert random_str() != random_str()
+
+    @pytest.mark.parametrize("ts_input,utc,iso,expected_type", [
+        (datetime.now(), True, True, str),
+        ("2023-01-01T12:00:00+00:00", False, False, datetime),
+        (1672574400, True, True, str),
+    ])
+    def test_timestamp_parsing(self, ts_input, utc, iso, expected_type):
+        # only the return type is checked: str for the iso=True cases, datetime for iso=False
+        assert isinstance(get_timestamp(ts_input, utc=utc, iso=iso), expected_type)
+
+    def test_invalid_timestamp_returns_none(self):
+        assert get_timestamp("invalid-date") is None
Author	SHA1	Message	Date
Patrick Robertson	5211c5de18	Merge pull request #210 from bellingcat/logger_fix Fix issue #200 + Refactor _LAZY_LOADED_MODULES	2025-02-19 15:11:42 +00:00
Erin Clark	6cdefaa751	Merge pull request #194 from bellingcat/tests/add_module_tests Add unit tests for individual modules. Includes a couple of small bug fixes and light refactoring.	2025-02-19 13:51:43 +00:00
Patrick Robertson	04507577b6	Version bump	2025-02-19 13:36:50 +00:00
erinhmclark	47a634fc63	Add WACZ, Wayback and local storage tests.	2025-02-19 13:14:08 +00:00
Patrick Robertson	a9802dd004	Remove the global _LAZY_LOADED_MODULES and allow each instance of ArchivingOrchestrator to load its own modules	2025-02-19 12:25:35 +00:00
erinhmclark	a8ffb19325	Fix auth key name for cookies_from_browser.	2025-02-19 10:40:54 +00:00
Patrick Robertson	eb60b271b9	Fix issue #200	2025-02-19 10:35:14 +00:00
erinhmclark	ddf2e76624	Include Atlos Storage __init__.py for module recognition.	2025-02-19 09:24:34 +00:00
erinhmclark	10a5ad62b8	Include Atlos tests, metadata fixture.	2025-02-19 09:18:41 +00:00
erinhmclark	f0fd9bf445	Updates tests to use pytest-mock.	2025-02-18 23:32:03 +00:00
erinhmclark	657fbd357d	Merge branch 'main' into tests/add_module_tests	2025-02-18 19:47:47 +00:00
erinhmclark	7b88df72cb	Update test_metadata_enricher.py	2025-02-18 19:46:57 +00:00
Patrick Robertson	3c543a3a6a	Various fixes for issues with new architecture (#208 ) * Add formatters to the TOC - fixes #204 * Add 'steps' settings to the example YAML in the docs. Fixes #206 * Improved docs on authentication architecture * Fix setting modules on the command line - they now override any module settings in the orchestration as opposed to appending * Fix tests for gsheet-feeder: add a test service_account.json (note: not real keys in there) * Rename the command line entrypoint to _command_line_run Also: make it clear that code implementation should not call this Make sure the command line entry returns (we don't want a generator) * Fix unit tests to use now code-entry points * Version bump * Move iterating of generator up to __main__ * Breakpoint * two minor fixes * Fix unit tests + add new '__main__' entry point implementation test * Skip youtube tests if running on CI. Should still run them locally * Fix full implementation run on GH actions * Fix skipif test for GH Actions CI * Add skipifs for truth - it blocks GH: --------- Co-authored-by: msramalho <19508417+msramalho@users.noreply.github.com>	2025-02-18 19:10:09 +00:00
erinhmclark	ce5a200d1f	Added tests, updated instagram_tbot_extractor.py raise failure.	2025-02-18 12:59:10 +00:00
erinhmclark	f4c623b11b	Merge branch 'main' into tests/add_module_tests	2025-02-17 09:03:04 +00:00
Patrick Robertson	6d43bc7d4d	Fix generator programmatic setup (#197 ) * Fix returning a generator of a generator * Move download test test to pytest.mark.download	2025-02-15 17:36:44 +00:00
Miguel Sozinho Ramalho	9297697ef5	makes orchestrator.run return the results to allow for code integration (#196 )	2025-02-15 12:41:26 +00:00
erinhmclark	8ed3ef2f33	Merge branch 'main' into tests/add_module_tests	2025-02-14 12:47:40 +00:00
erinhmclark	71b41dd901	Remove accidental path, yet again.	2025-02-14 10:05:32 +00:00
erinhmclark	b0756a6a34	Remove accidental full path.	2025-02-14 09:57:44 +00:00
erinhmclark	319c1e8f92	Add more tests.	2025-02-14 09:48:37 +00:00
erinhmclark	3fce593aad	Merge branch 'main' into tests/add_module_tests	2025-02-12 19:33:29 +00:00
erinhmclark	cbe98c729d	Enricher tests	2025-02-12 19:32:40 +00:00
erinhmclark	d9d936c2ca	Thumbnail enricher fix seconds to minutes.	2025-02-12 12:22:27 +00:00