Ruff format with defaults.

2026-06-12 21:28:29 +03:00 · 2025-03-10 18:44:54 +00:00
parent cbb0414e5f
commit 85abe1837a
155 changed files with 2539 additions and 1908 deletions
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,6 +1,7 @@
 """
 pytest conftest file, for shared fixtures and configuration
 """
+
 import os
 import pickle
 from datetime import datetime, timezone
@@ -16,32 +17,34 @@ from auto_archiver.core.module import ModuleFactory
 # that you only want to run if everything else succeeds (e.g. API calls). The order here is important
 # what comes first will be run first (at the end of all other tests not mentioned)
 # format is the name of the module (python file) without the .py extension
-TESTS_TO_RUN_LAST = ['test_twitter_api_archiver']
+TESTS_TO_RUN_LAST = ["test_twitter_api_archiver"]
+

@pytest.fixture
 def setup_module(request):
    def _setup_module(module_name, config={}):
-
        module_factory = ModuleFactory()

        if isinstance(module_name, type):
            # get the module name:
            # if the class does not have a .name, use the name of the parent folder
-            module_name = module_name.__module__.rsplit(".",2)[-2]
+            module_name = module_name.__module__.rsplit(".", 2)[-2]

        m = module_factory.get_module(module_name, {module_name: config})
        # add the tmp_dir to the module
        tmp_dir = TemporaryDirectory()
        m.tmp_dir = tmp_dir.name
-        
+
        def cleanup():
            tmp_dir.cleanup()
+
        request.addfinalizer(cleanup)

        return m

    return _setup_module

+
@pytest.fixture
 def check_hash():
    def _check_hash(filename: str, hash: str):
@@ -51,6 +54,7 @@ def check_hash():

    return _check_hash

+
@pytest.fixture
 def make_item():
    def _make_item(url: str, **kwargs) -> Metadata:
@@ -62,7 +66,6 @@ def make_item():
    return _make_item


-
 def pytest_collection_modifyitems(items):
    module_mapping = {item: item.module.__name__.split(".")[-1] for item in items}

@@ -78,13 +81,13 @@ def pytest_collection_modifyitems(items):
    items[:] = sorted_items


-
 # Incremental testing - fail tests in a class if any previous test fails
 # taken from https://docs.pytest.org/en/latest/example/simple.html#incremental-testing-test-steps

 # store history of failures per test class name and per index in parametrize (if parametrize used)
 _test_failed_incremental: Dict[str, Dict[Tuple[int, ...], str]] = {}

+
 def pytest_runtest_makereport(item, call):
    if "incremental" in item.keywords:
        # incremental marker is used
@@ -93,17 +96,11 @@ def pytest_runtest_makereport(item, call):
            # retrieve the class name of the test
            cls_name = str(item.cls)
            # retrieve the index of the test (if parametrize is used in combination with incremental)
-            parametrize_index = (
-                tuple(item.callspec.indices.values())
-                if hasattr(item, "callspec")
-                else ()
-            )
+            parametrize_index = tuple(item.callspec.indices.values()) if hasattr(item, "callspec") else ()
            # retrieve the name of the test function
            test_name = item.originalname or item.name
            # store in _test_failed_incremental the original name of the failed test
-            _test_failed_incremental.setdefault(cls_name, {}).setdefault(
-                parametrize_index, test_name
-            )
+            _test_failed_incremental.setdefault(cls_name, {}).setdefault(parametrize_index, test_name)


 def pytest_runtest_setup(item):
@@ -119,16 +116,17 @@ def pytest_runtest_setup(item):
                pytest.xfail(f"previous test failed ({test_name})")


-
@pytest.fixture()
 def unpickle():
    """
    Returns a helper function that unpickles a file
    ** gets the file from the test_files directory: tests/data/ **
    """
+
    def _unpickle(path):
        with open(os.path.join("tests/data", path), "rb") as f:
            return pickle.load(f)
+
    return _unpickle


@@ -156,4 +154,4 @@ def metadata():
    metadata = Metadata()
    metadata.set("_processed_at", "2021-01-01T00:00:00")
    metadata.set_url("https://example.com")
-    return metadata
+    return metadata
--- a/tests/data/dropin.py
+++ b/tests/data/dropin.py
@@ -1,5 +1,6 @@
 # this is a dummy class used to test importing a dropin in the
 #  generic extractor by filename/path

+
 class Dropin:
-    pass
+    pass
--- a/tests/data/test_modules/example_module/init.py
+++ b/tests/data/test_modules/example_module/init.py
@@ -1 +1 @@
-from .example_module import ExampleModule
+from .example_module import ExampleModule
--- a/tests/data/test_modules/example_module/manifest.py
+++ b/tests/data/test_modules/example_module/manifest.py
@@ -16,14 +16,14 @@
    "dependencies": {
        "python": ["loguru"],
        "bin": ["bash"],
-        },
-    # configurations that this module takes. These are argparse-compliant dicationaries, that are 
+    },
+    # configurations that this module takes. These are argparse-compliant dictionaries, that are
    # used to create command line arguments when the programme is run.
    # The full name of the config option will become: `module_name.config_name`
    "configs": {
-            "csv_file": {"default": "db.csv", "help": "CSV file name"},
-            "required_field": {"required": True, "help": "required field in the CSV file"},
-        },
+        "csv_file": {"default": "db.csv", "help": "CSV file name"},
+        "required_field": {"required": True, "help": "required field in the CSV file"},
+    },
    # A description of the module, used for documentation
    "description": "This is an example module",
-}
+}
--- a/tests/data/test_modules/example_module/example_module.py
+++ b/tests/data/test_modules/example_module/example_module.py
@@ -1,5 +1,6 @@
 from auto_archiver.core import Extractor, Enricher, Feeder, Database, Storage, Formatter, Metadata

+
 class ExampleModule(Extractor, Enricher, Feeder, Database, Storage, Formatter):
    def download(self, item):
        print("download")
@@ -7,7 +8,6 @@ class ExampleModule(Extractor, Enricher, Feeder, Database, Storage, Formatter):
    def __iter__(self):
        yield Metadata().set_url("https://example.com")

-    
    def done(self, result):
        print("done")

@@ -16,13 +16,12 @@ class ExampleModule(Extractor, Enricher, Feeder, Database, Storage, Formatter):

    def get_cdn_url(self, media):
        return "nice_url"
-    
+
    def save(self, item):
        print("save")
-    
+
    def uploadf(self, file, key, **kwargs):
        print("uploadf")

-    
    def format(self, item):
        print("format")
--- a/tests/databases/test_api_db.py
+++ b/tests/databases/test_api_db.py
@@ -41,9 +41,16 @@ def test_fetch(api_db, metadata, mocker):
    mock_datetime = mocker.patch("auto_archiver.core.metadata.datetime.datetime")
    mock_datetime.now.return_value = "2021-01-01T00:00:00"
    mock_get.return_value.status_code = 200
-    mock_get.return_value.json.return_value = [{"result": {}}, {"result":
-        {'media': [], 'metadata': {'_processed_at': '2021-01-01T00:00:00', 'url': 'https://example.com'},
-         'status': 'no archiver'}}]
+    mock_get.return_value.json.return_value = [
+        {"result": {}},
+        {
+            "result": {
+                "media": [],
+                "metadata": {"_processed_at": "2021-01-01T00:00:00", "url": "https://example.com"},
+                "status": "no archiver",
+            }
+        },
+    ]
    assert api_db.fetch(metadata) == metadata


@@ -52,8 +59,15 @@ def test_done_success(api_db, metadata, mocker):
    mock_post.return_value.status_code = 201
    api_db.done(metadata)
    mock_post.assert_called_once()
-    mock_post.assert_called_once_with("https://api.example.com/interop/submit-archive",
-                                      json={'author_id': 'Someone', 'url': 'https://example.com',
-                                            'public': False, 'group_id': '123', 'tags': ['[', ']'], 'result': '{"status": "no archiver", "metadata": {"_processed_at": "2021-01-01T00:00:00", "url": "https://example.com"}, "media": []}'},
-                                      headers={'Authorization': 'Bearer test-token'})
-
+    mock_post.assert_called_once_with(
+        "https://api.example.com/interop/submit-archive",
+        json={
+            "author_id": "Someone",
+            "url": "https://example.com",
+            "public": False,
+            "group_id": "123",
+            "tags": ["[", "]"],
+            "result": '{"status": "no archiver", "metadata": {"_processed_at": "2021-01-01T00:00:00", "url": "https://example.com"}, "media": []}',
+        },
+        headers={"Authorization": "Bearer test-token"},
+    )
--- a/tests/databases/test_atlos_db.py
+++ b/tests/databases/test_atlos_db.py
@@ -50,9 +50,7 @@ def test_failed_with_atlos_id(atlos_db, metadata, mocker):
    post_mock = mocker.patch.object(atlos_db, "_post", return_value=fake_resp)
    atlos_db.failed(metadata, "failure reason")
    expected_endpoint = f"/api/v2/source_material/metadata/42/auto_archiver"
-    expected_json = {
-        "metadata": {"processed": True, "status": "error", "error": "failure reason"}
-    }
+    expected_json = {"metadata": {"processed": True, "status": "error", "error": "failure reason"}}
    post_mock.assert_called_once_with(expected_endpoint, json=expected_json)


--- a/tests/databases/test_csv_db.py
+++ b/tests/databases/test_csv_db.py
@@ -1,4 +1,3 @@
-
 from auto_archiver.modules.csv_db import CSVDb
 from auto_archiver.core import Metadata

@@ -9,12 +8,21 @@ def test_store_item(tmp_path, setup_module):
    temp_db = tmp_path / "temp_db.csv"
    db = setup_module(CSVDb, {"csv_file": temp_db.as_posix()})

-    item = Metadata().set_url("http://example.com").set_title("Example").set_content("Example content").success("my-archiver")
+    item = (
+        Metadata()
+        .set_url("http://example.com")
+        .set_title("Example")
+        .set_content("Example content")
+        .success("my-archiver")
+    )

    db.done(item)

    with open(temp_db, "r", encoding="utf-8") as f:
-        assert f.read().strip() == f"status,metadata,media\nmy-archiver: success,\"{{'_processed_at': {repr(item.get('_processed_at'))}, 'url': 'http://example.com', 'title': 'Example', 'content': 'Example content'}}\",[]"
+        assert (
+            f.read().strip()
+            == f"status,metadata,media\nmy-archiver: success,\"{{'_processed_at': {repr(item.get('_processed_at'))}, 'url': 'http://example.com', 'title': 'Example', 'content': 'Example content'}}\",[]"
+        )

    # TODO: csv db doesn't have a fetch method - need to add it (?)
-    # assert db.fetch(item) == item
+    # assert db.fetch(item) == item
--- a/tests/databases/test_gsheet_db.py
+++ b/tests/databases/test_gsheet_db.py
@@ -28,6 +28,7 @@ def mock_metadata(mocker):
    metadata.get_first_image.return_value = None
    return metadata

+
@pytest.fixture
 def metadata():
    metadata = Metadata()
@@ -51,6 +52,7 @@ def mock_media(mocker):
    mock_media.get.return_value = "not-calculated"
    return mock_media

+
@pytest.fixture
 def gsheets_db(mock_gworksheet, setup_module, mocker):
    mocker.patch("gspread.service_account")
@@ -59,7 +61,22 @@ def gsheets_db(mock_gworksheet, setup_module, mocker):
        "sheet_id": None,
        "header": 1,
        "service_account": "test/service_account.json",
-        "columns": {'url': 'link', 'status': 'archive status', 'folder': 'destination folder', 'archive': 'archive location', 'date': 'archive date', 'thumbnail': 'thumbnail', 'timestamp': 'upload timestamp', 'title': 'upload title', 'text': 'text content', 'screenshot': 'screenshot', 'hash': 'hash', 'pdq_hash': 'perceptual hashes', 'wacz': 'wacz', 'replaywebpage': 'replaywebpage'},
+        "columns": {
+            "url": "link",
+            "status": "archive status",
+            "folder": "destination folder",
+            "archive": "archive location",
+            "date": "archive date",
+            "thumbnail": "thumbnail",
+            "timestamp": "upload timestamp",
+            "title": "upload title",
+            "text": "text content",
+            "screenshot": "screenshot",
+            "hash": "hash",
+            "pdq_hash": "perceptual hashes",
+            "wacz": "wacz",
+            "replaywebpage": "replaywebpage",
+        },
        "allow_worksheets": set(),
        "block_worksheets": set(),
        "use_sheet_names_in_stored_paths": True,
@@ -78,20 +95,21 @@ def fixed_timestamp():
@pytest.fixture
 def expected_calls(mock_media, fixed_timestamp):
    """Fixture for the expected cell updates."""
-    return  [
-        (1, 'status', 'my-archiver: success'),
-        (1, 'archive', 'http://example.com/screenshot.png'),
-        (1, 'date', '2025-02-01T00:00:00+00:00'),
-        (1, 'title', 'Example Title'),
-        (1, 'text', 'Example Content'),
-        (1, 'timestamp', '2025-01-01T00:00:00+00:00'),
-        (1, 'hash', 'not-calculated'),
+    return [
+        (1, "status", "my-archiver: success"),
+        (1, "archive", "http://example.com/screenshot.png"),
+        (1, "date", "2025-02-01T00:00:00+00:00"),
+        (1, "title", "Example Title"),
+        (1, "text", "Example Content"),
+        (1, "timestamp", "2025-01-01T00:00:00+00:00"),
+        (1, "hash", "not-calculated"),
        # (1, 'screenshot', 'http://example.com/screenshot.png'),
        # (1, 'thumbnail', '=IMAGE("http://example.com/thumbnail.png")'),
        # (1, 'wacz', 'http://example.com/browsertrix.wacz'),
        # (1, 'replaywebpage', 'https://replayweb.page/?source=http%3A%2F%2Fexample.com%2Fbrowsertrix.wacz#view=pages&url=')
    ]

+
 def test_retrieve_gsheet(gsheets_db, metadata, mock_gworksheet):
    gw, row = gsheets_db._retrieve_gsheet(metadata)
    assert gw == mock_gworksheet
@@ -100,27 +118,34 @@ def test_retrieve_gsheet(gsheets_db, metadata, mock_gworksheet):

 def test_started(gsheets_db, mock_metadata, mock_gworksheet):
    gsheets_db.started(mock_metadata)
-    mock_gworksheet.set_cell.assert_called_once_with(1, 'status', 'Archive in progress')
+    mock_gworksheet.set_cell.assert_called_once_with(1, "status", "Archive in progress")
+

 def test_failed(gsheets_db, mock_metadata, mock_gworksheet):
    reason = "Test failure"
    gsheets_db.failed(mock_metadata, reason)
-    mock_gworksheet.set_cell.assert_called_once_with(1, 'status', f'Archive failed {reason}')
+    mock_gworksheet.set_cell.assert_called_once_with(1, "status", f"Archive failed {reason}")


 def test_aborted(gsheets_db, mock_metadata, mock_gworksheet):
    gsheets_db.aborted(mock_metadata)
-    mock_gworksheet.set_cell.assert_called_once_with(1, 'status', '')
+    mock_gworksheet.set_cell.assert_called_once_with(1, "status", "")


 def test_done(gsheets_db, metadata, mock_gworksheet, expected_calls, mocker):
-    mocker.patch("auto_archiver.modules.gsheet_feeder_db.gsheet_feeder_db.get_current_timestamp", return_value='2025-02-01T00:00:00+00:00')
+    mocker.patch(
+        "auto_archiver.modules.gsheet_feeder_db.gsheet_feeder_db.get_current_timestamp",
+        return_value="2025-02-01T00:00:00+00:00",
+    )
    gsheets_db.done(metadata)
    mock_gworksheet.batch_set_cell.assert_called_once_with(expected_calls)


 def test_done_cached(gsheets_db, metadata, mock_gworksheet, mocker):
-    mocker.patch("auto_archiver.modules.gsheet_feeder_db.gsheet_feeder_db.get_current_timestamp", return_value='2025-02-01T00:00:00+00:00')
+    mocker.patch(
+        "auto_archiver.modules.gsheet_feeder_db.gsheet_feeder_db.get_current_timestamp",
+        return_value="2025-02-01T00:00:00+00:00",
+    )
    gsheets_db.done(metadata, cached=True)

    # Verify the status message includes "[cached]"
@@ -131,15 +156,17 @@ def test_done_cached(gsheets_db, metadata, mock_gworksheet, mocker):
 def test_done_missing_media(gsheets_db, metadata, mock_gworksheet, mocker):
    # clear media from metadata
    metadata.media = []
-    mocker.patch("auto_archiver.modules.gsheet_feeder_db.gsheet_feeder_db.get_current_timestamp", return_value='2025-02-01T00:00:00+00:00')
+    mocker.patch(
+        "auto_archiver.modules.gsheet_feeder_db.gsheet_feeder_db.get_current_timestamp",
+        return_value="2025-02-01T00:00:00+00:00",
+    )
    gsheets_db.done(metadata)
    # Verify nothing media-related gets updated
    call_args = mock_gworksheet.batch_set_cell.call_args[0][0]
-    media_fields = {'archive', 'screenshot', 'thumbnail', 'wacz', 'replaywebpage'}
+    media_fields = {"archive", "screenshot", "thumbnail", "wacz", "replaywebpage"}
    assert all(call[1] not in media_fields for call in call_args)

+
 def test_safe_status_update(gsheets_db, metadata, mock_gworksheet):
    gsheets_db._safe_status_update(metadata, "Test status")
-    mock_gworksheet.set_cell.assert_called_once_with(1, 'status', 'Test status')
-
-
+    mock_gworksheet.set_cell.assert_called_once_with(1, "status", "Test status")
--- a/tests/enrichers/test_hash_enricher.py
+++ b/tests/enrichers/test_hash_enricher.py
@@ -4,34 +4,50 @@ from auto_archiver.modules.hash_enricher import HashEnricher
 from auto_archiver.core import Metadata, Media
 from auto_archiver.core.module import ModuleFactory

-@pytest.mark.parametrize("algorithm, filename, expected_hash", [
-    ("SHA-256", "tests/data/testfile_1.txt", "1b4f0e9851971998e732078544c96b36c3d01cedf7caa332359d6f1d83567014"),
-    ("SHA-256", "tests/data/testfile_2.txt", "60303ae22b998861bce3b28f33eec1be758a213c86c93c076dbe9f558c11c752"),
-    ("SHA3-512", "tests/data/testfile_1.txt", "d2d8cc4f369b340130bd2b29b8b54e918b7c260c3279176da9ccaa37c96eb71735fc97568e892dc6220bf4ae0d748edb46bd75622751556393be3f482e6f794e"),
-    ("SHA3-512", "tests/data/testfile_2.txt", "e35970edaa1e0d8af7d948491b2da0450a49fd9cc1e83c5db4c6f175f9550cf341f642f6be8cfb0bfa476e4258e5088c5ad549087bf02811132ac2fa22b734c6")
-])
+
+@pytest.mark.parametrize(
+    "algorithm, filename, expected_hash",
+    [
+        ("SHA-256", "tests/data/testfile_1.txt", "1b4f0e9851971998e732078544c96b36c3d01cedf7caa332359d6f1d83567014"),
+        ("SHA-256", "tests/data/testfile_2.txt", "60303ae22b998861bce3b28f33eec1be758a213c86c93c076dbe9f558c11c752"),
+        (
+            "SHA3-512",
+            "tests/data/testfile_1.txt",
+            "d2d8cc4f369b340130bd2b29b8b54e918b7c260c3279176da9ccaa37c96eb71735fc97568e892dc6220bf4ae0d748edb46bd75622751556393be3f482e6f794e",
+        ),
+        (
+            "SHA3-512",
+            "tests/data/testfile_2.txt",
+            "e35970edaa1e0d8af7d948491b2da0450a49fd9cc1e83c5db4c6f175f9550cf341f642f6be8cfb0bfa476e4258e5088c5ad549087bf02811132ac2fa22b734c6",
+        ),
+    ],
+)
 def test_calculate_hash(algorithm, filename, expected_hash, setup_module):
    # test SHA-256
    he = setup_module(HashEnricher, {"algorithm": algorithm, "chunksize": 100})
    assert he.calculate_hash(filename) == expected_hash

+
 def test_default_config_values(setup_module):
    he = setup_module(HashEnricher)
    assert he.algorithm == "SHA-256"
    assert he.chunksize == 16000000

+
 def test_config():
    # test default config
-    c = ModuleFactory().get_module_lazy('hash_enricher').configs
+    c = ModuleFactory().get_module_lazy("hash_enricher").configs
    assert c["algorithm"]["default"] == "SHA-256"
    assert c["chunksize"]["default"] == 16000000
    assert c["algorithm"]["choices"] == ["SHA-256", "SHA3-512"]
    assert c["algorithm"]["help"] == "hash algorithm to use"
-    assert c["chunksize"]["help"] == "number of bytes to use when reading files in chunks (if this value is too large you will run out of RAM), default is 16MB"
+    assert (
+        c["chunksize"]["help"]
+        == "number of bytes to use when reading files in chunks (if this value is too large you will run out of RAM), default is 16MB"
+    )


 def test_hash_media(setup_module):
-
    he = setup_module(HashEnricher, {"algorithm": "SHA-256", "chunksize": 1})

    # generate metadata with two test files
@@ -46,4 +62,4 @@ def test_hash_media(setup_module):
    he.enrich(m)

    assert m.media[0].get("hash") == "SHA-256:1b4f0e9851971998e732078544c96b36c3d01cedf7caa332359d6f1d83567014"
-    assert m.media[1].get("hash") == "SHA-256:60303ae22b998861bce3b28f33eec1be758a213c86c93c076dbe9f558c11c752"
+    assert m.media[1].get("hash") == "SHA-256:60303ae22b998861bce3b28f33eec1be758a213c86c93c076dbe9f558c11c752"
--- a/tests/enrichers/test_meta_enricher.py
+++ b/tests/enrichers/test_meta_enricher.py
@@ -16,6 +16,7 @@ def mock_metadata(mocker):
    mock.get_all_media.return_value = []
    return mock

+
@pytest.fixture
 def mock_media(mocker):
    """Creates a mock Media object."""
@@ -59,6 +60,7 @@ def test_enrich_file_sizes(meta_enricher, metadata, tmp_path):
    assert metadata.get("total_bytes") == 3000
    assert metadata.get("total_size") == "2.9 KB"

+
@pytest.mark.parametrize(
    "size, expected",
    [
@@ -74,6 +76,7 @@ def test_human_readable_bytes(size, expected):
    enricher = MetaEnricher()
    assert enricher.human_readable_bytes(size) == expected

+
 def test_enrich_file_sizes_no_media(meta_enricher, metadata):
    """Test that enrich_file_sizes() handles empty media list gracefully."""
    meta_enricher.enrich_file_sizes(metadata)
@@ -91,4 +94,4 @@ def test_enrich_archive_duration(meta_enricher, metadata, mocker):
    mock_datetime.now.return_value = mock_now
    meta_enricher.enrich_archive_duration(metadata)

-    assert metadata.get("archive_duration_seconds") == 630
+    assert metadata.get("archive_duration_seconds") == 630
--- a/tests/enrichers/test_metadata_enricher.py
+++ b/tests/enrichers/test_metadata_enricher.py
@@ -1,4 +1,3 @@
-
 import pytest

 from auto_archiver.core import Media
@@ -33,9 +32,7 @@ def test_get_metadata(enricher, output, expected, mocker):

    result = enricher.get_metadata("test.jpg")
    assert result == expected
-    mock_run.assert_called_once_with(
-        ["exiftool", "test.jpg"], capture_output=True, text=True
-    )
+    mock_run.assert_called_once_with(["exiftool", "test.jpg"], capture_output=True, text=True)


 def test_get_metadata_exiftool_not_found(enricher, mocker):
@@ -85,4 +82,3 @@ def test_metadata_pickle(enricher, unpickle, mocker):
    actual_media = metadata.media
    assert len(expected_media) == len(actual_media)
    assert actual_media[0].properties.get("metadata") == expected_media[0].properties.get("metadata")
-
--- a/tests/enrichers/test_pdq_hash_enricher.py
+++ b/tests/enrichers/test_pdq_hash_enricher.py
@@ -57,7 +57,7 @@ def test_enrich_handles_corrupted_image(metadata_with_images, mocker):
        ("screenshot", False),
        ("warc-file-123", False),
        ("regular-image", True),
-    ]
+    ],
 )
 def test_enrich_excludes_by_filetype(media_id, should_have_hash, mocker):
    metadata = Metadata()
@@ -73,4 +73,3 @@ def test_enrich_excludes_by_filetype(media_id, should_have_hash, mocker):

    media_item = metadata.media[0]
    assert (media_item.get("pdq_hash") is not None) == should_have_hash
-
--- a/tests/enrichers/test_screenshot_enricher.py
+++ b/tests/enrichers/test_screenshot_enricher.py
@@ -19,9 +19,11 @@ def mock_selenium_env(mocker):
    mock_popen = mocker.patch("subprocess.Popen")
    mock_is_connectable = mocker.patch("selenium.webdriver.common.service.Service.is_connectable", return_value=True)
    mock_firefox_options = mocker.patch("selenium.webdriver.FirefoxOptions")
+
    # Define side effect for `shutil.which`
    def mock_which_side_effect(dep):
        return "/mock/geckodriver" if dep == "geckodriver" else None
+
    mock_which.side_effect = mock_which_side_effect

    # Mock binary paths
@@ -104,13 +106,7 @@ def test_enrich_adds_screenshot(
    ],
 )
 def test_enrich_auth_wall(
-    screenshot_enricher,
-    metadata_with_video,
-    mock_selenium_env,
-    common_patches,
-    url,
-    is_auth,
-    mocker
+    screenshot_enricher, metadata_with_video, mock_selenium_env, common_patches, url, is_auth, mocker
 ):
    # Testing with and without is_auth_wall
    mock_driver, mock_driver_class, _ = mock_selenium_env
@@ -128,9 +124,7 @@ def test_enrich_auth_wall(
        assert metadata_with_video.media[1].properties.get("id") == "screenshot"


-def test_handle_timeout_exception(
-    screenshot_enricher, metadata_with_video, mock_selenium_env, mocker
-):
+def test_handle_timeout_exception(screenshot_enricher, metadata_with_video, mock_selenium_env, mocker):
    mock_driver, mock_driver_class, mock_options_instance = mock_selenium_env

    mock_driver.get.side_effect = TimeoutException
@@ -140,9 +134,7 @@ def test_handle_timeout_exception(
    assert len(metadata_with_video.media) == 1


-def test_handle_general_exception(
-    screenshot_enricher, metadata_with_video, mock_selenium_env, mocker
-):
+def test_handle_general_exception(screenshot_enricher, metadata_with_video, mock_selenium_env, mocker):
    """Test proper handling of unexpected general exceptions"""
    mock_driver, mock_driver_class, mock_options_instance = mock_selenium_env
    # Simulate a generic exception when save_screenshot is called
@@ -152,9 +144,7 @@ def test_handle_general_exception(
    mock_log = mocker.patch("loguru.logger.error")
    screenshot_enricher.enrich(metadata_with_video)
    # Verify that the exception was logged with the log
-    mock_log.assert_called_once_with(
-        "Got error while loading webdriver for screenshot enricher: Unexpected Error"
-    )
+    mock_log.assert_called_once_with("Got error while loading webdriver for screenshot enricher: Unexpected Error")
    # And no new media was added due to the error
    assert len(metadata_with_video.media) == 1

--- a/tests/enrichers/test_ssl_enricher.py
+++ b/tests/enrichers/test_ssl_enricher.py
@@ -51,4 +51,3 @@ def test_ssl_error_handling(enricher, metadata, mocker):
    mocker.patch("ssl.get_server_certificate", side_effect=ssl.SSLError("SSL error"))
    with pytest.raises(ssl.SSLError, match="SSL error"):
        enricher.enrich(metadata)
-
--- a/tests/enrichers/test_thumbnail_enricher.py
+++ b/tests/enrichers/test_thumbnail_enricher.py
@@ -25,7 +25,7 @@ def mock_ffmpeg_environment(mocker):
    # Mocking all the ffmpeg calls in one place
    mock_ffmpeg_input = mocker.patch("ffmpeg.input")
    mock_makedirs = mocker.patch("os.makedirs")
-    mocker.patch.object(Media, "is_video", return_value=True),
+    (mocker.patch.object(Media, "is_video", return_value=True),)
    mock_probe = mocker.patch(
        "ffmpeg.probe",
        return_value={
@@ -35,9 +35,7 @@ def mock_ffmpeg_environment(mocker):
        },
    )
    mock_output = mocker.MagicMock()
-    mock_ffmpeg_input.return_value.filter.return_value.output.return_value = (
-        mock_output
-    )
+    mock_ffmpeg_input.return_value.filter.return_value.output.return_value = mock_output

    return {
        "mock_ffmpeg_input": mock_ffmpeg_input,
@@ -47,14 +45,21 @@ def mock_ffmpeg_environment(mocker):
    }


-@pytest.mark.parametrize("thumbnails_per_minute, max_thumbnails, expected_count", [
-    (10, 5, 5),  # Capped at max_thumbnails
-    (1, 10, 2),  # Less than max_thumbnails
-    (60, 7, 7),  # Matches exactly
-])
+@pytest.mark.parametrize(
+    "thumbnails_per_minute, max_thumbnails, expected_count",
+    [
+        (10, 5, 5),  # Capped at max_thumbnails
+        (1, 10, 2),  # Less than max_thumbnails
+        (60, 7, 7),  # Matches exactly
+    ],
+)
 def test_enrich_thumbnail_limits(
-    thumbnail_enricher, metadata_with_video, mock_ffmpeg_environment,
-    thumbnails_per_minute, max_thumbnails, expected_count
+    thumbnail_enricher,
+    metadata_with_video,
+    mock_ffmpeg_environment,
+    thumbnails_per_minute,
+    max_thumbnails,
+    expected_count,
 ):
    thumbnail_enricher.thumbnails_per_minute = thumbnails_per_minute
    thumbnail_enricher.max_thumbnails = max_thumbnails
@@ -65,8 +70,8 @@ def test_enrich_thumbnail_limits(
    thumbnails = metadata_with_video.media[0].get("thumbnails")
    assert len(thumbnails) == expected_count

-def test_enrich_handles_probe_failure(thumbnail_enricher, metadata_with_video, mocker):

+def test_enrich_handles_probe_failure(thumbnail_enricher, metadata_with_video, mocker):
    mocker.patch("ffmpeg.probe", side_effect=Exception("Probe error"))
    mocker.patch("os.makedirs")
    mock_logger = mocker.patch("loguru.logger.error")
@@ -74,36 +79,43 @@ def test_enrich_handles_probe_failure(thumbnail_enricher, metadata_with_video, m

    thumbnail_enricher.enrich(metadata_with_video)
    # Ensure error was logged
-    mock_logger.assert_called_with(
-        f"error getting duration of video video.mp4: Probe error"
-    )
+    mock_logger.assert_called_with(f"error getting duration of video video.mp4: Probe error")
    # Ensure no thumbnails were created
    thumbnails = metadata_with_video.media[0].get("thumbnails")
    assert thumbnails is None


 def test_enrich_skips_non_video_files(thumbnail_enricher, metadata_with_video, mocker):
-        mocker.patch.object(Media, "is_video", return_value=False)
-        mock_ffmpeg = mocker.patch("ffmpeg.input")
-        thumbnail_enricher.enrich(metadata_with_video)
-        mock_ffmpeg.assert_not_called()
+    mocker.patch.object(Media, "is_video", return_value=False)
+    mock_ffmpeg = mocker.patch("ffmpeg.input")
+    thumbnail_enricher.enrich(metadata_with_video)
+    mock_ffmpeg.assert_not_called()


-@pytest.mark.parametrize("thumbnails_per_minute,max_thumbnails,expected_count", [
-    (60, 5, 5), # caught by max
-    (60, 20, 10), # caught by t/min
-    (0, 20, 1), # test min caught (1)
-    (11, 20, 1), # test min caught (1)
-    (12, 20, 2), # test caught by t/min
-])
+@pytest.mark.parametrize(
+    "thumbnails_per_minute,max_thumbnails,expected_count",
+    [
+        (60, 5, 5),  # caught by max
+        (60, 20, 10),  # caught by t/min
+        (0, 20, 1),  # test min caught (1)
+        (11, 20, 1),  # test min caught (1)
+        (12, 20, 2),  # test caught by t/min
+    ],
+)
 def test_enrich_handles_short_video(
-    thumbnail_enricher, metadata_with_video, mock_ffmpeg_environment, thumbnails_per_minute, max_thumbnails, expected_count, mocker
+    thumbnail_enricher,
+    metadata_with_video,
+    mock_ffmpeg_environment,
+    thumbnails_per_minute,
+    max_thumbnails,
+    expected_count,
+    mocker,
 ):
    # override mock duration
    fake_duration = 10
    mocker.patch(
        "ffmpeg.probe",
-        return_value={ "streams": [{"codec_type": "video", "duration": str(fake_duration)}]},
+        return_value={"streams": [{"codec_type": "video", "duration": str(fake_duration)}]},
    )
    thumbnail_enricher.thumbnails_per_minute = thumbnails_per_minute
    thumbnail_enricher.max_thumbnails = max_thumbnails
@@ -114,9 +126,7 @@ def test_enrich_handles_short_video(
    assert len(thumbnails) == expected_count


-def test_uses_existing_duration(
-    thumbnail_enricher, metadata_with_video, mock_ffmpeg_environment
-):
+def test_uses_existing_duration(thumbnail_enricher, metadata_with_video, mock_ffmpeg_environment):
    metadata_with_video.media[0].set("duration", 60)
    thumbnail_enricher.enrich(metadata_with_video)
    mock_ffmpeg_environment["mock_probe"].assert_not_called()
@@ -125,7 +135,7 @@ def test_uses_existing_duration(

 def test_enrich_metadata_structure(thumbnail_enricher, metadata_with_video, mock_ffmpeg_environment, mocker):
    fake_duration = 120
-    mocker.patch("ffmpeg.probe", return_value={'streams': [{'codec_type': 'video', 'duration': str(fake_duration)}]})
+    mocker.patch("ffmpeg.probe", return_value={"streams": [{"codec_type": "video", "duration": str(fake_duration)}]})
    thumbnail_enricher.thumbnails_per_minute = 2
    thumbnail_enricher.max_thumbnails = 4

--- a/tests/enrichers/test_wayback_enricher.py
+++ b/tests/enrichers/test_wayback_enricher.py
@@ -8,34 +8,43 @@ from auto_archiver.core import Metadata
@pytest.fixture
 def mock_is_auth_wall(mocker):
    """Fixture to mock is_auth_wall behavior."""
+
    def _mock_is_auth_wall(return_value: bool):
        return mocker.patch("auto_archiver.utils.url.is_auth_wall", return_value=return_value)
+
    return _mock_is_auth_wall

+
@pytest.fixture
 def mock_post_success(mocker):
    """Fixture to mock POST requests with a successful response."""
+
    def _mock_post(json_data: dict = None, status_code: int = 200):
        json_data = {"job_id": "job123"} if json_data is None else json_data
        resp = mocker.Mock(status_code=status_code)
        resp.json.return_value = json_data
        return mocker.patch("requests.post", return_value=resp)
+
    return _mock_post

+
@pytest.fixture
 def mock_get_success(mocker):
    """Fixture to mock GET requests returning a completed archive status."""
+
    def _mock_get(json_data: dict = None, status_code: int = 200):
        json_data = json_data or {
            "status": "success",
            "timestamp": "20250101010101",
-            "original_url": "https://example.com"
+            "original_url": "https://example.com",
        }
        resp = mocker.Mock(status_code=status_code)
        resp.json.return_value = json_data
        return mocker.patch("requests.get", return_value=resp)
+
    return _mock_get

+
@pytest.fixture
 def wayback_extractor_enricher(setup_module) -> WaybackExtractorEnricher:
    configs: dict = {
@@ -49,12 +58,7 @@ def wayback_extractor_enricher(setup_module) -> WaybackExtractorEnricher:
    return setup_module("wayback_extractor_enricher", configs)


-def test_download_success(
-    wayback_extractor_enricher,
-    mock_is_auth_wall,
-    mock_post_success,
-    mock_get_success
-):
+def test_download_success(wayback_extractor_enricher, mock_is_auth_wall, mock_post_success, mock_get_success):
    mock_is_auth_wall(False)
    mock_post_success()
    mock_get_success()
@@ -63,34 +67,28 @@ def test_download_success(
    result = wayback_extractor_enricher.download(metadata)
    assert result.get("wayback") == "https://web.archive.org/web/20250101010101/https://example.com"

+
 def test_enrich_auth_wall(wayback_extractor_enricher, metadata, mock_is_auth_wall):
    mock_is_auth_wall(True)
    result = wayback_extractor_enricher.enrich(metadata)
    assert result is None

+
 def test_enrich_already_enriched(wayback_extractor_enricher, metadata):
    metadata.set("wayback", "existing")
    result = wayback_extractor_enricher.enrich(metadata)
    assert result is True

-def test_enrich_post_failure(
-    wayback_extractor_enricher,
-        metadata,
-    mock_is_auth_wall,
-    mock_post_success
-):
+
+def test_enrich_post_failure(wayback_extractor_enricher, metadata, mock_is_auth_wall, mock_post_success):
    mock_is_auth_wall(False)
    mock_post_success(json_data={"error": "server error"}, status_code=500)
    result = wayback_extractor_enricher.enrich(metadata)
    assert result is False
    assert "Internet archive failed with status of 500" in metadata.get("wayback")

-def test_enrich_post_json_decode_error(
-    wayback_extractor_enricher,
-        metadata,
-    mock_is_auth_wall,
-    mocker
-):
+
+def test_enrich_post_json_decode_error(wayback_extractor_enricher, metadata, mock_is_auth_wall, mocker):
    mock_is_auth_wall(False)
    resp = mocker.Mock(status_code=200)
    resp.json.side_effect = json.decoder.JSONDecodeError("msg", "doc", 0)
@@ -98,22 +96,15 @@ def test_enrich_post_json_decode_error(
    mocker.patch("requests.post", return_value=resp)
    assert wayback_extractor_enricher.enrich(metadata) is False

-def test_enrich_no_job_id(
-    wayback_extractor_enricher,
-        metadata,
-    mock_is_auth_wall,
-    mock_post_success
-):
+
+def test_enrich_no_job_id(wayback_extractor_enricher, metadata, mock_is_auth_wall, mock_post_success):
    mock_is_auth_wall(False)
    mock_post_success(json_data={})
    assert wayback_extractor_enricher.enrich(metadata) is False

+
 def test_enrich_get_success(
-    wayback_extractor_enricher,
-        metadata,
-    mock_is_auth_wall,
-    mock_post_success,
-    mock_get_success
+    wayback_extractor_enricher, metadata, mock_is_auth_wall, mock_post_success, mock_get_success
 ):
    mock_is_auth_wall(False)
    mock_post_success()
@@ -122,24 +113,18 @@ def test_enrich_get_success(
    assert metadata.get("wayback") == "https://web.archive.org/web/20250101010101/https://example.com"
    assert metadata.get("check wayback") == "https://web.archive.org/web/*/https://example.com"

+
 def test_enrich_get_failure(
-    wayback_extractor_enricher,
-        metadata,
-    mock_is_auth_wall,
-    mock_post_success,
-    mock_get_success
+    wayback_extractor_enricher, metadata, mock_is_auth_wall, mock_post_success, mock_get_success
 ):
    mock_is_auth_wall(False)
    mock_post_success()
    mock_get_success(json_data={"status": "failed"}, status_code=400)
    assert wayback_extractor_enricher.enrich(metadata) is False

+
 def test_enrich_get_request_exception(
-    wayback_extractor_enricher,
-        metadata,
-    mock_is_auth_wall,
-    mock_post_success,
-    mocker
+    wayback_extractor_enricher, metadata, mock_is_auth_wall, mock_post_success, mocker
 ):
    mock_is_auth_wall(False)
    mock_post_success()
@@ -149,12 +134,9 @@ def test_enrich_get_request_exception(
    assert wayback_extractor_enricher.enrich(metadata) is True
    assert metadata.get("wayback").get("job_id") == "job123"

+
 def test_enrich_get_json_decode_error(
-    wayback_extractor_enricher,
-        metadata,
-    mock_is_auth_wall,
-    mock_post_success,
-    mocker
+    wayback_extractor_enricher, metadata, mock_is_auth_wall, mock_post_success, mocker
 ):
    mock_is_auth_wall(False)
    mock_post_success()
--- a/tests/enrichers/test_whisper_enricher.py
+++ b/tests/enrichers/test_whisper_enricher.py
@@ -16,7 +16,7 @@ def enricher(mocker):
        "include_srt": False,
        "timeout": 5,
        "action": "translate",
-        "steps": {"storages": ["s3_storage"]}
+        "steps": {"storages": ["s3_storage"]},
    }
    mock_s3 = mocker.MagicMock(spec=S3Storage)
    mock_s3.get_cdn_url.return_value = TEST_S3_URL
@@ -25,7 +25,7 @@ def enricher(mocker):
    instance.display_name = "Whisper Enricher"
    instance.config_setup({instance.name: config})
    # bypassing the setup method and mocking S3 setup
-    instance.stores = config['steps']['storages']
+    instance.stores = config["steps"]["storages"]
    instance.s3 = mock_s3
    yield instance, mock_s3

@@ -63,19 +63,14 @@ def test_successful_job_submission(enricher, metadata, mock_requests, mocker):
    # Mock the complete API interaction chain
    mock_status_response = mocker.MagicMock()
    mock_status_response.status_code = 200
-    mock_status_response.json.return_value = {
-        "status": "success",
-        "meta": {}
-    }
+    mock_status_response.json.return_value = {"status": "success", "meta": {}}
    mock_artifacts_response = mocker.MagicMock()
    mock_artifacts_response.status_code = 200
-    mock_artifacts_response.json.return_value = [{
-        "data": [{"start": 0, "end": 5, "text": "test transcript"}]
-    }]
+    mock_artifacts_response.json.return_value = [{"data": [{"start": 0, "end": 5, "text": "test transcript"}]}]
    # Set up mock response sequence
    mock_requests.get.side_effect = [
        mock_status_response,  # First call: status check
-        mock_artifacts_response  # Second call: artifacts check
+        mock_artifacts_response,  # Second call: artifacts check
    ]

    # Run enrichment (without opening file)
@@ -84,15 +79,17 @@ def test_successful_job_submission(enricher, metadata, mock_requests, mocker):
    mock_requests.post.assert_called_once_with(
        "http://testapi/jobs",
        json={"url": "http://cdn.example.com/test.mp4", "type": "translate"},
-        headers={"Authorization": "Bearer whisper-key"}
+        headers={"Authorization": "Bearer whisper-key"},
    )
    # Verify job status checks
    assert mock_requests.get.call_count == 2
    assert "artifact_0_text" in metadata.media[0].get("whisper_model")
-    assert metadata.media[0].get("whisper_model") == {'artifact_0_text': 'test transcript',
-                                                      'job_artifacts_check': 'http://testapi/jobs/job123/artifacts',
-                                                      'job_id': 'job123',
-                                                      'job_status_check': 'http://testapi/jobs/job123'}
+    assert metadata.media[0].get("whisper_model") == {
+        "artifact_0_text": "test transcript",
+        "job_artifacts_check": "http://testapi/jobs/job123/artifacts",
+        "job_id": "job123",
+        "job_status_check": "http://testapi/jobs/job123",
+    }


 def test_submit_job(enricher, mocker):
--- a/tests/extractors/test_extractor_base.py
+++ b/tests/extractors/test_extractor_base.py
@@ -7,7 +7,6 @@ from auto_archiver.core.extractor import Extractor


 class TestExtractorBase(object):
-
    extractor_module: str = None
    config: dict = None

@@ -17,7 +16,7 @@ class TestExtractorBase(object):
        assert self.config is not None, "self.config must be a dict set on the subclass"

        self.extractor: Type[Extractor] = setup_module(self.extractor_module, self.config)
-    
+
    def assertValidResponseMetadata(self, test_response: Metadata, title: str, timestamp: str, status: str = ""):
        assert test_response is not False

--- a/tests/extractors/test_generic_extractor.py
+++ b/tests/extractors/test_generic_extractor.py
@@ -9,26 +9,28 @@ import pytest
 from auto_archiver.modules.generic_extractor.generic_extractor import GenericExtractor
 from .test_extractor_base import TestExtractorBase

-CI=os.getenv("GITHUB_ACTIONS", '') == 'true'
+CI = os.getenv("GITHUB_ACTIONS", "") == "true"
+
+
 class TestGenericExtractor(TestExtractorBase):
-    """Tests Generic Extractor
-    """
-    extractor_module = 'generic_extractor'
+    """Tests Generic Extractor"""
+
+    extractor_module = "generic_extractor"
    extractor: GenericExtractor

    config = {
-        'subtitles': False,
-        'comments': False,
-        'livestreams': False,
-        'live_from_start': False,
-        'end_means_success': True,
-        'allow_playlist': False,
-        'max_downloads': "inf",
-        'proxy': None,
-        'cookies_from_browser': False,
-        'cookie_file': None,
-        }
-    
+        "subtitles": False,
+        "comments": False,
+        "livestreams": False,
+        "live_from_start": False,
+        "end_means_success": True,
+        "allow_playlist": False,
+        "max_downloads": "inf",
+        "proxy": None,
+        "cookies_from_browser": False,
+        "cookie_file": None,
+    }
+
    def test_load_dropin(self):
        # test loading dropins that are in the generic_archiver package
        package = "auto_archiver.modules.generic_extractor"
@@ -38,21 +40,26 @@ class TestGenericExtractor(TestExtractorBase):
        path = os.path.join(dirname(dirname(__file__)), "data/")
        assert self.extractor.dropin_for_name("dropin", additional_paths=[path])

-
-
-    @pytest.mark.parametrize("url, is_suitable", [
-        ("https://www.youtube.com/watch?v=5qap5aO4i9A", True),
-        ("https://www.tiktok.com/@funnycats0ftiktok/video/7345101300750748970?lang=en", True),
-        ("https://www.instagram.com/p/CU1J9JYJ9Zz/", True),
-        ("https://www.facebook.com/nytimes/videos/10160796550110716", True),
-        ("https://www.twitch.tv/videos/1167226570", True),
-        ("https://bellingcat.com/news/2021/10/08/ukrainian-soldiers-are-being-killed-by-landmines-in-the-donbas/", True),
-        ("https://google.com", True)])
+    @pytest.mark.parametrize(
+        "url, is_suitable",
+        [
+            ("https://www.youtube.com/watch?v=5qap5aO4i9A", True),
+            ("https://www.tiktok.com/@funnycats0ftiktok/video/7345101300750748970?lang=en", True),
+            ("https://www.instagram.com/p/CU1J9JYJ9Zz/", True),
+            ("https://www.facebook.com/nytimes/videos/10160796550110716", True),
+            ("https://www.twitch.tv/videos/1167226570", True),
+            (
+                "https://bellingcat.com/news/2021/10/08/ukrainian-soldiers-are-being-killed-by-landmines-in-the-donbas/",
+                True,
+            ),
+            ("https://google.com", True),
+        ],
+    )
    def test_suitable_urls(self, make_item, url, is_suitable):
        """
-            Note: expected behaviour is to return True for all URLs, as YoutubeDLArchiver should be able to handle all URLs
-            This behaviour may be changed in the future (e.g. if we want the youtubedl archiver to just handle URLs it has extractors for,
-            and then if and only if all archivers fails, does it fall back to the generic archiver)
+        Note: expected behaviour is to return True for all URLs, as YoutubeDLArchiver should be able to handle all URLs
+        This behaviour may be changed in the future (e.g. if we want the youtubedl archiver to just handle URLs it has extractors for,
+        and then if and only if all archivers fails, does it fall back to the generic archiver)
        """
        assert self.extractor.suitable(url) == is_suitable

@@ -63,11 +70,14 @@ class TestGenericExtractor(TestExtractorBase):
        assert result.get_url() == "https://www.tiktok.com/@funnycats0ftiktok/video/7345101300750748970"

    @pytest.mark.download
-    @pytest.mark.parametrize("url", [
-        "https://bsky.app/profile/colborne.bsky.social/post/3lcxcpgt6j42l",
-        "twitter.com/bellingcat/status/123",
-        "https://www.youtube.com/watch?v=1"
-    ])
+    @pytest.mark.parametrize(
+        "url",
+        [
+            "https://bsky.app/profile/colborne.bsky.social/post/3lcxcpgt6j42l",
+            "twitter.com/bellingcat/status/123",
+            "https://www.youtube.com/watch?v=1",
+        ],
+    )
    def test_download_nonexistent_media(self, make_item, url):
        """
        Test to make sure that the extractor doesn't break on non-existent posts/media
@@ -78,7 +88,10 @@ class TestGenericExtractor(TestExtractorBase):
        result = self.extractor.download(item)
        assert not result

-    @pytest.mark.skipif(CI, reason="Currently no way to authenticate when on CI. Youtube (yt-dlp) doesn't support logging in with username/password.")
+    @pytest.mark.skipif(
+        CI,
+        reason="Currently no way to authenticate when on CI. Youtube (yt-dlp) doesn't support logging in with username/password.",
+    )
    @pytest.mark.download
    def test_youtube_download(self, make_item):
        # url https://www.youtube.com/watch?v=5qap5aO4i9A
@@ -87,7 +100,10 @@ class TestGenericExtractor(TestExtractorBase):
        result = self.extractor.download(item)
        assert result.get_url() == "https://www.youtube.com/watch?v=J---aiyznGQ"
        assert result.get_title() == "Keyboard Cat! - THE ORIGINAL!"
-        assert result.get('description') == "Buy NEW Keyboard Cat Merch! https://keyboardcat.creator-spring.com\n\nxo Keyboard Cat memes make your day better!\nhttp://www.keyboardcatstore.com/\nhttps://www.facebook.com/thekeyboardcat\nhttp://www.charlieschmidt.com/"
+        assert (
+            result.get("description")
+            == "Buy NEW Keyboard Cat Merch! https://keyboardcat.creator-spring.com\n\nxo Keyboard Cat memes make your day better!\nhttp://www.keyboardcatstore.com/\nhttps://www.facebook.com/thekeyboardcat\nhttp://www.charlieschmidt.com/"
+        )
        assert len(result.media) == 2
        assert Path(result.media[0].filename).name == "J---aiyznGQ.webm"
        assert Path(result.media[1].filename).name == "hqdefault.jpg"
@@ -103,7 +119,7 @@ class TestGenericExtractor(TestExtractorBase):
        item = make_item("https://bsky.app/profile/bellingcat.com/post/3lfn3hbcxgc2q")
        result = self.extractor.download(item)
        assert result is not False
-    
+
    @pytest.mark.download
    def test_bluesky_download_no_media(self, make_item):
        item = make_item("https://bsky.app/profile/bellingcat.com/post/3lfphwmcs4c2z")
@@ -115,7 +131,7 @@ class TestGenericExtractor(TestExtractorBase):
        item = make_item("https://bsky.app/profile/bellingcat.com/post/3le2l4gsxlk2i")
        result = self.extractor.download(item)
        assert result is not False
-    
+
    @pytest.mark.skipif(CI, reason="Truth social blocks GH actions.")
    @pytest.mark.download
    def test_truthsocial_download_video(self, make_item):
@@ -130,14 +146,14 @@ class TestGenericExtractor(TestExtractorBase):
        item = make_item("https://truthsocial.com/@bbcnewa/posts/109598702184774628")
        result = self.extractor.download(item)
        assert result is not False
-    
+
    @pytest.mark.skipif(CI, reason="Truth social blocks GH actions.")
    @pytest.mark.download
    def test_truthsocial_download_poll(self, make_item):
        item = make_item("https://truthsocial.com/@CNN_US/posts/113724326568555098")
        result = self.extractor.download(item)
        assert result is not False
-    
+
    @pytest.mark.skipif(CI, reason="Truth social blocks GH actions.")
    @pytest.mark.download
    def test_truthsocial_download_single_image(self, make_item):
@@ -159,7 +175,7 @@ class TestGenericExtractor(TestExtractorBase):
        url = "https://x.com/Bellingcat/status/17197025860711058"
        response = self.extractor.download(make_item(url))
        assert not response
-    
+
    @pytest.mark.download
    def test_twitter_download_malformed_tweetid(self, make_item):
        # this tweet does not exist
@@ -169,7 +185,6 @@ class TestGenericExtractor(TestExtractorBase):

    @pytest.mark.download
    def test_twitter_download_tweet_no_media(self, make_item):
-        
        item = make_item("https://twitter.com/MeCookieMonster/status/1617921633456640001?s=20&t=3d0g4ZQis7dCbSDg-mE7-w")
        post = self.extractor.download(item)

@@ -177,9 +192,9 @@ class TestGenericExtractor(TestExtractorBase):
            post,
            "Onion rings are just vegetable donuts.",
            datetime.datetime(2023, 1, 24, 16, 25, 51, tzinfo=datetime.timezone.utc),
-            "yt-dlp_Twitter: success"
+            "yt-dlp_Twitter: success",
        )
-    
+
    @pytest.mark.download
    def test_twitter_download_video(self, make_item):
        url = "https://x.com/bellingcat/status/1871552600346415571"
@@ -187,26 +202,46 @@ class TestGenericExtractor(TestExtractorBase):
        self.assertValidResponseMetadata(
            post,
            "Bellingcat - This month's Bellingchat Premium is with @KolinaKoltai. She reveals how she investigated a platform allowing users to create AI-generated child sexual abuse material and explains why it's crucial to investigate the people behind these services",
-            datetime.datetime(2024, 12, 24, 13, 44, 46, tzinfo=datetime.timezone.utc)
+            datetime.datetime(2024, 12, 24, 13, 44, 46, tzinfo=datetime.timezone.utc),
        )

-    @pytest.mark.xfail(reason="Currently failing, sensitive content requires logged in users/cookies - not yet implemented")
+    @pytest.mark.xfail(
+        reason="Currently failing, sensitive content requires logged in users/cookies - not yet implemented"
+    )
    @pytest.mark.download
-    @pytest.mark.parametrize("url, title, timestamp, image_hash", [
-            ("https://x.com/SozinhoRamalho/status/1876710769913450647", "ignore tweet, testing sensitivity warning nudity", datetime.datetime(2024, 12, 31, 14, 18, 33, tzinfo=datetime.timezone.utc), "image_hash"),
-            ("https://x.com/SozinhoRamalho/status/1876710875475681357", "ignore tweet, testing sensitivity warning violence", datetime.datetime(2024, 12, 31, 14, 18, 33, tzinfo=datetime.timezone.utc), "image_hash"),
-            ("https://x.com/SozinhoRamalho/status/1876711053813227618", "ignore tweet, testing sensitivity warning sensitive", datetime.datetime(2024, 12, 31, 14, 18, 33, tzinfo=datetime.timezone.utc), "image_hash"),
-            ("https://x.com/SozinhoRamalho/status/1876711141314801937", "ignore tweet, testing sensitivity warning nudity, violence, sensitivity", datetime.datetime(2024, 12, 31, 14, 18, 33, tzinfo=datetime.timezone.utc), "image_hash"),
-        ])
+    @pytest.mark.parametrize(
+        "url, title, timestamp, image_hash",
+        [
+            (
+                "https://x.com/SozinhoRamalho/status/1876710769913450647",
+                "ignore tweet, testing sensitivity warning nudity",
+                datetime.datetime(2024, 12, 31, 14, 18, 33, tzinfo=datetime.timezone.utc),
+                "image_hash",
+            ),
+            (
+                "https://x.com/SozinhoRamalho/status/1876710875475681357",
+                "ignore tweet, testing sensitivity warning violence",
+                datetime.datetime(2024, 12, 31, 14, 18, 33, tzinfo=datetime.timezone.utc),
+                "image_hash",
+            ),
+            (
+                "https://x.com/SozinhoRamalho/status/1876711053813227618",
+                "ignore tweet, testing sensitivity warning sensitive",
+                datetime.datetime(2024, 12, 31, 14, 18, 33, tzinfo=datetime.timezone.utc),
+                "image_hash",
+            ),
+            (
+                "https://x.com/SozinhoRamalho/status/1876711141314801937",
+                "ignore tweet, testing sensitivity warning nudity, violence, sensitivity",
+                datetime.datetime(2024, 12, 31, 14, 18, 33, tzinfo=datetime.timezone.utc),
+                "image_hash",
+            ),
+        ],
+    )
    def test_twitter_download_sensitive_media(self, url, title, timestamp, image_hash, make_item):
-
        """Download tweets with sensitive media"""

        post = self.extractor.download(make_item(url))
-        self.assertValidResponseMetadata(
-            post,
-            title,
-            timestamp
-        )
+        self.assertValidResponseMetadata(post, title, timestamp)
        assert len(post.media) == 1
-        assert post.media[0].hash == image_hash
+        assert post.media[0].hash == image_hash
--- a/tests/extractors/test_instagram_api_extractor.py
+++ b/tests/extractors/test_instagram_api_extractor.py
@@ -15,10 +15,11 @@ def mock_user_response():
            "username": "test_user",
            "full_name": "Test User",
            "profile_pic_url_hd": "http://example.com/profile.jpg",
-            "profile_pic_url": "http://example.com/profile_lowres.jpg"
+            "profile_pic_url": "http://example.com/profile_lowres.jpg",
        }
    }

+
@pytest.fixture
 def mock_post_response():
    return {
@@ -27,16 +28,14 @@ def mock_post_response():
        "caption_text": "Test Caption",
        "taken_at": datetime.now().timestamp(),
        "video_url": "http://example.com/video.mp4",
-        "thumbnail_url": "http://example.com/thumbnail.jpg"
+        "thumbnail_url": "http://example.com/thumbnail.jpg",
    }

+
@pytest.fixture
 def mock_story_response():
-    return [{
-        "id": "story_123",
-        "taken_at": datetime.now().timestamp(),
-        "video_url": "http://example.com/story.mp4"
-    }]
+    return [{"id": "story_123", "taken_at": datetime.now().timestamp(), "video_url": "http://example.com/story.mp4"}]
+

@pytest.fixture
 def mock_highlight_response():
@@ -46,11 +45,13 @@ def mock_highlight_response():
                "highlight:123": {
                    "id": "123",
                    "title": "Test Highlight",
-                    "items": [{
-                        "id": "item_123",
-                        "taken_at": datetime.now().timestamp(),
-                        "video_url": "http://example.com/highlight.mp4"
-                    }]
+                    "items": [
+                        {
+                            "id": "item_123",
+                            "taken_at": datetime.now().timestamp(),
+                            "video_url": "http://example.com/highlight.mp4",
+                        }
+                    ],
                }
            }
        }
@@ -81,24 +82,30 @@ class TestInstagramAPIExtractor(TestExtractorBase):
        m.set("netloc", "instagram.com")
        return m

-    @pytest.mark.parametrize("url,expected", [
-        ("https://instagram.com/user", [("", "user", "")]),
-        ("https://instagr.am/p/post_id", []),
-        ("https://youtube.com", []),
-        ("https://www.instagram.com/reel/reel_id", [("reel", "reel_id", "")]),
-        ("https://instagram.com/stories/highlights/123", [("stories/highlights", "123", "")]),
-        ("https://instagram.com/stories/user/123", [("stories", "user", "123")]),
-    ])
+    @pytest.mark.parametrize(
+        "url,expected",
+        [
+            ("https://instagram.com/user", [("", "user", "")]),
+            ("https://instagr.am/p/post_id", []),
+            ("https://youtube.com", []),
+            ("https://www.instagram.com/reel/reel_id", [("reel", "reel_id", "")]),
+            ("https://instagram.com/stories/highlights/123", [("stories/highlights", "123", "")]),
+            ("https://instagram.com/stories/user/123", [("stories", "user", "123")]),
+        ],
+    )
    def test_url_parsing(self, url, expected):
        assert self.extractor.valid_url.findall(url) == expected

    def test_initialize(self):
        assert self.extractor.api_endpoint[-1] != "/"

-    @pytest.mark.parametrize("input_dict,expected", [
-        ({"x": 0, "valid": "data"}, {"valid": "data"}),
-        ({"nested": {"y": None, "valid": [{}]}}, {"nested": {"valid": [{}]}}),
-    ])
+    @pytest.mark.parametrize(
+        "input_dict,expected",
+        [
+            ({"x": 0, "valid": "data"}, {"valid": "data"}),
+            ({"nested": {"y": None, "valid": [{}]}}, {"nested": {"valid": [{}]}}),
+        ],
+    )
    def test_cleanup_dict(self, input_dict, expected):
        assert self.extractor.cleanup_dict(input_dict) == expected

@@ -114,8 +121,8 @@ class TestInstagramAPIExtractor(TestExtractorBase):

    def test_download_profile_basic(self, metadata, mock_user_response, mocker):
        """Test basic profile download without full_profile"""
-        mock_call = mocker.patch.object(self.extractor, 'call_api')
-        mock_download = mocker.patch.object(self.extractor, 'download_from_url')
+        mock_call = mocker.patch.object(self.extractor, "call_api")
+        mock_download = mocker.patch.object(self.extractor, "download_from_url")
        # Mock API responses
        mock_call.return_value = mock_user_response
        mock_download.return_value = "profile.jpg"
@@ -132,17 +139,14 @@ class TestInstagramAPIExtractor(TestExtractorBase):

    def test_download_profile_full(self, metadata, mock_user_response, mock_story_response, mocker):
        """Test full profile download with stories/posts"""
-        mock_call = mocker.patch.object(self.extractor, 'call_api')
-        mock_posts = mocker.patch.object(self.extractor, 'download_all_posts')
-        mock_highlights = mocker.patch.object(self.extractor, 'download_all_highlights')
-        mock_tagged = mocker.patch.object(self.extractor, 'download_all_tagged')
-        mock_stories = mocker.patch.object(self.extractor, '_download_stories_reusable')
+        mock_call = mocker.patch.object(self.extractor, "call_api")
+        mock_posts = mocker.patch.object(self.extractor, "download_all_posts")
+        mock_highlights = mocker.patch.object(self.extractor, "download_all_highlights")
+        mock_tagged = mocker.patch.object(self.extractor, "download_all_tagged")
+        mock_stories = mocker.patch.object(self.extractor, "_download_stories_reusable")

        self.extractor.full_profile = True
-        mock_call.side_effect = [
-            mock_user_response,
-            mock_story_response
-        ]
+        mock_call.side_effect = [mock_user_response, mock_story_response]
        mock_highlights.return_value = None
        mock_stories.return_value = mock_story_response
        mock_posts.return_value = None
@@ -155,7 +159,7 @@ class TestInstagramAPIExtractor(TestExtractorBase):

    def test_download_profile_not_found(self, metadata, mocker):
        """Test profile not found error"""
-        mock_call = mocker.patch.object(self.extractor, 'call_api')
+        mock_call = mocker.patch.object(self.extractor, "call_api")
        mock_call.return_value = {"user": None}
        with pytest.raises(AssertionError) as exc_info:
            self.extractor.download_profile(metadata, "invalid_user")
@@ -163,18 +167,14 @@ class TestInstagramAPIExtractor(TestExtractorBase):

    def test_download_profile_error_handling(self, metadata, mock_user_response, mocker):
        """Test error handling in full profile mode"""
-        mock_call = mocker.patch.object(self.extractor, 'call_api')
-        mock_highlights = mocker.patch.object(self.extractor, 'download_all_highlights')
-        mock_tagged = mocker.patch.object(self.extractor, 'download_all_tagged')
-        stories_tagged = mocker.patch.object(self.extractor, '_download_stories_reusable')
-        mock_posts = mocker.patch.object(self.extractor, 'download_all_posts')
+        mock_call = mocker.patch.object(self.extractor, "call_api")
+        mock_highlights = mocker.patch.object(self.extractor, "download_all_highlights")
+        mock_tagged = mocker.patch.object(self.extractor, "download_all_tagged")
+        stories_tagged = mocker.patch.object(self.extractor, "_download_stories_reusable")
+        mock_posts = mocker.patch.object(self.extractor, "download_all_posts")

        self.extractor.full_profile = True
-        mock_call.side_effect = [
-            mock_user_response,
-            Exception("Stories API failed"),
-            Exception("Posts API failed")
-        ]
+        mock_call.side_effect = [mock_user_response, Exception("Stories API failed"), Exception("Posts API failed")]
        mock_highlights.return_value = None
        mock_tagged.return_value = None
        stories_tagged.return_value = None
@@ -182,4 +182,4 @@ class TestInstagramAPIExtractor(TestExtractorBase):
        result = self.extractor.download_profile(metadata, "test_user")

        assert result.is_success()
-        assert "Error downloading stories for test_user" in result.metadata["errors"]
+        assert "Error downloading stories for test_user" in result.metadata["errors"]
--- a/tests/extractors/test_instagram_extractor.py
+++ b/tests/extractors/test_instagram_extractor.py
@@ -5,8 +5,7 @@ from auto_archiver.modules.instagram_extractor import InstagramExtractor

@pytest.fixture
 def instagram_extractor(setup_module, mocker):
-
-    extractor_module: str = 'instagram_extractor'
+    extractor_module: str = "instagram_extractor"
    config: dict = {
        "username": "user_name",
        "password": "password123",
@@ -17,20 +16,26 @@ def instagram_extractor(setup_module, mocker):
    fake_loader.load_session_from_file.return_value = None
    fake_loader.login.return_value = None
    fake_loader.save_session_to_file.return_value = None
-    mocker.patch("instaloader.Instaloader", return_value=fake_loader,)
+    mocker.patch(
+        "instaloader.Instaloader",
+        return_value=fake_loader,
+    )
    return setup_module(extractor_module, config)


-@pytest.mark.parametrize("url", [
-    "https://www.instagram.com/p/",
-    "https://www.instagram.com/p/1234567890/",
-    "https://www.instagram.com/reel/1234567890/",
-    "https://www.instagram.com/username/",
-    "https://www.instagram.com/username/stories/",
-    "https://www.instagram.com/username/highlights/",
-])
+@pytest.mark.parametrize(
+    "url",
+    [
+        "https://www.instagram.com/p/",
+        "https://www.instagram.com/p/1234567890/",
+        "https://www.instagram.com/reel/1234567890/",
+        "https://www.instagram.com/username/",
+        "https://www.instagram.com/username/stories/",
+        "https://www.instagram.com/username/highlights/",
+    ],
+)
 def test_regex_matches(url: str, instagram_extractor: InstagramExtractor) -> None:
    """
    Ensure that the valid_url regex matches all provided Instagram URLs.
    """
-    assert instagram_extractor.valid_url.match(url)
+    assert instagram_extractor.valid_url.match(url)
--- a/tests/extractors/test_instagram_tbot_extractor.py
+++ b/tests/extractors/test_instagram_tbot_extractor.py
@@ -9,8 +9,8 @@ from tests.extractors.test_extractor_base import TestExtractorBase

@pytest.fixture
 def patch_extractor_methods(request, setup_module, mocker):
-    mocker.patch.object(InstagramTbotExtractor, '_prepare_session_file', return_value=None)
-    mocker.patch.object(InstagramTbotExtractor, '_initialize_telegram_client', return_value=None)
+    mocker.patch.object(InstagramTbotExtractor, "_prepare_session_file", return_value=None)
+    mocker.patch.object(InstagramTbotExtractor, "_initialize_telegram_client", return_value=None)
    yield


@@ -35,12 +35,7 @@ def mock_telegram_client(mocker):
@pytest.fixture
 def extractor(setup_module, patch_extractor_methods, mocker):
    extractor_module = "instagram_tbot_extractor"
-    config = {
-        "api_id": 12345,
-        "api_hash": "test_api_hash",
-        "session_file": "test_session",
-        "timeout": 4
-    }
+    config = {"api_id": 12345, "api_hash": "test_api_hash", "session_file": "test_session", "timeout": 4}
    extractor = setup_module(extractor_module, config)
    extractor.client = mocker.MagicMock()
    extractor.session_file = "test_session"
@@ -79,21 +74,30 @@ class TestInstagramTbotExtractorReal(TestExtractorBase):
        "session_file": "secrets/anon-insta",
    }

-    @pytest.mark.parametrize("url, expected_status, message, len_media", [
-        ("https://www.instagram.com/p/C4QgLbrIKXG", "insta-via-bot: success",
-         "Are you new to Bellingcat? - The way we share our investigations is different. 💭\nWe want you to read our story but also learn ou",
-         6),
-        ("https://www.instagram.com/reel/DEVLK8qoIbg/", "insta-via-bot: success",
-         "Our volunteer community is at the centre of many incredible Bellingcat investigations and tools. Stephanie Ladel is one such vol",
-         3),
-        # instagram tbot not working (potentially intermittently?) for stories - replace with a live story to retest
-        # ("https://www.instagram.com/stories/bellingcatofficial/3556336382743057476/", False, "Media not found or unavailable"),
-        # Seems to be working intermittently for highlights
-        # ("https://www.instagram.com/stories/highlights/17868810693068139/", "insta-via-bot: success", None, 50),
-        # Marking invalid url as success
-        ("https://www.instagram.com/p/INVALID", "insta-via-bot: success", "Media not found or unavailable", 0),
-        ("https://www.youtube.com/watch?v=ymCMy8OffHM", False, None, 0),
-    ])
+    @pytest.mark.parametrize(
+        "url, expected_status, message, len_media",
+        [
+            (
+                "https://www.instagram.com/p/C4QgLbrIKXG",
+                "insta-via-bot: success",
+                "Are you new to Bellingcat? - The way we share our investigations is different. 💭\nWe want you to read our story but also learn ou",
+                6,
+            ),
+            (
+                "https://www.instagram.com/reel/DEVLK8qoIbg/",
+                "insta-via-bot: success",
+                "Our volunteer community is at the centre of many incredible Bellingcat investigations and tools. Stephanie Ladel is one such vol",
+                3,
+            ),
+            # instagram tbot not working (potentially intermittently?) for stories - replace with a live story to retest
+            # ("https://www.instagram.com/stories/bellingcatofficial/3556336382743057476/", False, "Media not found or unavailable"),
+            # Seems to be working intermittently for highlights
+            # ("https://www.instagram.com/stories/highlights/17868810693068139/", "insta-via-bot: success", None, 50),
+            # Marking invalid url as success
+            ("https://www.instagram.com/p/INVALID", "insta-via-bot: success", "Media not found or unavailable", 0),
+            ("https://www.youtube.com/watch?v=ymCMy8OffHM", False, None, 0),
+        ],
+    )
    def test_download(self, url, expected_status, message, len_media, metadata_sample):
        """Test the `download()` method with various Instagram URLs."""
        metadata_sample.set_url(url)
--- a/tests/extractors/test_twitter_api_extractor.py
+++ b/tests/extractors/test_twitter_api_extractor.py
@@ -10,8 +10,7 @@ from auto_archiver.modules.twitter_api_extractor import TwitterApiExtractor

@pytest.mark.incremental
 class TestTwitterApiExtractor(TestExtractorBase):
-
-    extractor_module = 'twitter_api_extractor'
+    extractor_module = "twitter_api_extractor"

    config = {
        "bearer_tokens": [],
@@ -22,41 +21,79 @@ class TestTwitterApiExtractor(TestExtractorBase):
        "access_secret": os.environ.get("TWITTER_ACCESS_SECRET"),
    }

-    @pytest.mark.parametrize("url, expected", [
-        ("https://x.com/bellingcat/status/1874097816571961839", "https://x.com/bellingcat/status/1874097816571961839"), # x.com urls unchanged
-        ("https://twitter.com/bellingcat/status/1874097816571961839", "https://twitter.com/bellingcat/status/1874097816571961839"), # twitter urls unchanged
-        ("https://twitter.com/bellingcat/status/1874097816571961839?s=20&t=3d0g4ZQis7dCbSDg-mE7-w", "https://twitter.com/bellingcat/status/1874097816571961839?s=20&t=3d0g4ZQis7dCbSDg-mE7-w"), # don't strip params from twitter urls (changed Jan 2025)
-        ("https://www.bellingcat.com/category/resources/", "https://www.bellingcat.com/category/resources/"), # non-twitter/x urls unchanged
-        ("https://www.bellingcat.com/category/resources/?s=20&t=3d0g4ZQis7dCbSDg-mE7-w", "https://www.bellingcat.com/category/resources/?s=20&t=3d0g4ZQis7dCbSDg-mE7-w"), # shouldn't strip params from non-twitter/x URLs
-    ])
+    @pytest.mark.parametrize(
+        "url, expected",
+        [
+            (
+                "https://x.com/bellingcat/status/1874097816571961839",
+                "https://x.com/bellingcat/status/1874097816571961839",
+            ),  # x.com urls unchanged
+            (
+                "https://twitter.com/bellingcat/status/1874097816571961839",
+                "https://twitter.com/bellingcat/status/1874097816571961839",
+            ),  # twitter urls unchanged
+            (
+                "https://twitter.com/bellingcat/status/1874097816571961839?s=20&t=3d0g4ZQis7dCbSDg-mE7-w",
+                "https://twitter.com/bellingcat/status/1874097816571961839?s=20&t=3d0g4ZQis7dCbSDg-mE7-w",
+            ),  # don't strip params from twitter urls (changed Jan 2025)
+            (
+                "https://www.bellingcat.com/category/resources/",
+                "https://www.bellingcat.com/category/resources/",
+            ),  # non-twitter/x urls unchanged
+            (
+                "https://www.bellingcat.com/category/resources/?s=20&t=3d0g4ZQis7dCbSDg-mE7-w",
+                "https://www.bellingcat.com/category/resources/?s=20&t=3d0g4ZQis7dCbSDg-mE7-w",
+            ),  # shouldn't strip params from non-twitter/x URLs
+        ],
+    )
    def test_sanitize_url(self, url, expected):
        assert expected == self.extractor.sanitize_url(url)

    @pytest.mark.download
    def test_sanitize_url_download(self):
-        assert "https://www.bellingcat.com/category/resources/" == self.extractor.sanitize_url("https://t.co/yl3oOJatFp")
+        assert "https://www.bellingcat.com/category/resources/" == self.extractor.sanitize_url(
+            "https://t.co/yl3oOJatFp"
+        )

-    @pytest.mark.parametrize("url, exptected_username, exptected_tweetid", [
-        ("https://twitter.com/bellingcat/status/1874097816571961839", "bellingcat", "1874097816571961839"),
-        ("https://x.com/bellingcat/status/1874097816571961839", "bellingcat", "1874097816571961839"),
-        ("https://www.bellingcat.com/category/resources/", False, False)
-        ])
+    @pytest.mark.parametrize(
+        "url, exptected_username, exptected_tweetid",
+        [
+            ("https://twitter.com/bellingcat/status/1874097816571961839", "bellingcat", "1874097816571961839"),
+            ("https://x.com/bellingcat/status/1874097816571961839", "bellingcat", "1874097816571961839"),
+            ("https://www.bellingcat.com/category/resources/", False, False),
+        ],
+    )
    def test_get_username_tweet_id_from_url(self, url, exptected_username, exptected_tweetid):
-    
        username, tweet_id = self.extractor.get_username_tweet_id(url)
        assert exptected_username == username
        assert exptected_tweetid == tweet_id

    def test_choose_variants(self):
        # taken from the response for url https://x.com/bellingcat/status/1871552600346415571
-        variant_list = [MediaVariant(content_type='application/x-mpegURL', url='https://video.twimg.com/ext_tw_video/1871551993677852672/pu/pl/ovWo7ux-bKROwYIC.m3u8?tag=12&v=e1b'),
-                        MediaVariant(bit_rate=256000, content_type='video/mp4', url='https://video.twimg.com/ext_tw_video/1871551993677852672/pu/vid/avc1/480x270/OqZIrKV0LFswMvxS.mp4?tag=12'),
-                        MediaVariant(bit_rate=832000, content_type='video/mp4', url='https://video.twimg.com/ext_tw_video/1871551993677852672/pu/vid/avc1/640x360/uiDZDSmZ8MZn9hsi.mp4?tag=12'),
-                        MediaVariant(bit_rate=2176000, content_type='video/mp4', url='https://video.twimg.com/ext_tw_video/1871551993677852672/pu/vid/avc1/1280x720/6Y340Esh568WZnRZ.mp4?tag=12')
-                        ]
+        variant_list = [
+            MediaVariant(
+                content_type="application/x-mpegURL",
+                url="https://video.twimg.com/ext_tw_video/1871551993677852672/pu/pl/ovWo7ux-bKROwYIC.m3u8?tag=12&v=e1b",
+            ),
+            MediaVariant(
+                bit_rate=256000,
+                content_type="video/mp4",
+                url="https://video.twimg.com/ext_tw_video/1871551993677852672/pu/vid/avc1/480x270/OqZIrKV0LFswMvxS.mp4?tag=12",
+            ),
+            MediaVariant(
+                bit_rate=832000,
+                content_type="video/mp4",
+                url="https://video.twimg.com/ext_tw_video/1871551993677852672/pu/vid/avc1/640x360/uiDZDSmZ8MZn9hsi.mp4?tag=12",
+            ),
+            MediaVariant(
+                bit_rate=2176000,
+                content_type="video/mp4",
+                url="https://video.twimg.com/ext_tw_video/1871551993677852672/pu/vid/avc1/1280x720/6Y340Esh568WZnRZ.mp4?tag=12",
+            ),
+        ]
        chosen_variant = self.extractor.choose_variant(variant_list)
        assert chosen_variant == variant_list[3]
-    
+
    @pytest.mark.skipif(not os.environ.get("TWITTER_BEARER_TOKEN"), reason="No Twitter bearer token provided")
    @pytest.mark.download
    def test_download_nonexistent_tweet(self, make_item):
@@ -76,7 +113,6 @@ class TestTwitterApiExtractor(TestExtractorBase):
    @pytest.mark.skipif(not os.environ.get("TWITTER_BEARER_TOKEN"), reason="No Twitter bearer token provided")
    @pytest.mark.download
    def test_download_tweet_no_media(self, make_item):
-        
        item = make_item("https://twitter.com/MeCookieMonster/status/1617921633456640001?s=20&t=3d0g4ZQis7dCbSDg-mE7-w")
        post = self.extractor.download(item)

@@ -84,7 +120,7 @@ class TestTwitterApiExtractor(TestExtractorBase):
            post,
            "Onion rings are just vegetable donuts.",
            datetime.datetime(2023, 1, 24, 16, 25, 51, tzinfo=datetime.timezone.utc),
-            "twitter-api: success"
+            "twitter-api: success",
        )

    @pytest.mark.skipif(not os.environ.get("TWITTER_BEARER_TOKEN"), reason="No Twitter bearer token provided")
@@ -95,27 +131,41 @@ class TestTwitterApiExtractor(TestExtractorBase):
        self.assertValidResponseMetadata(
            post,
            "This month's Bellingchat Premium is with @KolinaKoltai. She reveals how she investigated a platform allowing users to create AI-generated child sexual abuse material and explains why it's crucial to investigate the people behind these services https://t.co/SfBUq0hSD0 https://t.co/rIHx0WlKp8",
-            datetime.datetime(2024, 12, 24, 13, 44, 46, tzinfo=datetime.timezone.utc)
+            datetime.datetime(2024, 12, 24, 13, 44, 46, tzinfo=datetime.timezone.utc),
        )

    @pytest.mark.skipif(not os.environ.get("TWITTER_BEARER_TOKEN"), reason="No Twitter bearer token provided")
-    @pytest.mark.parametrize("url, title, timestamp", [
-            ("https://x.com/SozinhoRamalho/status/1876710769913450647", "ignore tweet, testing sensitivity warning nudity https://t.co/t3u0hQsSB1", datetime.datetime(2024, 12, 31, 14, 18, 33, tzinfo=datetime.timezone.utc)),
-            ("https://x.com/SozinhoRamalho/status/1876710875475681357", "ignore tweet, testing sensitivity warning violence https://t.co/syYDSkpjZD", datetime.datetime(2024, 12, 31, 14, 18, 33, tzinfo=datetime.timezone.utc)),
-            ("https://x.com/SozinhoRamalho/status/1876711053813227618", "ignore tweet, testing sensitivity warning sensitive https://t.co/XE7cRdjzYq", datetime.datetime(2024, 12, 31, 14, 18, 33, tzinfo=datetime.timezone.utc)),
-            ("https://x.com/SozinhoRamalho/status/1876711141314801937", "ignore tweet, testing sensitivity warning nudity, violence, sensitivity https://t.co/YxCFbbhYE3", datetime.datetime(2024, 12, 31, 14, 18, 33, tzinfo=datetime.timezone.utc)),
-        ])
+    @pytest.mark.parametrize(
+        "url, title, timestamp",
+        [
+            (
+                "https://x.com/SozinhoRamalho/status/1876710769913450647",
+                "ignore tweet, testing sensitivity warning nudity https://t.co/t3u0hQsSB1",
+                datetime.datetime(2024, 12, 31, 14, 18, 33, tzinfo=datetime.timezone.utc),
+            ),
+            (
+                "https://x.com/SozinhoRamalho/status/1876710875475681357",
+                "ignore tweet, testing sensitivity warning violence https://t.co/syYDSkpjZD",
+                datetime.datetime(2024, 12, 31, 14, 18, 33, tzinfo=datetime.timezone.utc),
+            ),
+            (
+                "https://x.com/SozinhoRamalho/status/1876711053813227618",
+                "ignore tweet, testing sensitivity warning sensitive https://t.co/XE7cRdjzYq",
+                datetime.datetime(2024, 12, 31, 14, 18, 33, tzinfo=datetime.timezone.utc),
+            ),
+            (
+                "https://x.com/SozinhoRamalho/status/1876711141314801937",
+                "ignore tweet, testing sensitivity warning nudity, violence, sensitivity https://t.co/YxCFbbhYE3",
+                datetime.datetime(2024, 12, 31, 14, 18, 33, tzinfo=datetime.timezone.utc),
+            ),
+        ],
+    )
    @pytest.mark.download
    def test_download_sensitive_media(self, url, title, timestamp, check_hash, make_item):
-
        """Download tweets with sensitive media"""

        post = self.extractor.download(make_item(url))
-        self.assertValidResponseMetadata(
-            post,
-            title,
-            timestamp
-        )
+        self.assertValidResponseMetadata(post, title, timestamp)
        assert len(post.media) == 1
        # check the SHA1 hash (quick) of the media, to make sure it's valid
-        check_hash(post.media[0].filename, "3eea9c03b2dcedd1eb9a169d8bfd1cf877996fab4961de019a96eb9d32d2d733")
+        check_hash(post.media[0].filename, "3eea9c03b2dcedd1eb9a169d8bfd1cf877996fab4961de019a96eb9d32d2d733")
--- a/tests/extractors/test_vk_extractor.py
+++ b/tests/extractors/test_vk_extractor.py
@@ -9,6 +9,7 @@ def mock_vk_scraper(mocker):
    """Fixture to mock VkScraper."""
    return mocker.patch("auto_archiver.modules.vk_extractor.vk_extractor.VkScraper")

+
@pytest.fixture
 def vk_extractor(setup_module, mock_vk_scraper) -> VkExtractor:
    """Fixture to initialize VkExtractor with mocked VkScraper."""
@@ -39,7 +40,7 @@ def test_vk_url_but_scrape_returns_empty(vk_extractor, metadata):
 def test_successful_scrape_and_download(vk_extractor, metadata, mocker):
    mock_scrapes = [
        {"text": "Post Title", "datetime": "2023-01-01T00:00:00", "id": 1},
-        {"text": "Another Post", "datetime": "2023-01-02T00:00:00", "id": 2}
+        {"text": "Another Post", "datetime": "2023-01-02T00:00:00", "id": 2},
    ]
    mock_filenames = ["image1.jpg", "image2.png"]
    vk_extractor.vks.scrape.return_value = mock_scrapes
@@ -56,16 +57,16 @@ def test_successful_scrape_and_download(vk_extractor, metadata, mocker):
    assert len(result.media) == 2
    assert result.media[0].filename == "image1.jpg"
    assert result.media[1].filename == "image2.png"
-    vk_extractor.vks.download_media.assert_called_once_with(
-        mock_scrapes, vk_extractor.tmp_dir
-    )
+    vk_extractor.vks.download_media.assert_called_once_with(mock_scrapes, vk_extractor.tmp_dir)


 def test_adds_first_title_and_timestamp(vk_extractor):
    metadata = Metadata().set_url("https://vk.com/no-metadata")
    metadata.set_url("https://vk.com/no-metadata")
-    mock_scrapes = [{"text": "value", "datetime": "2023-01-01T00:00:00"},
-                    {"text": "value2", "datetime": "2023-01-02T00:00:00"}]
+    mock_scrapes = [
+        {"text": "value", "datetime": "2023-01-01T00:00:00"},
+        {"text": "value2", "datetime": "2023-01-02T00:00:00"},
+    ]
    vk_extractor.vks.scrape.return_value = mock_scrapes
    vk_extractor.vks.download_media.return_value = []
    result = vk_extractor.download(metadata)
@@ -73,4 +74,4 @@ def test_adds_first_title_and_timestamp(vk_extractor):
    assert result.get_title() == "value"
    # formatted timestamp
    assert result.get_timestamp() == "2023-01-01T00:00:00+00:00"
-    assert result.is_success()
+    assert result.is_success()
--- a/tests/feeders/test_atlos_feeder.py
+++ b/tests/feeders/test_atlos_feeder.py
@@ -36,29 +36,45 @@ def atlos_feeder(setup_module, mocker) -> AtlosFeeder:
@pytest.fixture
 def mock_atlos_api(atlos_feeder):
    """Fixture to update the atlos_feeder.session.get side_effect."""
+
    def _mock_responses(responses):
        atlos_feeder.session.get.side_effect = [FakeAPIResponse(data) for data in responses]
+
    return _mock_responses


 def test_atlos_feeder_iter_yields_valid_metadata(atlos_feeder, mock_atlos_api):
    """Test valid items are yielded and invalid ones ignored."""
-    mock_atlos_api([
-        {
-            "next": None,
-            "results": [
-                {"source_url": "http://example.com", "id": 1,
-                 "metadata": {"auto_archiver": {"processed": False}},
-                 "visibility": "visible", "status": "complete"},
-                {"source_url": "", "id": 2,
-                 "metadata": {"auto_archiver": {"processed": False}},
-                 "visibility": "visible", "status": "complete"},
-                {"source_url": "http://example.org", "id": 3,
-                 "metadata": {"auto_archiver": {"processed": True}},
-                 "visibility": "visible", "status": "complete"},
-            ],
-        }
-    ])
+    mock_atlos_api(
+        [
+            {
+                "next": None,
+                "results": [
+                    {
+                        "source_url": "http://example.com",
+                        "id": 1,
+                        "metadata": {"auto_archiver": {"processed": False}},
+                        "visibility": "visible",
+                        "status": "complete",
+                    },
+                    {
+                        "source_url": "",
+                        "id": 2,
+                        "metadata": {"auto_archiver": {"processed": False}},
+                        "visibility": "visible",
+                        "status": "complete",
+                    },
+                    {
+                        "source_url": "http://example.org",
+                        "id": 3,
+                        "metadata": {"auto_archiver": {"processed": True}},
+                        "visibility": "visible",
+                        "status": "complete",
+                    },
+                ],
+            }
+        ]
+    )

    items = list(atlos_feeder)
    assert len(items) == 1
@@ -68,24 +84,34 @@ def test_atlos_feeder_iter_yields_valid_metadata(atlos_feeder, mock_atlos_api):

 def test_atlos_feeder_multiple_pages(atlos_feeder, mock_atlos_api):
    """Test iteration over multiple pages with valid items."""
-    mock_atlos_api([
-        {
-            "next": "cursor2",
-            "results": [
-                {"source_url": "http://example1.com", "id": 10,
-                 "metadata": {"auto_archiver": {"processed": False}},
-                 "visibility": "visible", "status": "complete"},
-            ],
-        },
-        {
-            "next": None,
-            "results": [
-                {"source_url": "http://example2.com", "id": 20,
-                 "metadata": {"auto_archiver": {"processed": False}},
-                 "visibility": "visible", "status": "complete"},
-            ],
-        },
-    ])
+    mock_atlos_api(
+        [
+            {
+                "next": "cursor2",
+                "results": [
+                    {
+                        "source_url": "http://example1.com",
+                        "id": 10,
+                        "metadata": {"auto_archiver": {"processed": False}},
+                        "visibility": "visible",
+                        "status": "complete",
+                    },
+                ],
+            },
+            {
+                "next": None,
+                "results": [
+                    {
+                        "source_url": "http://example2.com",
+                        "id": 20,
+                        "metadata": {"auto_archiver": {"processed": False}},
+                        "visibility": "visible",
+                        "status": "complete",
+                    },
+                ],
+            },
+        ]
+    )

    items = list(atlos_feeder)
    assert len(items) == 2
--- a/tests/feeders/test_csv_feeder.py
+++ b/tests/feeders/test_csv_feeder.py
@@ -1,13 +1,16 @@
 import pytest

+
@pytest.fixture
 def headerless_csv_file():
    return "tests/data/csv_no_headers.csv"

+
@pytest.fixture
 def header_csv_file():
    return "tests/data/csv_with_headers.csv"

+
@pytest.fixture
 def header_csv_file_non_default_column():
    return "tests/data/csv_with_headers_non_default_column.csv"
@@ -23,6 +26,7 @@ def test_csv_feeder_no_headers(headerless_csv_file, setup_module):
    assert urls[0].get_url() == "https://example.com/1/"
    assert urls[1].get_url() == "https://example.com/2/"

+
 def test_csv_feeder_with_headers(header_csv_file, setup_module):
    from auto_archiver.modules.csv_feeder.csv_feeder import CSVFeeder

@@ -33,10 +37,10 @@ def test_csv_feeder_with_headers(header_csv_file, setup_module):
    assert urls[0].get_url() == "https://example.com/1/"
    assert urls[1].get_url() == "https://example.com/2/"

+
 def test_csv_feeder_wrong_column(header_csv_file, setup_module, caplog):
    from auto_archiver.modules.csv_feeder.csv_feeder import CSVFeeder

-
    with caplog.at_level("WARNING"):
        feeder = setup_module(CSVFeeder, {"files": [header_csv_file], "column": 1})
        urls = list(feeder)
@@ -54,4 +58,4 @@ def test_csv_feeder_column_by_name(header_csv_file, setup_module):
    urls = list(feeder)
    assert len(urls) == 2
    assert urls[0].get_url() == "https://example.com/1/"
-    assert urls[1].get_url() == "https://example.com/2/"
+    assert urls[1].get_url() == "https://example.com/2/"
--- a/tests/feeders/test_gsheet_feeder.py
+++ b/tests/feeders/test_gsheet_feeder.py
@@ -19,35 +19,32 @@ def test_setup_without_sheet_and_sheet_id(setup_module, mocker):
@pytest.fixture
 def gsheet_feeder(setup_module, mocker) -> GsheetsFeederDB:
    config: dict = {
-                "service_account": "dummy.json",
-                "sheet": "test-auto-archiver",
-                "sheet_id": None,
-                "header": 1,
-                "columns": {
-                    "url": "link",
-                    "status": "archive status",
-                    "folder": "destination folder",
-                    "archive": "archive location",
-                    "date": "archive date",
-                    "thumbnail": "thumbnail",
-                    "timestamp": "upload timestamp",
-                    "title": "upload title",
-                    "text": "text content",
-                    "screenshot": "screenshot",
-                    "hash": "hash",
-                    "pdq_hash": "perceptual hashes",
-                    "wacz": "wacz",
-                    "replaywebpage": "replaywebpage",
-                },
-                "allow_worksheets": set(),
-                "block_worksheets": set(),
-                "use_sheet_names_in_stored_paths": True,
-            }
+        "service_account": "dummy.json",
+        "sheet": "test-auto-archiver",
+        "sheet_id": None,
+        "header": 1,
+        "columns": {
+            "url": "link",
+            "status": "archive status",
+            "folder": "destination folder",
+            "archive": "archive location",
+            "date": "archive date",
+            "thumbnail": "thumbnail",
+            "timestamp": "upload timestamp",
+            "title": "upload title",
+            "text": "text content",
+            "screenshot": "screenshot",
+            "hash": "hash",
+            "pdq_hash": "perceptual hashes",
+            "wacz": "wacz",
+            "replaywebpage": "replaywebpage",
+        },
+        "allow_worksheets": set(),
+        "block_worksheets": set(),
+        "use_sheet_names_in_stored_paths": True,
+    }
    mocker.patch("gspread.service_account")
-    feeder = setup_module(
-        "gsheet_feeder_db",
-        config
-    )
+    feeder = setup_module("gsheet_feeder_db", config)
    feeder.gsheets_client = mocker.MagicMock()
    return feeder

@@ -128,9 +125,7 @@ def test__set_metadata_with_folder(gsheet_feeder: GsheetsFeederDB):
        (None, "ABC123", "open_by_key", "ABC123", "opening by sheet ID"),
    ],
 )
-def test_open_sheet_with_name_or_id(
-    setup_module, sheet, sheet_id, expected_method, expected_arg, description, mocker
-):
+def test_open_sheet_with_name_or_id(setup_module, sheet, sheet_id, expected_method, expected_arg, description, mocker):
    """Ensure open_sheet() correctly opens by name or ID based on configuration."""
    mock_service_account = mocker.patch("gspread.service_account")
    mock_client = mocker.MagicMock()
@@ -145,9 +140,7 @@ def test_open_sheet_with_name_or_id(
    )
    sheet_result = feeder.open_sheet()
    # Validate the correct method was called
-    getattr(mock_client, expected_method).assert_called_once_with(
-        expected_arg
-    ), f"Failed: {description}"
+    getattr(mock_client, expected_method).assert_called_once_with(expected_arg), f"Failed: {description}"
    assert sheet_result == "MockSheet", f"Failed: {description}"


@@ -220,9 +213,7 @@ class TestGSheetsFeederReal:

    @pytest.fixture(autouse=True)
    def setup_feeder(self, setup_module):
-        assert (
-            self.module_name is not None
-        ), "self.module_name must be set on the subclass"
+        assert self.module_name is not None, "self.module_name must be set on the subclass"
        assert self.config is not None, "self.config must be a dict set on the subclass"
        self.feeder: Type[Feeder] = setup_module(self.module_name, self.config)

@@ -241,9 +232,7 @@ class TestGSheetsFeederReal:
        """Ensure open_sheet() connects to a real Google Sheets instance."""
        sheet = self.feeder.open_sheet()
        assert sheet is not None, "open_sheet() should return a valid sheet instance"
-        assert hasattr(
-            sheet, "worksheets"
-        ), "Returned object should have worksheets method"
+        assert hasattr(sheet, "worksheets"), "Returned object should have worksheets method"

    def test_iter_yields_metadata_real_data(self):
        """Ensure __iter__() yields Metadata objects for real test sheet data."""
--- a/tests/feeders/test_gworksheet.py
+++ b/tests/feeders/test_gworksheet.py
@@ -81,40 +81,27 @@ class TestGWorksheet:
            (False, ""),
        ],
    )
-    def test_get_cell_or_default_handles_empty_values(
-        self, mock_worksheet, when_empty, expected
-    ):
+    def test_get_cell_or_default_handles_empty_values(self, mock_worksheet, when_empty, expected):
        mock_worksheet.get_values.return_value[1][0] = ""  # Empty URL cell
        g = GWorksheet(mock_worksheet)
-        assert (
-            g.get_cell_or_default(
-                2, "url", default="default", when_empty_use_default=when_empty
-            )
-            == expected
-        )
+        assert g.get_cell_or_default(2, "url", default="default", when_empty_use_default=when_empty) == expected

    def test_get_cell_or_default_handles_missing_columns(self, gworksheet):
-        assert (
-            gworksheet.get_cell_or_default(1, "invalid_col", default="safe") == "safe"
-        )
+        assert gworksheet.get_cell_or_default(1, "invalid_col", default="safe") == "safe"

    # Test write operations
    def test_set_cell_updates_correct_position(self, mock_worksheet, gworksheet):
        gworksheet.set_cell(2, "url", "new_url")
        mock_worksheet.update_cell.assert_called_once_with(2, 1, "new_url")

-    def test_batch_set_cell_formats_requests_correctly(
-        self, mock_worksheet, gworksheet
-    ):
+    def test_batch_set_cell_formats_requests_correctly(self, mock_worksheet, gworksheet):
        updates = [(2, "url", "new_url"), (3, "status", "processed")]
        gworksheet.batch_set_cell(updates)
        expected_batch = [
            {"range": "A2", "values": [["new_url"]]},
            {"range": "B3", "values": [["processed"]]},
        ]
-        mock_worksheet.batch_update.assert_called_once_with(
-            expected_batch, value_input_option="USER_ENTERED"
-        )
+        mock_worksheet.batch_update.assert_called_once_with(expected_batch, value_input_option="USER_ENTERED")

    def test_batch_set_cell_truncates_long_values(self, mock_worksheet, gworksheet):
        long_value = "x" * 50000
--- a/tests/formatters/test_html_formatter.py
+++ b/tests/formatters/test_html_formatter.py
@@ -5,13 +5,13 @@ from auto_archiver.core import Metadata, Media
 def test_format(setup_module):
    formatter = setup_module(HtmlFormatter)

-    metadata = Metadata().set("content", "Hello, world!").set_url('https://example.com')
+    metadata = Metadata().set("content", "Hello, world!").set_url("https://example.com")

    final_media = formatter.format(metadata)
    assert isinstance(final_media, Media)
    assert ".html" in final_media.filename
-    with open (final_media.filename, "r", encoding="utf-8") as f:
+    with open(final_media.filename, "r", encoding="utf-8") as f:
        content = f.read()
        assert "Hello, world!" in content
    assert final_media.mimetype == "text/html"
-    assert "SHA-256:" in final_media.get('hash')
+    assert "SHA-256:" in final_media.get("hash")
--- a/tests/storages/test_S3_storage.py
+++ b/tests/storages/test_S3_storage.py
@@ -8,6 +8,7 @@ class TestS3Storage:
    """
    Test suite for S3Storage.
    """
+
    module_name: str = "s3_storage"
    storage: Type[S3Storage]
    config: dict = {
@@ -32,10 +33,10 @@ class TestS3Storage:
        """Test that S3 client is initialized with correct parameters"""

        assert self.storage.s3 is not None
-        assert self.storage.s3.meta.region_name == 'test-region'
+        assert self.storage.s3.meta.region_name == "test-region"

    def test_get_cdn_url_generation(self):
-        """Test CDN URL formatting """
+        """Test CDN URL formatting"""
        media = Media("test.txt")
        media.key = "path/to/file.txt"
        url = self.storage.get_cdn_url(media)
@@ -46,14 +47,14 @@ class TestS3Storage:
    def test_uploadf_sets_acl_public(self, mocker):
        media = Media("test.txt")
        mock_file = mocker.MagicMock()
-        mock_s3_upload = mocker.patch.object(self.storage.s3, 'upload_fileobj')
-        mocker.patch.object(self.storage, 'is_upload_needed', return_value=True)
+        mock_s3_upload = mocker.patch.object(self.storage.s3, "upload_fileobj")
+        mocker.patch.object(self.storage, "is_upload_needed", return_value=True)
        self.storage.uploadf(mock_file, media)
        mock_s3_upload.assert_called_once_with(
            mock_file,
-            Bucket='test-bucket',
+            Bucket="test-bucket",
            Key=media.key,
-            ExtraArgs={'ACL': 'public-read', 'ContentType': 'text/plain'}
+            ExtraArgs={"ACL": "public-read", "ContentType": "text/plain"},
        )

    def test_upload_decision_logic(self, mocker):
@@ -61,23 +62,31 @@ class TestS3Storage:
        media = Media("test.txt")
        assert self.storage.is_upload_needed(media) is True
        self.storage.random_no_duplicate = True
-        mock_calc_hash = mocker.patch('auto_archiver.modules.s3_storage.s3_storage.calculate_file_hash', return_value='beepboop123beepboop123beepboop123')
-        mock_file_in_folder = mocker.patch.object(self.storage, 'file_in_folder', return_value='existing_key.txt')
+        mock_calc_hash = mocker.patch(
+            "auto_archiver.modules.s3_storage.s3_storage.calculate_file_hash",
+            return_value="beepboop123beepboop123beepboop123",
+        )
+        mock_file_in_folder = mocker.patch.object(self.storage, "file_in_folder", return_value="existing_key.txt")
        assert self.storage.is_upload_needed(media) is False
-        assert media.key == 'existing_key.txt'
-        mock_file_in_folder.assert_called_with('no-dups/beepboop123beepboop123be')
+        assert media.key == "existing_key.txt"
+        mock_file_in_folder.assert_called_with("no-dups/beepboop123beepboop123be")

    def test_skips_upload_when_duplicate_exists(self, mocker):
        """Test that upload skips when file_in_folder finds existing object"""
        self.storage.random_no_duplicate = True
-        mock_file_in_folder = mocker.patch.object(S3Storage, 'file_in_folder', return_value="existing_folder/existing_file.txt")
+        mock_file_in_folder = mocker.patch.object(
+            S3Storage, "file_in_folder", return_value="existing_folder/existing_file.txt"
+        )
        media = Media("test.txt")
        media.key = "original_path.txt"
-        mock_calculate_hash = mocker.patch('auto_archiver.modules.s3_storage.s3_storage.calculate_file_hash', return_value="beepboop123beepboop123beepboop123")
+        mock_calculate_hash = mocker.patch(
+            "auto_archiver.modules.s3_storage.s3_storage.calculate_file_hash",
+            return_value="beepboop123beepboop123beepboop123",
+        )
        assert self.storage.is_upload_needed(media) is False
        assert media.key == "existing_folder/existing_file.txt"
        assert media.get("previously archived") is True
-        mock_upload = mocker.patch.object(self.storage.s3, 'upload_fileobj')
+        mock_upload = mocker.patch.object(self.storage.s3, "upload_fileobj")
        result = self.storage.uploadf(None, media)
        mock_upload.assert_not_called()
        assert result is True
@@ -85,21 +94,20 @@ class TestS3Storage:
    def test_uploads_with_correct_parameters(self, mocker):
        media = Media("test.txt")
        media.key = "original_key.txt"
-        mocker.patch.object(S3Storage, 'is_upload_needed', return_value=True)
-        media.mimetype = 'image/png'
+        mocker.patch.object(S3Storage, "is_upload_needed", return_value=True)
+        media.mimetype = "image/png"
        mock_file = mocker.MagicMock()
-        mock_upload = mocker.patch.object(self.storage.s3, 'upload_fileobj')
+        mock_upload = mocker.patch.object(self.storage.s3, "upload_fileobj")
        self.storage.uploadf(mock_file, media)
        mock_upload.assert_called_once_with(
            mock_file,
-            Bucket='test-bucket',
-            Key='original_key.txt',
-            ExtraArgs={
-                'ACL': 'public-read',
-                'ContentType': 'image/png'
-            }
+            Bucket="test-bucket",
+            Key="original_key.txt",
+            ExtraArgs={"ACL": "public-read", "ContentType": "image/png"},
        )

    def test_file_in_folder_exists(self, mocker):
-        mock_list_objects = mocker.patch.object(self.storage.s3, 'list_objects', return_value={'Contents': [{'Key': 'path/to/file.txt'}]})
-        assert self.storage.file_in_folder('path/to/') == 'path/to/file.txt'
+        mock_list_objects = mocker.patch.object(
+            self.storage.s3, "list_objects", return_value={"Contents": [{"Key": "path/to/file.txt"}]}
+        )
+        assert self.storage.file_in_folder("path/to/") == "path/to/file.txt"
--- a/tests/storages/test_atlos_storage.py
+++ b/tests/storages/test_atlos_storage.py
@@ -101,7 +101,9 @@ def test_upload_not_uploaded(tmp_path, atlos_storage: AtlosStorage, metadata: Me
    assert file_tuple[0] == os.path.basename(media.filename)


-def test_upload_post_http_error(tmp_path, atlos_storage: AtlosStorage, metadata: Metadata, media: Media, mocker) -> None:
+def test_upload_post_http_error(
+    tmp_path, atlos_storage: AtlosStorage, metadata: Metadata, media: Media, mocker
+) -> None:
    """Test upload() propagates HTTP error during POST."""
    metadata.set("atlos_id", 303)
    fake_get_response = {"result": {"artifacts": []}}
@@ -109,4 +111,3 @@ def test_upload_post_http_error(tmp_path, atlos_storage: AtlosStorage, metadata:
    mocker.patch.object(atlos_storage, "_post", side_effect=Exception("HTTP error"))
    with pytest.raises(Exception, match="HTTP error"):
        atlos_storage.upload(media, metadata)
-
--- a/tests/storages/test_gdrive_storage.py
+++ b/tests/storages/test_gdrive_storage.py
@@ -12,26 +12,28 @@ from tests.storages.test_storage_base import TestStorageBase
 def gdrive_storage(setup_module, mocker):
    module_name: str = "gdrive_storage"
    storage: GDriveStorage
-    config: dict = {'path_generator': 'url',
-            'filename_generator': 'static',
-            'root_folder_id': "fake_root_folder_id",
-            'oauth_token': None,
-            'service_account': 'fake_service_account.json'
-                    }
-    mocker.patch('google.oauth2.service_account.Credentials.from_service_account_file')
+    config: dict = {
+        "path_generator": "url",
+        "filename_generator": "static",
+        "root_folder_id": "fake_root_folder_id",
+        "oauth_token": None,
+        "service_account": "fake_service_account.json",
+    }
+    mocker.patch("google.oauth2.service_account.Credentials.from_service_account_file")
    return setup_module(module_name, config)


 def test_initialize_fails_with_non_existent_creds(setup_module):
    """Test that the Google Drive service raises a FileNotFoundError when the service account file does not exist.
-        (and isn't mocked)
+    (and isn't mocked)
    """
-    config: dict = {'path_generator': 'url',
-                    'filename_generator': 'static',
-                    'root_folder_id': "fake_root_folder_id",
-                    'oauth_token': None,
-                    'service_account': 'fake_service_account.json'
-                    }
+    config: dict = {
+        "path_generator": "url",
+        "filename_generator": "static",
+        "root_folder_id": "fake_root_folder_id",
+        "oauth_token": None,
+        "service_account": "fake_service_account.json",
+    }
    with pytest.raises(FileNotFoundError) as exc_info:
        setup_module("gdrive_storage", config)
    assert "No such file or directory" in str(exc_info.value)
@@ -48,12 +50,12 @@ def test_get_id_from_parent_and_name(gdrive_storage, mocker):
    result = gdrive_storage._get_id_from_parent_and_name("parent", "mock", retries=1, use_mime_type=False)
    assert result == "123"

+
 def test_path_parts():
    media = Media(filename="test.jpg")
    media.key = "folder1/folder2/test.jpg"


-
@pytest.mark.skip(reason="Requires real credentials")
@pytest.mark.download
 class TestGDriveStorageConnected(TestStorageBase):
@@ -63,19 +65,17 @@ class TestGDriveStorageConnected(TestStorageBase):

    module_name: str = "gdrive_storage"
    storage: Type[GDriveStorage]
-    config: dict = {'path_generator': 'url',
-            'filename_generator': 'static',
-            # TODO: replace with real root folder id
-            'root_folder_id': "1TVY_oJt95_dmRSEdP9m5zFy7l50TeCSk",
-            'oauth_token': None,
-            'service_account': 'secrets/service_account.json'
-                    }
-
+    config: dict = {
+        "path_generator": "url",
+        "filename_generator": "static",
+        # TODO: replace with real root folder id
+        "root_folder_id": "1TVY_oJt95_dmRSEdP9m5zFy7l50TeCSk",
+        "oauth_token": None,
+        "service_account": "secrets/service_account.json",
+    }

    def test_initialize_with_real_credentials(self):
        """
        Test that the Google Drive service can be initialized with real credentials.
        """
        assert self.storage.service is not None
-
-
--- a/tests/storages/test_local_storage.py
+++ b/tests/storages/test_local_storage.py
@@ -1,4 +1,3 @@
-
 import os
 from pathlib import Path

@@ -34,13 +33,13 @@ def test_get_cdn_url_relative(local_storage):
    assert local_storage.get_cdn_url(media) == expected


-
 def test_get_cdn_url_absolute(local_storage):
    media = Media(key="test.txt", filename="dummy.txt")
    local_storage.save_absolute = True
    expected = os.path.abspath(os.path.join(local_storage.save_to, media.key))
    assert local_storage.get_cdn_url(media) == expected

+
 def test_upload_file_contents_and_metadata(local_storage, sample_media):
    dest = os.path.join(local_storage.save_to, sample_media.key)
    assert local_storage.upload(sample_media) is True
@@ -51,5 +50,3 @@ def test_upload_nonexistent_source(local_storage):
    media = Media(key="missing.txt", filename="nonexistent.txt")
    with pytest.raises(FileNotFoundError):
        local_storage.upload(media)
-
-
--- a/tests/storages/test_storage_base.py
+++ b/tests/storages/test_storage_base.py
@@ -7,16 +7,11 @@ from auto_archiver.core.storage import Storage


 class TestStorageBase(object):
-
    module_name: str = None
    config: dict = None

    @pytest.fixture(autouse=True)
    def setup_storage(self, setup_module):
-        assert (
-            self.module_name is not None
-        ), "self.module_name must be set on the subclass"
+        assert self.module_name is not None, "self.module_name must be set on the subclass"
        assert self.config is not None, "self.config must be a dict set on the subclass"
-        self.storage: Type[Storage] = setup_module(
-            self.module_name, self.config
-        )
+        self.storage: Type[Storage] = setup_module(self.module_name, self.config)
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -3,39 +3,46 @@ from auto_archiver.core import config
 from ruamel.yaml.scanner import ScannerError
 from ruamel.yaml.comments import CommentedMap

+
 def test_return_default_config_for_nonexistent_file():
    assert config.read_yaml("nonexistent_file.yaml") == config.EMPTY_CONFIG

+
 def test_return_default_config_for_empty_file(tmp_path):
    empty_file = tmp_path / "empty_file.yaml"
    empty_file.write_text("")
    assert config.read_yaml(empty_file) == config.EMPTY_CONFIG

+
 def test_raise_error_on_invalid_yaml(tmp_path):
    invalid_yaml = tmp_path / "invalid_yaml.yaml"
-    invalid_yaml.write_text("key: \"value_without_end_quote")
+    invalid_yaml.write_text('key: "value_without_end_quote')
    # make sure it raises ScannerError
    with pytest.raises(ScannerError):
        config.read_yaml(invalid_yaml)

+
 def test_write_yaml(tmp_path):
    yaml_file = tmp_path / "write_yaml.yaml"
    config.store_yaml(config.EMPTY_CONFIG, yaml_file.as_posix())
    assert "steps:\n" in yaml_file.read_text()

+
 def test_round_trip_comments(tmp_path):
    yaml_file = tmp_path / "round_trip_comments.yaml"

    with open(yaml_file, "w") as f:
-        f.write("generic_extractor:\n  facebook_cookie: abc # end of line comment\n  subtitles: true\n  # comments: false\n  # livestreams: false\n  list_type:\n    - value1\n    - value2")
+        f.write(
+            "generic_extractor:\n  facebook_cookie: abc # end of line comment\n  subtitles: true\n  # comments: false\n  # livestreams: false\n  list_type:\n    - value1\n    - value2"
+        )

    loaded = config.read_yaml(yaml_file)
    # check the comments are preserved
-    assert loaded['generic_extractor']['facebook_cookie'] == "abc"
-    assert loaded['generic_extractor'].ca.items['facebook_cookie'][2].value == "# end of line comment\n"
+    assert loaded["generic_extractor"]["facebook_cookie"] == "abc"
+    assert loaded["generic_extractor"].ca.items["facebook_cookie"][2].value == "# end of line comment\n"

    # add some more items to my_settings
-    loaded['generic_extractor']['list_type'].append("bellingcat")
+    loaded["generic_extractor"]["list_type"].append("bellingcat")
    config.store_yaml(loaded, yaml_file.as_posix())

    assert "# comments: false" in yaml_file.read_text()
@@ -43,14 +50,17 @@ def test_round_trip_comments(tmp_path):
    assert "abc # end of line comment" in yaml_file.read_text()
    assert "- value2\n  - bellingcat" in yaml_file.read_text()

+
 def test_merge_dicts():
    yaml_dict = config.EMPTY_CONFIG
-    yaml_dict['settings'] = CommentedMap(**{
+    yaml_dict["settings"] = CommentedMap(
+        **{
            "key1": ["a"],
            "key2": "old_value",
            "key3": ["a", "b", "c"],
            "key5": "value5",
-        })
+        }
+    )

    dotdict = {
        "settings.key1": ["b", "c"],
@@ -77,6 +87,7 @@ def test_check_types():
    assert config.is_dict_type([]) == False
    assert config.is_dict_type("") == False

+
 def test_from_dot_notation():
    dotdict = {
        "settings.key1": ["a", "b", "c"],
@@ -88,16 +99,17 @@ def test_from_dot_notation():
    assert normal_dict["settings"]["key2"] == "new_value"
    assert normal_dict["settings"]["key3"]["key4"] == "value"

+
 def test_to_dot_notation():
    yaml_dict = config.EMPTY_CONFIG
-    yaml_dict['settings'] = {
+    yaml_dict["settings"] = {
        "key1": ["a", "b", "c"],
        "key2": "new_value",
        "key3": {
            "key4": "value",
-        }
+        },
    }
    dotdict = config.to_dot_notation(yaml_dict)
    assert dotdict["settings.key1"] == ["a", "b", "c"]
    assert dotdict["settings.key2"] == "new_value"
-    assert dotdict["settings.key3.key4"] == "value"
+    assert dotdict["settings.key3.key4"] == "value"
--- a/tests/test_implementation.py
+++ b/tests/test_implementation.py
@@ -10,21 +10,23 @@ def orchestration_file_path(tmp_path):
    folder.mkdir(exist_ok=True)
    return (folder / "example_orch.yaml").as_posix()

+
@pytest.fixture
 def orchestration_file(orchestration_file_path):
-    def _orchestration_file(content=''):
+    def _orchestration_file(content=""):
        with open(orchestration_file_path, "w") as f:
            f.write(content)
        return orchestration_file_path
-    
+
    return _orchestration_file

+
@pytest.fixture
 def autoarchiver(tmp_path, monkeypatch, request):
    def _autoarchiver(args=[]):
-
        def cleanup():
            from loguru import logger
+
            if not logger._core.handlers.get(0):
                logger._core.handlers_count = 0
                logger.add(sys.stderr)
@@ -47,6 +49,7 @@ def test_run_auto_archiver_no_args(caplog, autoarchiver):

    assert "provide at least one URL via the command line, or set up an alternative feeder" in caplog.text

+
 def test_run_auto_archiver_invalid_file(caplog, autoarchiver):
    # exec 'auto-archiver' on the command lin
    with pytest.raises(SystemExit):
@@ -54,6 +57,7 @@ def test_run_auto_archiver_invalid_file(caplog, autoarchiver):

    assert "Make sure the file exists and try again, or run without th" in caplog.text

+
 def test_run_auto_archiver_empty_file(caplog, autoarchiver, orchestration_file):
    # create a valid (empty) orchestration file
    path = orchestration_file(content="")
@@ -64,6 +68,7 @@ def test_run_auto_archiver_empty_file(caplog, autoarchiver, orchestration_file):
    # should treat an empty file as if there is no file at all
    assert " No URLs provided. Please provide at least one URL via the com" in caplog.text

+
 def test_call_autoarchiver_main(caplog, monkeypatch, tmp_path):
    from auto_archiver.__main__ import main

@@ -75,4 +80,4 @@ def test_call_autoarchiver_main(caplog, monkeypatch, tmp_path):
        with pytest.raises(SystemExit):
            main()

-    assert "No URLs provided. Please provide at least one" in caplog.text
+    assert "No URLs provided. Please provide at least one" in caplog.text
--- a/tests/test_metadata.py
+++ b/tests/test_metadata.py
@@ -62,18 +62,8 @@ def test_simple_merge(basic_metadata):


 def test_left_merge():
-    left = (
-        Metadata()
-        .set("tags", ["a"])
-        .set("stats", {"views": 10})
-        .set("status", "success")
-    )
-    right = (
-        Metadata()
-        .set("tags", ["b"])
-        .set("stats", {"likes": 5})
-        .set("status", "no archiver")
-    )
+    left = Metadata().set("tags", ["a"]).set("stats", {"views": 10}).set("status", "success")
+    right = Metadata().set("tags", ["b"]).set("stats", {"likes": 5}).set("status", "no archiver")

    left.merge(right, overwrite_left=True)
    assert left.get("status") == "no archiver"
@@ -120,6 +110,7 @@ def test_is_empty():
 def test_store():
    pass

+
 # Test Media operations


@@ -176,6 +167,7 @@ def test_choose_most_complete():
    res = Metadata.choose_most_complete([m_more, m_less])
    assert res.metadata.get("title") == "Title 1"

+
 def test_choose_most_complete_from_pickles(unpickle):
    # test most complete from pickles before and after an enricher has run
    # Only compares length of media, not the actual media
--- a/tests/test_modules.py
+++ b/tests/test_modules.py
@@ -3,6 +3,7 @@ import pytest
 from auto_archiver.core.module import ModuleFactory, LazyBaseModule
 from auto_archiver.core.base_module import BaseModule

+
@pytest.fixture
 def example_module():
    import auto_archiver
@@ -14,12 +15,14 @@ def example_module():

    return module_factory.get_module_lazy("example_module")

+
 def test_get_module_lazy(example_module):
    assert example_module.name == "example_module"
    assert example_module.display_name == "Example Module"

    assert example_module.manifest is not None

+
 def test_python_dependency_check(example_module):
    # example_module requires loguru, which is not installed
    # monkey patch the manifest to include a nonexistnet dependency
@@ -30,11 +33,13 @@ def test_python_dependency_check(example_module):

    assert load_error.value.code == 1

+
 def test_binary_dependency_check(example_module):
    # example_module requires ffmpeg, which is not installed
    # monkey patch the manifest to include a nonexistnet dependency
    example_module.manifest["dependencies"]["binary"] = ["does_not_exist"]

+
 def test_module_dependency_check_loads_module(example_module):
    # example_module requires cli_feeder, which is not installed
    # monkey patch the manifest to include a nonexistnet dependency
@@ -49,19 +54,20 @@ def test_module_dependency_check_loads_module(example_module):
    assert module_factory._lazy_modules["hash_enricher"] is not None
    assert module_factory._lazy_modules["hash_enricher"]._instance is not None

-def test_load_module(example_module):

+def test_load_module(example_module):
    # setup the module, and check that config is set to the default values
    loaded_module = example_module.load({})
    assert loaded_module is not None
    assert isinstance(loaded_module, BaseModule)
    assert loaded_module.name == "example_module"
    assert loaded_module.display_name == "Example Module"
-    assert loaded_module.config["example_module"] ==  {"csv_file" : "db.csv"}
+    assert loaded_module.config["example_module"] == {"csv_file": "db.csv"}

    # check that the vlaue is set on the module itself
    assert loaded_module.csv_file == "db.csv"

+
@pytest.mark.parametrize("module_name", ["local_storage", "generic_extractor", "html_formatter", "csv_db"])
 def test_load_modules(module_name):
    # test that specific modules can be loaded
@@ -96,5 +102,3 @@ def test_lazy_base_module(module_name):
    assert len(lazy_module.configs) > 0
    assert len(lazy_module.description) > 0
    assert len(lazy_module.version) > 0
-
-
--- a/tests/test_orchestrator.py
+++ b/tests/test_orchestrator.py
@@ -9,49 +9,63 @@ from auto_archiver.core import Metadata
 TEST_ORCHESTRATION = "tests/data/test_orchestration.yaml"
 TEST_MODULES = "tests/data/test_modules/"

+
@pytest.fixture
 def test_args():
-    return ["--config", TEST_ORCHESTRATION,
-            "--module_paths", TEST_MODULES,
-            "--example_module.required_field", "some_value"] # just set this for normal testing, we will remove it later
+    return [
+        "--config",
+        TEST_ORCHESTRATION,
+        "--module_paths",
+        TEST_MODULES,
+        "--example_module.required_field",
+        "some_value",
+    ]  # just set this for normal testing, we will remove it later
+

@pytest.fixture
 def orchestrator():
    return ArchivingOrchestrator()

+
@pytest.fixture
 def basic_parser(orchestrator) -> ArgumentParser:
    return orchestrator.setup_basic_parser()

+
 def test_setup_orchestrator(orchestrator):
    assert orchestrator is not None

+
 def test_parse_config():
    pass

+
 def test_parse_basic(basic_parser):
    args = basic_parser.parse_args(["--config", TEST_ORCHESTRATION])
    assert args.config_file == TEST_ORCHESTRATION

+
@pytest.mark.parametrize("mode", ["simple", "full"])
 def test_mode(basic_parser, mode):
    args = basic_parser.parse_args(["--mode", mode])
    assert args.mode == mode

+
 def test_mode_invalid(basic_parser, capsys):
    with pytest.raises(SystemExit) as exit_error:
        basic_parser.parse_args(["--mode", "invalid"])
    assert exit_error.value.code == 2
    assert "invalid choice" in capsys.readouterr().err

+
 def test_version(basic_parser, capsys):
    with pytest.raises(SystemExit) as exit_error:
        basic_parser.parse_args(["--version"])
    assert exit_error.value.code == 0
    assert capsys.readouterr().out == f"{__version__}\n"

-def test_help(orchestrator, basic_parser, capsys):

+def test_help(orchestrator, basic_parser, capsys):
    args = basic_parser.parse_args(["--help"])
    assert args.help == True

@@ -83,14 +97,17 @@ def test_help(orchestrator, basic_parser, capsys):

 def test_add_custom_modules_path(orchestrator, test_args):
    orchestrator.setup_config(test_args)
-    
+
    import auto_archiver
+
    assert "tests/data/test_modules/" in auto_archiver.modules.__path__

-def test_add_custom_modules_path_invalid(orchestrator, caplog, test_args):

-    orchestrator.setup_config(test_args +  # we still need to load the real path to get the example_module 
-                          ["--module_paths", "tests/data/invalid_test_modules/"])
+def test_add_custom_modules_path_invalid(orchestrator, caplog, test_args):
+    orchestrator.setup_config(
+        test_args  # we still need to load the real path to get the example_module
+        + ["--module_paths", "tests/data/invalid_test_modules/"]
+    )

    assert caplog.records[0].message == "Path 'tests/data/invalid_test_modules/' does not exist. Skipping..."

@@ -104,11 +121,11 @@ def test_check_required_values(orchestrator, caplog, test_args):

    assert caplog.records[1].message == "the following arguments are required: --example_module.required_field"

-def test_get_required_values_from_config(orchestrator, test_args, tmp_path):

+def test_get_required_values_from_config(orchestrator, test_args, tmp_path):
    # load the default example yaml, add a required field, then run the orchestrator
    test_yaml = read_yaml(TEST_ORCHESTRATION)
-    test_yaml['example_module'] = {'required_field': 'some_value'}
+    test_yaml["example_module"] = {"required_field": "some_value"}
    # write it to a temp file
    tmp_file = (tmp_path / "temp_config.yaml").as_posix()
    store_yaml(test_yaml, tmp_file)
@@ -117,27 +134,42 @@ def test_get_required_values_from_config(orchestrator, test_args, tmp_path):
    config = orchestrator.setup_config(["--config", tmp_file, "--module_paths", TEST_MODULES])
    assert config is not None

-def test_load_authentication_string(orchestrator, test_args):

-    config = orchestrator.setup_config(test_args + ["--authentication", '{"facebook.com": {"username": "my_username", "password": "my_password"}}'])
-    assert config['authentication'] == {"facebook.com": {"username": "my_username", "password": "my_password"}}
+def test_load_authentication_string(orchestrator, test_args):
+    config = orchestrator.setup_config(
+        test_args + ["--authentication", '{"facebook.com": {"username": "my_username", "password": "my_password"}}']
+    )
+    assert config["authentication"] == {"facebook.com": {"username": "my_username", "password": "my_password"}}
+

 def test_load_authentication_string_concat_site(orchestrator, test_args):
-    
    config = orchestrator.setup_config(test_args + ["--authentication", '{"x.com,twitter.com": {"api_key": "my_key"}}'])
-    assert config['authentication'] == {"x.com": {"api_key": "my_key"},
-                                                     "twitter.com": {"api_key": "my_key"}}
+    assert config["authentication"] == {"x.com": {"api_key": "my_key"}, "twitter.com": {"api_key": "my_key"}}
+

 def test_load_invalid_authentication_string(orchestrator, test_args):
    with pytest.raises(ArgumentTypeError):
-        orchestrator.setup_config(test_args + ["--authentication", "{\''invalid_json"])
+        orchestrator.setup_config(test_args + ["--authentication", "{''invalid_json"])
+

 def test_load_authentication_invalid_dict(orchestrator, test_args):
    with pytest.raises(ArgumentTypeError):
        orchestrator.setup_config(test_args + ["--authentication", "[true, false]"])

+
 def test_load_modules_from_commandline(orchestrator, test_args):
-    args = test_args + ["--feeders", "example_module", "--extractors", "example_module", "--databases", "example_module", "--enrichers", "example_module", "--formatters", "example_module"]
+    args = test_args + [
+        "--feeders",
+        "example_module",
+        "--extractors",
+        "example_module",
+        "--databases",
+        "example_module",
+        "--enrichers",
+        "example_module",
+        "--formatters",
+        "example_module",
+    ]

    orchestrator.setup(args)

@@ -153,27 +185,37 @@ def test_load_modules_from_commandline(orchestrator, test_args):
    assert orchestrator.enrichers[0].name == "example_module"
    assert orchestrator.formatters[0].name == "example_module"

+
 def test_load_settings_for_module_from_commandline(orchestrator, test_args):
-    args = test_args + ["--feeders", "gsheet_feeder_db", "--gsheet_feeder_db.sheet_id", "123", "--gsheet_feeder_db.service_account", "tests/data/test_service_account.json"]
+    args = test_args + [
+        "--feeders",
+        "gsheet_feeder_db",
+        "--gsheet_feeder_db.sheet_id",
+        "123",
+        "--gsheet_feeder_db.service_account",
+        "tests/data/test_service_account.json",
+    ]

    orchestrator.setup(args)

    assert len(orchestrator.feeders) == 1
    assert orchestrator.feeders[0].name == "gsheet_feeder_db"
-    assert orchestrator.config['gsheet_feeder_db']['sheet_id'] == "123"
+    assert orchestrator.config["gsheet_feeder_db"]["sheet_id"] == "123"


 def test_multiple_orchestrator(test_args):
-
-    o1_args = test_args + ["--feeders", "gsheet_feeder_db", "--gsheet_feeder_db.service_account", "tests/data/test_service_account.json"]
+    o1_args = test_args + [
+        "--feeders",
+        "gsheet_feeder_db",
+        "--gsheet_feeder_db.service_account",
+        "tests/data/test_service_account.json",
+    ]
    o1 = ArchivingOrchestrator()

    with pytest.raises(ValueError) as exit_error:
        # this should fail because the gsheet_feeder_db requires a sheet_id / sheet
        o1.setup(o1_args)

-
-
    o2_args = test_args + ["--feeders", "example_module"]
    o2 = ArchivingOrchestrator()
    o2.setup(o2_args)
@@ -182,4 +224,4 @@ def test_multiple_orchestrator(test_args):

    output: Metadata = list(o2.feed())
    assert len(output) == 1
-    assert output[0].get_url() == "https://example.com"
+    assert output[0].get_url() == "https://example.com"
--- a/tests/utils/test_misc.py
+++ b/tests/utils/test_misc.py
@@ -14,7 +14,7 @@ from auto_archiver.utils.misc import (
    update_nested_dict,
    calculate_file_hash,
    random_str,
-    get_timestamp
+    get_timestamp,
 )


@@ -38,40 +38,46 @@ class TestDirectoryUtils:
        mkdir_if_not_exists(existing_dir)
        assert existing_dir.exists()

+
 class TestURLExpansion:
-    @pytest.mark.parametrize("input_url,expected", [
-        ("https://example.com", "https://example.com"),
-        ("https://t.co/test", "https://expanded.url")
-    ])
+    @pytest.mark.parametrize(
+        "input_url,expected",
+        [("https://example.com", "https://example.com"), ("https://t.co/test", "https://expanded.url")],
+    )
    def test_expand_url(self, input_url, expected, mocker):
        mock_response = mocker.Mock()
        mock_response.url = "https://expanded.url"
-        mocker.patch('requests.get', return_value=mock_response)
+        mocker.patch("requests.get", return_value=mock_response)
        result = expand_url(input_url)
        assert result == expected

    def test_expand_url_handles_errors(self, caplog, mocker):
-        mocker.patch('requests.get', side_effect=Exception("Connection error"))
+        mocker.patch("requests.get", side_effect=Exception("Connection error"))
        url = "https://t.co/error"
        result = expand_url(url)
        assert result == url
        assert f"Failed to expand url {url}" in caplog.text

+
 class TestAttributeHandling:
    class Sample:
        exists = "value"
        none = None

-    @pytest.mark.parametrize("obj,attr,default,expected", [
-        (Sample(), "exists", "default", "value"),
-        (Sample(), "none", "default", "default"),
-        (Sample(), "missing", "default", "default"),
-        (None, "anything", "fallback", "fallback"),
-    ])
+    @pytest.mark.parametrize(
+        "obj,attr,default,expected",
+        [
+            (Sample(), "exists", "default", "value"),
+            (Sample(), "none", "default", "default"),
+            (Sample(), "missing", "default", "default"),
+            (None, "anything", "fallback", "fallback"),
+        ],
+    )
    def test_getattr_or(self, obj, attr, default, expected):
        # Test gets attribute or returns a default value
        assert getattr_or(obj, attr, default) == expected

+
 class TestDateTimeHandling:
    def test_datetime_encoder(self, sample_datetime):
        result = json.dumps({"dt": sample_datetime}, cls=DateTimeEncoder)
@@ -83,11 +89,14 @@ class TestDateTimeHandling:
        result = dump_payload(payload)
        assert str(sample_datetime) in result

-    @pytest.mark.parametrize("dt_str,fmt,expected", [
-        ("2023-01-01 12:00:00+00:00", None, datetime(2023, 1, 1, 12, 0, tzinfo=timezone.utc)),
-        ("20230101 120000", "%Y%m%d %H%M%S", datetime(2023, 1, 1, 12, 0)),
-        ("invalid", None, None),
-    ])
+    @pytest.mark.parametrize(
+        "dt_str,fmt,expected",
+        [
+            ("2023-01-01 12:00:00+00:00", None, datetime(2023, 1, 1, 12, 0, tzinfo=timezone.utc)),
+            ("20230101 120000", "%Y%m%d %H%M%S", datetime(2023, 1, 1, 12, 0)),
+            ("invalid", None, None),
+        ],
+    )
    def test_datetime_from_string(self, dt_str, fmt, expected):
        result = get_datetime_from_str(dt_str, fmt)
        if expected is None:
@@ -95,16 +104,21 @@ class TestDateTimeHandling:
        else:
            assert result == expected.replace(tzinfo=result.tzinfo)

+
 class TestDictUtils:
-    @pytest.mark.parametrize("original,update,expected", [
-        ({"a": 1}, {"b": 2}, {"a": 1, "b": 2}),
-        ({"nested": {"a": 1}}, {"nested": {"b": 2}}, {"nested": {"a": 1, "b": 2}}),
-        ({"a": {"b": {"c": 1}}}, {"a": {"b": {"c": 2}}}, {"a": {"b": {"c": 2}}}),
-    ])
+    @pytest.mark.parametrize(
+        "original,update,expected",
+        [
+            ({"a": 1}, {"b": 2}, {"a": 1, "b": 2}),
+            ({"nested": {"a": 1}}, {"nested": {"b": 2}}, {"nested": {"a": 1, "b": 2}}),
+            ({"a": {"b": {"c": 1}}}, {"a": {"b": {"c": 2}}}, {"a": {"b": {"c": 2}}}),
+        ],
+    )
    def test_update_nested_dict(self, original, update, expected):
        update_nested_dict(original, update)
        assert original == expected

+
 class TestHashingUtils:
    def test_file_hashing(self, sample_file):
        expected = hashlib.sha256(b"test content").hexdigest()
@@ -118,6 +132,7 @@ class TestHashingUtils:
        expected = hashlib.sha256(content).hexdigest()
        assert calculate_file_hash(str(file_path)) == expected

+
 class TestMiscUtils:
    def test_random_str_length(self):
        for length in [8, 16, 32]:
@@ -131,14 +146,17 @@ class TestMiscUtils:
    def test_random_str_uniqueness(self):
        assert random_str() != random_str()

-    @pytest.mark.parametrize("ts_input,utc,iso,expected_type", [
-        (datetime.now(), True, True, str),
-        ("2023-01-01T12:00:00+00:00", False, False, datetime),
-        (1672574400, True, True, str),
-    ])
+    @pytest.mark.parametrize(
+        "ts_input,utc,iso,expected_type",
+        [
+            (datetime.now(), True, True, str),
+            ("2023-01-01T12:00:00+00:00", False, False, datetime),
+            (1672574400, True, True, str),
+        ],
+    )
    def test_timestamp_parsing(self, ts_input, utc, iso, expected_type):
        result = get_timestamp(ts_input, utc=utc, iso=iso)
        assert isinstance(result, expected_type)

    def test_invalid_timestamp_returns_none(self):
-        assert get_timestamp("invalid-date") is None
+        assert get_timestamp("invalid-date") is None