mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-07 19:08:30 +03:00
Merge branch 'dev' into feat/nitter-alternative
This commit is contained in:
2
.github/workflows/ruff.yaml
vendored
2
.github/workflows/ruff.yaml
vendored
@@ -24,7 +24,7 @@ jobs:
|
|||||||
- name: Install Python
|
- name: Install Python
|
||||||
uses: actions/setup-python@v6
|
uses: actions/setup-python@v6
|
||||||
with:
|
with:
|
||||||
python-version: "3.11"
|
python-version: "3.12"
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
|
|||||||
70
poetry.lock
generated
70
poetry.lock
generated
@@ -755,23 +755,25 @@ files = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "curl-cffi"
|
name = "curl-cffi"
|
||||||
version = "0.13.0"
|
version = "0.14.0"
|
||||||
description = "libcurl ffi bindings for Python, with impersonation support."
|
description = "libcurl ffi bindings for Python, with impersonation support."
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.9"
|
python-versions = ">=3.10"
|
||||||
groups = ["main"]
|
groups = ["main"]
|
||||||
markers = "implementation_name == \"cpython\""
|
markers = "implementation_name == \"cpython\""
|
||||||
files = [
|
files = [
|
||||||
{file = "curl_cffi-0.13.0-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:434cadbe8df2f08b2fc2c16dff2779fb40b984af99c06aa700af898e185bb9db"},
|
{file = "curl_cffi-0.14.0-cp39-abi3-macosx_14_0_arm64.whl", hash = "sha256:e35e89c6a69872f9749d6d5fda642ed4fc159619329e99d577d0104c9aad5893"},
|
||||||
{file = "curl_cffi-0.13.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:59afa877a9ae09efa04646a7d068eeea48915a95d9add0a29854e7781679fcd7"},
|
{file = "curl_cffi-0.14.0-cp39-abi3-macosx_15_0_x86_64.whl", hash = "sha256:5945478cd28ad7dfb5c54473bcfb6743ee1d66554d57951fdf8fc0e7d8cf4e45"},
|
||||||
{file = "curl_cffi-0.13.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d06ed389e45a7ca97b17c275dbedd3d6524560270e675c720e93a2018a766076"},
|
{file = "curl_cffi-0.14.0-cp39-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c42e8fa3c667db9ccd2e696ee47adcd3cd5b0838d7282f3fc45f6c0ef3cfdfa7"},
|
||||||
{file = "curl_cffi-0.13.0-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b4e0de45ab3b7a835c72bd53640c2347415111b43421b5c7a1a0b18deae2e541"},
|
{file = "curl_cffi-0.14.0-cp39-abi3-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:060fe2c99c41d3cb7f894de318ddf4b0301b08dca70453d769bd4e74b36b8483"},
|
||||||
{file = "curl_cffi-0.13.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8eb4083371bbb94e9470d782de235fb5268bf43520de020c9e5e6be8f395443f"},
|
{file = "curl_cffi-0.14.0-cp39-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:b158c41a25388690dd0d40b5bc38d1e0f512135f17fdb8029868cbc1993d2e5b"},
|
||||||
{file = "curl_cffi-0.13.0-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:28911b526e8cd4aa0e5e38401bfe6887e8093907272f1f67ca22e6beb2933a51"},
|
{file = "curl_cffi-0.14.0-cp39-abi3-manylinux_2_28_i686.whl", hash = "sha256:1439fbef3500fb723333c826adf0efb0e2e5065a703fb5eccce637a2250db34a"},
|
||||||
{file = "curl_cffi-0.13.0-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:6d433ffcb455ab01dd0d7bde47109083aa38b59863aa183d29c668ae4c96bf8e"},
|
{file = "curl_cffi-0.14.0-cp39-abi3-manylinux_2_34_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e7176f2c2d22b542e3cf261072a81deb018cfa7688930f95dddef215caddb469"},
|
||||||
{file = "curl_cffi-0.13.0-cp39-abi3-win_amd64.whl", hash = "sha256:66a6b75ce971de9af64f1b6812e275f60b88880577bac47ef1fa19694fa21cd3"},
|
{file = "curl_cffi-0.14.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:03f21ade2d72978c2bb8670e9b6de5260e2755092b02d94b70b906813662998d"},
|
||||||
{file = "curl_cffi-0.13.0-cp39-abi3-win_arm64.whl", hash = "sha256:d438a3b45244e874794bc4081dc1e356d2bb926dcc7021e5a8fef2e2105ef1d8"},
|
{file = "curl_cffi-0.14.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:58ebf02de64ee5c95613209ddacb014c2d2f86298d7080c0a1c12ed876ee0690"},
|
||||||
{file = "curl_cffi-0.13.0.tar.gz", hash = "sha256:62ecd90a382bd5023750e3606e0aa7cb1a3a8ba41c14270b8e5e149ebf72c5ca"},
|
{file = "curl_cffi-0.14.0-cp39-abi3-win_amd64.whl", hash = "sha256:6e503f9a103f6ae7acfb3890c843b53ec030785a22ae7682a22cc43afb94123e"},
|
||||||
|
{file = "curl_cffi-0.14.0-cp39-abi3-win_arm64.whl", hash = "sha256:2eed50a969201605c863c4c31269dfc3e0da52916086ac54553cfa353022425c"},
|
||||||
|
{file = "curl_cffi-0.14.0.tar.gz", hash = "sha256:5ffbc82e59f05008ec08ea432f0e535418823cda44178ee518906a54f27a5f0f"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
@@ -780,9 +782,9 @@ cffi = ">=1.12.0"
|
|||||||
|
|
||||||
[package.extras]
|
[package.extras]
|
||||||
build = ["cibuildwheel", "wheel"]
|
build = ["cibuildwheel", "wheel"]
|
||||||
dev = ["charset_normalizer (>=3.3.2,<4.0)", "coverage (>=6.4.1,<7.0)", "cryptography (>=42.0.5,<43.0)", "httpx (==0.23.1)", "mypy (>=1.9.0,<2.0)", "pytest (>=8.1.1,<9.0)", "pytest-asyncio (>=0.23.6,<1.0)", "pytest-trio (>=0.8.0,<1.0)", "ruff (>=0.3.5,<1.0)", "trio (>=0.25.0,<1.0)", "trustme (>=1.1.0,<2.0)", "typing_extensions", "uvicorn (>=0.29.0,<1.0)", "websockets (>=12.0,<13.0)"]
|
dev = ["charset_normalizer (>=3.3.2,<4.0)", "coverage (>=6.4.1,<7.0)", "cryptography (>=42.0.5,<43.0)", "httpx (==0.23.1)", "mypy (>=1.9.0,<2.0)", "pytest (>=8.1.1,<9.0)", "pytest-asyncio (>=0.23.6,<1.0)", "pytest-trio (>=0.8.0,<1.0)", "ruff (>=0.3.5,<1.0)", "trio (>=0.25.0,<1.0)", "trustme (>=1.1.0,<2.0)", "typing_extensions", "uvicorn (>=0.29.0,<1.0)", "websockets (>=14.0)"]
|
||||||
extra = ["lxml_html_clean", "markdownify (>=1.1.0)", "readability-lxml (>=0.8.1)"]
|
extra = ["lxml_html_clean", "markdownify (>=1.1.0)", "readability-lxml (>=0.8.1)"]
|
||||||
test = ["charset_normalizer (>=3.3.2,<4.0)", "cryptography (>=42.0.5,<43.0)", "fastapi (==0.110.0)", "httpx (==0.23.1)", "proxy.py (>=2.4.3,<3.0)", "pytest (>=8.1.1,<9.0)", "pytest-asyncio (>=0.23.6,<1.0)", "pytest-trio (>=0.8.0,<1.0)", "python-multipart (>=0.0.9,<1.0)", "trio (>=0.25.0,<1.0)", "trustme (>=1.1.0,<2.0)", "typing_extensions", "uvicorn (>=0.29.0,<1.0)", "websockets (>=12.0,<13.0)"]
|
test = ["charset_normalizer (>=3.3.2,<4.0)", "cryptography (>=42.0.5,<43.0)", "fastapi (>=0.110.0,<1.0)", "httpx (==0.23.1)", "proxy.py (>=2.4.3,<3.0)", "pytest (>=8.1.1,<9.0)", "pytest-asyncio (>=0.23.6,<1.0)", "pytest-trio (>=0.8.0,<1.0)", "python-multipart (>=0.0.9,<1.0)", "trio (>=0.25.0,<1.0)", "trustme (>=1.1.0,<2.0)", "typing_extensions", "uvicorn (>=0.29.0,<1.0)", "websockets (>=14.0)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "dataclasses-json"
|
name = "dataclasses-json"
|
||||||
@@ -3108,30 +3110,30 @@ files = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ruff"
|
name = "ruff"
|
||||||
version = "0.9.10"
|
version = "0.15.2"
|
||||||
description = "An extremely fast Python linter and code formatter, written in Rust."
|
description = "An extremely fast Python linter and code formatter, written in Rust."
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.7"
|
python-versions = ">=3.7"
|
||||||
groups = ["dev"]
|
groups = ["dev"]
|
||||||
files = [
|
files = [
|
||||||
{file = "ruff-0.9.10-py3-none-linux_armv6l.whl", hash = "sha256:eb4d25532cfd9fe461acc83498361ec2e2252795b4f40b17e80692814329e42d"},
|
{file = "ruff-0.15.2-py3-none-linux_armv6l.whl", hash = "sha256:120691a6fdae2f16d65435648160f5b81a9625288f75544dc40637436b5d3c0d"},
|
||||||
{file = "ruff-0.9.10-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:188a6638dab1aa9bb6228a7302387b2c9954e455fb25d6b4470cb0641d16759d"},
|
{file = "ruff-0.15.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:a89056d831256099658b6bba4037ac6dd06f49d194199215befe2bb10457ea5e"},
|
||||||
{file = "ruff-0.9.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:5284dcac6b9dbc2fcb71fdfc26a217b2ca4ede6ccd57476f52a587451ebe450d"},
|
{file = "ruff-0.15.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:e36dee3a64be0ebd23c86ffa3aa3fd3ac9a712ff295e192243f814a830b6bd87"},
|
||||||
{file = "ruff-0.9.10-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:47678f39fa2a3da62724851107f438c8229a3470f533894b5568a39b40029c0c"},
|
{file = "ruff-0.15.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9fb47b6d9764677f8c0a193c0943ce9a05d6763523f132325af8a858eadc2b9"},
|
||||||
{file = "ruff-0.9.10-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:99713a6e2766b7a17147b309e8c915b32b07a25c9efd12ada79f217c9c778b3e"},
|
{file = "ruff-0.15.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f376990f9d0d6442ea9014b19621d8f2aaf2b8e39fdbfc79220b7f0c596c9b80"},
|
||||||
{file = "ruff-0.9.10-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:524ee184d92f7c7304aa568e2db20f50c32d1d0caa235d8ddf10497566ea1a12"},
|
{file = "ruff-0.15.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2dcc987551952d73cbf5c88d9fdee815618d497e4df86cd4c4824cc59d5dd75f"},
|
||||||
{file = "ruff-0.9.10-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:df92aeac30af821f9acf819fc01b4afc3dfb829d2782884f8739fb52a8119a16"},
|
{file = "ruff-0.15.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:42a47fd785cbe8c01b9ff45031af875d101b040ad8f4de7bbb716487c74c9a77"},
|
||||||
{file = "ruff-0.9.10-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de42e4edc296f520bb84954eb992a07a0ec5a02fecb834498415908469854a52"},
|
{file = "ruff-0.15.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cbe9f49354866e575b4c6943856989f966421870e85cd2ac94dccb0a9dcb2fea"},
|
||||||
{file = "ruff-0.9.10-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d257f95b65806104b6b1ffca0ea53f4ef98454036df65b1eda3693534813ecd1"},
|
{file = "ruff-0.15.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b7a672c82b5f9887576087d97be5ce439f04bbaf548ee987b92d3a7dede41d3a"},
|
||||||
{file = "ruff-0.9.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b60dec7201c0b10d6d11be00e8f2dbb6f40ef1828ee75ed739923799513db24c"},
|
{file = "ruff-0.15.2-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:72ecc64f46f7019e2bcc3cdc05d4a7da958b629a5ab7033195e11a438403d956"},
|
||||||
{file = "ruff-0.9.10-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:d838b60007da7a39c046fcdd317293d10b845001f38bcb55ba766c3875b01e43"},
|
{file = "ruff-0.15.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:8dcf243b15b561c655c1ef2f2b0050e5d50db37fe90115507f6ff37d865dc8b4"},
|
||||||
{file = "ruff-0.9.10-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:ccaf903108b899beb8e09a63ffae5869057ab649c1e9231c05ae354ebc62066c"},
|
{file = "ruff-0.15.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:dab6941c862c05739774677c6273166d2510d254dac0695c0e3f5efa1b5585de"},
|
||||||
{file = "ruff-0.9.10-py3-none-musllinux_1_2_i686.whl", hash = "sha256:f9567d135265d46e59d62dc60c0bfad10e9a6822e231f5b24032dba5a55be6b5"},
|
{file = "ruff-0.15.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:1b9164f57fc36058e9a6806eb92af185b0697c9fe4c7c52caa431c6554521e5c"},
|
||||||
{file = "ruff-0.9.10-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5f202f0d93738c28a89f8ed9eaba01b7be339e5d8d642c994347eaa81c6d75b8"},
|
{file = "ruff-0.15.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:80d24fcae24d42659db7e335b9e1531697a7102c19185b8dc4a028b952865fd8"},
|
||||||
{file = "ruff-0.9.10-py3-none-win32.whl", hash = "sha256:bfb834e87c916521ce46b1788fbb8484966e5113c02df216680102e9eb960029"},
|
{file = "ruff-0.15.2-py3-none-win32.whl", hash = "sha256:fd5ff9e5f519a7e1bd99cbe8daa324010a74f5e2ebc97c6242c08f26f3714f6f"},
|
||||||
{file = "ruff-0.9.10-py3-none-win_amd64.whl", hash = "sha256:f2160eeef3031bf4b17df74e307d4c5fb689a6f3a26a2de3f7ef4044e3c484f1"},
|
{file = "ruff-0.15.2-py3-none-win_amd64.whl", hash = "sha256:d20014e3dfa400f3ff84830dfb5755ece2de45ab62ecea4af6b7262d0fb4f7c5"},
|
||||||
{file = "ruff-0.9.10-py3-none-win_arm64.whl", hash = "sha256:5fd804c0327a5e5ea26615550e706942f348b197d5475ff34c19733aee4b2e69"},
|
{file = "ruff-0.15.2-py3-none-win_arm64.whl", hash = "sha256:cabddc5822acdc8f7b5527b36ceac55cc51eec7b1946e60181de8fe83ca8876e"},
|
||||||
{file = "ruff-0.9.10.tar.gz", hash = "sha256:9bacb735d7bada9cfb0f2c227d3658fc443d90a727b47f206fb33f52f3c0eac7"},
|
{file = "ruff-0.15.2.tar.gz", hash = "sha256:14b965afee0969e68bb871eba625343b8673375f457af4abe98553e8bbb98342"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -4313,4 +4315,4 @@ files = [
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.1"
|
lock-version = "2.1"
|
||||||
python-versions = ">=3.10,<3.13"
|
python-versions = ">=3.10,<3.13"
|
||||||
content-hash = "e4fd9a44b9541f610e862ba51410092473a587a097c7a5a3e3b197ee6b30e4d4"
|
content-hash = "fd273f9a20a34e64849a61cb654a7eb75dfa9499375eb8efbdcc953f9a9f6f95"
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "auto-archiver"
|
name = "auto-archiver"
|
||||||
version = "1.2.1"
|
version = "1.2.2"
|
||||||
description = "Automatically archive links to videos, images, and social media content from Google Sheets (and more)."
|
description = "Automatically archive links to videos, images, and social media content from Google Sheets (and more)."
|
||||||
|
|
||||||
requires-python = ">=3.10,<3.13"
|
requires-python = ">=3.10,<3.13"
|
||||||
@@ -66,7 +66,7 @@ pytest = "^8.3.4"
|
|||||||
autopep8 = "^2.3.1"
|
autopep8 = "^2.3.1"
|
||||||
pytest-loguru = "^0.4.0"
|
pytest-loguru = "^0.4.0"
|
||||||
pytest-mock = "^3.14.0"
|
pytest-mock = "^3.14.0"
|
||||||
ruff = "^0.9.10"
|
ruff = "^0.15.2"
|
||||||
pre-commit = "^4.1.0"
|
pre-commit = "^4.1.0"
|
||||||
|
|
||||||
[tool.poetry.group.docs.dependencies]
|
[tool.poetry.group.docs.dependencies]
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ def _extract_metadata(self, webpage, video_id):
|
|||||||
...,
|
...,
|
||||||
"attachments",
|
"attachments",
|
||||||
...,
|
...,
|
||||||
lambda k, v: (k == "media" and str(v["id"]) == video_id and v["__typename"] == "Video"),
|
lambda k, v: k == "media" and str(v["id"]) == video_id and v["__typename"] == "Video",
|
||||||
),
|
),
|
||||||
expected_type=dict,
|
expected_type=dict,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -24,8 +24,7 @@ class WaczExtractorEnricher(Enricher, Extractor):
|
|||||||
self.use_docker = os.environ.get("WACZ_ENABLE_DOCKER") or not os.environ.get("RUNNING_IN_DOCKER")
|
self.use_docker = os.environ.get("WACZ_ENABLE_DOCKER") or not os.environ.get("RUNNING_IN_DOCKER")
|
||||||
self.docker_in_docker = os.environ.get("WACZ_ENABLE_DOCKER") and os.environ.get("RUNNING_IN_DOCKER")
|
self.docker_in_docker = os.environ.get("WACZ_ENABLE_DOCKER") and os.environ.get("RUNNING_IN_DOCKER")
|
||||||
|
|
||||||
self.crawl_id = random_str(8)
|
self.cwd_dind = f"/crawls/crawls{random_str(8)}"
|
||||||
self.cwd_dind = f"/crawls/crawls{self.crawl_id}"
|
|
||||||
self.browsertrix_home_host = os.environ.get("BROWSERTRIX_HOME_HOST")
|
self.browsertrix_home_host = os.environ.get("BROWSERTRIX_HOME_HOST")
|
||||||
self.browsertrix_home_container = os.environ.get("BROWSERTRIX_HOME_CONTAINER") or self.browsertrix_home_host
|
self.browsertrix_home_container = os.environ.get("BROWSERTRIX_HOME_CONTAINER") or self.browsertrix_home_host
|
||||||
# create crawls folder if not exists, so it can be safely removed in cleanup
|
# create crawls folder if not exists, so it can be safely removed in cleanup
|
||||||
@@ -51,7 +50,8 @@ class WaczExtractorEnricher(Enricher, Extractor):
|
|||||||
|
|
||||||
url = to_enrich.get_url()
|
url = to_enrich.get_url()
|
||||||
|
|
||||||
collection = self.crawl_id
|
crawl_id = random_str(8)
|
||||||
|
collection = crawl_id
|
||||||
browsertrix_home_host = self.browsertrix_home_host or os.path.abspath(self.tmp_dir)
|
browsertrix_home_host = self.browsertrix_home_host or os.path.abspath(self.tmp_dir)
|
||||||
browsertrix_home_container = self.browsertrix_home_container or browsertrix_home_host
|
browsertrix_home_container = self.browsertrix_home_container or browsertrix_home_host
|
||||||
|
|
||||||
@@ -83,8 +83,10 @@ class WaczExtractorEnricher(Enricher, Extractor):
|
|||||||
# "--blockAds" # note: this has been known to cause issues on cloudflare protected sites
|
# "--blockAds" # note: this has been known to cause issues on cloudflare protected sites
|
||||||
]
|
]
|
||||||
|
|
||||||
|
crawl_cwd_dind = os.path.join(self.cwd_dind, crawl_id)
|
||||||
if self.docker_in_docker:
|
if self.docker_in_docker:
|
||||||
cmd.extend(["--cwd", self.cwd_dind])
|
os.makedirs(crawl_cwd_dind, exist_ok=True)
|
||||||
|
cmd.extend(["--cwd", crawl_cwd_dind])
|
||||||
|
|
||||||
if self.auth_for_site(url):
|
if self.auth_for_site(url):
|
||||||
# there's an auth for this site, but browsertrix only supports username/password auth
|
# there's an auth for this site, but browsertrix only supports username/password auth
|
||||||
@@ -109,7 +111,7 @@ class WaczExtractorEnricher(Enricher, Extractor):
|
|||||||
] + cmd
|
] + cmd
|
||||||
|
|
||||||
if self.profile:
|
if self.profile:
|
||||||
profile_file = f"profile-{self.crawl_id}.tar.gz"
|
profile_file = f"profile-{crawl_id}.tar.gz"
|
||||||
profile_fn = os.path.join(browsertrix_home_container, profile_file)
|
profile_fn = os.path.join(browsertrix_home_container, profile_file)
|
||||||
logger.debug(f"Copying {self.profile} to {profile_fn}")
|
logger.debug(f"Copying {self.profile} to {profile_fn}")
|
||||||
shutil.copyfile(self.profile, profile_fn)
|
shutil.copyfile(self.profile, profile_fn)
|
||||||
@@ -137,7 +139,7 @@ class WaczExtractorEnricher(Enricher, Extractor):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
if self.docker_in_docker:
|
if self.docker_in_docker:
|
||||||
wacz_fn = os.path.join(self.cwd_dind, "collections", collection, f"{collection}.wacz")
|
wacz_fn = os.path.join(crawl_cwd_dind, "collections", collection, f"{collection}.wacz")
|
||||||
elif self.use_docker:
|
elif self.use_docker:
|
||||||
wacz_fn = os.path.join(browsertrix_home_container, "collections", collection, f"{collection}.wacz")
|
wacz_fn = os.path.join(browsertrix_home_container, "collections", collection, f"{collection}.wacz")
|
||||||
else:
|
else:
|
||||||
@@ -152,7 +154,7 @@ class WaczExtractorEnricher(Enricher, Extractor):
|
|||||||
self.extract_media_from_wacz(to_enrich, wacz_fn)
|
self.extract_media_from_wacz(to_enrich, wacz_fn)
|
||||||
|
|
||||||
if self.docker_in_docker:
|
if self.docker_in_docker:
|
||||||
jsonl_fn = os.path.join(self.cwd_dind, "collections", collection, "pages", "pages.jsonl")
|
jsonl_fn = os.path.join(crawl_cwd_dind, "collections", collection, "pages", "pages.jsonl")
|
||||||
elif self.use_docker:
|
elif self.use_docker:
|
||||||
jsonl_fn = os.path.join(browsertrix_home_container, "collections", collection, "pages", "pages.jsonl")
|
jsonl_fn = os.path.join(browsertrix_home_container, "collections", collection, "pages", "pages.jsonl")
|
||||||
else:
|
else:
|
||||||
|
|||||||
1
tests/core/__init__.py
Normal file
1
tests/core/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
# Core module tests
|
||||||
198
tests/core/test_media.py
Normal file
198
tests/core/test_media.py
Normal file
@@ -0,0 +1,198 @@
|
|||||||
|
"""
|
||||||
|
Tests for the Media class from auto_archiver.core.media
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import Mock, patch
|
||||||
|
from auto_archiver.core.media import Media
|
||||||
|
|
||||||
|
|
||||||
|
class TestMediaBasics:
|
||||||
|
"""Test basic Media properties and methods."""
|
||||||
|
|
||||||
|
def test_media_creation_with_filename(self):
|
||||||
|
media = Media(filename="test.mp4")
|
||||||
|
assert media.filename == "test.mp4"
|
||||||
|
assert media.urls == []
|
||||||
|
assert media.properties == {}
|
||||||
|
|
||||||
|
def test_media_key_property(self):
|
||||||
|
media = Media(filename="test.mp4", _key="my_key")
|
||||||
|
assert media.key == "my_key"
|
||||||
|
|
||||||
|
def test_media_set_get_properties(self):
|
||||||
|
media = Media(filename="test.mp4")
|
||||||
|
result = media.set("author", "John Doe")
|
||||||
|
assert result is media # returns self for chaining
|
||||||
|
assert media.get("author") == "John Doe"
|
||||||
|
assert media.get("nonexistent") is None
|
||||||
|
assert media.get("nonexistent", "default") == "default"
|
||||||
|
|
||||||
|
def test_media_add_url(self):
|
||||||
|
media = Media(filename="test.mp4")
|
||||||
|
media.add_url("https://example.com/test.mp4")
|
||||||
|
assert "https://example.com/test.mp4" in media.urls
|
||||||
|
media.add_url("https://cdn.example.com/test.mp4")
|
||||||
|
assert len(media.urls) == 2
|
||||||
|
|
||||||
|
|
||||||
|
class TestMediaMimetype:
|
||||||
|
"""Test mimetype detection and handling."""
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"filename,expected_mimetype",
|
||||||
|
[
|
||||||
|
("video.mp4", "video/mp4"),
|
||||||
|
("image.jpg", "image/jpeg"),
|
||||||
|
("image.png", "image/png"),
|
||||||
|
("audio.mp3", "audio/mpeg"),
|
||||||
|
("document.pdf", "application/pdf"),
|
||||||
|
("text.txt", "text/plain"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_mimetype_detection(self, filename, expected_mimetype):
|
||||||
|
media = Media(filename=filename)
|
||||||
|
assert media.mimetype == expected_mimetype
|
||||||
|
|
||||||
|
def test_mimetype_setter(self):
|
||||||
|
media = Media(filename="file.unknown")
|
||||||
|
media.mimetype = "custom/type"
|
||||||
|
assert media.mimetype == "custom/type"
|
||||||
|
|
||||||
|
def test_mimetype_empty_filename(self):
|
||||||
|
media = Media(filename="")
|
||||||
|
assert media.mimetype == ""
|
||||||
|
|
||||||
|
|
||||||
|
class TestMediaTypeChecks:
|
||||||
|
"""Test media type checking methods."""
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"filename,is_video,is_audio,is_image",
|
||||||
|
[
|
||||||
|
("video.mp4", True, False, False),
|
||||||
|
("video.avi", True, False, False),
|
||||||
|
("audio.mp3", False, True, False),
|
||||||
|
("audio.wav", False, True, False),
|
||||||
|
("image.jpg", False, False, True),
|
||||||
|
("image.png", False, False, True),
|
||||||
|
("document.pdf", False, False, False),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_type_checks(self, filename, is_video, is_audio, is_image):
|
||||||
|
media = Media(filename=filename)
|
||||||
|
assert media.is_video() == is_video
|
||||||
|
assert media.is_audio() == is_audio
|
||||||
|
assert media.is_image() == is_image
|
||||||
|
|
||||||
|
|
||||||
|
class TestMediaStore:
|
||||||
|
"""Test media storage functionality."""
|
||||||
|
|
||||||
|
def test_store_with_no_storages(self, caplog):
|
||||||
|
media = Media(filename="test.mp4")
|
||||||
|
metadata = Mock()
|
||||||
|
media.store(metadata, storages=[])
|
||||||
|
assert "No storages found" in caplog.text
|
||||||
|
|
||||||
|
def test_store_with_storage(self):
|
||||||
|
media = Media(filename="test.mp4")
|
||||||
|
metadata = Mock()
|
||||||
|
mock_storage = Mock()
|
||||||
|
media.store(metadata, url="https://example.com", storages=[mock_storage])
|
||||||
|
mock_storage.store.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
|
class TestMediaInnerMedia:
|
||||||
|
"""Test nested media retrieval."""
|
||||||
|
|
||||||
|
def test_all_inner_media_no_nested(self):
|
||||||
|
media = Media(filename="test.mp4")
|
||||||
|
inner = list(media.all_inner_media(include_self=False))
|
||||||
|
assert len(inner) == 0
|
||||||
|
|
||||||
|
inner_with_self = list(media.all_inner_media(include_self=True))
|
||||||
|
assert len(inner_with_self) == 1
|
||||||
|
assert inner_with_self[0] is media
|
||||||
|
|
||||||
|
def test_all_inner_media_with_nested(self):
|
||||||
|
parent = Media(filename="parent.mp4")
|
||||||
|
child = Media(filename="child.jpg")
|
||||||
|
grandchild = Media(filename="grandchild.png")
|
||||||
|
|
||||||
|
child.set("thumbnail", grandchild)
|
||||||
|
parent.set("preview", child)
|
||||||
|
|
||||||
|
inner = list(parent.all_inner_media(include_self=False))
|
||||||
|
assert len(inner) == 2
|
||||||
|
assert child in inner
|
||||||
|
assert grandchild in inner
|
||||||
|
|
||||||
|
def test_all_inner_media_with_list_property(self):
|
||||||
|
parent = Media(filename="parent.mp4")
|
||||||
|
child1 = Media(filename="frame1.jpg")
|
||||||
|
child2 = Media(filename="frame2.jpg")
|
||||||
|
|
||||||
|
parent.set("frames", [child1, child2])
|
||||||
|
|
||||||
|
inner = list(parent.all_inner_media(include_self=False))
|
||||||
|
assert len(inner) == 2
|
||||||
|
assert child1 in inner
|
||||||
|
assert child2 in inner
|
||||||
|
|
||||||
|
|
||||||
|
class TestMediaIsStored:
|
||||||
|
"""Test the is_stored method."""
|
||||||
|
|
||||||
|
def test_is_stored_no_urls(self):
|
||||||
|
media = Media(filename="test.mp4")
|
||||||
|
storage = Mock()
|
||||||
|
storage.config = {"steps": {"storages": ["s3", "local"]}}
|
||||||
|
assert media.is_stored(storage) is False
|
||||||
|
|
||||||
|
def test_is_stored_partial_urls(self):
|
||||||
|
media = Media(filename="test.mp4")
|
||||||
|
media.add_url("https://s3.example.com/test.mp4")
|
||||||
|
storage = Mock()
|
||||||
|
storage.config = {"steps": {"storages": ["s3", "local"]}}
|
||||||
|
assert media.is_stored(storage) is False
|
||||||
|
|
||||||
|
def test_is_stored_full_urls(self):
|
||||||
|
media = Media(filename="test.mp4")
|
||||||
|
media.add_url("https://s3.example.com/test.mp4")
|
||||||
|
media.add_url("file:///local/test.mp4")
|
||||||
|
storage = Mock()
|
||||||
|
storage.config = {"steps": {"storages": ["s3", "local"]}}
|
||||||
|
assert media.is_stored(storage) is True
|
||||||
|
|
||||||
|
|
||||||
|
class TestMediaValidVideo:
|
||||||
|
"""Test video validation functionality."""
|
||||||
|
|
||||||
|
def test_is_valid_video_with_valid_probe(self):
|
||||||
|
media = Media(filename="test.mp4")
|
||||||
|
|
||||||
|
mock_streams = {"streams": [{"duration_ts": 1000}]}
|
||||||
|
|
||||||
|
with patch("ffmpeg.probe", return_value=mock_streams):
|
||||||
|
assert media.is_valid_video() is True
|
||||||
|
|
||||||
|
def test_is_valid_video_with_no_duration(self):
|
||||||
|
media = Media(filename="test.mp4")
|
||||||
|
|
||||||
|
mock_streams = {"streams": [{"duration_ts": 0}]}
|
||||||
|
|
||||||
|
with patch("ffmpeg.probe", return_value=mock_streams):
|
||||||
|
assert media.is_valid_video() is False
|
||||||
|
|
||||||
|
def test_is_valid_video_with_ffmpeg_error(self):
|
||||||
|
media = Media(filename="test.mp4")
|
||||||
|
|
||||||
|
with patch("ffmpeg.probe", side_effect=Exception("ffmpeg error")):
|
||||||
|
with patch("os.path.getsize", return_value=100):
|
||||||
|
# Falls back to file size check, small file
|
||||||
|
assert media.is_valid_video() is False
|
||||||
|
|
||||||
|
with patch("os.path.getsize", return_value=30000):
|
||||||
|
# Falls back to file size check, larger file
|
||||||
|
assert media.is_valid_video() is True
|
||||||
98
tests/core/test_validators.py
Normal file
98
tests/core/test_validators.py
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
"""
|
||||||
|
Tests for validators module from auto_archiver.core.validators
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from auto_archiver.core.validators import positive_number, valid_file, json_loader
|
||||||
|
|
||||||
|
|
||||||
|
class TestPositiveNumber:
|
||||||
|
"""Test the positive_number validator."""
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"value,expected",
|
||||||
|
[
|
||||||
|
(0, 0),
|
||||||
|
(1, 1),
|
||||||
|
(100, 100),
|
||||||
|
(0.5, 0.5),
|
||||||
|
(999999, 999999),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_positive_values(self, value, expected):
|
||||||
|
assert positive_number(value) == expected
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"value",
|
||||||
|
[
|
||||||
|
-1,
|
||||||
|
-100,
|
||||||
|
-0.5,
|
||||||
|
-999999,
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_negative_values_raise_error(self, value):
|
||||||
|
with pytest.raises(argparse.ArgumentTypeError) as exc_info:
|
||||||
|
positive_number(value)
|
||||||
|
assert "not a positive number" in str(exc_info.value)
|
||||||
|
|
||||||
|
|
||||||
|
class TestValidFile:
|
||||||
|
"""Test the valid_file validator."""
|
||||||
|
|
||||||
|
def test_valid_file_exists(self, tmp_path):
|
||||||
|
test_file = tmp_path / "test.txt"
|
||||||
|
test_file.write_text("test content")
|
||||||
|
result = valid_file(str(test_file))
|
||||||
|
assert result == str(test_file)
|
||||||
|
|
||||||
|
def test_valid_file_not_exists(self):
|
||||||
|
with pytest.raises(argparse.ArgumentTypeError) as exc_info:
|
||||||
|
valid_file("/nonexistent/path/to/file.txt")
|
||||||
|
assert "does not exist" in str(exc_info.value)
|
||||||
|
|
||||||
|
def test_valid_file_directory_not_file(self, tmp_path):
|
||||||
|
# A directory is not a file
|
||||||
|
with pytest.raises(argparse.ArgumentTypeError) as exc_info:
|
||||||
|
valid_file(str(tmp_path))
|
||||||
|
assert "does not exist" in str(exc_info.value)
|
||||||
|
|
||||||
|
|
||||||
|
class TestJsonLoader:
|
||||||
|
"""Test the json_loader validator."""
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"json_str,expected",
|
||||||
|
[
|
||||||
|
('{"key": "value"}', {"key": "value"}),
|
||||||
|
('{"number": 123}', {"number": 123}),
|
||||||
|
('{"list": [1, 2, 3]}', {"list": [1, 2, 3]}),
|
||||||
|
('{"nested": {"inner": "value"}}', {"nested": {"inner": "value"}}),
|
||||||
|
("[]", []),
|
||||||
|
("[1, 2, 3]", [1, 2, 3]),
|
||||||
|
('"string"', "string"),
|
||||||
|
("123", 123),
|
||||||
|
("true", True),
|
||||||
|
("false", False),
|
||||||
|
("null", None),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_valid_json(self, json_str, expected):
|
||||||
|
assert json_loader(json_str) == expected
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"invalid_json",
|
||||||
|
[
|
||||||
|
"{invalid}",
|
||||||
|
"{'single': 'quotes'}",
|
||||||
|
"{missing: quotes}",
|
||||||
|
'{"unclosed": "brace"',
|
||||||
|
"",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_invalid_json_raises_error(self, invalid_json):
|
||||||
|
with pytest.raises(json.JSONDecodeError):
|
||||||
|
json_loader(invalid_json)
|
||||||
62
tests/databases/test_console_db.py
Normal file
62
tests/databases/test_console_db.py
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
"""
|
||||||
|
Tests for the ConsoleDb module
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def console_db(setup_module):
|
||||||
|
return setup_module("console_db")
|
||||||
|
|
||||||
|
|
||||||
|
class TestConsoleDb:
    """Checks the text logged by each ConsoleDb lifecycle hook."""

    def test_started_logs_info(self, console_db, make_item, caplog):
        """started() must log text containing STARTED and the item's host."""
        entry = make_item("https://example.com/test")

        with caplog.at_level("INFO"):
            console_db.started(entry)

        captured = caplog.text
        assert "STARTED" in captured
        assert "example.com" in captured

    def test_failed_logs_error(self, console_db, make_item, caplog):
        """failed() must log text containing FAILED and the supplied reason."""
        entry = make_item("https://example.com/test")
        failure_reason = "Connection timeout"

        with caplog.at_level("ERROR"):
            console_db.failed(entry, failure_reason)

        captured = caplog.text
        assert "FAILED" in captured
        assert failure_reason in captured

    def test_aborted_logs_warning(self, console_db, make_item, caplog):
        """aborted() must log text containing ABORTED."""
        entry = make_item("https://example.com/test")

        with caplog.at_level("WARNING"):
            console_db.aborted(entry)

        captured = caplog.text
        assert "ABORTED" in captured

    def test_done_logs_success(self, console_db, make_item, caplog):
        """done() must log text containing DONE."""
        entry = make_item("https://example.com/test")

        with caplog.at_level("INFO"):
            console_db.done(entry)

        captured = caplog.text
        assert "DONE" in captured

    def test_done_cached(self, console_db, make_item, caplog):
        """done() called with cached=True must still log text containing DONE."""
        entry = make_item("https://example.com/test")

        with caplog.at_level("INFO"):
            console_db.done(entry, cached=True)

        captured = caplog.text
        assert "DONE" in captured
|
||||||
72
tests/enrichers/test_json_enricher.py
Normal file
72
tests/enrichers/test_json_enricher.py
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
"""
|
||||||
|
Tests for the JsonEnricher module
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def json_enricher(setup_module):
    """Provide whatever ``setup_module`` returns for the "json_enricher" module name."""
    module_name = "json_enricher"
    return setup_module(module_name)
|
||||||
|
|
||||||
|
|
||||||
|
class TestJsonEnricher:
    """Checks the metadata.json media that JsonEnricher attaches to an item."""

    def test_enrich_creates_json_file(self, json_enricher, make_item):
        """enrich() must attach a 'metadata_json' media whose file exists on disk."""
        entry = make_item("https://example.com/test")
        entry.set("title", "Test Title")
        entry.set("description", "Test description")

        json_enricher.enrich(entry)

        # The output is looked up by the media id 'metadata_json'
        produced = entry.get_media_by_id("metadata_json")
        assert produced is not None
        output_path = produced.filename
        assert output_path.endswith("metadata.json")
        assert os.path.exists(output_path)

    def test_enrich_json_content(self, json_enricher, make_item):
        """The written file must contain the item's url and the values set on it."""
        entry = make_item("https://example.com/test")
        entry.set("title", "Test Title")
        entry.set("custom_field", "custom_value")

        json_enricher.enrich(entry)

        produced = entry.get_media_by_id("metadata_json")
        with open(produced.filename, "r", encoding="utf-8") as handle:
            payload = json.load(handle)

        # Values set on the item are read back from the nested "metadata" mapping
        stored = payload["metadata"]
        assert stored["title"] == "Test Title"
        assert stored["custom_field"] == "custom_value"
        assert stored["url"] == "https://example.com/test"

    def test_enrich_handles_special_characters(self, json_enricher, make_item):
        """Accented characters and emoji must survive the write/read round trip."""
        entry = make_item("https://example.com/test")
        entry.set("title", "Test with émojis 🎉 and üñíçödé")

        json_enricher.enrich(entry)

        produced = entry.get_media_by_id("metadata_json")
        with open(produced.filename, "r", encoding="utf-8") as handle:
            payload = json.load(handle)

        # The title lives inside the nested "metadata" mapping
        stored_title = payload["metadata"]["title"]
        assert "émojis 🎉" in stored_title
        assert "üñíçödé" in stored_title

    def test_enrich_empty_metadata(self, json_enricher, make_item):
        """An item with nothing set beyond its URL must still get a JSON file."""
        entry = make_item("https://example.com/minimal")

        json_enricher.enrich(entry)

        produced = entry.get_media_by_id("metadata_json")
        assert produced is not None
        assert os.path.exists(produced.filename)
|
||||||
70
tests/feeders/test_cli_feeder.py
Normal file
70
tests/feeders/test_cli_feeder.py
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
"""
|
||||||
|
Tests for the CLIFeeder module
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from auto_archiver.modules.cli_feeder.cli_feeder import CLIFeeder
|
||||||
|
from auto_archiver.core.consts import SetupError
|
||||||
|
from auto_archiver.core.metadata import Metadata
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def cli_feeder_instance():
    """Return a factory that builds a CLIFeeder with hand-assigned attributes."""

    def _build(urls):
        instance = CLIFeeder()
        # Stand-in for the config structure the feeder expects, assigned directly
        overrides = {
            "config": {"urls": urls},
            "name": "cli_feeder",
            "tmp_dir": "/tmp",
        }
        for attribute, value in overrides.items():
            setattr(instance, attribute, value)
        return instance

    return _build
|
||||||
|
|
||||||
|
|
||||||
|
class TestCLIFeeder:
    """Checks CLIFeeder iteration and its setup-time validation of URLs."""

    def test_iter_yields_metadata_for_urls(self, cli_feeder_instance):
        """Iterating must yield one Metadata per configured URL, in order."""
        expected_urls = (
            "https://example.com/1",
            "https://example.com/2",
            "https://example.com/3",
        )
        source = cli_feeder_instance(list(expected_urls))
        source.setup()

        produced = list(source)

        assert len(produced) == 3
        assert all(isinstance(entry, Metadata) for entry in produced)
        assert tuple(entry.get_url() for entry in produced) == expected_urls

    def test_iter_single_url(self, cli_feeder_instance):
        """A single configured URL must yield exactly one item carrying that URL."""
        source = cli_feeder_instance(["https://example.com/single"])
        source.setup()

        produced = list(source)

        assert len(produced) == 1
        only_entry = produced[0]
        assert only_entry.get_url() == "https://example.com/single"

    def test_setup_raises_without_urls(self, cli_feeder_instance):
        """setup() must raise SetupError when the URL list is empty."""
        source = cli_feeder_instance([])

        with pytest.raises(SetupError) as raised:
            source.setup()

        message = str(raised.value)
        assert "No URLs provided" in message

    def test_setup_raises_with_none_urls(self, cli_feeder_instance):
        """setup() must raise SetupError when urls is None."""
        source = cli_feeder_instance(None)

        with pytest.raises(SetupError) as raised:
            source.setup()

        message = str(raised.value)
        assert "No URLs provided" in message
|
||||||
43
tests/formatters/test_mute_formatter.py
Normal file
43
tests/formatters/test_mute_formatter.py
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
"""
|
||||||
|
Tests for the MuteFormatter module
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from auto_archiver.core.metadata import Metadata
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def mute_formatter(setup_module):
    """Provide whatever ``setup_module`` returns for the "mute_formatter" module name."""
    module_name = "mute_formatter"
    return setup_module(module_name)
|
||||||
|
|
||||||
|
|
||||||
|
class TestMuteFormatter:
    """Checks that MuteFormatter.format() returns None for every kind of item."""

    def test_format_returns_none(self, mute_formatter, make_item):
        """format() must return None for an item that has a title set."""
        entry = make_item("https://example.com/test")
        entry.set("title", "Test Title")

        assert mute_formatter.format(entry) is None

    def test_format_with_empty_metadata(self, mute_formatter):
        """format() must return None for a Metadata that only has its URL set."""
        blank = Metadata()
        entry = blank.set_url("https://example.com/empty")

        assert mute_formatter.format(entry) is None

    def test_format_with_media(self, mute_formatter, make_item):
        """format() must return None even when the item has media attached."""
        from auto_archiver.core.media import Media

        entry = make_item("https://example.com/with-media")
        attachment = Media(filename="test.mp4")
        entry.add_media(attachment)

        assert mute_formatter.format(entry) is None
|
||||||
Reference in New Issue
Block a user