Merge branch 'dev' into feat/nitter-alternative

This commit is contained in:
msramalho
2026-03-02 12:16:22 +00:00
12 changed files with 593 additions and 45 deletions

View File

@@ -24,7 +24,7 @@ jobs:
- name: Install Python
uses: actions/setup-python@v6
with:
python-version: "3.11"
python-version: "3.12"
- name: Install dependencies
run: |
python -m pip install --upgrade pip

70
poetry.lock generated
View File

@@ -755,23 +755,25 @@ files = [
[[package]]
name = "curl-cffi"
version = "0.13.0"
version = "0.14.0"
description = "libcurl ffi bindings for Python, with impersonation support."
optional = false
python-versions = ">=3.9"
python-versions = ">=3.10"
groups = ["main"]
markers = "implementation_name == \"cpython\""
files = [
{file = "curl_cffi-0.13.0-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:434cadbe8df2f08b2fc2c16dff2779fb40b984af99c06aa700af898e185bb9db"},
{file = "curl_cffi-0.13.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:59afa877a9ae09efa04646a7d068eeea48915a95d9add0a29854e7781679fcd7"},
{file = "curl_cffi-0.13.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d06ed389e45a7ca97b17c275dbedd3d6524560270e675c720e93a2018a766076"},
{file = "curl_cffi-0.13.0-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b4e0de45ab3b7a835c72bd53640c2347415111b43421b5c7a1a0b18deae2e541"},
{file = "curl_cffi-0.13.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8eb4083371bbb94e9470d782de235fb5268bf43520de020c9e5e6be8f395443f"},
{file = "curl_cffi-0.13.0-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:28911b526e8cd4aa0e5e38401bfe6887e8093907272f1f67ca22e6beb2933a51"},
{file = "curl_cffi-0.13.0-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:6d433ffcb455ab01dd0d7bde47109083aa38b59863aa183d29c668ae4c96bf8e"},
{file = "curl_cffi-0.13.0-cp39-abi3-win_amd64.whl", hash = "sha256:66a6b75ce971de9af64f1b6812e275f60b88880577bac47ef1fa19694fa21cd3"},
{file = "curl_cffi-0.13.0-cp39-abi3-win_arm64.whl", hash = "sha256:d438a3b45244e874794bc4081dc1e356d2bb926dcc7021e5a8fef2e2105ef1d8"},
{file = "curl_cffi-0.13.0.tar.gz", hash = "sha256:62ecd90a382bd5023750e3606e0aa7cb1a3a8ba41c14270b8e5e149ebf72c5ca"},
{file = "curl_cffi-0.14.0-cp39-abi3-macosx_14_0_arm64.whl", hash = "sha256:e35e89c6a69872f9749d6d5fda642ed4fc159619329e99d577d0104c9aad5893"},
{file = "curl_cffi-0.14.0-cp39-abi3-macosx_15_0_x86_64.whl", hash = "sha256:5945478cd28ad7dfb5c54473bcfb6743ee1d66554d57951fdf8fc0e7d8cf4e45"},
{file = "curl_cffi-0.14.0-cp39-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c42e8fa3c667db9ccd2e696ee47adcd3cd5b0838d7282f3fc45f6c0ef3cfdfa7"},
{file = "curl_cffi-0.14.0-cp39-abi3-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:060fe2c99c41d3cb7f894de318ddf4b0301b08dca70453d769bd4e74b36b8483"},
{file = "curl_cffi-0.14.0-cp39-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:b158c41a25388690dd0d40b5bc38d1e0f512135f17fdb8029868cbc1993d2e5b"},
{file = "curl_cffi-0.14.0-cp39-abi3-manylinux_2_28_i686.whl", hash = "sha256:1439fbef3500fb723333c826adf0efb0e2e5065a703fb5eccce637a2250db34a"},
{file = "curl_cffi-0.14.0-cp39-abi3-manylinux_2_34_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e7176f2c2d22b542e3cf261072a81deb018cfa7688930f95dddef215caddb469"},
{file = "curl_cffi-0.14.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:03f21ade2d72978c2bb8670e9b6de5260e2755092b02d94b70b906813662998d"},
{file = "curl_cffi-0.14.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:58ebf02de64ee5c95613209ddacb014c2d2f86298d7080c0a1c12ed876ee0690"},
{file = "curl_cffi-0.14.0-cp39-abi3-win_amd64.whl", hash = "sha256:6e503f9a103f6ae7acfb3890c843b53ec030785a22ae7682a22cc43afb94123e"},
{file = "curl_cffi-0.14.0-cp39-abi3-win_arm64.whl", hash = "sha256:2eed50a969201605c863c4c31269dfc3e0da52916086ac54553cfa353022425c"},
{file = "curl_cffi-0.14.0.tar.gz", hash = "sha256:5ffbc82e59f05008ec08ea432f0e535418823cda44178ee518906a54f27a5f0f"},
]
[package.dependencies]
@@ -780,9 +782,9 @@ cffi = ">=1.12.0"
[package.extras]
build = ["cibuildwheel", "wheel"]
dev = ["charset_normalizer (>=3.3.2,<4.0)", "coverage (>=6.4.1,<7.0)", "cryptography (>=42.0.5,<43.0)", "httpx (==0.23.1)", "mypy (>=1.9.0,<2.0)", "pytest (>=8.1.1,<9.0)", "pytest-asyncio (>=0.23.6,<1.0)", "pytest-trio (>=0.8.0,<1.0)", "ruff (>=0.3.5,<1.0)", "trio (>=0.25.0,<1.0)", "trustme (>=1.1.0,<2.0)", "typing_extensions", "uvicorn (>=0.29.0,<1.0)", "websockets (>=12.0,<13.0)"]
dev = ["charset_normalizer (>=3.3.2,<4.0)", "coverage (>=6.4.1,<7.0)", "cryptography (>=42.0.5,<43.0)", "httpx (==0.23.1)", "mypy (>=1.9.0,<2.0)", "pytest (>=8.1.1,<9.0)", "pytest-asyncio (>=0.23.6,<1.0)", "pytest-trio (>=0.8.0,<1.0)", "ruff (>=0.3.5,<1.0)", "trio (>=0.25.0,<1.0)", "trustme (>=1.1.0,<2.0)", "typing_extensions", "uvicorn (>=0.29.0,<1.0)", "websockets (>=14.0)"]
extra = ["lxml_html_clean", "markdownify (>=1.1.0)", "readability-lxml (>=0.8.1)"]
test = ["charset_normalizer (>=3.3.2,<4.0)", "cryptography (>=42.0.5,<43.0)", "fastapi (==0.110.0)", "httpx (==0.23.1)", "proxy.py (>=2.4.3,<3.0)", "pytest (>=8.1.1,<9.0)", "pytest-asyncio (>=0.23.6,<1.0)", "pytest-trio (>=0.8.0,<1.0)", "python-multipart (>=0.0.9,<1.0)", "trio (>=0.25.0,<1.0)", "trustme (>=1.1.0,<2.0)", "typing_extensions", "uvicorn (>=0.29.0,<1.0)", "websockets (>=12.0,<13.0)"]
test = ["charset_normalizer (>=3.3.2,<4.0)", "cryptography (>=42.0.5,<43.0)", "fastapi (>=0.110.0,<1.0)", "httpx (==0.23.1)", "proxy.py (>=2.4.3,<3.0)", "pytest (>=8.1.1,<9.0)", "pytest-asyncio (>=0.23.6,<1.0)", "pytest-trio (>=0.8.0,<1.0)", "python-multipart (>=0.0.9,<1.0)", "trio (>=0.25.0,<1.0)", "trustme (>=1.1.0,<2.0)", "typing_extensions", "uvicorn (>=0.29.0,<1.0)", "websockets (>=14.0)"]
[[package]]
name = "dataclasses-json"
@@ -3108,30 +3110,30 @@ files = [
[[package]]
name = "ruff"
version = "0.9.10"
version = "0.15.2"
description = "An extremely fast Python linter and code formatter, written in Rust."
optional = false
python-versions = ">=3.7"
groups = ["dev"]
files = [
{file = "ruff-0.9.10-py3-none-linux_armv6l.whl", hash = "sha256:eb4d25532cfd9fe461acc83498361ec2e2252795b4f40b17e80692814329e42d"},
{file = "ruff-0.9.10-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:188a6638dab1aa9bb6228a7302387b2c9954e455fb25d6b4470cb0641d16759d"},
{file = "ruff-0.9.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:5284dcac6b9dbc2fcb71fdfc26a217b2ca4ede6ccd57476f52a587451ebe450d"},
{file = "ruff-0.9.10-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:47678f39fa2a3da62724851107f438c8229a3470f533894b5568a39b40029c0c"},
{file = "ruff-0.9.10-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:99713a6e2766b7a17147b309e8c915b32b07a25c9efd12ada79f217c9c778b3e"},
{file = "ruff-0.9.10-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:524ee184d92f7c7304aa568e2db20f50c32d1d0caa235d8ddf10497566ea1a12"},
{file = "ruff-0.9.10-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:df92aeac30af821f9acf819fc01b4afc3dfb829d2782884f8739fb52a8119a16"},
{file = "ruff-0.9.10-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de42e4edc296f520bb84954eb992a07a0ec5a02fecb834498415908469854a52"},
{file = "ruff-0.9.10-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d257f95b65806104b6b1ffca0ea53f4ef98454036df65b1eda3693534813ecd1"},
{file = "ruff-0.9.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b60dec7201c0b10d6d11be00e8f2dbb6f40ef1828ee75ed739923799513db24c"},
{file = "ruff-0.9.10-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:d838b60007da7a39c046fcdd317293d10b845001f38bcb55ba766c3875b01e43"},
{file = "ruff-0.9.10-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:ccaf903108b899beb8e09a63ffae5869057ab649c1e9231c05ae354ebc62066c"},
{file = "ruff-0.9.10-py3-none-musllinux_1_2_i686.whl", hash = "sha256:f9567d135265d46e59d62dc60c0bfad10e9a6822e231f5b24032dba5a55be6b5"},
{file = "ruff-0.9.10-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5f202f0d93738c28a89f8ed9eaba01b7be339e5d8d642c994347eaa81c6d75b8"},
{file = "ruff-0.9.10-py3-none-win32.whl", hash = "sha256:bfb834e87c916521ce46b1788fbb8484966e5113c02df216680102e9eb960029"},
{file = "ruff-0.9.10-py3-none-win_amd64.whl", hash = "sha256:f2160eeef3031bf4b17df74e307d4c5fb689a6f3a26a2de3f7ef4044e3c484f1"},
{file = "ruff-0.9.10-py3-none-win_arm64.whl", hash = "sha256:5fd804c0327a5e5ea26615550e706942f348b197d5475ff34c19733aee4b2e69"},
{file = "ruff-0.9.10.tar.gz", hash = "sha256:9bacb735d7bada9cfb0f2c227d3658fc443d90a727b47f206fb33f52f3c0eac7"},
{file = "ruff-0.15.2-py3-none-linux_armv6l.whl", hash = "sha256:120691a6fdae2f16d65435648160f5b81a9625288f75544dc40637436b5d3c0d"},
{file = "ruff-0.15.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:a89056d831256099658b6bba4037ac6dd06f49d194199215befe2bb10457ea5e"},
{file = "ruff-0.15.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:e36dee3a64be0ebd23c86ffa3aa3fd3ac9a712ff295e192243f814a830b6bd87"},
{file = "ruff-0.15.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9fb47b6d9764677f8c0a193c0943ce9a05d6763523f132325af8a858eadc2b9"},
{file = "ruff-0.15.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f376990f9d0d6442ea9014b19621d8f2aaf2b8e39fdbfc79220b7f0c596c9b80"},
{file = "ruff-0.15.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2dcc987551952d73cbf5c88d9fdee815618d497e4df86cd4c4824cc59d5dd75f"},
{file = "ruff-0.15.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:42a47fd785cbe8c01b9ff45031af875d101b040ad8f4de7bbb716487c74c9a77"},
{file = "ruff-0.15.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cbe9f49354866e575b4c6943856989f966421870e85cd2ac94dccb0a9dcb2fea"},
{file = "ruff-0.15.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b7a672c82b5f9887576087d97be5ce439f04bbaf548ee987b92d3a7dede41d3a"},
{file = "ruff-0.15.2-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:72ecc64f46f7019e2bcc3cdc05d4a7da958b629a5ab7033195e11a438403d956"},
{file = "ruff-0.15.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:8dcf243b15b561c655c1ef2f2b0050e5d50db37fe90115507f6ff37d865dc8b4"},
{file = "ruff-0.15.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:dab6941c862c05739774677c6273166d2510d254dac0695c0e3f5efa1b5585de"},
{file = "ruff-0.15.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:1b9164f57fc36058e9a6806eb92af185b0697c9fe4c7c52caa431c6554521e5c"},
{file = "ruff-0.15.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:80d24fcae24d42659db7e335b9e1531697a7102c19185b8dc4a028b952865fd8"},
{file = "ruff-0.15.2-py3-none-win32.whl", hash = "sha256:fd5ff9e5f519a7e1bd99cbe8daa324010a74f5e2ebc97c6242c08f26f3714f6f"},
{file = "ruff-0.15.2-py3-none-win_amd64.whl", hash = "sha256:d20014e3dfa400f3ff84830dfb5755ece2de45ab62ecea4af6b7262d0fb4f7c5"},
{file = "ruff-0.15.2-py3-none-win_arm64.whl", hash = "sha256:cabddc5822acdc8f7b5527b36ceac55cc51eec7b1946e60181de8fe83ca8876e"},
{file = "ruff-0.15.2.tar.gz", hash = "sha256:14b965afee0969e68bb871eba625343b8673375f457af4abe98553e8bbb98342"},
]
[[package]]
@@ -4313,4 +4315,4 @@ files = [
[metadata]
lock-version = "2.1"
python-versions = ">=3.10,<3.13"
content-hash = "e4fd9a44b9541f610e862ba51410092473a587a097c7a5a3e3b197ee6b30e4d4"
content-hash = "fd273f9a20a34e64849a61cb654a7eb75dfa9499375eb8efbdcc953f9a9f6f95"

View File

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
[project]
name = "auto-archiver"
version = "1.2.1"
version = "1.2.2"
description = "Automatically archive links to videos, images, and social media content from Google Sheets (and more)."
requires-python = ">=3.10,<3.13"
@@ -66,7 +66,7 @@ pytest = "^8.3.4"
autopep8 = "^2.3.1"
pytest-loguru = "^0.4.0"
pytest-mock = "^3.14.0"
ruff = "^0.9.10"
ruff = "^0.15.2"
pre-commit = "^4.1.0"
[tool.poetry.group.docs.dependencies]

View File

@@ -34,7 +34,7 @@ def _extract_metadata(self, webpage, video_id):
...,
"attachments",
...,
lambda k, v: (k == "media" and str(v["id"]) == video_id and v["__typename"] == "Video"),
lambda k, v: k == "media" and str(v["id"]) == video_id and v["__typename"] == "Video",
),
expected_type=dict,
)

View File

@@ -24,8 +24,7 @@ class WaczExtractorEnricher(Enricher, Extractor):
self.use_docker = os.environ.get("WACZ_ENABLE_DOCKER") or not os.environ.get("RUNNING_IN_DOCKER")
self.docker_in_docker = os.environ.get("WACZ_ENABLE_DOCKER") and os.environ.get("RUNNING_IN_DOCKER")
self.crawl_id = random_str(8)
self.cwd_dind = f"/crawls/crawls{self.crawl_id}"
self.cwd_dind = f"/crawls/crawls{random_str(8)}"
self.browsertrix_home_host = os.environ.get("BROWSERTRIX_HOME_HOST")
self.browsertrix_home_container = os.environ.get("BROWSERTRIX_HOME_CONTAINER") or self.browsertrix_home_host
# create crawls folder if not exists, so it can be safely removed in cleanup
@@ -51,7 +50,8 @@ class WaczExtractorEnricher(Enricher, Extractor):
url = to_enrich.get_url()
collection = self.crawl_id
crawl_id = random_str(8)
collection = crawl_id
browsertrix_home_host = self.browsertrix_home_host or os.path.abspath(self.tmp_dir)
browsertrix_home_container = self.browsertrix_home_container or browsertrix_home_host
@@ -83,8 +83,10 @@ class WaczExtractorEnricher(Enricher, Extractor):
# "--blockAds" # note: this has been known to cause issues on cloudflare protected sites
]
crawl_cwd_dind = os.path.join(self.cwd_dind, crawl_id)
if self.docker_in_docker:
cmd.extend(["--cwd", self.cwd_dind])
os.makedirs(crawl_cwd_dind, exist_ok=True)
cmd.extend(["--cwd", crawl_cwd_dind])
if self.auth_for_site(url):
# there's an auth for this site, but browsertrix only supports username/password auth
@@ -109,7 +111,7 @@ class WaczExtractorEnricher(Enricher, Extractor):
] + cmd
if self.profile:
profile_file = f"profile-{self.crawl_id}.tar.gz"
profile_file = f"profile-{crawl_id}.tar.gz"
profile_fn = os.path.join(browsertrix_home_container, profile_file)
logger.debug(f"Copying {self.profile} to {profile_fn}")
shutil.copyfile(self.profile, profile_fn)
@@ -137,7 +139,7 @@ class WaczExtractorEnricher(Enricher, Extractor):
return False
if self.docker_in_docker:
wacz_fn = os.path.join(self.cwd_dind, "collections", collection, f"{collection}.wacz")
wacz_fn = os.path.join(crawl_cwd_dind, "collections", collection, f"{collection}.wacz")
elif self.use_docker:
wacz_fn = os.path.join(browsertrix_home_container, "collections", collection, f"{collection}.wacz")
else:
@@ -152,7 +154,7 @@ class WaczExtractorEnricher(Enricher, Extractor):
self.extract_media_from_wacz(to_enrich, wacz_fn)
if self.docker_in_docker:
jsonl_fn = os.path.join(self.cwd_dind, "collections", collection, "pages", "pages.jsonl")
jsonl_fn = os.path.join(crawl_cwd_dind, "collections", collection, "pages", "pages.jsonl")
elif self.use_docker:
jsonl_fn = os.path.join(browsertrix_home_container, "collections", collection, "pages", "pages.jsonl")
else:

1
tests/core/__init__.py Normal file
View File

@@ -0,0 +1 @@
# Core module tests

198
tests/core/test_media.py Normal file
View File

@@ -0,0 +1,198 @@
"""
Tests for the Media class from auto_archiver.core.media
"""
import pytest
from unittest.mock import Mock, patch
from auto_archiver.core.media import Media
class TestMediaBasics:
    """Exercise construction, key lookup, property access and URL tracking on Media."""

    def test_media_creation_with_filename(self):
        """A Media built from only a filename starts with empty urls and properties."""
        clip = Media(filename="test.mp4")

        assert clip.filename == "test.mp4"
        assert clip.urls == []
        assert clip.properties == {}

    def test_media_key_property(self):
        """The ``key`` property reflects the value given as ``_key``."""
        clip = Media(filename="test.mp4", _key="my_key")

        assert clip.key == "my_key"

    def test_media_set_get_properties(self):
        """``set`` stores a value and ``get`` reads it back, honouring a default."""
        clip = Media(filename="test.mp4")

        returned = clip.set("author", "John Doe")

        # set() hands back the very same object so calls can be chained
        assert returned is clip
        assert clip.get("author") == "John Doe"
        assert clip.get("nonexistent") is None
        assert clip.get("nonexistent", "default") == "default"

    def test_media_add_url(self):
        """Each ``add_url`` call appends to ``urls``."""
        first_url = "https://example.com/test.mp4"
        clip = Media(filename="test.mp4")

        clip.add_url(first_url)
        assert first_url in clip.urls

        clip.add_url("https://cdn.example.com/test.mp4")
        assert len(clip.urls) == 2
class TestMediaMimetype:
    """Check how Media derives, stores and reports its mimetype."""

    @pytest.mark.parametrize(
        ("filename", "expected_mimetype"),
        [
            ("video.mp4", "video/mp4"),
            ("image.jpg", "image/jpeg"),
            ("image.png", "image/png"),
            ("audio.mp3", "audio/mpeg"),
            ("document.pdf", "application/pdf"),
            ("text.txt", "text/plain"),
        ],
    )
    def test_mimetype_detection(self, filename, expected_mimetype):
        """The mimetype reported for each filename matches the expected value."""
        detected = Media(filename=filename).mimetype

        assert detected == expected_mimetype

    def test_mimetype_setter(self):
        """An explicitly assigned mimetype is what the property returns afterwards."""
        blob = Media(filename="file.unknown")

        blob.mimetype = "custom/type"

        assert blob.mimetype == "custom/type"

    def test_mimetype_empty_filename(self):
        """An empty filename yields an empty-string mimetype."""
        nameless = Media(filename="")

        assert nameless.mimetype == ""
class TestMediaTypeChecks:
    """Check the is_video / is_audio / is_image predicates against sample filenames."""

    @pytest.mark.parametrize(
        ("filename", "is_video", "is_audio", "is_image"),
        [
            ("video.mp4", True, False, False),
            ("video.avi", True, False, False),
            ("audio.mp3", False, True, False),
            ("audio.wav", False, True, False),
            ("image.jpg", False, False, True),
            ("image.png", False, False, True),
            ("document.pdf", False, False, False),
        ],
    )
    def test_type_checks(self, filename, is_video, is_audio, is_image):
        """Each predicate returns the flag expected for the given filename."""
        subject = Media(filename=filename)

        video_flag = subject.is_video()
        assert video_flag == is_video

        audio_flag = subject.is_audio()
        assert audio_flag == is_audio

        image_flag = subject.is_image()
        assert image_flag == is_image
class TestMediaStore:
    """Check Media.store with an empty and with a populated storage list."""

    def test_store_with_no_storages(self, caplog):
        """Storing with an empty storage list produces a 'No storages found' log line."""
        item_metadata = Mock()
        clip = Media(filename="test.mp4")

        clip.store(item_metadata, storages=[])

        assert "No storages found" in caplog.text

    def test_store_with_storage(self):
        """A supplied storage has its ``store`` method invoked exactly once."""
        item_metadata = Mock()
        backend = Mock()
        clip = Media(filename="test.mp4")

        clip.store(item_metadata, url="https://example.com", storages=[backend])

        backend.store.assert_called_once()
class TestMediaInnerMedia:
    """Check all_inner_media for Media objects nested inside properties."""

    def test_all_inner_media_no_nested(self):
        """Without nested media the result is empty, or just the object itself."""
        lone = Media(filename="test.mp4")

        without_self = list(lone.all_inner_media(include_self=False))
        assert len(without_self) == 0

        with_self = list(lone.all_inner_media(include_self=True))
        assert len(with_self) == 1
        assert with_self[0] is lone

    def test_all_inner_media_with_nested(self):
        """Media stored as property values are found at more than one nesting level."""
        grandchild = Media(filename="grandchild.png")
        child = Media(filename="child.jpg")
        parent = Media(filename="parent.mp4")
        # chain: parent -> child -> grandchild
        child.set("thumbnail", grandchild)
        parent.set("preview", child)

        found = list(parent.all_inner_media(include_self=False))

        assert len(found) == 2
        assert child in found
        assert grandchild in found

    def test_all_inner_media_with_list_property(self):
        """Media held inside a list-valued property are found as well."""
        frame_a = Media(filename="frame1.jpg")
        frame_b = Media(filename="frame2.jpg")
        parent = Media(filename="parent.mp4")
        parent.set("frames", [frame_a, frame_b])

        found = list(parent.all_inner_media(include_self=False))

        assert len(found) == 2
        assert frame_a in found
        assert frame_b in found
class TestMediaIsStored:
    """Check is_stored against a storage whose config lists two storages."""

    @staticmethod
    def _two_storage_mock():
        """Build a Mock storage whose config names the ``s3`` and ``local`` storages."""
        backend = Mock()
        backend.config = {"steps": {"storages": ["s3", "local"]}}
        return backend

    def test_is_stored_no_urls(self):
        """A media without any URL is reported as not stored."""
        clip = Media(filename="test.mp4")
        backend = self._two_storage_mock()

        assert clip.is_stored(backend) is False

    def test_is_stored_partial_urls(self):
        """One URL with two configured storages is reported as not stored."""
        clip = Media(filename="test.mp4")
        clip.add_url("https://s3.example.com/test.mp4")
        backend = self._two_storage_mock()

        assert clip.is_stored(backend) is False

    def test_is_stored_full_urls(self):
        """Two URLs with two configured storages is reported as stored."""
        clip = Media(filename="test.mp4")
        for location in ("https://s3.example.com/test.mp4", "file:///local/test.mp4"):
            clip.add_url(location)
        backend = self._two_storage_mock()

        assert clip.is_stored(backend) is True
class TestMediaValidVideo:
    """Check is_valid_video with ``ffmpeg.probe`` patched out."""

    def test_is_valid_video_with_valid_probe(self):
        """A probe result with a non-zero ``duration_ts`` counts as valid."""
        probe_result = {"streams": [{"duration_ts": 1000}]}
        clip = Media(filename="test.mp4")

        with patch("ffmpeg.probe", return_value=probe_result):
            outcome = clip.is_valid_video()

        assert outcome is True

    def test_is_valid_video_with_no_duration(self):
        """A probe result whose only stream has ``duration_ts`` 0 counts as invalid."""
        probe_result = {"streams": [{"duration_ts": 0}]}
        clip = Media(filename="test.mp4")

        with patch("ffmpeg.probe", return_value=probe_result):
            outcome = clip.is_valid_video()

        assert outcome is False

    def test_is_valid_video_with_ffmpeg_error(self):
        """When probing raises, the verdict follows the patched file size."""
        clip = Media(filename="test.mp4")
        # (patched os.path.getsize value, expected verdict)
        size_cases = (
            (100, False),  # Falls back to file size check, small file
            (30000, True),  # Falls back to file size check, larger file
        )

        with patch("ffmpeg.probe", side_effect=Exception("ffmpeg error")):
            for fake_size, expected in size_cases:
                with patch("os.path.getsize", return_value=fake_size):
                    assert clip.is_valid_video() is expected

View File

@@ -0,0 +1,98 @@
"""
Tests for validators module from auto_archiver.core.validators
"""
import argparse
import json
import pytest
from auto_archiver.core.validators import positive_number, valid_file, json_loader
class TestPositiveNumber:
    """Check positive_number with accepted and rejected inputs."""

    @pytest.mark.parametrize(
        ("value", "expected"),
        [
            (0, 0),
            (1, 1),
            (100, 100),
            (0.5, 0.5),
            (999999, 999999),
        ],
    )
    def test_positive_values(self, value, expected):
        """Zero and positive numbers come back equal to the expected value."""
        validated = positive_number(value)

        assert validated == expected

    @pytest.mark.parametrize("value", [-1, -100, -0.5, -999999])
    def test_negative_values_raise_error(self, value):
        """Negative numbers raise ArgumentTypeError with a descriptive message."""
        with pytest.raises(argparse.ArgumentTypeError) as raised:
            positive_number(value)

        message = str(raised.value)
        assert "not a positive number" in message
class TestValidFile:
    """Check valid_file with an existing file, a missing path and a directory."""

    def test_valid_file_exists(self, tmp_path):
        """An existing file's path is returned unchanged."""
        existing = tmp_path / "test.txt"
        existing.write_text("test content")
        existing_str = str(existing)

        assert valid_file(existing_str) == existing_str

    def test_valid_file_not_exists(self):
        """A path that does not exist raises ArgumentTypeError."""
        with pytest.raises(argparse.ArgumentTypeError) as raised:
            valid_file("/nonexistent/path/to/file.txt")

        message = str(raised.value)
        assert "does not exist" in message

    def test_valid_file_directory_not_file(self, tmp_path):
        """A directory path is rejected with the same 'does not exist' message."""
        # A directory is not a file
        directory = str(tmp_path)

        with pytest.raises(argparse.ArgumentTypeError) as raised:
            valid_file(directory)

        message = str(raised.value)
        assert "does not exist" in message
class TestJsonLoader:
    """Check json_loader with well-formed and malformed JSON text."""

    @pytest.mark.parametrize(
        ("json_str", "expected"),
        [
            ('{"key": "value"}', {"key": "value"}),
            ('{"number": 123}', {"number": 123}),
            ('{"list": [1, 2, 3]}', {"list": [1, 2, 3]}),
            ('{"nested": {"inner": "value"}}', {"nested": {"inner": "value"}}),
            ("[]", []),
            ("[1, 2, 3]", [1, 2, 3]),
            ('"string"', "string"),
            ("123", 123),
            ("true", True),
            ("false", False),
            ("null", None),
        ],
    )
    def test_valid_json(self, json_str, expected):
        """Valid JSON documents, including bare scalars, decode to the expected value."""
        decoded = json_loader(json_str)

        assert decoded == expected

    @pytest.mark.parametrize(
        "invalid_json",
        [
            "{invalid}",
            "{'single': 'quotes'}",
            "{missing: quotes}",
            '{"unclosed": "brace"',
            "",
        ],
    )
    def test_invalid_json_raises_error(self, invalid_json):
        """Malformed or empty input raises json.JSONDecodeError."""
        with pytest.raises(json.JSONDecodeError):
            json_loader(invalid_json)

View File

@@ -0,0 +1,62 @@
"""
Tests for the ConsoleDb module
"""
import pytest
@pytest.fixture
def console_db(setup_module):
    """Provide the object returned by ``setup_module("console_db")``."""
    module = setup_module("console_db")
    return module
class TestConsoleDb:
    """Check the log output of each ConsoleDb lifecycle method."""

    def test_started_logs_info(self, console_db, make_item, caplog):
        """started() logs a STARTED line that mentions the item's host."""
        entry = make_item("https://example.com/test")

        with caplog.at_level("INFO"):
            console_db.started(entry)

        captured = caplog.text
        assert "STARTED" in captured
        assert "example.com" in captured

    def test_failed_logs_error(self, console_db, make_item, caplog):
        """failed() logs a FAILED line that includes the given reason."""
        entry = make_item("https://example.com/test")
        reason = "Connection timeout"

        with caplog.at_level("ERROR"):
            console_db.failed(entry, reason)

        captured = caplog.text
        assert "FAILED" in captured
        assert "Connection timeout" in captured

    def test_aborted_logs_warning(self, console_db, make_item, caplog):
        """aborted() logs an ABORTED line."""
        entry = make_item("https://example.com/test")

        with caplog.at_level("WARNING"):
            console_db.aborted(entry)

        assert "ABORTED" in caplog.text

    def test_done_logs_success(self, console_db, make_item, caplog):
        """done() logs a DONE line."""
        entry = make_item("https://example.com/test")

        with caplog.at_level("INFO"):
            console_db.done(entry)

        assert "DONE" in caplog.text

    def test_done_cached(self, console_db, make_item, caplog):
        """done() with cached=True still logs a DONE line."""
        entry = make_item("https://example.com/test")

        with caplog.at_level("INFO"):
            console_db.done(entry, cached=True)

        assert "DONE" in caplog.text

View File

@@ -0,0 +1,72 @@
"""
Tests for the JsonEnricher module
"""
import json
import os
import pytest
@pytest.fixture
def json_enricher(setup_module):
    """Provide the object returned by ``setup_module("json_enricher")``."""
    module = setup_module("json_enricher")
    return module
class TestJsonEnricher:
    """Check the metadata JSON file that JsonEnricher.enrich attaches to an item."""

    @staticmethod
    def _load_metadata_json(item):
        """Parse and return the JSON file attached to ``item`` as 'metadata_json'."""
        attached = item.get_media_by_id("metadata_json")
        with open(attached.filename, "r", encoding="utf-8") as handle:
            return json.load(handle)

    def test_enrich_creates_json_file(self, json_enricher, make_item):
        """enrich() attaches a 'metadata_json' media whose file exists on disk."""
        entry = make_item("https://example.com/test")
        entry.set("title", "Test Title")
        entry.set("description", "Test description")

        json_enricher.enrich(entry)

        # Check that a media with id 'metadata_json' was added
        attached = entry.get_media_by_id("metadata_json")
        assert attached is not None
        assert attached.filename.endswith("metadata.json")
        assert os.path.exists(attached.filename)

    def test_enrich_json_content(self, json_enricher, make_item):
        """The written JSON carries the item's title, custom field and URL."""
        entry = make_item("https://example.com/test")
        entry.set("title", "Test Title")
        entry.set("custom_field", "custom_value")

        json_enricher.enrich(entry)

        # The to_dict() returns nested structure: {status, metadata: {...}, media: [...]}
        stored = self._load_metadata_json(entry)["metadata"]
        assert stored["title"] == "Test Title"
        assert stored["custom_field"] == "custom_value"
        assert stored["url"] == "https://example.com/test"

    def test_enrich_handles_special_characters(self, json_enricher, make_item):
        """Non-ASCII text in the title is still present after writing and re-reading."""
        entry = make_item("https://example.com/test")
        entry.set("title", "Test with émojis 🎉 and üñíçödé")

        json_enricher.enrich(entry)

        # Access the nested metadata structure
        stored_title = self._load_metadata_json(entry)["metadata"]["title"]
        assert "émojis 🎉" in stored_title
        assert "üñíçödé" in stored_title

    def test_enrich_empty_metadata(self, json_enricher, make_item):
        """An item with nothing set beyond its URL still gets a JSON file."""
        entry = make_item("https://example.com/minimal")

        json_enricher.enrich(entry)

        attached = entry.get_media_by_id("metadata_json")
        assert attached is not None
        assert os.path.exists(attached.filename)

View File

@@ -0,0 +1,70 @@
"""
Tests for the CLIFeeder module
"""
import pytest
from auto_archiver.modules.cli_feeder.cli_feeder import CLIFeeder
from auto_archiver.core.consts import SetupError
from auto_archiver.core.metadata import Metadata
@pytest.fixture
def cli_feeder_instance(tmp_path):
    """Return a factory that builds a CLIFeeder with hand-set attributes.

    The factory takes the ``urls`` value to place in the feeder's config and
    returns the feeder without calling ``setup()``; each test decides when to
    call it.

    ``tmp_dir`` points at pytest's per-test ``tmp_path`` rather than a
    hard-coded ``/tmp``, so the fixture works on platforms without ``/tmp``
    and tests do not share a scratch directory.
    """

    def _create(urls):
        feeder = CLIFeeder()
        # Mock the config structure that cli_feeder expects
        feeder.config = {"urls": urls}
        feeder.name = "cli_feeder"
        feeder.tmp_dir = str(tmp_path)
        return feeder

    return _create
class TestCLIFeeder:
    """Check CLIFeeder iteration and its setup-time URL validation."""

    def test_iter_yields_metadata_for_urls(self, cli_feeder_instance):
        """Iterating yields one Metadata per configured URL, in the same order."""
        expected_urls = (
            "https://example.com/1",
            "https://example.com/2",
            "https://example.com/3",
        )
        feeder = cli_feeder_instance(list(expected_urls))
        feeder.setup()

        produced = list(feeder)

        assert len(produced) == 3
        assert all(isinstance(entry, Metadata) for entry in produced)
        for position, wanted in enumerate(expected_urls):
            assert produced[position].get_url() == wanted

    def test_iter_single_url(self, cli_feeder_instance):
        """A single configured URL yields exactly one item carrying that URL."""
        feeder = cli_feeder_instance(["https://example.com/single"])
        feeder.setup()

        produced = list(feeder)

        assert len(produced) == 1
        assert produced[0].get_url() == "https://example.com/single"

    def test_setup_raises_without_urls(self, cli_feeder_instance):
        """setup() raises SetupError when the URL list is empty."""
        feeder = cli_feeder_instance([])

        with pytest.raises(SetupError) as raised:
            feeder.setup()

        assert "No URLs provided" in str(raised.value)

    def test_setup_raises_with_none_urls(self, cli_feeder_instance):
        """setup() raises SetupError when ``urls`` is None."""
        feeder = cli_feeder_instance(None)

        with pytest.raises(SetupError) as raised:
            feeder.setup()

        assert "No URLs provided" in str(raised.value)

View File

@@ -0,0 +1,43 @@
"""
Tests for the MuteFormatter module
"""
import pytest
from auto_archiver.core.metadata import Metadata
@pytest.fixture
def mute_formatter(setup_module):
    """Provide the object returned by ``setup_module("mute_formatter")``."""
    module = setup_module("mute_formatter")
    return module
class TestMuteFormatter:
    """Check that MuteFormatter.format returns None for several kinds of item."""

    def test_format_returns_none(self, mute_formatter, make_item):
        """An item with a title set formats to None."""
        entry = make_item("https://example.com/test")
        entry.set("title", "Test Title")

        formatted = mute_formatter.format(entry)

        assert formatted is None

    def test_format_with_empty_metadata(self, mute_formatter):
        """A bare Metadata carrying only a URL formats to None."""
        entry = Metadata().set_url("https://example.com/empty")

        formatted = mute_formatter.format(entry)

        assert formatted is None

    def test_format_with_media(self, mute_formatter, make_item):
        """An item with an attached Media still formats to None."""
        from auto_archiver.core.media import Media

        attachment = Media(filename="test.mp4")
        entry = make_item("https://example.com/with-media")
        entry.add_media(attachment)

        formatted = mute_formatter.format(entry)

        assert formatted is None