mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-07 19:08:30 +03:00
10
Dockerfile
10
Dockerfile
@@ -41,11 +41,21 @@ COPY ./src/ .
|
|||||||
RUN /poetry-venv/bin/poetry install --only main --no-cache
|
RUN /poetry-venv/bin/poetry install --only main --no-cache
|
||||||
|
|
||||||
|
|
||||||
|
# Run as non-root user to avoid permission issues with mounted volumes (see #342)
|
||||||
|
# The base image already has an 'ubuntu' user at UID/GID 1000.
|
||||||
|
# Ensure directories that need write access at runtime are writable.
|
||||||
|
RUN chown 1000:1000 /app && \
|
||||||
|
chown -R 1000:1000 /app/.venv/lib/python3.12/site-packages/seleniumbase/drivers/ && \
|
||||||
|
mkdir -p /app/local_archive /app/secrets /tmp/archive && \
|
||||||
|
chown -R 1000:1000 /app/local_archive /app/secrets /tmp/archive
|
||||||
|
|
||||||
# Update PATH to include virtual environment binaries
|
# Update PATH to include virtual environment binaries
|
||||||
# Allowing entry point to run the application directly with Python
|
# Allowing entry point to run the application directly with Python
|
||||||
ENV VIRTUAL_ENV=/app/.venv \
|
ENV VIRTUAL_ENV=/app/.venv \
|
||||||
PATH="/app/.venv/bin:$PATH"
|
PATH="/app/.venv/bin:$PATH"
|
||||||
|
|
||||||
|
USER 1000
|
||||||
|
|
||||||
ENTRYPOINT ["python3", "-m", "auto_archiver"]
|
ENTRYPOINT ["python3", "-m", "auto_archiver"]
|
||||||
|
|
||||||
# should be executed with 2 volumes (3 if local_storage is used)
|
# should be executed with 2 volumes (3 if local_storage is used)
|
||||||
|
|||||||
@@ -6,6 +6,9 @@ services:
|
|||||||
context: .
|
context: .
|
||||||
dockerfile: Dockerfile
|
dockerfile: Dockerfile
|
||||||
container_name: auto-archiver
|
container_name: auto-archiver
|
||||||
|
# Override user to match host UID/GID and avoid permission issues on volumes.
|
||||||
|
# Set the USER_ID and GROUP_ID env vars to override; otherwise defaults to 1000:1000.
|
||||||
|
user: "${USER_ID:-1000}:${GROUP_ID:-1000}"
|
||||||
volumes:
|
volumes:
|
||||||
- ./secrets:/app/secrets
|
- ./secrets:/app/secrets
|
||||||
- ./local_archive:/app/local_archive
|
- ./local_archive:/app/local_archive
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "auto-archiver"
|
name = "auto-archiver"
|
||||||
version = "1.2.2"
|
version = "1.2.3"
|
||||||
description = "Automatically archive links to videos, images, and social media content from Google Sheets (and more)."
|
description = "Automatically archive links to videos, images, and social media content from Google Sheets (and more)."
|
||||||
|
|
||||||
requires-python = ">=3.10,<3.13"
|
requires-python = ">=3.10,<3.13"
|
||||||
|
|||||||
@@ -88,8 +88,18 @@ class AntibotExtractorEnricher(Extractor, Enricher):
|
|||||||
using_user_data_dir = self.user_data_dir if custom_data_dir else None
|
using_user_data_dir = self.user_data_dir if custom_data_dir else None
|
||||||
url = to_enrich.get_url()
|
url = to_enrich.get_url()
|
||||||
|
|
||||||
|
# Use xvfb in Docker environments where no display is available
|
||||||
|
use_xvfb = bool(os.environ.get("RUNNING_IN_DOCKER"))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with SB(uc=True, agent=self.agent, headed=None, user_data_dir=using_user_data_dir, proxy=self.proxy) as sb:
|
with SB(
|
||||||
|
uc=True,
|
||||||
|
agent=self.agent,
|
||||||
|
headed=None,
|
||||||
|
user_data_dir=using_user_data_dir,
|
||||||
|
proxy=self.proxy,
|
||||||
|
xvfb=use_xvfb,
|
||||||
|
) as sb:
|
||||||
logger.info(f"Selenium browser is up with agent {self.agent}, opening url...")
|
logger.info(f"Selenium browser is up with agent {self.agent}, opening url...")
|
||||||
sb.uc_open_with_reconnect(url, 4)
|
sb.uc_open_with_reconnect(url, 4)
|
||||||
|
|
||||||
|
|||||||
@@ -9,6 +9,8 @@ from auto_archiver.utils import url as UrlUtil, get_datetime_from_str
|
|||||||
from auto_archiver.core.extractor import Extractor
|
from auto_archiver.core.extractor import Extractor
|
||||||
from auto_archiver.utils.deletion_detection import detect_deletion, flag_as_deleted
|
from auto_archiver.utils.deletion_detection import detect_deletion, flag_as_deleted
|
||||||
from auto_archiver.modules.generic_extractor.dropin import GenericDropin, InfoExtractor
|
from auto_archiver.modules.generic_extractor.dropin import GenericDropin, InfoExtractor
|
||||||
|
import requests
|
||||||
|
from retrying import retry
|
||||||
|
|
||||||
|
|
||||||
class Twitter(GenericDropin):
|
class Twitter(GenericDropin):
|
||||||
@@ -29,7 +31,85 @@ class Twitter(GenericDropin):
|
|||||||
|
|
||||||
def extract_post(self, url: str, ie_instance: InfoExtractor):
    """Return the status data for the tweet at *url*.

    The tweet id is taken from the ``id`` group of the extractor's URL match
    and looked up through ``ie_instance._extract_status``. If that lookup
    raises, or yields a result without truthy ``user`` and ``created_at``
    entries, the failure is logged at debug level and the tweet is fetched
    through ``_fetch_fxtwitter`` instead.
    """
    tweet_id = ie_instance._match_valid_url(url).group("id")
    try:
        status = ie_instance._extract_status(twid=tweet_id)
        is_complete = bool(status) and bool(status.get("user")) and bool(status.get("created_at"))
        if is_complete:
            return status
        # Incomplete payloads are routed through the same handler as hard failures.
        raise ValueError("Error retrieving post with twitter dropin")
    except Exception as err:
        logger.debug(f"yt-dlp twitter extraction failed: {err}")
        # try fxtwitter API as fallback
        return self._fetch_fxtwitter(tweet_id)
|
||||||
|
|
||||||
|
def _fetch_fxtwitter(self, twid: str) -> dict:
    """Fetch tweet data from the fxtwitter API and convert it to the expected format.

    Args:
        twid: The tweet id, interpolated into the fxtwitter status URL.

    Returns:
        A dict with the keys ``user`` (``{"name": ...}``), ``created_at``,
        ``full_text`` and ``entities`` (``{"media": [...]}``). Photos become
        ``{"type": "photo", "media_url_https": ...}``; videos and gifs become
        ``{"type": "video" | "animated_gif", "video_info": {"variants": [...]}}``.

    Raises:
        ValueError: If the API keeps answering with a non-200 status or a
            payload without a ``tweet`` key after the retry attempts.
    """
    fxtwitter_url = f"https://api.fxtwitter.com/status/{twid}"
    logger.info(f"Falling back to fxtwitter API for tweet extraction: {fxtwitter_url}")

    @retry(wait_random_min=500, wait_random_max=2000, stop_max_attempt_number=3)
    def fetch_fxtwitter_data(url):
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/112.0"}
        resp = requests.get(url, headers=headers, timeout=15)
        if resp.status_code != 200:
            raise ValueError(f"Failed to retrieve tweet from fxtwitter API: {resp.status_code}")
        data = resp.json()
        if "tweet" not in data:
            raise ValueError(f"No tweet data in fxtwitter response: {data.get('message', 'Unknown error')}")
        return data["tweet"]

    def convert_variants(item):
        # Normalise fxtwitter variants to the url/content_type/bitrate shape.
        return [
            {
                "url": var.get("url", ""),
                "content_type": var.get("content_type", "video/mp4"),
                "bitrate": var.get("bitrate", 0),
            }
            for var in item.get("variants") or []
        ]

    tweet = fetch_fxtwitter_data(fxtwitter_url)

    # Convert fxtwitter format to expected format. `or {}` / `or []` guard
    # against keys that are present but null in the JSON payload.
    author = (tweet.get("author") or {}).get("name", "")
    created_at = tweet.get("created_at", "")  # Format: "Sun Feb 08 18:45:00 +0000 2026"
    raw_text = tweet.get("raw_text") or ""
    if isinstance(raw_text, dict):
        # NOTE(review): raw_text appears to be an object carrying a "text"
        # field in some responses - confirm against the fxtwitter schema.
        raw_text = raw_text.get("text", "")
    full_text = tweet.get("text") or raw_text

    media = []
    fx_media = tweet.get("media") or {}

    # Handle photos
    for photo in fx_media.get("photos") or []:
        media.append({"type": "photo", "media_url_https": photo.get("url", "")})

    # Videos and gifs share one code path. A gif may be listed under both
    # "videos" and "all", so remember what was emitted to avoid duplicates.
    emitted = set()

    def add_video_like(item, media_type):
        variants = convert_variants(item)
        if not variants:
            return
        key = tuple(variant["url"] for variant in variants)
        if key in emitted:
            return
        emitted.add(key)
        media.append({"type": media_type, "video_info": {"variants": variants}})

    # Handle videos (entries typed "gif" are labelled as animated gifs)
    for video in fx_media.get("videos") or []:
        add_video_like(video, "animated_gif" if video.get("type") == "gif" else "video")

    # Handle animated gifs that only appear in the combined "all" list
    for item in fx_media.get("all") or []:
        if item.get("type") == "gif":
            add_video_like(item, "animated_gif")

    return {
        "user": {"name": author},
        "created_at": created_at,
        "full_text": full_text,
        "entities": {"media": media},
    }
|
||||||
|
|
||||||
def keys_to_clean(self, video_data, info_extractor):
    """Return the names of the tweet-data keys to clean.

    Neither argument is consulted; the same five key names are returned on
    every call, as a fresh list.
    """
    cleanable = ("user", "created_at", "entities", "favorited", "translator_type")
    return list(cleanable)
|
||||||
|
|||||||
1
tests/core/__init__.py
Normal file
1
tests/core/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
# Core module tests
|
||||||
198
tests/core/test_media.py
Normal file
198
tests/core/test_media.py
Normal file
@@ -0,0 +1,198 @@
|
|||||||
|
"""
|
||||||
|
Tests for the Media class from auto_archiver.core.media
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import Mock, patch
|
||||||
|
from auto_archiver.core.media import Media
|
||||||
|
|
||||||
|
|
||||||
|
class TestMediaBasics:
    """Construction, key access, property storage and URL tracking on Media."""

    def test_media_creation_with_filename(self):
        # A freshly built Media carries its filename and empty containers.
        fresh = Media(filename="test.mp4")
        assert fresh.filename == "test.mp4"
        assert fresh.urls == []
        assert fresh.properties == {}

    def test_media_key_property(self):
        # The `_key` constructor argument is exposed through `key`.
        keyed = Media(filename="test.mp4", _key="my_key")
        assert keyed.key == "my_key"

    def test_media_set_get_properties(self):
        item = Media(filename="test.mp4")
        returned = item.set("author", "John Doe")
        assert returned is item  # returns self for chaining
        assert item.get("author") == "John Doe"
        assert item.get("nonexistent") is None
        assert item.get("nonexistent", "default") == "default"

    def test_media_add_url(self):
        primary_url = "https://example.com/test.mp4"
        item = Media(filename="test.mp4")
        item.add_url(primary_url)
        assert primary_url in item.urls
        item.add_url("https://cdn.example.com/test.mp4")
        assert len(item.urls) == 2
|
||||||
|
|
||||||
|
|
||||||
|
class TestMediaMimetype:
    """Mimetype lookup from the filename, plus explicit overrides."""

    @pytest.mark.parametrize(
        ["filename", "expected_mimetype"],
        [
            ("video.mp4", "video/mp4"),
            ("image.jpg", "image/jpeg"),
            ("image.png", "image/png"),
            ("audio.mp3", "audio/mpeg"),
            ("document.pdf", "application/pdf"),
            ("text.txt", "text/plain"),
        ],
    )
    def test_mimetype_detection(self, filename, expected_mimetype):
        detected = Media(filename=filename).mimetype
        assert detected == expected_mimetype

    def test_mimetype_setter(self):
        # An assigned mimetype is read back unchanged.
        item = Media(filename="file.unknown")
        item.mimetype = "custom/type"
        assert item.mimetype == "custom/type"

    def test_mimetype_empty_filename(self):
        # An empty filename yields an empty mimetype string.
        nameless = Media(filename="")
        assert nameless.mimetype == ""
|
||||||
|
|
||||||
|
|
||||||
|
class TestMediaTypeChecks:
    """is_video / is_audio / is_image classification by filename."""

    @pytest.mark.parametrize(
        ["filename", "is_video", "is_audio", "is_image"],
        [
            ("video.mp4", True, False, False),
            ("video.avi", True, False, False),
            ("audio.mp3", False, True, False),
            ("audio.wav", False, True, False),
            ("image.jpg", False, False, True),
            ("image.png", False, False, True),
            ("document.pdf", False, False, False),
        ],
    )
    def test_type_checks(self, filename, is_video, is_audio, is_image):
        item = Media(filename=filename)
        # One expectation per predicate, checked in video/audio/image order.
        assert item.is_video() == is_video
        assert item.is_audio() == is_audio
        assert item.is_image() == is_image
|
||||||
|
|
||||||
|
|
||||||
|
class TestMediaStore:
    """Behaviour of Media.store() with and without storages."""

    def test_store_with_no_storages(self, caplog):
        # With an empty storage list the call is expected to log a notice.
        item = Media(filename="test.mp4")
        item.store(Mock(), storages=[])
        assert "No storages found" in caplog.text

    def test_store_with_storage(self):
        # A single storage must receive exactly one store() call.
        backend = Mock()
        item = Media(filename="test.mp4")
        item.store(Mock(), url="https://example.com", storages=[backend])
        backend.store.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
|
class TestMediaInnerMedia:
    """Traversal of Media objects stored inside other Media properties."""

    def test_all_inner_media_no_nested(self):
        lone = Media(filename="test.mp4")

        without_self = list(lone.all_inner_media(include_self=False))
        assert len(without_self) == 0

        with_self = list(lone.all_inner_media(include_self=True))
        assert len(with_self) == 1
        assert with_self[0] is lone

    def test_all_inner_media_with_nested(self):
        # Two levels of nesting: root -> preview -> thumbnail.
        root = Media(filename="parent.mp4")
        preview = Media(filename="child.jpg")
        thumbnail = Media(filename="grandchild.png")

        preview.set("thumbnail", thumbnail)
        root.set("preview", preview)

        found = list(root.all_inner_media(include_self=False))
        assert len(found) == 2
        assert preview in found
        assert thumbnail in found

    def test_all_inner_media_with_list_property(self):
        # Media held inside a list-valued property are discovered as well.
        root = Media(filename="parent.mp4")
        frame_a = Media(filename="frame1.jpg")
        frame_b = Media(filename="frame2.jpg")

        root.set("frames", [frame_a, frame_b])

        found = list(root.all_inner_media(include_self=False))
        assert len(found) == 2
        assert frame_a in found
        assert frame_b in found
|
||||||
|
|
||||||
|
|
||||||
|
class TestMediaIsStored:
    """is_stored() compared against a two-storage configuration."""

    @staticmethod
    def _two_storage_mock():
        # Mock whose config lists two storage steps ("s3" and "local").
        stub = Mock()
        stub.config = {"steps": {"storages": ["s3", "local"]}}
        return stub

    def test_is_stored_no_urls(self):
        item = Media(filename="test.mp4")
        assert item.is_stored(self._two_storage_mock()) is False

    def test_is_stored_partial_urls(self):
        # One URL for two configured storages is not enough.
        item = Media(filename="test.mp4")
        item.add_url("https://s3.example.com/test.mp4")
        assert item.is_stored(self._two_storage_mock()) is False

    def test_is_stored_full_urls(self):
        # Two URLs for two configured storages counts as stored.
        item = Media(filename="test.mp4")
        item.add_url("https://s3.example.com/test.mp4")
        item.add_url("file:///local/test.mp4")
        assert item.is_stored(self._two_storage_mock()) is True
|
||||||
|
|
||||||
|
|
||||||
|
class TestMediaValidVideo:
    """is_valid_video() against stubbed ffmpeg probe results."""

    @staticmethod
    def _probe_reporting(duration_ts):
        # Patch ffmpeg.probe to report a single stream with the given duration_ts.
        return patch("ffmpeg.probe", return_value={"streams": [{"duration_ts": duration_ts}]})

    def test_is_valid_video_with_valid_probe(self):
        clip = Media(filename="test.mp4")
        with self._probe_reporting(1000):
            assert clip.is_valid_video() is True

    def test_is_valid_video_with_no_duration(self):
        clip = Media(filename="test.mp4")
        with self._probe_reporting(0):
            assert clip.is_valid_video() is False

    def test_is_valid_video_with_ffmpeg_error(self):
        clip = Media(filename="test.mp4")
        failing_probe = patch("ffmpeg.probe", side_effect=Exception("ffmpeg error"))

        with failing_probe:
            # Falls back to file size check, small file
            with patch("os.path.getsize", return_value=100):
                assert clip.is_valid_video() is False

            # Falls back to file size check, larger file
            with patch("os.path.getsize", return_value=30000):
                assert clip.is_valid_video() is True
|
||||||
98
tests/core/test_validators.py
Normal file
98
tests/core/test_validators.py
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
"""
|
||||||
|
Tests for validators module from auto_archiver.core.validators
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from auto_archiver.core.validators import positive_number, valid_file, json_loader
|
||||||
|
|
||||||
|
|
||||||
|
class TestPositiveNumber:
    """Accepted and rejected inputs for the positive_number validator."""

    @pytest.mark.parametrize(
        ["value", "expected"],
        [
            (0, 0),
            (1, 1),
            (100, 100),
            (0.5, 0.5),
            (999999, 999999),
        ],
    )
    def test_positive_values(self, value, expected):
        # Zero and above are passed through unchanged.
        validated = positive_number(value)
        assert validated == expected

    @pytest.mark.parametrize("value", [-1, -100, -0.5, -999999])
    def test_negative_values_raise_error(self, value):
        with pytest.raises(argparse.ArgumentTypeError) as raised:
            positive_number(value)
        assert "not a positive number" in str(raised.value)
|
||||||
|
|
||||||
|
|
||||||
|
class TestValidFile:
    """valid_file accepts existing files and rejects everything else."""

    def test_valid_file_exists(self, tmp_path):
        existing = tmp_path / "test.txt"
        existing.write_text("test content")
        as_text = str(existing)
        # The validator hands back the path it was given.
        assert valid_file(as_text) == str(existing)

    def test_valid_file_not_exists(self):
        with pytest.raises(argparse.ArgumentTypeError) as raised:
            valid_file("/nonexistent/path/to/file.txt")
        assert "does not exist" in str(raised.value)

    def test_valid_file_directory_not_file(self, tmp_path):
        # A directory is not a file
        directory = str(tmp_path)
        with pytest.raises(argparse.ArgumentTypeError) as raised:
            valid_file(directory)
        assert "does not exist" in str(raised.value)
|
||||||
|
|
||||||
|
|
||||||
|
class TestJsonLoader:
    """json_loader parses valid JSON text and raises on malformed text."""

    @pytest.mark.parametrize(
        ["json_str", "expected"],
        [
            ('{"key": "value"}', {"key": "value"}),
            ('{"number": 123}', {"number": 123}),
            ('{"list": [1, 2, 3]}', {"list": [1, 2, 3]}),
            ('{"nested": {"inner": "value"}}', {"nested": {"inner": "value"}}),
            ("[]", []),
            ("[1, 2, 3]", [1, 2, 3]),
            ('"string"', "string"),
            ("123", 123),
            ("true", True),
            ("false", False),
            ("null", None),
        ],
    )
    def test_valid_json(self, json_str, expected):
        parsed = json_loader(json_str)
        assert parsed == expected

    @pytest.mark.parametrize(
        "invalid_json",
        [
            "{invalid}",
            "{'single': 'quotes'}",
            "{missing: quotes}",
            '{"unclosed": "brace"',
            "",
        ],
    )
    def test_invalid_json_raises_error(self, invalid_json):
        # Malformed input surfaces as json.JSONDecodeError.
        with pytest.raises(json.JSONDecodeError):
            json_loader(invalid_json)
|
||||||
62
tests/databases/test_console_db.py
Normal file
62
tests/databases/test_console_db.py
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
"""
|
||||||
|
Tests for the ConsoleDb module
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def console_db(setup_module):
    """Provide whatever the `setup_module` fixture builds for "console_db"."""
    module_under_test = setup_module("console_db")
    return module_under_test
|
||||||
|
|
||||||
|
|
||||||
|
class TestConsoleDb:
    """Each ConsoleDb lifecycle hook must emit its marker in the captured log."""

    def test_started_logs_info(self, console_db, make_item, caplog):
        """started() logs the STARTED marker together with the item URL."""
        entry = make_item("https://example.com/test")

        with caplog.at_level("INFO"):
            console_db.started(entry)

        captured = caplog.text
        assert "STARTED" in captured
        assert "example.com" in captured

    def test_failed_logs_error(self, console_db, make_item, caplog):
        """failed() logs the FAILED marker together with the given reason."""
        entry = make_item("https://example.com/test")
        why = "Connection timeout"

        with caplog.at_level("ERROR"):
            console_db.failed(entry, why)

        captured = caplog.text
        assert "FAILED" in captured
        assert "Connection timeout" in captured

    def test_aborted_logs_warning(self, console_db, make_item, caplog):
        """aborted() logs the ABORTED marker."""
        entry = make_item("https://example.com/test")

        with caplog.at_level("WARNING"):
            console_db.aborted(entry)

        assert "ABORTED" in caplog.text

    def test_done_logs_success(self, console_db, make_item, caplog):
        """done() logs the DONE marker."""
        entry = make_item("https://example.com/test")

        with caplog.at_level("INFO"):
            console_db.done(entry)

        assert "DONE" in caplog.text

    def test_done_cached(self, console_db, make_item, caplog):
        """done() with cached=True still logs the DONE marker."""
        entry = make_item("https://example.com/test")

        with caplog.at_level("INFO"):
            console_db.done(entry, cached=True)

        assert "DONE" in caplog.text
|
||||||
72
tests/enrichers/test_json_enricher.py
Normal file
72
tests/enrichers/test_json_enricher.py
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
"""
|
||||||
|
Tests for the JsonEnricher module
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def json_enricher(setup_module):
    """Provide whatever the `setup_module` fixture builds for "json_enricher"."""
    module_under_test = setup_module("json_enricher")
    return module_under_test
|
||||||
|
|
||||||
|
|
||||||
|
class TestJsonEnricher:
    """The enricher must attach a readable metadata.json media to the item."""

    @staticmethod
    def _read_metadata_json(item):
        # Load the JSON file behind the item's "metadata_json" media.
        attached = item.get_media_by_id("metadata_json")
        with open(attached.filename, "r", encoding="utf-8") as handle:
            return json.load(handle)

    def test_enrich_creates_json_file(self, json_enricher, make_item):
        """enrich() adds a 'metadata_json' media backed by a file on disk."""
        entry = make_item("https://example.com/test")
        entry.set("title", "Test Title")
        entry.set("description", "Test description")

        json_enricher.enrich(entry)

        # Check that a media with id 'metadata_json' was added
        attached = entry.get_media_by_id("metadata_json")
        assert attached is not None
        assert attached.filename.endswith("metadata.json")
        assert os.path.exists(attached.filename)

    def test_enrich_json_content(self, json_enricher, make_item):
        """The written JSON carries the item's metadata values."""
        entry = make_item("https://example.com/test")
        entry.set("title", "Test Title")
        entry.set("custom_field", "custom_value")

        json_enricher.enrich(entry)

        # The to_dict() returns nested structure: {status, metadata: {...}, media: [...]}
        written = self._read_metadata_json(entry)
        assert written["metadata"]["title"] == "Test Title"
        assert written["metadata"]["custom_field"] == "custom_value"
        assert written["metadata"]["url"] == "https://example.com/test"

    def test_enrich_handles_special_characters(self, json_enricher, make_item):
        """Non-ASCII characters survive the write/read round trip."""
        entry = make_item("https://example.com/test")
        entry.set("title", "Test with émojis 🎉 and üñíçödé")

        json_enricher.enrich(entry)

        # Access the nested metadata structure
        stored_title = self._read_metadata_json(entry)["metadata"]["title"]
        assert "émojis 🎉" in stored_title
        assert "üñíçödé" in stored_title

    def test_enrich_empty_metadata(self, json_enricher, make_item):
        """An item with nothing but its URL still gets a JSON file."""
        entry = make_item("https://example.com/minimal")

        json_enricher.enrich(entry)

        attached = entry.get_media_by_id("metadata_json")
        assert attached is not None
        assert os.path.exists(attached.filename)
|
||||||
@@ -60,7 +60,7 @@ class TestAntibotExtractorEnricher(TestExtractorBase):
|
|||||||
"https://en.wikipedia.org/wiki/Western_barn_owl",
|
"https://en.wikipedia.org/wiki/Western_barn_owl",
|
||||||
"western barn owl",
|
"western barn owl",
|
||||||
"Tyto alba",
|
"Tyto alba",
|
||||||
5,
|
3, # Reduced due to Wikipedia rate limiting (429 errors)
|
||||||
0,
|
0,
|
||||||
False,
|
False,
|
||||||
),
|
),
|
||||||
@@ -142,9 +142,9 @@ class TestAntibotExtractorEnricher(TestExtractorBase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
image_media = [m for m in result.media if m.is_image() and not m.get("id") == "screenshot"]
|
image_media = [m for m in result.media if m.is_image() and not m.get("id") == "screenshot"]
|
||||||
assert len(image_media) == image_count, f"Expected {image_count} image items, got {len(image_media)}"
|
assert len(image_media) >= image_count, f"Expected at least {image_count} image items, got {len(image_media)}"
|
||||||
video_media = [m for m in result.media if m.is_video()]
|
video_media = [m for m in result.media if m.is_video()]
|
||||||
assert len(video_media) == video_count, f"Expected {video_count} video items, got {len(video_media)}"
|
assert len(video_media) >= video_count, f"Expected at least {video_count} video items, got {len(video_media)}"
|
||||||
|
|
||||||
for expected_id in ["screenshot", "pdf", "html_source_code"]:
|
for expected_id in ["screenshot", "pdf", "html_source_code"]:
|
||||||
assert any(m.get("id") == expected_id for m in result.media), (
|
assert any(m.get("id") == expected_id for m in result.media), (
|
||||||
|
|||||||
238
tests/extractors/test_twitter_dropin.py
Normal file
238
tests/extractors/test_twitter_dropin.py
Normal file
@@ -0,0 +1,238 @@
|
|||||||
|
"""
|
||||||
|
Tests for the Twitter dropin extractor with fxtwitter fallback
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import Mock, patch
|
||||||
|
|
||||||
|
from auto_archiver.modules.generic_extractor.twitter import Twitter
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def twitter_dropin():
    """Provide a fresh Twitter dropin instance for each test."""
    dropin = Twitter()
    return dropin
|
||||||
|
|
||||||
|
|
||||||
|
class TestTwitterFxTwitterFallback:
|
||||||
|
"""Test the fxtwitter API fallback functionality."""
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def mock_fxtwitter_video_response(self):
|
||||||
|
return {
|
||||||
|
"code": 200,
|
||||||
|
"message": "OK",
|
||||||
|
"tweet": {
|
||||||
|
"url": "https://x.com/user/status/123456789",
|
||||||
|
"id": "123456789",
|
||||||
|
"text": "Test tweet with video",
|
||||||
|
"author": {
|
||||||
|
"id": "111",
|
||||||
|
"name": "Test User",
|
||||||
|
"screen_name": "testuser",
|
||||||
|
},
|
||||||
|
"created_at": "Sun Feb 08 18:45:00 +0000 2026",
|
||||||
|
"media": {
|
||||||
|
"all": [
|
||||||
|
{
|
||||||
|
"type": "video",
|
||||||
|
"url": "https://video.twimg.com/test.mp4",
|
||||||
|
"variants": [
|
||||||
|
{"url": "https://video.twimg.com/test.m3u8", "content_type": "application/x-mpegURL"},
|
||||||
|
{
|
||||||
|
"url": "https://video.twimg.com/test_480.mp4",
|
||||||
|
"content_type": "video/mp4",
|
||||||
|
"bitrate": 632000,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://video.twimg.com/test_720.mp4",
|
||||||
|
"content_type": "video/mp4",
|
||||||
|
"bitrate": 2176000,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"videos": [
|
||||||
|
{
|
||||||
|
"url": "https://video.twimg.com/test.mp4",
|
||||||
|
"variants": [
|
||||||
|
{"url": "https://video.twimg.com/test.m3u8", "content_type": "application/x-mpegURL"},
|
||||||
|
{
|
||||||
|
"url": "https://video.twimg.com/test_480.mp4",
|
||||||
|
"content_type": "video/mp4",
|
||||||
|
"bitrate": 632000,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://video.twimg.com/test_720.mp4",
|
||||||
|
"content_type": "video/mp4",
|
||||||
|
"bitrate": 2176000,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def mock_fxtwitter_photo_response(self):
|
||||||
|
return {
|
||||||
|
"code": 200,
|
||||||
|
"message": "OK",
|
||||||
|
"tweet": {
|
||||||
|
"url": "https://x.com/user/status/123456790",
|
||||||
|
"id": "123456790",
|
||||||
|
"text": "Test tweet with photo",
|
||||||
|
"author": {
|
||||||
|
"id": "111",
|
||||||
|
"name": "Test User",
|
||||||
|
"screen_name": "testuser",
|
||||||
|
},
|
||||||
|
"created_at": "Mon Feb 09 10:30:00 +0000 2026",
|
||||||
|
"media": {
|
||||||
|
"all": [
|
||||||
|
{
|
||||||
|
"type": "photo",
|
||||||
|
"url": "https://pbs.twimg.com/media/test.jpg?name=orig",
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"photos": [
|
||||||
|
{
|
||||||
|
"type": "photo",
|
||||||
|
"url": "https://pbs.twimg.com/media/test.jpg?name=orig",
|
||||||
|
}
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def test_fetch_fxtwitter_video(self, twitter_dropin, mock_fxtwitter_video_response):
|
||||||
|
"""Test fetching a tweet with video via fxtwitter API."""
|
||||||
|
with patch("requests.get") as mock_get:
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_response.status_code = 200
|
||||||
|
mock_response.json.return_value = mock_fxtwitter_video_response
|
||||||
|
mock_get.return_value = mock_response
|
||||||
|
|
||||||
|
result = twitter_dropin._fetch_fxtwitter("123456789")
|
||||||
|
|
||||||
|
assert result["user"]["name"] == "Test User"
|
||||||
|
assert result["created_at"] == "Sun Feb 08 18:45:00 +0000 2026"
|
||||||
|
assert result["full_text"] == "Test tweet with video"
|
||||||
|
assert len(result["entities"]["media"]) == 1
|
||||||
|
assert result["entities"]["media"][0]["type"] == "video"
|
||||||
|
assert "video_info" in result["entities"]["media"][0]
|
||||||
|
assert len(result["entities"]["media"][0]["video_info"]["variants"]) == 3
|
||||||
|
|
||||||
|
def test_fetch_fxtwitter_photo(self, twitter_dropin, mock_fxtwitter_photo_response):
|
||||||
|
"""Test fetching a tweet with photo via fxtwitter API."""
|
||||||
|
with patch("requests.get") as mock_get:
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_response.status_code = 200
|
||||||
|
mock_response.json.return_value = mock_fxtwitter_photo_response
|
||||||
|
mock_get.return_value = mock_response
|
||||||
|
|
||||||
|
result = twitter_dropin._fetch_fxtwitter("123456790")
|
||||||
|
|
||||||
|
assert result["user"]["name"] == "Test User"
|
||||||
|
assert result["created_at"] == "Mon Feb 09 10:30:00 +0000 2026"
|
||||||
|
assert result["full_text"] == "Test tweet with photo"
|
||||||
|
assert len(result["entities"]["media"]) == 1
|
||||||
|
assert result["entities"]["media"][0]["type"] == "photo"
|
||||||
|
assert result["entities"]["media"][0]["media_url_https"] == "https://pbs.twimg.com/media/test.jpg?name=orig"
|
||||||
|
|
||||||
|
def test_fetch_fxtwitter_no_media(self, twitter_dropin):
    """Check the result of ``_fetch_fxtwitter`` for a text-only tweet.

    The canned payload has an empty ``media`` mapping; the returned dict is
    expected to expose the author and text and an empty media list.
    """
    tweet_payload = {
        "id": "123456791",
        "text": "Just text, no media",
        "author": {"name": "Text Only User"},
        "created_at": "Tue Feb 10 12:00:00 +0000 2026",
        "media": {},
    }
    api_reply = Mock(status_code=200)
    api_reply.json.return_value = {
        "code": 200,
        "message": "OK",
        "tweet": tweet_payload,
    }

    with patch("requests.get", return_value=api_reply):
        tweet = twitter_dropin._fetch_fxtwitter("123456791")

    assert tweet["user"]["name"] == "Text Only User"
    assert tweet["full_text"] == "Just text, no media"
    assert tweet["entities"]["media"] == []
|
||||||
|
|
||||||
|
def test_fetch_fxtwitter_api_error(self, twitter_dropin):
    """Check that ``_fetch_fxtwitter`` raises when the HTTP response has status 404."""
    not_found_reply = Mock(status_code=404)

    # NOTE(review): the concrete exception type raised by _fetch_fxtwitter is
    # not visible from this test; narrow ``Exception`` once it is confirmed.
    with patch("requests.get", return_value=not_found_reply), pytest.raises(Exception):
        twitter_dropin._fetch_fxtwitter("nonexistent")
|
||||||
|
|
||||||
|
|
||||||
|
class TestTwitterChooseVariant:
    """Checks for how ``choose_variant`` picks one entry from a list of video variants."""

    def test_choose_highest_quality_video(self, twitter_dropin):
        """Given three mp4 variants, the 1280x720 one must be returned."""
        best_url = "https://video.twimg.com/vid/1280x720/test.mp4"
        candidates = [
            {"url": "https://video.twimg.com/vid/320x240/test.mp4", "content_type": "video/mp4"},
            {"url": best_url, "content_type": "video/mp4"},
            {"url": "https://video.twimg.com/vid/640x480/test.mp4", "content_type": "video/mp4"},
        ]

        chosen = twitter_dropin.choose_variant(candidates)

        assert chosen["url"] == best_url

    def test_choose_variant_fallback_for_non_mp4(self, twitter_dropin):
        """Given only an m3u8 variant, that variant must be returned."""
        playlist_url = "https://video.twimg.com/test.m3u8"
        candidates = [{"url": playlist_url, "content_type": "application/x-mpegURL"}]

        chosen = twitter_dropin.choose_variant(candidates)

        assert chosen["url"] == playlist_url

    def test_choose_variant_prefers_mp4(self, twitter_dropin):
        """Given one m3u8 and one mp4 variant, the mp4 one must be returned."""
        playlist_variant = {"url": "https://video.twimg.com/test.m3u8", "content_type": "application/x-mpegURL"}
        mp4_variant = {"url": "https://video.twimg.com/vid/1280x720/test.mp4", "content_type": "video/mp4"}

        chosen = twitter_dropin.choose_variant([playlist_variant, mp4_variant])

        assert chosen["content_type"] == "video/mp4"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.download
class TestTwitterFxTwitterLive:
    """Integration checks that call ``_fetch_fxtwitter`` without mocking (network required)."""

    @pytest.mark.parametrize(
        "tweet_id,expected_media_type",
        [
            # two tweets expected to carry video
            ("2020569571682312581", "video"),
            ("2020410438198890618", "video"),
            # one tweet expected to carry a photo
            ("2020341585502957801", "photo"),
        ],
    )
    def test_fetch_real_tweets(self, twitter_dropin, tweet_id, expected_media_type):
        """Fetch a real tweet and check the basic fields plus the first media type."""
        tweet = twitter_dropin._fetch_fxtwitter(tweet_id)

        # Author name, timestamp and text must all be truthy (non-empty).
        assert tweet["user"]["name"]
        assert tweet["created_at"]
        assert tweet["full_text"]

        media_entries = tweet["entities"]["media"]
        assert len(media_entries) >= 1
        assert media_entries[0]["type"] == expected_media_type
|
||||||
70
tests/feeders/test_cli_feeder.py
Normal file
70
tests/feeders/test_cli_feeder.py
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
"""
|
||||||
|
Tests for the CLIFeeder module
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from auto_archiver.modules.cli_feeder.cli_feeder import CLIFeeder
|
||||||
|
from auto_archiver.core.consts import SetupError
|
||||||
|
from auto_archiver.core.metadata import Metadata
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def cli_feeder_instance():
    """Provide a factory that builds a ``CLIFeeder`` for a given ``urls`` value.

    The returned callable takes the ``urls`` value and hands back a feeder
    whose ``config``, ``name`` and ``tmp_dir`` attributes were assigned by
    hand; ``setup()`` is left for the individual tests to call.
    """

    def _build(urls):
        instance = CLIFeeder()
        # Hand-built stand-in for the config structure the feeder expects.
        attributes = {
            "config": {"urls": urls},
            "name": "cli_feeder",
            "tmp_dir": "/tmp",
        }
        for attr_name, attr_value in attributes.items():
            setattr(instance, attr_name, attr_value)
        return instance

    return _build
|
||||||
|
|
||||||
|
|
||||||
|
class TestCLIFeeder:
    """Checks for iterating a ``CLIFeeder`` and for its ``setup()`` validation."""

    def test_iter_yields_metadata_for_urls(self, cli_feeder_instance):
        """Three configured URLs must produce three ``Metadata`` items, in order."""
        expected_urls = (
            "https://example.com/1",
            "https://example.com/2",
            "https://example.com/3",
        )
        feeder = cli_feeder_instance(list(expected_urls))
        feeder.setup()

        produced = list(feeder)

        assert len(produced) == 3
        assert all(isinstance(entry, Metadata) for entry in produced)
        for entry, url in zip(produced, expected_urls):
            assert entry.get_url() == url

    def test_iter_single_url(self, cli_feeder_instance):
        """A single configured URL must produce exactly one item with that URL."""
        feeder = cli_feeder_instance(["https://example.com/single"])
        feeder.setup()

        produced = list(feeder)

        assert len(produced) == 1
        only_entry = produced[0]
        assert only_entry.get_url() == "https://example.com/single"

    def test_setup_raises_without_urls(self, cli_feeder_instance):
        """``setup()`` must raise ``SetupError`` when the URL list is empty."""
        feeder = cli_feeder_instance([])

        with pytest.raises(SetupError) as raised:
            feeder.setup()

        message = str(raised.value)
        assert "No URLs provided" in message

    def test_setup_raises_with_none_urls(self, cli_feeder_instance):
        """``setup()`` must raise ``SetupError`` when ``urls`` is ``None``."""
        feeder = cli_feeder_instance(None)

        with pytest.raises(SetupError) as raised:
            feeder.setup()

        message = str(raised.value)
        assert "No URLs provided" in message
|
||||||
43
tests/formatters/test_mute_formatter.py
Normal file
43
tests/formatters/test_mute_formatter.py
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
"""
|
||||||
|
Tests for the MuteFormatter module
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from auto_archiver.core.metadata import Metadata
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def mute_formatter(setup_module):
    """Return the object produced by ``setup_module("mute_formatter")``.

    ``setup_module`` is a fixture defined outside this file (presumably in a
    shared conftest — verify).
    """
    formatter = setup_module("mute_formatter")
    return formatter
|
||||||
|
|
||||||
|
|
||||||
|
class TestMuteFormatter:
    """Checks that ``mute_formatter.format`` returns ``None`` for several kinds of item."""

    def test_format_returns_none(self, mute_formatter, make_item):
        """An item with a title set must still format to ``None``."""
        titled_item = make_item("https://example.com/test")
        titled_item.set("title", "Test Title")

        assert mute_formatter.format(titled_item) is None

    def test_format_with_empty_metadata(self, mute_formatter):
        """A ``Metadata`` that only has its URL set must format to ``None``."""
        blank = Metadata()
        url_only_item = blank.set_url("https://example.com/empty")

        assert mute_formatter.format(url_only_item) is None

    def test_format_with_media(self, mute_formatter, make_item):
        """An item with one attached ``Media`` must still format to ``None``."""
        from auto_archiver.core.media import Media

        item_with_media = make_item("https://example.com/with-media")
        attachment = Media(filename="test.mp4")
        item_with_media.add_media(attachment)

        assert mute_formatter.format(item_with_media) is None
|
||||||
Reference in New Issue
Block a user