Files
auto-archiver/tests/test_none_filename_handling.py
2026-03-02 17:01:16 +00:00

260 lines
9.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Tests for handling Media objects with None filename.
When download_from_url fails, it returns None. Various enrichers and
the metadata deduplication logic must gracefully handle Media objects
where filename is None, rather than crashing with TypeError.
"""
from datetime import datetime, timezone
from unittest.mock import MagicMock
import pytest
from auto_archiver.core.metadata import Metadata, Media
from auto_archiver.modules.hash_enricher import HashEnricher
from auto_archiver.modules.meta_enricher import MetaEnricher
# ── HashEnricher ──────────────────────────────────────────────────────
class TestHashEnricherNoneFilename:
    """Check that HashEnricher tolerates media whose filename is None."""

    @pytest.fixture(autouse=True)
    def setup(self, setup_module):
        """Create a HashEnricher via the ``setup_module`` fixture before each test."""
        hash_config = {"algorithm": "SHA-256", "chunksize": 100}
        self.enricher = setup_module(HashEnricher, hash_config)

    def test_skips_none_filename(self):
        """A lone None-filename media is enriched without error and gets no hash."""
        failed_download = Media(filename=None)
        failed_download.set("src", "https://example.com/video.mp4")

        item = Metadata().set_url("https://example.com")
        item.add_media(failed_download)

        # The call itself is the check: it must complete without raising.
        self.enricher.enrich(item)

        assert item.media[0].get("hash") is None

    def test_hashes_valid_skips_none(self, tmp_path):
        """With one real file and one None filename, only the real file is hashed."""
        on_disk = tmp_path / "test.txt"
        on_disk.write_text("hello world")

        item = Metadata().set_url("https://example.com")
        for name in (str(on_disk), None):
            item.add_media(Media(filename=name))

        self.enricher.enrich(item)

        with_file, without_file = item.media[0], item.media[1]
        assert with_file.get("hash") is not None
        assert without_file.get("hash") is None

    def test_all_none_filenames(self):
        """All media have None filename - enricher should not crash."""
        item = Metadata().set_url("https://example.com")
        for _ in range(2):
            item.add_media(Media(filename=None))

        self.enricher.enrich(item)

        # Both entries are still present and neither received a hash.
        assert len(item.media) == 2
        assert all(entry.get("hash") is None for entry in item.media)
# ── MetaEnricher ──────────────────────────────────────────────────────
class TestMetaEnricherNoneFilename:
    """Check that MetaEnricher tolerates media whose filename is None."""

    @pytest.fixture(autouse=True)
    def setup(self, setup_module):
        """Create a MetaEnricher (empty config) via ``setup_module`` before each test."""
        self.enricher = setup_module(MetaEnricher, {})

    def test_skips_none_filename(self):
        """A lone None-filename media is enriched without error; total_bytes is 0."""
        failed_download = Media(filename=None)
        failed_download.set("src", "https://example.com/video.mp4")

        item = Metadata().set_url("https://example.com")
        item.set("_processed_at", datetime.now(timezone.utc))
        item.add_media(failed_download)

        # The call itself is the check: it must complete without raising.
        self.enricher.enrich(item)

        assert item.get("total_bytes") == 0

    def test_sizes_valid_skips_none(self, tmp_path):
        """With one real file and one None filename, only the real file is sized."""
        payload_size = 500
        on_disk = tmp_path / "test.txt"
        on_disk.write_text("A" * payload_size)

        item = Metadata().set_url("https://example.com")
        item.set("_processed_at", datetime.now(timezone.utc))
        for name in (str(on_disk), None):
            item.add_media(Media(filename=name))

        self.enricher.enrich(item)

        with_file, without_file = item.media[0], item.media[1]
        assert with_file.get("bytes") == payload_size
        assert without_file.get("bytes") is None
        # The total only reflects the media that actually has a file.
        assert item.get("total_bytes") == payload_size
# ── Metadata.remove_duplicate_media_by_hash ───────────────────────────
class TestRemoveDuplicateMediaNoneFilename:
    """Check that remove_duplicate_media_by_hash retains None-filename media."""

    def test_none_filename_kept(self):
        """A single None-filename media is still present after deduplication."""
        failed_download = Media(filename=None)
        failed_download.set("src", "https://example.com/video.mp4")

        item = Metadata().set_url("https://example.com")
        item.add_media(failed_download)

        item.remove_duplicate_media_by_hash()

        assert len(item.media) == 1
        assert item.media[0].filename is None

    def test_none_and_valid_mixed(self, tmp_path):
        """A None-filename media and a media backed by a real file are both kept."""
        on_disk = tmp_path / "test.txt"
        on_disk.write_text("content")

        failed_download = Media(filename=None)
        failed_download.set("src", "https://example.com/video.mp4")

        item = Metadata().set_url("https://example.com")
        for entry in (Media(filename=str(on_disk)), failed_download):
            item.add_media(entry)

        item.remove_duplicate_media_by_hash()

        assert len(item.media) == 2

    def test_multiple_none_filename_all_kept(self):
        """Multiple None-filename media are all kept (can't deduplicate without file)."""
        item = Metadata().set_url("https://example.com")
        for _ in range(2):
            item.add_media(Media(filename=None))

        item.remove_duplicate_media_by_hash()

        assert len(item.media) == 2
# ── Twitter dropin create_metadata ────────────────────────────────────
class TestTwitterDropinNoneFilename:
    """Check that Twitter.create_metadata omits media whose download returned None."""

    # Post URL handed to create_metadata in every test of this class.
    _STATUS_URL = "https://x.com/test/status/123"

    @pytest.fixture
    def twitter_dropin(self):
        """Return a fresh Twitter dropin; the class is imported when the fixture runs."""
        from auto_archiver.modules.generic_extractor.twitter import Twitter

        return Twitter()

    @staticmethod
    def _tweet_payload(full_text, media_entries):
        """Build the tweet dict used by these tests with the given text and media list."""
        return {
            "user": {"name": "Test User"},
            "created_at": "Sun Feb 08 18:45:00 +0000 2026",
            "full_text": full_text,
            "entities": {"media": media_entries},
        }

    @staticmethod
    def _photo_entry(media_url):
        """Build a photo media entry pointing at ``media_url``."""
        return {"type": "photo", "media_url_https": media_url}

    def test_create_metadata_skips_failed_photo_download(self, twitter_dropin):
        """When download_from_url returns None for a photo, it's not added to media."""
        tweet = self._tweet_payload(
            "Test tweet with photo",
            [self._photo_entry("https://pbs.twimg.com/media/test.jpg")],
        )
        archiver = MagicMock()
        archiver.download_from_url.return_value = None  # simulate failed download

        outcome = twitter_dropin.create_metadata(tweet, None, archiver, self._STATUS_URL)

        # The failed download must leave the result without any media.
        assert len(outcome.media) == 0

    def test_create_metadata_skips_failed_video_download(self, twitter_dropin):
        """When download_from_url returns None for a video, it's not added to media."""
        mp4_variant = {
            "url": "https://video.twimg.com/vid/1280x720/test.mp4",
            "content_type": "video/mp4",
        }
        video_entry = {
            "type": "video",
            "video_info": {"variants": [mp4_variant]},
        }
        tweet = self._tweet_payload("Test tweet with video", [video_entry])
        archiver = MagicMock()
        archiver.download_from_url.return_value = None

        outcome = twitter_dropin.create_metadata(tweet, None, archiver, self._STATUS_URL)

        assert len(outcome.media) == 0

    def test_create_metadata_keeps_successful_download(self, twitter_dropin, tmp_path):
        """When download_from_url succeeds, media is added."""
        tweet = self._tweet_payload(
            "Test tweet with photo",
            [self._photo_entry("https://pbs.twimg.com/media/test.jpg")],
        )
        downloaded = tmp_path / "test.jpg"
        downloaded.write_text("fake image data")
        archiver = MagicMock()
        archiver.download_from_url.return_value = str(downloaded)

        outcome = twitter_dropin.create_metadata(tweet, None, archiver, self._STATUS_URL)

        assert len(outcome.media) == 1
        assert outcome.media[0].filename == str(downloaded)

    def test_create_metadata_mixed_downloads(self, twitter_dropin, tmp_path):
        """One download succeeds, one fails - only the successful one is kept."""
        photo_urls = (
            "https://pbs.twimg.com/media/test1.jpg",
            "https://pbs.twimg.com/media/test2.jpg",
        )
        tweet = self._tweet_payload(
            "Test tweet with two photos",
            [self._photo_entry(media_url) for media_url in photo_urls],
        )
        downloaded = tmp_path / "test1.jpg"
        downloaded.write_text("fake image data")
        archiver = MagicMock()
        # First call succeeds, second fails
        archiver.download_from_url.side_effect = [str(downloaded), None]

        outcome = twitter_dropin.create_metadata(tweet, None, archiver, self._STATUS_URL)

        assert len(outcome.media) == 1
        assert outcome.media[0].filename == str(downloaded)