mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-08 03:18:28 +03:00
Refactoring of storage code:
1. Fix some bugs in local_storage.
2. Refactor unit tests so they do not set Media.key explicitly (unless the key is well known beforehand, which it isn't).
3. Limit the length of the URL used by the 'url' type path_generator.
4. Throw an error if the 'save_to' path of local storage is too long.
5. A few other tidy-ups.
This commit is contained in:
@@ -14,8 +14,8 @@ def enricher(setup_module):
|
||||
def metadata_with_images():
|
||||
m = Metadata()
|
||||
m.set_url("https://example.com")
|
||||
m.add_media(Media(filename="image1.jpg", key="image1"))
|
||||
m.add_media(Media(filename="image2.jpg", key="image2"))
|
||||
m.add_media(Media(filename="image1.jpg", _key="image1"))
|
||||
m.add_media(Media(filename="image2.jpg", _key="image2"))
|
||||
return m
|
||||
|
||||
|
||||
|
||||
@@ -37,10 +37,10 @@ class TestS3Storage:
|
||||
def test_get_cdn_url_generation(self):
|
||||
"""Test CDN URL formatting """
|
||||
media = Media("test.txt")
|
||||
media.key = "path/to/file.txt"
|
||||
media._key = "path/to/file.txt"
|
||||
url = self.storage.get_cdn_url(media)
|
||||
assert url == "https://cdn.example.com/path/to/file.txt"
|
||||
media.key = "another/path.jpg"
|
||||
media._key = "another/path.jpg"
|
||||
assert self.storage.get_cdn_url(media) == "https://cdn.example.com/another/path.jpg"
|
||||
|
||||
def test_uploadf_sets_acl_public(self, mocker):
|
||||
@@ -72,7 +72,7 @@ class TestS3Storage:
|
||||
self.storage.random_no_duplicate = True
|
||||
mock_file_in_folder = mocker.patch.object(S3Storage, 'file_in_folder', return_value="existing_folder/existing_file.txt")
|
||||
media = Media("test.txt")
|
||||
media.key = "original_path.txt"
|
||||
media._key = "original_path.txt"
|
||||
mock_calculate_hash = mocker.patch('auto_archiver.modules.s3_storage.s3_storage.calculate_file_hash', return_value="beepboop123beepboop123beepboop123")
|
||||
assert self.storage.is_upload_needed(media) is False
|
||||
assert media.key == "existing_folder/existing_file.txt"
|
||||
@@ -84,7 +84,7 @@ class TestS3Storage:
|
||||
|
||||
def test_uploads_with_correct_parameters(self, mocker):
|
||||
media = Media("test.txt")
|
||||
media.key = "original_key.txt"
|
||||
media._key = "original_key.txt"
|
||||
mocker.patch.object(S3Storage, 'is_upload_needed', return_value=True)
|
||||
media.mimetype = 'image/png'
|
||||
mock_file = mocker.MagicMock()
|
||||
|
||||
@@ -44,7 +44,7 @@ def media(tmp_path) -> Media:
|
||||
file_path.write_bytes(content)
|
||||
media = Media(filename=str(file_path))
|
||||
media.properties = {"something": "Title"}
|
||||
media.key = "key"
|
||||
media._key = "key"
|
||||
return media
|
||||
|
||||
|
||||
|
||||
@@ -50,7 +50,7 @@ def test_get_id_from_parent_and_name(gdrive_storage, mocker):
|
||||
|
||||
def test_path_parts():
|
||||
media = Media(filename="test.jpg")
|
||||
media.key = "folder1/folder2/test.jpg"
|
||||
media._key = "folder1/folder2/test.jpg"
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -4,9 +4,9 @@ from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from auto_archiver.core import Media
|
||||
from auto_archiver.core import Media, Metadata
|
||||
from auto_archiver.modules.local_storage import LocalStorage
|
||||
|
||||
from auto_archiver.core.consts import SetupError
|
||||
|
||||
@pytest.fixture
|
||||
def local_storage(setup_module, tmp_path) -> LocalStorage:
|
||||
@@ -25,37 +25,32 @@ def sample_media(tmp_path) -> Media:
|
||||
"""Fixture creating a Media object with temporary source file"""
|
||||
src_file = tmp_path / "source.txt"
|
||||
src_file.write_text("test content")
|
||||
return Media(key="subdir/test.txt", filename=str(src_file))
|
||||
return Media(filename=str(src_file))
|
||||
|
||||
def test_really_long_website_url_save(local_storage, tmp_path):
|
||||
long_filename = os.path.join(local_storage.save_to, "file"*100 + ".txt")
|
||||
src_file = tmp_path / "source.txt"
|
||||
src_file.write_text("test content")
|
||||
media = Media(key=long_filename, filename=str(src_file))
|
||||
assert local_storage.upload(media) is True
|
||||
assert src_file.read_text() == Path(local_storage.get_cdn_url(media)).read_text()
|
||||
def test_too_long_save_path(setup_module):
|
||||
with pytest.raises(SetupError):
|
||||
setup_module("local_storage", {"save_to": "long"*100})
|
||||
|
||||
def test_get_cdn_url_relative(local_storage):
|
||||
media = Media(key="test.txt", filename="dummy.txt")
|
||||
media = Media(filename="dummy.txt")
|
||||
local_storage.set_key(media, "https://example.com", Metadata())
|
||||
expected = os.path.join(local_storage.save_to, media.key)
|
||||
assert local_storage.get_cdn_url(media) == expected
|
||||
|
||||
|
||||
|
||||
def test_get_cdn_url_absolute(local_storage):
|
||||
media = Media(key="test.txt", filename="dummy.txt")
|
||||
media = Media(filename="dummy.txt")
|
||||
local_storage.save_absolute = True
|
||||
local_storage.set_key(media, "https://example.com", Metadata())
|
||||
expected = os.path.abspath(os.path.join(local_storage.save_to, media.key))
|
||||
assert local_storage.get_cdn_url(media) == expected
|
||||
|
||||
def test_upload_file_contents_and_metadata(local_storage, sample_media):
|
||||
local_storage.store(sample_media, "https://example.com", Metadata())
|
||||
dest = os.path.join(local_storage.save_to, sample_media.key)
|
||||
assert local_storage.upload(sample_media) is True
|
||||
assert Path(sample_media.filename).read_text() == Path(dest).read_text()
|
||||
|
||||
|
||||
def test_upload_nonexistent_source(local_storage):
|
||||
media = Media(key="missing.txt", filename="nonexistent.txt")
|
||||
media = Media(_key="missing.txt", filename="nonexistent.txt")
|
||||
with pytest.raises(FileNotFoundError):
|
||||
local_storage.upload(media)
|
||||
|
||||
|
||||
@@ -54,7 +54,7 @@ def storage_base():
|
||||
|
||||
],
|
||||
)
|
||||
def test_storage_setup(storage_base, path_generator, filename_generator, url, expected_key, mocker):
|
||||
def test_storage_name_generation(storage_base, path_generator, filename_generator, url, expected_key, mocker):
|
||||
mock_random = mocker.patch("auto_archiver.core.storage.random_str")
|
||||
mock_random.return_value = "pretend-random"
|
||||
|
||||
@@ -92,7 +92,4 @@ def test_really_long_name(storage_base):
|
||||
url = f"https://example.com/{'file'*100}"
|
||||
media = Media(filename="dummy.txt")
|
||||
storage.set_key(media, url, Metadata())
|
||||
assert len(media.key) <= storage.max_file_length()
|
||||
assert media.key == "https-example-com-filefilefilefilefilefilefilefilefilefilefilefile\
|
||||
filefilefilefilefilefilefilefilefilefilefilefilefilefilefilefilefilefilefilefilefilefilefile\
|
||||
filefilefilefilefilefilefilefilefilefilefilefilefilefilefilefilefile/6ae8a75555209fd6c44157c0.txt"
|
||||
assert media.key == f"https-example-com-{'file'*13}/6ae8a75555209fd6c44157c0.txt"
|
||||
Reference in New Issue
Block a user