Merge branch 'main' into linting_etc

# Conflicts:
#	src/auto_archiver/core/consts.py
#	src/auto_archiver/core/orchestrator.py
#	src/auto_archiver/core/storage.py
#	src/auto_archiver/modules/local_storage/local_storage.py
#	src/auto_archiver/modules/s3_storage/s3_storage.py
#	tests/storages/test_S3_storage.py
#	tests/storages/test_local_storage.py
#	tests/storages/test_storage_base.py
This commit is contained in:
erinhmclark
2025-03-11 10:39:47 +00:00
19 changed files with 614 additions and 320 deletions

View File

@@ -8,7 +8,6 @@ class TestS3Storage:
"""
Test suite for S3Storage.
"""
module_name: str = "s3_storage"
storage: Type[S3Storage]
config: dict = {
@@ -33,28 +32,28 @@ class TestS3Storage:
"""Test that S3 client is initialized with correct parameters"""
assert self.storage.s3 is not None
assert self.storage.s3.meta.region_name == "test-region"
assert self.storage.s3.meta.region_name == 'test-region'
def test_get_cdn_url_generation(self):
"""Test CDN URL formatting"""
"""Test CDN URL formatting """
media = Media("test.txt")
media.key = "path/to/file.txt"
media._key = "path/to/file.txt"
url = self.storage.get_cdn_url(media)
assert url == "https://cdn.example.com/path/to/file.txt"
media.key = "another/path.jpg"
media._key = "another/path.jpg"
assert self.storage.get_cdn_url(media) == "https://cdn.example.com/another/path.jpg"
def test_uploadf_sets_acl_public(self, mocker):
media = Media("test.txt")
mock_file = mocker.MagicMock()
mock_s3_upload = mocker.patch.object(self.storage.s3, "upload_fileobj")
mocker.patch.object(self.storage, "is_upload_needed", return_value=True)
mock_s3_upload = mocker.patch.object(self.storage.s3, 'upload_fileobj')
mocker.patch.object(self.storage, 'is_upload_needed', return_value=True)
self.storage.uploadf(mock_file, media)
mock_s3_upload.assert_called_once_with(
mock_file,
Bucket="test-bucket",
Bucket='test-bucket',
Key=media.key,
ExtraArgs={"ACL": "public-read", "ContentType": "text/plain"},
ExtraArgs={'ACL': 'public-read', 'ContentType': 'text/plain'}
)
def test_upload_decision_logic(self, mocker):
@@ -62,52 +61,45 @@ class TestS3Storage:
media = Media("test.txt")
assert self.storage.is_upload_needed(media) is True
self.storage.random_no_duplicate = True
mock_calc_hash = mocker.patch(
"auto_archiver.modules.s3_storage.s3_storage.calculate_file_hash",
return_value="beepboop123beepboop123beepboop123",
)
mock_file_in_folder = mocker.patch.object(self.storage, "file_in_folder", return_value="existing_key.txt")
mock_calc_hash = mocker.patch('auto_archiver.modules.s3_storage.s3_storage.calculate_file_hash', return_value='beepboop123beepboop123beepboop123')
mock_file_in_folder = mocker.patch.object(self.storage, 'file_in_folder', return_value='existing_key.txt')
assert self.storage.is_upload_needed(media) is False
assert media.key == "existing_key.txt"
mock_file_in_folder.assert_called_with("no-dups/beepboop123beepboop123be")
assert media.key == 'existing_key.txt'
mock_file_in_folder.assert_called_with('no-dups/beepboop123beepboop123be')
def test_skips_upload_when_duplicate_exists(self, mocker):
"""Test that upload skips when file_in_folder finds existing object"""
self.storage.random_no_duplicate = True
mock_file_in_folder = mocker.patch.object(
S3Storage, "file_in_folder", return_value="existing_folder/existing_file.txt"
)
mock_file_in_folder = mocker.patch.object(S3Storage, 'file_in_folder', return_value="existing_folder/existing_file.txt")
media = Media("test.txt")
media.key = "original_path.txt"
mock_calculate_hash = mocker.patch(
"auto_archiver.modules.s3_storage.s3_storage.calculate_file_hash",
return_value="beepboop123beepboop123beepboop123",
)
media._key = "original_path.txt"
mock_calculate_hash = mocker.patch('auto_archiver.modules.s3_storage.s3_storage.calculate_file_hash', return_value="beepboop123beepboop123beepboop123")
assert self.storage.is_upload_needed(media) is False
assert media.key == "existing_folder/existing_file.txt"
assert media.get("previously archived") is True
mock_upload = mocker.patch.object(self.storage.s3, "upload_fileobj")
mock_upload = mocker.patch.object(self.storage.s3, 'upload_fileobj')
result = self.storage.uploadf(None, media)
mock_upload.assert_not_called()
assert result is True
def test_uploads_with_correct_parameters(self, mocker):
media = Media("test.txt")
media.key = "original_key.txt"
mocker.patch.object(S3Storage, "is_upload_needed", return_value=True)
media.mimetype = "image/png"
media._key = "original_key.txt"
mocker.patch.object(S3Storage, 'is_upload_needed', return_value=True)
media.mimetype = 'image/png'
mock_file = mocker.MagicMock()
mock_upload = mocker.patch.object(self.storage.s3, "upload_fileobj")
mock_upload = mocker.patch.object(self.storage.s3, 'upload_fileobj')
self.storage.uploadf(mock_file, media)
mock_upload.assert_called_once_with(
mock_file,
Bucket="test-bucket",
Key="original_key.txt",
ExtraArgs={"ACL": "public-read", "ContentType": "image/png"},
Bucket='test-bucket',
Key='original_key.txt',
ExtraArgs={
'ACL': 'public-read',
'ContentType': 'image/png'
}
)
def test_file_in_folder_exists(self, mocker):
mock_list_objects = mocker.patch.object(
self.storage.s3, "list_objects", return_value={"Contents": [{"Key": "path/to/file.txt"}]}
)
assert self.storage.file_in_folder("path/to/") == "path/to/file.txt"
mock_list_objects = mocker.patch.object(self.storage.s3, 'list_objects', return_value={'Contents': [{'Key': 'path/to/file.txt'}]})
assert self.storage.file_in_folder('path/to/') == 'path/to/file.txt'

View File

@@ -44,7 +44,7 @@ def media(tmp_path) -> Media:
file_path.write_bytes(content)
media = Media(filename=str(file_path))
media.properties = {"something": "Title"}
media.key = "key"
media._key = "key"
return media

View File

@@ -53,7 +53,7 @@ def test_get_id_from_parent_and_name(gdrive_storage, mocker):
def test_path_parts():
media = Media(filename="test.jpg")
media.key = "folder1/folder2/test.jpg"
media._key = "folder1/folder2/test.jpg"
@pytest.mark.skip(reason="Requires real credentials")

View File

@@ -1,15 +1,17 @@
import os
from pathlib import Path
import pytest
from auto_archiver.core import Media
from auto_archiver.core import Media, Metadata
from auto_archiver.modules.local_storage import LocalStorage
from auto_archiver.core.consts import SetupError
@pytest.fixture
def local_storage(setup_module, tmp_path) -> LocalStorage:
save_to = tmp_path / "local_archive"
save_to.mkdir()
configs: dict = {
"path_generator": "flat",
"filename_generator": "static",
@@ -18,35 +20,41 @@ def local_storage(setup_module, tmp_path) -> LocalStorage:
}
return setup_module("local_storage", configs)
@pytest.fixture
def sample_media(tmp_path) -> Media:
"""Fixture creating a Media object with temporary source file"""
src_file = tmp_path / "source.txt"
src_file.write_text("test content")
return Media(key="subdir/test.txt", filename=str(src_file))
return Media(filename=str(src_file))
def test_too_long_save_path(setup_module):
with pytest.raises(SetupError):
setup_module("local_storage", {"save_to": "long"*100})
def test_get_cdn_url_relative(local_storage):
media = Media(key="test.txt", filename="dummy.txt")
local_storage.filename_generator = "random"
media = Media(filename="dummy.txt")
local_storage.set_key(media, "https://example.com", Metadata())
expected = os.path.join(local_storage.save_to, media.key)
assert local_storage.get_cdn_url(media) == expected
def test_get_cdn_url_absolute(local_storage):
media = Media(key="test.txt", filename="dummy.txt")
local_storage.filename_generator = "random"
media = Media(filename="dummy.txt")
local_storage.save_absolute = True
local_storage.set_key(media, "https://example.com", Metadata())
expected = os.path.abspath(os.path.join(local_storage.save_to, media.key))
assert local_storage.get_cdn_url(media) == expected
def test_upload_file_contents_and_metadata(local_storage, sample_media):
local_storage.store(sample_media, "https://example.com", Metadata())
dest = os.path.join(local_storage.save_to, sample_media.key)
assert local_storage.upload(sample_media) is True
assert Path(sample_media.filename).read_text() == Path(dest).read_text()
def test_upload_nonexistent_source(local_storage):
media = Media(key="missing.txt", filename="nonexistent.txt")
media = Media(_key="missing.txt", filename="nonexistent.txt")
with pytest.raises(FileNotFoundError):
local_storage.upload(media)

View File

@@ -2,16 +2,94 @@ from typing import Type
import pytest
from auto_archiver.core.metadata import Metadata
from auto_archiver.core.metadata import Metadata, Media
from auto_archiver.core.storage import Storage
from auto_archiver.core.module import ModuleFactory
class TestStorageBase(object):
module_name: str = None
config: dict = None
@pytest.fixture(autouse=True)
def setup_storage(self, setup_module):
assert self.module_name is not None, "self.module_name must be set on the subclass"
assert (
self.module_name is not None
), "self.module_name must be set on the subclass"
assert self.config is not None, "self.config must be a dict set on the subclass"
self.storage: Type[Storage] = setup_module(self.module_name, self.config)
self.storage: Type[Storage] = setup_module(
self.module_name, self.config
)
class TestBaseStorage(Storage):
name = "test_storage"
def get_cdn_url(self, media):
return "cdn_url"
def uploadf(self, file, key, **kwargs):
return True
@pytest.fixture
def dummy_file(tmp_path):
# create dummy.txt file
dummy_file = tmp_path / "dummy.txt"
dummy_file.write_text("test content")
return str(dummy_file)
@pytest.fixture
def storage_base():
def _storage_base(config):
storage_base = TestBaseStorage()
storage_base.config_setup({TestBaseStorage.name : config})
storage_base.module_factory = ModuleFactory()
return storage_base
return _storage_base
@pytest.mark.parametrize(
"path_generator, filename_generator, url, expected_key",
[
("flat", "static", "https://example.com/file/", "folder/6ae8a75555209fd6c44157c0.txt"),
("flat", "random", "https://example.com/file/", "folder/pretend-random.txt"),
("url", "static", "https://example.com/file/", "folder/https-example-com-file/6ae8a75555209fd6c44157c0.txt"),
("url", "random", "https://example.com/file/", "folder/https-example-com-file/pretend-random.txt"),
("random", "static", "https://example.com/file/", "folder/pretend-random/6ae8a75555209fd6c44157c0.txt"),
("random", "random", "https://example.com/file/", "folder/pretend-random/pretend-random.txt"),
],
)
def test_storage_name_generation(storage_base, path_generator, filename_generator, url,
expected_key, mocker, tmp_path, dummy_file):
mock_random = mocker.patch("auto_archiver.core.storage.random_str")
mock_random.return_value = "pretend-random"
config: dict = {
"path_generator": path_generator,
"filename_generator": filename_generator,
}
storage: Storage = storage_base(config)
assert storage.path_generator == path_generator
assert storage.filename_generator == filename_generator
metadata = Metadata()
metadata.set_context("folder", "folder")
media = Media(filename=dummy_file)
storage.set_key(media, url, metadata)
print(media.key)
assert media.key == expected_key
def test_really_long_name(storage_base, dummy_file):
config: dict = {
"path_generator": "url",
"filename_generator": "static",
}
storage: Storage = storage_base(config)
url = f"https://example.com/{'file'*100}"
media = Media(filename=dummy_file)
storage.set_key(media, url, Metadata())
assert media.key == f"https-example-com-{'file'*13}/6ae8a75555209fd6c44157c0.txt"