diff --git a/src/auto_archiver/core/media.py b/src/auto_archiver/core/media.py index 2cb6fc9..952a025 100644 --- a/src/auto_archiver/core/media.py +++ b/src/auto_archiver/core/media.py @@ -65,7 +65,7 @@ class Media: def is_stored(self, in_storage) -> bool: # checks if the media is already stored in the given storage - return len(self.urls) > 0 and any([u for u in self.urls if in_storage.get_cdn_url() in u]) + return len(self.urls) > 0 and any([u for u in self.urls if in_storage.get_cdn_url(self) in u]) def set(self, key: str, value: Any) -> Media: self.properties[key] = value diff --git a/src/auto_archiver/modules/gdrive_storage/gdrive_storage.py b/src/auto_archiver/modules/gdrive_storage/gdrive_storage.py index b764f1d..cc9cf3d 100644 --- a/src/auto_archiver/modules/gdrive_storage/gdrive_storage.py +++ b/src/auto_archiver/modules/gdrive_storage/gdrive_storage.py @@ -74,7 +74,8 @@ class GDriveStorage(Storage): parent_id = folder_id # get id of file inside folder (or sub folder) - file_id = self._get_id_from_parent_and_name(folder_id, filename) + # TODO: supressing the error as being checked before first upload + file_id = self._get_id_from_parent_and_name(folder_id, filename, raise_on_missing=False) return f"https://drive.google.com/file/d/{file_id}/view?usp=sharing" def upload(self, media: Media, **kwargs) -> bool: @@ -106,7 +107,13 @@ class GDriveStorage(Storage): # must be implemented even if unused def uploadf(self, file: IO[bytes], key: str, **kwargs: dict) -> bool: pass - def _get_id_from_parent_and_name(self, parent_id: str, name: str, retries: int = 1, sleep_seconds: int = 10, use_mime_type: bool = False, raise_on_missing: bool = True, use_cache=False): + def _get_id_from_parent_and_name(self, parent_id: str, + name: str, + retries: int = 1, + sleep_seconds: int = 10, + use_mime_type: bool = False, + raise_on_missing: bool = True, + use_cache=False): """ Retrieves the id of a folder or file from its @name and the @parent_id folder Optionally does multiple @retries and sleeps @sleep_seconds between them diff --git a/tests/storages/test_S3_storage.py b/tests/storages/test_S3_storage.py index 60b40e6..2594e73 100644 --- a/tests/storages/test_S3_storage.py +++ b/tests/storages/test_S3_storage.py @@ -1,103 +1,11 @@ from typing import Type import pytest -from unittest.mock import MagicMock, patch, PropertyMock +from unittest.mock import MagicMock, patch from auto_archiver.core import Media from auto_archiver.modules.hash_enricher import HashEnricher from auto_archiver.modules.s3_storage import s3_storage -@patch('boto3.client') -@pytest.fixture -def s3_store(setup_module): - config: dict = { - "path_generator": "flat", - "filename_generator": "static", - "bucket": "test-bucket", - "region": "test-region", - "key": "test-key", - "secret": "test-secret", - "random_no_duplicate": False, - "endpoint_url": "https://{region}.example.com", - "cdn_url": "https://cdn.example.com/{key}", - "private": False, - } - s3_storage = setup_module("s3_storage", config) - return s3_storage - -def test_client_initialization(s3_store): - """Test that S3 client is initialized with correct parameters""" - assert s3_store.s3 is not None - assert s3_store.s3.meta.region_name == 'test-region' - - -def test_get_cdn_url_generation(s3_store): - """Test CDN URL formatting """ - media = Media("test.txt") - media.key = "path/to/file.txt" - url = s3_store.get_cdn_url(media) - assert url == "https://cdn.example.com/path/to/file.txt" - media.key = "another/path.jpg" - assert s3_store.get_cdn_url(media) == "https://cdn.example.com/another/path.jpg" - - -@patch.object(s3_storage.S3Storage, 'file_in_folder') -def test_skips_upload_when_duplicate_exists(mock_file_in_folder, s3_store): - """Test that upload skips when file_in_folder finds existing object""" - # Setup test-specific configuration - s3_store.random_no_duplicate = True - mock_file_in_folder.return_value = "existing_folder/existing_file.txt" - # Create test media with calculated hash - media = Media("test.txt") - media.key = "original_path.txt" - - # Mock hash calculation - with patch.object(s3_store, 'calculate_hash') as mock_calculate_hash: - mock_calculate_hash.return_value = "testhash123" - # Verify upload - assert s3_store.is_upload_needed(media) is False - assert media.key == "existing_folder/existing_file.txt" - assert media.get("previously archived") is True - - with patch.object(s3_store.s3, 'upload_fileobj') as mock_upload: - result = s3_store.uploadf(None, media) - mock_upload.assert_not_called() - assert result is True - -@patch.object(s3_storage.S3Storage, 'is_upload_needed') -def test_uploads_with_correct_parameters(mock_upload_needed, s3_store): - media = Media("test.txt") - mock_upload_needed.return_value = True - media.mimetype = 'image/png' - mock_file = MagicMock() - - with patch.object(s3_store.s3, 'upload_fileobj') as mock_upload: - s3_store.uploadf(mock_file, media) - - # Verify core upload parameters - mock_upload.assert_called_once_with( - mock_file, - Bucket='test-bucket', - # Key='original_key.txt', - Key=None, - ExtraArgs={ - 'ACL': 'public-read', - 'ContentType': 'image/png' - } - ) - - - - - - - - -# ============================================================ - - - - - class TestGDriveStorage: """ Test suite for GDriveStorage. @@ -121,10 +29,9 @@ class TestGDriveStorage: @patch('boto3.client') @pytest.fixture(autouse=True) def setup_storage(self, setup_module): - he = HashEnricher() self.storage = setup_module(self.module_name, self.config) - def test_client_initialization(self, setup_storage): + def test_client_initialization(self): """Test that S3 client is initialized with correct parameters""" assert self.storage.s3 is not None assert self.storage.s3.meta.region_name == 'test-region' @@ -138,37 +45,55 @@ class TestGDriveStorage: media.key = "another/path.jpg" assert self.storage.get_cdn_url(media) == "https://cdn.example.com/another/path.jpg" + def test_uploadf_sets_acl_public(self): + media = Media("test.txt") + mock_file = MagicMock() + with patch.object(self.storage.s3, 'upload_fileobj') as mock_s3_upload, \ + patch.object(self.storage, 'is_upload_needed', return_value=True): + self.storage.uploadf(mock_file, media) + mock_s3_upload.assert_called_once_with( + mock_file, + Bucket='test-bucket', + Key=media.key, + ExtraArgs={'ACL': 'public-read', 'ContentType': 'text/plain'} + ) + def test_upload_decision_logic(self): """Test is_upload_needed under different conditions""" media = Media("test.txt") - - # Test random_no_duplicate disabled + # Test default state (random_no_duplicate=False) assert self.storage.is_upload_needed(media) is True + # Set duplicate checking config to true: - # Test duplicate exists self.storage.random_no_duplicate = True - with patch.object(self.storage, 'file_in_folder', return_value='existing.txt'): + with patch('auto_archiver.modules.hash_enricher.HashEnricher.calculate_hash') as mock_calc_hash, \ + patch.object(self.storage, 'file_in_folder') as mock_file_in_folder: + mock_calc_hash.return_value = 'beepboop123beepboop123beepboop123' + mock_file_in_folder.return_value = 'existing_key.txt' + # Test duplicate result assert self.storage.is_upload_needed(media) is False - assert media.key == 'existing.txt' + assert media.key == 'existing_key.txt' + mock_file_in_folder.assert_called_with( + # (first 24 chars of hash) + 'no-dups/beepboop123beepboop123be' + ) + @patch.object(s3_storage.S3Storage, 'file_in_folder') def test_skips_upload_when_duplicate_exists(self, mock_file_in_folder): """Test that upload skips when file_in_folder finds existing object""" - # Setup test-specific configuration self.storage.random_no_duplicate = True mock_file_in_folder.return_value = "existing_folder/existing_file.txt" # Create test media with calculated hash media = Media("test.txt") media.key = "original_path.txt" - # Mock hash calculation - with patch.object(self.storage, 'calculate_hash') as mock_calculate_hash: - mock_calculate_hash.return_value = "testhash123" + with patch('auto_archiver.modules.hash_enricher.HashEnricher.calculate_hash') as mock_calculate_hash: + mock_calculate_hash.return_value = "beepboop123beepboop123beepboop123" # Verify upload assert self.storage.is_upload_needed(media) is False assert media.key == "existing_folder/existing_file.txt" assert media.get("previously archived") is True - with patch.object(self.storage.s3, 'upload_fileobj') as mock_upload: result = self.storage.uploadf(None, media) mock_upload.assert_not_called() @@ -177,21 +102,25 @@ class TestGDriveStorage: @patch.object(s3_storage.S3Storage, 'is_upload_needed') def test_uploads_with_correct_parameters(self, mock_upload_needed): media = Media("test.txt") + media.key = "original_key.txt" mock_upload_needed.return_value = True media.mimetype = 'image/png' mock_file = MagicMock() with patch.object(self.storage.s3, 'upload_fileobj') as mock_upload: self.storage.uploadf(mock_file, media) - - # Verify core upload parameters + # verify call occured with these params mock_upload.assert_called_once_with( mock_file, Bucket='test-bucket', - # Key='original_key.txt', - Key=None, + Key='original_key.txt', ExtraArgs={ 'ACL': 'public-read', 'ContentType': 'image/png' } - ) \ No newline at end of file + ) + + def test_file_in_folder_exists(self): + with patch.object(self.storage.s3, 'list_objects') as mock_list_objects: + mock_list_objects.return_value = {'Contents': [{'Key': 'path/to/file.txt'}]} + assert self.storage.file_in_folder('path/to/') == 'path/to/file.txt' \ No newline at end of file