mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-08 03:18:28 +03:00
Ruff format with defaults.
This commit is contained in:
@@ -36,29 +36,45 @@ def atlos_feeder(setup_module, mocker) -> AtlosFeeder:
|
||||
@pytest.fixture
|
||||
def mock_atlos_api(atlos_feeder):
|
||||
"""Fixture to update the atlos_feeder.session.get side_effect."""
|
||||
|
||||
def _mock_responses(responses):
|
||||
atlos_feeder.session.get.side_effect = [FakeAPIResponse(data) for data in responses]
|
||||
|
||||
return _mock_responses
|
||||
|
||||
|
||||
def test_atlos_feeder_iter_yields_valid_metadata(atlos_feeder, mock_atlos_api):
|
||||
"""Test valid items are yielded and invalid ones ignored."""
|
||||
mock_atlos_api([
|
||||
{
|
||||
"next": None,
|
||||
"results": [
|
||||
{"source_url": "http://example.com", "id": 1,
|
||||
"metadata": {"auto_archiver": {"processed": False}},
|
||||
"visibility": "visible", "status": "complete"},
|
||||
{"source_url": "", "id": 2,
|
||||
"metadata": {"auto_archiver": {"processed": False}},
|
||||
"visibility": "visible", "status": "complete"},
|
||||
{"source_url": "http://example.org", "id": 3,
|
||||
"metadata": {"auto_archiver": {"processed": True}},
|
||||
"visibility": "visible", "status": "complete"},
|
||||
],
|
||||
}
|
||||
])
|
||||
mock_atlos_api(
|
||||
[
|
||||
{
|
||||
"next": None,
|
||||
"results": [
|
||||
{
|
||||
"source_url": "http://example.com",
|
||||
"id": 1,
|
||||
"metadata": {"auto_archiver": {"processed": False}},
|
||||
"visibility": "visible",
|
||||
"status": "complete",
|
||||
},
|
||||
{
|
||||
"source_url": "",
|
||||
"id": 2,
|
||||
"metadata": {"auto_archiver": {"processed": False}},
|
||||
"visibility": "visible",
|
||||
"status": "complete",
|
||||
},
|
||||
{
|
||||
"source_url": "http://example.org",
|
||||
"id": 3,
|
||||
"metadata": {"auto_archiver": {"processed": True}},
|
||||
"visibility": "visible",
|
||||
"status": "complete",
|
||||
},
|
||||
],
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
items = list(atlos_feeder)
|
||||
assert len(items) == 1
|
||||
@@ -68,24 +84,34 @@ def test_atlos_feeder_iter_yields_valid_metadata(atlos_feeder, mock_atlos_api):
|
||||
|
||||
def test_atlos_feeder_multiple_pages(atlos_feeder, mock_atlos_api):
|
||||
"""Test iteration over multiple pages with valid items."""
|
||||
mock_atlos_api([
|
||||
{
|
||||
"next": "cursor2",
|
||||
"results": [
|
||||
{"source_url": "http://example1.com", "id": 10,
|
||||
"metadata": {"auto_archiver": {"processed": False}},
|
||||
"visibility": "visible", "status": "complete"},
|
||||
],
|
||||
},
|
||||
{
|
||||
"next": None,
|
||||
"results": [
|
||||
{"source_url": "http://example2.com", "id": 20,
|
||||
"metadata": {"auto_archiver": {"processed": False}},
|
||||
"visibility": "visible", "status": "complete"},
|
||||
],
|
||||
},
|
||||
])
|
||||
mock_atlos_api(
|
||||
[
|
||||
{
|
||||
"next": "cursor2",
|
||||
"results": [
|
||||
{
|
||||
"source_url": "http://example1.com",
|
||||
"id": 10,
|
||||
"metadata": {"auto_archiver": {"processed": False}},
|
||||
"visibility": "visible",
|
||||
"status": "complete",
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
"next": None,
|
||||
"results": [
|
||||
{
|
||||
"source_url": "http://example2.com",
|
||||
"id": 20,
|
||||
"metadata": {"auto_archiver": {"processed": False}},
|
||||
"visibility": "visible",
|
||||
"status": "complete",
|
||||
},
|
||||
],
|
||||
},
|
||||
]
|
||||
)
|
||||
|
||||
items = list(atlos_feeder)
|
||||
assert len(items) == 2
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def headerless_csv_file():
|
||||
return "tests/data/csv_no_headers.csv"
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def header_csv_file():
|
||||
return "tests/data/csv_with_headers.csv"
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def header_csv_file_non_default_column():
|
||||
return "tests/data/csv_with_headers_non_default_column.csv"
|
||||
@@ -23,6 +26,7 @@ def test_csv_feeder_no_headers(headerless_csv_file, setup_module):
|
||||
assert urls[0].get_url() == "https://example.com/1/"
|
||||
assert urls[1].get_url() == "https://example.com/2/"
|
||||
|
||||
|
||||
def test_csv_feeder_with_headers(header_csv_file, setup_module):
|
||||
from auto_archiver.modules.csv_feeder.csv_feeder import CSVFeeder
|
||||
|
||||
@@ -33,10 +37,10 @@ def test_csv_feeder_with_headers(header_csv_file, setup_module):
|
||||
assert urls[0].get_url() == "https://example.com/1/"
|
||||
assert urls[1].get_url() == "https://example.com/2/"
|
||||
|
||||
|
||||
def test_csv_feeder_wrong_column(header_csv_file, setup_module, caplog):
|
||||
from auto_archiver.modules.csv_feeder.csv_feeder import CSVFeeder
|
||||
|
||||
|
||||
with caplog.at_level("WARNING"):
|
||||
feeder = setup_module(CSVFeeder, {"files": [header_csv_file], "column": 1})
|
||||
urls = list(feeder)
|
||||
@@ -54,4 +58,4 @@ def test_csv_feeder_column_by_name(header_csv_file, setup_module):
|
||||
urls = list(feeder)
|
||||
assert len(urls) == 2
|
||||
assert urls[0].get_url() == "https://example.com/1/"
|
||||
assert urls[1].get_url() == "https://example.com/2/"
|
||||
assert urls[1].get_url() == "https://example.com/2/"
|
||||
|
||||
@@ -19,35 +19,32 @@ def test_setup_without_sheet_and_sheet_id(setup_module, mocker):
|
||||
@pytest.fixture
|
||||
def gsheet_feeder(setup_module, mocker) -> GsheetsFeederDB:
|
||||
config: dict = {
|
||||
"service_account": "dummy.json",
|
||||
"sheet": "test-auto-archiver",
|
||||
"sheet_id": None,
|
||||
"header": 1,
|
||||
"columns": {
|
||||
"url": "link",
|
||||
"status": "archive status",
|
||||
"folder": "destination folder",
|
||||
"archive": "archive location",
|
||||
"date": "archive date",
|
||||
"thumbnail": "thumbnail",
|
||||
"timestamp": "upload timestamp",
|
||||
"title": "upload title",
|
||||
"text": "text content",
|
||||
"screenshot": "screenshot",
|
||||
"hash": "hash",
|
||||
"pdq_hash": "perceptual hashes",
|
||||
"wacz": "wacz",
|
||||
"replaywebpage": "replaywebpage",
|
||||
},
|
||||
"allow_worksheets": set(),
|
||||
"block_worksheets": set(),
|
||||
"use_sheet_names_in_stored_paths": True,
|
||||
}
|
||||
"service_account": "dummy.json",
|
||||
"sheet": "test-auto-archiver",
|
||||
"sheet_id": None,
|
||||
"header": 1,
|
||||
"columns": {
|
||||
"url": "link",
|
||||
"status": "archive status",
|
||||
"folder": "destination folder",
|
||||
"archive": "archive location",
|
||||
"date": "archive date",
|
||||
"thumbnail": "thumbnail",
|
||||
"timestamp": "upload timestamp",
|
||||
"title": "upload title",
|
||||
"text": "text content",
|
||||
"screenshot": "screenshot",
|
||||
"hash": "hash",
|
||||
"pdq_hash": "perceptual hashes",
|
||||
"wacz": "wacz",
|
||||
"replaywebpage": "replaywebpage",
|
||||
},
|
||||
"allow_worksheets": set(),
|
||||
"block_worksheets": set(),
|
||||
"use_sheet_names_in_stored_paths": True,
|
||||
}
|
||||
mocker.patch("gspread.service_account")
|
||||
feeder = setup_module(
|
||||
"gsheet_feeder_db",
|
||||
config
|
||||
)
|
||||
feeder = setup_module("gsheet_feeder_db", config)
|
||||
feeder.gsheets_client = mocker.MagicMock()
|
||||
return feeder
|
||||
|
||||
@@ -128,9 +125,7 @@ def test__set_metadata_with_folder(gsheet_feeder: GsheetsFeederDB):
|
||||
(None, "ABC123", "open_by_key", "ABC123", "opening by sheet ID"),
|
||||
],
|
||||
)
|
||||
def test_open_sheet_with_name_or_id(
|
||||
setup_module, sheet, sheet_id, expected_method, expected_arg, description, mocker
|
||||
):
|
||||
def test_open_sheet_with_name_or_id(setup_module, sheet, sheet_id, expected_method, expected_arg, description, mocker):
|
||||
"""Ensure open_sheet() correctly opens by name or ID based on configuration."""
|
||||
mock_service_account = mocker.patch("gspread.service_account")
|
||||
mock_client = mocker.MagicMock()
|
||||
@@ -145,9 +140,7 @@ def test_open_sheet_with_name_or_id(
|
||||
)
|
||||
sheet_result = feeder.open_sheet()
|
||||
# Validate the correct method was called
|
||||
getattr(mock_client, expected_method).assert_called_once_with(
|
||||
expected_arg
|
||||
), f"Failed: {description}"
|
||||
getattr(mock_client, expected_method).assert_called_once_with(expected_arg), f"Failed: {description}"
|
||||
assert sheet_result == "MockSheet", f"Failed: {description}"
|
||||
|
||||
|
||||
@@ -220,9 +213,7 @@ class TestGSheetsFeederReal:
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def setup_feeder(self, setup_module):
|
||||
assert (
|
||||
self.module_name is not None
|
||||
), "self.module_name must be set on the subclass"
|
||||
assert self.module_name is not None, "self.module_name must be set on the subclass"
|
||||
assert self.config is not None, "self.config must be a dict set on the subclass"
|
||||
self.feeder: Type[Feeder] = setup_module(self.module_name, self.config)
|
||||
|
||||
@@ -241,9 +232,7 @@ class TestGSheetsFeederReal:
|
||||
"""Ensure open_sheet() connects to a real Google Sheets instance."""
|
||||
sheet = self.feeder.open_sheet()
|
||||
assert sheet is not None, "open_sheet() should return a valid sheet instance"
|
||||
assert hasattr(
|
||||
sheet, "worksheets"
|
||||
), "Returned object should have worksheets method"
|
||||
assert hasattr(sheet, "worksheets"), "Returned object should have worksheets method"
|
||||
|
||||
def test_iter_yields_metadata_real_data(self):
|
||||
"""Ensure __iter__() yields Metadata objects for real test sheet data."""
|
||||
|
||||
@@ -81,40 +81,27 @@ class TestGWorksheet:
|
||||
(False, ""),
|
||||
],
|
||||
)
|
||||
def test_get_cell_or_default_handles_empty_values(
|
||||
self, mock_worksheet, when_empty, expected
|
||||
):
|
||||
def test_get_cell_or_default_handles_empty_values(self, mock_worksheet, when_empty, expected):
|
||||
mock_worksheet.get_values.return_value[1][0] = "" # Empty URL cell
|
||||
g = GWorksheet(mock_worksheet)
|
||||
assert (
|
||||
g.get_cell_or_default(
|
||||
2, "url", default="default", when_empty_use_default=when_empty
|
||||
)
|
||||
== expected
|
||||
)
|
||||
assert g.get_cell_or_default(2, "url", default="default", when_empty_use_default=when_empty) == expected
|
||||
|
||||
def test_get_cell_or_default_handles_missing_columns(self, gworksheet):
|
||||
assert (
|
||||
gworksheet.get_cell_or_default(1, "invalid_col", default="safe") == "safe"
|
||||
)
|
||||
assert gworksheet.get_cell_or_default(1, "invalid_col", default="safe") == "safe"
|
||||
|
||||
# Test write operations
|
||||
def test_set_cell_updates_correct_position(self, mock_worksheet, gworksheet):
|
||||
gworksheet.set_cell(2, "url", "new_url")
|
||||
mock_worksheet.update_cell.assert_called_once_with(2, 1, "new_url")
|
||||
|
||||
def test_batch_set_cell_formats_requests_correctly(
|
||||
self, mock_worksheet, gworksheet
|
||||
):
|
||||
def test_batch_set_cell_formats_requests_correctly(self, mock_worksheet, gworksheet):
|
||||
updates = [(2, "url", "new_url"), (3, "status", "processed")]
|
||||
gworksheet.batch_set_cell(updates)
|
||||
expected_batch = [
|
||||
{"range": "A2", "values": [["new_url"]]},
|
||||
{"range": "B3", "values": [["processed"]]},
|
||||
]
|
||||
mock_worksheet.batch_update.assert_called_once_with(
|
||||
expected_batch, value_input_option="USER_ENTERED"
|
||||
)
|
||||
mock_worksheet.batch_update.assert_called_once_with(expected_batch, value_input_option="USER_ENTERED")
|
||||
|
||||
def test_batch_set_cell_truncates_long_values(self, mock_worksheet, gworksheet):
|
||||
long_value = "x" * 50000
|
||||
|
||||
Reference in New Issue
Block a user