Ruff format with defaults.

This commit is contained in:
erinhmclark
2025-03-10 18:44:54 +00:00
parent cbb0414e5f
commit 85abe1837a
155 changed files with 2539 additions and 1908 deletions

View File

@@ -36,29 +36,45 @@ def atlos_feeder(setup_module, mocker) -> AtlosFeeder:
@pytest.fixture
def mock_atlos_api(atlos_feeder):
"""Fixture to update the atlos_feeder.session.get side_effect."""
def _mock_responses(responses):
atlos_feeder.session.get.side_effect = [FakeAPIResponse(data) for data in responses]
return _mock_responses
def test_atlos_feeder_iter_yields_valid_metadata(atlos_feeder, mock_atlos_api):
"""Test valid items are yielded and invalid ones ignored."""
mock_atlos_api([
{
"next": None,
"results": [
{"source_url": "http://example.com", "id": 1,
"metadata": {"auto_archiver": {"processed": False}},
"visibility": "visible", "status": "complete"},
{"source_url": "", "id": 2,
"metadata": {"auto_archiver": {"processed": False}},
"visibility": "visible", "status": "complete"},
{"source_url": "http://example.org", "id": 3,
"metadata": {"auto_archiver": {"processed": True}},
"visibility": "visible", "status": "complete"},
],
}
])
mock_atlos_api(
[
{
"next": None,
"results": [
{
"source_url": "http://example.com",
"id": 1,
"metadata": {"auto_archiver": {"processed": False}},
"visibility": "visible",
"status": "complete",
},
{
"source_url": "",
"id": 2,
"metadata": {"auto_archiver": {"processed": False}},
"visibility": "visible",
"status": "complete",
},
{
"source_url": "http://example.org",
"id": 3,
"metadata": {"auto_archiver": {"processed": True}},
"visibility": "visible",
"status": "complete",
},
],
}
]
)
items = list(atlos_feeder)
assert len(items) == 1
@@ -68,24 +84,34 @@ def test_atlos_feeder_iter_yields_valid_metadata(atlos_feeder, mock_atlos_api):
def test_atlos_feeder_multiple_pages(atlos_feeder, mock_atlos_api):
"""Test iteration over multiple pages with valid items."""
mock_atlos_api([
{
"next": "cursor2",
"results": [
{"source_url": "http://example1.com", "id": 10,
"metadata": {"auto_archiver": {"processed": False}},
"visibility": "visible", "status": "complete"},
],
},
{
"next": None,
"results": [
{"source_url": "http://example2.com", "id": 20,
"metadata": {"auto_archiver": {"processed": False}},
"visibility": "visible", "status": "complete"},
],
},
])
mock_atlos_api(
[
{
"next": "cursor2",
"results": [
{
"source_url": "http://example1.com",
"id": 10,
"metadata": {"auto_archiver": {"processed": False}},
"visibility": "visible",
"status": "complete",
},
],
},
{
"next": None,
"results": [
{
"source_url": "http://example2.com",
"id": 20,
"metadata": {"auto_archiver": {"processed": False}},
"visibility": "visible",
"status": "complete",
},
],
},
]
)
items = list(atlos_feeder)
assert len(items) == 2

View File

@@ -1,13 +1,16 @@
import pytest
@pytest.fixture
def headerless_csv_file():
return "tests/data/csv_no_headers.csv"
@pytest.fixture
def header_csv_file():
return "tests/data/csv_with_headers.csv"
@pytest.fixture
def header_csv_file_non_default_column():
return "tests/data/csv_with_headers_non_default_column.csv"
@@ -23,6 +26,7 @@ def test_csv_feeder_no_headers(headerless_csv_file, setup_module):
assert urls[0].get_url() == "https://example.com/1/"
assert urls[1].get_url() == "https://example.com/2/"
def test_csv_feeder_with_headers(header_csv_file, setup_module):
from auto_archiver.modules.csv_feeder.csv_feeder import CSVFeeder
@@ -33,10 +37,10 @@ def test_csv_feeder_with_headers(header_csv_file, setup_module):
assert urls[0].get_url() == "https://example.com/1/"
assert urls[1].get_url() == "https://example.com/2/"
def test_csv_feeder_wrong_column(header_csv_file, setup_module, caplog):
from auto_archiver.modules.csv_feeder.csv_feeder import CSVFeeder
with caplog.at_level("WARNING"):
feeder = setup_module(CSVFeeder, {"files": [header_csv_file], "column": 1})
urls = list(feeder)
@@ -54,4 +58,4 @@ def test_csv_feeder_column_by_name(header_csv_file, setup_module):
urls = list(feeder)
assert len(urls) == 2
assert urls[0].get_url() == "https://example.com/1/"
assert urls[1].get_url() == "https://example.com/2/"
assert urls[1].get_url() == "https://example.com/2/"

View File

@@ -19,35 +19,32 @@ def test_setup_without_sheet_and_sheet_id(setup_module, mocker):
@pytest.fixture
def gsheet_feeder(setup_module, mocker) -> GsheetsFeederDB:
config: dict = {
"service_account": "dummy.json",
"sheet": "test-auto-archiver",
"sheet_id": None,
"header": 1,
"columns": {
"url": "link",
"status": "archive status",
"folder": "destination folder",
"archive": "archive location",
"date": "archive date",
"thumbnail": "thumbnail",
"timestamp": "upload timestamp",
"title": "upload title",
"text": "text content",
"screenshot": "screenshot",
"hash": "hash",
"pdq_hash": "perceptual hashes",
"wacz": "wacz",
"replaywebpage": "replaywebpage",
},
"allow_worksheets": set(),
"block_worksheets": set(),
"use_sheet_names_in_stored_paths": True,
}
"service_account": "dummy.json",
"sheet": "test-auto-archiver",
"sheet_id": None,
"header": 1,
"columns": {
"url": "link",
"status": "archive status",
"folder": "destination folder",
"archive": "archive location",
"date": "archive date",
"thumbnail": "thumbnail",
"timestamp": "upload timestamp",
"title": "upload title",
"text": "text content",
"screenshot": "screenshot",
"hash": "hash",
"pdq_hash": "perceptual hashes",
"wacz": "wacz",
"replaywebpage": "replaywebpage",
},
"allow_worksheets": set(),
"block_worksheets": set(),
"use_sheet_names_in_stored_paths": True,
}
mocker.patch("gspread.service_account")
feeder = setup_module(
"gsheet_feeder_db",
config
)
feeder = setup_module("gsheet_feeder_db", config)
feeder.gsheets_client = mocker.MagicMock()
return feeder
@@ -128,9 +125,7 @@ def test__set_metadata_with_folder(gsheet_feeder: GsheetsFeederDB):
(None, "ABC123", "open_by_key", "ABC123", "opening by sheet ID"),
],
)
def test_open_sheet_with_name_or_id(
setup_module, sheet, sheet_id, expected_method, expected_arg, description, mocker
):
def test_open_sheet_with_name_or_id(setup_module, sheet, sheet_id, expected_method, expected_arg, description, mocker):
"""Ensure open_sheet() correctly opens by name or ID based on configuration."""
mock_service_account = mocker.patch("gspread.service_account")
mock_client = mocker.MagicMock()
@@ -145,9 +140,7 @@ def test_open_sheet_with_name_or_id(
)
sheet_result = feeder.open_sheet()
# Validate the correct method was called
getattr(mock_client, expected_method).assert_called_once_with(
expected_arg
), f"Failed: {description}"
getattr(mock_client, expected_method).assert_called_once_with(expected_arg), f"Failed: {description}"
assert sheet_result == "MockSheet", f"Failed: {description}"
@@ -220,9 +213,7 @@ class TestGSheetsFeederReal:
@pytest.fixture(autouse=True)
def setup_feeder(self, setup_module):
assert (
self.module_name is not None
), "self.module_name must be set on the subclass"
assert self.module_name is not None, "self.module_name must be set on the subclass"
assert self.config is not None, "self.config must be a dict set on the subclass"
self.feeder: Type[Feeder] = setup_module(self.module_name, self.config)
@@ -241,9 +232,7 @@ class TestGSheetsFeederReal:
"""Ensure open_sheet() connects to a real Google Sheets instance."""
sheet = self.feeder.open_sheet()
assert sheet is not None, "open_sheet() should return a valid sheet instance"
assert hasattr(
sheet, "worksheets"
), "Returned object should have worksheets method"
assert hasattr(sheet, "worksheets"), "Returned object should have worksheets method"
def test_iter_yields_metadata_real_data(self):
"""Ensure __iter__() yields Metadata objects for real test sheet data."""

View File

@@ -81,40 +81,27 @@ class TestGWorksheet:
(False, ""),
],
)
def test_get_cell_or_default_handles_empty_values(
self, mock_worksheet, when_empty, expected
):
def test_get_cell_or_default_handles_empty_values(self, mock_worksheet, when_empty, expected):
mock_worksheet.get_values.return_value[1][0] = "" # Empty URL cell
g = GWorksheet(mock_worksheet)
assert (
g.get_cell_or_default(
2, "url", default="default", when_empty_use_default=when_empty
)
== expected
)
assert g.get_cell_or_default(2, "url", default="default", when_empty_use_default=when_empty) == expected
def test_get_cell_or_default_handles_missing_columns(self, gworksheet):
assert (
gworksheet.get_cell_or_default(1, "invalid_col", default="safe") == "safe"
)
assert gworksheet.get_cell_or_default(1, "invalid_col", default="safe") == "safe"
# Test write operations
def test_set_cell_updates_correct_position(self, mock_worksheet, gworksheet):
gworksheet.set_cell(2, "url", "new_url")
mock_worksheet.update_cell.assert_called_once_with(2, 1, "new_url")
def test_batch_set_cell_formats_requests_correctly(
self, mock_worksheet, gworksheet
):
def test_batch_set_cell_formats_requests_correctly(self, mock_worksheet, gworksheet):
updates = [(2, "url", "new_url"), (3, "status", "processed")]
gworksheet.batch_set_cell(updates)
expected_batch = [
{"range": "A2", "values": [["new_url"]]},
{"range": "B3", "values": [["processed"]]},
]
mock_worksheet.batch_update.assert_called_once_with(
expected_batch, value_input_option="USER_ENTERED"
)
mock_worksheet.batch_update.assert_called_once_with(expected_batch, value_input_option="USER_ENTERED")
def test_batch_set_cell_truncates_long_values(self, mock_worksheet, gworksheet):
long_value = "x" * 50000