mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-12 21:28:29 +03:00
Merge GSheet Feeder and Database.
This commit is contained in:
@@ -2,8 +2,7 @@ from datetime import datetime, timezone
|
||||
import pytest
|
||||
|
||||
from auto_archiver.core import Metadata, Media
|
||||
from auto_archiver.modules.gsheet_db import GsheetsDb
|
||||
from auto_archiver.modules.gsheet_feeder import GWorksheet
|
||||
from auto_archiver.modules.gsheet_feeder_db import GsheetsFeederDB, GWorksheet
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -32,8 +31,9 @@ def mock_metadata(mocker):
|
||||
@pytest.fixture
|
||||
def metadata():
|
||||
metadata = Metadata()
|
||||
metadata.add_media(Media(filename="screenshot.png", urls=["http://example.com/screenshot.png"]).set("id", "screenshot"))
|
||||
metadata.add_media(Media(filename="browsertrix", urls=["http://example.com/browsertrix.wacz"]).set("id", "browsertrix"))
|
||||
metadata.add_media(Media(filename="screenshot", urls=["http://example.com/screenshot.png"]))
|
||||
metadata.add_media(Media(filename="browsertrix", urls=["http://example.com/browsertrix.wacz"]))
|
||||
metadata.add_media(Media(filename="thumbnail", urls=["http://example.com/thumbnail.png"]))
|
||||
metadata.set_url("http://example.com")
|
||||
metadata.set_title("Example Title")
|
||||
metadata.set_content("Example Content")
|
||||
@@ -52,12 +52,19 @@ def mock_media(mocker):
|
||||
return mock_media
|
||||
|
||||
@pytest.fixture
|
||||
def gsheets_db(mock_gworksheet, setup_module, mocker) -> GsheetsDb:
|
||||
db = setup_module("gsheet_db", {
|
||||
"allow_worksheets": "set()",
|
||||
"block_worksheets": "set()",
|
||||
"use_sheet_names_in_stored_paths": "True",
|
||||
})
|
||||
def gsheets_db(mock_gworksheet, setup_module, mocker):
|
||||
mocker.patch("gspread.service_account")
|
||||
config: dict = {
|
||||
"sheet": "testsheet",
|
||||
"sheet_id": None,
|
||||
"header": 1,
|
||||
"service_account": "test/service_account.json",
|
||||
"columns": {'url': 'link', 'status': 'archive status', 'folder': 'destination folder', 'archive': 'archive location', 'date': 'archive date', 'thumbnail': 'thumbnail', 'timestamp': 'upload timestamp', 'title': 'upload title', 'text': 'text content', 'screenshot': 'screenshot', 'hash': 'hash', 'pdq_hash': 'perceptual hashes', 'wacz': 'wacz', 'replaywebpage': 'replaywebpage'},
|
||||
"allow_worksheets": set(),
|
||||
"block_worksheets": set(),
|
||||
"use_sheet_names_in_stored_paths": True,
|
||||
}
|
||||
db = setup_module("gsheet_feeder_db", config)
|
||||
db._retrieve_gsheet = mocker.MagicMock(return_value=(mock_gworksheet, 1))
|
||||
return db
|
||||
|
||||
@@ -79,10 +86,10 @@ def expected_calls(mock_media, fixed_timestamp):
|
||||
(1, 'text', 'Example Content'),
|
||||
(1, 'timestamp', '2025-01-01T00:00:00+00:00'),
|
||||
(1, 'hash', 'not-calculated'),
|
||||
(1, 'screenshot', 'http://example.com/screenshot.png'),
|
||||
(1, 'thumbnail', '=IMAGE("http://example.com/screenshot.png")'),
|
||||
(1, 'wacz', 'http://example.com/browsertrix.wacz'),
|
||||
(1, 'replaywebpage', 'https://replayweb.page/?source=http%3A//example.com/browsertrix.wacz#view=pages&url=http%3A//example.com')
|
||||
# (1, 'screenshot', 'http://example.com/screenshot.png'),
|
||||
# (1, 'thumbnail', '=IMAGE("http://example.com/thumbnail.png")'),
|
||||
# (1, 'wacz', 'http://example.com/browsertrix.wacz'),
|
||||
# (1, 'replaywebpage', 'https://replayweb.page/?source=http%3A%2F%2Fexample.com%2Fbrowsertrix.wacz#view=pages&url=')
|
||||
]
|
||||
|
||||
def test_retrieve_gsheet(gsheets_db, metadata, mock_gworksheet):
|
||||
@@ -107,13 +114,13 @@ def test_aborted(gsheets_db, mock_metadata, mock_gworksheet):
|
||||
|
||||
|
||||
def test_done(gsheets_db, metadata, mock_gworksheet, expected_calls, mocker):
|
||||
mocker.patch("auto_archiver.modules.gsheet_db.gsheet_db.get_current_timestamp", return_value='2025-02-01T00:00:00+00:00')
|
||||
mocker.patch("auto_archiver.modules.gsheet_feeder_db.gsheet_feeder_db.get_current_timestamp", return_value='2025-02-01T00:00:00+00:00')
|
||||
gsheets_db.done(metadata)
|
||||
mock_gworksheet.batch_set_cell.assert_called_once_with(expected_calls)
|
||||
|
||||
|
||||
def test_done_cached(gsheets_db, metadata, mock_gworksheet, mocker):
|
||||
mocker.patch("auto_archiver.modules.gsheet_db.gsheet_db.get_current_timestamp", return_value='2025-02-01T00:00:00+00:00')
|
||||
mocker.patch("auto_archiver.modules.gsheet_feeder_db.gsheet_feeder_db.get_current_timestamp", return_value='2025-02-01T00:00:00+00:00')
|
||||
gsheets_db.done(metadata, cached=True)
|
||||
|
||||
# Verify the status message includes "[cached]"
|
||||
@@ -124,7 +131,7 @@ def test_done_cached(gsheets_db, metadata, mock_gworksheet, mocker):
|
||||
def test_done_missing_media(gsheets_db, metadata, mock_gworksheet, mocker):
|
||||
# clear media from metadata
|
||||
metadata.media = []
|
||||
mocker.patch("auto_archiver.modules.gsheet_db.gsheet_db.get_current_timestamp", return_value='2025-02-01T00:00:00+00:00')
|
||||
mocker.patch("auto_archiver.modules.gsheet_feeder_db.gsheet_feeder_db.get_current_timestamp", return_value='2025-02-01T00:00:00+00:00')
|
||||
gsheets_db.done(metadata)
|
||||
# Verify nothing media-related gets updated
|
||||
call_args = mock_gworksheet.batch_set_cell.call_args[0][0]
|
||||
|
||||
@@ -2,7 +2,7 @@ from typing import Type
|
||||
|
||||
import gspread
|
||||
import pytest
|
||||
from auto_archiver.modules.gsheet_feeder import GsheetsFeeder
|
||||
from auto_archiver.modules.gsheet_feeder_db import GsheetsFeederDB
|
||||
from auto_archiver.core import Metadata, Feeder
|
||||
|
||||
|
||||
@@ -11,13 +11,13 @@ def test_setup_without_sheet_and_sheet_id(setup_module, mocker):
|
||||
mocker.patch("gspread.service_account")
|
||||
with pytest.raises(ValueError):
|
||||
setup_module(
|
||||
"gsheet_feeder",
|
||||
"gsheet_feeder_db",
|
||||
{"service_account": "dummy.json", "sheet": None, "sheet_id": None},
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def gsheet_feeder(setup_module, mocker) -> GsheetsFeeder:
|
||||
def gsheet_feeder(setup_module, mocker) -> GsheetsFeederDB:
|
||||
config: dict = {
|
||||
"service_account": "dummy.json",
|
||||
"sheet": "test-auto-archiver",
|
||||
@@ -45,7 +45,7 @@ def gsheet_feeder(setup_module, mocker) -> GsheetsFeeder:
|
||||
}
|
||||
mocker.patch("gspread.service_account")
|
||||
feeder = setup_module(
|
||||
"gsheet_feeder",
|
||||
"gsheet_feeder_db",
|
||||
config
|
||||
)
|
||||
feeder.gsheets_client = mocker.MagicMock()
|
||||
@@ -90,7 +90,7 @@ class MockWorksheet:
|
||||
return matching.get(col_name, default)
|
||||
|
||||
|
||||
def test__process_rows(gsheet_feeder: GsheetsFeeder):
|
||||
def test__process_rows(gsheet_feeder: GsheetsFeederDB):
|
||||
testworksheet = MockWorksheet()
|
||||
metadata_items = list(gsheet_feeder._process_rows(testworksheet))
|
||||
assert len(metadata_items) == 3
|
||||
@@ -98,7 +98,7 @@ def test__process_rows(gsheet_feeder: GsheetsFeeder):
|
||||
assert metadata_items[0].get("url") == "http://example.com"
|
||||
|
||||
|
||||
def test__set_metadata(gsheet_feeder: GsheetsFeeder):
|
||||
def test__set_metadata(gsheet_feeder: GsheetsFeederDB):
|
||||
worksheet = MockWorksheet()
|
||||
metadata = Metadata()
|
||||
gsheet_feeder._set_context(metadata, worksheet, 1)
|
||||
@@ -106,12 +106,12 @@ def test__set_metadata(gsheet_feeder: GsheetsFeeder):
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="Not recognising folder column")
|
||||
def test__set_metadata_with_folder_pickled(gsheet_feeder: GsheetsFeeder, worksheet):
|
||||
def test__set_metadata_with_folder_pickled(gsheet_feeder: GsheetsFeederDB, worksheet):
|
||||
gsheet_feeder._set_context(worksheet, 7)
|
||||
assert Metadata.get_context("gsheet") == {"row": 1, "worksheet": worksheet}
|
||||
|
||||
|
||||
def test__set_metadata_with_folder(gsheet_feeder: GsheetsFeeder):
|
||||
def test__set_metadata_with_folder(gsheet_feeder: GsheetsFeederDB):
|
||||
testworksheet = MockWorksheet()
|
||||
metadata = Metadata()
|
||||
testworksheet.wks.title = "TestSheet"
|
||||
@@ -140,7 +140,7 @@ def test_open_sheet_with_name_or_id(
|
||||
|
||||
# Setup module with parameterized values
|
||||
feeder = setup_module(
|
||||
"gsheet_feeder",
|
||||
"gsheet_feeder_db",
|
||||
{"service_account": "dummy.json", "sheet": sheet, "sheet_id": sheet_id},
|
||||
)
|
||||
sheet_result = feeder.open_sheet()
|
||||
@@ -159,7 +159,7 @@ def test_open_sheet_with_sheet_id(setup_module, mocker):
|
||||
mock_service_account.return_value = mock_client
|
||||
mock_client.open_by_key.return_value = "MockSheet"
|
||||
feeder = setup_module(
|
||||
"gsheet_feeder",
|
||||
"gsheet_feeder_db",
|
||||
{"service_account": "dummy.json", "sheet": None, "sheet_id": "ABC123"},
|
||||
)
|
||||
sheet = feeder.open_sheet()
|
||||
@@ -170,7 +170,7 @@ def test_open_sheet_with_sheet_id(setup_module, mocker):
|
||||
def test_should_process_sheet(setup_module, mocker):
|
||||
mocker.patch("gspread.service_account")
|
||||
gdb = setup_module(
|
||||
"gsheet_feeder",
|
||||
"gsheet_feeder_db",
|
||||
{
|
||||
"service_account": "dummy.json",
|
||||
"sheet": "TestSheet",
|
||||
@@ -187,10 +187,10 @@ def test_should_process_sheet(setup_module, mocker):
|
||||
|
||||
@pytest.mark.skip(reason="Requires a real connection")
|
||||
class TestGSheetsFeederReal:
|
||||
"""Testing GSheetsFeeder class"""
|
||||
"""Testing GsheetsFeeder class"""
|
||||
|
||||
module_name: str = "gsheet_feeder"
|
||||
feeder: GsheetsFeeder
|
||||
module_name: str = "gsheet_feeder_db"
|
||||
feeder: GsheetsFeederDB
|
||||
# You must follow the setup process explain in the docs for this to work
|
||||
config: dict = {
|
||||
"service_account": "secrets/service_account.json",
|
||||
|
||||
Reference in New Issue
Block a user