"""Tests for the task entry points and helpers imported from ``worker.main``."""

from datetime import datetime
from unittest import mock
from unittest.mock import MagicMock, patch

import pytest

from db import models, schemas
from auto_archiver import Metadata
from auto_archiver.core import Media


class Test_create_archive_task:
    """Tests for ``worker.main.create_archive_task``."""

    URL = "https://example-live.com"
    # Shared request payload; each test serialises it with model_dump_json().
    archive = schemas.ArchiveCreate(
        url=URL,
        tags=["tag-celery"],
        public=True,
        author_id="rick@example.com",
        group_id="interstellar",
    )

    # NOTE: patch decorators are applied bottom-up, so the mock arguments
    # arrive in the reverse order of the decorator list.
    @patch("worker.main.insert_result_into_db")
    @patch("worker.main.get_store_until", return_value=datetime.now())
    @patch("worker.main.load_orchestrator")
    @patch("celery.app.task.Task.request")
    def test_success(self, m_req, m_load, m_store, m_insert, db_session):
        """A successful orchestrator result is inserted and returned."""
        from worker.main import create_archive_task

        m_req.id = "this-just-in"
        mock_orchestrator = self.mock_orchestrator_choice(m_load)

        task = create_archive_task(self.archive.model_dump_json())

        m_load.assert_called_once_with("interstellar")
        m_store.assert_called_once_with("interstellar")
        m_insert.assert_called_once()
        mock_orchestrator.feed_item.assert_called_once()
        assert task["status"] == "success"
        assert task["metadata"]["url"] == self.URL
        assert len(task["media"]) == 0

    def test_raise_invalid(self):
        """Calling the task with nothing patched raises."""
        from worker.main import create_archive_task

        with pytest.raises(Exception):
            create_archive_task(self.archive.model_dump_json())

    @patch("worker.main.insert_result_into_db", side_effect=Exception)
    @patch("worker.main.load_orchestrator")
    def test_raise_db_error(self, m_load, m_insert):
        """An exception raised by the DB insert propagates out of the task."""
        from worker.main import create_archive_task

        mock_orchestrator = self.mock_orchestrator_choice(m_load)

        with pytest.raises(Exception):
            create_archive_task(self.archive.model_dump_json())
        # Checked outside the ``with`` body: statements after the raising
        # call inside the block would never execute.
        mock_orchestrator.feed_item.assert_called_once()

    @patch("worker.main.insert_result_into_db", return_value=None)
    @patch("worker.main.load_orchestrator")
    def test_raise_empty_result(self, m_load, m_insert):
        """A ``None`` insert result raises an "UNABLE TO archive" error."""
        from worker.main import create_archive_task

        mock_orchestrator = self.mock_orchestrator_choice(m_load)

        with pytest.raises(Exception) as e:
            create_archive_task(self.archive.model_dump_json())
        # Inspect the raised exception itself (``e.value``) rather than the
        # ExceptionInfo wrapper, whose string form is a pytest detail.
        assert "UNABLE TO archive" in str(e.value)
        mock_orchestrator.feed_item.assert_called_once()

    def mock_orchestrator_choice(self, m_load):
        """Make ``m_load`` return a mock orchestrator and return that mock.

        The orchestrator's ``feed_item`` returns a successful ``Metadata``
        whose URL is ``self.URL``.
        """
        mock_orchestrator = mock.MagicMock()
        mock_orchestrator.configure_mock(
            feed_item=mock.MagicMock(
                return_value=Metadata().set_url(self.URL).success()
            )
        )
        m_load.return_value = mock_orchestrator
        return mock_orchestrator


class Test_create_sheet_task:
    """Tests for ``worker.main.create_sheet_task``."""

    URL = "https://example-live.com"
    # Shared sheet submission; the test serialises it with model_dump_json().
    sheet = schemas.SubmitSheet(
        sheet_id="123",
        author_id="rick@example.com",
        group_id="interstellar",
        tags=["spaceship"],
    )

    @patch("worker.main.models.generate_uuid", return_value="constant-uuid")
    @patch("worker.main.get_store_until", return_value=datetime.now())
    @patch("worker.main.load_orchestrator")
    def test_success(self, m_load, m_store, m_uuid, db_session):
        """A sheet run reports its stats and stores one archive entry."""
        from worker.main import create_sheet_task

        url_matches = models.Archive.url == self.URL
        assert db_session.query(models.Archive).filter(url_matches).count() == 0

        mock_metadata = Metadata().set_url(self.URL).success()
        mock_metadata.add_media(Media("fn1.txt", urls=["outcome1.com"]))

        # The feed yields one falsy entry followed by the same successful
        # result twice, with generate_uuid patched to a constant value.
        # NOTE(review): presumably this is what yields 1 archived / 1 failed
        # below -- confirm against worker.main.
        m_orch = MagicMock()
        m_orch.feed.return_value = iter([False, mock_metadata, mock_metadata])
        m_load.return_value = m_orch

        res = create_sheet_task(self.sheet.model_dump_json())

        m_load.assert_called_once_with(
            "interstellar",
            True,
            {'configurations': {'gsheet_feeder': {'sheet_id': '123'}}},
        )
        m_orch.feed.assert_called_once()
        m_store.assert_called_with("interstellar")
        # These were bare comparisons (no ``assert``) and verified nothing.
        assert m_store.call_count == 2
        assert m_uuid.call_count == 2

        assert isinstance(res, dict)
        assert res["stats"]["archived"] == 1
        assert res["stats"]["failed"] == 1
        assert len(res["stats"]["errors"]) == 1
        assert res["sheet_id"] == "123"
        assert res["success"]
        assert isinstance(res["time"], datetime)

        # query created archive entry
        inserted = db_session.query(models.Archive).filter(url_matches).one()
        assert inserted is not None
        assert inserted.url == self.URL
        assert len(inserted.tags) == 1
        assert inserted.tags[0].id == "spaceship"
        assert inserted.group_id == "interstellar"
        assert inserted.author_id == "rick@example.com"
        assert inserted.public is False


def test_get_all_urls(db_session):
    """``get_all_urls`` returns media URLs including nested Media values.

    Nested values exercised here: a single ``Media``, a list of ``Media``,
    and a ``Media`` serialised with ``to_dict()``. A plain dict is also
    attached and must not add a URL to the result.
    """
    from worker.main import get_all_urls

    meta = Metadata().set_url("https://example.com")
    m1 = meta.add_media(Media("fn1.txt", urls=["outcome1.com"]))
    m2 = meta.add_media(Media("fn2.txt", urls=["outcome2.com"]))
    m3 = meta.add_media(Media("fn3.txt", urls=["outcome3.com"]))
    m1.set("screenshot", Media("screenshot.png", urls=["screenshot.com"]))
    m2.set(
        "thumbnails",
        [
            Media("thumb1.png", urls=["thumb1.com"]),
            Media("thumb2.png", urls=["thumb2.com"]),
        ],
    )
    m3.set("ssl_data", Media("ssl_data.txt", urls=["ssl_data.com"]).to_dict())
    m3.set("bad_data", {"bad": "dict is ignored"})

    urls = [u.url for u in get_all_urls(meta)]

    assert len(urls) == 7
    expected_urls = (
        "outcome1.com",
        "outcome2.com",
        "outcome3.com",
        "screenshot.com",
        "thumb1.com",
        "thumb2.com",
        "ssl_data.com",
    )
    for expected in expected_urls:
        assert expected in urls