From d9f40c8e082953a5084f2c1023a7cf6e19c4d4e8 Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Wed, 19 Feb 2025 18:24:46 +0000 Subject: [PATCH] bumps auto-archiver to 0.13.4 --- app/tests/worker/test_worker_main.py | 21 +++++++++++---------- app/worker/main.py | 11 ++++++----- poetry.lock | 6 +++--- worker.Dockerfile | 2 +- 4 files changed, 21 insertions(+), 19 deletions(-) diff --git a/app/tests/worker/test_worker_main.py b/app/tests/worker/test_worker_main.py index b6ca8f3..d40c457 100644 --- a/app/tests/worker/test_worker_main.py +++ b/app/tests/worker/test_worker_main.py @@ -1,7 +1,6 @@ from datetime import datetime -from unittest import mock -from unittest.mock import MagicMock, patch +from unittest.mock import patch import pytest @@ -24,7 +23,7 @@ class Test_create_archive_task(): from app.worker.main import create_archive_task m_req.id = "this-just-in" - m_orchestrator.return_value.run.return_value = iter([Metadata().set_url(self.URL).success()]) + m_orchestrator.return_value.feed.return_value = iter([Metadata().set_url(self.URL).success()]) task = create_archive_task(self.archive.model_dump_json()) @@ -32,7 +31,8 @@ class Test_create_archive_task(): m_store.assert_called_once_with("interstellar") m_insert.assert_called_once() m_urls.assert_called_once() - m_orchestrator.return_value.run.assert_called_once() + m_orchestrator.return_value.feed.assert_called_once() + m_orchestrator.return_value.setup.assert_called_once() assert task["status"] == "success" assert task["metadata"]["url"] == self.URL @@ -47,25 +47,25 @@ class Test_create_archive_task(): @patch("app.worker.main.get_orchestrator_args") def test_raise_db_error(self, m_args, m_orchestrator): from app.worker.main import create_archive_task - m_orchestrator.return_value.run.side_effect = Exception("Orchestrator failed") + m_orchestrator.return_value.feed.side_effect = Exception("Orchestrator failed") with pytest.raises(Exception) as e: create_archive_task(self.archive.model_dump_json()) assert str(e.value) == "Orchestrator failed" m_args.assert_called_once() - m_orchestrator.return_value.run.assert_called_once() + m_orchestrator.return_value.feed.assert_called_once() @patch("app.worker.main.ArchivingOrchestrator") @patch("app.worker.main.insert_result_into_db", return_value=None) @patch("app.worker.main.get_orchestrator_args") def test_raise_empty_result(self, m_args, m_insert, m_orchestrator): from app.worker.main import create_archive_task - m_orchestrator.return_value.run.return_value = iter([None]) + m_orchestrator.return_value.feed.return_value = iter([None]) with pytest.raises(Exception) as e: create_archive_task(self.archive.model_dump_json()) assert str(e.value) == "UNABLE TO archive: https://example-live.com" - m_orchestrator.return_value.run.assert_called_once() + m_orchestrator.return_value.feed.assert_called_once() class Test_create_sheet_task(): @@ -85,12 +85,13 @@ class Test_create_sheet_task(): mock_metadata = Metadata().set_url(self.URL).success() mock_metadata.add_media(Media("fn1.txt", urls=["outcome1.com"])) - m_orchestrator.return_value.run.return_value = iter([False, mock_metadata, mock_metadata]) + m_orchestrator.return_value.feed.return_value = iter([False, mock_metadata, mock_metadata]) res = create_sheet_task(self.sheet.model_dump_json()) m_args.assert_called_once_with("interstellar", True, ["--gsheet_feeder.sheet_id", "123"]) - m_orchestrator.return_value.run.assert_called_once() + m_orchestrator.return_value.setup.assert_called_once() + m_orchestrator.return_value.feed.assert_called_once() m_store.assert_called_with("interstellar") m_store.call_count == 2 m_uuid.call_count == 2 diff --git a/app/worker/main.py b/app/worker/main.py index a9a10c6..d3e50b8 100644 --- a/app/worker/main.py +++ b/app/worker/main.py @@ -33,10 +33,10 @@ def create_archive_task(self, archive_json: str): # call auto-archiver args = get_orchestrator_args(archive.group_id, False, [archive.url]) - # args = get_orchestrator_args(archive.group_id, False, [archive.url, "--extractors", "generic_extractor"]) - logger.debug(args) try: - result = next(ArchivingOrchestrator().run(args), None) + orchestrator = ArchivingOrchestrator() + orchestrator.setup(args) + result = next(orchestrator.feed()) except SystemExit as e: log_error(e, f"create_archive_task: SystemExit from AA") except Exception as e: @@ -61,11 +61,12 @@ def create_sheet_task(self, sheet_json: str): logger.info(f"[queue={queue_name}] SHEET START {sheet=}") args = get_orchestrator_args(sheet.group_id, True, ["--gsheet_feeder.sheet_id", sheet.sheet_id]) - logger.info(f"[queue={queue_name}] {args=}") + orchestrator = ArchivingOrchestrator() + orchestrator.setup(args) stats = {"archived": 0, "failed": 0, "errors": []} try: - for result in ArchivingOrchestrator().run(args): + for result in orchestrator.feed(): try: assert result, f"ERROR archiving URL for sheet {sheet.sheet_id}" archive = schemas.ArchiveCreate( diff --git a/poetry.lock b/poetry.lock index c818604..79f7907 100644 --- a/poetry.lock +++ b/poetry.lock @@ -154,14 +154,14 @@ cryptography = "*" [[package]] name = "auto-archiver" -version = "0.13.2" +version = "0.13.4" description = "Automatically archive links to videos, images, and social media content from Google Sheets (and more)." optional = false python-versions = "<3.13,>=3.10" groups = ["main"] files = [ - {file = "auto_archiver-0.13.2-py3-none-any.whl", hash = "sha256:672671080bdc2e4cd50792b3521a8a1d70aabb50ee3f779ed30879162b0c352b"}, - {file = "auto_archiver-0.13.2.tar.gz", hash = "sha256:b0d5505206bdb02f2ddb1b3a3a622780cc06ace0f3440b272f1d9bc9c314c9e2"}, + {file = "auto_archiver-0.13.4-py3-none-any.whl", hash = "sha256:490ee0dbc86e3481ee06cdbfbbaf397cbc9733b4aaac8cac233f29af5dc4ba53"}, + {file = "auto_archiver-0.13.4.tar.gz", hash = "sha256:dac206f643e8101bb1efdea2e6cbdfaca1e3ae50cfe3fa34b466b7518337d675"}, ] [package.dependencies] diff --git a/worker.Dockerfile b/worker.Dockerfile index 99ddf8a..4e24f87 100644 --- a/worker.Dockerfile +++ b/worker.Dockerfile @@ -1,5 +1,5 @@ # From python:3.10 -FROM bellingcat/auto-archiver +FROM bellingcat/auto-archiver:v0.13.4 # set work directory WORKDIR /aa-api