From a9dd278d24b4cb7ba80af38c477ac624beba5e35 Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Tue, 11 Feb 2025 18:18:49 +0000 Subject: [PATCH] further worker/web separation and tests fixed --- app/logs/.gitkeep | 0 app/shared/business_logic.py | 5 +- app/shared/db/worker_crud.py | 61 ++++++++ app/shared/schemas.py | 1 - app/tests/conftest.py | 14 +- app/tests/{ => shared}/db/test_models.py | 0 app/tests/shared/db/test_worker_crud.py | 98 ++++++++++++ app/tests/user-groups.test.yaml | 24 +-- app/tests/{ => web}/db/test_crud.py | 145 +++--------------- app/tests/{ => web}/endpoints/test_default.py | 20 +-- .../endpoints/test_interoperability.py | 10 +- app/tests/{ => web}/endpoints/test_sheet.py | 4 +- app/tests/{ => web}/endpoints/test_task.py | 6 +- app/tests/{ => web}/endpoints/test_url.py | 20 +-- app/tests/web/test_main.py | 6 +- app/tests/web/test_security.py | 22 +-- app/tests/worker/test_worker_main.py | 32 ++-- app/{shared => web}/db/crud.py | 58 +------ app/web/db/user_state.py | 3 +- app/web/endpoints/default.py | 4 +- app/web/endpoints/interoperability.py | 18 ++- app/web/endpoints/sheet.py | 2 +- app/web/endpoints/url.py | 2 +- app/web/events.py | 6 +- app/web/main.py | 5 +- app/web/middleware.py | 2 +- app/web/utils/metrics.py | 2 +- app/worker/main.py | 11 +- 28 files changed, 301 insertions(+), 280 deletions(-) create mode 100644 app/logs/.gitkeep create mode 100644 app/shared/db/worker_crud.py rename app/tests/{ => shared}/db/test_models.py (100%) create mode 100644 app/tests/shared/db/test_worker_crud.py rename app/tests/{ => web}/db/test_crud.py (77%) rename app/tests/{ => web}/endpoints/test_default.py (85%) rename app/tests/{ => web}/endpoints/test_interoperability.py (83%) rename app/tests/{ => web}/endpoints/test_sheet.py (98%) rename app/tests/{ => web}/endpoints/test_task.py (91%) rename app/tests/{ => web}/endpoints/test_url.py (91%) rename app/{shared => web}/db/crud.py (84%) diff --git a/app/logs/.gitkeep b/app/logs/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/app/shared/business_logic.py b/app/shared/business_logic.py index 2691ffe..ffd9cb2 100644 --- a/app/shared/business_logic.py +++ b/app/shared/business_logic.py @@ -4,11 +4,12 @@ import datetime from sqlalchemy.orm import Session -from app.shared.db import crud +from app.shared.db import worker_crud def get_store_archive_until(db: Session, group_id: str) -> datetime.datetime: - group = crud.get_group(db, group_id) + group = worker_crud.get_group(db, group_id) + assert group, f"Group {group_id} not found." max_lifespan = group.permissions.get("max_archive_lifespan_months", -1) if max_lifespan == -1: return None diff --git a/app/shared/db/worker_crud.py b/app/shared/db/worker_crud.py new file mode 100644 index 0000000..93962b9 --- /dev/null +++ b/app/shared/db/worker_crud.py @@ -0,0 +1,61 @@ +from sqlalchemy.orm import Session +from datetime import datetime + +from app.shared.db import models +from app.shared import schemas + +# TODO: isolate database operations away from worker and into WEB +# ONLY WORKER +def update_sheet_last_url_archived_at(db: Session, sheet_id: str): + db_sheet = db.query(models.Sheet).filter(models.Sheet.id == sheet_id).first() + if db_sheet: + db_sheet.last_url_archived_at = datetime.now() + db.commit() + return True + return False + + +# ONLY WORKER and INTEROP + +def get_group(db: Session, group_name: str) -> models.Group: + return db.query(models.Group).filter(models.Group.id == group_name).first() + +def create_or_get_user(db: Session, author_id: str) -> models.User: + if type(author_id) == str: author_id = author_id.lower() + db_user = db.query(models.User).filter(models.User.email == author_id).first() + if not db_user: + db_user = models.User(email=author_id) + db.add(db_user) + db.commit() + db.refresh(db_user) + return db_user + + +def create_tag(db: Session, tag: str) -> models.Tag: + db_tag = db.query(models.Tag).filter(models.Tag.id == tag).first() + if not db_tag: + db_tag = models.Tag(id=tag) + db.add(db_tag) + db.commit() + db.refresh(db_tag) + return db_tag + + +def create_task(db: Session, task: schemas.ArchiveCreate, tags: list[models.Tag], urls: list[models.ArchiveUrl]) -> models.Archive: + # TODO: rename task to archive + db_task = models.Archive(id=task.id, url=task.url, result=task.result, public=task.public, author_id=task.author_id, group_id=task.group_id, sheet_id=task.sheet_id, store_until=task.store_until) + db_task.tags = tags + db_task.urls = urls + db.add(db_task) + db.commit() + db.refresh(db_task) + return db_task + + +def store_archived_url(db: Session, archive: schemas.ArchiveCreate) -> models.Archive: + # create and load user, tags, if needed + create_or_get_user(db, archive.author_id) + db_tags = [create_tag(db, tag) for tag in archive.tags] + # insert everything + db_task = create_task(db, task=archive, tags=db_tags, urls=archive.urls) + return db_task diff --git a/app/shared/schemas.py b/app/shared/schemas.py index b76e711..f860f45 100644 --- a/app/shared/schemas.py +++ b/app/shared/schemas.py @@ -113,5 +113,4 @@ class CelerySheetTask(BaseModel): class SubmitManualArchive(ArchiveTrigger): - url: None = None result: str # should be a Metadata.to_json() diff --git a/app/tests/conftest.py b/app/tests/conftest.py index 41a3fb1..89cfe9a 100644 --- a/app/tests/conftest.py +++ b/app/tests/conftest.py @@ -3,8 +3,8 @@ from fastapi.testclient import TestClient import pytest from unittest.mock import patch from app.shared.config import ALLOW_ANY_EMAIL -from app.web.db.user_state import UserState from app.shared.settings import Settings +from app.web.db.user_state import UserState @pytest.fixture(autouse=True) @@ -21,7 +21,7 @@ def get_settings(): @pytest.fixture(autouse=True) def mock_settings(): - with patch('shared.settings.Settings', return_value=Settings(_env_file=".env.test")) as mock_settings: + with patch('app.shared.settings.Settings', return_value=Settings(_env_file=".env.test")) as mock_settings: yield mock_settings @@ -29,7 +29,7 @@ def mock_settings(): def test_db(get_settings: Settings): from app.shared.db import models from app.shared.db.database import make_engine - from app.shared.db.crud import get_user_groups + from app.web.db.crud import get_user_groups get_user_groups.cache_clear() make_engine.cache_clear() @@ -62,8 +62,8 @@ def db_session(test_db): @pytest.fixture() def app(db_session): - from web.main import app_factory - from app.shared.db import crud + from app.web.main import app_factory + from app.web.db import crud app = app_factory() crud.upsert_user_groups(db_session) return app @@ -77,7 +77,7 @@ def client(app): @pytest.fixture() def app_with_auth(app, db_session): - from web.security import get_token_or_user_auth, get_user_auth, get_user_state + from app.web.security import get_token_or_user_auth, get_user_auth, get_user_state app.dependency_overrides[get_token_or_user_auth] = lambda: "rick@example.com" app.dependency_overrides[get_user_auth] = lambda: "morty@example.com" app.dependency_overrides[get_user_state] = lambda: UserState(db_session, "MORTY@example.com") @@ -92,7 +92,7 @@ def client_with_auth(app_with_auth): @pytest.fixture() def app_with_token(app): - from web.security import token_api_key_auth, get_token_or_user_auth + from app.web.security import token_api_key_auth, get_token_or_user_auth app.dependency_overrides[token_api_key_auth] = lambda: ALLOW_ANY_EMAIL app.dependency_overrides[get_token_or_user_auth] = lambda: ALLOW_ANY_EMAIL return app diff --git a/app/tests/db/test_models.py b/app/tests/shared/db/test_models.py similarity index 100% rename from app/tests/db/test_models.py rename to app/tests/shared/db/test_models.py diff --git a/app/tests/shared/db/test_worker_crud.py b/app/tests/shared/db/test_worker_crud.py new file mode 100644 index 0000000..70f0fda --- /dev/null +++ b/app/tests/shared/db/test_worker_crud.py @@ -0,0 +1,98 @@ +from app.shared.db import models + + +from app.tests.web.db.test_crud import test_data + + +def test_get_group(test_data, db_session): + from app.shared.db import worker_crud + + assert worker_crud.get_group(db_session, "spaceship") is not None + assert worker_crud.get_group(db_session, "interdimensional") is not None + assert worker_crud.get_group(db_session, "animated-characters") is not None + assert worker_crud.get_group(db_session, "non-existent!@#!%!") is None + + +def test_create_or_get_user(test_data, db_session): + from app.shared.db import worker_crud + + assert db_session.query(models.User).count() == 3 + + # already exists + assert (u1 := worker_crud.create_or_get_user(db_session, "rick@example.com")) is not None + assert u1.email == "rick@example.com" + + # new user + assert (u2 := worker_crud.create_or_get_user(db_session, "beth@example.com")) is not None + assert u2.email == "beth@example.com" + + assert db_session.query(models.User).count() == 4 + + +def test_create_tag(db_session): + from app.shared.db import worker_crud + + assert db_session.query(models.Tag).count() == 0 + + # create first + create_tag = worker_crud.create_tag(db_session, "tag-101") + assert create_tag is not None + assert create_tag.id == "tag-101" + assert db_session.query(models.Tag).count() == 1 + assert db_session.query(models.Tag).filter(models.Tag.id == "tag-101").first() == create_tag + + # same id does not add new db entry + existing_tag = worker_crud.create_tag(db_session, "tag-101") + assert existing_tag == create_tag + assert db_session.query(models.Tag).count() == 1 + + # create second + second_tag = worker_crud.create_tag(db_session, "tag-102") + assert second_tag is not None + assert second_tag.id == "tag-102" + assert db_session.query(models.Tag).count() == 2 + + +def test_create_task(db_session): + from app.shared.db import worker_crud + from app.shared import schemas + + task = schemas.ArchiveCreate( + id="archive-id-456-101", + url="https://example-0.com", + result={}, + public=False, + author_id="rick@example.com", + group_id="spaceship", + tags=[], + urls=[] + ) + + # with tags and urls + nt = worker_crud.create_task(db_session, task, [models.Tag(id="tag-101")], [models.ArchiveUrl(url="https://example-0.com/0", key="media_0")]) + + assert nt is not None + assert nt.id == "archive-id-456-101" + assert nt.url == "https://example-0.com" + assert nt.author_id == "rick@example.com" + assert nt.public == False + assert nt.group_id == "spaceship" + assert len(nt.tags) == 1 + assert nt.tags[0].id == "tag-101" + assert len(nt.urls) == 1 + assert nt.urls[0].url == "https://example-0.com/0" + assert nt.urls[0].key == "media_0" + assert nt.created_at is not None + + # without tags and urls + task.id = "archive-id-456-102" + nt = worker_crud.create_task(db_session, task, [], []) + assert nt is not None + assert nt.id == "archive-id-456-102" + assert nt.url == "https://example-0.com" + assert nt.author_id == "rick@example.com" + assert nt.public == False + assert nt.group_id == "spaceship" + assert len(nt.tags) == 0 + assert len(nt.urls) == 0 + assert nt.created_at is not None diff --git a/app/tests/user-groups.test.yaml b/app/tests/user-groups.test.yaml index ccbbfec..16a3ba7 100644 --- a/app/tests/user-groups.test.yaml +++ b/app/tests/user-groups.test.yaml @@ -19,17 +19,17 @@ domains: orchestrators: - spaceship: tests/orchestration.test.yaml - interdimensional: tests/orchestration.test.yaml - default: tests/orchestration.test.yaml + spaceship: app/tests/orchestration.test.yaml + interdimensional: app/tests/orchestration.test.yaml + default: app/tests/orchestration.test.yaml -default_orchestrator: tests/orchestration.test.yaml +default_orchestrator: app/tests/orchestration.test.yaml groups: spaceship: description: "The spaceship crew" - orchestrator: tests/orchestration.test.yaml - orchestrator_sheet: tests/orchestration.test.yaml + orchestrator: app/tests/orchestration.test.yaml + orchestrator_sheet: app/tests/orchestration.test.yaml permissions: read: ["all"] archive_url: true @@ -43,8 +43,8 @@ groups: priority: "high" interdimensional: description: "Interdimensional travelers" - orchestrator: tests/orchestration.test.yaml - orchestrator_sheet: tests/orchestration.test.yaml + orchestrator: app/tests/orchestration.test.yaml + orchestrator_sheet: app/tests/orchestration.test.yaml permissions: read: ["interdimensional", "animated-characters"] archive_url: true @@ -58,8 +58,8 @@ groups: priority: "high" animated-characters: description: "Animated characters" - orchestrator: tests/orchestration.test.yaml - orchestrator_sheet: tests/orchestration.test.yaml + orchestrator: app/tests/orchestration.test.yaml + orchestrator_sheet: app/tests/orchestration.test.yaml permissions: read: ["animated-characters"] archive_url: true @@ -72,8 +72,8 @@ groups: priority: "low" default: description: "Public access" - orchestrator: tests/orchestration.test.yaml - orchestrator_sheet: tests/orchestration.test.yaml + orchestrator: app/tests/orchestration.test.yaml + orchestrator_sheet: app/tests/orchestration.test.yaml permissions: # read: [] archive_url: true diff --git a/app/tests/db/test_crud.py b/app/tests/web/db/test_crud.py similarity index 77% rename from app/tests/db/test_crud.py rename to app/tests/web/db/test_crud.py index a7317b3..625aee7 100644 --- a/app/tests/db/test_crud.py +++ b/app/tests/web/db/test_crud.py @@ -4,7 +4,7 @@ from unittest.mock import patch import pytest import yaml from app.shared.db import models -from shared.settings import Settings +from app.shared.settings import Settings authors = ["rick@example.com", "morty@example.com", "jerry@example.com"] @@ -55,18 +55,16 @@ def test_data(db_session): # setup groups assert db_session.query(models.Group).count() == 0 - from app.shared.db import crud + from app.web.db import crud crud.upsert_user_groups(db_session) assert db_session.query(models.Group).count() == 4 assert db_session.query(models.User).count() == 3 def test_get_archive(test_data, db_session): - from app.shared.db import crud + from app.web.db import crud from app.shared.config import ALLOW_ANY_EMAIL - print(db_session.query(models.Group).all()) - # each author's archives work assert (a0 := crud.get_archive(db_session, "archive-id-456-0", authors[0])) is not None assert a0.id == "archive-id-456-0" @@ -93,7 +91,7 @@ def test_get_archive(test_data, db_session): def test_search_archives_by_url(test_data, db_session): - from app.shared.db import crud + from app.web.db import crud from app.shared.config import ALLOW_ANY_EMAIL # rick's archives are private @@ -141,7 +139,7 @@ def test_search_archives_by_url(test_data, db_session): def test_search_archives_by_email(test_data, db_session): from app.shared.config import ALLOW_ANY_EMAIL - from app.shared.db import crud + from app.web.db import crud # lower/upper case assert len(crud.search_archives_by_email(db_session, "rick@example.com")) == 34 @@ -160,9 +158,9 @@ def test_search_archives_by_email(test_data, db_session): assert a2[0].created_at == datetime(2021, 1, 1) -@patch("db.crud.DATABASE_QUERY_LIMIT", new=25) +@patch("app.web.db.crud.DATABASE_QUERY_LIMIT", new=25) def test_max_query_limit(test_data, db_session): - from app.shared.db import crud + from app.web.db import crud from app.shared.config import ALLOW_ANY_EMAIL assert len(crud.search_archives_by_url(db_session, "https://example", ALLOW_ANY_EMAIL)) == 25 @@ -172,53 +170,8 @@ def test_max_query_limit(test_data, db_session): assert len(crud.search_archives_by_email(db_session, "rick@example.com", limit=1000)) == 25 -def test_create_task(db_session): - from app.shared.db import crud - from app.shared import schemas - - task = schemas.ArchiveCreate( - id="archive-id-456-101", - url="https://example-0.com", - result={}, - public=False, - author_id="rick@example.com", - group_id="spaceship", - tags=[], - urls=[] - ) - - # with tags and urls - nt = crud.create_task(db_session, task, [models.Tag(id="tag-101")], [models.ArchiveUrl(url="https://example-0.com/0", key="media_0")]) - - assert nt is not None - assert nt.id == "archive-id-456-101" - assert nt.url == "https://example-0.com" - assert nt.author_id == "rick@example.com" - assert nt.public == False - assert nt.group_id == "spaceship" - assert len(nt.tags) == 1 - assert nt.tags[0].id == "tag-101" - assert len(nt.urls) == 1 - assert nt.urls[0].url == "https://example-0.com/0" - assert nt.urls[0].key == "media_0" - assert nt.created_at is not None - - # without tags and urls - task.id = "archive-id-456-102" - nt = crud.create_task(db_session, task, [], []) - assert nt is not None - assert nt.id == "archive-id-456-102" - assert nt.url == "https://example-0.com" - assert nt.author_id == "rick@example.com" - assert nt.public == False - assert nt.group_id == "spaceship" - assert len(nt.tags) == 0 - assert len(nt.urls) == 0 - assert nt.created_at is not None - - def test_soft_delete(test_data, db_session): - from app.shared.db import crud + from app.web.db import crud # none deleted yet assert crud.get_archive(db_session, "archive-id-456-0", "rick@example.com") is not None @@ -236,7 +189,7 @@ def test_soft_delete(test_data, db_session): def test_count_archives(test_data, db_session): - from app.shared.db import crud + from app.web.db import crud assert crud.count_archives(db_session) == 100 db_session.query(models.Archive).filter(models.Archive.id == "archive-id-456-0").delete() @@ -245,7 +198,7 @@ def test_count_archives(test_data, db_session): def test_count_archive_urls(test_data, db_session): - from app.shared.db import crud + from app.web.db import crud assert crud.count_archive_urls(db_session) == 1000 db_session.query(models.ArchiveUrl).filter(models.ArchiveUrl.url == "https://example-0.com/0").delete() @@ -260,7 +213,7 @@ def test_count_archive_urls(test_data, db_session): def test_count_users(test_data, db_session): - from app.shared.db import crud + from app.web.db import crud assert crud.count_users(db_session) == 3 db_session.query(models.User).filter(models.User.email == "rick@example.com").delete() @@ -269,7 +222,7 @@ def test_count_users(test_data, db_session): def test_count_by_users_since(test_data, db_session): - from app.shared.db import crud + from app.web.db import crud # 100y window assert len(cu := crud.count_by_user_since(db_session, 60 * 60 * 24 * 31 * 12 * 100)) == 3 @@ -278,32 +231,8 @@ def test_count_by_users_since(test_data, db_session): assert cu[2].total == 33 -def test_create_tag(db_session): - from app.shared.db import crud - - assert db_session.query(models.Tag).count() == 0 - - # create first - create_tag = crud.create_tag(db_session, "tag-101") - assert create_tag is not None - assert create_tag.id == "tag-101" - assert db_session.query(models.Tag).count() == 1 - assert db_session.query(models.Tag).filter(models.Tag.id == "tag-101").first() == create_tag - - # same id does not add new db entry - existing_tag = crud.create_tag(db_session, "tag-101") - assert existing_tag == create_tag - assert db_session.query(models.Tag).count() == 1 - - # create second - second_tag = crud.create_tag(db_session, "tag-102") - assert second_tag is not None - assert second_tag.id == "tag-102" - assert db_session.query(models.Tag).count() == 2 - - def test_is_user_in_group(test_data, db_session): - from app.shared.db import crud + from app.web.db import crud from app.shared.config import ALLOW_ANY_EMAIL # see user-groups.test.yaml @@ -339,36 +268,12 @@ def test_is_user_in_group(test_data, db_session): ] for email, group, expected in test_pairs: print(f"{email} in {group} == {expected}") - assert crud.is_user_in_group(db_session, email, group) == expected + assert crud.is_user_in_group(email, group) == expected -def test_get_group(test_data, db_session): - from app.shared.db import crud - - assert crud.get_group(db_session, "spaceship") is not None - assert crud.get_group(db_session, "interdimensional") is not None - assert crud.get_group(db_session, "animated-characters") is not None - assert crud.get_group(db_session, "non-existent!@#!%!") is None - - -def test_create_or_get_user(test_data, db_session): - from app.shared.db import crud - - assert db_session.query(models.User).count() == 3 - - # already exists - assert (u1 := crud.create_or_get_user(db_session, "rick@example.com")) is not None - assert u1.email == "rick@example.com" - - # new user - assert (u2 := crud.create_or_get_user(db_session, "beth@example.com")) is not None - assert u2.email == "beth@example.com" - - assert db_session.query(models.User).count() == 4 - def test_upsert_group(test_data, db_session): - from app.shared.db import crud + from app.web.db import crud assert db_session.query(models.Group).count() == 4 @@ -397,29 +302,29 @@ def test_upsert_group(test_data, db_session): def test_upsert_user_groups(db_session): - from app.shared.db import crud + from app.web.db import crud - @patch('db.crud.get_settings', new=lambda: bad_setings) + @patch('app.web.db.crud.get_settings', new=lambda: bad_setings) def test_missing_yaml(db_session): with pytest.raises(FileNotFoundError): crud.upsert_user_groups(db_session) - @patch('db.crud.get_settings', new=lambda: bad_setings) + @patch('app.web.db.crud.get_settings', new=lambda: bad_setings) def test_broken_yaml(db_session): with pytest.raises(yaml.YAMLError): crud.upsert_user_groups(db_session) bad_setings = Settings(_env_file=".env.test") - bad_setings.USER_GROUPS_FILENAME = "tests/user-groups.test.missing.yaml" + bad_setings.USER_GROUPS_FILENAME = "app/tests/user-groups.test.missing.yaml" test_missing_yaml(db_session) - bad_setings.USER_GROUPS_FILENAME = "tests/user-groups.test.broken.yaml" + bad_setings.USER_GROUPS_FILENAME = "app/tests/user-groups.test.broken.yaml" test_broken_yaml(db_session) def test_create_sheet(db_session): - from app.shared.db import crud + from app.web.db import crud assert db_session.query(models.Sheet).count() == 0 @@ -440,7 +345,7 @@ def test_create_sheet(db_session): def test_get_user_sheet(test_data, db_session): - from app.shared.db import crud + from app.web.db import crud assert crud.get_user_sheet(db_session, "", "sheet-0") is None assert crud.get_user_sheet(db_session, "morty@example.com", "sheet-0") is None @@ -451,7 +356,7 @@ def test_get_user_sheet(test_data, db_session): def test_get_user_sheets(test_data, db_session): - from app.shared.db import crud + from app.web.db import crud assert len(crud.get_user_sheets(db_session, "")) == 0 rick_sheets = crud.get_user_sheets(db_session, "rick@example.com") @@ -459,10 +364,10 @@ def test_get_user_sheets(test_data, db_session): assert [s.id for s in rick_sheets] == ["sheet-0", "sheet-0-2"] assert len(crud.get_user_sheets(db_session, "morty@example.com")) == 1 + def test_delete_sheet(test_data, db_session): - from app.shared.db import crud + from app.web.db import crud assert crud.delete_sheet(db_session, "sheet-0", "") == False assert crud.delete_sheet(db_session, "sheet-0", "rick@example.com") == True assert crud.delete_sheet(db_session, "sheet-0", "rick@example.com") == False - diff --git a/app/tests/endpoints/test_default.py b/app/tests/web/endpoints/test_default.py similarity index 85% rename from app/tests/endpoints/test_default.py rename to app/tests/web/endpoints/test_default.py index 4215ec6..54bc14b 100644 --- a/app/tests/endpoints/test_default.py +++ b/app/tests/web/endpoints/test_default.py @@ -2,7 +2,7 @@ from unittest.mock import AsyncMock, MagicMock, patch from fastapi.testclient import TestClient import pytest from app.shared.config import VERSION -from tests.db.test_crud import test_data +from app.tests.web.db.test_crud import test_data def test_endpoint_home(client_with_auth): @@ -14,9 +14,9 @@ def test_endpoint_home(client_with_auth): assert "groups" not in j -@patch("endpoints.default.bearer_security", new_callable=AsyncMock) -@patch("endpoints.default.get_user_auth", new_callable=AsyncMock, return_value="test@example.com") -@patch("endpoints.default.crud.get_user_groups", return_value=["group1", "group2"]) +@patch("app.web.endpoints.default.bearer_security", new_callable=AsyncMock) +@patch("app.web.endpoints.default.get_user_auth", new_callable=AsyncMock, return_value="test@example.com") +@patch("app.web.endpoints.default.crud.get_user_groups", return_value=["group1", "group2"]) def test_endpoint_home_with_groups(m1, m2, m3, client_with_auth): r = client_with_auth.get("/") assert r.status_code == 200 @@ -27,9 +27,9 @@ def test_endpoint_home_with_groups(m1, m2, m3, client_with_auth): assert j["groups"] == ["group1", "group2"] -@patch("endpoints.default.bearer_security", new_callable=AsyncMock) -@patch("endpoints.default.get_user_auth", new_callable=AsyncMock, return_value="test@example.com") -@patch("endpoints.default.crud.get_user_groups", side_effect=Exception('mocked error')) +@patch("app.web.endpoints.default.bearer_security", new_callable=AsyncMock) +@patch("app.web.endpoints.default.get_user_auth", new_callable=AsyncMock, return_value="test@example.com") +@patch("app.web.endpoints.default.crud.get_user_groups", side_effect=Exception('mocked error')) def test_endpoint_home_with_groups_exception(m1, m2, m3, client_with_auth): # mocks call that triggers an internal error r = client_with_auth.get("/") assert r.status_code == 200 @@ -52,7 +52,7 @@ def test_endpoint_active_no_auth(client, test_no_auth): def test_endpoint_active(app): m_user_state = MagicMock() - from web.security import get_user_state + from app.web.security import get_user_state app.dependency_overrides[get_user_state] = lambda: m_user_state # inactive user @@ -103,7 +103,7 @@ async def test_prometheus_metrics(test_data, client_with_token, get_settings): assert 'disk_utilization{type="used"}' not in r.text # after metrics calculation - from web.utils.metrics import measure_regular_metrics + from app.web.utils.metrics import measure_regular_metrics await measure_regular_metrics(get_settings.DATABASE_PATH, 60 * 60 * 24 * 31 * 12 * 100) r2 = client_with_token.get("/metrics") assert 'disk_utilization{type="used"}' in r2.text @@ -117,7 +117,7 @@ async def test_prometheus_metrics(test_data, client_with_token, get_settings): assert 'database_metrics_counter_total{query="count_by_user",user="jerry@example.com"} 33.0' in r2.text # 30s window, should not change the gauges nor the total in the counters - from web.utils.metrics import measure_regular_metrics + from app.web.utils.metrics import measure_regular_metrics await measure_regular_metrics(get_settings.DATABASE_PATH, 30) r3 = client_with_token.get("/metrics") assert 'database_metrics{query="count_archives"} 100.0' in r3.text diff --git a/app/tests/endpoints/test_interoperability.py b/app/tests/web/endpoints/test_interoperability.py similarity index 83% rename from app/tests/endpoints/test_interoperability.py rename to app/tests/web/endpoints/test_interoperability.py index 0d35ca2..64629ae 100644 --- a/app/tests/endpoints/test_interoperability.py +++ b/app/tests/web/endpoints/test_interoperability.py @@ -1,9 +1,9 @@ from datetime import datetime import json -from unittest.mock import patch +from unittest.mock import MagicMock, patch from app.shared.config import ALLOW_ANY_EMAIL -from app.shared.db import crud +from app.web.db import crud def test_submit_manual_archive_unauthenticated(client, test_no_auth): @@ -14,11 +14,11 @@ def test_submit_manual_archive_not_user_auth(client_with_auth, test_no_auth): test_no_auth(client_with_auth.post, "/interop/submit-archive") -@patch("endpoints.interoperability.get_store_until", return_value=datetime.now()) +@patch("app.web.endpoints.interoperability.business_logic", return_value=MagicMock(get_store_archive_until=MagicMock(return_value=datetime))) def test_submit_manual_archive(m1, client_with_token, db_session): # normal workflow aa_metadata = json.dumps({"status": "test: success", "metadata": {"url": "http://example.com"}, "media": [{"filename": "fn1", "urls": ["http://example.s3.com"]}]}) - r = client_with_token.post("/interop/submit-archive", json={"result": aa_metadata, "public": True, "author_id": "jerry@gmail.com", "group_id": "spaceship", "tags": ["test"]}) + r = client_with_token.post("/interop/submit-archive", json={"result": aa_metadata, "public": True, "author_id": "jerry@gmail.com", "group_id": "spaceship", "tags": ["test"], "url": "http://example.com"}) assert r.status_code == 201 assert "id" in r.json() @@ -35,6 +35,6 @@ def test_submit_manual_archive(m1, client_with_token, db_session): # cannot have the same URL twice aa_metadata = json.dumps({"status": "test: success", "metadata": {"url": "http://example.com"}, "media": [{"filename": "fn1", "urls": ["http://example.com", "http://example.com"]}]}) - r = client_with_token.post("/interop/submit-archive", json={"result": aa_metadata, "public": False, "author_id": "jerry@gmail.com", "tags": ["test"]}) + r = client_with_token.post("/interop/submit-archive", json={"result": aa_metadata, "public": False, "author_id": "jerry@gmail.com", "tags": ["test"], "url": "http://example.com"}) assert r.status_code == 422 assert r.json() == {"detail": "Cannot insert into DB due to integrity error, likely duplicate urls."} diff --git a/app/tests/endpoints/test_sheet.py b/app/tests/web/endpoints/test_sheet.py similarity index 98% rename from app/tests/endpoints/test_sheet.py rename to app/tests/web/endpoints/test_sheet.py index 3241c7c..aedecac 100644 --- a/app/tests/endpoints/test_sheet.py +++ b/app/tests/web/endpoints/test_sheet.py @@ -45,7 +45,7 @@ def test_create_sheet_endpoint(app_with_auth, db_session): assert response.json() == {"detail": "User does not have access to this group."} # switch to jerry who's got less quota/permissions - from web.security import get_user_state + from app.web.security import get_user_state from app.web.db.user_state import UserState app_with_auth.dependency_overrides[get_user_state] = lambda: UserState(db_session, "jerry@example.com") client_jerry = TestClient(app_with_auth) @@ -144,7 +144,7 @@ def test_delete_sheet_endpoint(client_with_auth, db_session): class TestArchiveUserSheetEndpoint: - @patch("endpoints.sheet.celery", return_value=MagicMock()) + @patch("app.web.endpoints.sheet.celery", return_value=MagicMock()) def test_normal_flow(self, m_celery, client_with_auth, db_session): from app.shared.db import models db_session.add(models.Sheet(id="123-sheet-id", name="Test Sheet 1", author_id="morty@example.com", group_id="spaceship", frequency="hourly")) diff --git a/app/tests/endpoints/test_task.py b/app/tests/web/endpoints/test_task.py similarity index 91% rename from app/tests/endpoints/test_task.py rename to app/tests/web/endpoints/test_task.py index 9585c39..937ad46 100644 --- a/app/tests/endpoints/test_task.py +++ b/app/tests/web/endpoints/test_task.py @@ -5,7 +5,7 @@ def test_endpoint_task_status_no_auth(client, test_no_auth): test_no_auth(client.get, "/task/test-task-id") -@patch("endpoints.task.AsyncResult") +@patch("app.web.endpoints.task.AsyncResult") def test_get_status_success(mock_async_result, client_with_auth): mock_async_result.return_value.status = "SUCCESS" mock_async_result.return_value.result = {"data": "some result"} @@ -20,7 +20,7 @@ def test_get_status_success(mock_async_result, client_with_auth): } -@patch("endpoints.task.AsyncResult") +@patch("app.web.endpoints.task.AsyncResult") def test_get_status_failure(mock_async_result, client_with_auth): mock_async_result.return_value.status = "FAILURE" @@ -36,7 +36,7 @@ def test_get_status_failure(mock_async_result, client_with_auth): } -@patch("endpoints.task.AsyncResult") +@patch("app.web.endpoints.task.AsyncResult") def test_get_status_pending(mock_async_result, client_with_auth): mock_async_result.return_value.status = "PENDING" mock_async_result.return_value.result = None diff --git a/app/tests/endpoints/test_url.py b/app/tests/web/endpoints/test_url.py similarity index 91% rename from app/tests/endpoints/test_url.py rename to app/tests/web/endpoints/test_url.py index d6f43f0..008973f 100644 --- a/app/tests/endpoints/test_url.py +++ b/app/tests/web/endpoints/test_url.py @@ -8,8 +8,8 @@ def test_archive_url_unauthenticated(client, test_no_auth): test_no_auth(client.post, "/url/archive") -@patch("endpoints.url.UserState") -@patch("endpoints.url.celery", return_value=MagicMock()) +@patch("app.web.endpoints.url.UserState") +@patch("app.web.endpoints.url.celery", return_value=MagicMock()) def test_archive_url(m_celery, m2, client_with_auth): m_signature = MagicMock() m_signature.delay.return_value = TaskResult(id="123-456-789", status="PENDING", result="") @@ -81,7 +81,7 @@ def test_archive_url(m_celery, m2, client_with_auth): assert m_signature.delay.call_count == 2 -@patch("endpoints.url.UserState") +@patch("app.web.endpoints.url.UserState") def test_archive_url_quotas(m1, client_with_auth): m_user_state = MagicMock() m1.return_value = m_user_state @@ -102,7 +102,7 @@ def test_archive_url_quotas(m1, client_with_auth): m_user_state.has_quota_max_monthly_mbs.assert_called_once() -@patch("endpoints.url.celery", return_value=MagicMock()) +@patch("app.web.endpoints.url.celery", return_value=MagicMock()) def test_archive_url_with_api_token(m_celery, client_with_token): m_signature = MagicMock() m_signature.delay.return_value = TaskResult(id="123-456-789", status="PENDING", result="") @@ -130,9 +130,11 @@ def test_search_by_url(client_with_auth, client_with_token, db_session): assert response.status_code == 200 assert response.json() == [] - from app.shared.db import crud, schemas + from app.shared import schemas + from app.shared.db import worker_crud for i in range(11): - crud.create_task(db_session, ArchiveCreate(id=f"url-456-{i}", url="https://example.com" if i < 10 else "https://something-else.com", result={}, public=True, author_id="rick@example.com"), [], []) + #TODO: fix as this method is gone to shared.db + worker_crud.create_task(db_session, ArchiveCreate(id=f"url-456-{i}", url="https://example.com" if i < 10 else "https://something-else.com", result={}, public=True, author_id="rick@example.com"), [], []) # NB: this insertion is too fast for the ordering to be correct as they are within the same second response = client_with_auth.get("/url/search?url=https://example.com") @@ -165,7 +167,7 @@ def test_search_by_url(client_with_auth, client_with_token, db_session): assert len(response.json()) == 10 -@patch("endpoints.url.UserState") +@patch("app.web.endpoints.url.UserState") def test_search_no_read_access(mock_user_state, client_with_auth): mock_user_state.return_value.read = False mock_user_state.return_value.read_public = False @@ -184,8 +186,8 @@ def test_delete_task(client_with_auth, db_session): assert response.status_code == 200 assert response.json() == {"id": "delete-123-456-789", "deleted": False} - from app.shared.db import crud - crud.create_task(db_session, ArchiveCreate(id="delete-123-456-789", url="https://example.com", result={}, public=True, author_id="morty@example.com"), [], []) + from app.shared.db import worker_crud + worker_crud.create_task(db_session, ArchiveCreate(id="delete-123-456-789", url="https://example.com", result={}, public=True, author_id="morty@example.com"), [], []) response = client_with_auth.delete("/url/delete-123-456-789") assert response.status_code == 200 diff --git a/app/tests/web/test_main.py b/app/tests/web/test_main.py index 817125c..95cc5d6 100644 --- a/app/tests/web/test_main.py +++ b/app/tests/web/test_main.py @@ -17,9 +17,9 @@ def test_alembic(db_session): alembic.config.main(argv=['--raiseerr', 'upgrade', 'head']) alembic.config.main(argv=['--raiseerr', 'downgrade', 'base']) -@patch("endpoints.default.crud.soft_delete_task", side_effect=Exception('mocked error')) +@patch("app.web.endpoints.default.crud.soft_delete_task", side_effect=Exception('mocked error')) def test_logging_middleware(m1, client_with_auth): - from web.utils.metrics import EXCEPTION_COUNTER + from app.web.utils.metrics import EXCEPTION_COUNTER assert len(EXCEPTION_COUNTER.collect()[0].samples) == 0 with pytest.raises(Exception, match="mocked error"): client_with_auth.delete("/url/123") @@ -36,7 +36,7 @@ def test_serve_local_archive_logic(get_settings): try: # modify the settings get_settings.SERVE_LOCAL_ARCHIVE = "/app/local_archive_test" - from web.main import app_factory + from app.web.main import app_factory app = app_factory(get_settings) # test diff --git a/app/tests/web/test_security.py b/app/tests/web/test_security.py index e9cb1e8..3b45f2d 100644 --- a/app/tests/web/test_security.py +++ b/app/tests/web/test_security.py @@ -8,7 +8,7 @@ from app.shared.config import ALLOW_ANY_EMAIL def test_secure_compare(): - from web.security import secure_compare + from app.web.security import secure_compare assert secure_compare("test", "test") assert not secure_compare("test", "test2") @@ -16,14 +16,14 @@ def test_secure_compare(): @pytest.mark.asyncio async def test_get_token_or_user_auth_with_api(): - from web.security import get_token_or_user_auth + from app.web.security import get_token_or_user_auth mock_api = HTTPAuthorizationCredentials(scheme="lorem", credentials="this_is_the_test_api_token") assert await get_token_or_user_auth(mock_api) == ALLOW_ANY_EMAIL @pytest.mark.asyncio async def test_get_token_or_user_auth_with_user(): - from web.security import get_token_or_user_auth + from app.web.security import get_token_or_user_auth bad_user = HTTPAuthorizationCredentials(scheme="ipsum", credentials="invalid") e: pytest.ExceptionInfo = None with pytest.raises(HTTPException) as e: @@ -32,18 +32,18 @@ async def test_get_token_or_user_auth_with_user(): assert e.value.detail == "invalid access_token" -@patch("web.security.authenticate_user", return_value=(True, "summer@example.com")) +@patch("app.web.security.authenticate_user", return_value=(True, "summer@example.com")) @pytest.mark.asyncio async def test_get_user_auth(m1): - from web.security import get_user_auth + from app.web.security import get_user_auth good_user = HTTPAuthorizationCredentials(scheme="ipsum", credentials="valid-and-good") assert await get_user_auth(good_user) == "summer@example.com" -@patch("web.security.secure_compare", return_value=False) +@patch("app.web.security.secure_compare", return_value=False) @pytest.mark.asyncio async def test_token_api_key_auth_exception(m1): - from web.security import token_api_key_auth + from app.web.security import token_api_key_auth e: pytest.ExceptionInfo = None with pytest.raises(HTTPException) as e: @@ -54,12 +54,12 @@ async def test_token_api_key_auth_exception(m1): @pytest.mark.asyncio async def test_authenticate_user(): - from web.security import authenticate_user + from app.web.security import authenticate_user assert authenticate_user("test") == (False, "invalid access_token") assert authenticate_user(123) == (False, "invalid access_token") - with patch("web.security.requests.get") as mock_get: + with patch("app.web.security.requests.get") as mock_get: # bad response from oauth2 mock_get.return_value.status_code = 403 assert authenticate_user("this-will-call-requests") == (False, "invalid token") @@ -100,9 +100,9 @@ async def test_authenticate_user(): @pytest.mark.asyncio async def test_authenticate_user_exception(): - from web.security import authenticate_user + from app.web.security import authenticate_user - with patch("web.security.requests.get") as mock_get: + with patch("app.web.security.requests.get") as mock_get: mock_get.return_value.status_code = 200 mock_get.return_value.json.side_effect = Exception("mocked error") assert authenticate_user("this-will-call-requests") == (False, "exception occurred") diff --git a/app/tests/worker/test_worker_main.py b/app/tests/worker/test_worker_main.py index a0fb9ed..b8cb4cb 100644 --- a/app/tests/worker/test_worker_main.py +++ b/app/tests/worker/test_worker_main.py @@ -16,12 +16,12 @@ class Test_create_archive_task(): URL = "https://example-live.com" archive = schemas.ArchiveCreate(url=URL, tags=["tag-celery"], public=True, author_id="rick@example.com", group_id="interstellar") - @patch("worker.main.insert_result_into_db") - @patch("worker.main.get_store_until", return_value=datetime.now()) - @patch("worker.main.load_orchestrator") + @patch("app.worker.main.insert_result_into_db") + @patch("app.worker.main.get_store_until", return_value=datetime.now()) + @patch("app.worker.main.load_orchestrator") @patch("celery.app.task.Task.request") def test_success(self, m_req, m_load, m_store, m_insert, db_session): - from worker.main import create_archive_task + from app.worker.main import create_archive_task m_req.id = "this-just-in" mock_orchestrator = self.mock_orchestrator_choice(m_load) @@ -38,14 +38,14 @@ class Test_create_archive_task(): assert len(task["media"]) == 0 def test_raise_invalid(self): - from worker.main import create_archive_task + from app.worker.main import create_archive_task with pytest.raises(Exception): create_archive_task(self.archive.model_dump_json()) - @patch("worker.main.insert_result_into_db", side_effect=Exception) - @patch("worker.main.load_orchestrator") + @patch("app.worker.main.insert_result_into_db", side_effect=Exception) + @patch("app.worker.main.load_orchestrator") def test_raise_db_error(self, m_load, m_insert): - from worker.main import create_archive_task + from app.worker.main import create_archive_task mock_orchestrator = self.mock_orchestrator_choice(m_load) with pytest.raises(Exception): @@ -53,10 +53,10 @@ class Test_create_archive_task(): mock_orchestrator.feed_item.assert_called_once() - @patch("worker.main.insert_result_into_db", return_value=None) - @patch("worker.main.load_orchestrator") + @patch("app.worker.main.insert_result_into_db", return_value=None) + @patch("app.worker.main.load_orchestrator") def test_raise_empty_result(self, m_load, m_insert): - from worker.main import create_archive_task + from app.worker.main import create_archive_task mock_orchestrator = self.mock_orchestrator_choice(m_load) with pytest.raises(Exception) as e: @@ -75,11 +75,11 @@ class Test_create_sheet_task(): URL = "https://example-live.com" sheet = schemas.SubmitSheet(sheet_id="123", author_id="rick@example.com", group_id="interstellar", tags=["spaceship"]) - @patch("worker.main.models.generate_uuid", return_value="constant-uuid") - @patch("worker.main.get_store_until", return_value=datetime.now()) - @patch("worker.main.load_orchestrator") + @patch("app.worker.main.models.generate_uuid", return_value="constant-uuid") + @patch("app.worker.main.get_store_until", return_value=datetime.now()) + @patch("app.worker.main.load_orchestrator") def test_success(self, m_load, m_store, m_uuid, db_session): - from worker.main import create_sheet_task + from app.worker.main import create_sheet_task assert db_session.query(models.Archive).filter(models.Archive.url == self.URL).count() == 0 @@ -116,7 +116,7 @@ class Test_create_sheet_task(): def test_get_all_urls(db_session): - from worker.main import get_all_urls + from app.worker.main import get_all_urls from auto_archiver import Metadata meta = Metadata().set_url("https://example.com") diff --git a/app/shared/db/crud.py b/app/web/db/crud.py similarity index 84% rename from app/shared/db/crud.py rename to app/web/db/crud.py index c3c2d00..d1aa92e 100644 --- a/app/shared/db/crud.py +++ b/app/web/db/crud.py @@ -9,7 +9,6 @@ from sqlalchemy.ext.asyncio import AsyncSession from app.shared.config import ALLOW_ANY_EMAIL from app.shared.db.database import get_db from app.shared.db import models -from app.shared import schemas from app.shared.settings import get_settings from app.shared.user_groups import UserGroups from app.shared.utils.misc import fnv1a_hash_mod @@ -55,16 +54,6 @@ def search_archives_by_url(db: Session, url: str, email: str, skip: int = 0, lim def search_archives_by_email(db: Session, email: str, skip: int = 0, limit: int = 100): return base_query(db).filter(models.Archive.author_id == email).order_by(models.Archive.created_at.desc()).offset(skip).limit(get_limit(limit)).all() -#TODO: rename task to archive -def create_task(db: Session, task: schemas.ArchiveCreate, tags: list[models.Tag], urls: list[models.ArchiveUrl]) -> models.Archive: - db_task = models.Archive(id=task.id, url=task.url, result=task.result, public=task.public, author_id=task.author_id, group_id=task.group_id, sheet_id=task.sheet_id, store_until=task.store_until) - db_task.tags = tags - db_task.urls = urls - db.add(db_task) - db.commit() - db.refresh(db_task) - return db_task - def soft_delete_task(db: Session, task_id: str, email: str) -> bool: # TODO: implement hard-delete with cronjob that deletes from S3 @@ -113,17 +102,7 @@ async def soft_delete_expired_archives(db: AsyncSession) -> dict: # --------------- TAG -def create_tag(db: Session, tag: str) -> models.Tag: - db_tag = db.query(models.Tag).filter(models.Tag.id == tag).first() - if not db_tag: - db_tag = models.Tag(id=tag) - db.add(db_tag) - db.commit() - db.refresh(db_tag) - return db_tag - - -def is_user_in_group(db: Session, email: str, group_name: str) -> models.Group: +def is_user_in_group(email: str, group_name: str) -> models.Group: if email == ALLOW_ANY_EMAIL: return True return len(group_name) and len(email) and group_name in get_user_groups(email) @@ -150,21 +129,6 @@ def get_user_groups(email: str) -> list[str]: # --------------- INIT User-Groups -def get_group(db: Session, group_name: str) -> models.Group: - return db.query(models.Group).filter(models.Group.id == group_name).first() - - -def create_or_get_user(db: Session, author_id: str) -> models.User: - if type(author_id) == str: author_id = author_id.lower() - db_user = db.query(models.User).filter(models.User.email == author_id).first() - if not db_user: - db_user = models.User(email=author_id) - db.add(db_user) - db.commit() - db.refresh(db_user) - return db_user - - def upsert_group(db: Session, group_name: str, description: str, orchestrator: str, orchestrator_sheet: str, permissions: dict, domains: list) -> models.Group: db_group = db.query(models.Group).filter(models.Group.id == group_name).first() if db_group is None: @@ -285,14 +249,6 @@ async def delete_stale_sheets(db: AsyncSession, inactivity_days: int) -> dict: await db.commit() return dict(deleted) -def update_sheet_last_url_archived_at(db: Session, sheet_id: str): - db_sheet = db.query(models.Sheet).filter(models.Sheet.id == sheet_id).first() - if db_sheet: - db_sheet.last_url_archived_at = datetime.now() - db.commit() - return True - return False - def delete_sheet(db: Session, sheet_id: str, email: str) -> bool: db_sheet = db.query(models.Sheet).filter(models.Sheet.id == sheet_id, models.Sheet.author_id == email).first() @@ -300,15 +256,3 @@ def delete_sheet(db: Session, sheet_id: str, email: str) -> bool: db.delete(db_sheet) db.commit() return db_sheet is not None - - -#--- Celery worker tasks - - -def store_archived_url(db: Session, archive: schemas.ArchiveCreate) -> models.Archive: - # create and load user, tags, if needed - create_or_get_user(db, archive.author_id) - db_tags = [create_tag(db, tag) for tag in archive.tags] - # insert everything - db_task = create_task(db, task=archive, tags=db_tags, urls=archive.urls) - return db_task \ No newline at end of file diff --git a/app/web/db/user_state.py b/app/web/db/user_state.py index d7f5192..dc7b112 100644 --- a/app/web/db/user_state.py +++ b/app/web/db/user_state.py @@ -5,9 +5,10 @@ from sqlalchemy.orm import Session from sqlalchemy import func from datetime import datetime -from app.shared.db import crud, models +from app.shared.db import models from app.shared.user_groups import GroupInfo, GroupPermissions from app.shared.schemas import Usage, UsageResponse +from app.web.db import crud class UserState: """ diff --git a/app/web/endpoints/default.py b/app/web/endpoints/default.py index 86d7d70..186b574 100644 --- a/app/web/endpoints/default.py +++ b/app/web/endpoints/default.py @@ -5,7 +5,7 @@ from fastapi.responses import FileResponse, JSONResponse from app.shared.config import VERSION, BREAKING_CHANGES from app.shared.log import log_error -from app.shared.db import crud +from app.web.db import crud from app.shared.schemas import ActiveUser, UsageResponse from app.web.db.user_state import UserState from app.web.security import get_user_auth, bearer_security, get_user_state @@ -56,4 +56,4 @@ def get_user_usage( @default_router.get('/favicon.ico', include_in_schema=False) async def favicon() -> FileResponse: - return FileResponse("web/static/favicon.ico") + return FileResponse("app/web/static/favicon.ico") diff --git a/app/web/endpoints/interoperability.py b/app/web/endpoints/interoperability.py index 33eff6e..2c7660a 100644 --- a/app/web/endpoints/interoperability.py +++ b/app/web/endpoints/interoperability.py @@ -9,7 +9,7 @@ from sqlalchemy.orm import Session from app.shared.aa_utils import get_all_urls from app.shared.config import ALLOW_ANY_EMAIL from app.shared import business_logic, schemas -from app.shared.db import crud +from app.shared.db import worker_crud from app.shared.db.database import get_db_dependency from app.web.security import token_api_key_auth from app.shared.db import models @@ -26,10 +26,20 @@ def submit_manual_archive( auth=Depends(token_api_key_auth), db: Session = Depends(get_db_dependency) ): - result: Metadata = Metadata.from_json(manual.result) + try: + result: Metadata = Metadata.from_json(manual.result) + except json.JSONDecodeError as e: + log_error(e) + raise HTTPException(status_code=422, detail="Invalid JSON in result field.") manual.author_id = manual.author_id or ALLOW_ANY_EMAIL manual.tags.add("manual") + try: + store_until=business_logic.get_store_archive_until(db, manual.group_id) + except AssertionError as e: + log_error(e) + raise HTTPException(status_code=422, detail=str(e)) + try: archive = schemas.ArchiveCreate( author_id=manual.author_id, @@ -40,10 +50,10 @@ def submit_manual_archive( id=models.generate_uuid(), result=json.loads(result.to_json()), urls=get_all_urls(result), - store_until=business_logic.get_store_archive_until(db, manual.group_id), + store_until=store_until, ) - db_archive = crud.store_archived_url(db, archive) + db_archive = worker_crud.store_archived_url(db, archive) logger.debug(f"[MANUAL ARCHIVE STORED] {db_archive.author_id} {db_archive.url}") return JSONResponse({"id": db_archive.id}, status_code=201) except sqlalchemy.exc.IntegrityError as e: diff --git a/app/web/endpoints/sheet.py b/app/web/endpoints/sheet.py index d202834..89699d0 100644 --- a/app/web/endpoints/sheet.py +++ b/app/web/endpoints/sheet.py @@ -9,7 +9,7 @@ from app.web.db.user_state import UserState from app.shared import schemas from app.shared.task_messaging import get_celery from app.web.security import get_user_state -from app.shared.db import crud +from app.web.db import crud from app.shared.db.database import get_db_dependency sheet_router = APIRouter(prefix="/sheet", tags=["Google Spreadsheet operations"]) diff --git a/app/web/endpoints/url.py b/app/web/endpoints/url.py index 86a7c67..7e3de02 100644 --- a/app/web/endpoints/url.py +++ b/app/web/endpoints/url.py @@ -9,7 +9,7 @@ from app.shared.config import ALLOW_ANY_EMAIL from app.shared import schemas from app.shared.task_messaging import get_celery from app.web.security import get_token_or_user_auth, get_user_state -from app.shared.db import crud +from app.web.db import crud from app.web.db.user_state import UserState from app.shared.db.database import get_db_dependency diff --git a/app/web/events.py b/app/web/events.py index 65b7347..4dfdf25 100644 --- a/app/web/events.py +++ b/app/web/events.py @@ -9,11 +9,12 @@ from fastapi_utils.tasks import repeat_every from loguru import logger from fastapi_mail import FastMail, MessageSchema, MessageType -from app.shared.db import crud, models +from app.shared.db import models from app.shared.db.database import get_db, get_db_async, make_engine, wal_checkpoint from app.shared import schemas from app.shared.settings import get_settings from app.shared.task_messaging import get_celery +from app.web.db import crud from app.web.utils.metrics import measure_regular_metrics, redis_subscribe_worker_exceptions celery = get_celery() @@ -24,12 +25,9 @@ async def lifespan(app: FastAPI): # see https://fastapi.tiangolo.com/advanced/events/#lifespan # STARTUP - logger.debug("HERE 00") engine = make_engine(get_settings().DATABASE_PATH) models.Base.metadata.create_all(bind=engine) - logger.debug("HERE 01") alembic.config.main(prog="alembic", argv=['--raiseerr', 'upgrade', 'head']) - logger.debug("HERE 02") logging.getLogger("uvicorn.access").disabled = True # loguru asyncio.create_task(redis_subscribe_worker_exceptions(get_settings().REDIS_EXCEPTIONS_CHANNEL)) asyncio.create_task(repeat_measure_regular_metrics()) diff --git a/app/web/main.py b/app/web/main.py index a11908e..aabb4f4 100644 --- a/app/web/main.py +++ b/app/web/main.py @@ -15,7 +15,7 @@ from app.web.middleware import logging_middleware from app.shared import schemas from app.shared.task_messaging import get_celery -from app.shared.db import crud +from app.web.db import crud from app.web.security import get_user_auth, token_api_key_auth, get_token_or_user_auth from app.shared.config import VERSION, API_DESCRIPTION from app.shared.db.database import get_db_dependency @@ -141,7 +141,8 @@ def app_factory(settings = get_settings()): def archive_sheet(sheet: schemas.SubmitSheet, email=Depends(get_user_auth), db: Session = Depends(get_db_dependency)): logger.info(f"SHEET TASK for {sheet=}") sheet.author_id = email - if not crud.is_user_in_group(db, email, sheet.group_id): + #NB: no longer working + if not crud.is_user_in_group(email, sheet.group_id): raise HTTPException(status_code=403, detail="User does not have access to this group.") task = celery.signature("create_sheet_task", args=[sheet.model_dump_json()]).delay() return JSONResponse({"id": task.id}) diff --git a/app/web/middleware.py b/app/web/middleware.py index 3663af9..aa5c077 100644 --- a/app/web/middleware.py +++ b/app/web/middleware.py @@ -2,6 +2,7 @@ from loguru import logger from fastapi import Request from app.shared.log import log_error +from app.web.utils.metrics import EXCEPTION_COUNTER async def logging_middleware(request: Request, call_next): @@ -10,7 +11,6 @@ async def logging_middleware(request: Request, call_next): logger.info(f"{request.client.host}:{request.client.port} {request.method} {request.url._url} - HTTP {response.status_code}") return response except Exception as e: - from web.utils.metrics import EXCEPTION_COUNTER EXCEPTION_COUNTER.labels(type=e.__class__.__name__).inc() logger.info(f"{request.client.host}:{request.client.port} {request.method} {request.url._url} - {e.__class__.__name__} {e}") log_error(e) diff --git a/app/web/utils/metrics.py b/app/web/utils/metrics.py index 0a2d793..64aaf9c 100644 --- a/app/web/utils/metrics.py +++ b/app/web/utils/metrics.py @@ -4,7 +4,7 @@ import os import shutil from prometheus_client import Counter, Gauge -from app.shared.db import crud +from app.web.db import crud from app.shared.db.database import get_db from app.shared.log import log_error from app.shared.task_messaging import get_redis diff --git a/app/worker/main.py b/app/worker/main.py index 966cd5c..8ced19b 100644 --- a/app/worker/main.py +++ b/app/worker/main.py @@ -7,13 +7,14 @@ from sqlalchemy import exc from auto_archiver import Config, ArchivingOrchestrator, Metadata -from app.shared.db import crud, models +from app.shared.db import models from app.shared.db.database import get_db from app.shared import business_logic, schemas from app.shared.task_messaging import get_celery, get_redis from app.shared.settings import get_settings from app.shared.log import log_error from app.shared.aa_utils import get_all_urls +from app.shared.db import worker_crud settings = get_settings() @@ -79,7 +80,7 @@ def create_sheet_task(self, sheet_json: str): if stats["archived"] > 0: with get_db() as session: - crud.update_sheet_last_url_archived_at(session, sheet.sheet_id) + worker_crud.update_sheet_last_url_archived_at(session, sheet.sheet_id) logger.info(f"SHEET DONE {sheet=}") # TODO: is this used anywhere? maybe drop it @@ -88,11 +89,11 @@ def create_sheet_task(self, sheet_json: str): def load_orchestrator(group_id: str, orchestrator_for_sheet: bool = False, overwrite_configs: dict = {}) -> ArchivingOrchestrator: with get_db() as session: - group = crud.get_group(session, group_id) + group = worker_crud.get_group(session, group_id) if orchestrator_for_sheet: orchestrator_fn = group.orchestrator_sheet else: - orchestrator_fn = crud.get_group(session, group_id).orchestrator + orchestrator_fn = worker_crud.get_group(session, group_id).orchestrator assert orchestrator_fn, f"no orchestrator found for {group_id}" @@ -103,7 +104,7 @@ def load_orchestrator(group_id: str, orchestrator_for_sheet: bool = False, overw def insert_result_into_db(archive: schemas.ArchiveCreate) -> str: with get_db() as session: - db_task = crud.store_archived_url(session, archive) + db_task = worker_crud.store_archived_url(session, archive) logger.debug(f"[ARCHIVE STORED] {db_task.author_id} {db_task.url}") return db_task.id