further worker/web separation and tests fixed

This commit is contained in:
msramalho
2025-02-11 18:18:49 +00:00
parent b452ec9869
commit a9dd278d24
28 changed files with 301 additions and 280 deletions

0
app/logs/.gitkeep Normal file
View File

View File

@@ -4,11 +4,12 @@
import datetime
from sqlalchemy.orm import Session
from app.shared.db import crud
from app.shared.db import worker_crud
def get_store_archive_until(db: Session, group_id: str) -> datetime.datetime:
group = crud.get_group(db, group_id)
group = worker_crud.get_group(db, group_id)
assert group, f"Group {group_id} not found."
max_lifespan = group.permissions.get("max_archive_lifespan_months", -1)
if max_lifespan == -1: return None

View File

@@ -0,0 +1,61 @@
from sqlalchemy.orm import Session
from datetime import datetime
from app.shared.db import models
from app.shared import schemas
# TODO: isolate database operations away from worker and into WEB
# ONLY WORKER
def update_sheet_last_url_archived_at(db: Session, sheet_id: str):
db_sheet = db.query(models.Sheet).filter(models.Sheet.id == sheet_id).first()
if db_sheet:
db_sheet.last_url_archived_at = datetime.now()
db.commit()
return True
return False
# ONLY WORKER and INTEROP
def get_group(db: Session, group_name: str) -> models.Group:
return db.query(models.Group).filter(models.Group.id == group_name).first()
def create_or_get_user(db: Session, author_id: str) -> models.User:
if type(author_id) == str: author_id = author_id.lower()
db_user = db.query(models.User).filter(models.User.email == author_id).first()
if not db_user:
db_user = models.User(email=author_id)
db.add(db_user)
db.commit()
db.refresh(db_user)
return db_user
def create_tag(db: Session, tag: str) -> models.Tag:
db_tag = db.query(models.Tag).filter(models.Tag.id == tag).first()
if not db_tag:
db_tag = models.Tag(id=tag)
db.add(db_tag)
db.commit()
db.refresh(db_tag)
return db_tag
def create_task(db: Session, task: schemas.ArchiveCreate, tags: list[models.Tag], urls: list[models.ArchiveUrl]) -> models.Archive:
# TODO: rename task to archive
db_task = models.Archive(id=task.id, url=task.url, result=task.result, public=task.public, author_id=task.author_id, group_id=task.group_id, sheet_id=task.sheet_id, store_until=task.store_until)
db_task.tags = tags
db_task.urls = urls
db.add(db_task)
db.commit()
db.refresh(db_task)
return db_task
def store_archived_url(db: Session, archive: schemas.ArchiveCreate) -> models.Archive:
# create and load user, tags, if needed
create_or_get_user(db, archive.author_id)
db_tags = [create_tag(db, tag) for tag in archive.tags]
# insert everything
db_task = create_task(db, task=archive, tags=db_tags, urls=archive.urls)
return db_task

View File

@@ -113,5 +113,4 @@ class CelerySheetTask(BaseModel):
class SubmitManualArchive(ArchiveTrigger):
url: None = None
result: str # should be a Metadata.to_json()

View File

@@ -3,8 +3,8 @@ from fastapi.testclient import TestClient
import pytest
from unittest.mock import patch
from app.shared.config import ALLOW_ANY_EMAIL
from app.web.db.user_state import UserState
from app.shared.settings import Settings
from app.web.db.user_state import UserState
@pytest.fixture(autouse=True)
@@ -21,7 +21,7 @@ def get_settings():
@pytest.fixture(autouse=True)
def mock_settings():
with patch('shared.settings.Settings', return_value=Settings(_env_file=".env.test")) as mock_settings:
with patch('app.shared.settings.Settings', return_value=Settings(_env_file=".env.test")) as mock_settings:
yield mock_settings
@@ -29,7 +29,7 @@ def mock_settings():
def test_db(get_settings: Settings):
from app.shared.db import models
from app.shared.db.database import make_engine
from app.shared.db.crud import get_user_groups
from app.web.db.crud import get_user_groups
get_user_groups.cache_clear()
make_engine.cache_clear()
@@ -62,8 +62,8 @@ def db_session(test_db):
@pytest.fixture()
def app(db_session):
from web.main import app_factory
from app.shared.db import crud
from app.web.main import app_factory
from app.web.db import crud
app = app_factory()
crud.upsert_user_groups(db_session)
return app
@@ -77,7 +77,7 @@ def client(app):
@pytest.fixture()
def app_with_auth(app, db_session):
from web.security import get_token_or_user_auth, get_user_auth, get_user_state
from app.web.security import get_token_or_user_auth, get_user_auth, get_user_state
app.dependency_overrides[get_token_or_user_auth] = lambda: "rick@example.com"
app.dependency_overrides[get_user_auth] = lambda: "morty@example.com"
app.dependency_overrides[get_user_state] = lambda: UserState(db_session, "MORTY@example.com")
@@ -92,7 +92,7 @@ def client_with_auth(app_with_auth):
@pytest.fixture()
def app_with_token(app):
from web.security import token_api_key_auth, get_token_or_user_auth
from app.web.security import token_api_key_auth, get_token_or_user_auth
app.dependency_overrides[token_api_key_auth] = lambda: ALLOW_ANY_EMAIL
app.dependency_overrides[get_token_or_user_auth] = lambda: ALLOW_ANY_EMAIL
return app

View File

@@ -0,0 +1,98 @@
from app.shared.db import models
from app.tests.web.db.test_crud import test_data
def test_get_group(test_data, db_session):
from app.shared.db import worker_crud
assert worker_crud.get_group(db_session, "spaceship") is not None
assert worker_crud.get_group(db_session, "interdimensional") is not None
assert worker_crud.get_group(db_session, "animated-characters") is not None
assert worker_crud.get_group(db_session, "non-existent!@#!%!") is None
def test_create_or_get_user(test_data, db_session):
from app.shared.db import worker_crud
assert db_session.query(models.User).count() == 3
# already exists
assert (u1 := worker_crud.create_or_get_user(db_session, "rick@example.com")) is not None
assert u1.email == "rick@example.com"
# new user
assert (u2 := worker_crud.create_or_get_user(db_session, "beth@example.com")) is not None
assert u2.email == "beth@example.com"
assert db_session.query(models.User).count() == 4
def test_create_tag(db_session):
from app.shared.db import worker_crud
assert db_session.query(models.Tag).count() == 0
# create first
create_tag = worker_crud.create_tag(db_session, "tag-101")
assert create_tag is not None
assert create_tag.id == "tag-101"
assert db_session.query(models.Tag).count() == 1
assert db_session.query(models.Tag).filter(models.Tag.id == "tag-101").first() == create_tag
# same id does not add new db entry
existing_tag = worker_crud.create_tag(db_session, "tag-101")
assert existing_tag == create_tag
assert db_session.query(models.Tag).count() == 1
# create second
second_tag = worker_crud.create_tag(db_session, "tag-102")
assert second_tag is not None
assert second_tag.id == "tag-102"
assert db_session.query(models.Tag).count() == 2
def test_create_task(db_session):
from app.shared.db import worker_crud
from app.shared import schemas
task = schemas.ArchiveCreate(
id="archive-id-456-101",
url="https://example-0.com",
result={},
public=False,
author_id="rick@example.com",
group_id="spaceship",
tags=[],
urls=[]
)
# with tags and urls
nt = worker_crud.create_task(db_session, task, [models.Tag(id="tag-101")], [models.ArchiveUrl(url="https://example-0.com/0", key="media_0")])
assert nt is not None
assert nt.id == "archive-id-456-101"
assert nt.url == "https://example-0.com"
assert nt.author_id == "rick@example.com"
assert nt.public == False
assert nt.group_id == "spaceship"
assert len(nt.tags) == 1
assert nt.tags[0].id == "tag-101"
assert len(nt.urls) == 1
assert nt.urls[0].url == "https://example-0.com/0"
assert nt.urls[0].key == "media_0"
assert nt.created_at is not None
# without tags and urls
task.id = "archive-id-456-102"
nt = worker_crud.create_task(db_session, task, [], [])
assert nt is not None
assert nt.id == "archive-id-456-102"
assert nt.url == "https://example-0.com"
assert nt.author_id == "rick@example.com"
assert nt.public == False
assert nt.group_id == "spaceship"
assert len(nt.tags) == 0
assert len(nt.urls) == 0
assert nt.created_at is not None

View File

@@ -19,17 +19,17 @@ domains:
orchestrators:
spaceship: tests/orchestration.test.yaml
interdimensional: tests/orchestration.test.yaml
default: tests/orchestration.test.yaml
spaceship: app/tests/orchestration.test.yaml
interdimensional: app/tests/orchestration.test.yaml
default: app/tests/orchestration.test.yaml
default_orchestrator: tests/orchestration.test.yaml
default_orchestrator: app/tests/orchestration.test.yaml
groups:
spaceship:
description: "The spaceship crew"
orchestrator: tests/orchestration.test.yaml
orchestrator_sheet: tests/orchestration.test.yaml
orchestrator: app/tests/orchestration.test.yaml
orchestrator_sheet: app/tests/orchestration.test.yaml
permissions:
read: ["all"]
archive_url: true
@@ -43,8 +43,8 @@ groups:
priority: "high"
interdimensional:
description: "Interdimensional travelers"
orchestrator: tests/orchestration.test.yaml
orchestrator_sheet: tests/orchestration.test.yaml
orchestrator: app/tests/orchestration.test.yaml
orchestrator_sheet: app/tests/orchestration.test.yaml
permissions:
read: ["interdimensional", "animated-characters"]
archive_url: true
@@ -58,8 +58,8 @@ groups:
priority: "high"
animated-characters:
description: "Animated characters"
orchestrator: tests/orchestration.test.yaml
orchestrator_sheet: tests/orchestration.test.yaml
orchestrator: app/tests/orchestration.test.yaml
orchestrator_sheet: app/tests/orchestration.test.yaml
permissions:
read: ["animated-characters"]
archive_url: true
@@ -72,8 +72,8 @@ groups:
priority: "low"
default:
description: "Public access"
orchestrator: tests/orchestration.test.yaml
orchestrator_sheet: tests/orchestration.test.yaml
orchestrator: app/tests/orchestration.test.yaml
orchestrator_sheet: app/tests/orchestration.test.yaml
permissions:
# read: []
archive_url: true

View File

@@ -4,7 +4,7 @@ from unittest.mock import patch
import pytest
import yaml
from app.shared.db import models
from shared.settings import Settings
from app.shared.settings import Settings
authors = ["rick@example.com", "morty@example.com", "jerry@example.com"]
@@ -55,18 +55,16 @@ def test_data(db_session):
# setup groups
assert db_session.query(models.Group).count() == 0
from app.shared.db import crud
from app.web.db import crud
crud.upsert_user_groups(db_session)
assert db_session.query(models.Group).count() == 4
assert db_session.query(models.User).count() == 3
def test_get_archive(test_data, db_session):
from app.shared.db import crud
from app.web.db import crud
from app.shared.config import ALLOW_ANY_EMAIL
print(db_session.query(models.Group).all())
# each author's archives work
assert (a0 := crud.get_archive(db_session, "archive-id-456-0", authors[0])) is not None
assert a0.id == "archive-id-456-0"
@@ -93,7 +91,7 @@ def test_get_archive(test_data, db_session):
def test_search_archives_by_url(test_data, db_session):
from app.shared.db import crud
from app.web.db import crud
from app.shared.config import ALLOW_ANY_EMAIL
# rick's archives are private
@@ -141,7 +139,7 @@ def test_search_archives_by_url(test_data, db_session):
def test_search_archives_by_email(test_data, db_session):
from app.shared.config import ALLOW_ANY_EMAIL
from app.shared.db import crud
from app.web.db import crud
# lower/upper case
assert len(crud.search_archives_by_email(db_session, "rick@example.com")) == 34
@@ -160,9 +158,9 @@ def test_search_archives_by_email(test_data, db_session):
assert a2[0].created_at == datetime(2021, 1, 1)
@patch("db.crud.DATABASE_QUERY_LIMIT", new=25)
@patch("app.web.db.crud.DATABASE_QUERY_LIMIT", new=25)
def test_max_query_limit(test_data, db_session):
from app.shared.db import crud
from app.web.db import crud
from app.shared.config import ALLOW_ANY_EMAIL
assert len(crud.search_archives_by_url(db_session, "https://example", ALLOW_ANY_EMAIL)) == 25
@@ -172,53 +170,8 @@ def test_max_query_limit(test_data, db_session):
assert len(crud.search_archives_by_email(db_session, "rick@example.com", limit=1000)) == 25
def test_create_task(db_session):
from app.shared.db import crud
from app.shared import schemas
task = schemas.ArchiveCreate(
id="archive-id-456-101",
url="https://example-0.com",
result={},
public=False,
author_id="rick@example.com",
group_id="spaceship",
tags=[],
urls=[]
)
# with tags and urls
nt = crud.create_task(db_session, task, [models.Tag(id="tag-101")], [models.ArchiveUrl(url="https://example-0.com/0", key="media_0")])
assert nt is not None
assert nt.id == "archive-id-456-101"
assert nt.url == "https://example-0.com"
assert nt.author_id == "rick@example.com"
assert nt.public == False
assert nt.group_id == "spaceship"
assert len(nt.tags) == 1
assert nt.tags[0].id == "tag-101"
assert len(nt.urls) == 1
assert nt.urls[0].url == "https://example-0.com/0"
assert nt.urls[0].key == "media_0"
assert nt.created_at is not None
# without tags and urls
task.id = "archive-id-456-102"
nt = crud.create_task(db_session, task, [], [])
assert nt is not None
assert nt.id == "archive-id-456-102"
assert nt.url == "https://example-0.com"
assert nt.author_id == "rick@example.com"
assert nt.public == False
assert nt.group_id == "spaceship"
assert len(nt.tags) == 0
assert len(nt.urls) == 0
assert nt.created_at is not None
def test_soft_delete(test_data, db_session):
from app.shared.db import crud
from app.web.db import crud
# none deleted yet
assert crud.get_archive(db_session, "archive-id-456-0", "rick@example.com") is not None
@@ -236,7 +189,7 @@ def test_soft_delete(test_data, db_session):
def test_count_archives(test_data, db_session):
from app.shared.db import crud
from app.web.db import crud
assert crud.count_archives(db_session) == 100
db_session.query(models.Archive).filter(models.Archive.id == "archive-id-456-0").delete()
@@ -245,7 +198,7 @@ def test_count_archives(test_data, db_session):
def test_count_archive_urls(test_data, db_session):
from app.shared.db import crud
from app.web.db import crud
assert crud.count_archive_urls(db_session) == 1000
db_session.query(models.ArchiveUrl).filter(models.ArchiveUrl.url == "https://example-0.com/0").delete()
@@ -260,7 +213,7 @@ def test_count_archive_urls(test_data, db_session):
def test_count_users(test_data, db_session):
from app.shared.db import crud
from app.web.db import crud
assert crud.count_users(db_session) == 3
db_session.query(models.User).filter(models.User.email == "rick@example.com").delete()
@@ -269,7 +222,7 @@ def test_count_users(test_data, db_session):
def test_count_by_users_since(test_data, db_session):
from app.shared.db import crud
from app.web.db import crud
# 100y window
assert len(cu := crud.count_by_user_since(db_session, 60 * 60 * 24 * 31 * 12 * 100)) == 3
@@ -278,32 +231,8 @@ def test_count_by_users_since(test_data, db_session):
assert cu[2].total == 33
def test_create_tag(db_session):
from app.shared.db import crud
assert db_session.query(models.Tag).count() == 0
# create first
create_tag = crud.create_tag(db_session, "tag-101")
assert create_tag is not None
assert create_tag.id == "tag-101"
assert db_session.query(models.Tag).count() == 1
assert db_session.query(models.Tag).filter(models.Tag.id == "tag-101").first() == create_tag
# same id does not add new db entry
existing_tag = crud.create_tag(db_session, "tag-101")
assert existing_tag == create_tag
assert db_session.query(models.Tag).count() == 1
# create second
second_tag = crud.create_tag(db_session, "tag-102")
assert second_tag is not None
assert second_tag.id == "tag-102"
assert db_session.query(models.Tag).count() == 2
def test_is_user_in_group(test_data, db_session):
from app.shared.db import crud
from app.web.db import crud
from app.shared.config import ALLOW_ANY_EMAIL
# see user-groups.test.yaml
@@ -339,36 +268,12 @@ def test_is_user_in_group(test_data, db_session):
]
for email, group, expected in test_pairs:
print(f"{email} in {group} == {expected}")
assert crud.is_user_in_group(db_session, email, group) == expected
assert crud.is_user_in_group(email, group) == expected
def test_get_group(test_data, db_session):
from app.shared.db import crud
assert crud.get_group(db_session, "spaceship") is not None
assert crud.get_group(db_session, "interdimensional") is not None
assert crud.get_group(db_session, "animated-characters") is not None
assert crud.get_group(db_session, "non-existent!@#!%!") is None
def test_create_or_get_user(test_data, db_session):
from app.shared.db import crud
assert db_session.query(models.User).count() == 3
# already exists
assert (u1 := crud.create_or_get_user(db_session, "rick@example.com")) is not None
assert u1.email == "rick@example.com"
# new user
assert (u2 := crud.create_or_get_user(db_session, "beth@example.com")) is not None
assert u2.email == "beth@example.com"
assert db_session.query(models.User).count() == 4
def test_upsert_group(test_data, db_session):
from app.shared.db import crud
from app.web.db import crud
assert db_session.query(models.Group).count() == 4
@@ -397,29 +302,29 @@ def test_upsert_group(test_data, db_session):
def test_upsert_user_groups(db_session):
from app.shared.db import crud
from app.web.db import crud
@patch('db.crud.get_settings', new=lambda: bad_setings)
@patch('app.web.db.crud.get_settings', new=lambda: bad_setings)
def test_missing_yaml(db_session):
with pytest.raises(FileNotFoundError):
crud.upsert_user_groups(db_session)
@patch('db.crud.get_settings', new=lambda: bad_setings)
@patch('app.web.db.crud.get_settings', new=lambda: bad_setings)
def test_broken_yaml(db_session):
with pytest.raises(yaml.YAMLError):
crud.upsert_user_groups(db_session)
bad_setings = Settings(_env_file=".env.test")
bad_setings.USER_GROUPS_FILENAME = "tests/user-groups.test.missing.yaml"
bad_setings.USER_GROUPS_FILENAME = "app/tests/user-groups.test.missing.yaml"
test_missing_yaml(db_session)
bad_setings.USER_GROUPS_FILENAME = "tests/user-groups.test.broken.yaml"
bad_setings.USER_GROUPS_FILENAME = "app/tests/user-groups.test.broken.yaml"
test_broken_yaml(db_session)
def test_create_sheet(db_session):
from app.shared.db import crud
from app.web.db import crud
assert db_session.query(models.Sheet).count() == 0
@@ -440,7 +345,7 @@ def test_create_sheet(db_session):
def test_get_user_sheet(test_data, db_session):
from app.shared.db import crud
from app.web.db import crud
assert crud.get_user_sheet(db_session, "", "sheet-0") is None
assert crud.get_user_sheet(db_session, "morty@example.com", "sheet-0") is None
@@ -451,7 +356,7 @@ def test_get_user_sheet(test_data, db_session):
def test_get_user_sheets(test_data, db_session):
from app.shared.db import crud
from app.web.db import crud
assert len(crud.get_user_sheets(db_session, "")) == 0
rick_sheets = crud.get_user_sheets(db_session, "rick@example.com")
@@ -459,10 +364,10 @@ def test_get_user_sheets(test_data, db_session):
assert [s.id for s in rick_sheets] == ["sheet-0", "sheet-0-2"]
assert len(crud.get_user_sheets(db_session, "morty@example.com")) == 1
def test_delete_sheet(test_data, db_session):
from app.shared.db import crud
from app.web.db import crud
assert crud.delete_sheet(db_session, "sheet-0", "") == False
assert crud.delete_sheet(db_session, "sheet-0", "rick@example.com") == True
assert crud.delete_sheet(db_session, "sheet-0", "rick@example.com") == False

View File

@@ -2,7 +2,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
from fastapi.testclient import TestClient
import pytest
from app.shared.config import VERSION
from tests.db.test_crud import test_data
from app.tests.web.db.test_crud import test_data
def test_endpoint_home(client_with_auth):
@@ -14,9 +14,9 @@ def test_endpoint_home(client_with_auth):
assert "groups" not in j
@patch("endpoints.default.bearer_security", new_callable=AsyncMock)
@patch("endpoints.default.get_user_auth", new_callable=AsyncMock, return_value="test@example.com")
@patch("endpoints.default.crud.get_user_groups", return_value=["group1", "group2"])
@patch("app.web.endpoints.default.bearer_security", new_callable=AsyncMock)
@patch("app.web.endpoints.default.get_user_auth", new_callable=AsyncMock, return_value="test@example.com")
@patch("app.web.endpoints.default.crud.get_user_groups", return_value=["group1", "group2"])
def test_endpoint_home_with_groups(m1, m2, m3, client_with_auth):
r = client_with_auth.get("/")
assert r.status_code == 200
@@ -27,9 +27,9 @@ def test_endpoint_home_with_groups(m1, m2, m3, client_with_auth):
assert j["groups"] == ["group1", "group2"]
@patch("endpoints.default.bearer_security", new_callable=AsyncMock)
@patch("endpoints.default.get_user_auth", new_callable=AsyncMock, return_value="test@example.com")
@patch("endpoints.default.crud.get_user_groups", side_effect=Exception('mocked error'))
@patch("app.web.endpoints.default.bearer_security", new_callable=AsyncMock)
@patch("app.web.endpoints.default.get_user_auth", new_callable=AsyncMock, return_value="test@example.com")
@patch("app.web.endpoints.default.crud.get_user_groups", side_effect=Exception('mocked error'))
def test_endpoint_home_with_groups_exception(m1, m2, m3, client_with_auth): # mocks call that triggers an internal error
r = client_with_auth.get("/")
assert r.status_code == 200
@@ -52,7 +52,7 @@ def test_endpoint_active_no_auth(client, test_no_auth):
def test_endpoint_active(app):
m_user_state = MagicMock()
from web.security import get_user_state
from app.web.security import get_user_state
app.dependency_overrides[get_user_state] = lambda: m_user_state
# inactive user
@@ -103,7 +103,7 @@ async def test_prometheus_metrics(test_data, client_with_token, get_settings):
assert 'disk_utilization{type="used"}' not in r.text
# after metrics calculation
from web.utils.metrics import measure_regular_metrics
from app.web.utils.metrics import measure_regular_metrics
await measure_regular_metrics(get_settings.DATABASE_PATH, 60 * 60 * 24 * 31 * 12 * 100)
r2 = client_with_token.get("/metrics")
assert 'disk_utilization{type="used"}' in r2.text
@@ -117,7 +117,7 @@ async def test_prometheus_metrics(test_data, client_with_token, get_settings):
assert 'database_metrics_counter_total{query="count_by_user",user="jerry@example.com"} 33.0' in r2.text
# 30s window, should not change the gauges nor the total in the counters
from web.utils.metrics import measure_regular_metrics
from app.web.utils.metrics import measure_regular_metrics
await measure_regular_metrics(get_settings.DATABASE_PATH, 30)
r3 = client_with_token.get("/metrics")
assert 'database_metrics{query="count_archives"} 100.0' in r3.text

View File

@@ -1,9 +1,9 @@
from datetime import datetime
import json
from unittest.mock import patch
from unittest.mock import MagicMock, patch
from app.shared.config import ALLOW_ANY_EMAIL
from app.shared.db import crud
from app.web.db import crud
def test_submit_manual_archive_unauthenticated(client, test_no_auth):
@@ -14,11 +14,11 @@ def test_submit_manual_archive_not_user_auth(client_with_auth, test_no_auth):
test_no_auth(client_with_auth.post, "/interop/submit-archive")
@patch("endpoints.interoperability.get_store_until", return_value=datetime.now())
@patch("app.web.endpoints.interoperability.business_logic", return_value=MagicMock(get_store_archive_until=MagicMock(return_value=datetime)))
def test_submit_manual_archive(m1, client_with_token, db_session):
# normal workflow
aa_metadata = json.dumps({"status": "test: success", "metadata": {"url": "http://example.com"}, "media": [{"filename": "fn1", "urls": ["http://example.s3.com"]}]})
r = client_with_token.post("/interop/submit-archive", json={"result": aa_metadata, "public": True, "author_id": "jerry@gmail.com", "group_id": "spaceship", "tags": ["test"]})
r = client_with_token.post("/interop/submit-archive", json={"result": aa_metadata, "public": True, "author_id": "jerry@gmail.com", "group_id": "spaceship", "tags": ["test"], "url": "http://example.com"})
assert r.status_code == 201
assert "id" in r.json()
@@ -35,6 +35,6 @@ def test_submit_manual_archive(m1, client_with_token, db_session):
# cannot have the same URL twice
aa_metadata = json.dumps({"status": "test: success", "metadata": {"url": "http://example.com"}, "media": [{"filename": "fn1", "urls": ["http://example.com", "http://example.com"]}]})
r = client_with_token.post("/interop/submit-archive", json={"result": aa_metadata, "public": False, "author_id": "jerry@gmail.com", "tags": ["test"]})
r = client_with_token.post("/interop/submit-archive", json={"result": aa_metadata, "public": False, "author_id": "jerry@gmail.com", "tags": ["test"], "url": "http://example.com"})
assert r.status_code == 422
assert r.json() == {"detail": "Cannot insert into DB due to integrity error, likely duplicate urls."}

View File

@@ -45,7 +45,7 @@ def test_create_sheet_endpoint(app_with_auth, db_session):
assert response.json() == {"detail": "User does not have access to this group."}
# switch to jerry who's got less quota/permissions
from web.security import get_user_state
from app.web.security import get_user_state
from app.web.db.user_state import UserState
app_with_auth.dependency_overrides[get_user_state] = lambda: UserState(db_session, "jerry@example.com")
client_jerry = TestClient(app_with_auth)
@@ -144,7 +144,7 @@ def test_delete_sheet_endpoint(client_with_auth, db_session):
class TestArchiveUserSheetEndpoint:
@patch("endpoints.sheet.celery", return_value=MagicMock())
@patch("app.web.endpoints.sheet.celery", return_value=MagicMock())
def test_normal_flow(self, m_celery, client_with_auth, db_session):
from app.shared.db import models
db_session.add(models.Sheet(id="123-sheet-id", name="Test Sheet 1", author_id="morty@example.com", group_id="spaceship", frequency="hourly"))

View File

@@ -5,7 +5,7 @@ def test_endpoint_task_status_no_auth(client, test_no_auth):
test_no_auth(client.get, "/task/test-task-id")
@patch("endpoints.task.AsyncResult")
@patch("app.web.endpoints.task.AsyncResult")
def test_get_status_success(mock_async_result, client_with_auth):
mock_async_result.return_value.status = "SUCCESS"
mock_async_result.return_value.result = {"data": "some result"}
@@ -20,7 +20,7 @@ def test_get_status_success(mock_async_result, client_with_auth):
}
@patch("endpoints.task.AsyncResult")
@patch("app.web.endpoints.task.AsyncResult")
def test_get_status_failure(mock_async_result, client_with_auth):
mock_async_result.return_value.status = "FAILURE"
@@ -36,7 +36,7 @@ def test_get_status_failure(mock_async_result, client_with_auth):
}
@patch("endpoints.task.AsyncResult")
@patch("app.web.endpoints.task.AsyncResult")
def test_get_status_pending(mock_async_result, client_with_auth):
mock_async_result.return_value.status = "PENDING"
mock_async_result.return_value.result = None

View File

@@ -8,8 +8,8 @@ def test_archive_url_unauthenticated(client, test_no_auth):
test_no_auth(client.post, "/url/archive")
@patch("endpoints.url.UserState")
@patch("endpoints.url.celery", return_value=MagicMock())
@patch("app.web.endpoints.url.UserState")
@patch("app.web.endpoints.url.celery", return_value=MagicMock())
def test_archive_url(m_celery, m2, client_with_auth):
m_signature = MagicMock()
m_signature.delay.return_value = TaskResult(id="123-456-789", status="PENDING", result="")
@@ -81,7 +81,7 @@ def test_archive_url(m_celery, m2, client_with_auth):
assert m_signature.delay.call_count == 2
@patch("endpoints.url.UserState")
@patch("app.web.endpoints.url.UserState")
def test_archive_url_quotas(m1, client_with_auth):
m_user_state = MagicMock()
m1.return_value = m_user_state
@@ -102,7 +102,7 @@ def test_archive_url_quotas(m1, client_with_auth):
m_user_state.has_quota_max_monthly_mbs.assert_called_once()
@patch("endpoints.url.celery", return_value=MagicMock())
@patch("app.web.endpoints.url.celery", return_value=MagicMock())
def test_archive_url_with_api_token(m_celery, client_with_token):
m_signature = MagicMock()
m_signature.delay.return_value = TaskResult(id="123-456-789", status="PENDING", result="")
@@ -130,9 +130,11 @@ def test_search_by_url(client_with_auth, client_with_token, db_session):
assert response.status_code == 200
assert response.json() == []
from app.shared.db import crud, schemas
from app.shared import schemas
from app.shared.db import worker_crud
for i in range(11):
crud.create_task(db_session, ArchiveCreate(id=f"url-456-{i}", url="https://example.com" if i < 10 else "https://something-else.com", result={}, public=True, author_id="rick@example.com"), [], [])
#TODO: fix as this method is gone to shared.db
worker_crud.create_task(db_session, ArchiveCreate(id=f"url-456-{i}", url="https://example.com" if i < 10 else "https://something-else.com", result={}, public=True, author_id="rick@example.com"), [], [])
# NB: this insertion is too fast for the ordering to be correct as they are within the same second
response = client_with_auth.get("/url/search?url=https://example.com")
@@ -165,7 +167,7 @@ def test_search_by_url(client_with_auth, client_with_token, db_session):
assert len(response.json()) == 10
@patch("endpoints.url.UserState")
@patch("app.web.endpoints.url.UserState")
def test_search_no_read_access(mock_user_state, client_with_auth):
mock_user_state.return_value.read = False
mock_user_state.return_value.read_public = False
@@ -184,8 +186,8 @@ def test_delete_task(client_with_auth, db_session):
assert response.status_code == 200
assert response.json() == {"id": "delete-123-456-789", "deleted": False}
from app.shared.db import crud
crud.create_task(db_session, ArchiveCreate(id="delete-123-456-789", url="https://example.com", result={}, public=True, author_id="morty@example.com"), [], [])
from app.shared.db import worker_crud
worker_crud.create_task(db_session, ArchiveCreate(id="delete-123-456-789", url="https://example.com", result={}, public=True, author_id="morty@example.com"), [], [])
response = client_with_auth.delete("/url/delete-123-456-789")
assert response.status_code == 200

View File

@@ -17,9 +17,9 @@ def test_alembic(db_session):
alembic.config.main(argv=['--raiseerr', 'upgrade', 'head'])
alembic.config.main(argv=['--raiseerr', 'downgrade', 'base'])
@patch("endpoints.default.crud.soft_delete_task", side_effect=Exception('mocked error'))
@patch("app.web.endpoints.default.crud.soft_delete_task", side_effect=Exception('mocked error'))
def test_logging_middleware(m1, client_with_auth):
from web.utils.metrics import EXCEPTION_COUNTER
from app.web.utils.metrics import EXCEPTION_COUNTER
assert len(EXCEPTION_COUNTER.collect()[0].samples) == 0
with pytest.raises(Exception, match="mocked error"):
client_with_auth.delete("/url/123")
@@ -36,7 +36,7 @@ def test_serve_local_archive_logic(get_settings):
try:
# modify the settings
get_settings.SERVE_LOCAL_ARCHIVE = "/app/local_archive_test"
from web.main import app_factory
from app.web.main import app_factory
app = app_factory(get_settings)
# test

View File

@@ -8,7 +8,7 @@ from app.shared.config import ALLOW_ANY_EMAIL
def test_secure_compare():
from web.security import secure_compare
from app.web.security import secure_compare
assert secure_compare("test", "test")
assert not secure_compare("test", "test2")
@@ -16,14 +16,14 @@ def test_secure_compare():
@pytest.mark.asyncio
async def test_get_token_or_user_auth_with_api():
from web.security import get_token_or_user_auth
from app.web.security import get_token_or_user_auth
mock_api = HTTPAuthorizationCredentials(scheme="lorem", credentials="this_is_the_test_api_token")
assert await get_token_or_user_auth(mock_api) == ALLOW_ANY_EMAIL
@pytest.mark.asyncio
async def test_get_token_or_user_auth_with_user():
from web.security import get_token_or_user_auth
from app.web.security import get_token_or_user_auth
bad_user = HTTPAuthorizationCredentials(scheme="ipsum", credentials="invalid")
e: pytest.ExceptionInfo = None
with pytest.raises(HTTPException) as e:
@@ -32,18 +32,18 @@ async def test_get_token_or_user_auth_with_user():
assert e.value.detail == "invalid access_token"
@patch("web.security.authenticate_user", return_value=(True, "summer@example.com"))
@patch("app.web.security.authenticate_user", return_value=(True, "summer@example.com"))
@pytest.mark.asyncio
async def test_get_user_auth(m1):
from web.security import get_user_auth
from app.web.security import get_user_auth
good_user = HTTPAuthorizationCredentials(scheme="ipsum", credentials="valid-and-good")
assert await get_user_auth(good_user) == "summer@example.com"
@patch("web.security.secure_compare", return_value=False)
@patch("app.web.security.secure_compare", return_value=False)
@pytest.mark.asyncio
async def test_token_api_key_auth_exception(m1):
from web.security import token_api_key_auth
from app.web.security import token_api_key_auth
e: pytest.ExceptionInfo = None
with pytest.raises(HTTPException) as e:
@@ -54,12 +54,12 @@ async def test_token_api_key_auth_exception(m1):
@pytest.mark.asyncio
async def test_authenticate_user():
from web.security import authenticate_user
from app.web.security import authenticate_user
assert authenticate_user("test") == (False, "invalid access_token")
assert authenticate_user(123) == (False, "invalid access_token")
with patch("web.security.requests.get") as mock_get:
with patch("app.web.security.requests.get") as mock_get:
# bad response from oauth2
mock_get.return_value.status_code = 403
assert authenticate_user("this-will-call-requests") == (False, "invalid token")
@@ -100,9 +100,9 @@ async def test_authenticate_user():
@pytest.mark.asyncio
async def test_authenticate_user_exception():
from web.security import authenticate_user
from app.web.security import authenticate_user
with patch("web.security.requests.get") as mock_get:
with patch("app.web.security.requests.get") as mock_get:
mock_get.return_value.status_code = 200
mock_get.return_value.json.side_effect = Exception("mocked error")
assert authenticate_user("this-will-call-requests") == (False, "exception occurred")

View File

@@ -16,12 +16,12 @@ class Test_create_archive_task():
URL = "https://example-live.com"
archive = schemas.ArchiveCreate(url=URL, tags=["tag-celery"], public=True, author_id="rick@example.com", group_id="interstellar")
@patch("worker.main.insert_result_into_db")
@patch("worker.main.get_store_until", return_value=datetime.now())
@patch("worker.main.load_orchestrator")
@patch("app.worker.main.insert_result_into_db")
@patch("app.worker.main.get_store_until", return_value=datetime.now())
@patch("app.worker.main.load_orchestrator")
@patch("celery.app.task.Task.request")
def test_success(self, m_req, m_load, m_store, m_insert, db_session):
from worker.main import create_archive_task
from app.worker.main import create_archive_task
m_req.id = "this-just-in"
mock_orchestrator = self.mock_orchestrator_choice(m_load)
@@ -38,14 +38,14 @@ class Test_create_archive_task():
assert len(task["media"]) == 0
def test_raise_invalid(self):
from worker.main import create_archive_task
from app.worker.main import create_archive_task
with pytest.raises(Exception):
create_archive_task(self.archive.model_dump_json())
@patch("worker.main.insert_result_into_db", side_effect=Exception)
@patch("worker.main.load_orchestrator")
@patch("app.worker.main.insert_result_into_db", side_effect=Exception)
@patch("app.worker.main.load_orchestrator")
def test_raise_db_error(self, m_load, m_insert):
from worker.main import create_archive_task
from app.worker.main import create_archive_task
mock_orchestrator = self.mock_orchestrator_choice(m_load)
with pytest.raises(Exception):
@@ -53,10 +53,10 @@ class Test_create_archive_task():
mock_orchestrator.feed_item.assert_called_once()
@patch("worker.main.insert_result_into_db", return_value=None)
@patch("worker.main.load_orchestrator")
@patch("app.worker.main.insert_result_into_db", return_value=None)
@patch("app.worker.main.load_orchestrator")
def test_raise_empty_result(self, m_load, m_insert):
from worker.main import create_archive_task
from app.worker.main import create_archive_task
mock_orchestrator = self.mock_orchestrator_choice(m_load)
with pytest.raises(Exception) as e:
@@ -75,11 +75,11 @@ class Test_create_sheet_task():
URL = "https://example-live.com"
sheet = schemas.SubmitSheet(sheet_id="123", author_id="rick@example.com", group_id="interstellar", tags=["spaceship"])
@patch("worker.main.models.generate_uuid", return_value="constant-uuid")
@patch("worker.main.get_store_until", return_value=datetime.now())
@patch("worker.main.load_orchestrator")
@patch("app.worker.main.models.generate_uuid", return_value="constant-uuid")
@patch("app.worker.main.get_store_until", return_value=datetime.now())
@patch("app.worker.main.load_orchestrator")
def test_success(self, m_load, m_store, m_uuid, db_session):
from worker.main import create_sheet_task
from app.worker.main import create_sheet_task
assert db_session.query(models.Archive).filter(models.Archive.url == self.URL).count() == 0
@@ -116,7 +116,7 @@ class Test_create_sheet_task():
def test_get_all_urls(db_session):
from worker.main import get_all_urls
from app.worker.main import get_all_urls
from auto_archiver import Metadata
meta = Metadata().set_url("https://example.com")

View File

@@ -9,7 +9,6 @@ from sqlalchemy.ext.asyncio import AsyncSession
from app.shared.config import ALLOW_ANY_EMAIL
from app.shared.db.database import get_db
from app.shared.db import models
from app.shared import schemas
from app.shared.settings import get_settings
from app.shared.user_groups import UserGroups
from app.shared.utils.misc import fnv1a_hash_mod
@@ -55,16 +54,6 @@ def search_archives_by_url(db: Session, url: str, email: str, skip: int = 0, lim
def search_archives_by_email(db: Session, email: str, skip: int = 0, limit: int = 100):
return base_query(db).filter(models.Archive.author_id == email).order_by(models.Archive.created_at.desc()).offset(skip).limit(get_limit(limit)).all()
#TODO: rename task to archive
def create_task(db: Session, task: schemas.ArchiveCreate, tags: list[models.Tag], urls: list[models.ArchiveUrl]) -> models.Archive:
db_task = models.Archive(id=task.id, url=task.url, result=task.result, public=task.public, author_id=task.author_id, group_id=task.group_id, sheet_id=task.sheet_id, store_until=task.store_until)
db_task.tags = tags
db_task.urls = urls
db.add(db_task)
db.commit()
db.refresh(db_task)
return db_task
def soft_delete_task(db: Session, task_id: str, email: str) -> bool:
# TODO: implement hard-delete with cronjob that deletes from S3
@@ -113,17 +102,7 @@ async def soft_delete_expired_archives(db: AsyncSession) -> dict:
# --------------- TAG
def create_tag(db: Session, tag: str) -> models.Tag:
db_tag = db.query(models.Tag).filter(models.Tag.id == tag).first()
if not db_tag:
db_tag = models.Tag(id=tag)
db.add(db_tag)
db.commit()
db.refresh(db_tag)
return db_tag
def is_user_in_group(db: Session, email: str, group_name: str) -> models.Group:
def is_user_in_group(email: str, group_name: str) -> models.Group:
if email == ALLOW_ANY_EMAIL: return True
return len(group_name) and len(email) and group_name in get_user_groups(email)
@@ -150,21 +129,6 @@ def get_user_groups(email: str) -> list[str]:
# --------------- INIT User-Groups
def get_group(db: Session, group_name: str) -> models.Group:
return db.query(models.Group).filter(models.Group.id == group_name).first()
def create_or_get_user(db: Session, author_id: str) -> models.User:
if type(author_id) == str: author_id = author_id.lower()
db_user = db.query(models.User).filter(models.User.email == author_id).first()
if not db_user:
db_user = models.User(email=author_id)
db.add(db_user)
db.commit()
db.refresh(db_user)
return db_user
def upsert_group(db: Session, group_name: str, description: str, orchestrator: str, orchestrator_sheet: str, permissions: dict, domains: list) -> models.Group:
db_group = db.query(models.Group).filter(models.Group.id == group_name).first()
if db_group is None:
@@ -285,14 +249,6 @@ async def delete_stale_sheets(db: AsyncSession, inactivity_days: int) -> dict:
await db.commit()
return dict(deleted)
def update_sheet_last_url_archived_at(db: Session, sheet_id: str):
db_sheet = db.query(models.Sheet).filter(models.Sheet.id == sheet_id).first()
if db_sheet:
db_sheet.last_url_archived_at = datetime.now()
db.commit()
return True
return False
def delete_sheet(db: Session, sheet_id: str, email: str) -> bool:
db_sheet = db.query(models.Sheet).filter(models.Sheet.id == sheet_id, models.Sheet.author_id == email).first()
@@ -300,15 +256,3 @@ def delete_sheet(db: Session, sheet_id: str, email: str) -> bool:
db.delete(db_sheet)
db.commit()
return db_sheet is not None
#--- Celery worker tasks
def store_archived_url(db: Session, archive: schemas.ArchiveCreate) -> models.Archive:
# create and load user, tags, if needed
create_or_get_user(db, archive.author_id)
db_tags = [create_tag(db, tag) for tag in archive.tags]
# insert everything
db_task = create_task(db, task=archive, tags=db_tags, urls=archive.urls)
return db_task

View File

@@ -5,9 +5,10 @@ from sqlalchemy.orm import Session
from sqlalchemy import func
from datetime import datetime
from app.shared.db import crud, models
from app.shared.db import models
from app.shared.user_groups import GroupInfo, GroupPermissions
from app.shared.schemas import Usage, UsageResponse
from app.web.db import crud
class UserState:
"""

View File

@@ -5,7 +5,7 @@ from fastapi.responses import FileResponse, JSONResponse
from app.shared.config import VERSION, BREAKING_CHANGES
from app.shared.log import log_error
from app.shared.db import crud
from app.web.db import crud
from app.shared.schemas import ActiveUser, UsageResponse
from app.web.db.user_state import UserState
from app.web.security import get_user_auth, bearer_security, get_user_state
@@ -56,4 +56,4 @@ def get_user_usage(
@default_router.get('/favicon.ico', include_in_schema=False)
async def favicon() -> FileResponse:
return FileResponse("web/static/favicon.ico")
return FileResponse("app/web/static/favicon.ico")

View File

@@ -9,7 +9,7 @@ from sqlalchemy.orm import Session
from app.shared.aa_utils import get_all_urls
from app.shared.config import ALLOW_ANY_EMAIL
from app.shared import business_logic, schemas
from app.shared.db import crud
from app.shared.db import worker_crud
from app.shared.db.database import get_db_dependency
from app.web.security import token_api_key_auth
from app.shared.db import models
@@ -26,10 +26,20 @@ def submit_manual_archive(
auth=Depends(token_api_key_auth),
db: Session = Depends(get_db_dependency)
):
result: Metadata = Metadata.from_json(manual.result)
try:
result: Metadata = Metadata.from_json(manual.result)
except json.JSONDecodeError as e:
log_error(e)
raise HTTPException(status_code=422, detail="Invalid JSON in result field.")
manual.author_id = manual.author_id or ALLOW_ANY_EMAIL
manual.tags.add("manual")
try:
store_until=business_logic.get_store_archive_until(db, manual.group_id)
except AssertionError as e:
log_error(e)
raise HTTPException(status_code=422, detail=str(e))
try:
archive = schemas.ArchiveCreate(
author_id=manual.author_id,
@@ -40,10 +50,10 @@ def submit_manual_archive(
id=models.generate_uuid(),
result=json.loads(result.to_json()),
urls=get_all_urls(result),
store_until=business_logic.get_store_archive_until(db, manual.group_id),
store_until=store_until,
)
db_archive = crud.store_archived_url(db, archive)
db_archive = worker_crud.store_archived_url(db, archive)
logger.debug(f"[MANUAL ARCHIVE STORED] {db_archive.author_id} {db_archive.url}")
return JSONResponse({"id": db_archive.id}, status_code=201)
except sqlalchemy.exc.IntegrityError as e:

View File

@@ -9,7 +9,7 @@ from app.web.db.user_state import UserState
from app.shared import schemas
from app.shared.task_messaging import get_celery
from app.web.security import get_user_state
from app.shared.db import crud
from app.web.db import crud
from app.shared.db.database import get_db_dependency
sheet_router = APIRouter(prefix="/sheet", tags=["Google Spreadsheet operations"])

View File

@@ -9,7 +9,7 @@ from app.shared.config import ALLOW_ANY_EMAIL
from app.shared import schemas
from app.shared.task_messaging import get_celery
from app.web.security import get_token_or_user_auth, get_user_state
from app.shared.db import crud
from app.web.db import crud
from app.web.db.user_state import UserState
from app.shared.db.database import get_db_dependency

View File

@@ -9,11 +9,12 @@ from fastapi_utils.tasks import repeat_every
from loguru import logger
from fastapi_mail import FastMail, MessageSchema, MessageType
from app.shared.db import crud, models
from app.shared.db import models
from app.shared.db.database import get_db, get_db_async, make_engine, wal_checkpoint
from app.shared import schemas
from app.shared.settings import get_settings
from app.shared.task_messaging import get_celery
from app.web.db import crud
from app.web.utils.metrics import measure_regular_metrics, redis_subscribe_worker_exceptions
celery = get_celery()
@@ -24,12 +25,9 @@ async def lifespan(app: FastAPI):
# see https://fastapi.tiangolo.com/advanced/events/#lifespan
# STARTUP
logger.debug("HERE 00")
engine = make_engine(get_settings().DATABASE_PATH)
models.Base.metadata.create_all(bind=engine)
logger.debug("HERE 01")
alembic.config.main(prog="alembic", argv=['--raiseerr', 'upgrade', 'head'])
logger.debug("HERE 02")
logging.getLogger("uvicorn.access").disabled = True # loguru
asyncio.create_task(redis_subscribe_worker_exceptions(get_settings().REDIS_EXCEPTIONS_CHANNEL))
asyncio.create_task(repeat_measure_regular_metrics())

View File

@@ -15,7 +15,7 @@ from app.web.middleware import logging_middleware
from app.shared import schemas
from app.shared.task_messaging import get_celery
from app.shared.db import crud
from app.web.db import crud
from app.web.security import get_user_auth, token_api_key_auth, get_token_or_user_auth
from app.shared.config import VERSION, API_DESCRIPTION
from app.shared.db.database import get_db_dependency
@@ -141,7 +141,8 @@ def app_factory(settings = get_settings()):
def archive_sheet(sheet: schemas.SubmitSheet, email=Depends(get_user_auth), db: Session = Depends(get_db_dependency)):
logger.info(f"SHEET TASK for {sheet=}")
sheet.author_id = email
if not crud.is_user_in_group(db, email, sheet.group_id):
#NB: no longer working
if not crud.is_user_in_group(email, sheet.group_id):
raise HTTPException(status_code=403, detail="User does not have access to this group.")
task = celery.signature("create_sheet_task", args=[sheet.model_dump_json()]).delay()
return JSONResponse({"id": task.id})

View File

@@ -2,6 +2,7 @@
from loguru import logger
from fastapi import Request
from app.shared.log import log_error
from app.web.utils.metrics import EXCEPTION_COUNTER
async def logging_middleware(request: Request, call_next):
@@ -10,7 +11,6 @@ async def logging_middleware(request: Request, call_next):
logger.info(f"{request.client.host}:{request.client.port} {request.method} {request.url._url} - HTTP {response.status_code}")
return response
except Exception as e:
from web.utils.metrics import EXCEPTION_COUNTER
EXCEPTION_COUNTER.labels(type=e.__class__.__name__).inc()
logger.info(f"{request.client.host}:{request.client.port} {request.method} {request.url._url} - {e.__class__.__name__} {e}")
log_error(e)

View File

@@ -4,7 +4,7 @@ import os
import shutil
from prometheus_client import Counter, Gauge
from app.shared.db import crud
from app.web.db import crud
from app.shared.db.database import get_db
from app.shared.log import log_error
from app.shared.task_messaging import get_redis

View File

@@ -7,13 +7,14 @@ from sqlalchemy import exc
from auto_archiver import Config, ArchivingOrchestrator, Metadata
from app.shared.db import crud, models
from app.shared.db import models
from app.shared.db.database import get_db
from app.shared import business_logic, schemas
from app.shared.task_messaging import get_celery, get_redis
from app.shared.settings import get_settings
from app.shared.log import log_error
from app.shared.aa_utils import get_all_urls
from app.shared.db import worker_crud
settings = get_settings()
@@ -79,7 +80,7 @@ def create_sheet_task(self, sheet_json: str):
if stats["archived"] > 0:
with get_db() as session:
crud.update_sheet_last_url_archived_at(session, sheet.sheet_id)
worker_crud.update_sheet_last_url_archived_at(session, sheet.sheet_id)
logger.info(f"SHEET DONE {sheet=}")
# TODO: is this used anywhere? maybe drop it
@@ -88,11 +89,11 @@ def create_sheet_task(self, sheet_json: str):
def load_orchestrator(group_id: str, orchestrator_for_sheet: bool = False, overwrite_configs: dict = {}) -> ArchivingOrchestrator:
with get_db() as session:
group = crud.get_group(session, group_id)
group = worker_crud.get_group(session, group_id)
if orchestrator_for_sheet:
orchestrator_fn = group.orchestrator_sheet
else:
orchestrator_fn = crud.get_group(session, group_id).orchestrator
orchestrator_fn = worker_crud.get_group(session, group_id).orchestrator
assert orchestrator_fn, f"no orchestrator found for {group_id}"
@@ -103,7 +104,7 @@ def load_orchestrator(group_id: str, orchestrator_for_sheet: bool = False, overw
def insert_result_into_db(archive: schemas.ArchiveCreate) -> str:
with get_db() as session:
db_task = crud.store_archived_url(session, archive)
db_task = worker_crud.store_archived_url(session, archive)
logger.debug(f"[ARCHIVE STORED] {db_task.author_id} {db_task.url}")
return db_task.id