Add pre-commit with GitHub Action (#56)

* Update pyproject.toml

* add pre-commit

* Create .pre-commit-config.yaml

* Comment out ruff

* Update .pre-commit-config.yaml

* General formatting

* Create format-and-fail.yml

* Update ci.yml

* Add pre-commit to dev dependencies

* Update pyproject.toml
This commit is contained in:
Michael Plunkett
2025-02-26 10:42:28 -06:00
committed by GitHub
parent d9f36957cd
commit 7e48f706df
68 changed files with 473 additions and 182 deletions

View File

@@ -1,11 +1,11 @@
from logging.config import fileConfig
from sqlalchemy import engine_from_config
from sqlalchemy import pool
from alembic import context
from sqlalchemy import engine_from_config, pool
from app.shared.settings import get_settings
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config

View File

@@ -5,8 +5,8 @@ Revises: 1636724ec4b1
Create Date: 2025-02-08 15:22:20.392522
"""
from alembic import op
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.

View File

@@ -5,8 +5,8 @@ Revises: a23aaf3ae930
Create Date: 2025-02-05 19:19:01.984396
"""
from alembic import op
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.

View File

@@ -5,8 +5,8 @@ Revises: 02b2f6d17ed0
Create Date: 2025-02-11 21:53:23.293274
"""
from alembic import op
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.

View File

@@ -5,8 +5,8 @@ Revises: fa012ec405b8
Create Date: 2024-11-04 11:12:30.237299
"""
from alembic import op
import sqlalchemy as sa
from alembic import op
from sqlalchemy.engine.reflection import Inspector

View File

@@ -1,12 +1,13 @@
"""modify archive url to have uuid id instead of url unique constraint
Revision ID: 9369a264945b
Revises:
Revises:
Create Date: 2023-12-20 17:24:59.320691
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = '9369a264945b'
down_revision = None

View File

@@ -5,8 +5,8 @@ Revises: 89121d2c96d8
Create Date: 2025-02-04 12:19:20.753570
"""
from alembic import op
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.

View File

@@ -5,8 +5,8 @@ Revises: 93a611e4c066
Create Date: 2024-10-31 09:36:50.360710
"""
from alembic import op
import sqlalchemy as sa
from alembic import op
from sqlalchemy.engine.reflection import Inspector

View File

@@ -1,11 +1,13 @@
# TODO: code in this file should eventually be moved to the auto-archiver code base
from typing import List
from loguru import logger
from auto_archiver.core import Media, Metadata
from loguru import logger
from app.shared.db import models
def get_all_urls(result: Metadata) -> List[models.ArchiveUrl]:
db_urls = []
for m in result.media:
@@ -29,4 +31,3 @@ def convert_if_media(media):
except Exception as e:
logger.debug(f"error parsing {media} : {e}")
return False

View File

@@ -2,6 +2,7 @@
import datetime
from sqlalchemy.orm import Session
from app.shared.db import worker_crud

View File

@@ -1,8 +1,14 @@
from functools import lru_cache
from sqlalchemy import Engine, create_engine, event, text
from sqlalchemy.orm import sessionmaker
from contextlib import asynccontextmanager, contextmanager
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, AsyncEngine, async_sessionmaker
from functools import lru_cache
from sqlalchemy import Engine, create_engine, event, text
from sqlalchemy.ext.asyncio import (
AsyncEngine,
AsyncSession,
async_sessionmaker,
create_async_engine,
)
from sqlalchemy.orm import sessionmaker
from app.shared.settings import get_settings

View File

@@ -1,8 +1,17 @@
from sqlalchemy import Column, String, JSON, DateTime, Boolean, Table, ForeignKey
from sqlalchemy.sql import func
from sqlalchemy.orm import relationship, declarative_base
import uuid
from sqlalchemy import (
JSON,
Boolean,
Column,
DateTime,
ForeignKey,
String,
Table,
)
from sqlalchemy.orm import declarative_base, relationship
from sqlalchemy.sql import func
Base = declarative_base()

View File

@@ -1,8 +1,10 @@
from sqlalchemy.orm import Session
from datetime import datetime
from app.shared.db import models
from sqlalchemy.orm import Session
from app.shared import schemas
from app.shared.db import models
# TODO: isolate database operations away from worker and into WEB
# ONLY WORKER

View File

@@ -1,4 +1,5 @@
import traceback
from loguru import logger
@@ -6,7 +7,7 @@ from loguru import logger
logger.add("logs/api_logs.log", retention="30 days")
logger.add("logs/error_logs.log", retention="30 days", level="ERROR")
def log_error(e: Exception, traceback_str: str = None, extra:str = ""):
if not traceback_str: traceback_str = traceback.format_exc()
if extra: extra = f"{extra}\n"

View File

@@ -1,7 +1,8 @@
from datetime import datetime
from typing import Annotated
from annotated_types import Len
from pydantic import BaseModel
from datetime import datetime
class SubmitSheet(BaseModel):

View File

@@ -1,14 +1,15 @@
from functools import lru_cache
import os
from functools import lru_cache
from typing import Annotated, Set
from annotated_types import Len
from fastapi_mail import ConnectionConfig
from pydantic_settings import BaseSettings, SettingsConfigDict
from typing import Annotated, Set
from annotated_types import Len
class Settings(BaseSettings):
model_config = SettingsConfigDict(env_file=os.environ.get("ENVIRONMENT_FILE") , env_file_encoding='utf-8', extra='ignore', str_strip_whitespace=True)
# general
@@ -37,14 +38,14 @@ class Settings(BaseSettings):
if self.REDIS_PASSWORD:
return f"redis://:{self.REDIS_PASSWORD}@{self.REDIS_HOSTNAME}:6379"
return f"redis://{self.REDIS_HOSTNAME}:6379"
# cronjobs
CRON_ARCHIVE_SHEETS: bool = False
CRON_DELETE_STALE_SHEETS: bool = False
DELETE_STALE_SHEETS_DAYS: int = 14
CRON_DELETE_SCHEDULED_ARCHIVES: bool = False
DELETE_SCHEDULED_ARCHIVES_CHECK_EVERY_N_DAYS: int = 7
# observability
REPEAT_COUNT_METRICS_SECONDS: int = 30
@@ -73,4 +74,4 @@ class Settings(BaseSettings):
@lru_cache
def get_settings():
return Settings()
return Settings()

View File

@@ -1,8 +1,9 @@
from functools import lru_cache
from celery import Celery
import redis
from celery import Celery
import redis
from app.shared.settings import get_settings

View File

@@ -1,9 +1,16 @@
import json
import os
from typing import Dict, List, Set
import yaml
from loguru import logger
from pydantic import BaseModel, computed_field, field_validator, Field, model_validator
from typing import Dict, List, Set
from pydantic import (
BaseModel,
Field,
computed_field,
field_validator,
model_validator,
)
from typing_extensions import Self

View File

@@ -7,4 +7,4 @@ def fnv1a_hash_mod(s: str, modulo:int) -> int:
hash ^= ord(char)
hash *= fnv_prime
hash &= 0xFFFFFFFF # Keep it 32-bit
return (hash if hash < 0x80000000 else hash - 0x100000000) % modulo
return (hash if hash < 0x80000000 else hash - 0x100000000) % modulo

View File

@@ -1,12 +1,14 @@
import os
from typing import AsyncGenerator
from fastapi.testclient import TestClient
import pytest
from unittest.mock import patch
import pytest
import pytest_asyncio
from sqlalchemy.ext.asyncio import AsyncSession, AsyncEngine
from app.web.config import ALLOW_ANY_EMAIL
from fastapi.testclient import TestClient
from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession
from app.shared.settings import Settings
from app.web.config import ALLOW_ANY_EMAIL
from app.web.db.user_state import UserState
@@ -65,10 +67,11 @@ def db_session(test_db):
@pytest_asyncio.fixture()
async def async_test_db(get_settings: Settings):
import asyncio
from app.shared.db import models
from app.shared.db.database import make_async_engine
from app.web.db.crud import get_user_group_names
import asyncio
get_user_group_names.cache_clear()
engine = await make_async_engine(get_settings.ASYNC_DATABASE_PATH)
@@ -108,8 +111,8 @@ async def async_db_session(async_test_db: AsyncEngine) -> AsyncGenerator[AsyncSe
@pytest.fixture()
def app(db_session):
from app.web.main import app_factory
from app.web.db import crud
from app.web.main import app_factory
app = app_factory()
crud.upsert_user_groups(db_session)
return app
@@ -123,7 +126,11 @@ def client(app):
@pytest.fixture()
def app_with_auth(app, db_session):
from app.web.security import get_token_or_user_auth, get_user_auth, get_user_state
from app.web.security import (
get_token_or_user_auth,
get_user_auth,
get_user_state,
)
app.dependency_overrides[get_token_or_user_auth] = lambda: "rick@example.com"
app.dependency_overrides[get_user_auth] = lambda: "morty@example.com"
app.dependency_overrides[get_user_state] = lambda: UserState(db_session, "MORTY@example.com")
@@ -138,7 +145,7 @@ def client_with_auth(app_with_auth):
@pytest.fixture()
def app_with_token(app):
from app.web.security import token_api_key_auth, get_token_or_user_auth
from app.web.security import get_token_or_user_auth, token_api_key_auth
app.dependency_overrides[token_api_key_auth] = lambda: ALLOW_ANY_EMAIL
app.dependency_overrides[get_token_or_user_auth] = lambda: ALLOW_ANY_EMAIL
return app

View File

@@ -1,3 +1,3 @@
{
"client_email": "fake_service_account@fake_service_account.iam.gserviceaccount.com"
}
"client_email": "fake_service_account@fake_service_account.iam.gserviceaccount.com"
}

View File

@@ -15,7 +15,7 @@ configurations:
gsheet_feeder:
service_account: "app/tests/fake_service_account.json"
cli_feeder:
urls:
urls:
- "url1"
hash_enricher:
algorithm: "SHA-256"

View File

@@ -3,4 +3,4 @@ def test_generate_uuid():
assert generate_uuid() != generate_uuid()
assert len(generate_uuid()) == 36
assert generate_uuid().count("-") == 4
assert generate_uuid().count("-") == 4

View File

@@ -1,10 +1,9 @@
from app.shared.db import models
from app.shared.db import worker_crud, models
from datetime import datetime
from app.shared.db import models, worker_crud
from app.tests.web.db.test_crud import test_data
def test_update_sheet_last_url_archived_at(db_session):
# Create test sheet
@@ -19,7 +18,7 @@ def test_update_sheet_last_url_archived_at(db_session):
db_session.refresh(test_sheet)
assert isinstance(test_sheet.last_url_archived_at, datetime)
assert test_sheet.last_url_archived_at > before
# Test non-existent sheet
assert worker_crud.update_sheet_last_url_archived_at(db_session, "non-existent-sheet") is False
@@ -73,8 +72,8 @@ def test_create_tag(db_session):
def test_create_task(db_session):
from app.shared.db import worker_crud
from app.shared import schemas
from app.shared.db import worker_crud
task = schemas.ArchiveCreate(
id="archive-id-456-101",
@@ -114,4 +113,4 @@ def test_create_task(db_session):
assert nt.group_id == "spaceship"
assert len(nt.tags) == 0
assert len(nt.urls) == 0
assert nt.created_at is not None
assert nt.created_at is not None

View File

@@ -1,7 +1,12 @@
from datetime import datetime, timedelta
from unittest.mock import MagicMock, patch
import pytest
from app.shared.business_logic import get_store_archive_until, get_store_archive_until_or_never
from app.shared.business_logic import (
get_store_archive_until,
get_store_archive_until_or_never,
)
class Test_get_store_archive_until:

View File

@@ -11,7 +11,7 @@ def test_fnv1a_hash_mod():
# Test different modulos
hash1 = fnv1a_hash_mod("test", 5)
hash2 = fnv1a_hash_mod("test", 10)
hash2 = fnv1a_hash_mod("test", 10)
assert 0 <= hash1 < 5
assert 0 <= hash2 < 10
@@ -28,4 +28,4 @@ def test_fnv1a_hash_mod():
assert 0 <= fnv1a_hash_mod("测试", 10) < 10
# Test modulo = 1 edge case
assert fnv1a_hash_mod("test", 1) == 0
assert fnv1a_hash_mod("test", 1) == 0

View File

@@ -3,4 +3,4 @@ This is just an invalid yaml for testing
still broken: True
- one
- two
- two

View File

@@ -84,4 +84,4 @@ groups:
# max_archive_lifespan_months: 12
max_monthly_urls: 1
# max_monthly_mbs: 50
priority: "low"
priority: "low"

View File

@@ -3,10 +3,12 @@ from unittest.mock import patch
import pytest
import yaml
from app.shared.db import models
from app.shared.settings import Settings
from app.web.db import crud
authors = ["rick@example.com", "morty@example.com", "jerry@example.com"]
@@ -373,6 +375,7 @@ async def test_get_sheets_by_id_hash(async_db_session):
@pytest.mark.asyncio
async def test_delete_stale_sheets(async_db_session):
from datetime import datetime, timedelta
from sqlalchemy.sql import select
now = datetime.now()
@@ -435,4 +438,4 @@ async def test_delete_stale_sheets(async_db_session):
# Running again should not delete anything
deleted = await crud.delete_stale_sheets(async_db_session, 7)
assert len(deleted) == 0
assert len(deleted) == 0

View File

@@ -1,5 +1,6 @@
from unittest.mock import MagicMock, PropertyMock, patch
import pytest
from app.shared.db import models

View File

@@ -1,10 +1,12 @@
from unittest.mock import MagicMock
from fastapi.testclient import TestClient
import pytest
from fastapi.testclient import TestClient
from app.shared.schemas import Usage, UsageResponse
from app.shared.user_groups import GroupInfo
from app.web.config import VERSION
from app.tests.web.db.test_crud import test_data
from app.web.config import VERSION
def test_endpoint_home(client_with_auth):

View File

@@ -1,5 +1,5 @@
from datetime import datetime
import json
from datetime import datetime
from unittest.mock import MagicMock, patch
from app.shared.db import models

View File

@@ -1,5 +1,5 @@
from datetime import datetime
import json
from datetime import datetime
from unittest.mock import MagicMock, patch
from fastapi.testclient import TestClient
@@ -45,8 +45,8 @@ def test_create_sheet_endpoint(app_with_auth, db_session):
assert response.json() == {"detail": "User does not have access to this group."}
# switch to jerry who's got less quota/permissions
from app.web.security import get_user_state
from app.web.db.user_state import UserState
from app.web.security import get_user_state
app_with_auth.dependency_overrides[get_user_state] = lambda: UserState(db_session, "jerry@example.com")
client_jerry = TestClient(app_with_auth)

View File

@@ -1,10 +1,10 @@
import os
from unittest.mock import patch
from fastapi.testclient import TestClient
import shutil
from unittest.mock import patch
import pytest
from fastapi.testclient import TestClient
def test_lifespan(app):
with TestClient(app) as client:
@@ -25,7 +25,7 @@ def test_logging_middleware(m1, client_with_auth):
client_with_auth.delete("/url/123")
# creates one empty and one from above
assert len(EXCEPTION_COUNTER.collect()[0].samples) == 2
def test_serve_local_archive_logic(get_settings):
# create a test file first
@@ -38,7 +38,7 @@ def test_serve_local_archive_logic(get_settings):
get_settings.SERVE_LOCAL_ARCHIVE = "/app/local_archive_test"
from app.web.main import app_factory
app = app_factory(get_settings)
# test
client = TestClient(app)
r = client.get("/app/local_archive_test/temp.txt")

View File

@@ -1,8 +1,8 @@
from unittest.mock import Mock, patch
import pytest
from fastapi import HTTPException
from fastapi.security import HTTPAuthorizationCredentials
import pytest
from app.web.config import ALLOW_ANY_EMAIL
@@ -108,8 +108,8 @@ async def test_authenticate_user_exception():
def test_get_user_state():
from app.web.security import get_user_state
from app.web.db.user_state import UserState
from app.web.security import get_user_state
mock_session = Mock()
test_email = "test@example.com"

View File

@@ -1,13 +1,12 @@
from datetime import datetime
from unittest.mock import patch
import pytest
from app.shared.db import models
from app.shared import schemas
from auto_archiver.core import Media, Metadata
from app.shared import schemas
from app.shared.db import models
class Test_create_archive_task():
URL = "https://example-live.com"

View File

@@ -1,3 +1,4 @@
from app.web.main import app_factory
app = app_factory
app = app_factory

View File

@@ -5,7 +5,7 @@ API_DESCRIPTION = """
**Usage notes:**
- The API requires a Bearer token for most operations, which you can obtain by logging in with your Google account.
- You can use this API to archive single URLs or entire Google Sheets.
- You can use this API to archive single URLs or entire Google Sheets.
- Once you submit a URL or Sheet for archiving, the API will return a task_id that you can use to check the status of the archiving process. It works asynchronously.
"""
BREAKING_CHANGES = {"minVersion": "0.4.0", "message": "The latest update has breaking changes, please update the extension to the most recent version."}

View File

@@ -1,18 +1,19 @@
from collections import defaultdict
from functools import lru_cache
from sqlalchemy.orm import Session, load_only
from sqlalchemy import Column, or_, func, select
from loguru import logger
from datetime import datetime, timedelta
from sqlalchemy.ext.asyncio import AsyncSession
from functools import lru_cache
from cachetools import LRUCache, cached
from cachetools.keys import hashkey
from loguru import logger
from sqlalchemy import Column, func, or_, select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import Session, load_only
from app.web.config import ALLOW_ANY_EMAIL
from app.shared.db import models
from app.shared.settings import get_settings
from app.shared.user_groups import UserGroups
from app.shared.utils.misc import fnv1a_hash_mod
from app.web.config import ALLOW_ANY_EMAIL
from app.web.utils.misc import convert_priority_to_queue_dict
@@ -117,7 +118,7 @@ async def get_group_priority_async(db: AsyncSession, group_id: str) -> dict:
@cached(cache=LRUCache(maxsize=128), key=lambda db, email: hashkey(email))
def get_user_group_names(db: Session, email: str) -> list[str]:
"""
given an email retrieves the user groups from the DB and then the email-domain groups from a global variable, the email does not need to belong to an existing user.
given an email retrieves the user groups from the DB and then the email-domain groups from a global variable, the email does not need to belong to an existing user.
"""
# TODO: the read: [group1, group2] permissions don't currently work
if not email or not len(email) or "@" not in email: return []
@@ -173,7 +174,7 @@ def upsert_user_groups(db: Session):
def display_email_pii(email: str):
return f"'{email[0:3]}...@{email.split('@')[1]}'"
"""
reads the user_groups yaml file and inserts any new users, groups,
reads the user_groups yaml file and inserts any new users, groups,
along with new participation of users in groups
"""
filename = get_settings().USER_GROUPS_FILENAME
@@ -192,6 +193,7 @@ def upsert_user_groups(db: Session):
for group in explicit_groups:
group_domains[group].add(domain)
import json
# upsert groups and save a map of groupid -> dbobject
for group_id, g in ug.groups.items():
upsert_group(db, group_id, g.description, g.orchestrator, g.orchestrator_sheet, g.service_account_email, json.loads(g.permissions.model_dump_json()), list(group_domains.get(group_id, [])))

View File

@@ -1,13 +1,14 @@
from typing import Dict, Set
import sqlalchemy
from sqlalchemy.orm import Session
from sqlalchemy import func
from datetime import datetime
from typing import Dict, Set
import sqlalchemy
from sqlalchemy import func
from sqlalchemy.orm import Session
from app.shared.db import models
from app.shared.user_groups import GroupInfo, GroupPermissions
from app.shared.schemas import Usage, UsageResponse
from app.shared.user_groups import GroupInfo, GroupPermissions
from app.web.db import crud
from app.web.utils.misc import convert_priority_to_queue_dict

View File

@@ -1,13 +1,15 @@
from typing import Dict
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import FileResponse, JSONResponse
from app.web.config import VERSION, BREAKING_CHANGES
from app.shared.schemas import ActiveUser, UsageResponse
from app.shared.user_groups import GroupInfo
from app.web.config import BREAKING_CHANGES, VERSION
from app.web.db.user_state import UserState
from app.web.security import get_user_state
from app.shared.user_groups import GroupInfo
default_router = APIRouter()
@@ -42,7 +44,7 @@ def get_user_usage(
if not user.active:
raise HTTPException(status_code=403, detail="User is not active.")
return user.usage()
@default_router.get('/favicon.ico', include_in_schema=False)

View File

@@ -1,19 +1,19 @@
import json
import sqlalchemy
from auto_archiver.core import Metadata
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import JSONResponse
from loguru import logger
import sqlalchemy
from auto_archiver.core import Metadata
from sqlalchemy.orm import Session
from app.shared.aa_utils import get_all_urls
from app.web.config import ALLOW_ANY_EMAIL
from app.shared import business_logic, schemas
from app.shared.db import worker_crud
from app.shared.aa_utils import get_all_urls
from app.shared.db import models, worker_crud
from app.shared.db.database import get_db_dependency
from app.web.security import token_api_key_auth
from app.shared.db import models
from app.shared.log import log_error
from app.web.config import ALLOW_ANY_EMAIL
from app.web.security import token_api_key_auth
interoperability_router = APIRouter(prefix="/interop", tags=["Interoperability endpoints."])

View File

@@ -1,16 +1,16 @@
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import JSONResponse
from sqlalchemy import exc
from sqlalchemy.orm import Session
from app.web.db.user_state import UserState
from app.shared import schemas
from app.shared.task_messaging import get_celery
from app.web.security import get_user_state
from app.web.db import crud
from app.shared.db.database import get_db_dependency
from app.shared.task_messaging import get_celery
from app.web.db import crud
from app.web.db.user_state import UserState
from app.web.security import get_user_state
sheet_router = APIRouter(prefix="/sheet", tags=["Google Spreadsheet operations"])
@@ -78,4 +78,4 @@ def archive_user_sheet(
group_queue = user.priority_group(sheet.group_id)
task = celery.signature("create_sheet_task", args=[schemas.SubmitSheet(sheet_id=id, author_id=user.email, group_id=sheet.group_id).model_dump_json()]).apply_async(**group_queue)
return JSONResponse({"id": task.id}, status_code=201)
return JSONResponse({"id": task.id}, status_code=201)

View File

@@ -3,10 +3,10 @@ from fastapi import APIRouter, Depends
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse
from app.shared.task_messaging import get_celery
from app.web.security import get_token_or_user_auth
from app.shared import schemas
from app.shared.log import log_error
from app.shared.task_messaging import get_celery
from app.web.security import get_token_or_user_auth
from app.web.utils.misc import custom_jsonable_encoder

View File

@@ -1,22 +1,22 @@
from datetime import datetime
from urllib.parse import urlparse
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import JSONResponse
from datetime import datetime
from loguru import logger
from sqlalchemy.orm import Session
from app.web.config import ALLOW_ANY_EMAIL
from app.shared import schemas
from app.shared.db.database import get_db_dependency
from app.shared.task_messaging import get_celery
from app.web.security import get_token_or_user_auth, get_user_state
from app.web.config import ALLOW_ANY_EMAIL
from app.web.db import crud
from app.web.db.user_state import UserState
from app.shared.db.database import get_db_dependency
from urllib.parse import urlparse
from app.web.security import get_token_or_user_auth, get_user_state
from app.web.utils.misc import convert_priority_to_queue_dict
url_router = APIRouter(prefix="/url", tags=["Single URL operations"])
celery = get_celery()
@@ -47,7 +47,7 @@ def archive_url(
else:
archive_create.author_id = archive.author_id or email
group_queue = convert_priority_to_queue_dict("high")
task = celery.signature("create_archive_task", args=[archive_create.model_dump_json()]).apply_async(**group_queue)
task_response = schemas.Task(id=task.id)
@@ -74,8 +74,8 @@ def search_by_url(
@url_router.delete("/{id}", summary="Delete a single URL archive by id.")
def delete_archive(
id:str,
user: UserState = Depends(get_user_state),
id:str,
user: UserState = Depends(get_user_state),
db: Session = Depends(get_db_dependency)
) -> schemas.DeleteResponse:
logger.info(f"deleting url archive task {id} request by {user.email}")

View File

@@ -1,22 +1,32 @@
import asyncio
from collections import defaultdict
import datetime
import logging
from collections import defaultdict
from contextlib import asynccontextmanager
import alembic.config
from fastapi import FastAPI
from contextlib import asynccontextmanager
from fastapi_mail import FastMail, MessageSchema, MessageType
from fastapi_utils.tasks import repeat_every
from loguru import logger
from fastapi_mail import FastMail, MessageSchema, MessageType
from app.shared.db import models
from app.shared.db.database import get_db, get_db_async, make_engine, wal_checkpoint
from app.shared import schemas
from app.shared.db import models
from app.shared.db.database import (
get_db,
get_db_async,
make_engine,
wal_checkpoint,
)
from app.shared.settings import get_settings
from app.shared.task_messaging import get_celery
from app.web.db import crud
from app.web.middleware import increase_exceptions_counter
from app.web.utils.metrics import measure_regular_metrics, redis_subscribe_worker_exceptions
from app.web.utils.metrics import (
measure_regular_metrics,
redis_subscribe_worker_exceptions,
)
celery = get_celery()
@@ -183,4 +193,4 @@ async def delete_stale_sheets():
async def generate_users_export_csv():
#TODO: implement a cronjob that regularly requested user data to a CSV file
# see https://colab.research.google.com/drive/1QDbo3QXHPBdiTuANlA1AWVvN-rqxuCPa?authuser=0#scrollTo=4nPXeSdK8RBT
pass
pass

View File

@@ -1,24 +1,23 @@
import os
from fastapi import FastAPI, Depends
from fastapi.staticfiles import StaticFiles
from fastapi import Depends, FastAPI
from fastapi.middleware.cors import CORSMiddleware
from prometheus_fastapi_instrumentator import Instrumentator
from fastapi.staticfiles import StaticFiles
from loguru import logger
from prometheus_fastapi_instrumentator import Instrumentator
from app.web.middleware import logging_middleware
from app.shared.task_messaging import get_celery
from app.web.security import token_api_key_auth
from app.web.config import VERSION, API_DESCRIPTION
from app.web.events import lifespan
from app.shared.settings import get_settings
from app.shared.task_messaging import get_celery
from app.web.config import API_DESCRIPTION, VERSION
from app.web.endpoints.default import default_router
from app.web.endpoints.url import url_router
from app.web.endpoints.interoperability import interoperability_router
from app.web.endpoints.sheet import sheet_router
from app.web.endpoints.task import task_router
from app.web.endpoints.interoperability import interoperability_router
from app.web.endpoints.url import url_router
from app.web.events import lifespan
from app.web.middleware import logging_middleware
from app.web.security import token_api_key_auth
celery = get_celery()
@@ -57,4 +56,4 @@ def app_factory(settings = get_settings()):
logger.warning(f"MOUNTing local archive, use this in development only {settings.SERVE_LOCAL_ARCHIVE}")
app.mount(settings.SERVE_LOCAL_ARCHIVE, StaticFiles(directory=local_dir), name=settings.SERVE_LOCAL_ARCHIVE)
return app
return app

View File

@@ -1,7 +1,9 @@
import traceback
from loguru import logger
from fastapi import Request
from loguru import logger
from app.shared.log import log_error
from app.web.utils.metrics import EXCEPTION_COUNTER
@@ -25,7 +27,7 @@ async def increase_exceptions_counter(e: Exception, location:str="cronjob"):
last_trace = traceback.extract_tb(e.__traceback__)[-1]
_file, _line, func_name, _text = last_trace
location = func_name
except Exception as e:
except Exception as e:
logger.error(f"Unable to get function name from cronjob exception traceback: {e}")
EXCEPTION_COUNTER.labels(type=e.__class__.__name__, location=location).inc()
log_error(e)
log_error(e)

View File

@@ -1,14 +1,17 @@
import secrets
import requests
from fastapi import Depends, HTTPException, status
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from loguru import logger
import requests, secrets
from fastapi import HTTPException, status, Depends
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from sqlalchemy.orm import Session
from app.web.config import ALLOW_ANY_EMAIL
from app.shared.settings import get_settings
from app.shared.db.database import get_db_dependency
from app.shared.settings import get_settings
from app.web.config import ALLOW_ANY_EMAIL
from app.web.db.user_state import UserState
settings = get_settings()
bearer_security = HTTPBearer()
@@ -80,4 +83,4 @@ def authenticate_user(access_token):
def get_user_state(email:str=Depends(get_user_auth), db:Session=Depends(get_db_dependency)):
return UserState(db, email)
return UserState(db, email)

View File

@@ -2,12 +2,13 @@ import asyncio
import json
import os
import shutil
from prometheus_client import Counter, Gauge
from app.web.db import crud
from app.shared.db.database import get_db
from app.shared.log import log_error
from app.shared.task_messaging import get_redis
from app.web.db import crud
# Custom metrics

View File

@@ -1,4 +1,5 @@
import base64
from fastapi.encoders import jsonable_encoder

View File

@@ -1,21 +1,22 @@
import datetime
import json
import traceback
import traceback, datetime
from auto_archiver.core.orchestrator import ArchivingOrchestrator
from celery.signals import task_failure
from loguru import logger
from sqlalchemy import exc
from auto_archiver.core.orchestrator import ArchivingOrchestrator
from app.shared.db import models
from app.shared.db.database import get_db
from app.shared import business_logic, schemas
from app.shared.task_messaging import get_celery, get_redis
from app.shared.settings import get_settings
from app.shared.log import log_error
from app.shared.aa_utils import get_all_urls
from app.shared.db import worker_crud
from app.shared.db import models, worker_crud
from app.shared.db.database import get_db
from app.shared.log import log_error
from app.shared.settings import get_settings
from app.shared.task_messaging import get_celery, get_redis
from app.worker.worker_log import setup_celery_logger
settings = get_settings()
celery = get_celery("worker")
@@ -26,7 +27,7 @@ USER_GROUPS_FILENAME = settings.USER_GROUPS_FILENAME
setup_celery_logger(celery)
# TODO: these are temporary PATCHES for new aa's functionality
# logger.add("app/worker/worker_log.log", level="DEBUG")
# logger.add("app/worker/worker_log.log", level="DEBUG")
logger.remove = lambda x: print(f"logger.remove({x})")
# TODO: after release, as it requires updating past entries with sheet_id where tag is used, drop tags

View File

@@ -1,9 +1,11 @@
from loguru import logger
from celery import Celery
import sys
from celery import Celery
from loguru import logger
from app.shared.task_messaging import get_celery
celery = get_celery("worker")
def setup_celery_logger(celery):
@@ -22,7 +24,7 @@ def setup_celery_logger(celery):
if message.strip():
logger.info(message.strip())
# Required to prevent issues with buffered output
def flush(self): pass
def flush(self): pass
def isatty(self): return False
sys.stdout = InterceptHandler()