mirror of
https://github.com/bellingcat/auto-archiver-api.git
synced 2026-06-12 13:38:33 +03:00
Add pre-commit with GitHub Action (#56)
* Update pyproject.toml
* add pre-commit
* Create .pre-commit-config.yaml
* Comment out ruff
* Update .pre-commit-config.yaml
* General formatting
* Create format-and-fail.yml
* Update ci.yml
* Add pre-commit to dev dependencies
* Update pyproject.toml
This commit is contained in:
@@ -1,11 +1,11 @@
|
||||
from logging.config import fileConfig
|
||||
from sqlalchemy import engine_from_config
|
||||
from sqlalchemy import pool
|
||||
|
||||
from alembic import context
|
||||
from sqlalchemy import engine_from_config, pool
|
||||
|
||||
from app.shared.settings import get_settings
|
||||
|
||||
|
||||
# this is the Alembic Config object, which provides
|
||||
# access to the values within the .ini file in use.
|
||||
config = context.config
|
||||
|
||||
@@ -5,8 +5,8 @@ Revises: 1636724ec4b1
|
||||
Create Date: 2025-02-08 15:22:20.392522
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
|
||||
@@ -5,8 +5,8 @@ Revises: a23aaf3ae930
|
||||
Create Date: 2025-02-05 19:19:01.984396
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
|
||||
@@ -5,8 +5,8 @@ Revises: 02b2f6d17ed0
|
||||
Create Date: 2025-02-11 21:53:23.293274
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
|
||||
@@ -5,8 +5,8 @@ Revises: fa012ec405b8
|
||||
Create Date: 2024-11-04 11:12:30.237299
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
from sqlalchemy.engine.reflection import Inspector
|
||||
|
||||
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
"""modify archive url to have uuid id instead of url unique constraint
|
||||
|
||||
Revision ID: 9369a264945b
|
||||
Revises:
|
||||
Revises:
|
||||
Create Date: 2023-12-20 17:24:59.320691
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = '9369a264945b'
|
||||
down_revision = None
|
||||
|
||||
@@ -5,8 +5,8 @@ Revises: 89121d2c96d8
|
||||
Create Date: 2025-02-04 12:19:20.753570
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
|
||||
@@ -5,8 +5,8 @@ Revises: 93a611e4c066
|
||||
Create Date: 2024-10-31 09:36:50.360710
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
from sqlalchemy.engine.reflection import Inspector
|
||||
|
||||
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
# TODO: code in this file should eventually be moved to the auto-archiver code base
|
||||
|
||||
from typing import List
|
||||
from loguru import logger
|
||||
|
||||
from auto_archiver.core import Media, Metadata
|
||||
from loguru import logger
|
||||
|
||||
from app.shared.db import models
|
||||
|
||||
|
||||
def get_all_urls(result: Metadata) -> List[models.ArchiveUrl]:
|
||||
db_urls = []
|
||||
for m in result.media:
|
||||
@@ -29,4 +31,3 @@ def convert_if_media(media):
|
||||
except Exception as e:
|
||||
logger.debug(f"error parsing {media} : {e}")
|
||||
return False
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
|
||||
import datetime
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.shared.db import worker_crud
|
||||
|
||||
@@ -1,8 +1,14 @@
|
||||
from functools import lru_cache
|
||||
from sqlalchemy import Engine, create_engine, event, text
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from contextlib import asynccontextmanager, contextmanager
|
||||
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, AsyncEngine, async_sessionmaker
|
||||
from functools import lru_cache
|
||||
|
||||
from sqlalchemy import Engine, create_engine, event, text
|
||||
from sqlalchemy.ext.asyncio import (
|
||||
AsyncEngine,
|
||||
AsyncSession,
|
||||
async_sessionmaker,
|
||||
create_async_engine,
|
||||
)
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
from app.shared.settings import get_settings
|
||||
|
||||
|
||||
@@ -1,8 +1,17 @@
|
||||
from sqlalchemy import Column, String, JSON, DateTime, Boolean, Table, ForeignKey
|
||||
from sqlalchemy.sql import func
|
||||
from sqlalchemy.orm import relationship, declarative_base
|
||||
import uuid
|
||||
|
||||
from sqlalchemy import (
|
||||
JSON,
|
||||
Boolean,
|
||||
Column,
|
||||
DateTime,
|
||||
ForeignKey,
|
||||
String,
|
||||
Table,
|
||||
)
|
||||
from sqlalchemy.orm import declarative_base, relationship
|
||||
from sqlalchemy.sql import func
|
||||
|
||||
|
||||
Base = declarative_base()
|
||||
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
from sqlalchemy.orm import Session
|
||||
from datetime import datetime
|
||||
|
||||
from app.shared.db import models
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.shared import schemas
|
||||
from app.shared.db import models
|
||||
|
||||
|
||||
# TODO: isolate database operations away from worker and into WEB
|
||||
# ONLY WORKER
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import traceback
|
||||
|
||||
from loguru import logger
|
||||
|
||||
|
||||
@@ -6,7 +7,7 @@ from loguru import logger
|
||||
logger.add("logs/api_logs.log", retention="30 days")
|
||||
logger.add("logs/error_logs.log", retention="30 days", level="ERROR")
|
||||
|
||||
|
||||
|
||||
def log_error(e: Exception, traceback_str: str = None, extra:str = ""):
|
||||
if not traceback_str: traceback_str = traceback.format_exc()
|
||||
if extra: extra = f"{extra}\n"
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
from datetime import datetime
|
||||
from typing import Annotated
|
||||
|
||||
from annotated_types import Len
|
||||
from pydantic import BaseModel
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class SubmitSheet(BaseModel):
|
||||
|
||||
@@ -1,14 +1,15 @@
|
||||
|
||||
from functools import lru_cache
|
||||
import os
|
||||
from functools import lru_cache
|
||||
from typing import Annotated, Set
|
||||
|
||||
from annotated_types import Len
|
||||
from fastapi_mail import ConnectionConfig
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
from typing import Annotated, Set
|
||||
from annotated_types import Len
|
||||
|
||||
|
||||
class Settings(BaseSettings):
|
||||
|
||||
|
||||
model_config = SettingsConfigDict(env_file=os.environ.get("ENVIRONMENT_FILE") , env_file_encoding='utf-8', extra='ignore', str_strip_whitespace=True)
|
||||
|
||||
# general
|
||||
@@ -37,14 +38,14 @@ class Settings(BaseSettings):
|
||||
if self.REDIS_PASSWORD:
|
||||
return f"redis://:{self.REDIS_PASSWORD}@{self.REDIS_HOSTNAME}:6379"
|
||||
return f"redis://{self.REDIS_HOSTNAME}:6379"
|
||||
|
||||
|
||||
# cronjobs
|
||||
CRON_ARCHIVE_SHEETS: bool = False
|
||||
CRON_DELETE_STALE_SHEETS: bool = False
|
||||
DELETE_STALE_SHEETS_DAYS: int = 14
|
||||
CRON_DELETE_SCHEDULED_ARCHIVES: bool = False
|
||||
DELETE_SCHEDULED_ARCHIVES_CHECK_EVERY_N_DAYS: int = 7
|
||||
|
||||
|
||||
# observability
|
||||
REPEAT_COUNT_METRICS_SECONDS: int = 30
|
||||
|
||||
@@ -73,4 +74,4 @@ class Settings(BaseSettings):
|
||||
|
||||
@lru_cache
|
||||
def get_settings():
|
||||
return Settings()
|
||||
return Settings()
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
|
||||
from functools import lru_cache
|
||||
from celery import Celery
|
||||
import redis
|
||||
|
||||
from celery import Celery
|
||||
|
||||
import redis
|
||||
from app.shared.settings import get_settings
|
||||
|
||||
|
||||
|
||||
@@ -1,9 +1,16 @@
|
||||
import json
|
||||
import os
|
||||
from typing import Dict, List, Set
|
||||
|
||||
import yaml
|
||||
from loguru import logger
|
||||
from pydantic import BaseModel, computed_field, field_validator, Field, model_validator
|
||||
from typing import Dict, List, Set
|
||||
from pydantic import (
|
||||
BaseModel,
|
||||
Field,
|
||||
computed_field,
|
||||
field_validator,
|
||||
model_validator,
|
||||
)
|
||||
from typing_extensions import Self
|
||||
|
||||
|
||||
|
||||
@@ -7,4 +7,4 @@ def fnv1a_hash_mod(s: str, modulo:int) -> int:
|
||||
hash ^= ord(char)
|
||||
hash *= fnv_prime
|
||||
hash &= 0xFFFFFFFF # Keep it 32-bit
|
||||
return (hash if hash < 0x80000000 else hash - 0x100000000) % modulo
|
||||
return (hash if hash < 0x80000000 else hash - 0x100000000) % modulo
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
import os
|
||||
from typing import AsyncGenerator
|
||||
from fastapi.testclient import TestClient
|
||||
import pytest
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, AsyncEngine
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from fastapi.testclient import TestClient
|
||||
from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession
|
||||
|
||||
from app.shared.settings import Settings
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.web.db.user_state import UserState
|
||||
|
||||
|
||||
@@ -65,10 +67,11 @@ def db_session(test_db):
|
||||
|
||||
@pytest_asyncio.fixture()
|
||||
async def async_test_db(get_settings: Settings):
|
||||
import asyncio
|
||||
|
||||
from app.shared.db import models
|
||||
from app.shared.db.database import make_async_engine
|
||||
from app.web.db.crud import get_user_group_names
|
||||
import asyncio
|
||||
|
||||
get_user_group_names.cache_clear()
|
||||
engine = await make_async_engine(get_settings.ASYNC_DATABASE_PATH)
|
||||
@@ -108,8 +111,8 @@ async def async_db_session(async_test_db: AsyncEngine) -> AsyncGenerator[AsyncSe
|
||||
|
||||
@pytest.fixture()
|
||||
def app(db_session):
|
||||
from app.web.main import app_factory
|
||||
from app.web.db import crud
|
||||
from app.web.main import app_factory
|
||||
app = app_factory()
|
||||
crud.upsert_user_groups(db_session)
|
||||
return app
|
||||
@@ -123,7 +126,11 @@ def client(app):
|
||||
|
||||
@pytest.fixture()
|
||||
def app_with_auth(app, db_session):
|
||||
from app.web.security import get_token_or_user_auth, get_user_auth, get_user_state
|
||||
from app.web.security import (
|
||||
get_token_or_user_auth,
|
||||
get_user_auth,
|
||||
get_user_state,
|
||||
)
|
||||
app.dependency_overrides[get_token_or_user_auth] = lambda: "rick@example.com"
|
||||
app.dependency_overrides[get_user_auth] = lambda: "morty@example.com"
|
||||
app.dependency_overrides[get_user_state] = lambda: UserState(db_session, "MORTY@example.com")
|
||||
@@ -138,7 +145,7 @@ def client_with_auth(app_with_auth):
|
||||
|
||||
@pytest.fixture()
|
||||
def app_with_token(app):
|
||||
from app.web.security import token_api_key_auth, get_token_or_user_auth
|
||||
from app.web.security import get_token_or_user_auth, token_api_key_auth
|
||||
app.dependency_overrides[token_api_key_auth] = lambda: ALLOW_ANY_EMAIL
|
||||
app.dependency_overrides[get_token_or_user_auth] = lambda: ALLOW_ANY_EMAIL
|
||||
return app
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
{
|
||||
"client_email": "fake_service_account@fake_service_account.iam.gserviceaccount.com"
|
||||
}
|
||||
"client_email": "fake_service_account@fake_service_account.iam.gserviceaccount.com"
|
||||
}
|
||||
|
||||
@@ -15,7 +15,7 @@ configurations:
|
||||
gsheet_feeder:
|
||||
service_account: "app/tests/fake_service_account.json"
|
||||
cli_feeder:
|
||||
urls:
|
||||
urls:
|
||||
- "url1"
|
||||
hash_enricher:
|
||||
algorithm: "SHA-256"
|
||||
|
||||
@@ -3,4 +3,4 @@ def test_generate_uuid():
|
||||
|
||||
assert generate_uuid() != generate_uuid()
|
||||
assert len(generate_uuid()) == 36
|
||||
assert generate_uuid().count("-") == 4
|
||||
assert generate_uuid().count("-") == 4
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
from app.shared.db import models
|
||||
from app.shared.db import worker_crud, models
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
from app.shared.db import models, worker_crud
|
||||
from app.tests.web.db.test_crud import test_data
|
||||
|
||||
|
||||
def test_update_sheet_last_url_archived_at(db_session):
|
||||
|
||||
# Create test sheet
|
||||
@@ -19,7 +18,7 @@ def test_update_sheet_last_url_archived_at(db_session):
|
||||
db_session.refresh(test_sheet)
|
||||
assert isinstance(test_sheet.last_url_archived_at, datetime)
|
||||
assert test_sheet.last_url_archived_at > before
|
||||
|
||||
|
||||
# Test non-existent sheet
|
||||
assert worker_crud.update_sheet_last_url_archived_at(db_session, "non-existent-sheet") is False
|
||||
|
||||
@@ -73,8 +72,8 @@ def test_create_tag(db_session):
|
||||
|
||||
|
||||
def test_create_task(db_session):
|
||||
from app.shared.db import worker_crud
|
||||
from app.shared import schemas
|
||||
from app.shared.db import worker_crud
|
||||
|
||||
task = schemas.ArchiveCreate(
|
||||
id="archive-id-456-101",
|
||||
@@ -114,4 +113,4 @@ def test_create_task(db_session):
|
||||
assert nt.group_id == "spaceship"
|
||||
assert len(nt.tags) == 0
|
||||
assert len(nt.urls) == 0
|
||||
assert nt.created_at is not None
|
||||
assert nt.created_at is not None
|
||||
|
||||
@@ -1,7 +1,12 @@
|
||||
from datetime import datetime, timedelta
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from app.shared.business_logic import get_store_archive_until, get_store_archive_until_or_never
|
||||
|
||||
from app.shared.business_logic import (
|
||||
get_store_archive_until,
|
||||
get_store_archive_until_or_never,
|
||||
)
|
||||
|
||||
|
||||
class Test_get_store_archive_until:
|
||||
|
||||
@@ -11,7 +11,7 @@ def test_fnv1a_hash_mod():
|
||||
|
||||
# Test different modulos
|
||||
hash1 = fnv1a_hash_mod("test", 5)
|
||||
hash2 = fnv1a_hash_mod("test", 10)
|
||||
hash2 = fnv1a_hash_mod("test", 10)
|
||||
assert 0 <= hash1 < 5
|
||||
assert 0 <= hash2 < 10
|
||||
|
||||
@@ -28,4 +28,4 @@ def test_fnv1a_hash_mod():
|
||||
assert 0 <= fnv1a_hash_mod("测试", 10) < 10
|
||||
|
||||
# Test modulo = 1 edge case
|
||||
assert fnv1a_hash_mod("test", 1) == 0
|
||||
assert fnv1a_hash_mod("test", 1) == 0
|
||||
|
||||
@@ -3,4 +3,4 @@ This is just an invalid yaml for testing
|
||||
|
||||
still broken: True
|
||||
- one
|
||||
- two
|
||||
- two
|
||||
|
||||
@@ -84,4 +84,4 @@ groups:
|
||||
# max_archive_lifespan_months: 12
|
||||
max_monthly_urls: 1
|
||||
# max_monthly_mbs: 50
|
||||
priority: "low"
|
||||
priority: "low"
|
||||
|
||||
@@ -3,10 +3,12 @@ from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
import yaml
|
||||
|
||||
from app.shared.db import models
|
||||
from app.shared.settings import Settings
|
||||
|
||||
from app.web.db import crud
|
||||
|
||||
|
||||
authors = ["rick@example.com", "morty@example.com", "jerry@example.com"]
|
||||
|
||||
|
||||
@@ -373,6 +375,7 @@ async def test_get_sheets_by_id_hash(async_db_session):
|
||||
@pytest.mark.asyncio
|
||||
async def test_delete_stale_sheets(async_db_session):
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from sqlalchemy.sql import select
|
||||
|
||||
now = datetime.now()
|
||||
@@ -435,4 +438,4 @@ async def test_delete_stale_sheets(async_db_session):
|
||||
|
||||
# Running again should not delete anything
|
||||
deleted = await crud.delete_stale_sheets(async_db_session, 7)
|
||||
assert len(deleted) == 0
|
||||
assert len(deleted) == 0
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
|
||||
from unittest.mock import MagicMock, PropertyMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from app.shared.db import models
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
from unittest.mock import MagicMock
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from app.shared.schemas import Usage, UsageResponse
|
||||
from app.shared.user_groups import GroupInfo
|
||||
from app.web.config import VERSION
|
||||
from app.tests.web.db.test_crud import test_data
|
||||
from app.web.config import VERSION
|
||||
|
||||
|
||||
def test_endpoint_home(client_with_auth):
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from datetime import datetime
|
||||
import json
|
||||
from datetime import datetime
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from app.shared.db import models
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from datetime import datetime
|
||||
import json
|
||||
from datetime import datetime
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from fastapi.testclient import TestClient
|
||||
@@ -45,8 +45,8 @@ def test_create_sheet_endpoint(app_with_auth, db_session):
|
||||
assert response.json() == {"detail": "User does not have access to this group."}
|
||||
|
||||
# switch to jerry who's got less quota/permissions
|
||||
from app.web.security import get_user_state
|
||||
from app.web.db.user_state import UserState
|
||||
from app.web.security import get_user_state
|
||||
app_with_auth.dependency_overrides[get_user_state] = lambda: UserState(db_session, "jerry@example.com")
|
||||
client_jerry = TestClient(app_with_auth)
|
||||
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
import os
|
||||
from unittest.mock import patch
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
import shutil
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
|
||||
def test_lifespan(app):
|
||||
with TestClient(app) as client:
|
||||
@@ -25,7 +25,7 @@ def test_logging_middleware(m1, client_with_auth):
|
||||
client_with_auth.delete("/url/123")
|
||||
# creates one empty and one from above
|
||||
assert len(EXCEPTION_COUNTER.collect()[0].samples) == 2
|
||||
|
||||
|
||||
|
||||
def test_serve_local_archive_logic(get_settings):
|
||||
# create a test file first
|
||||
@@ -38,7 +38,7 @@ def test_serve_local_archive_logic(get_settings):
|
||||
get_settings.SERVE_LOCAL_ARCHIVE = "/app/local_archive_test"
|
||||
from app.web.main import app_factory
|
||||
app = app_factory(get_settings)
|
||||
|
||||
|
||||
# test
|
||||
client = TestClient(app)
|
||||
r = client.get("/app/local_archive_test/temp.txt")
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
from unittest.mock import Mock, patch
|
||||
|
||||
import pytest
|
||||
from fastapi import HTTPException
|
||||
from fastapi.security import HTTPAuthorizationCredentials
|
||||
import pytest
|
||||
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
|
||||
@@ -108,8 +108,8 @@ async def test_authenticate_user_exception():
|
||||
|
||||
|
||||
def test_get_user_state():
|
||||
from app.web.security import get_user_state
|
||||
from app.web.db.user_state import UserState
|
||||
from app.web.security import get_user_state
|
||||
|
||||
mock_session = Mock()
|
||||
test_email = "test@example.com"
|
||||
|
||||
@@ -1,13 +1,12 @@
|
||||
from datetime import datetime
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from app.shared.db import models
|
||||
from app.shared import schemas
|
||||
from auto_archiver.core import Media, Metadata
|
||||
|
||||
from app.shared import schemas
|
||||
from app.shared.db import models
|
||||
|
||||
|
||||
class Test_create_archive_task():
|
||||
URL = "https://example-live.com"
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
from app.web.main import app_factory
|
||||
|
||||
app = app_factory
|
||||
|
||||
app = app_factory
|
||||
|
||||
@@ -5,7 +5,7 @@ API_DESCRIPTION = """
|
||||
|
||||
**Usage notes:**
|
||||
- The API requires a Bearer token for most operations, which you can obtain by logging in with your Google account.
|
||||
- You can use this API to archive single URLs or entire Google Sheets.
|
||||
- You can use this API to archive single URLs or entire Google Sheets.
|
||||
- Once you submit a URL or Sheet for archiving, the API will return a task_id that you can use to check the status of the archiving process. It works asynchronously.
|
||||
"""
|
||||
BREAKING_CHANGES = {"minVersion": "0.4.0", "message": "The latest update has breaking changes, please update the extension to the most recent version."}
|
||||
|
||||
@@ -1,18 +1,19 @@
|
||||
from collections import defaultdict
|
||||
from functools import lru_cache
|
||||
from sqlalchemy.orm import Session, load_only
|
||||
from sqlalchemy import Column, or_, func, select
|
||||
from loguru import logger
|
||||
from datetime import datetime, timedelta
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from functools import lru_cache
|
||||
|
||||
from cachetools import LRUCache, cached
|
||||
from cachetools.keys import hashkey
|
||||
from loguru import logger
|
||||
from sqlalchemy import Column, func, or_, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.orm import Session, load_only
|
||||
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.shared.db import models
|
||||
from app.shared.settings import get_settings
|
||||
from app.shared.user_groups import UserGroups
|
||||
from app.shared.utils.misc import fnv1a_hash_mod
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.web.utils.misc import convert_priority_to_queue_dict
|
||||
|
||||
|
||||
@@ -117,7 +118,7 @@ async def get_group_priority_async(db: AsyncSession, group_id: str) -> dict:
|
||||
@cached(cache=LRUCache(maxsize=128), key=lambda db, email: hashkey(email))
|
||||
def get_user_group_names(db: Session, email: str) -> list[str]:
|
||||
"""
|
||||
given an email retrieves the user groups from the DB and then the email-domain groups from a global variable, the email does not need to belong to an existing user.
|
||||
given an email retrieves the user groups from the DB and then the email-domain groups from a global variable, the email does not need to belong to an existing user.
|
||||
"""
|
||||
# TODO: the read: [group1, group2] permissions don't currently work
|
||||
if not email or not len(email) or "@" not in email: return []
|
||||
@@ -173,7 +174,7 @@ def upsert_user_groups(db: Session):
|
||||
def display_email_pii(email: str):
|
||||
return f"'{email[0:3]}...@{email.split('@')[1]}'"
|
||||
"""
|
||||
reads the user_groups yaml file and inserts any new users, groups,
|
||||
reads the user_groups yaml file and inserts any new users, groups,
|
||||
along with new participation of users in groups
|
||||
"""
|
||||
filename = get_settings().USER_GROUPS_FILENAME
|
||||
@@ -192,6 +193,7 @@ def upsert_user_groups(db: Session):
|
||||
for group in explicit_groups:
|
||||
group_domains[group].add(domain)
|
||||
import json
|
||||
|
||||
# upsert groups and save a map of groupid -> dbobject
|
||||
for group_id, g in ug.groups.items():
|
||||
upsert_group(db, group_id, g.description, g.orchestrator, g.orchestrator_sheet, g.service_account_email, json.loads(g.permissions.model_dump_json()), list(group_domains.get(group_id, [])))
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
|
||||
from typing import Dict, Set
|
||||
import sqlalchemy
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import func
|
||||
from datetime import datetime
|
||||
from typing import Dict, Set
|
||||
|
||||
import sqlalchemy
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.shared.db import models
|
||||
from app.shared.user_groups import GroupInfo, GroupPermissions
|
||||
from app.shared.schemas import Usage, UsageResponse
|
||||
from app.shared.user_groups import GroupInfo, GroupPermissions
|
||||
from app.web.db import crud
|
||||
from app.web.utils.misc import convert_priority_to_queue_dict
|
||||
|
||||
|
||||
@@ -1,13 +1,15 @@
|
||||
|
||||
from typing import Dict
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import FileResponse, JSONResponse
|
||||
|
||||
from app.web.config import VERSION, BREAKING_CHANGES
|
||||
from app.shared.schemas import ActiveUser, UsageResponse
|
||||
from app.shared.user_groups import GroupInfo
|
||||
from app.web.config import BREAKING_CHANGES, VERSION
|
||||
from app.web.db.user_state import UserState
|
||||
from app.web.security import get_user_state
|
||||
from app.shared.user_groups import GroupInfo
|
||||
|
||||
|
||||
default_router = APIRouter()
|
||||
|
||||
@@ -42,7 +44,7 @@ def get_user_usage(
|
||||
if not user.active:
|
||||
raise HTTPException(status_code=403, detail="User is not active.")
|
||||
return user.usage()
|
||||
|
||||
|
||||
|
||||
|
||||
@default_router.get('/favicon.ico', include_in_schema=False)
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
import json
|
||||
|
||||
import sqlalchemy
|
||||
from auto_archiver.core import Metadata
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
from loguru import logger
|
||||
import sqlalchemy
|
||||
from auto_archiver.core import Metadata
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.shared.aa_utils import get_all_urls
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.shared import business_logic, schemas
|
||||
from app.shared.db import worker_crud
|
||||
from app.shared.aa_utils import get_all_urls
|
||||
from app.shared.db import models, worker_crud
|
||||
from app.shared.db.database import get_db_dependency
|
||||
from app.web.security import token_api_key_auth
|
||||
from app.shared.db import models
|
||||
from app.shared.log import log_error
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.web.security import token_api_key_auth
|
||||
|
||||
|
||||
interoperability_router = APIRouter(prefix="/interop", tags=["Interoperability endpoints."])
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from sqlalchemy import exc
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.web.db.user_state import UserState
|
||||
from app.shared import schemas
|
||||
from app.shared.task_messaging import get_celery
|
||||
from app.web.security import get_user_state
|
||||
from app.web.db import crud
|
||||
from app.shared.db.database import get_db_dependency
|
||||
from app.shared.task_messaging import get_celery
|
||||
from app.web.db import crud
|
||||
from app.web.db.user_state import UserState
|
||||
from app.web.security import get_user_state
|
||||
|
||||
|
||||
sheet_router = APIRouter(prefix="/sheet", tags=["Google Spreadsheet operations"])
|
||||
|
||||
@@ -78,4 +78,4 @@ def archive_user_sheet(
|
||||
group_queue = user.priority_group(sheet.group_id)
|
||||
task = celery.signature("create_sheet_task", args=[schemas.SubmitSheet(sheet_id=id, author_id=user.email, group_id=sheet.group_id).model_dump_json()]).apply_async(**group_queue)
|
||||
|
||||
return JSONResponse({"id": task.id}, status_code=201)
|
||||
return JSONResponse({"id": task.id}, status_code=201)
|
||||
|
||||
@@ -3,10 +3,10 @@ from fastapi import APIRouter, Depends
|
||||
from fastapi.encoders import jsonable_encoder
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from app.shared.task_messaging import get_celery
|
||||
from app.web.security import get_token_or_user_auth
|
||||
from app.shared import schemas
|
||||
from app.shared.log import log_error
|
||||
from app.shared.task_messaging import get_celery
|
||||
from app.web.security import get_token_or_user_auth
|
||||
from app.web.utils.misc import custom_jsonable_encoder
|
||||
|
||||
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
|
||||
from datetime import datetime
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
from datetime import datetime
|
||||
from loguru import logger
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.shared import schemas
|
||||
from app.shared.db.database import get_db_dependency
|
||||
from app.shared.task_messaging import get_celery
|
||||
from app.web.security import get_token_or_user_auth, get_user_state
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.web.db import crud
|
||||
from app.web.db.user_state import UserState
|
||||
from app.shared.db.database import get_db_dependency
|
||||
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from app.web.security import get_token_or_user_auth, get_user_state
|
||||
from app.web.utils.misc import convert_priority_to_queue_dict
|
||||
|
||||
|
||||
url_router = APIRouter(prefix="/url", tags=["Single URL operations"])
|
||||
|
||||
celery = get_celery()
|
||||
@@ -47,7 +47,7 @@ def archive_url(
|
||||
else:
|
||||
archive_create.author_id = archive.author_id or email
|
||||
group_queue = convert_priority_to_queue_dict("high")
|
||||
|
||||
|
||||
|
||||
task = celery.signature("create_archive_task", args=[archive_create.model_dump_json()]).apply_async(**group_queue)
|
||||
task_response = schemas.Task(id=task.id)
|
||||
@@ -74,8 +74,8 @@ def search_by_url(
|
||||
|
||||
@url_router.delete("/{id}", summary="Delete a single URL archive by id.")
|
||||
def delete_archive(
|
||||
id:str,
|
||||
user: UserState = Depends(get_user_state),
|
||||
id:str,
|
||||
user: UserState = Depends(get_user_state),
|
||||
db: Session = Depends(get_db_dependency)
|
||||
) -> schemas.DeleteResponse:
|
||||
logger.info(f"deleting url archive task {id} request by {user.email}")
|
||||
|
||||
@@ -1,22 +1,32 @@
|
||||
import asyncio
|
||||
from collections import defaultdict
|
||||
import datetime
|
||||
import logging
|
||||
from collections import defaultdict
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
import alembic.config
|
||||
from fastapi import FastAPI
|
||||
from contextlib import asynccontextmanager
|
||||
from fastapi_mail import FastMail, MessageSchema, MessageType
|
||||
from fastapi_utils.tasks import repeat_every
|
||||
from loguru import logger
|
||||
from fastapi_mail import FastMail, MessageSchema, MessageType
|
||||
|
||||
from app.shared.db import models
|
||||
from app.shared.db.database import get_db, get_db_async, make_engine, wal_checkpoint
|
||||
from app.shared import schemas
|
||||
from app.shared.db import models
|
||||
from app.shared.db.database import (
|
||||
get_db,
|
||||
get_db_async,
|
||||
make_engine,
|
||||
wal_checkpoint,
|
||||
)
|
||||
from app.shared.settings import get_settings
|
||||
from app.shared.task_messaging import get_celery
|
||||
from app.web.db import crud
|
||||
from app.web.middleware import increase_exceptions_counter
|
||||
from app.web.utils.metrics import measure_regular_metrics, redis_subscribe_worker_exceptions
|
||||
from app.web.utils.metrics import (
|
||||
measure_regular_metrics,
|
||||
redis_subscribe_worker_exceptions,
|
||||
)
|
||||
|
||||
|
||||
celery = get_celery()
|
||||
|
||||
@@ -183,4 +193,4 @@ async def delete_stale_sheets():
|
||||
async def generate_users_export_csv():
|
||||
# TODO: implement a cronjob that exports regularly requested user data to a CSV file
|
||||
# see https://colab.research.google.com/drive/1QDbo3QXHPBdiTuANlA1AWVvN-rqxuCPa?authuser=0#scrollTo=4nPXeSdK8RBT
|
||||
pass
|
||||
pass
|
||||
|
||||
@@ -1,24 +1,23 @@
|
||||
import os
|
||||
from fastapi import FastAPI, Depends
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
|
||||
from fastapi import Depends, FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from prometheus_fastapi_instrumentator import Instrumentator
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from loguru import logger
|
||||
from prometheus_fastapi_instrumentator import Instrumentator
|
||||
|
||||
from app.web.middleware import logging_middleware
|
||||
from app.shared.task_messaging import get_celery
|
||||
|
||||
from app.web.security import token_api_key_auth
|
||||
from app.web.config import VERSION, API_DESCRIPTION
|
||||
from app.web.events import lifespan
|
||||
from app.shared.settings import get_settings
|
||||
|
||||
|
||||
from app.shared.task_messaging import get_celery
|
||||
from app.web.config import API_DESCRIPTION, VERSION
|
||||
from app.web.endpoints.default import default_router
|
||||
from app.web.endpoints.url import url_router
|
||||
from app.web.endpoints.interoperability import interoperability_router
|
||||
from app.web.endpoints.sheet import sheet_router
|
||||
from app.web.endpoints.task import task_router
|
||||
from app.web.endpoints.interoperability import interoperability_router
|
||||
from app.web.endpoints.url import url_router
|
||||
from app.web.events import lifespan
|
||||
from app.web.middleware import logging_middleware
|
||||
from app.web.security import token_api_key_auth
|
||||
|
||||
|
||||
celery = get_celery()
|
||||
|
||||
@@ -57,4 +56,4 @@ def app_factory(settings = get_settings()):
|
||||
logger.warning(f"MOUNTing local archive, use this in development only {settings.SERVE_LOCAL_ARCHIVE}")
|
||||
app.mount(settings.SERVE_LOCAL_ARCHIVE, StaticFiles(directory=local_dir), name=settings.SERVE_LOCAL_ARCHIVE)
|
||||
|
||||
return app
|
||||
return app
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
|
||||
import traceback
|
||||
from loguru import logger
|
||||
|
||||
from fastapi import Request
|
||||
from loguru import logger
|
||||
|
||||
from app.shared.log import log_error
|
||||
from app.web.utils.metrics import EXCEPTION_COUNTER
|
||||
|
||||
@@ -25,7 +27,7 @@ async def increase_exceptions_counter(e: Exception, location:str="cronjob"):
|
||||
last_trace = traceback.extract_tb(e.__traceback__)[-1]
|
||||
_file, _line, func_name, _text = last_trace
|
||||
location = func_name
|
||||
except Exception as e:
|
||||
except Exception as e:
|
||||
logger.error(f"Unable to get function name from cronjob exception traceback: {e}")
|
||||
EXCEPTION_COUNTER.labels(type=e.__class__.__name__, location=location).inc()
|
||||
log_error(e)
|
||||
log_error(e)
|
||||
|
||||
@@ -1,14 +1,17 @@
|
||||
import secrets
|
||||
|
||||
import requests
|
||||
from fastapi import Depends, HTTPException, status
|
||||
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
||||
from loguru import logger
|
||||
import requests, secrets
|
||||
from fastapi import HTTPException, status, Depends
|
||||
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.shared.settings import get_settings
|
||||
from app.shared.db.database import get_db_dependency
|
||||
from app.shared.settings import get_settings
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.web.db.user_state import UserState
|
||||
|
||||
|
||||
settings = get_settings()
|
||||
bearer_security = HTTPBearer()
|
||||
|
||||
@@ -80,4 +83,4 @@ def authenticate_user(access_token):
|
||||
|
||||
|
||||
def get_user_state(email:str=Depends(get_user_auth), db:Session=Depends(get_db_dependency)):
|
||||
return UserState(db, email)
|
||||
return UserState(db, email)
|
||||
|
||||
@@ -2,12 +2,13 @@ import asyncio
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
|
||||
from prometheus_client import Counter, Gauge
|
||||
|
||||
from app.web.db import crud
|
||||
from app.shared.db.database import get_db
|
||||
from app.shared.log import log_error
|
||||
from app.shared.task_messaging import get_redis
|
||||
from app.web.db import crud
|
||||
|
||||
|
||||
# Custom metrics
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import base64
|
||||
|
||||
from fastapi.encoders import jsonable_encoder
|
||||
|
||||
|
||||
|
||||
@@ -1,21 +1,22 @@
|
||||
import datetime
|
||||
import json
|
||||
import traceback
|
||||
|
||||
import traceback, datetime
|
||||
from auto_archiver.core.orchestrator import ArchivingOrchestrator
|
||||
from celery.signals import task_failure
|
||||
from loguru import logger
|
||||
from sqlalchemy import exc
|
||||
from auto_archiver.core.orchestrator import ArchivingOrchestrator
|
||||
|
||||
from app.shared.db import models
|
||||
from app.shared.db.database import get_db
|
||||
from app.shared import business_logic, schemas
|
||||
from app.shared.task_messaging import get_celery, get_redis
|
||||
from app.shared.settings import get_settings
|
||||
from app.shared.log import log_error
|
||||
from app.shared.aa_utils import get_all_urls
|
||||
from app.shared.db import worker_crud
|
||||
from app.shared.db import models, worker_crud
|
||||
from app.shared.db.database import get_db
|
||||
from app.shared.log import log_error
|
||||
from app.shared.settings import get_settings
|
||||
from app.shared.task_messaging import get_celery, get_redis
|
||||
from app.worker.worker_log import setup_celery_logger
|
||||
|
||||
|
||||
settings = get_settings()
|
||||
|
||||
celery = get_celery("worker")
|
||||
@@ -26,7 +27,7 @@ USER_GROUPS_FILENAME = settings.USER_GROUPS_FILENAME
|
||||
setup_celery_logger(celery)
|
||||
|
||||
# TODO: these are temporary PATCHES for new aa's functionality
|
||||
# logger.add("app/worker/worker_log.log", level="DEBUG")
|
||||
# logger.add("app/worker/worker_log.log", level="DEBUG")
|
||||
logger.remove = lambda x: print(f"logger.remove({x})")
|
||||
|
||||
# TODO: after release, as it requires updating past entries with sheet_id where tag is used, drop tags
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
from loguru import logger
|
||||
from celery import Celery
|
||||
import sys
|
||||
|
||||
from celery import Celery
|
||||
from loguru import logger
|
||||
|
||||
from app.shared.task_messaging import get_celery
|
||||
|
||||
|
||||
celery = get_celery("worker")
|
||||
|
||||
def setup_celery_logger(celery):
|
||||
@@ -22,7 +24,7 @@ def setup_celery_logger(celery):
|
||||
if message.strip():
|
||||
logger.info(message.strip())
|
||||
# Required to prevent issues with buffered output
|
||||
def flush(self): pass
|
||||
def flush(self): pass
|
||||
def isatty(self): return False
|
||||
|
||||
sys.stdout = InterceptHandler()
|
||||
|
||||
Reference in New Issue
Block a user