mirror of
https://github.com/bellingcat/auto-archiver-api.git
synced 2026-06-12 13:38:33 +03:00
Add pre-commit with GitHub Action (#56)
* Update pyproject.toml * add pre-commit * Create .pre-commit-config.yaml * Comment out ruff * Update .pre-commit-config.yaml * General formatting * Create format-and-fail.yml * Update ci.yml * Add pre-commit to dev dependencies * Update pyproject.toml
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
from app.web.main import app_factory
|
||||
|
||||
app = app_factory
|
||||
|
||||
app = app_factory
|
||||
|
||||
@@ -5,7 +5,7 @@ API_DESCRIPTION = """
|
||||
|
||||
**Usage notes:**
|
||||
- The API requires a Bearer token for most operations, which you can obtain by logging in with your Google account.
|
||||
- You can use this API to archive single URLs or entire Google Sheets.
|
||||
- You can use this API to archive single URLs or entire Google Sheets.
|
||||
- Once you submit a URL or Sheet for archiving, the API will return a task_id that you can use to check the status of the archiving process. It works asynchronously.
|
||||
"""
|
||||
BREAKING_CHANGES = {"minVersion": "0.4.0", "message": "The latest update has breaking changes, please update the extension to the most recent version."}
|
||||
|
||||
@@ -1,18 +1,19 @@
|
||||
from collections import defaultdict
|
||||
from functools import lru_cache
|
||||
from sqlalchemy.orm import Session, load_only
|
||||
from sqlalchemy import Column, or_, func, select
|
||||
from loguru import logger
|
||||
from datetime import datetime, timedelta
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from functools import lru_cache
|
||||
|
||||
from cachetools import LRUCache, cached
|
||||
from cachetools.keys import hashkey
|
||||
from loguru import logger
|
||||
from sqlalchemy import Column, func, or_, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.orm import Session, load_only
|
||||
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.shared.db import models
|
||||
from app.shared.settings import get_settings
|
||||
from app.shared.user_groups import UserGroups
|
||||
from app.shared.utils.misc import fnv1a_hash_mod
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.web.utils.misc import convert_priority_to_queue_dict
|
||||
|
||||
|
||||
@@ -117,7 +118,7 @@ async def get_group_priority_async(db: AsyncSession, group_id: str) -> dict:
|
||||
@cached(cache=LRUCache(maxsize=128), key=lambda db, email: hashkey(email))
|
||||
def get_user_group_names(db: Session, email: str) -> list[str]:
|
||||
"""
|
||||
given an email retrieves the user groups from the DB and then the email-domain groups from a global variable, the email does not need to belong to an existing user.
|
||||
given an email retrieves the user groups from the DB and then the email-domain groups from a global variable, the email does not need to belong to an existing user.
|
||||
"""
|
||||
# TODO: the read: [group1, group2] permissions don't currently work
|
||||
if not email or not len(email) or "@" not in email: return []
|
||||
@@ -173,7 +174,7 @@ def upsert_user_groups(db: Session):
|
||||
def display_email_pii(email: str):
|
||||
return f"'{email[0:3]}...@{email.split('@')[1]}'"
|
||||
"""
|
||||
reads the user_groups yaml file and inserts any new users, groups,
|
||||
reads the user_groups yaml file and inserts any new users, groups,
|
||||
along with new participation of users in groups
|
||||
"""
|
||||
filename = get_settings().USER_GROUPS_FILENAME
|
||||
@@ -192,6 +193,7 @@ def upsert_user_groups(db: Session):
|
||||
for group in explicit_groups:
|
||||
group_domains[group].add(domain)
|
||||
import json
|
||||
|
||||
# upsert groups and save a map of groupid -> dbobject
|
||||
for group_id, g in ug.groups.items():
|
||||
upsert_group(db, group_id, g.description, g.orchestrator, g.orchestrator_sheet, g.service_account_email, json.loads(g.permissions.model_dump_json()), list(group_domains.get(group_id, [])))
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
|
||||
from typing import Dict, Set
|
||||
import sqlalchemy
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import func
|
||||
from datetime import datetime
|
||||
from typing import Dict, Set
|
||||
|
||||
import sqlalchemy
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.shared.db import models
|
||||
from app.shared.user_groups import GroupInfo, GroupPermissions
|
||||
from app.shared.schemas import Usage, UsageResponse
|
||||
from app.shared.user_groups import GroupInfo, GroupPermissions
|
||||
from app.web.db import crud
|
||||
from app.web.utils.misc import convert_priority_to_queue_dict
|
||||
|
||||
|
||||
@@ -1,13 +1,15 @@
|
||||
|
||||
from typing import Dict
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import FileResponse, JSONResponse
|
||||
|
||||
from app.web.config import VERSION, BREAKING_CHANGES
|
||||
from app.shared.schemas import ActiveUser, UsageResponse
|
||||
from app.shared.user_groups import GroupInfo
|
||||
from app.web.config import BREAKING_CHANGES, VERSION
|
||||
from app.web.db.user_state import UserState
|
||||
from app.web.security import get_user_state
|
||||
from app.shared.user_groups import GroupInfo
|
||||
|
||||
|
||||
default_router = APIRouter()
|
||||
|
||||
@@ -42,7 +44,7 @@ def get_user_usage(
|
||||
if not user.active:
|
||||
raise HTTPException(status_code=403, detail="User is not active.")
|
||||
return user.usage()
|
||||
|
||||
|
||||
|
||||
|
||||
@default_router.get('/favicon.ico', include_in_schema=False)
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
import json
|
||||
|
||||
import sqlalchemy
|
||||
from auto_archiver.core import Metadata
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
from loguru import logger
|
||||
import sqlalchemy
|
||||
from auto_archiver.core import Metadata
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.shared.aa_utils import get_all_urls
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.shared import business_logic, schemas
|
||||
from app.shared.db import worker_crud
|
||||
from app.shared.aa_utils import get_all_urls
|
||||
from app.shared.db import models, worker_crud
|
||||
from app.shared.db.database import get_db_dependency
|
||||
from app.web.security import token_api_key_auth
|
||||
from app.shared.db import models
|
||||
from app.shared.log import log_error
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.web.security import token_api_key_auth
|
||||
|
||||
|
||||
interoperability_router = APIRouter(prefix="/interop", tags=["Interoperability endpoints."])
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from sqlalchemy import exc
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.web.db.user_state import UserState
|
||||
from app.shared import schemas
|
||||
from app.shared.task_messaging import get_celery
|
||||
from app.web.security import get_user_state
|
||||
from app.web.db import crud
|
||||
from app.shared.db.database import get_db_dependency
|
||||
from app.shared.task_messaging import get_celery
|
||||
from app.web.db import crud
|
||||
from app.web.db.user_state import UserState
|
||||
from app.web.security import get_user_state
|
||||
|
||||
|
||||
sheet_router = APIRouter(prefix="/sheet", tags=["Google Spreadsheet operations"])
|
||||
|
||||
@@ -78,4 +78,4 @@ def archive_user_sheet(
|
||||
group_queue = user.priority_group(sheet.group_id)
|
||||
task = celery.signature("create_sheet_task", args=[schemas.SubmitSheet(sheet_id=id, author_id=user.email, group_id=sheet.group_id).model_dump_json()]).apply_async(**group_queue)
|
||||
|
||||
return JSONResponse({"id": task.id}, status_code=201)
|
||||
return JSONResponse({"id": task.id}, status_code=201)
|
||||
|
||||
@@ -3,10 +3,10 @@ from fastapi import APIRouter, Depends
|
||||
from fastapi.encoders import jsonable_encoder
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from app.shared.task_messaging import get_celery
|
||||
from app.web.security import get_token_or_user_auth
|
||||
from app.shared import schemas
|
||||
from app.shared.log import log_error
|
||||
from app.shared.task_messaging import get_celery
|
||||
from app.web.security import get_token_or_user_auth
|
||||
from app.web.utils.misc import custom_jsonable_encoder
|
||||
|
||||
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
|
||||
from datetime import datetime
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
from datetime import datetime
|
||||
from loguru import logger
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.shared import schemas
|
||||
from app.shared.db.database import get_db_dependency
|
||||
from app.shared.task_messaging import get_celery
|
||||
from app.web.security import get_token_or_user_auth, get_user_state
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.web.db import crud
|
||||
from app.web.db.user_state import UserState
|
||||
from app.shared.db.database import get_db_dependency
|
||||
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from app.web.security import get_token_or_user_auth, get_user_state
|
||||
from app.web.utils.misc import convert_priority_to_queue_dict
|
||||
|
||||
|
||||
url_router = APIRouter(prefix="/url", tags=["Single URL operations"])
|
||||
|
||||
celery = get_celery()
|
||||
@@ -47,7 +47,7 @@ def archive_url(
|
||||
else:
|
||||
archive_create.author_id = archive.author_id or email
|
||||
group_queue = convert_priority_to_queue_dict("high")
|
||||
|
||||
|
||||
|
||||
task = celery.signature("create_archive_task", args=[archive_create.model_dump_json()]).apply_async(**group_queue)
|
||||
task_response = schemas.Task(id=task.id)
|
||||
@@ -74,8 +74,8 @@ def search_by_url(
|
||||
|
||||
@url_router.delete("/{id}", summary="Delete a single URL archive by id.")
|
||||
def delete_archive(
|
||||
id:str,
|
||||
user: UserState = Depends(get_user_state),
|
||||
id:str,
|
||||
user: UserState = Depends(get_user_state),
|
||||
db: Session = Depends(get_db_dependency)
|
||||
) -> schemas.DeleteResponse:
|
||||
logger.info(f"deleting url archive task {id} request by {user.email}")
|
||||
|
||||
@@ -1,22 +1,32 @@
|
||||
import asyncio
|
||||
from collections import defaultdict
|
||||
import datetime
|
||||
import logging
|
||||
from collections import defaultdict
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
import alembic.config
|
||||
from fastapi import FastAPI
|
||||
from contextlib import asynccontextmanager
|
||||
from fastapi_mail import FastMail, MessageSchema, MessageType
|
||||
from fastapi_utils.tasks import repeat_every
|
||||
from loguru import logger
|
||||
from fastapi_mail import FastMail, MessageSchema, MessageType
|
||||
|
||||
from app.shared.db import models
|
||||
from app.shared.db.database import get_db, get_db_async, make_engine, wal_checkpoint
|
||||
from app.shared import schemas
|
||||
from app.shared.db import models
|
||||
from app.shared.db.database import (
|
||||
get_db,
|
||||
get_db_async,
|
||||
make_engine,
|
||||
wal_checkpoint,
|
||||
)
|
||||
from app.shared.settings import get_settings
|
||||
from app.shared.task_messaging import get_celery
|
||||
from app.web.db import crud
|
||||
from app.web.middleware import increase_exceptions_counter
|
||||
from app.web.utils.metrics import measure_regular_metrics, redis_subscribe_worker_exceptions
|
||||
from app.web.utils.metrics import (
|
||||
measure_regular_metrics,
|
||||
redis_subscribe_worker_exceptions,
|
||||
)
|
||||
|
||||
|
||||
celery = get_celery()
|
||||
|
||||
@@ -183,4 +193,4 @@ async def delete_stale_sheets():
|
||||
async def generate_users_export_csv():
|
||||
#TODO: implement a cronjob that regularly requested user data to a CSV file
|
||||
# see https://colab.research.google.com/drive/1QDbo3QXHPBdiTuANlA1AWVvN-rqxuCPa?authuser=0#scrollTo=4nPXeSdK8RBT
|
||||
pass
|
||||
pass
|
||||
|
||||
@@ -1,24 +1,23 @@
|
||||
import os
|
||||
from fastapi import FastAPI, Depends
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
|
||||
from fastapi import Depends, FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from prometheus_fastapi_instrumentator import Instrumentator
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from loguru import logger
|
||||
from prometheus_fastapi_instrumentator import Instrumentator
|
||||
|
||||
from app.web.middleware import logging_middleware
|
||||
from app.shared.task_messaging import get_celery
|
||||
|
||||
from app.web.security import token_api_key_auth
|
||||
from app.web.config import VERSION, API_DESCRIPTION
|
||||
from app.web.events import lifespan
|
||||
from app.shared.settings import get_settings
|
||||
|
||||
|
||||
from app.shared.task_messaging import get_celery
|
||||
from app.web.config import API_DESCRIPTION, VERSION
|
||||
from app.web.endpoints.default import default_router
|
||||
from app.web.endpoints.url import url_router
|
||||
from app.web.endpoints.interoperability import interoperability_router
|
||||
from app.web.endpoints.sheet import sheet_router
|
||||
from app.web.endpoints.task import task_router
|
||||
from app.web.endpoints.interoperability import interoperability_router
|
||||
from app.web.endpoints.url import url_router
|
||||
from app.web.events import lifespan
|
||||
from app.web.middleware import logging_middleware
|
||||
from app.web.security import token_api_key_auth
|
||||
|
||||
|
||||
celery = get_celery()
|
||||
|
||||
@@ -57,4 +56,4 @@ def app_factory(settings = get_settings()):
|
||||
logger.warning(f"MOUNTing local archive, use this in development only {settings.SERVE_LOCAL_ARCHIVE}")
|
||||
app.mount(settings.SERVE_LOCAL_ARCHIVE, StaticFiles(directory=local_dir), name=settings.SERVE_LOCAL_ARCHIVE)
|
||||
|
||||
return app
|
||||
return app
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
|
||||
import traceback
|
||||
from loguru import logger
|
||||
|
||||
from fastapi import Request
|
||||
from loguru import logger
|
||||
|
||||
from app.shared.log import log_error
|
||||
from app.web.utils.metrics import EXCEPTION_COUNTER
|
||||
|
||||
@@ -25,7 +27,7 @@ async def increase_exceptions_counter(e: Exception, location:str="cronjob"):
|
||||
last_trace = traceback.extract_tb(e.__traceback__)[-1]
|
||||
_file, _line, func_name, _text = last_trace
|
||||
location = func_name
|
||||
except Exception as e:
|
||||
except Exception as e:
|
||||
logger.error(f"Unable to get function name from cronjob exception traceback: {e}")
|
||||
EXCEPTION_COUNTER.labels(type=e.__class__.__name__, location=location).inc()
|
||||
log_error(e)
|
||||
log_error(e)
|
||||
|
||||
@@ -1,14 +1,17 @@
|
||||
import secrets
|
||||
|
||||
import requests
|
||||
from fastapi import Depends, HTTPException, status
|
||||
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
||||
from loguru import logger
|
||||
import requests, secrets
|
||||
from fastapi import HTTPException, status, Depends
|
||||
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.shared.settings import get_settings
|
||||
from app.shared.db.database import get_db_dependency
|
||||
from app.shared.settings import get_settings
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.web.db.user_state import UserState
|
||||
|
||||
|
||||
settings = get_settings()
|
||||
bearer_security = HTTPBearer()
|
||||
|
||||
@@ -80,4 +83,4 @@ def authenticate_user(access_token):
|
||||
|
||||
|
||||
def get_user_state(email:str=Depends(get_user_auth), db:Session=Depends(get_db_dependency)):
|
||||
return UserState(db, email)
|
||||
return UserState(db, email)
|
||||
|
||||
@@ -2,12 +2,13 @@ import asyncio
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
|
||||
from prometheus_client import Counter, Gauge
|
||||
|
||||
from app.web.db import crud
|
||||
from app.shared.db.database import get_db
|
||||
from app.shared.log import log_error
|
||||
from app.shared.task_messaging import get_redis
|
||||
from app.web.db import crud
|
||||
|
||||
|
||||
# Custom metrics
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import base64
|
||||
|
||||
from fastapi.encoders import jsonable_encoder
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user