Add pre-commit with GitHub Action (#56)

* Update pyproject.toml

* add pre-commit

* Create .pre-commit-config.yaml

* Comment out ruff

* Update .pre-commit-config.yaml

* General formatting

* Create format-and-fail.yml

* Update ci.yml

* Add pre-commit to dev dependencies

* Update pyproject.toml
This commit is contained in:
Michael Plunkett
2025-02-26 10:42:28 -06:00
committed by GitHub
parent d9f36957cd
commit 7e48f706df
68 changed files with 473 additions and 182 deletions

View File

@@ -1,3 +1,4 @@
from app.web.main import app_factory
app = app_factory
app = app_factory

View File

@@ -5,7 +5,7 @@ API_DESCRIPTION = """
**Usage notes:**
- The API requires a Bearer token for most operations, which you can obtain by logging in with your Google account.
- You can use this API to archive single URLs or entire Google Sheets.
- You can use this API to archive single URLs or entire Google Sheets.
- Once you submit a URL or Sheet for archiving, the API will return a task_id that you can use to check the status of the archiving process. It works asynchronously.
"""
BREAKING_CHANGES = {"minVersion": "0.4.0", "message": "The latest update has breaking changes, please update the extension to the most recent version."}

View File

@@ -1,18 +1,19 @@
from collections import defaultdict
from functools import lru_cache
from sqlalchemy.orm import Session, load_only
from sqlalchemy import Column, or_, func, select
from loguru import logger
from datetime import datetime, timedelta
from sqlalchemy.ext.asyncio import AsyncSession
from functools import lru_cache
from cachetools import LRUCache, cached
from cachetools.keys import hashkey
from loguru import logger
from sqlalchemy import Column, func, or_, select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import Session, load_only
from app.web.config import ALLOW_ANY_EMAIL
from app.shared.db import models
from app.shared.settings import get_settings
from app.shared.user_groups import UserGroups
from app.shared.utils.misc import fnv1a_hash_mod
from app.web.config import ALLOW_ANY_EMAIL
from app.web.utils.misc import convert_priority_to_queue_dict
@@ -117,7 +118,7 @@ async def get_group_priority_async(db: AsyncSession, group_id: str) -> dict:
@cached(cache=LRUCache(maxsize=128), key=lambda db, email: hashkey(email))
def get_user_group_names(db: Session, email: str) -> list[str]:
"""
given an email retrieves the user groups from the DB and then the email-domain groups from a global variable, the email does not need to belong to an existing user.
given an email retrieves the user groups from the DB and then the email-domain groups from a global variable, the email does not need to belong to an existing user.
"""
# TODO: the read: [group1, group2] permissions don't currently work
if not email or not len(email) or "@" not in email: return []
@@ -173,7 +174,7 @@ def upsert_user_groups(db: Session):
def display_email_pii(email: str):
return f"'{email[0:3]}...@{email.split('@')[1]}'"
"""
reads the user_groups yaml file and inserts any new users, groups,
reads the user_groups yaml file and inserts any new users, groups,
along with new participation of users in groups
"""
filename = get_settings().USER_GROUPS_FILENAME
@@ -192,6 +193,7 @@ def upsert_user_groups(db: Session):
for group in explicit_groups:
group_domains[group].add(domain)
import json
# upsert groups and save a map of groupid -> dbobject
for group_id, g in ug.groups.items():
upsert_group(db, group_id, g.description, g.orchestrator, g.orchestrator_sheet, g.service_account_email, json.loads(g.permissions.model_dump_json()), list(group_domains.get(group_id, [])))

View File

@@ -1,13 +1,14 @@
from typing import Dict, Set
import sqlalchemy
from sqlalchemy.orm import Session
from sqlalchemy import func
from datetime import datetime
from typing import Dict, Set
import sqlalchemy
from sqlalchemy import func
from sqlalchemy.orm import Session
from app.shared.db import models
from app.shared.user_groups import GroupInfo, GroupPermissions
from app.shared.schemas import Usage, UsageResponse
from app.shared.user_groups import GroupInfo, GroupPermissions
from app.web.db import crud
from app.web.utils.misc import convert_priority_to_queue_dict

View File

@@ -1,13 +1,15 @@
from typing import Dict
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import FileResponse, JSONResponse
from app.web.config import VERSION, BREAKING_CHANGES
from app.shared.schemas import ActiveUser, UsageResponse
from app.shared.user_groups import GroupInfo
from app.web.config import BREAKING_CHANGES, VERSION
from app.web.db.user_state import UserState
from app.web.security import get_user_state
from app.shared.user_groups import GroupInfo
default_router = APIRouter()
@@ -42,7 +44,7 @@ def get_user_usage(
if not user.active:
raise HTTPException(status_code=403, detail="User is not active.")
return user.usage()
@default_router.get('/favicon.ico', include_in_schema=False)

View File

@@ -1,19 +1,19 @@
import json
import sqlalchemy
from auto_archiver.core import Metadata
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import JSONResponse
from loguru import logger
import sqlalchemy
from auto_archiver.core import Metadata
from sqlalchemy.orm import Session
from app.shared.aa_utils import get_all_urls
from app.web.config import ALLOW_ANY_EMAIL
from app.shared import business_logic, schemas
from app.shared.db import worker_crud
from app.shared.aa_utils import get_all_urls
from app.shared.db import models, worker_crud
from app.shared.db.database import get_db_dependency
from app.web.security import token_api_key_auth
from app.shared.db import models
from app.shared.log import log_error
from app.web.config import ALLOW_ANY_EMAIL
from app.web.security import token_api_key_auth
interoperability_router = APIRouter(prefix="/interop", tags=["Interoperability endpoints."])

View File

@@ -1,16 +1,16 @@
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import JSONResponse
from sqlalchemy import exc
from sqlalchemy.orm import Session
from app.web.db.user_state import UserState
from app.shared import schemas
from app.shared.task_messaging import get_celery
from app.web.security import get_user_state
from app.web.db import crud
from app.shared.db.database import get_db_dependency
from app.shared.task_messaging import get_celery
from app.web.db import crud
from app.web.db.user_state import UserState
from app.web.security import get_user_state
sheet_router = APIRouter(prefix="/sheet", tags=["Google Spreadsheet operations"])
@@ -78,4 +78,4 @@ def archive_user_sheet(
group_queue = user.priority_group(sheet.group_id)
task = celery.signature("create_sheet_task", args=[schemas.SubmitSheet(sheet_id=id, author_id=user.email, group_id=sheet.group_id).model_dump_json()]).apply_async(**group_queue)
return JSONResponse({"id": task.id}, status_code=201)
return JSONResponse({"id": task.id}, status_code=201)

View File

@@ -3,10 +3,10 @@ from fastapi import APIRouter, Depends
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse
from app.shared.task_messaging import get_celery
from app.web.security import get_token_or_user_auth
from app.shared import schemas
from app.shared.log import log_error
from app.shared.task_messaging import get_celery
from app.web.security import get_token_or_user_auth
from app.web.utils.misc import custom_jsonable_encoder

View File

@@ -1,22 +1,22 @@
from datetime import datetime
from urllib.parse import urlparse
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import JSONResponse
from datetime import datetime
from loguru import logger
from sqlalchemy.orm import Session
from app.web.config import ALLOW_ANY_EMAIL
from app.shared import schemas
from app.shared.db.database import get_db_dependency
from app.shared.task_messaging import get_celery
from app.web.security import get_token_or_user_auth, get_user_state
from app.web.config import ALLOW_ANY_EMAIL
from app.web.db import crud
from app.web.db.user_state import UserState
from app.shared.db.database import get_db_dependency
from urllib.parse import urlparse
from app.web.security import get_token_or_user_auth, get_user_state
from app.web.utils.misc import convert_priority_to_queue_dict
url_router = APIRouter(prefix="/url", tags=["Single URL operations"])
celery = get_celery()
@@ -47,7 +47,7 @@ def archive_url(
else:
archive_create.author_id = archive.author_id or email
group_queue = convert_priority_to_queue_dict("high")
task = celery.signature("create_archive_task", args=[archive_create.model_dump_json()]).apply_async(**group_queue)
task_response = schemas.Task(id=task.id)
@@ -74,8 +74,8 @@ def search_by_url(
@url_router.delete("/{id}", summary="Delete a single URL archive by id.")
def delete_archive(
id:str,
user: UserState = Depends(get_user_state),
id:str,
user: UserState = Depends(get_user_state),
db: Session = Depends(get_db_dependency)
) -> schemas.DeleteResponse:
logger.info(f"deleting url archive task {id} request by {user.email}")

View File

@@ -1,22 +1,32 @@
import asyncio
from collections import defaultdict
import datetime
import logging
from collections import defaultdict
from contextlib import asynccontextmanager
import alembic.config
from fastapi import FastAPI
from contextlib import asynccontextmanager
from fastapi_mail import FastMail, MessageSchema, MessageType
from fastapi_utils.tasks import repeat_every
from loguru import logger
from fastapi_mail import FastMail, MessageSchema, MessageType
from app.shared.db import models
from app.shared.db.database import get_db, get_db_async, make_engine, wal_checkpoint
from app.shared import schemas
from app.shared.db import models
from app.shared.db.database import (
get_db,
get_db_async,
make_engine,
wal_checkpoint,
)
from app.shared.settings import get_settings
from app.shared.task_messaging import get_celery
from app.web.db import crud
from app.web.middleware import increase_exceptions_counter
from app.web.utils.metrics import measure_regular_metrics, redis_subscribe_worker_exceptions
from app.web.utils.metrics import (
measure_regular_metrics,
redis_subscribe_worker_exceptions,
)
celery = get_celery()
@@ -183,4 +193,4 @@ async def delete_stale_sheets():
async def generate_users_export_csv():
#TODO: implement a cronjob that regularly requested user data to a CSV file
# see https://colab.research.google.com/drive/1QDbo3QXHPBdiTuANlA1AWVvN-rqxuCPa?authuser=0#scrollTo=4nPXeSdK8RBT
pass
pass

View File

@@ -1,24 +1,23 @@
import os
from fastapi import FastAPI, Depends
from fastapi.staticfiles import StaticFiles
from fastapi import Depends, FastAPI
from fastapi.middleware.cors import CORSMiddleware
from prometheus_fastapi_instrumentator import Instrumentator
from fastapi.staticfiles import StaticFiles
from loguru import logger
from prometheus_fastapi_instrumentator import Instrumentator
from app.web.middleware import logging_middleware
from app.shared.task_messaging import get_celery
from app.web.security import token_api_key_auth
from app.web.config import VERSION, API_DESCRIPTION
from app.web.events import lifespan
from app.shared.settings import get_settings
from app.shared.task_messaging import get_celery
from app.web.config import API_DESCRIPTION, VERSION
from app.web.endpoints.default import default_router
from app.web.endpoints.url import url_router
from app.web.endpoints.interoperability import interoperability_router
from app.web.endpoints.sheet import sheet_router
from app.web.endpoints.task import task_router
from app.web.endpoints.interoperability import interoperability_router
from app.web.endpoints.url import url_router
from app.web.events import lifespan
from app.web.middleware import logging_middleware
from app.web.security import token_api_key_auth
celery = get_celery()
@@ -57,4 +56,4 @@ def app_factory(settings = get_settings()):
logger.warning(f"MOUNTing local archive, use this in development only {settings.SERVE_LOCAL_ARCHIVE}")
app.mount(settings.SERVE_LOCAL_ARCHIVE, StaticFiles(directory=local_dir), name=settings.SERVE_LOCAL_ARCHIVE)
return app
return app

View File

@@ -1,7 +1,9 @@
import traceback
from loguru import logger
from fastapi import Request
from loguru import logger
from app.shared.log import log_error
from app.web.utils.metrics import EXCEPTION_COUNTER
@@ -25,7 +27,7 @@ async def increase_exceptions_counter(e: Exception, location:str="cronjob"):
last_trace = traceback.extract_tb(e.__traceback__)[-1]
_file, _line, func_name, _text = last_trace
location = func_name
except Exception as e:
except Exception as e:
logger.error(f"Unable to get function name from cronjob exception traceback: {e}")
EXCEPTION_COUNTER.labels(type=e.__class__.__name__, location=location).inc()
log_error(e)
log_error(e)

View File

@@ -1,14 +1,17 @@
import secrets
import requests
from fastapi import Depends, HTTPException, status
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from loguru import logger
import requests, secrets
from fastapi import HTTPException, status, Depends
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from sqlalchemy.orm import Session
from app.web.config import ALLOW_ANY_EMAIL
from app.shared.settings import get_settings
from app.shared.db.database import get_db_dependency
from app.shared.settings import get_settings
from app.web.config import ALLOW_ANY_EMAIL
from app.web.db.user_state import UserState
settings = get_settings()
bearer_security = HTTPBearer()
@@ -80,4 +83,4 @@ def authenticate_user(access_token):
def get_user_state(email:str=Depends(get_user_auth), db:Session=Depends(get_db_dependency)):
return UserState(db, email)
return UserState(db, email)

View File

@@ -2,12 +2,13 @@ import asyncio
import json
import os
import shutil
from prometheus_client import Counter, Gauge
from app.web.db import crud
from app.shared.db.database import get_db
from app.shared.log import log_error
from app.shared.task_messaging import get_redis
from app.web.db import crud
# Custom metrics

View File

@@ -1,4 +1,5 @@
import base64
from fastapi.encoders import jsonable_encoder