Format and lint web directory (#67)

This commit is contained in:
Michael Plunkett
2025-03-10 12:45:19 -05:00
committed by GitHub
parent 1ca0ae2fb2
commit b50ca91d89
20 changed files with 761 additions and 309 deletions

View File

@@ -1,4 +1,4 @@
from http import HTTPStatus
from typing import Dict
from fastapi import APIRouter, Depends, HTTPException
@@ -15,38 +15,50 @@ default_router = APIRouter()
@default_router.get("/")
async def home() -> JSONResponse:
    """Return the API's ``VERSION`` and ``BREAKING_CHANGES`` constants as JSON."""
    return JSONResponse(
        {"version": VERSION, "breakingChanges": BREAKING_CHANGES}
    )
@default_router.get("/health")
async def health() -> JSONResponse:
    """Health check endpoint; always answers ``{"status": "ok"}``."""
    return JSONResponse({"status": "ok"})
@default_router.get(
    "/user/active", summary="Check if the user is active and can use the tool."
)
async def active(
    user: UserState = Depends(get_user_state),
) -> ActiveUser:
    """Report whether the authenticated user is active.

    The user state is resolved through the ``get_user_state`` dependency.

    Returns:
        An ``ActiveUser`` whose ``active`` field mirrors ``user.active``.
    """
    # Build the declared response model explicitly rather than returning a
    # bare dict, so the return value matches the annotation.
    return ActiveUser(active=user.active)
@default_router.get(
    "/user/permissions",
    summary="Get the user's global 'all' permissions and the permissions for each group they belong to.",
)
def get_user_permissions(
    user: UserState = Depends(get_user_state),
) -> Dict[str, GroupInfo]:
    """Return the permissions held by the authenticated user.

    Returns:
        ``user.permissions`` unchanged; annotated as a mapping from a string
        key to ``GroupInfo``.
    """
    return user.permissions
@default_router.get(
    "/user/usage",
    summary="Get the user's monthly URLs/MBs usage along with the total active sheets, breakdown by group.",
)
def get_user_usage(
    user: UserState = Depends(get_user_state),
) -> UsageResponse:
    """Return usage figures for the authenticated user.

    Returns:
        The result of ``user.usage()``.

    Raises:
        HTTPException: 403 Forbidden when ``user.active`` is falsy.
    """
    if not user.active:
        raise HTTPException(
            status_code=HTTPStatus.FORBIDDEN, detail="User is not active."
        )
    return user.usage()
@default_router.get("/favicon.ico", include_in_schema=False)
async def favicon() -> FileResponse:
    """Serve the static favicon; the route is hidden from the OpenAPI schema."""
    return FileResponse("app/web/static/favicon.ico")

View File

@@ -1,4 +1,5 @@
import json
from http import HTTPStatus
import sqlalchemy
from auto_archiver.core import Metadata
@@ -16,26 +17,39 @@ from app.web.config import ALLOW_ANY_EMAIL
from app.web.security import token_api_key_auth
# Router grouping the endpoints served under the "/interop" prefix.
interoperability_router = APIRouter(
    prefix="/interop", tags=["Interoperability endpoints."]
)
# ----- endpoint to submit data archived elsewhere
@interoperability_router.post("/submit-archive", status_code=201, summary="Submit a manual archive entry, for data that was archived elsewhere.")
@interoperability_router.post(
"/submit-archive",
status_code=HTTPStatus.CREATED,
summary="Submit a manual archive entry, for data that was archived elsewhere.",
)
def submit_manual_archive(
manual: schemas.SubmitManualArchive,
auth=Depends(token_api_key_auth),
db: Session = Depends(get_db_dependency)
db: Session = Depends(get_db_dependency),
):
try:
result: Metadata = Metadata.from_json(manual.result)
except json.JSONDecodeError as e:
log_error(e)
raise HTTPException(status_code=422, detail="Invalid JSON in result field.")
raise HTTPException(
status_code=HTTPStatus.UNPROCESSABLE_ENTITY,
detail="Invalid JSON in result field.",
) from e
manual.author_id = manual.author_id or ALLOW_ANY_EMAIL
manual.tags.add("manual")
store_until = business_logic.get_store_archive_until_or_never(db, manual.group_id)
logger.debug(f"[MANUAL ARCHIVE] {manual.author_id} {manual.url} {store_until}")
store_until = business_logic.get_store_archive_until_or_never(
db, manual.group_id
)
logger.debug(
f"[MANUAL ARCHIVE] {manual.author_id} {manual.url} {store_until}"
)
try:
archive = schemas.ArchiveCreate(
@@ -51,8 +65,15 @@ def submit_manual_archive(
)
db_archive = worker_crud.store_archived_url(db, archive)
logger.debug(f"[MANUAL ARCHIVE STORED] {db_archive.author_id} {db_archive.url}")
return JSONResponse({"id": db_archive.id}, status_code=201)
logger.debug(
f"[MANUAL ARCHIVE STORED] {db_archive.author_id} {db_archive.url}"
)
return JSONResponse(
{"id": db_archive.id}, status_code=HTTPStatus.CREATED
)
except sqlalchemy.exc.IntegrityError as e:
log_error(e)
raise HTTPException(status_code=422, detail=f"Cannot insert into DB due to integrity error, likely duplicate urls.")
raise HTTPException(
status_code=HTTPStatus.UNPROCESSABLE_ENTITY,
detail="Cannot insert into DB due to integrity error, likely duplicate urls.",
) from e

View File

@@ -1,81 +1,134 @@
from http import HTTPStatus
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import JSONResponse
from sqlalchemy import exc
from sqlalchemy.orm import Session
from app.shared import schemas
from app.shared.db.database import get_db_dependency
from app.shared.schemas import (
DeleteResponse,
SheetAdd,
SheetResponse,
SubmitSheet,
)
from app.shared.task_messaging import get_celery
from app.web.db import crud
from app.web.db.user_state import UserState
from app.web.security import get_user_state
# Router grouping the endpoints served under the "/sheet" prefix.
sheet_router = APIRouter(
    prefix="/sheet", tags=["Google Spreadsheet operations"]
)

# Object returned by get_celery(); the endpoints below use it to build task
# signatures and dispatch them asynchronously.
celery = get_celery()
@sheet_router.post(
    "/create",
    status_code=HTTPStatus.CREATED,
    summary="Store a new Google Sheet for regular archiving.",
)
def create_sheet(
    sheet: SheetAdd,
    user: UserState = Depends(get_user_state),
    db: Session = Depends(get_db_dependency),
) -> SheetResponse:
    """Store a Google Sheet for the authenticated user.

    Checks run in order: group membership, sheet quota, then allowed
    frequency. Only when all pass is the sheet handed to
    ``crud.create_sheet``.

    Returns:
        Whatever ``crud.create_sheet`` returns for the new sheet.

    Raises:
        HTTPException: 403 if the user is not in ``sheet.group_id``;
            429 if ``has_quota_monthly_sheets`` is false for the group;
            422 if the frequency is not allowed for the group;
            400 if the insert raises ``IntegrityError``.
    """
    if not user.in_group(sheet.group_id):
        raise HTTPException(
            status_code=HTTPStatus.FORBIDDEN,
            detail="User does not have access to this group.",
        )
    if not user.has_quota_monthly_sheets(sheet.group_id):
        raise HTTPException(
            status_code=HTTPStatus.TOO_MANY_REQUESTS,
            detail="User has reached their sheet quota for this group.",
        )
    if not user.is_sheet_frequency_allowed(sheet.group_id, sheet.frequency):
        raise HTTPException(
            status_code=HTTPStatus.UNPROCESSABLE_ENTITY,
            detail="Invalid frequency selected for this group.",
        )

    try:
        return crud.create_sheet(
            db,
            sheet.id,
            sheet.name,
            user.email,
            sheet.group_id,
            sheet.frequency,
        )
    except exc.IntegrityError as e:
        # Chain the DB error so the original cause stays in the traceback.
        raise HTTPException(
            status_code=HTTPStatus.BAD_REQUEST,
            detail="Sheet with this ID is already being archived.",
        ) from e
@sheet_router.get(
    "/mine",
    status_code=HTTPStatus.OK,
    summary="Get the authenticated user's Google Sheets.",
)
def get_user_sheets(
    user: UserState = Depends(get_user_state),
    db: Session = Depends(get_db_dependency),
) -> list[SheetResponse]:
    """Return the sheets that ``crud.get_user_sheets`` finds for ``user.email``."""
    return crud.get_user_sheets(db, user.email)
@sheet_router.delete("/{sheet_id}", summary="Delete a Google Sheet by ID.")
def delete_sheet(
    sheet_id: str,
    user: UserState = Depends(get_user_state),
    db: Session = Depends(get_db_dependency),
) -> DeleteResponse:
    """Delete one of the authenticated user's sheets.

    The path parameter is named ``sheet_id`` so it does not shadow the
    builtin ``id``.

    Returns:
        A ``DeleteResponse`` echoing ``sheet_id`` and carrying the result of
        ``crud.delete_sheet`` in ``deleted``.
    """
    return DeleteResponse(
        id=sheet_id, deleted=crud.delete_sheet(db, sheet_id, user.email)
    )
@sheet_router.post(
    "/{sheet_id}/archive",
    status_code=HTTPStatus.CREATED,
    summary="Trigger an archiving task for a GSheet you own.",
    response_description="task_id for the archiving task.",
)
def archive_user_sheet(
    sheet_id: str,
    user: UserState = Depends(get_user_state),
    db: Session = Depends(get_db_dependency),
) -> JSONResponse:
    """Queue an archiving task for one of the authenticated user's sheets.

    Returns:
        A 201 ``JSONResponse`` of the form ``{"id": <task id>}``.

    Raises:
        HTTPException: 403 if ``crud.get_user_sheet`` finds no sheet for this
            user, or the user is not in the sheet's group; 429 if
            ``can_manually_trigger`` is false for the group.
    """
    sheet = crud.get_user_sheet(db, user.email, sheet_id=sheet_id)
    if not sheet:
        raise HTTPException(
            status_code=HTTPStatus.FORBIDDEN, detail="No access to this sheet."
        )
    if not user.in_group(sheet.group_id):
        raise HTTPException(
            status_code=HTTPStatus.FORBIDDEN,
            detail="User does not have access to this group.",
        )
    if not user.can_manually_trigger(sheet.group_id):
        raise HTTPException(
            status_code=HTTPStatus.TOO_MANY_REQUESTS,
            detail="User cannot manually trigger sheet archiving in this group.",
        )

    # priority_group() supplies the keyword arguments passed to apply_async.
    group_queue = user.priority_group(sheet.group_id)
    task = celery.signature(
        "create_sheet_task",
        args=[
            SubmitSheet(
                sheet_id=sheet_id, author_id=user.email, group_id=sheet.group_id
            ).model_dump_json()
        ],
    ).apply_async(**group_queue)

    return JSONResponse({"id": task.id}, status_code=HTTPStatus.CREATED)

View File

@@ -14,8 +14,14 @@ task_router = APIRouter(prefix="/task", tags=["Async task operations"])
celery = get_celery()
@task_router.get("/{task_id}", summary="Check the status of an async task by its id, works for URLs and Sheet tasks.")
def get_status(task_id, email=Depends(get_token_or_user_auth)) -> schemas.TaskResult:
@task_router.get(
"/{task_id}",
summary="Check the status of an async task by its id, works for URLs and Sheet tasks.",
)
def get_status(
task_id, email=Depends(get_token_or_user_auth)
) -> schemas.TaskResult:
task = AsyncResult(task_id, app=celery)
try:
if task.status == "FAILURE":
@@ -24,17 +30,17 @@ def get_status(task_id, email=Depends(get_token_or_user_auth)) -> schemas.TaskRe
# https://docs.celeryq.dev/en/stable/_modules/celery/result.html#AsyncResult
raise task.result
response = {
"id": task_id,
"status": task.status,
"result": task.result
}
return JSONResponse(jsonable_encoder(response, exclude_unset=True, custom_encoder={bytes: custom_jsonable_encoder}))
response = {"id": task_id, "status": task.status, "result": task.result}
return JSONResponse(
jsonable_encoder(
response,
exclude_unset=True,
custom_encoder={bytes: custom_jsonable_encoder},
)
)
except Exception as e:
log_error(e)
return JSONResponse({
"id": task_id,
"status": "FAILURE",
"result": {"error": str(e)}
})
return JSONResponse(
{"id": task_id, "status": "FAILURE", "result": {"error": str(e)}}
)

View File

@@ -1,5 +1,5 @@
from datetime import datetime
from http import HTTPStatus
from urllib.parse import urlparse
from fastapi import APIRouter, Depends, HTTPException
@@ -9,6 +9,7 @@ from sqlalchemy.orm import Session
from app.shared import schemas
from app.shared.db.database import get_db_dependency
from app.shared.schemas import DeleteResponse
from app.shared.task_messaging import get_celery
from app.web.config import ALLOW_ANY_EMAIL
from app.web.db import crud
@@ -21,65 +22,106 @@ url_router = APIRouter(prefix="/url", tags=["Single URL operations"])
celery = get_celery()
@url_router.post(
    "/archive",
    status_code=HTTPStatus.CREATED,
    summary="Submit a single URL archive request, starts an archiving task.",
    response_description="task_id for the archiving task, will match the archive id.",
)
def archive_url(
    archive: schemas.ArchiveTrigger,
    email=Depends(get_token_or_user_auth),
    db: Session = Depends(get_db_dependency),
) -> JSONResponse:
    """Validate a single-URL archive request and queue a task for it.

    When ``email`` differs from ``ALLOW_ANY_EMAIL`` the request is attributed
    to that user and checked against group membership and monthly quotas.
    Otherwise the author is ``archive.author_id`` (falling back to ``email``)
    and the task is queued with "high" priority.

    Returns:
        A 201 ``JSONResponse`` holding the dumped ``schemas.Task`` for the
        queued task.

    Raises:
        HTTPException: 400 if the URL lacks a scheme or network location;
            403 if a group is given and the user is not a member; 429 if
            the monthly URL or MB quota check fails.
    """
    logger.info(
        f"new {archive.public=} task for {email=} and {archive.group_id=}: {archive.url}"
    )

    # Both a scheme and a netloc are required for the URL to be accepted.
    parsed_url = urlparse(archive.url)
    if not all([parsed_url.scheme, parsed_url.netloc]):
        raise HTTPException(
            status_code=HTTPStatus.BAD_REQUEST, detail="Invalid URL received."
        )

    archive_create = schemas.ArchiveCreate(**archive.model_dump())

    if email != ALLOW_ANY_EMAIL:
        archive_create.author_id = email
        user = UserState(db, email)
        if archive.group_id and not user.in_group(archive.group_id):
            raise HTTPException(
                status_code=HTTPStatus.FORBIDDEN,
                detail="User does not have access to this group.",
            )
        if not user.has_quota_max_monthly_urls(archive.group_id):
            raise HTTPException(
                status_code=HTTPStatus.TOO_MANY_REQUESTS,
                detail="User has reached their monthly URL quota.",
            )
        if not user.has_quota_max_monthly_mbs(archive.group_id):
            raise HTTPException(
                status_code=HTTPStatus.TOO_MANY_REQUESTS,
                detail="User has reached their monthly MB quota.",
            )
        group_queue = user.priority_group(archive_create.group_id)
    else:
        archive_create.author_id = archive.author_id or email
        group_queue = convert_priority_to_queue_dict("high")

    task = celery.signature(
        "create_archive_task", args=[archive_create.model_dump_json()]
    ).apply_async(**group_queue)
    task_response = schemas.Task(id=task.id)
    return JSONResponse(
        task_response.model_dump(), status_code=HTTPStatus.CREATED
    )
@url_router.get("/search", summary="Search for archive entries by URL.")
def search_by_url(
    url: str,
    skip: int = 0,
    limit: int = 25,
    archived_after: datetime = None,
    archived_before: datetime = None,
    db: Session = Depends(get_db_dependency),
    email: str = Depends(get_token_or_user_auth),
) -> list[schemas.ArchiveResult]:
    """Search stored archives by URL.

    The URL is stripped of surrounding whitespace before the lookup.
    ``skip``, ``limit``, ``archived_after`` and ``archived_before`` are
    passed through to ``crud.search_archives_by_url`` unchanged.

    When ``email`` equals ``ALLOW_ANY_EMAIL`` no ``UserState`` is built and
    both read flags are passed as ``False``.

    Returns:
        The result of ``crud.search_archives_by_url``.

    Raises:
        HTTPException: 403 if the user has neither ``read`` nor
            ``read_public`` access.
    """
    # NOTE(review): archived_after/archived_before default to None although
    # annotated as plain datetime; consider an explicit optional type.
    read_groups, read_public = False, False
    if email != ALLOW_ANY_EMAIL:
        user = UserState(db, email)
        if not user.read and not user.read_public:
            raise HTTPException(
                status_code=HTTPStatus.FORBIDDEN,
                detail="User does not have read access.",
            )
        read_groups = user.read
        read_public = user.read_public

    return crud.search_archives_by_url(
        db,
        url.strip(),
        email,
        read_groups,
        read_public,
        skip=skip,
        limit=limit,
        archived_after=archived_after,
        archived_before=archived_before,
    )
@url_router.delete(
    "/{archive_id}", summary="Delete a single URL archive by id."
)
def delete_archive(
    archive_id: str,
    user: UserState = Depends(get_user_state),
    db: Session = Depends(get_db_dependency),
) -> DeleteResponse:
    """Soft-delete one URL archive on behalf of the authenticated user.

    The path parameter is named ``archive_id`` so it does not shadow the
    builtin ``id``.

    Returns:
        A ``DeleteResponse`` echoing ``archive_id`` and carrying the result
        of ``crud.soft_delete_archive`` in ``deleted``.
    """
    logger.info(
        f"deleting url archive task {archive_id} request by {user.email}"
    )
    return DeleteResponse(
        id=archive_id,
        deleted=crud.soft_delete_archive(db, archive_id, user.email),
    )