mirror of
https://github.com/bellingcat/auto-archiver-api.git
synced 2026-06-12 05:28:34 +03:00
Format and lint web directory (#67)
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
|
||||
from http import HTTPStatus
|
||||
from typing import Dict
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
@@ -15,38 +15,50 @@ default_router = APIRouter()
|
||||
|
||||
|
||||
@default_router.get("/")
|
||||
async def home():
|
||||
return JSONResponse({"version": VERSION, "breakingChanges": BREAKING_CHANGES})
|
||||
async def home() -> JSONResponse:
|
||||
return JSONResponse(
|
||||
{"version": VERSION, "breakingChanges": BREAKING_CHANGES}
|
||||
)
|
||||
|
||||
|
||||
@default_router.get("/health")
|
||||
async def health():
|
||||
async def health() -> JSONResponse:
|
||||
return JSONResponse({"status": "ok"})
|
||||
|
||||
|
||||
@default_router.get("/user/active", summary="Check if the user is active and can use the tool.")
|
||||
@default_router.get(
|
||||
"/user/active", summary="Check if the user is active and can use the tool."
|
||||
)
|
||||
async def active(
|
||||
user: UserState = Depends(get_user_state),
|
||||
) -> ActiveUser:
|
||||
return {"active": user.active}
|
||||
return ActiveUser(active=user.active)
|
||||
|
||||
|
||||
@default_router.get("/user/permissions", summary="Get the user's global 'all' permissions and the permissions for each group they belong to.")
|
||||
@default_router.get(
|
||||
"/user/permissions",
|
||||
summary="Get the user's global 'all' permissions and the permissions for each group they belong to.",
|
||||
)
|
||||
def get_user_permissions(
|
||||
user: UserState = Depends(get_user_state),
|
||||
) -> Dict[str, GroupInfo]:
|
||||
return user.permissions
|
||||
|
||||
@default_router.get("/user/usage", summary="Get the user's monthly URLs/MBs usage along with the total active sheets, breakdown by group.")
|
||||
|
||||
@default_router.get(
|
||||
"/user/usage",
|
||||
summary="Get the user's monthly URLs/MBs usage along with the total active sheets, breakdown by group.",
|
||||
)
|
||||
def get_user_usage(
|
||||
user: UserState = Depends(get_user_state),
|
||||
) -> UsageResponse:
|
||||
if not user.active:
|
||||
raise HTTPException(status_code=403, detail="User is not active.")
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.FORBIDDEN, detail="User is not active."
|
||||
)
|
||||
return user.usage()
|
||||
|
||||
|
||||
|
||||
@default_router.get('/favicon.ico', include_in_schema=False)
|
||||
@default_router.get("/favicon.ico", include_in_schema=False)
|
||||
async def favicon() -> FileResponse:
|
||||
return FileResponse("app/web/static/favicon.ico")
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import json
|
||||
from http import HTTPStatus
|
||||
|
||||
import sqlalchemy
|
||||
from auto_archiver.core import Metadata
|
||||
@@ -16,26 +17,39 @@ from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.web.security import token_api_key_auth
|
||||
|
||||
|
||||
interoperability_router = APIRouter(prefix="/interop", tags=["Interoperability endpoints."])
|
||||
interoperability_router = APIRouter(
|
||||
prefix="/interop", tags=["Interoperability endpoints."]
|
||||
)
|
||||
|
||||
|
||||
# ----- endpoint to submit data archived elsewhere
|
||||
@interoperability_router.post("/submit-archive", status_code=201, summary="Submit a manual archive entry, for data that was archived elsewhere.")
|
||||
@interoperability_router.post(
|
||||
"/submit-archive",
|
||||
status_code=HTTPStatus.CREATED,
|
||||
summary="Submit a manual archive entry, for data that was archived elsewhere.",
|
||||
)
|
||||
def submit_manual_archive(
|
||||
manual: schemas.SubmitManualArchive,
|
||||
auth=Depends(token_api_key_auth),
|
||||
db: Session = Depends(get_db_dependency)
|
||||
db: Session = Depends(get_db_dependency),
|
||||
):
|
||||
try:
|
||||
result: Metadata = Metadata.from_json(manual.result)
|
||||
except json.JSONDecodeError as e:
|
||||
log_error(e)
|
||||
raise HTTPException(status_code=422, detail="Invalid JSON in result field.")
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.UNPROCESSABLE_ENTITY,
|
||||
detail="Invalid JSON in result field.",
|
||||
) from e
|
||||
manual.author_id = manual.author_id or ALLOW_ANY_EMAIL
|
||||
manual.tags.add("manual")
|
||||
|
||||
store_until = business_logic.get_store_archive_until_or_never(db, manual.group_id)
|
||||
logger.debug(f"[MANUAL ARCHIVE] {manual.author_id} {manual.url} {store_until}")
|
||||
store_until = business_logic.get_store_archive_until_or_never(
|
||||
db, manual.group_id
|
||||
)
|
||||
logger.debug(
|
||||
f"[MANUAL ARCHIVE] {manual.author_id} {manual.url} {store_until}"
|
||||
)
|
||||
|
||||
try:
|
||||
archive = schemas.ArchiveCreate(
|
||||
@@ -51,8 +65,15 @@ def submit_manual_archive(
|
||||
)
|
||||
|
||||
db_archive = worker_crud.store_archived_url(db, archive)
|
||||
logger.debug(f"[MANUAL ARCHIVE STORED] {db_archive.author_id} {db_archive.url}")
|
||||
return JSONResponse({"id": db_archive.id}, status_code=201)
|
||||
logger.debug(
|
||||
f"[MANUAL ARCHIVE STORED] {db_archive.author_id} {db_archive.url}"
|
||||
)
|
||||
return JSONResponse(
|
||||
{"id": db_archive.id}, status_code=HTTPStatus.CREATED
|
||||
)
|
||||
except sqlalchemy.exc.IntegrityError as e:
|
||||
log_error(e)
|
||||
raise HTTPException(status_code=422, detail=f"Cannot insert into DB due to integrity error, likely duplicate urls.")
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.UNPROCESSABLE_ENTITY,
|
||||
detail="Cannot insert into DB due to integrity error, likely duplicate urls.",
|
||||
) from e
|
||||
|
||||
@@ -1,81 +1,134 @@
|
||||
from http import HTTPStatus
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
from sqlalchemy import exc
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.shared import schemas
|
||||
from app.shared.db.database import get_db_dependency
|
||||
from app.shared.schemas import (
|
||||
DeleteResponse,
|
||||
SheetAdd,
|
||||
SheetResponse,
|
||||
SubmitSheet,
|
||||
)
|
||||
from app.shared.task_messaging import get_celery
|
||||
from app.web.db import crud
|
||||
from app.web.db.user_state import UserState
|
||||
from app.web.security import get_user_state
|
||||
|
||||
|
||||
sheet_router = APIRouter(prefix="/sheet", tags=["Google Spreadsheet operations"])
|
||||
sheet_router = APIRouter(
|
||||
prefix="/sheet", tags=["Google Spreadsheet operations"]
|
||||
)
|
||||
|
||||
celery = get_celery()
|
||||
|
||||
@sheet_router.post("/create", status_code=201, summary="Store a new Google Sheet for regular archiving.")
|
||||
|
||||
@sheet_router.post(
|
||||
"/create",
|
||||
status_code=HTTPStatus.CREATED,
|
||||
summary="Store a new Google Sheet for regular archiving.",
|
||||
)
|
||||
def create_sheet(
|
||||
sheet: schemas.SheetAdd,
|
||||
sheet: SheetAdd,
|
||||
user: UserState = Depends(get_user_state),
|
||||
db: Session = Depends(get_db_dependency),
|
||||
) -> schemas.SheetResponse:
|
||||
|
||||
) -> SheetResponse:
|
||||
if not user.in_group(sheet.group_id):
|
||||
raise HTTPException(status_code=403, detail="User does not have access to this group.")
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.FORBIDDEN,
|
||||
detail="User does not have access to this group.",
|
||||
)
|
||||
|
||||
if not user.has_quota_monthly_sheets(sheet.group_id):
|
||||
raise HTTPException(status_code=429, detail="User has reached their sheet quota for this group.")
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.TOO_MANY_REQUESTS,
|
||||
detail="User has reached their sheet quota for this group.",
|
||||
)
|
||||
|
||||
if not user.is_sheet_frequency_allowed(sheet.group_id, sheet.frequency):
|
||||
raise HTTPException(status_code=422, detail="Invalid frequency selected for this group.")
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.UNPROCESSABLE_ENTITY,
|
||||
detail="Invalid frequency selected for this group.",
|
||||
)
|
||||
|
||||
try:
|
||||
return crud.create_sheet(db, sheet.id, sheet.name, user.email, sheet.group_id, sheet.frequency)
|
||||
return crud.create_sheet(
|
||||
db,
|
||||
sheet.id,
|
||||
sheet.name,
|
||||
user.email,
|
||||
sheet.group_id,
|
||||
sheet.frequency,
|
||||
)
|
||||
except exc.IntegrityError as e:
|
||||
raise HTTPException(status_code=400, detail="Sheet with this ID is already being archived.") from e
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.BAD_REQUEST,
|
||||
detail="Sheet with this ID is already being archived.",
|
||||
) from e
|
||||
|
||||
|
||||
@sheet_router.get("/mine", status_code=200, summary="Get the authenticated user's Google Sheets.")
|
||||
@sheet_router.get(
|
||||
"/mine",
|
||||
status_code=HTTPStatus.OK,
|
||||
summary="Get the authenticated user's Google Sheets.",
|
||||
)
|
||||
def get_user_sheets(
|
||||
user: UserState = Depends(get_user_state),
|
||||
db: Session = Depends(get_db_dependency)
|
||||
) -> list[schemas.SheetResponse]:
|
||||
db: Session = Depends(get_db_dependency),
|
||||
) -> list[SheetResponse]:
|
||||
return crud.get_user_sheets(db, user.email)
|
||||
|
||||
|
||||
@sheet_router.delete("/{id}", summary="Delete a Google Sheet by ID.")
|
||||
@sheet_router.delete("/{sheet_id}", summary="Delete a Google Sheet by ID.")
|
||||
def delete_sheet(
|
||||
id: str,
|
||||
sheet_id: str,
|
||||
user: UserState = Depends(get_user_state),
|
||||
db: Session = Depends(get_db_dependency),
|
||||
) -> schemas.DeleteResponse:
|
||||
return JSONResponse({
|
||||
"id": id,
|
||||
"deleted": crud.delete_sheet(db, id, user.email)
|
||||
})
|
||||
) -> DeleteResponse:
|
||||
return DeleteResponse(
|
||||
id=sheet_id, deleted=crud.delete_sheet(db, sheet_id, user.email)
|
||||
)
|
||||
|
||||
|
||||
@sheet_router.post("/{id}/archive", status_code=201, summary="Trigger an archiving task for a GSheet you own.", response_description="task_id for the archiving task.")
|
||||
@sheet_router.post(
|
||||
"/{sheet_id}/archive",
|
||||
status_code=HTTPStatus.CREATED,
|
||||
summary="Trigger an archiving task for a GSheet you own.",
|
||||
response_description="task_id for the archiving task.",
|
||||
)
|
||||
def archive_user_sheet(
|
||||
id: str,
|
||||
sheet_id: str,
|
||||
user: UserState = Depends(get_user_state),
|
||||
db: Session = Depends(get_db_dependency),
|
||||
) -> schemas.Task:
|
||||
|
||||
sheet = crud.get_user_sheet(db, user.email, sheet_id=id)
|
||||
) -> JSONResponse:
|
||||
sheet = crud.get_user_sheet(db, user.email, sheet_id=sheet_id)
|
||||
if not sheet:
|
||||
raise HTTPException(status_code=403, detail="No access to this sheet.")
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.FORBIDDEN, detail="No access to this sheet."
|
||||
)
|
||||
|
||||
if not user.in_group(sheet.group_id):
|
||||
raise HTTPException(status_code=403, detail="User does not have access to this group.")
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.FORBIDDEN,
|
||||
detail="User does not have access to this group.",
|
||||
)
|
||||
|
||||
if not user.can_manually_trigger(sheet.group_id):
|
||||
raise HTTPException(status_code=429, detail="User cannot manually trigger sheet archiving in this group.")
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.TOO_MANY_REQUESTS,
|
||||
detail="User cannot manually trigger sheet archiving in this group.",
|
||||
)
|
||||
|
||||
group_queue = user.priority_group(sheet.group_id)
|
||||
task = celery.signature("create_sheet_task", args=[schemas.SubmitSheet(sheet_id=id, author_id=user.email, group_id=sheet.group_id).model_dump_json()]).apply_async(**group_queue)
|
||||
task = celery.signature(
|
||||
"create_sheet_task",
|
||||
args=[
|
||||
SubmitSheet(
|
||||
sheet_id=sheet_id, author_id=user.email, group_id=sheet.group_id
|
||||
).model_dump_json()
|
||||
],
|
||||
).apply_async(**group_queue)
|
||||
|
||||
return JSONResponse({"id": task.id}, status_code=201)
|
||||
return JSONResponse({"id": task.id}, status_code=HTTPStatus.CREATED)
|
||||
|
||||
@@ -14,8 +14,14 @@ task_router = APIRouter(prefix="/task", tags=["Async task operations"])
|
||||
|
||||
celery = get_celery()
|
||||
|
||||
@task_router.get("/{task_id}", summary="Check the status of an async task by its id, works for URLs and Sheet tasks.")
|
||||
def get_status(task_id, email=Depends(get_token_or_user_auth)) -> schemas.TaskResult:
|
||||
|
||||
@task_router.get(
|
||||
"/{task_id}",
|
||||
summary="Check the status of an async task by its id, works for URLs and Sheet tasks.",
|
||||
)
|
||||
def get_status(
|
||||
task_id, email=Depends(get_token_or_user_auth)
|
||||
) -> schemas.TaskResult:
|
||||
task = AsyncResult(task_id, app=celery)
|
||||
try:
|
||||
if task.status == "FAILURE":
|
||||
@@ -24,17 +30,17 @@ def get_status(task_id, email=Depends(get_token_or_user_auth)) -> schemas.TaskRe
|
||||
# https://docs.celeryq.dev/en/stable/_modules/celery/result.html#AsyncResult
|
||||
raise task.result
|
||||
|
||||
response = {
|
||||
"id": task_id,
|
||||
"status": task.status,
|
||||
"result": task.result
|
||||
}
|
||||
return JSONResponse(jsonable_encoder(response, exclude_unset=True, custom_encoder={bytes: custom_jsonable_encoder}))
|
||||
response = {"id": task_id, "status": task.status, "result": task.result}
|
||||
return JSONResponse(
|
||||
jsonable_encoder(
|
||||
response,
|
||||
exclude_unset=True,
|
||||
custom_encoder={bytes: custom_jsonable_encoder},
|
||||
)
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
log_error(e)
|
||||
return JSONResponse({
|
||||
"id": task_id,
|
||||
"status": "FAILURE",
|
||||
"result": {"error": str(e)}
|
||||
})
|
||||
return JSONResponse(
|
||||
{"id": task_id, "status": "FAILURE", "result": {"error": str(e)}}
|
||||
)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
from datetime import datetime
|
||||
from http import HTTPStatus
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
@@ -9,6 +9,7 @@ from sqlalchemy.orm import Session
|
||||
|
||||
from app.shared import schemas
|
||||
from app.shared.db.database import get_db_dependency
|
||||
from app.shared.schemas import DeleteResponse
|
||||
from app.shared.task_messaging import get_celery
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.web.db import crud
|
||||
@@ -21,65 +22,106 @@ url_router = APIRouter(prefix="/url", tags=["Single URL operations"])
|
||||
|
||||
celery = get_celery()
|
||||
|
||||
@url_router.post("/archive", status_code=201, summary="Submit a single URL archive request, starts an archiving task.", response_description="task_id for the archiving task, will match the archive id.")
|
||||
|
||||
@url_router.post(
|
||||
"/archive",
|
||||
status_code=HTTPStatus.CREATED,
|
||||
summary="Submit a single URL archive request, starts an archiving task.",
|
||||
response_description="task_id for the archiving task, will match the archive id.",
|
||||
)
|
||||
def archive_url(
|
||||
archive: schemas.ArchiveTrigger,
|
||||
email=Depends(get_token_or_user_auth),
|
||||
db: Session = Depends(get_db_dependency)
|
||||
) -> schemas.Task:
|
||||
logger.info(f"new {archive.public=} task for {email=} and {archive.group_id=}: {archive.url}")
|
||||
db: Session = Depends(get_db_dependency),
|
||||
) -> JSONResponse:
|
||||
logger.info(
|
||||
f"new {archive.public=} task for {email=} and {archive.group_id=}: {archive.url}"
|
||||
)
|
||||
|
||||
parsed_url = urlparse(archive.url)
|
||||
if not all([parsed_url.scheme, parsed_url.netloc]):
|
||||
raise HTTPException(status_code=400, detail="Invalid URL received.")
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.BAD_REQUEST, detail="Invalid URL received."
|
||||
)
|
||||
|
||||
archive_create = schemas.ArchiveCreate(**archive.model_dump())
|
||||
if email != ALLOW_ANY_EMAIL:
|
||||
archive_create.author_id = email
|
||||
user = UserState(db, email)
|
||||
if archive.group_id and not user.in_group(archive.group_id):
|
||||
raise HTTPException(status_code=403, detail="User does not have access to this group.")
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.FORBIDDEN,
|
||||
detail="User does not have access to this group.",
|
||||
)
|
||||
if not user.has_quota_max_monthly_urls(archive.group_id):
|
||||
raise HTTPException(status_code=429, detail="User has reached their monthly URL quota.")
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.TOO_MANY_REQUESTS,
|
||||
detail="User has reached their monthly URL quota.",
|
||||
)
|
||||
if not user.has_quota_max_monthly_mbs(archive.group_id):
|
||||
raise HTTPException(status_code=429, detail="User has reached their monthly MB quota.")
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.TOO_MANY_REQUESTS,
|
||||
detail="User has reached their monthly MB quota.",
|
||||
)
|
||||
group_queue = user.priority_group(archive_create.group_id)
|
||||
else:
|
||||
archive_create.author_id = archive.author_id or email
|
||||
group_queue = convert_priority_to_queue_dict("high")
|
||||
|
||||
|
||||
task = celery.signature("create_archive_task", args=[archive_create.model_dump_json()]).apply_async(**group_queue)
|
||||
task = celery.signature(
|
||||
"create_archive_task", args=[archive_create.model_dump_json()]
|
||||
).apply_async(**group_queue)
|
||||
task_response = schemas.Task(id=task.id)
|
||||
return JSONResponse(task_response.model_dump(), status_code=201)
|
||||
return JSONResponse(
|
||||
task_response.model_dump(), status_code=HTTPStatus.CREATED
|
||||
)
|
||||
|
||||
|
||||
@url_router.get("/search", summary="Search for archive entries by URL.")
|
||||
def search_by_url(
|
||||
url: str, skip: int = 0, limit: int = 25,
|
||||
archived_after: datetime = None, archived_before: datetime = None,
|
||||
db: Session = Depends(get_db_dependency),
|
||||
email: str = Depends(get_token_or_user_auth)
|
||||
url: str,
|
||||
skip: int = 0,
|
||||
limit: int = 25,
|
||||
archived_after: datetime = None,
|
||||
archived_before: datetime = None,
|
||||
db: Session = Depends(get_db_dependency),
|
||||
email: str = Depends(get_token_or_user_auth),
|
||||
) -> list[schemas.ArchiveResult]:
|
||||
|
||||
read_groups, read_public = False, False
|
||||
if email != ALLOW_ANY_EMAIL:
|
||||
user = UserState(db, email)
|
||||
if not user.read and not user.read_public:
|
||||
raise HTTPException(status_code=403, detail="User does not have read access.")
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.FORBIDDEN,
|
||||
detail="User does not have read access.",
|
||||
)
|
||||
read_groups = user.read
|
||||
read_public = user.read_public
|
||||
return crud.search_archives_by_url(db, url.strip(), email, read_groups, read_public, skip=skip, limit=limit, archived_after=archived_after, archived_before=archived_before)
|
||||
return crud.search_archives_by_url(
|
||||
db,
|
||||
url.strip(),
|
||||
email,
|
||||
read_groups,
|
||||
read_public,
|
||||
skip=skip,
|
||||
limit=limit,
|
||||
archived_after=archived_after,
|
||||
archived_before=archived_before,
|
||||
)
|
||||
|
||||
|
||||
@url_router.delete("/{id}", summary="Delete a single URL archive by id.")
|
||||
@url_router.delete(
|
||||
"/{archive_id}", summary="Delete a single URL archive by id."
|
||||
)
|
||||
def delete_archive(
|
||||
id:str,
|
||||
archive_id: str,
|
||||
user: UserState = Depends(get_user_state),
|
||||
db: Session = Depends(get_db_dependency)
|
||||
) -> schemas.DeleteResponse:
|
||||
logger.info(f"deleting url archive task {id} request by {user.email}")
|
||||
return JSONResponse({
|
||||
"id": id,
|
||||
"deleted": crud.soft_delete_archive(db, id, user.email)
|
||||
})
|
||||
db: Session = Depends(get_db_dependency),
|
||||
) -> DeleteResponse:
|
||||
logger.info(
|
||||
f"deleting url archive task {archive_id} request by {user.email}"
|
||||
)
|
||||
return DeleteResponse(
|
||||
id=archive_id,
|
||||
deleted=crud.soft_delete_archive(db, archive_id, user.email),
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user