mirror of
https://github.com/bellingcat/auto-archiver-api.git
synced 2026-06-13 05:58:35 +03:00
Standardize router names (#70)
This commit is contained in:
0
app/web/routers/__init__.py
Normal file
0
app/web/routers/__init__.py
Normal file
64
app/web/routers/default.py
Normal file
64
app/web/routers/default.py
Normal file
@@ -0,0 +1,64 @@
|
||||
from http import HTTPStatus
|
||||
from typing import Dict
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import FileResponse, JSONResponse
|
||||
|
||||
from app.shared.schemas import ActiveUser, UsageResponse
|
||||
from app.shared.user_groups import GroupInfo
|
||||
from app.web.config import BREAKING_CHANGES, VERSION
|
||||
from app.web.db.user_state import UserState
|
||||
from app.web.security import get_user_state
|
||||
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("/")
|
||||
async def home() -> JSONResponse:
|
||||
return JSONResponse(
|
||||
{"version": VERSION, "breakingChanges": BREAKING_CHANGES}
|
||||
)
|
||||
|
||||
|
||||
@router.get("/health")
|
||||
async def health() -> JSONResponse:
|
||||
return JSONResponse({"status": "ok"})
|
||||
|
||||
|
||||
@router.get(
|
||||
"/user/active", summary="Check if the user is active and can use the tool."
|
||||
)
|
||||
async def active(
|
||||
user: UserState = Depends(get_user_state),
|
||||
) -> ActiveUser:
|
||||
return ActiveUser(active=user.active)
|
||||
|
||||
|
||||
@router.get(
|
||||
"/user/permissions",
|
||||
summary="Get the user's global 'all' permissions and the permissions for each group they belong to.",
|
||||
)
|
||||
def get_user_permissions(
|
||||
user: UserState = Depends(get_user_state),
|
||||
) -> Dict[str, GroupInfo]:
|
||||
return user.permissions
|
||||
|
||||
|
||||
@router.get(
|
||||
"/user/usage",
|
||||
summary="Get the user's monthly URLs/MBs usage along with the total active sheets, breakdown by group.",
|
||||
)
|
||||
def get_user_usage(
|
||||
user: UserState = Depends(get_user_state),
|
||||
) -> UsageResponse:
|
||||
if not user.active:
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.FORBIDDEN, detail="User is not active."
|
||||
)
|
||||
return user.usage()
|
||||
|
||||
|
||||
@router.get("/favicon.ico", include_in_schema=False)
|
||||
async def favicon() -> FileResponse:
|
||||
return FileResponse("app/web/static/favicon.ico")
|
||||
77
app/web/routers/interoperability.py
Normal file
77
app/web/routers/interoperability.py
Normal file
@@ -0,0 +1,77 @@
|
||||
import json
|
||||
from http import HTTPStatus
|
||||
|
||||
import sqlalchemy
|
||||
from auto_archiver.core import Metadata
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
from loguru import logger
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.shared import business_logic, schemas
|
||||
from app.shared.db import models, worker_crud
|
||||
from app.shared.db.database import get_db_dependency
|
||||
from app.shared.log import log_error
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.web.security import token_api_key_auth
|
||||
from app.web.utils.misc import get_all_urls
|
||||
|
||||
|
||||
router = APIRouter(prefix="/interop", tags=["Interoperability endpoints."])
|
||||
|
||||
|
||||
# ----- endpoint to submit data archived elsewhere
|
||||
@router.post(
|
||||
"/submit-archive",
|
||||
status_code=HTTPStatus.CREATED,
|
||||
summary="Submit a manual archive entry, for data that was archived elsewhere.",
|
||||
)
|
||||
def submit_manual_archive(
|
||||
manual: schemas.SubmitManualArchive,
|
||||
auth=Depends(token_api_key_auth),
|
||||
db: Session = Depends(get_db_dependency),
|
||||
):
|
||||
try:
|
||||
result: Metadata = Metadata.from_json(manual.result)
|
||||
except json.JSONDecodeError as e:
|
||||
log_error(e)
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.UNPROCESSABLE_ENTITY,
|
||||
detail="Invalid JSON in result field.",
|
||||
) from e
|
||||
manual.author_id = manual.author_id or ALLOW_ANY_EMAIL
|
||||
manual.tags.add("manual")
|
||||
|
||||
store_until = business_logic.get_store_archive_until_or_never(
|
||||
db, manual.group_id
|
||||
)
|
||||
logger.debug(
|
||||
f"[MANUAL ARCHIVE] {manual.author_id} {manual.url} {store_until}"
|
||||
)
|
||||
|
||||
try:
|
||||
archive = schemas.ArchiveCreate(
|
||||
author_id=manual.author_id,
|
||||
url=result.get_url(),
|
||||
public=manual.public,
|
||||
group_id=manual.group_id,
|
||||
tags=manual.tags,
|
||||
id=models.generate_uuid(),
|
||||
result=json.loads(result.to_json()),
|
||||
urls=get_all_urls(result),
|
||||
store_until=store_until,
|
||||
)
|
||||
|
||||
db_archive = worker_crud.store_archived_url(db, archive)
|
||||
logger.debug(
|
||||
f"[MANUAL ARCHIVE STORED] {db_archive.author_id} {db_archive.url}"
|
||||
)
|
||||
return JSONResponse(
|
||||
{"id": db_archive.id}, status_code=HTTPStatus.CREATED
|
||||
)
|
||||
except sqlalchemy.exc.IntegrityError as e:
|
||||
log_error(e)
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.UNPROCESSABLE_ENTITY,
|
||||
detail="Cannot insert into DB due to integrity error, likely duplicate urls.",
|
||||
) from e
|
||||
132
app/web/routers/sheet.py
Normal file
132
app/web/routers/sheet.py
Normal file
@@ -0,0 +1,132 @@
|
||||
from http import HTTPStatus
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
from sqlalchemy import exc
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.shared.db.database import get_db_dependency
|
||||
from app.shared.schemas import (
|
||||
DeleteResponse,
|
||||
SheetAdd,
|
||||
SheetResponse,
|
||||
SubmitSheet,
|
||||
)
|
||||
from app.shared.task_messaging import get_celery
|
||||
from app.web.db import crud
|
||||
from app.web.db.user_state import UserState
|
||||
from app.web.security import get_user_state
|
||||
|
||||
|
||||
router = APIRouter(prefix="/sheet", tags=["Google Spreadsheet operations"])
|
||||
|
||||
celery = get_celery()
|
||||
|
||||
|
||||
@router.post(
|
||||
"/create",
|
||||
status_code=HTTPStatus.CREATED,
|
||||
summary="Store a new Google Sheet for regular archiving.",
|
||||
)
|
||||
def create_sheet(
|
||||
sheet: SheetAdd,
|
||||
user: UserState = Depends(get_user_state),
|
||||
db: Session = Depends(get_db_dependency),
|
||||
) -> SheetResponse:
|
||||
if not user.in_group(sheet.group_id):
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.FORBIDDEN,
|
||||
detail="User does not have access to this group.",
|
||||
)
|
||||
|
||||
if not user.has_quota_monthly_sheets(sheet.group_id):
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.TOO_MANY_REQUESTS,
|
||||
detail="User has reached their sheet quota for this group.",
|
||||
)
|
||||
|
||||
if not user.is_sheet_frequency_allowed(sheet.group_id, sheet.frequency):
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.UNPROCESSABLE_ENTITY,
|
||||
detail="Invalid frequency selected for this group.",
|
||||
)
|
||||
|
||||
try:
|
||||
return crud.create_sheet(
|
||||
db,
|
||||
sheet.id,
|
||||
sheet.name,
|
||||
user.email,
|
||||
sheet.group_id,
|
||||
sheet.frequency,
|
||||
)
|
||||
except exc.IntegrityError as e:
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.BAD_REQUEST,
|
||||
detail="Sheet with this ID is already being archived.",
|
||||
) from e
|
||||
|
||||
|
||||
@router.get(
|
||||
"/mine",
|
||||
status_code=HTTPStatus.OK,
|
||||
summary="Get the authenticated user's Google Sheets.",
|
||||
)
|
||||
def get_user_sheets(
|
||||
user: UserState = Depends(get_user_state),
|
||||
db: Session = Depends(get_db_dependency),
|
||||
) -> list[SheetResponse]:
|
||||
return crud.get_user_sheets(db, user.email)
|
||||
|
||||
|
||||
@router.delete("/{sheet_id}", summary="Delete a Google Sheet by ID.")
|
||||
def delete_sheet(
|
||||
sheet_id: str,
|
||||
user: UserState = Depends(get_user_state),
|
||||
db: Session = Depends(get_db_dependency),
|
||||
) -> DeleteResponse:
|
||||
return DeleteResponse(
|
||||
id=sheet_id, deleted=crud.delete_sheet(db, sheet_id, user.email)
|
||||
)
|
||||
|
||||
|
||||
@router.post(
|
||||
"/{sheet_id}/archive",
|
||||
status_code=HTTPStatus.CREATED,
|
||||
summary="Trigger an archiving task for a GSheet you own.",
|
||||
response_description="task_id for the archiving task.",
|
||||
)
|
||||
def archive_user_sheet(
|
||||
sheet_id: str,
|
||||
user: UserState = Depends(get_user_state),
|
||||
db: Session = Depends(get_db_dependency),
|
||||
) -> JSONResponse:
|
||||
sheet = crud.get_user_sheet(db, user.email, sheet_id=sheet_id)
|
||||
if not sheet:
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.FORBIDDEN, detail="No access to this sheet."
|
||||
)
|
||||
|
||||
if not user.in_group(sheet.group_id):
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.FORBIDDEN,
|
||||
detail="User does not have access to this group.",
|
||||
)
|
||||
|
||||
if not user.can_manually_trigger(sheet.group_id):
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.TOO_MANY_REQUESTS,
|
||||
detail="User cannot manually trigger sheet archiving in this group.",
|
||||
)
|
||||
|
||||
group_queue = user.priority_group(sheet.group_id)
|
||||
task = celery.signature(
|
||||
"create_sheet_task",
|
||||
args=[
|
||||
SubmitSheet(
|
||||
sheet_id=sheet_id, author_id=user.email, group_id=sheet.group_id
|
||||
).model_dump_json()
|
||||
],
|
||||
).apply_async(**group_queue)
|
||||
|
||||
return JSONResponse({"id": task.id}, status_code=HTTPStatus.CREATED)
|
||||
52
app/web/routers/task.py
Normal file
52
app/web/routers/task.py
Normal file
@@ -0,0 +1,52 @@
|
||||
from celery.result import AsyncResult
|
||||
from fastapi import APIRouter, Depends
|
||||
from fastapi.encoders import jsonable_encoder
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from app.shared import schemas
|
||||
from app.shared.constants import STATUS_FAILURE
|
||||
from app.shared.log import log_error
|
||||
from app.shared.task_messaging import get_celery
|
||||
from app.web.security import get_token_or_user_auth
|
||||
from app.web.utils.misc import custom_jsonable_encoder
|
||||
|
||||
|
||||
router = APIRouter(prefix="/task", tags=["Async task operations"])
|
||||
|
||||
celery = get_celery()
|
||||
|
||||
|
||||
@router.get(
|
||||
"/{task_id}",
|
||||
summary="Check the status of an async task by its id, works for URLs and Sheet tasks.",
|
||||
)
|
||||
def get_status(
|
||||
task_id, email=Depends(get_token_or_user_auth)
|
||||
) -> schemas.TaskResult:
|
||||
task = AsyncResult(task_id, app=celery)
|
||||
try:
|
||||
if task.status == STATUS_FAILURE:
|
||||
# *FAILURE* The task raised an exception, or has exceeded the retry limit.
|
||||
# The :attr:`result` attribute then contains the exception raised by
|
||||
# the task.
|
||||
# https://docs.celeryq.dev/en/stable/_modules/celery/result.html#AsyncResult
|
||||
raise task.result
|
||||
|
||||
response = {"id": task_id, "status": task.status, "result": task.result}
|
||||
return JSONResponse(
|
||||
jsonable_encoder(
|
||||
response,
|
||||
exclude_unset=True,
|
||||
custom_encoder={bytes: custom_jsonable_encoder},
|
||||
)
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
log_error(e)
|
||||
return JSONResponse(
|
||||
{
|
||||
"id": task_id,
|
||||
"status": STATUS_FAILURE,
|
||||
"result": {"error": str(e)},
|
||||
}
|
||||
)
|
||||
125
app/web/routers/url.py
Normal file
125
app/web/routers/url.py
Normal file
@@ -0,0 +1,125 @@
|
||||
from datetime import datetime
|
||||
from http import HTTPStatus
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
from loguru import logger
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.shared import schemas
|
||||
from app.shared.db.database import get_db_dependency
|
||||
from app.shared.schemas import DeleteResponse
|
||||
from app.shared.task_messaging import get_celery
|
||||
from app.web.config import ALLOW_ANY_EMAIL
|
||||
from app.web.db import crud
|
||||
from app.web.db.user_state import UserState
|
||||
from app.web.security import get_token_or_user_auth, get_user_state
|
||||
from app.web.utils.misc import convert_priority_to_queue_dict
|
||||
|
||||
|
||||
router = APIRouter(prefix="/url", tags=["Single URL operations"])
|
||||
|
||||
celery = get_celery()
|
||||
|
||||
|
||||
@router.post(
|
||||
"/archive",
|
||||
status_code=HTTPStatus.CREATED,
|
||||
summary="Submit a single URL archive request, starts an archiving task.",
|
||||
response_description="task_id for the archiving task, will match the archive id.",
|
||||
)
|
||||
def archive_url(
|
||||
archive: schemas.ArchiveTrigger,
|
||||
email=Depends(get_token_or_user_auth),
|
||||
db: Session = Depends(get_db_dependency),
|
||||
) -> JSONResponse:
|
||||
logger.info(
|
||||
f"new {archive.public=} task for {email=} and {archive.group_id=}: {archive.url}"
|
||||
)
|
||||
|
||||
parsed_url = urlparse(archive.url)
|
||||
if not all([parsed_url.scheme, parsed_url.netloc]):
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.BAD_REQUEST, detail="Invalid URL received."
|
||||
)
|
||||
|
||||
archive_create = schemas.ArchiveCreate(**archive.model_dump())
|
||||
if email != ALLOW_ANY_EMAIL:
|
||||
archive_create.author_id = email
|
||||
user = UserState(db, email)
|
||||
if archive.group_id and not user.in_group(archive.group_id):
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.FORBIDDEN,
|
||||
detail="User does not have access to this group.",
|
||||
)
|
||||
if not user.has_quota_max_monthly_urls(archive.group_id):
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.TOO_MANY_REQUESTS,
|
||||
detail="User has reached their monthly URL quota.",
|
||||
)
|
||||
if not user.has_quota_max_monthly_mbs(archive.group_id):
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.TOO_MANY_REQUESTS,
|
||||
detail="User has reached their monthly MB quota.",
|
||||
)
|
||||
group_queue = user.priority_group(archive_create.group_id)
|
||||
else:
|
||||
archive_create.author_id = archive.author_id or email
|
||||
group_queue = convert_priority_to_queue_dict("high")
|
||||
|
||||
task = celery.signature(
|
||||
"create_archive_task", args=[archive_create.model_dump_json()]
|
||||
).apply_async(**group_queue)
|
||||
task_response = schemas.Task(id=task.id)
|
||||
return JSONResponse(
|
||||
task_response.model_dump(), status_code=HTTPStatus.CREATED
|
||||
)
|
||||
|
||||
|
||||
@router.get("/search", summary="Search for archive entries by URL.")
|
||||
def search_by_url(
|
||||
url: str,
|
||||
skip: int = 0,
|
||||
limit: int = 25,
|
||||
archived_after: datetime = None,
|
||||
archived_before: datetime = None,
|
||||
db: Session = Depends(get_db_dependency),
|
||||
email: str = Depends(get_token_or_user_auth),
|
||||
) -> list[schemas.ArchiveResult]:
|
||||
read_groups, read_public = False, False
|
||||
if email != ALLOW_ANY_EMAIL:
|
||||
user = UserState(db, email)
|
||||
if not user.read and not user.read_public:
|
||||
raise HTTPException(
|
||||
status_code=HTTPStatus.FORBIDDEN,
|
||||
detail="User does not have read access.",
|
||||
)
|
||||
read_groups = user.read
|
||||
read_public = user.read_public
|
||||
return crud.search_archives_by_url(
|
||||
db,
|
||||
url.strip(),
|
||||
email,
|
||||
read_groups,
|
||||
read_public,
|
||||
skip=skip,
|
||||
limit=limit,
|
||||
archived_after=archived_after,
|
||||
archived_before=archived_before,
|
||||
)
|
||||
|
||||
|
||||
@router.delete("/{archive_id}", summary="Delete a single URL archive by id.")
|
||||
def delete_archive(
|
||||
archive_id: str,
|
||||
user: UserState = Depends(get_user_state),
|
||||
db: Session = Depends(get_db_dependency),
|
||||
) -> DeleteResponse:
|
||||
logger.info(
|
||||
f"deleting url archive task {archive_id} request by {user.email}"
|
||||
)
|
||||
return DeleteResponse(
|
||||
id=archive_id,
|
||||
deleted=crud.soft_delete_archive(db, archive_id, user.email),
|
||||
)
|
||||
Reference in New Issue
Block a user