mirror of
https://github.com/bellingcat/auto-archiver-api.git
synced 2026-06-11 13:08:34 +03:00
isolating api methods, introducing first tests
This commit is contained in:
5
src/endpoints/__init__.py
Normal file
5
src/endpoints/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
from endpoints.default import default_router
|
||||
from endpoints.url import url_router
|
||||
from endpoints.task import task_router
|
||||
from endpoints.interoperability import interoperability_router
|
||||
from endpoints.sheet import sheet_router
|
||||
36
src/endpoints/default.py
Normal file
36
src/endpoints/default.py
Normal file
@@ -0,0 +1,36 @@
|
||||
|
||||
from fastapi import APIRouter, Depends, Request, HTTPException
|
||||
from fastapi.responses import FileResponse, JSONResponse
|
||||
from loguru import logger
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from core.config import VERSION, BREAKING_CHANGES
|
||||
from db import crud
|
||||
from db.database import get_db
|
||||
from security import get_user_auth, bearer_security
|
||||
|
||||
|
||||
# Router for the un-prefixed endpoints declared in this module ("/", "/groups", "/favicon.ico").
default_router = APIRouter()
|
||||
|
||||
|
||||
@default_router.get("/")
async def home(request: Request):
    """Return API version info, plus the caller's groups when authentication succeeds.

    The response always contains ``version`` and ``breakingChanges``. If
    ``bearer_security`` and ``get_user_auth`` accept the request, a ``groups``
    entry from ``crud.get_user_groups`` is added. An ``HTTPException`` raised
    while authenticating is ignored on purpose; any other error is logged and
    the base payload is still returned.
    """
    # TODO: maybe split into 2 routes: one non authenticated and one authenticated for the groups info only
    status = {"version": VERSION, "breakingChanges": BREAKING_CHANGES}
    db_provider = None
    try:
        email = await get_user_auth(await bearer_security(request))
        db_provider = get_db()
        db: Session = next(db_provider)
        status["groups"] = crud.get_user_groups(db, email)
    except HTTPException:
        pass  # not authenticated is fine
    except Exception as e:
        logger.error(e)
    finally:
        # get_db is driven by hand here (not through Depends), so nothing else
        # will finish it; closing it lets any cleanup after its yield run now.
        # NOTE(review): assumes get_db is a generator-style dependency - confirm.
        close_provider = getattr(db_provider, "close", None)
        if callable(close_provider):
            close_provider()
    return JSONResponse(status)
|
||||
|
||||
|
||||
@default_router.get("/groups", response_model=list[str])
def get_user_groups(db: Session = Depends(get_db), email=Depends(get_user_auth)):
    """Return the groups that ``crud.get_user_groups`` reports for the authenticated user."""
    groups = crud.get_user_groups(db, email)
    return groups
|
||||
|
||||
|
||||
@default_router.get("/favicon.ico", include_in_schema=False)
async def favicon():
    """Serve the favicon file; the route is excluded from the OpenAPI schema."""
    icon_path = "static/favicon.ico"
    return FileResponse(icon_path)
|
||||
26
src/endpoints/interoperability.py
Normal file
26
src/endpoints/interoperability.py
Normal file
@@ -0,0 +1,26 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
from auto_archiver import Metadata
|
||||
from loguru import logger
|
||||
import sqlalchemy
|
||||
|
||||
from security import token_api_key_auth
|
||||
from db import models, schemas
|
||||
from worker import insert_result_into_db
|
||||
|
||||
|
||||
# Router for the endpoints in this module; all of its routes are served under the "/interop" prefix.
interoperability_router = APIRouter(prefix="/interop", tags=["Interoperability endpoints."])
|
||||
|
||||
|
||||
# ----- endpoint to submit data archived elsewhere
|
||||
@interoperability_router.post(
    "/submit-archive",
    status_code=201,
    summary="Submit a manual archive entry, for data that was archived elsewhere.",
)
def submit_manual_archive(manual: schemas.SubmitManual, auth=Depends(token_api_key_auth)):
    """Store an archive result that was produced outside of this API.

    Parses ``manual.result`` into a ``Metadata`` object, adds the ``"manual"``
    tag and inserts the entry through ``insert_result_into_db`` under a newly
    generated uuid.

    Returns:
        JSONResponse (201) whose ``id`` is the value returned by the insert.

    Raises:
        HTTPException: 422 when the insert raises ``sqlalchemy.exc.IntegrityError``.
    """
    result = Metadata.from_json(manual.result)
    logger.info(f"MANUAL SUBMIT {result.get_url()} {manual.author_id}")
    manual.tags.add("manual")
    try:
        archive_id = insert_result_into_db(
            result,
            manual.tags,
            manual.public,
            manual.group_id,
            manual.author_id,
            models.generate_uuid(),
        )
    except sqlalchemy.exc.IntegrityError as e:
        logger.error(e)
        # Chain the cause so the original DB error stays attached to the HTTP error.
        raise HTTPException(status_code=422, detail="Cannot insert into DB due to integrity error") from e
    # A Response returned directly is sent as-is, so the decorator's
    # status_code=201 is not applied to it; set it explicitly.
    return JSONResponse({"id": archive_id}, status_code=201)
|
||||
23
src/endpoints/sheet.py
Normal file
23
src/endpoints/sheet.py
Normal file
@@ -0,0 +1,23 @@
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from security import ALLOW_ANY_EMAIL, get_token_or_user_auth
|
||||
from db import schemas
|
||||
from worker import create_sheet_task
|
||||
|
||||
# Router for the endpoints in this module; all of its routes are served under the "/sheet" prefix.
sheet_router = APIRouter(prefix="/sheet", tags=["Google Spreadsheet operations"])
|
||||
|
||||
|
||||
@sheet_router.post(
    "/archive",
    status_code=201,
    summary="Submit a Google Sheet archive request, starts a sheet archiving task.",
    response_model=schemas.Task,
    response_description="task_id for the archiving task.",
)
def archive_sheet(sheet: schemas.SubmitSheet, email=Depends(get_token_or_user_auth)):
    """Queue a sheet archiving task and return its task id.

    When the authenticated identity equals ``ALLOW_ANY_EMAIL``, the author is
    taken from ``sheet.author_id`` and falls back to ``"api-endpoint"``. The
    resolved author is written to ``sheet.author_id`` before the sheet is
    serialized and handed to ``create_sheet_task``.

    Returns:
        JSONResponse (201) with the ``id`` of the queued task.

    Raises:
        HTTPException: 422 when neither ``sheet_name`` nor ``sheet_id`` is set.
    """
    logger.info(f"SHEET TASK for {sheet=}")
    if email == ALLOW_ANY_EMAIL:
        email = sheet.author_id or "api-endpoint"
    # NOTE(review): block structure was rebuilt from a flattened listing; this
    # assignment is assumed to be unconditional (as in archive_url) - confirm.
    sheet.author_id = email
    if not (sheet.sheet_name or sheet.sheet_id):
        raise HTTPException(status_code=422, detail="sheet name or id is required")
    task = create_sheet_task.delay(sheet.model_dump_json())
    # A Response returned directly is sent as-is, so the decorator's
    # status_code=201 is not applied to it; set it explicitly.
    return JSONResponse({"id": task.id}, status_code=201)
|
||||
47
src/endpoints/task.py
Normal file
47
src/endpoints/task.py
Normal file
@@ -0,0 +1,47 @@
|
||||
import traceback
|
||||
from celery.result import AsyncResult
|
||||
from fastapi import APIRouter, Depends
|
||||
from fastapi.encoders import jsonable_encoder
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from loguru import logger
|
||||
from security import get_token_or_user_auth
|
||||
|
||||
from db import schemas
|
||||
|
||||
from worker import celery
|
||||
|
||||
|
||||
# Router for the endpoints in this module; all of its routes are served under the "/task" prefix.
task_router = APIRouter(prefix="/task", tags=["Async task operations"])
|
||||
|
||||
|
||||
@task_router.get(
    "/{task_id}",
    response_model=schemas.TaskResult,
    summary="Check the status of an async task by its id, works for URLs and Sheet tasks.",
)
def get_status(task_id, email=Depends(get_token_or_user_auth)):
    """Report the state of a celery task as ``{"id", "status", "result"}``.

    A task in the FAILURE state, or any error raised while building the
    response, is logged and answered with ``status`` set to ``"FAILURE"`` and
    the error text under ``result["error"]``.
    """
    logger.info(f"status check for user {email} task {task_id}")
    task = AsyncResult(task_id, app=celery)
    try:
        if task.status == "FAILURE":
            # On FAILURE the task raised an exception or exceeded its retry
            # limit, and `result` holds that exception; re-raise it so the
            # handler below builds the failure payload.
            # https://docs.celeryq.dev/en/stable/_modules/celery/result.html#AsyncResult
            raise task.result
        # TODO: refactor to use schema?
        payload = dict(id=task_id, status=task.status, result=task.result)
        encoded = jsonable_encoder(payload, exclude_unset=True)
        return JSONResponse(encoded)
    except Exception as e:
        logger.error(e)
        logger.error(traceback.format_exc())
        # TODO: refactor to use schema?
        failure_payload = {
            "id": task_id,
            "status": "FAILURE",
            "result": {"error": str(e)},
        }
        return JSONResponse(failure_payload)
|
||||
57
src/endpoints/url.py
Normal file
57
src/endpoints/url.py
Normal file
@@ -0,0 +1,57 @@
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
from datetime import datetime
|
||||
|
||||
from loguru import logger
|
||||
from security import get_user_auth, get_token_or_user_auth
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from db import crud, schemas
|
||||
from db.database import get_db
|
||||
|
||||
from worker import create_archive_task
|
||||
|
||||
# Router for the endpoints in this module; all of its routes are served under the "/url" prefix.
url_router = APIRouter(prefix="/url", tags=["Single URL operations"])
|
||||
|
||||
|
||||
@url_router.post(
    "/archive",
    status_code=201,
    summary="Submit a single URL archive request, starts an archiving task.",
    response_model=schemas.Task,
    response_description="task_id for the archiving task, will match the archive id.",
)
def archive_url(archive: schemas.ArchiveCreate, email=Depends(get_token_or_user_auth)):
    """Queue an archiving task for a single URL and return its task id.

    The authenticated identity is recorded as ``archive.author_id`` before the
    request is serialized and handed to ``create_archive_task``.

    Returns:
        JSONResponse (201) with the dumped ``schemas.Task`` for the queued task.

    Raises:
        HTTPException: 422 when the url is not a string or has 5 characters or fewer.
    """
    archive.author_id = email
    url = archive.url
    logger.info(f"new {archive.public=} task for {email=} and {archive.group_id=}: {url}")
    # isinstance (rather than an exact type comparison) also accepts str subclasses.
    if not isinstance(url, str) or len(url) <= 5:
        raise HTTPException(status_code=422, detail=f"Invalid URL received: {url}")
    logger.info("creating task")
    task = create_archive_task.delay(archive.model_dump_json())
    task_response = schemas.Task(id=task.id)
    # A Response returned directly is sent as-is, so the decorator's
    # status_code=201 is not applied to it; set it explicitly.
    return JSONResponse(task_response.model_dump(), status_code=201)
|
||||
|
||||
|
||||
@url_router.get(
    "/search",
    response_model=list[schemas.Archive],
    summary="Search for archive entries by URL.",
)
def search_by_url(
    url: str,
    skip: int = 0,
    limit: int = 25,
    archived_after: datetime = None,
    archived_before: datetime = None,
    db: Session = Depends(get_db),
    email=Depends(get_token_or_user_auth),
):
    """Search archives by URL through ``crud.search_archives_by_url``.

    Surrounding whitespace is stripped from ``url``; paging and the optional
    ``archived_after`` / ``archived_before`` bounds are passed on unchanged.
    """
    cleaned_url = url.strip()
    return crud.search_archives_by_url(
        db,
        cleaned_url,
        email,
        skip=skip,
        limit=limit,
        archived_after=archived_after,
        archived_before=archived_before,
    )
|
||||
|
||||
|
||||
@url_router.get(
    "/latest",
    response_model=list[schemas.Archive],
    summary="Fetch latest URL archives for the authenticated user.",
)
def latest(skip: int = 0, limit: int = 25, db: Session = Depends(get_db), email=Depends(get_user_auth)):
    """Return one page of archives from ``crud.search_archives_by_email`` for the authenticated user."""
    archives = crud.search_archives_by_email(db, email, skip=skip, limit=limit)
    return archives
|
||||
|
||||
|
||||
@url_router.get(
    "/{id}",
    response_model=schemas.Archive,
    summary="Fetch a single URL archive by the associated id.",
)
def lookup(id, db: Session = Depends(get_db), email=Depends(get_token_or_user_auth)):
    """Return whatever ``crud.get_archive`` yields for this id and caller."""
    archive = crud.get_archive(db, id, email)
    return archive
|
||||
|
||||
|
||||
@url_router.delete(
    "/{id}",
    response_model=schemas.TaskDelete,
    summary="Delete a single URL archive by id.",
)
def delete_task(id, db: Session = Depends(get_db), email=Depends(get_user_auth)):
    """Soft-delete an archive via ``crud.soft_delete_task`` and report the outcome.

    The response carries the requested ``id`` and, under ``deleted``, the
    value returned by the crud call.
    """
    logger.info(f"deleting url archive task {id} request by {email}")
    # TODO: use response model?
    deleted = crud.soft_delete_task(db, id, email)
    return JSONResponse({"id": id, "deleted": deleted})
|
||||
Reference in New Issue
Block a user