API updates

This commit is contained in:
msramalho
2023-02-26 20:22:20 +01:00
parent a00f3ff8d7
commit 3ef4787313
4 changed files with 37 additions and 22 deletions

View File

@@ -1,20 +1,24 @@
from sqlalchemy.orm import Session from sqlalchemy.orm import Session, load_only
from loguru import logger
from . import models, schemas from . import models, schemas
def get_task(db: Session, task_id: str): def get_task(db: Session, task_id: str):
return db.query(models.Task).filter(models.Task.id == task_id).first() return base_query(db).filter(models.Task.id == task_id).first()
# def get_user_by_email(db: Session, email: str):
# return db.query(models.User).filter(models.User.email == email).first()
def get_tasks(db: Session, skip: int = 0, limit: int = 100): def get_tasks(db: Session, skip: int = 0, limit: int = 100):
return db.query(models.Task).offset(skip).limit(limit).all() return base_query(db).offset(skip).limit(limit).all()
def search_tasks_by_url(db: Session, url:str, skip: int = 0, limit: int = 100):
return base_query(db).filter(models.Task.url.like(f'%{url}%')).offset(skip).limit(limit).all()
def search_tasks_by_email(db: Session, email:str, skip: int = 0, limit: int = 100):
return base_query(db).filter(models.Task.author==email).offset(skip).limit(limit).all()
def base_query(db:Session):
# allow only some fields to be returned, for example author should remain hidden
return db.query(models.Task).options(load_only(models.Task.id, models.Task.created_at, models.Task.url, models.Task.result))
def create_task(db: Session, task: schemas.TaskCreate): def create_task(db: Session, task: schemas.TaskCreate):
db_task = models.Task(id=task.id, url=task.url, author=task.author, result=task.result) db_task = models.Task(id=task.id, url=task.url, author=task.author, result=task.result)

View File

@@ -1,5 +1,4 @@
from sqlalchemy import Boolean, Column, ForeignKey, Integer, String, JSON, TIMESTAMP, DateTime from sqlalchemy import Column, String, JSON, DateTime
from sqlalchemy.orm import relationship
from sqlalchemy.sql import func from sqlalchemy.sql import func
from .database import Base from .database import Base

View File

@@ -26,7 +26,7 @@ assert len(GOOGLE_CHROME_APP_ID)>10, "GOOGLE_CHROME_APP_ID env variable not set"
ALLOWED_EMAILS = set(os.environ.get("ALLOWED_EMAILS", "").split(",")) ALLOWED_EMAILS = set(os.environ.get("ALLOWED_EMAILS", "").split(","))
assert len(GOOGLE_CHROME_APP_ID)>=1, "at least one ALLOWED_EMAILS is required from the env variable" assert len(GOOGLE_CHROME_APP_ID)>=1, "at least one ALLOWED_EMAILS is required from the env variable"
ALLOWED_ORIGINS = os.environ.get("ALLOWED_ORIGINS", "chrome-extension://ondkcheoicfckabcnkdgbepofpjmjcmb,chrome-extension://ojcimmjndnlmmlgnjaeojoebaceokpdp").split(",") ALLOWED_ORIGINS = os.environ.get("ALLOWED_ORIGINS", "chrome-extension://ondkcheoicfckabcnkdgbepofpjmjcmb,chrome-extension://ojcimmjndnlmmlgnjaeojoebaceokpdp").split(",")
VERSION = "0.1.1" VERSION = "0.1.3"
app = FastAPI() app = FastAPI()
app.add_middleware( app.add_middleware(
@@ -44,17 +44,27 @@ def get_db():
finally: session.close() finally: session.close()
@app.get("/tasks/search-url", response_model=list[schemas.Task])
def search(access_token:str, url:str, skip: int = 0, limit: int = 100, db: Session = Depends(get_db)):
validate_user_get_email(access_token)
return crud.search_tasks_by_url(db, url, skip=skip, limit=limit)
@app.get("/tasks/search", response_model=list[schemas.Task]) @app.get("/tasks/search", response_model=list[schemas.Task])
def search(access_token:str, skip: int = 0, limit: int = 100, db: Session = Depends(get_db)): def search(access_token:str, skip: int = 0, limit: int = 100, db: Session = Depends(get_db)):
validate_user_get_email(access_token) validate_user_get_email(access_token)
return crud.get_tasks(db, skip=skip, limit=limit) return crud.get_tasks(db, skip=skip, limit=limit)
@app.get("/tasks/sync", response_model=list[schemas.Task])
def search(access_token:str, skip: int = 0, limit: int = 100, db: Session = Depends(get_db)):
email = validate_user_get_email(access_token)
return crud.search_tasks_by_email(db, email, skip=skip, limit=limit)
@app.post("/tasks", status_code=201) @app.post("/tasks", status_code=201)
def run_task(payload = Body(...)): def run_task(payload = Body(...)):
email = validate_user_get_email(payload["access_token"]) email = validate_user_get_email(payload.get("access_token"))
logger.info(f"new task for user {email}: {payload['url']}") logger.info(f"new task for user {email}: {payload.get('url')}")
task = create_archive_task.delay(payload["url"]) task = create_archive_task.delay(url=payload.get('url'), email=email)
return JSONResponse({"task_id": task.id}) return JSONResponse({"id": task.id})
@app.get("/tasks/{task_id}") @app.get("/tasks/{task_id}")
def get_status(task_id, access_token:str): def get_status(task_id, access_token:str):
@@ -62,9 +72,9 @@ def get_status(task_id, access_token:str):
logger.info(f"status check for user {email}") logger.info(f"status check for user {email}")
task_result = AsyncResult(task_id, app=celery) task_result = AsyncResult(task_id, app=celery)
result = { result = {
"task_id": task_id, "id": task_id,
"task_status": task_result.status, "status": task_result.status,
"task_result": task_result.result "result": task_result.result
} }
try: try:
json_result = jsonable_encoder(result, exclude_unset=True) json_result = jsonable_encoder(result, exclude_unset=True)
@@ -74,8 +84,8 @@ def get_status(task_id, access_token:str):
logger.error(e) logger.error(e)
logger.error(traceback.format_exc()) logger.error(traceback.format_exc())
return JSONResponse({ return JSONResponse({
"task_id": task_id, "id": task_id,
"task_status": "FAILURE", "status": "FAILURE",
}) })

View File

@@ -4,6 +4,7 @@ import os
from celery import Celery from celery import Celery
from dataclasses import asdict from dataclasses import asdict
from auto_archiver import Config, ArchivingOrchestrator, Metadata from auto_archiver import Config, ArchivingOrchestrator, Metadata
from auto_archiver.enrichers import ScreenshotEnricher
from loguru import logger from loguru import logger
from db import crud, models, schemas from db import crud, models, schemas
@@ -28,11 +29,12 @@ config.parse(use_cli=False, yaml_config_filename="secrets/orchestration.yaml")
orchestrator = None orchestrator = None
@celery.task(name="create_archive_task", bind=True) @celery.task(name="create_archive_task", bind=True)
def create_archive_task(self, url: str , user_email:str=""): def create_archive_task(self, url: str, email:str=""):
assert type(url)==str and len(url)>5, f"Invalid URL received: {url}"
global orchestrator global orchestrator
if not orchestrator: orchestrator = ArchivingOrchestrator(config) if not orchestrator: orchestrator = ArchivingOrchestrator(config)
result = orchestrator.feed_item(Metadata().set_url(url)).to_json() result = orchestrator.feed_item(Metadata().set_url(url)).to_json()
with get_db() as session: with get_db() as session:
db_task = crud.create_task(session, task=schemas.TaskCreate(id=self.request.id, url=url, author=user_email, result=json.loads(result))) db_task = crud.create_task(session, task=schemas.TaskCreate(id=self.request.id, url=url, author=email, result=json.loads(result)))
logger.debug(f"Added {db_task.id=} to database on {db_task.created_at}") logger.debug(f"Added {db_task.id=} to database on {db_task.created_at}")
return result return result