mirror of
https://github.com/bellingcat/auto-archiver-api.git
synced 2026-06-08 03:28:35 +03:00
feat: submit-url endpoint ready
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -12,4 +12,5 @@ redis/data/*
|
||||
.ipynb_checkpoints*
|
||||
#temp
|
||||
tests
|
||||
src/user-groups.yaml
|
||||
src/user-groups.yaml
|
||||
wit*
|
||||
24
src/Pipfile.lock
generated
24
src/Pipfile.lock
generated
@@ -197,11 +197,11 @@
|
||||
},
|
||||
"auto-archiver": {
|
||||
"hashes": [
|
||||
"sha256:40d8a9b3b818805ec53a4c63b87f5222e29a33ff35effda19b416bbdca4d477d",
|
||||
"sha256:84bed4548694b7e9dffc181ef9f2b1f9184578403a6b491aaf95577c08df9698"
|
||||
"sha256:530eeec3a03fd38410cf864b641df38c61505afc8c402d730ba5aca2ff67ffa8",
|
||||
"sha256:54a1e202f8c1fdfb46f8295a07e1e9e121bec2b6f81c85c53134bbb80a07501a"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==0.5.17"
|
||||
"version": "==0.5.18"
|
||||
},
|
||||
"beautifulsoup4": {
|
||||
"hashes": [
|
||||
@@ -228,19 +228,19 @@
|
||||
},
|
||||
"boto3": {
|
||||
"hashes": [
|
||||
"sha256:d47a68a0ca6599e8711c7da670fbac24085d9d50cfb4f761204f154d2b6fae26",
|
||||
"sha256:f0a78f94a7140b60960898fd86677e4e73cc96bd7f3e5c64fc5cc1818d04c7b8"
|
||||
"sha256:5b61a82f0c1cd006bd109ddf27c93d9b010c4c188fc583ee257ff6f3bb89970d",
|
||||
"sha256:fe19d287bc8ede385e1b9136f135ee8f93eab81404ad1445b1a70cabfe3f7087"
|
||||
],
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==1.26.138"
|
||||
"version": "==1.26.139"
|
||||
},
|
||||
"botocore": {
|
||||
"hashes": [
|
||||
"sha256:31edc237088c104f7a05887646bbec31d7459dd2e108fd90cbffa315902817e2",
|
||||
"sha256:3d145f30d10a9c712acee48e7ce906c9456bb25fe50d477c9312c702ccfa50d1"
|
||||
"sha256:acc62710bdf11e47f4f26fb290a9082ff00377d7e93a16e1f080f9c789898114",
|
||||
"sha256:b164af929eb2f1507833718de9eb8811e3adc6943b464c1869e95ac87f3bab88"
|
||||
],
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==1.29.138"
|
||||
"version": "==1.29.139"
|
||||
},
|
||||
"brotli": {
|
||||
"hashes": [
|
||||
@@ -1836,11 +1836,11 @@
|
||||
},
|
||||
"typing-extensions": {
|
||||
"hashes": [
|
||||
"sha256:6ad00b63f849b7dcc313b70b6b304ed67b2b2963b3098a33efe18056b1a9a223",
|
||||
"sha256:ff6b238610c747e44c268aa4bb23c8c735d665a63726df3f9431ce707f2aa768"
|
||||
"sha256:558bc0c4145f01e6405f4a5fdbd82050bd221b119f4bf72a961a1cfd471349d6",
|
||||
"sha256:6bac751f4789b135c43228e72de18637e9a6c29d12777023a703fd1a6858469f"
|
||||
],
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==4.6.0"
|
||||
"version": "==4.6.1"
|
||||
},
|
||||
"typing-inspect": {
|
||||
"hashes": [
|
||||
|
||||
@@ -31,3 +31,10 @@ class SubmitSheet(BaseModel):
|
||||
group_id: str | None = None
|
||||
tags: set | None = set()
|
||||
columns: dict | None = {} # TODO: implement
|
||||
|
||||
# Request body for the manual-submission endpoint: carries an already-produced
# archive result plus the visibility/ownership fields stored alongside it.
class SubmitManual(BaseModel):
    result: str # should be a Metadata.to_json()
    # whether the stored archive is public; private unless explicitly requested
    public: bool = False
    # optional owner and group identifiers; both default to None
    author_id: str | None = None
    group_id: str | None = None
    # optional tags; the annotation also admits an explicit null
    # NOTE(review): code that calls set methods on `tags` should guard against None
    tags: set | None = set()
|
||||
|
||||
19
src/main.py
19
src/main.py
@@ -10,12 +10,13 @@ from dotenv import load_dotenv
|
||||
import traceback, os, logging
|
||||
from loguru import logger
|
||||
|
||||
from worker import create_archive_task, create_sheet_task, celery
|
||||
from worker import create_archive_task, create_sheet_task, celery, insert_result_into_db
|
||||
|
||||
from db import crud, models, schemas
|
||||
from db.database import engine, SessionLocal
|
||||
from sqlalchemy.orm import Session
|
||||
from security import get_bearer_auth, get_basic_auth, bearer_security
|
||||
from auto_archiver import Metadata
|
||||
|
||||
load_dotenv()
|
||||
|
||||
@@ -62,7 +63,7 @@ async def home(request: Request):
|
||||
return JSONResponse(status)
|
||||
|
||||
|
||||
# Bearer protected below
|
||||
#-----Submit URL and manipulate tasks. Bearer protected below
|
||||
|
||||
@app.get("/groups", response_model=list[str])
|
||||
def get_user_groups(db: Session = Depends(get_db), email = Depends(get_bearer_auth)):
|
||||
@@ -115,7 +116,6 @@ def get_status(task_id, email = Depends(get_bearer_auth)):
|
||||
"result": {"error": e}
|
||||
})
|
||||
|
||||
|
||||
@app.delete("/tasks/{task_id}")
|
||||
def delete_task(task_id, db: Session = Depends(get_db), email = Depends(get_bearer_auth)):
|
||||
logger.info(f"deleting task {task_id} request by {email}")
|
||||
@@ -124,6 +124,7 @@ def delete_task(task_id, db: Session = Depends(get_db), email = Depends(get_bear
|
||||
"deleted": crud.soft_delete_task(db, task_id, email)
|
||||
})
|
||||
|
||||
#----- Google Sheets Logic
|
||||
@app.post("/sheet", status_code=201)
|
||||
def archive_sheet(sheet:schemas.SubmitSheet, email = Depends(get_bearer_auth)):
|
||||
logger.info(f"SHEET TASK for {sheet=}")
|
||||
@@ -133,6 +134,18 @@ def archive_sheet(sheet:schemas.SubmitSheet, email = Depends(get_bearer_auth)):
|
||||
task = create_sheet_task.delay(sheet.json())
|
||||
return JSONResponse({"id": task.id})
|
||||
|
||||
#----- endpoint to submit data archived elsewhere
|
||||
@app.post("/submit-archive", status_code=201)
def submit_manual_archive(manual:schemas.SubmitManual, basic_auth = Depends(get_basic_auth)):
    """Store an archive result that was produced outside this API.

    The JSON string in ``manual.result`` is parsed with ``Metadata.from_json``,
    the tag ``"manual"`` is added to the submitted tags, and the result is
    written through ``insert_result_into_db`` under a freshly generated id.

    Args:
        manual: request body (see ``schemas.SubmitManual``).
        basic_auth: value resolved by the ``get_basic_auth`` dependency; it is
            not used in the body, the dependency is declared only to gate access.

    Returns:
        A ``JSONResponse`` of the form ``{"id": <archive id>}``.
    """
    logger.info(f"Submit {manual=}")
    result = Metadata.from_json(manual.result)
    logger.info(f"{result=}")

    # `tags` is typed `set | None`, so an explicit null in the request body
    # would make `manual.tags.add(...)` raise AttributeError. Build a fresh set
    # instead, which also leaves the request model untouched.
    tags = set(manual.tags or ())
    tags.add("manual")

    archive_id = insert_result_into_db(result, tags, manual.public, manual.group_id, manual.author_id, models.generate_uuid())
    return JSONResponse({"id": archive_id})
|
||||
|
||||
|
||||
# Basic protected logic to allow access to 1 static file
|
||||
SF = os.environ.get("STATIC_FILE", "")
|
||||
if len(SF) > 1 and os.path.isfile(SF):
|
||||
|
||||
@@ -11,12 +11,12 @@ async-generator==1.10 ; python_version >= '3.5'
|
||||
async-timeout==4.0.2 ; python_version >= '3.6'
|
||||
attrs==23.1.0 ; python_version >= '3.7'
|
||||
authlib==0.15.6
|
||||
auto-archiver==0.5.17
|
||||
auto-archiver==0.5.18
|
||||
beautifulsoup4==4.12.2 ; python_full_version >= '3.6.0'
|
||||
billiard==3.6.4.0
|
||||
blinker==1.6.2 ; python_version >= '3.7'
|
||||
boto3==1.26.138 ; python_version >= '3.7'
|
||||
botocore==1.29.138 ; python_version >= '3.7'
|
||||
boto3==1.26.139 ; python_version >= '3.7'
|
||||
botocore==1.29.139 ; python_version >= '3.7'
|
||||
brotli==1.0.9 ; platform_python_implementation == 'CPython'
|
||||
bs4==0.0.1
|
||||
cachetools==5.3.0 ; python_version ~= '3.7'
|
||||
@@ -117,7 +117,7 @@ tornado==6.3.2 ; python_version >= '3.5.2'
|
||||
tqdm==4.65.0 ; python_version >= '3.7'
|
||||
trio==0.22.0 ; python_version >= '3.7'
|
||||
trio-websocket==0.10.2 ; python_version >= '3.7'
|
||||
typing-extensions==4.6.0 ; python_version >= '3.7'
|
||||
typing-extensions==4.6.1 ; python_version >= '3.7'
|
||||
typing-inspect==0.8.0
|
||||
tzlocal==5.0.1 ; python_version >= '3.7'
|
||||
uritemplate==4.1.1 ; python_version >= '3.6'
|
||||
|
||||
@@ -143,9 +143,9 @@ def is_group_invalid_for_user(public: bool, group_id: str, author_id: str):
|
||||
return False
|
||||
|
||||
|
||||
def insert_result_into_db(result: Metadata, tags: Set[str], public: bool, group_id: str, author_id: str, task_id:str):
|
||||
def insert_result_into_db(result: Metadata, tags: Set[str], public: bool, group_id: str, author_id: str, task_id:str) -> str:
|
||||
logger.info(f"INSERTING {public=} {result} into {task_id}")
|
||||
assert result, "UNABLE TO archive: {url}"
|
||||
assert result, f"UNABLE TO archive: {result.get_url()}"
|
||||
with get_db() as session:
|
||||
# create DB URLs
|
||||
db_urls = [models.ArchiveUrl(url=url, key=m.get("id", f"media_{i}")) for i, m in enumerate(result.media) for url in m.urls]
|
||||
@@ -154,6 +154,7 @@ def insert_result_into_db(result: Metadata, tags: Set[str], public: bool, group_
|
||||
# insert archive
|
||||
db_task = crud.create_task(session, task=schemas.ArchiveCreate(id=task_id, url=result.get_url(), result=json.loads(result.to_json()), public=public, author_id=author_id, group_id=group_id), tags=db_tags, urls=db_urls)
|
||||
logger.debug(f"Added {db_task.id=} to database on {db_task.created_at}")
|
||||
return db_task.id
|
||||
|
||||
|
||||
# INIT
|
||||
|
||||
Reference in New Issue
Block a user