diff --git a/.gitignore b/.gitignore index feaf54f..cf49566 100644 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,5 @@ redis/data/* .ipynb_checkpoints* #temp tests -src/user-groups.yaml \ No newline at end of file +src/user-groups.yaml +wit* \ No newline at end of file diff --git a/src/Pipfile.lock b/src/Pipfile.lock index 079c9d1..aa0d229 100644 --- a/src/Pipfile.lock +++ b/src/Pipfile.lock @@ -197,11 +197,11 @@ }, "auto-archiver": { "hashes": [ - "sha256:40d8a9b3b818805ec53a4c63b87f5222e29a33ff35effda19b416bbdca4d477d", - "sha256:84bed4548694b7e9dffc181ef9f2b1f9184578403a6b491aaf95577c08df9698" + "sha256:530eeec3a03fd38410cf864b641df38c61505afc8c402d730ba5aca2ff67ffa8", + "sha256:54a1e202f8c1fdfb46f8295a07e1e9e121bec2b6f81c85c53134bbb80a07501a" ], "index": "pypi", - "version": "==0.5.17" + "version": "==0.5.18" }, "beautifulsoup4": { "hashes": [ @@ -228,19 +228,19 @@ }, "boto3": { "hashes": [ - "sha256:d47a68a0ca6599e8711c7da670fbac24085d9d50cfb4f761204f154d2b6fae26", - "sha256:f0a78f94a7140b60960898fd86677e4e73cc96bd7f3e5c64fc5cc1818d04c7b8" + "sha256:5b61a82f0c1cd006bd109ddf27c93d9b010c4c188fc583ee257ff6f3bb89970d", + "sha256:fe19d287bc8ede385e1b9136f135ee8f93eab81404ad1445b1a70cabfe3f7087" ], "markers": "python_version >= '3.7'", - "version": "==1.26.138" + "version": "==1.26.139" }, "botocore": { "hashes": [ - "sha256:31edc237088c104f7a05887646bbec31d7459dd2e108fd90cbffa315902817e2", - "sha256:3d145f30d10a9c712acee48e7ce906c9456bb25fe50d477c9312c702ccfa50d1" + "sha256:acc62710bdf11e47f4f26fb290a9082ff00377d7e93a16e1f080f9c789898114", + "sha256:b164af929eb2f1507833718de9eb8811e3adc6943b464c1869e95ac87f3bab88" ], "markers": "python_version >= '3.7'", - "version": "==1.29.138" + "version": "==1.29.139" }, "brotli": { "hashes": [ @@ -1836,11 +1836,11 @@ }, "typing-extensions": { "hashes": [ - "sha256:6ad00b63f849b7dcc313b70b6b304ed67b2b2963b3098a33efe18056b1a9a223", - "sha256:ff6b238610c747e44c268aa4bb23c8c735d665a63726df3f9431ce707f2aa768" + "sha256:558bc0c4145f01e6405f4a5fdbd82050bd221b119f4bf72a961a1cfd471349d6", + "sha256:6bac751f4789b135c43228e72de18637e9a6c29d12777023a703fd1a6858469f" ], "markers": "python_version >= '3.7'", - "version": "==4.6.0" + "version": "==4.6.1" }, "typing-inspect": { "hashes": [ diff --git a/src/db/schemas.py b/src/db/schemas.py index ae32947..14b5e81 100644 --- a/src/db/schemas.py +++ b/src/db/schemas.py @@ -31,3 +31,10 @@ class SubmitSheet(BaseModel): group_id: str | None = None tags: set | None = set() columns: dict | None = {} # TODO: implement + +class SubmitManual(BaseModel): + result: str # should be a Metadata.to_json() + public: bool = False + author_id: str | None = None + group_id: str | None = None + tags: set | None = set() diff --git a/src/main.py b/src/main.py index 8b4805c..2f9a29a 100644 --- a/src/main.py +++ b/src/main.py @@ -10,12 +10,13 @@ from dotenv import load_dotenv import traceback, os, logging from loguru import logger -from worker import create_archive_task, create_sheet_task, celery +from worker import create_archive_task, create_sheet_task, celery, insert_result_into_db from db import crud, models, schemas from db.database import engine, SessionLocal from sqlalchemy.orm import Session from security import get_bearer_auth, get_basic_auth, bearer_security +from auto_archiver import Metadata load_dotenv() @@ -62,7 +63,7 @@ async def home(request: Request): return JSONResponse(status) -# Bearer protected below +#-----Submit URL and manipulate tasks. Bearer protected below @app.get("/groups", response_model=list[str]) def get_user_groups(db: Session = Depends(get_db), email = Depends(get_bearer_auth)): @@ -115,7 +116,6 @@ def get_status(task_id, email = Depends(get_bearer_auth)): "result": {"error": e} }) - @app.delete("/tasks/{task_id}") def delete_task(task_id, db: Session = Depends(get_db), email = Depends(get_bearer_auth)): logger.info(f"deleting task {task_id} request by {email}") @@ -124,6 +124,7 @@ def delete_task(task_id, db: Session = Depends(get_db), email = Depends(get_bear "deleted": crud.soft_delete_task(db, task_id, email) }) +#----- Google Sheets Logic @app.post("/sheet", status_code=201) def archive_sheet(sheet:schemas.SubmitSheet, email = Depends(get_bearer_auth)): logger.info(f"SHEET TASK for {sheet=}") @@ -133,6 +134,18 @@ def archive_sheet(sheet:schemas.SubmitSheet, email = Depends(get_bearer_auth)): task = create_sheet_task.delay(sheet.json()) return JSONResponse({"id": task.id}) +#----- endpoint to submit data archived elsewhere +@app.post("/submit-archive", status_code=201) +def submit_manual_archive(manual:schemas.SubmitManual, basic_auth = Depends(get_basic_auth)): + logger.info(f"Submit {manual=}") + result = Metadata.from_json(manual.result) + logger.info(f"{result=}") + manual.tags.add("manual") + + archive_id = insert_result_into_db(result, manual.tags, manual.public, manual.group_id, manual.author_id, models.generate_uuid()) + return JSONResponse({"id": archive_id}) + + # Basic protected logic to allow access to 1 static file SF = os.environ.get("STATIC_FILE", "") if len(SF) > 1 and os.path.isfile(SF): diff --git a/src/requirements.txt b/src/requirements.txt index c815600..74bb63a 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -11,12 +11,12 @@ async-generator==1.10 ; python_version >= '3.5' async-timeout==4.0.2 ; python_version >= '3.6' attrs==23.1.0 ; python_version >= '3.7' authlib==0.15.6 -auto-archiver==0.5.17 +auto-archiver==0.5.18 beautifulsoup4==4.12.2 ; python_full_version >= '3.6.0' billiard==3.6.4.0 blinker==1.6.2 ; python_version >= '3.7' -boto3==1.26.138 ; python_version >= '3.7' -botocore==1.29.138 ; python_version >= '3.7' +boto3==1.26.139 ; python_version >= '3.7' +botocore==1.29.139 ; python_version >= '3.7' brotli==1.0.9 ; platform_python_implementation == 'CPython' bs4==0.0.1 cachetools==5.3.0 ; python_version ~= '3.7' @@ -117,7 +117,7 @@ tornado==6.3.2 ; python_version >= '3.5.2' tqdm==4.65.0 ; python_version >= '3.7' trio==0.22.0 ; python_version >= '3.7' trio-websocket==0.10.2 ; python_version >= '3.7' -typing-extensions==4.6.0 ; python_version >= '3.7' +typing-extensions==4.6.1 ; python_version >= '3.7' typing-inspect==0.8.0 tzlocal==5.0.1 ; python_version >= '3.7' uritemplate==4.1.1 ; python_version >= '3.6' diff --git a/src/worker.py b/src/worker.py index 474716c..317deed 100644 --- a/src/worker.py +++ b/src/worker.py @@ -143,9 +143,9 @@ def is_group_invalid_for_user(public: bool, group_id: str, author_id: str): return False -def insert_result_into_db(result: Metadata, tags: Set[str], public: bool, group_id: str, author_id: str, task_id:str): +def insert_result_into_db(result: Metadata, tags: Set[str], public: bool, group_id: str, author_id: str, task_id:str) -> str: logger.info(f"INSERTING {public=} {result} into {task_id}") - assert result, "UNABLE TO archive: {url}" + assert result, f"UNABLE TO archive: {result.get_url()}" with get_db() as session: # create DB URLs db_urls = [models.ArchiveUrl(url=url, key=m.get("id", f"media_{i}")) for i, m in enumerate(result.media) for url in m.urls] @@ -154,6 +154,7 @@ def insert_result_into_db(result: Metadata, tags: Set[str], public: bool, group_ # insert archive db_task = crud.create_task(session, task=schemas.ArchiveCreate(id=task_id, url=result.get_url(), result=json.loads(result.to_json()), public=public, author_id=author_id, group_id=group_id), tags=db_tags, urls=db_urls) logger.debug(f"Added {db_task.id=} to database on {db_task.created_at}") + return db_task.id # INIT