From 4119387feb85302bfd2ee63d7698cd5689cb4f74 Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Wed, 26 Apr 2023 23:40:42 +0100 Subject: [PATCH] new api mvp --- README.md | 1 + src/Pipfile | 1 + src/Pipfile.lock | 218 +++++++++++++++++++++++++------------------ src/db/crud.py | 44 +++++++-- src/db/models.py | 37 +++----- src/db/schemas.py | 27 ++++-- src/main.py | 57 ++++++++--- src/requirements.txt | 79 ++++++++-------- src/worker.py | 115 +++++++++++++++++------ 9 files changed, 370 insertions(+), 209 deletions(-) diff --git a/README.md b/README.md index 9520cf7..7829dd3 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ cd /src * console 1 - `docker compose up redis` optionally add `dashboard` for flower dashboard and `web` if not running uvicorn locally * console 2 - `pipenv shell` + `celery worker --app=worker.celery --loglevel=info --logfile=logs/celery_dev.log` * `celery --app=worker.celery worker --loglevel=info --logfile=logs/celery_dev.log` celery 5 + * or with watchdog for dev auto-reload `watchmedo auto-restart -d ./ -- celery --app=worker.celery worker --loglevel=info --logfile=logs/celery_dev.log` * console 3 - `pipenv shell` + `uvicorn main:app --host 0.0.0.0 --reload` orchestration must be from the console(?) * turn off VPNs if connection to docker is not working diff --git a/src/Pipfile b/src/Pipfile index 8be2d33..805c113 100644 --- a/src/Pipfile +++ b/src/Pipfile @@ -22,6 +22,7 @@ alembic = "*" fastapi-utils = "*" [dev-packages] +watchdog = "*" [requires] python_version = "3.10" diff --git a/src/Pipfile.lock b/src/Pipfile.lock index 3e4f2e6..185f5ec 100644 --- a/src/Pipfile.lock +++ b/src/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "8fbf19b1fc2cc928f83c9e52f4b6522b02ddedee2fc863b0434ee3567d928c8f" + "sha256": "85ba632faeb7f00b42dfefbbe50a9a8348610a1297fdf70017991c9c2cb7980d" }, "pipfile-spec": 6, "requires": { @@ -26,11 +26,11 @@ }, "aiosqlite": { "hashes": [ - "sha256:c3511b841e3a2c5614900ba1d179f366826857586f78abd75e7cbeb88e75a557", - "sha256:faa843ef5fb08bafe9a9b3859012d3d9d6f77ce3637899de20606b7fc39aa213" + "sha256:95ee77b91c8d2808bd08a59fbebf66270e9090c3d92ffbf260dc0db0b979577d", + "sha256:edba222e03453e094a3ce605db1b970c4b3376264e56f32e2a4959f948d66a96" ], "index": "pypi", - "version": "==0.18.0" + "version": "==0.19.0" }, "alembic": { "hashes": [ @@ -73,11 +73,11 @@ }, "attrs": { "hashes": [ - "sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836", - "sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99" + "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04", + "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015" ], - "markers": "python_version >= '3.6'", - "version": "==22.2.0" + "markers": "python_version >= '3.7'", + "version": "==23.1.0" }, "authlib": { "hashes": [ @@ -88,11 +88,11 @@ }, "auto-archiver": { "hashes": [ - "sha256:033451db9789296cebbd00bca5446541b0a4e86bc9ae0d65a8bedf8b53015044", - "sha256:b1a0fa870b0d7a0b2a0b259e6be92a3493aa2793cb04eee071263a7debcb7074" + "sha256:850361f29e57719fa4661fb1184993f1b03da165a15b9144729665f00a832e31", + "sha256:df0e0034b974e0c51549616831b70f1e0a035e154bd39457a43701a6ee3b3fd7" ], "index": "pypi", - "version": "==0.5.5" + "version": "==0.5.7" }, "beautifulsoup4": { "hashes": [ @@ -111,19 +111,19 @@ }, "boto3": { "hashes": [ - "sha256:03c2e1ddd29d993a6ab9b8a8fe184027957fc32bd405c496ad0c30311445925f", - "sha256:4ea3319bba2e8ff7cd9560259ae64f073c7fb6312158aa375777687231cabe69" + "sha256:2272a060005bf8299f7342cbf1344304eb44b7060cddba6784f676e3bc737bb8", + "sha256:deb53ad15ff0e75ae0be6d7115a2d34e4bafb0541484485f0feb61dabdfb5513" ], - "markers": "python_full_version >= '3.7.0'", - "version": "==1.26.112" + "markers": "python_version >= '3.7'", + "version": "==1.26.115" }, "botocore": { "hashes": [ - "sha256:1f52d9371d7b5ee30a53dcef7954c3cf22e04b131cfab5268035f3299ccde9e1", - "sha256:2cbaddb09b46dcb0a05490724d51acb224d3a8df433c347f995b4d78bfb02c8a" + "sha256:58eee8cf8f4f3e515df29f6dc535dd86ed3f4cea40999c5bc74640ff40bdc71f", + "sha256:dff327977d7c9f98f2dc54b51b8f70326952dd50ae23b885fdfa8bfeec014b76" ], - "markers": "python_full_version >= '3.7.0'", - "version": "==1.29.112" + "markers": "python_version >= '3.7'", + "version": "==1.29.115" }, "brotli": { "hashes": [ @@ -410,7 +410,7 @@ "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e", "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==8.1.3" }, "cloudscraper": { @@ -465,7 +465,7 @@ "sha256:070b29b5bbf4b1ec2cd51c96ea040dc68a614de703910a91ad1abba18f9f379f", "sha256:86b8b7517efcc558f085a142cdb7620f0921543fcabdb538c8a4c4001d8178e3" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==1.1.8" }, "exceptiongroup": { @@ -478,11 +478,11 @@ }, "fastapi": { "hashes": [ - "sha256:99d4fdb10e9dd9a24027ac1d0bd4b56702652056ca17a6c8721eec4ad2f14e18", - "sha256:daf73bbe844180200be7966f68e8ec9fd8be57079dff1bacb366db32729e6eb5" + "sha256:9569f0a381f8a457ec479d90fa01005cfddaae07546eb1f3fa035bc4797ae7d5", + "sha256:a870d443e5405982e1667dfe372663abf10754f246866056336d7f01c21dab07" ], "index": "pypi", - "version": "==0.95.0" + "version": "==0.95.1" }, "fastapi-utils": { "hashes": [ @@ -501,18 +501,18 @@ }, "filelock": { "hashes": [ - "sha256:3618c0da67adcc0506b015fd11ef7faf1b493f0b40d87728e19986b536890c37", - "sha256:f08a52314748335c6460fc8fe40cd5638b85001225db78c2aa01c8c0db83b318" + "sha256:ad98852315c2ab702aeb628412cbf7e95b7ce8c3bf9565670b4eaecf1db370a9", + "sha256:fc03ae43288c013d2ea83c8597001b1129db351aad9c57fe2409327916b8e718" ], - "markers": "python_full_version >= '3.7.0'", - "version": "==3.11.0" + "markers": "python_version >= '3.7'", + "version": "==3.12.0" }, "flask": { "hashes": [ "sha256:7eb373984bf1c770023fce9db164ed0c3353cd0b53f130f4693da0ca756a2e6d", "sha256:c0bec9477df1cb867e5a67c9e1ab758de9cb4a3e52dd70681f59fa40a62b3f2d" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==2.2.3" }, "flower": { @@ -535,16 +535,16 @@ "sha256:4b9bb5d5a380a0befa0573b302651b8a9a89262c1730e37bf423cec511804c22", "sha256:ce222e27b0de0d7bc63eb043b956996d6dccab14cc3b690aaea91c9cc99dc16e" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==2.11.0" }, "google-api-python-client": { "hashes": [ - "sha256:07b21ef21a542dd69cd7c09817a6079b2769cc2a791981402e8f0fcdb2d47f90", - "sha256:baf3c6f9b1679d89fcb88c29941a8b04b9a815d721880786baecc6a7f5bd376f" + "sha256:0f320190ab9d5bd2fdb0cb894e8e53bb5e17d4888ee8dc4d26ba65ce378409e2", + "sha256:3ca4e93821f4e9ac29b91ab0d9df168b42c8ad0fb8bff65b8c2ccb2d462b0464" ], - "markers": "python_full_version >= '3.7.0'", - "version": "==2.85.0" + "markers": "python_version >= '3.7'", + "version": "==2.86.0" }, "google-auth": { "hashes": [ @@ -574,7 +574,7 @@ "sha256:4168fcb568a826a52f23510412da405abd93f4d23ba544bb68d943b14ba3cb44", "sha256:b287dc48449d1d41af0c69f4ea26242b5ae4c3d7249a38b0984c86a4caffff1f" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==1.59.0" }, "greenlet": { @@ -656,7 +656,7 @@ "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d", "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==0.14.0" }, "httplib2": { @@ -672,7 +672,7 @@ "sha256:401201aca462749773f02920139f302450cb548b70489b9b4b92be39fe3c3c50", "sha256:5f1f22bc65911eb1a6ffe7659bd6598e33dcfeeb904eb16ee1e705a09bf75916" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==4.6.0" }, "idna": { @@ -688,7 +688,7 @@ "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==2.0.0" }, "instaloader": { @@ -703,7 +703,7 @@ "sha256:2c2349112351b88699d8d4b6b075022c0808887cb7ad10069318a8b0bc88db44", "sha256:5dbbc68b317e5e42f327f9021763545dc3fc3bfe22e6deb96aaf1fc38874156a" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==2.1.2" }, "jinja2": { @@ -719,7 +719,7 @@ "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==1.0.1" }, "kombu": { @@ -826,7 +826,7 @@ "sha256:c97c79c018b9165ac9922ae4f32da095ffd3c4e6872b45eded42926deea46818", "sha256:d60a3903dc3bb01a18ad6a89cdbe2e4eadc69c0bc8ef1e3773ba53d44c3f7a34" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==1.2.4" }, "markdown-it-py": { @@ -834,7 +834,7 @@ "sha256:5a35f8d1870171d9acc47b99612dc146129b631baf04970128b568f190d0cc30", "sha256:7c9a5e412688bc771c67432cbfebcdd686c93ce6484913dccf06cb5a0bea35a1" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==2.2.0" }, "markupsafe": { @@ -890,7 +890,7 @@ "sha256:f2bfb563d0211ce16b63c7cb9395d2c682a23187f54c3d79bfec33e6705473c6", "sha256:f8ffb705ffcf5ddd0e80b65ddf7bed7ee4f5a441ea7d3419e861a12eaf41af58" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==2.1.2" }, "marshmallow": { @@ -898,7 +898,7 @@ "sha256:90032c0fd650ce94b6ec6dc8dfeb0e3ff50c144586462c389b81a07205bedb78", "sha256:93f0958568da045b0021ec6aeb7ac37c81bfcccbb9a0e7ed8559885070b3a19b" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==3.19.0" }, "marshmallow-enum": { @@ -913,7 +913,7 @@ "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==0.1.2" }, "mutagen": { @@ -921,7 +921,7 @@ "sha256:6e5f8ba84836b99fe60be5fb27f84be4ad919bbb6b49caa6ae81e70584b55e58", "sha256:8af0728aa2d5c3ee5a727e28d0627966641fddfe804c23eabb5926a4d770aed5" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==1.46.0" }, "mypy-extensions": { @@ -952,7 +952,7 @@ "sha256:6f82bd3de45da303cf1f771ecafa1633750a358436a8bb60e06a1ceb745d2672", "sha256:c4ab89a56575d6d38a05aa16daeaa333109c1f96167aba8901ab18b6b5e0f7f5" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==1.2.0" }, "packaging": { @@ -960,7 +960,7 @@ "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61", "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==23.1" }, "pluggy": { @@ -994,7 +994,7 @@ "sha256:f08aa300b67f1c012100d8eb62d47129e53d1150f4469fd78a29fa3cb68c66f2", "sha256:f2f4710543abec186aee332d6852ef5ae7ce2e9e807a3da570f36de5a732d88e" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==4.22.3" }, "py": { @@ -1132,16 +1132,16 @@ "sha256:ecbbc51391248116c0a055899e6c3e7ffbb11fb5e2a4cd6f2d0b93272118a209", "sha256:f4a2b50e2b03d5776e7f21af73e2070e1b5c0d0df255a827e7c632962f8315af" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==1.10.7" }, "pygments": { "hashes": [ - "sha256:77a3299119af881904cd5ecd1ac6a66214b6e9bed1f2db16993b54adede64094", - "sha256:f7e36cffc4c517fbc252861b9a6e4644ca0e5abadf9a113c72d1358ad09b9500" + "sha256:8ace4d3c1dd481894b2005f560ead0f9f19ee64fe983366be1a21e171d12775c", + "sha256:db2db3deb4b4179f399a09054b023b6a586b76499d36965813c71aa8ed7b5fd1" ], - "markers": "python_full_version >= '3.7.0'", - "version": "==2.15.0" + "markers": "python_version >= '3.7'", + "version": "==2.15.1" }, "pyparsing": { "hashes": [ @@ -1188,7 +1188,7 @@ "sha256:70ca6ea68fe63ecc8fa4fcf00ae651fc8a5d02d93dcd12ae6d4fc7ca46c4d395", "sha256:ce0d46ddb668b3be82f4ed5e503dbc33dd815d83e2eb6824211310d3fb172a27" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==8.0.1" }, "python-twitter-v2": { @@ -1196,7 +1196,7 @@ "sha256:2397d518c17bfbc16a3d414b1cf6d3c231fd8d322f21c755ac2215c9ee675537", "sha256:4e03a30b2570fa4f17fbc7293d850fb8276c66be106d55e460b9287de37e1dd2" ], - "markers": "python_version >= '3.6' and python_version < '4.0'", + "markers": "python_version >= '3.6' and python_version < '4'", "version": "==0.8.1" }, "pytz": { @@ -1363,7 +1363,7 @@ "sha256:22b74cae0278fd5086ff44144d3813be1cedc9115bdfabbfefd86400cb88b20a", "sha256:b5d573e13605423ec80bdd0cd5f8541f7844a0e71a13f74cf454ccb2f490708b" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==13.3.4" }, "rsa": { @@ -1371,7 +1371,7 @@ "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7", "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21" ], - "markers": "python_version >= '3.6' and python_version < '4.0'", + "markers": "python_version >= '3.6' and python_version < '4'", "version": "==4.9" }, "s3transfer": { @@ -1379,7 +1379,7 @@ "sha256:06176b74f3a15f61f1b4f25a1fc29a4429040b7647133a463da8fa5bd28d5ecd", "sha256:2ed07d3866f523cc561bf4a00fc5535827981b117dd7876f036b0c1aca42c947" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==0.6.0" }, "selenium": { @@ -1387,7 +1387,7 @@ "sha256:28430ac54a54fa59ad1f5392a1b89b169fe3ab2c2ccd1a9a10b6fe74f36cd6da", "sha256:61cda3a304f82637162bc155cae7bf88fdb04c115fa2cb1c1c2e1358fcd19a9f" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==4.8.3" }, "six": { @@ -1403,7 +1403,7 @@ "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101", "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==1.3.0" }, "snscrape": { @@ -1423,11 +1423,11 @@ }, "soupsieve": { "hashes": [ - "sha256:49e5368c2cda80ee7e84da9dbe3e110b70a4575f196efb74e51b94549d921955", - "sha256:e28dba9ca6c7c00173e34e4ba57448f0688bb681b7c5e8bf4971daafc093d69a" + "sha256:1c1bfee6819544a3447586c889157365a27e10d88cde3ad3da0cf0ddf646feb8", + "sha256:89d12b2d5dfcd2c9e8c22326da9d9aa9cb3dfab0a83a024f05704076ee8d35ea" ], - "markers": "python_full_version >= '3.7.0'", - "version": "==2.4" + "markers": "python_version >= '3.7'", + "version": "==2.4.1" }, "sqlalchemy": { "hashes": [ @@ -1481,16 +1481,16 @@ "sha256:41da799057ea8620e4667a3e69a5b1923ebd32b1819c8fa75634bbe8d8bea9bd", "sha256:e87fce5d7cbdde34b76f0ac69013fd9d190d581d80681493016666e6f96c6d5e" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==0.26.1" }, "telethon": { "hashes": [ - "sha256:613bae42acb5f2eeb1a0b92614e323021c66f374db62adf9826ea0c2c9120bb1", - "sha256:893c10f133974fba4c53eb1736b6514d596d1cd94c83436a711f3345df945199" + "sha256:b3990ec22351a3f3e1af376729c985025bbdd3bdabdde8c156112c3d3dfe1941", + "sha256:edc42fd58b8e1569830d3ead564cafa60fd51d684f03ee2a1fdd5f77a5a10438" ], "markers": "python_version >= '3.5'", - "version": "==1.28.2" + "version": "==1.28.5" }, "text-unidecode": { "hashes": [ @@ -1515,27 +1515,27 @@ }, "tornado": { "hashes": [ - "sha256:1d54d13ab8414ed44de07efecb97d4ef7c39f7438cf5e976ccd356bebb1b5fca", - "sha256:20f638fd8cc85f3cbae3c732326e96addff0a15e22d80f049e00121651e82e72", - "sha256:5c87076709343557ef8032934ce5f637dbb552efa7b21d08e89ae7619ed0eb23", - "sha256:5f8c52d219d4995388119af7ccaa0bcec289535747620116a58d830e7c25d8a8", - "sha256:6fdfabffd8dfcb6cf887428849d30cf19a3ea34c2c248461e1f7d718ad30b66b", - "sha256:87dcafae3e884462f90c90ecc200defe5e580a7fbbb4365eda7c7c1eb809ebc9", - "sha256:9b630419bde84ec666bfd7ea0a4cb2a8a651c2d5cccdbdd1972a0c859dfc3c13", - "sha256:b8150f721c101abdef99073bf66d3903e292d851bee51910839831caba341a75", - "sha256:ba09ef14ca9893954244fd872798b4ccb2367c165946ce2dd7376aebdde8e3ac", - "sha256:d3a2f5999215a3a06a4fc218026cd84c61b8b2b40ac5296a6db1f1451ef04c1e", - "sha256:e5f923aa6a47e133d1cf87d60700889d7eae68988704e20c75fb2d65677a8e4b" + "sha256:4546003dc8b5733489139d3bff5fa6a0211be505faf819bd9970e7c2b32e8122", + "sha256:4d349846931557b7ec92f224b5d598b160e2ba26ae1812480b42e9622c884bf7", + "sha256:6164571f5b9f73143d1334df4584cb9ac86d20c461e17b6c189a19ead8bb93c1", + "sha256:6cfff1e9c15c79e106b8352269d201f8fc0815914a6260f3893ca18b724ea94b", + "sha256:720f53e6367b38190ae7fa398c25c086c69d88b3c6535bd6021a126b727fb5cd", + "sha256:912df5712024564e362ecce43c8d5862e14c78c8dd3846c9d889d44fbd7f4951", + "sha256:c37b6a384d54ce6a31168d40ab21ad2591ddaf34973075cc0cad154402ecd9e8", + "sha256:c659ab04d5aa477dbe44152c67d93f3ad3243b992d94f795ca1d5c73c37337ce", + "sha256:c9114a61a4588c09065b9996ae05462350d17160b92b9bf9a1e93689cc0424dc", + "sha256:d68f3192936ff2c4add04dc21a436a43b4408d466746b78bb2b9d0a53a18683f", + "sha256:d7b737e18f701de3e4a3b0824260b4d740e4d60607b8089bb80e80ffd464780e" ], "markers": "python_full_version >= '3.5.2'", - "version": "==6.2" + "version": "==6.3" }, "tqdm": { "hashes": [ "sha256:1871fb68a86b8fb3b59ca4cdd3dcccbc7e6d613eeed31f4c332531977b89beb5", "sha256:c4f53a17fe37e132815abceec022631be8ffe1b9381c2e6e30aa70edc99e9671" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==4.65.0" }, "trio": { @@ -1543,7 +1543,7 @@ "sha256:ce68f1c5400a47b137c5a4de72c7c901bd4e7a24fbdebfe9b41de8c6c04eaacf", "sha256:f1dd0780a89bfc880c7c7994519cb53f62aacb2c25ff487001c0052bd721cdf0" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==0.22.0" }, "trio-websocket": { @@ -1551,7 +1551,7 @@ "sha256:0908435e4eecc49d830ae1c4d6c47b978a75f00594a2be2104d58b61a04cdb53", "sha256:af13e9393f9051111300287947ec595d601758ce3d165328e7d36325135a8d62" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==0.10.2" }, "typing-extensions": { @@ -1559,7 +1559,7 @@ "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb", "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==4.5.0" }, "typing-inspect": { @@ -1582,7 +1582,7 @@ "sha256:3f21d09e1b2aa9f2dacca12da240ca37de3ba5237a93addfd6d593afe9073355", "sha256:b44c4388f3d34f25862cfbb387578a4d70fec417649da694a132f628a23367e2" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==4.3" }, "uritemplate": { @@ -1629,7 +1629,7 @@ "sha256:5a32fb5419f7bb8bd35de8548948fe27a06f857a4d086c87e142bf07aabc3fd7", "sha256:a87c5aa7c1570c3aa87031e78c2052105e3681f57503fd4cb56470c3ab6106d6" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==0.3.15" }, "websockets": { @@ -1704,7 +1704,7 @@ "sha256:fe10ddc59b304cb19a1bdf5bd0a7719cbbc9fbdd57ac80ed436b709fcf889106", "sha256:ff64a1d38d156d429404aaa84b27305e957fd10c30e5880d1765c9480bea490f" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==10.4" }, "werkzeug": { @@ -1712,7 +1712,7 @@ "sha256:2e1ccc9417d4da358b9de6f174e3ac094391ea1d4fbef2d667865d819dfd0afe", "sha256:56433961bc1f12533306c624f3be5e744389ac61d722175d543e1751285da612" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==2.2.3" }, "wsproto": { @@ -1720,7 +1720,7 @@ "sha256:ad565f26ecb92588a3e43bc3d96164de84cd9902482b130d0ddbaa9664a85065", "sha256:b9acddd652b585d75b20477888c56642fdade28bdfd3579aa24a4d2c037dd736" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==1.2.0" }, "yt-dlp": { @@ -1728,9 +1728,43 @@ "sha256:3b2df037c80922f0f83f63ee2f9253496b4a8668c0fe8d2a836ba9040f853b07", "sha256:9af92de5effc193bdb51216d9ebf28874d96180d202fae752b0d9f2a63380f3a" ], - "markers": "python_full_version >= '3.7.0'", + "markers": "python_version >= '3.7'", "version": "==2023.2.17" } }, - "develop": {} + "develop": { + "watchdog": { + "hashes": [ + "sha256:0e06ab8858a76e1219e68c7573dfeba9dd1c0219476c5a44d5333b01d7e1743a", + "sha256:13bbbb462ee42ec3c5723e1205be8ced776f05b100e4737518c67c8325cf6100", + "sha256:233b5817932685d39a7896b1090353fc8efc1ef99c9c054e46c8002561252fb8", + "sha256:25f70b4aa53bd743729c7475d7ec41093a580528b100e9a8c5b5efe8899592fc", + "sha256:2b57a1e730af3156d13b7fdddfc23dea6487fceca29fc75c5a868beed29177ae", + "sha256:336adfc6f5cc4e037d52db31194f7581ff744b67382eb6021c868322e32eef41", + "sha256:3aa7f6a12e831ddfe78cdd4f8996af9cf334fd6346531b16cec61c3b3c0d8da0", + "sha256:3ed7c71a9dccfe838c2f0b6314ed0d9b22e77d268c67e015450a29036a81f60f", + "sha256:4c9956d27be0bb08fc5f30d9d0179a855436e655f046d288e2bcc11adfae893c", + "sha256:4d98a320595da7a7c5a18fc48cb633c2e73cda78f93cac2ef42d42bf609a33f9", + "sha256:4f94069eb16657d2c6faada4624c39464f65c05606af50bb7902e036e3219be3", + "sha256:5113334cf8cf0ac8cd45e1f8309a603291b614191c9add34d33075727a967709", + "sha256:51f90f73b4697bac9c9a78394c3acbbd331ccd3655c11be1a15ae6fe289a8c83", + "sha256:5d9f3a10e02d7371cd929b5d8f11e87d4bad890212ed3901f9b4d68767bee759", + "sha256:7ade88d0d778b1b222adebcc0927428f883db07017618a5e684fd03b83342bd9", + "sha256:7c5f84b5194c24dd573fa6472685b2a27cc5a17fe5f7b6fd40345378ca6812e3", + "sha256:7e447d172af52ad204d19982739aa2346245cc5ba6f579d16dac4bfec226d2e7", + "sha256:8ae9cda41fa114e28faf86cb137d751a17ffd0316d1c34ccf2235e8a84365c7f", + "sha256:8f3ceecd20d71067c7fd4c9e832d4e22584318983cabc013dbf3f70ea95de346", + "sha256:9fac43a7466eb73e64a9940ac9ed6369baa39b3bf221ae23493a9ec4d0022674", + "sha256:a70a8dcde91be523c35b2bf96196edc5730edb347e374c7de7cd20c43ed95397", + "sha256:adfdeab2da79ea2f76f87eb42a3ab1966a5313e5a69a0213a3cc06ef692b0e96", + "sha256:ba07e92756c97e3aca0912b5cbc4e5ad802f4557212788e72a72a47ff376950d", + "sha256:c07253088265c363d1ddf4b3cdb808d59a0468ecd017770ed716991620b8f77a", + "sha256:c9d8c8ec7efb887333cf71e328e39cffbf771d8f8f95d308ea4125bf5f90ba64", + "sha256:d00e6be486affb5781468457b21a6cbe848c33ef43f9ea4a73b4882e5f188a44", + "sha256:d429c2430c93b7903914e4db9a966c7f2b068dd2ebdd2fa9b9ce094c7d459f33" + ], + "index": "pypi", + "version": "==3.0.0" + } + } } diff --git a/src/db/crud.py b/src/db/crud.py index 8dacd66..f2687a0 100644 --- a/src/db/crud.py +++ b/src/db/crud.py @@ -1,9 +1,11 @@ from functools import cache from sqlalchemy.orm import Session, load_only +from sqlalchemy import Column from loguru import logger from . import models, schemas import yaml +## --------------- TASK = Archive def get_task(db: Session, task_id: str): return base_query(db).filter(models.Archive.id == task_id).first() @@ -17,22 +19,19 @@ def search_tasks_by_url(db: Session, url:str, skip: int = 0, limit: int = 100): def search_tasks_by_email(db: Session, email:str, skip: int = 0, limit: int = 100): return base_query(db).filter(models.Archive.author.has(email=email)).offset(skip).limit(limit).all() -def create_task(db: Session, task: schemas.TaskCreate): - db_task = models.Archive(id=task.id, url=task.url, author=task.author, result=task.result) +def create_task(db: Session, task: schemas.ArchiveCreate, tags:list[models.Tag],urls:list[models.ArchiveUrl]): + db_task = models.Archive(id=task.id, url=task.url, author_id=task.author_id, result=task.result, group_id=task.group_id) + logger.debug(tags) + db_task.tags = tags # will this work? TODO: test if I don't call create tag before + db_task.urls = urls # will this work to create ArchiveUrl? TODO: test db.add(db_task) db.commit() db.refresh(db_task) return db_task -# def delete_task(db: Session, task_id: str, email:str)->bool: -# db_task = db.query(models.Task).filter(models.Task.id == task_id, models.Task.author==email).first() -# if db_task: -# db.delete(db_task) -# db.commit() -# return db_task is not None - def soft_delete_task(db: Session, task_id: str, email:str)->bool: - db_task = db.query(models.Archive).filter(models.Archive.id == task_id, models.Archive.author==email, models.Archive.deleted==False).first() + # TODO: implement hard-delete with cronjob that deletes from S3 + db_task = db.query(models.Archive).filter(models.Archive.id == task_id, models.Archive.author_id==email, models.Archive.deleted==False).first() if db_task: db_task.deleted = True db.commit() @@ -44,6 +43,31 @@ def base_query(db:Session): .options(load_only(models.Archive.id, models.Archive.created_at, models.Archive.url, models.Archive.result))\ .filter(models.Archive.deleted == False) +## --------------- TAG + +def create_tag(db: Session, tag: str): + db_tag = db.query(models.Tag).filter(models.Tag.id==tag).first() + if not db_tag: + db_tag = models.Tag(id=tag) + db.add(db_tag) + db.commit() + db.refresh(db_tag) + return db_tag + +def search_tags(db: Session, tag:str, skip: int = 0, limit: int = 100): + return db.query(models.Tag).filter(models.Tag.url.like(f'%{tag}%')).offset(skip).limit(limit).all() + + +def get_group_for_user(db:Session, group_name:str, email:str)->models.Group: + return db.query(models.association_table_user_groups).filter_by(user_id=email, group_id=group_name).first() + +def get_user_groups(db: Session, email:str): + groups = db.query(models.association_table_user_groups).filter_by(user_id=email).with_entities(Column("group_id")).all() + return [g[0] for g in groups] + + +## --------------- INIT User-Groups + @cache def get_group(db:Session, group_name:str)->models.Group: db_group = db.query(models.Group).filter(models.Group.id==group_name).first() diff --git a/src/db/models.py b/src/db/models.py index 59c61f5..9cddc7f 100644 --- a/src/db/models.py +++ b/src/db/models.py @@ -4,25 +4,8 @@ from sqlalchemy.orm import relationship import uuid from .database import Base - -# class Task(Base): -# __tablename__ = "tasks" - -# id = Column(String, primary_key=True, index=True) -# url = Column(String, index=True) -# author = Column(String, index=True) -# result = Column(JSON, default=None) -# created_at = Column(DateTime(timezone=True), server_default=func.now()) -# # updated_at = Column(DateTime(timezone=True), onupdate=func.now()) -# deleted = Column(Boolean, default=False) - - -# # items = relationship("Item", back_populates="owner") -# # tags = relationship("Tag", back_populates="owner") - def generate_uuid(): return str(uuid.uuid4()) -### new data model below # many to many association tables association_table_archive_tags = Table( @@ -45,23 +28,33 @@ class Archive(Base): id = Column(String, primary_key=True, index=True) url = Column(String, index=True) result = Column(JSON, default=None) + public = Column(Boolean, default=True) # if public=false, access to group and author + deleted = Column(Boolean, default=False) created_at = Column(DateTime(timezone=True), server_default=func.now()) updated_at = Column(DateTime(timezone=True), onupdate=func.now()) - deleted = Column(Boolean, default=False) - public = Column(Boolean, default=True) # if public=false, access to group and author group_id = Column(String, ForeignKey("groups.id"), default=None) author_id = Column(String, ForeignKey("users.email")) - group = relationship("Group", back_populates="archives") tags = relationship("Tag", back_populates="archives", secondary=association_table_archive_tags) + group = relationship("Group", back_populates="archives") author = relationship("User", back_populates="archives") + urls = relationship("ArchiveUrl", back_populates="archive") + +class ArchiveUrl(Base): + __tablename__ = "archive_urls" + + url = Column(String, primary_key=True, index=True) + key = Column(String, default=None) + archive_id = Column(String, ForeignKey("archives.id")) + + archive = relationship("Archive", back_populates="urls") + class Tag(Base): __tablename__ = "tags" - id = Column(String, primary_key=True, index=True, default=generate_uuid) - name = Column(String, unique=True, index=True) + id = Column(String, primary_key=True, index=True) created_at = Column(DateTime(timezone=True), server_default=func.now()) archives = relationship("Archive", back_populates="tags", secondary=association_table_archive_tags) diff --git a/src/db/schemas.py b/src/db/schemas.py index 03c5d32..255b46d 100644 --- a/src/db/schemas.py +++ b/src/db/schemas.py @@ -1,16 +1,31 @@ from pydantic import BaseModel from datetime import datetime -class TaskCreate(BaseModel): - id: str +class ArchiveCreate(BaseModel): + id: str | None = None url: str - author: str - result: dict + result: dict | None = None + public: bool = True + author_id: str | None = None + group_id: str | None = None + tags: list = [] + # urls: list = [] -class Task(TaskCreate): + +class Archive(ArchiveCreate): created_at: datetime + updated_at: datetime | None deleted: bool class Config: - orm_mode = True \ No newline at end of file + orm_mode = True + + +# class TagCreate(BaseModel): +# id: str + +# class Tag(TagCreate): +# created_at: datetime +# # class Config: +# # orm_mode = True \ No newline at end of file diff --git a/src/main.py b/src/main.py index f650e85..8b7fc53 100644 --- a/src/main.py +++ b/src/main.py @@ -15,14 +15,15 @@ from worker import create_archive_task, celery from db import crud, models, schemas from db.database import engine, SessionLocal from sqlalchemy.orm import Session -from security import get_bearer_auth, get_basic_auth +from security import get_bearer_auth, get_basic_auth, bearer_security load_dotenv() # Configuration ALLOWED_ORIGINS = os.environ.get("ALLOWED_ORIGINS", "chrome-extension://ondkcheoicfckabcnkdgbepofpjmjcmb,chrome-extension://ojcimmjndnlmmlgnjaeojoebaceokpdp").split(",") -VERSION = "0.2.0" - +VERSION = "0.3.1" +# min-version refers to the version of auto-archiver-extension on the webstore +BREAKING_CHANGES = {"minVersion": "0.3.0", "message": "The latest update has breaking changes, please update the extension to the most recent version."} app = FastAPI() app.add_middleware( @@ -41,8 +42,16 @@ def get_db(): @app.get("/") -def home(): return JSONResponse({"version": VERSION}) - +async def home(request: Request): + status = {"version": VERSION, "breakingChanges": BREAKING_CHANGES} + try: + # if authenticated will load available groups + email = await get_bearer_auth(await bearer_security(request)) + db: Session = next(get_db()) + status["groups"] = crud.get_user_groups(db, email) + except HTTPException: pass + except Exception as e: logger.error(e) + return JSONResponse(status) # logging configurations logger.add("logs/api_logs.log", retention="30 days", rotation="3 days") @@ -55,36 +64,59 @@ async def logging_middleware(request: Request, call_next): # Bearer protected below -@app.get("/tasks/search-url", response_model=list[schemas.Task]) -def search(url:str, skip: int = 0, limit: int = 100, db: Session = Depends(get_db), email = Depends(get_bearer_auth)): +@app.get("/groups", response_model=list[str]) +def get_user_groups(db: Session = Depends(get_db), email = Depends(get_bearer_auth)): + return crud.get_user_groups(db, email) + +@app.get("/tasks/search-url", response_model=list[schemas.Archive]) +def search(url:str, skip: int = 0, limit: int = 100, db: Session = Depends(get_db), _email = Depends(get_bearer_auth)): return crud.search_tasks_by_url(db, url, skip=skip, limit=limit) # @app.get("/tasks/search", response_model=list[schemas.Task]) # def search(skip: int = 0, limit: int = 100, db: Session = Depends(get_db), email = Depends(get_bearer_auth)): # return crud.get_tasks(db, skip=skip, limit=limit) -@app.get("/tasks/sync", response_model=list[schemas.Task]) +@app.get("/tasks/sync", response_model=list[schemas.Archive]) def search(skip: int = 0, limit: int = 100, db: Session = Depends(get_db), email = Depends(get_bearer_auth)): return crud.search_tasks_by_email(db, email, skip=skip, limit=limit) @app.post("/tasks", status_code=201) -def run_task(payload = Body(...), email = Depends(get_bearer_auth)): - url = payload.get('url') - logger.info(f"new task for user {email}: {url}") +def run_task(archive:schemas.ArchiveCreate, email = Depends(get_bearer_auth)): + archive.author_id = email + url = archive.url + logger.warning(archive) + logger.info(f"new {archive.public=} task for {email=} and {archive.group_id=}: {url}") if type(url)!=str or len(url)<=5: raise HTTPException(status_code=422, detail=f"Invalid URL received: {url}") - task = create_archive_task.delay(url=payload.get('url'), email=email) + logger.info("creating task") + task = create_archive_task.delay(archive.json()) return JSONResponse({"id": task.id}) +# @app.post("/tasks", status_code=201) +# def run_task(payload = Body(...), email = Depends(get_bearer_auth)): +# url = payload.get('url') +# public = payload.get('public', True) +# group = payload.get('group', None) +# logger.info(f"new {public=} task for {email=} and {group=}: {url}") +# if type(url)!=str or len(url)<=5: +# raise HTTPException(status_code=422, detail=f"Invalid URL received: {url}") +# task = create_archive_task.delay(url=payload.get('url'), email=email, public=public, group=group) +# return JSONResponse({"id": task.id}) + @app.get("/tasks/{task_id}") def get_status(task_id, email = Depends(get_bearer_auth)): logger.info(f"status check for user {email}") task_result = AsyncResult(task_id, app=celery) + logger.info(task_result) result = { "id": task_id, "status": task_result.status, "result": task_result.result } + try: + if task_result.result and "error" in task_result.result: + result["status"] = "FAILURE" + except Exception as e: logger.error(traceback.format_exc()) try: json_result = jsonable_encoder(result, exclude_unset=True) return JSONResponse(json_result) @@ -94,6 +126,7 @@ def get_status(task_id, email = Depends(get_bearer_auth)): return JSONResponse({ "id": task_id, "status": "FAILURE", + "result": {"error": e} }) diff --git a/src/requirements.txt b/src/requirements.txt index 4695367..f283eeb 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -7,19 +7,19 @@ # -i https://pypi.org/simple aiofiles==0.6.0 -aiosqlite==0.18.0 -alembic==1.9.4 +aiosqlite==0.19.0 +alembic==1.10.3 amqp==2.6.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' anyio==3.6.2; python_full_version >= '3.6.2' argparse==1.4.0 async-generator==1.10; python_version >= '3.5' -attrs==22.2.0; python_version >= '3.6' +attrs==23.1.0; python_version >= '3.7' authlib==0.15.6 -auto-archiver==0.4.3 -beautifulsoup4==4.11.2; python_version >= '3.6' +auto-archiver==0.5.6 +beautifulsoup4==4.12.2; python_version >= '3.6' billiard==3.6.4.0 -boto3==1.26.79; python_version >= '3.7' -botocore==1.29.79; python_version >= '3.7' +boto3==1.26.115; python_version >= '3.7' +botocore==1.29.115; python_version >= '3.7' brotli==1.0.9; platform_python_implementation == 'CPython' bs4==0.0.1 cachetools==5.3.0; python_version ~= '3.7' @@ -31,24 +31,25 @@ click==8.1.3; python_version >= '3.7' cloudscraper==1.2.69 cryptography==38.0.4; python_version >= '3.6' dataclasses-json==0.5.7; python_version >= '3.6' -dateparser==1.1.7; python_version >= '3.7' -exceptiongroup==1.1.0; python_version < '3.11' -fastapi==0.92.0 +dateparser==1.1.8; python_version >= '3.7' +exceptiongroup==1.1.1; python_version < '3.11' +fastapi-utils==0.2.1 +fastapi==0.95.1 ffmpeg-python==0.2.0 -filelock==3.9.0; python_version >= '3.7' +filelock==3.12.0; python_version >= '3.7' flask==2.2.3; python_version >= '3.7' flower==0.9.7 future==0.18.3; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3' google-api-core==2.11.0; python_version >= '3.7' -google-api-python-client==2.79.0; python_version >= '3.7' +google-api-python-client==2.86.0; python_version >= '3.7' google-auth-httplib2==0.1.0 google-auth-oauthlib==1.0.0; python_version >= '3.6' -google-auth==2.16.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' -googleapis-common-protos==1.58.0; python_version >= '3.7' -greenlet==2.0.2; platform_machine == 'aarch64' or (platform_machine == 'ppc64le' or (platform_machine == 'x86_64' or (platform_machine == 'amd64' or (platform_machine == 'AMD64' or (platform_machine == 'win32' or platform_machine == 'WIN32'))))) -gspread==5.7.2; python_version not in '3.0, 3.1, 3.2, 3.3' and python_version >= '3.6' +google-auth==2.17.3; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' +googleapis-common-protos==1.59.0; python_version >= '3.7' +greenlet==2.0.2; python_version >= '3' and platform_machine == 'aarch64' or (platform_machine == 'ppc64le' or (platform_machine == 'x86_64' or (platform_machine == 'amd64' or (platform_machine == 'AMD64' or (platform_machine == 'win32' or platform_machine == 'WIN32'))))) +gspread==5.8.0; python_version not in '3.0, 3.1, 3.2, 3.3' and python_version >= '3.6' h11==0.14.0; python_version >= '3.7' -httplib2==0.21.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' +httplib2==0.22.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' humanize==4.6.0; python_version >= '3.7' idna==3.4; python_version >= '3.5' iniconfig==2.0.0; python_version >= '3.7' @@ -57,7 +58,7 @@ itsdangerous==2.1.2; python_version >= '3.7' jinja2==3.1.2 jmespath==1.0.1; python_version >= '3.7' kombu==4.6.11; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' -loguru==0.6.0 +loguru==0.7.0 lxml==4.9.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' mako==1.2.4; python_version >= '3.7' markdown-it-py==2.2.0; python_version >= '3.7' @@ -70,59 +71,59 @@ mypy-extensions==1.0.0; python_version >= '3.5' oauth2client==4.1.3 oauthlib==3.2.2; python_version >= '3.6' outcome==1.2.0; python_version >= '3.7' -packaging==23.0; python_version >= '3.7' +packaging==23.1; python_version >= '3.7' pluggy==0.13.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' prometheus-client==0.8.0 -protobuf==4.22.0; python_version >= '3.7' +protobuf==4.22.3; python_version >= '3.7' py==1.11.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' pyaes==1.6.1 pyasn1-modules==0.2.8 pyasn1==0.4.8 pycparser==2.21 pycryptodomex==3.17; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' -pydantic==1.10.5; python_version >= '3.7' -pygments==2.14.0; python_version >= '3.6' +pydantic==1.10.7; python_version >= '3.7' +pygments==2.15.0; python_version >= '3.7' pyparsing==3.0.9; python_version >= '3.1' pysocks==1.7.1 pytest==6.2.4 python-dateutil==2.8.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' python-dotenv==1.0.0 python-slugify==8.0.1; python_version >= '3.7' -python-twitter-v2==0.8.1; python_version >= '3.6' and python_version < '4.0' +python-twitter-v2==0.8.1; python_version >= '3.6' and python_version < '4' pytz-deprecation-shim==0.1.0.post0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' -pytz==2022.7.1 +pytz==2023.3 pyyaml==6.0; python_version >= '3.6' redis==3.5.3 -regex==2022.10.31; python_version >= '3.6' +regex==2023.3.23; python_version >= '3.8' requests-oauthlib==1.3.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' requests-toolbelt==0.10.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' requests==2.28.2 -rich==13.3.1; python_version >= '3.7' -rsa==4.9; python_version >= '3.6' and python_version < '4.0' +rich==13.3.4; python_version >= '3.7' +rsa==4.9; python_version >= '3.6' and python_version < '4' s3transfer==0.6.0; python_version >= '3.7' -selenium==4.8.2; python_version >= '3.7' +selenium==4.8.3; python_version >= '3.7' six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' sniffio==1.3.0; python_version >= '3.7' -snscrape==0.5.0.20230113; python_version ~= '3.8' +snscrape==0.6.2.20230320; python_version ~= '3.8' sortedcontainers==2.4.0 -soupsieve==2.4; python_version >= '3.7' -sqlalchemy==2.0.4 -starlette==0.25.0; python_version >= '3.7' -telethon==1.27.0; python_version >= '3.5' +soupsieve==2.4.1; python_version >= '3.7' +sqlalchemy==1.4.47 +starlette==0.26.1; python_version >= '3.7' +telethon==1.28.5; python_version >= '3.5' text-unidecode==1.3 tiktok-downloader==0.3.4 toml==0.10.2; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3' -tornado==6.2; python_full_version >= '3.5.2' -tqdm==4.64.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' -trio-websocket==0.9.2; python_version >= '3.5' +tornado==6.3; python_full_version >= '3.5.2' +tqdm==4.65.0; python_version >= '3.7' +trio-websocket==0.10.2; python_version >= '3.7' trio==0.22.0; python_version >= '3.7' typing-extensions==4.5.0; python_version >= '3.7' typing-inspect==0.8.0 -tzdata==2022.7; python_version >= '3.6' -tzlocal==4.2; python_version >= '3.6' +tzdata==2023.3; python_version >= '3.6' +tzlocal==4.3; python_version >= '3.7' uritemplate==4.1.1; python_version >= '3.6' urllib3==1.26.14; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' -uvicorn==0.20.0 +uvicorn==0.21.1 vine==1.3.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' vk-api==11.9.9 vk-url-scraper==0.3.15; python_version >= '3.7' diff --git a/src/worker.py b/src/worker.py index 08fbae4..1b5b081 100644 --- a/src/worker.py +++ b/src/worker.py @@ -1,12 +1,14 @@ -import os, re +import os, re, traceback, yaml -from celery import Celery +from celery import Celery, states +from celery.exceptions import Ignore +from celery.signals import task_failure from auto_archiver import Config, ArchivingOrchestrator, Metadata # from auto_archiver.enrichers import ScreenshotEnricher from loguru import logger -from db import crud, schemas +from db import crud, schemas, models from db.database import engine, SessionLocal from contextlib import contextmanager import json @@ -14,6 +16,8 @@ import json celery = Celery(__name__) celery.conf.broker_url = os.environ.get("CELERY_BROKER_URL", "redis://localhost:6379") celery.conf.result_backend = os.environ.get("CELERY_RESULT_BACKEND", "redis://localhost:6379") +USER_GROUPS_FILENAME=os.environ.get("USER_GROUPS_FILENAME", "user-groups.yaml") + @contextmanager def get_db(): @@ -21,34 +25,89 @@ def get_db(): try: yield session finally: session.close() -config_default = Config() -config_default.parse(use_cli=False, yaml_config_filename=os.environ.get("ORCHESTRATION_CONFIG_DEFAULT", "secrets/orchestration.yaml")) +@celery.task(name="create_archive_task", bind=True, autoretry_for=(Exception,), retry_backoff=True, retry_kwargs={'max_retries': 5}) +def create_archive_task(self, archive_json: str): + + archive = schemas.ArchiveCreate.parse_raw(archive_json) + if not archive.public and archive.group_id and len(archive.group_id) > 0: + # ensure group is valid for user + with get_db() as session: + db_group = crud.get_group_for_user(session, archive.group_id, archive.author_id) + if not db_group: + logger.error(em := f"User {archive.author_id} is not part of {archive.group_id}, no permission") + return {"error": em} -config_bcat = None -if (config_bcat_file := os.environ.get("ORCHESTRATION_CONFIG_BELLINGCAT")): - config_bcat = Config() - config_bcat.parse(use_cli=False, yaml_config_filename=config_bcat_file) + url = archive.url + logger.info(f"{url=}") + logger.info(f"{archive=}") + orchestrator = choose_orchestrator(archive.group_id, archive.author_id) + result = orchestrator.feed_item(Metadata().set_url(url)) + if not result: + logger.error(f"UNABLE TO archive: {url}") + return {"error": "unable to archive"} -orchestrators = {"bellingcat": None, "default": None} - -@celery.task(name="create_archive_task", bind=True, autoretry_for=(Exception,), retry_backoff=True, retry_kwargs={'max_retries': 5}) -def create_archive_task(self, url: str, email:str=""): - orchestrator = choose_orchestrator(email) - result = orchestrator.feed_item(Metadata().set_url(url)).to_json() + result_json = result.to_json() with get_db() as session: - db_task = crud.create_task(session, task=schemas.TaskCreate(id=self.request.id, url=url, author=email, result=json.loads(result))) + # create DB URLs + db_urls = [models.ArchiveUrl(url=url, key=m.get("id", f"media_{i}")) for i, m in enumerate(result.media) for url in m.urls] + # create DB TAGs if needed + db_tags = [crud.create_tag(session, tag) for tag in archive.tags] + # insert archive + db_task = crud.create_task(session, task=schemas.ArchiveCreate(id=self.request.id, url=url, result=json.loads(result_json), public=archive.public, author_id=archive.author_id, group_id=archive.group_id), tags=db_tags, urls=db_urls) logger.debug(f"Added {db_task.id=} to database on {db_task.created_at}") - return result + return result_json + +@task_failure.connect(sender=create_archive_task) +def task_failure_notifier(sender=None, **kwargs): + logger.warning("😅 From task_failure_notifier ==> Task failed successfully! ") + logger.error(kwargs['exception']) + logger.error(kwargs['traceback']) + logger.error("\n".join(traceback.format_list(traceback.extract_tb(kwargs['traceback'])))) + +def choose_orchestrator(group, email): + global ORCHESTRATORS + if group not in ORCHESTRATORS: group = get_user_first_group(email) + assert group in ORCHESTRATORS, f"{group=} not in configurations" + logger.info(f"CHOOSE Orchestrator for {group=}, {email=}") + return ArchivingOrchestrator(ORCHESTRATORS.get(group)) + +def read_user_groups(): + # read yaml safely + with open(USER_GROUPS_FILENAME) as inf: + try: + return yaml.safe_load(inf) + except yaml.YAMLError as e: + logger.error(f"could not open user groups filename {USER_GROUPS_FILENAME}: {e}") + raise e + +def get_user_first_group(email): + user_groups_yaml = read_user_groups() + groups = user_groups_yaml.get("users", {}).get(email, []) + if len(groups): return groups[0] + return "default" -def choose_orchestrator(email): - global orchestrators, config_bcat - if re.match(r'^[\w.]+@bellingcat\.com$', email) and config_bcat: - logger.debug("Using bellingcat config for orchestration") - if not orchestrators["bellingcat"]: - orchestrators["bellingcat"] = ArchivingOrchestrator(config_bcat) - return orchestrators["bellingcat"] - logger.debug("Using default config for orchestration") - if not orchestrators["default"]: - orchestrators["default"] = ArchivingOrchestrator(config_default) - return orchestrators["default"] \ No newline at end of file +def load_orchestrators(): + global ORCHESTRATORS + ORCHESTRATORS = {} + """ + reads the orchestrators key in the config file to load different orchestrators for different groups + """ + user_groups_yaml = read_user_groups() + + orchestrators_config = user_groups_yaml.get("orchestrators", {}) + assert len(orchestrators_config), f"No orchestrators key found in {USER_GROUPS_FILENAME}. please see the example file" + assert "default" in orchestrators_config, "please include a 'default' orchestrator to be used when the user has no group" + logger.debug(f"Found {len(orchestrators_config)} group orchestrators.") + + for group, config_filename in orchestrators_config.items(): + config = Config() + config.parse(use_cli=False, yaml_config_filename=config_filename) + ORCHESTRATORS[group] = config + return ORCHESTRATORS + + +## INIT + +ORCHESTRATORS = {} +load_orchestrators() \ No newline at end of file