new api mvp

This commit is contained in:
msramalho
2023-04-26 23:40:42 +01:00
parent 6c8283f517
commit 4119387feb
9 changed files with 370 additions and 209 deletions

View File

@@ -13,6 +13,7 @@ cd /src
* console 1 - `docker compose up redis` optionally add `dashboard` for flower dashboard and `web` if not running uvicorn locally
* console 2 - `pipenv shell` + `celery worker --app=worker.celery --loglevel=info --logfile=logs/celery_dev.log`
* with Celery 5+, global options such as `--app` must come before the `worker` sub-command: `celery --app=worker.celery worker --loglevel=info --logfile=logs/celery_dev.log`
* or with watchdog for dev auto-reload `watchmedo auto-restart -d ./ -- celery --app=worker.celery worker --loglevel=info --logfile=logs/celery_dev.log`
* console 3 - `pipenv shell` + `uvicorn main:app --host 0.0.0.0 --reload`
orchestration must be from the console(?)
* turn off VPNs if connection to docker is not working

View File

@@ -22,6 +22,7 @@ alembic = "*"
fastapi-utils = "*"
[dev-packages]
watchdog = "*"
[requires]
python_version = "3.10"

218
src/Pipfile.lock generated
View File

@@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
"sha256": "8fbf19b1fc2cc928f83c9e52f4b6522b02ddedee2fc863b0434ee3567d928c8f"
"sha256": "85ba632faeb7f00b42dfefbbe50a9a8348610a1297fdf70017991c9c2cb7980d"
},
"pipfile-spec": 6,
"requires": {
@@ -26,11 +26,11 @@
},
"aiosqlite": {
"hashes": [
"sha256:c3511b841e3a2c5614900ba1d179f366826857586f78abd75e7cbeb88e75a557",
"sha256:faa843ef5fb08bafe9a9b3859012d3d9d6f77ce3637899de20606b7fc39aa213"
"sha256:95ee77b91c8d2808bd08a59fbebf66270e9090c3d92ffbf260dc0db0b979577d",
"sha256:edba222e03453e094a3ce605db1b970c4b3376264e56f32e2a4959f948d66a96"
],
"index": "pypi",
"version": "==0.18.0"
"version": "==0.19.0"
},
"alembic": {
"hashes": [
@@ -73,11 +73,11 @@
},
"attrs": {
"hashes": [
"sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836",
"sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99"
"sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04",
"sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"
],
"markers": "python_version >= '3.6'",
"version": "==22.2.0"
"markers": "python_version >= '3.7'",
"version": "==23.1.0"
},
"authlib": {
"hashes": [
@@ -88,11 +88,11 @@
},
"auto-archiver": {
"hashes": [
"sha256:033451db9789296cebbd00bca5446541b0a4e86bc9ae0d65a8bedf8b53015044",
"sha256:b1a0fa870b0d7a0b2a0b259e6be92a3493aa2793cb04eee071263a7debcb7074"
"sha256:850361f29e57719fa4661fb1184993f1b03da165a15b9144729665f00a832e31",
"sha256:df0e0034b974e0c51549616831b70f1e0a035e154bd39457a43701a6ee3b3fd7"
],
"index": "pypi",
"version": "==0.5.5"
"version": "==0.5.7"
},
"beautifulsoup4": {
"hashes": [
@@ -111,19 +111,19 @@
},
"boto3": {
"hashes": [
"sha256:03c2e1ddd29d993a6ab9b8a8fe184027957fc32bd405c496ad0c30311445925f",
"sha256:4ea3319bba2e8ff7cd9560259ae64f073c7fb6312158aa375777687231cabe69"
"sha256:2272a060005bf8299f7342cbf1344304eb44b7060cddba6784f676e3bc737bb8",
"sha256:deb53ad15ff0e75ae0be6d7115a2d34e4bafb0541484485f0feb61dabdfb5513"
],
"markers": "python_full_version >= '3.7.0'",
"version": "==1.26.112"
"markers": "python_version >= '3.7'",
"version": "==1.26.115"
},
"botocore": {
"hashes": [
"sha256:1f52d9371d7b5ee30a53dcef7954c3cf22e04b131cfab5268035f3299ccde9e1",
"sha256:2cbaddb09b46dcb0a05490724d51acb224d3a8df433c347f995b4d78bfb02c8a"
"sha256:58eee8cf8f4f3e515df29f6dc535dd86ed3f4cea40999c5bc74640ff40bdc71f",
"sha256:dff327977d7c9f98f2dc54b51b8f70326952dd50ae23b885fdfa8bfeec014b76"
],
"markers": "python_full_version >= '3.7.0'",
"version": "==1.29.112"
"markers": "python_version >= '3.7'",
"version": "==1.29.115"
},
"brotli": {
"hashes": [
@@ -410,7 +410,7 @@
"sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e",
"sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==8.1.3"
},
"cloudscraper": {
@@ -465,7 +465,7 @@
"sha256:070b29b5bbf4b1ec2cd51c96ea040dc68a614de703910a91ad1abba18f9f379f",
"sha256:86b8b7517efcc558f085a142cdb7620f0921543fcabdb538c8a4c4001d8178e3"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==1.1.8"
},
"exceptiongroup": {
@@ -478,11 +478,11 @@
},
"fastapi": {
"hashes": [
"sha256:99d4fdb10e9dd9a24027ac1d0bd4b56702652056ca17a6c8721eec4ad2f14e18",
"sha256:daf73bbe844180200be7966f68e8ec9fd8be57079dff1bacb366db32729e6eb5"
"sha256:9569f0a381f8a457ec479d90fa01005cfddaae07546eb1f3fa035bc4797ae7d5",
"sha256:a870d443e5405982e1667dfe372663abf10754f246866056336d7f01c21dab07"
],
"index": "pypi",
"version": "==0.95.0"
"version": "==0.95.1"
},
"fastapi-utils": {
"hashes": [
@@ -501,18 +501,18 @@
},
"filelock": {
"hashes": [
"sha256:3618c0da67adcc0506b015fd11ef7faf1b493f0b40d87728e19986b536890c37",
"sha256:f08a52314748335c6460fc8fe40cd5638b85001225db78c2aa01c8c0db83b318"
"sha256:ad98852315c2ab702aeb628412cbf7e95b7ce8c3bf9565670b4eaecf1db370a9",
"sha256:fc03ae43288c013d2ea83c8597001b1129db351aad9c57fe2409327916b8e718"
],
"markers": "python_full_version >= '3.7.0'",
"version": "==3.11.0"
"markers": "python_version >= '3.7'",
"version": "==3.12.0"
},
"flask": {
"hashes": [
"sha256:7eb373984bf1c770023fce9db164ed0c3353cd0b53f130f4693da0ca756a2e6d",
"sha256:c0bec9477df1cb867e5a67c9e1ab758de9cb4a3e52dd70681f59fa40a62b3f2d"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==2.2.3"
},
"flower": {
@@ -535,16 +535,16 @@
"sha256:4b9bb5d5a380a0befa0573b302651b8a9a89262c1730e37bf423cec511804c22",
"sha256:ce222e27b0de0d7bc63eb043b956996d6dccab14cc3b690aaea91c9cc99dc16e"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==2.11.0"
},
"google-api-python-client": {
"hashes": [
"sha256:07b21ef21a542dd69cd7c09817a6079b2769cc2a791981402e8f0fcdb2d47f90",
"sha256:baf3c6f9b1679d89fcb88c29941a8b04b9a815d721880786baecc6a7f5bd376f"
"sha256:0f320190ab9d5bd2fdb0cb894e8e53bb5e17d4888ee8dc4d26ba65ce378409e2",
"sha256:3ca4e93821f4e9ac29b91ab0d9df168b42c8ad0fb8bff65b8c2ccb2d462b0464"
],
"markers": "python_full_version >= '3.7.0'",
"version": "==2.85.0"
"markers": "python_version >= '3.7'",
"version": "==2.86.0"
},
"google-auth": {
"hashes": [
@@ -574,7 +574,7 @@
"sha256:4168fcb568a826a52f23510412da405abd93f4d23ba544bb68d943b14ba3cb44",
"sha256:b287dc48449d1d41af0c69f4ea26242b5ae4c3d7249a38b0984c86a4caffff1f"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==1.59.0"
},
"greenlet": {
@@ -656,7 +656,7 @@
"sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d",
"sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==0.14.0"
},
"httplib2": {
@@ -672,7 +672,7 @@
"sha256:401201aca462749773f02920139f302450cb548b70489b9b4b92be39fe3c3c50",
"sha256:5f1f22bc65911eb1a6ffe7659bd6598e33dcfeeb904eb16ee1e705a09bf75916"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==4.6.0"
},
"idna": {
@@ -688,7 +688,7 @@
"sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3",
"sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==2.0.0"
},
"instaloader": {
@@ -703,7 +703,7 @@
"sha256:2c2349112351b88699d8d4b6b075022c0808887cb7ad10069318a8b0bc88db44",
"sha256:5dbbc68b317e5e42f327f9021763545dc3fc3bfe22e6deb96aaf1fc38874156a"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==2.1.2"
},
"jinja2": {
@@ -719,7 +719,7 @@
"sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980",
"sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==1.0.1"
},
"kombu": {
@@ -826,7 +826,7 @@
"sha256:c97c79c018b9165ac9922ae4f32da095ffd3c4e6872b45eded42926deea46818",
"sha256:d60a3903dc3bb01a18ad6a89cdbe2e4eadc69c0bc8ef1e3773ba53d44c3f7a34"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==1.2.4"
},
"markdown-it-py": {
@@ -834,7 +834,7 @@
"sha256:5a35f8d1870171d9acc47b99612dc146129b631baf04970128b568f190d0cc30",
"sha256:7c9a5e412688bc771c67432cbfebcdd686c93ce6484913dccf06cb5a0bea35a1"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==2.2.0"
},
"markupsafe": {
@@ -890,7 +890,7 @@
"sha256:f2bfb563d0211ce16b63c7cb9395d2c682a23187f54c3d79bfec33e6705473c6",
"sha256:f8ffb705ffcf5ddd0e80b65ddf7bed7ee4f5a441ea7d3419e861a12eaf41af58"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==2.1.2"
},
"marshmallow": {
@@ -898,7 +898,7 @@
"sha256:90032c0fd650ce94b6ec6dc8dfeb0e3ff50c144586462c389b81a07205bedb78",
"sha256:93f0958568da045b0021ec6aeb7ac37c81bfcccbb9a0e7ed8559885070b3a19b"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==3.19.0"
},
"marshmallow-enum": {
@@ -913,7 +913,7 @@
"sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8",
"sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==0.1.2"
},
"mutagen": {
@@ -921,7 +921,7 @@
"sha256:6e5f8ba84836b99fe60be5fb27f84be4ad919bbb6b49caa6ae81e70584b55e58",
"sha256:8af0728aa2d5c3ee5a727e28d0627966641fddfe804c23eabb5926a4d770aed5"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==1.46.0"
},
"mypy-extensions": {
@@ -952,7 +952,7 @@
"sha256:6f82bd3de45da303cf1f771ecafa1633750a358436a8bb60e06a1ceb745d2672",
"sha256:c4ab89a56575d6d38a05aa16daeaa333109c1f96167aba8901ab18b6b5e0f7f5"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==1.2.0"
},
"packaging": {
@@ -960,7 +960,7 @@
"sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61",
"sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==23.1"
},
"pluggy": {
@@ -994,7 +994,7 @@
"sha256:f08aa300b67f1c012100d8eb62d47129e53d1150f4469fd78a29fa3cb68c66f2",
"sha256:f2f4710543abec186aee332d6852ef5ae7ce2e9e807a3da570f36de5a732d88e"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==4.22.3"
},
"py": {
@@ -1132,16 +1132,16 @@
"sha256:ecbbc51391248116c0a055899e6c3e7ffbb11fb5e2a4cd6f2d0b93272118a209",
"sha256:f4a2b50e2b03d5776e7f21af73e2070e1b5c0d0df255a827e7c632962f8315af"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==1.10.7"
},
"pygments": {
"hashes": [
"sha256:77a3299119af881904cd5ecd1ac6a66214b6e9bed1f2db16993b54adede64094",
"sha256:f7e36cffc4c517fbc252861b9a6e4644ca0e5abadf9a113c72d1358ad09b9500"
"sha256:8ace4d3c1dd481894b2005f560ead0f9f19ee64fe983366be1a21e171d12775c",
"sha256:db2db3deb4b4179f399a09054b023b6a586b76499d36965813c71aa8ed7b5fd1"
],
"markers": "python_full_version >= '3.7.0'",
"version": "==2.15.0"
"markers": "python_version >= '3.7'",
"version": "==2.15.1"
},
"pyparsing": {
"hashes": [
@@ -1188,7 +1188,7 @@
"sha256:70ca6ea68fe63ecc8fa4fcf00ae651fc8a5d02d93dcd12ae6d4fc7ca46c4d395",
"sha256:ce0d46ddb668b3be82f4ed5e503dbc33dd815d83e2eb6824211310d3fb172a27"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==8.0.1"
},
"python-twitter-v2": {
@@ -1196,7 +1196,7 @@
"sha256:2397d518c17bfbc16a3d414b1cf6d3c231fd8d322f21c755ac2215c9ee675537",
"sha256:4e03a30b2570fa4f17fbc7293d850fb8276c66be106d55e460b9287de37e1dd2"
],
"markers": "python_version >= '3.6' and python_version < '4.0'",
"markers": "python_version >= '3.6' and python_version < '4'",
"version": "==0.8.1"
},
"pytz": {
@@ -1363,7 +1363,7 @@
"sha256:22b74cae0278fd5086ff44144d3813be1cedc9115bdfabbfefd86400cb88b20a",
"sha256:b5d573e13605423ec80bdd0cd5f8541f7844a0e71a13f74cf454ccb2f490708b"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==13.3.4"
},
"rsa": {
@@ -1371,7 +1371,7 @@
"sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7",
"sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"
],
"markers": "python_version >= '3.6' and python_version < '4.0'",
"markers": "python_version >= '3.6' and python_version < '4'",
"version": "==4.9"
},
"s3transfer": {
@@ -1379,7 +1379,7 @@
"sha256:06176b74f3a15f61f1b4f25a1fc29a4429040b7647133a463da8fa5bd28d5ecd",
"sha256:2ed07d3866f523cc561bf4a00fc5535827981b117dd7876f036b0c1aca42c947"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==0.6.0"
},
"selenium": {
@@ -1387,7 +1387,7 @@
"sha256:28430ac54a54fa59ad1f5392a1b89b169fe3ab2c2ccd1a9a10b6fe74f36cd6da",
"sha256:61cda3a304f82637162bc155cae7bf88fdb04c115fa2cb1c1c2e1358fcd19a9f"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==4.8.3"
},
"six": {
@@ -1403,7 +1403,7 @@
"sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101",
"sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==1.3.0"
},
"snscrape": {
@@ -1423,11 +1423,11 @@
},
"soupsieve": {
"hashes": [
"sha256:49e5368c2cda80ee7e84da9dbe3e110b70a4575f196efb74e51b94549d921955",
"sha256:e28dba9ca6c7c00173e34e4ba57448f0688bb681b7c5e8bf4971daafc093d69a"
"sha256:1c1bfee6819544a3447586c889157365a27e10d88cde3ad3da0cf0ddf646feb8",
"sha256:89d12b2d5dfcd2c9e8c22326da9d9aa9cb3dfab0a83a024f05704076ee8d35ea"
],
"markers": "python_full_version >= '3.7.0'",
"version": "==2.4"
"markers": "python_version >= '3.7'",
"version": "==2.4.1"
},
"sqlalchemy": {
"hashes": [
@@ -1481,16 +1481,16 @@
"sha256:41da799057ea8620e4667a3e69a5b1923ebd32b1819c8fa75634bbe8d8bea9bd",
"sha256:e87fce5d7cbdde34b76f0ac69013fd9d190d581d80681493016666e6f96c6d5e"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==0.26.1"
},
"telethon": {
"hashes": [
"sha256:613bae42acb5f2eeb1a0b92614e323021c66f374db62adf9826ea0c2c9120bb1",
"sha256:893c10f133974fba4c53eb1736b6514d596d1cd94c83436a711f3345df945199"
"sha256:b3990ec22351a3f3e1af376729c985025bbdd3bdabdde8c156112c3d3dfe1941",
"sha256:edc42fd58b8e1569830d3ead564cafa60fd51d684f03ee2a1fdd5f77a5a10438"
],
"markers": "python_version >= '3.5'",
"version": "==1.28.2"
"version": "==1.28.5"
},
"text-unidecode": {
"hashes": [
@@ -1515,27 +1515,27 @@
},
"tornado": {
"hashes": [
"sha256:1d54d13ab8414ed44de07efecb97d4ef7c39f7438cf5e976ccd356bebb1b5fca",
"sha256:20f638fd8cc85f3cbae3c732326e96addff0a15e22d80f049e00121651e82e72",
"sha256:5c87076709343557ef8032934ce5f637dbb552efa7b21d08e89ae7619ed0eb23",
"sha256:5f8c52d219d4995388119af7ccaa0bcec289535747620116a58d830e7c25d8a8",
"sha256:6fdfabffd8dfcb6cf887428849d30cf19a3ea34c2c248461e1f7d718ad30b66b",
"sha256:87dcafae3e884462f90c90ecc200defe5e580a7fbbb4365eda7c7c1eb809ebc9",
"sha256:9b630419bde84ec666bfd7ea0a4cb2a8a651c2d5cccdbdd1972a0c859dfc3c13",
"sha256:b8150f721c101abdef99073bf66d3903e292d851bee51910839831caba341a75",
"sha256:ba09ef14ca9893954244fd872798b4ccb2367c165946ce2dd7376aebdde8e3ac",
"sha256:d3a2f5999215a3a06a4fc218026cd84c61b8b2b40ac5296a6db1f1451ef04c1e",
"sha256:e5f923aa6a47e133d1cf87d60700889d7eae68988704e20c75fb2d65677a8e4b"
"sha256:4546003dc8b5733489139d3bff5fa6a0211be505faf819bd9970e7c2b32e8122",
"sha256:4d349846931557b7ec92f224b5d598b160e2ba26ae1812480b42e9622c884bf7",
"sha256:6164571f5b9f73143d1334df4584cb9ac86d20c461e17b6c189a19ead8bb93c1",
"sha256:6cfff1e9c15c79e106b8352269d201f8fc0815914a6260f3893ca18b724ea94b",
"sha256:720f53e6367b38190ae7fa398c25c086c69d88b3c6535bd6021a126b727fb5cd",
"sha256:912df5712024564e362ecce43c8d5862e14c78c8dd3846c9d889d44fbd7f4951",
"sha256:c37b6a384d54ce6a31168d40ab21ad2591ddaf34973075cc0cad154402ecd9e8",
"sha256:c659ab04d5aa477dbe44152c67d93f3ad3243b992d94f795ca1d5c73c37337ce",
"sha256:c9114a61a4588c09065b9996ae05462350d17160b92b9bf9a1e93689cc0424dc",
"sha256:d68f3192936ff2c4add04dc21a436a43b4408d466746b78bb2b9d0a53a18683f",
"sha256:d7b737e18f701de3e4a3b0824260b4d740e4d60607b8089bb80e80ffd464780e"
],
"markers": "python_full_version >= '3.5.2'",
"version": "==6.2"
"version": "==6.3"
},
"tqdm": {
"hashes": [
"sha256:1871fb68a86b8fb3b59ca4cdd3dcccbc7e6d613eeed31f4c332531977b89beb5",
"sha256:c4f53a17fe37e132815abceec022631be8ffe1b9381c2e6e30aa70edc99e9671"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==4.65.0"
},
"trio": {
@@ -1543,7 +1543,7 @@
"sha256:ce68f1c5400a47b137c5a4de72c7c901bd4e7a24fbdebfe9b41de8c6c04eaacf",
"sha256:f1dd0780a89bfc880c7c7994519cb53f62aacb2c25ff487001c0052bd721cdf0"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==0.22.0"
},
"trio-websocket": {
@@ -1551,7 +1551,7 @@
"sha256:0908435e4eecc49d830ae1c4d6c47b978a75f00594a2be2104d58b61a04cdb53",
"sha256:af13e9393f9051111300287947ec595d601758ce3d165328e7d36325135a8d62"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==0.10.2"
},
"typing-extensions": {
@@ -1559,7 +1559,7 @@
"sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb",
"sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==4.5.0"
},
"typing-inspect": {
@@ -1582,7 +1582,7 @@
"sha256:3f21d09e1b2aa9f2dacca12da240ca37de3ba5237a93addfd6d593afe9073355",
"sha256:b44c4388f3d34f25862cfbb387578a4d70fec417649da694a132f628a23367e2"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==4.3"
},
"uritemplate": {
@@ -1629,7 +1629,7 @@
"sha256:5a32fb5419f7bb8bd35de8548948fe27a06f857a4d086c87e142bf07aabc3fd7",
"sha256:a87c5aa7c1570c3aa87031e78c2052105e3681f57503fd4cb56470c3ab6106d6"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==0.3.15"
},
"websockets": {
@@ -1704,7 +1704,7 @@
"sha256:fe10ddc59b304cb19a1bdf5bd0a7719cbbc9fbdd57ac80ed436b709fcf889106",
"sha256:ff64a1d38d156d429404aaa84b27305e957fd10c30e5880d1765c9480bea490f"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==10.4"
},
"werkzeug": {
@@ -1712,7 +1712,7 @@
"sha256:2e1ccc9417d4da358b9de6f174e3ac094391ea1d4fbef2d667865d819dfd0afe",
"sha256:56433961bc1f12533306c624f3be5e744389ac61d722175d543e1751285da612"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==2.2.3"
},
"wsproto": {
@@ -1720,7 +1720,7 @@
"sha256:ad565f26ecb92588a3e43bc3d96164de84cd9902482b130d0ddbaa9664a85065",
"sha256:b9acddd652b585d75b20477888c56642fdade28bdfd3579aa24a4d2c037dd736"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==1.2.0"
},
"yt-dlp": {
@@ -1728,9 +1728,43 @@
"sha256:3b2df037c80922f0f83f63ee2f9253496b4a8668c0fe8d2a836ba9040f853b07",
"sha256:9af92de5effc193bdb51216d9ebf28874d96180d202fae752b0d9f2a63380f3a"
],
"markers": "python_full_version >= '3.7.0'",
"markers": "python_version >= '3.7'",
"version": "==2023.2.17"
}
},
"develop": {}
"develop": {
"watchdog": {
"hashes": [
"sha256:0e06ab8858a76e1219e68c7573dfeba9dd1c0219476c5a44d5333b01d7e1743a",
"sha256:13bbbb462ee42ec3c5723e1205be8ced776f05b100e4737518c67c8325cf6100",
"sha256:233b5817932685d39a7896b1090353fc8efc1ef99c9c054e46c8002561252fb8",
"sha256:25f70b4aa53bd743729c7475d7ec41093a580528b100e9a8c5b5efe8899592fc",
"sha256:2b57a1e730af3156d13b7fdddfc23dea6487fceca29fc75c5a868beed29177ae",
"sha256:336adfc6f5cc4e037d52db31194f7581ff744b67382eb6021c868322e32eef41",
"sha256:3aa7f6a12e831ddfe78cdd4f8996af9cf334fd6346531b16cec61c3b3c0d8da0",
"sha256:3ed7c71a9dccfe838c2f0b6314ed0d9b22e77d268c67e015450a29036a81f60f",
"sha256:4c9956d27be0bb08fc5f30d9d0179a855436e655f046d288e2bcc11adfae893c",
"sha256:4d98a320595da7a7c5a18fc48cb633c2e73cda78f93cac2ef42d42bf609a33f9",
"sha256:4f94069eb16657d2c6faada4624c39464f65c05606af50bb7902e036e3219be3",
"sha256:5113334cf8cf0ac8cd45e1f8309a603291b614191c9add34d33075727a967709",
"sha256:51f90f73b4697bac9c9a78394c3acbbd331ccd3655c11be1a15ae6fe289a8c83",
"sha256:5d9f3a10e02d7371cd929b5d8f11e87d4bad890212ed3901f9b4d68767bee759",
"sha256:7ade88d0d778b1b222adebcc0927428f883db07017618a5e684fd03b83342bd9",
"sha256:7c5f84b5194c24dd573fa6472685b2a27cc5a17fe5f7b6fd40345378ca6812e3",
"sha256:7e447d172af52ad204d19982739aa2346245cc5ba6f579d16dac4bfec226d2e7",
"sha256:8ae9cda41fa114e28faf86cb137d751a17ffd0316d1c34ccf2235e8a84365c7f",
"sha256:8f3ceecd20d71067c7fd4c9e832d4e22584318983cabc013dbf3f70ea95de346",
"sha256:9fac43a7466eb73e64a9940ac9ed6369baa39b3bf221ae23493a9ec4d0022674",
"sha256:a70a8dcde91be523c35b2bf96196edc5730edb347e374c7de7cd20c43ed95397",
"sha256:adfdeab2da79ea2f76f87eb42a3ab1966a5313e5a69a0213a3cc06ef692b0e96",
"sha256:ba07e92756c97e3aca0912b5cbc4e5ad802f4557212788e72a72a47ff376950d",
"sha256:c07253088265c363d1ddf4b3cdb808d59a0468ecd017770ed716991620b8f77a",
"sha256:c9d8c8ec7efb887333cf71e328e39cffbf771d8f8f95d308ea4125bf5f90ba64",
"sha256:d00e6be486affb5781468457b21a6cbe848c33ef43f9ea4a73b4882e5f188a44",
"sha256:d429c2430c93b7903914e4db9a966c7f2b068dd2ebdd2fa9b9ce094c7d459f33"
],
"index": "pypi",
"version": "==3.0.0"
}
}
}

View File

@@ -1,9 +1,11 @@
from functools import cache
from sqlalchemy.orm import Session, load_only
from sqlalchemy import Column
from loguru import logger
from . import models, schemas
import yaml
## --------------- TASK = Archive
def get_task(db: Session, task_id: str):
    """Look up one archive by its id.

    The lookup goes through ``base_query``, so whatever filtering and column
    restrictions that helper applies are inherited here.

    Returns the first matching ``models.Archive`` or ``None`` when no row matches.
    """
    by_id = base_query(db).filter(models.Archive.id == task_id)
    return by_id.first()
@@ -17,22 +19,19 @@ def search_tasks_by_url(db: Session, url:str, skip: int = 0, limit: int = 100):
def search_tasks_by_email(db: Session, email:str, skip: int = 0, limit: int = 100):
    """List archives whose author has the given email, one page at a time.

    Args:
        db: open SQLAlchemy session.
        email: email address matched against the related author record.
        skip: number of rows to skip (page offset).
        limit: maximum number of rows to return.

    Returns:
        A list of ``models.Archive`` rows (possibly empty).
    """
    authored_by = models.Archive.author.has(email=email)
    page = base_query(db).filter(authored_by).offset(skip).limit(limit)
    return page.all()
def create_task(db: Session, task: schemas.TaskCreate):
db_task = models.Archive(id=task.id, url=task.url, author=task.author, result=task.result)
def create_task(db: Session, task: schemas.ArchiveCreate, tags:list[models.Tag],urls:list[models.ArchiveUrl]):
    """Persist a new archive together with its tags and URLs.

    Args:
        db: open SQLAlchemy session.
        task: validated payload describing the archive.
        tags: ``models.Tag`` instances to attach to the archive.
        urls: ``models.ArchiveUrl`` instances to attach to the archive.

    Returns:
        The refreshed ``models.Archive`` instance after commit.
    """
    db_task = models.Archive(
        id=task.id,
        url=task.url,
        author_id=task.author_id,
        result=task.result,
        group_id=task.group_id,
        # Without this the column default (True) is stored, so an archive
        # submitted as private would silently become public.
        public=task.public,
    )
    logger.debug(tags)
    # Related objects assigned to a relationship are added to the session along
    # with the parent through SQLAlchemy's default "save-update" cascade.
    # TODO: confirm with a test that tags/urls not yet persisted are inserted here.
    db_task.tags = tags
    db_task.urls = urls
    db.add(db_task)
    db.commit()
    db.refresh(db_task)
    return db_task
# def delete_task(db: Session, task_id: str, email:str)->bool:
# db_task = db.query(models.Task).filter(models.Task.id == task_id, models.Task.author==email).first()
# if db_task:
# db.delete(db_task)
# db.commit()
# return db_task is not None
def soft_delete_task(db: Session, task_id: str, email:str)->bool:
db_task = db.query(models.Archive).filter(models.Archive.id == task_id, models.Archive.author==email, models.Archive.deleted==False).first()
# TODO: implement hard-delete with cronjob that deletes from S3
db_task = db.query(models.Archive).filter(models.Archive.id == task_id, models.Archive.author_id==email, models.Archive.deleted==False).first()
if db_task:
db_task.deleted = True
db.commit()
@@ -44,6 +43,31 @@ def base_query(db:Session):
.options(load_only(models.Archive.id, models.Archive.created_at, models.Archive.url, models.Archive.result))\
.filter(models.Archive.deleted == False)
## --------------- TAG
def create_tag(db: Session, tag: str):
    """Return the ``models.Tag`` whose id equals ``tag``, inserting it first if absent.

    An existing row is returned untouched; otherwise a new row is added,
    committed and refreshed before being returned.
    """
    existing = db.query(models.Tag).filter(models.Tag.id==tag).first()
    if existing:
        return existing

    fresh = models.Tag(id=tag)
    db.add(fresh)
    db.commit()
    db.refresh(fresh)
    return fresh
def search_tags(db: Session, tag:str, skip: int = 0, limit: int = 100):
    """Find tags whose id contains ``tag`` as a substring.

    Args:
        db: open SQLAlchemy session.
        tag: text to look for inside the tag id.
        skip: number of rows to skip (page offset).
        limit: maximum number of rows to return.

    Returns:
        A list of ``models.Tag`` rows (possibly empty).
    """
    # The Tag model has no ``url`` column; the tag text is stored in ``id``.
    # NOTE(review): ``%`` and ``_`` inside ``tag`` act as LIKE wildcards.
    pattern = f'%{tag}%'
    return db.query(models.Tag).filter(models.Tag.id.like(pattern)).offset(skip).limit(limit).all()
def get_group_for_user(db:Session, group_name:str, email:str)->models.Group:
    """Look up the membership linking user ``email`` to group ``group_name``.

    Queries the user/group association table and returns the first matching
    row, or ``None`` when the user is not in that group.

    NOTE(review): the value returned is a row of the association table rather
    than a ``models.Group`` instance, despite the annotation — confirm what
    callers expect.
    """
    membership = db.query(models.association_table_user_groups)
    membership = membership.filter_by(user_id=email, group_id=group_name)
    return membership.first()
def get_user_groups(db: Session, email:str):
    """Return the ids of every group that user ``email`` belongs to.

    Reads only the ``group_id`` column of the user/group association table and
    flattens the single-column rows into a plain list.
    """
    memberships = db.query(models.association_table_user_groups).filter_by(user_id=email)
    rows = memberships.with_entities(Column("group_id")).all()
    return [group_id for (group_id,) in rows]
## --------------- INIT User-Groups
@cache
def get_group(db:Session, group_name:str)->models.Group:
db_group = db.query(models.Group).filter(models.Group.id==group_name).first()

View File

@@ -4,25 +4,8 @@ from sqlalchemy.orm import relationship
import uuid
from .database import Base
# class Task(Base):
# __tablename__ = "tasks"
# id = Column(String, primary_key=True, index=True)
# url = Column(String, index=True)
# author = Column(String, index=True)
# result = Column(JSON, default=None)
# created_at = Column(DateTime(timezone=True), server_default=func.now())
# # updated_at = Column(DateTime(timezone=True), onupdate=func.now())
# deleted = Column(Boolean, default=False)
# # items = relationship("Item", back_populates="owner")
# # tags = relationship("Tag", back_populates="owner")
def generate_uuid():
    """Return a newly generated random UUID (version 4) as its canonical string."""
    fresh = uuid.uuid4()
    return str(fresh)
### new data model below
# many to many association tables
association_table_archive_tags = Table(
@@ -45,23 +28,33 @@ class Archive(Base):
id = Column(String, primary_key=True, index=True)
url = Column(String, index=True)
result = Column(JSON, default=None)
public = Column(Boolean, default=True) # if public=false, access to group and author
deleted = Column(Boolean, default=False)
created_at = Column(DateTime(timezone=True), server_default=func.now())
updated_at = Column(DateTime(timezone=True), onupdate=func.now())
deleted = Column(Boolean, default=False)
public = Column(Boolean, default=True) # if public=false, access to group and author
group_id = Column(String, ForeignKey("groups.id"), default=None)
author_id = Column(String, ForeignKey("users.email"))
group = relationship("Group", back_populates="archives")
tags = relationship("Tag", back_populates="archives", secondary=association_table_archive_tags)
group = relationship("Group", back_populates="archives")
author = relationship("User", back_populates="archives")
urls = relationship("ArchiveUrl", back_populates="archive")
class ArchiveUrl(Base):
    """A URL recorded against an archive (table ``archive_urls``).

    NOTE(review): presumably these are the locations of artifacts produced by
    an archive run — confirm against the code that builds these rows.
    """
    __tablename__ = "archive_urls"

    # The URL string itself is the primary key, so each URL can appear only once in the table.
    url = Column(String, primary_key=True, index=True)
    # Optional; defaults to NULL. NOTE(review): the meaning of "key" is not visible here — confirm.
    key = Column(String, default=None)
    # Foreign key to the owning archive (archives.id).
    archive_id = Column(String, ForeignKey("archives.id"))

    # ORM side of the link; paired with Archive.urls via back_populates.
    archive = relationship("Archive", back_populates="urls")
class Tag(Base):
__tablename__ = "tags"
id = Column(String, primary_key=True, index=True, default=generate_uuid)
name = Column(String, unique=True, index=True)
id = Column(String, primary_key=True, index=True)
created_at = Column(DateTime(timezone=True), server_default=func.now())
archives = relationship("Archive", back_populates="tags", secondary=association_table_archive_tags)

View File

@@ -1,16 +1,31 @@
from pydantic import BaseModel
from datetime import datetime
class TaskCreate(BaseModel):
id: str
class ArchiveCreate(BaseModel):
id: str | None = None
url: str
author: str
result: dict
result: dict | None = None
public: bool = True
author_id: str | None = None
group_id: str | None = None
tags: list = []
# urls: list = []
class Task(TaskCreate):
class Archive(ArchiveCreate):
created_at: datetime
updated_at: datetime | None
deleted: bool
class Config:
orm_mode = True
orm_mode = True
# class TagCreate(BaseModel):
# id: str
# class Tag(TagCreate):
# created_at: datetime
# # class Config:
# # orm_mode = True

View File

@@ -15,14 +15,15 @@ from worker import create_archive_task, celery
from db import crud, models, schemas
from db.database import engine, SessionLocal
from sqlalchemy.orm import Session
from security import get_bearer_auth, get_basic_auth
from security import get_bearer_auth, get_basic_auth, bearer_security
load_dotenv()
# Configuration
ALLOWED_ORIGINS = os.environ.get("ALLOWED_ORIGINS", "chrome-extension://ondkcheoicfckabcnkdgbepofpjmjcmb,chrome-extension://ojcimmjndnlmmlgnjaeojoebaceokpdp").split(",")
VERSION = "0.2.0"
VERSION = "0.3.1"
# min-version refers to the version of auto-archiver-extension on the webstore
BREAKING_CHANGES = {"minVersion": "0.3.0", "message": "The latest update has breaking changes, please update the extension to the most recent version."}
app = FastAPI()
app.add_middleware(
@@ -41,8 +42,16 @@ def get_db():
@app.get("/")
def home(): return JSONResponse({"version": VERSION})
async def home(request: Request):
    # Public status endpoint: always reports the API version and the
    # breaking-changes notice; when the caller presents a valid bearer token
    # the groups available to that user are included as well.
    status = {"version": VERSION, "breakingChanges": BREAKING_CHANGES}

    # Creating the generator does not run get_db yet; a session is only opened
    # by next() below, i.e. after authentication succeeded.
    # Assumes get_db is a generator function, as the original next(get_db()) implies.
    db_gen = get_db()
    try:
        # if authenticated will load available groups
        email = await get_bearer_auth(await bearer_security(request))
        db: Session = next(db_gen)
        status["groups"] = crud.get_user_groups(db, email)
    except HTTPException:
        # Unauthenticated callers simply get the basic status.
        pass
    except Exception as e:
        # Best effort: never fail the status endpoint because of the optional group lookup.
        logger.error(e)
    finally:
        # Closing the generator lets get_db run its cleanup deterministically
        # instead of waiting for garbage collection; a no-op if never started.
        db_gen.close()
    return JSONResponse(status)
# logging configurations
logger.add("logs/api_logs.log", retention="30 days", rotation="3 days")
@@ -55,36 +64,59 @@ async def logging_middleware(request: Request, call_next):
# Bearer protected below
@app.get("/tasks/search-url", response_model=list[schemas.Task])
def search(url:str, skip: int = 0, limit: int = 100, db: Session = Depends(get_db), email = Depends(get_bearer_auth)):
@app.get("/groups", response_model=list[str])
def get_user_groups(db: Session = Depends(get_db), email = Depends(get_bearer_auth)):
    # Bearer-protected: lists the group ids of the authenticated user.
    group_ids = crud.get_user_groups(db, email)
    return group_ids
@app.get("/tasks/search-url", response_model=list[schemas.Archive])
def search(url:str, skip: int = 0, limit: int = 100, db: Session = Depends(get_db), _email = Depends(get_bearer_auth)):
    # Bearer-protected: the token is only validated, the email itself is unused.
    # Returns one page of archives matching the given url.
    matches = crud.search_tasks_by_url(db, url, skip=skip, limit=limit)
    return matches
# @app.get("/tasks/search", response_model=list[schemas.Task])
# def search(skip: int = 0, limit: int = 100, db: Session = Depends(get_db), email = Depends(get_bearer_auth)):
# return crud.get_tasks(db, skip=skip, limit=limit)
@app.get("/tasks/sync", response_model=list[schemas.Task])
@app.get("/tasks/sync", response_model=list[schemas.Archive])
def search(skip: int = 0, limit: int = 100, db: Session = Depends(get_db), email = Depends(get_bearer_auth)):
    # Bearer-protected: returns one page of the archives authored by the authenticated user.
    own_archives = crud.search_tasks_by_email(db, email, skip=skip, limit=limit)
    return own_archives
@app.post("/tasks", status_code=201)
def run_task(payload = Body(...), email = Depends(get_bearer_auth)):
url = payload.get('url')
logger.info(f"new task for user {email}: {url}")
def run_task(archive:schemas.ArchiveCreate, email = Depends(get_bearer_auth)):
    # Queue a new archive task for the authenticated user and return its id.
    # Raises HTTPException(422) when the submitted URL is not a string longer than 5 characters.

    # The author is always the authenticated user, whatever the payload said.
    archive.author_id = email
    url = archive.url
    logger.info(f"new {archive.public=} task for {email=} and {archive.group_id=}: {url}")
    if not isinstance(url, str) or len(url) <= 5:
        raise HTTPException(status_code=422, detail=f"Invalid URL received: {url}")
    logger.info("creating task")
    # The whole validated payload is handed to the worker as one JSON string
    # (the former url=/email= keyword call referenced a `payload` that no longer exists).
    task = create_archive_task.delay(archive.json())
    return JSONResponse({"id": task.id})
# @app.post("/tasks", status_code=201)
# def run_task(payload = Body(...), email = Depends(get_bearer_auth)):
# url = payload.get('url')
# public = payload.get('public', True)
# group = payload.get('group', None)
# logger.info(f"new {public=} task for {email=} and {group=}: {url}")
# if type(url)!=str or len(url)<=5:
# raise HTTPException(status_code=422, detail=f"Invalid URL received: {url}")
# task = create_archive_task.delay(url=payload.get('url'), email=email, public=public, group=group)
# return JSONResponse({"id": task.id})
@app.get("/tasks/{task_id}")
def get_status(task_id, email = Depends(get_bearer_auth)):
logger.info(f"status check for user {email}")
task_result = AsyncResult(task_id, app=celery)
logger.info(task_result)
result = {
"id": task_id,
"status": task_result.status,
"result": task_result.result
}
try:
if task_result.result and "error" in task_result.result:
result["status"] = "FAILURE"
except Exception as e: logger.error(traceback.format_exc())
try:
json_result = jsonable_encoder(result, exclude_unset=True)
return JSONResponse(json_result)
@@ -94,6 +126,7 @@ def get_status(task_id, email = Depends(get_bearer_auth)):
return JSONResponse({
"id": task_id,
"status": "FAILURE",
"result": {"error": e}
})

View File

@@ -7,19 +7,19 @@
# -i https://pypi.org/simple
aiofiles==0.6.0
aiosqlite==0.18.0
alembic==1.9.4
aiosqlite==0.19.0
alembic==1.10.3
amqp==2.6.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
anyio==3.6.2; python_full_version >= '3.6.2'
argparse==1.4.0
async-generator==1.10; python_version >= '3.5'
attrs==22.2.0; python_version >= '3.6'
attrs==23.1.0; python_version >= '3.7'
authlib==0.15.6
auto-archiver==0.4.3
beautifulsoup4==4.11.2; python_version >= '3.6'
auto-archiver==0.5.6
beautifulsoup4==4.12.2; python_version >= '3.6'
billiard==3.6.4.0
boto3==1.26.79; python_version >= '3.7'
botocore==1.29.79; python_version >= '3.7'
boto3==1.26.115; python_version >= '3.7'
botocore==1.29.115; python_version >= '3.7'
brotli==1.0.9; platform_python_implementation == 'CPython'
bs4==0.0.1
cachetools==5.3.0; python_version ~= '3.7'
@@ -31,24 +31,25 @@ click==8.1.3; python_version >= '3.7'
cloudscraper==1.2.69
cryptography==38.0.4; python_version >= '3.6'
dataclasses-json==0.5.7; python_version >= '3.6'
dateparser==1.1.7; python_version >= '3.7'
exceptiongroup==1.1.0; python_version < '3.11'
fastapi==0.92.0
dateparser==1.1.8; python_version >= '3.7'
exceptiongroup==1.1.1; python_version < '3.11'
fastapi-utils==0.2.1
fastapi==0.95.1
ffmpeg-python==0.2.0
filelock==3.9.0; python_version >= '3.7'
filelock==3.12.0; python_version >= '3.7'
flask==2.2.3; python_version >= '3.7'
flower==0.9.7
future==0.18.3; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'
google-api-core==2.11.0; python_version >= '3.7'
google-api-python-client==2.79.0; python_version >= '3.7'
google-api-python-client==2.86.0; python_version >= '3.7'
google-auth-httplib2==0.1.0
google-auth-oauthlib==1.0.0; python_version >= '3.6'
google-auth==2.16.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
googleapis-common-protos==1.58.0; python_version >= '3.7'
greenlet==2.0.2; platform_machine == 'aarch64' or (platform_machine == 'ppc64le' or (platform_machine == 'x86_64' or (platform_machine == 'amd64' or (platform_machine == 'AMD64' or (platform_machine == 'win32' or platform_machine == 'WIN32')))))
gspread==5.7.2; python_version not in '3.0, 3.1, 3.2, 3.3' and python_version >= '3.6'
google-auth==2.17.3; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
googleapis-common-protos==1.59.0; python_version >= '3.7'
greenlet==2.0.2; python_version >= '3' and platform_machine == 'aarch64' or (platform_machine == 'ppc64le' or (platform_machine == 'x86_64' or (platform_machine == 'amd64' or (platform_machine == 'AMD64' or (platform_machine == 'win32' or platform_machine == 'WIN32')))))
gspread==5.8.0; python_version not in '3.0, 3.1, 3.2, 3.3' and python_version >= '3.6'
h11==0.14.0; python_version >= '3.7'
httplib2==0.21.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
httplib2==0.22.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
humanize==4.6.0; python_version >= '3.7'
idna==3.4; python_version >= '3.5'
iniconfig==2.0.0; python_version >= '3.7'
@@ -57,7 +58,7 @@ itsdangerous==2.1.2; python_version >= '3.7'
jinja2==3.1.2
jmespath==1.0.1; python_version >= '3.7'
kombu==4.6.11; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
loguru==0.6.0
loguru==0.7.0
lxml==4.9.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
mako==1.2.4; python_version >= '3.7'
markdown-it-py==2.2.0; python_version >= '3.7'
@@ -70,59 +71,59 @@ mypy-extensions==1.0.0; python_version >= '3.5'
oauth2client==4.1.3
oauthlib==3.2.2; python_version >= '3.6'
outcome==1.2.0; python_version >= '3.7'
packaging==23.0; python_version >= '3.7'
packaging==23.1; python_version >= '3.7'
pluggy==0.13.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
prometheus-client==0.8.0
protobuf==4.22.0; python_version >= '3.7'
protobuf==4.22.3; python_version >= '3.7'
py==1.11.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
pyaes==1.6.1
pyasn1-modules==0.2.8
pyasn1==0.4.8
pycparser==2.21
pycryptodomex==3.17; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
pydantic==1.10.5; python_version >= '3.7'
pygments==2.14.0; python_version >= '3.6'
pydantic==1.10.7; python_version >= '3.7'
pygments==2.15.0; python_version >= '3.7'
pyparsing==3.0.9; python_version >= '3.1'
pysocks==1.7.1
pytest==6.2.4
python-dateutil==2.8.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
python-dotenv==1.0.0
python-slugify==8.0.1; python_version >= '3.7'
python-twitter-v2==0.8.1; python_version >= '3.6' and python_version < '4.0'
python-twitter-v2==0.8.1; python_version >= '3.6' and python_version < '4'
pytz-deprecation-shim==0.1.0.post0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
pytz==2022.7.1
pytz==2023.3
pyyaml==6.0; python_version >= '3.6'
redis==3.5.3
regex==2022.10.31; python_version >= '3.6'
regex==2023.3.23; python_version >= '3.8'
requests-oauthlib==1.3.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
requests-toolbelt==0.10.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
requests==2.28.2
rich==13.3.1; python_version >= '3.7'
rsa==4.9; python_version >= '3.6' and python_version < '4.0'
rich==13.3.4; python_version >= '3.7'
rsa==4.9; python_version >= '3.6' and python_version < '4'
s3transfer==0.6.0; python_version >= '3.7'
selenium==4.8.2; python_version >= '3.7'
selenium==4.8.3; python_version >= '3.7'
six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
sniffio==1.3.0; python_version >= '3.7'
snscrape==0.5.0.20230113; python_version ~= '3.8'
snscrape==0.6.2.20230320; python_version ~= '3.8'
sortedcontainers==2.4.0
soupsieve==2.4; python_version >= '3.7'
sqlalchemy==2.0.4
starlette==0.25.0; python_version >= '3.7'
telethon==1.27.0; python_version >= '3.5'
soupsieve==2.4.1; python_version >= '3.7'
sqlalchemy==1.4.47
starlette==0.26.1; python_version >= '3.7'
telethon==1.28.5; python_version >= '3.5'
text-unidecode==1.3
tiktok-downloader==0.3.4
toml==0.10.2; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'
tornado==6.2; python_full_version >= '3.5.2'
tqdm==4.64.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
trio-websocket==0.9.2; python_version >= '3.5'
tornado==6.3; python_full_version >= '3.5.2'
tqdm==4.65.0; python_version >= '3.7'
trio-websocket==0.10.2; python_version >= '3.7'
trio==0.22.0; python_version >= '3.7'
typing-extensions==4.5.0; python_version >= '3.7'
typing-inspect==0.8.0
tzdata==2022.7; python_version >= '3.6'
tzlocal==4.2; python_version >= '3.6'
tzdata==2023.3; python_version >= '3.6'
tzlocal==4.3; python_version >= '3.7'
uritemplate==4.1.1; python_version >= '3.6'
urllib3==1.26.14; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
uvicorn==0.20.0
uvicorn==0.21.1
vine==1.3.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
vk-api==11.9.9
vk-url-scraper==0.3.15; python_version >= '3.7'

View File

@@ -1,12 +1,14 @@
import os, re
import os, re, traceback, yaml
from celery import Celery
from celery import Celery, states
from celery.exceptions import Ignore
from celery.signals import task_failure
from auto_archiver import Config, ArchivingOrchestrator, Metadata
# from auto_archiver.enrichers import ScreenshotEnricher
from loguru import logger
from db import crud, schemas
from db import crud, schemas, models
from db.database import engine, SessionLocal
from contextlib import contextmanager
import json
@@ -14,6 +16,8 @@ import json
# Celery application for this module. Broker and result backend URLs come from
# the environment and fall back to "redis://localhost:6379".
celery = Celery(__name__)
celery.conf.broker_url = os.environ.get("CELERY_BROKER_URL", "redis://localhost:6379")
celery.conf.result_backend = os.environ.get("CELERY_RESULT_BACKEND", "redis://localhost:6379")
# Path of the YAML file opened by read_user_groups(); defaults to
# "user-groups.yaml" when the environment variable is not set.
USER_GROUPS_FILENAME=os.environ.get("USER_GROUPS_FILENAME", "user-groups.yaml")
@contextmanager
def get_db():
@@ -21,34 +25,89 @@ def get_db():
try: yield session
finally: session.close()
config_default = Config()
config_default.parse(use_cli=False, yaml_config_filename=os.environ.get("ORCHESTRATION_CONFIG_DEFAULT", "secrets/orchestration.yaml"))
# NOTE(review): this span has two `@celery.task` decorators, two
# `def create_archive_task` headers, two `return` statements and module-level
# config statements in the middle, all without indentation. It looks like two
# revisions interleaved (diff residue without +/- markers) — confirm against
# the real file before relying on statement order here.
#
# Decorator: registers the task as "create_archive_task", binds `self`, and
# auto-retries on any Exception with backoff, up to 5 retries.
@celery.task(name="create_archive_task", bind=True, autoretry_for=(Exception,), retry_backoff=True, retry_kwargs={'max_retries': 5})
def create_archive_task(self, archive_json: str):
# The task receives the archive as a JSON string and rebuilds the schema object.
archive = schemas.ArchiveCreate.parse_raw(archive_json)
# Only non-public archives with a non-empty group id go through the
# membership check below.
if not archive.public and archive.group_id and len(archive.group_id) > 0:
# ensure group is valid for user
with get_db() as session:
db_group = crud.get_group_for_user(session, archive.group_id, archive.author_id)
if not db_group:
# Permission failures are returned as an {"error": ...} dict, not raised.
logger.error(em := f"User {archive.author_id} is not part of {archive.group_id}, no permission")
return {"error": em}
# NOTE(review): the next four lines configure `config_bcat` from the
# ORCHESTRATION_CONFIG_BELLINGCAT environment variable; they read as
# module-level setup from the other revision — confirm.
config_bcat = None
if (config_bcat_file := os.environ.get("ORCHESTRATION_CONFIG_BELLINGCAT")):
config_bcat = Config()
config_bcat.parse(use_cli=False, yaml_config_filename=config_bcat_file)
url = archive.url
logger.info(f"{url=}")
logger.info(f"{archive=}")
# Pick the orchestrator from the requested group and author, then archive the URL.
orchestrator = choose_orchestrator(archive.group_id, archive.author_id)
result = orchestrator.feed_item(Metadata().set_url(url))
# A falsy orchestrator result is reported as an {"error": ...} dict.
if not result:
logger.error(f"UNABLE TO archive: {url}")
return {"error": "unable to archive"}
# NOTE(review): the dict, decorator, `def` and two statements below use the
# (url, email) signature and look like the other revision — confirm.
orchestrators = {"bellingcat": None, "default": None}
@celery.task(name="create_archive_task", bind=True, autoretry_for=(Exception,), retry_backoff=True, retry_kwargs={'max_retries': 5})
def create_archive_task(self, url: str, email:str=""):
orchestrator = choose_orchestrator(email)
result = orchestrator.feed_item(Metadata().set_url(url)).to_json()
result_json = result.to_json()
with get_db() as session:
db_task = crud.create_task(session, task=schemas.TaskCreate(id=self.request.id, url=url, author=email, result=json.loads(result)))
# create DB URLs
# One ArchiveUrl per URL of every media item; the key is the media's "id",
# falling back to "media_<index>" when it has none.
db_urls = [models.ArchiveUrl(url=url, key=m.get("id", f"media_{i}")) for i, m in enumerate(result.media) for url in m.urls]
# create DB TAGs if needed
db_tags = [crud.create_tag(session, tag) for tag in archive.tags]
# insert archive
# The stored archive uses the Celery request id as its id.
db_task = crud.create_task(session, task=schemas.ArchiveCreate(id=self.request.id, url=url, result=json.loads(result_json), public=archive.public, author_id=archive.author_id, group_id=archive.group_id), tags=db_tags, urls=db_urls)
logger.debug(f"Added {db_task.id=} to database on {db_task.created_at}")
return result
return result_json
@task_failure.connect(sender=create_archive_task)
def task_failure_notifier(sender=None, **kwargs):
    """Log a failure of ``create_archive_task``.

    Connected to the ``task_failure`` signal. Logs a warning banner, then the
    ``exception`` and ``traceback`` entries of ``kwargs``, and finally the
    traceback rendered frame by frame.
    """
    logger.warning("😅 From task_failure_notifier ==> Task failed successfully! ")
    logger.error(kwargs['exception'])
    tb = kwargs['traceback']
    logger.error(tb)
    # Render the traceback object as readable "File ..., line ..." entries.
    frames = traceback.extract_tb(tb)
    rendered = "\n".join(traceback.format_list(frames))
    logger.error(rendered)
def choose_orchestrator(group, email):
    """Build an ``ArchivingOrchestrator`` for ``group``.

    If ``group`` is not a key of the module-level ``ORCHESTRATORS`` mapping,
    the value returned by ``get_user_first_group(email)`` is used instead.

    Args:
        group: requested group name; may be unknown or ``None``.
        email: e-mail of the requesting user, used for the fallback lookup.

    Returns:
        A new ``ArchivingOrchestrator`` built from the group's configuration.

    Raises:
        AssertionError: if the resolved group has no configuration.
    """
    # ORCHESTRATORS is only read here, so no `global` declaration is needed.
    if group not in ORCHESTRATORS:
        group = get_user_first_group(email)
    # Raise explicitly instead of using `assert`: asserts are removed under
    # `python -O`, which would pass a missing (None) config to the orchestrator.
    # The exception type and message are unchanged.
    if group not in ORCHESTRATORS:
        raise AssertionError(f"{group=} not in configurations")
    logger.info(f"CHOOSE Orchestrator for {group=}, {email=}")
    return ArchivingOrchestrator(ORCHESTRATORS[group])
def read_user_groups():
    """Load the user-groups YAML file named by ``USER_GROUPS_FILENAME``.

    Returns:
        The parsed YAML content. An empty file yields ``{}`` instead of
        ``None``, so callers can use ``.get`` on the result.

    Raises:
        OSError: if the file cannot be opened.
        yaml.YAMLError: if the content is not valid YAML (logged, then re-raised).
    """
    # Explicit encoding: the platform default is locale-dependent.
    with open(USER_GROUPS_FILENAME, encoding="utf-8") as inf:
        try:
            # safe_load never constructs arbitrary Python objects.
            return yaml.safe_load(inf) or {}
        except yaml.YAMLError as e:
            logger.error(f"could not open user groups filename {USER_GROUPS_FILENAME}: {e}")
            raise
def get_user_first_group(email):
    """Return the first group listed for ``email`` in the user-groups file.

    Args:
        email: key looked up under the ``users`` mapping of the YAML file.

    Returns:
        The first entry of the user's group list, or ``"default"`` when the
        user is not listed or has no groups.
    """
    # YAML keys with an empty value load as None; normalise each level so a
    # bare `users:` or `someone@example.com:` entry falls back to "default"
    # instead of raising.
    user_groups_yaml = read_user_groups() or {}
    users = user_groups_yaml.get("users") or {}
    groups = users.get(email) or []
    if groups:
        return groups[0]
    return "default"
def choose_orchestrator(email):
    """Return the cached orchestrator matching ``email``.

    Addresses matching ``<name>@bellingcat.com`` use the "bellingcat" slot of
    ``orchestrators`` when ``config_bcat`` is set; everything else uses the
    "default" slot. A slot is filled with a new ``ArchivingOrchestrator`` the
    first time it is requested and reused afterwards.
    """
    global orchestrators, config_bcat
    use_bcat = re.match(r'^[\w.]+@bellingcat\.com$', email) and config_bcat
    if use_bcat:
        slot = "bellingcat"
        logger.debug("Using bellingcat config for orchestration")
    else:
        slot = "default"
        logger.debug("Using default config for orchestration")

    cached = orchestrators[slot]
    if not cached:
        # Build lazily so each configuration is only instantiated on demand.
        cached = ArchivingOrchestrator(config_bcat if use_bcat else config_default)
        orchestrators[slot] = cached
    return cached
def load_orchestrators():
    """Load one ``Config`` per group and publish them in ``ORCHESTRATORS``.

    Reads the ``orchestrators`` key of the user-groups YAML file, which maps a
    group name to an orchestration config filename. Each file is parsed with
    ``Config.parse`` and the module-level ``ORCHESTRATORS`` mapping is rebound
    to the result.

    Returns:
        The freshly built ``{group: Config}`` mapping.

    Raises:
        AssertionError: if no orchestrators are configured, or if the
            mandatory ``default`` entry is missing.
    """
    global ORCHESTRATORS
    # An empty file or a bare `orchestrators:` key loads as None; treat both as
    # "nothing configured" so the explicit checks below report the problem.
    user_groups_yaml = read_user_groups() or {}
    orchestrators_config = user_groups_yaml.get("orchestrators") or {}
    # Explicit raises rather than `assert`, which is removed under `python -O`.
    if not orchestrators_config:
        raise AssertionError(f"No orchestrators key found in {USER_GROUPS_FILENAME}. please see the example file")
    if "default" not in orchestrators_config:
        raise AssertionError("please include a 'default' orchestrator to be used when the user has no group")
    logger.debug(f"Found {len(orchestrators_config)} group orchestrators.")

    # Build into a local mapping and publish it only once every config has
    # parsed, so a failing file does not leave a half-filled registry behind.
    loaded = {}
    for group, config_filename in orchestrators_config.items():
        config = Config()
        config.parse(use_cli=False, yaml_config_filename=config_filename)
        loaded[group] = config
    ORCHESTRATORS = loaded
    return ORCHESTRATORS
## INIT
# Module-level initialisation: start from an empty registry, then fill it by
# calling load_orchestrators() when this module is imported.
ORCHESTRATORS = {}
load_orchestrators()