mirror of
https://github.com/bellingcat/auto-archiver-api.git
synced 2026-06-08 03:28:35 +03:00
new api mvp
This commit is contained in:
@@ -13,6 +13,7 @@ cd /src
|
||||
* console 1 - `docker compose up redis`; optionally add `dashboard` for the Flower dashboard, and `web` if you are not running uvicorn locally
|
||||
* console 2 - `pipenv shell` + `celery worker --app=worker.celery --loglevel=info --logfile=logs/celery_dev.log`
|
||||
* with Celery 5 use `celery --app=worker.celery worker --loglevel=info --logfile=logs/celery_dev.log` instead (the `--app` option goes before the `worker` sub-command)
|
||||
* or with watchdog for dev auto-reload `watchmedo auto-restart -d ./ -- celery --app=worker.celery worker --loglevel=info --logfile=logs/celery_dev.log`
|
||||
* console 3 - `pipenv shell` + `uvicorn main:app --host 0.0.0.0 --reload`
|
||||
orchestration must be from the console(?)
|
||||
* turn off VPNs if connection to docker is not working
|
||||
|
||||
@@ -22,6 +22,7 @@ alembic = "*"
|
||||
fastapi-utils = "*"
|
||||
|
||||
[dev-packages]
|
||||
watchdog = "*"
|
||||
|
||||
[requires]
|
||||
python_version = "3.10"
|
||||
|
||||
218
src/Pipfile.lock
generated
218
src/Pipfile.lock
generated
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "8fbf19b1fc2cc928f83c9e52f4b6522b02ddedee2fc863b0434ee3567d928c8f"
|
||||
"sha256": "85ba632faeb7f00b42dfefbbe50a9a8348610a1297fdf70017991c9c2cb7980d"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {
|
||||
@@ -26,11 +26,11 @@
|
||||
},
|
||||
"aiosqlite": {
|
||||
"hashes": [
|
||||
"sha256:c3511b841e3a2c5614900ba1d179f366826857586f78abd75e7cbeb88e75a557",
|
||||
"sha256:faa843ef5fb08bafe9a9b3859012d3d9d6f77ce3637899de20606b7fc39aa213"
|
||||
"sha256:95ee77b91c8d2808bd08a59fbebf66270e9090c3d92ffbf260dc0db0b979577d",
|
||||
"sha256:edba222e03453e094a3ce605db1b970c4b3376264e56f32e2a4959f948d66a96"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==0.18.0"
|
||||
"version": "==0.19.0"
|
||||
},
|
||||
"alembic": {
|
||||
"hashes": [
|
||||
@@ -73,11 +73,11 @@
|
||||
},
|
||||
"attrs": {
|
||||
"hashes": [
|
||||
"sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836",
|
||||
"sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99"
|
||||
"sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04",
|
||||
"sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==22.2.0"
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==23.1.0"
|
||||
},
|
||||
"authlib": {
|
||||
"hashes": [
|
||||
@@ -88,11 +88,11 @@
|
||||
},
|
||||
"auto-archiver": {
|
||||
"hashes": [
|
||||
"sha256:033451db9789296cebbd00bca5446541b0a4e86bc9ae0d65a8bedf8b53015044",
|
||||
"sha256:b1a0fa870b0d7a0b2a0b259e6be92a3493aa2793cb04eee071263a7debcb7074"
|
||||
"sha256:850361f29e57719fa4661fb1184993f1b03da165a15b9144729665f00a832e31",
|
||||
"sha256:df0e0034b974e0c51549616831b70f1e0a035e154bd39457a43701a6ee3b3fd7"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==0.5.5"
|
||||
"version": "==0.5.7"
|
||||
},
|
||||
"beautifulsoup4": {
|
||||
"hashes": [
|
||||
@@ -111,19 +111,19 @@
|
||||
},
|
||||
"boto3": {
|
||||
"hashes": [
|
||||
"sha256:03c2e1ddd29d993a6ab9b8a8fe184027957fc32bd405c496ad0c30311445925f",
|
||||
"sha256:4ea3319bba2e8ff7cd9560259ae64f073c7fb6312158aa375777687231cabe69"
|
||||
"sha256:2272a060005bf8299f7342cbf1344304eb44b7060cddba6784f676e3bc737bb8",
|
||||
"sha256:deb53ad15ff0e75ae0be6d7115a2d34e4bafb0541484485f0feb61dabdfb5513"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"version": "==1.26.112"
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==1.26.115"
|
||||
},
|
||||
"botocore": {
|
||||
"hashes": [
|
||||
"sha256:1f52d9371d7b5ee30a53dcef7954c3cf22e04b131cfab5268035f3299ccde9e1",
|
||||
"sha256:2cbaddb09b46dcb0a05490724d51acb224d3a8df433c347f995b4d78bfb02c8a"
|
||||
"sha256:58eee8cf8f4f3e515df29f6dc535dd86ed3f4cea40999c5bc74640ff40bdc71f",
|
||||
"sha256:dff327977d7c9f98f2dc54b51b8f70326952dd50ae23b885fdfa8bfeec014b76"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"version": "==1.29.112"
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==1.29.115"
|
||||
},
|
||||
"brotli": {
|
||||
"hashes": [
|
||||
@@ -410,7 +410,7 @@
|
||||
"sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e",
|
||||
"sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==8.1.3"
|
||||
},
|
||||
"cloudscraper": {
|
||||
@@ -465,7 +465,7 @@
|
||||
"sha256:070b29b5bbf4b1ec2cd51c96ea040dc68a614de703910a91ad1abba18f9f379f",
|
||||
"sha256:86b8b7517efcc558f085a142cdb7620f0921543fcabdb538c8a4c4001d8178e3"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==1.1.8"
|
||||
},
|
||||
"exceptiongroup": {
|
||||
@@ -478,11 +478,11 @@
|
||||
},
|
||||
"fastapi": {
|
||||
"hashes": [
|
||||
"sha256:99d4fdb10e9dd9a24027ac1d0bd4b56702652056ca17a6c8721eec4ad2f14e18",
|
||||
"sha256:daf73bbe844180200be7966f68e8ec9fd8be57079dff1bacb366db32729e6eb5"
|
||||
"sha256:9569f0a381f8a457ec479d90fa01005cfddaae07546eb1f3fa035bc4797ae7d5",
|
||||
"sha256:a870d443e5405982e1667dfe372663abf10754f246866056336d7f01c21dab07"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==0.95.0"
|
||||
"version": "==0.95.1"
|
||||
},
|
||||
"fastapi-utils": {
|
||||
"hashes": [
|
||||
@@ -501,18 +501,18 @@
|
||||
},
|
||||
"filelock": {
|
||||
"hashes": [
|
||||
"sha256:3618c0da67adcc0506b015fd11ef7faf1b493f0b40d87728e19986b536890c37",
|
||||
"sha256:f08a52314748335c6460fc8fe40cd5638b85001225db78c2aa01c8c0db83b318"
|
||||
"sha256:ad98852315c2ab702aeb628412cbf7e95b7ce8c3bf9565670b4eaecf1db370a9",
|
||||
"sha256:fc03ae43288c013d2ea83c8597001b1129db351aad9c57fe2409327916b8e718"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"version": "==3.11.0"
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==3.12.0"
|
||||
},
|
||||
"flask": {
|
||||
"hashes": [
|
||||
"sha256:7eb373984bf1c770023fce9db164ed0c3353cd0b53f130f4693da0ca756a2e6d",
|
||||
"sha256:c0bec9477df1cb867e5a67c9e1ab758de9cb4a3e52dd70681f59fa40a62b3f2d"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==2.2.3"
|
||||
},
|
||||
"flower": {
|
||||
@@ -535,16 +535,16 @@
|
||||
"sha256:4b9bb5d5a380a0befa0573b302651b8a9a89262c1730e37bf423cec511804c22",
|
||||
"sha256:ce222e27b0de0d7bc63eb043b956996d6dccab14cc3b690aaea91c9cc99dc16e"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==2.11.0"
|
||||
},
|
||||
"google-api-python-client": {
|
||||
"hashes": [
|
||||
"sha256:07b21ef21a542dd69cd7c09817a6079b2769cc2a791981402e8f0fcdb2d47f90",
|
||||
"sha256:baf3c6f9b1679d89fcb88c29941a8b04b9a815d721880786baecc6a7f5bd376f"
|
||||
"sha256:0f320190ab9d5bd2fdb0cb894e8e53bb5e17d4888ee8dc4d26ba65ce378409e2",
|
||||
"sha256:3ca4e93821f4e9ac29b91ab0d9df168b42c8ad0fb8bff65b8c2ccb2d462b0464"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"version": "==2.85.0"
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==2.86.0"
|
||||
},
|
||||
"google-auth": {
|
||||
"hashes": [
|
||||
@@ -574,7 +574,7 @@
|
||||
"sha256:4168fcb568a826a52f23510412da405abd93f4d23ba544bb68d943b14ba3cb44",
|
||||
"sha256:b287dc48449d1d41af0c69f4ea26242b5ae4c3d7249a38b0984c86a4caffff1f"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==1.59.0"
|
||||
},
|
||||
"greenlet": {
|
||||
@@ -656,7 +656,7 @@
|
||||
"sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d",
|
||||
"sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==0.14.0"
|
||||
},
|
||||
"httplib2": {
|
||||
@@ -672,7 +672,7 @@
|
||||
"sha256:401201aca462749773f02920139f302450cb548b70489b9b4b92be39fe3c3c50",
|
||||
"sha256:5f1f22bc65911eb1a6ffe7659bd6598e33dcfeeb904eb16ee1e705a09bf75916"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==4.6.0"
|
||||
},
|
||||
"idna": {
|
||||
@@ -688,7 +688,7 @@
|
||||
"sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3",
|
||||
"sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==2.0.0"
|
||||
},
|
||||
"instaloader": {
|
||||
@@ -703,7 +703,7 @@
|
||||
"sha256:2c2349112351b88699d8d4b6b075022c0808887cb7ad10069318a8b0bc88db44",
|
||||
"sha256:5dbbc68b317e5e42f327f9021763545dc3fc3bfe22e6deb96aaf1fc38874156a"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==2.1.2"
|
||||
},
|
||||
"jinja2": {
|
||||
@@ -719,7 +719,7 @@
|
||||
"sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980",
|
||||
"sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==1.0.1"
|
||||
},
|
||||
"kombu": {
|
||||
@@ -826,7 +826,7 @@
|
||||
"sha256:c97c79c018b9165ac9922ae4f32da095ffd3c4e6872b45eded42926deea46818",
|
||||
"sha256:d60a3903dc3bb01a18ad6a89cdbe2e4eadc69c0bc8ef1e3773ba53d44c3f7a34"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==1.2.4"
|
||||
},
|
||||
"markdown-it-py": {
|
||||
@@ -834,7 +834,7 @@
|
||||
"sha256:5a35f8d1870171d9acc47b99612dc146129b631baf04970128b568f190d0cc30",
|
||||
"sha256:7c9a5e412688bc771c67432cbfebcdd686c93ce6484913dccf06cb5a0bea35a1"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==2.2.0"
|
||||
},
|
||||
"markupsafe": {
|
||||
@@ -890,7 +890,7 @@
|
||||
"sha256:f2bfb563d0211ce16b63c7cb9395d2c682a23187f54c3d79bfec33e6705473c6",
|
||||
"sha256:f8ffb705ffcf5ddd0e80b65ddf7bed7ee4f5a441ea7d3419e861a12eaf41af58"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==2.1.2"
|
||||
},
|
||||
"marshmallow": {
|
||||
@@ -898,7 +898,7 @@
|
||||
"sha256:90032c0fd650ce94b6ec6dc8dfeb0e3ff50c144586462c389b81a07205bedb78",
|
||||
"sha256:93f0958568da045b0021ec6aeb7ac37c81bfcccbb9a0e7ed8559885070b3a19b"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==3.19.0"
|
||||
},
|
||||
"marshmallow-enum": {
|
||||
@@ -913,7 +913,7 @@
|
||||
"sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8",
|
||||
"sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==0.1.2"
|
||||
},
|
||||
"mutagen": {
|
||||
@@ -921,7 +921,7 @@
|
||||
"sha256:6e5f8ba84836b99fe60be5fb27f84be4ad919bbb6b49caa6ae81e70584b55e58",
|
||||
"sha256:8af0728aa2d5c3ee5a727e28d0627966641fddfe804c23eabb5926a4d770aed5"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==1.46.0"
|
||||
},
|
||||
"mypy-extensions": {
|
||||
@@ -952,7 +952,7 @@
|
||||
"sha256:6f82bd3de45da303cf1f771ecafa1633750a358436a8bb60e06a1ceb745d2672",
|
||||
"sha256:c4ab89a56575d6d38a05aa16daeaa333109c1f96167aba8901ab18b6b5e0f7f5"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==1.2.0"
|
||||
},
|
||||
"packaging": {
|
||||
@@ -960,7 +960,7 @@
|
||||
"sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61",
|
||||
"sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==23.1"
|
||||
},
|
||||
"pluggy": {
|
||||
@@ -994,7 +994,7 @@
|
||||
"sha256:f08aa300b67f1c012100d8eb62d47129e53d1150f4469fd78a29fa3cb68c66f2",
|
||||
"sha256:f2f4710543abec186aee332d6852ef5ae7ce2e9e807a3da570f36de5a732d88e"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==4.22.3"
|
||||
},
|
||||
"py": {
|
||||
@@ -1132,16 +1132,16 @@
|
||||
"sha256:ecbbc51391248116c0a055899e6c3e7ffbb11fb5e2a4cd6f2d0b93272118a209",
|
||||
"sha256:f4a2b50e2b03d5776e7f21af73e2070e1b5c0d0df255a827e7c632962f8315af"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==1.10.7"
|
||||
},
|
||||
"pygments": {
|
||||
"hashes": [
|
||||
"sha256:77a3299119af881904cd5ecd1ac6a66214b6e9bed1f2db16993b54adede64094",
|
||||
"sha256:f7e36cffc4c517fbc252861b9a6e4644ca0e5abadf9a113c72d1358ad09b9500"
|
||||
"sha256:8ace4d3c1dd481894b2005f560ead0f9f19ee64fe983366be1a21e171d12775c",
|
||||
"sha256:db2db3deb4b4179f399a09054b023b6a586b76499d36965813c71aa8ed7b5fd1"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"version": "==2.15.0"
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==2.15.1"
|
||||
},
|
||||
"pyparsing": {
|
||||
"hashes": [
|
||||
@@ -1188,7 +1188,7 @@
|
||||
"sha256:70ca6ea68fe63ecc8fa4fcf00ae651fc8a5d02d93dcd12ae6d4fc7ca46c4d395",
|
||||
"sha256:ce0d46ddb668b3be82f4ed5e503dbc33dd815d83e2eb6824211310d3fb172a27"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==8.0.1"
|
||||
},
|
||||
"python-twitter-v2": {
|
||||
@@ -1196,7 +1196,7 @@
|
||||
"sha256:2397d518c17bfbc16a3d414b1cf6d3c231fd8d322f21c755ac2215c9ee675537",
|
||||
"sha256:4e03a30b2570fa4f17fbc7293d850fb8276c66be106d55e460b9287de37e1dd2"
|
||||
],
|
||||
"markers": "python_version >= '3.6' and python_version < '4.0'",
|
||||
"markers": "python_version >= '3.6' and python_version < '4'",
|
||||
"version": "==0.8.1"
|
||||
},
|
||||
"pytz": {
|
||||
@@ -1363,7 +1363,7 @@
|
||||
"sha256:22b74cae0278fd5086ff44144d3813be1cedc9115bdfabbfefd86400cb88b20a",
|
||||
"sha256:b5d573e13605423ec80bdd0cd5f8541f7844a0e71a13f74cf454ccb2f490708b"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==13.3.4"
|
||||
},
|
||||
"rsa": {
|
||||
@@ -1371,7 +1371,7 @@
|
||||
"sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7",
|
||||
"sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"
|
||||
],
|
||||
"markers": "python_version >= '3.6' and python_version < '4.0'",
|
||||
"markers": "python_version >= '3.6' and python_version < '4'",
|
||||
"version": "==4.9"
|
||||
},
|
||||
"s3transfer": {
|
||||
@@ -1379,7 +1379,7 @@
|
||||
"sha256:06176b74f3a15f61f1b4f25a1fc29a4429040b7647133a463da8fa5bd28d5ecd",
|
||||
"sha256:2ed07d3866f523cc561bf4a00fc5535827981b117dd7876f036b0c1aca42c947"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==0.6.0"
|
||||
},
|
||||
"selenium": {
|
||||
@@ -1387,7 +1387,7 @@
|
||||
"sha256:28430ac54a54fa59ad1f5392a1b89b169fe3ab2c2ccd1a9a10b6fe74f36cd6da",
|
||||
"sha256:61cda3a304f82637162bc155cae7bf88fdb04c115fa2cb1c1c2e1358fcd19a9f"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==4.8.3"
|
||||
},
|
||||
"six": {
|
||||
@@ -1403,7 +1403,7 @@
|
||||
"sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101",
|
||||
"sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==1.3.0"
|
||||
},
|
||||
"snscrape": {
|
||||
@@ -1423,11 +1423,11 @@
|
||||
},
|
||||
"soupsieve": {
|
||||
"hashes": [
|
||||
"sha256:49e5368c2cda80ee7e84da9dbe3e110b70a4575f196efb74e51b94549d921955",
|
||||
"sha256:e28dba9ca6c7c00173e34e4ba57448f0688bb681b7c5e8bf4971daafc093d69a"
|
||||
"sha256:1c1bfee6819544a3447586c889157365a27e10d88cde3ad3da0cf0ddf646feb8",
|
||||
"sha256:89d12b2d5dfcd2c9e8c22326da9d9aa9cb3dfab0a83a024f05704076ee8d35ea"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"version": "==2.4"
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==2.4.1"
|
||||
},
|
||||
"sqlalchemy": {
|
||||
"hashes": [
|
||||
@@ -1481,16 +1481,16 @@
|
||||
"sha256:41da799057ea8620e4667a3e69a5b1923ebd32b1819c8fa75634bbe8d8bea9bd",
|
||||
"sha256:e87fce5d7cbdde34b76f0ac69013fd9d190d581d80681493016666e6f96c6d5e"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==0.26.1"
|
||||
},
|
||||
"telethon": {
|
||||
"hashes": [
|
||||
"sha256:613bae42acb5f2eeb1a0b92614e323021c66f374db62adf9826ea0c2c9120bb1",
|
||||
"sha256:893c10f133974fba4c53eb1736b6514d596d1cd94c83436a711f3345df945199"
|
||||
"sha256:b3990ec22351a3f3e1af376729c985025bbdd3bdabdde8c156112c3d3dfe1941",
|
||||
"sha256:edc42fd58b8e1569830d3ead564cafa60fd51d684f03ee2a1fdd5f77a5a10438"
|
||||
],
|
||||
"markers": "python_version >= '3.5'",
|
||||
"version": "==1.28.2"
|
||||
"version": "==1.28.5"
|
||||
},
|
||||
"text-unidecode": {
|
||||
"hashes": [
|
||||
@@ -1515,27 +1515,27 @@
|
||||
},
|
||||
"tornado": {
|
||||
"hashes": [
|
||||
"sha256:1d54d13ab8414ed44de07efecb97d4ef7c39f7438cf5e976ccd356bebb1b5fca",
|
||||
"sha256:20f638fd8cc85f3cbae3c732326e96addff0a15e22d80f049e00121651e82e72",
|
||||
"sha256:5c87076709343557ef8032934ce5f637dbb552efa7b21d08e89ae7619ed0eb23",
|
||||
"sha256:5f8c52d219d4995388119af7ccaa0bcec289535747620116a58d830e7c25d8a8",
|
||||
"sha256:6fdfabffd8dfcb6cf887428849d30cf19a3ea34c2c248461e1f7d718ad30b66b",
|
||||
"sha256:87dcafae3e884462f90c90ecc200defe5e580a7fbbb4365eda7c7c1eb809ebc9",
|
||||
"sha256:9b630419bde84ec666bfd7ea0a4cb2a8a651c2d5cccdbdd1972a0c859dfc3c13",
|
||||
"sha256:b8150f721c101abdef99073bf66d3903e292d851bee51910839831caba341a75",
|
||||
"sha256:ba09ef14ca9893954244fd872798b4ccb2367c165946ce2dd7376aebdde8e3ac",
|
||||
"sha256:d3a2f5999215a3a06a4fc218026cd84c61b8b2b40ac5296a6db1f1451ef04c1e",
|
||||
"sha256:e5f923aa6a47e133d1cf87d60700889d7eae68988704e20c75fb2d65677a8e4b"
|
||||
"sha256:4546003dc8b5733489139d3bff5fa6a0211be505faf819bd9970e7c2b32e8122",
|
||||
"sha256:4d349846931557b7ec92f224b5d598b160e2ba26ae1812480b42e9622c884bf7",
|
||||
"sha256:6164571f5b9f73143d1334df4584cb9ac86d20c461e17b6c189a19ead8bb93c1",
|
||||
"sha256:6cfff1e9c15c79e106b8352269d201f8fc0815914a6260f3893ca18b724ea94b",
|
||||
"sha256:720f53e6367b38190ae7fa398c25c086c69d88b3c6535bd6021a126b727fb5cd",
|
||||
"sha256:912df5712024564e362ecce43c8d5862e14c78c8dd3846c9d889d44fbd7f4951",
|
||||
"sha256:c37b6a384d54ce6a31168d40ab21ad2591ddaf34973075cc0cad154402ecd9e8",
|
||||
"sha256:c659ab04d5aa477dbe44152c67d93f3ad3243b992d94f795ca1d5c73c37337ce",
|
||||
"sha256:c9114a61a4588c09065b9996ae05462350d17160b92b9bf9a1e93689cc0424dc",
|
||||
"sha256:d68f3192936ff2c4add04dc21a436a43b4408d466746b78bb2b9d0a53a18683f",
|
||||
"sha256:d7b737e18f701de3e4a3b0824260b4d740e4d60607b8089bb80e80ffd464780e"
|
||||
],
|
||||
"markers": "python_full_version >= '3.5.2'",
|
||||
"version": "==6.2"
|
||||
"version": "==6.3"
|
||||
},
|
||||
"tqdm": {
|
||||
"hashes": [
|
||||
"sha256:1871fb68a86b8fb3b59ca4cdd3dcccbc7e6d613eeed31f4c332531977b89beb5",
|
||||
"sha256:c4f53a17fe37e132815abceec022631be8ffe1b9381c2e6e30aa70edc99e9671"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==4.65.0"
|
||||
},
|
||||
"trio": {
|
||||
@@ -1543,7 +1543,7 @@
|
||||
"sha256:ce68f1c5400a47b137c5a4de72c7c901bd4e7a24fbdebfe9b41de8c6c04eaacf",
|
||||
"sha256:f1dd0780a89bfc880c7c7994519cb53f62aacb2c25ff487001c0052bd721cdf0"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==0.22.0"
|
||||
},
|
||||
"trio-websocket": {
|
||||
@@ -1551,7 +1551,7 @@
|
||||
"sha256:0908435e4eecc49d830ae1c4d6c47b978a75f00594a2be2104d58b61a04cdb53",
|
||||
"sha256:af13e9393f9051111300287947ec595d601758ce3d165328e7d36325135a8d62"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==0.10.2"
|
||||
},
|
||||
"typing-extensions": {
|
||||
@@ -1559,7 +1559,7 @@
|
||||
"sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb",
|
||||
"sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==4.5.0"
|
||||
},
|
||||
"typing-inspect": {
|
||||
@@ -1582,7 +1582,7 @@
|
||||
"sha256:3f21d09e1b2aa9f2dacca12da240ca37de3ba5237a93addfd6d593afe9073355",
|
||||
"sha256:b44c4388f3d34f25862cfbb387578a4d70fec417649da694a132f628a23367e2"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==4.3"
|
||||
},
|
||||
"uritemplate": {
|
||||
@@ -1629,7 +1629,7 @@
|
||||
"sha256:5a32fb5419f7bb8bd35de8548948fe27a06f857a4d086c87e142bf07aabc3fd7",
|
||||
"sha256:a87c5aa7c1570c3aa87031e78c2052105e3681f57503fd4cb56470c3ab6106d6"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==0.3.15"
|
||||
},
|
||||
"websockets": {
|
||||
@@ -1704,7 +1704,7 @@
|
||||
"sha256:fe10ddc59b304cb19a1bdf5bd0a7719cbbc9fbdd57ac80ed436b709fcf889106",
|
||||
"sha256:ff64a1d38d156d429404aaa84b27305e957fd10c30e5880d1765c9480bea490f"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==10.4"
|
||||
},
|
||||
"werkzeug": {
|
||||
@@ -1712,7 +1712,7 @@
|
||||
"sha256:2e1ccc9417d4da358b9de6f174e3ac094391ea1d4fbef2d667865d819dfd0afe",
|
||||
"sha256:56433961bc1f12533306c624f3be5e744389ac61d722175d543e1751285da612"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==2.2.3"
|
||||
},
|
||||
"wsproto": {
|
||||
@@ -1720,7 +1720,7 @@
|
||||
"sha256:ad565f26ecb92588a3e43bc3d96164de84cd9902482b130d0ddbaa9664a85065",
|
||||
"sha256:b9acddd652b585d75b20477888c56642fdade28bdfd3579aa24a4d2c037dd736"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==1.2.0"
|
||||
},
|
||||
"yt-dlp": {
|
||||
@@ -1728,9 +1728,43 @@
|
||||
"sha256:3b2df037c80922f0f83f63ee2f9253496b4a8668c0fe8d2a836ba9040f853b07",
|
||||
"sha256:9af92de5effc193bdb51216d9ebf28874d96180d202fae752b0d9f2a63380f3a"
|
||||
],
|
||||
"markers": "python_full_version >= '3.7.0'",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==2023.2.17"
|
||||
}
|
||||
},
|
||||
"develop": {}
|
||||
"develop": {
|
||||
"watchdog": {
|
||||
"hashes": [
|
||||
"sha256:0e06ab8858a76e1219e68c7573dfeba9dd1c0219476c5a44d5333b01d7e1743a",
|
||||
"sha256:13bbbb462ee42ec3c5723e1205be8ced776f05b100e4737518c67c8325cf6100",
|
||||
"sha256:233b5817932685d39a7896b1090353fc8efc1ef99c9c054e46c8002561252fb8",
|
||||
"sha256:25f70b4aa53bd743729c7475d7ec41093a580528b100e9a8c5b5efe8899592fc",
|
||||
"sha256:2b57a1e730af3156d13b7fdddfc23dea6487fceca29fc75c5a868beed29177ae",
|
||||
"sha256:336adfc6f5cc4e037d52db31194f7581ff744b67382eb6021c868322e32eef41",
|
||||
"sha256:3aa7f6a12e831ddfe78cdd4f8996af9cf334fd6346531b16cec61c3b3c0d8da0",
|
||||
"sha256:3ed7c71a9dccfe838c2f0b6314ed0d9b22e77d268c67e015450a29036a81f60f",
|
||||
"sha256:4c9956d27be0bb08fc5f30d9d0179a855436e655f046d288e2bcc11adfae893c",
|
||||
"sha256:4d98a320595da7a7c5a18fc48cb633c2e73cda78f93cac2ef42d42bf609a33f9",
|
||||
"sha256:4f94069eb16657d2c6faada4624c39464f65c05606af50bb7902e036e3219be3",
|
||||
"sha256:5113334cf8cf0ac8cd45e1f8309a603291b614191c9add34d33075727a967709",
|
||||
"sha256:51f90f73b4697bac9c9a78394c3acbbd331ccd3655c11be1a15ae6fe289a8c83",
|
||||
"sha256:5d9f3a10e02d7371cd929b5d8f11e87d4bad890212ed3901f9b4d68767bee759",
|
||||
"sha256:7ade88d0d778b1b222adebcc0927428f883db07017618a5e684fd03b83342bd9",
|
||||
"sha256:7c5f84b5194c24dd573fa6472685b2a27cc5a17fe5f7b6fd40345378ca6812e3",
|
||||
"sha256:7e447d172af52ad204d19982739aa2346245cc5ba6f579d16dac4bfec226d2e7",
|
||||
"sha256:8ae9cda41fa114e28faf86cb137d751a17ffd0316d1c34ccf2235e8a84365c7f",
|
||||
"sha256:8f3ceecd20d71067c7fd4c9e832d4e22584318983cabc013dbf3f70ea95de346",
|
||||
"sha256:9fac43a7466eb73e64a9940ac9ed6369baa39b3bf221ae23493a9ec4d0022674",
|
||||
"sha256:a70a8dcde91be523c35b2bf96196edc5730edb347e374c7de7cd20c43ed95397",
|
||||
"sha256:adfdeab2da79ea2f76f87eb42a3ab1966a5313e5a69a0213a3cc06ef692b0e96",
|
||||
"sha256:ba07e92756c97e3aca0912b5cbc4e5ad802f4557212788e72a72a47ff376950d",
|
||||
"sha256:c07253088265c363d1ddf4b3cdb808d59a0468ecd017770ed716991620b8f77a",
|
||||
"sha256:c9d8c8ec7efb887333cf71e328e39cffbf771d8f8f95d308ea4125bf5f90ba64",
|
||||
"sha256:d00e6be486affb5781468457b21a6cbe848c33ef43f9ea4a73b4882e5f188a44",
|
||||
"sha256:d429c2430c93b7903914e4db9a966c7f2b068dd2ebdd2fa9b9ce094c7d459f33"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==3.0.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
from functools import cache
|
||||
from sqlalchemy.orm import Session, load_only
|
||||
from sqlalchemy import Column
|
||||
from loguru import logger
|
||||
from . import models, schemas
|
||||
import yaml
|
||||
|
||||
## --------------- TASK = Archive
|
||||
|
||||
def get_task(db: Session, task_id: str):
    """Fetch a single archive task by its id.

    Builds on ``base_query(db)`` and returns the first row whose
    ``Archive.id`` equals ``task_id`` (whatever ``.first()`` yields when
    nothing matches).
    """
    matching = base_query(db).filter(models.Archive.id == task_id)
    return matching.first()
|
||||
@@ -17,22 +19,19 @@ def search_tasks_by_url(db: Session, url:str, skip: int = 0, limit: int = 100):
|
||||
def search_tasks_by_email(db: Session, email: str, skip: int = 0, limit: int = 100):
    """List archive tasks whose author has the given email, paginated.

    Args:
        db: open SQLAlchemy session.
        email: author email to match through the ``Archive.author`` relationship.
        skip: number of rows to skip (offset).
        limit: maximum number of rows to return.
    """
    by_author = base_query(db).filter(models.Archive.author.has(email=email))
    page = by_author.offset(skip).limit(limit)
    return page.all()
|
||||
|
||||
def create_task(db: Session, task: schemas.TaskCreate):
|
||||
db_task = models.Archive(id=task.id, url=task.url, author=task.author, result=task.result)
|
||||
def create_task(db: Session, task: schemas.ArchiveCreate, tags: list[models.Tag], urls: list[models.ArchiveUrl]):
    """Persist a new Archive row together with its tags and URLs.

    Args:
        db: open SQLAlchemy session; the row is added, committed and refreshed.
        task: validated payload describing the archive.
        tags: Tag model instances to attach to the archive.
        urls: ArchiveUrl model instances to attach to the archive.

    Returns:
        The refreshed ``models.Archive`` instance.
    """
    db_task = models.Archive(
        id=task.id,
        url=task.url,
        result=task.result,
        # Carry the visibility flag through: it used to be dropped here, so a
        # request with public=False still ended up with the column default.
        public=task.public,
        author_id=task.author_id,
        group_id=task.group_id,
    )
    logger.debug(tags)
    # TODO: test that this works for tags not previously persisted via create_tag
    db_task.tags = tags
    # TODO: test that this assignment also creates the ArchiveUrl rows
    db_task.urls = urls
    db.add(db_task)
    db.commit()
    db.refresh(db_task)
    return db_task
|
||||
|
||||
# def delete_task(db: Session, task_id: str, email:str)->bool:
|
||||
# db_task = db.query(models.Task).filter(models.Task.id == task_id, models.Task.author==email).first()
|
||||
# if db_task:
|
||||
# db.delete(db_task)
|
||||
# db.commit()
|
||||
# return db_task is not None
|
||||
|
||||
def soft_delete_task(db: Session, task_id: str, email:str)->bool:
|
||||
db_task = db.query(models.Archive).filter(models.Archive.id == task_id, models.Archive.author==email, models.Archive.deleted==False).first()
|
||||
# TODO: implement hard-delete with cronjob that deletes from S3
|
||||
db_task = db.query(models.Archive).filter(models.Archive.id == task_id, models.Archive.author_id==email, models.Archive.deleted==False).first()
|
||||
if db_task:
|
||||
db_task.deleted = True
|
||||
db.commit()
|
||||
@@ -44,6 +43,31 @@ def base_query(db:Session):
|
||||
.options(load_only(models.Archive.id, models.Archive.created_at, models.Archive.url, models.Archive.result))\
|
||||
.filter(models.Archive.deleted == False)
|
||||
|
||||
## --------------- TAG
|
||||
|
||||
def create_tag(db: Session, tag: str):
    """Return the Tag whose id is ``tag``, creating it first if it is missing.

    A newly created tag is added, committed and refreshed before being
    returned; an existing tag is returned as found.
    """
    existing = db.query(models.Tag).filter(models.Tag.id == tag).first()
    if existing:
        return existing

    new_tag = models.Tag(id=tag)
    db.add(new_tag)
    db.commit()
    db.refresh(new_tag)
    return new_tag
|
||||
|
||||
def search_tags(db: Session, tag: str, skip: int = 0, limit: int = 100):
    """Return tags whose id contains ``tag`` as a substring, paginated.

    Args:
        db: open SQLAlchemy session.
        tag: text to look for; it is wrapped in ``%...%`` for a SQL LIKE, so
            any ``%`` or ``_`` inside it also act as wildcards.
        skip: number of rows to skip (offset).
        limit: maximum number of rows to return.
    """
    # Tags are keyed by `id` (create_tag stores the tag text there) and the
    # Tag model has no `url` column, so the previous filter on Tag.url failed
    # with AttributeError before any query was issued.
    pattern = f'%{tag}%'
    return db.query(models.Tag).filter(models.Tag.id.like(pattern)).offset(skip).limit(limit).all()
|
||||
|
||||
|
||||
def get_group_for_user(db: Session, group_name: str, email: str) -> models.Group:
    """Look up the membership linking user ``email`` to group ``group_name``.

    The query runs against the user/group association table and returns its
    first match.

    NOTE(review): the return annotation says ``models.Group``, but the query
    targets the association table rather than ``models.Group`` -- presumably
    callers only check whether a result exists; confirm before relying on
    Group attributes.
    """
    memberships = db.query(models.association_table_user_groups)
    return memberships.filter_by(user_id=email, group_id=group_name).first()
|
||||
|
||||
def get_user_groups(db: Session, email: str):
    """Return the group ids associated with user ``email`` as a flat list."""
    memberships = db.query(models.association_table_user_groups).filter_by(user_id=email)
    rows = memberships.with_entities(Column("group_id")).all()
    # Only the group_id column was selected; unwrap it from each result row.
    return [row[0] for row in rows]
|
||||
|
||||
|
||||
## --------------- INIT User-Groups
|
||||
|
||||
@cache
|
||||
def get_group(db:Session, group_name:str)->models.Group:
|
||||
db_group = db.query(models.Group).filter(models.Group.id==group_name).first()
|
||||
|
||||
@@ -4,25 +4,8 @@ from sqlalchemy.orm import relationship
|
||||
import uuid
|
||||
from .database import Base
|
||||
|
||||
|
||||
# class Task(Base):
|
||||
# __tablename__ = "tasks"
|
||||
|
||||
# id = Column(String, primary_key=True, index=True)
|
||||
# url = Column(String, index=True)
|
||||
# author = Column(String, index=True)
|
||||
# result = Column(JSON, default=None)
|
||||
# created_at = Column(DateTime(timezone=True), server_default=func.now())
|
||||
# # updated_at = Column(DateTime(timezone=True), onupdate=func.now())
|
||||
# deleted = Column(Boolean, default=False)
|
||||
|
||||
|
||||
# # items = relationship("Item", back_populates="owner")
|
||||
# # tags = relationship("Tag", back_populates="owner")
|
||||
|
||||
def generate_uuid():
    """Return a freshly generated random (version 4) UUID as a string."""
    fresh_id = uuid.uuid4()
    return str(fresh_id)
|
||||
### new data model below
|
||||
|
||||
# many to many association tables
|
||||
association_table_archive_tags = Table(
|
||||
@@ -45,23 +28,33 @@ class Archive(Base):
|
||||
id = Column(String, primary_key=True, index=True)
|
||||
url = Column(String, index=True)
|
||||
result = Column(JSON, default=None)
|
||||
public = Column(Boolean, default=True) # if public=false, access to group and author
|
||||
deleted = Column(Boolean, default=False)
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now())
|
||||
updated_at = Column(DateTime(timezone=True), onupdate=func.now())
|
||||
deleted = Column(Boolean, default=False)
|
||||
public = Column(Boolean, default=True) # if public=false, access to group and author
|
||||
|
||||
group_id = Column(String, ForeignKey("groups.id"), default=None)
|
||||
author_id = Column(String, ForeignKey("users.email"))
|
||||
|
||||
group = relationship("Group", back_populates="archives")
|
||||
tags = relationship("Tag", back_populates="archives", secondary=association_table_archive_tags)
|
||||
group = relationship("Group", back_populates="archives")
|
||||
author = relationship("User", back_populates="archives")
|
||||
urls = relationship("ArchiveUrl", back_populates="archive")
|
||||
|
||||
class ArchiveUrl(Base):
    """A URL row linked to an archive, stored in the ``archive_urls`` table."""

    __tablename__ = "archive_urls"

    # The URL string itself is the primary key.
    url = Column(String, primary_key=True, index=True)
    # Optional free-form key for the URL; NOTE(review): meaning is not visible here.
    key = Column(String, default=None)
    # Foreign key to the owning row in "archives".
    archive_id = Column(String, ForeignKey("archives.id"))

    # Back-reference paired with Archive.urls.
    archive = relationship("Archive", back_populates="urls")
|
||||
|
||||
|
||||
class Tag(Base):
|
||||
__tablename__ = "tags"
|
||||
|
||||
id = Column(String, primary_key=True, index=True, default=generate_uuid)
|
||||
name = Column(String, unique=True, index=True)
|
||||
id = Column(String, primary_key=True, index=True)
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now())
|
||||
|
||||
archives = relationship("Archive", back_populates="tags", secondary=association_table_archive_tags)
|
||||
|
||||
@@ -1,16 +1,31 @@
|
||||
from pydantic import BaseModel
|
||||
from datetime import datetime
|
||||
|
||||
class TaskCreate(BaseModel):
|
||||
id: str
|
||||
class ArchiveCreate(BaseModel):
|
||||
id: str | None = None
|
||||
url: str
|
||||
author: str
|
||||
result: dict
|
||||
result: dict | None = None
|
||||
public: bool = True
|
||||
author_id: str | None = None
|
||||
group_id: str | None = None
|
||||
tags: list = []
|
||||
# urls: list = []
|
||||
|
||||
|
||||
class Task(TaskCreate):
|
||||
|
||||
class Archive(ArchiveCreate):
|
||||
created_at: datetime
|
||||
updated_at: datetime | None
|
||||
deleted: bool
|
||||
|
||||
class Config:
|
||||
orm_mode = True
|
||||
orm_mode = True
|
||||
|
||||
|
||||
# class TagCreate(BaseModel):
|
||||
# id: str
|
||||
|
||||
# class Tag(TagCreate):
|
||||
# created_at: datetime
|
||||
# # class Config:
|
||||
# # orm_mode = True
|
||||
57
src/main.py
57
src/main.py
@@ -15,14 +15,15 @@ from worker import create_archive_task, celery
|
||||
from db import crud, models, schemas
|
||||
from db.database import engine, SessionLocal
|
||||
from sqlalchemy.orm import Session
|
||||
from security import get_bearer_auth, get_basic_auth
|
||||
from security import get_bearer_auth, get_basic_auth, bearer_security
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# Configuration
|
||||
ALLOWED_ORIGINS = os.environ.get("ALLOWED_ORIGINS", "chrome-extension://ondkcheoicfckabcnkdgbepofpjmjcmb,chrome-extension://ojcimmjndnlmmlgnjaeojoebaceokpdp").split(",")
|
||||
VERSION = "0.2.0"
|
||||
|
||||
VERSION = "0.3.1"
|
||||
# min-version refers to the version of auto-archiver-extension on the webstore
|
||||
BREAKING_CHANGES = {"minVersion": "0.3.0", "message": "The latest update has breaking changes, please update the extension to the most recent version."}
|
||||
|
||||
app = FastAPI()
|
||||
app.add_middleware(
|
||||
@@ -41,8 +42,16 @@ def get_db():
|
||||
|
||||
|
||||
@app.get("/")
|
||||
def home(): return JSONResponse({"version": VERSION})
|
||||
|
||||
async def home(request: Request):
    """Report the API version and, for authenticated callers, their groups.

    The response always contains ``version`` and ``breakingChanges``. When the
    request carries a valid bearer token, a ``groups`` entry with the result of
    ``crud.get_user_groups`` is added as well.

    Authentication failures (``HTTPException``) are deliberately ignored so the
    endpoint stays usable without credentials; any other error is logged and
    the basic status is still returned.
    """
    status = {"version": VERSION, "breakingChanges": BREAKING_CHANGES}
    try:
        # if authenticated will load available groups
        email = await get_bearer_auth(await bearer_security(request))
        # get_db() is consumed manually here (not via Depends), so nothing
        # else will finalise it: close it explicitly to release the session.
        # NOTE(review): assumes get_db is a plain generator function - confirm.
        db_gen = get_db()
        try:
            db: Session = next(db_gen)
            status["groups"] = crud.get_user_groups(db, email)
        finally:
            db_gen.close()
    except HTTPException:
        # unauthenticated caller: return the status without groups
        pass
    except Exception as e:
        logger.error(e)
    return JSONResponse(status)
|
||||
|
||||
# logging configurations
|
||||
logger.add("logs/api_logs.log", retention="30 days", rotation="3 days")
|
||||
@@ -55,36 +64,59 @@ async def logging_middleware(request: Request, call_next):
|
||||
|
||||
# Bearer protected below
|
||||
|
||||
@app.get("/tasks/search-url", response_model=list[schemas.Task])
|
||||
def search(url:str, skip: int = 0, limit: int = 100, db: Session = Depends(get_db), email = Depends(get_bearer_auth)):
|
||||
@app.get("/groups", response_model=list[str])
def get_user_groups(db: Session = Depends(get_db), email = Depends(get_bearer_auth)):
    """Return the groups of the bearer-authenticated caller.

    The lookup is delegated to ``crud.get_user_groups`` using the e-mail
    resolved by the ``get_bearer_auth`` dependency.
    """
    groups = crud.get_user_groups(db, email)
    return groups
|
||||
|
||||
@app.get("/tasks/search-url", response_model=list[schemas.Archive])
|
||||
def search(url:str, skip: int = 0, limit: int = 100, db: Session = Depends(get_db), _email = Depends(get_bearer_auth)):
|
||||
return crud.search_tasks_by_url(db, url, skip=skip, limit=limit)
|
||||
|
||||
# @app.get("/tasks/search", response_model=list[schemas.Task])
|
||||
# def search(skip: int = 0, limit: int = 100, db: Session = Depends(get_db), email = Depends(get_bearer_auth)):
|
||||
# return crud.get_tasks(db, skip=skip, limit=limit)
|
||||
|
||||
@app.get("/tasks/sync", response_model=list[schemas.Task])
|
||||
@app.get("/tasks/sync", response_model=list[schemas.Archive])
|
||||
def search(skip: int = 0, limit: int = 100, db: Session = Depends(get_db), email = Depends(get_bearer_auth)):
|
||||
return crud.search_tasks_by_email(db, email, skip=skip, limit=limit)
|
||||
|
||||
@app.post("/tasks", status_code=201)
|
||||
def run_task(payload = Body(...), email = Depends(get_bearer_auth)):
|
||||
url = payload.get('url')
|
||||
logger.info(f"new task for user {email}: {url}")
|
||||
def run_task(archive:schemas.ArchiveCreate, email = Depends(get_bearer_auth)):
|
||||
archive.author_id = email
|
||||
url = archive.url
|
||||
logger.warning(archive)
|
||||
logger.info(f"new {archive.public=} task for {email=} and {archive.group_id=}: {url}")
|
||||
if type(url)!=str or len(url)<=5:
|
||||
raise HTTPException(status_code=422, detail=f"Invalid URL received: {url}")
|
||||
task = create_archive_task.delay(url=payload.get('url'), email=email)
|
||||
logger.info("creating task")
|
||||
task = create_archive_task.delay(archive.json())
|
||||
return JSONResponse({"id": task.id})
|
||||
|
||||
# @app.post("/tasks", status_code=201)
|
||||
# def run_task(payload = Body(...), email = Depends(get_bearer_auth)):
|
||||
# url = payload.get('url')
|
||||
# public = payload.get('public', True)
|
||||
# group = payload.get('group', None)
|
||||
# logger.info(f"new {public=} task for {email=} and {group=}: {url}")
|
||||
# if type(url)!=str or len(url)<=5:
|
||||
# raise HTTPException(status_code=422, detail=f"Invalid URL received: {url}")
|
||||
# task = create_archive_task.delay(url=payload.get('url'), email=email, public=public, group=group)
|
||||
# return JSONResponse({"id": task.id})
|
||||
|
||||
@app.get("/tasks/{task_id}")
|
||||
def get_status(task_id, email = Depends(get_bearer_auth)):
|
||||
logger.info(f"status check for user {email}")
|
||||
task_result = AsyncResult(task_id, app=celery)
|
||||
logger.info(task_result)
|
||||
result = {
|
||||
"id": task_id,
|
||||
"status": task_result.status,
|
||||
"result": task_result.result
|
||||
}
|
||||
try:
|
||||
if task_result.result and "error" in task_result.result:
|
||||
result["status"] = "FAILURE"
|
||||
except Exception as e: logger.error(traceback.format_exc())
|
||||
try:
|
||||
json_result = jsonable_encoder(result, exclude_unset=True)
|
||||
return JSONResponse(json_result)
|
||||
@@ -94,6 +126,7 @@ def get_status(task_id, email = Depends(get_bearer_auth)):
|
||||
return JSONResponse({
|
||||
"id": task_id,
|
||||
"status": "FAILURE",
|
||||
"result": {"error": e}
|
||||
})
|
||||
|
||||
|
||||
|
||||
@@ -7,19 +7,19 @@
|
||||
|
||||
# -i https://pypi.org/simple
|
||||
aiofiles==0.6.0
|
||||
aiosqlite==0.18.0
|
||||
alembic==1.9.4
|
||||
aiosqlite==0.19.0
|
||||
alembic==1.10.3
|
||||
amqp==2.6.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
|
||||
anyio==3.6.2; python_full_version >= '3.6.2'
|
||||
argparse==1.4.0
|
||||
async-generator==1.10; python_version >= '3.5'
|
||||
attrs==22.2.0; python_version >= '3.6'
|
||||
attrs==23.1.0; python_version >= '3.7'
|
||||
authlib==0.15.6
|
||||
auto-archiver==0.4.3
|
||||
beautifulsoup4==4.11.2; python_version >= '3.6'
|
||||
auto-archiver==0.5.6
|
||||
beautifulsoup4==4.12.2; python_version >= '3.6'
|
||||
billiard==3.6.4.0
|
||||
boto3==1.26.79; python_version >= '3.7'
|
||||
botocore==1.29.79; python_version >= '3.7'
|
||||
boto3==1.26.115; python_version >= '3.7'
|
||||
botocore==1.29.115; python_version >= '3.7'
|
||||
brotli==1.0.9; platform_python_implementation == 'CPython'
|
||||
bs4==0.0.1
|
||||
cachetools==5.3.0; python_version ~= '3.7'
|
||||
@@ -31,24 +31,25 @@ click==8.1.3; python_version >= '3.7'
|
||||
cloudscraper==1.2.69
|
||||
cryptography==38.0.4; python_version >= '3.6'
|
||||
dataclasses-json==0.5.7; python_version >= '3.6'
|
||||
dateparser==1.1.7; python_version >= '3.7'
|
||||
exceptiongroup==1.1.0; python_version < '3.11'
|
||||
fastapi==0.92.0
|
||||
dateparser==1.1.8; python_version >= '3.7'
|
||||
exceptiongroup==1.1.1; python_version < '3.11'
|
||||
fastapi-utils==0.2.1
|
||||
fastapi==0.95.1
|
||||
ffmpeg-python==0.2.0
|
||||
filelock==3.9.0; python_version >= '3.7'
|
||||
filelock==3.12.0; python_version >= '3.7'
|
||||
flask==2.2.3; python_version >= '3.7'
|
||||
flower==0.9.7
|
||||
future==0.18.3; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'
|
||||
google-api-core==2.11.0; python_version >= '3.7'
|
||||
google-api-python-client==2.79.0; python_version >= '3.7'
|
||||
google-api-python-client==2.86.0; python_version >= '3.7'
|
||||
google-auth-httplib2==0.1.0
|
||||
google-auth-oauthlib==1.0.0; python_version >= '3.6'
|
||||
google-auth==2.16.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
|
||||
googleapis-common-protos==1.58.0; python_version >= '3.7'
|
||||
greenlet==2.0.2; platform_machine == 'aarch64' or (platform_machine == 'ppc64le' or (platform_machine == 'x86_64' or (platform_machine == 'amd64' or (platform_machine == 'AMD64' or (platform_machine == 'win32' or platform_machine == 'WIN32')))))
|
||||
gspread==5.7.2; python_version not in '3.0, 3.1, 3.2, 3.3' and python_version >= '3.6'
|
||||
google-auth==2.17.3; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
|
||||
googleapis-common-protos==1.59.0; python_version >= '3.7'
|
||||
greenlet==2.0.2; python_version >= '3' and platform_machine == 'aarch64' or (platform_machine == 'ppc64le' or (platform_machine == 'x86_64' or (platform_machine == 'amd64' or (platform_machine == 'AMD64' or (platform_machine == 'win32' or platform_machine == 'WIN32')))))
|
||||
gspread==5.8.0; python_version not in '3.0, 3.1, 3.2, 3.3' and python_version >= '3.6'
|
||||
h11==0.14.0; python_version >= '3.7'
|
||||
httplib2==0.21.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
|
||||
httplib2==0.22.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
|
||||
humanize==4.6.0; python_version >= '3.7'
|
||||
idna==3.4; python_version >= '3.5'
|
||||
iniconfig==2.0.0; python_version >= '3.7'
|
||||
@@ -57,7 +58,7 @@ itsdangerous==2.1.2; python_version >= '3.7'
|
||||
jinja2==3.1.2
|
||||
jmespath==1.0.1; python_version >= '3.7'
|
||||
kombu==4.6.11; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
|
||||
loguru==0.6.0
|
||||
loguru==0.7.0
|
||||
lxml==4.9.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
|
||||
mako==1.2.4; python_version >= '3.7'
|
||||
markdown-it-py==2.2.0; python_version >= '3.7'
|
||||
@@ -70,59 +71,59 @@ mypy-extensions==1.0.0; python_version >= '3.5'
|
||||
oauth2client==4.1.3
|
||||
oauthlib==3.2.2; python_version >= '3.6'
|
||||
outcome==1.2.0; python_version >= '3.7'
|
||||
packaging==23.0; python_version >= '3.7'
|
||||
packaging==23.1; python_version >= '3.7'
|
||||
pluggy==0.13.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
|
||||
prometheus-client==0.8.0
|
||||
protobuf==4.22.0; python_version >= '3.7'
|
||||
protobuf==4.22.3; python_version >= '3.7'
|
||||
py==1.11.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
|
||||
pyaes==1.6.1
|
||||
pyasn1-modules==0.2.8
|
||||
pyasn1==0.4.8
|
||||
pycparser==2.21
|
||||
pycryptodomex==3.17; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
|
||||
pydantic==1.10.5; python_version >= '3.7'
|
||||
pygments==2.14.0; python_version >= '3.6'
|
||||
pydantic==1.10.7; python_version >= '3.7'
|
||||
pygments==2.15.0; python_version >= '3.7'
|
||||
pyparsing==3.0.9; python_version >= '3.1'
|
||||
pysocks==1.7.1
|
||||
pytest==6.2.4
|
||||
python-dateutil==2.8.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
|
||||
python-dotenv==1.0.0
|
||||
python-slugify==8.0.1; python_version >= '3.7'
|
||||
python-twitter-v2==0.8.1; python_version >= '3.6' and python_version < '4.0'
|
||||
python-twitter-v2==0.8.1; python_version >= '3.6' and python_version < '4'
|
||||
pytz-deprecation-shim==0.1.0.post0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
|
||||
pytz==2022.7.1
|
||||
pytz==2023.3
|
||||
pyyaml==6.0; python_version >= '3.6'
|
||||
redis==3.5.3
|
||||
regex==2022.10.31; python_version >= '3.6'
|
||||
regex==2023.3.23; python_version >= '3.8'
|
||||
requests-oauthlib==1.3.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
|
||||
requests-toolbelt==0.10.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
|
||||
requests==2.28.2
|
||||
rich==13.3.1; python_version >= '3.7'
|
||||
rsa==4.9; python_version >= '3.6' and python_version < '4.0'
|
||||
rich==13.3.4; python_version >= '3.7'
|
||||
rsa==4.9; python_version >= '3.6' and python_version < '4'
|
||||
s3transfer==0.6.0; python_version >= '3.7'
|
||||
selenium==4.8.2; python_version >= '3.7'
|
||||
selenium==4.8.3; python_version >= '3.7'
|
||||
six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
|
||||
sniffio==1.3.0; python_version >= '3.7'
|
||||
snscrape==0.5.0.20230113; python_version ~= '3.8'
|
||||
snscrape==0.6.2.20230320; python_version ~= '3.8'
|
||||
sortedcontainers==2.4.0
|
||||
soupsieve==2.4; python_version >= '3.7'
|
||||
sqlalchemy==2.0.4
|
||||
starlette==0.25.0; python_version >= '3.7'
|
||||
telethon==1.27.0; python_version >= '3.5'
|
||||
soupsieve==2.4.1; python_version >= '3.7'
|
||||
sqlalchemy==1.4.47
|
||||
starlette==0.26.1; python_version >= '3.7'
|
||||
telethon==1.28.5; python_version >= '3.5'
|
||||
text-unidecode==1.3
|
||||
tiktok-downloader==0.3.4
|
||||
toml==0.10.2; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'
|
||||
tornado==6.2; python_full_version >= '3.5.2'
|
||||
tqdm==4.64.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
|
||||
trio-websocket==0.9.2; python_version >= '3.5'
|
||||
tornado==6.3; python_full_version >= '3.5.2'
|
||||
tqdm==4.65.0; python_version >= '3.7'
|
||||
trio-websocket==0.10.2; python_version >= '3.7'
|
||||
trio==0.22.0; python_version >= '3.7'
|
||||
typing-extensions==4.5.0; python_version >= '3.7'
|
||||
typing-inspect==0.8.0
|
||||
tzdata==2022.7; python_version >= '3.6'
|
||||
tzlocal==4.2; python_version >= '3.6'
|
||||
tzdata==2023.3; python_version >= '3.6'
|
||||
tzlocal==4.3; python_version >= '3.7'
|
||||
uritemplate==4.1.1; python_version >= '3.6'
|
||||
urllib3==1.26.14; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
|
||||
uvicorn==0.20.0
|
||||
uvicorn==0.21.1
|
||||
vine==1.3.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
|
||||
vk-api==11.9.9
|
||||
vk-url-scraper==0.3.15; python_version >= '3.7'
|
||||
|
||||
115
src/worker.py
115
src/worker.py
@@ -1,12 +1,14 @@
|
||||
|
||||
import os, re
|
||||
import os, re, traceback, yaml
|
||||
|
||||
from celery import Celery
|
||||
from celery import Celery, states
|
||||
from celery.exceptions import Ignore
|
||||
from celery.signals import task_failure
|
||||
from auto_archiver import Config, ArchivingOrchestrator, Metadata
|
||||
# from auto_archiver.enrichers import ScreenshotEnricher
|
||||
from loguru import logger
|
||||
|
||||
from db import crud, schemas
|
||||
from db import crud, schemas, models
|
||||
from db.database import engine, SessionLocal
|
||||
from contextlib import contextmanager
|
||||
import json
|
||||
@@ -14,6 +16,8 @@ import json
|
||||
celery = Celery(__name__)
|
||||
celery.conf.broker_url = os.environ.get("CELERY_BROKER_URL", "redis://localhost:6379")
|
||||
celery.conf.result_backend = os.environ.get("CELERY_RESULT_BACKEND", "redis://localhost:6379")
|
||||
USER_GROUPS_FILENAME=os.environ.get("USER_GROUPS_FILENAME", "user-groups.yaml")
|
||||
|
||||
|
||||
@contextmanager
|
||||
def get_db():
|
||||
@@ -21,34 +25,89 @@ def get_db():
|
||||
try: yield session
|
||||
finally: session.close()
|
||||
|
||||
config_default = Config()
|
||||
config_default.parse(use_cli=False, yaml_config_filename=os.environ.get("ORCHESTRATION_CONFIG_DEFAULT", "secrets/orchestration.yaml"))
|
||||
@celery.task(name="create_archive_task", bind=True, autoretry_for=(Exception,), retry_backoff=True, retry_kwargs={'max_retries': 5})
|
||||
def create_archive_task(self, archive_json: str):
|
||||
|
||||
archive = schemas.ArchiveCreate.parse_raw(archive_json)
|
||||
if not archive.public and archive.group_id and len(archive.group_id) > 0:
|
||||
# ensure group is valid for user
|
||||
with get_db() as session:
|
||||
db_group = crud.get_group_for_user(session, archive.group_id, archive.author_id)
|
||||
if not db_group:
|
||||
logger.error(em := f"User {archive.author_id} is not part of {archive.group_id}, no permission")
|
||||
return {"error": em}
|
||||
|
||||
config_bcat = None
|
||||
if (config_bcat_file := os.environ.get("ORCHESTRATION_CONFIG_BELLINGCAT")):
|
||||
config_bcat = Config()
|
||||
config_bcat.parse(use_cli=False, yaml_config_filename=config_bcat_file)
|
||||
url = archive.url
|
||||
logger.info(f"{url=}")
|
||||
logger.info(f"{archive=}")
|
||||
orchestrator = choose_orchestrator(archive.group_id, archive.author_id)
|
||||
result = orchestrator.feed_item(Metadata().set_url(url))
|
||||
if not result:
|
||||
logger.error(f"UNABLE TO archive: {url}")
|
||||
return {"error": "unable to archive"}
|
||||
|
||||
orchestrators = {"bellingcat": None, "default": None}
|
||||
|
||||
@celery.task(name="create_archive_task", bind=True, autoretry_for=(Exception,), retry_backoff=True, retry_kwargs={'max_retries': 5})
|
||||
def create_archive_task(self, url: str, email:str=""):
|
||||
orchestrator = choose_orchestrator(email)
|
||||
result = orchestrator.feed_item(Metadata().set_url(url)).to_json()
|
||||
result_json = result.to_json()
|
||||
with get_db() as session:
|
||||
db_task = crud.create_task(session, task=schemas.TaskCreate(id=self.request.id, url=url, author=email, result=json.loads(result)))
|
||||
# create DB URLs
|
||||
db_urls = [models.ArchiveUrl(url=url, key=m.get("id", f"media_{i}")) for i, m in enumerate(result.media) for url in m.urls]
|
||||
# create DB TAGs if needed
|
||||
db_tags = [crud.create_tag(session, tag) for tag in archive.tags]
|
||||
# insert archive
|
||||
db_task = crud.create_task(session, task=schemas.ArchiveCreate(id=self.request.id, url=url, result=json.loads(result_json), public=archive.public, author_id=archive.author_id, group_id=archive.group_id), tags=db_tags, urls=db_urls)
|
||||
logger.debug(f"Added {db_task.id=} to database on {db_task.created_at}")
|
||||
return result
|
||||
return result_json
|
||||
|
||||
@task_failure.connect(sender=create_archive_task)
def task_failure_notifier(sender=None, **kwargs):
    """Log the failure of a ``create_archive_task`` run.

    Connected to the ``task_failure`` signal for ``create_archive_task``.
    Reads the ``exception`` and ``traceback`` entries of the signal kwargs and
    writes them to the log, followed by the formatted traceback frames.
    """
    logger.warning("😅 From task_failure_notifier ==> Task failed successfully! ")

    error = kwargs['exception']
    logger.error(error)

    tb = kwargs['traceback']
    logger.error(tb)

    # render the traceback object as readable "File ..., line ..." entries
    frames = traceback.extract_tb(tb)
    rendered = traceback.format_list(frames)
    logger.error("\n".join(rendered))
|
||||
|
||||
def choose_orchestrator(group, email):
    """Build an ``ArchivingOrchestrator`` for the given group.

    If ``group`` has no entry in ``ORCHESTRATORS``, the first group of the
    user identified by ``email`` is used instead. The resolved group must be a
    key of ``ORCHESTRATORS``; its stored value is passed to the
    ``ArchivingOrchestrator`` constructor.
    """
    known = group in ORCHESTRATORS
    if not known:
        # unknown or missing group: fall back to the user's first group
        group = get_user_first_group(email)
    assert group in ORCHESTRATORS, f"{group=} not in configurations"
    logger.info(f"CHOOSE Orchestrator for {group=}, {email=}")
    config = ORCHESTRATORS.get(group)
    return ArchivingOrchestrator(config)
|
||||
|
||||
def read_user_groups():
    """Load the user-groups YAML file named by ``USER_GROUPS_FILENAME``.

    Returns:
        The parsed document, or an empty dict when the file is empty, so
        callers can always use ``.get()`` on the result.

    Raises:
        yaml.YAMLError: if the file cannot be parsed (logged, then re-raised).
        OSError: if the file cannot be opened.
    """
    # read yaml safely
    with open(USER_GROUPS_FILENAME, encoding="utf-8") as inf:
        try:
            # safe_load yields None for an empty document; normalise to {}
            return yaml.safe_load(inf) or {}
        except yaml.YAMLError as e:
            logger.error(f"could not open user groups filename {USER_GROUPS_FILENAME}: {e}")
            # bare raise keeps the original traceback intact
            raise
|
||||
|
||||
def get_user_first_group(email):
    """Return the first group listed for ``email`` in the user-groups file.

    The file is re-read on every call. Falls back to ``"default"`` when the
    file has no ``users`` section, the user is not listed, or the user's
    group list is empty or null.
    """
    user_groups_yaml = read_user_groups() or {}
    # `or {}` / `or []` also cover keys that are present but null in the YAML
    users = user_groups_yaml.get("users") or {}
    groups = users.get(email) or []
    if groups:
        return groups[0]
    return "default"
|
||||
|
||||
|
||||
def choose_orchestrator(email):
    """Return the cached orchestrator matching the user's e-mail address.

    Addresses matching ``<word chars or dots>@bellingcat.com`` use the
    bellingcat configuration when one is loaded (``config_bcat`` is truthy);
    every other case uses the default configuration. Orchestrators are
    created on first use and stored in the module-level ``orchestrators`` dict.
    """
    is_bellingcat = re.match(r'^[\w.]+@bellingcat\.com$', email)
    if is_bellingcat and config_bcat:
        logger.debug("Using bellingcat config for orchestration")
        cached = orchestrators["bellingcat"]
        if not cached:
            cached = orchestrators["bellingcat"] = ArchivingOrchestrator(config_bcat)
        return cached

    logger.debug("Using default config for orchestration")
    cached = orchestrators["default"]
    if not cached:
        cached = orchestrators["default"] = ArchivingOrchestrator(config_default)
    return cached
|
||||
def load_orchestrators():
    """
    reads the orchestrators key in the config file to load different orchestrators for different groups

    Rebinds the module-level ``ORCHESTRATORS`` to a fresh dict mapping each
    group name to a parsed ``Config`` built from that group's YAML config
    filename, and returns it. The user-groups file must contain a non-empty
    ``orchestrators`` mapping that includes a ``default`` entry.
    """
    global ORCHESTRATORS
    ORCHESTRATORS = {}
    user_groups_yaml = read_user_groups() or {}

    # `or {}` so a missing/null key reaches the explanatory assertion below
    orchestrators_config = user_groups_yaml.get("orchestrators") or {}
    assert len(orchestrators_config), f"No orchestrators key found in {USER_GROUPS_FILENAME}. please see the example file"
    assert "default" in orchestrators_config, "please include a 'default' orchestrator to be used when the user has no group"
    logger.debug(f"Found {len(orchestrators_config)} group orchestrators.")

    for group, config_filename in orchestrators_config.items():
        config = Config()
        config.parse(use_cli=False, yaml_config_filename=config_filename)
        ORCHESTRATORS[group] = config
    return ORCHESTRATORS
|
||||
|
||||
|
||||
## INIT
|
||||
|
||||
ORCHESTRATORS = {}
|
||||
load_orchestrators()
|
||||
Reference in New Issue
Block a user