diff --git a/Pipfile b/Pipfile index 5e4e56b..8e5a39d 100644 --- a/Pipfile +++ b/Pipfile @@ -22,6 +22,8 @@ psycopg2 = "*" tqdm = "*" ratelimit = "*" pytz = "*" +langdetect = "*" +spacy = "==3.2.4" [dev-packages] pytest = "*" diff --git a/Pipfile.lock b/Pipfile.lock index e373452..cbdd3bc 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "bd884a30c799fc7b881926bd6a894fb36cc6710f1221c84c0e6be34b8836fa7d" + "sha256": "2fbcb9a9c7df5e0c994f4be5e88b9ac88aed1ddd4383760b2d6cf964738cf993" }, "pipfile-spec": 6, "requires": { @@ -16,17 +16,1173 @@ ] }, "default": { + "beautifulsoup4": { + "hashes": [ + "sha256:58d5c3d29f5a36ffeb94f02f0d786cd53014cf9b3b3951d42e0080d8a9498d30", + "sha256:ad9aa55b65ef2808eb405f46cf74df7fcb7044d5cbc26487f96eb2ef2e436693" + ], + "version": "==4.11.1" + }, + "blis": { + "hashes": [ + "sha256:148f59a0a47a38ce82e3afc50c709494d5e5a494bef28ce1519c7a17346c645b", + "sha256:1667db8439d9ca41c0c1f0ea954d87462be01b125436c4b264f73603c9fb4e82", + "sha256:3e024f103522e72a27019cfcfe14569522a394f5d651565560a18040fdd69a6c", + "sha256:4a48eeaa506f176bcac306378f5e8063697c93e26d2418fcbe053e8912019090", + "sha256:5d4a81f9438db7a19ac8e64ad41331f65a659ea8f3bb1889a9c2088cfd9fe104", + "sha256:64bef63b1abd5b41819ea53897bdbc03c631a59c1757a9393e6ae0828692f31c", + "sha256:680480dfa16b354f2e4d584edb8d36f0505ed8df12939beee2d161aea7bb3609", + "sha256:76d13dbcd648ca33dfc83569bb219d0696e4f6e5ad00b9f538332a3bdb28ff30", + "sha256:7865e39cac4e10506afc49213938fb7e13bf73ca980c9c20ffad2de4ef858f43", + "sha256:a0183760604b14e8eb671a431d06606594def03c36aaaa2a2e7b7f88382dac76", + "sha256:b5e0acc760daf5c3b45bce44653943e3a04d81c21c5b92213ed51664525dc24e", + "sha256:bead485e5d79d3eb62a8df55618743878fb3cba606aaf926153db5803270b185", + "sha256:cfb7d730fef706f3ea4389196ce5f610f24cc83f828c498a275c12f05f0cf5c4", + "sha256:d6055ced65d6581ab4f1da0d3f6ec14c60512474c5c9b3210c9f30dd7dd1447d", + "sha256:e22145110864bcffb1d52cb57050b67b8a8ecd43c7c0a1ac0bcdb2c85c8bf416", + "sha256:f4109cce38e644e81d923836b34024905d59e88c8fb48b89b420f4d7661cd89f" + ], + "version": "==0.7.7" + }, + "boto3": { + "hashes": [ + "sha256:03fe95333a51e3a14d7ab9644ae702f9feb008a4f5d308724eaa9ab8fd4c7d71", + "sha256:f99417fb53d0af357eb91152b30686c40dd22381499bfab0d61830eee72d5a0b" + ], + "index": "pypi", + "version": "==1.21.40" + }, + "botocore": { + "hashes": [ + "sha256:06badf7bbddadc404b5df10e7fd5bd351caccebd151bda29155a94dde789af5c", + "sha256:fbf7ad87581a888600e5f0f39b19ded2dd668f6cf6abe29a172ac23ba7f39dd6" + ], + "version": "==1.24.40" + }, + "brotli": { + "hashes": [ + "sha256:12effe280b8ebfd389022aa65114e30407540ccb89b177d3fbc9a4f177c4bd5d", + "sha256:160c78292e98d21e73a4cc7f76a234390e516afcd982fa17e1422f7c6a9ce9c8", + "sha256:16d528a45c2e1909c2798f27f7bf0a3feec1dc9e50948e738b961618e38b6a7b", + "sha256:19598ecddd8a212aedb1ffa15763dd52a388518c4550e615aed88dc3753c0f0c", + "sha256:1c48472a6ba3b113452355b9af0a60da5c2ae60477f8feda8346f8fd48e3e87c", + "sha256:268fe94547ba25b58ebc724680609c8ee3e5a843202e9a381f6f9c5e8bdb5c70", + "sha256:269a5743a393c65db46a7bb982644c67ecba4b8d91b392403ad8a861ba6f495f", + "sha256:26d168aac4aaec9a4394221240e8a5436b5634adc3cd1cdf637f6645cecbf181", + "sha256:29d1d350178e5225397e28ea1b7aca3648fcbab546d20e7475805437bfb0a130", + "sha256:2aad0e0baa04517741c9bb5b07586c642302e5fb3e75319cb62087bd0995ab19", + "sha256:3496fc835370da351d37cada4cf744039616a6db7d13c430035e901443a34daa", + "sha256:35a3edbe18e876e596553c4007a087f8bcfd538f19bc116917b3c7522fca0429", + "sha256:3b78a24b5fd13c03ee2b7b86290ed20efdc95da75a3557cc06811764d5ad1126", + "sha256:40d15c79f42e0a2c72892bf407979febd9cf91f36f495ffb333d1d04cebb34e4", + "sha256:44bb8ff420c1d19d91d79d8c3574b8954288bdff0273bf788954064d260d7ab0", + "sha256:4688c1e42968ba52e57d8670ad2306fe92e0169c6f3af0089be75bbac0c64a3b", + "sha256:495ba7e49c2db22b046a53b469bbecea802efce200dffb69b93dd47397edc9b6", + "sha256:4d1b810aa0ed773f81dceda2cc7b403d01057458730e309856356d4ef4188438", + "sha256:503fa6af7da9f4b5780bb7e4cbe0c639b010f12be85d02c99452825dd0feef3f", + "sha256:56d027eace784738457437df7331965473f2c0da2c70e1a1f6fdbae5402e0389", + "sha256:5913a1177fc36e30fcf6dc868ce23b0453952c78c04c266d3149b3d39e1410d6", + "sha256:5b6ef7d9f9c38292df3690fe3e302b5b530999fa90014853dcd0d6902fb59f26", + "sha256:5cb1e18167792d7d21e21365d7650b72d5081ed476123ff7b8cac7f45189c0c7", + "sha256:61a7ee1f13ab913897dac7da44a73c6d44d48a4adff42a5701e3239791c96e14", + "sha256:622a231b08899c864eb87e85f81c75e7b9ce05b001e59bbfbf43d4a71f5f32b2", + "sha256:68715970f16b6e92c574c30747c95cf8cf62804569647386ff032195dc89a430", + "sha256:6b2ae9f5f67f89aade1fab0f7fd8f2832501311c363a21579d02defa844d9296", + "sha256:6c772d6c0a79ac0f414a9f8947cc407e119b8598de7621f39cacadae3cf57d12", + "sha256:6d847b14f7ea89f6ad3c9e3901d1bc4835f6b390a9c71df999b0162d9bb1e20f", + "sha256:76ffebb907bec09ff511bb3acc077695e2c32bc2142819491579a695f77ffd4d", + "sha256:7bbff90b63328013e1e8cb50650ae0b9bac54ffb4be6104378490193cd60f85a", + "sha256:7cb81373984cc0e4682f31bc3d6be9026006d96eecd07ea49aafb06897746452", + "sha256:7ee83d3e3a024a9618e5be64648d6d11c37047ac48adff25f12fa4226cf23d1c", + "sha256:854c33dad5ba0fbd6ab69185fec8dab89e13cda6b7d191ba111987df74f38761", + "sha256:85f7912459c67eaab2fb854ed2bc1cc25772b300545fe7ed2dc03954da638649", + "sha256:87fdccbb6bb589095f413b1e05734ba492c962b4a45a13ff3408fa44ffe6479b", + "sha256:88c63a1b55f352b02c6ffd24b15ead9fc0e8bf781dbe070213039324922a2eea", + "sha256:8a674ac10e0a87b683f4fa2b6fa41090edfd686a6524bd8dedbd6138b309175c", + "sha256:93130612b837103e15ac3f9cbacb4613f9e348b58b3aad53721d92e57f96d46a", + "sha256:9744a863b489c79a73aba014df554b0e7a0fc44ef3f8a0ef2a52919c7d155031", + "sha256:9749a124280a0ada4187a6cfd1ffd35c350fb3af79c706589d98e088c5044267", + "sha256:97f715cf371b16ac88b8c19da00029804e20e25f30d80203417255d239f228b5", + "sha256:9bf919756d25e4114ace16a8ce91eb340eb57a08e2c6950c3cebcbe3dff2a5e7", + "sha256:9d12cf2851759b8de8ca5fde36a59c08210a97ffca0eb94c532ce7b17c6a3d1d", + "sha256:9ed4c92a0665002ff8ea852353aeb60d9141eb04109e88928026d3c8a9e5433c", + "sha256:a72661af47119a80d82fa583b554095308d6a4c356b2a554fdc2799bc19f2a43", + "sha256:afde17ae04d90fbe53afb628f7f2d4ca022797aa093e809de5c3cf276f61bbfa", + "sha256:b336c5e9cf03c7be40c47b5fd694c43c9f1358a80ba384a21969e0b4e66a9b17", + "sha256:b663f1e02de5d0573610756398e44c130add0eb9a3fc912a09665332942a2efb", + "sha256:b83bb06a0192cccf1eb8d0a28672a1b79c74c3a8a5f2619625aeb6f28b3a82bb", + "sha256:c2415d9d082152460f2bd4e382a1e85aed233abc92db5a3880da2257dc7daf7b", + "sha256:c83aa123d56f2e060644427a882a36b3c12db93727ad7a7b9efd7d7f3e9cc2c4", + "sha256:cfc391f4429ee0a9370aa93d812a52e1fee0f37a81861f4fdd1f4fb28e8547c3", + "sha256:db844eb158a87ccab83e868a762ea8024ae27337fc7ddcbfcddd157f841fdfe7", + "sha256:defed7ea5f218a9f2336301e6fd379f55c655bea65ba2476346340a0ce6f74a1", + "sha256:e16eb9541f3dd1a3e92b89005e37b1257b157b7256df0e36bd7b33b50be73bcb", + "sha256:e23281b9a08ec338469268f98f194658abfb13658ee98e2b7f85ee9dd06caa91", + "sha256:e2d9e1cbc1b25e22000328702b014227737756f4b5bf5c485ac1d8091ada078b", + "sha256:e48f4234f2469ed012a98f4b7874e7f7e173c167bed4934912a29e03167cf6b1", + "sha256:e4c4e92c14a57c9bd4cb4be678c25369bf7a092d55fd0866f759e425b9660806", + "sha256:ec1947eabbaf8e0531e8e899fc1d9876c179fc518989461f5d24e2223395a9e3", + "sha256:f909bbbc433048b499cb9db9e713b5d8d949e8c109a2a548502fb9aa8630f0b1" + ], + "markers": "platform_python_implementation == 'CPython'", + "version": "==1.0.9" + }, + "bs4": { + "hashes": [ + "sha256:36ecea1fd7cc5c0c6e4a1ff075df26d50da647b75376626cc186e2212886dd3a" + ], + "index": "pypi", + "version": "==0.0.1" + }, + "cachetools": { + "hashes": [ + "sha256:486471dfa8799eb7ec503a8059e263db000cdda20075ce5e48903087f79d5fd6", + "sha256:8fecd4203a38af17928be7b90689d8083603073622229ca7077b72d8e5a976e4" + ], + "version": "==5.0.0" + }, + "catalogue": { + "hashes": [ + "sha256:535d33ae79ebd21ca298551d85da186ae8b8e1df36b0fb0246da774163ec2d6b", + "sha256:cab4feda641fe05da1e6a1a9d123b0869d5ca324dcd93d4a5c384408ab62e7fb" + ], + "version": "==2.0.7" + }, + "certifi": { + "hashes": [ + "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872", + "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569" + ], + "version": "==2021.10.8" + }, + "cffi": { + "hashes": [ + "sha256:00c878c90cb53ccfaae6b8bc18ad05d2036553e6d9d1d9dbcf323bbe83854ca3", + "sha256:0104fb5ae2391d46a4cb082abdd5c69ea4eab79d8d44eaaf79f1b1fd806ee4c2", + "sha256:06c48159c1abed75c2e721b1715c379fa3200c7784271b3c46df01383b593636", + "sha256:0808014eb713677ec1292301ea4c81ad277b6cdf2fdd90fd540af98c0b101d20", + "sha256:10dffb601ccfb65262a27233ac273d552ddc4d8ae1bf93b21c94b8511bffe728", + "sha256:14cd121ea63ecdae71efa69c15c5543a4b5fbcd0bbe2aad864baca0063cecf27", + "sha256:17771976e82e9f94976180f76468546834d22a7cc404b17c22df2a2c81db0c66", + "sha256:181dee03b1170ff1969489acf1c26533710231c58f95534e3edac87fff06c443", + "sha256:23cfe892bd5dd8941608f93348c0737e369e51c100d03718f108bf1add7bd6d0", + "sha256:263cc3d821c4ab2213cbe8cd8b355a7f72a8324577dc865ef98487c1aeee2bc7", + "sha256:2756c88cbb94231c7a147402476be2c4df2f6078099a6f4a480d239a8817ae39", + "sha256:27c219baf94952ae9d50ec19651a687b826792055353d07648a5695413e0c605", + "sha256:2a23af14f408d53d5e6cd4e3d9a24ff9e05906ad574822a10563efcef137979a", + "sha256:31fb708d9d7c3f49a60f04cf5b119aeefe5644daba1cd2a0fe389b674fd1de37", + "sha256:3415c89f9204ee60cd09b235810be700e993e343a408693e80ce7f6a40108029", + "sha256:3773c4d81e6e818df2efbc7dd77325ca0dcb688116050fb2b3011218eda36139", + "sha256:3b96a311ac60a3f6be21d2572e46ce67f09abcf4d09344c49274eb9e0bf345fc", + "sha256:3f7d084648d77af029acb79a0ff49a0ad7e9d09057a9bf46596dac9514dc07df", + "sha256:41d45de54cd277a7878919867c0f08b0cf817605e4eb94093e7516505d3c8d14", + "sha256:4238e6dab5d6a8ba812de994bbb0a79bddbdf80994e4ce802b6f6f3142fcc880", + "sha256:45db3a33139e9c8f7c09234b5784a5e33d31fd6907800b316decad50af323ff2", + "sha256:45e8636704eacc432a206ac7345a5d3d2c62d95a507ec70d62f23cd91770482a", + "sha256:4958391dbd6249d7ad855b9ca88fae690783a6be9e86df65865058ed81fc860e", + "sha256:4a306fa632e8f0928956a41fa8e1d6243c71e7eb59ffbd165fc0b41e316b2474", + "sha256:57e9ac9ccc3101fac9d6014fba037473e4358ef4e89f8e181f8951a2c0162024", + "sha256:59888172256cac5629e60e72e86598027aca6bf01fa2465bdb676d37636573e8", + "sha256:5e069f72d497312b24fcc02073d70cb989045d1c91cbd53979366077959933e0", + "sha256:64d4ec9f448dfe041705426000cc13e34e6e5bb13736e9fd62e34a0b0c41566e", + "sha256:6dc2737a3674b3e344847c8686cf29e500584ccad76204efea14f451d4cc669a", + "sha256:74fdfdbfdc48d3f47148976f49fab3251e550a8720bebc99bf1483f5bfb5db3e", + "sha256:75e4024375654472cc27e91cbe9eaa08567f7fbdf822638be2814ce059f58032", + "sha256:786902fb9ba7433aae840e0ed609f45c7bcd4e225ebb9c753aa39725bb3e6ad6", + "sha256:8b6c2ea03845c9f501ed1313e78de148cd3f6cad741a75d43a29b43da27f2e1e", + "sha256:91d77d2a782be4274da750752bb1650a97bfd8f291022b379bb8e01c66b4e96b", + "sha256:91ec59c33514b7c7559a6acda53bbfe1b283949c34fe7440bcf917f96ac0723e", + "sha256:920f0d66a896c2d99f0adbb391f990a84091179542c205fa53ce5787aff87954", + "sha256:a5263e363c27b653a90078143adb3d076c1a748ec9ecc78ea2fb916f9b861962", + "sha256:abb9a20a72ac4e0fdb50dae135ba5e77880518e742077ced47eb1499e29a443c", + "sha256:c2051981a968d7de9dd2d7b87bcb9c939c74a34626a6e2f8181455dd49ed69e4", + "sha256:c21c9e3896c23007803a875460fb786118f0cdd4434359577ea25eb556e34c55", + "sha256:c2502a1a03b6312837279c8c1bd3ebedf6c12c4228ddbad40912d671ccc8a962", + "sha256:d4d692a89c5cf08a8557fdeb329b82e7bf609aadfaed6c0d79f5a449a3c7c023", + "sha256:da5db4e883f1ce37f55c667e5c0de439df76ac4cb55964655906306918e7363c", + "sha256:e7022a66d9b55e93e1a845d8c9eba2a1bebd4966cd8bfc25d9cd07d515b33fa6", + "sha256:ef1f279350da2c586a69d32fc8733092fd32cc8ac95139a00377841f59a3f8d8", + "sha256:f54a64f8b0c8ff0b64d18aa76675262e1700f3995182267998c31ae974fbc382", + "sha256:f5c7150ad32ba43a07c4479f40241756145a1f03b43480e058cfd862bf5041c7", + "sha256:f6f824dc3bce0edab5f427efcfb1d63ee75b6fcb7282900ccaf925be84efb0fc", + "sha256:fd8a250edc26254fe5b33be00402e6d287f562b6a5b2152dec302fa15bb3e997", + "sha256:ffaa5c925128e29efbde7301d8ecaf35c8c60ffbcd6a1ffd3a552177c8e5e796" + ], + "version": "==1.15.0" + }, + "charset-normalizer": { + "hashes": [ + "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597", + "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df" + ], + "markers": "python_version >= '3'", + "version": "==2.0.12" + }, + "click": { + "hashes": [ + "sha256:6a7a62563bbfabfda3a38f3023a1db4a35978c0abd76f6c9605ecd6554d6d9b1", + "sha256:8458d7b1287c5fb128c90e23381cf99dcde74beaf6c7ff6384ce84d6fe090adb" + ], + "version": "==8.0.4" + }, + "cryptg": { + "hashes": [ + "sha256:02b31622a75a49a5dcd25e589c85faae54575f018e055bd21a17df97c8bb9095", + "sha256:0da1b367056e57a5c01d22608da0cd50e597b917c1b2d9631767aa3c0640a99a", + "sha256:135688c6fbda90748924c2cb047f63785ebf4397d81acc4a05357950653c5096", + "sha256:1fb6c6d4561a54406593197c1f5f23662ab320f4af4ab11834e1583e9d27a49a", + "sha256:2516557e89803637fa7342de43dbcc5f84bf68ae05b1064a354a62d423447d9f", + "sha256:29001dafd3d6a054365222b1f89b12876723c89cdd10aa0e5885a05dfd034eeb", + "sha256:2cc8115960e49a038091ffb2d09de59e0acbdc76de10d7d415b7671a06bae0a9", + "sha256:2cd8224eb64af756f45cdceab16d048494313db8acec1e38d75d97716082267b", + "sha256:307bf96a6ac9c87b44531d8da5fe3a6c5d856e1dc69b68136ef9c4fb66ad17ac", + "sha256:31cf7682de69022c9a77739cdcf7116b06522b128b9b51c7593f277f38c38dbf", + "sha256:3bc2f372dec3a7753c0c0d72c69fcbe44af5473f870a3406978e07e8560a1aa6", + "sha256:46960979542155c9d903656a3a39770061b09a3691a23296f06dc168fe4ff962", + "sha256:47ad5916be4558f4d674c12800e8d9663ce938b0046f19cdc869ba3a7ca280ec", + "sha256:5faed49d972c7f44ce4d6fa1a64169c85a11209fa1fbe1c8a333fb1454888725", + "sha256:695636cca0ee938bd7113658ee60bfaf89afa19708c40ecae5f4a222c2ec544a", + "sha256:6c5d66975fc59adca203fa91e2a104240457114468162d30e9213661239ac1d6", + "sha256:72a5485ece10a70160170ceb658b1836db82dccab08a1f7029c54d81cf6b1d43", + "sha256:7fc8e1893775c6f53dceda1959f19833cc27a67a80492c10e2415dc601b36650", + "sha256:890584db41c8e1e046ae40dee0074614470d36ebd6b7e57bb91303300066601f", + "sha256:a1fb178702730b59267f1e6c6dfe16c7bb9c1350cee4183221982ad2dba4e7f5", + "sha256:a4de1730ca56aa8a945f176c25586901ed5e9f15ffb70c6459eedf466eb6299b", + "sha256:b6352555e47f389ed502269bdb537233d0a928b12d9f4caa57e8c707151acd30", + "sha256:b8896394b72ff7dbf38072ad4c2cd59abdd9e388bb55e1c369102beb8e569f9d", + "sha256:bbd05b52d09e78bdc595f229c0481f4f2e1daf3959847322a6b2c1f76119305f", + "sha256:bf00943924cddb0838f8a65f5aae31f6fe2ad64a5d7e6f10a6b900b3f01b0ae0", + "sha256:bf15aae0fa01aeec728ab16b920cf4c6b2793099c71f62f30ff100d6fe8c9859", + "sha256:c09a5b14494532fc3226f5c5f57ef2a651c935ed6a1d2d0f9eff110046725524", + "sha256:c4812802ce4cd6f08189ce0fa8b79e9a96ac941e69e6b3032bb6908baefde2ba", + "sha256:c69c1e19884108e508697919de0cd43e2ca4e9af418962aa235273b3c51a0e37", + "sha256:ce08c04ebb06ce1ac417597c1bb514a3c1b36cf5c286b8c60f23df2e65703bf3", + "sha256:e29b0d944176cf88fe52d1c58f46017b5bddc9cc54ec0fc6fac20043febefc32", + "sha256:e48ab84e0ed364436d5e449c59762c5963f08ad87f6508f4cb7644745b5559a8", + "sha256:eff15f0a1eee678dd9ec747b58ce86edb78b608036ac4e02d8349f5f35202495", + "sha256:fdd62c2be23eeabb9ebd2ad41bf153f5ec48b968885ef14e676515407cd56339" + ], + "index": "pypi", + "version": "==0.2.post4" + }, + "cymem": { + "hashes": [ + "sha256:04676d696596b0db3f3c5a3936bab12fb6f24278921a6622bb185e61765b2b4d", + "sha256:169725b5816959d34de2545b33fee6a8021a6e08818794a426c5a4f981f17e5e", + "sha256:2aa3fa467d906cd2c27fa0a2e2952dd7925f5fcc7973fab6d815ef6acb25aad8", + "sha256:4749f220e4c06ec44eb10de13794ff0508cdc4f8eff656cf49cab2cdb3122c0c", + "sha256:492084aef23ac2ff3da3729e9d36340bc91a96c2dc8c3a82a1926e384ab52412", + "sha256:4f87fe087f2ae36c3e20e2b1a29d7f76a28c035372d0a97655f26223d975235a", + "sha256:6b0d1a6b0a1296f31fa9e4b7ae5ea49394084ecc883b1ae6fec4844403c43468", + "sha256:700540b68e96a7056d0691d467df2bbaaf0934a3e6fe2383669998cbee19580a", + "sha256:971cf0a8437dfb4185c3049c086e463612fe849efadc0f5cc153fc81c501da7d", + "sha256:a93fba62fe79dbf6fc4d5b6d804a6e114b44af3ff3d40a28833ee39f21bd336b", + "sha256:af3c01e6b20f9e6c07c7d7cdb7f710e49889d3906c9a3e039546ee6636a34b9a", + "sha256:b8e1c18bb00800425576710468299153caad20c64ddb6819d40a6a34e21ee21c", + "sha256:c59293b232b53ebb47427f16cf648e937022f489cff36c11d1d8a1f0075b6609", + "sha256:d7a59cef8f2fa25d12e2c30138f8623acbd43ad2715e730a709e49c5eef8e1b0", + "sha256:dd52d8a81881804625df88453611175ab7e0099b34f52204da1f6940cf2e83c9", + "sha256:ea535f74ab6024e7416f93de564e5c81fb7c0964b96280de66f60aeb05f0cf53" + ], + "version": "==2.0.6" + }, + "dateparser": { + "hashes": [ + "sha256:038196b1f12c7397e38aad3d61588833257f6f552baa63a1499e6987fa8d42d9", + "sha256:9600874312ff28a41f96ec7ccdc73be1d1c44435719da47fea3339d55ff5a628" + ], + "index": "pypi", + "version": "==1.1.1" + }, + "ffmpeg-python": { + "hashes": [ + "sha256:65225db34627c578ef0e11c8b1eb528bb35e024752f6f10b78c011f6f64c4127", + "sha256:ac441a0404e053f8b6a1113a77c0f452f1cfc62f6344a769475ffdc0f56c23c5" + ], + "index": "pypi", + "version": "==0.2.0" + }, + "future": { + "hashes": [ + "sha256:b1bead90b70cf6ec3f0710ae53a525360fa360d306a86583adc6bf83a4db537d" + ], + "version": "==0.18.2" + }, "gabber": { "git": "https://github.com/stanfordio/gabber.git" }, + "gogettr": { + "hashes": [ + "sha256:9f5c90e3b1befe6eb561d4bca9ca124faddbe5787d8b429f02703c68dd51d255", + "sha256:eebdd76e5bb67e3848905f9d6841a9a897a44944edf2b6e1a5a1e5e8fb2110c1" + ], + "index": "pypi", + "version": "==0.8.0" + }, + "google-auth": { + "hashes": [ + "sha256:1a8c2c70c070b72fe038e2eb20072e5b4f0fd8ca1875e8fafe7f71b689a8d572", + "sha256:4dacb91d8a15b7755fc08688576481bd9c37cfac9842289469cf000d557f8962" + ], + "version": "==2.6.4" + }, + "google-auth-oauthlib": { + "hashes": [ + "sha256:24f67735513c4c7134dbde2f1dee5a1deb6acc8dfcb577d7bff30d213a28e7b0", + "sha256:30596b824fc6808fdaca2f048e4998cc40fb4b3599eaea66d28dc7085b36c5b8" + ], + "version": "==0.5.1" + }, + "greenlet": { + "hashes": [ + "sha256:0051c6f1f27cb756ffc0ffbac7d2cd48cb0362ac1736871399a739b2885134d3", + "sha256:00e44c8afdbe5467e4f7b5851be223be68adb4272f44696ee71fe46b7036a711", + "sha256:013d61294b6cd8fe3242932c1c5e36e5d1db2c8afb58606c5a67efce62c1f5fd", + "sha256:049fe7579230e44daef03a259faa24511d10ebfa44f69411d99e6a184fe68073", + "sha256:14d4f3cd4e8b524ae9b8aa567858beed70c392fdec26dbdb0a8a418392e71708", + "sha256:166eac03e48784a6a6e0e5f041cfebb1ab400b394db188c48b3a84737f505b67", + "sha256:17ff94e7a83aa8671a25bf5b59326ec26da379ace2ebc4411d690d80a7fbcf23", + "sha256:1e12bdc622676ce47ae9abbf455c189e442afdde8818d9da983085df6312e7a1", + "sha256:21915eb821a6b3d9d8eefdaf57d6c345b970ad722f856cd71739493ce003ad08", + "sha256:288c6a76705dc54fba69fbcb59904ae4ad768b4c768839b8ca5fdadec6dd8cfd", + "sha256:2bde6792f313f4e918caabc46532aa64aa27a0db05d75b20edfc5c6f46479de2", + "sha256:32ca72bbc673adbcfecb935bb3fb1b74e663d10a4b241aaa2f5a75fe1d1f90aa", + "sha256:356b3576ad078c89a6107caa9c50cc14e98e3a6c4874a37c3e0273e4baf33de8", + "sha256:40b951f601af999a8bf2ce8c71e8aaa4e8c6f78ff8afae7b808aae2dc50d4c40", + "sha256:572e1787d1460da79590bf44304abbc0a2da944ea64ec549188fa84d89bba7ab", + "sha256:58df5c2a0e293bf665a51f8a100d3e9956febfbf1d9aaf8c0677cf70218910c6", + "sha256:64e6175c2e53195278d7388c454e0b30997573f3f4bd63697f88d855f7a6a1fc", + "sha256:7227b47e73dedaa513cdebb98469705ef0d66eb5a1250144468e9c3097d6b59b", + "sha256:7418b6bfc7fe3331541b84bb2141c9baf1ec7132a7ecd9f375912eca810e714e", + "sha256:7cbd7574ce8e138bda9df4efc6bf2ab8572c9aff640d8ecfece1b006b68da963", + "sha256:7ff61ff178250f9bb3cd89752df0f1dd0e27316a8bd1465351652b1b4a4cdfd3", + "sha256:833e1551925ed51e6b44c800e71e77dacd7e49181fdc9ac9a0bf3714d515785d", + "sha256:8639cadfda96737427330a094476d4c7a56ac03de7265622fcf4cfe57c8ae18d", + "sha256:8c5d5b35f789a030ebb95bff352f1d27a93d81069f2adb3182d99882e095cefe", + "sha256:8c790abda465726cfb8bb08bd4ca9a5d0a7bd77c7ac1ca1b839ad823b948ea28", + "sha256:8d2f1fb53a421b410751887eb4ff21386d119ef9cde3797bf5e7ed49fb51a3b3", + "sha256:903bbd302a2378f984aef528f76d4c9b1748f318fe1294961c072bdc7f2ffa3e", + "sha256:93f81b134a165cc17123626ab8da2e30c0455441d4ab5576eed73a64c025b25c", + "sha256:95e69877983ea39b7303570fa6760f81a3eec23d0e3ab2021b7144b94d06202d", + "sha256:9633b3034d3d901f0a46b7939f8c4d64427dfba6bbc5a36b1a67364cf148a1b0", + "sha256:97e5306482182170ade15c4b0d8386ded995a07d7cc2ca8f27958d34d6736497", + "sha256:9f3cba480d3deb69f6ee2c1825060177a22c7826431458c697df88e6aeb3caee", + "sha256:aa5b467f15e78b82257319aebc78dd2915e4c1436c3c0d1ad6f53e47ba6e2713", + "sha256:abb7a75ed8b968f3061327c433a0fbd17b729947b400747c334a9c29a9af6c58", + "sha256:aec52725173bd3a7b56fe91bc56eccb26fbdff1386ef123abb63c84c5b43b63a", + "sha256:b11548073a2213d950c3f671aa88e6f83cda6e2fb97a8b6317b1b5b33d850e06", + "sha256:b1692f7d6bc45e3200844be0dba153612103db241691088626a33ff1f24a0d88", + "sha256:b336501a05e13b616ef81ce329c0e09ac5ed8c732d9ba7e3e983fcc1a9e86965", + "sha256:b8c008de9d0daba7b6666aa5bbfdc23dcd78cafc33997c9b7741ff6353bafb7f", + "sha256:b92e29e58bef6d9cfd340c72b04d74c4b4e9f70c9fa7c78b674d1fec18896dc4", + "sha256:be5f425ff1f5f4b3c1e33ad64ab994eed12fc284a6ea71c5243fd564502ecbe5", + "sha256:dd0b1e9e891f69e7675ba5c92e28b90eaa045f6ab134ffe70b52e948aa175b3c", + "sha256:e30f5ea4ae2346e62cedde8794a56858a67b878dd79f7df76a0767e356b1744a", + "sha256:e6a36bb9474218c7a5b27ae476035497a6990e21d04c279884eb10d9b290f1b1", + "sha256:e859fcb4cbe93504ea18008d1df98dee4f7766db66c435e4882ab35cf70cac43", + "sha256:eb6ea6da4c787111adf40f697b4e58732ee0942b5d3bd8f435277643329ba627", + "sha256:ec8c433b3ab0419100bd45b47c9c8551248a5aee30ca5e9d399a0b57ac04651b", + "sha256:eff9d20417ff9dcb0d25e2defc2574d10b491bf2e693b4e491914738b7908168", + "sha256:f0214eb2a23b85528310dad848ad2ac58e735612929c8072f6093f3585fd342d", + "sha256:f276df9830dba7a333544bd41070e8175762a7ac20350786b322b714b0e654f5", + "sha256:f3acda1924472472ddd60c29e5b9db0cec629fbe3c5c5accb74d6d6d14773478", + "sha256:f70a9e237bb792c7cc7e44c531fd48f5897961701cdaa06cf22fc14965c496cf", + "sha256:f9d29ca8a77117315101425ec7ec2a47a22ccf59f5593378fc4077ac5b754fce", + "sha256:fa877ca7f6b48054f847b61d6fa7bed5cebb663ebc55e018fda12db09dcc664c", + "sha256:fdcec0b8399108577ec290f55551d926d9a1fa6cad45882093a7a07ac5ec147b" + ], + "markers": "python_version >= '3' and (platform_machine == 'aarch64' or (platform_machine == 'ppc64le' or (platform_machine == 'x86_64' or (platform_machine == 'amd64' or (platform_machine == 'AMD64' or (platform_machine == 'win32' or platform_machine == 'WIN32'))))))", + "version": "==1.1.2" + }, + "gspread": { + "hashes": [ + "sha256:319766d90db05056293f7ee0ad2b35503a1a40683a75897a2922398cd2016283", + "sha256:c719e1c024a2a6f3b7d818fbe07c3886b26fd6504b64d1b1359cf242968213cd" + ], + "index": "pypi", + "version": "==5.3.2" + }, + "idna": { + "hashes": [ + "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff", + "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d" + ], + "markers": "python_version >= '3'", + "version": "==3.3" + }, + "instaloader": { + "hashes": [ + "sha256:7fa6147810eedcc1dedcdec8cfa1f220c9379ab8faeab6a336a7c181d944e2e4" + ], + "index": "pypi", + "version": "==4.9" + }, + "jinja2": { + "hashes": [ + "sha256:539835f51a74a69f41b848a9645dbdc35b4f20a3b601e2d9a7e22947b15ff119", + "sha256:640bed4bb501cbd17194b3cace1dc2126f5b619cf068a726b98192a0fde74ae9" + ], + "version": "==3.1.1" + }, + "jmespath": { + "hashes": [ + "sha256:a490e280edd1f57d6de88636992d05b71e97d69a26a19f058ecf7d304474bf5e", + "sha256:e8dcd576ed616f14ec02eed0005c85973b5890083313860136657e24784e4c04" + ], + "version": "==1.0.0" + }, + "langcodes": { + "hashes": [ + "sha256:4d89fc9acb6e9c8fdef70bcdf376113a3db09b67285d9e1d534de6d8818e7e69", + "sha256:794d07d5a28781231ac335a1561b8442f8648ca07cd518310aeb45d6f0807ef6" + ], + "version": "==3.3.0" + }, + "langdetect": { + "hashes": [ + "sha256:7cbc0746252f19e76f77c0b1690aadf01963be835ef0cd4b56dddf2a8f1dfc2a", + "sha256:cbc1fef89f8d062739774bd51eda3da3274006b3661d199c2655f6b3f6d605a0" + ], + "index": "pypi", + "version": "==1.0.9" + }, + "loguru": { + "hashes": [ + "sha256:066bd06758d0a513e9836fd9c6b5a75bfb3fd36841f4b996bc60b547a309d41c", + "sha256:4e2414d534a2ab57573365b3e6d0234dfb1d84b68b7f3b948e6fb743860a77c3" + ], + "index": "pypi", + "version": "==0.6.0" + }, + "markupsafe": { + "hashes": [ + "sha256:0212a68688482dc52b2d45013df70d169f542b7394fc744c02a57374a4207003", + "sha256:089cf3dbf0cd6c100f02945abeb18484bd1ee57a079aefd52cffd17fba910b88", + "sha256:10c1bfff05d95783da83491be968e8fe789263689c02724e0c691933c52994f5", + "sha256:33b74d289bd2f5e527beadcaa3f401e0df0a89927c1559c8566c066fa4248ab7", + "sha256:3799351e2336dc91ea70b034983ee71cf2f9533cdff7c14c90ea126bfd95d65a", + "sha256:3ce11ee3f23f79dbd06fb3d63e2f6af7b12db1d46932fe7bd8afa259a5996603", + "sha256:421be9fbf0ffe9ffd7a378aafebbf6f4602d564d34be190fc19a193232fd12b1", + "sha256:43093fb83d8343aac0b1baa75516da6092f58f41200907ef92448ecab8825135", + "sha256:46d00d6cfecdde84d40e572d63735ef81423ad31184100411e6e3388d405e247", + "sha256:4a33dea2b688b3190ee12bd7cfa29d39c9ed176bda40bfa11099a3ce5d3a7ac6", + "sha256:4b9fe39a2ccc108a4accc2676e77da025ce383c108593d65cc909add5c3bd601", + "sha256:56442863ed2b06d19c37f94d999035e15ee982988920e12a5b4ba29b62ad1f77", + "sha256:671cd1187ed5e62818414afe79ed29da836dde67166a9fac6d435873c44fdd02", + "sha256:694deca8d702d5db21ec83983ce0bb4b26a578e71fbdbd4fdcd387daa90e4d5e", + "sha256:6a074d34ee7a5ce3effbc526b7083ec9731bb3cbf921bbe1d3005d4d2bdb3a63", + "sha256:6d0072fea50feec76a4c418096652f2c3238eaa014b2f94aeb1d56a66b41403f", + "sha256:6fbf47b5d3728c6aea2abb0589b5d30459e369baa772e0f37a0320185e87c980", + "sha256:7f91197cc9e48f989d12e4e6fbc46495c446636dfc81b9ccf50bb0ec74b91d4b", + "sha256:86b1f75c4e7c2ac2ccdaec2b9022845dbb81880ca318bb7a0a01fbf7813e3812", + "sha256:8dc1c72a69aa7e082593c4a203dcf94ddb74bb5c8a731e4e1eb68d031e8498ff", + "sha256:8e3dcf21f367459434c18e71b2a9532d96547aef8a871872a5bd69a715c15f96", + "sha256:8e576a51ad59e4bfaac456023a78f6b5e6e7651dcd383bcc3e18d06f9b55d6d1", + "sha256:96e37a3dc86e80bf81758c152fe66dbf60ed5eca3d26305edf01892257049925", + "sha256:97a68e6ada378df82bc9f16b800ab77cbf4b2fada0081794318520138c088e4a", + "sha256:99a2a507ed3ac881b975a2976d59f38c19386d128e7a9a18b7df6fff1fd4c1d6", + "sha256:a49907dd8420c5685cfa064a1335b6754b74541bbb3706c259c02ed65b644b3e", + "sha256:b09bf97215625a311f669476f44b8b318b075847b49316d3e28c08e41a7a573f", + "sha256:b7bd98b796e2b6553da7225aeb61f447f80a1ca64f41d83612e6139ca5213aa4", + "sha256:b87db4360013327109564f0e591bd2a3b318547bcef31b468a92ee504d07ae4f", + "sha256:bcb3ed405ed3222f9904899563d6fc492ff75cce56cba05e32eff40e6acbeaa3", + "sha256:d4306c36ca495956b6d568d276ac11fdd9c30a36f1b6eb928070dc5360b22e1c", + "sha256:d5ee4f386140395a2c818d149221149c54849dfcfcb9f1debfe07a8b8bd63f9a", + "sha256:dda30ba7e87fbbb7eab1ec9f58678558fd9a6b8b853530e176eabd064da81417", + "sha256:e04e26803c9c3851c931eac40c695602c6295b8d432cbe78609649ad9bd2da8a", + "sha256:e1c0b87e09fa55a220f058d1d49d3fb8df88fbfab58558f1198e08c1e1de842a", + "sha256:e72591e9ecd94d7feb70c1cbd7be7b3ebea3f548870aa91e2732960fa4d57a37", + "sha256:e8c843bbcda3a2f1e3c2ab25913c80a3c5376cd00c6e8c4a86a89a28c8dc5452", + "sha256:efc1913fd2ca4f334418481c7e595c00aad186563bbc1ec76067848c7ca0a933", + "sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a", + "sha256:fc7b548b17d238737688817ab67deebb30e8073c95749d55538ed473130ec0c7" + ], + "version": "==2.1.1" + }, + "murmurhash": { + "hashes": [ + "sha256:00a5252b569d3f914b5bd0bce72d2efe9c0fb91a9703556ea1b608b141c68f2d", + "sha256:1431d817e1fff1ed35f8dc54dd5b4d70165ec98076de8aca351805f8037293f3", + "sha256:2911bc3e8040dfaac536b141539b0351915f1439953f0aa9e957f082cff035a6", + "sha256:5c7b8cc4a8db1c821b80f8ca70a25c3166b14d68ecef8693a117c6a0b1d74ace", + "sha256:773411eba268bf524c012e781f4405aacb9ef4edc063d1f6b38bbf06358b988e", + "sha256:7dc5a79346afa07f14384926c335c0c455226d687d1305b9378264875b450e51", + "sha256:8de08d145c85bb7ba89cb1b591742e3ef54cede73e35f62752af687a4a1859f7", + "sha256:90a8e06872015d6f9f66a42669e003a1df8be229defef69cd98546f4cb25546d", + "sha256:92cd7196974307143ce8e9e9b6e22e0a57abf30bdd5a1effe696b4825677e616", + "sha256:9d69cc0ffc0ef6d37399b8a0484a44f9877e531ebc164e55105e89738ed52089", + "sha256:a78d53f047c3410ce4c589d9b47090f628f844ed5694418144e63cfe7f3da7e9", + "sha256:ab326b172dc470331490bda516d4d6d7578c91445ad83a2a3418ac1b9c5f9f55", + "sha256:cdd1036688341413e5adef32b3fd58e8b44f24405f394f90129f39ed879e4f24", + "sha256:de267459d040c96727ba141075d5bc983ec69c6f75b6df1b703e3b5cd7090382", + "sha256:e40790fdaf65213d70da4ed9229f16f6d6376310dc8fc23eacc98e6151c6ae7e", + "sha256:f4ef3b26229ff192032a12653d637313e1231d23e788b83a2f4a3d8e2bf2d031" + ], + "version": "==1.0.6" + }, + "mutagen": { + "hashes": [ + "sha256:6397602efb3c2d7baebd2166ed85731ae1c1d475abca22090b7141ff5034b3e1", + "sha256:9c9f243fcec7f410f138cb12c21c84c64fde4195481a30c9bfb05b5f003adfed" + ], + "version": "==1.45.1" + }, + "numpy": { + "hashes": [ + "sha256:07a8c89a04997625236c5ecb7afe35a02af3896c8aa01890a849913a2309c676", + "sha256:08d9b008d0156c70dc392bb3ab3abb6e7a711383c3247b410b39962263576cd4", + "sha256:201b4d0552831f7250a08d3b38de0d989d6f6e4658b709a02a73c524ccc6ffce", + "sha256:2c10a93606e0b4b95c9b04b77dc349b398fdfbda382d2a39ba5a822f669a0123", + "sha256:3ca688e1b9b95d80250bca34b11a05e389b1420d00e87a0d12dc45f131f704a1", + "sha256:48a3aecd3b997bf452a2dedb11f4e79bc5bfd21a1d4cc760e703c31d57c84b3e", + "sha256:568dfd16224abddafb1cbcce2ff14f522abe037268514dd7e42c6776a1c3f8e5", + "sha256:5bfb1bb598e8229c2d5d48db1860bcf4311337864ea3efdbe1171fb0c5da515d", + "sha256:639b54cdf6aa4f82fe37ebf70401bbb74b8508fddcf4797f9fe59615b8c5813a", + "sha256:8251ed96f38b47b4295b1ae51631de7ffa8260b5b087808ef09a39a9d66c97ab", + "sha256:92bfa69cfbdf7dfc3040978ad09a48091143cffb778ec3b03fa170c494118d75", + "sha256:97098b95aa4e418529099c26558eeb8486e66bd1e53a6b606d684d0c3616b168", + "sha256:a3bae1a2ed00e90b3ba5f7bd0a7c7999b55d609e0c54ceb2b076a25e345fa9f4", + "sha256:c34ea7e9d13a70bf2ab64a2532fe149a9aced424cd05a2c4ba662fd989e3e45f", + "sha256:dbc7601a3b7472d559dc7b933b18b4b66f9aa7452c120e87dfb33d02008c8a18", + "sha256:e7927a589df200c5e23c57970bafbd0cd322459aa7b1ff73b7c2e84d6e3eae62", + "sha256:f8c1f39caad2c896bc0018f699882b345b2a63708008be29b1f355ebf6f933fe", + "sha256:f950f8845b480cffe522913d35567e29dd381b0dc7e4ce6a4a9f9156417d2430", + "sha256:fade0d4f4d292b6f39951b6836d7a3c7ef5b2347f3c420cd9820a1d90d794802", + "sha256:fdf3c08bce27132395d3c3ba1503cac12e17282358cb4bddc25cc46b0aca07aa" + ], + "version": "==1.22.3" + }, + "oauthlib": { + "hashes": [ + "sha256:23a8208d75b902797ea29fd31fa80a15ed9dc2c6c16fe73f5d346f83f6fa27a2", + "sha256:6db33440354787f9b7f3a6dbd4febf5d0f93758354060e802f6c06cb493022fe" + ], + "version": "==3.2.0" + }, + "packaging": { + "hashes": [ + "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb", + "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522" + ], + "version": "==21.3" + }, + "pathy": { + "hashes": [ + "sha256:25fd04cec6393661113086730ce69c789d121bea83ab1aa18452e8fd42faf29a", + "sha256:838624441f799a06b446a657e4ecc9ebc3fdd05234397e044a7c87e8f6e76b1c" + ], + "version": "==0.6.1" + }, + "pillow": { + "hashes": [ + "sha256:01ce45deec9df310cbbee11104bae1a2a43308dd9c317f99235b6d3080ddd66e", + "sha256:0c51cb9edac8a5abd069fd0758ac0a8bfe52c261ee0e330f363548aca6893595", + "sha256:17869489de2fce6c36690a0c721bd3db176194af5f39249c1ac56d0bb0fcc512", + "sha256:21dee8466b42912335151d24c1665fcf44dc2ee47e021d233a40c3ca5adae59c", + "sha256:25023a6209a4d7c42154073144608c9a71d3512b648a2f5d4465182cb93d3477", + "sha256:255c9d69754a4c90b0ee484967fc8818c7ff8311c6dddcc43a4340e10cd1636a", + "sha256:35be4a9f65441d9982240e6966c1eaa1c654c4e5e931eaf580130409e31804d4", + "sha256:3f42364485bfdab19c1373b5cd62f7c5ab7cc052e19644862ec8f15bb8af289e", + "sha256:3fddcdb619ba04491e8f771636583a7cc5a5051cd193ff1aa1ee8616d2a692c5", + "sha256:463acf531f5d0925ca55904fa668bb3461c3ef6bc779e1d6d8a488092bdee378", + "sha256:4fe29a070de394e449fd88ebe1624d1e2d7ddeed4c12e0b31624561b58948d9a", + "sha256:55dd1cf09a1fd7c7b78425967aacae9b0d70125f7d3ab973fadc7b5abc3de652", + "sha256:5a3ecc026ea0e14d0ad7cd990ea7f48bfcb3eb4271034657dc9d06933c6629a7", + "sha256:5cfca31ab4c13552a0f354c87fbd7f162a4fafd25e6b521bba93a57fe6a3700a", + "sha256:66822d01e82506a19407d1afc104c3fcea3b81d5eb11485e593ad6b8492f995a", + "sha256:69e5ddc609230d4408277af135c5b5c8fe7a54b2bdb8ad7c5100b86b3aab04c6", + "sha256:6b6d4050b208c8ff886fd3db6690bf04f9a48749d78b41b7a5bf24c236ab0165", + "sha256:7a053bd4d65a3294b153bdd7724dce864a1d548416a5ef61f6d03bf149205160", + "sha256:82283af99c1c3a5ba1da44c67296d5aad19f11c535b551a5ae55328a317ce331", + "sha256:8782189c796eff29dbb37dd87afa4ad4d40fc90b2742704f94812851b725964b", + "sha256:8d79c6f468215d1a8415aa53d9868a6b40c4682165b8cb62a221b1baa47db458", + "sha256:97bda660702a856c2c9e12ec26fc6d187631ddfd896ff685814ab21ef0597033", + "sha256:a325ac71914c5c043fa50441b36606e64a10cd262de12f7a179620f579752ff8", + "sha256:a336a4f74baf67e26f3acc4d61c913e378e931817cd1e2ef4dfb79d3e051b481", + "sha256:a598d8830f6ef5501002ae85c7dbfcd9c27cc4efc02a1989369303ba85573e58", + "sha256:a5eaf3b42df2bcda61c53a742ee2c6e63f777d0e085bbc6b2ab7ed57deb13db7", + "sha256:aea7ce61328e15943d7b9eaca87e81f7c62ff90f669116f857262e9da4057ba3", + "sha256:af79d3fde1fc2e33561166d62e3b63f0cc3e47b5a3a2e5fea40d4917754734ea", + "sha256:c24f718f9dd73bb2b31a6201e6db5ea4a61fdd1d1c200f43ee585fc6dcd21b34", + "sha256:c5b0ff59785d93b3437c3703e3c64c178aabada51dea2a7f2c5eccf1bcf565a3", + "sha256:c7110ec1701b0bf8df569a7592a196c9d07c764a0a74f65471ea56816f10e2c8", + "sha256:c870193cce4b76713a2b29be5d8327c8ccbe0d4a49bc22968aa1e680930f5581", + "sha256:c9efef876c21788366ea1f50ecb39d5d6f65febe25ad1d4c0b8dff98843ac244", + "sha256:de344bcf6e2463bb25179d74d6e7989e375f906bcec8cb86edb8b12acbc7dfef", + "sha256:eb1b89b11256b5b6cad5e7593f9061ac4624f7651f7a8eb4dfa37caa1dfaa4d0", + "sha256:ed742214068efa95e9844c2d9129e209ed63f61baa4d54dbf4cf8b5e2d30ccf2", + "sha256:f401ed2bbb155e1ade150ccc63db1a4f6c1909d3d378f7d1235a44e90d75fb97", + "sha256:fb89397013cf302f282f0fc998bb7abf11d49dcff72c8ecb320f76ea6e2c5717" + ], + "version": "==9.1.0" + }, "polyphemus": { "git": "https://github.com/bellingcat/polyphemus.git" }, + "preshed": { + "hashes": [ + "sha256:3af09f4cfcdaca085fd87dac8107617c4e2bb0ad1458f953841b71e9728287f5", + "sha256:58661bea8d0d63a648588511407285e43d43627e27f836e30819801fb3c75d70", + "sha256:5f99837e7353ce1fa81f0074d4b15f36e0af5af60a2a54d4d11e13cb09768a9e", + "sha256:61b2ea656cb1c38d544cc774f1c2ad1cdab23167b46b35310a7e211d4ba9c6d0", + "sha256:66a71ced487516cf81fd0431a3a843514262ae2f33e9a7688b87562258fa75d5", + "sha256:6c98f725d8478f3ade4ab1ea00f50a92d2d9406d37276bc46fd8bab1d47452c4", + "sha256:87e1add41b7f6236a3ccc34788f47ab8682bc28e8a2d369089062e274494c1a0", + "sha256:8c60a400babfc5b25ba371fda7041be227f7c625e1fb7a43329c2c08fe00a53b", + "sha256:92a8f49d17a63537a8beed48a049b62ef168ca07e0042a5b2bcdf178a1fb5d48", + "sha256:a279c138ad1d5be02547b1545254929588414b01571fe637016367f6a1aa11de", + "sha256:cfe1495fcfc7f479de840ddc4f426dbb55351e218ae5c8712c1269183a4d0060", + "sha256:e03ae3eee961106a517fcd827b5a7c51f7317236b3e665c989054ab8dc381d28", + "sha256:ea8aa9610837e907e8442e79300df0a861bfdb4dcaf026a5d9642a688ad04815", + "sha256:eaffbc71fdb8625f9aac4fe7e19e20bf318d1421ea05903bebe3e6ffef27b587", + "sha256:f92e752a868ea2690e1b38c4b775251a145e0fce36b9bdd972539e8271b7a23a", + "sha256:fb3b7588a3a0f2f2f1bf3fe403361b2b031212b73a37025aea1df7215af3772a" + ], + "version": "==3.0.6" + }, + "psycopg2": { + "hashes": [ + "sha256:06f32425949bd5fe8f625c49f17ebb9784e1e4fe928b7cce72edc36fb68e4c0c", + "sha256:0762c27d018edbcb2d34d51596e4346c983bd27c330218c56c4dc25ef7e819bf", + "sha256:083707a696e5e1c330af2508d8fab36f9700b26621ccbcb538abe22e15485362", + "sha256:34b33e0162cfcaad151f249c2649fd1030010c16f4bbc40a604c1cb77173dcf7", + "sha256:4295093a6ae3434d33ec6baab4ca5512a5082cc43c0505293087b8a46d108461", + "sha256:8cf3878353cc04b053822896bc4922b194792df9df2f1ad8da01fb3043602126", + "sha256:8e841d1bf3434da985cc5ef13e6f75c8981ced601fd70cc6bf33351b91562981", + "sha256:9572e08b50aed176ef6d66f15a21d823bb6f6d23152d35e8451d7d2d18fdac56", + "sha256:a81e3866f99382dfe8c15a151f1ca5fde5815fde879348fe5a9884a7c092a305", + "sha256:cb10d44e6694d763fa1078a26f7f6137d69f555a78ec85dc2ef716c37447e4b2", + "sha256:d3ca6421b942f60c008f81a3541e8faf6865a28d5a9b48544b0ee4f40cac7fca" + ], + "index": "pypi", + "version": "==2.9.3" + }, + "pyaes": { + "hashes": [ + "sha256:02c1b1405c38d3c370b085fb952dd8bea3fadcee6411ad99f312cc129c536d8f" + ], + "version": "==1.6.1" + }, + "pyasn1": { + "hashes": [ + "sha256:014c0e9976956a08139dc0712ae195324a75e142284d5f87f1a87ee1b068a359", + "sha256:03840c999ba71680a131cfaee6fab142e1ed9bbd9c693e285cc6aca0d555e576", + "sha256:0458773cfe65b153891ac249bcf1b5f8f320b7c2ce462151f8fa74de8934becf", + "sha256:08c3c53b75eaa48d71cf8c710312316392ed40899cb34710d092e96745a358b7", + "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d", + "sha256:5c9414dcfede6e441f7e8f81b43b34e834731003427e5b09e4e00e3172a10f00", + "sha256:6e7545f1a61025a4e58bb336952c5061697da694db1cae97b116e9c46abcf7c8", + "sha256:78fa6da68ed2727915c4767bb386ab32cdba863caa7dbe473eaae45f9959da86", + "sha256:7ab8a544af125fb704feadb008c99a88805126fb525280b2270bb25cc1d78a12", + "sha256:99fcc3c8d804d1bc6d9a099921e39d827026409a58f2a720dcdb89374ea0c776", + "sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba", + "sha256:e89bf84b5437b532b0803ba5c9a5e054d21fec423a89952a74f87fa2c9b7bce2", + "sha256:fec3e9d8e36808a28efb59b489e4528c10ad0f480e57dcc32b4de5c9d8c9fdf3" + ], + "version": "==0.4.8" + }, + "pyasn1-modules": { + "hashes": [ + "sha256:0845a5582f6a02bb3e1bde9ecfc4bfcae6ec3210dd270522fee602365430c3f8", + "sha256:0fe1b68d1e486a1ed5473f1302bd991c1611d319bba158e98b106ff86e1d7199", + "sha256:15b7c67fabc7fc240d87fb9aabf999cf82311a6d6fb2c70d00d3d0604878c811", + "sha256:426edb7a5e8879f1ec54a1864f16b882c2837bfd06eee62f2c982315ee2473ed", + "sha256:65cebbaffc913f4fe9e4808735c95ea22d7a7775646ab690518c056784bc21b4", + "sha256:905f84c712230b2c592c19470d3ca8d552de726050d1d1716282a1f6146be65e", + "sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74", + "sha256:a99324196732f53093a84c4369c996713eb8c89d360a496b599fb1a9c47fc3eb", + "sha256:b80486a6c77252ea3a3e9b1e360bc9cf28eaac41263d173c032581ad2f20fe45", + "sha256:c29a5e5cc7a3f05926aff34e097e84f8589cd790ce0ed41b67aed6857b26aafd", + "sha256:cbac4bc38d117f2a49aeedec4407d23e8866ea4ac27ff2cf7fb3e5b570df19e0", + "sha256:f39edd8c4ecaa4556e989147ebf219227e2cd2e8a43c7e7fcb1f1c18c5fd6a3d", + "sha256:fe0644d9ab041506b62782e92b06b8c68cca799e1a9636ec398675459e031405" + ], + "version": "==0.2.8" + }, + "pycparser": { + "hashes": [ + "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9", + "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206" + ], + "version": "==2.21" + }, + "pycryptodomex": { + "hashes": [ + "sha256:1ca8e1b4c62038bb2da55451385246f51f412c5f5eabd64812c01766a5989b4a", + "sha256:298c00ea41a81a491d5b244d295d18369e5aac4b61b77b2de5b249ca61cd6659", + "sha256:2aa887683eee493e015545bd69d3d21ac8d5ad582674ec98f4af84511e353e45", + "sha256:2ce76ed0081fd6ac8c74edc75b9d14eca2064173af79843c24fa62573263c1f2", + "sha256:3da13c2535b7aea94cc2a6d1b1b37746814c74b6e80790daddd55ca5c120a489", + "sha256:406ec8cfe0c098fadb18d597dc2ee6de4428d640c0ccafa453f3d9b2e58d29e2", + "sha256:4d0db8df9ffae36f416897ad184608d9d7a8c2b46c4612c6bc759b26c073f750", + "sha256:530756d2faa40af4c1f74123e1d889bd07feae45bac2fd32f259a35f7aa74151", + "sha256:77931df40bb5ce5e13f4de2bfc982b2ddc0198971fbd947776c8bb5050896eb2", + "sha256:797a36bd1f69df9e2798e33edb4bd04e5a30478efc08f9428c087f17f65a7045", + "sha256:8085bd0ad2034352eee4d4f3e2da985c2749cb7344b939f4d95ead38c2520859", + "sha256:8536bc08d130cae6dcba1ea689f2913dfd332d06113904d171f2f56da6228e89", + "sha256:a4d412eba5679ede84b41dbe48b1bed8f33131ab9db06c238a235334733acc5e", + "sha256:aebecde2adc4a6847094d3bd6a8a9538ef3438a5ea84ac1983fcb167db614461", + "sha256:b276cc4deb4a80f9dfd47a41ebb464b1fe91efd8b1b8620cf5ccf8b824b850d6", + "sha256:b5a185ae79f899b01ca49f365bdf15a45d78d9856f09b0de1a41b92afce1a07f", + "sha256:c4d8977ccda886d88dc3ca789de2f1adc714df912ff3934b3d0a3f3d777deafb", + "sha256:c5dd3ffa663c982d7f1be9eb494a8924f6d40e2e2f7d1d27384cfab1b2ac0662", + "sha256:ca88f2f7020002638276439a01ffbb0355634907d1aa5ca91f3dc0c2e44e8f3b", + "sha256:d2cce1c82a7845d7e2e8a0956c6b7ed3f1661c9acf18eb120fc71e098ab5c6fe", + "sha256:d709572d64825d8d59ea112e11cc7faf6007f294e9951324b7574af4251e4de8", + "sha256:da8db8374295fb532b4b0c467e66800ef17d100e4d5faa2bbbd6df35502da125", + "sha256:e36c7e3b5382cd5669cf199c4a04a0279a43b2a3bdd77627e9b89778ac9ec08c", + "sha256:e95a4a6c54d27a84a4624d2af8bb9ee178111604653194ca6880c98dcad92f48", + "sha256:ee835def05622e0c8b1435a906491760a43d0c462f065ec9143ec4b8d79f8bff", + "sha256:f75009715dcf4a3d680c2338ab19dac5498f8121173a929872950f4fb3a48fbf", + "sha256:f8524b8bc89470cec7ac51734907818d3620fb1637f8f8b542d650ebec42a126" + ], + "version": "==3.14.1" + }, + "pydantic": { + "hashes": [ + "sha256:021ea0e4133e8c824775a0cfe098677acf6fa5a3cbf9206a376eed3fc09302cd", + "sha256:05ddfd37c1720c392f4e0d43c484217b7521558302e7069ce8d318438d297739", + "sha256:05ef5246a7ffd2ce12a619cbb29f3307b7c4509307b1b49f456657b43529dc6f", + "sha256:10e5622224245941efc193ad1d159887872776df7a8fd592ed746aa25d071840", + "sha256:18b5ea242dd3e62dbf89b2b0ec9ba6c7b5abaf6af85b95a97b00279f65845a23", + "sha256:234a6c19f1c14e25e362cb05c68afb7f183eb931dd3cd4605eafff055ebbf287", + "sha256:244ad78eeb388a43b0c927e74d3af78008e944074b7d0f4f696ddd5b2af43c62", + "sha256:26464e57ccaafe72b7ad156fdaa4e9b9ef051f69e175dbbb463283000c05ab7b", + "sha256:41b542c0b3c42dc17da70554bc6f38cbc30d7066d2c2815a94499b5684582ecb", + "sha256:4a03cbbe743e9c7247ceae6f0d8898f7a64bb65800a45cbdc52d65e370570820", + "sha256:4be75bebf676a5f0f87937c6ddb061fa39cbea067240d98e298508c1bda6f3f3", + "sha256:54cd5121383f4a461ff7644c7ca20c0419d58052db70d8791eacbbe31528916b", + "sha256:589eb6cd6361e8ac341db97602eb7f354551482368a37f4fd086c0733548308e", + "sha256:8621559dcf5afacf0069ed194278f35c255dc1a1385c28b32dd6c110fd6531b3", + "sha256:8b223557f9510cf0bfd8b01316bf6dd281cf41826607eada99662f5e4963f316", + "sha256:99a9fc39470010c45c161a1dc584997f1feb13f689ecf645f59bb4ba623e586b", + "sha256:a7c6002203fe2c5a1b5cbb141bb85060cbff88c2d78eccbc72d97eb7022c43e4", + "sha256:a83db7205f60c6a86f2c44a61791d993dff4b73135df1973ecd9eed5ea0bda20", + "sha256:ac8eed4ca3bd3aadc58a13c2aa93cd8a884bcf21cb019f8cfecaae3b6ce3746e", + "sha256:e710876437bc07bd414ff453ac8ec63d219e7690128d925c6e82889d674bb505", + "sha256:ea5cb40a3b23b3265f6325727ddfc45141b08ed665458be8c6285e7b85bd73a1", + "sha256:fec866a0b59f372b7e776f2d7308511784dace622e0992a0b59ea3ccee0ae833" + ], + "version": "==1.8.2" + }, "pyexiftool": { "git": "https://github.com/smarnach/pyexiftool.git" }, + "pyparsing": { + "hashes": [ + "sha256:7bf433498c016c4314268d95df76c81b842a4cb2b276fa3312cfb1e1d85f6954", + "sha256:ef7b523f6356f763771559412c0d7134753f037822dad1b16945b7b846f7ad06" + ], + "version": "==3.0.8" + }, + "pytesseract": { + "hashes": [ + "sha256:7e2bafc7f48d1bb71443ce4633a56f5e21925a98f220a36c336297edcd1956d0", + "sha256:fecda37d1e4eaf744c657cd03a5daab4eb97c61506ac5550274322c8ae32eca2" + ], + "index": "pypi", + "version": "==0.3.9" + }, + "python-dateutil": { + "hashes": [ + "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", + "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" + ], + "version": "==2.8.2" + }, + "pytz": { + "hashes": [ + "sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7", + "sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c" + ], + "index": "pypi", + "version": "==2022.1" + }, + "pytz-deprecation-shim": { + "hashes": [ + "sha256:8314c9692a636c8eb3bda879b9f119e350e93223ae83e70e80c31675a0fdc1a6", + "sha256:af097bae1b616dde5c5744441e2ddc69e74dfdcb0c263129610d85b87445a59d" + ], + "version": "==0.1.0.post0" + }, + "ratelimit": { + "hashes": [ + "sha256:af8a9b64b821529aca09ebaf6d8d279100d766f19e90b5059ac6a718ca6dee42" + ], + "index": "pypi", + "version": "==2.2.1" + }, + "regex": { + "hashes": [ + "sha256:0008650041531d0eadecc96a73d37c2dc4821cf51b0766e374cb4f1ddc4e1c14", + "sha256:03299b0bcaa7824eb7c0ebd7ef1e3663302d1b533653bfe9dc7e595d453e2ae9", + "sha256:06b1df01cf2aef3a9790858af524ae2588762c8a90e784ba00d003f045306204", + "sha256:09b4b6ccc61d4119342b26246ddd5a04accdeebe36bdfe865ad87a0784efd77f", + "sha256:0be0c34a39e5d04a62fd5342f0886d0e57592a4f4993b3f9d257c1f688b19737", + "sha256:0d96eec8550fd2fd26f8e675f6d8b61b159482ad8ffa26991b894ed5ee19038b", + "sha256:0eb0e2845e81bdea92b8281a3969632686502565abf4a0b9e4ab1471c863d8f3", + "sha256:13bbf0c9453c6d16e5867bda7f6c0c7cff1decf96c5498318bb87f8136d2abd4", + "sha256:17e51ad1e6131c496b58d317bc9abec71f44eb1957d32629d06013a21bc99cac", + "sha256:1977bb64264815d3ef016625adc9df90e6d0e27e76260280c63eca993e3f455f", + "sha256:1e30762ddddb22f7f14c4f59c34d3addabc789216d813b0f3e2788d7bcf0cf29", + "sha256:1e73652057473ad3e6934944af090852a02590c349357b79182c1b681da2c772", + "sha256:20e6a27959f162f979165e496add0d7d56d7038237092d1aba20b46de79158f1", + "sha256:286ff9ec2709d56ae7517040be0d6c502642517ce9937ab6d89b1e7d0904f863", + "sha256:297c42ede2c81f0cb6f34ea60b5cf6dc965d97fa6936c11fc3286019231f0d66", + "sha256:320c2f4106962ecea0f33d8d31b985d3c185757c49c1fb735501515f963715ed", + "sha256:35ed2f3c918a00b109157428abfc4e8d1ffabc37c8f9abc5939ebd1e95dabc47", + "sha256:3d146e5591cb67c5e836229a04723a30af795ef9b70a0bbd913572e14b7b940f", + "sha256:42bb37e2b2d25d958c25903f6125a41aaaa1ed49ca62c103331f24b8a459142f", + "sha256:42d6007722d46bd2c95cce700181570b56edc0dcbadbfe7855ec26c3f2d7e008", + "sha256:43eba5c46208deedec833663201752e865feddc840433285fbadee07b84b464d", + "sha256:452519bc4c973e961b1620c815ea6dd8944a12d68e71002be5a7aff0a8361571", + "sha256:4b9c16a807b17b17c4fa3a1d8c242467237be67ba92ad24ff51425329e7ae3d0", + "sha256:5510932596a0f33399b7fff1bd61c59c977f2b8ee987b36539ba97eb3513584a", + "sha256:55820bc631684172b9b56a991d217ec7c2e580d956591dc2144985113980f5a3", + "sha256:57484d39447f94967e83e56db1b1108c68918c44ab519b8ecfc34b790ca52bf7", + "sha256:58ba41e462653eaf68fc4a84ec4d350b26a98d030be1ab24aba1adcc78ffe447", + "sha256:5bc5f921be39ccb65fdda741e04b2555917a4bced24b4df14eddc7569be3b493", + "sha256:5dcc4168536c8f68654f014a3db49b6b4a26b226f735708be2054314ed4964f4", + "sha256:5f92a7cdc6a0ae2abd184e8dfd6ef2279989d24c85d2c85d0423206284103ede", + "sha256:67250b36edfa714ba62dc62d3f238e86db1065fccb538278804790f578253640", + "sha256:6df070a986fc064d865c381aecf0aaff914178fdf6874da2f2387e82d93cc5bd", + "sha256:729aa8ca624c42f309397c5fc9e21db90bf7e2fdd872461aabdbada33de9063c", + "sha256:72bc3a5effa5974be6d965ed8301ac1e869bc18425c8a8fac179fbe7876e3aee", + "sha256:74d86e8924835f863c34e646392ef39039405f6ce52956d8af16497af4064a30", + "sha256:79e5af1ff258bc0fe0bdd6f69bc4ae33935a898e3cbefbbccf22e88a27fa053b", + "sha256:7b103dffb9f6a47ed7ffdf352b78cfe058b1777617371226c1894e1be443afec", + "sha256:83f03f0bd88c12e63ca2d024adeee75234d69808b341e88343b0232329e1f1a1", + "sha256:86d7a68fa53688e1f612c3246044157117403c7ce19ebab7d02daf45bd63913e", + "sha256:878c626cbca3b649e14e972c14539a01191d79e58934e3f3ef4a9e17f90277f8", + "sha256:878f5d649ba1db9f52cc4ef491f7dba2d061cdc48dd444c54260eebc0b1729b9", + "sha256:87bc01226cd288f0bd9a4f9f07bf6827134dc97a96c22e2d28628e824c8de231", + "sha256:8babb2b5751105dc0aef2a2e539f4ba391e738c62038d8cb331c710f6b0f3da7", + "sha256:91e0f7e7be77250b808a5f46d90bf0032527d3c032b2131b63dee54753a4d729", + "sha256:9557545c10d52c845f270b665b52a6a972884725aa5cf12777374e18f2ea8960", + "sha256:9ccb0a4ab926016867260c24c192d9df9586e834f5db83dfa2c8fffb3a6e5056", + "sha256:9d828c5987d543d052b53c579a01a52d96b86f937b1777bbfe11ef2728929357", + "sha256:9efa41d1527b366c88f265a227b20bcec65bda879962e3fc8a2aee11e81266d7", + "sha256:aaf5317c961d93c1a200b9370fb1c6b6836cc7144fef3e5a951326912bf1f5a3", + "sha256:ab69b4fe09e296261377d209068d52402fb85ef89dc78a9ac4a29a895f4e24a7", + "sha256:ad397bc7d51d69cb07ef89e44243f971a04ce1dca9bf24c992c362406c0c6573", + "sha256:ae17fc8103f3b63345709d3e9654a274eee1c6072592aec32b026efd401931d0", + "sha256:af4d8cc28e4c7a2f6a9fed544228c567340f8258b6d7ea815b62a72817bbd178", + "sha256:b22ff939a8856a44f4822da38ef4868bd3a9ade22bb6d9062b36957c850e404f", + "sha256:b549d851f91a4efb3e65498bd4249b1447ab6035a9972f7fc215eb1f59328834", + "sha256:be319f4eb400ee567b722e9ea63d5b2bb31464e3cf1b016502e3ee2de4f86f5c", + "sha256:c0446b2871335d5a5e9fcf1462f954586b09a845832263db95059dcd01442015", + "sha256:c68d2c04f7701a418ec2e5631b7f3552efc32f6bcc1739369c6eeb1af55f62e0", + "sha256:c87ac58b9baaf50b6c1b81a18d20eda7e2883aa9a4fb4f1ca70f2e443bfcdc57", + "sha256:caa2734ada16a44ae57b229d45091f06e30a9a52ace76d7574546ab23008c635", + "sha256:cb34c2d66355fb70ae47b5595aafd7218e59bb9c00ad8cc3abd1406ca5874f07", + "sha256:cb3652bbe6720786b9137862205986f3ae54a09dec8499a995ed58292bdf77c2", + "sha256:cf668f26604e9f7aee9f8eaae4ca07a948168af90b96be97a4b7fa902a6d2ac1", + "sha256:d326ff80ed531bf2507cba93011c30fff2dd51454c85f55df0f59f2030b1687b", + "sha256:d6c2441538e4fadd4291c8420853431a229fcbefc1bf521810fbc2629d8ae8c2", + "sha256:d6ecfd1970b3380a569d7b3ecc5dd70dba295897418ed9e31ec3c16a5ab099a5", + "sha256:e5602a9b5074dcacc113bba4d2f011d2748f50e3201c8139ac5b68cf2a76bd8b", + "sha256:ef806f684f17dbd6263d72a54ad4073af42b42effa3eb42b877e750c24c76f86", + "sha256:f3356afbb301ec34a500b8ba8b47cba0b44ed4641c306e1dd981a08b416170b5", + "sha256:f6f7ee2289176cb1d2c59a24f50900f8b9580259fa9f1a739432242e7d254f93", + "sha256:f7e8f1ee28e0a05831c92dc1c0c1c94af5289963b7cf09eca5b5e3ce4f8c91b0", + "sha256:f8169ec628880bdbca67082a9196e2106060a4a5cbd486ac51881a4df805a36f", + "sha256:fbc88d3ba402b5d041d204ec2449c4078898f89c4a6e6f0ed1c1a510ef1e221d", + "sha256:fbd3fe37353c62fd0eb19fb76f78aa693716262bcd5f9c14bb9e5aca4b3f0dc4" + ], + "version": "==2022.3.2" + }, + "requests": { + "hashes": [ + "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61", + "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d" + ], + "index": "pypi", + "version": "==2.27.1" + }, + "requests-oauthlib": { + "hashes": [ + "sha256:2577c501a2fb8d05a304c09d090d6e47c306fef15809d102b327cf8364bddab5", + "sha256:75beac4a47881eeb94d5ea5d6ad31ef88856affe2332b9aafb52c6452ccf0d7a" + ], + "version": "==1.3.1" + }, + "rsa": { + "hashes": [ + "sha256:5c6bd9dc7a543b7fe4304a631f8a8a3b674e2bbfc49c2ae96200cdbe55df6b17", + "sha256:95c5d300c4e879ee69708c428ba566c59478fd653cc3a22243eeb8ed846950bb" + ], + "version": "==4.8" + }, + "s3transfer": { + "hashes": [ + "sha256:7a6f4c4d1fdb9a2b640244008e142cbc2cd3ae34b386584ef044dd0f27101971", + "sha256:95c58c194ce657a5f4fb0b9e60a84968c808888aed628cd98ab8771fe1db98ed" + ], + "version": "==0.5.2" + }, + "six": { + "hashes": [ + "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", + "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" + ], + "version": "==1.16.0" + }, + "smart-open": { + "hashes": [ + "sha256:71d14489da58b60ce12fc3ecb823facc59a8b23cd1b58edb97175640350d3a62", + "sha256:75abf758717a92a8f53aa96953f0c245c8cedf8e1e4184903db3659b419d4c17" + ], + "version": "==5.2.1" + }, "snscrape": { "git": "https://github.com/bellingcat/snscrape" + }, + "soupsieve": { + "hashes": [ + "sha256:0bcc6d7432153063e3df09c3ac9442af3eba488715bfcad6a4c38ccb2a523124", + "sha256:a714129d3021ec17ce5be346b1007300558b378332c289a1a20e7d4de6ff18a5" + ], + "version": "==2.3.2" + }, + "spacy": { + "hashes": [ + "sha256:0168d97e7fbbddd3258016e4d3c10d1593b7129dddff146c14f3b103ade6b1cd", + "sha256:090e684eec551b5b7d56d9242cea18742515a706191ad158e32e16e8f2fe15ac", + "sha256:0e9a98999b0fce03d4f483112837ac7111378449ace069c7cd050908f0fa5d9f", + "sha256:2053cb78bcf4eec38aa266890a5700167a284d1a26197f851710d29f3d7071b3", + "sha256:36e9ef5a32834383d37bbd27fca49388e31e9b53f77c91ba8ccbf19af10e3aef", + "sha256:3e4c6f298d54044582daca1142b082ee38831bb3d7bb931d2ee601e8b8dce64f", + "sha256:400af3490c36c1b6c895de526ec06f6c7655af5ca595743c07e09e9bc8f378ea", + "sha256:6db861f69f18ba5e00d0bd44744cf1662e00cc3b564d17a1ccdc4625ec3d5c3d", + "sha256:7e20c63ba47eaa33ebd4b2cc6eefa3e8906505273799138ad8ab231b146d8875", + "sha256:87bd072ccacedbf8bc5a692fea1d5c320abd26821c63af157a7c95baa47dc36d", + "sha256:89be328ff378e4cdcfb4dcf38ca2fad740f87213825ed10e8ce9f54b822277b8", + "sha256:ac2288e87de1066ad65676e930f53978d6ee97c34044dca4d24f64a24e2a88b6", + "sha256:bc9184973c9052e1bb9eeb975801e6906aacbe0c009533ec0c34f443832473fd", + "sha256:cff47cdaa824802cd38ae94fe98af9cde6810d86334cd283659c868e0011831a", + "sha256:e759e27da39e469b6367b82281a10eb4e50de04260ba49d42091cbdfe2d99633", + "sha256:ed29278fc89f07c1999ceca5f6702b379589c8e884a57816bdaeb05a1a7b2bbb" + ], + "index": "pypi", + "version": "==3.2.4" + }, + "spacy-legacy": { + "hashes": [ + "sha256:4f7dcbc4e6c8e8cb4eadbb009f9c0a1a2a67442e0032c8d6776c9470c3759903", + "sha256:dfd58b0cc65b3596cb06f7b95e7bf4fff34668297c59eb179eb050db07b199df" + ], + "version": "==3.0.9" + }, + "spacy-loggers": { + "hashes": [ + "sha256:d48c9313a577ad1818da961cf6db71a73fd1e556ae47e6e68d7e28b541d11e18", + "sha256:e75d44f4cf99e6763d7132ca7c8c420e0a92790222a08bc8eb9e24ea2c13536e" + ], + "version": "==1.0.2" + }, + "sqlalchemy": { + "hashes": [ + "sha256:093b3109c2747d5dc0fa4314b1caf4c7ca336d5c8c831e3cfbec06a7e861e1e6", + "sha256:186cb3bd77abf2ddcf722f755659559bfb157647b3fd3f32ea1c70e8311e8f6b", + "sha256:1b4eac3933c335d7f375639885765722534bb4e52e51cdc01a667eea822af9b6", + "sha256:1ff9f84b2098ef1b96255a80981ee10f4b5d49b6cfeeccf9632c2078cd86052e", + "sha256:28aa2ef06c904729620cc735262192e622db9136c26d8587f71f29ec7715628a", + "sha256:28b17ebbaee6587013be2f78dc4f6e95115e1ec8dd7647c4e7be048da749e48b", + "sha256:2c6c411d8c59afba95abccd2b418f30ade674186660a2d310d364843049fb2c1", + "sha256:2ffc813b01dc6473990f5e575f210ca5ac2f5465ace3908b78ffd6d20058aab5", + "sha256:48036698f20080462e981b18d77d574631a3d1fc2c33b416c6df299ec1d10b99", + "sha256:48f0eb5bcc87a9b2a95b345ed18d6400daaa86ca414f6840961ed85c342af8f4", + "sha256:4ba2c1f368bcf8551cdaa27eac525022471015633d5bdafbc4297e0511f62f51", + "sha256:53c7469b86a60fe2babca4f70111357e6e3d5150373bc85eb3b914356983e89a", + "sha256:6204d06bfa85f87625e1831ca663f9dba91ac8aec24b8c65d02fb25cbaf4b4d7", + "sha256:63c82c9e8ccc2fb4bfd87c24ffbac320f70b7c93b78f206c1f9c441fa3013a5f", + "sha256:70e571ae9ee0ff36ed37e2b2765445d54981e4d600eccdf6fe3838bc2538d157", + "sha256:95411abc0e36d18f54fa5e24d42960ea3f144fb16caaa5a8c2e492b5424cc82c", + "sha256:9837133b89ad017e50a02a3b46419869cf4e9aa02743e911b2a9e25fa6b05403", + "sha256:9bec63b1e20ef69484f530fb4b4837e050450637ff9acd6dccc7003c5013abf8", + "sha256:9d8edfb09ed2b865485530c13e269833dab62ab2d582fde21026c9039d4d0e62", + "sha256:9dac1924611698f8fe5b2e58601156c01da2b6c0758ba519003013a78280cf4d", + "sha256:9e1a72197529ea00357640f21d92ffc7024e156ef9ac36edf271c8335facbc1a", + "sha256:9e7094cf04e6042c4210a185fa7b9b8b3b789dd6d1de7b4f19452290838e48bd", + "sha256:a4efb70a62cbbbc052c67dc66b5448b0053b509732184af3e7859d05fdf6223c", + "sha256:a5dbdbb39c1b100df4d182c78949158073ca46ba2850c64fe02ffb1eb5b70903", + "sha256:aeea6ace30603ca9a8869853bb4a04c7446856d7789e36694cd887967b7621f6", + "sha256:b2489e70bfa2356f2d421106794507daccf6cc8711753c442fc97272437fc606", + "sha256:babd63fb7cb6b0440abb6d16aca2be63342a6eea3dc7b613bb7a9357dc36920f", + "sha256:c6fb6b9ed1d0be7fa2c90be8ad2442c14cbf84eb0709dd1afeeff1e511550041", + "sha256:cfd8e4c64c30a5219032e64404d468c425bdbc13b397da906fc9bee6591fc0dd", + "sha256:d17316100fcd0b6371ac9211351cb976fd0c2e12a859c1a57965e3ef7f3ed2bc", + "sha256:d38a49aa75a5759d0d118e26701d70c70a37b896379115f8386e91b0444bfa70", + "sha256:da25e75ba9f3fabc271673b6b413ca234994e6d3453424bea36bb5549c5bbaec", + "sha256:e255a8dd5572b0c66d6ee53597d36157ad6cf3bc1114f61c54a65189f996ab03", + "sha256:e8b09e2d90267717d850f2e2323919ea32004f55c40e5d53b41267e382446044", + "sha256:ecc81336b46e31ae9c9bdfa220082079914e31a476d088d3337ecf531d861228", + "sha256:effadcda9a129cc56408dd5b2ea20ee9edcea24bd58e6a1489fa27672d733182" + ], + "index": "pypi", + "version": "==1.4.35" + }, + "srsly": { + "hashes": [ + "sha256:0d2b92c40f9aa9ba7cb0d8048bd7bfaa13d79d02e9ad6808ca7a8879ba5ed50b", + "sha256:27b3f693296d8a24c306aacd5df38a565ec43214f2aeb51a38170af5dc8b48bc", + "sha256:2d0236feafe3805b384532221596e6749a54d0ff10ba022b333dc1de7aa1b2f7", + "sha256:61e31a72370238387a8ff2a4cebea402227215a1450648b852cad9e511a8b59e", + "sha256:62630dbf20e240610fa64b6717545fcc28d9f18a6085ee93656be000678592a6", + "sha256:82cbf1ec388ed0c16f8062fee30dc54ba8513bd51aae0602570143c6d9218e4c", + "sha256:97a67c8f86ce3207e5e810b998a94ea49d439139adc21d9aadbd0bfab9faa64b", + "sha256:a906c9b1f62c109ddcfaeaf242b19b2ebc5d2f865eb38ef4af35959027c5185b", + "sha256:ab31586fd89e5e5fe6f38664209577b03e85fb834f238c928c15ed3c80ab9c73", + "sha256:acbb14546da9bdf287dfefa0883e793ac563c7868eca32cd65504463980022fa", + "sha256:cffec31143c6e1c783ead11245c08938cae859115d4cb0f4cf423e2895707b74", + "sha256:d1d13dc2133d5a83d30774793adb2c3fd9be905da339e2d54e2c79d55248c1a5", + "sha256:d3b93531f086c516a26f729beac9b052c2ad0528d72e80f9d193de26aa2202be", + "sha256:dbe91f6dd4aea9e819493628356dc715bd9c606486297bb7ca5748e6e003841c", + "sha256:f5ddcc5f36eb318d011c6f142e826c1ca15cb34bd5beab2f21fee62d4ae4d590", + "sha256:fb08416fd6ef04c51fdeefd6d28592b64563b2853243c571a9b0d67403b5be7f" + ], + "version": "==2.4.3" + }, + "telethon": { + "hashes": [ + "sha256:04fdc5fa4ed3e886e6ecf4bad79205ab8880c6aefbd42c29c89c689a502aa816", + "sha256:818cb61281ed3f75ba4da9b68cb69486bed9474d2db4e0aa16e482053117452c" + ], + "index": "pypi", + "version": "==1.24.0" + }, + "thinc": { + "hashes": [ + "sha256:0368c0b279492c0ed0b5b1bc79614e8a335ae1ccc3b1617de46f04eb74dc9a43", + "sha256:0557791e73865fa81f09623dd1f9b98b6d4ab80c63fca5f141530536516aac98", + "sha256:2e315020da85c3791e191fbf37c4a2433f57cf322e27380da0cd4de99d96053b", + "sha256:376b196da6c69c8efaaf26fb99f6997543d80ea4bc5f4ab8600e9d1d521a7dc9", + "sha256:42641f021f4fdc47eaec4b9ff66246b153b9783ef24e2c266bf0f51eccd40db5", + "sha256:489521ca3cca469d67432fc30f14c7c13c17320b179bf8e362319313feaafbb7", + "sha256:5d98e6b3bf220c1068442d09d7c34dd8e52bbdfa43ea32f773747c5909a1c011", + "sha256:70781a0802fbb62a27217ccb80e744e80a5b43f9107ac596c5cd2dc9878ae258", + "sha256:72cec290eb1b54ba6144b05d96f3247ea34eb41c66842961b05b408b93f2ba9b", + "sha256:8ddda1aa1432eef8bab5c83e4cf2020f1ed891771a6dd86729f1aa6078f25f2c", + "sha256:a1f19dd9a7121d332d16446db39b4999abb4f040ce7c71bc86ea05664c86d361", + "sha256:a4276b64a8cd91197f30382c0874f59fa6c94ef533150d845b2f30998aae87cc", + "sha256:a4ee24a6505d63b6f0161f25d0f73f87ab569e0e1a9799a6baca97352788a91f", + "sha256:bed92be72516b1511fecaf616ea31ff1c2e972a7ec4ad991c212f9b2f5c94183", + "sha256:ecd8eab82598b079e901f16567818dc955481326c01d84b819c3c05801b97e07", + "sha256:f9ba4e4dac98e166950e004c87a0f57b8f8796ecd0e3b6973beb6febc20257ff" + ], + "version": "==8.0.15" + }, + "tqdm": { + "hashes": [ + "sha256:40be55d30e200777a307a7585aee69e4eabb46b4ec6a4b4a5f2d9f11e7d5408d", + "sha256:74a2cdefe14d11442cedf3ba4e21a3b84ff9a2dbdc6cfae2c34addb2a14a5ea6" + ], + "index": "pypi", + "version": "==4.64.0" + }, + "typer": { + "hashes": [ + "sha256:5646aef0d936b2c761a10393f0384ee6b5c7fe0bb3e5cd710b17134ca1d99cff", + "sha256:e8467f0ebac0c81366c2168d6ad9f888efdfb6d4e1d3d5b4a004f46fa444b5c3" + ], + "version": "==0.4.1" + }, + "typing-extensions": { + "hashes": [ + "sha256:1a9462dcc3347a79b1f1c0271fbe79e844580bb598bafa1ed208b94da3cdcd42", + "sha256:21c85e0fe4b9a155d0799430b0ad741cdce7e359660ccbd8b530613e8df88ce2" + ], + "version": "==4.1.1" + }, + "tzdata": { + "hashes": [ + "sha256:238e70234214138ed7b4e8a0fab0e5e13872edab3be586ab8198c407620e2ab9", + "sha256:8b536a8ec63dc0751342b3984193a3118f8fca2afe25752bb9b7fffd398552d3" + ], + "markers": "python_version >= '3.6'", + "version": "==2022.1" + }, + "tzlocal": { + "hashes": [ + "sha256:89885494684c929d9191c57aa27502afc87a579be5cdd3225c77c463ea043745", + "sha256:ee5842fa3a795f023514ac2d801c4a81d1743bbe642e3940143326b3a00addd7" + ], + "version": "==4.2" + }, + "urllib3": { + "hashes": [ + "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14", + "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e" + ], + "version": "==1.26.9" + }, + "wasabi": { + "hashes": [ + "sha256:217edcb2850993c7931399e7419afccde13539d589e333bc85f9053cf0bb1772", + "sha256:ada6f13e9b70ef26bf95fad0febdfdebe2005e29a08ad58f4bbae383a97298cf" + ], + "version": "==0.9.1" + }, + "websockets": { + "hashes": [ + "sha256:038afef2a05893578d10dadbdbb5f112bd115c46347e1efe99f6a356ff062138", + "sha256:05f6e9757017270e7a92a2975e2ae88a9a582ffc4629086fd6039aa80e99cd86", + "sha256:0b66421f9f13d4df60cd48ab977ed2c2b6c9147ae1a33caf5a9f46294422fda1", + "sha256:0cd02f36d37e503aca88ab23cc0a1a0e92a263d37acf6331521eb38040dcf77b", + "sha256:0f73cb2526d6da268e86977b2c4b58f2195994e53070fe567d5487c6436047e6", + "sha256:117383d0a17a0dda349f7a8790763dde75c1508ff8e4d6e8328b898b7df48397", + "sha256:1c1f3b18c8162e3b09761d0c6a0305fd642934202541cc511ef972cb9463261e", + "sha256:1c9031e90ebfc486e9cdad532b94004ade3aa39a31d3c46c105bb0b579cd2490", + "sha256:2349fa81b6b959484bb2bda556ccb9eb70ba68987646a0f8a537a1a18319fb03", + "sha256:24b879ba7db12bb525d4e58089fcbe6a3df3ce4666523183654170e86d372cbe", + "sha256:2aa9b91347ecd0412683f28aabe27f6bad502d89bd363b76e0a3508b1596402e", + "sha256:56d48eebe9e39ce0d68701bce3b21df923aa05dcc00f9fd8300de1df31a7c07c", + "sha256:5a38a0175ae82e4a8c4bac29fc01b9ee26d7d5a614e5ee11e7813c68a7d938ce", + "sha256:5b04270b5613f245ec84bb2c6a482a9d009aefad37c0575f6cda8499125d5d5c", + "sha256:6193bbc1ee63aadeb9a4d81de0e19477401d150d506aee772d8380943f118186", + "sha256:669e54228a4d9457abafed27cbf0e2b9f401445c4dfefc12bf8e4db9751703b8", + "sha256:6a009eb551c46fd79737791c0c833fc0e5b56bcd1c3057498b262d660b92e9cd", + "sha256:71a4491cfe7a9f18ee57d41163cb6a8a3fa591e0f0564ca8b0ed86b2a30cced4", + "sha256:7b38a5c9112e3dbbe45540f7b60c5204f49b3cb501b40950d6ab34cd202ab1d0", + "sha256:7bb9d8a6beca478c7e9bdde0159bd810cc1006ad6a7cb460533bae39da692ca2", + "sha256:82bc33db6d8309dc27a3bee11f7da2288ad925fcbabc2a4bb78f7e9c56249baf", + "sha256:8351c3c86b08156337b0e4ece0e3c5ec3e01fcd14e8950996832a23c99416098", + "sha256:8beac786a388bb99a66c3be4ab0fb38273c0e3bc17f612a4e0a47c4fc8b9c045", + "sha256:97950c7c844ec6f8d292440953ae18b99e3a6a09885e09d20d5e7ecd9b914cf8", + "sha256:98f57b3120f8331cd7440dbe0e776474f5e3632fdaa474af1f6b754955a47d71", + "sha256:9ca2ca05a4c29179f06cf6727b45dba5d228da62623ec9df4184413d8aae6cb9", + "sha256:a03a25d95cc7400bd4d61a63460b5d85a7761c12075ee2f51de1ffe73aa593d3", + "sha256:a10c0c1ee02164246f90053273a42d72a3b2452a7e7486fdae781138cf7fbe2d", + "sha256:a72b92f96e5e540d5dda99ee3346e199ade8df63152fa3c737260da1730c411f", + "sha256:ac081aa0307f263d63c5ff0727935c736c8dad51ddf2dc9f5d0c4759842aefaa", + "sha256:b22bdc795e62e71118b63e14a08bacfa4f262fd2877de7e5b950f5ac16b0348f", + "sha256:b4059e2ccbe6587b6dc9a01db5fc49ead9a884faa4076eea96c5ec62cb32f42a", + "sha256:b7fe45ae43ac814beb8ca09d6995b56800676f2cfa8e23f42839dc69bba34a42", + "sha256:bef03a51f9657fb03d8da6ccd233fe96e04101a852f0ffd35f5b725b28221ff3", + "sha256:bffc65442dd35c473ca9790a3fa3ba06396102a950794f536783f4b8060af8dd", + "sha256:c21a67ab9a94bd53e10bba21912556027fea944648a09e6508415ad14e37c325", + "sha256:c67d9cacb3f6537ca21e9b224d4fd08481538e43bcac08b3d93181b0816def39", + "sha256:c6e56606842bb24e16e36ae7eb308d866b4249cf0be8f63b212f287eeb76b124", + "sha256:cb316b87cbe3c0791c2ad92a5a36bf6adc87c457654335810b25048c1daa6fd5", + "sha256:cef40a1b183dcf39d23b392e9dd1d9b07ab9c46aadf294fff1350fb79146e72b", + "sha256:cf931c33db9c87c53d009856045dd524e4a378445693382a920fa1e0eb77c36c", + "sha256:d4d110a84b63c5cfdd22485acc97b8b919aefeecd6300c0c9d551e055b9a88ea", + "sha256:d5396710f86a306cf52f87fd8ea594a0e894ba0cc5a36059eaca3a477dc332aa", + "sha256:f09f46b1ff6d09b01c7816c50bd1903cf7d02ebbdb63726132717c2fcda835d5", + "sha256:f14bd10e170abc01682a9f8b28b16e6f20acf6175945ef38db6ffe31b0c72c3f", + "sha256:f5c335dc0e7dc271ef36df3f439868b3c790775f345338c2f61a562f1074187b", + "sha256:f8296b8408ec6853b26771599990721a26403e62b9de7e50ac0a056772ac0b5e", + "sha256:fa35c5d1830d0fb7b810324e9eeab9aa92e8f273f11fdbdc0741dcded6d72b9f" + ], + "version": "==10.2" + }, + "yt-dlp": { + "hashes": [ + "sha256:6edefe326b1e1478fdbe627a66203e5248a6b0dd50c101e682cf700ab70cdf72", + "sha256:8758d016509d4574b90fbde975aa70adaef71ed5e7a195141588f6d6945205ba" + ], + "index": "pypi", + "version": "==2022.4.8" } }, "develop": { @@ -42,7 +1198,6 @@ "sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4", "sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", "version": "==21.4.0" }, "babel": { @@ -50,7 +1205,6 @@ "sha256:ab49e12b91d937cd11f0b67cb259a57ab4ad2b59ac7a3b41d6c06c0ac5b0def9", "sha256:bc0c176f9f6a994582230df350aa6e05ba2ebe4b3ac317eab29d9be5d2768da0" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==2.9.1" }, "black": { @@ -99,16 +1253,12 @@ }, "click": { "hashes": [ - "sha256:24e1a4a9ec5bf6299411369b208c1df2188d9eb8d916302fe6bf03faed227f1e", - "sha256:479707fe14d9ec9a0757618b7a100a0ae4c4e236fac5b7f80ca68028141a1a72" + "sha256:6a7a62563bbfabfda3a38f3023a1db4a35978c0abd76f6c9605ecd6554d6d9b1", + "sha256:8458d7b1287c5fb128c90e23381cf99dcde74beaf6c7ff6384ce84d6fe090adb" ], - "markers": "python_version >= '3.7'", - "version": "==8.1.2" + "version": "==8.0.4" }, "coverage": { - "extras": [ - "toml" - ], "hashes": [ "sha256:03e2a7826086b91ef345ff18742ee9fc47a6839ccd517061ef8fa1976e652ce9", "sha256:07e6db90cd9686c767dcc593dff16c8c09f9814f5e9c51034066cad3373b914d", @@ -152,7 +1302,6 @@ "sha256:f9987b0354b06d4df0f4d3e0ec1ae76d7ce7cbca9a2f98c25041eb79eec766f1", "sha256:fd9e830e9d8d89b20ab1e5af09b32d33e1a08ef4c4e14411e559556fd788e6b2" ], - "markers": "python_version >= '3.7'", "version": "==6.3.2" }, "docutils": { @@ -160,7 +1309,6 @@ "sha256:686577d2e4c32380bb50cbb22f575ed742d58168cee37e99117a854bcd88f125", "sha256:cf316c8370a737a022b72b56874f6602acf974a37a9fba42ec2876387549fc61" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", "version": "==0.17.1" }, "idna": { @@ -176,7 +1324,6 @@ "sha256:1db2f82529e53c3e929e8926a1fa9235aa82d0bd0c580359c67ec31b2fddaa8c", "sha256:cd1750d452385ca327479d45b64d9c7729ecf0b3969a58148298c77092261f9d" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.3.0" }, "importlib-metadata": { @@ -199,7 +1346,6 @@ "sha256:539835f51a74a69f41b848a9645dbdc35b4f20a3b601e2d9a7e22947b15ff119", "sha256:640bed4bb501cbd17194b3cace1dc2126f5b619cf068a726b98192a0fde74ae9" ], - "markers": "python_version >= '3.7'", "version": "==3.1.1" }, "markupsafe": { @@ -245,7 +1391,6 @@ "sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a", "sha256:fc7b548b17d238737688817ab67deebb30e8073c95749d55538ed473130ec0c7" ], - "markers": "python_version >= '3.7'", "version": "==2.1.1" }, "mypy-extensions": { @@ -260,7 +1405,6 @@ "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb", "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522" ], - "markers": "python_version >= '3.6'", "version": "==21.3" }, "pathspec": { @@ -275,7 +1419,6 @@ "sha256:7535e70dfa32e84d4b34996ea99c5e432fa29a708d0f4e394bbcb2a8faa4f16d", "sha256:bcae7cab893c2d310a711b70b24efb93334febe65f8de776ee320b517471e227" ], - "markers": "python_version >= '3.7'", "version": "==2.5.1" }, "pluggy": { @@ -283,7 +1426,6 @@ "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159", "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3" ], - "markers": "python_version >= '3.6'", "version": "==1.0.0" }, "py": { @@ -291,7 +1433,6 @@ "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719", "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", "version": "==1.11.0" }, "pygments": { @@ -299,7 +1440,6 @@ "sha256:44238f1b60a76d78fc8ca0528ee429702aae011c265fe6a8dd8b63049ae41c65", "sha256:4e426f72023d88d03b2fa258de560726ce890ff3b630f88c21cbb8b2503b8c6a" ], - "markers": "python_version >= '3.5'", "version": "==2.11.2" }, "pyparsing": { @@ -307,7 +1447,6 @@ "sha256:7bf433498c016c4314268d95df76c81b842a4cb2b276fa3312cfb1e1d85f6954", "sha256:ef7b523f6356f763771559412c0d7134753f037822dad1b16945b7b846f7ad06" ], - "markers": "python_full_version >= '3.6.8'", "version": "==3.0.8" }, "pytest": { @@ -347,6 +1486,7 @@ "sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7", "sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c" ], + "index": "pypi", "version": "==2022.1" }, "requests": { @@ -354,7 +1494,7 @@ "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61", "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", + "index": "pypi", "version": "==2.27.1" }, "snowballstemmer": { @@ -385,7 +1525,6 @@ "sha256:806111e5e962be97c29ec4c1e7fe277bfd19e9652fb1a4392105b43e01af885a", "sha256:a072735ec80e7675e3f432fcae8610ecf509c5f1869d17e2eecff44389cdbc58" ], - "markers": "python_version >= '3.5'", "version": "==1.0.2" }, "sphinxcontrib-devhelp": { @@ -393,7 +1532,6 @@ "sha256:8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e", "sha256:ff7f1afa7b9642e7060379360a67e9c41e8f3121f2ce9164266f61b9f4b338e4" ], - "markers": "python_version >= '3.5'", "version": "==1.0.2" }, "sphinxcontrib-htmlhelp": { @@ -401,7 +1539,6 @@ "sha256:d412243dfb797ae3ec2b59eca0e52dac12e75a241bf0e4eb861e450d06c6ed07", "sha256:f5f8bb2d0d629f398bf47d0d69c07bc13b65f75a81ad9e2f71a63d4b7a2f6db2" ], - "markers": "python_version >= '3.6'", "version": "==2.0.0" }, "sphinxcontrib-jsmath": { @@ -409,7 +1546,6 @@ "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178", "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8" ], - "markers": "python_version >= '3.5'", "version": "==1.0.1" }, "sphinxcontrib-qthelp": { @@ -417,7 +1553,6 @@ "sha256:4c33767ee058b70dba89a6fc5c1892c0d57a54be67ddd3e7875a18d14cba5a72", "sha256:bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6" ], - "markers": "python_version >= '3.5'", "version": "==1.0.3" }, "sphinxcontrib-serializinghtml": { @@ -425,7 +1560,6 @@ "sha256:352a9a00ae864471d3a7ead8d7d79f5fc0b57e8b3f95e9867eb9eb28999b92fd", "sha256:aa5f6de5dfdf809ef505c4895e51ef5c9eac17d0f287933eb49ec495280b6952" ], - "markers": "python_version >= '3.5'", "version": "==1.1.5" }, "tomli": { @@ -433,7 +1567,7 @@ "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f" ], - "markers": "python_full_version < '3.11.0'", + "markers": "python_version < '3.11'", "version": "==2.0.1" }, "typing-extensions": { @@ -441,7 +1575,6 @@ "sha256:1a9462dcc3347a79b1f1c0271fbe79e844580bb598bafa1ed208b94da3cdcd42", "sha256:21c85e0fe4b9a155d0799430b0ad741cdce7e359660ccbd8b530613e8df88ce2" ], - "markers": "python_version < '3.10'", "version": "==4.1.1" }, "urllib3": { @@ -449,7 +1582,6 @@ "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14", "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", "version": "==1.26.9" }, "zipp": { @@ -457,7 +1589,6 @@ "sha256:56bf8aadb83c24db6c4b577e13de374ccfb67da2078beba1d037c17980bf43ad", "sha256:c4f6e5bbf48e74f7a38e7cc5b0480ff42b0ae5178957d564d18932525d5cf099" ], - "markers": "python_version >= '3.7'", "version": "==3.8.0" } } diff --git a/app.py b/app.py index 2367575..3ebc673 100644 --- a/app.py +++ b/app.py @@ -16,6 +16,7 @@ from cisticola.scraper import ( BitchuteScraper, RumbleScraper, ) +from cisticola.transformer import (ETLController, TelegramTelethonTransformer) def sync_channels(args): @@ -59,11 +60,17 @@ def sync_channels(args): channel = ( session.query(Channel) .filter_by( - platform_id=str(platform_id), platform=c["platform"], url=c["url"] + platform_id=str(platform_id), platform=str(c["platform"]) ) .first() ) + if not channel: + channel = session.query(Channel).filter_by(platform=str(c["platform"]), url=str(c["url"])).first() + + if not channel and c["screenname"] != '' and c["screenname"] is not None: + channel = session.query(Channel).filter_by(platform=str(c["platform"]), screenname=str(c["screenname"])).first() + if not channel: channel = Channel(**c, source="researcher") logger.debug(f"{channel} does not exist, adding") @@ -73,6 +80,26 @@ def sync_channels(args): wks.update_cell(row, 1, channel.id) time.sleep(1) + else: + logger.info(f"Channel found, updating channel {channel}") + channel.name = c["name"] + channel.category = c["category"] + channel.platform = c["platform"] + channel.url = c["url"] + channel.screenname = c["screenname"] + channel.country = c["country"] + channel.influencer = c["influencer"] + channel.public = c["public"] + channel.chat = c["chat"] + channel.notes = c["notes"] + channel.source = "researcher" + + session.flush() + session.commit() + + wks.update_cell(row, 1, channel.id) + time.sleep(1) + else: channel = session.query(Channel).filter_by(id=int(c["id"])).first() @@ -87,6 +114,7 @@ def sync_channels(args): channel.public = c["public"] channel.chat = c["chat"] channel.notes = c["notes"] + channel.source = "researcher" session.flush() session.commit() @@ -122,6 +150,18 @@ def get_scraper_controller(): return controller +def get_transformer_controller(): + engine = create_engine(os.environ["DB"]) + + controller = ETLController() + controller.connect_to_db(engine) + + transformers = [TelegramTelethonTransformer()] + + controller.register_transformers(transformers) + + return controller + def scrape_channels(args): logger.info(f"Scraping channels, media: {args.media}") @@ -143,6 +183,12 @@ def archive_media(args): controller = get_scraper_controller() controller.archive_unarchived_media() +def transform(args): + logger.info(f"Transforming untransformed media") + + controller = get_transformer_controller() + controller.transform_all_untransformed() + def init_db(): engine = create_engine(os.environ["DB"]) @@ -182,5 +228,8 @@ if __name__ == "__main__": elif args.command == "channel-info": logger.add("logs/channel-info.log", level="TRACE", rotation="100 MB") scrape_channel_info(args) + elif args.command == "transform": + logger.add("logs/transform.log", level="TRACE", rotation="100 MB") + transform(args) else: logger.error(f"Unrecognized command {args.command}") diff --git a/cisticola/base.py b/cisticola/base.py index bd70c12..9ca32ee 100644 --- a/cisticola/base.py +++ b/cisticola/base.py @@ -1,5 +1,5 @@ from typing import List -from dataclasses import dataclass +from dataclasses import dataclass, field from datetime import datetime import tempfile import json @@ -10,6 +10,11 @@ from sqlalchemy import Table, Column, Integer, String, JSON, DateTime, ForeignKe import pytesseract import PIL import exiftool +import re +from langdetect import detect, DetectorFactory +from langdetect.lang_detect_exception import LangDetectException +from loguru import logger +import spacy from .utils import make_request @@ -109,6 +114,14 @@ class RawChannelInfo: #: Datetime (relative to UTC) that the scraped post was archived at. date_archived: datetime +nlp_en = spacy.load('en_core_web_sm', disable=['parser', 'tok2vec', 'attribute_ruler']) +nlp_de = spacy.load('de_core_news_sm', disable=['parser', 'tok2vec', 'attribute_ruler']) +nlp_it = spacy.load('it_core_news_sm', disable=['parser', 'tok2vec', 'attribute_ruler']) +nlp_fr = spacy.load('fr_core_news_sm', disable=['parser', 'tok2vec', 'attribute_ruler']) +nlp_ru = spacy.load('ru_core_news_sm', disable=['parser', 'tok2vec', 'attribute_ruler']) +nlp_nl = spacy.load('nl_core_news_sm', disable=['parser', 'tok2vec', 'attribute_ruler']) +nlp_xx = spacy.load('xx_ent_wiki_sm') + @dataclass class Post: """An object with fields for columns in the analysis table""" @@ -136,6 +149,9 @@ class Post: #: Datetime (relative to UTC) that the scraped post was archived at. date_archived: datetime + + #: Datetime (UTC) that the scraped post was transformed at. + date_transformed: datetime #: URL of the original post url: str @@ -149,6 +165,24 @@ class Post: #: Text of the original post content: str + #: Named entities detected in post + named_entities: list = field(default_factory=list) + + #: Any cryptocurrency addresses in post + cryptocurrency_addresses: list = field(default_factory=list) + + #: Hashtags in post + hashtags: list = field(default_factory=list) + + #: Links to any other websites + outlinks: list = field(default_factory=list) + + #: Detected language of post + detected_language: str = "" + + #: Normalized post content + normalized_content: str = "" + #: The ID of the Channel that the post was forwarded or quoted from forwarded_from: int = None @@ -156,7 +190,60 @@ class Post: reply_to: int = None def hydrate(self): - pass + URL_REGEX = r"""(?i)\b((?:https?:(?:/{1,3}|[a-z0-9%])|[a-z0-9.\-]+[.](?:com|net|org|edu|gov|mil|aero|asia|biz|cat|coop|info|int|jobs|mobi|museum|name|post|pro|tel|travel|xxx|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|Ja|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|yu|za|zm|zw)/)(?:[^\s()<>{}\[\]]+|\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\))+(?:\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\)|[^\s`!()\[\]{};:\'\".,<>?«»“”‘’])|(?:(? Generator[ScraperResult, None, None]: - scr = snscrape.modules.telegram.TelegramChannelScraper( - channel.screenname) - - g = scr.get_items() - - for post in g: - if since is not None and post.date.replace(tzinfo=timezone.utc) <= since.date.replace(tzinfo=timezone.utc): - logger.info(f'Timestamp of post {post} is earlier than the previous archived timestamp {post.date.replace(tzinfo=timezone.utc)}') - break - - logger.info(f'Processing post {post}') - - archived_urls = {} - - for image_url in post.images: - archived_urls[image_url] = None - - for video_url in post.videos: - archived_urls[video_url] = None - - if archive_media: - for url in archived_urls: - media_blob, content_type, key = self.url_to_blob(url) - archived_url = self.archive_blob(media_blob, content_type, key) - archived_urls[url] = archived_url - - yield ScraperResult( - scraper=self.__version__, - platform="Telegram", - channel=channel.id, - platform_id=post.url, - date=post.date, - date_archived=datetime.now(timezone.utc), - raw_data=post.json(), - archived_urls=archived_urls, - media_archived=datetime.now(timezone.utc) if archive_media else None - ) - - @logger.catch - def get_profile(self, channel: Channel) -> RawChannelInfo: - - scr = snscrape.modules.telegram.TelegramChannelScraper( - channel.screenname) - - profile = scr._get_entity().__dict__ - - return RawChannelInfo(scraper=self.__version__, - platform=channel.platform, - channel=channel.id, - raw_data=json.dumps(profile), - date_archived=datetime.now(timezone.utc)) diff --git a/cisticola/transformer/__init__.py b/cisticola/transformer/__init__.py index 7812b52..b4e0968 100644 --- a/cisticola/transformer/__init__.py +++ b/cisticola/transformer/__init__.py @@ -1,3 +1,4 @@ from .base import ETLController from .twitter import TwitterTransformer -from .bitchute import BitchuteTransformer \ No newline at end of file +from .bitchute import BitchuteTransformer +from .telegram_telethon import TelegramTelethonTransformer \ No newline at end of file diff --git a/cisticola/transformer/base.py b/cisticola/transformer/base.py index 38da38a..385f593 100644 --- a/cisticola/transformer/base.py +++ b/cisticola/transformer/base.py @@ -2,7 +2,9 @@ from typing import List, Generator, Union, Callable from loguru import logger from sqlalchemy.orm import sessionmaker, make_transient from sqlalchemy.engine.base import Engine +from sqlalchemy.sql.expression import func from collections import defaultdict +from datetime import datetime from cisticola.base import ScraperResult, Post, Media, Channel, mapper_registry @@ -68,6 +70,10 @@ class ETLController: self.transformers.append(transformer) + def register_transformers(self, transformers): + for t in transformers: + self.register_transformer(t) + def connect_to_db(self, engine: Engine): """Connects the ETLController to a SQLAlchemy engine. @@ -90,10 +96,11 @@ class ETLController: # This is using some adhoc unique constraints that might be worth formalizing at some point if type(obj) == Channel: - instance = session.query(Channel).filter_by(url=obj.url, platform_id=obj.platform_id, platform=obj.platform).first() + instance = session.query(Channel).filter_by(url=obj.url, platform_id=str(obj.platform_id), platform=obj.platform).first() elif type(obj) == Post: - instance = session.query(Post).filter_by(platform=obj.platform, platform_id=obj.platform_id).first() + instance = None + # instance = session.query(Post).filter_by(platform=obj.platform, platform_id=obj.platform_id).first() elif issubclass(type(obj), Media): instance = session.query(type(obj)).filter_by(original_url=obj.original_url, post=obj.post).first() @@ -125,9 +132,9 @@ class ETLController: if hydrate: obj.hydrate() - logger.info(f"Inserting new object {obj}") session.add(obj) - session.flush() + logger.trace(f"Inserted new object {obj}") + return obj @logger.catch(reraise=True) @@ -146,21 +153,24 @@ class ETLController: logger.error("No DB session") return + session = self.session() + for result in results: - for transformer in self.transformers: - handled = False + if result.scraper is not None and result.platform is not None: + for transformer in self.transformers: + handled = False - if transformer.can_handle(result): - logger.info(f"{transformer} is handling result {result}") - handled = True - session = self.session() + if transformer.can_handle(result): + logger.trace(f"{transformer} is handling result {result.id} ({result.date})") + handled = True - transformer.transform(result, lambda obj: self.insert_or_select(obj, session, hydrate)) - session.commit() - break + transformer.transform(result, lambda obj: self.insert_or_select(obj, session, hydrate), session) - if handled == False: - logger.warning(f"No Transformer could handle {result}") + session.commit() + break + + if handled == False: + logger.warning(f"No Transformer could handle ID {result.id} with platform {result.platform} ({result.date})") @logger.catch(reraise=True) def transform_all_untransformed(self, hydrate: bool = True): @@ -178,12 +188,33 @@ class ETLController: return session = self.session() - untransformed = ( - session.query(ScraperResult) + + BATCH_SIZE = 50000 + offset = 0 + batch = [] + + query = (session.query(ScraperResult) + # .filter_by(platform="Telegram") + # .filter(ScraperResult.raw_data.notlike("%MessageService%")) .join(Post, isouter=True) .where(Post.raw_id == None) - .all() + # .order_by(func.random()) + .order_by(ScraperResult.date.asc()) ) - logger.info(f"Found {len(untransformed)} items to ETL") - self.transform_results(untransformed, hydrate=hydrate) + while len(batch) > 0 or offset == 0: + logger.info(f"Fetching untransformed batch of {BATCH_SIZE}, offset {offset}") + + batch = query.slice(offset, offset + BATCH_SIZE).all() + # untransformed = ( + + # .limit(BATCH_SIZE) + # .offset(offset) + # .all() + # ) + + offset += BATCH_SIZE + + logger.info(f"Found {len(batch)} items to ETL ({offset} already processed)") + + self.transform_results(batch, hydrate=hydrate) diff --git a/cisticola/transformer/telegram_telethon.py b/cisticola/transformer/telegram_telethon.py new file mode 100644 index 0000000..b98003e --- /dev/null +++ b/cisticola/transformer/telegram_telethon.py @@ -0,0 +1,167 @@ +import json +from loguru import logger +from typing import Generator, Union, Callable +import dateutil.parser +from bs4 import BeautifulSoup +from psycopg2 import DatabaseError +import requests +import time +from telethon.sync import TelegramClient +from telethon.errors.rpcerrorlist import ChannelPrivateError, ChannelInvalidError +import os +from datetime import datetime, timezone + +from cisticola.transformer.base import Transformer +from cisticola.base import ScraperResult, Post, Image, Video, Media, Channel + + +class TelegramTelethonTransformer(Transformer): + __version__ = 'TelegramTelethonTransformer 0.0.1' + + bad_channels = {} + + def can_handle(self, data: ScraperResult) -> bool: + scraper = data.scraper.split(' ') + if scraper[0] == "TelegramTelethonScraper": + return True + + return False + + def get_screenname_from_id(self, channel_id): + api_id = os.environ['TELEGRAM_API_ID'] + api_hash = os.environ['TELEGRAM_API_HASH'] + + try: + with TelegramClient("transform.session", api_id, api_hash) as client: + data = client.get_entity(channel_id) + + return (data.username, data.title, "") + except ChannelPrivateError: + logger.info("ChannelPrivateError") + return ("", "", "ChannelPrivateError") + except ChannelInvalidError: + logger.info("ChannelInvalidError") + return ("", "", "ChannelInvalidError") + except ValueError: + logger.info("ValueError") + return ("", "", "ValueError") + + def get_name_from_web_interface(self, orig_screenname, id): + url = "https://t.me/s/" + orig_screenname + "/" + str(id) + + # this doesn't work for chat channels + if orig_screenname in self.bad_channels: + logger.debug(f"Skipping screenname because it is not accessible for channel {orig_screenname}") + return "" + + logger.info(f"Finding channel from URL {url}") + r = requests.get(url) + + if r.url != url: + self.bad_channels[orig_screenname] = True + return "" + + soup = BeautifulSoup(r.content) + post = soup.findAll("div", {"data-post" : orig_screenname + "/" + str(id)}) + name = "" + + # multiple posts can be combined into one result in the web interface + decrement = 0 + while len(post) == 0: + decrement += 1 + if decrement > 8: + break + + logger.info(f"Could not find post from {url}, looking for id {id - decrement}") + post = soup.findAll("div", {"data-post" : orig_screenname + "/" + str(id - decrement)}) + + if len(post) == 0: + logger.warning(f"Could not find post from {url}") + else: + fwd_tag = post[0].findAll("a", {"class", "tgme_widget_message_forwarded_from_name"}) + + if len(fwd_tag) == 0: + fwd_tag = post[0].findAll("span", {"class", "tgme_widget_message_forwarded_from_name"}) + + if len(fwd_tag) >= 1: + name = fwd_tag[0].text + + return name + + def transform(self, data: ScraperResult, insert: Callable, session) -> Generator[Union[Post, Channel, Media], None, None]: + raw = json.loads(data.raw_data) + + if raw['_'] != 'Message': + logger.warning(f"Cannot convert type {raw['_']} to post") + return + + fwd_from = None + + if raw['fwd_from'] and raw['fwd_from']['from_id'] and 'channel_id' in raw['fwd_from']['from_id']: + channel = session.query(Channel).filter_by(platform_id=str(raw['fwd_from']['from_id']['channel_id'])).first() + + if channel is None: + (screenname, name, notes) = self.get_screenname_from_id(raw['fwd_from']['from_id']['channel_id']) + + if name == "": + logger.info("Trying fallback web interface") + orig_channel = session.query(Channel).filter_by(id=data.channel).first() + if orig_channel.screenname is not None: + name = self.get_name_from_web_interface(orig_channel.screenname, raw['id']) + + channel = Channel( + name=name, + platform_id=raw['fwd_from']['from_id']['channel_id'], + platform=data.platform, + url="https://t.me/s/" + screenname if screenname is not None else "", + screenname=screenname, + category='forwarded', + source=self.__version__, + notes=notes + ) + + channel = insert(channel) + logger.info(f"Added {channel}") + + fwd_from = channel.id + + reply_to = None + if raw['reply_to']: + reply_to_id = raw['reply_to']['reply_to_msg_id'] + post = session.query(Post).filter_by(channel=data.channel, platform_id=reply_to_id).first() + if post is None: + reply_to = -1 + else: + reply_to = post.id + + transformed = Post( + raw_id = data.id, + platform_id = raw['id'], + scraper = data.scraper, + transformer=self.__version__, + platform=data.platform, + channel=data.channel, + date=dateutil.parser.parse(raw['date']), + date_archived=data.date_archived, + date_transformed=datetime.now(timezone.utc), + url="", + content=raw['message'], + author_id=raw['post_author'], + author_username="", + forwarded_from=fwd_from, + reply_to=reply_to + ) + + transformed = insert(transformed) + + # for k in data.archived_urls: + # if data.archived_urls[k]: + # archived_url = data.archived_urls[k] + # ext = archived_url.split('.')[-1] + + # if ext == 'mp4' or ext == 'mov' or ext == 'avi' or ext =='mkv': + # insert(Video(url=archived_url, post=transformed.id, raw_id=data.id, original_url=k)) + # else: + # insert(Image(url=archived_url, post=transformed.id, raw_id=data.id, original_url=k)) + + \ No newline at end of file diff --git a/russian_telegram_ingest.py b/russian_telegram_ingest.py deleted file mode 100644 index 23d3094..0000000 --- a/russian_telegram_ingest.py +++ /dev/null @@ -1,138 +0,0 @@ -import sys - -from sqlalchemy import create_engine -from loguru import logger - -from cisticola.base import Channel -from cisticola.scraper import ( - ScraperController, - TelegramSnscrapeScraper) - -logger.remove() -logger.add(sys.stderr, level="INFO") -logger.add("../russian_telegram_ingest.log") - -test_channels = [ - Channel( - id=0, - name="QAnon Россия", - platform_id=-1001319637748, - category="Qanon", - followers=94048, - platform="Telegram", - url="https://t.me/qanonrus", - screenname="qanonrus", - country="RU", - influencer=None, - public=True, - chat=False, - notes=""), - Channel( - id=1, - name="The Great Awakening | Q", - platform_id=-1001325597521, - category="Qanon", - followers=5715, - platform="Telegram", - url="https://t.me/greatawakin", - screenname="greatawakin", - country="RU", - influencer=None, - public=True, - chat=False, - notes=""), - Channel( - id=2, - name="Великое Пробуждение", - platform_id=-1001285898079, - category="Qanon", - followers=5861, - platform="Telegram", - url="https://t.me/greatawakeningrus", - screenname="greatawakeningrus", - country="RU", - influencer=None, - public=True, - chat=False, - notes=""), - Channel( - id=3, - name="T🕊Редакция Президент Гордон🕊", - platform_id=-1001101170442, - category="Qanon", - followers=5743, - platform="Telegram", - url="https://t.me/prezidentgordonteam", - screenname="prezidentgordonteam", - country="RU", - influencer=None, - public=True, - chat=False, - notes=""), - Channel( - id=4, - name="ПРОЕКТ АВРОРА", - platform_id=-1001279171101, - category="Qanon", - followers=5930, - platform="Telegram", - url="https://t.me/project_aurora", - screenname="project_aurora", - country="RU", - influencer=None, - public=True, - chat=False, - notes=""), - Channel( - id=5, - name="Сон Разума", - platform_id=-1001202338312, - category="Qanon", - followers=27099, - platform="Telegram", - url="https://t.me/error_288", - screenname="error_288", - country="RU", - influencer=None, - public=True, - chat=False, - notes=""), - Channel( - id=6, - name="Пробуждающий Мир - официальный канал", - platform_id=-1001492521207, - category="Qanon", - followers=19097, - platform="Telegram", - url="https://t.me/promirru", - screenname="promirru", - country="RU", - influencer=None, - public=True, - chat=False, - notes=""), - Channel( - id=7, - name="ЦЕЛЬНОЗОР", - platform_id=-1001642737506, - category="Qanon", - followers=13654, - platform="Telegram", - url="https://t.me/tselnozor", - screenname="tselnozor", - country="RU", - influencer=None, - public=True, - chat=False, - notes=""),] - -controller = ScraperController() - -telegram = TelegramSnscrapeScraper() -controller.register_scraper(telegram) - -engine = create_engine('sqlite:///russian_telegram.db') -controller.connect_to_db(engine) - -controller.scrape_channels(test_channels, archive_media = False) -