mirror of
https://github.com/bellingcat/cisticola.git
synced 2026-06-08 03:18:34 +03:00
Merge pull request #3 from bellingcat/media
Expanding media archiving, implementing Odysee scraper
This commit is contained in:
16
.gitignore
vendored
16
.gitignore
vendored
@@ -1,7 +1,17 @@
|
||||
.DS_Store
|
||||
# Sphinx documentation
|
||||
docs/build/
|
||||
docs/source/_*
|
||||
|
||||
# Miscellaneous files
|
||||
**/.DS_Store
|
||||
*.pyc
|
||||
*.ipynb
|
||||
*.db
|
||||
docs/build/
|
||||
docs/source/_*
|
||||
.env
|
||||
|
||||
# Unit test / coverage reports
|
||||
reports
|
||||
.coverage
|
||||
.cache
|
||||
.pytest_cache/
|
||||
cover/
|
||||
8
Pipfile
8
Pipfile
@@ -13,8 +13,16 @@ dateparser = "*"
|
||||
sphinx = "*"
|
||||
boto3 = "*"
|
||||
snscrape = {git = "https://github.com/bellingcat/snscrape.git"}
|
||||
ffmpeg-python = "*"
|
||||
polyphemus = {git = "https://github.com/bellingcat/polyphemus.git"}
|
||||
garc = "*"
|
||||
youtube-dl = "*"
|
||||
|
||||
[dev-packages]
|
||||
pytest = "*"
|
||||
pytest-cov = "*"
|
||||
pytest-html = "*"
|
||||
pytest-metadata = "*"
|
||||
|
||||
[requires]
|
||||
python_version = "3.9"
|
||||
|
||||
454
Pipfile.lock
generated
454
Pipfile.lock
generated
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "d3ee112521273c2b0b9df074b4eb9a20649a2854bfffa433171749019acf8561"
|
||||
"sha256": "ea2a1f1dff68fa0bd30dab06553e913f467c3b1399388b97f0ed913ab74c6e85"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {
|
||||
@@ -23,6 +23,14 @@
|
||||
],
|
||||
"version": "==0.7.12"
|
||||
},
|
||||
"attrs": {
|
||||
"hashes": [
|
||||
"sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4",
|
||||
"sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
|
||||
"version": "==21.4.0"
|
||||
},
|
||||
"babel": {
|
||||
"hashes": [
|
||||
"sha256:ab49e12b91d937cd11f0b67cb259a57ab4ad2b59ac7a3b41d6c06c0ac5b0def9",
|
||||
@@ -41,19 +49,19 @@
|
||||
},
|
||||
"boto3": {
|
||||
"hashes": [
|
||||
"sha256:0e8d4d814f94031947035a4c2bb2c23832d5de941a6a492fb85794a02bafc44d",
|
||||
"sha256:95d9b5b6fe3383fbf8f33d58f62258d3b3ea138d4369017031339b60fd5b8887"
|
||||
"sha256:75709628320cea8ce137975dc33b75213c2e4f6e7cd09e55290de7245e2c79e2",
|
||||
"sha256:c92ec20a670721b5a1bc013b305a84db2b7f9c716653b3056ce7e2fbd2a180ef"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.21.6"
|
||||
"version": "==1.21.12"
|
||||
},
|
||||
"botocore": {
|
||||
"hashes": [
|
||||
"sha256:359b9ea3870a1f8264113cb0b1216baa94bf1e8cee5d5d8af63a2e7ca6e7b33c",
|
||||
"sha256:69aaa5a78ac7371f573e463be51fb962213c42fab08ef82380e84b9a87386949"
|
||||
"sha256:0174999a04b0a2e42457106093ace9b36fa94772a442d9bcf60750263d1d073e",
|
||||
"sha256:0cd7395311a3fef4aad8df8f511b4f7d221c24ae30934bd5c03458b0fc096d0c"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==1.24.6"
|
||||
"version": "==1.24.12"
|
||||
},
|
||||
"bs4": {
|
||||
"hashes": [
|
||||
@@ -101,6 +109,14 @@
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
|
||||
"version": "==0.17.1"
|
||||
},
|
||||
"ffmpeg-python": {
|
||||
"hashes": [
|
||||
"sha256:65225db34627c578ef0e11c8b1eb528bb35e024752f6f10b78c011f6f64c4127",
|
||||
"sha256:ac441a0404e053f8b6a1113a77c0f452f1cfc62f6344a769475ffdc0f56c23c5"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==0.2.0"
|
||||
},
|
||||
"filelock": {
|
||||
"hashes": [
|
||||
"sha256:9cd540a9352e432c7246a48fe4e8712b10acb1df2ad1f30e8c070b82ae1fed85",
|
||||
@@ -109,6 +125,20 @@
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==3.6.0"
|
||||
},
|
||||
"future": {
|
||||
"hashes": [
|
||||
"sha256:b1bead90b70cf6ec3f0710ae53a525360fa360d306a86583adc6bf83a4db537d"
|
||||
],
|
||||
"markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==0.18.2"
|
||||
},
|
||||
"garc": {
|
||||
"hashes": [
|
||||
"sha256:6f1da8ccdb30b165b8d9247314b73d1002f60381480e61fdbf108dc9abf3c216"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==2.1"
|
||||
},
|
||||
"gogettr": {
|
||||
"hashes": [
|
||||
"sha256:9f5c90e3b1befe6eb561d4bca9ca124faddbe5787d8b429f02703c68dd51d255",
|
||||
@@ -175,7 +205,7 @@
|
||||
"sha256:fa877ca7f6b48054f847b61d6fa7bed5cebb663ebc55e018fda12db09dcc664c",
|
||||
"sha256:fdcec0b8399108577ec290f55551d926d9a1fa6cad45882093a7a07ac5ec147b"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
|
||||
"markers": "python_version >= '3' and platform_machine == 'aarch64' or (platform_machine == 'ppc64le' or (platform_machine == 'x86_64' or (platform_machine == 'amd64' or (platform_machine == 'AMD64' or (platform_machine == 'win32' or platform_machine == 'WIN32')))))",
|
||||
"version": "==1.1.2"
|
||||
},
|
||||
"idna": {
|
||||
@@ -196,11 +226,18 @@
|
||||
},
|
||||
"importlib-metadata": {
|
||||
"hashes": [
|
||||
"sha256:175f4ee440a0317f6e8d81b7f8d4869f93316170a65ad2b007d2929186c8052c",
|
||||
"sha256:e0bc84ff355328a4adfc5240c4f211e0ab386f80aa640d1b11f0618a1d282094"
|
||||
"sha256:b36ffa925fe3139b2f6ff11d6925ffd4fa7bc47870165e3ac260ac7b4f91e6ac",
|
||||
"sha256:d16e8c1deb60de41b8e8ed21c1a7b947b0bc62fab7e1d470bcdf331cea2e6735"
|
||||
],
|
||||
"markers": "python_version < '3.10'",
|
||||
"version": "==4.11.1"
|
||||
"version": "==4.11.2"
|
||||
},
|
||||
"iniconfig": {
|
||||
"hashes": [
|
||||
"sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3",
|
||||
"sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"
|
||||
],
|
||||
"version": "==1.1.1"
|
||||
},
|
||||
"jinja2": {
|
||||
"hashes": [
|
||||
@@ -339,6 +376,31 @@
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==2.1.0"
|
||||
},
|
||||
"numpy": {
|
||||
"hashes": [
|
||||
"sha256:03ae5850619abb34a879d5f2d4bb4dcd025d6d8fb72f5e461dae84edccfe129f",
|
||||
"sha256:076aee5a3763d41da6bef9565fdf3cb987606f567cd8b104aded2b38b7b47abf",
|
||||
"sha256:0b536b6840e84c1c6a410f3a5aa727821e6108f3454d81a5cd5900999ef04f89",
|
||||
"sha256:15efb7b93806d438e3bc590ca8ef2f953b0ce4f86f337ef4559d31ec6cf9d7dd",
|
||||
"sha256:168259b1b184aa83a514f307352c25c56af111c269ffc109d9704e81f72e764b",
|
||||
"sha256:2638389562bda1635b564490d76713695ff497242a83d9b684d27bb4a6cc9d7a",
|
||||
"sha256:3556c5550de40027d3121ebbb170f61bbe19eb639c7ad0c7b482cd9b560cd23b",
|
||||
"sha256:4a176959b6e7e00b5a0d6f549a479f869829bfd8150282c590deee6d099bbb6e",
|
||||
"sha256:515a8b6edbb904594685da6e176ac9fbea8f73a5ebae947281de6613e27f1956",
|
||||
"sha256:55535c7c2f61e2b2fc817c5cbe1af7cb907c7f011e46ae0a52caa4be1f19afe2",
|
||||
"sha256:59153979d60f5bfe9e4c00e401e24dfe0469ef8da6d68247439d3278f30a180f",
|
||||
"sha256:60cb8e5933193a3cc2912ee29ca331e9c15b2da034f76159b7abc520b3d1233a",
|
||||
"sha256:6767ad399e9327bfdbaa40871be4254d1995f4a3ca3806127f10cec778bd9896",
|
||||
"sha256:76a4f9bce0278becc2da7da3b8ef854bed41a991f4226911a24a9711baad672c",
|
||||
"sha256:8cf33634b60c9cef346663a222d9841d3bbbc0a2f00221d6bcfd0d993d5543f6",
|
||||
"sha256:94dd11d9f13ea1be17bac39c1942f527cbf7065f94953cf62dfe805653da2f8f",
|
||||
"sha256:aafa46b5a39a27aca566198d3312fb3bde95ce9677085efd02c86f7ef6be4ec7",
|
||||
"sha256:badca914580eb46385e7f7e4e426fea6de0a37b9e06bec252e481ae7ec287082",
|
||||
"sha256:d76a26c5118c4d96e264acc9e3242d72e1a2b92e739807b3b69d8d47684b6677"
|
||||
],
|
||||
"markers": "python_version < '3.10' and platform_machine != 'aarch64' and platform_machine != 'arm64'",
|
||||
"version": "==1.22.2"
|
||||
},
|
||||
"packaging": {
|
||||
"hashes": [
|
||||
"sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb",
|
||||
@@ -347,6 +409,53 @@
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==21.3"
|
||||
},
|
||||
"pandas": {
|
||||
"hashes": [
|
||||
"sha256:0259cd11e7e6125aaea3af823b80444f3adad6149ff4c97fef760093598b3e34",
|
||||
"sha256:04dd15d9db538470900c851498e532ef28d4e56bfe72c9523acb32042de43dfb",
|
||||
"sha256:0b1a13f647e4209ed7dbb5da3497891d0045da9785327530ab696417ef478f84",
|
||||
"sha256:19f7c632436b1b4f84615c3b127bbd7bc603db95e3d4332ed259dc815c9aaa26",
|
||||
"sha256:1b384516dbb4e6aae30e3464c2e77c563da5980440fbdfbd0968e3942f8f9d70",
|
||||
"sha256:1d85d5f6be66dfd6d1d8d13b9535e342a2214260f1852654b19fa4d7b8d1218b",
|
||||
"sha256:2e5a7a1e0ecaac652326af627a3eca84886da9e667d68286866d4e33f6547caf",
|
||||
"sha256:3129a35d9dad1d80c234dd78f8f03141b914395d23f97cf92a366dcd19f8f8bf",
|
||||
"sha256:358b0bc98a5ff067132d23bf7a2242ee95db9ea5b7bbc401cf79205f11502fd3",
|
||||
"sha256:3dfb32ed50122fe8c5e7f2b8d97387edd742cc78f9ec36f007ee126cd3720907",
|
||||
"sha256:4e1176f45981c8ccc8161bc036916c004ca51037a7ed73f2d2a9857e6dbe654f",
|
||||
"sha256:508c99debccd15790d526ce6b1624b97a5e1e4ca5b871319fb0ebfd46b8f4dad",
|
||||
"sha256:6105af6533f8b63a43ea9f08a2ede04e8f43e49daef0209ab0d30352bcf08bee",
|
||||
"sha256:6d6ad1da00c7cc7d8dd1559a6ba59ba3973be6b15722d49738b2be0977eb8a0c",
|
||||
"sha256:7ea47ba1d6f359680130bd29af497333be6110de8f4c35b9211eec5a5a9630fa",
|
||||
"sha256:8db93ec98ac7cb5f8ac1420c10f5e3c43533153f253fe7fb6d891cf5aa2b80d2",
|
||||
"sha256:96e9ece5759f9b47ae43794b6359bbc54805d76e573b161ae770c1ea59393106",
|
||||
"sha256:bbb15ad79050e8b8d39ec40dd96a30cd09b886a2ae8848d0df1abba4d5502a67",
|
||||
"sha256:c614001129b2a5add5e3677c3a213a9e6fd376204cb8d17c04e84ff7dfc02a73",
|
||||
"sha256:e6a7bbbb7950063bfc942f8794bc3e31697c020a14f1cd8905fc1d28ec674a01",
|
||||
"sha256:f02e85e6d832be37d7f16cf6ac8bb26b519ace3e5f3235564a91c7f658ab2a43"
|
||||
],
|
||||
"markers": "python_version >= '3.8'",
|
||||
"version": "==1.4.1"
|
||||
},
|
||||
"pluggy": {
|
||||
"hashes": [
|
||||
"sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159",
|
||||
"sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==1.0.0"
|
||||
},
|
||||
"polyphemus": {
|
||||
"git": "https://github.com/bellingcat/polyphemus.git",
|
||||
"ref": "8506fd43770661cdcf92c5cac2356cba74778834"
|
||||
},
|
||||
"py": {
|
||||
"hashes": [
|
||||
"sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719",
|
||||
"sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
|
||||
"version": "==1.11.0"
|
||||
},
|
||||
"pygments": {
|
||||
"hashes": [
|
||||
"sha256:44238f1b60a76d78fc8ca0528ee429702aae011c265fe6a8dd8b63049ae41c65",
|
||||
@@ -371,6 +480,14 @@
|
||||
],
|
||||
"version": "==1.7.1"
|
||||
},
|
||||
"pytest": {
|
||||
"hashes": [
|
||||
"sha256:9ce3ff477af913ecf6321fe337b93a2c0dcf2a0a1439c43f5452112c1e4280db",
|
||||
"sha256:e30905a0c131d3d94b89624a1cc5afec3e0ba2fbdb151867d8e0ebd49850f171"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==7.0.1"
|
||||
},
|
||||
"python-dateutil": {
|
||||
"hashes": [
|
||||
"sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86",
|
||||
@@ -396,82 +513,83 @@
|
||||
},
|
||||
"regex": {
|
||||
"hashes": [
|
||||
"sha256:04611cc0f627fc4a50bc4a9a2e6178a974c6a6a4aa9c1cca921635d2c47b9c87",
|
||||
"sha256:0b5d6f9aed3153487252d00a18e53f19b7f52a1651bc1d0c4b5844bc286dfa52",
|
||||
"sha256:0d2f5c3f7057530afd7b739ed42eb04f1011203bc5e4663e1e1d01bb50f813e3",
|
||||
"sha256:11772be1eb1748e0e197a40ffb82fb8fd0d6914cd147d841d9703e2bef24d288",
|
||||
"sha256:1333b3ce73269f986b1fa4d5d395643810074dc2de5b9d262eb258daf37dc98f",
|
||||
"sha256:16f81025bb3556eccb0681d7946e2b35ff254f9f888cff7d2120e8826330315c",
|
||||
"sha256:1a171eaac36a08964d023eeff740b18a415f79aeb212169080c170ec42dd5184",
|
||||
"sha256:1d6301f5288e9bdca65fab3de6b7de17362c5016d6bf8ee4ba4cbe833b2eda0f",
|
||||
"sha256:1e031899cb2bc92c0cf4d45389eff5b078d1936860a1be3aa8c94fa25fb46ed8",
|
||||
"sha256:1f8c0ae0a0de4e19fddaaff036f508db175f6f03db318c80bbc239a1def62d02",
|
||||
"sha256:2245441445099411b528379dee83e56eadf449db924648e5feb9b747473f42e3",
|
||||
"sha256:22709d701e7037e64dae2a04855021b62efd64a66c3ceed99dfd684bfef09e38",
|
||||
"sha256:24c89346734a4e4d60ecf9b27cac4c1fee3431a413f7aa00be7c4d7bbacc2c4d",
|
||||
"sha256:25716aa70a0d153cd844fe861d4f3315a6ccafce22b39d8aadbf7fcadff2b633",
|
||||
"sha256:2dacb3dae6b8cc579637a7b72f008bff50a94cde5e36e432352f4ca57b9e54c4",
|
||||
"sha256:34316bf693b1d2d29c087ee7e4bb10cdfa39da5f9c50fa15b07489b4ab93a1b5",
|
||||
"sha256:36b2d700a27e168fa96272b42d28c7ac3ff72030c67b32f37c05616ebd22a202",
|
||||
"sha256:37978254d9d00cda01acc1997513f786b6b971e57b778fbe7c20e30ae81a97f3",
|
||||
"sha256:38289f1690a7e27aacd049e420769b996826f3728756859420eeee21cc857118",
|
||||
"sha256:385ccf6d011b97768a640e9d4de25412204fbe8d6b9ae39ff115d4ff03f6fe5d",
|
||||
"sha256:3c7ea86b9ca83e30fa4d4cd0eaf01db3ebcc7b2726a25990966627e39577d729",
|
||||
"sha256:49810f907dfe6de8da5da7d2b238d343e6add62f01a15d03e2195afc180059ed",
|
||||
"sha256:519c0b3a6fbb68afaa0febf0d28f6c4b0a1074aefc484802ecb9709faf181607",
|
||||
"sha256:51f02ca184518702975b56affde6c573ebad4e411599005ce4468b1014b4786c",
|
||||
"sha256:552a39987ac6655dad4bf6f17dd2b55c7b0c6e949d933b8846d2e312ee80005a",
|
||||
"sha256:596f5ae2eeddb79b595583c2e0285312b2783b0ec759930c272dbf02f851ff75",
|
||||
"sha256:6014038f52b4b2ac1fa41a58d439a8a00f015b5c0735a0cd4b09afe344c94899",
|
||||
"sha256:61ebbcd208d78658b09e19c78920f1ad38936a0aa0f9c459c46c197d11c580a0",
|
||||
"sha256:6213713ac743b190ecbf3f316d6e41d099e774812d470422b3a0f137ea635832",
|
||||
"sha256:637e27ea1ebe4a561db75a880ac659ff439dec7f55588212e71700bb1ddd5af9",
|
||||
"sha256:6aa427c55a0abec450bca10b64446331b5ca8f79b648531138f357569705bc4a",
|
||||
"sha256:6ca45359d7a21644793de0e29de497ef7f1ae7268e346c4faf87b421fea364e6",
|
||||
"sha256:6db1b52c6f2c04fafc8da17ea506608e6be7086715dab498570c3e55e4f8fbd1",
|
||||
"sha256:752e7ddfb743344d447367baa85bccd3629c2c3940f70506eb5f01abce98ee68",
|
||||
"sha256:760c54ad1b8a9b81951030a7e8e7c3ec0964c1cb9fee585a03ff53d9e531bb8e",
|
||||
"sha256:768632fd8172ae03852e3245f11c8a425d95f65ff444ce46b3e673ae5b057b74",
|
||||
"sha256:7a0b9f6a1a15d494b35f25ed07abda03209fa76c33564c09c9e81d34f4b919d7",
|
||||
"sha256:7e070d3aef50ac3856f2ef5ec7214798453da878bb5e5a16c16a61edf1817cc3",
|
||||
"sha256:7e12949e5071c20ec49ef00c75121ed2b076972132fc1913ddf5f76cae8d10b4",
|
||||
"sha256:7e26eac9e52e8ce86f915fd33380f1b6896a2b51994e40bb094841e5003429b4",
|
||||
"sha256:85ffd6b1cb0dfb037ede50ff3bef80d9bf7fa60515d192403af6745524524f3b",
|
||||
"sha256:8618d9213a863c468a865e9d2ec50221015f7abf52221bc927152ef26c484b4c",
|
||||
"sha256:8acef4d8a4353f6678fd1035422a937c2170de58a2b29f7da045d5249e934101",
|
||||
"sha256:8d2f355a951f60f0843f2368b39970e4667517e54e86b1508e76f92b44811a8a",
|
||||
"sha256:90b6840b6448203228a9d8464a7a0d99aa8fa9f027ef95fe230579abaf8a6ee1",
|
||||
"sha256:9187500d83fd0cef4669385cbb0961e227a41c0c9bc39219044e35810793edf7",
|
||||
"sha256:93c20777a72cae8620203ac11c4010365706062aa13aaedd1a21bb07adbb9d5d",
|
||||
"sha256:93cce7d422a0093cfb3606beae38a8e47a25232eea0f292c878af580a9dc7605",
|
||||
"sha256:94c623c331a48a5ccc7d25271399aff29729fa202c737ae3b4b28b89d2b0976d",
|
||||
"sha256:97f32dc03a8054a4c4a5ab5d761ed4861e828b2c200febd4e46857069a483916",
|
||||
"sha256:9a2bf98ac92f58777c0fafc772bf0493e67fcf677302e0c0a630ee517a43b949",
|
||||
"sha256:a602bdc8607c99eb5b391592d58c92618dcd1537fdd87df1813f03fed49957a6",
|
||||
"sha256:a9d24b03daf7415f78abc2d25a208f234e2c585e5e6f92f0204d2ab7b9ab48e3",
|
||||
"sha256:abfcb0ef78df0ee9df4ea81f03beea41849340ce33a4c4bd4dbb99e23ec781b6",
|
||||
"sha256:b013f759cd69cb0a62de954d6d2096d648bc210034b79b1881406b07ed0a83f9",
|
||||
"sha256:b02e3e72665cd02afafb933453b0c9f6c59ff6e3708bd28d0d8580450e7e88af",
|
||||
"sha256:b52cc45e71657bc4743a5606d9023459de929b2a198d545868e11898ba1c3f59",
|
||||
"sha256:ba37f11e1d020969e8a779c06b4af866ffb6b854d7229db63c5fdddfceaa917f",
|
||||
"sha256:bb804c7d0bfbd7e3f33924ff49757de9106c44e27979e2492819c16972ec0da2",
|
||||
"sha256:bf594cc7cc9d528338d66674c10a5b25e3cde7dd75c3e96784df8f371d77a298",
|
||||
"sha256:c38baee6bdb7fe1b110b6b3aaa555e6e872d322206b7245aa39572d3fc991ee4",
|
||||
"sha256:c73d2166e4b210b73d1429c4f1ca97cea9cc090e5302df2a7a0a96ce55373f1c",
|
||||
"sha256:c9099bf89078675c372339011ccfc9ec310310bf6c292b413c013eb90ffdcafc",
|
||||
"sha256:cf0db26a1f76aa6b3aa314a74b8facd586b7a5457d05b64f8082a62c9c49582a",
|
||||
"sha256:d19a34f8a3429bd536996ad53597b805c10352a8561d8382e05830df389d2b43",
|
||||
"sha256:da80047524eac2acf7c04c18ac7a7da05a9136241f642dd2ed94269ef0d0a45a",
|
||||
"sha256:de2923886b5d3214be951bc2ce3f6b8ac0d6dfd4a0d0e2a4d2e5523d8046fdfb",
|
||||
"sha256:defa0652696ff0ba48c8aff5a1fac1eef1ca6ac9c660b047fc8e7623c4eb5093",
|
||||
"sha256:e54a1eb9fd38f2779e973d2f8958fd575b532fe26013405d1afb9ee2374e7ab8",
|
||||
"sha256:e5c31d70a478b0ca22a9d2d76d520ae996214019d39ed7dd93af872c7f301e52",
|
||||
"sha256:ebaeb93f90c0903233b11ce913a7cb8f6ee069158406e056f884854c737d2442",
|
||||
"sha256:ecfe51abf7f045e0b9cdde71ca9e153d11238679ef7b5da6c82093874adf3338",
|
||||
"sha256:f99112aed4fb7cee00c7f77e8b964a9b10f69488cdff626ffd797d02e2e4484f",
|
||||
"sha256:fd914db437ec25bfa410f8aa0aa2f3ba87cdfc04d9919d608d02330947afaeab"
|
||||
"sha256:0008650041531d0eadecc96a73d37c2dc4821cf51b0766e374cb4f1ddc4e1c14",
|
||||
"sha256:03299b0bcaa7824eb7c0ebd7ef1e3663302d1b533653bfe9dc7e595d453e2ae9",
|
||||
"sha256:06b1df01cf2aef3a9790858af524ae2588762c8a90e784ba00d003f045306204",
|
||||
"sha256:09b4b6ccc61d4119342b26246ddd5a04accdeebe36bdfe865ad87a0784efd77f",
|
||||
"sha256:0be0c34a39e5d04a62fd5342f0886d0e57592a4f4993b3f9d257c1f688b19737",
|
||||
"sha256:0d96eec8550fd2fd26f8e675f6d8b61b159482ad8ffa26991b894ed5ee19038b",
|
||||
"sha256:0eb0e2845e81bdea92b8281a3969632686502565abf4a0b9e4ab1471c863d8f3",
|
||||
"sha256:13bbf0c9453c6d16e5867bda7f6c0c7cff1decf96c5498318bb87f8136d2abd4",
|
||||
"sha256:17e51ad1e6131c496b58d317bc9abec71f44eb1957d32629d06013a21bc99cac",
|
||||
"sha256:1977bb64264815d3ef016625adc9df90e6d0e27e76260280c63eca993e3f455f",
|
||||
"sha256:1e30762ddddb22f7f14c4f59c34d3addabc789216d813b0f3e2788d7bcf0cf29",
|
||||
"sha256:1e73652057473ad3e6934944af090852a02590c349357b79182c1b681da2c772",
|
||||
"sha256:20e6a27959f162f979165e496add0d7d56d7038237092d1aba20b46de79158f1",
|
||||
"sha256:286ff9ec2709d56ae7517040be0d6c502642517ce9937ab6d89b1e7d0904f863",
|
||||
"sha256:297c42ede2c81f0cb6f34ea60b5cf6dc965d97fa6936c11fc3286019231f0d66",
|
||||
"sha256:320c2f4106962ecea0f33d8d31b985d3c185757c49c1fb735501515f963715ed",
|
||||
"sha256:35ed2f3c918a00b109157428abfc4e8d1ffabc37c8f9abc5939ebd1e95dabc47",
|
||||
"sha256:3d146e5591cb67c5e836229a04723a30af795ef9b70a0bbd913572e14b7b940f",
|
||||
"sha256:42bb37e2b2d25d958c25903f6125a41aaaa1ed49ca62c103331f24b8a459142f",
|
||||
"sha256:42d6007722d46bd2c95cce700181570b56edc0dcbadbfe7855ec26c3f2d7e008",
|
||||
"sha256:43eba5c46208deedec833663201752e865feddc840433285fbadee07b84b464d",
|
||||
"sha256:452519bc4c973e961b1620c815ea6dd8944a12d68e71002be5a7aff0a8361571",
|
||||
"sha256:4b9c16a807b17b17c4fa3a1d8c242467237be67ba92ad24ff51425329e7ae3d0",
|
||||
"sha256:5510932596a0f33399b7fff1bd61c59c977f2b8ee987b36539ba97eb3513584a",
|
||||
"sha256:55820bc631684172b9b56a991d217ec7c2e580d956591dc2144985113980f5a3",
|
||||
"sha256:57484d39447f94967e83e56db1b1108c68918c44ab519b8ecfc34b790ca52bf7",
|
||||
"sha256:58ba41e462653eaf68fc4a84ec4d350b26a98d030be1ab24aba1adcc78ffe447",
|
||||
"sha256:5bc5f921be39ccb65fdda741e04b2555917a4bced24b4df14eddc7569be3b493",
|
||||
"sha256:5dcc4168536c8f68654f014a3db49b6b4a26b226f735708be2054314ed4964f4",
|
||||
"sha256:5f92a7cdc6a0ae2abd184e8dfd6ef2279989d24c85d2c85d0423206284103ede",
|
||||
"sha256:67250b36edfa714ba62dc62d3f238e86db1065fccb538278804790f578253640",
|
||||
"sha256:6df070a986fc064d865c381aecf0aaff914178fdf6874da2f2387e82d93cc5bd",
|
||||
"sha256:729aa8ca624c42f309397c5fc9e21db90bf7e2fdd872461aabdbada33de9063c",
|
||||
"sha256:72bc3a5effa5974be6d965ed8301ac1e869bc18425c8a8fac179fbe7876e3aee",
|
||||
"sha256:74d86e8924835f863c34e646392ef39039405f6ce52956d8af16497af4064a30",
|
||||
"sha256:79e5af1ff258bc0fe0bdd6f69bc4ae33935a898e3cbefbbccf22e88a27fa053b",
|
||||
"sha256:7b103dffb9f6a47ed7ffdf352b78cfe058b1777617371226c1894e1be443afec",
|
||||
"sha256:83f03f0bd88c12e63ca2d024adeee75234d69808b341e88343b0232329e1f1a1",
|
||||
"sha256:86d7a68fa53688e1f612c3246044157117403c7ce19ebab7d02daf45bd63913e",
|
||||
"sha256:878c626cbca3b649e14e972c14539a01191d79e58934e3f3ef4a9e17f90277f8",
|
||||
"sha256:878f5d649ba1db9f52cc4ef491f7dba2d061cdc48dd444c54260eebc0b1729b9",
|
||||
"sha256:87bc01226cd288f0bd9a4f9f07bf6827134dc97a96c22e2d28628e824c8de231",
|
||||
"sha256:8babb2b5751105dc0aef2a2e539f4ba391e738c62038d8cb331c710f6b0f3da7",
|
||||
"sha256:91e0f7e7be77250b808a5f46d90bf0032527d3c032b2131b63dee54753a4d729",
|
||||
"sha256:9557545c10d52c845f270b665b52a6a972884725aa5cf12777374e18f2ea8960",
|
||||
"sha256:9ccb0a4ab926016867260c24c192d9df9586e834f5db83dfa2c8fffb3a6e5056",
|
||||
"sha256:9d828c5987d543d052b53c579a01a52d96b86f937b1777bbfe11ef2728929357",
|
||||
"sha256:9efa41d1527b366c88f265a227b20bcec65bda879962e3fc8a2aee11e81266d7",
|
||||
"sha256:aaf5317c961d93c1a200b9370fb1c6b6836cc7144fef3e5a951326912bf1f5a3",
|
||||
"sha256:ab69b4fe09e296261377d209068d52402fb85ef89dc78a9ac4a29a895f4e24a7",
|
||||
"sha256:ad397bc7d51d69cb07ef89e44243f971a04ce1dca9bf24c992c362406c0c6573",
|
||||
"sha256:ae17fc8103f3b63345709d3e9654a274eee1c6072592aec32b026efd401931d0",
|
||||
"sha256:af4d8cc28e4c7a2f6a9fed544228c567340f8258b6d7ea815b62a72817bbd178",
|
||||
"sha256:b22ff939a8856a44f4822da38ef4868bd3a9ade22bb6d9062b36957c850e404f",
|
||||
"sha256:b549d851f91a4efb3e65498bd4249b1447ab6035a9972f7fc215eb1f59328834",
|
||||
"sha256:be319f4eb400ee567b722e9ea63d5b2bb31464e3cf1b016502e3ee2de4f86f5c",
|
||||
"sha256:c0446b2871335d5a5e9fcf1462f954586b09a845832263db95059dcd01442015",
|
||||
"sha256:c68d2c04f7701a418ec2e5631b7f3552efc32f6bcc1739369c6eeb1af55f62e0",
|
||||
"sha256:c87ac58b9baaf50b6c1b81a18d20eda7e2883aa9a4fb4f1ca70f2e443bfcdc57",
|
||||
"sha256:caa2734ada16a44ae57b229d45091f06e30a9a52ace76d7574546ab23008c635",
|
||||
"sha256:cb34c2d66355fb70ae47b5595aafd7218e59bb9c00ad8cc3abd1406ca5874f07",
|
||||
"sha256:cb3652bbe6720786b9137862205986f3ae54a09dec8499a995ed58292bdf77c2",
|
||||
"sha256:cf668f26604e9f7aee9f8eaae4ca07a948168af90b96be97a4b7fa902a6d2ac1",
|
||||
"sha256:d326ff80ed531bf2507cba93011c30fff2dd51454c85f55df0f59f2030b1687b",
|
||||
"sha256:d6c2441538e4fadd4291c8420853431a229fcbefc1bf521810fbc2629d8ae8c2",
|
||||
"sha256:d6ecfd1970b3380a569d7b3ecc5dd70dba295897418ed9e31ec3c16a5ab099a5",
|
||||
"sha256:e5602a9b5074dcacc113bba4d2f011d2748f50e3201c8139ac5b68cf2a76bd8b",
|
||||
"sha256:ef806f684f17dbd6263d72a54ad4073af42b42effa3eb42b877e750c24c76f86",
|
||||
"sha256:f3356afbb301ec34a500b8ba8b47cba0b44ed4641c306e1dd981a08b416170b5",
|
||||
"sha256:f6f7ee2289176cb1d2c59a24f50900f8b9580259fa9f1a739432242e7d254f93",
|
||||
"sha256:f7e8f1ee28e0a05831c92dc1c0c1c94af5289963b7cf09eca5b5e3ce4f8c91b0",
|
||||
"sha256:f8169ec628880bdbca67082a9196e2106060a4a5cbd486ac51881a4df805a36f",
|
||||
"sha256:fbc88d3ba402b5d041d204ec2449c4078898f89c4a6e6f0ed1c1a510ef1e221d",
|
||||
"sha256:fbd3fe37353c62fd0eb19fb76f78aa693716262bcd5f9c14bb9e5aca4b3f0dc4"
|
||||
],
|
||||
"version": "==2022.1.18"
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==2022.3.2"
|
||||
},
|
||||
"requests": {
|
||||
"extras": [
|
||||
@@ -486,11 +604,11 @@
|
||||
},
|
||||
"s3transfer": {
|
||||
"hashes": [
|
||||
"sha256:25c140f5c66aa79e1ac60be50dcd45ddc59e83895f062a3aab263b870102911f",
|
||||
"sha256:69d264d3e760e569b78aaa0f22c97e955891cd22e32b10c51f784eeda4d9d10a"
|
||||
"sha256:7a6f4c4d1fdb9a2b640244008e142cbc2cd3ae34b386584ef044dd0f27101971",
|
||||
"sha256:95c58c194ce657a5f4fb0b9e60a84968c808888aed628cd98ab8771fe1db98ed"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==0.5.1"
|
||||
"version": "==0.5.2"
|
||||
},
|
||||
"six": {
|
||||
"hashes": [
|
||||
@@ -617,6 +735,14 @@
|
||||
"index": "pypi",
|
||||
"version": "==1.4.31"
|
||||
},
|
||||
"tomli": {
|
||||
"hashes": [
|
||||
"sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc",
|
||||
"sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"
|
||||
],
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==2.0.1"
|
||||
},
|
||||
"tzdata": {
|
||||
"hashes": [
|
||||
"sha256:3eee491e22ebfe1e5cfcc97a4137cd70f092ce59144d81f8924a844de05ba8f5",
|
||||
@@ -638,9 +764,17 @@
|
||||
"sha256:000ca7f471a233c2251c6c7023ee85305721bfdf18621ebff4fd17a8653427ed",
|
||||
"sha256:0e7c33d9a63e7ddfcb86780aac87befc2fbddf46c58dbb487e0855f7ceec283c"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4.0'",
|
||||
"version": "==1.26.8"
|
||||
},
|
||||
"youtube-dl": {
|
||||
"hashes": [
|
||||
"sha256:bc59e86c5d15d887ac590454511f08ce2c47698d5a82c27bfe27b5d814bbaed2",
|
||||
"sha256:f1336d5de68647e0364a47b3c0712578e59ec76f02048ff5c50ef1c69d79cd55"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==2021.12.17"
|
||||
},
|
||||
"zipp": {
|
||||
"hashes": [
|
||||
"sha256:9f50f446828eb9d45b267433fd3e9da8d801f614129124863f9c51ebceafb87d",
|
||||
@@ -650,5 +784,143 @@
|
||||
"version": "==3.7.0"
|
||||
}
|
||||
},
|
||||
"develop": {}
|
||||
"develop": {
|
||||
"attrs": {
|
||||
"hashes": [
|
||||
"sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4",
|
||||
"sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
|
||||
"version": "==21.4.0"
|
||||
},
|
||||
"coverage": {
|
||||
"extras": [
|
||||
"toml"
|
||||
],
|
||||
"hashes": [
|
||||
"sha256:03e2a7826086b91ef345ff18742ee9fc47a6839ccd517061ef8fa1976e652ce9",
|
||||
"sha256:07e6db90cd9686c767dcc593dff16c8c09f9814f5e9c51034066cad3373b914d",
|
||||
"sha256:18d520c6860515a771708937d2f78f63cc47ab3b80cb78e86573b0a760161faf",
|
||||
"sha256:1ebf730d2381158ecf3dfd4453fbca0613e16eaa547b4170e2450c9707665ce7",
|
||||
"sha256:21b7745788866028adeb1e0eca3bf1101109e2dc58456cb49d2d9b99a8c516e6",
|
||||
"sha256:26e2deacd414fc2f97dd9f7676ee3eaecd299ca751412d89f40bc01557a6b1b4",
|
||||
"sha256:2c6dbb42f3ad25760010c45191e9757e7dce981cbfb90e42feef301d71540059",
|
||||
"sha256:2fea046bfb455510e05be95e879f0e768d45c10c11509e20e06d8fcaa31d9e39",
|
||||
"sha256:34626a7eee2a3da12af0507780bb51eb52dca0e1751fd1471d0810539cefb536",
|
||||
"sha256:37d1141ad6b2466a7b53a22e08fe76994c2d35a5b6b469590424a9953155afac",
|
||||
"sha256:46191097ebc381fbf89bdce207a6c107ac4ec0890d8d20f3360345ff5976155c",
|
||||
"sha256:4dd8bafa458b5c7d061540f1ee9f18025a68e2d8471b3e858a9dad47c8d41903",
|
||||
"sha256:4e21876082ed887baed0146fe222f861b5815455ada3b33b890f4105d806128d",
|
||||
"sha256:58303469e9a272b4abdb9e302a780072c0633cdcc0165db7eec0f9e32f901e05",
|
||||
"sha256:5ca5aeb4344b30d0bec47481536b8ba1181d50dbe783b0e4ad03c95dc1296684",
|
||||
"sha256:68353fe7cdf91f109fc7d474461b46e7f1f14e533e911a2a2cbb8b0fc8613cf1",
|
||||
"sha256:6f89d05e028d274ce4fa1a86887b071ae1755082ef94a6740238cd7a8178804f",
|
||||
"sha256:7a15dc0a14008f1da3d1ebd44bdda3e357dbabdf5a0b5034d38fcde0b5c234b7",
|
||||
"sha256:8bdde1177f2311ee552f47ae6e5aa7750c0e3291ca6b75f71f7ffe1f1dab3dca",
|
||||
"sha256:8ce257cac556cb03be4a248d92ed36904a59a4a5ff55a994e92214cde15c5bad",
|
||||
"sha256:8cf5cfcb1521dc3255d845d9dca3ff204b3229401994ef8d1984b32746bb45ca",
|
||||
"sha256:8fbbdc8d55990eac1b0919ca69eb5a988a802b854488c34b8f37f3e2025fa90d",
|
||||
"sha256:9548f10d8be799551eb3a9c74bbf2b4934ddb330e08a73320123c07f95cc2d92",
|
||||
"sha256:96f8a1cb43ca1422f36492bebe63312d396491a9165ed3b9231e778d43a7fca4",
|
||||
"sha256:9b27d894748475fa858f9597c0ee1d4829f44683f3813633aaf94b19cb5453cf",
|
||||
"sha256:9baff2a45ae1f17c8078452e9e5962e518eab705e50a0aa8083733ea7d45f3a6",
|
||||
"sha256:a2a8b8bcc399edb4347a5ca8b9b87e7524c0967b335fbb08a83c8421489ddee1",
|
||||
"sha256:acf53bc2cf7282ab9b8ba346746afe703474004d9e566ad164c91a7a59f188a4",
|
||||
"sha256:b0be84e5a6209858a1d3e8d1806c46214e867ce1b0fd32e4ea03f4bd8b2e3359",
|
||||
"sha256:b31651d018b23ec463e95cf10070d0b2c548aa950a03d0b559eaa11c7e5a6fa3",
|
||||
"sha256:b78e5afb39941572209f71866aa0b206c12f0109835aa0d601e41552f9b3e620",
|
||||
"sha256:c76aeef1b95aff3905fb2ae2d96e319caca5b76fa41d3470b19d4e4a3a313512",
|
||||
"sha256:dd035edafefee4d573140a76fdc785dc38829fe5a455c4bb12bac8c20cfc3d69",
|
||||
"sha256:dd6fe30bd519694b356cbfcaca9bd5c1737cddd20778c6a581ae20dc8c04def2",
|
||||
"sha256:e5f4e1edcf57ce94e5475fe09e5afa3e3145081318e5fd1a43a6b4539a97e518",
|
||||
"sha256:ec6bc7fe73a938933d4178c9b23c4e0568e43e220aef9472c4f6044bfc6dd0f0",
|
||||
"sha256:f1555ea6d6da108e1999b2463ea1003fe03f29213e459145e70edbaf3e004aaa",
|
||||
"sha256:f5fa5803f47e095d7ad8443d28b01d48c0359484fec1b9d8606d0e3282084bc4",
|
||||
"sha256:f7331dbf301b7289013175087636bbaf5b2405e57259dd2c42fdcc9fcc47325e",
|
||||
"sha256:f9987b0354b06d4df0f4d3e0ec1ae76d7ce7cbca9a2f98c25041eb79eec766f1",
|
||||
"sha256:fd9e830e9d8d89b20ab1e5af09b32d33e1a08ef4c4e14411e559556fd788e6b2"
|
||||
],
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==6.3.2"
|
||||
},
|
||||
"iniconfig": {
|
||||
"hashes": [
|
||||
"sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3",
|
||||
"sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"
|
||||
],
|
||||
"version": "==1.1.1"
|
||||
},
|
||||
"packaging": {
|
||||
"hashes": [
|
||||
"sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb",
|
||||
"sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==21.3"
|
||||
},
|
||||
"pluggy": {
|
||||
"hashes": [
|
||||
"sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159",
|
||||
"sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==1.0.0"
|
||||
},
|
||||
"py": {
|
||||
"hashes": [
|
||||
"sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719",
|
||||
"sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
|
||||
"version": "==1.11.0"
|
||||
},
|
||||
"pyparsing": {
|
||||
"hashes": [
|
||||
"sha256:18ee9022775d270c55187733956460083db60b37d0d0fb357445f3094eed3eea",
|
||||
"sha256:a6c06a88f252e6c322f65faf8f418b16213b51bdfaece0524c1c1bc30c63c484"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==3.0.7"
|
||||
},
|
||||
"pytest": {
|
||||
"hashes": [
|
||||
"sha256:9ce3ff477af913ecf6321fe337b93a2c0dcf2a0a1439c43f5452112c1e4280db",
|
||||
"sha256:e30905a0c131d3d94b89624a1cc5afec3e0ba2fbdb151867d8e0ebd49850f171"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==7.0.1"
|
||||
},
|
||||
"pytest-cov": {
|
||||
"hashes": [
|
||||
"sha256:578d5d15ac4a25e5f961c938b85a05b09fdaae9deef3bb6de9a6e766622ca7a6",
|
||||
"sha256:e7f0f5b1617d2210a2cabc266dfe2f4c75a8d32fb89eafb7ad9d06f6d076d470"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==3.0.0"
|
||||
},
|
||||
"pytest-html": {
|
||||
"hashes": [
|
||||
"sha256:3ee1cf319c913d19fe53aeb0bc400e7b0bc2dbeb477553733db1dad12eb75ee3",
|
||||
"sha256:b7f82f123936a3f4d2950bc993c2c1ca09ce262c9ae12f9ac763a2401380b455"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==3.1.1"
|
||||
},
|
||||
"pytest-metadata": {
|
||||
"hashes": [
|
||||
"sha256:576055b8336dd4a9006dd2a47615f76f2f8c30ab12b1b1c039d99e834583523f",
|
||||
"sha256:71b506d49d34e539cc3cfdb7ce2c5f072bea5c953320002c95968e0238f8ecf1"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.11.0"
|
||||
},
|
||||
"tomli": {
|
||||
"hashes": [
|
||||
"sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc",
|
||||
"sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"
|
||||
],
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==2.0.1"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,71 +1,3 @@
|
||||
from typing import List
|
||||
import cisticola.base
|
||||
import cisticola.scraper.base
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from loguru import logger
|
||||
|
||||
|
||||
class ScraperController:
|
||||
"""Registers scrapers, uses them to generate ScraperResults. Synchronizes
|
||||
everything with database via ORM."""
|
||||
|
||||
def __init__(self):
|
||||
self.scrapers = []
|
||||
self.session = None
|
||||
self.mapper_registry = None
|
||||
|
||||
def register_scraper(self, scraper: cisticola.scraper.base.Scraper):
|
||||
self.scrapers.append(scraper)
|
||||
|
||||
def scrape_channels(self, channels: List[cisticola.base.Channel]):
|
||||
if self.session is None:
|
||||
logger.error("No DB session")
|
||||
return
|
||||
|
||||
for channel in channels:
|
||||
handled = False
|
||||
|
||||
for scraper in self.scrapers:
|
||||
if scraper.can_handle(channel):
|
||||
session = self.session()
|
||||
handled = True
|
||||
added = 0
|
||||
|
||||
# get most recent post
|
||||
session = self.session()
|
||||
rows = session.query(cisticola.base.ScraperResult).where(
|
||||
cisticola.base.ScraperResult.channel == channel.id).order_by(
|
||||
cisticola.base.ScraperResult.date.desc()).limit(1).all()
|
||||
|
||||
if len(rows) == 1:
|
||||
since = rows[0]
|
||||
else:
|
||||
since = None
|
||||
|
||||
posts = scraper.get_posts(channel, since=since)
|
||||
|
||||
for post in posts:
|
||||
session.add(post)
|
||||
added += 1
|
||||
|
||||
session.commit()
|
||||
logger.info(
|
||||
f"{scraper} found {added} new posts from {channel}")
|
||||
break
|
||||
|
||||
if not handled:
|
||||
logger.warning(f"No handler found for Channel {channel}")
|
||||
|
||||
def connect_to_db(self, engine):
|
||||
# create tables
|
||||
cisticola.base.mapper_registry.metadata.create_all(bind=engine)
|
||||
|
||||
self.session = sessionmaker()
|
||||
self.session.configure(bind=engine)
|
||||
|
||||
|
||||
class ETLController:
|
||||
"""This class will transform the raw_data tables into a format more conducive to analysis."""
|
||||
|
||||
def __init__(self):
|
||||
pass
|
||||
from . import base
|
||||
from . import scraper
|
||||
from . import transformer
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
from typing import List
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy.orm import registry
|
||||
from sqlalchemy import Table, Column, Integer, String, JSON, DateTime, ForeignKey
|
||||
|
||||
mapper_registry = registry()
|
||||
|
||||
|
||||
@dataclass
|
||||
class ScraperResult:
|
||||
"""A minimally processed result from a scraper"""
|
||||
@@ -84,4 +85,4 @@ analysis_table = Table('analysis', mapper_registry.metadata,
|
||||
Column('author_username', String)
|
||||
)
|
||||
|
||||
mapper_registry.map_imperatively(TransformedResult, analysis_table)
|
||||
mapper_registry.map_imperatively(TransformedResult, analysis_table)
|
||||
@@ -0,0 +1,8 @@
|
||||
from .base import Scraper, ScraperController
|
||||
from .bitchute import BitchuteScraper
|
||||
from .gab import GabScraper
|
||||
from .gettr import GettrScraper
|
||||
from .odysee import OdyseeScraper
|
||||
from .rumble import RumbleScraper
|
||||
from .telegram_snscrape import TelegramSnscrapeScraper
|
||||
from .twitter import TwitterScraper
|
||||
@@ -1,10 +1,16 @@
|
||||
from typing import Generator
|
||||
import cisticola.base
|
||||
import requests
|
||||
from typing import Generator, Tuple, List
|
||||
import os
|
||||
import boto3
|
||||
from io import BytesIO
|
||||
from urllib.parse import urlparse
|
||||
import tempfile
|
||||
|
||||
import requests
|
||||
import boto3
|
||||
from loguru import logger
|
||||
import ffmpeg
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
from cisticola.base import Channel, ScraperResult, mapper_registry
|
||||
|
||||
class Scraper:
|
||||
__version__ = "Scraper 0.0.0"
|
||||
@@ -19,31 +25,64 @@ class Scraper:
|
||||
'DO_SPACES_KEY'),
|
||||
aws_secret_access_key=os.getenv('DO_SPACES_SECRET'))
|
||||
|
||||
self.headers = {
|
||||
'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0'}
|
||||
|
||||
pass
|
||||
|
||||
def __str__(self):
|
||||
return self.__version__
|
||||
|
||||
def archive_media(self, url: str, key: str = None) -> str:
|
||||
def url_to_key(self, url: str, content_type: str) -> str:
|
||||
key = urlparse(url).path.split('/')[-1]
|
||||
return key
|
||||
|
||||
def url_to_blob(self, url: str, key: str = None) -> Tuple[bytes, str, str]:
|
||||
|
||||
n_retries = 0
|
||||
r = requests.get(url)
|
||||
|
||||
r = requests.get(url, headers = self.headers)
|
||||
|
||||
while r.status_code != 200 and n_retries < 5:
|
||||
logger.warning(f"{n_retries}/5: Request for {url} failed")
|
||||
n_retries += 1
|
||||
r = requests.get(url)
|
||||
r = requests.get(url, headers = self.headers)
|
||||
|
||||
if r.status_code != 200:
|
||||
logger.error(f"Could not fetch URL {url}")
|
||||
return url
|
||||
|
||||
blob = r.content
|
||||
|
||||
content_type = r.headers.get('Content-Type')
|
||||
|
||||
if key is None:
|
||||
key = url.split('/')[-1]
|
||||
key = key.split('?')[0]
|
||||
key = self.url_to_key(url, content_type)
|
||||
|
||||
return blob, content_type, key
|
||||
|
||||
def m3u8_url_to_blob(self, url: str, key: str = None) -> Tuple[bytes, str, str]:
|
||||
|
||||
content_type = 'video/mp4'
|
||||
ext = '.' + content_type.split('/')[-1]
|
||||
|
||||
with tempfile.NamedTemporaryFile(suffix = ext) as temp_file:
|
||||
|
||||
(
|
||||
ffmpeg
|
||||
.input(url)
|
||||
.output(temp_file.name, vcodec='copy')
|
||||
.global_args('-loglevel', 'error')
|
||||
.run(overwrite_output=True))
|
||||
|
||||
temp_file.seek(0)
|
||||
blob = temp_file.read()
|
||||
|
||||
if key is None:
|
||||
key = self.url_to_key(url = url, content_type = content_type)
|
||||
|
||||
return blob, content_type, key
|
||||
|
||||
def archive_media(self, blob: bytes, content_type: str, key: str) -> str:
|
||||
|
||||
filename = self.__version__.replace(' ', '_') + '/' + key
|
||||
|
||||
@@ -54,8 +93,77 @@ class Scraper:
|
||||
|
||||
return archived_url
|
||||
|
||||
def can_handle(self, channel: cisticola.base.Channel) -> bool:
|
||||
def can_handle(self, channel: Channel) -> bool:
|
||||
pass
|
||||
|
||||
def get_posts(self, channel: cisticola.base.Channel, since: cisticola.base.ScraperResult = None) -> Generator[cisticola.base.ScraperResult, None, None]:
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None) -> Generator[ScraperResult, None, None]:
|
||||
pass
|
||||
|
||||
|
||||
class ScraperController:
|
||||
"""Registers scrapers, uses them to generate ScraperResults. Synchronizes
|
||||
everything with database via ORM."""
|
||||
|
||||
def __init__(self):
|
||||
self.scrapers = []
|
||||
self.session = None
|
||||
self.mapper_registry = None
|
||||
|
||||
def register_scraper(self, scraper: Scraper):
|
||||
self.scrapers.append(scraper)
|
||||
|
||||
def register_scrapers(self, scraper: List[Scraper]):
|
||||
self.scrapers.extend(scraper)
|
||||
|
||||
def scrape_channels(self, channels: List[Channel]):
|
||||
if self.session is None:
|
||||
logger.error("No DB session")
|
||||
return
|
||||
|
||||
for channel in channels:
|
||||
handled = False
|
||||
|
||||
for scraper in self.scrapers:
|
||||
if scraper.can_handle(channel):
|
||||
session = self.session()
|
||||
handled = True
|
||||
added = 0
|
||||
|
||||
# get most recent post
|
||||
session = self.session()
|
||||
rows = session.query(ScraperResult).where(
|
||||
ScraperResult.channel == channel.id).order_by(
|
||||
ScraperResult.date.desc()).limit(1).all()
|
||||
|
||||
if len(rows) == 1:
|
||||
since = rows[0]
|
||||
else:
|
||||
since = None
|
||||
|
||||
posts = scraper.get_posts(channel, since=since)
|
||||
|
||||
for post in posts:
|
||||
session.add(post)
|
||||
added += 1
|
||||
|
||||
session.commit()
|
||||
logger.info(
|
||||
f"{scraper} found {added} new posts from {channel}")
|
||||
break
|
||||
|
||||
if not handled:
|
||||
logger.warning(f"No handler found for Channel {channel}")
|
||||
|
||||
def connect_to_db(self, engine):
|
||||
# create tables
|
||||
mapper_registry.metadata.create_all(bind=engine)
|
||||
|
||||
self.session = sessionmaker()
|
||||
self.session.configure(bind=engine)
|
||||
|
||||
|
||||
class ETLController:
|
||||
"""This class will transform the raw_data tables into a format more conducive to analysis."""
|
||||
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
@@ -9,53 +9,54 @@ from typing import Generator
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
import cisticola.base
|
||||
|
||||
class BitchuteScraper(cisticola.scraper.Scraper):
|
||||
from cisticola.base import Channel, ScraperResult
|
||||
from cisticola.scraper.base import Scraper
|
||||
class BitchuteScraper(Scraper):
|
||||
"""An implementation of a Scraper for Bitchute, using classes from the 4cat
|
||||
library"""
|
||||
__version__ = "BitchuteScraper 0.0.1"
|
||||
|
||||
# TODO snscrape should be able to scrape from user ID alone, but there is
|
||||
# currently a bug/other issue, so it is extracting the username from URL
|
||||
def get_username_from_url(url):
|
||||
username = url.split('bitchute.com/channel/')[-1].strip('/')
|
||||
|
||||
return username
|
||||
|
||||
def get_posts(self, channel: cisticola.base.Channel, since: cisticola.base.ScraperResult = None) -> Generator[cisticola.base.ScraperResult, None, None]:
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None) -> Generator[ScraperResult, None, None]:
|
||||
|
||||
session = requests.Session()
|
||||
session.headers["User-Agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0"
|
||||
session.headers.update(self.headers)
|
||||
request = session.get("https://www.bitchute.com/search")
|
||||
csrftoken = BeautifulSoup(request.text, 'html.parser').findAll(
|
||||
"input", {"name": "csrfmiddlewaretoken"})[0].get("value")
|
||||
time.sleep(0.25)
|
||||
|
||||
# Don't scrape comment information
|
||||
#TODO implement framework for processing and storing comments
|
||||
detail = 'basic'
|
||||
detail = 'comments'
|
||||
|
||||
posts = []
|
||||
username = BitchuteScraper.get_username_from_url(channel.url)
|
||||
scraper = get_videos_user(session, username, csrftoken, detail)
|
||||
|
||||
for i, post in enumerate(scraper):
|
||||
for post in scraper:
|
||||
|
||||
if since is not None and post['timestamp'] <= since.date_archived.timestamp():
|
||||
print( f'\n\nBREAK ON VIDEO: {i}\n\n')
|
||||
if since is not None and datetime.fromtimestamp(post['timestamp']) <= since.date:
|
||||
break
|
||||
|
||||
posts.append(cisticola.base.ScraperResult(
|
||||
archived_urls = {}
|
||||
|
||||
if 'video_url' in post:
|
||||
url = post['video_url']
|
||||
media_blob, content_type, key = self.url_to_blob(url)
|
||||
archived_url = self.archive_media(media_blob, content_type, key)
|
||||
archived_urls[url] = archived_url
|
||||
|
||||
yield ScraperResult(
|
||||
scraper=self.__version__,
|
||||
platform="Bitchute",
|
||||
channel=channel.id,
|
||||
platform_id=post['id'],
|
||||
date=datetime.fromtimestamp(post['timestamp']),
|
||||
date_archived=datetime.now(),
|
||||
raw_data=json.dumps(post)))
|
||||
|
||||
return posts
|
||||
raw_data=json.dumps(post),
|
||||
archived_urls=archived_urls)
|
||||
|
||||
def can_handle(self, channel):
|
||||
if channel.platform == "Bitchute" and BitchuteScraper.get_username_from_url(channel.url) is not None:
|
||||
|
||||
53
cisticola/scraper/gab.py
Normal file
53
cisticola/scraper/gab.py
Normal file
@@ -0,0 +1,53 @@
|
||||
from datetime import datetime
|
||||
import json
|
||||
from typing import Generator
|
||||
|
||||
from garc import Garc
|
||||
|
||||
from cisticola.base import Channel, ScraperResult
|
||||
from cisticola.scraper.base import Scraper
|
||||
class GabScraper(Scraper):
|
||||
"""An implementation of a Scraper for Gab, using GARC library"""
|
||||
__version__ = "GabScraper 0.0.1"
|
||||
|
||||
def get_username_from_url(url):
|
||||
username = url.split('https://gab.com/')[-1]
|
||||
|
||||
return username
|
||||
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None) -> Generator[ScraperResult, None, None]:
|
||||
client = Garc(profile = 'main')
|
||||
username = GabScraper.get_username_from_url(channel.url)
|
||||
|
||||
scraper = client.userposts(username)
|
||||
|
||||
for post in scraper:
|
||||
if since is not None and datetime.fromisoformat(post['created_at'].replace("Z", "+00:00")) <= since.date:
|
||||
break
|
||||
|
||||
media_urls = []
|
||||
archived_urls = {}
|
||||
|
||||
media_urls.extend([p['url'] for p in post['media_attachments']])
|
||||
|
||||
if post.get('repost') is not None:
|
||||
media_urls.extend([p['url'] for p in post['repost']['media_attachments']])
|
||||
|
||||
for url in media_urls:
|
||||
media_blob, content_type, key = self.url_to_blob(url)
|
||||
archived_url = self.archive_media(media_blob, content_type, key)
|
||||
archived_urls[url] = archived_url
|
||||
|
||||
yield ScraperResult(
|
||||
scraper=self.__version__,
|
||||
platform="Gab",
|
||||
channel=channel.id,
|
||||
platform_id=post['id'],
|
||||
date=datetime.fromisoformat(post['created_at'].replace("Z", "+00:00")).replace(tzinfo = None),
|
||||
date_archived=datetime.now(),
|
||||
raw_data=json.dumps(post),
|
||||
archived_urls=archived_urls)
|
||||
|
||||
def can_handle(self, channel):
|
||||
if channel.platform == "Gab" and GabScraper.get_username_from_url(channel.url) is not None:
|
||||
return True
|
||||
@@ -1,11 +1,13 @@
|
||||
import cisticola.base
|
||||
import cisticola.scraper.base
|
||||
from datetime import datetime
|
||||
import json
|
||||
from typing import Generator
|
||||
from typing import Generator, Tuple
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from gogettr import PublicClient
|
||||
|
||||
class GettrScraper(cisticola.scraper.base.Scraper):
|
||||
from cisticola.base import Channel, ScraperResult
|
||||
from cisticola.scraper.base import Scraper
|
||||
class GettrScraper(Scraper):
|
||||
"""An implementation of a Scraper for Gettr, using gogettr library"""
|
||||
__version__ = "GettrScraper 0.0.1"
|
||||
|
||||
@@ -16,7 +18,7 @@ class GettrScraper(cisticola.scraper.base.Scraper):
|
||||
|
||||
return username
|
||||
|
||||
def get_posts(self, channel: cisticola.base.Channel, since: cisticola.base.ScraperResult = None) -> Generator[cisticola.base.ScraperResult, None, None]:
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None) -> Generator[ScraperResult, None, None]:
|
||||
client = PublicClient()
|
||||
username = GettrScraper.get_username_from_url(channel.url)
|
||||
scraper = client.user_activity(username=username, type="posts")
|
||||
@@ -30,19 +32,23 @@ class GettrScraper(cisticola.scraper.base.Scraper):
|
||||
if 'imgs' in post:
|
||||
for img in post['imgs']:
|
||||
url = "https://media.gettr.com/" + img
|
||||
archived_url = self.archive_media(url)
|
||||
media_blob, content_type, key = self.url_to_blob(url)
|
||||
archived_url = self.archive_media(media_blob, content_type, key)
|
||||
archived_urls[img] = archived_url
|
||||
|
||||
if 'main' in post:
|
||||
archived_url = self.archive_media("https://media.gettr.com/" + post['main'])
|
||||
url = "https://media.gettr.com/" + post['main']
|
||||
media_blob, content_type, key = self.url_to_blob(url)
|
||||
archived_url = self.archive_media(media_blob, content_type, key)
|
||||
archived_urls[post['main']] = archived_url
|
||||
|
||||
# TODO this is just archiving the playlist file, not the actual video
|
||||
if 'vid' in post:
|
||||
archived_url = self.archive_media("https://media.gettr.com/" + post['vid'])
|
||||
url = "https://media.gettr.com/" + post['vid']
|
||||
media_blob, content_type, key = self.m3u8_url_to_blob(url)
|
||||
archived_url = self.archive_media(media_blob, content_type, key)
|
||||
archived_urls[post['vid']] = archived_url
|
||||
|
||||
yield cisticola.base.ScraperResult(
|
||||
yield ScraperResult(
|
||||
scraper=self.__version__,
|
||||
platform="Gettr",
|
||||
channel=channel.id,
|
||||
@@ -55,3 +61,8 @@ class GettrScraper(cisticola.scraper.base.Scraper):
|
||||
def can_handle(self, channel):
|
||||
if channel.platform == "Gettr" and GettrScraper.get_username_from_url(channel.url) is not None:
|
||||
return True
|
||||
|
||||
def url_to_key(self, url: str, content_type: str) -> str:
|
||||
ext = '.' + content_type.split('/')[-1]
|
||||
key = urlparse(url).path.split('/')[-2] + ext
|
||||
return key
|
||||
78
cisticola/scraper/odysee.py
Normal file
78
cisticola/scraper/odysee.py
Normal file
@@ -0,0 +1,78 @@
|
||||
from datetime import datetime
|
||||
import json
|
||||
from typing import Generator
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from polyphemus.base import OdyseeChannel
|
||||
import requests
|
||||
|
||||
from cisticola.base import Channel, ScraperResult
|
||||
from cisticola.scraper.base import Scraper
|
||||
|
||||
class OdyseeScraper(Scraper):
|
||||
"""An implementation of a Scraper for Odysee, using polyphemus library"""
|
||||
__version__ = "OdyseeScraper 0.0.1"
|
||||
|
||||
def get_username_from_url(url):
|
||||
|
||||
username = url.split('odysee.com/')[-1].strip('@').split(':')[0]
|
||||
|
||||
return username
|
||||
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None) -> Generator[ScraperResult, None, None]:
|
||||
|
||||
username = OdyseeScraper.get_username_from_url(channel.url)
|
||||
odysee_channel = OdyseeChannel(channel_name = username)
|
||||
|
||||
all_videos = odysee_channel.get_all_videos()
|
||||
|
||||
for video in all_videos:
|
||||
if since is not None and datetime.fromtimestamp(video['created']) <= since.date:
|
||||
break
|
||||
|
||||
archived_urls = {}
|
||||
url = video.info['streaming_url']
|
||||
|
||||
# Check if file is a video file or an m3u8 file
|
||||
r = requests.head(url)
|
||||
if r.headers['Content-Type'] == 'text/html; charset=utf-8':
|
||||
media_blob, content_type, key = self.m3u8_url_to_blob(url)
|
||||
else:
|
||||
media_blob, content_type, key = self.url_to_blob(url)
|
||||
|
||||
archived_url = self.archive_media(media_blob, content_type, key)
|
||||
archived_urls[url] = archived_url
|
||||
|
||||
all_comments = video.get_all_comments()
|
||||
|
||||
yield ScraperResult(
|
||||
scraper=self.__version__,
|
||||
platform="Odysee",
|
||||
channel=channel.id,
|
||||
platform_id=video.info['claim_id'],
|
||||
date=datetime.fromtimestamp(video.info['created']),
|
||||
date_archived=datetime.now(),
|
||||
raw_data=json.dumps(video.info),
|
||||
archived_urls=archived_urls)
|
||||
|
||||
for comment in all_comments:
|
||||
|
||||
yield ScraperResult(
|
||||
scraper=self.__version__,
|
||||
platform="Odysee",
|
||||
channel=channel.id,
|
||||
platform_id=comment.info['claim_id'],
|
||||
date=datetime.fromtimestamp(comment.info['created']),
|
||||
date_archived=datetime.now(),
|
||||
raw_data=json.dumps(comment.info),
|
||||
archived_urls={})
|
||||
|
||||
def can_handle(self, channel):
|
||||
if channel.platform == "Odysee" and OdyseeScraper.get_username_from_url(channel.url) is not None:
|
||||
return True
|
||||
|
||||
def url_to_key(self, url: str, content_type: str) -> str:
|
||||
key = urlparse(url).path.split('/')[-2]
|
||||
ext = content_type.split('/')[-1]
|
||||
|
||||
return f'{key}.{ext}'
|
||||
143
cisticola/scraper/rumble.py
Normal file
143
cisticola/scraper/rumble.py
Normal file
@@ -0,0 +1,143 @@
|
||||
from datetime import datetime
|
||||
import json
|
||||
from typing import Generator, Tuple
|
||||
import tempfile
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import youtube_dl
|
||||
|
||||
from cisticola.base import Channel, ScraperResult
|
||||
from cisticola.scraper.base import Scraper
|
||||
|
||||
BASE_URL = 'https://rumble.com'
|
||||
|
||||
class RumbleScraper(Scraper):
|
||||
"""An implementation of a Scraper for Rumble, using custom functions"""
|
||||
__version__ = "RumbleScraper 0.0.1"
|
||||
|
||||
def get_username_from_url(url):
|
||||
username = url.split('https://rumble.com/c/')[1]
|
||||
|
||||
return username
|
||||
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None) -> Generator[ScraperResult, None, None]:
|
||||
|
||||
username = RumbleScraper.get_username_from_url(channel.url)
|
||||
scraper = get_channel_videos(username)
|
||||
|
||||
for post in scraper:
|
||||
if since is not None and datetime.fromtimestamp(post['cdate']*0.001) <= since.date:
|
||||
break
|
||||
|
||||
archived_urls = {}
|
||||
|
||||
url = post['media_url']
|
||||
|
||||
media_blob, content_type, key = self.url_to_blob(url)
|
||||
archived_url = self.archive_media(media_blob, content_type, key)
|
||||
archived_urls[post['media_url']] = archived_url
|
||||
|
||||
yield ScraperResult(
|
||||
scraper=self.__version__,
|
||||
platform="Rumble",
|
||||
channel=channel.id,
|
||||
platform_id=post['media_url'].split('/')[-2],
|
||||
date=datetime.fromisoformat(post['datetime']).replace(tzinfo=None),
|
||||
date_archived=datetime.now(),
|
||||
raw_data=json.dumps(post),
|
||||
archived_urls=archived_urls)
|
||||
|
||||
def can_handle(self, channel):
|
||||
if channel.platform == "Rumble" and RumbleScraper.get_username_from_url(channel.url) is not None:
|
||||
return True
|
||||
|
||||
def url_to_blob(self, url: str, key: str = None) -> Tuple[bytes, str, str]:
|
||||
|
||||
content_type = 'video/mp4'
|
||||
ext = '.' + content_type.split('/')[-1]
|
||||
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
ydl_opts = {
|
||||
"format": "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best",
|
||||
"merge_output_format": "mp4",
|
||||
"outtmpl": f"{temp_dir}/%(id)s.%(ext)s",
|
||||
"noplaylist": True,
|
||||
'quiet': True,
|
||||
"verbose": False,}
|
||||
ydl = youtube_dl.YoutubeDL(ydl_opts)
|
||||
|
||||
try:
|
||||
meta = ydl.extract_info(
|
||||
url,
|
||||
download=True,)
|
||||
except youtube_dl.utils.DownloadError as e:
|
||||
raise e
|
||||
else:
|
||||
video_id = meta["id"]
|
||||
video_ext = meta["ext"]
|
||||
|
||||
with open(f"{temp_dir}/{video_id}.{video_ext}", "rb") as f:
|
||||
blob = f.read()
|
||||
|
||||
if key is None:
|
||||
key = urlparse(url).path.split('/')[-2] + ext
|
||||
|
||||
return blob, content_type, key
|
||||
|
||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||
|
||||
def get_media_url(url):
|
||||
|
||||
r = requests.get(url)
|
||||
soup = BeautifulSoup(r.content, features = 'lxml')
|
||||
|
||||
script = json.loads(''.join(soup.find('script', {'type':'application/ld+json'}).text))
|
||||
media_url = script[0]['embedUrl']
|
||||
|
||||
return media_url
|
||||
|
||||
def process_video(video):
|
||||
|
||||
rumble_soup = video.find('span', {'class' : 'video-item--rumbles'})
|
||||
if rumble_soup is None:
|
||||
rumbles = '0'
|
||||
else:
|
||||
rumbles = rumble_soup['data-value']
|
||||
|
||||
info = {
|
||||
'title' : video.find('h3').text,
|
||||
'thumbnail' : video.find('img')['src'],
|
||||
'link' : BASE_URL + video.find('a', href = True)['href'],
|
||||
'views' : video.find('span', {'class' : 'video-item--views'})['data-value'],
|
||||
'rumbles' : rumbles,
|
||||
'duration' : video.find('span', {'class' : 'video-item--duration'})['data-value'],
|
||||
'datetime' : video.find('time')['datetime']}
|
||||
|
||||
info['media_url'] = get_media_url(info['link'])
|
||||
|
||||
return info
|
||||
|
||||
def get_channel_videos(channel):
|
||||
|
||||
page = 1
|
||||
channel_url = f'{BASE_URL}/c/{channel}?page='
|
||||
|
||||
while True:
|
||||
url = channel_url + str(page)
|
||||
r = requests.get(url)
|
||||
|
||||
if r.status_code == 404:
|
||||
break
|
||||
|
||||
soup = BeautifulSoup(r.content, features = 'lxml')
|
||||
|
||||
video_list = soup.find_all('li', {'class' : 'video-listing-entry'})
|
||||
|
||||
for video in video_list:
|
||||
yield process_video(video)
|
||||
|
||||
page += 1
|
||||
|
||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||
@@ -1,18 +1,19 @@
|
||||
import cisticola.base
|
||||
import cisticola.scraper.base
|
||||
from typing import Generator
|
||||
import snscrape.modules
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import snscrape.modules
|
||||
|
||||
class TelegramSnscrapeScraper(cisticola.scraper.base.Scraper):
|
||||
from cisticola.base import Channel, ScraperResult
|
||||
from cisticola.scraper.base import Scraper
|
||||
|
||||
class TelegramSnscrapeScraper(Scraper):
|
||||
__version__ = "TelegramSnscrapeScraper 0.0.1"
|
||||
|
||||
def can_handle(self, channel):
|
||||
if channel.platform == "Telegram" and channel.public and not channel.chat:
|
||||
return True
|
||||
|
||||
def get_posts(self, channel: cisticola.base.Channel, since: cisticola.base.ScraperResult = None) -> Generator[cisticola.base.ScraperResult, None, None]:
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None) -> Generator[ScraperResult, None, None]:
|
||||
scr = snscrape.modules.telegram.TelegramChannelScraper(
|
||||
channel.screenname)
|
||||
|
||||
@@ -25,14 +26,16 @@ class TelegramSnscrapeScraper(cisticola.scraper.base.Scraper):
|
||||
archived_urls = {}
|
||||
|
||||
for image_url in post.images:
|
||||
archive_url = self.archive_media(image_url)
|
||||
archived_urls[image_url] = archive_url
|
||||
media_blob, content_type, key = self.url_to_blob(image_url)
|
||||
archived_url = self.archive_media(media_blob, content_type, key)
|
||||
archived_urls[image_url] = archived_url
|
||||
|
||||
if post.video:
|
||||
video_archive_url = self.archive_media(post.video)
|
||||
archived_urls[post.video] = video_archive_url
|
||||
media_blob, content_type, key = self.url_to_blob(post.video)
|
||||
archived_url = self.archive_media(media_blob, content_type, key)
|
||||
archived_urls[post.video] = archived_url
|
||||
|
||||
yield cisticola.base.ScraperResult(
|
||||
yield ScraperResult(
|
||||
scraper=self.__version__,
|
||||
platform="Telegram",
|
||||
channel=channel.id,
|
||||
|
||||
@@ -1,17 +1,19 @@
|
||||
import cisticola.base
|
||||
import cisticola.scraper.base
|
||||
from datetime import datetime, timezone
|
||||
from typing import Generator
|
||||
import snscrape.modules
|
||||
from urllib.parse import urlparse, parse_qs
|
||||
|
||||
from snscrape.modules.twitter import TwitterProfileScraper, Video, Gif, Photo
|
||||
from loguru import logger
|
||||
|
||||
from cisticola.base import Channel, ScraperResult
|
||||
from cisticola.scraper.base import Scraper
|
||||
|
||||
class TwitterScraper(cisticola.scraper.base.Scraper):
|
||||
class TwitterScraper(Scraper):
|
||||
"""An implementation of a Scraper for Twitter, using snscrape library"""
|
||||
__version__ = "TwitterScraper 0.0.1"
|
||||
|
||||
def get_posts(self, channel: cisticola.base.Channel, since: cisticola.base.ScraperResult = None) -> Generator[cisticola.base.ScraperResult, None, None]:
|
||||
scraper = snscrape.modules.twitter.TwitterProfileScraper(channel.platform_id)
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None) -> Generator[ScraperResult, None, None]:
|
||||
scraper = TwitterProfileScraper(channel.platform_id)
|
||||
|
||||
first = True
|
||||
|
||||
@@ -28,23 +30,24 @@ class TwitterScraper(cisticola.scraper.base.Scraper):
|
||||
|
||||
if tweet.media:
|
||||
for media in tweet.media:
|
||||
if type(media) == snscrape.modules.twitter.Video:
|
||||
if type(media) == Video:
|
||||
variant = max(
|
||||
[v for v in media.variants if v.bitrate], key=lambda v: v.bitrate)
|
||||
url = variant.url
|
||||
elif type(media) == snscrape.modules.twitter.Gif:
|
||||
elif type(media) == Gif:
|
||||
url = media.variants[0].url
|
||||
elif type(media) == snscrape.modules.twitter.Photo:
|
||||
elif type(media) == Photo:
|
||||
url = media.fullUrl
|
||||
else:
|
||||
logger.warning(f"Could not get media URL of {media}")
|
||||
url = None
|
||||
|
||||
if url is not None:
|
||||
archived_url = self.archive_media(url)
|
||||
media_blob, content_type, key = self.url_to_blob(url)
|
||||
archived_url = self.archive_media(media_blob, content_type, key)
|
||||
archived_urls[url] = archived_url
|
||||
|
||||
yield cisticola.base.ScraperResult(
|
||||
yield ScraperResult(
|
||||
scraper=self.__version__,
|
||||
platform="Twitter",
|
||||
channel=channel.id,
|
||||
@@ -57,3 +60,16 @@ class TwitterScraper(cisticola.scraper.base.Scraper):
|
||||
def can_handle(self, channel):
|
||||
if channel.platform == "Twitter" and channel.platform_id:
|
||||
return True
|
||||
|
||||
def url_to_key(self, url: str, content_type: str) -> str:
|
||||
parsed_url = urlparse(url)
|
||||
queries = parse_qs(parsed_url.query)
|
||||
|
||||
# TODO might require additional statements for other media formats
|
||||
if 'jpg' in queries.get('format', []):
|
||||
ext = '.jpg'
|
||||
elif parsed_url.path.endswith('.mp4'):
|
||||
ext = ''
|
||||
|
||||
key = parsed_url.path.split('/')[-1] + ext
|
||||
return key
|
||||
@@ -1,16 +1,2 @@
|
||||
import cisticola.base
|
||||
|
||||
class Transformer:
|
||||
"""Interface class for transformers"""
|
||||
|
||||
__version__ = "Transformer 0.0.0"
|
||||
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def can_handle(data: cisticola.base.ScraperResult) -> bool:
|
||||
pass
|
||||
|
||||
def transform(data: cisticola.base.ScraperResult) -> cisticola.base.TransformedResult:
|
||||
pass
|
||||
|
||||
from . import base
|
||||
from .twitter import TwitterTransformer
|
||||
16
cisticola/transformer/base.py
Normal file
16
cisticola/transformer/base.py
Normal file
@@ -0,0 +1,16 @@
|
||||
from cisticola.base import ScraperResult, TransformedResult
|
||||
|
||||
class Transformer:
|
||||
"""Interface class for transformers"""
|
||||
|
||||
__version__ = "Transformer 0.0.0"
|
||||
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def can_handle(data: ScraperResult) -> bool:
|
||||
pass
|
||||
|
||||
def transform(data: ScraperResult) -> TransformedResult:
|
||||
pass
|
||||
|
||||
@@ -1,17 +1,17 @@
|
||||
import cisticola.transformer
|
||||
import cisticola.base
|
||||
import json
|
||||
|
||||
from cisticola.transformer.base import Transformer
|
||||
from cisticola.base import ScraperResult, TransformedResult
|
||||
|
||||
class TwitterTransformer(cisticola.transformer.Transformer):
|
||||
class TwitterTransformer(Transformer):
|
||||
"""A Twitter specific ScraperResult, with a method ETL/transforming"""
|
||||
|
||||
__version__ = "TwitterTransformer 0.0.1"
|
||||
|
||||
def transform(self, data: cisticola.base.ScraperResult) -> cisticola.base.TransformedResult:
|
||||
def transform(self, data: ScraperResult) -> TransformedResult:
|
||||
raw = json.loads(data.raw_data)
|
||||
|
||||
transformed = cisticola.base.TransformedResult(
|
||||
transformed = TransformedResult(
|
||||
raw_id=data.id,
|
||||
scraper=data.scraper,
|
||||
transformer=self.__version__,
|
||||
|
||||
131
examples/russian_telegram_ingest.py
Normal file
131
examples/russian_telegram_ingest.py
Normal file
@@ -0,0 +1,131 @@
|
||||
from sqlalchemy import create_engine
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import (
|
||||
ScraperController,
|
||||
TelegramSnscrapeScraper)
|
||||
|
||||
test_channels = [
|
||||
Channel(
|
||||
id=0,
|
||||
name="QAnon Россия",
|
||||
platform_id=-1001319637748,
|
||||
category="Qanon",
|
||||
followers=94048,
|
||||
platform="Telegram",
|
||||
url="https://t.me/qanonrus",
|
||||
screenname="qanonrus",
|
||||
country="RU",
|
||||
influencer=None,
|
||||
public=True,
|
||||
chat=False,
|
||||
notes=""),
|
||||
Channel(
|
||||
id=1,
|
||||
name="The Great Awakening | Q",
|
||||
platform_id=-1001325597521,
|
||||
category="Qanon",
|
||||
followers=5715,
|
||||
platform="Telegram",
|
||||
url="https://t.me/greatawakin",
|
||||
screenname="greatawakin",
|
||||
country="RU",
|
||||
influencer=None,
|
||||
public=True,
|
||||
chat=False,
|
||||
notes=""),
|
||||
Channel(
|
||||
id=2,
|
||||
name="Великое Пробуждение",
|
||||
platform_id=-1001285898079,
|
||||
category="Qanon",
|
||||
followers=5861,
|
||||
platform="Telegram",
|
||||
url="https://t.me/greatawakeningrus",
|
||||
screenname="greatawakeningrus",
|
||||
country="RU",
|
||||
influencer=None,
|
||||
public=True,
|
||||
chat=False,
|
||||
notes=""),
|
||||
Channel(
|
||||
id=3,
|
||||
name="T🕊Редакция Президент Гордон🕊",
|
||||
platform_id=-1001101170442,
|
||||
category="Qanon",
|
||||
followers=5743,
|
||||
platform="Telegram",
|
||||
url="https://t.me/prezidentgordonteam",
|
||||
screenname="prezidentgordonteam",
|
||||
country="RU",
|
||||
influencer=None,
|
||||
public=True,
|
||||
chat=False,
|
||||
notes=""),
|
||||
Channel(
|
||||
id=4,
|
||||
name="ПРОЕКТ АВРОРА",
|
||||
platform_id=-1001279171101,
|
||||
category="Qanon",
|
||||
followers=5930,
|
||||
platform="Telegram",
|
||||
url="https://t.me/project_aurora",
|
||||
screenname="project_aurora",
|
||||
country="RU",
|
||||
influencer=None,
|
||||
public=True,
|
||||
chat=False,
|
||||
notes=""),
|
||||
Channel(
|
||||
id=5,
|
||||
name="Сон Разума",
|
||||
platform_id=-1001202338312,
|
||||
category="Qanon",
|
||||
followers=27099,
|
||||
platform="Telegram",
|
||||
url="https://t.me/error_288",
|
||||
screenname="error_288",
|
||||
country="RU",
|
||||
influencer=None,
|
||||
public=True,
|
||||
chat=False,
|
||||
notes=""),
|
||||
Channel(
|
||||
id=6,
|
||||
name="Пробуждающий Мир - официальный канал",
|
||||
platform_id=-1001492521207,
|
||||
category="Qanon",
|
||||
followers=19097,
|
||||
platform="Telegram",
|
||||
url="https://t.me/promirru",
|
||||
screenname="promirru",
|
||||
country="RU",
|
||||
influencer=None,
|
||||
public=True,
|
||||
chat=False,
|
||||
notes=""),
|
||||
Channel(
|
||||
id=7,
|
||||
name="ЦЕЛЬНОЗОР",
|
||||
platform_id=-1001642737506,
|
||||
category="Qanon",
|
||||
followers=13654,
|
||||
platform="Telegram",
|
||||
url="https://t.me/tselnozor",
|
||||
screenname="tselnozor",
|
||||
country="RU",
|
||||
influencer=None,
|
||||
public=True,
|
||||
chat=False,
|
||||
notes=""),]
|
||||
|
||||
controller = ScraperController()
|
||||
|
||||
telegram = TelegramSnscrapeScraper()
|
||||
controller.register_scraper(telegram)
|
||||
|
||||
engine = create_engine('sqlite:///russian_telegram.db')
|
||||
controller.connect_to_db(engine)
|
||||
|
||||
controller.scrape_channels(test_channels)
|
||||
|
||||
13
pytest.ini
Normal file
13
pytest.ini
Normal file
@@ -0,0 +1,13 @@
|
||||
[pytest]
|
||||
minversion =
|
||||
6.0.2
|
||||
testpaths =
|
||||
tests/
|
||||
python_files =
|
||||
*.py
|
||||
addopts =
|
||||
-vvv
|
||||
--cov='cisticola'
|
||||
--cov-report html:reports/coverage
|
||||
--html='reports/tests.html'
|
||||
--self-contained-html
|
||||
149
test.py
149
test.py
@@ -1,44 +1,127 @@
|
||||
import cisticola
|
||||
import cisticola.scraper.telegram_snscrape
|
||||
import cisticola.scraper.twitter
|
||||
import cisticola.scraper.gettr
|
||||
|
||||
from sqlalchemy import create_engine
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import (
|
||||
ScraperController,
|
||||
BitchuteScraper,
|
||||
GabScraper,
|
||||
GettrScraper,
|
||||
OdyseeScraper,
|
||||
RumbleScraper,
|
||||
TelegramSnscrapeScraper,
|
||||
TwitterScraper)
|
||||
|
||||
test_channels = [
|
||||
cisticola.base.Channel(id=0, name="Logan Williams (test)", platform_id=891729132,
|
||||
category="test", followers=None, platform="Twitter",
|
||||
url="https://twitter.com/obtusatum", screenname="obtusatum", country="US",
|
||||
influencer=None, public=True, chat=False,
|
||||
notes=""),
|
||||
cisticola.base.Channel(id=1, name="JQHN SPARTAN", platform_id=-1001181961026,
|
||||
category="qanon", followers=None, platform="Telegram",
|
||||
url="https://t.me/jqhnspartan", screenname="jqhnspartan", country="FR",
|
||||
influencer="JQNH SPARTAN", public=True, chat=False, notes=""),
|
||||
cisticola.base.Channel(id=2, name="LizardRepublic", platform_id='lizardrepublic',
|
||||
category="qanon", followers=None, platform="Gettr",
|
||||
url="https://www.gettr.com/user/lizardrepublic", screenname="lizardrepublic", country="US",
|
||||
influencer=None, public=True, chat=False, notes=""),
|
||||
cisticola.base.Channel(id=3, name="Patriot Front", platform_id='OVv9QZL4sEsC',
|
||||
category="nazi", followers=None, platform="Bitchute",
|
||||
url="https://www.bitchute.com/channel/OVv9QZL4sEsC/", screenname=None, country="US",
|
||||
influencer=None, public=True, chat=False, notes=""),]
|
||||
Channel(
|
||||
id=0,
|
||||
name="Logan Williams (test)",
|
||||
platform_id=891729132,
|
||||
category="test",
|
||||
followers=None,
|
||||
platform="Twitter",
|
||||
url="https://twitter.com/obtusatum",
|
||||
screenname="obtusatum",
|
||||
country="US",
|
||||
influencer=None,
|
||||
public=True,
|
||||
chat=False,
|
||||
notes=""),
|
||||
Channel(
|
||||
id=1,
|
||||
name="South West Ohio Proud Boys (test)",
|
||||
platform_id=-1001276612436,
|
||||
category="test",
|
||||
followers=None,
|
||||
platform="Telegram",
|
||||
url="https://t.me/SouthwestOhioPB",
|
||||
screenname="SouthwestOhioPB",
|
||||
country="US",
|
||||
influencer=None,
|
||||
public=True,
|
||||
chat=False,
|
||||
notes=""),
|
||||
Channel(
|
||||
id=2,
|
||||
name="LizardRepublic (test)",
|
||||
platform_id='lizardrepublic',
|
||||
category="test",
|
||||
followers=None,
|
||||
platform="Gettr",
|
||||
url="https://www.gettr.com/user/lizardrepublic",
|
||||
screenname="lizardrepublic",
|
||||
country="US",
|
||||
influencer=None,
|
||||
public=True,
|
||||
chat=False,
|
||||
notes=""),
|
||||
Channel(
|
||||
id=4,
|
||||
name="bestonlinejewelrystoresusa@gmail.com (test)", platform_id='bestonlinejewelrystoresusagmailcom',
|
||||
category="test",
|
||||
followers=None,
|
||||
platform="Bitchute",
|
||||
url="https://www.bitchute.com/channel/bestonlinejewelrystoresusagmailcom/", screenname=None,
|
||||
country="US",
|
||||
influencer=None,
|
||||
public=True,
|
||||
chat=False,
|
||||
notes=""),
|
||||
Channel(
|
||||
id=5,
|
||||
name="Mak1n' Bacon (test)",
|
||||
platform_id='Mak1nBacon',
|
||||
category="test",
|
||||
followers=None,
|
||||
platform="Odysee",
|
||||
url="https://odysee.com/@Mak1nBacon",
|
||||
screenname='Mak1nBacon',
|
||||
country="US",
|
||||
influencer=None,
|
||||
public=True,
|
||||
chat=False,
|
||||
notes=""),
|
||||
Channel(
|
||||
id=6,
|
||||
name="Capt. Marc Simon (test)",
|
||||
platform_id='marc_capt',
|
||||
category="test",
|
||||
followers=None,
|
||||
platform="Gab",
|
||||
url="https://gab.com/marc_capt",
|
||||
screenname='marc_capt',
|
||||
country="CA",
|
||||
influencer=None,
|
||||
public=True,
|
||||
chat=False,
|
||||
notes=""),
|
||||
Channel(
|
||||
id=7,
|
||||
name="we are uploading videos wow products and problem solving products.please share like and subscribe our channelwe are uploading videos wow products and problem solving products.please share like and subscribe our channel", platform_id='c-916305',
|
||||
category="test",
|
||||
followers=None,
|
||||
platform="Rumble",
|
||||
url="https://rumble.com/c/c-916305",
|
||||
screenname='we are uploading',
|
||||
country="CA",
|
||||
influencer=None,
|
||||
public=True,
|
||||
chat=False,
|
||||
notes="")]
|
||||
|
||||
controller = ScraperController()
|
||||
|
||||
controller = cisticola.ScraperController()
|
||||
scrapers = [
|
||||
BitchuteScraper(),
|
||||
GabScraper(),
|
||||
GettrScraper(),
|
||||
OdyseeScraper(),
|
||||
RumbleScraper(),
|
||||
TelegramSnscrapeScraper(),
|
||||
TwitterScraper()]
|
||||
|
||||
twitter = cisticola.scraper.twitter.TwitterScraper()
|
||||
controller.register_scraper(twitter)
|
||||
|
||||
telegram = cisticola.scraper.telegram_snscrape.TelegramSnscrapeScraper()
|
||||
controller.register_scraper(telegram)
|
||||
|
||||
gettr = cisticola.scraper.gettr.GettrScraper()
|
||||
controller.register_scraper(gettr)
|
||||
controller.register_scrapers(scrapers)
|
||||
|
||||
engine = create_engine('sqlite:///test3.db')
|
||||
controller.connect_to_db(engine)
|
||||
|
||||
controller.scrape_channels(test_channels)
|
||||
|
||||
controller.scrape_channels(test_channels)
|
||||
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
147
tests/conftest.py
Normal file
147
tests/conftest.py
Normal file
@@ -0,0 +1,147 @@
|
||||
import pytest
|
||||
|
||||
from sqlalchemy import create_engine
|
||||
|
||||
from cisticola.scraper import ScraperController
|
||||
|
||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||
|
||||
BITCHUTE_CHANNEL_KWARGS = {
|
||||
'id': 0,
|
||||
'name': 'bestonlinejewelrystoresusa@gmail.com (test)',
|
||||
'platform_id': 'bestonlinejewelrystoresusagmailcom',
|
||||
'category': 'test',
|
||||
'followers': None,
|
||||
'platform': 'Bitchute',
|
||||
'url': 'https://www.bitchute.com/channel/bestonlinejewelrystoresusagmailcom/',
|
||||
'screenname': None,
|
||||
'country': 'US',
|
||||
'influencer': None,
|
||||
'public': True,
|
||||
'chat': False,
|
||||
'notes': ''}
|
||||
|
||||
GAB_CHANNEL_KWARGS = {
|
||||
'id': 1,
|
||||
'name': 'Capt. Marc Simon (test)',
|
||||
'platform_id': 'marc_capt',
|
||||
'category': 'test',
|
||||
'followers': None,
|
||||
'platform': 'Gab',
|
||||
'url': 'https://gab.com/marc_capt',
|
||||
'screenname': 'marc_capt',
|
||||
'country': 'CA',
|
||||
'influencer': None,
|
||||
'public': True,
|
||||
'chat': False,
|
||||
'notes': ''}
|
||||
|
||||
GETTR_CHANNEL_KWARGS = {
|
||||
'id': 2,
|
||||
'name': 'LizardRepublic (test)',
|
||||
'platform_id': 'lizardrepublic',
|
||||
'category': 'test',
|
||||
'followers': None,
|
||||
'platform': 'Gettr',
|
||||
'url': 'https://www.gettr.com/user/lizardrepublic',
|
||||
'screenname': 'lizardrepublic',
|
||||
'country': 'US',
|
||||
'influencer': None,
|
||||
'public': True,
|
||||
'chat': False,
|
||||
'notes': ''}
|
||||
|
||||
ODYSEE_CHANNEL_KWARGS = {
|
||||
'id': 3,
|
||||
'name': "Mak1n' Bacon (test)",
|
||||
'platform_id': 'Mak1nBacon',
|
||||
'category': 'test',
|
||||
'followers': None,
|
||||
'platform': 'Odysee',
|
||||
'url': 'https://odysee.com/@Mak1nBacon',
|
||||
'screenname': 'Mak1nBacon',
|
||||
'country': 'US',
|
||||
'influencer': None,
|
||||
'public': True,
|
||||
'chat': False,
|
||||
'notes': ''}
|
||||
|
||||
RUMBLE_CHANNEL_KWARGS = {
|
||||
'id': 4,
|
||||
'name': 'we are uploading videos wow products',
|
||||
'platform_id': 'c-916305',
|
||||
'category': 'test',
|
||||
'followers': None,
|
||||
'platform': 'Rumble',
|
||||
'url': 'https://rumble.com/c/c-916305',
|
||||
'screenname': 'we are uploading',
|
||||
'country': 'CA',
|
||||
'influencer': None,
|
||||
'public': True,
|
||||
'chat': False,
|
||||
'notes': ''}
|
||||
|
||||
TELEGRAM_SNSCRAPE_CHANNEL_KWARGS = {
|
||||
'id': 5,
|
||||
'name': 'South West Ohio Proud Boys (test)',
|
||||
'platform_id': -1001276612436,
|
||||
'category': 'test',
|
||||
'followers': None,
|
||||
'platform': 'Telegram',
|
||||
'url': 'https://t.me/SouthwestOhioPB',
|
||||
'screenname': 'SouthwestOhioPB',
|
||||
'country': 'US',
|
||||
'influencer': None,
|
||||
'public': True,
|
||||
'chat': False,
|
||||
'notes': ''}
|
||||
|
||||
TWITTER_CHANNEL_KWARGS = {
|
||||
'id': 5,
|
||||
'name': 'Logan Williams (test)',
|
||||
'platform_id': 891729132,
|
||||
'category': 'test',
|
||||
'followers': None,
|
||||
'platform': 'Twitter',
|
||||
'url': 'https://twitter.com/obtusatum',
|
||||
'screenname': 'obtusatum',
|
||||
'country': 'US',
|
||||
'influencer': None,
|
||||
'public': True,
|
||||
'chat': False,
|
||||
'notes': ''}
|
||||
|
||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||
|
||||
@pytest.fixture(scope='package')
|
||||
def controller(tmpdir_factory):
|
||||
|
||||
"""Initialize ScraperController and SQLite database file to be used for all
|
||||
tests in the package.
|
||||
"""
|
||||
|
||||
file = tmpdir_factory.mktemp('test_data').join('test.db')
|
||||
engine = create_engine(f'sqlite:///{file}')
|
||||
|
||||
scraper_controller = ScraperController()
|
||||
scraper_controller.connect_to_db(engine)
|
||||
|
||||
return scraper_controller
|
||||
|
||||
@pytest.fixture(scope='package')
|
||||
def channel_kwargs():
|
||||
|
||||
"""Define keyword arguments to use for defining test channels for each
|
||||
platform to be scraped.
|
||||
"""
|
||||
|
||||
return {
|
||||
'bitchute' : BITCHUTE_CHANNEL_KWARGS,
|
||||
'gab' : GAB_CHANNEL_KWARGS,
|
||||
'gettr' : GETTR_CHANNEL_KWARGS,
|
||||
'odysee' : ODYSEE_CHANNEL_KWARGS,
|
||||
'rumble' : RUMBLE_CHANNEL_KWARGS,
|
||||
'telegram_snscrape' : TELEGRAM_SNSCRAPE_CHANNEL_KWARGS,
|
||||
'twitter' : TWITTER_CHANNEL_KWARGS}
|
||||
|
||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||
8
tests/scraper/bitchute.py
Normal file
8
tests/scraper/bitchute.py
Normal file
@@ -0,0 +1,8 @@
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import BitchuteScraper
|
||||
|
||||
def test_scrape_bitchute_channel(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['bitchute'])]
|
||||
controller.register_scraper(BitchuteScraper())
|
||||
controller.scrape_channels(channels)
|
||||
8
tests/scraper/gab.py
Normal file
8
tests/scraper/gab.py
Normal file
@@ -0,0 +1,8 @@
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import GabScraper
|
||||
|
||||
def test_scrape_gab_channel(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['gab'])]
|
||||
controller.register_scraper(GabScraper())
|
||||
controller.scrape_channels(channels)
|
||||
8
tests/scraper/gettr.py
Normal file
8
tests/scraper/gettr.py
Normal file
@@ -0,0 +1,8 @@
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import GettrScraper
|
||||
|
||||
def test_scrape_gettr_channel(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['gettr'])]
|
||||
controller.register_scraper(GettrScraper())
|
||||
controller.scrape_channels(channels)
|
||||
8
tests/scraper/odysee.py
Normal file
8
tests/scraper/odysee.py
Normal file
@@ -0,0 +1,8 @@
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import OdyseeScraper
|
||||
|
||||
def test_scrape_odysee_channel(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['odysee'])]
|
||||
controller.register_scraper(OdyseeScraper())
|
||||
controller.scrape_channels(channels)
|
||||
8
tests/scraper/rumble.py
Normal file
8
tests/scraper/rumble.py
Normal file
@@ -0,0 +1,8 @@
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import RumbleScraper
|
||||
|
||||
def test_scrape_rumble_channel(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['rumble'])]
|
||||
controller.register_scraper(RumbleScraper())
|
||||
controller.scrape_channels(channels)
|
||||
8
tests/scraper/telegram_snscrape.py
Normal file
8
tests/scraper/telegram_snscrape.py
Normal file
@@ -0,0 +1,8 @@
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import TelegramSnscrapeScraper
|
||||
|
||||
def test_scrape_telegram_snscrape_channel(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['telegram_snscrape'])]
|
||||
controller.register_scraper(TelegramSnscrapeScraper())
|
||||
controller.scrape_channels(channels)
|
||||
8
tests/scraper/twitter.py
Normal file
8
tests/scraper/twitter.py
Normal file
@@ -0,0 +1,8 @@
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import TwitterScraper
|
||||
|
||||
def test_scrape_twitter_channel(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['twitter'])]
|
||||
controller.register_scraper(TwitterScraper())
|
||||
controller.scrape_channels(channels)
|
||||
Reference in New Issue
Block a user