mirror of
https://github.com/bellingcat/cisticola.git
synced 2026-06-08 03:18:34 +03:00
Merge pull request #23 from bellingcat/profile
Added methods for retrieving channel profile metadata, refactored Gab scraper to use gabber
This commit is contained in:
2
Pipfile
2
Pipfile
@@ -14,7 +14,6 @@ boto3 = "*"
|
||||
snscrape = {git = "https://github.com/bellingcat/snscrape.git"}
|
||||
ffmpeg-python = "*"
|
||||
polyphemus = {git = "https://github.com/bellingcat/polyphemus.git"}
|
||||
garc = "*"
|
||||
yt-dlp = "*"
|
||||
telethon = "*"
|
||||
pytesseract = "*"
|
||||
@@ -22,6 +21,7 @@ pyexiftool = {git = "https://github.com/smarnach/pyexiftool.git"}
|
||||
instaloader = "*"
|
||||
gspread = "*"
|
||||
cryptg = "*"
|
||||
gabber = {git = "https://github.com/stanfordio/gabber.git"}
|
||||
|
||||
[dev-packages]
|
||||
pytest = "*"
|
||||
|
||||
526
Pipfile.lock
generated
526
Pipfile.lock
generated
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "e3b96b0ac8c80d4817f9adac4ab171bf4b7e07e80927c7b152a24e8bbdbf7faa"
|
||||
"sha256": "b712e767d64e54e83e8c2d8a27a68203583ed7ad31d4ea3b4b6076a72a2150fd"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {
|
||||
@@ -16,14 +16,6 @@
|
||||
]
|
||||
},
|
||||
"default": {
|
||||
"attrs": {
|
||||
"hashes": [
|
||||
"sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4",
|
||||
"sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
|
||||
"version": "==21.4.0"
|
||||
},
|
||||
"beautifulsoup4": {
|
||||
"hashes": [
|
||||
"sha256:9a315ce70049920ea4572a4055bc4bd700c940521d36fc858205ad4fcde149bf",
|
||||
@@ -34,19 +26,19 @@
|
||||
},
|
||||
"boto3": {
|
||||
"hashes": [
|
||||
"sha256:76d5b90400c54b25278150768e946edf166acce2c1597c0ecfbebb1dbe9acf2c",
|
||||
"sha256:7bb2e6506a6ad44d111dd20a5d510374b6958fe989b4ef887109c79d812f926f"
|
||||
"sha256:127ebdf58c8825b53f1eff111e08c49ffffeb1f6d7a5665c9907ce8128fe14b1",
|
||||
"sha256:b7ce3bf013f0f60e40c2676d5a7b620ed927cfad0aa348a606b10e9a0387f249"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.21.19"
|
||||
"version": "==1.21.29"
|
||||
},
|
||||
"botocore": {
|
||||
"hashes": [
|
||||
"sha256:5ed2be0e413961134f4c17eab16396d41a5b4b73a637588260c04d20806d52ea",
|
||||
"sha256:d0d77bce152ca51f3c2cd0f9bf05cb3b623e719406ad58b4c20444e237fe82eb"
|
||||
"sha256:b467d64cd773dc4d49ef31b18a8dded554f284f799720bd12e989fe2138fd5b8",
|
||||
"sha256:de87907d42682179946ddfa113b9334e3c4258404aef19edd8c92381ff54775c"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==1.24.19"
|
||||
"version": "==1.24.29"
|
||||
},
|
||||
"brotli": {
|
||||
"hashes": [
|
||||
@@ -123,6 +115,14 @@
|
||||
"index": "pypi",
|
||||
"version": "==0.0.1"
|
||||
},
|
||||
"cachetools": {
|
||||
"hashes": [
|
||||
"sha256:486471dfa8799eb7ec503a8059e263db000cdda20075ce5e48903087f79d5fd6",
|
||||
"sha256:8fecd4203a38af17928be7b90689d8083603073622229ca7077b72d8e5a976e4"
|
||||
],
|
||||
"markers": "python_version ~= '3.7'",
|
||||
"version": "==5.0.0"
|
||||
},
|
||||
"certifi": {
|
||||
"hashes": [
|
||||
"sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872",
|
||||
@@ -130,6 +130,61 @@
|
||||
],
|
||||
"version": "==2021.10.8"
|
||||
},
|
||||
"cffi": {
|
||||
"hashes": [
|
||||
"sha256:00c878c90cb53ccfaae6b8bc18ad05d2036553e6d9d1d9dbcf323bbe83854ca3",
|
||||
"sha256:0104fb5ae2391d46a4cb082abdd5c69ea4eab79d8d44eaaf79f1b1fd806ee4c2",
|
||||
"sha256:06c48159c1abed75c2e721b1715c379fa3200c7784271b3c46df01383b593636",
|
||||
"sha256:0808014eb713677ec1292301ea4c81ad277b6cdf2fdd90fd540af98c0b101d20",
|
||||
"sha256:10dffb601ccfb65262a27233ac273d552ddc4d8ae1bf93b21c94b8511bffe728",
|
||||
"sha256:14cd121ea63ecdae71efa69c15c5543a4b5fbcd0bbe2aad864baca0063cecf27",
|
||||
"sha256:17771976e82e9f94976180f76468546834d22a7cc404b17c22df2a2c81db0c66",
|
||||
"sha256:181dee03b1170ff1969489acf1c26533710231c58f95534e3edac87fff06c443",
|
||||
"sha256:23cfe892bd5dd8941608f93348c0737e369e51c100d03718f108bf1add7bd6d0",
|
||||
"sha256:263cc3d821c4ab2213cbe8cd8b355a7f72a8324577dc865ef98487c1aeee2bc7",
|
||||
"sha256:2756c88cbb94231c7a147402476be2c4df2f6078099a6f4a480d239a8817ae39",
|
||||
"sha256:27c219baf94952ae9d50ec19651a687b826792055353d07648a5695413e0c605",
|
||||
"sha256:2a23af14f408d53d5e6cd4e3d9a24ff9e05906ad574822a10563efcef137979a",
|
||||
"sha256:31fb708d9d7c3f49a60f04cf5b119aeefe5644daba1cd2a0fe389b674fd1de37",
|
||||
"sha256:3415c89f9204ee60cd09b235810be700e993e343a408693e80ce7f6a40108029",
|
||||
"sha256:3773c4d81e6e818df2efbc7dd77325ca0dcb688116050fb2b3011218eda36139",
|
||||
"sha256:3b96a311ac60a3f6be21d2572e46ce67f09abcf4d09344c49274eb9e0bf345fc",
|
||||
"sha256:3f7d084648d77af029acb79a0ff49a0ad7e9d09057a9bf46596dac9514dc07df",
|
||||
"sha256:41d45de54cd277a7878919867c0f08b0cf817605e4eb94093e7516505d3c8d14",
|
||||
"sha256:4238e6dab5d6a8ba812de994bbb0a79bddbdf80994e4ce802b6f6f3142fcc880",
|
||||
"sha256:45db3a33139e9c8f7c09234b5784a5e33d31fd6907800b316decad50af323ff2",
|
||||
"sha256:45e8636704eacc432a206ac7345a5d3d2c62d95a507ec70d62f23cd91770482a",
|
||||
"sha256:4958391dbd6249d7ad855b9ca88fae690783a6be9e86df65865058ed81fc860e",
|
||||
"sha256:4a306fa632e8f0928956a41fa8e1d6243c71e7eb59ffbd165fc0b41e316b2474",
|
||||
"sha256:57e9ac9ccc3101fac9d6014fba037473e4358ef4e89f8e181f8951a2c0162024",
|
||||
"sha256:59888172256cac5629e60e72e86598027aca6bf01fa2465bdb676d37636573e8",
|
||||
"sha256:5e069f72d497312b24fcc02073d70cb989045d1c91cbd53979366077959933e0",
|
||||
"sha256:64d4ec9f448dfe041705426000cc13e34e6e5bb13736e9fd62e34a0b0c41566e",
|
||||
"sha256:6dc2737a3674b3e344847c8686cf29e500584ccad76204efea14f451d4cc669a",
|
||||
"sha256:74fdfdbfdc48d3f47148976f49fab3251e550a8720bebc99bf1483f5bfb5db3e",
|
||||
"sha256:75e4024375654472cc27e91cbe9eaa08567f7fbdf822638be2814ce059f58032",
|
||||
"sha256:786902fb9ba7433aae840e0ed609f45c7bcd4e225ebb9c753aa39725bb3e6ad6",
|
||||
"sha256:8b6c2ea03845c9f501ed1313e78de148cd3f6cad741a75d43a29b43da27f2e1e",
|
||||
"sha256:91d77d2a782be4274da750752bb1650a97bfd8f291022b379bb8e01c66b4e96b",
|
||||
"sha256:91ec59c33514b7c7559a6acda53bbfe1b283949c34fe7440bcf917f96ac0723e",
|
||||
"sha256:920f0d66a896c2d99f0adbb391f990a84091179542c205fa53ce5787aff87954",
|
||||
"sha256:a5263e363c27b653a90078143adb3d076c1a748ec9ecc78ea2fb916f9b861962",
|
||||
"sha256:abb9a20a72ac4e0fdb50dae135ba5e77880518e742077ced47eb1499e29a443c",
|
||||
"sha256:c2051981a968d7de9dd2d7b87bcb9c939c74a34626a6e2f8181455dd49ed69e4",
|
||||
"sha256:c21c9e3896c23007803a875460fb786118f0cdd4434359577ea25eb556e34c55",
|
||||
"sha256:c2502a1a03b6312837279c8c1bd3ebedf6c12c4228ddbad40912d671ccc8a962",
|
||||
"sha256:d4d692a89c5cf08a8557fdeb329b82e7bf609aadfaed6c0d79f5a449a3c7c023",
|
||||
"sha256:da5db4e883f1ce37f55c667e5c0de439df76ac4cb55964655906306918e7363c",
|
||||
"sha256:e7022a66d9b55e93e1a845d8c9eba2a1bebd4966cd8bfc25d9cd07d515b33fa6",
|
||||
"sha256:ef1f279350da2c586a69d32fc8733092fd32cc8ac95139a00377841f59a3f8d8",
|
||||
"sha256:f54a64f8b0c8ff0b64d18aa76675262e1700f3995182267998c31ae974fbc382",
|
||||
"sha256:f5c7150ad32ba43a07c4479f40241756145a1f03b43480e058cfd862bf5041c7",
|
||||
"sha256:f6f824dc3bce0edab5f427efcfb1d63ee75b6fcb7282900ccaf925be84efb0fc",
|
||||
"sha256:fd8a250edc26254fe5b33be00402e6d287f562b6a5b2152dec302fa15bb3e997",
|
||||
"sha256:ffaa5c925128e29efbde7301d8ecaf35c8c60ffbcd6a1ffd3a552177c8e5e796"
|
||||
],
|
||||
"version": "==1.15.0"
|
||||
},
|
||||
"charset-normalizer": {
|
||||
"hashes": [
|
||||
"sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597",
|
||||
@@ -140,19 +195,59 @@
|
||||
},
|
||||
"click": {
|
||||
"hashes": [
|
||||
"sha256:6a7a62563bbfabfda3a38f3023a1db4a35978c0abd76f6c9605ecd6554d6d9b1",
|
||||
"sha256:8458d7b1287c5fb128c90e23381cf99dcde74beaf6c7ff6384ce84d6fe090adb"
|
||||
"sha256:19a4baa64da924c5e0cd889aba8e947f280309f1a2ce0947a3e3a7bcb7cc72d6",
|
||||
"sha256:977c213473c7665d3aa092b41ff12063227751c41d7b17165013e10069cc5cd2"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==8.0.4"
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==8.1.0"
|
||||
},
|
||||
"cryptg": {
|
||||
"hashes": [
|
||||
"sha256:02b31622a75a49a5dcd25e589c85faae54575f018e055bd21a17df97c8bb9095",
|
||||
"sha256:0da1b367056e57a5c01d22608da0cd50e597b917c1b2d9631767aa3c0640a99a",
|
||||
"sha256:135688c6fbda90748924c2cb047f63785ebf4397d81acc4a05357950653c5096",
|
||||
"sha256:1fb6c6d4561a54406593197c1f5f23662ab320f4af4ab11834e1583e9d27a49a",
|
||||
"sha256:2516557e89803637fa7342de43dbcc5f84bf68ae05b1064a354a62d423447d9f",
|
||||
"sha256:29001dafd3d6a054365222b1f89b12876723c89cdd10aa0e5885a05dfd034eeb",
|
||||
"sha256:2cc8115960e49a038091ffb2d09de59e0acbdc76de10d7d415b7671a06bae0a9",
|
||||
"sha256:2cd8224eb64af756f45cdceab16d048494313db8acec1e38d75d97716082267b",
|
||||
"sha256:307bf96a6ac9c87b44531d8da5fe3a6c5d856e1dc69b68136ef9c4fb66ad17ac",
|
||||
"sha256:31cf7682de69022c9a77739cdcf7116b06522b128b9b51c7593f277f38c38dbf",
|
||||
"sha256:3bc2f372dec3a7753c0c0d72c69fcbe44af5473f870a3406978e07e8560a1aa6",
|
||||
"sha256:46960979542155c9d903656a3a39770061b09a3691a23296f06dc168fe4ff962",
|
||||
"sha256:47ad5916be4558f4d674c12800e8d9663ce938b0046f19cdc869ba3a7ca280ec",
|
||||
"sha256:5faed49d972c7f44ce4d6fa1a64169c85a11209fa1fbe1c8a333fb1454888725",
|
||||
"sha256:695636cca0ee938bd7113658ee60bfaf89afa19708c40ecae5f4a222c2ec544a",
|
||||
"sha256:6c5d66975fc59adca203fa91e2a104240457114468162d30e9213661239ac1d6",
|
||||
"sha256:72a5485ece10a70160170ceb658b1836db82dccab08a1f7029c54d81cf6b1d43",
|
||||
"sha256:7fc8e1893775c6f53dceda1959f19833cc27a67a80492c10e2415dc601b36650",
|
||||
"sha256:890584db41c8e1e046ae40dee0074614470d36ebd6b7e57bb91303300066601f",
|
||||
"sha256:a1fb178702730b59267f1e6c6dfe16c7bb9c1350cee4183221982ad2dba4e7f5",
|
||||
"sha256:a4de1730ca56aa8a945f176c25586901ed5e9f15ffb70c6459eedf466eb6299b",
|
||||
"sha256:b6352555e47f389ed502269bdb537233d0a928b12d9f4caa57e8c707151acd30",
|
||||
"sha256:b8896394b72ff7dbf38072ad4c2cd59abdd9e388bb55e1c369102beb8e569f9d",
|
||||
"sha256:bbd05b52d09e78bdc595f229c0481f4f2e1daf3959847322a6b2c1f76119305f",
|
||||
"sha256:bf00943924cddb0838f8a65f5aae31f6fe2ad64a5d7e6f10a6b900b3f01b0ae0",
|
||||
"sha256:bf15aae0fa01aeec728ab16b920cf4c6b2793099c71f62f30ff100d6fe8c9859",
|
||||
"sha256:c09a5b14494532fc3226f5c5f57ef2a651c935ed6a1d2d0f9eff110046725524",
|
||||
"sha256:c4812802ce4cd6f08189ce0fa8b79e9a96ac941e69e6b3032bb6908baefde2ba",
|
||||
"sha256:c69c1e19884108e508697919de0cd43e2ca4e9af418962aa235273b3c51a0e37",
|
||||
"sha256:ce08c04ebb06ce1ac417597c1bb514a3c1b36cf5c286b8c60f23df2e65703bf3",
|
||||
"sha256:e29b0d944176cf88fe52d1c58f46017b5bddc9cc54ec0fc6fac20043febefc32",
|
||||
"sha256:e48ab84e0ed364436d5e449c59762c5963f08ad87f6508f4cb7644745b5559a8",
|
||||
"sha256:eff15f0a1eee678dd9ec747b58ce86edb78b608036ac4e02d8349f5f35202495",
|
||||
"sha256:fdd62c2be23eeabb9ebd2ad41bf153f5ec48b968885ef14e676515407cd56339"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==0.2.post4"
|
||||
},
|
||||
"dateparser": {
|
||||
"hashes": [
|
||||
"sha256:faa2b97f51f3b5ff1ba2f17be90de2b733fb6191f89b4058787473e8202f3044",
|
||||
"sha256:fec344db1f73d005182e214c0ff27313c748bbe0c1638ce9d48a809ddfdab2a0"
|
||||
"sha256:038196b1f12c7397e38aad3d61588833257f6f552baa63a1499e6987fa8d42d9",
|
||||
"sha256:9600874312ff28a41f96ec7ccdc73be1d1c44435719da47fea3339d55ff5a628"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.1.0"
|
||||
"version": "==1.1.1"
|
||||
},
|
||||
"ffmpeg-python": {
|
||||
"hashes": [
|
||||
@@ -177,12 +272,9 @@
|
||||
"markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==0.18.2"
|
||||
},
|
||||
"garc": {
|
||||
"hashes": [
|
||||
"sha256:6f1da8ccdb30b165b8d9247314b73d1002f60381480e61fdbf108dc9abf3c216"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==2.1"
|
||||
"gabber": {
|
||||
"git": "https://github.com/stanfordio/gabber.git",
|
||||
"ref": "d80c44c488ad4e087ba4c8f033802fe2071843bd"
|
||||
},
|
||||
"gogettr": {
|
||||
"hashes": [
|
||||
@@ -192,66 +284,89 @@
|
||||
"index": "pypi",
|
||||
"version": "==0.8.0"
|
||||
},
|
||||
"google-auth": {
|
||||
"hashes": [
|
||||
"sha256:3ba4d63cb29c1e6d5ffcc1c0623c03cf02ede6240a072f213084749574e691ab",
|
||||
"sha256:60d449f8142c742db760f4c0be39121bc8d9be855555d784c252deaca1ced3f5"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'",
|
||||
"version": "==2.6.2"
|
||||
},
|
||||
"google-auth-oauthlib": {
|
||||
"hashes": [
|
||||
"sha256:24f67735513c4c7134dbde2f1dee5a1deb6acc8dfcb577d7bff30d213a28e7b0",
|
||||
"sha256:30596b824fc6808fdaca2f048e4998cc40fb4b3599eaea66d28dc7085b36c5b8"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==0.5.1"
|
||||
},
|
||||
"greenlet": {
|
||||
"hashes": [
|
||||
"sha256:0051c6f1f27cb756ffc0ffbac7d2cd48cb0362ac1736871399a739b2885134d3",
|
||||
"sha256:00e44c8afdbe5467e4f7b5851be223be68adb4272f44696ee71fe46b7036a711",
|
||||
"sha256:013d61294b6cd8fe3242932c1c5e36e5d1db2c8afb58606c5a67efce62c1f5fd",
|
||||
"sha256:049fe7579230e44daef03a259faa24511d10ebfa44f69411d99e6a184fe68073",
|
||||
"sha256:14d4f3cd4e8b524ae9b8aa567858beed70c392fdec26dbdb0a8a418392e71708",
|
||||
"sha256:166eac03e48784a6a6e0e5f041cfebb1ab400b394db188c48b3a84737f505b67",
|
||||
"sha256:17ff94e7a83aa8671a25bf5b59326ec26da379ace2ebc4411d690d80a7fbcf23",
|
||||
"sha256:1e12bdc622676ce47ae9abbf455c189e442afdde8818d9da983085df6312e7a1",
|
||||
"sha256:21915eb821a6b3d9d8eefdaf57d6c345b970ad722f856cd71739493ce003ad08",
|
||||
"sha256:288c6a76705dc54fba69fbcb59904ae4ad768b4c768839b8ca5fdadec6dd8cfd",
|
||||
"sha256:2bde6792f313f4e918caabc46532aa64aa27a0db05d75b20edfc5c6f46479de2",
|
||||
"sha256:32ca72bbc673adbcfecb935bb3fb1b74e663d10a4b241aaa2f5a75fe1d1f90aa",
|
||||
"sha256:356b3576ad078c89a6107caa9c50cc14e98e3a6c4874a37c3e0273e4baf33de8",
|
||||
"sha256:40b951f601af999a8bf2ce8c71e8aaa4e8c6f78ff8afae7b808aae2dc50d4c40",
|
||||
"sha256:572e1787d1460da79590bf44304abbc0a2da944ea64ec549188fa84d89bba7ab",
|
||||
"sha256:58df5c2a0e293bf665a51f8a100d3e9956febfbf1d9aaf8c0677cf70218910c6",
|
||||
"sha256:64e6175c2e53195278d7388c454e0b30997573f3f4bd63697f88d855f7a6a1fc",
|
||||
"sha256:7227b47e73dedaa513cdebb98469705ef0d66eb5a1250144468e9c3097d6b59b",
|
||||
"sha256:7418b6bfc7fe3331541b84bb2141c9baf1ec7132a7ecd9f375912eca810e714e",
|
||||
"sha256:7cbd7574ce8e138bda9df4efc6bf2ab8572c9aff640d8ecfece1b006b68da963",
|
||||
"sha256:7ff61ff178250f9bb3cd89752df0f1dd0e27316a8bd1465351652b1b4a4cdfd3",
|
||||
"sha256:833e1551925ed51e6b44c800e71e77dacd7e49181fdc9ac9a0bf3714d515785d",
|
||||
"sha256:8639cadfda96737427330a094476d4c7a56ac03de7265622fcf4cfe57c8ae18d",
|
||||
"sha256:8c5d5b35f789a030ebb95bff352f1d27a93d81069f2adb3182d99882e095cefe",
|
||||
"sha256:8c790abda465726cfb8bb08bd4ca9a5d0a7bd77c7ac1ca1b839ad823b948ea28",
|
||||
"sha256:8d2f1fb53a421b410751887eb4ff21386d119ef9cde3797bf5e7ed49fb51a3b3",
|
||||
"sha256:903bbd302a2378f984aef528f76d4c9b1748f318fe1294961c072bdc7f2ffa3e",
|
||||
"sha256:93f81b134a165cc17123626ab8da2e30c0455441d4ab5576eed73a64c025b25c",
|
||||
"sha256:95e69877983ea39b7303570fa6760f81a3eec23d0e3ab2021b7144b94d06202d",
|
||||
"sha256:9633b3034d3d901f0a46b7939f8c4d64427dfba6bbc5a36b1a67364cf148a1b0",
|
||||
"sha256:97e5306482182170ade15c4b0d8386ded995a07d7cc2ca8f27958d34d6736497",
|
||||
"sha256:9f3cba480d3deb69f6ee2c1825060177a22c7826431458c697df88e6aeb3caee",
|
||||
"sha256:aa5b467f15e78b82257319aebc78dd2915e4c1436c3c0d1ad6f53e47ba6e2713",
|
||||
"sha256:abb7a75ed8b968f3061327c433a0fbd17b729947b400747c334a9c29a9af6c58",
|
||||
"sha256:aec52725173bd3a7b56fe91bc56eccb26fbdff1386ef123abb63c84c5b43b63a",
|
||||
"sha256:b11548073a2213d950c3f671aa88e6f83cda6e2fb97a8b6317b1b5b33d850e06",
|
||||
"sha256:b1692f7d6bc45e3200844be0dba153612103db241691088626a33ff1f24a0d88",
|
||||
"sha256:b336501a05e13b616ef81ce329c0e09ac5ed8c732d9ba7e3e983fcc1a9e86965",
|
||||
"sha256:b8c008de9d0daba7b6666aa5bbfdc23dcd78cafc33997c9b7741ff6353bafb7f",
|
||||
"sha256:b92e29e58bef6d9cfd340c72b04d74c4b4e9f70c9fa7c78b674d1fec18896dc4",
|
||||
"sha256:be5f425ff1f5f4b3c1e33ad64ab994eed12fc284a6ea71c5243fd564502ecbe5",
|
||||
"sha256:dd0b1e9e891f69e7675ba5c92e28b90eaa045f6ab134ffe70b52e948aa175b3c",
|
||||
"sha256:e30f5ea4ae2346e62cedde8794a56858a67b878dd79f7df76a0767e356b1744a",
|
||||
"sha256:e6a36bb9474218c7a5b27ae476035497a6990e21d04c279884eb10d9b290f1b1",
|
||||
"sha256:e859fcb4cbe93504ea18008d1df98dee4f7766db66c435e4882ab35cf70cac43",
|
||||
"sha256:eb6ea6da4c787111adf40f697b4e58732ee0942b5d3bd8f435277643329ba627",
|
||||
"sha256:ec8c433b3ab0419100bd45b47c9c8551248a5aee30ca5e9d399a0b57ac04651b",
|
||||
"sha256:eff9d20417ff9dcb0d25e2defc2574d10b491bf2e693b4e491914738b7908168",
|
||||
"sha256:f0214eb2a23b85528310dad848ad2ac58e735612929c8072f6093f3585fd342d",
|
||||
"sha256:f276df9830dba7a333544bd41070e8175762a7ac20350786b322b714b0e654f5",
|
||||
"sha256:f3acda1924472472ddd60c29e5b9db0cec629fbe3c5c5accb74d6d6d14773478",
|
||||
"sha256:f70a9e237bb792c7cc7e44c531fd48f5897961701cdaa06cf22fc14965c496cf",
|
||||
"sha256:f9d29ca8a77117315101425ec7ec2a47a22ccf59f5593378fc4077ac5b754fce",
|
||||
"sha256:fa877ca7f6b48054f847b61d6fa7bed5cebb663ebc55e018fda12db09dcc664c",
|
||||
"sha256:fdcec0b8399108577ec290f55551d926d9a1fa6cad45882093a7a07ac5ec147b"
|
||||
"sha256:004aed447382d80a56ecc354a6d807f305e6c808714ce6ccbca4839c94fae81d",
|
||||
"sha256:068d68fad6bd623e29a2d36e74538c9b9d6dc6464931cd27d93da6cfc6a7f242",
|
||||
"sha256:06fd4075754009c9817c6b4e1dc0af4616de52757b6ca973a81c3c1aadc28257",
|
||||
"sha256:1004cb542451814b12a4f38e835a47734e2b2c683acbf463d5ae76282a3974cf",
|
||||
"sha256:10c358633a8b27bfc32d27114ef2ca2ddc9f1f89f1643d1157b85e1fdd695315",
|
||||
"sha256:115bc25fefbdc692c4483e9ddb9011ccd0251590ed59dbfff0f4eb7050bf99c4",
|
||||
"sha256:1d987a2579336792f73ae6b106c2f087e32afc8573fbf9566f123ac6d8cfb72f",
|
||||
"sha256:2128d727fd1e8afba8e68feb2cdcf88c90163b69ddc9707722a3e491c5280720",
|
||||
"sha256:230132c241fe284f93f2e7b3969e9b22bbd76ef98cf93e382c945d378907f5a4",
|
||||
"sha256:23558f7bd08a663386c032ab8d302d613d2d02ae0c9758ad410bab6035b58d3d",
|
||||
"sha256:255d520d3e4a5f16883b182e1a94219fe455ab4f50aaaf534bfd6d64ee728397",
|
||||
"sha256:2a6bc19a728f6f643cfc89b876159a1a25a8f7d8700c013d48a73691f80b4550",
|
||||
"sha256:379bed346ef8ba0a0e698b3c5975a44d15dd4a5bbff40bbd7fd548b445d5550b",
|
||||
"sha256:3b12d0866759db93b0a893b4e50a7d7d1681519d2346c26695bb8bb2c652230e",
|
||||
"sha256:40d491944f69e350e1e8b25f6ca49459824ede1678ec0cd4b5541f41edc06614",
|
||||
"sha256:471484c7b9d7b7867263051aa81cdeed6e06b455e629a7f05eb91a6cb8bd0836",
|
||||
"sha256:488c557080557bc01aabb3e1bda7225c68455b853733a8652857ac0d810dad1b",
|
||||
"sha256:49c2e76e7aa81ba889b3c183e2341af3cc6161ee38852085110ae49d5b5d9a40",
|
||||
"sha256:52d13ec90236e5935ed6da044e78faa1371d5116cc43fe6d7ca8994dd619ef96",
|
||||
"sha256:57898c69a253d81f487787bdd538629fabd671fab8a9e31b041ca30965fd9556",
|
||||
"sha256:5d577eef5beb5730ef01ab39983eb852a97c359b7a546809adf70c409f4b2ecc",
|
||||
"sha256:6a41987c1474c9158a0c0c96611530a8f299bc547d35bee8add981b8b2534f74",
|
||||
"sha256:6ae67b7df8db3626af8e042e9c6949cfa27d1a3bbbfdff29e45b72bb6673a650",
|
||||
"sha256:6c42c27e9d12e8a481aff469ffe8dd4ce0484c354a418470960f760f6ae41e7c",
|
||||
"sha256:6c4a90c9f6128b4d0905a89930bd325e0491574e5cb453f606bb7094a3197587",
|
||||
"sha256:6e64518e5833ac2d9359b6d9bd4df2c0cf441a0f3a4eca9e735fbea99009fa70",
|
||||
"sha256:6fd3a270c23c5b42d86a9c7c6b0229f23ee4a7a4cabdaaa1693ad7a0982d13cb",
|
||||
"sha256:70db73351e0fcf11a76288c47a0469d9a330bcb2e7618c5eb57432b8caa82403",
|
||||
"sha256:771f401692046845626cbdf1dd0f04e999413ede0ee9ad39033fe30b5fa2e845",
|
||||
"sha256:7935026ec61b967cbc6b746c0ca75c1651ea118d7fee4d259cff9e6866153374",
|
||||
"sha256:7b76b1cac9baac1980210e29145800954e7b42e91ef69c4d695de1cab87ce41f",
|
||||
"sha256:7e3f37c11b6699b1a1e0fcc0e88829dba4f2866546381b05ab8b3f4db645a823",
|
||||
"sha256:8370fa65ad421484894f559055f951843754153b72b9bca2ebdc5288efe2e3f0",
|
||||
"sha256:8ae9c443d44a4e23252632e4d7775f419f992d0df3eff923e23775f5cc551d39",
|
||||
"sha256:8b31d85f2781e44f1ffaaf7ea07f484e7d42317c677c355fa77b4a1a4bea7394",
|
||||
"sha256:8b450336b27f3b375cadc474c6704838eaa8dd3ca312aac3bb69d92264a8e638",
|
||||
"sha256:9ce84357388a76d886febff4e50e321c212ffd3248b590960b2da6e02404a5c9",
|
||||
"sha256:a23e986fb0ba8e7407286add41fa0d4207be44e3dce1b04789f4757800eca1cf",
|
||||
"sha256:a81610ee00d0da9cd2c8679479b7791149365b6dfb3971b01b22ee29b04787ce",
|
||||
"sha256:b4e40444975e5ab0ed3004369209c39a28e084951daaeee4919f164b6b849b14",
|
||||
"sha256:b66600de16702b9dfa74bea34524b55183a2183e5fd92f20fe6c2fcae550a64c",
|
||||
"sha256:ba6ee18694d3673796b7a31b7d21254e87e9e43ca5be56f323fd396111255315",
|
||||
"sha256:bd03837da28293baa39bdfc3cada69e2f8807f423ae06168aa28d2b32c63a6b6",
|
||||
"sha256:bd2192070f88c0778ae1d68a0980fdece3473498c1db37f3794e3454f91e3ecf",
|
||||
"sha256:c1f6f1a3cc013012cd1da913c40b13e6d721046a8c8a0ea0cde94069645a75db",
|
||||
"sha256:ce10a8e7e067bde3c1fbf494d2b8859db510206030b0b67bc3af90b0eb1887b9",
|
||||
"sha256:d31386d208303a5a6cf0819ef9f6db6680bab9e4ca8e48adb3d4b26ead89beb7",
|
||||
"sha256:d83b3af53b201970973c5574b39df226746194063bb248a53fd12b470ac34319",
|
||||
"sha256:df9657b212c054ac6d803290d7c4bcd7790af0b725984fce1eeb0a1e3f2d9798",
|
||||
"sha256:e576e5fd3f129e6b3595dc734ac7f2b8c548f19ef07781194bc538dc9c0cdbbc",
|
||||
"sha256:e7400358558094c1bcedc75f3b3c4f400c53130b44833848890a99968dee6a64",
|
||||
"sha256:eb6a385f8577d30e4cb43dd555fb134ddaae1edeb84205e09dabec332bf49fd0",
|
||||
"sha256:f27f0875e0873f6bf5df09a456bfcac0667824cabac4cad30b43f36e0382ffe7",
|
||||
"sha256:fcd4a6d04995f1d66bc78b503e4e59ae72fd32aaec4f661657fe5ae5c1aa4ce3"
|
||||
],
|
||||
"markers": "python_version >= '3' and platform_machine == 'aarch64' or (platform_machine == 'ppc64le' or (platform_machine == 'x86_64' or (platform_machine == 'amd64' or (platform_machine == 'AMD64' or (platform_machine == 'win32' or platform_machine == 'WIN32')))))",
|
||||
"version": "==1.1.2"
|
||||
"version": "==2.0.0a2"
|
||||
},
|
||||
"gspread": {
|
||||
"hashes": [
|
||||
"sha256:a347197628fa1885dcc860701fb1b3f5471386aa863a71cfe232b6473c6fea1b",
|
||||
"sha256:be2220e19723570ed98e8b8eb6a5b6e04afa0f08ec1f08b89e217c354488a047"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==5.3.0"
|
||||
},
|
||||
"idna": {
|
||||
"hashes": [
|
||||
@@ -261,35 +376,28 @@
|
||||
"markers": "python_version >= '3'",
|
||||
"version": "==3.3"
|
||||
},
|
||||
"iniconfig": {
|
||||
"hashes": [
|
||||
"sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3",
|
||||
"sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"
|
||||
],
|
||||
"version": "==1.1.1"
|
||||
},
|
||||
"instaloader": {
|
||||
"hashes": [
|
||||
"sha256:9615a12a5a01a8b6c9d99a2a047b21d81b341cfd77656b9261bda30ece0cd562"
|
||||
"sha256:7fa6147810eedcc1dedcdec8cfa1f220c9379ab8faeab6a336a7c181d944e2e4"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==4.8.4"
|
||||
"version": "==4.9"
|
||||
},
|
||||
"jmespath": {
|
||||
"hashes": [
|
||||
"sha256:b85d0567b8666149a93172712e68920734333c0ce7e89b78b3e987f71e5ed4f9",
|
||||
"sha256:cdf6525904cc597730141d61b36f2e4b8ecc257c420fa2f4549bac2c2d0cb72f"
|
||||
"sha256:a490e280edd1f57d6de88636992d05b71e97d69a26a19f058ecf7d304474bf5e",
|
||||
"sha256:e8dcd576ed616f14ec02eed0005c85973b5890083313860136657e24784e4c04"
|
||||
],
|
||||
"markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==0.10.0"
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==1.0.0"
|
||||
},
|
||||
"loguru": {
|
||||
"hashes": [
|
||||
"sha256:066bd06758d0a513e9836fd9c6b5a75bfb3fd36841f4b996bc60b547a309d41c",
|
||||
"sha256:4e2414d534a2ab57573365b3e6d0234dfb1d84b68b7f3b948e6fb743860a77c3"
|
||||
"sha256:b28e72ac7a98be3d28ad28570299a393dfcd32e5e3f6a353dec94675767b6319",
|
||||
"sha256:f8087ac396b5ee5f67c963b495d615ebbceac2796379599820e324419d53667c"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==0.6.0"
|
||||
"version": "==0.5.3"
|
||||
},
|
||||
"lxml": {
|
||||
"hashes": [
|
||||
@@ -392,6 +500,14 @@
|
||||
"markers": "python_version < '3.10' and platform_machine != 'aarch64' and platform_machine != 'arm64'",
|
||||
"version": "==1.22.3"
|
||||
},
|
||||
"oauthlib": {
|
||||
"hashes": [
|
||||
"sha256:23a8208d75b902797ea29fd31fa80a15ed9dc2c6c16fe73f5d346f83f6fa27a2",
|
||||
"sha256:6db33440354787f9b7f3a6dbd4febf5d0f93758354060e802f6c06cb493022fe"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==3.2.0"
|
||||
},
|
||||
"packaging": {
|
||||
"hashes": [
|
||||
"sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb",
|
||||
@@ -468,25 +584,9 @@
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==9.0.1"
|
||||
},
|
||||
"pluggy": {
|
||||
"hashes": [
|
||||
"sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159",
|
||||
"sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==1.0.0"
|
||||
},
|
||||
"polyphemus": {
|
||||
"git": "https://github.com/bellingcat/polyphemus.git",
|
||||
"ref": "c85dea215ae720e3df71d2ed1aaa82f7b8a6a2ed"
|
||||
},
|
||||
"py": {
|
||||
"hashes": [
|
||||
"sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719",
|
||||
"sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
|
||||
"version": "==1.11.0"
|
||||
"ref": "00a5123a3768a55ffe29f2c803a4181895f17890"
|
||||
},
|
||||
"pyaes": {
|
||||
"hashes": [
|
||||
@@ -512,6 +612,31 @@
|
||||
],
|
||||
"version": "==0.4.8"
|
||||
},
|
||||
"pyasn1-modules": {
|
||||
"hashes": [
|
||||
"sha256:0845a5582f6a02bb3e1bde9ecfc4bfcae6ec3210dd270522fee602365430c3f8",
|
||||
"sha256:0fe1b68d1e486a1ed5473f1302bd991c1611d319bba158e98b106ff86e1d7199",
|
||||
"sha256:15b7c67fabc7fc240d87fb9aabf999cf82311a6d6fb2c70d00d3d0604878c811",
|
||||
"sha256:426edb7a5e8879f1ec54a1864f16b882c2837bfd06eee62f2c982315ee2473ed",
|
||||
"sha256:65cebbaffc913f4fe9e4808735c95ea22d7a7775646ab690518c056784bc21b4",
|
||||
"sha256:905f84c712230b2c592c19470d3ca8d552de726050d1d1716282a1f6146be65e",
|
||||
"sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74",
|
||||
"sha256:a99324196732f53093a84c4369c996713eb8c89d360a496b599fb1a9c47fc3eb",
|
||||
"sha256:b80486a6c77252ea3a3e9b1e360bc9cf28eaac41263d173c032581ad2f20fe45",
|
||||
"sha256:c29a5e5cc7a3f05926aff34e097e84f8589cd790ce0ed41b67aed6857b26aafd",
|
||||
"sha256:cbac4bc38d117f2a49aeedec4407d23e8866ea4ac27ff2cf7fb3e5b570df19e0",
|
||||
"sha256:f39edd8c4ecaa4556e989147ebf219227e2cd2e8a43c7e7fcb1f1c18c5fd6a3d",
|
||||
"sha256:fe0644d9ab041506b62782e92b06b8c68cca799e1a9636ec398675459e031405"
|
||||
],
|
||||
"version": "==0.2.8"
|
||||
},
|
||||
"pycparser": {
|
||||
"hashes": [
|
||||
"sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9",
|
||||
"sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"
|
||||
],
|
||||
"version": "==2.21"
|
||||
},
|
||||
"pycryptodomex": {
|
||||
"hashes": [
|
||||
"sha256:1ca8e1b4c62038bb2da55451385246f51f412c5f5eabd64812c01766a5989b4a",
|
||||
@@ -573,14 +698,6 @@
|
||||
"index": "pypi",
|
||||
"version": "==0.3.9"
|
||||
},
|
||||
"pytest": {
|
||||
"hashes": [
|
||||
"sha256:b555252a95bbb2a37a97b5ac2eb050c436f7989993565f5e0c9128fcaacadd0e",
|
||||
"sha256:f1089d218cfcc63a212c42896f1b7fbf096874d045e1988186861a1a87d27b47"
|
||||
],
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==7.1.0"
|
||||
},
|
||||
"python-dateutil": {
|
||||
"hashes": [
|
||||
"sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86",
|
||||
@@ -591,10 +708,10 @@
|
||||
},
|
||||
"pytz": {
|
||||
"hashes": [
|
||||
"sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c",
|
||||
"sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326"
|
||||
"sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7",
|
||||
"sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c"
|
||||
],
|
||||
"version": "==2021.3"
|
||||
"version": "==2022.1"
|
||||
},
|
||||
"pytz-deprecation-shim": {
|
||||
"hashes": [
|
||||
@@ -604,6 +721,12 @@
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'",
|
||||
"version": "==0.1.0.post0"
|
||||
},
|
||||
"ratelimit": {
|
||||
"hashes": [
|
||||
"sha256:af8a9b64b821529aca09ebaf6d8d279100d766f19e90b5059ac6a718ca6dee42"
|
||||
],
|
||||
"version": "==2.2.1"
|
||||
},
|
||||
"regex": {
|
||||
"hashes": [
|
||||
"sha256:0008650041531d0eadecc96a73d37c2dc4821cf51b0766e374cb4f1ddc4e1c14",
|
||||
@@ -685,9 +808,6 @@
|
||||
"version": "==2022.3.2"
|
||||
},
|
||||
"requests": {
|
||||
"extras": [
|
||||
"socks"
|
||||
],
|
||||
"hashes": [
|
||||
"sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61",
|
||||
"sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"
|
||||
@@ -695,6 +815,14 @@
|
||||
"index": "pypi",
|
||||
"version": "==2.27.1"
|
||||
},
|
||||
"requests-oauthlib": {
|
||||
"hashes": [
|
||||
"sha256:2577c501a2fb8d05a304c09d090d6e47c306fef15809d102b327cf8364bddab5",
|
||||
"sha256:75beac4a47881eeb94d5ea5d6ad31ef88856affe2332b9aafb52c6452ccf0d7a"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==1.3.1"
|
||||
},
|
||||
"rsa": {
|
||||
"hashes": [
|
||||
"sha256:5c6bd9dc7a543b7fe4304a631f8a8a3b674e2bbfc49c2ae96200cdbe55df6b17",
|
||||
@@ -721,7 +849,7 @@
|
||||
},
|
||||
"snscrape": {
|
||||
"git": "https://github.com/bellingcat/snscrape.git",
|
||||
"ref": "de4ebed81f3f6a4bb4c65630daab6ec63784959b"
|
||||
"ref": "fb8d73ac95011b7ad848a6048d3eed1880e80f21"
|
||||
},
|
||||
"soupsieve": {
|
||||
"hashes": [
|
||||
@@ -780,21 +908,21 @@
|
||||
"index": "pypi",
|
||||
"version": "==1.24.0"
|
||||
},
|
||||
"tomli": {
|
||||
"tqdm": {
|
||||
"hashes": [
|
||||
"sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc",
|
||||
"sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"
|
||||
"sha256:4230a49119a416c88cc47d0d2d32d5d90f1a282d5e497d49801950704e49863d",
|
||||
"sha256:6461b009d6792008d0000e1b0c7ca50195ec78c0e808a3a6b668a56a3236c3a5"
|
||||
],
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==2.0.1"
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==4.63.1"
|
||||
},
|
||||
"tzdata": {
|
||||
"hashes": [
|
||||
"sha256:3eee491e22ebfe1e5cfcc97a4137cd70f092ce59144d81f8924a844de05ba8f5",
|
||||
"sha256:68dbe41afd01b867894bbdfd54fa03f468cfa4f0086bfb4adcd8de8f24f3ee21"
|
||||
"sha256:238e70234214138ed7b4e8a0fab0e5e13872edab3be586ab8198c407620e2ab9",
|
||||
"sha256:8b536a8ec63dc0751342b3984193a3118f8fca2afe25752bb9b7fffd398552d3"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==2021.5"
|
||||
"version": "==2022.1"
|
||||
},
|
||||
"tzlocal": {
|
||||
"hashes": [
|
||||
@@ -806,11 +934,11 @@
|
||||
},
|
||||
"urllib3": {
|
||||
"hashes": [
|
||||
"sha256:000ca7f471a233c2251c6c7023ee85305721bfdf18621ebff4fd17a8653427ed",
|
||||
"sha256:0e7c33d9a63e7ddfcb86780aac87befc2fbddf46c58dbb487e0855f7ceec283c"
|
||||
"sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14",
|
||||
"sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
|
||||
"version": "==1.26.8"
|
||||
"version": "==1.26.9"
|
||||
},
|
||||
"websockets": {
|
||||
"hashes": [
|
||||
@@ -899,6 +1027,35 @@
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==2.9.1"
|
||||
},
|
||||
"black": {
|
||||
"hashes": [
|
||||
"sha256:06f9d8846f2340dfac80ceb20200ea5d1b3f181dd0556b47af4e8e0b24fa0a6b",
|
||||
"sha256:10dbe6e6d2988049b4655b2b739f98785a884d4d6b85bc35133a8fb9a2233176",
|
||||
"sha256:2497f9c2386572e28921fa8bec7be3e51de6801f7459dffd6e62492531c47e09",
|
||||
"sha256:30d78ba6bf080eeaf0b7b875d924b15cd46fec5fd044ddfbad38c8ea9171043a",
|
||||
"sha256:328efc0cc70ccb23429d6be184a15ce613f676bdfc85e5fe8ea2a9354b4e9015",
|
||||
"sha256:35020b8886c022ced9282b51b5a875b6d1ab0c387b31a065b84db7c33085ca79",
|
||||
"sha256:5795a0375eb87bfe902e80e0c8cfaedf8af4d49694d69161e5bd3206c18618bb",
|
||||
"sha256:5891ef8abc06576985de8fa88e95ab70641de6c1fca97e2a15820a9b69e51b20",
|
||||
"sha256:637a4014c63fbf42a692d22b55d8ad6968a946b4a6ebc385c5505d9625b6a464",
|
||||
"sha256:67c8301ec94e3bcc8906740fe071391bce40a862b7be0b86fb5382beefecd968",
|
||||
"sha256:6d2fc92002d44746d3e7db7cf9313cf4452f43e9ea77a2c939defce3b10b5c82",
|
||||
"sha256:6ee227b696ca60dd1c507be80a6bc849a5a6ab57ac7352aad1ffec9e8b805f21",
|
||||
"sha256:863714200ada56cbc366dc9ae5291ceb936573155f8bf8e9de92aef51f3ad0f0",
|
||||
"sha256:9b542ced1ec0ceeff5b37d69838106a6348e60db7b8fdd245294dc1d26136265",
|
||||
"sha256:a6342964b43a99dbc72f72812bf88cad8f0217ae9acb47c0d4f141a6416d2d7b",
|
||||
"sha256:ad4efa5fad66b903b4a5f96d91461d90b9507a812b3c5de657d544215bb7877a",
|
||||
"sha256:bc58025940a896d7e5356952228b68f793cf5fcb342be703c3a2669a1488cb72",
|
||||
"sha256:cc1e1de68c8e5444e8f94c3670bb48a2beef0e91dddfd4fcc29595ebd90bb9ce",
|
||||
"sha256:cee3e11161dde1b2a33a904b850b0899e0424cc331b7295f2a9698e79f9a69a0",
|
||||
"sha256:e3556168e2e5c49629f7b0f377070240bd5511e45e25a4497bb0073d9dda776a",
|
||||
"sha256:e8477ec6bbfe0312c128e74644ac8a02ca06bcdb8982d4ee06f209be28cdf163",
|
||||
"sha256:ee8f1f7228cce7dffc2b464f07ce769f478968bfb3dd1254a4c2eeed84928aad",
|
||||
"sha256:fd57160949179ec517d32ac2ac898b5f20d68ed1a9c977346efbac9c2f1e779d"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==22.3.0"
|
||||
},
|
||||
"certifi": {
|
||||
"hashes": [
|
||||
"sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872",
|
||||
@@ -914,6 +1071,14 @@
|
||||
"markers": "python_version >= '3'",
|
||||
"version": "==2.0.12"
|
||||
},
|
||||
"click": {
|
||||
"hashes": [
|
||||
"sha256:19a4baa64da924c5e0cd889aba8e947f280309f1a2ce0947a3e3a7bcb7cc72d6",
|
||||
"sha256:977c213473c7665d3aa092b41ff12063227751c41d7b17165013e10069cc5cd2"
|
||||
],
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==8.1.0"
|
||||
},
|
||||
"coverage": {
|
||||
"extras": [
|
||||
"toml"
|
||||
@@ -1005,11 +1170,11 @@
|
||||
},
|
||||
"jinja2": {
|
||||
"hashes": [
|
||||
"sha256:077ce6014f7b40d03b47d1f1ca4b0fc8328a692bd284016f806ed0eaca390ad8",
|
||||
"sha256:611bb273cd68f3b993fabdc4064fc858c5b47a973cb5aa7999ec1ba405c87cd7"
|
||||
"sha256:539835f51a74a69f41b848a9645dbdc35b4f20a3b601e2d9a7e22947b15ff119",
|
||||
"sha256:640bed4bb501cbd17194b3cace1dc2126f5b619cf068a726b98192a0fde74ae9"
|
||||
],
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==3.0.3"
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==3.1.1"
|
||||
},
|
||||
"markupsafe": {
|
||||
"hashes": [
|
||||
@@ -1057,6 +1222,13 @@
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==2.1.1"
|
||||
},
|
||||
"mypy-extensions": {
|
||||
"hashes": [
|
||||
"sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d",
|
||||
"sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"
|
||||
],
|
||||
"version": "==0.4.3"
|
||||
},
|
||||
"packaging": {
|
||||
"hashes": [
|
||||
"sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb",
|
||||
@@ -1065,6 +1237,21 @@
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==21.3"
|
||||
},
|
||||
"pathspec": {
|
||||
"hashes": [
|
||||
"sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a",
|
||||
"sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1"
|
||||
],
|
||||
"version": "==0.9.0"
|
||||
},
|
||||
"platformdirs": {
|
||||
"hashes": [
|
||||
"sha256:7535e70dfa32e84d4b34996ea99c5e432fa29a708d0f4e394bbcb2a8faa4f16d",
|
||||
"sha256:bcae7cab893c2d310a711b70b24efb93334febe65f8de776ee320b517471e227"
|
||||
],
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==2.5.1"
|
||||
},
|
||||
"pluggy": {
|
||||
"hashes": [
|
||||
"sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159",
|
||||
@@ -1099,11 +1286,11 @@
|
||||
},
|
||||
"pytest": {
|
||||
"hashes": [
|
||||
"sha256:b555252a95bbb2a37a97b5ac2eb050c436f7989993565f5e0c9128fcaacadd0e",
|
||||
"sha256:f1089d218cfcc63a212c42896f1b7fbf096874d045e1988186861a1a87d27b47"
|
||||
"sha256:841132caef6b1ad17a9afde46dc4f6cfa59a05f9555aae5151f73bdf2820ca63",
|
||||
"sha256:92f723789a8fdd7180b6b06483874feca4c48a5c76968e03bb3e7f806a1869ea"
|
||||
],
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==7.1.0"
|
||||
"index": "pypi",
|
||||
"version": "==7.1.1"
|
||||
},
|
||||
"pytest-cov": {
|
||||
"hashes": [
|
||||
@@ -1123,23 +1310,20 @@
|
||||
},
|
||||
"pytest-metadata": {
|
||||
"hashes": [
|
||||
"sha256:576055b8336dd4a9006dd2a47615f76f2f8c30ab12b1b1c039d99e834583523f",
|
||||
"sha256:71b506d49d34e539cc3cfdb7ce2c5f072bea5c953320002c95968e0238f8ecf1"
|
||||
"sha256:141ba561a17659cda00cf74e7c7cf6103bab4550acad76a46f893339de63b1df",
|
||||
"sha256:5cdb6aeea8ba9109181cf9f149c8a3ae1430ff7e44506a8f866af8a98ca46301"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.11.0"
|
||||
"version": "==2.0.1"
|
||||
},
|
||||
"pytz": {
|
||||
"hashes": [
|
||||
"sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c",
|
||||
"sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326"
|
||||
"sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7",
|
||||
"sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c"
|
||||
],
|
||||
"version": "==2021.3"
|
||||
"version": "==2022.1"
|
||||
},
|
||||
"requests": {
|
||||
"extras": [
|
||||
"socks"
|
||||
],
|
||||
"hashes": [
|
||||
"sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61",
|
||||
"sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"
|
||||
@@ -1156,11 +1340,11 @@
|
||||
},
|
||||
"sphinx": {
|
||||
"hashes": [
|
||||
"sha256:5da895959511473857b6d0200f56865ed62c31e8f82dd338063b84ec022701fe",
|
||||
"sha256:6caad9786055cb1fa22b4a365c1775816b876f91966481765d7d50e9f0dd35cc"
|
||||
"sha256:7bf8ca9637a4ee15af412d1a1d9689fec70523a68ca9bb9127c2f3eeb344e2e6",
|
||||
"sha256:ebf612653238bcc8f4359627a9b7ce44ede6fdd75d9d30f68255c7383d3a6226"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==4.4.0"
|
||||
"version": "==4.5.0"
|
||||
},
|
||||
"sphinx-rtd-theme": {
|
||||
"hashes": [
|
||||
@@ -1223,16 +1407,24 @@
|
||||
"sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc",
|
||||
"sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"
|
||||
],
|
||||
"markers": "python_version >= '3.7'",
|
||||
"markers": "python_full_version < '3.11.0'",
|
||||
"version": "==2.0.1"
|
||||
},
|
||||
"typing-extensions": {
|
||||
"hashes": [
|
||||
"sha256:1a9462dcc3347a79b1f1c0271fbe79e844580bb598bafa1ed208b94da3cdcd42",
|
||||
"sha256:21c85e0fe4b9a155d0799430b0ad741cdce7e359660ccbd8b530613e8df88ce2"
|
||||
],
|
||||
"markers": "python_version < '3.10'",
|
||||
"version": "==4.1.1"
|
||||
},
|
||||
"urllib3": {
|
||||
"hashes": [
|
||||
"sha256:000ca7f471a233c2251c6c7023ee85305721bfdf18621ebff4fd17a8653427ed",
|
||||
"sha256:0e7c33d9a63e7ddfcb86780aac87befc2fbddf46c58dbb487e0855f7ceec283c"
|
||||
"sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14",
|
||||
"sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
|
||||
"version": "==1.26.8"
|
||||
"version": "==1.26.9"
|
||||
},
|
||||
"zipp": {
|
||||
"hashes": [
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from cisticola.utils import make_request
|
||||
from .base import Scraper, ScraperController
|
||||
from .base import Scraper, ScraperController, ChannelDoesNotExistError
|
||||
from .bitchute import BitchuteScraper
|
||||
from .gab import GabScraper
|
||||
from .gettr import GettrScraper
|
||||
|
||||
@@ -412,4 +412,7 @@ class ScraperController:
|
||||
"""
|
||||
|
||||
mapper_registry.metadata.drop_all(bind=self.engine)
|
||||
self.connect_to_db(self.engine)
|
||||
self.connect_to_db(self.engine)
|
||||
|
||||
class ChannelDoesNotExistError(Exception):
    """Raised when a requested channel cannot be found.

    Signals that the specified channel does not exist or has been deleted.
    """
|
||||
@@ -58,12 +58,56 @@ class BitchuteScraper(Scraper):
|
||||
date=datetime.fromtimestamp(post['timestamp']),
|
||||
date_archived=datetime.now(timezone.utc),
|
||||
raw_data=json.dumps(post),
|
||||
archived_urls=archived_urls)
|
||||
archived_urls=archived_urls,
|
||||
media_archived=archive_media)
|
||||
|
||||
def can_handle(self, channel):
    """Report whether this scraper is responsible for ``channel``.

    Returns ``True`` only when the channel's platform is ``"Bitchute"`` and
    ``get_username_from_url`` yields a value for its URL; otherwise returns
    ``False`` (an explicit boolean rather than an implicit ``None``).
    """
    if channel.platform != "Bitchute":
        return False

    # Only inspect the URL once the platform is known to match.
    return self.get_username_from_url(channel.url) is not None
|
||||
|
||||
def get_profile(self, channel: Channel) -> dict:
    """Scrape profile metadata from a Bitchute channel page.

    Fetches ``channel.url``, parses the channel's "about" section, then
    POSTs to the ``counts/`` endpoint under the page's canonical URL (with
    the CSRF tokens taken from the first response) to read the subscriber
    and view counts.

    Returns:
        A dict with the keys ``description``, ``description_links``,
        ``created``, ``videos``, ``owner_url``, ``owner_name``, ``image``,
        ``subscribers`` and ``views``.
    """
    base_url = channel.url

    # The context manager releases the session's connections even when
    # one of the lookups below raises.
    with requests.session() as session:
        response = session.get(base_url)
        soup = BeautifulSoup(response.content, 'html.parser')

        canonical_url = soup.find('link', {'id': 'canonical'})['href']
        csrftoken = session.cookies['csrftoken']
        csrfmiddlewaretoken = soup.find(
            'input', {'name': 'csrfmiddlewaretoken'})['value']

        about_soup = soup.find('div', {'id': 'channel-about'})
        info_list = about_soup.find(
            'div', {'class': 'channel-about-details'}).find_all('p')
        description_soup = about_soup.find('div', {'id': 'channel-description'})

        headers = {'Referer': base_url}
        data = {
            'csrftoken': csrftoken,
            'csrfmiddlewaretoken': csrfmiddlewaretoken}

        response = session.post(
            canonical_url + 'counts/', data=data, headers=headers)
        counts = json.loads(response.text)

    owner_soup = soup.find('p', {'class': 'owner'})
    if owner_soup.text == '[email\xa0protected]':
        # The owner name was replaced by an obfuscated-email placeholder;
        # recover it from the ``data-cfemail`` attribute.
        owner_name = decode_cfemail(
            owner_soup.find('span', {'class': "__cf_email__"})['data-cfemail'])
    else:
        owner_name = owner_soup.text

    profile = {
        'description': description_soup.text.strip(),
        'description_links': [
            a['href'] for a in description_soup.find_all('a', href=True)],
        'created': re.sub(
            r'\s', ' ', info_list[0].text.split('Created')[1].strip('. ')),
        'videos': int(info_list[1].text.split('videos')[0].strip()),
        'owner_url': owner_soup.find('a', href=True)['href'],
        'owner_name': owner_name,
        'image': about_soup.find('img', {'alt': 'Channel Image'}).get('data-src'),
        'subscribers': counts['subscriber_count'],
        'views': int(counts['about_view_count'].split(' ')[0])}

    return profile
|
||||
|
||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||
|
||||
def strip_tags(html, convert_newlines=True):
|
||||
@@ -419,30 +463,20 @@ def get_videos_user(session, user, csrftoken, detail):
|
||||
# these need to be yielded *after* the video because else the result file will have the comments
|
||||
# before the video, which is weird
|
||||
yield comment
|
||||
|
||||
#-----------------------------------------------------------------------------#
|
||||
|
||||
def get_about(user):
|
||||
"""
|
||||
Extract fields from channel's "About" tab
|
||||
"""
|
||||
base_url = "https://www.bitchute.com/channel/%s/" % user
|
||||
def decode_cfemail(cfemail):
|
||||
|
||||
response = requests.get(base_url)
|
||||
soup = BeautifulSoup(response.content, 'html.parser')
|
||||
"""https://stackoverflow.com/questions/36911296/scraping-of-protected-email
|
||||
"""
|
||||
|
||||
about_soup = soup.find('div', {'id' : 'channel-about'})
|
||||
info_list = about_soup.find('div', {'class' : 'channel-about-details'}).find_all('p')
|
||||
description_soup = about_soup.find('div', {'id' : 'channel-description'})
|
||||
email = ""
|
||||
k = int(cfemail[:2], 16)
|
||||
|
||||
about = {
|
||||
'description' : description_soup.text,
|
||||
'description_links' : [a['href'] for a in description_soup.find_all('a', href = True)],
|
||||
'created': re.sub(r'\s', ' ', info_list[0].text.split('Created')[1].strip('. ')),
|
||||
'videos' : int(info_list[1].text.split('videos')[0].strip()),
|
||||
'owner_url' : soup.find('p', {'class' : 'owner'}).find('a', href = True)['href'],
|
||||
'owner_name' : soup.find('p', {'class' : 'owner'}).text,
|
||||
'category' : info_list[-1].text.split('Category')[1].strip(),
|
||||
'image' : about_soup.find('img', {'alt' : 'Channel Image'})['data-src']
|
||||
}
|
||||
|
||||
return about
|
||||
for i in range(2, len(cfemail)-1, 2):
|
||||
email += chr(int(cfemail[i:i+2], 16)^k)
|
||||
|
||||
return email
|
||||
|
||||
#---------------------------------------------------------------------------#
|
||||
@@ -1,29 +1,53 @@
|
||||
from datetime import datetime, timezone
|
||||
from datetime import datetime, timezone, date
|
||||
import json
|
||||
from typing import Generator
|
||||
import os
|
||||
|
||||
from garc import Garc
|
||||
from gabber.client import Client, GAB_API_BASE_URL
|
||||
|
||||
from cisticola.base import Channel, ScraperResult
|
||||
from cisticola.scraper.base import Scraper
|
||||
|
||||
class GabScraper(Scraper):
|
||||
"""An implementation of a Scraper for Gab, using GARC library"""
|
||||
__version__ = "GabScraper 0.0.1"
|
||||
"""An implementation of a Scraper for Gab, using gabber library"""
|
||||
__version__ = "GabScraper 0.0.2"
|
||||
|
||||
def get_username_from_url(self, url):
    """Extract the Gab account name from a profile URL.

    Everything after the ``https://gab.com/`` prefix is taken as the
    username. Any query string, fragment, or surrounding slashes are
    dropped so the value can be interpolated into an API path as-is.
    """
    username = url.split('https://gab.com/')[-1]

    # "alice/", "alice?tab=posts" and "alice#top" all mean the account "alice".
    for separator in ('?', '#'):
        username = username.partition(separator)[0]

    return username.strip('/')
|
||||
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
|
||||
client = Garc(profile = 'main')
|
||||
username = self.get_username_from_url(channel.url)
|
||||
def get_group_id_from_url(self, url):
|
||||
group_id = int(url.split('/')[-1])
|
||||
|
||||
scraper = client.userposts(username)
|
||||
return group_id
|
||||
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
|
||||
client = Client(
|
||||
username = os.environ['GAB_USER'],
|
||||
password = os.environ['GAB_PASS'],
|
||||
threads = 25)
|
||||
|
||||
if channel.url.split('/')[-2] == 'groups':
|
||||
|
||||
group_id = self.get_group_id_from_url(url = channel.url)
|
||||
scraper = client.pull_group_posts(
|
||||
id = group_id,
|
||||
depth = float('inf'))
|
||||
else:
|
||||
|
||||
username = self.get_username_from_url(channel.url)
|
||||
|
||||
result = client._get(GAB_API_BASE_URL + f"/account_by_username/{username}").json()
|
||||
user_id = int(result['id'])
|
||||
|
||||
scraper = client.pull_statuses(
|
||||
id = user_id,
|
||||
created_after = date.min,
|
||||
replies = False)
|
||||
|
||||
for post in scraper:
|
||||
if since is not None and datetime.fromisoformat(post['created_at'].replace("Z", "+00:00")) <= since.date:
|
||||
if since is not None and datetime.fromisoformat(post['created_at'].replace("Z", "+00:00")).replace(tzinfo=timezone.utc) <= since.date.replace(tzinfo=timezone.utc):
|
||||
break
|
||||
|
||||
media_urls = []
|
||||
@@ -31,10 +55,18 @@ class GabScraper(Scraper):
|
||||
|
||||
if archive_media:
|
||||
|
||||
media_urls.extend([p['url'] for p in post['media_attachments']])
|
||||
|
||||
if post.get('repost') is not None:
|
||||
media_urls.extend([p['url'] for p in post['repost']['media_attachments']])
|
||||
for attachment in post.get('media_attachments'):
|
||||
if attachment.get('type') == 'video':
|
||||
media_urls.append(attachment['source_mp4'])
|
||||
else:
|
||||
media_urls.append(attachment['url'])
|
||||
|
||||
if post.get('reblog') is not None:
|
||||
for attachment in post['reblog'].get('media_attachments'):
|
||||
if attachment.get('type') == 'video':
|
||||
media_urls.append(attachment['source_mp4'])
|
||||
else:
|
||||
media_urls.append(attachment['url'])
|
||||
|
||||
for url in media_urls:
|
||||
media_blob, content_type, key = self.url_to_blob(url)
|
||||
@@ -49,8 +81,29 @@ class GabScraper(Scraper):
|
||||
date=datetime.fromisoformat(post['created_at'].replace("Z", "+00:00")).replace(tzinfo=timezone.utc),
|
||||
date_archived=datetime.now(timezone.utc),
|
||||
raw_data=json.dumps(post),
|
||||
archived_urls=archived_urls)
|
||||
archived_urls=archived_urls,
|
||||
media_archived=archive_media)
|
||||
|
||||
def can_handle(self, channel):
|
||||
def can_handle(self, channel: Channel) -> bool:
|
||||
if channel.platform == "Gab" and self.get_username_from_url(channel.url) is not None:
|
||||
return True
|
||||
return True
|
||||
|
||||
def get_profile(self, channel: Channel) -> dict:
    """Fetch profile metadata for a Gab group or user account.

    A client is created from the ``GAB_USER`` / ``GAB_PASS`` environment
    variables. When the second-to-last path segment of ``channel.url`` is
    ``groups`` the group record is pulled by its numeric id; otherwise the
    account is looked up by username.
    """
    gab_client = Client(
        username=os.environ['GAB_USER'],
        password=os.environ['GAB_PASS'],
        threads=25)

    is_group = channel.url.split('/')[-2] == 'groups'
    if not is_group:
        account_name = self.get_username_from_url(channel.url)
        endpoint = GAB_API_BASE_URL + f"/account_by_username/{account_name}"
        return gab_client._get(endpoint).json()

    gab_group_id = self.get_group_id_from_url(url=channel.url)
    return gab_client.pull_group(id=gab_group_id)
|
||||
@@ -59,7 +59,8 @@ class GettrScraper(Scraper):
|
||||
date=datetime.fromtimestamp(post['cdate']/1000.),
|
||||
date_archived=datetime.now(timezone.utc),
|
||||
raw_data=json.dumps(post),
|
||||
archived_urls=archived_urls)
|
||||
archived_urls=archived_urls,
|
||||
media_archived=archive_media)
|
||||
|
||||
def can_handle(self, channel):
|
||||
if channel.platform == "Gettr" and self.get_username_from_url(channel.url) is not None:
|
||||
@@ -68,4 +69,11 @@ class GettrScraper(Scraper):
|
||||
def url_to_key(self, url: str, content_type: str) -> str:
|
||||
ext = '.' + content_type.split('/')[-1]
|
||||
key = urlparse(url).path.split('/')[-2] + ext
|
||||
return key
|
||||
return key
|
||||
|
||||
def get_profile(self, channel: Channel) -> dict:
    """Return the user info for the Gettr account behind ``channel``.

    The username is derived from ``channel.url`` and passed to
    ``PublicClient.user_info``; that call's result is returned unchanged.
    """
    client = PublicClient()
    username = self.get_username_from_url(channel.url)

    return client.user_info(username)
|
||||
@@ -80,7 +80,8 @@ class InstagramScraper(Scraper):
|
||||
date=post.date_utc,
|
||||
date_archived=datetime.now(timezone.utc),
|
||||
raw_data=json.dumps(post._asdict(), default=str),
|
||||
archived_urls=archived_urls)
|
||||
archived_urls=archived_urls,
|
||||
media_archived=archive_media)
|
||||
|
||||
for comment in post.get_comments():
|
||||
|
||||
@@ -96,8 +97,32 @@ class InstagramScraper(Scraper):
|
||||
date=comment.created_at_utc,
|
||||
date_archived=datetime.now(timezone.utc),
|
||||
raw_data=json.dumps(comment_dict, default=str),
|
||||
archived_urls={})
|
||||
archived_urls={},
|
||||
media_archived=archive_media)
|
||||
|
||||
def can_handle(self, channel):
|
||||
if channel.platform == "Instagram" and self.get_username_from_url(channel.url) is not None:
|
||||
return True
|
||||
return True
|
||||
|
||||
def get_profile(self, channel: Channel) -> dict:
    """Fetch profile metadata for an Instagram account.

    Logs in with the ``INSTAGRAM_USERNAME`` / ``INSTAGRAM_PASSWORD``
    environment variables, loads the profile named in ``channel.url`` and
    returns its ``_asdict()`` data with ``followers`` and ``followees``
    added.
    """
    account_name = self.get_username_from_url(channel.url)

    insta = instaloader.Instaloader(
        quiet=True,
        download_comments=False,
        save_metadata=False)
    insta.login(
        user=os.environ['INSTAGRAM_USERNAME'],
        passwd=os.environ['INSTAGRAM_PASSWORD'])

    account = instaloader.Profile.from_username(
        context=insta.context,
        username=account_name)

    details = account._asdict()
    details['followers'] = account.followers
    details['followees'] = account.followees
    return details
|
||||
@@ -3,9 +3,11 @@ import json
|
||||
from typing import Generator
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from polyphemus.base import OdyseeChannel
|
||||
import requests
|
||||
from loguru import logger
|
||||
|
||||
from polyphemus.base import OdyseeChannel
|
||||
from polyphemus.api import get_auth_token
|
||||
from cisticola.base import Channel, ScraperResult
|
||||
from cisticola.scraper.base import Scraper
|
||||
|
||||
@@ -13,6 +15,10 @@ class OdyseeScraper(Scraper):
|
||||
"""An implementation of a Scraper for Odysee, using polyphemus library"""
|
||||
__version__ = "OdyseeScraper 0.0.1"
|
||||
|
||||
def __init__(self):
    """Initialise the base scraper and obtain an auth token.

    The token returned by ``get_auth_token()`` is stored on the instance
    as ``auth_token`` so later ``OdyseeChannel`` lookups can reuse it.
    """
    super().__init__()
    self.auth_token = get_auth_token()
|
||||
|
||||
def get_username_from_url(self, url):
|
||||
|
||||
username = url.split('odysee.com/')[-1].strip('@').split(':')[0]
|
||||
@@ -22,12 +28,12 @@ class OdyseeScraper(Scraper):
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
|
||||
|
||||
username = self.get_username_from_url(channel.url)
|
||||
odysee_channel = OdyseeChannel(channel_name = username)
|
||||
odysee_channel = OdyseeChannel(channel_name = username, auth_token = self.auth_token)
|
||||
|
||||
all_videos = odysee_channel.get_all_videos()
|
||||
|
||||
for video in all_videos:
|
||||
if since is not None and datetime.fromtimestamp(video['created']) <= since.date:
|
||||
if since is not None and datetime.fromtimestamp(video.info['created']) <= since.date:
|
||||
break
|
||||
|
||||
archived_urls = {}
|
||||
@@ -55,7 +61,8 @@ class OdyseeScraper(Scraper):
|
||||
date=datetime.fromtimestamp(video.info['created']),
|
||||
date_archived=datetime.now(timezone.utc),
|
||||
raw_data=json.dumps(video.info),
|
||||
archived_urls=archived_urls)
|
||||
archived_urls=archived_urls,
|
||||
media_archived=archive_media)
|
||||
|
||||
for comment in all_comments:
|
||||
|
||||
@@ -67,7 +74,8 @@ class OdyseeScraper(Scraper):
|
||||
date=datetime.fromtimestamp(comment.info['created']),
|
||||
date_archived=datetime.now(),
|
||||
raw_data=json.dumps(comment.info),
|
||||
archived_urls={})
|
||||
archived_urls={},
|
||||
media_archived=True)
|
||||
|
||||
def can_handle(self, channel):
|
||||
if channel.platform == "Odysee" and self.get_username_from_url(channel.url) is not None:
|
||||
@@ -77,4 +85,12 @@ class OdyseeScraper(Scraper):
|
||||
key = urlparse(url).path.split('/')[-2]
|
||||
ext = content_type.split('/')[-1]
|
||||
|
||||
return f'{key}.{ext}'
|
||||
return f'{key}.{ext}'
|
||||
|
||||
def get_profile(self, channel: Channel) -> dict:
    """Return the ``info`` of the Odysee channel named in ``channel.url``.

    The channel object is built with this scraper's stored auth token.
    """
    channel_name = self.get_username_from_url(channel.url)

    return OdyseeChannel(
        channel_name=channel_name,
        auth_token=self.auth_token).info
|
||||
@@ -14,18 +14,12 @@ class RumbleScraper(Scraper):
|
||||
"""An implementation of a Scraper for Rumble, using custom functions"""
|
||||
__version__ = "RumbleScraper 0.0.1"
|
||||
|
||||
def get_username_from_url(self, url):
|
||||
username = url.split('https://rumble.com/c/')[1]
|
||||
|
||||
return username
|
||||
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
|
||||
|
||||
username = self.get_username_from_url(channel.url)
|
||||
scraper = get_channel_videos(username)
|
||||
scraper = get_channel_videos(channel.url)
|
||||
|
||||
for post in scraper:
|
||||
if since is not None and datetime.fromtimestamp(post['cdate']*0.001) <= since.date:
|
||||
if since is not None and post['datetime'].replace(tzinfo=timezone.utc) <= since.date_archived.replace(tzinfo=timezone.utc):
|
||||
break
|
||||
|
||||
archived_urls = {}
|
||||
@@ -43,10 +37,11 @@ class RumbleScraper(Scraper):
|
||||
platform="Rumble",
|
||||
channel=channel.id,
|
||||
platform_id=post['media_url'].split('/')[-2],
|
||||
date=datetime.fromisoformat(post['datetime']).replace(tzinfo=timezone.utc),
|
||||
date=post['datetime'].replace(tzinfo=timezone.utc),
|
||||
date_archived=datetime.now(timezone.utc),
|
||||
raw_data=json.dumps(post),
|
||||
archived_urls=archived_urls)
|
||||
raw_data=json.dumps(post, default = str),
|
||||
archived_urls=archived_urls,
|
||||
media_archived=archive_media)
|
||||
|
||||
def url_to_key(self, url: str, content_type: str) -> str:
|
||||
ext = '.' + content_type.split('/')[-1]
|
||||
@@ -54,15 +49,21 @@ class RumbleScraper(Scraper):
|
||||
return key
|
||||
|
||||
def can_handle(self, channel):
|
||||
if channel.platform == "Rumble" and self.get_username_from_url(channel.url) is not None:
|
||||
if channel.platform == "Rumble" and channel.url is not None:
|
||||
return True
|
||||
|
||||
def get_profile(self, channel: Channel) -> dict:
    """Return Rumble profile metadata for ``channel``.

    Delegates to ``get_channel_profile`` with the channel's URL.
    """
    return get_channel_profile(url=channel.url)
|
||||
|
||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||
|
||||
def get_media_url(url):
|
||||
|
||||
r = make_request(url = url)
|
||||
soup = BeautifulSoup(r.content, features = 'lxml')
|
||||
soup = BeautifulSoup(r.content, features = 'html.parser')
|
||||
|
||||
script = json.loads(''.join(soup.find('script', {'type':'application/ld+json'}).text))
|
||||
media_url = script[0]['embedUrl']
|
||||
@@ -84,16 +85,16 @@ def process_video(video):
|
||||
'views' : video.find('span', {'class' : 'video-item--views'})['data-value'],
|
||||
'rumbles' : rumbles,
|
||||
'duration' : video.find('span', {'class' : 'video-item--duration'})['data-value'],
|
||||
'datetime' : video.find('time')['datetime']}
|
||||
'datetime' : datetime.fromisoformat(video.find('time')['datetime'])}
|
||||
|
||||
info['media_url'] = get_media_url(info['link'])
|
||||
|
||||
return info
|
||||
|
||||
def get_channel_videos(channel):
|
||||
def get_channel_videos(url):
|
||||
|
||||
page = 1
|
||||
channel_url = f'{BASE_URL}/c/{channel}?page='
|
||||
channel_url = f'{url}?page='
|
||||
|
||||
while True:
|
||||
url = channel_url + str(page)
|
||||
@@ -111,4 +112,22 @@ def get_channel_videos(channel):
|
||||
|
||||
page += 1
|
||||
|
||||
def get_channel_profile(url):
    """Scrape basic profile metadata from a Rumble channel page.

    Args:
        url: Address of the channel page to request.

    Returns:
        A dict with the keys ``name`` (text of the page's ``<h1>``),
        ``verified`` (whether the heading contains the verified badge),
        ``thumbnail``, ``cover`` and ``subscribers``. The last three are
        ``None`` when the corresponding element is missing from the page.
    """
    r = make_request(url=url)
    soup = BeautifulSoup(r.content, features='lxml')

    header = soup.find('h1')
    verified_svg = header.find('svg', {'class': 'listing-header--verified'})
    thumbnail_soup = soup.find('img', {'class': 'listing-header--thumb'})
    cover_soup = soup.find('img', {'class': 'listing-header--backsplash-img'})
    subscribers_soup = soup.find('span', {'class': 'subscribe-button-count'})

    profile = {
        'name': header.text,
        'verified': verified_svg is not None,
        'thumbnail': thumbnail_soup.get('src') if thumbnail_soup is not None else None,
        'cover': cover_soup.get('src') if cover_soup is not None else None,
        # Guarded like thumbnail/cover so a page without a subscriber
        # counter yields None instead of an AttributeError.
        'subscribers': subscribers_soup.text if subscribers_soup is not None else None}

    return profile
|
||||
|
||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||
@@ -53,3 +53,11 @@ class TelegramSnscrapeScraper(Scraper):
|
||||
archived_urls=archived_urls,
|
||||
media_archived=archive_media
|
||||
)
|
||||
|
||||
def get_profile(self, channel: Channel) -> dict:
    """Return the attribute dict of the Telegram channel entity.

    The entity is obtained from snscrape's ``TelegramChannelScraper`` for
    ``channel.screenname``.
    """
    channel_scraper = snscrape.modules.telegram.TelegramChannelScraper(
        channel.screenname)
    entity = channel_scraper._get_entity()

    return entity.__dict__
|
||||
@@ -8,6 +8,7 @@ import time
|
||||
|
||||
from loguru import logger
|
||||
from telethon.sync import TelegramClient
|
||||
from telethon.tl.functions.channels import GetFullChannelRequest
|
||||
from telethon.tl import types
|
||||
|
||||
from cisticola.base import Channel, ScraperResult
|
||||
@@ -138,3 +139,17 @@ class TelegramTelethonScraper(Scraper):
|
||||
raw_data=json.dumps(post.to_dict(), default=str),
|
||||
archived_urls=archived_urls,
|
||||
media_archived=archive_media)
|
||||
|
||||
def get_profile(self, channel: Channel) -> dict:
    """Return the attribute dict of a ``GetFullChannelRequest`` result.

    Credentials are read from the ``TELEGRAM_API_ID``, ``TELEGRAM_API_HASH``
    and ``TELEGRAM_PHONE`` environment variables; the phone value is passed
    as the first argument of ``TelegramClient``.
    """
    channel_name = self.get_username_from_url(channel.url)

    telegram_api_id = os.environ['TELEGRAM_API_ID']
    telegram_api_hash = os.environ['TELEGRAM_API_HASH']
    telegram_phone = os.environ['TELEGRAM_PHONE']

    with TelegramClient(telegram_phone, telegram_api_id, telegram_api_hash) as client:
        request = GetFullChannelRequest(channel=channel_name)
        return client(request).__dict__
|
||||
|
||||
@@ -2,11 +2,11 @@ from datetime import datetime, timezone
|
||||
from typing import Generator
|
||||
from urllib.parse import urlparse, parse_qs
|
||||
|
||||
from snscrape.modules.twitter import TwitterProfileScraper, Video, Gif, Photo
|
||||
from snscrape.modules.twitter import TwitterProfileScraper, TwitterUserScraper, Video, Gif, Photo
|
||||
from loguru import logger
|
||||
|
||||
from cisticola.base import Channel, ScraperResult
|
||||
from cisticola.scraper.base import Scraper
|
||||
from cisticola.scraper.base import Scraper, ChannelDoesNotExistError
|
||||
|
||||
class TwitterScraper(Scraper):
|
||||
"""An implementation of a Scraper for Twitter, using snscrape library"""
|
||||
@@ -67,7 +67,8 @@ class TwitterScraper(Scraper):
|
||||
date=tweet.date,
|
||||
date_archived=datetime.now(timezone.utc),
|
||||
raw_data=tweet.json(),
|
||||
archived_urls=archived_urls)
|
||||
archived_urls=archived_urls,
|
||||
media_archived=archive_media)
|
||||
|
||||
def can_handle(self, channel):
|
||||
if channel.platform == "Twitter" and channel.platform_id:
|
||||
@@ -88,4 +89,14 @@ class TwitterScraper(Scraper):
|
||||
ext = ''
|
||||
|
||||
key = parsed_url.path.split('/')[-1] + ext
|
||||
return key
|
||||
return key
|
||||
|
||||
def get_profile(self, channel: Channel) -> dict:
    """Return the attribute dict of the Twitter user entity.

    Raises:
        ChannelDoesNotExistError: when the scraper yields no entity for
            ``channel.screenname``; the error carries ``channel.url``.
    """
    user_entity = TwitterUserScraper(channel.screenname)._get_entity()

    if user_entity is not None:
        return user_entity.__dict__

    raise ChannelDoesNotExistError(channel.url)
|
||||
@@ -25,7 +25,7 @@ class VkontakteScraper(Scraper):
|
||||
first = True
|
||||
|
||||
for post in scraper.get_items():
|
||||
if since is not None and post.date.replace(tzinfo=timezone.utc) <= since.date_archived.replace(tzinfo=timezone.utc):
|
||||
if since is not None and datetime.fromordinal(post.date.toordinal()).replace(tzinfo=timezone.utc) <= since.date_archived.replace(tzinfo=timezone.utc):
|
||||
# with VKontakteUserScraper, the first tweet could be an old pinned tweet
|
||||
if first:
|
||||
first = False
|
||||
@@ -63,7 +63,8 @@ class VkontakteScraper(Scraper):
|
||||
date=datetime.fromordinal(post.date.toordinal()).replace(tzinfo=timezone.utc),
|
||||
date_archived=datetime.now(timezone.utc),
|
||||
raw_data=post.json(),
|
||||
archived_urls=archived_urls)
|
||||
archived_urls=archived_urls,
|
||||
media_archived=archive_media)
|
||||
|
||||
def can_handle(self, channel):
|
||||
if channel.platform == "Vkontakte" and channel.platform_id:
|
||||
@@ -77,4 +78,12 @@ class VkontakteScraper(Scraper):
|
||||
ext = '.mp4'
|
||||
key = path.split('/')[-1] + ext
|
||||
|
||||
return key
|
||||
return key
|
||||
|
||||
def get_profile(self, channel: Channel) -> dict:
    """Return the attribute dict of the VKontakte user entity.

    The username is derived from ``channel.url`` and handed to
    ``VKontakteUserScraper``.
    """
    account_name = self.get_username_from_url(channel.url)
    entity = VKontakteUserScraper(account_name)._get_entity()

    return entity.__dict__
|
||||
@@ -72,8 +72,24 @@ class YoutubeScraper(Scraper):
|
||||
date=datetime.strptime(video['upload_date'], '%Y%m%d').replace(tzinfo=timezone.utc),
|
||||
date_archived=datetime.now(timezone.utc),
|
||||
raw_data=json.dumps(video, default = str),
|
||||
archived_urls=archived_urls)
|
||||
archived_urls=archived_urls,
|
||||
media_archived=archive_media)
|
||||
|
||||
def can_handle(self, channel):
|
||||
if channel.platform == "Youtube" and channel.url:
|
||||
return True
|
||||
return True
|
||||
|
||||
def get_profile(self, channel: Channel) -> dict:
    """Return the metadata yt-dlp extracts for ``channel.url``.

    A ``yt_dlp.YoutubeDL`` instance is created with an empty options dict and
    ``extract_info`` is called with ``process=False``.

    Args:
        channel: Channel whose ``url`` is passed to yt-dlp.

    Returns:
        Whatever ``extract_info`` returns for the channel URL.

    Raises:
        yt_dlp.utils.DownloadError: Propagated unchanged from yt-dlp.
    """
    downloader = yt_dlp.YoutubeDL({})

    try:
        return downloader.extract_info(channel.url, process=False)
    except yt_dlp.utils.DownloadError:
        # Download errors are intentionally surfaced to the caller as-is.
        raise
|
||||
@@ -75,12 +75,18 @@ For developers, if changes are made to the package structure or additional modul
|
||||
Testing
|
||||
-------
|
||||
|
||||
The *cisticola* application uses pytest_ for unit testing. To run the test suite, run the following command from the package root directory:
|
||||
The *cisticola* application uses pytest_ for unit testing. To run the full test suite, run the following command from the package root directory:
|
||||
|
||||
.. code-block::
|
||||
|
||||
pipenv run pytest
|
||||
|
||||
To run the test suite without archiving media (which can take a long time), run the following command from the package root directory:
|
||||
|
||||
.. code-block::
|
||||
|
||||
pipenv run pytest -m "not media"
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
|
||||
@@ -11,6 +11,11 @@ addopts =
|
||||
--cov-report html:reports/coverage
|
||||
--html='reports/tests.html'
|
||||
--self-contained-html
|
||||
markers =
|
||||
profile: marks tests for only extracting channel metadata (deselect with '-m
|
||||
"not profile"')
|
||||
media: marks tests for archiving all media attachments (deselect with '-m
|
||||
"not media"')
|
||||
filterwarnings =
|
||||
ignore:the imp module is deprecated:DeprecationWarning
|
||||
ignore:The localize method is no longer necessary, as this time zone supports the fold attribute
|
||||
|
||||
@@ -33,6 +33,19 @@ GAB_CHANNEL_KWARGS = {
|
||||
'chat': False,
|
||||
'notes': ''}
|
||||
|
||||
# Keyword arguments for building the Gab *group* test channel.
# The numeric platform_id is the same id that appears at the end of the URL.
GAB_GROUP_KWARGS = {
    'name': 'iran group (test)',
    'platform_id': 10001,
    'category': 'test',
    'platform': 'Gab',
    'url': 'https://gab.com/groups/10001',
    'screenname': 'iran group',
    'country': 'IR',
    'influencer': None,
    'public': True,
    'chat': True,
    'notes': '',
}
|
||||
|
||||
GETTR_CHANNEL_KWARGS = {
|
||||
'name': 'LizardRepublic (test)',
|
||||
'platform_id': 'lizardrepublic',
|
||||
@@ -178,6 +191,7 @@ def channel_kwargs():
|
||||
return {
|
||||
'bitchute' : BITCHUTE_CHANNEL_KWARGS,
|
||||
'gab' : GAB_CHANNEL_KWARGS,
|
||||
'gab_group' : GAB_GROUP_KWARGS,
|
||||
'gettr' : GETTR_CHANNEL_KWARGS,
|
||||
'instagram' : INSTAGRAM_CHANNEL_KWARGS,
|
||||
'odysee' : ODYSEE_CHANNEL_KWARGS,
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import BitchuteScraper
|
||||
|
||||
@@ -7,6 +9,7 @@ def test_scrape_bitchute_channel_no_media(controller, channel_kwargs):
|
||||
controller.register_scraper(scraper = BitchuteScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_bitchute_channel(controller, channel_kwargs):
|
||||
|
||||
controller.reset_db()
|
||||
@@ -14,3 +17,10 @@ def test_scrape_bitchute_channel(controller, channel_kwargs):
|
||||
channels = [Channel(**channel_kwargs['bitchute'])]
|
||||
controller.register_scraper(scraper = BitchuteScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@pytest.mark.profile
def test_scrape_bitchute_profile(channel_kwargs):
    """Smoke test: fetching the Bitchute test channel's profile must not raise."""
    bitchute_scraper = BitchuteScraper()
    bitchute_channel = Channel(**channel_kwargs['bitchute'])

    # No assertion on the payload; the test passes if the call completes.
    bitchute_scraper.get_profile(channel=bitchute_channel)
|
||||
@@ -1,3 +1,5 @@
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import GabScraper
|
||||
|
||||
@@ -7,6 +9,7 @@ def test_scrape_gab_channel_no_media(controller, channel_kwargs):
|
||||
controller.register_scraper(scraper = GabScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_gab_channel(controller, channel_kwargs):
|
||||
|
||||
controller.reset_db()
|
||||
@@ -14,3 +17,32 @@ def test_scrape_gab_channel(controller, channel_kwargs):
|
||||
channels = [Channel(**channel_kwargs['gab'])]
|
||||
controller.register_scraper(scraper = GabScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@pytest.mark.profile
def test_scrape_gab_profile(channel_kwargs):
    """Smoke test: fetching the Gab test channel's profile must not raise."""
    gab_scraper = GabScraper()
    gab_channel = Channel(**channel_kwargs['gab'])

    # No assertion on the payload; the test passes if the call completes.
    gab_scraper.get_profile(channel=gab_channel)
|
||||
|
||||
def test_scrape_gab_group_no_media(controller, channel_kwargs):
    """Scrape the Gab test group with media archiving switched off."""
    gab_groups = [Channel(**channel_kwargs['gab_group'])]

    controller.register_scraper(scraper=GabScraper())
    controller.scrape_channels(channels=gab_groups, archive_media=False)
|
||||
|
||||
@pytest.mark.media
def test_scrape_gab_group(controller, channel_kwargs):
    """Scrape the Gab test group with media archiving switched on."""
    # Reset the database first, as the original test does.
    controller.reset_db()

    gab_groups = [Channel(**channel_kwargs['gab_group'])]

    controller.register_scraper(scraper=GabScraper())
    controller.scrape_channels(channels=gab_groups, archive_media=True)
|
||||
|
||||
@pytest.mark.profile
def test_scrape_gab_group_profile(channel_kwargs):
    """Smoke test: fetching the Gab test group's profile must not raise."""
    gab_scraper = GabScraper()
    gab_group = Channel(**channel_kwargs['gab_group'])

    # No assertion on the payload; the test passes if the call completes.
    gab_scraper.get_profile(channel=gab_group)
|
||||
@@ -1,3 +1,5 @@
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import GettrScraper
|
||||
|
||||
@@ -7,6 +9,7 @@ def test_scrape_gettr_channel_no_media(controller, channel_kwargs):
|
||||
controller.register_scraper(scraper = GettrScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_gettr_channel(controller, channel_kwargs):
|
||||
|
||||
controller.reset_db()
|
||||
@@ -14,3 +17,10 @@ def test_scrape_gettr_channel(controller, channel_kwargs):
|
||||
channels = [Channel(**channel_kwargs['gettr'])]
|
||||
controller.register_scraper(scraper = GettrScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@pytest.mark.profile
def test_scrape_gettr_profile(channel_kwargs):
    """Smoke test: fetching the Gettr test channel's profile must not raise."""
    gettr_scraper = GettrScraper()
    gettr_channel = Channel(**channel_kwargs['gettr'])

    # No assertion on the payload; the test passes if the call completes.
    gettr_scraper.get_profile(channel=gettr_channel)
|
||||
@@ -1,3 +1,5 @@
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import InstagramScraper
|
||||
|
||||
@@ -7,6 +9,7 @@ def test_scrape_instagram_channel_no_media(controller, channel_kwargs):
|
||||
controller.register_scraper(scraper = InstagramScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_instagram_channel(controller, channel_kwargs):
|
||||
|
||||
controller.reset_db()
|
||||
@@ -14,3 +17,10 @@ def test_scrape_instagram_channel(controller, channel_kwargs):
|
||||
channels = [Channel(**channel_kwargs['instagram'])]
|
||||
controller.register_scraper(scraper = InstagramScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@pytest.mark.profile
def test_scrape_instagram_profile(channel_kwargs):
    """Smoke test: fetching the Instagram test channel's profile must not raise."""
    instagram_scraper = InstagramScraper()
    instagram_channel = Channel(**channel_kwargs['instagram'])

    # No assertion on the payload; the test passes if the call completes.
    instagram_scraper.get_profile(channel=instagram_channel)
|
||||
@@ -1,3 +1,5 @@
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import OdyseeScraper
|
||||
|
||||
@@ -7,6 +9,7 @@ def test_scrape_odysee_channel_no_media(controller, channel_kwargs):
|
||||
controller.register_scraper(scraper = OdyseeScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_odysee_channel(controller, channel_kwargs):
|
||||
|
||||
controller.reset_db()
|
||||
@@ -14,3 +17,10 @@ def test_scrape_odysee_channel(controller, channel_kwargs):
|
||||
channels = [Channel(**channel_kwargs['odysee'])]
|
||||
controller.register_scraper(scraper = OdyseeScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@pytest.mark.profile
def test_scrape_odysee_profile(channel_kwargs):
    """Smoke test: fetching the Odysee test channel's profile must not raise."""
    odysee_scraper = OdyseeScraper()
    odysee_channel = Channel(**channel_kwargs['odysee'])

    # No assertion on the payload; the test passes if the call completes.
    odysee_scraper.get_profile(channel=odysee_channel)
|
||||
@@ -1,3 +1,5 @@
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import RumbleScraper
|
||||
|
||||
@@ -7,6 +9,7 @@ def test_scrape_rumble_channel_no_media(controller, channel_kwargs):
|
||||
controller.register_scraper(scraper = RumbleScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_rumble_channel(controller, channel_kwargs):
|
||||
|
||||
controller.reset_db()
|
||||
@@ -14,3 +17,10 @@ def test_scrape_rumble_channel(controller, channel_kwargs):
|
||||
channels = [Channel(**channel_kwargs['rumble'])]
|
||||
controller.register_scraper(scraper = RumbleScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@pytest.mark.profile
def test_scrape_rumble_profile(channel_kwargs):
    """Smoke test: fetching the Rumble test channel's profile must not raise."""
    rumble_scraper = RumbleScraper()
    rumble_channel = Channel(**channel_kwargs['rumble'])

    # No assertion on the payload; the test passes if the call completes.
    rumble_scraper.get_profile(channel=rumble_channel)
|
||||
@@ -1,3 +1,5 @@
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import TelegramSnscrapeScraper
|
||||
|
||||
@@ -7,6 +9,7 @@ def test_scrape_telegram_snscrape_channel_no_media(controller, channel_kwargs):
|
||||
controller.register_scraper(scraper = TelegramSnscrapeScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_telegram_snscrape_channel(controller, channel_kwargs):
|
||||
|
||||
controller.reset_db()
|
||||
@@ -14,3 +17,10 @@ def test_scrape_telegram_snscrape_channel(controller, channel_kwargs):
|
||||
channels = [Channel(**channel_kwargs['telegram'])]
|
||||
controller.register_scraper(scraper = TelegramSnscrapeScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@pytest.mark.profile
def test_scrape_telegram_snscrape_profile(channel_kwargs):
    """Smoke test: the snscrape-based Telegram profile fetch must not raise."""
    telegram_scraper = TelegramSnscrapeScraper()
    telegram_channel = Channel(**channel_kwargs['telegram'])

    # No assertion on the payload; the test passes if the call completes.
    telegram_scraper.get_profile(channel=telegram_channel)
|
||||
@@ -1,3 +1,5 @@
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import TelegramTelethonScraper
|
||||
|
||||
@@ -7,6 +9,7 @@ def test_scrape_telegram_telethon_channel_no_media(controller, channel_kwargs):
|
||||
controller.register_scraper(scraper = TelegramTelethonScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_telegram_telethon_channel(controller, channel_kwargs):
|
||||
|
||||
controller.reset_db()
|
||||
@@ -14,3 +17,10 @@ def test_scrape_telegram_telethon_channel(controller, channel_kwargs):
|
||||
channels = [Channel(**channel_kwargs['telegram'])]
|
||||
controller.register_scraper(scraper = TelegramTelethonScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@pytest.mark.profile
def test_scrape_telegram_telethon_profile(channel_kwargs):
    """Smoke test: the Telethon-based Telegram profile fetch must not raise."""
    telegram_scraper = TelegramTelethonScraper()
    telegram_channel = Channel(**channel_kwargs['telegram'])

    # No assertion on the payload; the test passes if the call completes.
    telegram_scraper.get_profile(channel=telegram_channel)
|
||||
@@ -1,3 +1,5 @@
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import TwitterScraper
|
||||
|
||||
@@ -7,6 +9,7 @@ def test_scrape_twitter_channel_no_media(controller, channel_kwargs):
|
||||
controller.register_scraper(scraper = TwitterScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_twitter_channel(controller, channel_kwargs):
|
||||
|
||||
controller.reset_db()
|
||||
@@ -14,3 +17,10 @@ def test_scrape_twitter_channel(controller, channel_kwargs):
|
||||
channels = [Channel(**channel_kwargs['twitter'])]
|
||||
controller.register_scraper(scraper = TwitterScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@pytest.mark.profile
def test_scrape_twitter_profile(channel_kwargs):
    """Smoke test: fetching the Twitter test channel's profile must not raise."""
    twitter_scraper = TwitterScraper()
    twitter_channel = Channel(**channel_kwargs['twitter'])

    # No assertion on the payload; the test passes if the call completes.
    twitter_scraper.get_profile(channel=twitter_channel)
|
||||
@@ -1,3 +1,5 @@
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import VkontakteScraper
|
||||
|
||||
@@ -7,6 +9,7 @@ def test_scrape_vkontakte_channel_no_media(controller, channel_kwargs):
|
||||
controller.register_scraper(scraper = VkontakteScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_vkontakte_channel(controller, channel_kwargs):
|
||||
|
||||
controller.reset_db()
|
||||
@@ -14,3 +17,10 @@ def test_scrape_vkontakte_channel(controller, channel_kwargs):
|
||||
channels = [Channel(**channel_kwargs['vkontakte'])]
|
||||
controller.register_scraper(scraper = VkontakteScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@pytest.mark.profile
def test_scrape_vkontakte_profile(channel_kwargs):
    """Smoke test: fetching the Vkontakte test channel's profile must not raise."""
    vkontakte_scraper = VkontakteScraper()
    vkontakte_channel = Channel(**channel_kwargs['vkontakte'])

    # No assertion on the payload; the test passes if the call completes.
    vkontakte_scraper.get_profile(channel=vkontakte_channel)
|
||||
@@ -1,3 +1,5 @@
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import YoutubeScraper
|
||||
|
||||
@@ -7,6 +9,7 @@ def test_scrape_youtube_channel_no_media(controller, channel_kwargs):
|
||||
controller.register_scraper(scraper = YoutubeScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_youtube_channel(controller, channel_kwargs):
|
||||
|
||||
controller.reset_db()
|
||||
@@ -14,3 +17,10 @@ def test_scrape_youtube_channel(controller, channel_kwargs):
|
||||
channels = [Channel(**channel_kwargs['youtube'])]
|
||||
controller.register_scraper(scraper = YoutubeScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@pytest.mark.profile
def test_scrape_youtube_profile(channel_kwargs):
    """Smoke test: fetching the YouTube test channel's profile must not raise."""
    youtube_scraper = YoutubeScraper()
    youtube_channel = Channel(**channel_kwargs['youtube'])

    # No assertion on the payload; the test passes if the call completes.
    youtube_scraper.get_profile(channel=youtube_channel)
|
||||
@@ -1,11 +1,14 @@
|
||||
from sqlalchemy.orm import sessionmaker, with_polymorphic
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import TwitterScraper
|
||||
from cisticola.transformer import TwitterTransformer
|
||||
from cisticola.base import Post, Media
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_etl_twitter(engine, controller, etl_controller, channel_kwargs):
|
||||
controller.reset_db()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user