Merge pull request #23 from bellingcat/profile

Added methods for retrieving channel profile metadata, refactored Gab scraper to use gabber
This commit is contained in:
Logan Williams
2022-03-31 08:13:17 +02:00
committed by GitHub
30 changed files with 815 additions and 246 deletions

View File

@@ -14,7 +14,6 @@ boto3 = "*"
snscrape = {git = "https://github.com/bellingcat/snscrape.git"}
ffmpeg-python = "*"
polyphemus = {git = "https://github.com/bellingcat/polyphemus.git"}
garc = "*"
yt-dlp = "*"
telethon = "*"
pytesseract = "*"
@@ -22,6 +21,7 @@ pyexiftool = {git = "https://github.com/smarnach/pyexiftool.git"}
instaloader = "*"
gspread = "*"
cryptg = "*"
gabber = {git = "https://github.com/stanfordio/gabber.git"}
[dev-packages]
pytest = "*"

526
Pipfile.lock generated
View File

@@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
"sha256": "e3b96b0ac8c80d4817f9adac4ab171bf4b7e07e80927c7b152a24e8bbdbf7faa"
"sha256": "b712e767d64e54e83e8c2d8a27a68203583ed7ad31d4ea3b4b6076a72a2150fd"
},
"pipfile-spec": 6,
"requires": {
@@ -16,14 +16,6 @@
]
},
"default": {
"attrs": {
"hashes": [
"sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4",
"sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
"version": "==21.4.0"
},
"beautifulsoup4": {
"hashes": [
"sha256:9a315ce70049920ea4572a4055bc4bd700c940521d36fc858205ad4fcde149bf",
@@ -34,19 +26,19 @@
},
"boto3": {
"hashes": [
"sha256:76d5b90400c54b25278150768e946edf166acce2c1597c0ecfbebb1dbe9acf2c",
"sha256:7bb2e6506a6ad44d111dd20a5d510374b6958fe989b4ef887109c79d812f926f"
"sha256:127ebdf58c8825b53f1eff111e08c49ffffeb1f6d7a5665c9907ce8128fe14b1",
"sha256:b7ce3bf013f0f60e40c2676d5a7b620ed927cfad0aa348a606b10e9a0387f249"
],
"index": "pypi",
"version": "==1.21.19"
"version": "==1.21.29"
},
"botocore": {
"hashes": [
"sha256:5ed2be0e413961134f4c17eab16396d41a5b4b73a637588260c04d20806d52ea",
"sha256:d0d77bce152ca51f3c2cd0f9bf05cb3b623e719406ad58b4c20444e237fe82eb"
"sha256:b467d64cd773dc4d49ef31b18a8dded554f284f799720bd12e989fe2138fd5b8",
"sha256:de87907d42682179946ddfa113b9334e3c4258404aef19edd8c92381ff54775c"
],
"markers": "python_version >= '3.6'",
"version": "==1.24.19"
"version": "==1.24.29"
},
"brotli": {
"hashes": [
@@ -123,6 +115,14 @@
"index": "pypi",
"version": "==0.0.1"
},
"cachetools": {
"hashes": [
"sha256:486471dfa8799eb7ec503a8059e263db000cdda20075ce5e48903087f79d5fd6",
"sha256:8fecd4203a38af17928be7b90689d8083603073622229ca7077b72d8e5a976e4"
],
"markers": "python_version ~= '3.7'",
"version": "==5.0.0"
},
"certifi": {
"hashes": [
"sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872",
@@ -130,6 +130,61 @@
],
"version": "==2021.10.8"
},
"cffi": {
"hashes": [
"sha256:00c878c90cb53ccfaae6b8bc18ad05d2036553e6d9d1d9dbcf323bbe83854ca3",
"sha256:0104fb5ae2391d46a4cb082abdd5c69ea4eab79d8d44eaaf79f1b1fd806ee4c2",
"sha256:06c48159c1abed75c2e721b1715c379fa3200c7784271b3c46df01383b593636",
"sha256:0808014eb713677ec1292301ea4c81ad277b6cdf2fdd90fd540af98c0b101d20",
"sha256:10dffb601ccfb65262a27233ac273d552ddc4d8ae1bf93b21c94b8511bffe728",
"sha256:14cd121ea63ecdae71efa69c15c5543a4b5fbcd0bbe2aad864baca0063cecf27",
"sha256:17771976e82e9f94976180f76468546834d22a7cc404b17c22df2a2c81db0c66",
"sha256:181dee03b1170ff1969489acf1c26533710231c58f95534e3edac87fff06c443",
"sha256:23cfe892bd5dd8941608f93348c0737e369e51c100d03718f108bf1add7bd6d0",
"sha256:263cc3d821c4ab2213cbe8cd8b355a7f72a8324577dc865ef98487c1aeee2bc7",
"sha256:2756c88cbb94231c7a147402476be2c4df2f6078099a6f4a480d239a8817ae39",
"sha256:27c219baf94952ae9d50ec19651a687b826792055353d07648a5695413e0c605",
"sha256:2a23af14f408d53d5e6cd4e3d9a24ff9e05906ad574822a10563efcef137979a",
"sha256:31fb708d9d7c3f49a60f04cf5b119aeefe5644daba1cd2a0fe389b674fd1de37",
"sha256:3415c89f9204ee60cd09b235810be700e993e343a408693e80ce7f6a40108029",
"sha256:3773c4d81e6e818df2efbc7dd77325ca0dcb688116050fb2b3011218eda36139",
"sha256:3b96a311ac60a3f6be21d2572e46ce67f09abcf4d09344c49274eb9e0bf345fc",
"sha256:3f7d084648d77af029acb79a0ff49a0ad7e9d09057a9bf46596dac9514dc07df",
"sha256:41d45de54cd277a7878919867c0f08b0cf817605e4eb94093e7516505d3c8d14",
"sha256:4238e6dab5d6a8ba812de994bbb0a79bddbdf80994e4ce802b6f6f3142fcc880",
"sha256:45db3a33139e9c8f7c09234b5784a5e33d31fd6907800b316decad50af323ff2",
"sha256:45e8636704eacc432a206ac7345a5d3d2c62d95a507ec70d62f23cd91770482a",
"sha256:4958391dbd6249d7ad855b9ca88fae690783a6be9e86df65865058ed81fc860e",
"sha256:4a306fa632e8f0928956a41fa8e1d6243c71e7eb59ffbd165fc0b41e316b2474",
"sha256:57e9ac9ccc3101fac9d6014fba037473e4358ef4e89f8e181f8951a2c0162024",
"sha256:59888172256cac5629e60e72e86598027aca6bf01fa2465bdb676d37636573e8",
"sha256:5e069f72d497312b24fcc02073d70cb989045d1c91cbd53979366077959933e0",
"sha256:64d4ec9f448dfe041705426000cc13e34e6e5bb13736e9fd62e34a0b0c41566e",
"sha256:6dc2737a3674b3e344847c8686cf29e500584ccad76204efea14f451d4cc669a",
"sha256:74fdfdbfdc48d3f47148976f49fab3251e550a8720bebc99bf1483f5bfb5db3e",
"sha256:75e4024375654472cc27e91cbe9eaa08567f7fbdf822638be2814ce059f58032",
"sha256:786902fb9ba7433aae840e0ed609f45c7bcd4e225ebb9c753aa39725bb3e6ad6",
"sha256:8b6c2ea03845c9f501ed1313e78de148cd3f6cad741a75d43a29b43da27f2e1e",
"sha256:91d77d2a782be4274da750752bb1650a97bfd8f291022b379bb8e01c66b4e96b",
"sha256:91ec59c33514b7c7559a6acda53bbfe1b283949c34fe7440bcf917f96ac0723e",
"sha256:920f0d66a896c2d99f0adbb391f990a84091179542c205fa53ce5787aff87954",
"sha256:a5263e363c27b653a90078143adb3d076c1a748ec9ecc78ea2fb916f9b861962",
"sha256:abb9a20a72ac4e0fdb50dae135ba5e77880518e742077ced47eb1499e29a443c",
"sha256:c2051981a968d7de9dd2d7b87bcb9c939c74a34626a6e2f8181455dd49ed69e4",
"sha256:c21c9e3896c23007803a875460fb786118f0cdd4434359577ea25eb556e34c55",
"sha256:c2502a1a03b6312837279c8c1bd3ebedf6c12c4228ddbad40912d671ccc8a962",
"sha256:d4d692a89c5cf08a8557fdeb329b82e7bf609aadfaed6c0d79f5a449a3c7c023",
"sha256:da5db4e883f1ce37f55c667e5c0de439df76ac4cb55964655906306918e7363c",
"sha256:e7022a66d9b55e93e1a845d8c9eba2a1bebd4966cd8bfc25d9cd07d515b33fa6",
"sha256:ef1f279350da2c586a69d32fc8733092fd32cc8ac95139a00377841f59a3f8d8",
"sha256:f54a64f8b0c8ff0b64d18aa76675262e1700f3995182267998c31ae974fbc382",
"sha256:f5c7150ad32ba43a07c4479f40241756145a1f03b43480e058cfd862bf5041c7",
"sha256:f6f824dc3bce0edab5f427efcfb1d63ee75b6fcb7282900ccaf925be84efb0fc",
"sha256:fd8a250edc26254fe5b33be00402e6d287f562b6a5b2152dec302fa15bb3e997",
"sha256:ffaa5c925128e29efbde7301d8ecaf35c8c60ffbcd6a1ffd3a552177c8e5e796"
],
"version": "==1.15.0"
},
"charset-normalizer": {
"hashes": [
"sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597",
@@ -140,19 +195,59 @@
},
"click": {
"hashes": [
"sha256:6a7a62563bbfabfda3a38f3023a1db4a35978c0abd76f6c9605ecd6554d6d9b1",
"sha256:8458d7b1287c5fb128c90e23381cf99dcde74beaf6c7ff6384ce84d6fe090adb"
"sha256:19a4baa64da924c5e0cd889aba8e947f280309f1a2ce0947a3e3a7bcb7cc72d6",
"sha256:977c213473c7665d3aa092b41ff12063227751c41d7b17165013e10069cc5cd2"
],
"markers": "python_version >= '3.6'",
"version": "==8.0.4"
"markers": "python_version >= '3.7'",
"version": "==8.1.0"
},
"cryptg": {
"hashes": [
"sha256:02b31622a75a49a5dcd25e589c85faae54575f018e055bd21a17df97c8bb9095",
"sha256:0da1b367056e57a5c01d22608da0cd50e597b917c1b2d9631767aa3c0640a99a",
"sha256:135688c6fbda90748924c2cb047f63785ebf4397d81acc4a05357950653c5096",
"sha256:1fb6c6d4561a54406593197c1f5f23662ab320f4af4ab11834e1583e9d27a49a",
"sha256:2516557e89803637fa7342de43dbcc5f84bf68ae05b1064a354a62d423447d9f",
"sha256:29001dafd3d6a054365222b1f89b12876723c89cdd10aa0e5885a05dfd034eeb",
"sha256:2cc8115960e49a038091ffb2d09de59e0acbdc76de10d7d415b7671a06bae0a9",
"sha256:2cd8224eb64af756f45cdceab16d048494313db8acec1e38d75d97716082267b",
"sha256:307bf96a6ac9c87b44531d8da5fe3a6c5d856e1dc69b68136ef9c4fb66ad17ac",
"sha256:31cf7682de69022c9a77739cdcf7116b06522b128b9b51c7593f277f38c38dbf",
"sha256:3bc2f372dec3a7753c0c0d72c69fcbe44af5473f870a3406978e07e8560a1aa6",
"sha256:46960979542155c9d903656a3a39770061b09a3691a23296f06dc168fe4ff962",
"sha256:47ad5916be4558f4d674c12800e8d9663ce938b0046f19cdc869ba3a7ca280ec",
"sha256:5faed49d972c7f44ce4d6fa1a64169c85a11209fa1fbe1c8a333fb1454888725",
"sha256:695636cca0ee938bd7113658ee60bfaf89afa19708c40ecae5f4a222c2ec544a",
"sha256:6c5d66975fc59adca203fa91e2a104240457114468162d30e9213661239ac1d6",
"sha256:72a5485ece10a70160170ceb658b1836db82dccab08a1f7029c54d81cf6b1d43",
"sha256:7fc8e1893775c6f53dceda1959f19833cc27a67a80492c10e2415dc601b36650",
"sha256:890584db41c8e1e046ae40dee0074614470d36ebd6b7e57bb91303300066601f",
"sha256:a1fb178702730b59267f1e6c6dfe16c7bb9c1350cee4183221982ad2dba4e7f5",
"sha256:a4de1730ca56aa8a945f176c25586901ed5e9f15ffb70c6459eedf466eb6299b",
"sha256:b6352555e47f389ed502269bdb537233d0a928b12d9f4caa57e8c707151acd30",
"sha256:b8896394b72ff7dbf38072ad4c2cd59abdd9e388bb55e1c369102beb8e569f9d",
"sha256:bbd05b52d09e78bdc595f229c0481f4f2e1daf3959847322a6b2c1f76119305f",
"sha256:bf00943924cddb0838f8a65f5aae31f6fe2ad64a5d7e6f10a6b900b3f01b0ae0",
"sha256:bf15aae0fa01aeec728ab16b920cf4c6b2793099c71f62f30ff100d6fe8c9859",
"sha256:c09a5b14494532fc3226f5c5f57ef2a651c935ed6a1d2d0f9eff110046725524",
"sha256:c4812802ce4cd6f08189ce0fa8b79e9a96ac941e69e6b3032bb6908baefde2ba",
"sha256:c69c1e19884108e508697919de0cd43e2ca4e9af418962aa235273b3c51a0e37",
"sha256:ce08c04ebb06ce1ac417597c1bb514a3c1b36cf5c286b8c60f23df2e65703bf3",
"sha256:e29b0d944176cf88fe52d1c58f46017b5bddc9cc54ec0fc6fac20043febefc32",
"sha256:e48ab84e0ed364436d5e449c59762c5963f08ad87f6508f4cb7644745b5559a8",
"sha256:eff15f0a1eee678dd9ec747b58ce86edb78b608036ac4e02d8349f5f35202495",
"sha256:fdd62c2be23eeabb9ebd2ad41bf153f5ec48b968885ef14e676515407cd56339"
],
"index": "pypi",
"version": "==0.2.post4"
},
"dateparser": {
"hashes": [
"sha256:faa2b97f51f3b5ff1ba2f17be90de2b733fb6191f89b4058787473e8202f3044",
"sha256:fec344db1f73d005182e214c0ff27313c748bbe0c1638ce9d48a809ddfdab2a0"
"sha256:038196b1f12c7397e38aad3d61588833257f6f552baa63a1499e6987fa8d42d9",
"sha256:9600874312ff28a41f96ec7ccdc73be1d1c44435719da47fea3339d55ff5a628"
],
"index": "pypi",
"version": "==1.1.0"
"version": "==1.1.1"
},
"ffmpeg-python": {
"hashes": [
@@ -177,12 +272,9 @@
"markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==0.18.2"
},
"garc": {
"hashes": [
"sha256:6f1da8ccdb30b165b8d9247314b73d1002f60381480e61fdbf108dc9abf3c216"
],
"index": "pypi",
"version": "==2.1"
"gabber": {
"git": "https://github.com/stanfordio/gabber.git",
"ref": "d80c44c488ad4e087ba4c8f033802fe2071843bd"
},
"gogettr": {
"hashes": [
@@ -192,66 +284,89 @@
"index": "pypi",
"version": "==0.8.0"
},
"google-auth": {
"hashes": [
"sha256:3ba4d63cb29c1e6d5ffcc1c0623c03cf02ede6240a072f213084749574e691ab",
"sha256:60d449f8142c742db760f4c0be39121bc8d9be855555d784c252deaca1ced3f5"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'",
"version": "==2.6.2"
},
"google-auth-oauthlib": {
"hashes": [
"sha256:24f67735513c4c7134dbde2f1dee5a1deb6acc8dfcb577d7bff30d213a28e7b0",
"sha256:30596b824fc6808fdaca2f048e4998cc40fb4b3599eaea66d28dc7085b36c5b8"
],
"markers": "python_version >= '3.6'",
"version": "==0.5.1"
},
"greenlet": {
"hashes": [
"sha256:0051c6f1f27cb756ffc0ffbac7d2cd48cb0362ac1736871399a739b2885134d3",
"sha256:00e44c8afdbe5467e4f7b5851be223be68adb4272f44696ee71fe46b7036a711",
"sha256:013d61294b6cd8fe3242932c1c5e36e5d1db2c8afb58606c5a67efce62c1f5fd",
"sha256:049fe7579230e44daef03a259faa24511d10ebfa44f69411d99e6a184fe68073",
"sha256:14d4f3cd4e8b524ae9b8aa567858beed70c392fdec26dbdb0a8a418392e71708",
"sha256:166eac03e48784a6a6e0e5f041cfebb1ab400b394db188c48b3a84737f505b67",
"sha256:17ff94e7a83aa8671a25bf5b59326ec26da379ace2ebc4411d690d80a7fbcf23",
"sha256:1e12bdc622676ce47ae9abbf455c189e442afdde8818d9da983085df6312e7a1",
"sha256:21915eb821a6b3d9d8eefdaf57d6c345b970ad722f856cd71739493ce003ad08",
"sha256:288c6a76705dc54fba69fbcb59904ae4ad768b4c768839b8ca5fdadec6dd8cfd",
"sha256:2bde6792f313f4e918caabc46532aa64aa27a0db05d75b20edfc5c6f46479de2",
"sha256:32ca72bbc673adbcfecb935bb3fb1b74e663d10a4b241aaa2f5a75fe1d1f90aa",
"sha256:356b3576ad078c89a6107caa9c50cc14e98e3a6c4874a37c3e0273e4baf33de8",
"sha256:40b951f601af999a8bf2ce8c71e8aaa4e8c6f78ff8afae7b808aae2dc50d4c40",
"sha256:572e1787d1460da79590bf44304abbc0a2da944ea64ec549188fa84d89bba7ab",
"sha256:58df5c2a0e293bf665a51f8a100d3e9956febfbf1d9aaf8c0677cf70218910c6",
"sha256:64e6175c2e53195278d7388c454e0b30997573f3f4bd63697f88d855f7a6a1fc",
"sha256:7227b47e73dedaa513cdebb98469705ef0d66eb5a1250144468e9c3097d6b59b",
"sha256:7418b6bfc7fe3331541b84bb2141c9baf1ec7132a7ecd9f375912eca810e714e",
"sha256:7cbd7574ce8e138bda9df4efc6bf2ab8572c9aff640d8ecfece1b006b68da963",
"sha256:7ff61ff178250f9bb3cd89752df0f1dd0e27316a8bd1465351652b1b4a4cdfd3",
"sha256:833e1551925ed51e6b44c800e71e77dacd7e49181fdc9ac9a0bf3714d515785d",
"sha256:8639cadfda96737427330a094476d4c7a56ac03de7265622fcf4cfe57c8ae18d",
"sha256:8c5d5b35f789a030ebb95bff352f1d27a93d81069f2adb3182d99882e095cefe",
"sha256:8c790abda465726cfb8bb08bd4ca9a5d0a7bd77c7ac1ca1b839ad823b948ea28",
"sha256:8d2f1fb53a421b410751887eb4ff21386d119ef9cde3797bf5e7ed49fb51a3b3",
"sha256:903bbd302a2378f984aef528f76d4c9b1748f318fe1294961c072bdc7f2ffa3e",
"sha256:93f81b134a165cc17123626ab8da2e30c0455441d4ab5576eed73a64c025b25c",
"sha256:95e69877983ea39b7303570fa6760f81a3eec23d0e3ab2021b7144b94d06202d",
"sha256:9633b3034d3d901f0a46b7939f8c4d64427dfba6bbc5a36b1a67364cf148a1b0",
"sha256:97e5306482182170ade15c4b0d8386ded995a07d7cc2ca8f27958d34d6736497",
"sha256:9f3cba480d3deb69f6ee2c1825060177a22c7826431458c697df88e6aeb3caee",
"sha256:aa5b467f15e78b82257319aebc78dd2915e4c1436c3c0d1ad6f53e47ba6e2713",
"sha256:abb7a75ed8b968f3061327c433a0fbd17b729947b400747c334a9c29a9af6c58",
"sha256:aec52725173bd3a7b56fe91bc56eccb26fbdff1386ef123abb63c84c5b43b63a",
"sha256:b11548073a2213d950c3f671aa88e6f83cda6e2fb97a8b6317b1b5b33d850e06",
"sha256:b1692f7d6bc45e3200844be0dba153612103db241691088626a33ff1f24a0d88",
"sha256:b336501a05e13b616ef81ce329c0e09ac5ed8c732d9ba7e3e983fcc1a9e86965",
"sha256:b8c008de9d0daba7b6666aa5bbfdc23dcd78cafc33997c9b7741ff6353bafb7f",
"sha256:b92e29e58bef6d9cfd340c72b04d74c4b4e9f70c9fa7c78b674d1fec18896dc4",
"sha256:be5f425ff1f5f4b3c1e33ad64ab994eed12fc284a6ea71c5243fd564502ecbe5",
"sha256:dd0b1e9e891f69e7675ba5c92e28b90eaa045f6ab134ffe70b52e948aa175b3c",
"sha256:e30f5ea4ae2346e62cedde8794a56858a67b878dd79f7df76a0767e356b1744a",
"sha256:e6a36bb9474218c7a5b27ae476035497a6990e21d04c279884eb10d9b290f1b1",
"sha256:e859fcb4cbe93504ea18008d1df98dee4f7766db66c435e4882ab35cf70cac43",
"sha256:eb6ea6da4c787111adf40f697b4e58732ee0942b5d3bd8f435277643329ba627",
"sha256:ec8c433b3ab0419100bd45b47c9c8551248a5aee30ca5e9d399a0b57ac04651b",
"sha256:eff9d20417ff9dcb0d25e2defc2574d10b491bf2e693b4e491914738b7908168",
"sha256:f0214eb2a23b85528310dad848ad2ac58e735612929c8072f6093f3585fd342d",
"sha256:f276df9830dba7a333544bd41070e8175762a7ac20350786b322b714b0e654f5",
"sha256:f3acda1924472472ddd60c29e5b9db0cec629fbe3c5c5accb74d6d6d14773478",
"sha256:f70a9e237bb792c7cc7e44c531fd48f5897961701cdaa06cf22fc14965c496cf",
"sha256:f9d29ca8a77117315101425ec7ec2a47a22ccf59f5593378fc4077ac5b754fce",
"sha256:fa877ca7f6b48054f847b61d6fa7bed5cebb663ebc55e018fda12db09dcc664c",
"sha256:fdcec0b8399108577ec290f55551d926d9a1fa6cad45882093a7a07ac5ec147b"
"sha256:004aed447382d80a56ecc354a6d807f305e6c808714ce6ccbca4839c94fae81d",
"sha256:068d68fad6bd623e29a2d36e74538c9b9d6dc6464931cd27d93da6cfc6a7f242",
"sha256:06fd4075754009c9817c6b4e1dc0af4616de52757b6ca973a81c3c1aadc28257",
"sha256:1004cb542451814b12a4f38e835a47734e2b2c683acbf463d5ae76282a3974cf",
"sha256:10c358633a8b27bfc32d27114ef2ca2ddc9f1f89f1643d1157b85e1fdd695315",
"sha256:115bc25fefbdc692c4483e9ddb9011ccd0251590ed59dbfff0f4eb7050bf99c4",
"sha256:1d987a2579336792f73ae6b106c2f087e32afc8573fbf9566f123ac6d8cfb72f",
"sha256:2128d727fd1e8afba8e68feb2cdcf88c90163b69ddc9707722a3e491c5280720",
"sha256:230132c241fe284f93f2e7b3969e9b22bbd76ef98cf93e382c945d378907f5a4",
"sha256:23558f7bd08a663386c032ab8d302d613d2d02ae0c9758ad410bab6035b58d3d",
"sha256:255d520d3e4a5f16883b182e1a94219fe455ab4f50aaaf534bfd6d64ee728397",
"sha256:2a6bc19a728f6f643cfc89b876159a1a25a8f7d8700c013d48a73691f80b4550",
"sha256:379bed346ef8ba0a0e698b3c5975a44d15dd4a5bbff40bbd7fd548b445d5550b",
"sha256:3b12d0866759db93b0a893b4e50a7d7d1681519d2346c26695bb8bb2c652230e",
"sha256:40d491944f69e350e1e8b25f6ca49459824ede1678ec0cd4b5541f41edc06614",
"sha256:471484c7b9d7b7867263051aa81cdeed6e06b455e629a7f05eb91a6cb8bd0836",
"sha256:488c557080557bc01aabb3e1bda7225c68455b853733a8652857ac0d810dad1b",
"sha256:49c2e76e7aa81ba889b3c183e2341af3cc6161ee38852085110ae49d5b5d9a40",
"sha256:52d13ec90236e5935ed6da044e78faa1371d5116cc43fe6d7ca8994dd619ef96",
"sha256:57898c69a253d81f487787bdd538629fabd671fab8a9e31b041ca30965fd9556",
"sha256:5d577eef5beb5730ef01ab39983eb852a97c359b7a546809adf70c409f4b2ecc",
"sha256:6a41987c1474c9158a0c0c96611530a8f299bc547d35bee8add981b8b2534f74",
"sha256:6ae67b7df8db3626af8e042e9c6949cfa27d1a3bbbfdff29e45b72bb6673a650",
"sha256:6c42c27e9d12e8a481aff469ffe8dd4ce0484c354a418470960f760f6ae41e7c",
"sha256:6c4a90c9f6128b4d0905a89930bd325e0491574e5cb453f606bb7094a3197587",
"sha256:6e64518e5833ac2d9359b6d9bd4df2c0cf441a0f3a4eca9e735fbea99009fa70",
"sha256:6fd3a270c23c5b42d86a9c7c6b0229f23ee4a7a4cabdaaa1693ad7a0982d13cb",
"sha256:70db73351e0fcf11a76288c47a0469d9a330bcb2e7618c5eb57432b8caa82403",
"sha256:771f401692046845626cbdf1dd0f04e999413ede0ee9ad39033fe30b5fa2e845",
"sha256:7935026ec61b967cbc6b746c0ca75c1651ea118d7fee4d259cff9e6866153374",
"sha256:7b76b1cac9baac1980210e29145800954e7b42e91ef69c4d695de1cab87ce41f",
"sha256:7e3f37c11b6699b1a1e0fcc0e88829dba4f2866546381b05ab8b3f4db645a823",
"sha256:8370fa65ad421484894f559055f951843754153b72b9bca2ebdc5288efe2e3f0",
"sha256:8ae9c443d44a4e23252632e4d7775f419f992d0df3eff923e23775f5cc551d39",
"sha256:8b31d85f2781e44f1ffaaf7ea07f484e7d42317c677c355fa77b4a1a4bea7394",
"sha256:8b450336b27f3b375cadc474c6704838eaa8dd3ca312aac3bb69d92264a8e638",
"sha256:9ce84357388a76d886febff4e50e321c212ffd3248b590960b2da6e02404a5c9",
"sha256:a23e986fb0ba8e7407286add41fa0d4207be44e3dce1b04789f4757800eca1cf",
"sha256:a81610ee00d0da9cd2c8679479b7791149365b6dfb3971b01b22ee29b04787ce",
"sha256:b4e40444975e5ab0ed3004369209c39a28e084951daaeee4919f164b6b849b14",
"sha256:b66600de16702b9dfa74bea34524b55183a2183e5fd92f20fe6c2fcae550a64c",
"sha256:ba6ee18694d3673796b7a31b7d21254e87e9e43ca5be56f323fd396111255315",
"sha256:bd03837da28293baa39bdfc3cada69e2f8807f423ae06168aa28d2b32c63a6b6",
"sha256:bd2192070f88c0778ae1d68a0980fdece3473498c1db37f3794e3454f91e3ecf",
"sha256:c1f6f1a3cc013012cd1da913c40b13e6d721046a8c8a0ea0cde94069645a75db",
"sha256:ce10a8e7e067bde3c1fbf494d2b8859db510206030b0b67bc3af90b0eb1887b9",
"sha256:d31386d208303a5a6cf0819ef9f6db6680bab9e4ca8e48adb3d4b26ead89beb7",
"sha256:d83b3af53b201970973c5574b39df226746194063bb248a53fd12b470ac34319",
"sha256:df9657b212c054ac6d803290d7c4bcd7790af0b725984fce1eeb0a1e3f2d9798",
"sha256:e576e5fd3f129e6b3595dc734ac7f2b8c548f19ef07781194bc538dc9c0cdbbc",
"sha256:e7400358558094c1bcedc75f3b3c4f400c53130b44833848890a99968dee6a64",
"sha256:eb6a385f8577d30e4cb43dd555fb134ddaae1edeb84205e09dabec332bf49fd0",
"sha256:f27f0875e0873f6bf5df09a456bfcac0667824cabac4cad30b43f36e0382ffe7",
"sha256:fcd4a6d04995f1d66bc78b503e4e59ae72fd32aaec4f661657fe5ae5c1aa4ce3"
],
"markers": "python_version >= '3' and platform_machine == 'aarch64' or (platform_machine == 'ppc64le' or (platform_machine == 'x86_64' or (platform_machine == 'amd64' or (platform_machine == 'AMD64' or (platform_machine == 'win32' or platform_machine == 'WIN32')))))",
"version": "==1.1.2"
"version": "==2.0.0a2"
},
"gspread": {
"hashes": [
"sha256:a347197628fa1885dcc860701fb1b3f5471386aa863a71cfe232b6473c6fea1b",
"sha256:be2220e19723570ed98e8b8eb6a5b6e04afa0f08ec1f08b89e217c354488a047"
],
"index": "pypi",
"version": "==5.3.0"
},
"idna": {
"hashes": [
@@ -261,35 +376,28 @@
"markers": "python_version >= '3'",
"version": "==3.3"
},
"iniconfig": {
"hashes": [
"sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3",
"sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"
],
"version": "==1.1.1"
},
"instaloader": {
"hashes": [
"sha256:9615a12a5a01a8b6c9d99a2a047b21d81b341cfd77656b9261bda30ece0cd562"
"sha256:7fa6147810eedcc1dedcdec8cfa1f220c9379ab8faeab6a336a7c181d944e2e4"
],
"index": "pypi",
"version": "==4.8.4"
"version": "==4.9"
},
"jmespath": {
"hashes": [
"sha256:b85d0567b8666149a93172712e68920734333c0ce7e89b78b3e987f71e5ed4f9",
"sha256:cdf6525904cc597730141d61b36f2e4b8ecc257c420fa2f4549bac2c2d0cb72f"
"sha256:a490e280edd1f57d6de88636992d05b71e97d69a26a19f058ecf7d304474bf5e",
"sha256:e8dcd576ed616f14ec02eed0005c85973b5890083313860136657e24784e4c04"
],
"markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==0.10.0"
"markers": "python_version >= '3.7'",
"version": "==1.0.0"
},
"loguru": {
"hashes": [
"sha256:066bd06758d0a513e9836fd9c6b5a75bfb3fd36841f4b996bc60b547a309d41c",
"sha256:4e2414d534a2ab57573365b3e6d0234dfb1d84b68b7f3b948e6fb743860a77c3"
"sha256:b28e72ac7a98be3d28ad28570299a393dfcd32e5e3f6a353dec94675767b6319",
"sha256:f8087ac396b5ee5f67c963b495d615ebbceac2796379599820e324419d53667c"
],
"index": "pypi",
"version": "==0.6.0"
"version": "==0.5.3"
},
"lxml": {
"hashes": [
@@ -392,6 +500,14 @@
"markers": "python_version < '3.10' and platform_machine != 'aarch64' and platform_machine != 'arm64'",
"version": "==1.22.3"
},
"oauthlib": {
"hashes": [
"sha256:23a8208d75b902797ea29fd31fa80a15ed9dc2c6c16fe73f5d346f83f6fa27a2",
"sha256:6db33440354787f9b7f3a6dbd4febf5d0f93758354060e802f6c06cb493022fe"
],
"markers": "python_version >= '3.6'",
"version": "==3.2.0"
},
"packaging": {
"hashes": [
"sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb",
@@ -468,25 +584,9 @@
"markers": "python_version >= '3.7'",
"version": "==9.0.1"
},
"pluggy": {
"hashes": [
"sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159",
"sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"
],
"markers": "python_version >= '3.6'",
"version": "==1.0.0"
},
"polyphemus": {
"git": "https://github.com/bellingcat/polyphemus.git",
"ref": "c85dea215ae720e3df71d2ed1aaa82f7b8a6a2ed"
},
"py": {
"hashes": [
"sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719",
"sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
"version": "==1.11.0"
"ref": "00a5123a3768a55ffe29f2c803a4181895f17890"
},
"pyaes": {
"hashes": [
@@ -512,6 +612,31 @@
],
"version": "==0.4.8"
},
"pyasn1-modules": {
"hashes": [
"sha256:0845a5582f6a02bb3e1bde9ecfc4bfcae6ec3210dd270522fee602365430c3f8",
"sha256:0fe1b68d1e486a1ed5473f1302bd991c1611d319bba158e98b106ff86e1d7199",
"sha256:15b7c67fabc7fc240d87fb9aabf999cf82311a6d6fb2c70d00d3d0604878c811",
"sha256:426edb7a5e8879f1ec54a1864f16b882c2837bfd06eee62f2c982315ee2473ed",
"sha256:65cebbaffc913f4fe9e4808735c95ea22d7a7775646ab690518c056784bc21b4",
"sha256:905f84c712230b2c592c19470d3ca8d552de726050d1d1716282a1f6146be65e",
"sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74",
"sha256:a99324196732f53093a84c4369c996713eb8c89d360a496b599fb1a9c47fc3eb",
"sha256:b80486a6c77252ea3a3e9b1e360bc9cf28eaac41263d173c032581ad2f20fe45",
"sha256:c29a5e5cc7a3f05926aff34e097e84f8589cd790ce0ed41b67aed6857b26aafd",
"sha256:cbac4bc38d117f2a49aeedec4407d23e8866ea4ac27ff2cf7fb3e5b570df19e0",
"sha256:f39edd8c4ecaa4556e989147ebf219227e2cd2e8a43c7e7fcb1f1c18c5fd6a3d",
"sha256:fe0644d9ab041506b62782e92b06b8c68cca799e1a9636ec398675459e031405"
],
"version": "==0.2.8"
},
"pycparser": {
"hashes": [
"sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9",
"sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"
],
"version": "==2.21"
},
"pycryptodomex": {
"hashes": [
"sha256:1ca8e1b4c62038bb2da55451385246f51f412c5f5eabd64812c01766a5989b4a",
@@ -573,14 +698,6 @@
"index": "pypi",
"version": "==0.3.9"
},
"pytest": {
"hashes": [
"sha256:b555252a95bbb2a37a97b5ac2eb050c436f7989993565f5e0c9128fcaacadd0e",
"sha256:f1089d218cfcc63a212c42896f1b7fbf096874d045e1988186861a1a87d27b47"
],
"markers": "python_version >= '3.7'",
"version": "==7.1.0"
},
"python-dateutil": {
"hashes": [
"sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86",
@@ -591,10 +708,10 @@
},
"pytz": {
"hashes": [
"sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c",
"sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326"
"sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7",
"sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c"
],
"version": "==2021.3"
"version": "==2022.1"
},
"pytz-deprecation-shim": {
"hashes": [
@@ -604,6 +721,12 @@
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'",
"version": "==0.1.0.post0"
},
"ratelimit": {
"hashes": [
"sha256:af8a9b64b821529aca09ebaf6d8d279100d766f19e90b5059ac6a718ca6dee42"
],
"version": "==2.2.1"
},
"regex": {
"hashes": [
"sha256:0008650041531d0eadecc96a73d37c2dc4821cf51b0766e374cb4f1ddc4e1c14",
@@ -685,9 +808,6 @@
"version": "==2022.3.2"
},
"requests": {
"extras": [
"socks"
],
"hashes": [
"sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61",
"sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"
@@ -695,6 +815,14 @@
"index": "pypi",
"version": "==2.27.1"
},
"requests-oauthlib": {
"hashes": [
"sha256:2577c501a2fb8d05a304c09d090d6e47c306fef15809d102b327cf8364bddab5",
"sha256:75beac4a47881eeb94d5ea5d6ad31ef88856affe2332b9aafb52c6452ccf0d7a"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==1.3.1"
},
"rsa": {
"hashes": [
"sha256:5c6bd9dc7a543b7fe4304a631f8a8a3b674e2bbfc49c2ae96200cdbe55df6b17",
@@ -721,7 +849,7 @@
},
"snscrape": {
"git": "https://github.com/bellingcat/snscrape.git",
"ref": "de4ebed81f3f6a4bb4c65630daab6ec63784959b"
"ref": "fb8d73ac95011b7ad848a6048d3eed1880e80f21"
},
"soupsieve": {
"hashes": [
@@ -780,21 +908,21 @@
"index": "pypi",
"version": "==1.24.0"
},
"tomli": {
"tqdm": {
"hashes": [
"sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc",
"sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"
"sha256:4230a49119a416c88cc47d0d2d32d5d90f1a282d5e497d49801950704e49863d",
"sha256:6461b009d6792008d0000e1b0c7ca50195ec78c0e808a3a6b668a56a3236c3a5"
],
"markers": "python_version >= '3.7'",
"version": "==2.0.1"
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==4.63.1"
},
"tzdata": {
"hashes": [
"sha256:3eee491e22ebfe1e5cfcc97a4137cd70f092ce59144d81f8924a844de05ba8f5",
"sha256:68dbe41afd01b867894bbdfd54fa03f468cfa4f0086bfb4adcd8de8f24f3ee21"
"sha256:238e70234214138ed7b4e8a0fab0e5e13872edab3be586ab8198c407620e2ab9",
"sha256:8b536a8ec63dc0751342b3984193a3118f8fca2afe25752bb9b7fffd398552d3"
],
"markers": "python_version >= '3.6'",
"version": "==2021.5"
"version": "==2022.1"
},
"tzlocal": {
"hashes": [
@@ -806,11 +934,11 @@
},
"urllib3": {
"hashes": [
"sha256:000ca7f471a233c2251c6c7023ee85305721bfdf18621ebff4fd17a8653427ed",
"sha256:0e7c33d9a63e7ddfcb86780aac87befc2fbddf46c58dbb487e0855f7ceec283c"
"sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14",
"sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
"version": "==1.26.8"
"version": "==1.26.9"
},
"websockets": {
"hashes": [
@@ -899,6 +1027,35 @@
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==2.9.1"
},
"black": {
"hashes": [
"sha256:06f9d8846f2340dfac80ceb20200ea5d1b3f181dd0556b47af4e8e0b24fa0a6b",
"sha256:10dbe6e6d2988049b4655b2b739f98785a884d4d6b85bc35133a8fb9a2233176",
"sha256:2497f9c2386572e28921fa8bec7be3e51de6801f7459dffd6e62492531c47e09",
"sha256:30d78ba6bf080eeaf0b7b875d924b15cd46fec5fd044ddfbad38c8ea9171043a",
"sha256:328efc0cc70ccb23429d6be184a15ce613f676bdfc85e5fe8ea2a9354b4e9015",
"sha256:35020b8886c022ced9282b51b5a875b6d1ab0c387b31a065b84db7c33085ca79",
"sha256:5795a0375eb87bfe902e80e0c8cfaedf8af4d49694d69161e5bd3206c18618bb",
"sha256:5891ef8abc06576985de8fa88e95ab70641de6c1fca97e2a15820a9b69e51b20",
"sha256:637a4014c63fbf42a692d22b55d8ad6968a946b4a6ebc385c5505d9625b6a464",
"sha256:67c8301ec94e3bcc8906740fe071391bce40a862b7be0b86fb5382beefecd968",
"sha256:6d2fc92002d44746d3e7db7cf9313cf4452f43e9ea77a2c939defce3b10b5c82",
"sha256:6ee227b696ca60dd1c507be80a6bc849a5a6ab57ac7352aad1ffec9e8b805f21",
"sha256:863714200ada56cbc366dc9ae5291ceb936573155f8bf8e9de92aef51f3ad0f0",
"sha256:9b542ced1ec0ceeff5b37d69838106a6348e60db7b8fdd245294dc1d26136265",
"sha256:a6342964b43a99dbc72f72812bf88cad8f0217ae9acb47c0d4f141a6416d2d7b",
"sha256:ad4efa5fad66b903b4a5f96d91461d90b9507a812b3c5de657d544215bb7877a",
"sha256:bc58025940a896d7e5356952228b68f793cf5fcb342be703c3a2669a1488cb72",
"sha256:cc1e1de68c8e5444e8f94c3670bb48a2beef0e91dddfd4fcc29595ebd90bb9ce",
"sha256:cee3e11161dde1b2a33a904b850b0899e0424cc331b7295f2a9698e79f9a69a0",
"sha256:e3556168e2e5c49629f7b0f377070240bd5511e45e25a4497bb0073d9dda776a",
"sha256:e8477ec6bbfe0312c128e74644ac8a02ca06bcdb8982d4ee06f209be28cdf163",
"sha256:ee8f1f7228cce7dffc2b464f07ce769f478968bfb3dd1254a4c2eeed84928aad",
"sha256:fd57160949179ec517d32ac2ac898b5f20d68ed1a9c977346efbac9c2f1e779d"
],
"index": "pypi",
"version": "==22.3.0"
},
"certifi": {
"hashes": [
"sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872",
@@ -914,6 +1071,14 @@
"markers": "python_version >= '3'",
"version": "==2.0.12"
},
"click": {
"hashes": [
"sha256:19a4baa64da924c5e0cd889aba8e947f280309f1a2ce0947a3e3a7bcb7cc72d6",
"sha256:977c213473c7665d3aa092b41ff12063227751c41d7b17165013e10069cc5cd2"
],
"markers": "python_version >= '3.7'",
"version": "==8.1.0"
},
"coverage": {
"extras": [
"toml"
@@ -1005,11 +1170,11 @@
},
"jinja2": {
"hashes": [
"sha256:077ce6014f7b40d03b47d1f1ca4b0fc8328a692bd284016f806ed0eaca390ad8",
"sha256:611bb273cd68f3b993fabdc4064fc858c5b47a973cb5aa7999ec1ba405c87cd7"
"sha256:539835f51a74a69f41b848a9645dbdc35b4f20a3b601e2d9a7e22947b15ff119",
"sha256:640bed4bb501cbd17194b3cace1dc2126f5b619cf068a726b98192a0fde74ae9"
],
"markers": "python_version >= '3.6'",
"version": "==3.0.3"
"markers": "python_version >= '3.7'",
"version": "==3.1.1"
},
"markupsafe": {
"hashes": [
@@ -1057,6 +1222,13 @@
"markers": "python_version >= '3.7'",
"version": "==2.1.1"
},
"mypy-extensions": {
"hashes": [
"sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d",
"sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"
],
"version": "==0.4.3"
},
"packaging": {
"hashes": [
"sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb",
@@ -1065,6 +1237,21 @@
"markers": "python_version >= '3.6'",
"version": "==21.3"
},
"pathspec": {
"hashes": [
"sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a",
"sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1"
],
"version": "==0.9.0"
},
"platformdirs": {
"hashes": [
"sha256:7535e70dfa32e84d4b34996ea99c5e432fa29a708d0f4e394bbcb2a8faa4f16d",
"sha256:bcae7cab893c2d310a711b70b24efb93334febe65f8de776ee320b517471e227"
],
"markers": "python_version >= '3.7'",
"version": "==2.5.1"
},
"pluggy": {
"hashes": [
"sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159",
@@ -1099,11 +1286,11 @@
},
"pytest": {
"hashes": [
"sha256:b555252a95bbb2a37a97b5ac2eb050c436f7989993565f5e0c9128fcaacadd0e",
"sha256:f1089d218cfcc63a212c42896f1b7fbf096874d045e1988186861a1a87d27b47"
"sha256:841132caef6b1ad17a9afde46dc4f6cfa59a05f9555aae5151f73bdf2820ca63",
"sha256:92f723789a8fdd7180b6b06483874feca4c48a5c76968e03bb3e7f806a1869ea"
],
"markers": "python_version >= '3.7'",
"version": "==7.1.0"
"index": "pypi",
"version": "==7.1.1"
},
"pytest-cov": {
"hashes": [
@@ -1123,23 +1310,20 @@
},
"pytest-metadata": {
"hashes": [
"sha256:576055b8336dd4a9006dd2a47615f76f2f8c30ab12b1b1c039d99e834583523f",
"sha256:71b506d49d34e539cc3cfdb7ce2c5f072bea5c953320002c95968e0238f8ecf1"
"sha256:141ba561a17659cda00cf74e7c7cf6103bab4550acad76a46f893339de63b1df",
"sha256:5cdb6aeea8ba9109181cf9f149c8a3ae1430ff7e44506a8f866af8a98ca46301"
],
"index": "pypi",
"version": "==1.11.0"
"version": "==2.0.1"
},
"pytz": {
"hashes": [
"sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c",
"sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326"
"sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7",
"sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c"
],
"version": "==2021.3"
"version": "==2022.1"
},
"requests": {
"extras": [
"socks"
],
"hashes": [
"sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61",
"sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"
@@ -1156,11 +1340,11 @@
},
"sphinx": {
"hashes": [
"sha256:5da895959511473857b6d0200f56865ed62c31e8f82dd338063b84ec022701fe",
"sha256:6caad9786055cb1fa22b4a365c1775816b876f91966481765d7d50e9f0dd35cc"
"sha256:7bf8ca9637a4ee15af412d1a1d9689fec70523a68ca9bb9127c2f3eeb344e2e6",
"sha256:ebf612653238bcc8f4359627a9b7ce44ede6fdd75d9d30f68255c7383d3a6226"
],
"index": "pypi",
"version": "==4.4.0"
"version": "==4.5.0"
},
"sphinx-rtd-theme": {
"hashes": [
@@ -1223,16 +1407,24 @@
"sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc",
"sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"
],
"markers": "python_version >= '3.7'",
"markers": "python_full_version < '3.11.0'",
"version": "==2.0.1"
},
"typing-extensions": {
"hashes": [
"sha256:1a9462dcc3347a79b1f1c0271fbe79e844580bb598bafa1ed208b94da3cdcd42",
"sha256:21c85e0fe4b9a155d0799430b0ad741cdce7e359660ccbd8b530613e8df88ce2"
],
"markers": "python_version < '3.10'",
"version": "==4.1.1"
},
"urllib3": {
"hashes": [
"sha256:000ca7f471a233c2251c6c7023ee85305721bfdf18621ebff4fd17a8653427ed",
"sha256:0e7c33d9a63e7ddfcb86780aac87befc2fbddf46c58dbb487e0855f7ceec283c"
"sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14",
"sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
"version": "==1.26.8"
"version": "==1.26.9"
},
"zipp": {
"hashes": [

View File

@@ -1,5 +1,5 @@
from cisticola.utils import make_request
from .base import Scraper, ScraperController
from .base import Scraper, ScraperController, ChannelDoesNotExistError
from .bitchute import BitchuteScraper
from .gab import GabScraper
from .gettr import GettrScraper

View File

@@ -412,4 +412,7 @@ class ScraperController:
"""
mapper_registry.metadata.drop_all(bind=self.engine)
self.connect_to_db(self.engine)
self.connect_to_db(self.engine)
class ChannelDoesNotExistError(Exception):
    """Error for a channel that cannot be found.

    Signals that the specified channel does not exist on the platform or
    has been deleted.
    """

View File

@@ -58,12 +58,56 @@ class BitchuteScraper(Scraper):
date=datetime.fromtimestamp(post['timestamp']),
date_archived=datetime.now(timezone.utc),
raw_data=json.dumps(post),
archived_urls=archived_urls)
archived_urls=archived_urls,
media_archived=archive_media)
def can_handle(self, channel):
if channel.platform == "Bitchute" and self.get_username_from_url(channel.url) is not None:
return True
def get_profile(self, channel: Channel) -> dict:
    """Scrape profile metadata for a Bitchute channel.

    Fetches the page at ``channel.url``, then POSTs the page's CSRF tokens
    to the ``counts/`` endpoint under the page's canonical URL to obtain the
    subscriber and view counts. The remaining fields are read from the
    "about" section and the owner paragraph of the page HTML.

    Returns a dict with the keys ``description``, ``description_links``,
    ``created``, ``videos``, ``owner_url``, ``owner_name``, ``image``,
    ``subscribers`` and ``views``.

    Lookups on page elements are not guarded, so a page that lacks any of
    the expected elements raises (``TypeError``, ``AttributeError`` or
    ``KeyError``) instead of returning a partial profile.
    """
    base_url = channel.url

    # Both requests share one session (the POST needs the cookies set by the
    # GET); the context manager closes the session once the counts are read.
    with requests.session() as session:
        response = session.get(base_url)
        soup = BeautifulSoup(response.content, 'html.parser')

        canonical_url = soup.find('link', {'id' : 'canonical'})['href']
        csrftoken = session.cookies['csrftoken']
        csrfmiddlewaretoken = soup.find('input', {'name' : 'csrfmiddlewaretoken'})['value']

        about_soup = soup.find('div', {'id' : 'channel-about'})
        info_list = about_soup.find('div', {'class' : 'channel-about-details'}).find_all('p')
        description_soup = about_soup.find('div', {'id' : 'channel-description'})

        headers = {'Referer': base_url}
        data = {
            'csrftoken': csrftoken,
            'csrfmiddlewaretoken': csrfmiddlewaretoken}
        response = session.post(canonical_url + 'counts/', data = data, headers = headers)
        counts = json.loads(response.text)

    owner_soup = soup.find('p', {'class' : 'owner'})

    # When the owner name is rendered as the e-mail protection placeholder,
    # the real text is recovered from the encoded ``data-cfemail`` attribute.
    if owner_soup.text == '[email\xa0protected]':
        owner_name = decode_cfemail(owner_soup.find('span', {'class': "__cf_email__"})['data-cfemail'])
    else:
        owner_name = owner_soup.text

    profile = {
        'description' : description_soup.text.strip(),
        'description_links' : [a['href'] for a in description_soup.find_all('a', href = True)],
        # Collapse every whitespace character in the creation text to a plain space
        'created': re.sub(r'\s', ' ', info_list[0].text.split('Created')[1].strip('. ')),
        'videos' : int(info_list[1].text.split('videos')[0].strip()),
        'owner_url' : owner_soup.find('a', href = True)['href'],
        'owner_name' : owner_name,
        'image' : about_soup.find('img', {'alt' : 'Channel Image'}).get('data-src'),
        'subscribers': counts['subscriber_count'],
        # Only the leading token of the view-count string is numeric
        'views': int(counts['about_view_count'].split(' ')[0])}

    return profile
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
def strip_tags(html, convert_newlines=True):
@@ -419,30 +463,20 @@ def get_videos_user(session, user, csrftoken, detail):
# these need to be yielded *after* the video because else the result file will have the comments
# before the video, which is weird
yield comment
#-----------------------------------------------------------------------------#
def get_about(user):
"""
Extract fields from channel's "About" tab
"""
base_url = "https://www.bitchute.com/channel/%s/" % user
def decode_cfemail(cfemail):
response = requests.get(base_url)
soup = BeautifulSoup(response.content, 'html.parser')
"""https://stackoverflow.com/questions/36911296/scraping-of-protected-email
"""
about_soup = soup.find('div', {'id' : 'channel-about'})
info_list = about_soup.find('div', {'class' : 'channel-about-details'}).find_all('p')
description_soup = about_soup.find('div', {'id' : 'channel-description'})
email = ""
k = int(cfemail[:2], 16)
about = {
'description' : description_soup.text,
'description_links' : [a['href'] for a in description_soup.find_all('a', href = True)],
'created': re.sub(r'\s', ' ', info_list[0].text.split('Created')[1].strip('. ')),
'videos' : int(info_list[1].text.split('videos')[0].strip()),
'owner_url' : soup.find('p', {'class' : 'owner'}).find('a', href = True)['href'],
'owner_name' : soup.find('p', {'class' : 'owner'}).text,
'category' : info_list[-1].text.split('Category')[1].strip(),
'image' : about_soup.find('img', {'alt' : 'Channel Image'})['data-src']
}
return about
for i in range(2, len(cfemail)-1, 2):
email += chr(int(cfemail[i:i+2], 16)^k)
return email
#---------------------------------------------------------------------------#

View File

@@ -1,29 +1,53 @@
from datetime import datetime, timezone
from datetime import datetime, timezone, date
import json
from typing import Generator
import os
from garc import Garc
from gabber.client import Client, GAB_API_BASE_URL
from cisticola.base import Channel, ScraperResult
from cisticola.scraper.base import Scraper
class GabScraper(Scraper):
"""An implementation of a Scraper for Gab, using GARC library"""
__version__ = "GabScraper 0.0.1"
"""An implementation of a Scraper for Gab, using gabber library"""
__version__ = "GabScraper 0.0.2"
def get_username_from_url(self, url):
    """Return the part of ``url`` that follows the last ``https://gab.com/`` prefix.

    A URL that does not contain the prefix is returned unchanged.
    """
    _, _, tail = url.rpartition('https://gab.com/')
    return tail
def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
client = Garc(profile = 'main')
username = self.get_username_from_url(channel.url)
def get_group_id_from_url(self, url):
group_id = int(url.split('/')[-1])
scraper = client.userposts(username)
return group_id
def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
client = Client(
username = os.environ['GAB_USER'],
password = os.environ['GAB_PASS'],
threads = 25)
if channel.url.split('/')[-2] == 'groups':
group_id = self.get_group_id_from_url(url = channel.url)
scraper = client.pull_group_posts(
id = group_id,
depth = float('inf'))
else:
username = self.get_username_from_url(channel.url)
result = client._get(GAB_API_BASE_URL + f"/account_by_username/{username}").json()
user_id = int(result['id'])
scraper = client.pull_statuses(
id = user_id,
created_after = date.min,
replies = False)
for post in scraper:
if since is not None and datetime.fromisoformat(post['created_at'].replace("Z", "+00:00")) <= since.date:
if since is not None and datetime.fromisoformat(post['created_at'].replace("Z", "+00:00")).replace(tzinfo=timezone.utc) <= since.date.replace(tzinfo=timezone.utc):
break
media_urls = []
@@ -31,10 +55,18 @@ class GabScraper(Scraper):
if archive_media:
media_urls.extend([p['url'] for p in post['media_attachments']])
if post.get('repost') is not None:
media_urls.extend([p['url'] for p in post['repost']['media_attachments']])
for attachment in post.get('media_attachments'):
if attachment.get('type') == 'video':
media_urls.append(attachment['source_mp4'])
else:
media_urls.append(attachment['url'])
if post.get('reblog') is not None:
for attachment in post['reblog'].get('media_attachments'):
if attachment.get('type') == 'video':
media_urls.append(attachment['source_mp4'])
else:
media_urls.append(attachment['url'])
for url in media_urls:
media_blob, content_type, key = self.url_to_blob(url)
@@ -49,8 +81,29 @@ class GabScraper(Scraper):
date=datetime.fromisoformat(post['created_at'].replace("Z", "+00:00")).replace(tzinfo=timezone.utc),
date_archived=datetime.now(timezone.utc),
raw_data=json.dumps(post),
archived_urls=archived_urls)
archived_urls=archived_urls,
media_archived=archive_media)
def can_handle(self, channel):
def can_handle(self, channel: Channel) -> bool:
if channel.platform == "Gab" and self.get_username_from_url(channel.url) is not None:
return True
return True
def get_profile(self, channel: Channel) -> dict:
    """Fetch profile metadata for a Gab account or group.

    Logs in with the ``GAB_USER`` / ``GAB_PASS`` environment variables. A URL
    whose second-to-last path segment is ``groups`` is treated as a group
    and looked up by its numeric id; any other URL is treated as a user
    account and looked up by username.
    """
    gab_client = Client(
        username = os.environ['GAB_USER'],
        password = os.environ['GAB_PASS'],
        threads = 25)

    is_group = channel.url.split('/')[-2] == 'groups'

    if not is_group:
        username = self.get_username_from_url(channel.url)
        account_response = gab_client._get(GAB_API_BASE_URL + f"/account_by_username/{username}")
        return account_response.json()

    group_id = self.get_group_id_from_url(url = channel.url)
    return gab_client.pull_group(id = group_id)

View File

@@ -59,7 +59,8 @@ class GettrScraper(Scraper):
date=datetime.fromtimestamp(post['cdate']/1000.),
date_archived=datetime.now(timezone.utc),
raw_data=json.dumps(post),
archived_urls=archived_urls)
archived_urls=archived_urls,
media_archived=archive_media)
def can_handle(self, channel):
if channel.platform == "Gettr" and self.get_username_from_url(channel.url) is not None:
@@ -68,4 +69,11 @@ class GettrScraper(Scraper):
def url_to_key(self, url: str, content_type: str) -> str:
ext = '.' + content_type.split('/')[-1]
key = urlparse(url).path.split('/')[-2] + ext
return key
return key
def get_profile(self, channel: Channel) -> dict:
    """Fetch profile metadata for a Gettr user.

    Extracts the username from ``channel.url`` and returns whatever the
    public client's ``user_info`` call yields for that username.
    """
    client = PublicClient()
    username = self.get_username_from_url(channel.url)

    return client.user_info(username)

View File

@@ -80,7 +80,8 @@ class InstagramScraper(Scraper):
date=post.date_utc,
date_archived=datetime.now(timezone.utc),
raw_data=json.dumps(post._asdict(), default=str),
archived_urls=archived_urls)
archived_urls=archived_urls,
media_archived=archive_media)
for comment in post.get_comments():
@@ -96,8 +97,32 @@ class InstagramScraper(Scraper):
date=comment.created_at_utc,
date_archived=datetime.now(timezone.utc),
raw_data=json.dumps(comment_dict, default=str),
archived_urls={})
archived_urls={},
media_archived=archive_media)
def can_handle(self, channel):
if channel.platform == "Instagram" and self.get_username_from_url(channel.url) is not None:
return True
return True
def get_profile(self, channel: Channel) -> dict:
    """Fetch profile metadata for an Instagram account.

    Logs in with the ``INSTAGRAM_USERNAME`` / ``INSTAGRAM_PASSWORD``
    environment variables, loads the profile for the username taken from
    ``channel.url`` and returns its ``_asdict()`` data extended with the
    ``followers`` and ``followees`` values.
    """
    account_name = self.get_username_from_url(channel.url)

    insta = instaloader.Instaloader(
        quiet = True,
        download_comments = False,
        save_metadata = False)
    insta.login(
        user = os.environ['INSTAGRAM_USERNAME'],
        passwd = os.environ['INSTAGRAM_PASSWORD'])

    account = instaloader.Profile.from_username(
        context = insta.context,
        username = account_name)

    # The follower/followee values are added on top of the _asdict() data
    result = account._asdict()
    result['followers'] = account.followers
    result['followees'] = account.followees
    return result

View File

@@ -3,9 +3,11 @@ import json
from typing import Generator
from urllib.parse import urlparse
from polyphemus.base import OdyseeChannel
import requests
from loguru import logger
from polyphemus.base import OdyseeChannel
from polyphemus.api import get_auth_token
from cisticola.base import Channel, ScraperResult
from cisticola.scraper.base import Scraper
@@ -13,6 +15,10 @@ class OdyseeScraper(Scraper):
"""An implementation of a Scraper for Odysee, using polyphemus library"""
__version__ = "OdyseeScraper 0.0.1"
def __init__(self):
    """Initialise the base scraper and obtain an Odysee auth token.

    The token is stored on ``self.auth_token`` for use by later requests.
    """
    super().__init__()

    token = get_auth_token()
    self.auth_token = token
def get_username_from_url(self, url):
username = url.split('odysee.com/')[-1].strip('@').split(':')[0]
@@ -22,12 +28,12 @@ class OdyseeScraper(Scraper):
def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
username = self.get_username_from_url(channel.url)
odysee_channel = OdyseeChannel(channel_name = username)
odysee_channel = OdyseeChannel(channel_name = username, auth_token = self.auth_token)
all_videos = odysee_channel.get_all_videos()
for video in all_videos:
if since is not None and datetime.fromtimestamp(video['created']) <= since.date:
if since is not None and datetime.fromtimestamp(video.info['created']) <= since.date:
break
archived_urls = {}
@@ -55,7 +61,8 @@ class OdyseeScraper(Scraper):
date=datetime.fromtimestamp(video.info['created']),
date_archived=datetime.now(timezone.utc),
raw_data=json.dumps(video.info),
archived_urls=archived_urls)
archived_urls=archived_urls,
media_archived=archive_media)
for comment in all_comments:
@@ -67,7 +74,8 @@ class OdyseeScraper(Scraper):
date=datetime.fromtimestamp(comment.info['created']),
date_archived=datetime.now(),
raw_data=json.dumps(comment.info),
archived_urls={})
archived_urls={},
media_archived=True)
def can_handle(self, channel):
if channel.platform == "Odysee" and self.get_username_from_url(channel.url) is not None:
@@ -77,4 +85,12 @@ class OdyseeScraper(Scraper):
key = urlparse(url).path.split('/')[-2]
ext = content_type.split('/')[-1]
return f'{key}.{ext}'
return f'{key}.{ext}'
def get_profile(self, channel: Channel) -> dict:
    """Return the ``info`` attribute of the Odysee channel named in ``channel.url``.

    The channel object is built with the auth token stored on this scraper.
    """
    channel_name = self.get_username_from_url(channel.url)
    return OdyseeChannel(channel_name = channel_name, auth_token = self.auth_token).info

View File

@@ -14,18 +14,12 @@ class RumbleScraper(Scraper):
"""An implementation of a Scraper for Rumble, using custom functions"""
__version__ = "RumbleScraper 0.0.1"
def get_username_from_url(self, url):
username = url.split('https://rumble.com/c/')[1]
return username
def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
username = self.get_username_from_url(channel.url)
scraper = get_channel_videos(username)
scraper = get_channel_videos(channel.url)
for post in scraper:
if since is not None and datetime.fromtimestamp(post['cdate']*0.001) <= since.date:
if since is not None and post['datetime'].replace(tzinfo=timezone.utc) <= since.date_archived.replace(tzinfo=timezone.utc):
break
archived_urls = {}
@@ -43,10 +37,11 @@ class RumbleScraper(Scraper):
platform="Rumble",
channel=channel.id,
platform_id=post['media_url'].split('/')[-2],
date=datetime.fromisoformat(post['datetime']).replace(tzinfo=timezone.utc),
date=post['datetime'].replace(tzinfo=timezone.utc),
date_archived=datetime.now(timezone.utc),
raw_data=json.dumps(post),
archived_urls=archived_urls)
raw_data=json.dumps(post, default = str),
archived_urls=archived_urls,
media_archived=archive_media)
def url_to_key(self, url: str, content_type: str) -> str:
ext = '.' + content_type.split('/')[-1]
@@ -54,15 +49,21 @@ class RumbleScraper(Scraper):
return key
def can_handle(self, channel):
if channel.platform == "Rumble" and self.get_username_from_url(channel.url) is not None:
if channel.platform == "Rumble" and channel.url is not None:
return True
def get_profile(self, channel: Channel) -> dict:
    """Return the profile scraped from the Rumble page at ``channel.url``."""
    return get_channel_profile(url = channel.url)
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
def get_media_url(url):
r = make_request(url = url)
soup = BeautifulSoup(r.content, features = 'lxml')
soup = BeautifulSoup(r.content, features = 'html.parser')
script = json.loads(''.join(soup.find('script', {'type':'application/ld+json'}).text))
media_url = script[0]['embedUrl']
@@ -84,16 +85,16 @@ def process_video(video):
'views' : video.find('span', {'class' : 'video-item--views'})['data-value'],
'rumbles' : rumbles,
'duration' : video.find('span', {'class' : 'video-item--duration'})['data-value'],
'datetime' : video.find('time')['datetime']}
'datetime' : datetime.fromisoformat(video.find('time')['datetime'])}
info['media_url'] = get_media_url(info['link'])
return info
def get_channel_videos(channel):
def get_channel_videos(url):
page = 1
channel_url = f'{BASE_URL}/c/{channel}?page='
channel_url = f'{url}?page='
while True:
url = channel_url + str(page)
@@ -111,4 +112,22 @@ def get_channel_videos(channel):
page += 1
def get_channel_profile(url):
    """Scrape profile fields from a Rumble channel page.

    Requests ``url`` and reads the channel header from the returned HTML.

    Returns a dict with the keys ``name``, ``verified``, ``thumbnail``,
    ``cover`` and ``subscribers``. ``thumbnail``, ``cover`` and
    ``subscribers`` are ``None`` when the corresponding element is absent.
    """
    r = make_request(url = url)
    # Same parser as get_media_url, so no extra parser backend is required
    soup = BeautifulSoup(r.content, features = 'html.parser')

    # NOTE(review): a page without an <h1> still raises AttributeError below;
    # presumably such a page is not a channel page at all -- confirm.
    header_soup = soup.find('h1')
    verified_svg = header_soup.find('svg', {'class' : 'listing-header--verified'})
    thumbnail_soup = soup.find('img', {'class' : 'listing-header--thumb'})
    cover_soup = soup.find('img', {'class' : 'listing-header--backsplash-img'})
    subscribers_soup = soup.find('span', {'class' : 'subscribe-button-count'})

    profile = {
        'name': header_soup.text,
        'verified': verified_svg is not None,
        'thumbnail': thumbnail_soup.get('src') if thumbnail_soup else None,
        'cover': cover_soup.get('src') if cover_soup else None,
        # Guarded like thumbnail/cover so a missing counter does not abort the scrape
        'subscribers': subscribers_soup.text if subscribers_soup else None}

    return profile
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#

View File

@@ -53,3 +53,11 @@ class TelegramSnscrapeScraper(Scraper):
archived_urls=archived_urls,
media_archived=archive_media
)
def get_profile(self, channel: Channel) -> dict:
    """Fetch profile metadata for a Telegram channel via snscrape.

    Builds a channel scraper for ``channel.screenname`` and returns the
    attribute dict of the entity it resolves.

    Raises ``ChannelDoesNotExistError`` (carrying ``channel.url``) when the
    scraper resolves no entity, matching the Twitter scraper's behaviour.
    """
    # Imported here so the method does not rely on the module's import block
    from cisticola.scraper.base import ChannelDoesNotExistError

    scr = snscrape.modules.telegram.TelegramChannelScraper(
        channel.screenname)
    entity = scr._get_entity()

    if entity is None:
        raise ChannelDoesNotExistError(channel.url)

    return entity.__dict__

View File

@@ -8,6 +8,7 @@ import time
from loguru import logger
from telethon.sync import TelegramClient
from telethon.tl.functions.channels import GetFullChannelRequest
from telethon.tl import types
from cisticola.base import Channel, ScraperResult
@@ -138,3 +139,17 @@ class TelegramTelethonScraper(Scraper):
raw_data=json.dumps(post.to_dict(), default=str),
archived_urls=archived_urls,
media_archived=archive_media)
def get_profile(self, channel: Channel) -> dict:
    """Fetch full channel information for a Telegram channel via Telethon.

    Reads ``TELEGRAM_API_ID``, ``TELEGRAM_API_HASH`` and ``TELEGRAM_PHONE``
    from the environment, issues a ``GetFullChannelRequest`` for the
    username taken from ``channel.url`` and returns the attribute dict of
    the response object.
    """
    channel_name = self.get_username_from_url(channel.url)

    telegram_api_id = os.environ['TELEGRAM_API_ID']
    telegram_api_hash = os.environ['TELEGRAM_API_HASH']
    telegram_phone = os.environ['TELEGRAM_PHONE']

    with TelegramClient(telegram_phone, telegram_api_id, telegram_api_hash) as client:
        request = GetFullChannelRequest(channel = channel_name)
        return vars(client(request))

View File

@@ -2,11 +2,11 @@ from datetime import datetime, timezone
from typing import Generator
from urllib.parse import urlparse, parse_qs
from snscrape.modules.twitter import TwitterProfileScraper, Video, Gif, Photo
from snscrape.modules.twitter import TwitterProfileScraper, TwitterUserScraper, Video, Gif, Photo
from loguru import logger
from cisticola.base import Channel, ScraperResult
from cisticola.scraper.base import Scraper
from cisticola.scraper.base import Scraper, ChannelDoesNotExistError
class TwitterScraper(Scraper):
"""An implementation of a Scraper for Twitter, using snscrape library"""
@@ -67,7 +67,8 @@ class TwitterScraper(Scraper):
date=tweet.date,
date_archived=datetime.now(timezone.utc),
raw_data=tweet.json(),
archived_urls=archived_urls)
archived_urls=archived_urls,
media_archived=archive_media)
def can_handle(self, channel):
if channel.platform == "Twitter" and channel.platform_id:
@@ -88,4 +89,14 @@ class TwitterScraper(Scraper):
ext = ''
key = parsed_url.path.split('/')[-1] + ext
return key
return key
def get_profile(self, channel: Channel) -> dict:
    """Fetch profile metadata for a Twitter user via snscrape.

    Returns the attribute dict of the entity resolved for
    ``channel.screenname``.

    Raises ``ChannelDoesNotExistError`` (carrying ``channel.url``) when no
    entity is resolved.
    """
    user_entity = TwitterUserScraper(channel.screenname)._get_entity()

    # Guard clause: no entity means there is nothing to describe
    if user_entity is None:
        raise ChannelDoesNotExistError(channel.url)

    return vars(user_entity)

View File

@@ -25,7 +25,7 @@ class VkontakteScraper(Scraper):
first = True
for post in scraper.get_items():
if since is not None and post.date.replace(tzinfo=timezone.utc) <= since.date_archived.replace(tzinfo=timezone.utc):
if since is not None and datetime.fromordinal(post.date.toordinal()).replace(tzinfo=timezone.utc) <= since.date_archived.replace(tzinfo=timezone.utc):
# with VKontakteUserScraper, the first post could be an old pinned post
if first:
first = False
@@ -63,7 +63,8 @@ class VkontakteScraper(Scraper):
date=datetime.fromordinal(post.date.toordinal()).replace(tzinfo=timezone.utc),
date_archived=datetime.now(timezone.utc),
raw_data=post.json(),
archived_urls=archived_urls)
archived_urls=archived_urls,
media_archived=archive_media)
def can_handle(self, channel):
if channel.platform == "Vkontakte" and channel.platform_id:
@@ -77,4 +78,12 @@ class VkontakteScraper(Scraper):
ext = '.mp4'
key = path.split('/')[-1] + ext
return key
return key
def get_profile(self, channel: Channel) -> dict:
    """Fetch profile metadata for a VKontakte user via snscrape.

    Builds a user scraper for the username taken from ``channel.url`` and
    returns the attribute dict of the entity it resolves.

    Raises ``ChannelDoesNotExistError`` (carrying ``channel.url``) when the
    scraper resolves no entity, matching the Twitter scraper's behaviour.
    """
    # Imported here so the method does not rely on the module's import block
    from cisticola.scraper.base import ChannelDoesNotExistError

    username = self.get_username_from_url(channel.url)
    scraper = VKontakteUserScraper(username)
    entity = scraper._get_entity()

    if entity is None:
        raise ChannelDoesNotExistError(channel.url)

    return entity.__dict__

View File

@@ -72,8 +72,24 @@ class YoutubeScraper(Scraper):
date=datetime.strptime(video['upload_date'], '%Y%m%d').replace(tzinfo=timezone.utc),
date_archived=datetime.now(timezone.utc),
raw_data=json.dumps(video, default = str),
archived_urls=archived_urls)
archived_urls=archived_urls,
media_archived=archive_media)
def can_handle(self, channel):
if channel.platform == "Youtube" and channel.url:
return True
return True
def get_profile(self, channel: Channel) -> dict:
    """Return yt-dlp's metadata for the channel URL.

    Calls ``extract_info`` on ``channel.url`` with ``process=False`` and
    returns its result as-is.

    ``yt_dlp.utils.DownloadError`` is deliberately not caught here; it
    propagates to the caller.
    """
    ydl_opts = {}
    ydl = yt_dlp.YoutubeDL(ydl_opts)

    return ydl.extract_info(
        channel.url,
        process=False)

View File

@@ -75,12 +75,18 @@ For developers, if changes are made to the package structure or additional modul
Testing
-------
The *cisticola* application uses pytest_ for unit testing. To run the test suite, run the following command from the package root directory:
The *cisticola* application uses pytest_ for unit testing. To run the full test suite, run the following command from the package root directory:
.. code-block::
pipenv run pytest
To run the test suite without archiving media (which can take a long time), run the following command from the package root directory:
.. code-block::
pipenv run pytest -m "not media"
Examples
--------

View File

@@ -11,6 +11,11 @@ addopts =
--cov-report html:reports/coverage
--html='reports/tests.html'
--self-contained-html
markers =
profile: marks tests that only extract channel metadata (deselect with '-m "not profile"')
media: marks tests that archive all media attachments (deselect with '-m "not media"')
filterwarnings =
ignore:the imp module is deprecated:DeprecationWarning
ignore:The localize method is no longer necessary, as this time zone supports the fold attribute

View File

@@ -33,6 +33,19 @@ GAB_CHANNEL_KWARGS = {
'chat': False,
'notes': ''}
GAB_GROUP_KWARGS = {
'name': 'iran group (test)',
'platform_id': 10001,
'category': 'test',
'platform': 'Gab',
'url': 'https://gab.com/groups/10001',
'screenname': 'iran group',
'country': 'IR',
'influencer': None,
'public': True,
'chat': True,
'notes': ''}
GETTR_CHANNEL_KWARGS = {
'name': 'LizardRepublic (test)',
'platform_id': 'lizardrepublic',
@@ -178,6 +191,7 @@ def channel_kwargs():
return {
'bitchute' : BITCHUTE_CHANNEL_KWARGS,
'gab' : GAB_CHANNEL_KWARGS,
'gab_group' : GAB_GROUP_KWARGS,
'gettr' : GETTR_CHANNEL_KWARGS,
'instagram' : INSTAGRAM_CHANNEL_KWARGS,
'odysee' : ODYSEE_CHANNEL_KWARGS,

View File

@@ -1,3 +1,5 @@
import pytest
from cisticola.base import Channel
from cisticola.scraper import BitchuteScraper
@@ -7,6 +9,7 @@ def test_scrape_bitchute_channel_no_media(controller, channel_kwargs):
controller.register_scraper(scraper = BitchuteScraper())
controller.scrape_channels(channels = channels, archive_media = False)
@pytest.mark.media
def test_scrape_bitchute_channel(controller, channel_kwargs):
controller.reset_db()
@@ -14,3 +17,10 @@ def test_scrape_bitchute_channel(controller, channel_kwargs):
channels = [Channel(**channel_kwargs['bitchute'])]
controller.register_scraper(scraper = BitchuteScraper())
controller.scrape_channels(channels = channels, archive_media = True)
@pytest.mark.profile
def test_scrape_bitchute_profile(channel_kwargs):
    """Smoke test: fetching the Bitchute test channel's profile must not raise."""
    bitchute_scraper = BitchuteScraper()
    test_channel = Channel(**channel_kwargs['bitchute'])

    bitchute_scraper.get_profile(channel=test_channel)

View File

@@ -1,3 +1,5 @@
import pytest
from cisticola.base import Channel
from cisticola.scraper import GabScraper
@@ -7,6 +9,7 @@ def test_scrape_gab_channel_no_media(controller, channel_kwargs):
controller.register_scraper(scraper = GabScraper())
controller.scrape_channels(channels = channels, archive_media = False)
@pytest.mark.media
def test_scrape_gab_channel(controller, channel_kwargs):
controller.reset_db()
@@ -14,3 +17,32 @@ def test_scrape_gab_channel(controller, channel_kwargs):
channels = [Channel(**channel_kwargs['gab'])]
controller.register_scraper(scraper = GabScraper())
controller.scrape_channels(channels = channels, archive_media = True)
@pytest.mark.profile
def test_scrape_gab_profile(channel_kwargs):
    """Smoke test: fetching the Gab test account's profile must not raise."""
    gab_scraper = GabScraper()
    test_channel = Channel(**channel_kwargs['gab'])

    gab_scraper.get_profile(channel=test_channel)
def test_scrape_gab_group_no_media(controller, channel_kwargs):
    """Scrape the Gab test group through the controller with media archiving off."""
    gab_group = Channel(**channel_kwargs['gab_group'])

    controller.register_scraper(scraper = GabScraper())
    controller.scrape_channels(channels = [gab_group], archive_media = False)
@pytest.mark.media
def test_scrape_gab_group(controller, channel_kwargs):
    """Scrape the Gab test group from a freshly reset database, archiving media."""
    controller.reset_db()

    gab_group = Channel(**channel_kwargs['gab_group'])

    controller.register_scraper(scraper = GabScraper())
    controller.scrape_channels(channels = [gab_group], archive_media = True)
@pytest.mark.profile
def test_scrape_gab_group_profile(channel_kwargs):
    """Smoke test: fetching the Gab test group's profile must not raise."""
    gab_scraper = GabScraper()
    test_group = Channel(**channel_kwargs['gab_group'])

    gab_scraper.get_profile(channel=test_group)

View File

@@ -1,3 +1,5 @@
import pytest
from cisticola.base import Channel
from cisticola.scraper import GettrScraper
@@ -7,6 +9,7 @@ def test_scrape_gettr_channel_no_media(controller, channel_kwargs):
controller.register_scraper(scraper = GettrScraper())
controller.scrape_channels(channels = channels, archive_media = False)
@pytest.mark.media
def test_scrape_gettr_channel(controller, channel_kwargs):
controller.reset_db()
@@ -14,3 +17,10 @@ def test_scrape_gettr_channel(controller, channel_kwargs):
channels = [Channel(**channel_kwargs['gettr'])]
controller.register_scraper(scraper = GettrScraper())
controller.scrape_channels(channels = channels, archive_media = True)
@pytest.mark.profile
def test_scrape_gettr_profile(channel_kwargs):
    """Smoke test: fetching the Gettr test channel's profile must not raise."""
    gettr_scraper = GettrScraper()
    test_channel = Channel(**channel_kwargs['gettr'])

    gettr_scraper.get_profile(channel=test_channel)

View File

@@ -1,3 +1,5 @@
import pytest
from cisticola.base import Channel
from cisticola.scraper import InstagramScraper
@@ -7,6 +9,7 @@ def test_scrape_instagram_channel_no_media(controller, channel_kwargs):
controller.register_scraper(scraper = InstagramScraper())
controller.scrape_channels(channels = channels, archive_media = False)
@pytest.mark.media
def test_scrape_instagram_channel(controller, channel_kwargs):
controller.reset_db()
@@ -14,3 +17,10 @@ def test_scrape_instagram_channel(controller, channel_kwargs):
channels = [Channel(**channel_kwargs['instagram'])]
controller.register_scraper(scraper = InstagramScraper())
controller.scrape_channels(channels = channels, archive_media = True)
@pytest.mark.profile
def test_scrape_instagram_profile(channel_kwargs):
    """Smoke test: fetching the Instagram test channel's profile must not raise."""
    instagram_scraper = InstagramScraper()
    test_channel = Channel(**channel_kwargs['instagram'])

    instagram_scraper.get_profile(channel=test_channel)

View File

@@ -1,3 +1,5 @@
import pytest
from cisticola.base import Channel
from cisticola.scraper import OdyseeScraper
@@ -7,6 +9,7 @@ def test_scrape_odysee_channel_no_media(controller, channel_kwargs):
controller.register_scraper(scraper = OdyseeScraper())
controller.scrape_channels(channels = channels, archive_media = False)
@pytest.mark.media
def test_scrape_odysee_channel(controller, channel_kwargs):
controller.reset_db()
@@ -14,3 +17,10 @@ def test_scrape_odysee_channel(controller, channel_kwargs):
channels = [Channel(**channel_kwargs['odysee'])]
controller.register_scraper(scraper = OdyseeScraper())
controller.scrape_channels(channels = channels, archive_media = True)
@pytest.mark.profile
def test_scrape_odysee_profile(channel_kwargs):
    """Smoke test: fetching the Odysee test channel's profile must not raise."""
    odysee_scraper = OdyseeScraper()
    test_channel = Channel(**channel_kwargs['odysee'])

    odysee_scraper.get_profile(channel=test_channel)

View File

@@ -1,3 +1,5 @@
import pytest
from cisticola.base import Channel
from cisticola.scraper import RumbleScraper
@@ -7,6 +9,7 @@ def test_scrape_rumble_channel_no_media(controller, channel_kwargs):
controller.register_scraper(scraper = RumbleScraper())
controller.scrape_channels(channels = channels, archive_media = False)
@pytest.mark.media
def test_scrape_rumble_channel(controller, channel_kwargs):
controller.reset_db()
@@ -14,3 +17,10 @@ def test_scrape_rumble_channel(controller, channel_kwargs):
channels = [Channel(**channel_kwargs['rumble'])]
controller.register_scraper(scraper = RumbleScraper())
controller.scrape_channels(channels = channels, archive_media = True)
@pytest.mark.profile
def test_scrape_rumble_profile(channel_kwargs):
    """Smoke test: fetching the Rumble test channel's profile must not raise."""
    rumble_scraper = RumbleScraper()
    test_channel = Channel(**channel_kwargs['rumble'])

    rumble_scraper.get_profile(channel=test_channel)

View File

@@ -1,3 +1,5 @@
import pytest
from cisticola.base import Channel
from cisticola.scraper import TelegramSnscrapeScraper
@@ -7,6 +9,7 @@ def test_scrape_telegram_snscrape_channel_no_media(controller, channel_kwargs):
controller.register_scraper(scraper = TelegramSnscrapeScraper())
controller.scrape_channels(channels = channels, archive_media = False)
@pytest.mark.media
def test_scrape_telegram_snscrape_channel(controller, channel_kwargs):
controller.reset_db()
@@ -14,3 +17,10 @@ def test_scrape_telegram_snscrape_channel(controller, channel_kwargs):
channels = [Channel(**channel_kwargs['telegram'])]
controller.register_scraper(scraper = TelegramSnscrapeScraper())
controller.scrape_channels(channels = channels, archive_media = True)
@pytest.mark.profile
def test_scrape_telegram_snscrape_profile(channel_kwargs):
    """Smoke test: fetching the Telegram test channel's profile via snscrape must not raise."""
    snscrape_scraper = TelegramSnscrapeScraper()
    test_channel = Channel(**channel_kwargs['telegram'])

    snscrape_scraper.get_profile(channel=test_channel)

View File

@@ -1,3 +1,5 @@
import pytest
from cisticola.base import Channel
from cisticola.scraper import TelegramTelethonScraper
@@ -7,6 +9,7 @@ def test_scrape_telegram_telethon_channel_no_media(controller, channel_kwargs):
controller.register_scraper(scraper = TelegramTelethonScraper())
controller.scrape_channels(channels = channels, archive_media = False)
@pytest.mark.media
def test_scrape_telegram_telethon_channel(controller, channel_kwargs):
controller.reset_db()
@@ -14,3 +17,10 @@ def test_scrape_telegram_telethon_channel(controller, channel_kwargs):
channels = [Channel(**channel_kwargs['telegram'])]
controller.register_scraper(scraper = TelegramTelethonScraper())
controller.scrape_channels(channels = channels, archive_media = True)
@pytest.mark.profile
def test_scrape_telegram_telethon_profile(channel_kwargs):
    """Smoke-test ``TelegramTelethonScraper.get_profile`` on the Telegram test channel.

    ``channel_kwargs`` is indexed by platform name; the ``'telegram'`` entry is
    unpacked as keyword arguments into ``Channel``. Nothing is asserted about
    the returned value, so the test passes as long as no exception is raised.
    """
    scraper_under_test = TelegramTelethonScraper()
    telegram_kwargs = channel_kwargs['telegram']
    telegram_channel = Channel(**telegram_kwargs)
    scraper_under_test.get_profile(channel=telegram_channel)

View File

@@ -1,3 +1,5 @@
import pytest
from cisticola.base import Channel
from cisticola.scraper import TwitterScraper
@@ -7,6 +9,7 @@ def test_scrape_twitter_channel_no_media(controller, channel_kwargs):
controller.register_scraper(scraper = TwitterScraper())
controller.scrape_channels(channels = channels, archive_media = False)
@pytest.mark.media
def test_scrape_twitter_channel(controller, channel_kwargs):
controller.reset_db()
@@ -14,3 +17,10 @@ def test_scrape_twitter_channel(controller, channel_kwargs):
channels = [Channel(**channel_kwargs['twitter'])]
controller.register_scraper(scraper = TwitterScraper())
controller.scrape_channels(channels = channels, archive_media = True)
@pytest.mark.profile
def test_scrape_twitter_profile(channel_kwargs):
    """Smoke-test ``TwitterScraper.get_profile`` on the Twitter test channel.

    ``channel_kwargs`` is indexed by platform name; the ``'twitter'`` entry is
    unpacked as keyword arguments into ``Channel``. Nothing is asserted about
    the returned value, so the test passes as long as no exception is raised.
    """
    scraper_under_test = TwitterScraper()
    twitter_kwargs = channel_kwargs['twitter']
    twitter_channel = Channel(**twitter_kwargs)
    scraper_under_test.get_profile(channel=twitter_channel)

View File

@@ -1,3 +1,5 @@
import pytest
from cisticola.base import Channel
from cisticola.scraper import VkontakteScraper
@@ -7,6 +9,7 @@ def test_scrape_vkontakte_channel_no_media(controller, channel_kwargs):
controller.register_scraper(scraper = VkontakteScraper())
controller.scrape_channels(channels = channels, archive_media = False)
@pytest.mark.media
def test_scrape_vkontakte_channel(controller, channel_kwargs):
controller.reset_db()
@@ -14,3 +17,10 @@ def test_scrape_vkontakte_channel(controller, channel_kwargs):
channels = [Channel(**channel_kwargs['vkontakte'])]
controller.register_scraper(scraper = VkontakteScraper())
controller.scrape_channels(channels = channels, archive_media = True)
@pytest.mark.profile
def test_scrape_vkontakte_profile(channel_kwargs):
    """Smoke-test ``VkontakteScraper.get_profile`` on the VKontakte test channel.

    ``channel_kwargs`` is indexed by platform name; the ``'vkontakte'`` entry is
    unpacked as keyword arguments into ``Channel``. Nothing is asserted about
    the returned value, so the test passes as long as no exception is raised.
    """
    scraper_under_test = VkontakteScraper()
    vkontakte_kwargs = channel_kwargs['vkontakte']
    vkontakte_channel = Channel(**vkontakte_kwargs)
    scraper_under_test.get_profile(channel=vkontakte_channel)

View File

@@ -1,3 +1,5 @@
import pytest
from cisticola.base import Channel
from cisticola.scraper import YoutubeScraper
@@ -7,6 +9,7 @@ def test_scrape_youtube_channel_no_media(controller, channel_kwargs):
controller.register_scraper(scraper = YoutubeScraper())
controller.scrape_channels(channels = channels, archive_media = False)
@pytest.mark.media
def test_scrape_youtube_channel(controller, channel_kwargs):
controller.reset_db()
@@ -14,3 +17,10 @@ def test_scrape_youtube_channel(controller, channel_kwargs):
channels = [Channel(**channel_kwargs['youtube'])]
controller.register_scraper(scraper = YoutubeScraper())
controller.scrape_channels(channels = channels, archive_media = True)
@pytest.mark.profile
def test_scrape_youtube_profile(channel_kwargs):
    """Smoke-test ``YoutubeScraper.get_profile`` on the YouTube test channel.

    ``channel_kwargs`` is indexed by platform name; the ``'youtube'`` entry is
    unpacked as keyword arguments into ``Channel``. Nothing is asserted about
    the returned value, so the test passes as long as no exception is raised.
    """
    scraper_under_test = YoutubeScraper()
    youtube_kwargs = channel_kwargs['youtube']
    youtube_channel = Channel(**youtube_kwargs)
    scraper_under_test.get_profile(channel=youtube_channel)

View File

@@ -1,11 +1,14 @@
from sqlalchemy.orm import sessionmaker, with_polymorphic
import json
import pytest
from cisticola.base import Channel
from cisticola.scraper import TwitterScraper
from cisticola.transformer import TwitterTransformer
from cisticola.base import Post, Media
@pytest.mark.media
def test_scrape_etl_twitter(engine, controller, etl_controller, channel_kwargs):
controller.reset_db()