added odysee scraper, minor refactoring of url_to_blob method (added url_to_key method that can be overridden by child classes while still using the parent url_to_blob method) and changed test file to include only channels with a relatively small number of posts, to make testing faster

This commit is contained in:
Tristan Lee
2022-02-25 20:28:00 -06:00
parent ef83cc4b0a
commit 47dad8fb00
5 changed files with 145 additions and 20 deletions

74
Pipfile.lock generated
View File

@@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
"sha256": "f4f00b78a16b39eeb122566ec4cc6bf2dfeae044ae95a281e352e00850c74cc6"
"sha256": "263a7825d8113518c7a0690d5f69526cabe2dfa6ea572bb39cbe5d26495e619c"
},
"pipfile-spec": 6,
"requires": {
@@ -41,19 +41,19 @@
},
"boto3": {
"hashes": [
"sha256:8f59383fe578ac9107466a464d7198933e5332d85a4790f2e01cf24a4a7f635b",
"sha256:af92931f65e33e7450c3389c6cc6ab6b3e2f619697ea5566aacc0f16f3b21f61"
"sha256:9b6903fe9cc92d2f6111db28675263f1ab45adbcf1483025c82a304ce7790b71",
"sha256:f2ce641957c1782e382548ced4a447189e45851bbe58c1f6752ff2b661527de7"
],
"index": "pypi",
"version": "==1.21.7"
"version": "==1.21.8"
},
"botocore": {
"hashes": [
"sha256:5d1a2a2ac72461bbaa79317b3e4cb72c7ebb315aef184d90f72ec1f6dba0ca6c",
"sha256:a34118bfadc02903ab404148822fe5a6de7a3bb58943f1a6a19cc8b0446d2a50"
"sha256:9fbc5c57b31850c51c87abc3e166ed4e0f343665bec4e1a0ff814fbc9704642c",
"sha256:a5431d806dc75fb1844463d921759fcd8d387674443af8d7fd0867f296b02759"
],
"markers": "python_version >= '3.6'",
"version": "==1.24.7"
"version": "==1.24.8"
},
"bs4": {
"hashes": [
@@ -354,6 +354,31 @@
"markers": "python_version >= '3.7'",
"version": "==2.1.0"
},
"numpy": {
"hashes": [
"sha256:03ae5850619abb34a879d5f2d4bb4dcd025d6d8fb72f5e461dae84edccfe129f",
"sha256:076aee5a3763d41da6bef9565fdf3cb987606f567cd8b104aded2b38b7b47abf",
"sha256:0b536b6840e84c1c6a410f3a5aa727821e6108f3454d81a5cd5900999ef04f89",
"sha256:15efb7b93806d438e3bc590ca8ef2f953b0ce4f86f337ef4559d31ec6cf9d7dd",
"sha256:168259b1b184aa83a514f307352c25c56af111c269ffc109d9704e81f72e764b",
"sha256:2638389562bda1635b564490d76713695ff497242a83d9b684d27bb4a6cc9d7a",
"sha256:3556c5550de40027d3121ebbb170f61bbe19eb639c7ad0c7b482cd9b560cd23b",
"sha256:4a176959b6e7e00b5a0d6f549a479f869829bfd8150282c590deee6d099bbb6e",
"sha256:515a8b6edbb904594685da6e176ac9fbea8f73a5ebae947281de6613e27f1956",
"sha256:55535c7c2f61e2b2fc817c5cbe1af7cb907c7f011e46ae0a52caa4be1f19afe2",
"sha256:59153979d60f5bfe9e4c00e401e24dfe0469ef8da6d68247439d3278f30a180f",
"sha256:60cb8e5933193a3cc2912ee29ca331e9c15b2da034f76159b7abc520b3d1233a",
"sha256:6767ad399e9327bfdbaa40871be4254d1995f4a3ca3806127f10cec778bd9896",
"sha256:76a4f9bce0278becc2da7da3b8ef854bed41a991f4226911a24a9711baad672c",
"sha256:8cf33634b60c9cef346663a222d9841d3bbbc0a2f00221d6bcfd0d993d5543f6",
"sha256:94dd11d9f13ea1be17bac39c1942f527cbf7065f94953cf62dfe805653da2f8f",
"sha256:aafa46b5a39a27aca566198d3312fb3bde95ce9677085efd02c86f7ef6be4ec7",
"sha256:badca914580eb46385e7f7e4e426fea6de0a37b9e06bec252e481ae7ec287082",
"sha256:d76a26c5118c4d96e264acc9e3242d72e1a2b92e739807b3b69d8d47684b6677"
],
"markers": "python_version < '3.10' and platform_machine != 'aarch64' and platform_machine != 'arm64'",
"version": "==1.22.2"
},
"packaging": {
"hashes": [
"sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb",
@@ -362,6 +387,37 @@
"markers": "python_version >= '3.6'",
"version": "==21.3"
},
"pandas": {
"hashes": [
"sha256:0259cd11e7e6125aaea3af823b80444f3adad6149ff4c97fef760093598b3e34",
"sha256:04dd15d9db538470900c851498e532ef28d4e56bfe72c9523acb32042de43dfb",
"sha256:0b1a13f647e4209ed7dbb5da3497891d0045da9785327530ab696417ef478f84",
"sha256:19f7c632436b1b4f84615c3b127bbd7bc603db95e3d4332ed259dc815c9aaa26",
"sha256:1b384516dbb4e6aae30e3464c2e77c563da5980440fbdfbd0968e3942f8f9d70",
"sha256:1d85d5f6be66dfd6d1d8d13b9535e342a2214260f1852654b19fa4d7b8d1218b",
"sha256:2e5a7a1e0ecaac652326af627a3eca84886da9e667d68286866d4e33f6547caf",
"sha256:3129a35d9dad1d80c234dd78f8f03141b914395d23f97cf92a366dcd19f8f8bf",
"sha256:358b0bc98a5ff067132d23bf7a2242ee95db9ea5b7bbc401cf79205f11502fd3",
"sha256:3dfb32ed50122fe8c5e7f2b8d97387edd742cc78f9ec36f007ee126cd3720907",
"sha256:4e1176f45981c8ccc8161bc036916c004ca51037a7ed73f2d2a9857e6dbe654f",
"sha256:508c99debccd15790d526ce6b1624b97a5e1e4ca5b871319fb0ebfd46b8f4dad",
"sha256:6105af6533f8b63a43ea9f08a2ede04e8f43e49daef0209ab0d30352bcf08bee",
"sha256:6d6ad1da00c7cc7d8dd1559a6ba59ba3973be6b15722d49738b2be0977eb8a0c",
"sha256:7ea47ba1d6f359680130bd29af497333be6110de8f4c35b9211eec5a5a9630fa",
"sha256:8db93ec98ac7cb5f8ac1420c10f5e3c43533153f253fe7fb6d891cf5aa2b80d2",
"sha256:96e9ece5759f9b47ae43794b6359bbc54805d76e573b161ae770c1ea59393106",
"sha256:bbb15ad79050e8b8d39ec40dd96a30cd09b886a2ae8848d0df1abba4d5502a67",
"sha256:c614001129b2a5add5e3677c3a213a9e6fd376204cb8d17c04e84ff7dfc02a73",
"sha256:e6a7bbbb7950063bfc942f8794bc3e31697c020a14f1cd8905fc1d28ec674a01",
"sha256:f02e85e6d832be37d7f16cf6ac8bb26b519ace3e5f3235564a91c7f658ab2a43"
],
"markers": "python_version >= '3.8'",
"version": "==1.4.1"
},
"polyphemus": {
"git": "https://github.com/bellingcat/polyphemus.git",
"ref": "72ea0a63de4b40bf8038dfdb26cbbab87ba86da9"
},
"pygments": {
"hashes": [
"sha256:44238f1b60a76d78fc8ca0528ee429702aae011c265fe6a8dd8b63049ae41c65",
@@ -489,7 +545,9 @@
"version": "==2022.1.18"
},
"requests": {
"extras": [],
"extras": [
"socks"
],
"hashes": [
"sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61",
"sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"