implemented methods for extracting profile metadata (still need to test)

This commit is contained in:
Tristan Lee
2022-03-28 20:16:59 -05:00
parent d68cbd207a
commit 16870d7daa
13 changed files with 424 additions and 154 deletions

362
Pipfile.lock generated
View File

@@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
"sha256": "e3b96b0ac8c80d4817f9adac4ab171bf4b7e07e80927c7b152a24e8bbdbf7faa"
"sha256": "26955249044f1cd4bb4504c14f00f0c50508192338026227fc7b889e9f4fc11c"
},
"pipfile-spec": 6,
"requires": {
@@ -34,19 +34,19 @@
},
"boto3": {
"hashes": [
"sha256:76d5b90400c54b25278150768e946edf166acce2c1597c0ecfbebb1dbe9acf2c",
"sha256:7bb2e6506a6ad44d111dd20a5d510374b6958fe989b4ef887109c79d812f926f"
"sha256:788aa3281e91413bc201268a251c9d4ca2e9deb3a4af74daea2389cf66e5132e",
"sha256:ca37b9b4ade72f6d4fa2b7bee584dd5b1c7585f07f22ff1edbc9ecc0c4173b1f"
],
"index": "pypi",
"version": "==1.21.19"
"version": "==1.21.28"
},
"botocore": {
"hashes": [
"sha256:5ed2be0e413961134f4c17eab16396d41a5b4b73a637588260c04d20806d52ea",
"sha256:d0d77bce152ca51f3c2cd0f9bf05cb3b623e719406ad58b4c20444e237fe82eb"
"sha256:03c41d26d1e765380b8175d4b136d3144aa051f17a86eebfdf9a885a5a9a6a72",
"sha256:102eb24b44d473adea6bb8728b20fb9547fa5858c3293df7cad67ef17ea736a7"
],
"markers": "python_version >= '3.6'",
"version": "==1.24.19"
"version": "==1.24.28"
},
"brotli": {
"hashes": [
@@ -123,6 +123,14 @@
"index": "pypi",
"version": "==0.0.1"
},
"cachetools": {
"hashes": [
"sha256:486471dfa8799eb7ec503a8059e263db000cdda20075ce5e48903087f79d5fd6",
"sha256:8fecd4203a38af17928be7b90689d8083603073622229ca7077b72d8e5a976e4"
],
"markers": "python_version ~= '3.7'",
"version": "==5.0.0"
},
"certifi": {
"hashes": [
"sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872",
@@ -140,19 +148,19 @@
},
"click": {
"hashes": [
"sha256:6a7a62563bbfabfda3a38f3023a1db4a35978c0abd76f6c9605ecd6554d6d9b1",
"sha256:8458d7b1287c5fb128c90e23381cf99dcde74beaf6c7ff6384ce84d6fe090adb"
"sha256:19a4baa64da924c5e0cd889aba8e947f280309f1a2ce0947a3e3a7bcb7cc72d6",
"sha256:977c213473c7665d3aa092b41ff12063227751c41d7b17165013e10069cc5cd2"
],
"markers": "python_version >= '3.6'",
"version": "==8.0.4"
"markers": "python_version >= '3.7'",
"version": "==8.1.0"
},
"dateparser": {
"hashes": [
"sha256:faa2b97f51f3b5ff1ba2f17be90de2b733fb6191f89b4058787473e8202f3044",
"sha256:fec344db1f73d005182e214c0ff27313c748bbe0c1638ce9d48a809ddfdab2a0"
"sha256:038196b1f12c7397e38aad3d61588833257f6f552baa63a1499e6987fa8d42d9",
"sha256:9600874312ff28a41f96ec7ccdc73be1d1c44435719da47fea3339d55ff5a628"
],
"index": "pypi",
"version": "==1.1.0"
"version": "==1.1.1"
},
"ffmpeg-python": {
"hashes": [
@@ -192,66 +200,89 @@
"index": "pypi",
"version": "==0.8.0"
},
"google-auth": {
"hashes": [
"sha256:3ba4d63cb29c1e6d5ffcc1c0623c03cf02ede6240a072f213084749574e691ab",
"sha256:60d449f8142c742db760f4c0be39121bc8d9be855555d784c252deaca1ced3f5"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'",
"version": "==2.6.2"
},
"google-auth-oauthlib": {
"hashes": [
"sha256:24f67735513c4c7134dbde2f1dee5a1deb6acc8dfcb577d7bff30d213a28e7b0",
"sha256:30596b824fc6808fdaca2f048e4998cc40fb4b3599eaea66d28dc7085b36c5b8"
],
"markers": "python_version >= '3.6'",
"version": "==0.5.1"
},
"greenlet": {
"hashes": [
"sha256:0051c6f1f27cb756ffc0ffbac7d2cd48cb0362ac1736871399a739b2885134d3",
"sha256:00e44c8afdbe5467e4f7b5851be223be68adb4272f44696ee71fe46b7036a711",
"sha256:013d61294b6cd8fe3242932c1c5e36e5d1db2c8afb58606c5a67efce62c1f5fd",
"sha256:049fe7579230e44daef03a259faa24511d10ebfa44f69411d99e6a184fe68073",
"sha256:14d4f3cd4e8b524ae9b8aa567858beed70c392fdec26dbdb0a8a418392e71708",
"sha256:166eac03e48784a6a6e0e5f041cfebb1ab400b394db188c48b3a84737f505b67",
"sha256:17ff94e7a83aa8671a25bf5b59326ec26da379ace2ebc4411d690d80a7fbcf23",
"sha256:1e12bdc622676ce47ae9abbf455c189e442afdde8818d9da983085df6312e7a1",
"sha256:21915eb821a6b3d9d8eefdaf57d6c345b970ad722f856cd71739493ce003ad08",
"sha256:288c6a76705dc54fba69fbcb59904ae4ad768b4c768839b8ca5fdadec6dd8cfd",
"sha256:2bde6792f313f4e918caabc46532aa64aa27a0db05d75b20edfc5c6f46479de2",
"sha256:32ca72bbc673adbcfecb935bb3fb1b74e663d10a4b241aaa2f5a75fe1d1f90aa",
"sha256:356b3576ad078c89a6107caa9c50cc14e98e3a6c4874a37c3e0273e4baf33de8",
"sha256:40b951f601af999a8bf2ce8c71e8aaa4e8c6f78ff8afae7b808aae2dc50d4c40",
"sha256:572e1787d1460da79590bf44304abbc0a2da944ea64ec549188fa84d89bba7ab",
"sha256:58df5c2a0e293bf665a51f8a100d3e9956febfbf1d9aaf8c0677cf70218910c6",
"sha256:64e6175c2e53195278d7388c454e0b30997573f3f4bd63697f88d855f7a6a1fc",
"sha256:7227b47e73dedaa513cdebb98469705ef0d66eb5a1250144468e9c3097d6b59b",
"sha256:7418b6bfc7fe3331541b84bb2141c9baf1ec7132a7ecd9f375912eca810e714e",
"sha256:7cbd7574ce8e138bda9df4efc6bf2ab8572c9aff640d8ecfece1b006b68da963",
"sha256:7ff61ff178250f9bb3cd89752df0f1dd0e27316a8bd1465351652b1b4a4cdfd3",
"sha256:833e1551925ed51e6b44c800e71e77dacd7e49181fdc9ac9a0bf3714d515785d",
"sha256:8639cadfda96737427330a094476d4c7a56ac03de7265622fcf4cfe57c8ae18d",
"sha256:8c5d5b35f789a030ebb95bff352f1d27a93d81069f2adb3182d99882e095cefe",
"sha256:8c790abda465726cfb8bb08bd4ca9a5d0a7bd77c7ac1ca1b839ad823b948ea28",
"sha256:8d2f1fb53a421b410751887eb4ff21386d119ef9cde3797bf5e7ed49fb51a3b3",
"sha256:903bbd302a2378f984aef528f76d4c9b1748f318fe1294961c072bdc7f2ffa3e",
"sha256:93f81b134a165cc17123626ab8da2e30c0455441d4ab5576eed73a64c025b25c",
"sha256:95e69877983ea39b7303570fa6760f81a3eec23d0e3ab2021b7144b94d06202d",
"sha256:9633b3034d3d901f0a46b7939f8c4d64427dfba6bbc5a36b1a67364cf148a1b0",
"sha256:97e5306482182170ade15c4b0d8386ded995a07d7cc2ca8f27958d34d6736497",
"sha256:9f3cba480d3deb69f6ee2c1825060177a22c7826431458c697df88e6aeb3caee",
"sha256:aa5b467f15e78b82257319aebc78dd2915e4c1436c3c0d1ad6f53e47ba6e2713",
"sha256:abb7a75ed8b968f3061327c433a0fbd17b729947b400747c334a9c29a9af6c58",
"sha256:aec52725173bd3a7b56fe91bc56eccb26fbdff1386ef123abb63c84c5b43b63a",
"sha256:b11548073a2213d950c3f671aa88e6f83cda6e2fb97a8b6317b1b5b33d850e06",
"sha256:b1692f7d6bc45e3200844be0dba153612103db241691088626a33ff1f24a0d88",
"sha256:b336501a05e13b616ef81ce329c0e09ac5ed8c732d9ba7e3e983fcc1a9e86965",
"sha256:b8c008de9d0daba7b6666aa5bbfdc23dcd78cafc33997c9b7741ff6353bafb7f",
"sha256:b92e29e58bef6d9cfd340c72b04d74c4b4e9f70c9fa7c78b674d1fec18896dc4",
"sha256:be5f425ff1f5f4b3c1e33ad64ab994eed12fc284a6ea71c5243fd564502ecbe5",
"sha256:dd0b1e9e891f69e7675ba5c92e28b90eaa045f6ab134ffe70b52e948aa175b3c",
"sha256:e30f5ea4ae2346e62cedde8794a56858a67b878dd79f7df76a0767e356b1744a",
"sha256:e6a36bb9474218c7a5b27ae476035497a6990e21d04c279884eb10d9b290f1b1",
"sha256:e859fcb4cbe93504ea18008d1df98dee4f7766db66c435e4882ab35cf70cac43",
"sha256:eb6ea6da4c787111adf40f697b4e58732ee0942b5d3bd8f435277643329ba627",
"sha256:ec8c433b3ab0419100bd45b47c9c8551248a5aee30ca5e9d399a0b57ac04651b",
"sha256:eff9d20417ff9dcb0d25e2defc2574d10b491bf2e693b4e491914738b7908168",
"sha256:f0214eb2a23b85528310dad848ad2ac58e735612929c8072f6093f3585fd342d",
"sha256:f276df9830dba7a333544bd41070e8175762a7ac20350786b322b714b0e654f5",
"sha256:f3acda1924472472ddd60c29e5b9db0cec629fbe3c5c5accb74d6d6d14773478",
"sha256:f70a9e237bb792c7cc7e44c531fd48f5897961701cdaa06cf22fc14965c496cf",
"sha256:f9d29ca8a77117315101425ec7ec2a47a22ccf59f5593378fc4077ac5b754fce",
"sha256:fa877ca7f6b48054f847b61d6fa7bed5cebb663ebc55e018fda12db09dcc664c",
"sha256:fdcec0b8399108577ec290f55551d926d9a1fa6cad45882093a7a07ac5ec147b"
"sha256:004aed447382d80a56ecc354a6d807f305e6c808714ce6ccbca4839c94fae81d",
"sha256:068d68fad6bd623e29a2d36e74538c9b9d6dc6464931cd27d93da6cfc6a7f242",
"sha256:06fd4075754009c9817c6b4e1dc0af4616de52757b6ca973a81c3c1aadc28257",
"sha256:1004cb542451814b12a4f38e835a47734e2b2c683acbf463d5ae76282a3974cf",
"sha256:10c358633a8b27bfc32d27114ef2ca2ddc9f1f89f1643d1157b85e1fdd695315",
"sha256:115bc25fefbdc692c4483e9ddb9011ccd0251590ed59dbfff0f4eb7050bf99c4",
"sha256:1d987a2579336792f73ae6b106c2f087e32afc8573fbf9566f123ac6d8cfb72f",
"sha256:2128d727fd1e8afba8e68feb2cdcf88c90163b69ddc9707722a3e491c5280720",
"sha256:230132c241fe284f93f2e7b3969e9b22bbd76ef98cf93e382c945d378907f5a4",
"sha256:23558f7bd08a663386c032ab8d302d613d2d02ae0c9758ad410bab6035b58d3d",
"sha256:255d520d3e4a5f16883b182e1a94219fe455ab4f50aaaf534bfd6d64ee728397",
"sha256:2a6bc19a728f6f643cfc89b876159a1a25a8f7d8700c013d48a73691f80b4550",
"sha256:379bed346ef8ba0a0e698b3c5975a44d15dd4a5bbff40bbd7fd548b445d5550b",
"sha256:3b12d0866759db93b0a893b4e50a7d7d1681519d2346c26695bb8bb2c652230e",
"sha256:40d491944f69e350e1e8b25f6ca49459824ede1678ec0cd4b5541f41edc06614",
"sha256:471484c7b9d7b7867263051aa81cdeed6e06b455e629a7f05eb91a6cb8bd0836",
"sha256:488c557080557bc01aabb3e1bda7225c68455b853733a8652857ac0d810dad1b",
"sha256:49c2e76e7aa81ba889b3c183e2341af3cc6161ee38852085110ae49d5b5d9a40",
"sha256:52d13ec90236e5935ed6da044e78faa1371d5116cc43fe6d7ca8994dd619ef96",
"sha256:57898c69a253d81f487787bdd538629fabd671fab8a9e31b041ca30965fd9556",
"sha256:5d577eef5beb5730ef01ab39983eb852a97c359b7a546809adf70c409f4b2ecc",
"sha256:6a41987c1474c9158a0c0c96611530a8f299bc547d35bee8add981b8b2534f74",
"sha256:6ae67b7df8db3626af8e042e9c6949cfa27d1a3bbbfdff29e45b72bb6673a650",
"sha256:6c42c27e9d12e8a481aff469ffe8dd4ce0484c354a418470960f760f6ae41e7c",
"sha256:6c4a90c9f6128b4d0905a89930bd325e0491574e5cb453f606bb7094a3197587",
"sha256:6e64518e5833ac2d9359b6d9bd4df2c0cf441a0f3a4eca9e735fbea99009fa70",
"sha256:6fd3a270c23c5b42d86a9c7c6b0229f23ee4a7a4cabdaaa1693ad7a0982d13cb",
"sha256:70db73351e0fcf11a76288c47a0469d9a330bcb2e7618c5eb57432b8caa82403",
"sha256:771f401692046845626cbdf1dd0f04e999413ede0ee9ad39033fe30b5fa2e845",
"sha256:7935026ec61b967cbc6b746c0ca75c1651ea118d7fee4d259cff9e6866153374",
"sha256:7b76b1cac9baac1980210e29145800954e7b42e91ef69c4d695de1cab87ce41f",
"sha256:7e3f37c11b6699b1a1e0fcc0e88829dba4f2866546381b05ab8b3f4db645a823",
"sha256:8370fa65ad421484894f559055f951843754153b72b9bca2ebdc5288efe2e3f0",
"sha256:8ae9c443d44a4e23252632e4d7775f419f992d0df3eff923e23775f5cc551d39",
"sha256:8b31d85f2781e44f1ffaaf7ea07f484e7d42317c677c355fa77b4a1a4bea7394",
"sha256:8b450336b27f3b375cadc474c6704838eaa8dd3ca312aac3bb69d92264a8e638",
"sha256:9ce84357388a76d886febff4e50e321c212ffd3248b590960b2da6e02404a5c9",
"sha256:a23e986fb0ba8e7407286add41fa0d4207be44e3dce1b04789f4757800eca1cf",
"sha256:a81610ee00d0da9cd2c8679479b7791149365b6dfb3971b01b22ee29b04787ce",
"sha256:b4e40444975e5ab0ed3004369209c39a28e084951daaeee4919f164b6b849b14",
"sha256:b66600de16702b9dfa74bea34524b55183a2183e5fd92f20fe6c2fcae550a64c",
"sha256:ba6ee18694d3673796b7a31b7d21254e87e9e43ca5be56f323fd396111255315",
"sha256:bd03837da28293baa39bdfc3cada69e2f8807f423ae06168aa28d2b32c63a6b6",
"sha256:bd2192070f88c0778ae1d68a0980fdece3473498c1db37f3794e3454f91e3ecf",
"sha256:c1f6f1a3cc013012cd1da913c40b13e6d721046a8c8a0ea0cde94069645a75db",
"sha256:ce10a8e7e067bde3c1fbf494d2b8859db510206030b0b67bc3af90b0eb1887b9",
"sha256:d31386d208303a5a6cf0819ef9f6db6680bab9e4ca8e48adb3d4b26ead89beb7",
"sha256:d83b3af53b201970973c5574b39df226746194063bb248a53fd12b470ac34319",
"sha256:df9657b212c054ac6d803290d7c4bcd7790af0b725984fce1eeb0a1e3f2d9798",
"sha256:e576e5fd3f129e6b3595dc734ac7f2b8c548f19ef07781194bc538dc9c0cdbbc",
"sha256:e7400358558094c1bcedc75f3b3c4f400c53130b44833848890a99968dee6a64",
"sha256:eb6a385f8577d30e4cb43dd555fb134ddaae1edeb84205e09dabec332bf49fd0",
"sha256:f27f0875e0873f6bf5df09a456bfcac0667824cabac4cad30b43f36e0382ffe7",
"sha256:fcd4a6d04995f1d66bc78b503e4e59ae72fd32aaec4f661657fe5ae5c1aa4ce3"
],
"markers": "python_version >= '3' and platform_machine == 'aarch64' or (platform_machine == 'ppc64le' or (platform_machine == 'x86_64' or (platform_machine == 'amd64' or (platform_machine == 'AMD64' or (platform_machine == 'win32' or platform_machine == 'WIN32')))))",
"version": "==1.1.2"
"version": "==2.0.0a2"
},
"gspread": {
"hashes": [
"sha256:a347197628fa1885dcc860701fb1b3f5471386aa863a71cfe232b6473c6fea1b",
"sha256:be2220e19723570ed98e8b8eb6a5b6e04afa0f08ec1f08b89e217c354488a047"
],
"index": "pypi",
"version": "==5.3.0"
},
"idna": {
"hashes": [
@@ -270,18 +301,18 @@
},
"instaloader": {
"hashes": [
"sha256:9615a12a5a01a8b6c9d99a2a047b21d81b341cfd77656b9261bda30ece0cd562"
"sha256:7fa6147810eedcc1dedcdec8cfa1f220c9379ab8faeab6a336a7c181d944e2e4"
],
"index": "pypi",
"version": "==4.8.4"
"version": "==4.9"
},
"jmespath": {
"hashes": [
"sha256:b85d0567b8666149a93172712e68920734333c0ce7e89b78b3e987f71e5ed4f9",
"sha256:cdf6525904cc597730141d61b36f2e4b8ecc257c420fa2f4549bac2c2d0cb72f"
"sha256:a490e280edd1f57d6de88636992d05b71e97d69a26a19f058ecf7d304474bf5e",
"sha256:e8dcd576ed616f14ec02eed0005c85973b5890083313860136657e24784e4c04"
],
"markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==0.10.0"
"markers": "python_version >= '3.7'",
"version": "==1.0.0"
},
"loguru": {
"hashes": [
@@ -363,7 +394,7 @@
"sha256:6397602efb3c2d7baebd2166ed85731ae1c1d475abca22090b7141ff5034b3e1",
"sha256:9c9f243fcec7f410f138cb12c21c84c64fde4195481a30c9bfb05b5f003adfed"
],
"markers": "python_version >= '3.5' and python_version < '4'",
"markers": "python_version >= '3.5' and python_version < '4.0'",
"version": "==1.45.1"
},
"numpy": {
@@ -392,6 +423,14 @@
"markers": "python_version < '3.10' and platform_machine != 'aarch64' and platform_machine != 'arm64'",
"version": "==1.22.3"
},
"oauthlib": {
"hashes": [
"sha256:23a8208d75b902797ea29fd31fa80a15ed9dc2c6c16fe73f5d346f83f6fa27a2",
"sha256:6db33440354787f9b7f3a6dbd4febf5d0f93758354060e802f6c06cb493022fe"
],
"markers": "python_version >= '3.6'",
"version": "==3.2.0"
},
"packaging": {
"hashes": [
"sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb",
@@ -512,6 +551,24 @@
],
"version": "==0.4.8"
},
"pyasn1-modules": {
"hashes": [
"sha256:0845a5582f6a02bb3e1bde9ecfc4bfcae6ec3210dd270522fee602365430c3f8",
"sha256:0fe1b68d1e486a1ed5473f1302bd991c1611d319bba158e98b106ff86e1d7199",
"sha256:15b7c67fabc7fc240d87fb9aabf999cf82311a6d6fb2c70d00d3d0604878c811",
"sha256:426edb7a5e8879f1ec54a1864f16b882c2837bfd06eee62f2c982315ee2473ed",
"sha256:65cebbaffc913f4fe9e4808735c95ea22d7a7775646ab690518c056784bc21b4",
"sha256:905f84c712230b2c592c19470d3ca8d552de726050d1d1716282a1f6146be65e",
"sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74",
"sha256:a99324196732f53093a84c4369c996713eb8c89d360a496b599fb1a9c47fc3eb",
"sha256:b80486a6c77252ea3a3e9b1e360bc9cf28eaac41263d173c032581ad2f20fe45",
"sha256:c29a5e5cc7a3f05926aff34e097e84f8589cd790ce0ed41b67aed6857b26aafd",
"sha256:cbac4bc38d117f2a49aeedec4407d23e8866ea4ac27ff2cf7fb3e5b570df19e0",
"sha256:f39edd8c4ecaa4556e989147ebf219227e2cd2e8a43c7e7fcb1f1c18c5fd6a3d",
"sha256:fe0644d9ab041506b62782e92b06b8c68cca799e1a9636ec398675459e031405"
],
"version": "==0.2.8"
},
"pycryptodomex": {
"hashes": [
"sha256:1ca8e1b4c62038bb2da55451385246f51f412c5f5eabd64812c01766a5989b4a",
@@ -575,11 +632,11 @@
},
"pytest": {
"hashes": [
"sha256:b555252a95bbb2a37a97b5ac2eb050c436f7989993565f5e0c9128fcaacadd0e",
"sha256:f1089d218cfcc63a212c42896f1b7fbf096874d045e1988186861a1a87d27b47"
"sha256:841132caef6b1ad17a9afde46dc4f6cfa59a05f9555aae5151f73bdf2820ca63",
"sha256:92f723789a8fdd7180b6b06483874feca4c48a5c76968e03bb3e7f806a1869ea"
],
"markers": "python_version >= '3.7'",
"version": "==7.1.0"
"version": "==7.1.1"
},
"python-dateutil": {
"hashes": [
@@ -591,10 +648,10 @@
},
"pytz": {
"hashes": [
"sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c",
"sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326"
"sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7",
"sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c"
],
"version": "==2021.3"
"version": "==2022.1"
},
"pytz-deprecation-shim": {
"hashes": [
@@ -685,9 +742,7 @@
"version": "==2022.3.2"
},
"requests": {
"extras": [
"socks"
],
"extras": [],
"hashes": [
"sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61",
"sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"
@@ -695,12 +750,20 @@
"index": "pypi",
"version": "==2.27.1"
},
"requests-oauthlib": {
"hashes": [
"sha256:2577c501a2fb8d05a304c09d090d6e47c306fef15809d102b327cf8364bddab5",
"sha256:75beac4a47881eeb94d5ea5d6ad31ef88856affe2332b9aafb52c6452ccf0d7a"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==1.3.1"
},
"rsa": {
"hashes": [
"sha256:5c6bd9dc7a543b7fe4304a631f8a8a3b674e2bbfc49c2ae96200cdbe55df6b17",
"sha256:95c5d300c4e879ee69708c428ba566c59478fd653cc3a22243eeb8ed846950bb"
],
"markers": "python_version >= '3.6' and python_version < '4'",
"markers": "python_version >= '3.6' and python_version < '4.0'",
"version": "==4.8"
},
"s3transfer": {
@@ -790,11 +853,11 @@
},
"tzdata": {
"hashes": [
"sha256:3eee491e22ebfe1e5cfcc97a4137cd70f092ce59144d81f8924a844de05ba8f5",
"sha256:68dbe41afd01b867894bbdfd54fa03f468cfa4f0086bfb4adcd8de8f24f3ee21"
"sha256:238e70234214138ed7b4e8a0fab0e5e13872edab3be586ab8198c407620e2ab9",
"sha256:8b536a8ec63dc0751342b3984193a3118f8fca2afe25752bb9b7fffd398552d3"
],
"markers": "python_version >= '3.6'",
"version": "==2021.5"
"version": "==2022.1"
},
"tzlocal": {
"hashes": [
@@ -806,11 +869,11 @@
},
"urllib3": {
"hashes": [
"sha256:000ca7f471a233c2251c6c7023ee85305721bfdf18621ebff4fd17a8653427ed",
"sha256:0e7c33d9a63e7ddfcb86780aac87befc2fbddf46c58dbb487e0855f7ceec283c"
"sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14",
"sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
"version": "==1.26.8"
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4.0'",
"version": "==1.26.9"
},
"websockets": {
"hashes": [
@@ -899,6 +962,35 @@
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==2.9.1"
},
"black": {
"hashes": [
"sha256:06f9d8846f2340dfac80ceb20200ea5d1b3f181dd0556b47af4e8e0b24fa0a6b",
"sha256:10dbe6e6d2988049b4655b2b739f98785a884d4d6b85bc35133a8fb9a2233176",
"sha256:2497f9c2386572e28921fa8bec7be3e51de6801f7459dffd6e62492531c47e09",
"sha256:30d78ba6bf080eeaf0b7b875d924b15cd46fec5fd044ddfbad38c8ea9171043a",
"sha256:328efc0cc70ccb23429d6be184a15ce613f676bdfc85e5fe8ea2a9354b4e9015",
"sha256:35020b8886c022ced9282b51b5a875b6d1ab0c387b31a065b84db7c33085ca79",
"sha256:5795a0375eb87bfe902e80e0c8cfaedf8af4d49694d69161e5bd3206c18618bb",
"sha256:5891ef8abc06576985de8fa88e95ab70641de6c1fca97e2a15820a9b69e51b20",
"sha256:637a4014c63fbf42a692d22b55d8ad6968a946b4a6ebc385c5505d9625b6a464",
"sha256:67c8301ec94e3bcc8906740fe071391bce40a862b7be0b86fb5382beefecd968",
"sha256:6d2fc92002d44746d3e7db7cf9313cf4452f43e9ea77a2c939defce3b10b5c82",
"sha256:6ee227b696ca60dd1c507be80a6bc849a5a6ab57ac7352aad1ffec9e8b805f21",
"sha256:863714200ada56cbc366dc9ae5291ceb936573155f8bf8e9de92aef51f3ad0f0",
"sha256:9b542ced1ec0ceeff5b37d69838106a6348e60db7b8fdd245294dc1d26136265",
"sha256:a6342964b43a99dbc72f72812bf88cad8f0217ae9acb47c0d4f141a6416d2d7b",
"sha256:ad4efa5fad66b903b4a5f96d91461d90b9507a812b3c5de657d544215bb7877a",
"sha256:bc58025940a896d7e5356952228b68f793cf5fcb342be703c3a2669a1488cb72",
"sha256:cc1e1de68c8e5444e8f94c3670bb48a2beef0e91dddfd4fcc29595ebd90bb9ce",
"sha256:cee3e11161dde1b2a33a904b850b0899e0424cc331b7295f2a9698e79f9a69a0",
"sha256:e3556168e2e5c49629f7b0f377070240bd5511e45e25a4497bb0073d9dda776a",
"sha256:e8477ec6bbfe0312c128e74644ac8a02ca06bcdb8982d4ee06f209be28cdf163",
"sha256:ee8f1f7228cce7dffc2b464f07ce769f478968bfb3dd1254a4c2eeed84928aad",
"sha256:fd57160949179ec517d32ac2ac898b5f20d68ed1a9c977346efbac9c2f1e779d"
],
"index": "pypi",
"version": "==22.3.0"
},
"certifi": {
"hashes": [
"sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872",
@@ -914,6 +1006,14 @@
"markers": "python_version >= '3'",
"version": "==2.0.12"
},
"click": {
"hashes": [
"sha256:19a4baa64da924c5e0cd889aba8e947f280309f1a2ce0947a3e3a7bcb7cc72d6",
"sha256:977c213473c7665d3aa092b41ff12063227751c41d7b17165013e10069cc5cd2"
],
"markers": "python_version >= '3.7'",
"version": "==8.1.0"
},
"coverage": {
"extras": [
"toml"
@@ -1005,11 +1105,11 @@
},
"jinja2": {
"hashes": [
"sha256:077ce6014f7b40d03b47d1f1ca4b0fc8328a692bd284016f806ed0eaca390ad8",
"sha256:611bb273cd68f3b993fabdc4064fc858c5b47a973cb5aa7999ec1ba405c87cd7"
"sha256:539835f51a74a69f41b848a9645dbdc35b4f20a3b601e2d9a7e22947b15ff119",
"sha256:640bed4bb501cbd17194b3cace1dc2126f5b619cf068a726b98192a0fde74ae9"
],
"markers": "python_version >= '3.6'",
"version": "==3.0.3"
"markers": "python_version >= '3.7'",
"version": "==3.1.1"
},
"markupsafe": {
"hashes": [
@@ -1057,6 +1157,13 @@
"markers": "python_version >= '3.7'",
"version": "==2.1.1"
},
"mypy-extensions": {
"hashes": [
"sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d",
"sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"
],
"version": "==0.4.3"
},
"packaging": {
"hashes": [
"sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb",
@@ -1065,6 +1172,21 @@
"markers": "python_version >= '3.6'",
"version": "==21.3"
},
"pathspec": {
"hashes": [
"sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a",
"sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1"
],
"version": "==0.9.0"
},
"platformdirs": {
"hashes": [
"sha256:7535e70dfa32e84d4b34996ea99c5e432fa29a708d0f4e394bbcb2a8faa4f16d",
"sha256:bcae7cab893c2d310a711b70b24efb93334febe65f8de776ee320b517471e227"
],
"markers": "python_version >= '3.7'",
"version": "==2.5.1"
},
"pluggy": {
"hashes": [
"sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159",
@@ -1099,11 +1221,11 @@
},
"pytest": {
"hashes": [
"sha256:b555252a95bbb2a37a97b5ac2eb050c436f7989993565f5e0c9128fcaacadd0e",
"sha256:f1089d218cfcc63a212c42896f1b7fbf096874d045e1988186861a1a87d27b47"
"sha256:841132caef6b1ad17a9afde46dc4f6cfa59a05f9555aae5151f73bdf2820ca63",
"sha256:92f723789a8fdd7180b6b06483874feca4c48a5c76968e03bb3e7f806a1869ea"
],
"markers": "python_version >= '3.7'",
"version": "==7.1.0"
"version": "==7.1.1"
},
"pytest-cov": {
"hashes": [
@@ -1123,23 +1245,21 @@
},
"pytest-metadata": {
"hashes": [
"sha256:576055b8336dd4a9006dd2a47615f76f2f8c30ab12b1b1c039d99e834583523f",
"sha256:71b506d49d34e539cc3cfdb7ce2c5f072bea5c953320002c95968e0238f8ecf1"
"sha256:141ba561a17659cda00cf74e7c7cf6103bab4550acad76a46f893339de63b1df",
"sha256:5cdb6aeea8ba9109181cf9f149c8a3ae1430ff7e44506a8f866af8a98ca46301"
],
"index": "pypi",
"version": "==1.11.0"
"version": "==2.0.1"
},
"pytz": {
"hashes": [
"sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c",
"sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326"
"sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7",
"sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c"
],
"version": "==2021.3"
"version": "==2022.1"
},
"requests": {
"extras": [
"socks"
],
"extras": [],
"hashes": [
"sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61",
"sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"
@@ -1156,11 +1276,11 @@
},
"sphinx": {
"hashes": [
"sha256:5da895959511473857b6d0200f56865ed62c31e8f82dd338063b84ec022701fe",
"sha256:6caad9786055cb1fa22b4a365c1775816b876f91966481765d7d50e9f0dd35cc"
"sha256:7bf8ca9637a4ee15af412d1a1d9689fec70523a68ca9bb9127c2f3eeb344e2e6",
"sha256:ebf612653238bcc8f4359627a9b7ce44ede6fdd75d9d30f68255c7383d3a6226"
],
"index": "pypi",
"version": "==4.4.0"
"version": "==4.5.0"
},
"sphinx-rtd-theme": {
"hashes": [
@@ -1226,13 +1346,21 @@
"markers": "python_version >= '3.7'",
"version": "==2.0.1"
},
"typing-extensions": {
"hashes": [
"sha256:1a9462dcc3347a79b1f1c0271fbe79e844580bb598bafa1ed208b94da3cdcd42",
"sha256:21c85e0fe4b9a155d0799430b0ad741cdce7e359660ccbd8b530613e8df88ce2"
],
"markers": "python_version < '3.10'",
"version": "==4.1.1"
},
"urllib3": {
"hashes": [
"sha256:000ca7f471a233c2251c6c7023ee85305721bfdf18621ebff4fd17a8653427ed",
"sha256:0e7c33d9a63e7ddfcb86780aac87befc2fbddf46c58dbb487e0855f7ceec283c"
"sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14",
"sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
"version": "==1.26.8"
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4.0'",
"version": "==1.26.9"
},
"zipp": {
"hashes": [

View File

@@ -64,6 +64,43 @@ class BitchuteScraper(Scraper):
if channel.platform == "Bitchute" and self.get_username_from_url(channel.url) is not None:
return True
def get_profile(self, channel: Channel) -> dict:
    """Scrape profile metadata for a Bitchute channel.

    Loads the channel page, parses its "About" section, then POSTs to the
    page's ``counts/`` endpoint (sending the CSRF cookie and the hidden form
    token taken from the first response) to obtain subscriber and view
    counts.

    Args:
        channel: Channel whose ``url`` identifies the Bitchute channel.

    Returns:
        dict with the keys ``description``, ``description_links``,
        ``created``, ``videos``, ``owner_url``, ``owner_name``, ``category``,
        ``image``, ``subscribers`` and ``views``.

    Raises:
        The HTML/JSON lookups are not guarded: a page or counts response
        without the expected structure raises from the failing lookup
        (e.g. ``AttributeError``, ``TypeError``, ``KeyError``).
    """
    # The URL template expects the bare channel name, so derive it from the
    # channel URL the same way `can_handle` and the other scrapers do rather
    # than interpolating the full URL.
    username = self.get_username_from_url(channel.url)
    base_url = "https://www.bitchute.com/channel/%s/" % username

    # Use the session as a context manager so its connections are released.
    with requests.session() as session:
        response = session.get(base_url)
        soup = BeautifulSoup(response.content, 'html.parser')

        # The counts endpoint is CSRF protected: it needs both the cookie
        # value and the hidden form token from the page just fetched.
        csrftoken = session.cookies['csrftoken']
        csrfmiddlewaretoken = soup.find('input', {'name' : 'csrfmiddlewaretoken'})['value']

        headers = {'Referer': base_url}
        data = {
            'csrftoken': csrftoken,
            'csrfmiddlewaretoken': csrfmiddlewaretoken}
        response = session.post(base_url + 'counts/', data = data, headers = headers)

    counts = json.loads(response.text)

    about_soup = soup.find('div', {'id' : 'channel-about'})
    info_list = about_soup.find('div', {'class' : 'channel-about-details'}).find_all('p')
    description_soup = about_soup.find('div', {'id' : 'channel-description'})
    owner_soup = soup.find('p', {'class' : 'owner'})

    profile = {
        'description' : description_soup.text.strip(),
        'description_links' : [a['href'] for a in description_soup.find_all('a', href = True)],
        # Collapse any whitespace character (newlines, NBSP, ...) to a plain space.
        'created': re.sub(r'\s', ' ', info_list[0].text.split('Created')[1].strip('. ')),
        'videos' : int(info_list[1].text.split('videos')[0].strip()),
        'owner_url' : owner_soup.find('a', href = True)['href'],
        'owner_name' : owner_soup.text,
        'category' : info_list[-1].text.split('Category')[1].strip(),
        'image' : about_soup.find('img', {'alt' : 'Channel Image'})['data-src'],
        'subscribers': counts['subscriber_count'],
        # Only the leading token of 'about_view_count' is converted to int.
        'views': int(counts['about_view_count'].split(' ')[0])}

    return profile
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
def strip_tags(html, convert_newlines=True):
@@ -420,29 +457,3 @@ def get_videos_user(session, user, csrftoken, detail):
# before the video, which is weird
yield comment
#-----------------------------------------------------------------------------#
def get_about(user):
    """
    Scrape the fields shown on a channel's "About" tab into a dict
    """
    page = requests.get("https://www.bitchute.com/channel/%s/" % user)
    soup = BeautifulSoup(page.content, 'html.parser')

    about_section = soup.find('div', {'id' : 'channel-about'})
    detail_paragraphs = about_section.find(
        'div', {'class' : 'channel-about-details'}).find_all('p')
    description = about_section.find('div', {'id' : 'channel-description'})

    def owner():
        # Looked up on demand so failures surface at the same dict entry as before.
        return soup.find('p', {'class' : 'owner'})

    created_raw = detail_paragraphs[0].text.split('Created')[1].strip('. ')

    return {
        'description' : description.text,
        'description_links' : [
            link['href'] for link in description.find_all('a', href = True)],
        'created': re.sub(r'\s', ' ', created_raw),
        'videos' : int(detail_paragraphs[1].text.split('videos')[0].strip()),
        'owner_url' : owner().find('a', href = True)['href'],
        'owner_name' : owner().text,
        'category' : detail_paragraphs[-1].text.split('Category')[1].strip(),
        'image' : about_section.find('img', {'alt' : 'Channel Image'})['data-src']
    }

View File

@@ -51,6 +51,13 @@ class GabScraper(Scraper):
raw_data=json.dumps(post),
archived_urls=archived_urls)
def can_handle(self, channel: Channel) -> bool:
    """Report whether this scraper is responsible for ``channel``.

    Args:
        channel: Channel to check; its ``platform`` and ``url`` are read.

    Returns:
        True when the platform is "Gab" and ``get_username_from_url``
        yields a username for the channel URL, otherwise False (an explicit
        bool rather than an implicit ``None``).
    """
    return (
        channel.platform == "Gab"
        and self.get_username_from_url(channel.url) is not None
    )
def get_profile(self, channel: Channel) -> dict:
    """Look up the Gab account for ``channel`` through a Garc client.

    The username is derived from ``channel.url``; the first item produced
    by the client's ``user`` call is returned.
    """
    gab_client = Garc(profile = 'main')
    handle = self.get_username_from_url(channel.url)
    matches = list(gab_client.user(handle))
    return matches[0]

View File

@@ -68,4 +68,11 @@ class GettrScraper(Scraper):
def url_to_key(self, url: str, content_type: str) -> str:
    """Build a storage key from a media URL and its content type.

    The key is the second-to-last segment of the URL path, followed by a
    dot and the part of ``content_type`` after its last slash.
    """
    subtype = content_type.split('/')[-1]
    path_segments = urlparse(url).path.split('/')
    return f'{path_segments[-2]}.{subtype}'
def get_profile(self, channel: Channel) -> dict:
    """Fetch the Gettr user record for ``channel``.

    Args:
        channel: Channel whose ``url`` is passed to
            ``get_username_from_url`` to obtain the username.

    Returns:
        Whatever ``PublicClient.user_info`` returns for that username.
    """
    # Single assignment; the original `client = client = ...` was a typo.
    client = PublicClient()
    username = self.get_username_from_url(channel.url)
    return client.user_info(username)

View File

@@ -100,4 +100,27 @@ class InstagramScraper(Scraper):
def can_handle(self, channel):
    """Report whether this scraper is responsible for ``channel``.

    Args:
        channel: Channel to check; its ``platform`` and ``url`` are read.

    Returns:
        True when the platform is "Instagram" and
        ``get_username_from_url`` yields a username for the channel URL,
        otherwise False (an explicit bool rather than an implicit ``None``).
    """
    return (
        channel.platform == "Instagram"
        and self.get_username_from_url(channel.url) is not None
    )
def get_profile(self, channel: Channel) -> dict:
    """Fetch Instagram profile metadata for ``channel`` via instaloader.

    Logs in with the ``INSTAGRAM_USERNAME`` / ``INSTAGRAM_PASSWORD``
    environment variables, loads the profile for the username derived from
    ``channel.url``, and returns its ``_asdict()`` data extended with the
    ``followers`` and ``followees`` attributes.
    """
    handle = self.get_username_from_url(channel.url)

    insta = instaloader.Instaloader(
        quiet = True, download_comments = False, save_metadata = False)
    insta.login(
        user = os.environ['INSTAGRAM_USERNAME'],
        passwd = os.environ['INSTAGRAM_PASSWORD'])

    account = instaloader.Profile.from_username(
        context = insta.context, username = handle)

    details = account._asdict()
    # Added explicitly on top of the `_asdict()` data.
    for field in ('followers', 'followees'):
        details[field] = getattr(account, field)
    return details

View File

@@ -77,4 +77,12 @@ class OdyseeScraper(Scraper):
key = urlparse(url).path.split('/')[-2]
ext = content_type.split('/')[-1]
return f'{key}.{ext}'
return f'{key}.{ext}'
def get_profile(self, channel: Channel) -> dict:
    """Return the ``info`` attribute of the Odysee channel named in ``channel.url``."""
    name = self.get_username_from_url(channel.url)
    return OdyseeChannel(channel_name = name).info

View File

@@ -57,6 +57,13 @@ class RumbleScraper(Scraper):
if channel.platform == "Rumble" and self.get_username_from_url(channel.url) is not None:
return True
def get_profile(self, channel: Channel) -> dict:
    """Return ``get_channel_profile`` data for the username found in ``channel.url``."""
    return get_channel_profile(
        username = self.get_username_from_url(channel.url))
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
def get_media_url(url):
@@ -90,10 +97,10 @@ def process_video(video):
return info
def get_channel_videos(channel):
def get_channel_videos(username):
page = 1
channel_url = f'{BASE_URL}/c/{channel}?page='
channel_url = f'{BASE_URL}/c/{username}?page='
while True:
url = channel_url + str(page)
@@ -111,4 +118,21 @@ def get_channel_videos(channel):
page += 1
def get_channel_profile(username):
    """Scrape header metadata from the channel page ``{BASE_URL}/c/{username}``.

    Returns a dict with the page's ``h1`` text as ``name``, whether the
    heading contains a "verified" svg, the ``src`` of the thumbnail and
    cover images, and the text of the subscriber-count span.
    """
    page = make_request(url = f'{BASE_URL}/c/{username}')
    soup = BeautifulSoup(page.content, features = 'lxml')

    def image_src(css_class):
        # `src` attribute of the first <img> carrying the given class.
        return soup.find('img', {'class' : css_class})['src']

    badge = soup.find('h1').find('svg', {'class' : 'listing-header--verified'})

    return {
        'name': soup.find('h1').text,
        'verified': badge is not None,
        'thumbnail': image_src('listing-header--thumb'),
        'cover': image_src('listing-header--backsplash-img'),
        'subscribers': soup.find('span', {'class' : 'subscribe-button-count'}).text}
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#

View File

@@ -54,3 +54,11 @@ class TelegramSnscrapeScraper(Scraper):
raw_data=post.json(),
archived_urls=archived_urls
)
def get_profile(self, channel: Channel) -> dict:
    """Return the attribute dict of the snscrape Telegram entity for ``channel.screenname``."""
    channel_scraper = snscrape.modules.telegram.TelegramChannelScraper(
        channel.screenname)
    entity = channel_scraper._get_entity()
    return entity.__dict__

View File

@@ -7,6 +7,7 @@ from pathlib import Path
from loguru import logger
from telethon.sync import TelegramClient
from telethon.tl.functions.channels import GetFullChannelRequest
from cisticola.base import Channel, ScraperResult
from cisticola.scraper.base import Scraper
@@ -74,3 +75,17 @@ class TelegramTelethonScraper(Scraper):
date_archived=datetime.now(timezone.utc),
raw_data=json.dumps(post.to_dict(), default=str),
archived_urls=archived_urls)
def get_profile(self, channel: Channel) -> dict:
    """Fetch full channel data from Telegram through Telethon.

    Credentials come from the ``TELEGRAM_API_ID``, ``TELEGRAM_API_HASH``
    and ``TELEGRAM_PHONE`` environment variables; the phone value is passed
    as the client's first positional argument. Returns the attribute dict
    of the ``GetFullChannelRequest`` result for the username in
    ``channel.url``.
    """
    handle = self.get_username_from_url(channel.url)

    # Read in this order so a missing variable raises the same KeyError as before.
    api_id, api_hash, phone = (
        os.environ[name]
        for name in ('TELEGRAM_API_ID', 'TELEGRAM_API_HASH', 'TELEGRAM_PHONE'))

    with TelegramClient(phone, api_id, api_hash) as telegram:
        full = telegram(GetFullChannelRequest(channel = handle))

    return full.__dict__

View File

@@ -2,7 +2,7 @@ from datetime import datetime, timezone
from typing import Generator
from urllib.parse import urlparse, parse_qs
from snscrape.modules.twitter import TwitterProfileScraper, Video, Gif, Photo
from snscrape.modules.twitter import TwitterProfileScraper, TwitterUserScraper, Video, Gif, Photo
from loguru import logger
from cisticola.base import Channel, ScraperResult
@@ -86,4 +86,11 @@ class TwitterScraper(Scraper):
ext = ''
key = parsed_url.path.split('/')[-1] + ext
return key
return key
def get_profile(self, channel: Channel) -> dict:
    """Return the attribute dict of the snscrape Twitter entity for ``channel.platform_id``."""
    entity = TwitterUserScraper(channel.platform_id)._get_entity()
    return entity.__dict__

View File

@@ -77,4 +77,12 @@ class VkontakteScraper(Scraper):
ext = '.mp4'
key = path.split('/')[-1] + ext
return key
return key
def get_profile(self, channel: Channel) -> dict:
    """Return the attribute dict of the snscrape VKontakte entity for the username in ``channel.url``."""
    handle = self.get_username_from_url(channel.url)
    entity = VKontakteUserScraper(handle)._get_entity()
    return entity.__dict__
return profile

View File

@@ -76,4 +76,19 @@ class YoutubeScraper(Scraper):
def can_handle(self, channel):
    """Report whether this scraper is responsible for ``channel``.

    Args:
        channel: Channel to check; its ``platform`` and ``url`` are read.

    Returns:
        True when the platform is "Youtube" and the channel has a non-empty
        ``url``, otherwise False (an explicit bool rather than an implicit
        ``None``).
    """
    return channel.platform == "Youtube" and bool(channel.url)
def get_profile(self, channel: Channel) -> dict:
    """Fetch channel metadata from YouTube via yt-dlp.

    Args:
        channel: Channel whose ``url`` is handed to yt-dlp.

    Returns:
        The result of ``YoutubeDL.extract_info(channel.url, process=False)``.

    Raises:
        yt_dlp.utils.DownloadError: propagated unchanged from yt-dlp.
    """
    ydl_opts = {}

    # Context manager closes the downloader when done. The former
    # `except DownloadError as e: raise e` was a no-op, so the error is
    # simply allowed to propagate.
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        return ydl.extract_info(
            channel.url,
            process=False)

View File

@@ -1,3 +1,5 @@
import pytest
from cisticola.base import Channel
from cisticola.scraper import BitchuteScraper
@@ -14,3 +16,10 @@ def test_scrape_bitchute_channel(controller, channel_kwargs):
channels = [Channel(**channel_kwargs['bitchute'])]
controller.register_scraper(scraper = BitchuteScraper())
controller.scrape_channels(channels = channels, archive_media = True)
@pytest.mark.profile
def test_scrape_bitchute_profile(channel_kwargs):
    """Smoke-test profile extraction for the configured Bitchute channel."""
    scraper = BitchuteScraper()
    channel = Channel(**channel_kwargs['bitchute'])

    profile = scraper.get_profile(channel=channel)

    # Minimal check so the test verifies more than "did not raise".
    assert isinstance(profile, dict)