From 282f33eff33166a8100180293d51d26eadc3ef0f Mon Sep 17 00:00:00 2001
From: Tristan Lee <tristan@bellingcat.com>
Date: Fri, 1 Apr 2022 01:30:49 -0500
Subject: [PATCH] Implement deferred media archiving for all scrapers and add
 tests for them. Refactor the archiving methods of the Instagram and Gettr
 scrapers so that they can use the default archiving method

---
 Pipfile                                |   2 +-
 Pipfile.lock                           | 254 +++++++++++--------------
 cisticola/scraper/base.py              |  19 +-
 cisticola/scraper/bitchute.py          |  10 +-
 cisticola/scraper/gab.py               |  27 ++-
 cisticola/scraper/gettr.py             |  33 ++--
 cisticola/scraper/instagram.py         |  51 +++--
 cisticola/scraper/odysee.py            |  28 ++-
 cisticola/scraper/rumble.py            |  28 ++-
 cisticola/scraper/telegram_snscrape.py |   4 +-
 cisticola/scraper/twitter.py           |  12 +-
 cisticola/scraper/vkontakte.py         |  60 ++++--
 cisticola/scraper/youtube.py           |  58 +++++-
 pytest.ini                             |   7 +-
 tests/conftest.py                      |   8 +-
 tests/scraper/bitchute.py              |   7 +
 tests/scraper/gab.py                   |   7 +
 tests/scraper/gettr.py                 |   7 +
 tests/scraper/instagram.py             |   7 +
 tests/scraper/odysee.py                |   7 +
 tests/scraper/rumble.py                |   7 +
 tests/scraper/telegram_snscrape.py     |   7 +
 tests/scraper/telegram_telethon.py     |   7 +
 tests/scraper/twitter.py               |   7 +
 tests/scraper/vkontakte.py             |   7 +
 tests/scraper/youtube.py               |   7 +
 26 files changed, 417 insertions(+), 261 deletions(-)

diff --git a/Pipfile b/Pipfile
index df21bef..c913ecb 100644
--- a/Pipfile
+++ b/Pipfile
@@ -36,7 +36,7 @@ sphinx = "*"
 sphinx_rtd_theme = "*"
 
 [requires]
-python_version = "3.8"
+python_version = "3.9"
 
 [pipenv]
 allow_prereleases = true
diff --git a/Pipfile.lock b/Pipfile.lock
index ea45b5e..2d95b71 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,11 +1,11 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "e57f79178ac0e05f9753a29f97e08d2ae96b7775044bb4c6ba616baae1d21183"
+            "sha256": "b9fc02f3ecaa2199480c4fcba30f02780860dfbc2e10c026889c78f639709fb4"
         },
         "pipfile-spec": 6,
         "requires": {
-            "python_version": "3.8"
+            "python_version": "3.9"
         },
         "sources": [
             {
@@ -16,28 +16,6 @@
         ]
     },
     "default": {
-        "backports.zoneinfo": {
-            "hashes": [
-                "sha256:17746bd546106fa389c51dbea67c8b7c8f0d14b5526a579ca6ccf5ed72c526cf",
-                "sha256:1b13e654a55cd45672cb54ed12148cd33628f672548f373963b0bff67b217328",
-                "sha256:1c5742112073a563c81f786e77514969acb58649bcdf6cdf0b4ed31a348d4546",
-                "sha256:4a0f800587060bf8880f954dbef70de6c11bbe59c673c3d818921f042f9954a6",
-                "sha256:5c144945a7752ca544b4b78c8c41544cdfaf9786f25fe5ffb10e838e19a27570",
-                "sha256:7b0a64cda4145548fed9efc10322770f929b944ce5cee6c0dfe0c87bf4c0c8c9",
-                "sha256:8439c030a11780786a2002261569bdf362264f605dfa4d65090b64b05c9f79a7",
-                "sha256:8961c0f32cd0336fb8e8ead11a1f8cd99ec07145ec2931122faaac1c8f7fd987",
-                "sha256:89a48c0d158a3cc3f654da4c2de1ceba85263fafb861b98b59040a5086259722",
-                "sha256:a76b38c52400b762e48131494ba26be363491ac4f9a04c1b7e92483d169f6582",
-                "sha256:da6013fd84a690242c310d77ddb8441a559e9cb3d3d59ebac9aca1a57b2e18bc",
-                "sha256:e55b384612d93be96506932a786bbcde5a2db7a9e6a4bb4bffe8b733f5b9036b",
-                "sha256:e81b76cace8eda1fca50e345242ba977f9be6ae3945af8d46326d776b4cf78d1",
-                "sha256:e8236383a20872c0cdf5a62b554b27538db7fa1bbec52429d8d106effbaeca08",
-                "sha256:f04e857b59d9d1ccc39ce2da1021d196e47234873820cbeaad210724b1ee28ac",
-                "sha256:fadbfe37f74051d024037f223b8e001611eac868b5c5b06144ef4d8b799862f2"
-            ],
-            "markers": "python_version >= '3.6' and python_version < '3.9'",
-            "version": "==0.2.1"
-        },
         "beautifulsoup4": {
             "hashes": [
                 "sha256:9a315ce70049920ea4572a4055bc4bd700c940521d36fc858205ad4fcde149bf",
@@ -48,19 +26,19 @@
         },
         "boto3": {
             "hashes": [
-                "sha256:ef210f8e85cdb6d26a38ebad1cfe9cefdef2ab269207e5987653555375a7ef6b",
-                "sha256:f0af8f4ef5fe6353c794cd3cce627d469a618b58ace7ca75a63cfd719df615ce"
+                "sha256:35f68b60652bff50e7bc926238443cb578f29f120908bb945e5640e90c6dd53e",
+                "sha256:7f3f93ee97215862ccd1a216f37deb7d64055c71f826b821805904df7b84ee6a"
             ],
             "index": "pypi",
-            "version": "==1.21.30"
+            "version": "==1.21.31"
         },
         "botocore": {
             "hashes": [
-                "sha256:af4bdc51eeecbe9fdcdadbed9ad58c5c91380ef30f3560022bbc2ee1d78f0ad6",
-                "sha256:c622751093e3d0bf61343e66d6d06190ef30bf42b1557d5070ca84e9efa06d4b"
+                "sha256:3bb21e3ee5e4de3ed76bb99b4496a46e9b5c82e7b7fdb62702f11dda1b57b769",
+                "sha256:424fd94bef86a11f5340dc15eb50602dedec2ecc01c3a25c4fea23a2c8195500"
             ],
             "markers": "python_version >= '3.6'",
-            "version": "==1.24.30"
+            "version": "==1.24.31"
         },
         "brotli": {
             "hashes": [
@@ -217,11 +195,11 @@
         },
         "click": {
             "hashes": [
-                "sha256:5e0d195c2067da3136efb897449ec1e9e6c98282fbf30d7f9e164af9be901a6b",
-                "sha256:7ab900e38149c9872376e8f9b5986ddcaf68c0f413cf73678a0bca5547e6f976"
+                "sha256:24e1a4a9ec5bf6299411369b208c1df2188d9eb8d916302fe6bf03faed227f1e",
+                "sha256:479707fe14d9ec9a0757618b7a100a0ae4c4e236fac5b7f80ca68028141a1a72"
             ],
             "markers": "python_version >= '3.7'",
-            "version": "==8.1.1"
+            "version": "==8.1.2"
         },
         "cryptg": {
             "hashes": [
@@ -324,64 +302,63 @@
         },
         "greenlet": {
             "hashes": [
-                "sha256:0051c6f1f27cb756ffc0ffbac7d2cd48cb0362ac1736871399a739b2885134d3",
-                "sha256:00e44c8afdbe5467e4f7b5851be223be68adb4272f44696ee71fe46b7036a711",
-                "sha256:013d61294b6cd8fe3242932c1c5e36e5d1db2c8afb58606c5a67efce62c1f5fd",
-                "sha256:049fe7579230e44daef03a259faa24511d10ebfa44f69411d99e6a184fe68073",
-                "sha256:14d4f3cd4e8b524ae9b8aa567858beed70c392fdec26dbdb0a8a418392e71708",
-                "sha256:166eac03e48784a6a6e0e5f041cfebb1ab400b394db188c48b3a84737f505b67",
-                "sha256:17ff94e7a83aa8671a25bf5b59326ec26da379ace2ebc4411d690d80a7fbcf23",
-                "sha256:1e12bdc622676ce47ae9abbf455c189e442afdde8818d9da983085df6312e7a1",
-                "sha256:21915eb821a6b3d9d8eefdaf57d6c345b970ad722f856cd71739493ce003ad08",
-                "sha256:288c6a76705dc54fba69fbcb59904ae4ad768b4c768839b8ca5fdadec6dd8cfd",
-                "sha256:2bde6792f313f4e918caabc46532aa64aa27a0db05d75b20edfc5c6f46479de2",
-                "sha256:32ca72bbc673adbcfecb935bb3fb1b74e663d10a4b241aaa2f5a75fe1d1f90aa",
-                "sha256:356b3576ad078c89a6107caa9c50cc14e98e3a6c4874a37c3e0273e4baf33de8",
-                "sha256:40b951f601af999a8bf2ce8c71e8aaa4e8c6f78ff8afae7b808aae2dc50d4c40",
-                "sha256:572e1787d1460da79590bf44304abbc0a2da944ea64ec549188fa84d89bba7ab",
-                "sha256:58df5c2a0e293bf665a51f8a100d3e9956febfbf1d9aaf8c0677cf70218910c6",
-                "sha256:64e6175c2e53195278d7388c454e0b30997573f3f4bd63697f88d855f7a6a1fc",
-                "sha256:7227b47e73dedaa513cdebb98469705ef0d66eb5a1250144468e9c3097d6b59b",
-                "sha256:7418b6bfc7fe3331541b84bb2141c9baf1ec7132a7ecd9f375912eca810e714e",
-                "sha256:7cbd7574ce8e138bda9df4efc6bf2ab8572c9aff640d8ecfece1b006b68da963",
-                "sha256:7ff61ff178250f9bb3cd89752df0f1dd0e27316a8bd1465351652b1b4a4cdfd3",
-                "sha256:833e1551925ed51e6b44c800e71e77dacd7e49181fdc9ac9a0bf3714d515785d",
-                "sha256:8639cadfda96737427330a094476d4c7a56ac03de7265622fcf4cfe57c8ae18d",
-                "sha256:8c5d5b35f789a030ebb95bff352f1d27a93d81069f2adb3182d99882e095cefe",
-                "sha256:8c790abda465726cfb8bb08bd4ca9a5d0a7bd77c7ac1ca1b839ad823b948ea28",
-                "sha256:8d2f1fb53a421b410751887eb4ff21386d119ef9cde3797bf5e7ed49fb51a3b3",
-                "sha256:903bbd302a2378f984aef528f76d4c9b1748f318fe1294961c072bdc7f2ffa3e",
-                "sha256:93f81b134a165cc17123626ab8da2e30c0455441d4ab5576eed73a64c025b25c",
-                "sha256:95e69877983ea39b7303570fa6760f81a3eec23d0e3ab2021b7144b94d06202d",
-                "sha256:9633b3034d3d901f0a46b7939f8c4d64427dfba6bbc5a36b1a67364cf148a1b0",
-                "sha256:97e5306482182170ade15c4b0d8386ded995a07d7cc2ca8f27958d34d6736497",
-                "sha256:9f3cba480d3deb69f6ee2c1825060177a22c7826431458c697df88e6aeb3caee",
-                "sha256:aa5b467f15e78b82257319aebc78dd2915e4c1436c3c0d1ad6f53e47ba6e2713",
-                "sha256:abb7a75ed8b968f3061327c433a0fbd17b729947b400747c334a9c29a9af6c58",
-                "sha256:aec52725173bd3a7b56fe91bc56eccb26fbdff1386ef123abb63c84c5b43b63a",
-                "sha256:b11548073a2213d950c3f671aa88e6f83cda6e2fb97a8b6317b1b5b33d850e06",
-                "sha256:b1692f7d6bc45e3200844be0dba153612103db241691088626a33ff1f24a0d88",
-                "sha256:b336501a05e13b616ef81ce329c0e09ac5ed8c732d9ba7e3e983fcc1a9e86965",
-                "sha256:b8c008de9d0daba7b6666aa5bbfdc23dcd78cafc33997c9b7741ff6353bafb7f",
-                "sha256:b92e29e58bef6d9cfd340c72b04d74c4b4e9f70c9fa7c78b674d1fec18896dc4",
-                "sha256:be5f425ff1f5f4b3c1e33ad64ab994eed12fc284a6ea71c5243fd564502ecbe5",
-                "sha256:dd0b1e9e891f69e7675ba5c92e28b90eaa045f6ab134ffe70b52e948aa175b3c",
-                "sha256:e30f5ea4ae2346e62cedde8794a56858a67b878dd79f7df76a0767e356b1744a",
-                "sha256:e6a36bb9474218c7a5b27ae476035497a6990e21d04c279884eb10d9b290f1b1",
-                "sha256:e859fcb4cbe93504ea18008d1df98dee4f7766db66c435e4882ab35cf70cac43",
-                "sha256:eb6ea6da4c787111adf40f697b4e58732ee0942b5d3bd8f435277643329ba627",
-                "sha256:ec8c433b3ab0419100bd45b47c9c8551248a5aee30ca5e9d399a0b57ac04651b",
-                "sha256:eff9d20417ff9dcb0d25e2defc2574d10b491bf2e693b4e491914738b7908168",
-                "sha256:f0214eb2a23b85528310dad848ad2ac58e735612929c8072f6093f3585fd342d",
-                "sha256:f276df9830dba7a333544bd41070e8175762a7ac20350786b322b714b0e654f5",
-                "sha256:f3acda1924472472ddd60c29e5b9db0cec629fbe3c5c5accb74d6d6d14773478",
-                "sha256:f70a9e237bb792c7cc7e44c531fd48f5897961701cdaa06cf22fc14965c496cf",
-                "sha256:f9d29ca8a77117315101425ec7ec2a47a22ccf59f5593378fc4077ac5b754fce",
-                "sha256:fa877ca7f6b48054f847b61d6fa7bed5cebb663ebc55e018fda12db09dcc664c",
-                "sha256:fdcec0b8399108577ec290f55551d926d9a1fa6cad45882093a7a07ac5ec147b"
+                "sha256:004aed447382d80a56ecc354a6d807f305e6c808714ce6ccbca4839c94fae81d",
+                "sha256:068d68fad6bd623e29a2d36e74538c9b9d6dc6464931cd27d93da6cfc6a7f242",
+                "sha256:06fd4075754009c9817c6b4e1dc0af4616de52757b6ca973a81c3c1aadc28257",
+                "sha256:1004cb542451814b12a4f38e835a47734e2b2c683acbf463d5ae76282a3974cf",
+                "sha256:10c358633a8b27bfc32d27114ef2ca2ddc9f1f89f1643d1157b85e1fdd695315",
+                "sha256:115bc25fefbdc692c4483e9ddb9011ccd0251590ed59dbfff0f4eb7050bf99c4",
+                "sha256:1d987a2579336792f73ae6b106c2f087e32afc8573fbf9566f123ac6d8cfb72f",
+                "sha256:2128d727fd1e8afba8e68feb2cdcf88c90163b69ddc9707722a3e491c5280720",
+                "sha256:230132c241fe284f93f2e7b3969e9b22bbd76ef98cf93e382c945d378907f5a4",
+                "sha256:23558f7bd08a663386c032ab8d302d613d2d02ae0c9758ad410bab6035b58d3d",
+                "sha256:255d520d3e4a5f16883b182e1a94219fe455ab4f50aaaf534bfd6d64ee728397",
+                "sha256:2a6bc19a728f6f643cfc89b876159a1a25a8f7d8700c013d48a73691f80b4550",
+                "sha256:379bed346ef8ba0a0e698b3c5975a44d15dd4a5bbff40bbd7fd548b445d5550b",
+                "sha256:3b12d0866759db93b0a893b4e50a7d7d1681519d2346c26695bb8bb2c652230e",
+                "sha256:40d491944f69e350e1e8b25f6ca49459824ede1678ec0cd4b5541f41edc06614",
+                "sha256:471484c7b9d7b7867263051aa81cdeed6e06b455e629a7f05eb91a6cb8bd0836",
+                "sha256:488c557080557bc01aabb3e1bda7225c68455b853733a8652857ac0d810dad1b",
+                "sha256:49c2e76e7aa81ba889b3c183e2341af3cc6161ee38852085110ae49d5b5d9a40",
+                "sha256:52d13ec90236e5935ed6da044e78faa1371d5116cc43fe6d7ca8994dd619ef96",
+                "sha256:57898c69a253d81f487787bdd538629fabd671fab8a9e31b041ca30965fd9556",
+                "sha256:5d577eef5beb5730ef01ab39983eb852a97c359b7a546809adf70c409f4b2ecc",
+                "sha256:6a41987c1474c9158a0c0c96611530a8f299bc547d35bee8add981b8b2534f74",
+                "sha256:6ae67b7df8db3626af8e042e9c6949cfa27d1a3bbbfdff29e45b72bb6673a650",
+                "sha256:6c42c27e9d12e8a481aff469ffe8dd4ce0484c354a418470960f760f6ae41e7c",
+                "sha256:6c4a90c9f6128b4d0905a89930bd325e0491574e5cb453f606bb7094a3197587",
+                "sha256:6e64518e5833ac2d9359b6d9bd4df2c0cf441a0f3a4eca9e735fbea99009fa70",
+                "sha256:6fd3a270c23c5b42d86a9c7c6b0229f23ee4a7a4cabdaaa1693ad7a0982d13cb",
+                "sha256:70db73351e0fcf11a76288c47a0469d9a330bcb2e7618c5eb57432b8caa82403",
+                "sha256:771f401692046845626cbdf1dd0f04e999413ede0ee9ad39033fe30b5fa2e845",
+                "sha256:7935026ec61b967cbc6b746c0ca75c1651ea118d7fee4d259cff9e6866153374",
+                "sha256:7b76b1cac9baac1980210e29145800954e7b42e91ef69c4d695de1cab87ce41f",
+                "sha256:7e3f37c11b6699b1a1e0fcc0e88829dba4f2866546381b05ab8b3f4db645a823",
+                "sha256:8370fa65ad421484894f559055f951843754153b72b9bca2ebdc5288efe2e3f0",
+                "sha256:8ae9c443d44a4e23252632e4d7775f419f992d0df3eff923e23775f5cc551d39",
+                "sha256:8b31d85f2781e44f1ffaaf7ea07f484e7d42317c677c355fa77b4a1a4bea7394",
+                "sha256:8b450336b27f3b375cadc474c6704838eaa8dd3ca312aac3bb69d92264a8e638",
+                "sha256:9ce84357388a76d886febff4e50e321c212ffd3248b590960b2da6e02404a5c9",
+                "sha256:a23e986fb0ba8e7407286add41fa0d4207be44e3dce1b04789f4757800eca1cf",
+                "sha256:a81610ee00d0da9cd2c8679479b7791149365b6dfb3971b01b22ee29b04787ce",
+                "sha256:b4e40444975e5ab0ed3004369209c39a28e084951daaeee4919f164b6b849b14",
+                "sha256:b66600de16702b9dfa74bea34524b55183a2183e5fd92f20fe6c2fcae550a64c",
+                "sha256:ba6ee18694d3673796b7a31b7d21254e87e9e43ca5be56f323fd396111255315",
+                "sha256:bd03837da28293baa39bdfc3cada69e2f8807f423ae06168aa28d2b32c63a6b6",
+                "sha256:bd2192070f88c0778ae1d68a0980fdece3473498c1db37f3794e3454f91e3ecf",
+                "sha256:c1f6f1a3cc013012cd1da913c40b13e6d721046a8c8a0ea0cde94069645a75db",
+                "sha256:ce10a8e7e067bde3c1fbf494d2b8859db510206030b0b67bc3af90b0eb1887b9",
+                "sha256:d31386d208303a5a6cf0819ef9f6db6680bab9e4ca8e48adb3d4b26ead89beb7",
+                "sha256:d83b3af53b201970973c5574b39df226746194063bb248a53fd12b470ac34319",
+                "sha256:df9657b212c054ac6d803290d7c4bcd7790af0b725984fce1eeb0a1e3f2d9798",
+                "sha256:e576e5fd3f129e6b3595dc734ac7f2b8c548f19ef07781194bc538dc9c0cdbbc",
+                "sha256:e7400358558094c1bcedc75f3b3c4f400c53130b44833848890a99968dee6a64",
+                "sha256:eb6a385f8577d30e4cb43dd555fb134ddaae1edeb84205e09dabec332bf49fd0",
+                "sha256:f27f0875e0873f6bf5df09a456bfcac0667824cabac4cad30b43f36e0382ffe7",
+                "sha256:fcd4a6d04995f1d66bc78b503e4e59ae72fd32aaec4f661657fe5ae5c1aa4ce3"
             ],
-            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
-            "version": "==1.1.2"
+            "markers": "python_version >= '3' and platform_machine == 'aarch64' or (platform_machine == 'ppc64le' or (platform_machine == 'x86_64' or (platform_machine == 'amd64' or (platform_machine == 'AMD64' or (platform_machine == 'win32' or platform_machine == 'WIN32')))))",
+            "version": "==2.0.0a2"
         },
         "gspread": {
             "hashes": [
@@ -416,11 +393,11 @@
         },
         "loguru": {
             "hashes": [
-                "sha256:066bd06758d0a513e9836fd9c6b5a75bfb3fd36841f4b996bc60b547a309d41c",
-                "sha256:4e2414d534a2ab57573365b3e6d0234dfb1d84b68b7f3b948e6fb743860a77c3"
+                "sha256:b28e72ac7a98be3d28ad28570299a393dfcd32e5e3f6a353dec94675767b6319",
+                "sha256:f8087ac396b5ee5f67c963b495d615ebbceac2796379599820e324419d53667c"
             ],
             "index": "pypi",
-            "version": "==0.6.0"
+            "version": "==0.5.3"
         },
         "lxml": {
             "hashes": [
@@ -895,9 +872,7 @@
             "version": "==2022.3.2"
         },
         "requests": {
-            "extras": [
-                "socks"
-            ],
+            "extras": [],
             "hashes": [
                 "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61",
                 "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"
@@ -918,7 +893,7 @@
                 "sha256:5c6bd9dc7a543b7fe4304a631f8a8a3b674e2bbfc49c2ae96200cdbe55df6b17",
                 "sha256:95c5d300c4e879ee69708c428ba566c59478fd653cc3a22243eeb8ed846950bb"
             ],
-            "markers": "python_version >= '3.6'",
+            "markers": "python_version >= '3.6' and python_version < '4'",
             "version": "==4.8"
         },
         "s3transfer": {
@@ -951,44 +926,45 @@
         },
         "sqlalchemy": {
             "hashes": [
-                "sha256:04164e0063feb7aedd9d073db0fd496edb244be40d46ea1f0d8990815e4b8c34",
-                "sha256:159c2f69dd6efd28e894f261ffca1100690f28210f34cfcd70b895e0ea7a64f3",
-                "sha256:199dc6d0068753b6a8c0bd3aceb86a3e782df118260ebc1fa981ea31ee054674",
-                "sha256:1bbac3e8293b34c4403d297e21e8f10d2a57756b75cff101dc62186adec725f5",
-                "sha256:20e9eba7fd86ef52e0df25bea83b8b518dfdf0bce09b336cfe51671f52aaaa3f",
-                "sha256:290cbdf19129ae520d4bdce392648c6fcdbee763bc8f750b53a5ab51880cb9c9",
-                "sha256:316270e5867566376e69a0ac738b863d41396e2b63274616817e1d34156dff0e",
-                "sha256:3f88a4ee192142eeed3fe173f673ea6ab1f5a863810a9d85dbf6c67a9bd08f97",
-                "sha256:4aa96e957141006181ca58e792e900ee511085b8dae06c2d08c00f108280fb8a",
-                "sha256:4b2bcab3a914715d332ca783e9bda13bc570d8b9ef087563210ba63082c18c16",
-                "sha256:576684771456d02e24078047c2567025f2011977aa342063468577d94e194b00",
-                "sha256:5a2e73508f939175363d8a4be9dcdc84cf16a92578d7fa86e6e4ca0e6b3667b2",
-                "sha256:5ba59761c19b800bc2e1c9324da04d35ef51e4ee9621ff37534bc2290d258f71",
-                "sha256:5dc9801ae9884e822ba942ca493642fb50f049c06b6dbe3178691fce48ceb089",
-                "sha256:6fdd2dc5931daab778c2b65b03df6ae68376e028a3098eb624d0909d999885bc",
-                "sha256:708973b5d9e1e441188124aaf13c121e5b03b6054c2df59b32219175a25aa13e",
-                "sha256:7ff72b3cc9242d1a1c9b84bd945907bf174d74fc2519efe6184d6390a8df478b",
-                "sha256:8679f9aba5ac22e7bce54ccd8a77641d3aea3e2d96e73e4356c887ebf8ff1082",
-                "sha256:8b9a395122770a6f08ebfd0321546d7379f43505882c7419d7886856a07caa13",
-                "sha256:8e1e5d96b744a4f91163290b01045430f3f32579e46d87282449e5b14d27d4ac",
-                "sha256:9a0195af6b9050c9322a97cf07514f66fe511968e623ca87b2df5e3cf6349615",
-                "sha256:9cb5698c896fa72f88e7ef04ef62572faf56809093180771d9be8d9f2e264a13",
-                "sha256:b3f1d9b3aa09ab9adc7f8c4b40fc3e081eb903054c9a6f9ae1633fe15ae503b4",
-                "sha256:bb42f9b259c33662c6a9b866012f6908a91731a419e69304e1261ba3ab87b8d1",
-                "sha256:bca714d831e5b8860c3ab134c93aec63d1a4f493bed20084f54e3ce9f0a3bf99",
-                "sha256:bedd89c34ab62565d44745212814e4b57ef1c24ad4af9b29c504ce40f0dc6558",
-                "sha256:bfec934aac7f9fa95fc82147a4ba5db0a8bdc4ebf1e33b585ab8860beb10232f",
-                "sha256:c7046f7aa2db445daccc8424f50b47a66c4039c9f058246b43796aa818f8b751",
-                "sha256:d7e483f4791fbda60e23926b098702340504f7684ce7e1fd2c1bf02029288423",
-                "sha256:dd93162615870c976dba43963a24bb418b28448fef584f30755990c134a06a55",
-                "sha256:e4607d2d16330757818c9d6fba322c2e80b4b112ff24295d1343a80b876eb0ed",
-                "sha256:e9a680d9665f88346ed339888781f5236347933906c5a56348abb8261282ec48",
-                "sha256:edfcf93fd92e2f9eef640b3a7a40db20fe3c1d7c2c74faa41424c63dead61b76",
-                "sha256:f7e4a3c0c3c596296b37f8427c467c8e4336dc8d50f8ed38042e8ba79507b2c9",
-                "sha256:fff677fa4522dafb5a5e2c0cf909790d5d367326321aeabc0dffc9047cb235bd"
+                "sha256:045d6a26c262929af0b9cb25441aae675ac04db4ea8bd2446b355617cd6b6b7d",
+                "sha256:07f4dab2deb6d34618a2ccfff3971a85923ad7c3a9a45401818870fc51d3f0cc",
+                "sha256:08aaad905aba8940f27aeb9f1f851bf63f18ef97b0062ca41f64afc4b64e0e8c",
+                "sha256:27a42894a2751e438eaed12fc0dcfe741ff2f66c14760d081222c5adc5460064",
+                "sha256:2a3e4dc7c452ba3c0f3175ad5a8e0ba49c2b0570a8d07272cf50844c8d78e74f",
+                "sha256:345306707bb0e51e7cd6e7573adafbce018894ee5e3b9c31134545f704936db0",
+                "sha256:36f08d94670315ca04c8139bd80b3e02b9dd9cc66fc11bcb96fd10ad51a051ab",
+                "sha256:3ebb97ed96f4506e2f212e1fcf0ec07a103bb194938627660a5acb4d9feae49c",
+                "sha256:40b995d7aeeb6f88a1927ce6692c0f626b59d8effd3e1d597f125e141707b37c",
+                "sha256:4414ace6e3a5e39523e55a5d9f3b215699b2ead4ff91fca98f1b659b7ab2d92a",
+                "sha256:50107d8183da3fbe5715957aa3954cd9d82aed555c5b4d3fd37fac861af422fa",
+                "sha256:50174e173d03209c34e07e7b57cca48d0082ac2390edf927aafc706c111da11e",
+                "sha256:5e88912bf192e7b5739c446d2276e1cba74cfa6c1c93eea2b2534404f6be1dbd",
+                "sha256:621d3f6c0ba2407bb97e82b649be5ca7d5b6c201dcfb964ce13f517bf1cb6305",
+                "sha256:623bac2d6bdca3f3e61cf1e1c466c5fb9f5cf08735736ee1111187b7a4108891",
+                "sha256:671f61c3db4595b0e86cc4b30f675a7c0206d9ce99f041b4f6761c7ddd1e0074",
+                "sha256:67c1c27c48875afc950bee5ee24582794f20b545e64e4f9ca94071a9b514d6ed",
+                "sha256:6a6cfd468f54d65324fd3847cfd0148b0610efa6a43e5f5fcc89f455696ae9e7",
+                "sha256:70048a83f0a1ece1fcd7189891c888e20af2c57fbd33eb760d8cece9843b896c",
+                "sha256:7ee14a7f9f76d1ef9d5e5b760c9252617c839b87eee04d1ce8325ac66ae155c4",
+                "sha256:804cf491437f3e4ce31247ab4b309b181f06ecc97d309b746d10f09439b4eb85",
+                "sha256:878c7beaafa365602762c19f638282e1885454fed1aed86f8fae038933c7c671",
+                "sha256:954ea8c527c4322afb6885944904714893af81fe9167e421273770991bf08a4a",
+                "sha256:a47bf6b7ca6c28e4f4e262fabcf5be6b907af81be36de77839c9eeda2cdf3bb3",
+                "sha256:a4fb5c6ee84a6bba4ff6f9f5379f0b3a0ffe9de7ba5a0945659b3da8d519709b",
+                "sha256:b34bbc683789559f1bc9bb685fc162e0956dbbdfbe2fbd6755a9f5982c113610",
+                "sha256:c025d45318b73c0601cca451532556cbab532b2742839ebb8cb58f9ebf06811e",
+                "sha256:c3ad7f5b61ba014f5045912aea15b03c473bb02b1c07fd92c9d2c794fa183276",
+                "sha256:c9218e3519398129e364121e0d89823e6ba2a2b77c28bfc661face0829c41433",
+                "sha256:cd5cffd1dd753828f1069f33062f3896e51c990acd957c264f40e051b3e19887",
+                "sha256:d8efcaa709ea8e7c08c3d3e7639c39b36083f5a995f397f9e6eedf5f5e4e4946",
+                "sha256:e297a5cc625e3f1367a82deedf2d48ee4d2b2bd263b8b8d2efbaaf5608b5229e",
+                "sha256:e67278ceb63270cdac0a7b89fc3c29a56f7dac9616a7ee48e7ad6b52e3b631e5",
+                "sha256:eb6558ba07409dafa18c793c34292b3265be455904966f0724c10198829477e3",
+                "sha256:f197c66663ed0f9e1178d51141d864688fb244a83f6b17f667d521e482537b2e",
+                "sha256:f47996b1810894f766c9ee689607077c6c0e0fd6761e04c12ba13efb56d50c1d"
             ],
             "index": "pypi",
-            "version": "==1.4.32"
+            "version": "==1.4.34"
         },
         "telethon": {
             "hashes": [
@@ -1163,11 +1139,11 @@
         },
         "click": {
             "hashes": [
-                "sha256:5e0d195c2067da3136efb897449ec1e9e6c98282fbf30d7f9e164af9be901a6b",
-                "sha256:7ab900e38149c9872376e8f9b5986ddcaf68c0f413cf73678a0bca5547e6f976"
+                "sha256:24e1a4a9ec5bf6299411369b208c1df2188d9eb8d916302fe6bf03faed227f1e",
+                "sha256:479707fe14d9ec9a0757618b7a100a0ae4c4e236fac5b7f80ca68028141a1a72"
             ],
             "markers": "python_version >= '3.7'",
-            "version": "==8.1.1"
+            "version": "==8.1.2"
         },
         "coverage": {
             "extras": [
@@ -1415,9 +1391,7 @@
             "version": "==2022.1"
         },
         "requests": {
-            "extras": [
-                "socks"
-            ],
+            "extras": [],
             "hashes": [
                 "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61",
                 "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"
@@ -1501,7 +1475,7 @@
                 "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc",
                 "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"
             ],
-            "markers": "python_full_version < '3.11.0'",
+            "markers": "python_version >= '3.7'",
             "version": "==2.0.1"
         },
         "typing-extensions": {
diff --git a/cisticola/scraper/base.py b/cisticola/scraper/base.py
index fb25b58..ccbfa39 100644
--- a/cisticola/scraper/base.py
+++ b/cisticola/scraper/base.py
@@ -235,6 +235,20 @@ class Scraper:
         return archived_url
 
     def archive_files(self, result: ScraperResult) -> ScraperResult:
+        """Archive files corresponding to ``archived_urls`` dict keys, if the
+        files have not previously been archived.
+
+        Parameters
+        ----------
+        result: ScraperResult
+            Previously scraped ScraperResult run with ``archive_media=False``.
+
+        Returns
+        -------
+        ScraperResult
+            Same ScraperResult as ``result``, but with all URLs in ``archived_urls`` dict archived.
+        """
+
         for url in result.archived_urls:
             if result.archived_urls[url] is None:
                 media_blob, content_type, key = self.url_to_blob(url)
@@ -244,7 +258,6 @@ class Scraper:
         result.media_archived = True
         return result
 
-
     def can_handle(self, channel: Channel) -> bool:
         """Whether or not the scraper can scrape the specified channel.
 
@@ -365,6 +378,10 @@ class ScraperController:
                     else:
                         since = None
 
+                    # TODO: if a channel has not been added to the database, channel.id is None, and the `since` query above returns the most recently scraped ScraperResult with channel.id == None, which can be from a different platform and channel. Consider adding a check to the query logic above that channel.id is not null.
+                    if channel.id is None:
+                        since = None
+
                     posts = scraper.get_posts(channel, since=since, archive_media=archive_media)
 
                     for post in posts:
diff --git a/cisticola/scraper/bitchute.py b/cisticola/scraper/bitchute.py
index 034e3ac..f318b1e 100644
--- a/cisticola/scraper/bitchute.py
+++ b/cisticola/scraper/bitchute.py
@@ -43,9 +43,12 @@ class BitchuteScraper(Scraper):
 
             archived_urls = {}
 
-            if archive_media:
-                if 'video_url' in post:
-                    url = post['video_url']
+            if 'video_url' in post:
+                url = post['video_url']
+                archived_urls[url] = None 
+
+                if archive_media:
+
                     media_blob, content_type, key = self.url_to_blob(url)
                     archived_url = self.archive_blob(media_blob, content_type, key)
                     archived_urls[url] = archived_url
@@ -112,6 +115,7 @@ class BitchuteScraper(Scraper):
             channel=channel.id,
             raw_data=json.dumps(profile),
             date_archived=datetime.now(timezone.utc))
+            
 #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
 
 def strip_tags(html, convert_newlines=True):
diff --git a/cisticola/scraper/gab.py b/cisticola/scraper/gab.py
index f66d562..36baf67 100644
--- a/cisticola/scraper/gab.py
+++ b/cisticola/scraper/gab.py
@@ -50,25 +50,24 @@ class GabScraper(Scraper):
             if since is not None and datetime.fromisoformat(post['created_at'].replace("Z", "+00:00")).replace(tzinfo=timezone.utc) <= since.date.replace(tzinfo=timezone.utc):
                 break
 
-            media_urls = []
             archived_urls = {}
 
-            if archive_media:
-
-                for attachment in post.get('media_attachments'):
+            for attachment in post.get('media_attachments'):
+                if attachment.get('type') == 'video':
+                    archived_urls[attachment['source_mp4']] = None
+                else:
+                    archived_urls[attachment['url']] = None
+                    
+            if post.get('reblog') is not None:
+                for attachment in post['reblog'].get('media_attachments'):
                     if attachment.get('type') == 'video':
-                        media_urls.append(attachment['source_mp4'])
+                        archived_urls[attachment['source_mp4']] = None
                     else:
-                        media_urls.append(attachment['url'])
-                        
-                if post.get('reblog') is not None:
-                    for attachment in post['reblog'].get('media_attachments'):
-                        if attachment.get('type') == 'video':
-                            media_urls.append(attachment['source_mp4'])
-                        else:
-                            media_urls.append(attachment['url'])
+                        archived_urls[attachment['url']] = None
 
-                for url in media_urls:
+            for url in archived_urls.keys():
+
+                if archive_media:
                     media_blob, content_type, key = self.url_to_blob(url)
                     archived_url = self.archive_blob(media_blob, content_type, key)
                     archived_urls[url] = archived_url
diff --git a/cisticola/scraper/gettr.py b/cisticola/scraper/gettr.py
index a5088cd..43bc095 100644
--- a/cisticola/scraper/gettr.py
+++ b/cisticola/scraper/gettr.py
@@ -30,26 +30,25 @@ class GettrScraper(Scraper):
 
             archived_urls = {}
 
-            if archive_media:
+            if 'imgs' in post:
+                for img in post['imgs']:
+                    url = "https://media.gettr.com/" + img
+                    archived_urls[url] = None
 
-                if 'imgs' in post:
-                    for img in post['imgs']:
-                        url = "https://media.gettr.com/" + img
-                        media_blob, content_type, key = self.url_to_blob(url)
-                        archived_url = self.archive_blob(media_blob, content_type, key)
-                        archived_urls[img] = archived_url
+            if 'main' in post:
+                url = "https://media.gettr.com/" + post['main']
+                archived_urls[url] = None
 
-                if 'main' in post:
-                    url = "https://media.gettr.com/" + post['main']
+            if 'ovid' in post:
+                url = "https://media.gettr.com/" + post['ovid']
+                archived_urls[url] = None
+
+            for url in archived_urls.keys():
+
+                if archive_media:
                     media_blob, content_type, key = self.url_to_blob(url)
                     archived_url = self.archive_blob(media_blob, content_type, key)
-                    archived_urls[post['main']] = archived_url
-
-                if 'vid' in post:
-                    url = "https://media.gettr.com/" + post['vid']
-                    media_blob, content_type, key = self.m3u8_url_to_blob(url)
-                    archived_url = self.archive_blob(media_blob, content_type, key)
-                    archived_urls[post['vid']] = archived_url
+                    archived_urls[url] = archived_url
 
             yield ScraperResult(
                 scraper=self.__version__,
@@ -72,7 +71,7 @@ class GettrScraper(Scraper):
         return key 
 
     def get_profile(self, channel: Channel) -> RawChannelInfo:
-        client = client = PublicClient()
+        client = PublicClient()
         username = self.get_username_from_url(channel.url)
         profile = client.user_info(username)
 
diff --git a/cisticola/scraper/instagram.py b/cisticola/scraper/instagram.py
index dfe0304..435d69d 100644
--- a/cisticola/scraper/instagram.py
+++ b/cisticola/scraper/instagram.py
@@ -1,4 +1,4 @@
-from typing import Generator
+from typing import Generator, List
 from datetime import datetime, timezone
 import os
 import json
@@ -49,28 +49,14 @@ class InstagramScraper(Scraper):
 
             post_url = f'{BASE_URL}p/{post.shortcode}/'
 
-            archived_urls = {}
+            archived_urls = get_archived_urls_from_post(post = post)
 
-            if archive_media:
+            for url in archived_urls.keys():
 
-                with tempfile.TemporaryDirectory() as temp_dir:
-
-                    loader.download_post(post = post, target = Path(temp_dir))
-
-                    files = os.listdir(temp_dir)
-                    files = [f for f in files if not f.endswith('.txt')]
-
-                    for file in files:
-                        ext = file.split('.')[-1]
-                        content_type = CONTENT_TYPES[ext]
-                        filename = Path(temp_dir, file)
-                        key = f'{post.shortcode}__{file}'
-                    
-                        with open(filename, 'rb') as f:
-                            blob = f.read()
-                
-                        archived_url = self.archive_blob(blob = blob, content_type = content_type, key = key)
-                        archived_urls[post_url] = archived_url
+                if archive_media:
+                    media_blob, content_type, key = self.url_to_blob(url)
+                    archived_url = self.archive_blob(media_blob, content_type, key)
+                    archived_urls[url] = archived_url
 
             yield ScraperResult(
                 scraper=self.__version__,
@@ -98,7 +84,7 @@ class InstagramScraper(Scraper):
                     date_archived=datetime.now(timezone.utc),
                     raw_posts=json.dumps(comment_dict, default=str),
                     archived_urls={},
-                    media_archived=archive_media)
+                    media_archived=True)
 
     def can_handle(self, channel):
         if channel.platform == "Instagram" and self.get_username_from_url(channel.url) is not None:
@@ -126,7 +112,20 @@ class InstagramScraper(Scraper):
         profile['followees'] = user_profile.followees
 
         return RawChannelInfo(scraper=self.__version__,
-                        platform=channel.platform,
-                        channel=channel.id,
-                        raw_data=json.dumps(profile),
-                        date_archived=datetime.now(timezone.utc))
+            platform=channel.platform,
+            channel=channel.id,
+            raw_data=json.dumps(profile),
+            date_archived=datetime.now(timezone.utc))
+
+def get_archived_urls_from_post(post: instaloader.Post) -> List[str]:
+    typename = post._node['__typename']
+    if typename == 'GraphImage':
+        urls = [post._node['display_url']]
+    elif typename == 'GraphVideo':
+        urls = [post._node['video_url']]
+    elif typename == 'GraphSidecar':
+        urls = [edge['node']['display_url'] for edge in post._node['edge_sidecar_to_children']['edges']]
+    else:
+        raise NotImplementedError(f'post of type {typename} is currently not supported.')
+        
+    return {url : None for url in urls}
\ No newline at end of file
diff --git a/cisticola/scraper/odysee.py b/cisticola/scraper/odysee.py
index 0f5db65..020a2ba 100644
--- a/cisticola/scraper/odysee.py
+++ b/cisticola/scraper/odysee.py
@@ -36,10 +36,11 @@ class OdyseeScraper(Scraper):
             if since is not None and datetime.fromtimestamp(video.info['created']) <= since.date:
                 break
 
-            archived_urls = {}
+            url = video.info['streaming_url']
+
+            archived_urls = {url: None}
 
             if archive_media:
-                url = video.info['streaming_url']
 
                 # Check if file is a video file or an m3u8 file
                 r = requests.head(url)
@@ -77,6 +78,21 @@ class OdyseeScraper(Scraper):
                     archived_urls={},
                     media_archived=True)
 
+    def archive_files(self, result: ScraperResult) -> ScraperResult:
+        for url in result.archived_urls:
+            if result.archived_urls[url] is None:
+                r = requests.head(url)
+                if r.headers['Content-Type'] == 'text/html; charset=utf-8':
+                    media_blob, content_type, key = self.m3u8_url_to_blob(url)
+                else:
+                    media_blob, content_type, key = self.url_to_blob(url)
+
+                archived_url = self.archive_blob(media_blob, content_type, key)
+                result.archived_urls[url] = archived_url
+
+        result.media_archived = True
+        return result
+
     def can_handle(self, channel):
         if channel.platform == "Odysee" and self.get_username_from_url(channel.url) is not None:
             return True
@@ -94,7 +110,7 @@ class OdyseeScraper(Scraper):
         profile = odysee_channel.info
 
         return RawChannelInfo(scraper=self.__version__,
-                        platform=channel.platform,
-                        channel=channel.id,
-                        raw_data=json.dumps(profile),
-                        date_archived=datetime.now(timezone.utc))
\ No newline at end of file
+            platform=channel.platform,
+            channel=channel.id,
+            raw_data=json.dumps(profile),
+            date_archived=datetime.now(timezone.utc))
\ No newline at end of file
diff --git a/cisticola/scraper/rumble.py b/cisticola/scraper/rumble.py
index cb24c57..2a4d968 100644
--- a/cisticola/scraper/rumble.py
+++ b/cisticola/scraper/rumble.py
@@ -19,18 +19,18 @@ class RumbleScraper(Scraper):
         scraper = get_channel_videos(channel.url)
 
         for post in scraper:
-            if since is not None and post['datetime'].replace(tzinfo=timezone.utc) <= since.date_archived.replace(tzinfo=timezone.utc):
+            if since is not None and post['datetime'].replace(tzinfo=timezone.utc) <= since.date.replace(tzinfo=timezone.utc):
                 break
 
-            archived_urls = {}
+            url = post['media_url']
+
+            archived_urls = {url: None}
 
             if archive_media:
 
-                url = post['media_url']
-
                 media_blob, content_type, key = self.ytdlp_url_to_blob(url)
                 archived_url = self.archive_blob(media_blob, content_type, key)
-                archived_urls[post['media_url']] = archived_url
+                archived_urls[url] = archived_url
 
             yield ScraperResult(
                 scraper=self.__version__,
@@ -48,6 +48,16 @@ class RumbleScraper(Scraper):
         key = urlparse(url).path.split('/')[-2] + ext
         return key 
 
+    def archive_files(self, result: ScraperResult) -> ScraperResult:
+        for url in result.archived_urls:
+            if result.archived_urls[url] is None:
+                media_blob, content_type, key = self.ytdlp_url_to_blob(url)
+                archived_url = self.archive_blob(media_blob, content_type, key)
+                result.archived_urls[url] = archived_url
+
+        result.media_archived = True
+        return result
+
     def can_handle(self, channel):
         if channel.platform == "Rumble" and channel.url is not None:
             return True
@@ -57,10 +67,10 @@ class RumbleScraper(Scraper):
         profile = get_channel_profile(url = channel.url)
 
         return RawChannelInfo(scraper=self.__version__,
-                        platform=channel.platform,
-                        channel=channel.id,
-                        raw_data=json.dumps(profile),
-                        date_archived=datetime.now(timezone.utc))
+            platform=channel.platform,
+            channel=channel.id,
+            raw_data=json.dumps(profile),
+            date_archived=datetime.now(timezone.utc))
 
 #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
 
diff --git a/cisticola/scraper/telegram_snscrape.py b/cisticola/scraper/telegram_snscrape.py
index e683296..e272db5 100644
--- a/cisticola/scraper/telegram_snscrape.py
+++ b/cisticola/scraper/telegram_snscrape.py
@@ -33,8 +33,8 @@ class TelegramSnscrapeScraper(Scraper):
             for image_url in post.images:
                 archived_urls[image_url] = None
 
-            if post.video:
-                archived_urls[post.video] = None
+            for video_url in post.videos:
+                archived_urls[video_url] = None
 
             if archive_media:
                 for url in archived_urls:
diff --git a/cisticola/scraper/twitter.py b/cisticola/scraper/twitter.py
index 6ed37db..1d00d53 100644
--- a/cisticola/scraper/twitter.py
+++ b/cisticola/scraper/twitter.py
@@ -14,7 +14,7 @@ class TwitterScraper(Scraper):
 
     def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
         if channel.platform_id:
-            identifier = channel.platform_id
+            identifier = int(channel.platform_id)
         else:
             identifier = channel.screenname
 
@@ -23,7 +23,7 @@ class TwitterScraper(Scraper):
         first = True
 
         for tweet in scraper.get_items():
-            if since is not None and tweet.date.replace(tzinfo=timezone.utc) <= since.date_archived.replace(tzinfo=timezone.utc):
+            if since is not None and tweet.date.replace(tzinfo=timezone.utc) <= since.date.replace(tzinfo=timezone.utc):
                 # with TwitterProfileScraper, the first tweet could be an old pinned tweet
                 if first:
                     first = False
@@ -105,7 +105,7 @@ class TwitterScraper(Scraper):
             raise ChannelDoesNotExistError(channel.url)
         else:   
             return RawChannelInfo(scraper=self.__version__,
-            platform=channel.platform,
-            channel=channel.id,
-            raw_data=json.dumps(entity.__dict__, default=str),
-            date_archived=datetime.now(timezone.utc))
+                platform=channel.platform,
+                channel=channel.id,
+                raw_data=json.dumps(entity.__dict__, default=str),
+                date_archived=datetime.now(timezone.utc))
diff --git a/cisticola/scraper/vkontakte.py b/cisticola/scraper/vkontakte.py
index 3f23bca..5e3d5d3 100644
--- a/cisticola/scraper/vkontakte.py
+++ b/cisticola/scraper/vkontakte.py
@@ -1,8 +1,12 @@
 from datetime import datetime, timezone
 from typing import Generator
 from urllib.parse import urlparse
+import json 
+import re 
+
 from snscrape.modules.vkontakte import VKontakteUserScraper
 from loguru import logger
+from yt_dlp.extractor.vk import VKIE
 
 from cisticola.base import Channel, ScraperResult, RawChannelInfo
 from cisticola.scraper.base import Scraper
@@ -24,7 +28,7 @@ class VkontakteScraper(Scraper):
         first = True
 
         for post in scraper.get_items():
-            if since is not None and datetime.fromordinal(post.date.toordinal()).replace(tzinfo=timezone.utc) <= since.date_archived.replace(tzinfo=timezone.utc):
+            if since is not None and datetime.fromordinal(post.date.toordinal()).replace(tzinfo=timezone.utc) <= since.date.replace(tzinfo=timezone.utc):
                 # with VKontakteUserScraper, the first tweet could be an old pinned tweet
                 if first:
                     first = False
@@ -34,23 +38,26 @@ class VkontakteScraper(Scraper):
 
             archived_urls = {}
 
-            if archive_media:
+            if post.photos:
 
-                if post.photos:
+                for photo in post.photos:
+                    variant = max(
+                        [v for v in photo.variants], key=lambda v: v.width * v.height)
+                    url = variant.url
+                    if url is not None:
+                        archived_urls[url] = None
 
-                    for photo in post.photos:
-                        variant = max(
-                            [v for v in photo.variants], key=lambda v: v.width * v.height)
-                        url = variant.url
-                
-                        if url is not None:
-                            media_blob, content_type, key = self.url_to_blob(url)
-                            archived_url = self.archive_blob(media_blob, content_type, key)
-                            archived_urls[url] = archived_url
+            if post.video:
+                archived_urls[post.video.url] = None
 
-                if post.video:
-                    url = post.video.url
-                    media_blob, content_type, key = self.ytdlp_url_to_blob(url)
+            for url in archived_urls.keys():
+
+                if archive_media:
+                    if re.match(VKIE._VALID_URL, url):
+                        # Uses regex from yt_dlp to verify VK video URL
+                        media_blob, content_type, key = self.ytdlp_url_to_blob(url)
+                    else:
+                        media_blob, content_type, key = self.url_to_blob(url)
                     archived_url = self.archive_blob(media_blob, content_type, key)
                     archived_urls[url] = archived_url
 
@@ -65,6 +72,21 @@ class VkontakteScraper(Scraper):
                 archived_urls=archived_urls,
                 media_archived=archive_media)
 
+    def archive_files(self, result: ScraperResult) -> ScraperResult:
+        for url in result.archived_urls:
+            if result.archived_urls[url] is None:
+                if re.match(VKIE._VALID_URL, url):
+                    # Uses regex from yt_dlp to verify VK video URL
+                    media_blob, content_type, key = self.ytdlp_url_to_blob(url)
+                else:
+                    media_blob, content_type, key = self.url_to_blob(url)
+                archived_url = self.archive_blob(media_blob, content_type, key)
+                result.archived_urls[url] = archived_url
+
+        result.media_archived = True
+        return result
+
+
     def can_handle(self, channel):
         if channel.platform == "Vkontakte" and channel.platform_id:
             return True
@@ -87,7 +109,7 @@ class VkontakteScraper(Scraper):
         profile = scraper._get_entity().__dict__
 
         return RawChannelInfo(scraper=self.__version__,
-                    platform=channel.platform,
-                    channel=channel.id,
-                    raw_data=json.dumps(profile),
-                    date_archived=datetime.now(timezone.utc))
+            platform=channel.platform,
+            channel=channel.id,
+            raw_data=json.dumps(profile),
+            date_archived=datetime.now(timezone.utc))
diff --git a/cisticola/scraper/youtube.py b/cisticola/scraper/youtube.py
index 0d4c8e3..f937d24 100644
--- a/cisticola/scraper/youtube.py
+++ b/cisticola/scraper/youtube.py
@@ -2,7 +2,11 @@ from datetime import datetime, timezone
 import json
 from typing import Generator
 import tempfile
+from pathlib import Path
+import os
+
 import yt_dlp
+from loguru import logger
 
 from cisticola.base import Channel, ScraperResult, RawChannelInfo
 from cisticola.scraper import Scraper
@@ -46,7 +50,10 @@ class YoutubeScraper(Scraper):
                         
                 for video in valid_videos:
 
-                    archived_urls = {}
+                    url = video['webpage_url']
+
+                    archived_urls = {url: None}
+                    
                     video_id = video["id"]
                     video_ext = video["ext"]
 
@@ -54,11 +61,8 @@ class YoutubeScraper(Scraper):
                     
                         key = f"{video_id}.{video_ext}"
 
-                        with open(f"{temp_dir}/{key}", "rb") as f:
+                        with open(Path(temp_dir)/key, "rb") as f:
                             media_blob = f.read()
-                        archived_url = self.archive_blob(media_blob, content_type, key)
-
-                        url = video['webpage_url']
 
                         archived_url = self.archive_blob(media_blob, content_type, key)
                         archived_urls[url] = archived_url
@@ -78,6 +82,41 @@ class YoutubeScraper(Scraper):
         if channel.platform == "Youtube" and channel.url:
             return True
 
+    def archive_files(self, result: ScraperResult) -> ScraperResult:
+        for url in result.archived_urls:
+            if result.archived_urls[url] is None:
+
+                media_blob = None
+
+                with tempfile.TemporaryDirectory() as temp_dir:
+
+                    ydl_opts = {
+                        "format": "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best",
+                        "merge_output_format": "mp4",
+                        "outtmpl": f"{temp_dir}/%(id)s.%(ext)s"}
+
+                    ydl = yt_dlp.YoutubeDL(ydl_opts)
+
+                    try:
+                        ydl.download(url)
+                    except yt_dlp.utils.DownloadError as e:
+                        raise e
+                        
+                    files = os.listdir(temp_dir)
+                    if len(files) != 1:
+                        logger.warning(f'{len(files)} files downloaded for video: {url}')
+                    key = files[0]
+                    with open(Path(temp_dir, key), 'rb') as f:
+                        media_blob = f.read()
+
+                if media_blob is not None:
+                    content_type = 'video/mp4'            
+                    archived_url = self.archive_blob(media_blob, content_type, key)
+                    result.archived_urls[url] = archived_url
+
+        result.media_archived = True
+        return result
+
     def get_profile(self, channel: Channel) -> RawChannelInfo:
         ydl_opts = {}
         ydl = yt_dlp.YoutubeDL(ydl_opts)
@@ -87,12 +126,13 @@ class YoutubeScraper(Scraper):
             meta = ydl.extract_info(
                 channel.url,
                 process=False)
+            meta.pop('entries')
 
             return RawChannelInfo(scraper=self.__version__,
-                    platform=channel.platform,
-                    channel=channel.id,
-                    raw_data=json.dumps(meta),
-                    date_archived=datetime.now(timezone.utc))
+                platform=channel.platform,
+                channel=channel.id,
+                raw_data=json.dumps(meta),
+                date_archived=datetime.now(timezone.utc))
 
         except yt_dlp.utils.DownloadError as e:
             raise e
diff --git a/pytest.ini b/pytest.ini
index 744f87d..ae2a8b6 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -12,10 +12,9 @@ addopts =
   --html='reports/tests.html'
   --self-contained-html
 markers = 
-    profile: marks tests for only extracting channel metadata (deselect with '-m 
-    "not profile"')
-    media: marks tests for archiving all media attachments (deselect with '-m 
-    "not media"')
+    profile: marks tests that only extract channel metadata (deselect with '-m "not profile"')
+    media: marks tests that archive all media attachments (deselect with '-m "not media"')
+    unarchived: marks tests for deferred archiving, i.e. scraping without media and then archiving the unarchived media attachments (deselect with '-m "not unarchived"')
 filterwarnings =
     ignore:the imp module is deprecated:DeprecationWarning
     ignore:The localize method is no longer necessary, as this time zone supports the fold attribute
diff --git a/tests/conftest.py b/tests/conftest.py
index 3bccf81..684c15d 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -99,12 +99,12 @@ RUMBLE_CHANNEL_KWARGS = {
     'notes': ''}
 
 TELEGRAM_CHANNEL_KWARGS = {
-    'name': 'USA Freedom Convoy (test)',
-    'platform_id': -1001799578085,
+    'name': 'South West Ohio Proud Boys (test)',
+    'platform_id': -1001276612436,
     'category': 'test',
     'platform': 'Telegram',
-    'url': 'https://t.me/usafreedomconvoy2022',
-    'screenname': 'usafreedomconvoy2022',
+    'url': 'https://t.me/SouthwestOhioPB',
+    'screenname': 'SouthwestOhioPB',
     'country': 'US',
     'influencer': None,
     'public': True,
diff --git a/tests/scraper/bitchute.py b/tests/scraper/bitchute.py
index 94707ec..62b3ffe 100644
--- a/tests/scraper/bitchute.py
+++ b/tests/scraper/bitchute.py
@@ -3,12 +3,19 @@ import pytest
 from cisticola.base import Channel
 from cisticola.scraper import BitchuteScraper
 
+@pytest.mark.unarchived
 def test_scrape_bitchute_channel_no_media(controller, channel_kwargs):
 
     channels = [Channel(**channel_kwargs['bitchute'])]
     controller.register_scraper(scraper = BitchuteScraper())
     controller.scrape_channels(channels = channels, archive_media = False)
 
+@pytest.mark.media
+@pytest.mark.unarchived
+def test_scrape_bitchute_channel_unarchived_media(controller):
+
+    controller.archive_unarchived_media()
+
 @pytest.mark.media
 def test_scrape_bitchute_channel(controller, channel_kwargs):
 
diff --git a/tests/scraper/gab.py b/tests/scraper/gab.py
index d600429..79ba8d7 100644
--- a/tests/scraper/gab.py
+++ b/tests/scraper/gab.py
@@ -3,12 +3,19 @@ import pytest
 from cisticola.base import Channel
 from cisticola.scraper import GabScraper
 
+@pytest.mark.unarchived
 def test_scrape_gab_channel_no_media(controller, channel_kwargs):
 
     channels = [Channel(**channel_kwargs['gab'])]
     controller.register_scraper(scraper = GabScraper())
     controller.scrape_channels(channels = channels, archive_media = False)
 
+@pytest.mark.media
+@pytest.mark.unarchived
+def test_scrape_gab_channel_unarchived_media(controller):
+
+    controller.archive_unarchived_media()
+
 @pytest.mark.media
 def test_scrape_gab_channel(controller, channel_kwargs):
     
diff --git a/tests/scraper/gettr.py b/tests/scraper/gettr.py
index 81a8bb8..352e839 100644
--- a/tests/scraper/gettr.py
+++ b/tests/scraper/gettr.py
@@ -3,12 +3,19 @@ import pytest
 from cisticola.base import Channel
 from cisticola.scraper import GettrScraper
 
+@pytest.mark.unarchived
 def test_scrape_gettr_channel_no_media(controller, channel_kwargs):
 
     channels = [Channel(**channel_kwargs['gettr'])]
     controller.register_scraper(scraper = GettrScraper())
     controller.scrape_channels(channels = channels, archive_media = False)
 
+@pytest.mark.media
+@pytest.mark.unarchived
+def test_scrape_gettr_channel_unarchived_media(controller):
+
+    controller.archive_unarchived_media()
+
 @pytest.mark.media
 def test_scrape_gettr_channel(controller, channel_kwargs):
 
diff --git a/tests/scraper/instagram.py b/tests/scraper/instagram.py
index 98a0684..099ab40 100644
--- a/tests/scraper/instagram.py
+++ b/tests/scraper/instagram.py
@@ -3,12 +3,19 @@ import pytest
 from cisticola.base import Channel
 from cisticola.scraper import InstagramScraper
 
+@pytest.mark.unarchived
 def test_scrape_instagram_channel_no_media(controller, channel_kwargs):
 
     channels = [Channel(**channel_kwargs['instagram'])]
     controller.register_scraper(scraper = InstagramScraper())
     controller.scrape_channels(channels = channels, archive_media = False)
 
+@pytest.mark.media
+@pytest.mark.unarchived
+def test_scrape_instagram_channel_unarchived_media(controller):
+
+    controller.archive_unarchived_media()
+
 @pytest.mark.media
 def test_scrape_instagram_channel(controller, channel_kwargs):
 
diff --git a/tests/scraper/odysee.py b/tests/scraper/odysee.py
index 84a45f8..9883bdb 100644
--- a/tests/scraper/odysee.py
+++ b/tests/scraper/odysee.py
@@ -3,12 +3,19 @@ import pytest
 from cisticola.base import Channel
 from cisticola.scraper import OdyseeScraper
 
+@pytest.mark.unarchived
 def test_scrape_odysee_channel_no_media(controller, channel_kwargs):
 
     channels = [Channel(**channel_kwargs['odysee'])]
     controller.register_scraper(scraper = OdyseeScraper())
     controller.scrape_channels(channels = channels, archive_media = False)
 
+@pytest.mark.media
+@pytest.mark.unarchived
+def test_scrape_odysee_channel_unarchived_media(controller):
+
+    controller.archive_unarchived_media()
+
 @pytest.mark.media
 def test_scrape_odysee_channel(controller, channel_kwargs):
 
diff --git a/tests/scraper/rumble.py b/tests/scraper/rumble.py
index 18c8749..5b01f9c 100644
--- a/tests/scraper/rumble.py
+++ b/tests/scraper/rumble.py
@@ -3,12 +3,19 @@ import pytest
 from cisticola.base import Channel
 from cisticola.scraper import RumbleScraper
 
+@pytest.mark.unarchived
 def test_scrape_rumble_channel_no_media(controller, channel_kwargs):
 
     channels = [Channel(**channel_kwargs['rumble'])]
     controller.register_scraper(scraper = RumbleScraper())
     controller.scrape_channels(channels = channels, archive_media = False)
 
+@pytest.mark.media
+@pytest.mark.unarchived
+def test_scrape_rumble_channel_unarchived_media(controller):
+
+    controller.archive_unarchived_media()
+
 @pytest.mark.media
 def test_scrape_rumble_channel(controller, channel_kwargs):
 
diff --git a/tests/scraper/telegram_snscrape.py b/tests/scraper/telegram_snscrape.py
index dbaed43..5dbe151 100644
--- a/tests/scraper/telegram_snscrape.py
+++ b/tests/scraper/telegram_snscrape.py
@@ -3,12 +3,19 @@ import pytest
 from cisticola.base import Channel
 from cisticola.scraper import TelegramSnscrapeScraper
 
+@pytest.mark.unarchived
 def test_scrape_telegram_snscrape_channel_no_media(controller, channel_kwargs):
 
     channels = [Channel(**channel_kwargs['telegram'])]
     controller.register_scraper(scraper = TelegramSnscrapeScraper())
     controller.scrape_channels(channels = channels, archive_media = False)
 
+@pytest.mark.media
+@pytest.mark.unarchived
+def test_scrape_telegram_snscrape_channel_unarchived_media(controller):
+
+    controller.archive_unarchived_media()
+
 @pytest.mark.media
 def test_scrape_telegram_snscrape_channel(controller, channel_kwargs):
 
diff --git a/tests/scraper/telegram_telethon.py b/tests/scraper/telegram_telethon.py
index ee994eb..8dbe9ff 100644
--- a/tests/scraper/telegram_telethon.py
+++ b/tests/scraper/telegram_telethon.py
@@ -3,6 +3,7 @@ import pytest
 from cisticola.base import Channel
 from cisticola.scraper import TelegramTelethonScraper
 
+@pytest.mark.unarchived
 def test_scrape_telegram_telethon_channel_no_media(controller, channel_kwargs):
     controller.remove_all_scrapers()
 
@@ -10,6 +11,12 @@ def test_scrape_telegram_telethon_channel_no_media(controller, channel_kwargs):
     controller.register_scraper(scraper = TelegramTelethonScraper())
     controller.scrape_channels(channels = channels, archive_media = False)
 
+@pytest.mark.media
+@pytest.mark.unarchived
+def test_scrape_telegram_telethon_unarchived_media(controller):
+
+    controller.archive_unarchived_media()
+
 @pytest.mark.media
 def test_scrape_telegram_telethon_channel(controller, channel_kwargs):
 
diff --git a/tests/scraper/twitter.py b/tests/scraper/twitter.py
index 97765aa..0a4ad86 100644
--- a/tests/scraper/twitter.py
+++ b/tests/scraper/twitter.py
@@ -3,12 +3,19 @@ import pytest
 from cisticola.base import Channel
 from cisticola.scraper import TwitterScraper
 
+@pytest.mark.unarchived
 def test_scrape_twitter_channel_no_media(controller, channel_kwargs):
 
     channels = [Channel(**channel_kwargs['twitter'])]
     controller.register_scraper(scraper = TwitterScraper())
     controller.scrape_channels(channels = channels, archive_media = False)
 
+@pytest.mark.media
+@pytest.mark.unarchived
+def test_scrape_twitter_channel_unarchived_media(controller):
+
+    controller.archive_unarchived_media()
+
 @pytest.mark.media
 def test_scrape_twitter_channel(controller, channel_kwargs):
 
diff --git a/tests/scraper/vkontakte.py b/tests/scraper/vkontakte.py
index 4209c30..12ff12c 100644
--- a/tests/scraper/vkontakte.py
+++ b/tests/scraper/vkontakte.py
@@ -3,12 +3,19 @@ import pytest
 from cisticola.base import Channel
 from cisticola.scraper import VkontakteScraper
 
+@pytest.mark.unarchived
 def test_scrape_vkontakte_channel_no_media(controller, channel_kwargs):
 
     channels = [Channel(**channel_kwargs['vkontakte'])]
     controller.register_scraper(scraper = VkontakteScraper())
     controller.scrape_channels(channels = channels, archive_media = False)
 
+@pytest.mark.media
+@pytest.mark.unarchived
+def test_scrape_vkontakte_channel_unarchived_media(controller):
+
+    controller.archive_unarchived_media()
+
 @pytest.mark.media
 def test_scrape_vkontakte_channel(controller, channel_kwargs):
 
diff --git a/tests/scraper/youtube.py b/tests/scraper/youtube.py
index 1750b08..79ba7c7 100644
--- a/tests/scraper/youtube.py
+++ b/tests/scraper/youtube.py
@@ -3,12 +3,19 @@ import pytest
 from cisticola.base import Channel
 from cisticola.scraper import YoutubeScraper
 
+@pytest.mark.unarchived
 def test_scrape_youtube_channel_no_media(controller, channel_kwargs):
 
     channels = [Channel(**channel_kwargs['youtube'])]
     controller.register_scraper(scraper = YoutubeScraper())
     controller.scrape_channels(channels = channels, archive_media = False)
 
+@pytest.mark.media
+@pytest.mark.unarchived
+def test_scrape_youtube_channel_unarchived_media(controller):
+
+    controller.archive_unarchived_media()
+
 @pytest.mark.media
 def test_scrape_youtube_channel(controller, channel_kwargs):