From e4cf9daf73265fd36ccc243bae8f25432e5bb58f Mon Sep 17 00:00:00 2001
From: Tristan Lee <tristan@bellingcat.com>
Date: Mon, 14 Mar 2022 18:04:27 -0500
Subject: [PATCH 1/4] added docstrings, improved Sphinx docs

---
 Pipfile                                       |   3 +-
 Pipfile.lock                                  | 433 ++++++++++--------
 cisticola/base.py                             | 139 ++++--
 cisticola/scraper/base.py                     | 153 ++++++-
 docs/images/cisticola_logo.svg                |  64 +++
 docs/images/favicon.ico                       | Bin 0 -> 614 bytes
 docs/source/cisticola.base.rst                |   8 +
 docs/source/cisticola.rst                     |  23 +-
 docs/source/cisticola.scraper.base.rst        |   8 +
 docs/source/cisticola.scraper.bitchute.rst    |   8 +
 docs/source/cisticola.scraper.gab.rst         |   8 +
 docs/source/cisticola.scraper.gettr.rst       |   8 +
 docs/source/cisticola.scraper.odysee.rst      |   8 +
 docs/source/cisticola.scraper.rst             |  48 +-
 docs/source/cisticola.scraper.rumble.rst      |   8 +
 .../cisticola.scraper.telegram_snscrape.rst   |   8 +
 .../cisticola.scraper.telegram_telethon.rst   |   8 +
 docs/source/cisticola.scraper.twitter.rst     |   8 +
 docs/source/cisticola.scraper.utils.rst       |   8 +
 docs/source/cisticola.transformer.base.rst    |   8 +
 docs/source/cisticola.transformer.rst         |  24 +-
 docs/source/cisticola.transformer.twitter.rst |   8 +
 docs/source/conf.py                           |  11 +-
 docs/source/index.rst                         |   2 +-
 docs/source/modules.rst                       |   7 -
 25 files changed, 700 insertions(+), 311 deletions(-)
 create mode 100644 docs/images/cisticola_logo.svg
 create mode 100644 docs/images/favicon.ico
 create mode 100644 docs/source/cisticola.base.rst
 create mode 100644 docs/source/cisticola.scraper.base.rst
 create mode 100644 docs/source/cisticola.scraper.bitchute.rst
 create mode 100644 docs/source/cisticola.scraper.gab.rst
 create mode 100644 docs/source/cisticola.scraper.gettr.rst
 create mode 100644 docs/source/cisticola.scraper.odysee.rst
 create mode 100644 docs/source/cisticola.scraper.rumble.rst
 create mode 100644 docs/source/cisticola.scraper.telegram_snscrape.rst
 create mode 100644 docs/source/cisticola.scraper.telegram_telethon.rst
 create mode 100644 docs/source/cisticola.scraper.twitter.rst
 create mode 100644 docs/source/cisticola.scraper.utils.rst
 create mode 100644 docs/source/cisticola.transformer.base.rst
 create mode 100644 docs/source/cisticola.transformer.twitter.rst
 delete mode 100644 docs/source/modules.rst

diff --git a/Pipfile b/Pipfile
index 62f2c74..328faea 100644
--- a/Pipfile
+++ b/Pipfile
@@ -10,7 +10,6 @@ gogettr = "*"
 requests = "*"
 bs4 = "*"
 dateparser = "*"
-sphinx = "*"
 boto3 = "*"
 snscrape = {git = "https://github.com/bellingcat/snscrape.git"}
 ffmpeg-python = "*"
@@ -24,6 +23,8 @@ pytest = "*"
 pytest-cov = "*"
 pytest-html = "*"
 pytest-metadata = "*"
+sphinx = "*"
+sphinx_rtd_theme = "*"
 
 [requires]
 python_version = "3.9"
diff --git a/Pipfile.lock b/Pipfile.lock
index 0ca0eda..f75c07f 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "3d293e1f3802d64ae7a8fbfc4c1d742cc33cd4c520a6263f93e566f89faa7013"
+            "sha256": "495ba305ca55a0ac5754037ba133518b47324965dd3ab0b8db8b69206524d68e"
         },
         "pipfile-spec": 6,
         "requires": {
@@ -16,13 +16,6 @@
         ]
     },
     "default": {
-        "alabaster": {
-            "hashes": [
-                "sha256:446438bdcca0e05bd45ea2de1668c1d9b032e1a9154c2c259092d77031ddd359",
-                "sha256:a661d72d58e6ea8a57f7a86e37d86716863ee5e92788398526d58b26a4e4dc02"
-            ],
-            "version": "==0.7.12"
-        },
         "attrs": {
             "hashes": [
                 "sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4",
@@ -31,14 +24,6 @@
             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
             "version": "==21.4.0"
         },
-        "babel": {
-            "hashes": [
-                "sha256:ab49e12b91d937cd11f0b67cb259a57ab4ad2b59ac7a3b41d6c06c0ac5b0def9",
-                "sha256:bc0c176f9f6a994582230df350aa6e05ba2ebe4b3ac317eab29d9be5d2768da0"
-            ],
-            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
-            "version": "==2.9.1"
-        },
         "beautifulsoup4": {
             "hashes": [
                 "sha256:9a315ce70049920ea4572a4055bc4bd700c940521d36fc858205ad4fcde149bf",
@@ -49,19 +34,19 @@
         },
         "boto3": {
             "hashes": [
-                "sha256:30394729b38d5ce2f845440428a55161c6d45478044e553a12ca1acf56d7278a",
-                "sha256:895489900eb882777124c3b64a13df49785cf77f7bd1504e783464fb3b4c8163"
+                "sha256:8d6f3c548f0ee03d742f404c96515e7579fc6968135aaa50dd855a046698ff79",
+                "sha256:d857feb6af9932e1ee3a748060a2cd9fd6043dbbccf66976eda54586597efdc0"
             ],
             "index": "pypi",
-            "version": "==1.21.15"
+            "version": "==1.21.18"
         },
         "botocore": {
             "hashes": [
-                "sha256:405082f92a9e524e1aee96cbc90134668026d7da3c12f86990c91a12620ca28b",
-                "sha256:fa4816e94e72111a9341204061e760bcbde74ca5d900d3f2206c2c2e8e4b56e4"
+                "sha256:7ea8ef1ff7c4882ab59b337662f90ddf5ea860e95e7e209dca593a34ea585b1b",
+                "sha256:d2da7ccbc5ddd61fe3cd45fcbd3de380d9e3a15bfa8fbfd2d9259a93dcc60c56"
             ],
             "markers": "python_version >= '3.6'",
-            "version": "==1.24.15"
+            "version": "==1.24.18"
         },
         "bs4": {
             "hashes": [
@@ -101,14 +86,6 @@
             "index": "pypi",
             "version": "==1.1.0"
         },
-        "docutils": {
-            "hashes": [
-                "sha256:686577d2e4c32380bb50cbb22f575ed742d58168cee37e99117a854bcd88f125",
-                "sha256:cf316c8370a737a022b72b56874f6602acf974a37a9fba42ec2876387549fc61"
-            ],
-            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
-            "version": "==0.17.1"
-        },
         "ffmpeg-python": {
             "hashes": [
                 "sha256:65225db34627c578ef0e11c8b1eb528bb35e024752f6f10b78c011f6f64c4127",
@@ -216,22 +193,6 @@
             "markers": "python_version >= '3'",
             "version": "==3.3"
         },
-        "imagesize": {
-            "hashes": [
-                "sha256:1db2f82529e53c3e929e8926a1fa9235aa82d0bd0c580359c67ec31b2fddaa8c",
-                "sha256:cd1750d452385ca327479d45b64d9c7729ecf0b3969a58148298c77092261f9d"
-            ],
-            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
-            "version": "==1.3.0"
-        },
-        "importlib-metadata": {
-            "hashes": [
-                "sha256:b36ffa925fe3139b2f6ff11d6925ffd4fa7bc47870165e3ac260ac7b4f91e6ac",
-                "sha256:d16e8c1deb60de41b8e8ed21c1a7b947b0bc62fab7e1d470bcdf331cea2e6735"
-            ],
-            "markers": "python_version < '3.10'",
-            "version": "==4.11.2"
-        },
         "iniconfig": {
             "hashes": [
                 "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3",
@@ -239,14 +200,6 @@
             ],
             "version": "==1.1.1"
         },
-        "jinja2": {
-            "hashes": [
-                "sha256:077ce6014f7b40d03b47d1f1ca4b0fc8328a692bd284016f806ed0eaca390ad8",
-                "sha256:611bb273cd68f3b993fabdc4064fc858c5b47a973cb5aa7999ec1ba405c87cd7"
-            ],
-            "markers": "python_version >= '3.6'",
-            "version": "==3.0.3"
-        },
         "jmespath": {
             "hashes": [
                 "sha256:b85d0567b8666149a93172712e68920734333c0ce7e89b78b3e987f71e5ed4f9",
@@ -330,52 +283,6 @@
             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
             "version": "==4.8.0"
         },
-        "markupsafe": {
-            "hashes": [
-                "sha256:023af8c54fe63530545f70dd2a2a7eed18d07a9a77b94e8bf1e2ff7f252db9a3",
-                "sha256:09c86c9643cceb1d87ca08cdc30160d1b7ab49a8a21564868921959bd16441b8",
-                "sha256:142119fb14a1ef6d758912b25c4e803c3ff66920635c44078666fe7cc3f8f759",
-                "sha256:1d1fb9b2eec3c9714dd936860850300b51dbaa37404209c8d4cb66547884b7ed",
-                "sha256:204730fd5fe2fe3b1e9ccadb2bd18ba8712b111dcabce185af0b3b5285a7c989",
-                "sha256:24c3be29abb6b34052fd26fc7a8e0a49b1ee9d282e3665e8ad09a0a68faee5b3",
-                "sha256:290b02bab3c9e216da57c1d11d2ba73a9f73a614bbdcc027d299a60cdfabb11a",
-                "sha256:3028252424c72b2602a323f70fbf50aa80a5d3aa616ea6add4ba21ae9cc9da4c",
-                "sha256:30c653fde75a6e5eb814d2a0a89378f83d1d3f502ab710904ee585c38888816c",
-                "sha256:3cace1837bc84e63b3fd2dfce37f08f8c18aeb81ef5cf6bb9b51f625cb4e6cd8",
-                "sha256:4056f752015dfa9828dce3140dbadd543b555afb3252507348c493def166d454",
-                "sha256:454ffc1cbb75227d15667c09f164a0099159da0c1f3d2636aa648f12675491ad",
-                "sha256:598b65d74615c021423bd45c2bc5e9b59539c875a9bdb7e5f2a6b92dfcfc268d",
-                "sha256:599941da468f2cf22bf90a84f6e2a65524e87be2fce844f96f2dd9a6c9d1e635",
-                "sha256:5ddea4c352a488b5e1069069f2f501006b1a4362cb906bee9a193ef1245a7a61",
-                "sha256:62c0285e91414f5c8f621a17b69fc0088394ccdaa961ef469e833dbff64bd5ea",
-                "sha256:679cbb78914ab212c49c67ba2c7396dc599a8479de51b9a87b174700abd9ea49",
-                "sha256:6e104c0c2b4cd765b4e83909cde7ec61a1e313f8a75775897db321450e928cce",
-                "sha256:736895a020e31b428b3382a7887bfea96102c529530299f426bf2e636aacec9e",
-                "sha256:75bb36f134883fdbe13d8e63b8675f5f12b80bb6627f7714c7d6c5becf22719f",
-                "sha256:7d2f5d97fcbd004c03df8d8fe2b973fe2b14e7bfeb2cfa012eaa8759ce9a762f",
-                "sha256:80beaf63ddfbc64a0452b841d8036ca0611e049650e20afcb882f5d3c266d65f",
-                "sha256:84ad5e29bf8bab3ad70fd707d3c05524862bddc54dc040982b0dbcff36481de7",
-                "sha256:8da5924cb1f9064589767b0f3fc39d03e3d0fb5aa29e0cb21d43106519bd624a",
-                "sha256:961eb86e5be7d0973789f30ebcf6caab60b844203f4396ece27310295a6082c7",
-                "sha256:96de1932237abe0a13ba68b63e94113678c379dca45afa040a17b6e1ad7ed076",
-                "sha256:a0a0abef2ca47b33fb615b491ce31b055ef2430de52c5b3fb19a4042dbc5cadb",
-                "sha256:b2a5a856019d2833c56a3dcac1b80fe795c95f401818ea963594b345929dffa7",
-                "sha256:b8811d48078d1cf2a6863dafb896e68406c5f513048451cd2ded0473133473c7",
-                "sha256:c532d5ab79be0199fa2658e24a02fce8542df196e60665dd322409a03db6a52c",
-                "sha256:d3b64c65328cb4cd252c94f83e66e3d7acf8891e60ebf588d7b493a55a1dbf26",
-                "sha256:d4e702eea4a2903441f2735799d217f4ac1b55f7d8ad96ab7d4e25417cb0827c",
-                "sha256:d5653619b3eb5cbd35bfba3c12d575db2a74d15e0e1c08bf1db788069d410ce8",
-                "sha256:d66624f04de4af8bbf1c7f21cc06649c1c69a7f84109179add573ce35e46d448",
-                "sha256:e67ec74fada3841b8c5f4c4f197bea916025cb9aa3fe5abf7d52b655d042f956",
-                "sha256:e6f7f3f41faffaea6596da86ecc2389672fa949bd035251eab26dc6697451d05",
-                "sha256:f02cf7221d5cd915d7fa58ab64f7ee6dd0f6cddbb48683debf5d04ae9b1c2cc1",
-                "sha256:f0eddfcabd6936558ec020130f932d479930581171368fd728efcfb6ef0dd357",
-                "sha256:fabbe18087c3d33c5824cb145ffca52eccd053061df1d79d4b66dafa5ad2a5ea",
-                "sha256:fc3150f85e2dbcf99e65238c842d1cfe69d3e7649b19864c1cc043213d9cd730"
-            ],
-            "markers": "python_version >= '3.7'",
-            "version": "==2.1.0"
-        },
         "numpy": {
             "hashes": [
                 "sha256:07a8c89a04997625236c5ecb7afe35a02af3896c8aa01890a849913a2309c676",
@@ -395,6 +302,7 @@
                 "sha256:dbc7601a3b7472d559dc7b933b18b4b66f9aa7452c120e87dfb33d02008c8a18",
                 "sha256:e7927a589df200c5e23c57970bafbd0cd322459aa7b1ff73b7c2e84d6e3eae62",
                 "sha256:f8c1f39caad2c896bc0018f699882b345b2a63708008be29b1f355ebf6f933fe",
+                "sha256:f950f8845b480cffe522913d35567e29dd381b0dc7e4ce6a4a9f9156417d2430",
                 "sha256:fade0d4f4d292b6f39951b6836d7a3c7ef5b2347f3c420cd9820a1d90d794802",
                 "sha256:fdf3c08bce27132395d3c3ba1503cac12e17282358cb4bddc25cc46b0aca07aa"
             ],
@@ -480,14 +388,6 @@
             ],
             "version": "==0.4.8"
         },
-        "pygments": {
-            "hashes": [
-                "sha256:44238f1b60a76d78fc8ca0528ee429702aae011c265fe6a8dd8b63049ae41c65",
-                "sha256:4e426f72023d88d03b2fa258de560726ce890ff3b630f88c21cbb8b2503b8c6a"
-            ],
-            "markers": "python_version >= '3.5'",
-            "version": "==2.11.2"
-        },
         "pyparsing": {
             "hashes": [
                 "sha256:18ee9022775d270c55187733956460083db60b37d0d0fb357445f3094eed3eea",
@@ -506,11 +406,11 @@
         },
         "pytest": {
             "hashes": [
-                "sha256:9ce3ff477af913ecf6321fe337b93a2c0dcf2a0a1439c43f5452112c1e4280db",
-                "sha256:e30905a0c131d3d94b89624a1cc5afec3e0ba2fbdb151867d8e0ebd49850f171"
+                "sha256:b555252a95bbb2a37a97b5ac2eb050c436f7989993565f5e0c9128fcaacadd0e",
+                "sha256:f1089d218cfcc63a212c42896f1b7fbf096874d045e1988186861a1a87d27b47"
             ],
-            "markers": "python_version >= '3.6'",
-            "version": "==7.0.1"
+            "markers": "python_version >= '3.7'",
+            "version": "==7.1.0"
         },
         "python-dateutil": {
             "hashes": [
@@ -628,7 +528,7 @@
                 "sha256:5c6bd9dc7a543b7fe4304a631f8a8a3b674e2bbfc49c2ae96200cdbe55df6b17",
                 "sha256:95c5d300c4e879ee69708c428ba566c59478fd653cc3a22243eeb8ed846950bb"
             ],
-            "markers": "python_version >= '3.6' and python_version < '4.0'",
+            "markers": "python_version >= '3.6' and python_version < '4'",
             "version": "==4.8"
         },
         "s3transfer": {
@@ -647,13 +547,6 @@
             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
             "version": "==1.16.0"
         },
-        "snowballstemmer": {
-            "hashes": [
-                "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1",
-                "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a"
-            ],
-            "version": "==2.2.0"
-        },
         "snscrape": {
             "git": "https://github.com/bellingcat/snscrape.git",
             "ref": "de4ebed81f3f6a4bb4c65630daab6ec63784959b"
@@ -666,62 +559,6 @@
             "markers": "python_version >= '3.6'",
             "version": "==2.3.1"
         },
-        "sphinx": {
-            "hashes": [
-                "sha256:5da895959511473857b6d0200f56865ed62c31e8f82dd338063b84ec022701fe",
-                "sha256:6caad9786055cb1fa22b4a365c1775816b876f91966481765d7d50e9f0dd35cc"
-            ],
-            "index": "pypi",
-            "version": "==4.4.0"
-        },
-        "sphinxcontrib-applehelp": {
-            "hashes": [
-                "sha256:806111e5e962be97c29ec4c1e7fe277bfd19e9652fb1a4392105b43e01af885a",
-                "sha256:a072735ec80e7675e3f432fcae8610ecf509c5f1869d17e2eecff44389cdbc58"
-            ],
-            "markers": "python_version >= '3.5'",
-            "version": "==1.0.2"
-        },
-        "sphinxcontrib-devhelp": {
-            "hashes": [
-                "sha256:8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e",
-                "sha256:ff7f1afa7b9642e7060379360a67e9c41e8f3121f2ce9164266f61b9f4b338e4"
-            ],
-            "markers": "python_version >= '3.5'",
-            "version": "==1.0.2"
-        },
-        "sphinxcontrib-htmlhelp": {
-            "hashes": [
-                "sha256:d412243dfb797ae3ec2b59eca0e52dac12e75a241bf0e4eb861e450d06c6ed07",
-                "sha256:f5f8bb2d0d629f398bf47d0d69c07bc13b65f75a81ad9e2f71a63d4b7a2f6db2"
-            ],
-            "markers": "python_version >= '3.6'",
-            "version": "==2.0.0"
-        },
-        "sphinxcontrib-jsmath": {
-            "hashes": [
-                "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178",
-                "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8"
-            ],
-            "markers": "python_version >= '3.5'",
-            "version": "==1.0.1"
-        },
-        "sphinxcontrib-qthelp": {
-            "hashes": [
-                "sha256:4c33767ee058b70dba89a6fc5c1892c0d57a54be67ddd3e7875a18d14cba5a72",
-                "sha256:bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6"
-            ],
-            "markers": "python_version >= '3.5'",
-            "version": "==1.0.3"
-        },
-        "sphinxcontrib-serializinghtml": {
-            "hashes": [
-                "sha256:352a9a00ae864471d3a7ead8d7d79f5fc0b57e8b3f95e9867eb9eb28999b92fd",
-                "sha256:aa5f6de5dfdf809ef505c4895e51ef5c9eac17d0f287933eb49ec495280b6952"
-            ],
-            "markers": "python_version >= '3.5'",
-            "version": "==1.1.5"
-        },
         "sqlalchemy": {
             "hashes": [
                 "sha256:04164e0063feb7aedd9d073db0fd496edb244be40d46ea1f0d8990815e4b8c34",
@@ -800,7 +637,7 @@
                 "sha256:000ca7f471a233c2251c6c7023ee85305721bfdf18621ebff4fd17a8653427ed",
                 "sha256:0e7c33d9a63e7ddfcb86780aac87befc2fbddf46c58dbb487e0855f7ceec283c"
             ],
-            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4.0'",
+            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
             "version": "==1.26.8"
         },
         "youtube-dl": {
@@ -810,17 +647,16 @@
             ],
             "index": "pypi",
             "version": "==2021.12.17"
-        },
-        "zipp": {
-            "hashes": [
-                "sha256:9f50f446828eb9d45b267433fd3e9da8d801f614129124863f9c51ebceafb87d",
-                "sha256:b47250dd24f92b7dd6a0a8fc5244da14608f3ca90a5efcd37a3b1642fac9a375"
-            ],
-            "markers": "python_version >= '3.7'",
-            "version": "==3.7.0"
         }
     },
     "develop": {
+        "alabaster": {
+            "hashes": [
+                "sha256:446438bdcca0e05bd45ea2de1668c1d9b032e1a9154c2c259092d77031ddd359",
+                "sha256:a661d72d58e6ea8a57f7a86e37d86716863ee5e92788398526d58b26a4e4dc02"
+            ],
+            "version": "==0.7.12"
+        },
         "attrs": {
             "hashes": [
                 "sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4",
@@ -829,6 +665,29 @@
             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
             "version": "==21.4.0"
         },
+        "babel": {
+            "hashes": [
+                "sha256:ab49e12b91d937cd11f0b67cb259a57ab4ad2b59ac7a3b41d6c06c0ac5b0def9",
+                "sha256:bc0c176f9f6a994582230df350aa6e05ba2ebe4b3ac317eab29d9be5d2768da0"
+            ],
+            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
+            "version": "==2.9.1"
+        },
+        "certifi": {
+            "hashes": [
+                "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872",
+                "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"
+            ],
+            "version": "==2021.10.8"
+        },
+        "charset-normalizer": {
+            "hashes": [
+                "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597",
+                "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df"
+            ],
+            "markers": "python_version >= '3'",
+            "version": "==2.0.12"
+        },
         "coverage": {
             "extras": [
                 "toml"
@@ -879,6 +738,38 @@
             "markers": "python_version >= '3.7'",
             "version": "==6.3.2"
         },
+        "docutils": {
+            "hashes": [
+                "sha256:686577d2e4c32380bb50cbb22f575ed742d58168cee37e99117a854bcd88f125",
+                "sha256:cf316c8370a737a022b72b56874f6602acf974a37a9fba42ec2876387549fc61"
+            ],
+            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
+            "version": "==0.17.1"
+        },
+        "idna": {
+            "hashes": [
+                "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff",
+                "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"
+            ],
+            "markers": "python_version >= '3'",
+            "version": "==3.3"
+        },
+        "imagesize": {
+            "hashes": [
+                "sha256:1db2f82529e53c3e929e8926a1fa9235aa82d0bd0c580359c67ec31b2fddaa8c",
+                "sha256:cd1750d452385ca327479d45b64d9c7729ecf0b3969a58148298c77092261f9d"
+            ],
+            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
+            "version": "==1.3.0"
+        },
+        "importlib-metadata": {
+            "hashes": [
+                "sha256:1208431ca90a8cca1a6b8af391bb53c1a2db74e5d1cef6ddced95d4b2062edc6",
+                "sha256:ea4c597ebf37142f827b8f39299579e31685c31d3a438b59f469406afd0f2539"
+            ],
+            "markers": "python_version < '3.10'",
+            "version": "==4.11.3"
+        },
         "iniconfig": {
             "hashes": [
                 "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3",
@@ -886,6 +777,60 @@
             ],
             "version": "==1.1.1"
         },
+        "jinja2": {
+            "hashes": [
+                "sha256:077ce6014f7b40d03b47d1f1ca4b0fc8328a692bd284016f806ed0eaca390ad8",
+                "sha256:611bb273cd68f3b993fabdc4064fc858c5b47a973cb5aa7999ec1ba405c87cd7"
+            ],
+            "markers": "python_version >= '3.6'",
+            "version": "==3.0.3"
+        },
+        "markupsafe": {
+            "hashes": [
+                "sha256:023af8c54fe63530545f70dd2a2a7eed18d07a9a77b94e8bf1e2ff7f252db9a3",
+                "sha256:09c86c9643cceb1d87ca08cdc30160d1b7ab49a8a21564868921959bd16441b8",
+                "sha256:142119fb14a1ef6d758912b25c4e803c3ff66920635c44078666fe7cc3f8f759",
+                "sha256:1d1fb9b2eec3c9714dd936860850300b51dbaa37404209c8d4cb66547884b7ed",
+                "sha256:204730fd5fe2fe3b1e9ccadb2bd18ba8712b111dcabce185af0b3b5285a7c989",
+                "sha256:24c3be29abb6b34052fd26fc7a8e0a49b1ee9d282e3665e8ad09a0a68faee5b3",
+                "sha256:290b02bab3c9e216da57c1d11d2ba73a9f73a614bbdcc027d299a60cdfabb11a",
+                "sha256:3028252424c72b2602a323f70fbf50aa80a5d3aa616ea6add4ba21ae9cc9da4c",
+                "sha256:30c653fde75a6e5eb814d2a0a89378f83d1d3f502ab710904ee585c38888816c",
+                "sha256:3cace1837bc84e63b3fd2dfce37f08f8c18aeb81ef5cf6bb9b51f625cb4e6cd8",
+                "sha256:4056f752015dfa9828dce3140dbadd543b555afb3252507348c493def166d454",
+                "sha256:454ffc1cbb75227d15667c09f164a0099159da0c1f3d2636aa648f12675491ad",
+                "sha256:598b65d74615c021423bd45c2bc5e9b59539c875a9bdb7e5f2a6b92dfcfc268d",
+                "sha256:599941da468f2cf22bf90a84f6e2a65524e87be2fce844f96f2dd9a6c9d1e635",
+                "sha256:5ddea4c352a488b5e1069069f2f501006b1a4362cb906bee9a193ef1245a7a61",
+                "sha256:62c0285e91414f5c8f621a17b69fc0088394ccdaa961ef469e833dbff64bd5ea",
+                "sha256:679cbb78914ab212c49c67ba2c7396dc599a8479de51b9a87b174700abd9ea49",
+                "sha256:6e104c0c2b4cd765b4e83909cde7ec61a1e313f8a75775897db321450e928cce",
+                "sha256:736895a020e31b428b3382a7887bfea96102c529530299f426bf2e636aacec9e",
+                "sha256:75bb36f134883fdbe13d8e63b8675f5f12b80bb6627f7714c7d6c5becf22719f",
+                "sha256:7d2f5d97fcbd004c03df8d8fe2b973fe2b14e7bfeb2cfa012eaa8759ce9a762f",
+                "sha256:80beaf63ddfbc64a0452b841d8036ca0611e049650e20afcb882f5d3c266d65f",
+                "sha256:84ad5e29bf8bab3ad70fd707d3c05524862bddc54dc040982b0dbcff36481de7",
+                "sha256:8da5924cb1f9064589767b0f3fc39d03e3d0fb5aa29e0cb21d43106519bd624a",
+                "sha256:961eb86e5be7d0973789f30ebcf6caab60b844203f4396ece27310295a6082c7",
+                "sha256:96de1932237abe0a13ba68b63e94113678c379dca45afa040a17b6e1ad7ed076",
+                "sha256:a0a0abef2ca47b33fb615b491ce31b055ef2430de52c5b3fb19a4042dbc5cadb",
+                "sha256:b2a5a856019d2833c56a3dcac1b80fe795c95f401818ea963594b345929dffa7",
+                "sha256:b8811d48078d1cf2a6863dafb896e68406c5f513048451cd2ded0473133473c7",
+                "sha256:c532d5ab79be0199fa2658e24a02fce8542df196e60665dd322409a03db6a52c",
+                "sha256:d3b64c65328cb4cd252c94f83e66e3d7acf8891e60ebf588d7b493a55a1dbf26",
+                "sha256:d4e702eea4a2903441f2735799d217f4ac1b55f7d8ad96ab7d4e25417cb0827c",
+                "sha256:d5653619b3eb5cbd35bfba3c12d575db2a74d15e0e1c08bf1db788069d410ce8",
+                "sha256:d66624f04de4af8bbf1c7f21cc06649c1c69a7f84109179add573ce35e46d448",
+                "sha256:e67ec74fada3841b8c5f4c4f197bea916025cb9aa3fe5abf7d52b655d042f956",
+                "sha256:e6f7f3f41faffaea6596da86ecc2389672fa949bd035251eab26dc6697451d05",
+                "sha256:f02cf7221d5cd915d7fa58ab64f7ee6dd0f6cddbb48683debf5d04ae9b1c2cc1",
+                "sha256:f0eddfcabd6936558ec020130f932d479930581171368fd728efcfb6ef0dd357",
+                "sha256:fabbe18087c3d33c5824cb145ffca52eccd053061df1d79d4b66dafa5ad2a5ea",
+                "sha256:fc3150f85e2dbcf99e65238c842d1cfe69d3e7649b19864c1cc043213d9cd730"
+            ],
+            "markers": "python_version >= '3.7'",
+            "version": "==2.1.0"
+        },
         "packaging": {
             "hashes": [
                 "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb",
@@ -910,6 +855,14 @@
             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
             "version": "==1.11.0"
         },
+        "pygments": {
+            "hashes": [
+                "sha256:44238f1b60a76d78fc8ca0528ee429702aae011c265fe6a8dd8b63049ae41c65",
+                "sha256:4e426f72023d88d03b2fa258de560726ce890ff3b630f88c21cbb8b2503b8c6a"
+            ],
+            "markers": "python_version >= '3.5'",
+            "version": "==2.11.2"
+        },
         "pyparsing": {
             "hashes": [
                 "sha256:18ee9022775d270c55187733956460083db60b37d0d0fb357445f3094eed3eea",
@@ -920,11 +873,11 @@
         },
         "pytest": {
             "hashes": [
-                "sha256:9ce3ff477af913ecf6321fe337b93a2c0dcf2a0a1439c43f5452112c1e4280db",
-                "sha256:e30905a0c131d3d94b89624a1cc5afec3e0ba2fbdb151867d8e0ebd49850f171"
+                "sha256:b555252a95bbb2a37a97b5ac2eb050c436f7989993565f5e0c9128fcaacadd0e",
+                "sha256:f1089d218cfcc63a212c42896f1b7fbf096874d045e1988186861a1a87d27b47"
             ],
-            "markers": "python_version >= '3.6'",
-            "version": "==7.0.1"
+            "markers": "python_version >= '3.7'",
+            "version": "==7.1.0"
         },
         "pytest-cov": {
             "hashes": [
@@ -950,6 +903,92 @@
             "index": "pypi",
             "version": "==1.11.0"
         },
+        "pytz": {
+            "hashes": [
+                "sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c",
+                "sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326"
+            ],
+            "version": "==2021.3"
+        },
+        "requests": {
+            "hashes": [
+                "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61",
+                "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"
+            ],
+            "index": "pypi",
+            "version": "==2.27.1"
+        },
+        "snowballstemmer": {
+            "hashes": [
+                "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1",
+                "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a"
+            ],
+            "version": "==2.2.0"
+        },
+        "sphinx": {
+            "hashes": [
+                "sha256:5da895959511473857b6d0200f56865ed62c31e8f82dd338063b84ec022701fe",
+                "sha256:6caad9786055cb1fa22b4a365c1775816b876f91966481765d7d50e9f0dd35cc"
+            ],
+            "index": "pypi",
+            "version": "==4.4.0"
+        },
+        "sphinx-rtd-theme": {
+            "hashes": [
+                "sha256:4d35a56f4508cfee4c4fb604373ede6feae2a306731d533f409ef5c3496fdbd8",
+                "sha256:eec6d497e4c2195fa0e8b2016b337532b8a699a68bcb22a512870e16925c6a5c"
+            ],
+            "index": "pypi",
+            "version": "==1.0.0"
+        },
+        "sphinxcontrib-applehelp": {
+            "hashes": [
+                "sha256:806111e5e962be97c29ec4c1e7fe277bfd19e9652fb1a4392105b43e01af885a",
+                "sha256:a072735ec80e7675e3f432fcae8610ecf509c5f1869d17e2eecff44389cdbc58"
+            ],
+            "markers": "python_version >= '3.5'",
+            "version": "==1.0.2"
+        },
+        "sphinxcontrib-devhelp": {
+            "hashes": [
+                "sha256:8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e",
+                "sha256:ff7f1afa7b9642e7060379360a67e9c41e8f3121f2ce9164266f61b9f4b338e4"
+            ],
+            "markers": "python_version >= '3.5'",
+            "version": "==1.0.2"
+        },
+        "sphinxcontrib-htmlhelp": {
+            "hashes": [
+                "sha256:d412243dfb797ae3ec2b59eca0e52dac12e75a241bf0e4eb861e450d06c6ed07",
+                "sha256:f5f8bb2d0d629f398bf47d0d69c07bc13b65f75a81ad9e2f71a63d4b7a2f6db2"
+            ],
+            "markers": "python_version >= '3.6'",
+            "version": "==2.0.0"
+        },
+        "sphinxcontrib-jsmath": {
+            "hashes": [
+                "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178",
+                "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8"
+            ],
+            "markers": "python_version >= '3.5'",
+            "version": "==1.0.1"
+        },
+        "sphinxcontrib-qthelp": {
+            "hashes": [
+                "sha256:4c33767ee058b70dba89a6fc5c1892c0d57a54be67ddd3e7875a18d14cba5a72",
+                "sha256:bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6"
+            ],
+            "markers": "python_version >= '3.5'",
+            "version": "==1.0.3"
+        },
+        "sphinxcontrib-serializinghtml": {
+            "hashes": [
+                "sha256:352a9a00ae864471d3a7ead8d7d79f5fc0b57e8b3f95e9867eb9eb28999b92fd",
+                "sha256:aa5f6de5dfdf809ef505c4895e51ef5c9eac17d0f287933eb49ec495280b6952"
+            ],
+            "markers": "python_version >= '3.5'",
+            "version": "==1.1.5"
+        },
         "tomli": {
             "hashes": [
                 "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc",
@@ -957,6 +996,22 @@
             ],
             "markers": "python_version >= '3.7'",
             "version": "==2.0.1"
+        },
+        "urllib3": {
+            "hashes": [
+                "sha256:000ca7f471a233c2251c6c7023ee85305721bfdf18621ebff4fd17a8653427ed",
+                "sha256:0e7c33d9a63e7ddfcb86780aac87befc2fbddf46c58dbb487e0855f7ceec283c"
+            ],
+            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
+            "version": "==1.26.8"
+        },
+        "zipp": {
+            "hashes": [
+                "sha256:9f50f446828eb9d45b267433fd3e9da8d801f614129124863f9c51ebceafb87d",
+                "sha256:b47250dd24f92b7dd6a0a8fc5244da14608f3ca90a5efcd37a3b1642fac9a375"
+            ],
+            "markers": "python_version >= '3.7'",
+            "version": "==3.7.0"
         }
     }
 }
diff --git a/cisticola/base.py b/cisticola/base.py
index 97a18df..2c9ad83 100644
--- a/cisticola/base.py
+++ b/cisticola/base.py
@@ -5,21 +5,118 @@ from datetime import datetime
 from sqlalchemy.orm import registry
 from sqlalchemy import Table, Column, Integer, String, JSON, DateTime, ForeignKey
 
-mapper_registry = registry()
-
 @dataclass
 class ScraperResult:
-    """A minimally processed result from a scraper"""
+    """A minimally processed result from a scraper
+    """
 
+    #: String specifying name and version of scraper used to generate result, e.g. ``"TwitterScraper 0.0.1"``.
     scraper: str
+
+    #: Name of platform from which result was scraped, e.g. ``"Twitter"``.
     platform: str
-    channel: int #TODO there is probably a way of making this a Channel object foreign key
+
+    #TODO there is probably a way of making this a Channel object foreign key
+    #: User-specified integer that uniquely identifies a channel, e.g. ``15``.
+    channel: int
+
+    #: String that uniquely identifies the scraped post on the given platform, e.g. ``"1503397267675533313"``
     platform_id: str
+
+    #: Datetime (relative to UTC) that the scraped post was created at.
     date: datetime
+
+    #: JSON dump of dict that contains all data scraped for the post.
     raw_data: str
+
+    #: Datetime (relative to UTC) that the scraped post was archived at.
     date_archived: datetime
+
+    #: Dict in which the keys are the original media URLs from the post, and the corresponding values are the URLs of the archived media files. 
     archived_urls: dict
 
+@dataclass
+class Channel:
+    """Information about a specific channel to be scraped.
+    """
+
+    #: User-specified integer that uniquely identifies a channel, e.g. ``15``.
+    id: int
+
+    #: Name of channel (different from username because it can be non-unique and contain emojis), e.g. ``"T🕊Редакция Президент Гордон🕊"``.
+    name: str
+
+    #: String that uniquely identifies the channel on the given platform, e.g. ``"-1001101170442"``.
+    platform_id: str
+
+    #: User-specified category for the channel, e.g. ``"qanon-adjacent"``.
+    category: str
+
+    #: Number of followers the channel has on the given platform, e.g. ``1465``.
+    followers: int
+
+    #: Name of platform the given channel is on, e.g. ``"Telegram"``.
+    platform: str
+
+    #: URL for the given channel on the platform, e.g. ``"https://t.me/prezidentgordonteam"``
+    url: str
+
+    #: Screen name/username of channel.
+    screenname: str
+
+    #: 2 digit country code for the country of origin for the channel, e.g. ``"RU"``.
+    country: str
+
+    #: Name of influencer, if channel belongs to an influencer that operates on multiple platforms.
+    influencer: str
+
+    #: Whether or not the channel is publicly-accessible. 
+    public: bool
+
+    #: Whether or not the channel is a chat (i.e. allows users who are not the channel creator to post/message)
+    chat: bool
+
+    #: Any other additional notes about the channel.
+    notes: str
+
+@dataclass
+class TransformedResult:
+    """An object with fields for columns in the analysis table"""
+
+    #: ID number of the scraped post in the ``raw_data`` table
+    raw_id: int
+
+    #: String specifying name and version of scraper used to generate result, e.g. ``"TwitterScraper 0.0.1"``.
+    scraper: str
+
+    #: String specifying name and version of transformer used to transform result, e.g. ``"TwitterTransformer 0.0.1"``.
+    transformer: str
+
+    #: Name of platform from which result was scraped, e.g. ``"Twitter"``.
+    platform: str
+
+    #: User-specified integer that uniquely identifies a channel, e.g. ``15``.
+    channel: str
+
+    #: Datetime (relative to UTC) that the scraped post was created at.
+    date: datetime
+
+    #: Datetime (relative to UTC) that the scraped post was archived at.
+    date_archived: datetime
+
+    #: URL of the original post
+    url: str
+
+    #: Text of the original post
+    content: str
+
+    #: String that uniquely identifies the author of the post on the given platform, e.g. ``"-1001101170442"``.
+    author_id: str
+
+    #: Username of author who made post.
+    author_username: str
+
+mapper_registry = registry()
 
 raw_data_table = Table('raw_data', mapper_registry.metadata,
                        Column('id', Integer, primary_key=True,
@@ -35,40 +132,6 @@ raw_data_table = Table('raw_data', mapper_registry.metadata,
 
 mapper_registry.map_imperatively(ScraperResult, raw_data_table)
 
-
-@dataclass
-class Channel:
-    id: int
-    name: str
-    platform_id: str
-    category: str
-    followers: int
-    platform: str
-    url: str
-    screenname: str
-    country: str
-    influencer: str
-    public: bool
-    chat: bool
-    notes: str
-
-
-@dataclass
-class TransformedResult:
-    """An object with fields for columns in the analysis table"""
-    raw_id: int
-    scraper: str
-    transformer: str
-    platform: str
-    channel: str
-    date: datetime
-    date_archived: datetime
-    url: str
-    content: str
-    author_id: str
-    author_username: str
-
-
 analysis_table = Table('analysis', mapper_registry.metadata,
                        Column('id', Integer, primary_key=True,
                               autoincrement=True),
diff --git a/cisticola/scraper/base.py b/cisticola/scraper/base.py
index ea68f70..524a729 100644
--- a/cisticola/scraper/base.py
+++ b/cisticola/scraper/base.py
@@ -13,29 +13,73 @@ from cisticola.base import Channel, ScraperResult, mapper_registry
 from cisticola.scraper import make_request
 
 class Scraper:
+    """Base class for defining platform-specific scrapers for scraping all posts 
+    from a given channel on that specific platform. 
+    """
+
     __version__ = "Scraper 0.0.0"
 
     def __init__(self):
-        self.s3_client = boto3.client('s3',
-                                      region_name=os.environ['DO_SPACES_REGION'],
-                                      endpoint_url='https://{}.digitaloceanspaces.com'.format(
-                                          os.environ['DO_SPACES_REGION']),
-                                      aws_access_key_id=os.environ['DO_SPACES_KEY'],
-                                      aws_secret_access_key=os.environ['DO_SPACES_SECRET'])
 
+        # Initialize client to transfer files to the storage archive
+        self.s3_client = boto3.client(
+            service_name='s3',
+            region_name=os.environ['DO_SPACES_REGION'],
+            endpoint_url=f'https://{os.environ["DO_SPACES_REGION"]}.digitaloceanspaces.com',
+            aws_access_key_id=os.environ['DO_SPACES_KEY'],
+            aws_secret_access_key=os.environ['DO_SPACES_SECRET'])
+        
+        # Define request headers (necessary to bypass scraping protection 
+        # for several platform scrapers)
         self.headers = {
             'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0'}
 
-        pass
-
     def __str__(self):
         return self.__version__
 
     def url_to_key(self, url: str, content_type: str) -> str:
+        """Generate a unique identifier for media from a specified post.
+
+        Parameters
+        ----------
+        url: str
+            URL of original post. 
+            e.g. ``"https://twitter.com/bellingcat/status/1503397267675533313"``
+        content_type: str
+            Content-Type of media. 
+            e.g. ``"image/jpeg"``
+
+        Returns
+        -------
+        key: str
+            Unique identifier for the media file from a specified post based on 
+            the original post URL and the media's Content-Type. 
+        """
+
         key = urlparse(url).path.split('/')[-1]
         return key 
 
     def url_to_blob(self, url: str, key: str = None) -> Tuple[bytes, str, str]:
+        """Download media file from a specified post URL.
+
+        Parameters
+        ----------
+        url: str
+            URL of original post. 
+            e.g. ``"https://twitter.com/bellingcat/status/1503397267675533313"``
+        key: str or None
+            Pre-defined unique identifier for the media file.
+
+        Returns
+        -------
+        blob: bytes
+            Raw bytes of the downloaded media file. 
+        content_type: str
+            Content-Type of media. 
+            e.g. ``"image/jpeg"``.
+        key: str
+            Unique identifier for the media file.
+        """
 
         r = make_request(url, headers = self.headers)
 
@@ -48,6 +92,27 @@ class Scraper:
         return blob, content_type, key
 
     def m3u8_url_to_blob(self, url: str, key: str = None) -> Tuple[bytes, str, str]:
+        """Download media file from a specified post URL, where the media file 
+        is formatted as an m3u8 playlist, which is then decoded to an mp4 file.
+
+        Parameters
+        ----------
+        url: str
+            URL of original post. 
+            e.g. ``"https://twitter.com/bellingcat/status/1503397267675533313"``
+        key: str or None
+            Pre-defined unique identifier for the media file.
+
+        Returns
+        -------
+        blob: bytes
+            Raw bytes of the downloaded media file. 
+        content_type: str
+            Content-Type of media. 
+            e.g. ``"video/mp4"``.
+        key: str
+            Unique identifier for the media file.
+        """
         
         content_type = 'video/mp4'
         ext = '.' + content_type.split('/')[-1]
@@ -70,6 +135,23 @@ class Scraper:
         return blob, content_type, key
 
     def archive_blob(self, blob: bytes, content_type: str, key: str) -> str:
+        """Upload raw bytes of a media file to the storage archive. 
+
+        Parameters
+        ----------
+        blob: bytes
+            Raw bytes of the media file to be archived.
+        content_type: str
+            Content-Type of media. 
+            e.g. ``"video/mp4"``.
+        key: str
+            Unique identifier for the media file.
+
+        Returns
+        -------
+        archived_url: str
+            URL specifying the file on the storage archive.
+        """
 
         filename = self.__version__.replace(' ', '_') + '/' + key
 
@@ -81,9 +163,37 @@ class Scraper:
         return archived_url
 
     def can_handle(self, channel: Channel) -> bool:
+        """Whether or not the scraper can scrape the specified channel.
+
+        Parameters
+        ----------
+        channel: Channel
+            Channel to be scraped. 
+        
+        Returns
+        -------
+        bool
+            ``True`` if the scraper is capable of scraping ``channel``,
+            ``False`` if not. 
+        """
+
         raise NotImplementedError
 
     def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
+        """Scrape all posts from the specified Channel.
+
+        Parameters
+        ----------
+        channel: Channel
+            Channel to be scraped.
+        since: ScraperResult or None
+            Most recently scraped ScraperResult from a previous scrape, or 
+            ``None`` if scraper has not run before.
+        archive_media: bool
+            If ``True``, any media files (images, video, etc.) from posts are archived. 
+            If ``False``, media files are not archived. 
+        """
+        
         raise NotImplementedError
 
 
@@ -97,13 +207,28 @@ class ScraperController:
         self.mapper_registry = None
 
     def register_scraper(self, scraper: Scraper):
+        """Register a single Scraper instance to the controller.
+        """
         self.scrapers.append(scraper)
 
     def register_scrapers(self, scraper: List[Scraper]):
+        """Register a list of Scraper instances to the controller.
+        """
         self.scrapers.extend(scraper)
     
     @logger.catch
     def scrape_channels(self, channels: List[Channel], archive_media: bool = True):
+        """Scrape all posts for all specified channels. 
+
+        Parameters
+        ----------
+        channels: list<Channel>
+            List of Channel instances to be scraped
+        archive_media: bool
+            If ``True``, any media files (images, video, etc.) from posts are archived. 
+            If ``False``, media files are not archived. 
+        """
+
         if self.session is None:
             logger.error("No DB session")
             return
@@ -143,15 +268,11 @@ class ScraperController:
                 logger.warning(f"No handler found for Channel {channel}")
 
     def connect_to_db(self, engine):
+        """Connect the specified SQLAlchemy engine to the controller.
+        """
+        
         # create tables
         mapper_registry.metadata.create_all(bind=engine)
 
         self.session = sessionmaker()
-        self.session.configure(bind=engine)
-
-
-class ETLController:
-    """This class will transform the raw_data tables into a format more conducive to analysis."""
-
-    def __init__(self):
-        pass
+        self.session.configure(bind=engine)
\ No newline at end of file
diff --git a/docs/images/cisticola_logo.svg b/docs/images/cisticola_logo.svg
new file mode 100644
index 0000000..f570be8
--- /dev/null
+++ b/docs/images/cisticola_logo.svg
@@ -0,0 +1,64 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+
+<svg
+   width="51.688999mm"
+   height="11.797mm"
+   viewBox="0 0 51.688999 11.797"
+   version="1.1"
+   id="svg5"
+   inkscape:version="1.1.2 (76b9e6a115, 2022-02-25)"
+   sodipodi:docname="cisticola_logo.svg"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:svg="http://www.w3.org/2000/svg">
+  <sodipodi:namedview
+     id="namedview7"
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1.0"
+     inkscape:pageshadow="2"
+     inkscape:pageopacity="0.0"
+     inkscape:pagecheckerboard="0"
+     inkscape:document-units="mm"
+     showgrid="false"
+     fit-margin-top="0"
+     fit-margin-left="0"
+     fit-margin-right="0"
+     fit-margin-bottom="0"
+     inkscape:zoom="2.0838024"
+     inkscape:cx="52.548168"
+     inkscape:cy="115.65396"
+     inkscape:window-width="1920"
+     inkscape:window-height="999"
+     inkscape:window-x="0"
+     inkscape:window-y="0"
+     inkscape:window-maximized="1"
+     inkscape:current-layer="layer4" />
+  <defs
+     id="defs2" />
+  <g
+     inkscape:groupmode="layer"
+     id="layer3"
+     inkscape:label="background"
+     transform="translate(-60.255096,9.177412)">
+    <rect
+       style="fill:#000000;fill-opacity:1;stroke-width:0.723711"
+       id="rect16437"
+       width="51.688999"
+       height="11.797"
+       x="60.255096"
+       y="-9.177412" />
+  </g>
+  <g
+     inkscape:groupmode="layer"
+     id="layer4"
+     inkscape:label="text"
+     transform="translate(-60.255096,9.177412)">
+    <path
+       id="path15829"
+       style="font-size:8.36272px;line-height:1.25;font-family:'Atlas Typewriter';-inkscape-font-specification:'Atlas Typewriter';fill:#ffffff;fill-opacity:1;stroke-width:0.209067"
+       d="m 70.0228,-6.6771549 c -0.283099,0 -0.513662,0.2335451 -0.513662,0.5214152 0,0.2884 0.230563,0.5229656 0.513662,0.5229656 0.283371,0 0.51263,-0.2345656 0.51263,-0.5229656 0,-0.2878701 -0.229259,-0.5214152 -0.51263,-0.5214152 z m 16.056385,0 c -0.283099,0 -0.513146,0.2335451 -0.513146,0.5214152 0,0.2884 0.230047,0.5229656 0.513146,0.5229656 0.283371,0 0.51263,-0.2345656 0.51263,-0.5229656 0,-0.2878701 -0.229259,-0.5214152 -0.51263,-0.5214152 z m 14.358295,0.098185 v 0.3762044 h 1.37149 v 5.85390603 h -1.68051 V 0.0108085 h 3.82147 v -0.35966797 h -1.69757 V -6.5789696 Z m -20.312971,0.8862507 v 0.7611938 c 0,0.468312 -0.07489,0.5767091 -0.53485,0.5767091 h -1.104325 v 0.359668 h 1.622639 v 2.8768268 c 0,0.76936995 0.317555,1.22938228 1.346171,1.22938228 0.434861,0 0.777668,-0.07510942 1.011825,-0.15037842 v -0.37672118 c -0.30942,0.10035299 -0.676894,0.1421102 -1.003041,0.1421102 -0.577027,0 -0.911572,-0.23377623 -0.911572,-0.81080318 v -2.9104165 h 1.914613 v -0.359668 h -1.914613 v -1.3379029 z m -10.295495,0.8759155 v 0.2253091 c 0,0.6858022 -0.05795,0.7610827 -0.517798,1.1957929 l -0.263033,0.2511472 c -0.517789,0.4931802 -0.879533,0.8776991 -0.879533,1.7058391 0,0.81174998 0.550259,1.54770912 1.799892,1.54770912 1.109929,0 1.939959,-0.72802084 1.90686,-2.03295082 h -0.402561 c 0.02487,1.04563998 -0.583547,1.66449786 -1.471226,1.66449786 -0.887942,0 -1.39785,-0.44357232 -1.39785,-1.22938226 0,-0.53552 0.189648,-0.9282617 0.608748,-1.2965617 l 0.3452,-0.3095419 c 0.583689,-0.5183198 0.665593,-0.6512787 0.665593,-1.5043007 v -0.2175579 z m 16.056901,0 v 0.2253091 c 0,0.6858022 -0.05795,0.7610827 -0.517798,1.1957929 l -0.263549,0.2511472 c -0.517789,0.4931812 -0.879533,0.8776982 -0.879533,1.7058391 0,0.81174998 0.550259,1.54770912 1.799889,1.54770912 1.10993,0 1.939962,-0.72802185 1.90686,-2.03295082 h -0.402558 c 0.02487,1.04563898 -0.583549,1.66449786 -1.471229,1.66449786 -0.887939,0 -1.397847,-0.44357132 -1.397847,-1.22938226 0,-0.535519 
0.189648,-0.9282617 0.608748,-1.2965617 l 0.3452,-0.3095419 c 0.583689,-0.5183198 0.665591,-0.6512798 0.665591,-1.5043007 v -0.2175579 z m -21.132043,0.3451985 c -1.296221,0 -1.998846,1.0201492 -1.998846,2.3414592 0,1.36312292 0.644041,2.24947501 1.956988,2.24947501 0.911535,0 1.580537,-0.45140498 1.739429,-1.38802891 h -0.442867 c -0.108715,0.56866496 -0.443238,1.02009271 -1.279509,1.02009272 -1.036976,0 -1.505334,-0.70234505 -1.505334,-1.93166502 0,-1.237682 0.602205,-1.9151286 1.547191,-1.9151286 0.769371,0 1.170749,0.4431218 1.237652,0.8779827 h 0.442867 c -0.133805,-0.9115361 -0.944927,-1.2541871 -1.697571,-1.2541871 z m 26.76064,0 c -1.296223,0 -1.998329,1.0201492 -1.998329,2.3414592 0,1.36312292 0.643525,2.24947501 1.956472,2.24947501 0.911537,0 1.580536,-0.45140498 1.739429,-1.38802891 h -0.442867 c -0.108718,0.56866496 -0.443238,1.02009271 -1.27951,1.02009272 -1.036978,0 -1.505333,-0.70234505 -1.505333,-1.93166502 0,-1.237682 0.602205,-1.9151286 1.547191,-1.9151286 0.769371,0 1.170749,0.4431218 1.237652,0.8779827 h 0.442867 c -0.133805,-0.9115361 -0.944928,-1.2541871 -1.697572,-1.2541871 z m -16.240352,0.00827 c -1.070428,0 -1.589051,0.5435457 -1.589051,1.2376507 0,0.7610079 0.477065,1.0702129 1.246437,1.1622029 l 0.794265,0.091984 c 0.660657,0.07526 0.877983,0.3177813 0.877983,0.7777303 0,0.63556696 -0.560403,0.96169837 -1.363223,0.96169837 -0.802823,0 -1.237647,-0.29258664 -1.329634,-0.96996657 h -0.426332 c 0.09199,0.99516294 0.836065,1.32136631 1.755966,1.32136631 1.179142,0 1.79007,-0.55190159 1.79007,-1.32963451 0,-0.64393 -0.334708,-1.0452197 -1.262972,-1.1539348 l -0.727605,-0.083716 c -0.602115,-0.0669 -0.928108,-0.2506897 -0.928108,-0.7942667 0,-0.5770279 0.451467,-0.8697145 1.153936,-0.8697145 0.73592,0 1.062085,0.2671824 1.187524,0.7689453 h 0.434599 c -0.125442,-0.7108309 -0.67723,-1.120345 -1.613855,-1.1203448 z m 21.408511,0 c -1.279496,0 -1.956985,0.9282102 -1.956985,2.2913331 0,1.36312292 0.677489,2.29133291 1.956985,2.29133291 1.279496,0 
1.956988,-0.92820999 1.956988,-2.29133291 0,-1.3631229 -0.677492,-2.2913331 -1.956988,-2.2913331 z m 10.604009,0.00827 c -0.97844,0.02509 -1.52215,0.5936269 -1.58905,1.245919 h 0.41806 c 0.0669,-0.4766752 0.41824,-0.8779828 1.17925,-0.8779828 1.05371,0 1.17099,0.5686716 1.17099,1.3714926 v 0.1591634 c -1.19587,0.07526 -1.92365,0.1584901 -2.35851,0.4010091 -0.32615,0.192342 -0.58549,0.5104836 -0.58549,1.0206096 0,0.7693709 0.56054,1.212329 1.39681,1.212329 0.72756,0 1.29641,-0.34252917 1.53892,-0.94464519 h 0.0165 c -0.008,0.59375302 0.16707,0.87798259 0.79427,0.87798259 h 0.1757 v -0.35966797 h -0.16692 c -0.2927,0 -0.39325,-0.117143 -0.39325,-0.451652 V -2.7574976 c 0,-0.827909 -0.17565,-1.6975709 -1.59732,-1.6975709 z m -10.604009,0.3513998 c 1.003528,0 1.513605,0.7608842 1.513605,1.9316651 0,1.170781 -0.518441,1.93166494 -1.513605,1.93166494 -1.028615,0 -1.513602,-0.76088394 -1.513602,-1.93166494 0,-1.1707809 0.510075,-1.9316651 1.513602,-1.9316651 z m 11.774989,1.9063436 v 0.2594157 c 0,1.07042796 -0.72746,1.65571285 -1.47174,1.65571285 -0.60212,0 -1.01234,-0.29278114 -1.01234,-0.86144605 0,-0.309421 0.14247,-0.5433907 0.38499,-0.7022827 0.26761,-0.200705 0.83632,-0.2594098 2.09909,-0.3513998 z" />
+  </g>
+</svg>
diff --git a/docs/images/favicon.ico b/docs/images/favicon.ico
new file mode 100644
index 0000000000000000000000000000000000000000..75d94461c1835dd1a4c39511a04c96999bbc465b
GIT binary patch
literal 614
zcmV-s0-61ZP)<h;3K|Lk000e1NJLTq001BW001Be1^@s6b9#F80006lNkl<Zcmc)1
zF-xjp7=YpXdHcX9BZvxGLK<8`2dAjLC2}ajr5_L$2o*#{Aw`Xa1PwO&10tfRHeK9w
zX)tIADs+pW#X_Eg!-2CLeEr_wyDkBg02u;i2$&&Y9zKnak8^x{jEJziyUWYV3x0U`
zAR?Tdosmo?X&V?AAel_!hry?CILu%D{r&jW17z{x@$r#FB0;@gCzHuguh;QI0L3Q}
zp-ols%fo@H;>zG)YHEs^nHgLd9BglIv$wa03xk7DC`7Z_#D&2pV+_$~lt!b03xf|L
z!rIyzkw}E=>uX#Xx+Eg3tgLWya>B*M1vfW0xbWzLh_JP_#nI6bxm=FT%}rDl7ao>G
zgq@uove_&f8yn<uIaC!_9+sAtmN+;#Ae~N=&*xE9euKe6Z*MQBr>9(9U6Ie{QB{76
z!NTn9EMsG1oS&bgs&q0pH^=1UBsL5d;_*0bl}d$9M1(@2Kq{5OhQUH#UmvQ<_xCrQ
zjE;^nI5^1b>nk=47T(|A5fP%%D4opD&m$s~N+oO<EZpAS(rUF>U0p>)_@Ck7VfOd;
zxxc^X>FEg@JwTQYK0iMRhr^`PX+oh8<#L&ypCA4ei^aISykux-h=qj(K0ZFMA%J4R
z^E?g@4_RMd=j-c>VzJ2E+Z&$e5sSr`o}Q*!t+KqlOs!VKmH<i@CMG6WTwElPNH8)o
zLbKWA`T3c<yE`5p9%!{%bVC3oK!$)B0%i!<A4a^gvpN82(*OVf07*qoM6N<$g1M3u
AU;qFB

literal 0
HcmV?d00001

diff --git a/docs/source/cisticola.base.rst b/docs/source/cisticola.base.rst
new file mode 100644
index 0000000..db91e8b
--- /dev/null
+++ b/docs/source/cisticola.base.rst
@@ -0,0 +1,8 @@
+cisticola.base module
+=====================
+
+.. automodule:: cisticola.base
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :private-members:
diff --git a/docs/source/cisticola.rst b/docs/source/cisticola.rst
index df2792c..6857abd 100644
--- a/docs/source/cisticola.rst
+++ b/docs/source/cisticola.rst
@@ -1,6 +1,12 @@
 cisticola package
 =================
 
+.. automodule:: cisticola
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :private-members:
+
 Subpackages
 -----------
 
@@ -13,18 +19,7 @@ Subpackages
 Submodules
 ----------
 
-cisticola.base module
----------------------
+.. toctree::
+   :maxdepth: 4
 
-.. automodule:: cisticola.base
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-Module contents
----------------
-
-.. automodule:: cisticola
-   :members:
-   :undoc-members:
-   :show-inheritance:
+   cisticola.base
diff --git a/docs/source/cisticola.scraper.base.rst b/docs/source/cisticola.scraper.base.rst
new file mode 100644
index 0000000..1c6f6e2
--- /dev/null
+++ b/docs/source/cisticola.scraper.base.rst
@@ -0,0 +1,8 @@
+cisticola.scraper.base module
+=============================
+
+.. automodule:: cisticola.scraper.base
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :private-members:
diff --git a/docs/source/cisticola.scraper.bitchute.rst b/docs/source/cisticola.scraper.bitchute.rst
new file mode 100644
index 0000000..dc44b13
--- /dev/null
+++ b/docs/source/cisticola.scraper.bitchute.rst
@@ -0,0 +1,8 @@
+cisticola.scraper.bitchute module
+=================================
+
+.. automodule:: cisticola.scraper.bitchute
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :private-members:
diff --git a/docs/source/cisticola.scraper.gab.rst b/docs/source/cisticola.scraper.gab.rst
new file mode 100644
index 0000000..b0777c7
--- /dev/null
+++ b/docs/source/cisticola.scraper.gab.rst
@@ -0,0 +1,8 @@
+cisticola.scraper.gab module
+============================
+
+.. automodule:: cisticola.scraper.gab
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :private-members:
diff --git a/docs/source/cisticola.scraper.gettr.rst b/docs/source/cisticola.scraper.gettr.rst
new file mode 100644
index 0000000..3275e9a
--- /dev/null
+++ b/docs/source/cisticola.scraper.gettr.rst
@@ -0,0 +1,8 @@
+cisticola.scraper.gettr module
+==============================
+
+.. automodule:: cisticola.scraper.gettr
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :private-members:
diff --git a/docs/source/cisticola.scraper.odysee.rst b/docs/source/cisticola.scraper.odysee.rst
new file mode 100644
index 0000000..491b0db
--- /dev/null
+++ b/docs/source/cisticola.scraper.odysee.rst
@@ -0,0 +1,8 @@
+cisticola.scraper.odysee module
+===============================
+
+.. automodule:: cisticola.scraper.odysee
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :private-members:
diff --git a/docs/source/cisticola.scraper.rst b/docs/source/cisticola.scraper.rst
index dcd8bca..5e3d9a1 100644
--- a/docs/source/cisticola.scraper.rst
+++ b/docs/source/cisticola.scraper.rst
@@ -1,37 +1,25 @@
 cisticola.scraper package
 =========================
 
-Submodules
-----------
-
-cisticola.scraper.bitchute module
----------------------------------
-
-.. automodule:: cisticola.scraper.bitchute
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-cisticola.scraper.gettr module
-------------------------------
-
-.. automodule:: cisticola.scraper.gettr
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-cisticola.scraper.twitter module
---------------------------------
-
-.. automodule:: cisticola.scraper.twitter
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-Module contents
----------------
-
 .. automodule:: cisticola.scraper
    :members:
    :undoc-members:
    :show-inheritance:
+   :private-members:
+
+Submodules
+----------
+
+.. toctree::
+   :maxdepth: 4
+
+   cisticola.scraper.base
+   cisticola.scraper.bitchute
+   cisticola.scraper.gab
+   cisticola.scraper.gettr
+   cisticola.scraper.odysee
+   cisticola.scraper.rumble
+   cisticola.scraper.telegram_snscrape
+   cisticola.scraper.telegram_telethon
+   cisticola.scraper.twitter
+   cisticola.scraper.utils
diff --git a/docs/source/cisticola.scraper.rumble.rst b/docs/source/cisticola.scraper.rumble.rst
new file mode 100644
index 0000000..726c493
--- /dev/null
+++ b/docs/source/cisticola.scraper.rumble.rst
@@ -0,0 +1,8 @@
+cisticola.scraper.rumble module
+===============================
+
+.. automodule:: cisticola.scraper.rumble
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :private-members:
diff --git a/docs/source/cisticola.scraper.telegram_snscrape.rst b/docs/source/cisticola.scraper.telegram_snscrape.rst
new file mode 100644
index 0000000..ffc9a7c
--- /dev/null
+++ b/docs/source/cisticola.scraper.telegram_snscrape.rst
@@ -0,0 +1,8 @@
+cisticola.scraper.telegram\_snscrape module
+===========================================
+
+.. automodule:: cisticola.scraper.telegram_snscrape
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :private-members:
diff --git a/docs/source/cisticola.scraper.telegram_telethon.rst b/docs/source/cisticola.scraper.telegram_telethon.rst
new file mode 100644
index 0000000..a41db65
--- /dev/null
+++ b/docs/source/cisticola.scraper.telegram_telethon.rst
@@ -0,0 +1,8 @@
+cisticola.scraper.telegram\_telethon module
+===========================================
+
+.. automodule:: cisticola.scraper.telegram_telethon
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :private-members:
diff --git a/docs/source/cisticola.scraper.twitter.rst b/docs/source/cisticola.scraper.twitter.rst
new file mode 100644
index 0000000..9e557aa
--- /dev/null
+++ b/docs/source/cisticola.scraper.twitter.rst
@@ -0,0 +1,8 @@
+cisticola.scraper.twitter module
+================================
+
+.. automodule:: cisticola.scraper.twitter
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :private-members:
diff --git a/docs/source/cisticola.scraper.utils.rst b/docs/source/cisticola.scraper.utils.rst
new file mode 100644
index 0000000..ceefb4d
--- /dev/null
+++ b/docs/source/cisticola.scraper.utils.rst
@@ -0,0 +1,8 @@
+cisticola.scraper.utils module
+==============================
+
+.. automodule:: cisticola.scraper.utils
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :private-members:
diff --git a/docs/source/cisticola.transformer.base.rst b/docs/source/cisticola.transformer.base.rst
new file mode 100644
index 0000000..0f57e13
--- /dev/null
+++ b/docs/source/cisticola.transformer.base.rst
@@ -0,0 +1,8 @@
+cisticola.transformer.base module
+=================================
+
+.. automodule:: cisticola.transformer.base
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :private-members:
diff --git a/docs/source/cisticola.transformer.rst b/docs/source/cisticola.transformer.rst
index a2eb71e..218e1ec 100644
--- a/docs/source/cisticola.transformer.rst
+++ b/docs/source/cisticola.transformer.rst
@@ -1,21 +1,17 @@
 cisticola.transformer package
 =============================
 
-Submodules
-----------
-
-cisticola.transformer.twitter module
-------------------------------------
-
-.. automodule:: cisticola.transformer.twitter
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-Module contents
----------------
-
 .. automodule:: cisticola.transformer
    :members:
    :undoc-members:
    :show-inheritance:
+   :private-members:
+
+Submodules
+----------
+
+.. toctree::
+   :maxdepth: 4
+
+   cisticola.transformer.base
+   cisticola.transformer.twitter
diff --git a/docs/source/cisticola.transformer.twitter.rst b/docs/source/cisticola.transformer.twitter.rst
new file mode 100644
index 0000000..05f29fa
--- /dev/null
+++ b/docs/source/cisticola.transformer.twitter.rst
@@ -0,0 +1,8 @@
+cisticola.transformer.twitter module
+====================================
+
+.. automodule:: cisticola.transformer.twitter
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :private-members:
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 4af6aa1..c291fb8 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -43,9 +43,18 @@ exclude_patterns = []
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
 #
-html_theme = 'alabaster'
+html_theme = 'sphinx_rtd_theme'
 
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
 html_static_path = []
+
+# -- Default flags for autodoc------------------------------------------------
+
+autodoc_default_options = {'exclude-members': '_sa_class_manager'}
+
+html_favicon = '../images/favicon.ico'
+html_logo = '../images/cisticola_logo.svg'
+
+html_theme_options = {'style_nav_header_background': '#000000'}
\ No newline at end of file
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 67fd022..e3f70a9 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -5,7 +5,7 @@ Welcome to Cisticola's documentation!
    :maxdepth: 2
    :caption: Contents:
 
-   modules
+   cisticola
 
 
 
diff --git a/docs/source/modules.rst b/docs/source/modules.rst
deleted file mode 100644
index 9af7d5f..0000000
--- a/docs/source/modules.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-cisticola
-=========
-
-.. toctree::
-   :maxdepth: 4
-
-   cisticola

From a3c859ec79c9f31d5b67e2de4e98354f10eafe28 Mon Sep 17 00:00:00 2001
From: Tristan Lee <tristan@bellingcat.com>
Date: Mon, 14 Mar 2022 19:38:33 -0500
Subject: [PATCH 2/4] added more docstrings and comments

---
 cisticola/base.py         | 148 ++++++++++++++++++++++----------------
 cisticola/scraper/base.py |   6 +-
 pytest.ini                |   5 +-
 3 files changed, 94 insertions(+), 65 deletions(-)

diff --git a/cisticola/base.py b/cisticola/base.py
index fc07846..dfaaee8 100644
--- a/cisticola/base.py
+++ b/cisticola/base.py
@@ -1,14 +1,15 @@
 from typing import List
 from dataclasses import dataclass
 from datetime import datetime
+import tempfile 
+import json
+import io
+
 from sqlalchemy.orm import registry
 from sqlalchemy import Table, Column, Integer, String, JSON, DateTime, ForeignKey
 import pytesseract
 import PIL
-import io
 import exiftool
-import json
-import os
 
 from .utils import make_request
 
@@ -123,6 +124,85 @@ class TransformedResult:
     #: Text of the original post
     content: str
 
+@dataclass
+class Media:
+    """Base class for organizing information about a media file.
+    """
+
+    #: ID number of the media's corresponding scraped post in the ``raw_data`` table.
+    raw_id: int
+
+    #: ID number of the media's corresponding scraped post in the ``analysis`` table.
+    post: int
+
+    #: URL of the original post.
+    url: str
+
+    #: Original URL of the media from the original post.
+    original_url: str
+
+    #: JSON dump of the dict containing metadata information for the media file.
+    exif: str = None
+
+    def get_blob(self):
+        """Download media file as bytes blob.
+        """
+
+        blob = make_request(self.url)
+        return blob.content
+
+    def hydrate(self, blob = None):
+        """Download media file as bytes blob and extract data from content.
+        """
+
+        if blob is None:
+            blob = self.get_blob()
+
+        self.hydrate_exif(blob)
+
+    def hydrate_exif(self, blob):
+        """Extract Exif metadata from bytes blob.
+        """
+
+        with tempfile.NamedTemporaryFile() as temp_file:
+            temp_file.write(blob)
+
+            with exiftool.ExifTool() as et:
+                exif = et.get_metadata(temp_file.name)
+                self.exif = json.dumps(exif)
+
+@dataclass
+class Image(Media):
+    """Class for organizing information about an image file. 
+    """
+
+    #: Extracted OCR content from image
+    ocr: str = None
+
+    def hydrate(self, blob=None):
+        """Download image file as bytes blob and extract Exif and OCR content 
+        from the image.
+        """
+
+        if blob is None:
+            blob = self.get_blob()
+
+        super().hydrate(blob)
+        self.hydrate_ocr(blob)
+
+    def hydrate_ocr(self, blob):
+        """Extract OCR (optical character recognition) data from image bytes blob.
+        """
+
+        image = PIL.Image.open(io.BytesIO(blob))
+        self.ocr = pytesseract.image_to_string(image)
+
+@dataclass
+class Video(Media):
+    """Class for organizing information about a video file. 
+    """
+    
+    pass
 
 mapper_registry = registry()
 
@@ -138,7 +218,6 @@ raw_data_table = Table('raw_data', mapper_registry.metadata,
                        Column('date_archived', DateTime),
                        Column('archived_urls', JSON))
 
-mapper_registry.map_imperatively(ScraperResult, raw_data_table)
 
 analysis_table = Table('analysis', mapper_registry.metadata,
                        Column('id', Integer, primary_key=True,
@@ -153,72 +232,21 @@ analysis_table = Table('analysis', mapper_registry.metadata,
                        Column('url', String),
                        Column('author_id', String),
                        Column('author_username', String),
-                       Column('content', String)
-                       )
-
-mapper_registry.map_imperatively(TransformedResult, analysis_table)
-
-@dataclass
-class Media:
-    raw_id: int
-    post: int
-    url: str
-    original_url: str
-
-    exif: str = None
-
-    def get_blob(self):
-        blob = make_request(self.url)
-        return blob.content
-
-    def hydrate(self, blob = None):
-        if blob is None:
-            blob = self.get_blob()
-
-        self.hydrate_exif(blob)
-
-    def hydrate_exif(self, blob):
-        f = open('tmp', 'wb')
-        f.write(blob)
-        f.close()
-
-        with exiftool.ExifTool() as et:
-            exif = et.get_metadata('tmp')
-            self.exif = json.dumps(exif)
-
-        os.remove('tmp')
-
-@dataclass
-class Image(Media):
-    ocr: str = None
-
-    def hydrate(self, blob=None):
-        if blob is None:
-            blob = self.get_blob()
-
-        super().hydrate(blob)
-        self.hydrate_ocr(blob)
-
-    def hydrate_ocr(self, blob):
-        image = PIL.Image.open(io.BytesIO(blob))
-        self.ocr = pytesseract.image_to_string(image)
-
-@dataclass
-class Video(Media):
-    pass
+                       Column('content', String))
 
 media_table = Table('media', mapper_registry.metadata,
                        Column('id', Integer, primary_key=True,
                               autoincrement=True),
-                        Column('type', String),
+                       Column('type', String),
                        Column('raw_id', Integer, ForeignKey('raw_data.id')),
                        Column('post', Integer, ForeignKey('analysis.id')),
                        Column('url', String),
                        Column('original_url', String),
                        Column('exif', String),
-                       Column('ocr', String)
-                       )
+                       Column('ocr', String))
 
+mapper_registry.map_imperatively(TransformedResult, analysis_table)
+mapper_registry.map_imperatively(ScraperResult, raw_data_table)
 mapper_registry.map_imperatively(Media, media_table, polymorphic_on='type', polymorphic_identity='media')
 mapper_registry.map_imperatively(Image, media_table, inherits=Media, polymorphic_on='type', polymorphic_identity='image')
 mapper_registry.map_imperatively(Video, media_table, inherits=Media, polymorphic_on='type', polymorphic_identity='video')
\ No newline at end of file
diff --git a/cisticola/scraper/base.py b/cisticola/scraper/base.py
index f35a13e..6f853c6 100644
--- a/cisticola/scraper/base.py
+++ b/cisticola/scraper/base.py
@@ -278,8 +278,8 @@ class ScraperController:
         self.session.configure(bind=self.engine)
 
     def reset_db(self):
+        """Drop all data from the SQLAlchemy database.
+        """
 
         mapper_registry.metadata.drop_all(bind=self.engine)
-        self.connect_to_db(self.engine)
-
-
+        self.connect_to_db(self.engine)
\ No newline at end of file
diff --git a/pytest.ini b/pytest.ini
index 09a94e1..f3545f6 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,6 +1,6 @@
 [pytest]
 minversion =
-  6.0.2
+  7.0.0
 testpaths =
   tests/
 python_files =
@@ -13,4 +13,5 @@ addopts =
   --self-contained-html
 filterwarnings =
     ignore:the imp module is deprecated:DeprecationWarning
-    ignore:The localize method is no longer necessary, as this time zone supports the fold attribute
\ No newline at end of file
+    ignore:The localize method is no longer necessary, as this time zone supports the fold attribute
+    ignore:invalid escape sequence:DeprecationWarning
\ No newline at end of file

From d68d76c0ab92dc6f351a0a27fcd4811868141790 Mon Sep 17 00:00:00 2001
From: Tristan Lee <tristan@bellingcat.com>
Date: Tue, 15 Mar 2022 12:40:18 -0500
Subject: [PATCH 3/4] added missing docstrings, created Makefile target for
 sphinx-apidoc, added quickstart page for installation and configuration
 instructions

---
 cisticola/scraper/base.py                     | 60 ++++++++++--
 cisticola/scraper/bitchute.py                 |  8 +-
 cisticola/scraper/gab.py                      |  6 +-
 cisticola/scraper/gettr.py                    |  6 +-
 cisticola/scraper/instagram.py                |  1 +
 cisticola/scraper/odysee.py                   |  6 +-
 cisticola/scraper/rumble.py                   |  6 +-
 cisticola/scraper/telegram_snscrape.py        |  1 +
 cisticola/scraper/telegram_telethon.py        |  7 +-
 docs/Makefile                                 | 12 +++
 docs/make.bat                                 | 11 +++
 docs/source/cisticola.rst                     |  1 +
 docs/source/cisticola.scraper.instagram.rst   |  8 ++
 docs/source/cisticola.scraper.rst             |  4 +-
 docs/source/cisticola.scraper.utils.rst       |  8 --
 docs/source/cisticola.scraper.vkontakte.rst   |  8 ++
 docs/source/cisticola.scraper.youtube.rst     |  8 ++
 .../source/cisticola.transformer.bitchute.rst |  8 ++
 docs/source/cisticola.transformer.rst         |  1 +
 docs/source/cisticola.utils.rst               |  8 ++
 docs/source/index.rst                         | 15 +--
 docs/source/quickstart.rst                    | 96 +++++++++++++++++++
 22 files changed, 241 insertions(+), 48 deletions(-)
 create mode 100644 docs/source/cisticola.scraper.instagram.rst
 delete mode 100644 docs/source/cisticola.scraper.utils.rst
 create mode 100644 docs/source/cisticola.scraper.vkontakte.rst
 create mode 100644 docs/source/cisticola.scraper.youtube.rst
 create mode 100644 docs/source/cisticola.transformer.bitchute.rst
 create mode 100644 docs/source/cisticola.utils.rst
 create mode 100644 docs/source/quickstart.rst

diff --git a/cisticola/scraper/base.py b/cisticola/scraper/base.py
index a2f921f..28dbe76 100644
--- a/cisticola/scraper/base.py
+++ b/cisticola/scraper/base.py
@@ -38,6 +38,24 @@ class Scraper:
     def __str__(self):
         return self.__version__
 
+    def get_username_from_url(self, url: str) -> str:
+        """Extract a channel's username from its URL. 
+
+        Parameters
+        ----------
+        url: str
+            URL of the channel on a given platform
+            e.g. ``"https://twitter.com/EliotHiggins"``
+        
+        Returns
+        -------
+        username: str
+            Extracted username of the channel.
+            e.g. ``"EliotHiggins"``
+        """
+        
+        raise NotImplementedError
+
     def url_to_key(self, url: str, content_type: str) -> str:
         """Generate a unique identifier for media from a specified post.
 
@@ -61,13 +79,13 @@ class Scraper:
         return key 
 
     def url_to_blob(self, url: str, key: str = None) -> Tuple[bytes, str, str]:
-        """Download media file from a specified post URL.
+        """Download media file from a specified media file URL.
 
         Parameters
         ---------
         url: str
-            URL of original post. 
-            e.g. ``"https://twitter.com/bellingcat/status/1503397267675533313"``
+            URL of media file from original post. 
+            e.g. ``"https://pbs.twimg.com/media/FN0j0dYWUAcQxfK?format=png&name=medium"``
         key: str or None
             Pre-defined unique identifier for the media file.
 
@@ -93,14 +111,14 @@ class Scraper:
         return blob, content_type, key
 
     def m3u8_url_to_blob(self, url: str, key: str = None) -> Tuple[bytes, str, str]:
-        """Download media file from a specified post URL, where the media file 
+        """Download media file from a specified media URL, where the media file 
         is formatted as an m3u8 playlist, which is then decoded to an mp4 file.
 
         Parameters
         ---------
         url: str
-            URL of original post. 
-            e.g. ``"https://twitter.com/bellingcat/status/1503397267675533313"``
+            URL of m3u8 playlist file from original post. 
+            e.g. ``"https://media.gettr.com/group47/origin/2022/03/15/01/cbc436c1-1a1a-4b97-671d-c42109f3ec9b/out.m3u8"``
         key: str or None
             Pre-defined unique identifier for the media file.
 
@@ -136,7 +154,28 @@ class Scraper:
         return blob, content_type, key
 
     def ytdlp_url_to_blob(self, url: str, key: str = None) -> Tuple[bytes, str, str]:
-        
+        """Download media file from a specified media URL, using a fork of 
+        youtube-dl that enables faster downloading.
+
+        Parameters
+        ---------
+        url: str
+            URL of media file from original post. 
+            e.g. ``"https://rumble.com/embed/vgt7gh/"``
+        key: str or None
+            Pre-defined unique identifier for the media file.
+
+        Returns
+        -------
+        blob: bytes
+            Raw bytes of the downloaded media file. 
+        content_type: str
+            Content-Type of media. 
+            e.g. ``"video/mp4"``.
+        key: str
+            Unique identifier for the media file.
+        """
+
         content_type = 'video/mp4'
 
         with tempfile.TemporaryDirectory() as temp_dir:
@@ -225,6 +264,11 @@ class Scraper:
         archive_media: bool
             If ``True``, any media files (images, video, etc.) from posts are archived. 
             If ``False``, media files are not archived. 
+
+        Yields
+        ------
+        ScraperResult
+            Scraper result from a single post/comment from the specified Channel.
         """
         
         raise NotImplementedError
@@ -311,7 +355,7 @@ class ScraperController:
         self.session.configure(bind=self.engine)
 
     def reset_db(self):
-        """Drop all data from the SQLAlchemy database.
+        """Drop all data from the connected SQLAlchemy database.
         """
 
         mapper_registry.metadata.drop_all(bind=self.engine)
diff --git a/cisticola/scraper/bitchute.py b/cisticola/scraper/bitchute.py
index 8a365f4..47a822e 100644
--- a/cisticola/scraper/bitchute.py
+++ b/cisticola/scraper/bitchute.py
@@ -1,4 +1,4 @@
-from datetime import datetime, timezone
+ from datetime import datetime, timezone
 import time
 import re 
 from html.parser import HTMLParser
@@ -17,7 +17,7 @@ class BitchuteScraper(Scraper):
     library"""
     __version__ = "BitchuteScraper 0.0.1"
 
-    def get_username_from_url(url):
+    def get_username_from_url(self, url):
         username = url.split('bitchute.com/channel/')[-1].strip('/')
 
         return username
@@ -33,7 +33,7 @@ class BitchuteScraper(Scraper):
 
         detail = 'comments'
 
-        username = BitchuteScraper.get_username_from_url(channel.url)
+        username = self.get_username_from_url(channel.url)
         scraper = get_videos_user(session, username, csrftoken, detail)
 
         for post in scraper:
@@ -61,7 +61,7 @@ class BitchuteScraper(Scraper):
                 archived_urls=archived_urls)
 
     def can_handle(self, channel):
-        if channel.platform == "Bitchute" and BitchuteScraper.get_username_from_url(channel.url) is not None:
+        if channel.platform == "Bitchute" and self.get_username_from_url(channel.url) is not None:
             return True
 
 #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
diff --git a/cisticola/scraper/gab.py b/cisticola/scraper/gab.py
index 910ebc2..f90f2a3 100644
--- a/cisticola/scraper/gab.py
+++ b/cisticola/scraper/gab.py
@@ -11,14 +11,14 @@ class GabScraper(Scraper):
     """An implementation of a Scraper for Gab, using GARC library"""
     __version__ = "GabScraper 0.0.1"
 
-    def get_username_from_url(url):
+    def get_username_from_url(self, url):
         username = url.split('https://gab.com/')[-1]
 
         return username
 
     def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
         client = Garc(profile = 'main')
-        username = GabScraper.get_username_from_url(channel.url)
+        username = self.get_username_from_url(channel.url)
 
         scraper = client.userposts(username)
 
@@ -52,5 +52,5 @@ class GabScraper(Scraper):
                 archived_urls=archived_urls)
 
     def can_handle(self, channel):
-        if channel.platform == "Gab" and GabScraper.get_username_from_url(channel.url) is not None:
+        if channel.platform == "Gab" and self.get_username_from_url(channel.url) is not None:
             return True
\ No newline at end of file
diff --git a/cisticola/scraper/gettr.py b/cisticola/scraper/gettr.py
index 3cd069e..4fb15cc 100644
--- a/cisticola/scraper/gettr.py
+++ b/cisticola/scraper/gettr.py
@@ -12,7 +12,7 @@ class GettrScraper(Scraper):
     """An implementation of a Scraper for Gettr, using gogettr library"""
     __version__ = "GettrScraper 0.0.1"
 
-    def get_username_from_url(url):
+    def get_username_from_url(self, url):
         username = url.split("gettr.com/user/")[1]
         if len(username.split("/")) > 1:
             return None
@@ -21,7 +21,7 @@ class GettrScraper(Scraper):
 
     def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
         client = PublicClient()
-        username = GettrScraper.get_username_from_url(channel.url)
+        username = self.get_username_from_url(channel.url)
         scraper = client.user_activity(username=username, type="posts")
 
         for post in scraper:
@@ -62,7 +62,7 @@ class GettrScraper(Scraper):
                 archived_urls=archived_urls)
 
     def can_handle(self, channel):
-        if channel.platform == "Gettr" and GettrScraper.get_username_from_url(channel.url) is not None:
+        if channel.platform == "Gettr" and self.get_username_from_url(channel.url) is not None:
             return True
 
     def url_to_key(self, url: str, content_type: str) -> str:
diff --git a/cisticola/scraper/instagram.py b/cisticola/scraper/instagram.py
index eb20ecb..f9ae76e 100644
--- a/cisticola/scraper/instagram.py
+++ b/cisticola/scraper/instagram.py
@@ -18,6 +18,7 @@ CONTENT_TYPES = {
     'mp4' : 'video/mp4'}
 
 class InstagramScraper(Scraper):
+    """An implementation of a Scraper for Instagram, using instaloader library"""
     __version__ = "InstagramScraper 0.0.1"
 
     def get_username_from_url(self, url):
diff --git a/cisticola/scraper/odysee.py b/cisticola/scraper/odysee.py
index 61ed9ca..eb7ec04 100644
--- a/cisticola/scraper/odysee.py
+++ b/cisticola/scraper/odysee.py
@@ -13,7 +13,7 @@ class OdyseeScraper(Scraper):
     """An implementation of a Scraper for Odysee, using polyphemus library"""
     __version__ = "OdyseeScraper 0.0.1"
 
-    def get_username_from_url(url):
+    def get_username_from_url(self, url):
 
         username = url.split('odysee.com/')[-1].strip('@').split(':')[0]
 
@@ -21,7 +21,7 @@ class OdyseeScraper(Scraper):
 
     def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
 
-        username = OdyseeScraper.get_username_from_url(channel.url)
+        username = self.get_username_from_url(channel.url)
         odysee_channel = OdyseeChannel(channel_name = username)
         
         all_videos = odysee_channel.get_all_videos()
@@ -70,7 +70,7 @@ class OdyseeScraper(Scraper):
                     archived_urls={})
 
     def can_handle(self, channel):
-        if channel.platform == "Odysee" and OdyseeScraper.get_username_from_url(channel.url) is not None:
+        if channel.platform == "Odysee" and self.get_username_from_url(channel.url) is not None:
             return True
 
     def url_to_key(self, url: str, content_type: str) -> str:
diff --git a/cisticola/scraper/rumble.py b/cisticola/scraper/rumble.py
index 8546d6e..9863fb0 100644
--- a/cisticola/scraper/rumble.py
+++ b/cisticola/scraper/rumble.py
@@ -14,14 +14,14 @@ class RumbleScraper(Scraper):
     """An implementation of a Scraper for Rumble, using custom functions"""
     __version__ = "RumbleScraper 0.0.1"
 
-    def get_username_from_url(url):
+    def get_username_from_url(self, url):
         username = url.split('https://rumble.com/c/')[1]
 
         return username
 
     def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
 
-        username = RumbleScraper.get_username_from_url(channel.url)
+        username = self.get_username_from_url(channel.url)
         scraper = get_channel_videos(username)
 
         for post in scraper:
@@ -54,7 +54,7 @@ class RumbleScraper(Scraper):
         return key 
 
     def can_handle(self, channel):
-        if channel.platform == "Rumble" and RumbleScraper.get_username_from_url(channel.url) is not None:
+        if channel.platform == "Rumble" and self.get_username_from_url(channel.url) is not None:
             return True
 
 #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
diff --git a/cisticola/scraper/telegram_snscrape.py b/cisticola/scraper/telegram_snscrape.py
index 3f3f45d..ec5b292 100644
--- a/cisticola/scraper/telegram_snscrape.py
+++ b/cisticola/scraper/telegram_snscrape.py
@@ -8,6 +8,7 @@ from cisticola.base import Channel, ScraperResult
 from cisticola.scraper.base import Scraper
 
 class TelegramSnscrapeScraper(Scraper):
+    """An implementation of a Scraper for Telegram, using snscrape library"""
     __version__ = "TelegramSnscrapeScraper 0.0.1"
 
     def can_handle(self, channel):
diff --git a/cisticola/scraper/telegram_telethon.py b/cisticola/scraper/telegram_telethon.py
index 76d68f2..b8231bc 100644
--- a/cisticola/scraper/telegram_telethon.py
+++ b/cisticola/scraper/telegram_telethon.py
@@ -14,6 +14,7 @@ from cisticola.scraper.base import Scraper
 MEDIA_TYPES = ['photo', 'video', 'document', 'webpage']
 
 class TelegramTelethonScraper(Scraper):
+    """An implementation of a Scraper for Telegram, using Telethon library"""
     __version__ = "TelegramTelethonScraper 0.0.1"
 
     def get_username_from_url(self, url):
@@ -30,9 +31,9 @@ class TelegramTelethonScraper(Scraper):
 
         username = self.get_username_from_url(channel.url)
 
-        api_id = os.environ['TELEGRAM_API_ID_1']
-        api_hash = os.environ['TELEGRAM_API_HASH_1']
-        phone = os.environ['TELEGRAM_PHONE_1']
+        api_id = os.environ['TELEGRAM_API_ID']
+        api_hash = os.environ['TELEGRAM_API_HASH']
+        phone = os.environ['TELEGRAM_PHONE']
 
         with TelegramClient(phone, api_id, api_hash) as client:
 
diff --git a/docs/Makefile b/docs/Makefile
index d0c3cbf..ab3e9be 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -8,12 +8,24 @@ SPHINXBUILD   ?= sphinx-build
 SOURCEDIR     = source
 BUILDDIR      = build
 
+SPHINXAPIDOC  = sphinx-apidoc
+APIDOCFLAGS   = --separate --private --module-first
+MODULEPATH    = ../cisticola
+SOURCEFILES   = cisticola.*
+MODULEFILE    = modules.rst
+
 # Put it first so that "make" without argument is like "make help".
 help:
 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
 
 .PHONY: help Makefile
 
+# Custom process and flags for generating Sphinx sources
+apidoc:
+	rm $(SOURCEDIR)/$(SOURCEFILES)
+	$(SPHINXAPIDOC) $(APIDOCFLAGS) -o "$(SOURCEDIR)" "$(MODULEPATH)"
+	rm $(SOURCEDIR)/$(MODULEFILE)
+
 # Catch-all target: route all unknown targets to Sphinx using the new
 # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
 %: Makefile
diff --git a/docs/make.bat b/docs/make.bat
index 6fcf05b..3ab2ef7 100644
--- a/docs/make.bat
+++ b/docs/make.bat
@@ -10,6 +10,12 @@ if "%SPHINXBUILD%" == "" (
 set SOURCEDIR=source
 set BUILDDIR=build
 
+set SPHINXAPIDOC=sphinx-apidoc
+set APIDOCFLAGS=--separate --private --module-first
+set MODULEPATH=../cisticola
+set SOURCEFILES=cisticola.*
+set MODULEFILE=modules.rst
+
 if "%1" == "" goto help
 
 %SPHINXBUILD% >NUL 2>NUL
@@ -28,6 +34,11 @@ if errorlevel 9009 (
 %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
 goto end
 
+:apidoc
+	del %SOURCEDIR%\%SOURCEFILES%
+	%SPHINXAPIDOC% %APIDOCFLAGS% -o %SOURCEDIR% %MODULEPATH%
+	del %SOURCEDIR%\%MODULEFILE%
+
 :help
 %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
 
diff --git a/docs/source/cisticola.rst b/docs/source/cisticola.rst
index 6857abd..22cdf67 100644
--- a/docs/source/cisticola.rst
+++ b/docs/source/cisticola.rst
@@ -23,3 +23,4 @@ Submodules
    :maxdepth: 4
 
    cisticola.base
+   cisticola.utils
diff --git a/docs/source/cisticola.scraper.instagram.rst b/docs/source/cisticola.scraper.instagram.rst
new file mode 100644
index 0000000..53ddc43
--- /dev/null
+++ b/docs/source/cisticola.scraper.instagram.rst
@@ -0,0 +1,8 @@
+cisticola.scraper.instagram module
+==================================
+
+.. automodule:: cisticola.scraper.instagram
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :private-members:
diff --git a/docs/source/cisticola.scraper.rst b/docs/source/cisticola.scraper.rst
index 5e3d9a1..b93592c 100644
--- a/docs/source/cisticola.scraper.rst
+++ b/docs/source/cisticola.scraper.rst
@@ -17,9 +17,11 @@ Submodules
    cisticola.scraper.bitchute
    cisticola.scraper.gab
    cisticola.scraper.gettr
+   cisticola.scraper.instagram
    cisticola.scraper.odysee
    cisticola.scraper.rumble
    cisticola.scraper.telegram_snscrape
    cisticola.scraper.telegram_telethon
    cisticola.scraper.twitter
-   cisticola.scraper.utils
+   cisticola.scraper.vkontakte
+   cisticola.scraper.youtube
diff --git a/docs/source/cisticola.scraper.utils.rst b/docs/source/cisticola.scraper.utils.rst
deleted file mode 100644
index ceefb4d..0000000
--- a/docs/source/cisticola.scraper.utils.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-cisticola.scraper.utils module
-==============================
-
-.. automodule:: cisticola.scraper.utils
-   :members:
-   :undoc-members:
-   :show-inheritance:
-   :private-members:
diff --git a/docs/source/cisticola.scraper.vkontakte.rst b/docs/source/cisticola.scraper.vkontakte.rst
new file mode 100644
index 0000000..405d70d
--- /dev/null
+++ b/docs/source/cisticola.scraper.vkontakte.rst
@@ -0,0 +1,8 @@
+cisticola.scraper.vkontakte module
+==================================
+
+.. automodule:: cisticola.scraper.vkontakte
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :private-members:
diff --git a/docs/source/cisticola.scraper.youtube.rst b/docs/source/cisticola.scraper.youtube.rst
new file mode 100644
index 0000000..e990195
--- /dev/null
+++ b/docs/source/cisticola.scraper.youtube.rst
@@ -0,0 +1,8 @@
+cisticola.scraper.youtube module
+================================
+
+.. automodule:: cisticola.scraper.youtube
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :private-members:
diff --git a/docs/source/cisticola.transformer.bitchute.rst b/docs/source/cisticola.transformer.bitchute.rst
new file mode 100644
index 0000000..7427e9f
--- /dev/null
+++ b/docs/source/cisticola.transformer.bitchute.rst
@@ -0,0 +1,8 @@
+cisticola.transformer.bitchute module
+=====================================
+
+.. automodule:: cisticola.transformer.bitchute
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :private-members:
diff --git a/docs/source/cisticola.transformer.rst b/docs/source/cisticola.transformer.rst
index 218e1ec..358d955 100644
--- a/docs/source/cisticola.transformer.rst
+++ b/docs/source/cisticola.transformer.rst
@@ -14,4 +14,5 @@ Submodules
    :maxdepth: 4
 
    cisticola.transformer.base
+   cisticola.transformer.bitchute
    cisticola.transformer.twitter
diff --git a/docs/source/cisticola.utils.rst b/docs/source/cisticola.utils.rst
new file mode 100644
index 0000000..6e5872a
--- /dev/null
+++ b/docs/source/cisticola.utils.rst
@@ -0,0 +1,8 @@
+cisticola.utils module
+======================
+
+.. automodule:: cisticola.utils
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :private-members:
diff --git a/docs/source/index.rst b/docs/source/index.rst
index e3f70a9..3c12d81 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -2,16 +2,7 @@ Welcome to Cisticola's documentation!
 =====================================
 
 .. toctree::
-   :maxdepth: 2
-   :caption: Contents:
+  :maxdepth: 1
 
-   cisticola
-
-
-
-Indices and tables
-==================
-
-* :ref:`genindex`
-* :ref:`modindex`
-* :ref:`search`
+  quickstart
+  cisticola
\ No newline at end of file
diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
new file mode 100644
index 0000000..a6c5643
--- /dev/null
+++ b/docs/source/quickstart.rst
@@ -0,0 +1,96 @@
+Quickstart
+==========
+
+Installation
+------------
+
+The *cisticola* application uses pipenv_ for dependency management. To install the dependencies of *cisticola*, first install pipenv using the following command:
+
+.. code-block::
+
+    pip install pipenv
+
+and then install the dependencies using the following command from the package root directory:
+
+.. code-block::
+
+    pipenv install
+
+To install the necessary dependencies for building the documentation and running unit tests, run the following command from the package root directory:
+
+.. code-block::
+
+    pipenv install --dev
+
+Environment Variables
+---------------------
+
+Three of the scrapers in *cisticola* (:py:class:`~cisticola.scraper.gab.GabScraper`, :py:class:`~cisticola.scraper.instagram.InstagramScraper`, and :py:class:`~cisticola.scraper.telegram_telethon.TelegramTelethonScraper`) require platform credentials to work correctly. 
+
+Gab
+"""
+
+The Gab credentials can be configured by running the following command from the root directory:
+
+.. code-block::
+
+    pipenv run garc configure 
+
+which will direct you to provide the username and password for your Gab account.
+
+Instagram
+"""""""""
+
+The Instagram credentials can be configured by setting the following environment variables, either in the project's ``.env`` file or in the system's environment:
+
+- ``INSTAGRAM_USERNAME``: username of your Instagram account
+- ``INSTAGRAM_PASSWORD``: password of your Instagram account
+
+Telegram Telethon
+"""""""""""""""""
+
+The Telegram credentials can be configured by setting the following environment variables, either in the project's ``.env`` file or in the system's environment:
+
+- ``TELEGRAM_API_ID``: API ID number for your Telegram application
+- ``TELEGRAM_API_HASH``: API hash for your Telegram application
+- ``TELEGRAM_PHONE``: phone number for the account corresponding to your Telegram application
+
+If you do not already have a Telegram application, you can create one by following the instructions on `this page`_.
+
+Documentation
+-------------
+
+The *cisticola* application uses Sphinx_ to generate and display its documentation. To build the documentation in the HTML format, run the following command from the ``docs/`` directory:
+
+.. code-block::
+
+    pipenv run make html
+
+For developers, if changes are made to the package structure or additional modules are created, you can update the Sphinx source ``*.rst`` files by running the following command from the ``docs/`` directory:
+
+.. code-block::
+
+    pipenv run make apidoc
+
+Testing
+-------
+
+The *cisticola* application uses pytest_ for unit testing. To run the test suite, run the following command from the package root directory:
+
+.. code-block::
+
+    pipenv run pytest
+
+Examples
+--------
+
+An example of a *cisticola* ingest file ``russian_telegram_ingest.py`` is included in the package root directory, showing how the list of channels to scrape is defined, and how the :py:class:`~cisticola.scraper.base.ScraperController` and :py:class:`~cisticola.transformer.base.Transformer` classes are used. To run the ingest script, run the following command from the package root directory:
+
+.. code-block::
+
+    pipenv run python russian_telegram_ingest.py
+
+.. _pipenv: https://pipenv.pypa.io/en/latest/
+.. _Sphinx: https://www.sphinx-doc.org/en/master/
+.. _pytest: https://docs.pytest.org/en/7.1.x/
+.. _this page: https://core.telegram.org/api/obtaining_api_id
\ No newline at end of file

From 93554b19e90c8f70e785696e583b57740fdb53e5 Mon Sep 17 00:00:00 2001
From: Tristan Lee <tristan@bellingcat.com>
Date: Tue, 15 Mar 2022 13:05:41 -0500
Subject: [PATCH 4/4] fixed typo

---
 cisticola/scraper/bitchute.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cisticola/scraper/bitchute.py b/cisticola/scraper/bitchute.py
index 47a822e..b11d27a 100644
--- a/cisticola/scraper/bitchute.py
+++ b/cisticola/scraper/bitchute.py
@@ -1,4 +1,4 @@
- from datetime import datetime, timezone
+from datetime import datetime, timezone
 import time
 import re 
 from html.parser import HTMLParser