Merge pull request #1 from bellingcat/add-docs

Add documentation generation with Sphinx
This commit is contained in:
Logan Williams
2022-02-22 15:19:54 +01:00
committed by GitHub
15 changed files with 429 additions and 9 deletions

3
.gitignore vendored
View File

@@ -2,3 +2,6 @@
*.pyc
*.ipynb
*.db
docs/build/
docs/source/_*

View File

@@ -11,6 +11,7 @@ gogettr = "*"
requests = "*"
bs4 = "*"
dateparser = "*"
sphinx = "*"
[dev-packages]

203
Pipfile.lock generated
View File

@@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
"sha256": "ca7eea4b95394e06f8b74eac90d376097fd01231010b594cdcc588a3440f1231"
"sha256": "cde7247f41da5501b9fc4fc5d01916548f719b3d4ea0f1dd1765c4cf0413bbf7"
},
"pipfile-spec": 6,
"requires": {
@@ -16,6 +16,21 @@
]
},
"default": {
"alabaster": {
"hashes": [
"sha256:446438bdcca0e05bd45ea2de1668c1d9b032e1a9154c2c259092d77031ddd359",
"sha256:a661d72d58e6ea8a57f7a86e37d86716863ee5e92788398526d58b26a4e4dc02"
],
"version": "==0.7.12"
},
"babel": {
"hashes": [
"sha256:ab49e12b91d937cd11f0b67cb259a57ab4ad2b59ac7a3b41d6c06c0ac5b0def9",
"sha256:bc0c176f9f6a994582230df350aa6e05ba2ebe4b3ac317eab29d9be5d2768da0"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==2.9.1"
},
"beautifulsoup4": {
"hashes": [
"sha256:9a315ce70049920ea4572a4055bc4bd700c940521d36fc858205ad4fcde149bf",
@@ -48,11 +63,11 @@
},
"click": {
"hashes": [
"sha256:353f466495adaeb40b6b5f592f9f91cb22372351c84caeb068132442a4518ef3",
"sha256:410e932b050f5eed773c4cda94de75971c89cdb3155a72a0831139a79e5ecb5b"
"sha256:6a7a62563bbfabfda3a38f3023a1db4a35978c0abd76f6c9605ecd6554d6d9b1",
"sha256:8458d7b1287c5fb128c90e23381cf99dcde74beaf6c7ff6384ce84d6fe090adb"
],
"markers": "python_version >= '3.6'",
"version": "==8.0.3"
"version": "==8.0.4"
},
"dateparser": {
"hashes": [
@@ -62,6 +77,14 @@
"index": "pypi",
"version": "==1.1.0"
},
"docutils": {
"hashes": [
"sha256:686577d2e4c32380bb50cbb22f575ed742d58168cee37e99117a854bcd88f125",
"sha256:cf316c8370a737a022b72b56874f6602acf974a37a9fba42ec2876387549fc61"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
"version": "==0.17.1"
},
"filelock": {
"hashes": [
"sha256:9cd540a9352e432c7246a48fe4e8712b10acb1df2ad1f30e8c070b82ae1fed85",
@@ -136,7 +159,7 @@
"sha256:fa877ca7f6b48054f847b61d6fa7bed5cebb663ebc55e018fda12db09dcc664c",
"sha256:fdcec0b8399108577ec290f55551d926d9a1fa6cad45882093a7a07ac5ec147b"
],
"markers": "python_version >= '3' and platform_machine == 'aarch64' or (platform_machine == 'ppc64le' or (platform_machine == 'x86_64' or (platform_machine == 'amd64' or (platform_machine == 'AMD64' or (platform_machine == 'win32' or platform_machine == 'WIN32')))))",
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
"version": "==1.1.2"
},
"idna": {
@@ -147,6 +170,30 @@
"markers": "python_version >= '3'",
"version": "==3.3"
},
"imagesize": {
"hashes": [
"sha256:1db2f82529e53c3e929e8926a1fa9235aa82d0bd0c580359c67ec31b2fddaa8c",
"sha256:cd1750d452385ca327479d45b64d9c7729ecf0b3969a58148298c77092261f9d"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==1.3.0"
},
"importlib-metadata": {
"hashes": [
"sha256:175f4ee440a0317f6e8d81b7f8d4869f93316170a65ad2b007d2929186c8052c",
"sha256:e0bc84ff355328a4adfc5240c4f211e0ab386f80aa640d1b11f0618a1d282094"
],
"markers": "python_version < '3.10'",
"version": "==4.11.1"
},
"jinja2": {
"hashes": [
"sha256:077ce6014f7b40d03b47d1f1ca4b0fc8328a692bd284016f806ed0eaca390ad8",
"sha256:611bb273cd68f3b993fabdc4064fc858c5b47a973cb5aa7999ec1ba405c87cd7"
],
"markers": "python_version >= '3.6'",
"version": "==3.0.3"
},
"loguru": {
"hashes": [
"sha256:066bd06758d0a513e9836fd9c6b5a75bfb3fd36841f4b996bc60b547a309d41c",
@@ -222,6 +269,76 @@
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
"version": "==4.8.0"
},
"markupsafe": {
"hashes": [
"sha256:023af8c54fe63530545f70dd2a2a7eed18d07a9a77b94e8bf1e2ff7f252db9a3",
"sha256:09c86c9643cceb1d87ca08cdc30160d1b7ab49a8a21564868921959bd16441b8",
"sha256:142119fb14a1ef6d758912b25c4e803c3ff66920635c44078666fe7cc3f8f759",
"sha256:1d1fb9b2eec3c9714dd936860850300b51dbaa37404209c8d4cb66547884b7ed",
"sha256:204730fd5fe2fe3b1e9ccadb2bd18ba8712b111dcabce185af0b3b5285a7c989",
"sha256:24c3be29abb6b34052fd26fc7a8e0a49b1ee9d282e3665e8ad09a0a68faee5b3",
"sha256:290b02bab3c9e216da57c1d11d2ba73a9f73a614bbdcc027d299a60cdfabb11a",
"sha256:3028252424c72b2602a323f70fbf50aa80a5d3aa616ea6add4ba21ae9cc9da4c",
"sha256:30c653fde75a6e5eb814d2a0a89378f83d1d3f502ab710904ee585c38888816c",
"sha256:3cace1837bc84e63b3fd2dfce37f08f8c18aeb81ef5cf6bb9b51f625cb4e6cd8",
"sha256:4056f752015dfa9828dce3140dbadd543b555afb3252507348c493def166d454",
"sha256:454ffc1cbb75227d15667c09f164a0099159da0c1f3d2636aa648f12675491ad",
"sha256:598b65d74615c021423bd45c2bc5e9b59539c875a9bdb7e5f2a6b92dfcfc268d",
"sha256:599941da468f2cf22bf90a84f6e2a65524e87be2fce844f96f2dd9a6c9d1e635",
"sha256:5ddea4c352a488b5e1069069f2f501006b1a4362cb906bee9a193ef1245a7a61",
"sha256:62c0285e91414f5c8f621a17b69fc0088394ccdaa961ef469e833dbff64bd5ea",
"sha256:679cbb78914ab212c49c67ba2c7396dc599a8479de51b9a87b174700abd9ea49",
"sha256:6e104c0c2b4cd765b4e83909cde7ec61a1e313f8a75775897db321450e928cce",
"sha256:736895a020e31b428b3382a7887bfea96102c529530299f426bf2e636aacec9e",
"sha256:75bb36f134883fdbe13d8e63b8675f5f12b80bb6627f7714c7d6c5becf22719f",
"sha256:7d2f5d97fcbd004c03df8d8fe2b973fe2b14e7bfeb2cfa012eaa8759ce9a762f",
"sha256:80beaf63ddfbc64a0452b841d8036ca0611e049650e20afcb882f5d3c266d65f",
"sha256:84ad5e29bf8bab3ad70fd707d3c05524862bddc54dc040982b0dbcff36481de7",
"sha256:8da5924cb1f9064589767b0f3fc39d03e3d0fb5aa29e0cb21d43106519bd624a",
"sha256:961eb86e5be7d0973789f30ebcf6caab60b844203f4396ece27310295a6082c7",
"sha256:96de1932237abe0a13ba68b63e94113678c379dca45afa040a17b6e1ad7ed076",
"sha256:a0a0abef2ca47b33fb615b491ce31b055ef2430de52c5b3fb19a4042dbc5cadb",
"sha256:b2a5a856019d2833c56a3dcac1b80fe795c95f401818ea963594b345929dffa7",
"sha256:b8811d48078d1cf2a6863dafb896e68406c5f513048451cd2ded0473133473c7",
"sha256:c532d5ab79be0199fa2658e24a02fce8542df196e60665dd322409a03db6a52c",
"sha256:d3b64c65328cb4cd252c94f83e66e3d7acf8891e60ebf588d7b493a55a1dbf26",
"sha256:d4e702eea4a2903441f2735799d217f4ac1b55f7d8ad96ab7d4e25417cb0827c",
"sha256:d5653619b3eb5cbd35bfba3c12d575db2a74d15e0e1c08bf1db788069d410ce8",
"sha256:d66624f04de4af8bbf1c7f21cc06649c1c69a7f84109179add573ce35e46d448",
"sha256:e67ec74fada3841b8c5f4c4f197bea916025cb9aa3fe5abf7d52b655d042f956",
"sha256:e6f7f3f41faffaea6596da86ecc2389672fa949bd035251eab26dc6697451d05",
"sha256:f02cf7221d5cd915d7fa58ab64f7ee6dd0f6cddbb48683debf5d04ae9b1c2cc1",
"sha256:f0eddfcabd6936558ec020130f932d479930581171368fd728efcfb6ef0dd357",
"sha256:fabbe18087c3d33c5824cb145ffca52eccd053061df1d79d4b66dafa5ad2a5ea",
"sha256:fc3150f85e2dbcf99e65238c842d1cfe69d3e7649b19864c1cc043213d9cd730"
],
"markers": "python_version >= '3.7'",
"version": "==2.1.0"
},
"packaging": {
"hashes": [
"sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb",
"sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"
],
"markers": "python_version >= '3.6'",
"version": "==21.3"
},
"pygments": {
"hashes": [
"sha256:44238f1b60a76d78fc8ca0528ee429702aae011c265fe6a8dd8b63049ae41c65",
"sha256:4e426f72023d88d03b2fa258de560726ce890ff3b630f88c21cbb8b2503b8c6a"
],
"markers": "python_version >= '3.5'",
"version": "==2.11.2"
},
"pyparsing": {
"hashes": [
"sha256:18ee9022775d270c55187733956460083db60b37d0d0fb357445f3094eed3eea",
"sha256:a6c06a88f252e6c322f65faf8f418b16213b51bdfaece0524c1c1bc30c63c484"
],
"markers": "python_version >= '3.6'",
"version": "==3.0.7"
},
"pysocks": {
"hashes": [
"sha256:08e69f092cc6dbe92a0fdd16eeb9b9ffbc13cadfe5ca4c7bd92ffb078b293299",
@@ -333,6 +450,9 @@
"version": "==2022.1.18"
},
"requests": {
"extras": [
"socks"
],
"hashes": [
"sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61",
"sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"
@@ -348,6 +468,13 @@
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==1.16.0"
},
"snowballstemmer": {
"hashes": [
"sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1",
"sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a"
],
"version": "==2.2.0"
},
"snscrape": {
"hashes": [
"sha256:af30d12872da692ff9ccaf5651962edceb1fd4a28cf7cc92c8c898902f009ce3",
@@ -364,6 +491,62 @@
"markers": "python_version >= '3.6'",
"version": "==2.3.1"
},
"sphinx": {
"hashes": [
"sha256:5da895959511473857b6d0200f56865ed62c31e8f82dd338063b84ec022701fe",
"sha256:6caad9786055cb1fa22b4a365c1775816b876f91966481765d7d50e9f0dd35cc"
],
"index": "pypi",
"version": "==4.4.0"
},
"sphinxcontrib-applehelp": {
"hashes": [
"sha256:806111e5e962be97c29ec4c1e7fe277bfd19e9652fb1a4392105b43e01af885a",
"sha256:a072735ec80e7675e3f432fcae8610ecf509c5f1869d17e2eecff44389cdbc58"
],
"markers": "python_version >= '3.5'",
"version": "==1.0.2"
},
"sphinxcontrib-devhelp": {
"hashes": [
"sha256:8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e",
"sha256:ff7f1afa7b9642e7060379360a67e9c41e8f3121f2ce9164266f61b9f4b338e4"
],
"markers": "python_version >= '3.5'",
"version": "==1.0.2"
},
"sphinxcontrib-htmlhelp": {
"hashes": [
"sha256:d412243dfb797ae3ec2b59eca0e52dac12e75a241bf0e4eb861e450d06c6ed07",
"sha256:f5f8bb2d0d629f398bf47d0d69c07bc13b65f75a81ad9e2f71a63d4b7a2f6db2"
],
"markers": "python_version >= '3.6'",
"version": "==2.0.0"
},
"sphinxcontrib-jsmath": {
"hashes": [
"sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178",
"sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8"
],
"markers": "python_version >= '3.5'",
"version": "==1.0.1"
},
"sphinxcontrib-qthelp": {
"hashes": [
"sha256:4c33767ee058b70dba89a6fc5c1892c0d57a54be67ddd3e7875a18d14cba5a72",
"sha256:bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6"
],
"markers": "python_version >= '3.5'",
"version": "==1.0.3"
},
"sphinxcontrib-serializinghtml": {
"hashes": [
"sha256:352a9a00ae864471d3a7ead8d7d79f5fc0b57e8b3f95e9867eb9eb28999b92fd",
"sha256:aa5f6de5dfdf809ef505c4895e51ef5c9eac17d0f287933eb49ec495280b6952"
],
"markers": "python_version >= '3.5'",
"version": "==1.1.5"
},
"sqlalchemy": {
"hashes": [
"sha256:05fa14f279d43df68964ad066f653193187909950aa0163320b728edfc400167",
@@ -427,8 +610,16 @@
"sha256:000ca7f471a233c2251c6c7023ee85305721bfdf18621ebff4fd17a8653427ed",
"sha256:0e7c33d9a63e7ddfcb86780aac87befc2fbddf46c58dbb487e0855f7ceec283c"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4.0'",
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
"version": "==1.26.8"
},
"zipp": {
"hashes": [
"sha256:9f50f446828eb9d45b267433fd3e9da8d801f614129124863f9c51ebceafb87d",
"sha256:b47250dd24f92b7dd6a0a8fc5244da14608f3ca90a5efcd37a3b1642fac9a375"
],
"markers": "python_version >= '3.7'",
"version": "==3.7.0"
}
},
"develop": {}

4
README.md Normal file
View File

@@ -0,0 +1,4 @@
Cisticola
==========
![Cisticola, the bird](docs/images/cisticola.jpeg)

View File

@@ -9,6 +9,7 @@ mapper_registry = registry()
@dataclass
class ScraperResult:
"""A minimally processed result from a scraper"""
scraper: str
platform: str
channel: int
@@ -19,7 +20,8 @@ class ScraperResult:
raw_data_table = Table('raw_data', mapper_registry.metadata,
Column('id', Integer, primary_key=True, autoincrement=True),
Column('id', Integer, primary_key=True,
autoincrement=True),
Column('scraper', String),
Column('platform', String),
Column('channel', Integer),
@@ -64,7 +66,8 @@ class TransformedResult:
analysis_table = Table('analysis', mapper_registry.metadata,
Column('id', Integer, primary_key=True, autoincrement=True),
Column('id', Integer, primary_key=True,
autoincrement=True),
Column('raw_id', Integer, ForeignKey('raw_data.id')),
Column('scraper', String),
Column('transformer', String),

View File

@@ -64,7 +64,7 @@ class BitchuteScraper(cisticola.scraper.Scraper):
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
def strip_tags(html, convert_newlines=True):
"""
r"""
Strip HTML from a string
:param html: HTML to strip

20
docs/Makefile Normal file
View File

@@ -0,0 +1,20 @@
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = source
BUILDDIR = build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

BIN
docs/images/cisticola.jpeg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 41 KiB

35
docs/make.bat Normal file
View File

@@ -0,0 +1,35 @@
@ECHO OFF
pushd %~dp0
REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build
if "%1" == "" goto help
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.https://www.sphinx-doc.org/
exit /b 1
)
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
:end
popd

30
docs/source/cisticola.rst Normal file
View File

@@ -0,0 +1,30 @@
cisticola package
=================
Subpackages
-----------
.. toctree::
:maxdepth: 4
cisticola.scraper
cisticola.transformer
Submodules
----------
cisticola.base module
---------------------
.. automodule:: cisticola.base
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: cisticola
:members:
:undoc-members:
:show-inheritance:

View File

@@ -0,0 +1,37 @@
cisticola.scraper package
=========================
Submodules
----------
cisticola.scraper.bitchute module
---------------------------------
.. automodule:: cisticola.scraper.bitchute
:members:
:undoc-members:
:show-inheritance:
cisticola.scraper.gettr module
------------------------------
.. automodule:: cisticola.scraper.gettr
:members:
:undoc-members:
:show-inheritance:
cisticola.scraper.twitter module
--------------------------------
.. automodule:: cisticola.scraper.twitter
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: cisticola.scraper
:members:
:undoc-members:
:show-inheritance:

View File

@@ -0,0 +1,21 @@
cisticola.transformer package
=============================
Submodules
----------
cisticola.transformer.twitter module
------------------------------------
.. automodule:: cisticola.transformer.twitter
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: cisticola.transformer
:members:
:undoc-members:
:show-inheritance:

51
docs/source/conf.py Normal file
View File

@@ -0,0 +1,51 @@
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
sys.path.insert(0, os.path.abspath('../../'))
# -- Project information -----------------------------------------------------
project = 'Cisticola'
copyright = '2022, Bellingcat'
author = 'Bellingcat'
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.coverage', 'sphinx.ext.napoleon']
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = []
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'alabaster'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = []

17
docs/source/index.rst Normal file
View File

@@ -0,0 +1,17 @@
Welcome to Cisticola's documentation!
=====================================
.. toctree::
:maxdepth: 2
:caption: Contents:
modules
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

7
docs/source/modules.rst Normal file
View File

@@ -0,0 +1,7 @@
cisticola
=========
.. toctree::
:maxdepth: 4
cisticola