Add documentation generation with Sphinx

This commit is contained in:
Logan Williams
2022-02-21 17:52:38 +01:00
parent 139459e3b2
commit e3d29bf811
54 changed files with 18250 additions and 8 deletions

303
docs/build/html/cisticola.scraper.html vendored Normal file
View File

@@ -0,0 +1,303 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
<title>cisticola.scraper package &#8212; Cisticola documentation</title>
<link rel="stylesheet" type="text/css" href="_static/pygments.css" />
<link rel="stylesheet" type="text/css" href="_static/alabaster.css" />
<script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
<script src="_static/jquery.js"></script>
<script src="_static/underscore.js"></script>
<script src="_static/doctools.js"></script>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="cisticola.transformer package" href="cisticola.transformer.html" />
<link rel="prev" title="cisticola package" href="cisticola.html" />
<link rel="stylesheet" href="_static/custom.css" type="text/css" />
<meta name="viewport" content="width=device-width, initial-scale=0.9" />
</head><body>
<div class="document">
<div class="documentwrapper">
<div class="bodywrapper">
<div class="body" role="main">
<section id="cisticola-scraper-package">
<h1>cisticola.scraper package<a class="headerlink" href="#cisticola-scraper-package" title="Permalink to this headline"></a></h1>
<section id="submodules">
<h2>Submodules<a class="headerlink" href="#submodules" title="Permalink to this headline"></a></h2>
</section>
<section id="module-cisticola.scraper.bitchute">
<span id="cisticola-scraper-bitchute-module"></span><h2>cisticola.scraper.bitchute module<a class="headerlink" href="#module-cisticola.scraper.bitchute" title="Permalink to this headline"></a></h2>
<dl class="py class">
<dt class="sig sig-object py" id="cisticola.scraper.bitchute.BitchuteScraper">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">cisticola.scraper.bitchute.</span></span><span class="sig-name descname"><span class="pre">BitchuteScraper</span></span><a class="headerlink" href="#cisticola.scraper.bitchute.BitchuteScraper" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#cisticola.scraper.Scraper" title="cisticola.scraper.Scraper"><code class="xref py py-class docutils literal notranslate"><span class="pre">cisticola.scraper.Scraper</span></code></a></p>
<p>An implementation of a Scraper for Bitchute, using classes from the 4cat
library</p>
<dl class="py method">
<dt class="sig sig-object py" id="cisticola.scraper.bitchute.BitchuteScraper.can_handle">
<span class="sig-name descname"><span class="pre">can_handle</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">channel</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#cisticola.scraper.bitchute.BitchuteScraper.can_handle" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="cisticola.scraper.bitchute.BitchuteScraper.get_posts">
<span class="sig-name descname"><span class="pre">get_posts</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">channel</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="cisticola.html#cisticola.base.Channel" title="cisticola.base.Channel"><span class="pre">cisticola.base.Channel</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">since</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="cisticola.html#cisticola.base.ScraperResult" title="cisticola.base.ScraperResult"><span class="pre">cisticola.base.ScraperResult</span></a><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="cisticola.html#cisticola.base.ScraperResult" title="cisticola.base.ScraperResult"><span class="pre">cisticola.base.ScraperResult</span></a><span class="p"><span class="pre">]</span></span></span></span><a class="headerlink" href="#cisticola.scraper.bitchute.BitchuteScraper.get_posts" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="cisticola.scraper.bitchute.BitchuteScraper.get_username_from_url">
<span class="sig-name descname"><span class="pre">get_username_from_url</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#cisticola.scraper.bitchute.BitchuteScraper.get_username_from_url" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="cisticola.scraper.bitchute.append_details">
<span class="sig-prename descclassname"><span class="pre">cisticola.scraper.bitchute.</span></span><span class="sig-name descname"><span class="pre">append_details</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">video</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">detail</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#cisticola.scraper.bitchute.append_details" title="Permalink to this definition"></a></dt>
<dd><p>Append extra metadata to video data</p>
<p>Fetches the BitChute video detail page to scrape extra data for the given video.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>video</strong> (<em>dict</em>) – Video details as scraped so far</p></li>
<li><p><strong>detail</strong> (<em>str</em>) – Detail level. If “comments”, also scrape video comments.</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>Tuple, first item: updated video data, second: list of comments</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="cisticola.scraper.bitchute.get_about">
<span class="sig-prename descclassname"><span class="pre">cisticola.scraper.bitchute.</span></span><span class="sig-name descname"><span class="pre">get_about</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">user</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#cisticola.scraper.bitchute.get_about" title="Permalink to this definition"></a></dt>
<dd><p>Extract fields from the channel’s “About” tab</p>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="cisticola.scraper.bitchute.get_videos_user">
<span class="sig-prename descclassname"><span class="pre">cisticola.scraper.bitchute.</span></span><span class="sig-name descname"><span class="pre">get_videos_user</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">session</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">user</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">csrftoken</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">detail</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#cisticola.scraper.bitchute.get_videos_user" title="Permalink to this definition"></a></dt>
<dd><p>Scrape videos for given BitChute user</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>session</strong> – HTTP Session to use</p></li>
<li><p><strong>user</strong> (<em>str</em>) – Username to scrape videos for</p></li>
<li><p><strong>csrftoken</strong> (<em>str</em>) – CSRF token to use for requests</p></li>
<li><p><strong>detail</strong> (<em>str</em>) – Detail level to scrape, basic/detail/comments</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>Video data dictionaries, as a generator</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="cisticola.scraper.bitchute.request_from_bitchute">
<span class="sig-prename descclassname"><span class="pre">cisticola.scraper.bitchute.</span></span><span class="sig-name descname"><span class="pre">request_from_bitchute</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">session</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">method</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">url</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">headers</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">data</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#cisticola.scraper.bitchute.request_from_bitchute" title="Permalink to this definition"></a></dt>
<dd><p>Request something via the BitChute API (or non-API)</p>
<p>To avoid having to write the same error-checking everywhere, this takes
care of retrying on failure, et cetera</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>session</strong> – Requests session</p></li>
<li><p><strong>method</strong> (<em>str</em>) – GET or POST</p></li>
<li><p><strong>url</strong> (<em>str</em>) – URL to fetch</p></li>
<li><p><strong>headers</strong> (<em>dict</em>) – Headers to pass with the request</p></li>
<li><p><strong>data</strong> (<em>dict</em>) – Data/params to send with the request</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>Requests response</p>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="cisticola.scraper.bitchute.strip_tags">
<span class="sig-prename descclassname"><span class="pre">cisticola.scraper.bitchute.</span></span><span class="sig-name descname"><span class="pre">strip_tags</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">html</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">convert_newlines</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#cisticola.scraper.bitchute.strip_tags" title="Permalink to this definition"></a></dt>
<dd><p>Strip HTML from a string</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>html</strong> – HTML to strip</p></li>
<li><p><strong>convert_newlines</strong> – Convert &lt;br&gt; and &lt;/p&gt; tags to newlines before stripping</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>Stripped HTML</p>
</dd>
</dl>
</dd></dl>
</section>
<section id="module-cisticola.scraper.gettr">
<span id="cisticola-scraper-gettr-module"></span><h2>cisticola.scraper.gettr module<a class="headerlink" href="#module-cisticola.scraper.gettr" title="Permalink to this headline"></a></h2>
<dl class="py class">
<dt class="sig sig-object py" id="cisticola.scraper.gettr.GettrScraper">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">cisticola.scraper.gettr.</span></span><span class="sig-name descname"><span class="pre">GettrScraper</span></span><a class="headerlink" href="#cisticola.scraper.gettr.GettrScraper" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#cisticola.scraper.Scraper" title="cisticola.scraper.Scraper"><code class="xref py py-class docutils literal notranslate"><span class="pre">cisticola.scraper.Scraper</span></code></a></p>
<p>An implementation of a Scraper for Gettr, using gogettr library</p>
<dl class="py method">
<dt class="sig sig-object py" id="cisticola.scraper.gettr.GettrScraper.can_handle">
<span class="sig-name descname"><span class="pre">can_handle</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">channel</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#cisticola.scraper.gettr.GettrScraper.can_handle" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="cisticola.scraper.gettr.GettrScraper.get_posts">
<span class="sig-name descname"><span class="pre">get_posts</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">channel</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="cisticola.html#cisticola.base.Channel" title="cisticola.base.Channel"><span class="pre">cisticola.base.Channel</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">since</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="cisticola.html#cisticola.base.ScraperResult" title="cisticola.base.ScraperResult"><span class="pre">cisticola.base.ScraperResult</span></a><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="cisticola.html#cisticola.base.ScraperResult" title="cisticola.base.ScraperResult"><span class="pre">cisticola.base.ScraperResult</span></a><span class="p"><span class="pre">]</span></span></span></span><a class="headerlink" href="#cisticola.scraper.gettr.GettrScraper.get_posts" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="cisticola.scraper.gettr.GettrScraper.get_username_from_url">
<span class="sig-name descname"><span class="pre">get_username_from_url</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#cisticola.scraper.gettr.GettrScraper.get_username_from_url" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
</section>
<section id="module-cisticola.scraper.twitter">
<span id="cisticola-scraper-twitter-module"></span><h2>cisticola.scraper.twitter module<a class="headerlink" href="#module-cisticola.scraper.twitter" title="Permalink to this headline"></a></h2>
<dl class="py class">
<dt class="sig sig-object py" id="cisticola.scraper.twitter.TwitterScraper">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">cisticola.scraper.twitter.</span></span><span class="sig-name descname"><span class="pre">TwitterScraper</span></span><a class="headerlink" href="#cisticola.scraper.twitter.TwitterScraper" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <a class="reference internal" href="#cisticola.scraper.Scraper" title="cisticola.scraper.Scraper"><code class="xref py py-class docutils literal notranslate"><span class="pre">cisticola.scraper.Scraper</span></code></a></p>
<p>An implementation of a Scraper for Twitter, using snscrape library</p>
<dl class="py method">
<dt class="sig sig-object py" id="cisticola.scraper.twitter.TwitterScraper.can_handle">
<span class="sig-name descname"><span class="pre">can_handle</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">channel</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#cisticola.scraper.twitter.TwitterScraper.can_handle" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="cisticola.scraper.twitter.TwitterScraper.get_posts">
<span class="sig-name descname"><span class="pre">get_posts</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">channel</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="cisticola.html#cisticola.base.Channel" title="cisticola.base.Channel"><span class="pre">cisticola.base.Channel</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">since</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="cisticola.html#cisticola.base.ScraperResult" title="cisticola.base.ScraperResult"><span class="pre">cisticola.base.ScraperResult</span></a><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="cisticola.html#cisticola.base.ScraperResult" title="cisticola.base.ScraperResult"><span class="pre">cisticola.base.ScraperResult</span></a><span class="p"><span class="pre">]</span></span></span></span><a class="headerlink" href="#cisticola.scraper.twitter.TwitterScraper.get_posts" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="cisticola.scraper.twitter.TwitterScraper.get_username_from_url">
<span class="sig-name descname"><span class="pre">get_username_from_url</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#cisticola.scraper.twitter.TwitterScraper.get_username_from_url" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
</section>
<section id="module-cisticola.scraper">
<span id="module-contents"></span><h2>Module contents<a class="headerlink" href="#module-cisticola.scraper" title="Permalink to this headline"></a></h2>
<dl class="py class">
<dt class="sig sig-object py" id="cisticola.scraper.Scraper">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">cisticola.scraper.</span></span><span class="sig-name descname"><span class="pre">Scraper</span></span><a class="headerlink" href="#cisticola.scraper.Scraper" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
<dl class="py method">
<dt class="sig sig-object py" id="cisticola.scraper.Scraper.can_handle">
<span class="sig-name descname"><span class="pre">can_handle</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">channel</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="cisticola.html#cisticola.base.Channel" title="cisticola.base.Channel"><span class="pre">cisticola.base.Channel</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">bool</span></span></span><a class="headerlink" href="#cisticola.scraper.Scraper.can_handle" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="cisticola.scraper.Scraper.get_posts">
<span class="sig-name descname"><span class="pre">get_posts</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">channel</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="cisticola.html#cisticola.base.Channel" title="cisticola.base.Channel"><span class="pre">cisticola.base.Channel</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">since</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="cisticola.html#cisticola.base.ScraperResult" title="cisticola.base.ScraperResult"><span class="pre">cisticola.base.ScraperResult</span></a><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><a class="reference internal" href="cisticola.html#cisticola.base.ScraperResult" title="cisticola.base.ScraperResult"><span class="pre">cisticola.base.ScraperResult</span></a><span class="p"><span class="pre">]</span></span></span></span><a class="headerlink" href="#cisticola.scraper.Scraper.get_posts" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
</dd></dl>
</section>
</section>
</div>
</div>
</div>
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
<div class="sphinxsidebarwrapper">
<h1 class="logo"><a href="index.html">Cisticola</a></h1>
<h3>Navigation</h3>
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul class="current">
<li class="toctree-l1 current"><a class="reference internal" href="modules.html">cisticola</a><ul class="current">
<li class="toctree-l2 current"><a class="reference internal" href="cisticola.html">cisticola package</a></li>
</ul>
</li>
</ul>
<div class="relations">
<h3>Related Topics</h3>
<ul>
<li><a href="index.html">Documentation overview</a><ul>
<li><a href="modules.html">cisticola</a><ul>
<li><a href="cisticola.html">cisticola package</a><ul>
<li>Previous: <a href="cisticola.html" title="previous chapter">cisticola package</a></li>
<li>Next: <a href="cisticola.transformer.html" title="next chapter">cisticola.transformer package</a></li>
</ul></li>
</ul></li>
</ul></li>
</ul>
</div>
<div id="searchbox" style="display: none" role="search">
<h3 id="searchlabel">Quick search</h3>
<div class="searchformwrapper">
<form class="search" action="search.html" method="get">
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
<input type="submit" value="Go" />
</form>
</div>
</div>
<script>$('#searchbox').show(0);</script>
</div>
</div>
<div class="clearer"></div>
</div>
<div class="footer">
&copy;2022, Bellingcat.
|
Powered by <a href="http://sphinx-doc.org/">Sphinx 4.4.0</a>
&amp; <a href="https://github.com/bitprophet/alabaster">Alabaster 0.7.12</a>
|
<a href="_sources/cisticola.scraper.rst.txt"
rel="nofollow">Page source</a>
</div>
</body>
</html>