Added Odysee scraper; made a minor refactoring of the url_to_blob method by adding a url_to_key method that child classes can override while still using the parent url_to_blob method; and changed the test file to include only channels with a relatively small number of posts, to make testing faster.

This commit is contained in:
Tristan Lee
2022-02-25 20:28:00 -06:00
parent ef83cc4b0a
commit 47dad8fb00
5 changed files with 145 additions and 20 deletions

29
test.py
View File

@@ -3,6 +3,7 @@ import cisticola.scraper.telegram_snscrape
import cisticola.scraper.twitter
import cisticola.scraper.gettr
import cisticola.scraper.bitchute
import cisticola.scraper.odysee
from sqlalchemy import create_engine
@@ -13,19 +14,24 @@ test_channels = [
url="https://twitter.com/obtusatum", screenname="obtusatum", country="US",
influencer=None, public=True, chat=False,
notes=""),
cisticola.base.Channel(id=1, name="JQHN SPARTAN", platform_id=-1001181961026,
category="qanon", followers=None, platform="Telegram",
url="https://t.me/jqhnspartan", screenname="jqhnspartan", country="FR",
influencer="JQNH SPARTAN", public=True, chat=False, notes=""),
cisticola.base.Channel(id=2, name="LizardRepublic", platform_id='lizardrepublic',
category="qanon", followers=None, platform="Gettr",
cisticola.base.Channel(id=1, name="South West Ohio Proud Boys (test)", platform_id=-1001276612436,
category="test", followers=None, platform="Telegram",
url="https://t.me/SouthwestOhioPB", screenname="SouthwestOhioPB", country="US",
influencer=None, public=True, chat=False, notes=""),
cisticola.base.Channel(id=2, name="LizardRepublic (test)", platform_id='lizardrepublic',
category="test", followers=None, platform="Gettr",
url="https://www.gettr.com/user/lizardrepublic", screenname="lizardrepublic", country="US",
influencer=None, public=True, chat=False, notes=""),
cisticola.base.Channel(
id=4, name="bestonlinejewelrystoresusa@gmail.com", platform_id='bestonlinejewelrystoresusagmailcom',
category="spam", followers=None, platform="Bitchute",
id=4, name="bestonlinejewelrystoresusa@gmail.com (test)", platform_id='bestonlinejewelrystoresusagmailcom',
category="test", followers=None, platform="Bitchute",
url="https://www.bitchute.com/channel/bestonlinejewelrystoresusagmailcom/", screenname=None, country="US",
influencer=None, public=True, chat=False, notes=""),]
influencer=None, public=True, chat=False, notes=""),
cisticola.base.Channel(
id=5, name="Mak1n' Bacon (test)", platform_id='Mak1nBacon',
category="test", followers=None, platform="Odysee",
url="https://odysee.com/@Mak1nBacon", screenname='Mak1nBacon', country="US",
influencer=None, public=True, chat=False, notes="")]
controller = cisticola.ScraperController()
@@ -40,7 +46,10 @@ gettr = cisticola.scraper.gettr.GettrScraper()
controller.register_scraper(gettr)
bitchute = cisticola.scraper.bitchute.BitchuteScraper()
controller.register_scraper(gettr)
controller.register_scraper(bitchute)
odysee = cisticola.scraper.odysee.OdyseeScraper()
controller.register_scraper(odysee)
engine = create_engine('sqlite:///test3.db')
controller.connect_to_db(engine)