refactored import structure

This commit is contained in:
Tristan Lee
2022-03-04 10:55:54 -06:00
parent 75240bb060
commit c21e43ddfa
16 changed files with 418 additions and 216 deletions

View File

@@ -1,71 +1,3 @@
from typing import List
import cisticola.base
import cisticola.scraper.base
from sqlalchemy.orm import sessionmaker
from loguru import logger
class ScraperController:
    """Registers scrapers, uses them to generate ScraperResults. Synchronizes
    everything with database via ORM."""

    def __init__(self):
        # Scrapers are consulted in registration order; the first one whose
        # can_handle() accepts a channel is used for it.
        self.scrapers = []
        # Session factory created by connect_to_db(); None until then.
        self.session = None
        self.mapper_registry = None

    def register_scraper(self, scraper: cisticola.scraper.base.Scraper):
        """Append one scraper to the list consulted by scrape_channels()."""
        self.scrapers.append(scraper)

    def scrape_channels(self, channels: List[cisticola.base.Channel]):
        """Scrape every channel with the first registered scraper accepting it.

        Logs an error and returns without scraping when connect_to_db() has
        not been called yet. Channels that no registered scraper can handle
        are reported with a warning and skipped.
        """
        if self.session is None:
            logger.error("No DB session")
            return

        for channel in channels:
            scraper = next(
                (s for s in self.scrapers if s.can_handle(channel)), None)
            if scraper is None:
                logger.warning(f"No handler found for Channel {channel}")
                continue

            added = self._scrape_channel(scraper, channel)
            logger.info(f"{scraper} found {added} new posts from {channel}")

    def _scrape_channel(self, scraper, channel) -> int:
        """Persist the posts `scraper` yields for `channel`; return the count."""
        # Exactly one session per channel, always closed afterwards. The
        # previous code opened two sessions here and closed neither.
        session = self.session()
        try:
            # Most recent stored result for this channel (if any); it is
            # passed to the scraper as `since`.
            rows = session.query(cisticola.base.ScraperResult).where(
                cisticola.base.ScraperResult.channel == channel.id).order_by(
                cisticola.base.ScraperResult.date.desc()).limit(1).all()
            since = rows[0] if rows else None

            added = 0
            for post in scraper.get_posts(channel, since=since):
                session.add(post)
                added += 1

            session.commit()
            return added
        finally:
            session.close()

    def connect_to_db(self, engine):
        """Create the mapped tables on `engine` and bind the session factory."""
        # create tables
        cisticola.base.mapper_registry.metadata.create_all(bind=engine)

        self.session = sessionmaker()
        self.session.configure(bind=engine)
class ETLController:
    """Placeholder for the ETL stage.

    Intended to reshape the raw_data tables into a format more conducive to
    analysis; nothing is implemented yet.
    """

    def __init__(self):
        # No state to initialise yet.
        return None
from . import base
from . import scraper
from . import transformer

View File

@@ -1,11 +1,12 @@
from typing import List
from dataclasses import dataclass
from datetime import datetime
from sqlalchemy.orm import registry
from sqlalchemy import Table, Column, Integer, String, JSON, DateTime, ForeignKey
mapper_registry = registry()
@dataclass
class ScraperResult:
"""A minimally processed result from a scraper"""
@@ -84,4 +85,4 @@ analysis_table = Table('analysis', mapper_registry.metadata,
Column('author_username', String)
)
mapper_registry.map_imperatively(TransformedResult, analysis_table)
mapper_registry.map_imperatively(TransformedResult, analysis_table)

View File

@@ -0,0 +1,131 @@
from sqlalchemy import create_engine
from cisticola.base import Channel
from cisticola.scraper import (
ScraperController,
TelegramSnscrapeScraper)
# Scrape targets for this script: eight Channel records, all with
# platform="Telegram", category="Qanon", country="RU", public=True and
# chat=False. Only the id, name, platform_id, followers, url and screenname
# differ between entries.
test_channels = [
Channel(
id=0,
name="QAnon Россия",
platform_id=-1001319637748,
category="Qanon",
followers=94048,
platform="Telegram",
url="https://t.me/qanonrus",
screenname="qanonrus",
country="RU",
influencer=None,
public=True,
chat=False,
notes=""),
Channel(
id=1,
name="The Great Awakening | Q",
platform_id=-1001325597521,
category="Qanon",
followers=5715,
platform="Telegram",
url="https://t.me/greatawakin",
screenname="greatawakin",
country="RU",
influencer=None,
public=True,
chat=False,
notes=""),
Channel(
id=2,
name="Великое Пробуждение",
platform_id=-1001285898079,
category="Qanon",
followers=5861,
platform="Telegram",
url="https://t.me/greatawakeningrus",
screenname="greatawakeningrus",
country="RU",
influencer=None,
public=True,
chat=False,
notes=""),
Channel(
id=3,
name="T🕊Редакция Президент Гордон🕊",
platform_id=-1001101170442,
category="Qanon",
followers=5743,
platform="Telegram",
url="https://t.me/prezidentgordonteam",
screenname="prezidentgordonteam",
country="RU",
influencer=None,
public=True,
chat=False,
notes=""),
Channel(
id=4,
name="ПРОЕКТ АВРОРА",
platform_id=-1001279171101,
category="Qanon",
followers=5930,
platform="Telegram",
url="https://t.me/project_aurora",
screenname="project_aurora",
country="RU",
influencer=None,
public=True,
chat=False,
notes=""),
Channel(
id=5,
name="Сон Разума",
platform_id=-1001202338312,
category="Qanon",
followers=27099,
platform="Telegram",
url="https://t.me/error_288",
screenname="error_288",
country="RU",
influencer=None,
public=True,
chat=False,
notes=""),
Channel(
id=6,
name="Пробуждающий Мир - официальный канал",
platform_id=-1001492521207,
category="Qanon",
followers=19097,
platform="Telegram",
url="https://t.me/promirru",
screenname="promirru",
country="RU",
influencer=None,
public=True,
chat=False,
notes=""),
Channel(
id=7,
name="ЦЕЛЬНОЗОР",
platform_id=-1001642737506,
category="Qanon",
followers=13654,
platform="Telegram",
url="https://t.me/tselnozor",
screenname="tselnozor",
country="RU",
influencer=None,
public=True,
chat=False,
notes=""),]
# Build a controller that knows only the Telegram snscrape-based scraper.
controller = ScraperController()
telegram = TelegramSnscrapeScraper()
controller.register_scraper(telegram)
# Results are stored in a SQLite file named russian_telegram.db.
engine = create_engine('sqlite:///russian_telegram.db')
# Must run before scrape_channels(): it installs the session factory the
# controller needs.
controller.connect_to_db(engine)
# NOTE: executed at module level (no __main__ guard), so running or importing
# this file starts the scrape immediately.
controller.scrape_channels(test_channels)

View File

@@ -0,0 +1,8 @@
from .base import Scraper, ScraperController
from .bitchute import BitchuteScraper
from .gab import GabScraper
from .gettr import GettrScraper
from .odysee import OdyseeScraper
from .rumble import RumbleScraper
from .telegram_snscrape import TelegramSnscrapeScraper
from .twitter import TwitterScraper

View File

@@ -1,13 +1,17 @@
from typing import Generator, Tuple
import cisticola.base
import requests
from typing import Generator, Tuple, List
import os
import boto3
from io import BytesIO
from urllib.parse import urlparse
import tempfile
import requests
import boto3
from loguru import logger
import ffmpeg
import tempfile
from sqlalchemy.orm import sessionmaker
from cisticola.base import Channel, ScraperResult, mapper_registry
class Scraper:
__version__ = "Scraper 0.0.0"
@@ -89,8 +93,77 @@ class Scraper:
return archived_url
def can_handle(self, channel: cisticola.base.Channel) -> bool:
def can_handle(self, channel: Channel) -> bool:
pass
def get_posts(self, channel: cisticola.base.Channel, since: cisticola.base.ScraperResult = None) -> Generator[cisticola.base.ScraperResult, None, None]:
def get_posts(self, channel: Channel, since: ScraperResult = None) -> Generator[ScraperResult, None, None]:
pass
class ScraperController:
    """Registers scrapers, uses them to generate ScraperResults. Synchronizes
    everything with database via ORM."""

    def __init__(self):
        # Scrapers are consulted in registration order; the first one whose
        # can_handle() accepts a channel is used for it.
        self.scrapers = []
        # Session factory created by connect_to_db(); None until then.
        self.session = None
        self.mapper_registry = None

    def register_scraper(self, scraper: Scraper):
        """Append one scraper to the list consulted by scrape_channels()."""
        self.scrapers.append(scraper)

    def register_scrapers(self, scraper: List[Scraper]):
        """Append several scrapers at once, keeping their order.

        The parameter is a list despite its singular name; the name is kept
        so existing keyword callers continue to work.
        """
        self.scrapers.extend(scraper)

    def scrape_channels(self, channels: List[Channel]):
        """Scrape every channel with the first registered scraper accepting it.

        Logs an error and returns without scraping when connect_to_db() has
        not been called yet. Channels that no registered scraper can handle
        are reported with a warning and skipped.
        """
        if self.session is None:
            logger.error("No DB session")
            return

        for channel in channels:
            scraper = next(
                (s for s in self.scrapers if s.can_handle(channel)), None)
            if scraper is None:
                logger.warning(f"No handler found for Channel {channel}")
                continue

            added = self._scrape_channel(scraper, channel)
            logger.info(f"{scraper} found {added} new posts from {channel}")

    def _scrape_channel(self, scraper: Scraper, channel: Channel) -> int:
        """Persist the posts `scraper` yields for `channel`; return the count."""
        # Exactly one session per channel, always closed afterwards. The
        # previous code opened two sessions here and closed neither.
        session = self.session()
        try:
            # Most recent stored result for this channel (if any); it is
            # passed to the scraper as `since`.
            rows = session.query(ScraperResult).where(
                ScraperResult.channel == channel.id).order_by(
                ScraperResult.date.desc()).limit(1).all()
            since = rows[0] if rows else None

            added = 0
            for post in scraper.get_posts(channel, since=since):
                session.add(post)
                added += 1

            session.commit()
            return added
        finally:
            session.close()

    def connect_to_db(self, engine):
        """Create the mapped tables on `engine` and bind the session factory."""
        # create tables
        mapper_registry.metadata.create_all(bind=engine)

        self.session = sessionmaker()
        self.session.configure(bind=engine)
class ETLController:
    """Future home of the ETL step.

    Meant to turn the raw_data tables into a format more conducive to
    analysis; currently an empty shell.
    """

    def __init__(self):
        # Nothing to configure so far.
        return None

View File

@@ -9,9 +9,9 @@ from typing import Generator
import requests
from bs4 import BeautifulSoup
import cisticola.base
class BitchuteScraper(cisticola.scraper.base.Scraper):
from cisticola.base import Channel, ScraperResult
from cisticola.scraper.base import Scraper
class BitchuteScraper(Scraper):
"""An implementation of a Scraper for Bitchute, using classes from the 4cat
library"""
__version__ = "BitchuteScraper 0.0.1"
@@ -23,7 +23,7 @@ class BitchuteScraper(cisticola.scraper.base.Scraper):
return username
def get_posts(self, channel: cisticola.base.Channel, since: cisticola.base.ScraperResult = None) -> Generator[cisticola.base.ScraperResult, None, None]:
def get_posts(self, channel: Channel, since: ScraperResult = None) -> Generator[ScraperResult, None, None]:
session = requests.Session()
session.headers.update(self.headers)
@@ -32,8 +32,6 @@ class BitchuteScraper(cisticola.scraper.base.Scraper):
"input", {"name": "csrfmiddlewaretoken"})[0].get("value")
time.sleep(0.25)
# Don't scrape comment information
#TODO implement framework for processing and storing comments
detail = 'comments'
username = BitchuteScraper.get_username_from_url(channel.url)
@@ -52,7 +50,7 @@ class BitchuteScraper(cisticola.scraper.base.Scraper):
archived_url = self.archive_media(media_blob, content_type, key)
archived_urls[url] = archived_url
yield cisticola.base.ScraperResult(
yield ScraperResult(
scraper=self.__version__,
platform="Bitchute",
channel=channel.id,

View File

@@ -1,11 +1,12 @@
import cisticola.base
import cisticola.scraper.base
from datetime import datetime
import json
from typing import Generator
from garc import Garc
class GabScraper(cisticola.scraper.base.Scraper):
from cisticola.base import Channel, ScraperResult
from cisticola.scraper.base import Scraper
class GabScraper(Scraper):
"""An implementation of a Scraper for Gab, using GARC library"""
__version__ = "GabScraper 0.0.1"
@@ -14,7 +15,7 @@ class GabScraper(cisticola.scraper.base.Scraper):
return username
def get_posts(self, channel: cisticola.base.Channel, since: cisticola.base.ScraperResult = None) -> Generator[cisticola.base.ScraperResult, None, None]:
def get_posts(self, channel: Channel, since: ScraperResult = None) -> Generator[ScraperResult, None, None]:
client = Garc(profile = 'main')
username = GabScraper.get_username_from_url(channel.url)
@@ -37,7 +38,7 @@ class GabScraper(cisticola.scraper.base.Scraper):
archived_url = self.archive_media(media_blob, content_type, key)
archived_urls[url] = archived_url
yield cisticola.base.ScraperResult(
yield ScraperResult(
scraper=self.__version__,
platform="Gab",
channel=channel.id,

View File

@@ -1,12 +1,13 @@
import cisticola.base
import cisticola.scraper.base
from datetime import datetime
import json
from typing import Generator, Tuple
from gogettr import PublicClient
from urllib.parse import urlparse
class GettrScraper(cisticola.scraper.base.Scraper):
from gogettr import PublicClient
from cisticola.base import Channel, ScraperResult
from cisticola.scraper.base import Scraper
class GettrScraper(Scraper):
"""An implementation of a Scraper for Gettr, using gogettr library"""
__version__ = "GettrScraper 0.0.1"
@@ -17,7 +18,7 @@ class GettrScraper(cisticola.scraper.base.Scraper):
return username
def get_posts(self, channel: cisticola.base.Channel, since: cisticola.base.ScraperResult = None) -> Generator[cisticola.base.ScraperResult, None, None]:
def get_posts(self, channel: Channel, since: ScraperResult = None) -> Generator[ScraperResult, None, None]:
client = PublicClient()
username = GettrScraper.get_username_from_url(channel.url)
scraper = client.user_activity(username=username, type="posts")
@@ -47,7 +48,7 @@ class GettrScraper(cisticola.scraper.base.Scraper):
archived_url = self.archive_media(media_blob, content_type, key)
archived_urls[post['vid']] = archived_url
yield cisticola.base.ScraperResult(
yield ScraperResult(
scraper=self.__version__,
platform="Gettr",
channel=channel.id,

View File

@@ -1,13 +1,15 @@
import cisticola.base
import cisticola.scraper.base
from datetime import datetime
import json
from typing import Generator
from polyphemus.base import OdyseeChannel
from urllib.parse import urlparse
from polyphemus.base import OdyseeChannel
import requests
class OdyseeScraper(cisticola.scraper.base.Scraper):
from cisticola.base import Channel, ScraperResult
from cisticola.scraper.base import Scraper
class OdyseeScraper(Scraper):
"""An implementation of a Scraper for Odysee, using polyphemus library"""
__version__ = "OdyseeScraper 0.0.1"
@@ -17,7 +19,7 @@ class OdyseeScraper(cisticola.scraper.base.Scraper):
return username
def get_posts(self, channel: cisticola.base.Channel, since: cisticola.base.ScraperResult = None) -> Generator[cisticola.base.ScraperResult, None, None]:
def get_posts(self, channel: Channel, since: ScraperResult = None) -> Generator[ScraperResult, None, None]:
username = OdyseeScraper.get_username_from_url(channel.url)
odysee_channel = OdyseeChannel(channel_name = username)
@@ -43,7 +45,7 @@ class OdyseeScraper(cisticola.scraper.base.Scraper):
all_comments = video.get_all_comments()
yield cisticola.base.ScraperResult(
yield ScraperResult(
scraper=self.__version__,
platform="Odysee",
channel=channel.id,
@@ -55,7 +57,7 @@ class OdyseeScraper(cisticola.scraper.base.Scraper):
for comment in all_comments:
yield cisticola.base.ScraperResult(
yield ScraperResult(
scraper=self.__version__,
platform="Odysee",
channel=channel.id,

View File

@@ -1,20 +1,19 @@
from concurrent.futures import process
import cisticola.base
import cisticola.scraper.base
from datetime import datetime
import json
from typing import Generator, Tuple
import tempfile
from urllib.parse import urlparse
import requests
from bs4 import BeautifulSoup
import youtube_dl
import json
from urllib.parse import urlparse
from cisticola.base import Channel, ScraperResult
from cisticola.scraper.base import Scraper
BASE_URL = 'https://rumble.com'
class RumbleScraper(cisticola.scraper.base.Scraper):
class RumbleScraper(Scraper):
"""An implementation of a Scraper for Rumble, using custom functions"""
__version__ = "RumbleScraper 0.0.1"
@@ -23,7 +22,7 @@ class RumbleScraper(cisticola.scraper.base.Scraper):
return username
def get_posts(self, channel: cisticola.base.Channel, since: cisticola.base.ScraperResult = None) -> Generator[cisticola.base.ScraperResult, None, None]:
def get_posts(self, channel: Channel, since: ScraperResult = None) -> Generator[ScraperResult, None, None]:
username = RumbleScraper.get_username_from_url(channel.url)
scraper = get_channel_videos(username)
@@ -40,7 +39,7 @@ class RumbleScraper(cisticola.scraper.base.Scraper):
archived_url = self.archive_media(media_blob, content_type, key)
archived_urls[post['media_url']] = archived_url
yield cisticola.base.ScraperResult(
yield ScraperResult(
scraper=self.__version__,
platform="Rumble",
channel=channel.id,

View File

@@ -1,18 +1,19 @@
import cisticola.base
import cisticola.scraper.base
from typing import Generator
import snscrape.modules
from datetime import datetime, timezone
import snscrape.modules
class TelegramSnscrapeScraper(cisticola.scraper.base.Scraper):
from cisticola.base import Channel, ScraperResult
from cisticola.scraper.base import Scraper
class TelegramSnscrapeScraper(Scraper):
__version__ = "TelegramSnscrapeScraper 0.0.1"
def can_handle(self, channel):
if channel.platform == "Telegram" and channel.public and not channel.chat:
return True
def get_posts(self, channel: cisticola.base.Channel, since: cisticola.base.ScraperResult = None) -> Generator[cisticola.base.ScraperResult, None, None]:
def get_posts(self, channel: Channel, since: ScraperResult = None) -> Generator[ScraperResult, None, None]:
scr = snscrape.modules.telegram.TelegramChannelScraper(
channel.screenname)
@@ -34,7 +35,7 @@ class TelegramSnscrapeScraper(cisticola.scraper.base.Scraper):
archived_url = self.archive_media(media_blob, content_type, key)
archived_urls[post.video] = archived_url
yield cisticola.base.ScraperResult(
yield ScraperResult(
scraper=self.__version__,
platform="Telegram",
channel=channel.id,

View File

@@ -1,17 +1,19 @@
import cisticola.base
import cisticola.scraper.base
from datetime import datetime, timezone
from typing import Generator
import snscrape.modules
from loguru import logger
from urllib.parse import urlparse, parse_qs
class TwitterScraper(cisticola.scraper.base.Scraper):
from snscrape.modules.twitter import TwitterProfileScraper, Video, Gif, Photo
from loguru import logger
from cisticola.base import Channel, ScraperResult
from cisticola.scraper.base import Scraper
class TwitterScraper(Scraper):
"""An implementation of a Scraper for Twitter, using snscrape library"""
__version__ = "TwitterScraper 0.0.1"
def get_posts(self, channel: cisticola.base.Channel, since: cisticola.base.ScraperResult = None) -> Generator[cisticola.base.ScraperResult, None, None]:
scraper = snscrape.modules.twitter.TwitterProfileScraper(channel.platform_id)
def get_posts(self, channel: Channel, since: ScraperResult = None) -> Generator[ScraperResult, None, None]:
scraper = TwitterProfileScraper(channel.platform_id)
first = True
@@ -28,13 +30,13 @@ class TwitterScraper(cisticola.scraper.base.Scraper):
if tweet.media:
for media in tweet.media:
if type(media) == snscrape.modules.twitter.Video:
if type(media) == Video:
variant = max(
[v for v in media.variants if v.bitrate], key=lambda v: v.bitrate)
url = variant.url
elif type(media) == snscrape.modules.twitter.Gif:
elif type(media) == Gif:
url = media.variants[0].url
elif type(media) == snscrape.modules.twitter.Photo:
elif type(media) == Photo:
url = media.fullUrl
else:
logger.warning(f"Could not get media URL of {media}")
@@ -45,7 +47,7 @@ class TwitterScraper(cisticola.scraper.base.Scraper):
archived_url = self.archive_media(media_blob, content_type, key)
archived_urls[url] = archived_url
yield cisticola.base.ScraperResult(
yield ScraperResult(
scraper=self.__version__,
platform="Twitter",
channel=channel.id,

View File

@@ -1,16 +1,2 @@
import cisticola.base
class Transformer:
    """Interface class for transformers"""

    __version__ = "Transformer 0.0.0"

    def __init__(self):
        pass

    # Both hooks previously lacked `self`, so calling them on an instance
    # raised TypeError. Annotations are quoted so they are not evaluated when
    # the class body runs.
    def can_handle(self, data: "cisticola.base.ScraperResult") -> bool:
        """Report whether this transformer accepts `data`.

        The base implementation is a stub that returns None; subclasses are
        expected to override it.
        """

    def transform(self, data: "cisticola.base.ScraperResult") -> "cisticola.base.TransformedResult":
        """Convert `data` into a TransformedResult.

        The base implementation is a stub that returns None; subclasses are
        expected to override it.
        """
from . import base
from .twitter import TwitterTransformer

View File

@@ -0,0 +1,16 @@
from cisticola.base import ScraperResult, TransformedResult
class Transformer:
    """Interface class for transformers"""

    __version__ = "Transformer 0.0.0"

    def __init__(self):
        pass

    # Both hooks previously lacked `self`, so calling them on an instance
    # raised TypeError. Annotations are quoted so they are not evaluated when
    # the class body runs.
    def can_handle(self, data: "ScraperResult") -> bool:
        """Report whether this transformer accepts `data`.

        The base implementation is a stub that returns None; subclasses are
        expected to override it.
        """

    def transform(self, data: "ScraperResult") -> "TransformedResult":
        """Convert `data` into a TransformedResult.

        The base implementation is a stub that returns None; subclasses are
        expected to override it.
        """

View File

@@ -1,17 +1,17 @@
import cisticola.transformer
import cisticola.base
import json
from cisticola.transformer.base import Transformer
from cisticola.base import ScraperResult, TransformedResult
class TwitterTransformer(cisticola.transformer.Transformer):
class TwitterTransformer(Transformer):
"""A Twitter specific ScraperResult, with a method ETL/transforming"""
__version__ = "TwitterTransformer 0.0.1"
def transform(self, data: cisticola.base.ScraperResult) -> cisticola.base.TransformedResult:
def transform(self, data: ScraperResult) -> TransformedResult:
raw = json.loads(data.raw_data)
transformed = cisticola.base.TransformedResult(
transformed = TransformedResult(
raw_id=data.id,
scraper=data.scraper,
transformer=self.__version__,

181
test.py
View File

@@ -1,76 +1,127 @@
import cisticola
import cisticola.scraper.telegram_snscrape
import cisticola.scraper.twitter
import cisticola.scraper.gettr
import cisticola.scraper.bitchute
import cisticola.scraper.odysee
import cisticola.scraper.gab
import cisticola.scraper.rumble
from sqlalchemy import create_engine
from cisticola.base import Channel
from cisticola.scraper import (
ScraperController,
BitchuteScraper,
GabScraper,
GettrScraper,
OdyseeScraper,
RumbleScraper,
TelegramSnscrapeScraper,
TwitterScraper)
test_channels = [
cisticola.base.Channel(id=0, name="Logan Williams (test)", platform_id=891729132,
category="test", followers=None, platform="Twitter",
url="https://twitter.com/obtusatum", screenname="obtusatum", country="US",
influencer=None, public=True, chat=False,
notes=""),
cisticola.base.Channel(id=1, name="South West Ohio Proud Boys (test)", platform_id=-1001276612436,
category="test", followers=None, platform="Telegram",
url="https://t.me/SouthwestOhioPB", screenname="SouthwestOhioPB", country="US",
influencer=None, public=True, chat=False, notes=""),
cisticola.base.Channel(id=2, name="LizardRepublic (test)", platform_id='lizardrepublic',
category="test", followers=None, platform="Gettr",
url="https://www.gettr.com/user/lizardrepublic", screenname="lizardrepublic", country="US",
influencer=None, public=True, chat=False, notes=""),
cisticola.base.Channel(
id=4, name="bestonlinejewelrystoresusa@gmail.com (test)", platform_id='bestonlinejewelrystoresusagmailcom',
category="test", followers=None, platform="Bitchute",
url="https://www.bitchute.com/channel/bestonlinejewelrystoresusagmailcom/", screenname=None, country="US",
influencer=None, public=True, chat=False, notes=""),
cisticola.base.Channel(
id=5, name="Mak1n' Bacon (test)", platform_id='Mak1nBacon',
category="test", followers=None, platform="Odysee",
url="https://odysee.com/@Mak1nBacon", screenname='Mak1nBacon', country="US",
influencer=None, public=True, chat=False, notes=""),
cisticola.base.Channel(
id=6, name="Capt. Marc Simon (test)", platform_id='marc_capt',
category="test", followers=None, platform="Gab",
url="https://gab.com/marc_capt", screenname='marc_capt', country="CA",
influencer=None, public=True, chat=False, notes=""),
cisticola.base.Channel(
id=7, name="we are uploading videos wow products and problem solving products.please share like and subscribe our channelwe are uploading videos wow products and problem solving products.please share like and subscribe our channel", platform_id='c-916305',
category="test", followers=None, platform="Rumble",
url="https://rumble.com/c/c-916305", screenname='we are uploading', country="CA",
influencer=None, public=True, chat=False, notes="")]
Channel(
id=0,
name="Logan Williams (test)",
platform_id=891729132,
category="test",
followers=None,
platform="Twitter",
url="https://twitter.com/obtusatum",
screenname="obtusatum",
country="US",
influencer=None,
public=True,
chat=False,
notes=""),
Channel(
id=1,
name="South West Ohio Proud Boys (test)",
platform_id=-1001276612436,
category="test",
followers=None,
platform="Telegram",
url="https://t.me/SouthwestOhioPB",
screenname="SouthwestOhioPB",
country="US",
influencer=None,
public=True,
chat=False,
notes=""),
Channel(
id=2,
name="LizardRepublic (test)",
platform_id='lizardrepublic',
category="test",
followers=None,
platform="Gettr",
url="https://www.gettr.com/user/lizardrepublic",
screenname="lizardrepublic",
country="US",
influencer=None,
public=True,
chat=False,
notes=""),
Channel(
id=4,
name="bestonlinejewelrystoresusa@gmail.com (test)", platform_id='bestonlinejewelrystoresusagmailcom',
category="test",
followers=None,
platform="Bitchute",
url="https://www.bitchute.com/channel/bestonlinejewelrystoresusagmailcom/", screenname=None,
country="US",
influencer=None,
public=True,
chat=False,
notes=""),
Channel(
id=5,
name="Mak1n' Bacon (test)",
platform_id='Mak1nBacon',
category="test",
followers=None,
platform="Odysee",
url="https://odysee.com/@Mak1nBacon",
screenname='Mak1nBacon',
country="US",
influencer=None,
public=True,
chat=False,
notes=""),
Channel(
id=6,
name="Capt. Marc Simon (test)",
platform_id='marc_capt',
category="test",
followers=None,
platform="Gab",
url="https://gab.com/marc_capt",
screenname='marc_capt',
country="CA",
influencer=None,
public=True,
chat=False,
notes=""),
Channel(
id=7,
name="we are uploading videos wow products and problem solving products.please share like and subscribe our channelwe are uploading videos wow products and problem solving products.please share like and subscribe our channel", platform_id='c-916305',
category="test",
followers=None,
platform="Rumble",
url="https://rumble.com/c/c-916305",
screenname='we are uploading',
country="CA",
influencer=None,
public=True,
chat=False,
notes="")]
controller = ScraperController()
controller = cisticola.ScraperController()
scrapers = [
BitchuteScraper(),
GabScraper(),
GettrScraper(),
OdyseeScraper(),
RumbleScraper(),
TelegramSnscrapeScraper(),
TwitterScraper()]
twitter = cisticola.scraper.twitter.TwitterScraper()
controller.register_scraper(twitter)
telegram = cisticola.scraper.telegram_snscrape.TelegramSnscrapeScraper()
controller.register_scraper(telegram)
gettr = cisticola.scraper.gettr.GettrScraper()
controller.register_scraper(gettr)
bitchute = cisticola.scraper.bitchute.BitchuteScraper()
controller.register_scraper(bitchute)
odysee = cisticola.scraper.odysee.OdyseeScraper()
controller.register_scraper(odysee)
gab = cisticola.scraper.gab.GabScraper()
controller.register_scraper(gab)
rumble = cisticola.scraper.rumble.RumbleScraper()
controller.register_scraper(rumble)
controller.register_scrapers(scrapers)
engine = create_engine('sqlite:///test3.db')
controller.connect_to_db(engine)
controller.scrape_channels(test_channels)
controller.scrape_channels(test_channels)