# Mirror of https://github.com/bellingcat/cisticola.git
# (synced 2026-06-08 03:18:34 +03:00; 234 lines, 5.7 KiB, Python)
import os
|
|
import pytest
|
|
|
|
from sqlalchemy import create_engine
|
|
from sqlalchemy.orm import sessionmaker
|
|
|
|
from cisticola.scraper import ScraperController
|
|
from cisticola.transformer import ETLController
|
|
|
|
# Keyword arguments describing the Bitchute test channel.
BITCHUTE_CHANNEL_KWARGS = {
    "name": "bestonlinejewelrystoresusa@gmail.com (test)",
    "platform_id": "bestonlinejewelrystoresusagmailcom",
    "category": "test",
    "platform": "Bitchute",
    "url": "https://www.bitchute.com/channel/bestonlinejewelrystoresusagmailcom/",
    "screenname": None,
    "country": "US",
    "influencer": None,
    "public": True,
    "chat": False,
    "notes": "",
    "source": "researcher",
}
|
|
|
|
# Keyword arguments describing the Gab test channel (a user account, not a group).
GAB_CHANNEL_KWARGS = {
    "name": "Capt. Marc Simon (test)",
    "platform_id": "marc_capt",
    "category": "test",
    "platform": "Gab",
    "url": "https://gab.com/marc_capt",
    "screenname": "marc_capt",
    "country": "CA",
    "influencer": None,
    "public": True,
    "chat": False,
    "notes": "",
    "source": "researcher",
}
|
|
|
|
# Keyword arguments describing the Gab test group; unlike the other test
# channels in this file it is flagged as a chat (``"chat": True``).
GAB_GROUP_KWARGS = {
    "name": "iran group (test)",
    "platform_id": "10001",
    "category": "test",
    "platform": "Gab",
    "url": "https://gab.com/groups/10001",
    "screenname": "iran group",
    "country": "IR",
    "influencer": None,
    "public": True,
    "chat": True,
    "notes": "",
    "source": "researcher",
}
|
|
|
|
# Keyword arguments describing the Gettr test channel.
GETTR_CHANNEL_KWARGS = {
    "name": "LizardRepublic (test)",
    "platform_id": "lizardrepublic",
    "category": "test",
    "platform": "Gettr",
    "url": "https://www.gettr.com/user/lizardrepublic",
    "screenname": "lizardrepublic",
    "country": "US",
    "influencer": None,
    "public": True,
    "chat": False,
    "notes": "",
    "source": "researcher",
}
|
|
|
|
# Keyword arguments describing the Instagram test channel.
INSTAGRAM_CHANNEL_KWARGS = {
    "name": "borland.88 (test)",
    "platform_id": "borland.88",
    "category": "test",
    "platform": "Instagram",
    "url": "https://www.instagram.com/borland.88/",
    "screenname": "borland.88",
    "country": "UA",
    "influencer": None,
    "public": True,
    "chat": False,
    "notes": "",
    "source": "researcher",
}
|
|
|
|
# Keyword arguments describing the Odysee test channel.
ODYSEE_CHANNEL_KWARGS = {
    "name": "Mak1n' Bacon (test)",
    "platform_id": "Mak1nBacon",
    "category": "test",
    "platform": "Odysee",
    "url": "https://odysee.com/@Mak1nBacon",
    "screenname": "Mak1nBacon",
    "country": "US",
    "influencer": None,
    "public": True,
    "chat": False,
    "notes": "",
    "source": "researcher",
}
|
|
|
|
# Keyword arguments describing the Rumble test channel.
RUMBLE_CHANNEL_KWARGS = {
    "name": "we are uploading videos wow products (test)",
    "platform_id": "c-916305",
    "category": "test",
    "platform": "Rumble",
    "url": "https://rumble.com/c/c-916305",
    "screenname": "we are uploading",
    "country": "CA",
    "influencer": None,
    "public": True,
    "chat": False,
    "notes": "",
    "source": "researcher",
}
|
|
|
|
# Keyword arguments describing the Telegram test channel.
TELEGRAM_CHANNEL_KWARGS = {
    "name": "Бутылка (test)",
    "platform_id": "-1001760492118",
    "category": "test",
    "platform": "Telegram",
    "url": "https://t.me/butylka1488",
    "screenname": "butylka1488",
    "country": "RU",
    "influencer": None,
    "public": True,
    "chat": False,
    "notes": "",
    "source": "researcher",
}
|
|
|
|
# Keyword arguments describing the Twitter test channel.
TWITTER_CHANNEL_KWARGS = {
    "name": "L Weber (test)",
    "platform_id": "1424979017749442595",
    "category": "test",
    "platform": "Twitter",
    "url": "https://twitter.com/LWeber33662141",
    "screenname": "LWeber33662141",
    "country": "US",
    "influencer": None,
    "public": True,
    "chat": False,
    "notes": "",
    "source": "researcher",
}
|
|
|
|
# Keyword arguments describing the Vkontakte test channel.
VKONTAKTE_CHANNEL_KWARGS = {
    "name": "Wwg1wgA (test)",
    "platform_id": "club201278078",
    "category": "test",
    "platform": "Vkontakte",
    "url": "https://vk.com/club201278078",
    "screenname": "Wwg1wgA",
    "country": "FR",
    "influencer": None,
    "public": True,
    "chat": False,
    "notes": "",
    "source": "researcher",
}
|
|
|
|
# Keyword arguments describing the Youtube test channel.
YOUTUBE_CHANNEL_KWARGS = {
    "name": "AnEs87 (test)",
    "platform_id": "UCP6exBqGoxGLv_pM9Dxk2pA",
    "category": "test",
    "platform": "Youtube",
    "url": "https://www.youtube.com/channel/UCP6exBqGoxGLv_pM9Dxk2pA",
    "screenname": "AnEs87",
    "country": "SV",
    "influencer": None,
    "public": True,
    "chat": False,
    "notes": "",
    "source": "researcher",
}
|
|
|
|
|
|
@pytest.fixture(scope="package")
def engine(tmpdir_factory):
    """Create the SQLAlchemy engine shared by all tests in the package.

    The database URL is read from the ``TEST_DB`` environment variable, so
    the backing database is whatever that URL points at. ``tmpdir_factory``
    is not used by the body; it is kept so the fixture signature is unchanged.

    Yields:
        The engine created from ``os.environ["TEST_DB"]``.

    Raises:
        KeyError: if ``TEST_DB`` is not set.
    """
    try:
        db_url = os.environ["TEST_DB"]
    except KeyError:
        # Same exception type as a bare lookup, but with an actionable message.
        raise KeyError(
            "TEST_DB environment variable must be set to a database URL "
            "to run the test suite"
        ) from None

    db_engine = create_engine(db_url)
    yield db_engine
    # Teardown: release pooled connections once the package's tests finish.
    db_engine.dispose()
|
|
|
|
|
|
@pytest.fixture(scope="package")
def session(engine):
    """Provide a SQLAlchemy session shared by all tests in the package.

    Args:
        engine: the package-scoped engine fixture the session is bound to.

    Yields:
        A session created from a ``sessionmaker`` bound to ``engine``.
    """
    session_factory = sessionmaker(bind=engine)
    db_session = session_factory()
    yield db_session
    # Teardown: close the session so its connection is released before the
    # engine fixture is torn down.
    db_session.close()
|
|
|
|
|
|
@pytest.fixture(scope="package")
def controller(engine):
    """Provide a ScraperController shared by all tests in the package.

    Args:
        engine: the package-scoped engine fixture handed to
            ``connect_to_db``.

    Returns:
        The ``ScraperController`` after ``connect_to_db(engine)`` was called.
    """
    scraper_controller = ScraperController()
    scraper_controller.connect_to_db(engine)

    return scraper_controller
|
|
|
|
|
|
@pytest.fixture(scope="package")
def etl_controller(engine):
    """Provide an ETLController shared by all tests in the package.

    Args:
        engine: the package-scoped engine fixture handed to
            ``connect_to_db``.

    Returns:
        The ``ETLController`` after ``connect_to_db(engine)`` was called.
    """
    transformer_controller = ETLController()
    transformer_controller.connect_to_db(engine)

    return transformer_controller
|
|
|
|
|
|
@pytest.fixture(scope="package")
def channel_kwargs():
    """Map each platform key to the keyword arguments of its test channel.

    Returns:
        A dict keyed by lowercase platform name (plus ``"gab_group"`` for the
        Gab group) whose values are the module-level ``*_KWARGS`` dicts. The
        values are the module-level objects themselves, not copies.
    """
    return {
        "bitchute": BITCHUTE_CHANNEL_KWARGS,
        "gab": GAB_CHANNEL_KWARGS,
        "gab_group": GAB_GROUP_KWARGS,
        "gettr": GETTR_CHANNEL_KWARGS,
        "instagram": INSTAGRAM_CHANNEL_KWARGS,
        "odysee": ODYSEE_CHANNEL_KWARGS,
        "rumble": RUMBLE_CHANNEL_KWARGS,
        "telegram": TELEGRAM_CHANNEL_KWARGS,
        "twitter": TWITTER_CHANNEL_KWARGS,
        "vkontakte": VKONTAKTE_CHANNEL_KWARGS,
        "youtube": YOUTUBE_CHANNEL_KWARGS,
    }
|