mirror of
https://github.com/bellingcat/cisticola.git
synced 2026-06-07 19:08:35 +03:00
refactored tests to reduce redundancy, got tests working for Telegram, Bitchute, Gettr, and Rumble
This commit is contained in:
@@ -496,8 +496,6 @@ class ScraperController:
|
||||
session = self.session()
|
||||
|
||||
while True:
|
||||
# # DEBUG
|
||||
# assert 0
|
||||
self.archive_unarchived_media_batch(self, session=session, chronological=chronological)
|
||||
|
||||
|
||||
@@ -566,6 +564,8 @@ class ScraperController:
|
||||
"""Drop all data from the connected SQLAlchemy database.
|
||||
"""
|
||||
|
||||
self.session.close_all()
|
||||
|
||||
mapper_registry.metadata.drop_all(bind=self.engine)
|
||||
self.connect_to_db(self.engine)
|
||||
|
||||
|
||||
@@ -24,7 +24,8 @@ class BitchuteScraper(Scraper):
|
||||
|
||||
return username
|
||||
|
||||
@logger.catch
|
||||
# @logger.catch
|
||||
@logger.catch(reraise = True)
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
|
||||
|
||||
session = requests.Session()
|
||||
|
||||
76
tests/base.py
Normal file
76
tests/base.py
Normal file
@@ -0,0 +1,76 @@
|
||||
import pytest
|
||||
from sqlalchemy.sql import text
|
||||
|
||||
from cisticola.base import Post, Channel, ChannelInfo, Media, ScraperResult, RawChannelInfo
|
||||
from cisticola.scraper import (
|
||||
TelegramTelethonScraper,
|
||||
BitchuteScraper,
|
||||
GettrScraper,
|
||||
RumbleScraper)
|
||||
from cisticola.transformer import (
|
||||
TelegramTelethonTransformer,
|
||||
BitchuteTransformer,
|
||||
GettrTransformer,
|
||||
RumbleTransformer)
|
||||
|
||||
CONTROLLERS = {
|
||||
'telegram' : {
|
||||
'scraper': TelegramTelethonScraper,
|
||||
'transformer': TelegramTelethonTransformer
|
||||
},
|
||||
'bitchute': {
|
||||
'scraper': BitchuteScraper,
|
||||
'transformer': BitchuteTransformer
|
||||
},
|
||||
'gettr': {
|
||||
'scraper': GettrScraper,
|
||||
'transformer': GettrTransformer
|
||||
},
|
||||
'rumble': {
|
||||
'scraper': RumbleScraper,
|
||||
'transformer': RumbleTransformer
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@pytest.mark.parametrize('platform', ['telegram','bitchute', 'gettr', 'rumble'])
|
||||
def test_scraper_and_transformer(platform, session, controller, etl_controller, channel_kwargs):
|
||||
controller.reset_db()
|
||||
controller.remove_all_scrapers()
|
||||
|
||||
# necessary for comments/replies to be processed correctly
|
||||
session.execute(text('INSERT INTO posts(id) VALUES (-1)'))
|
||||
session.commit()
|
||||
|
||||
channels = [Channel(**channel_kwargs[platform])]
|
||||
scraper = CONTROLLERS[platform]['scraper']
|
||||
controller.register_scraper(scraper = scraper())
|
||||
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
controller.scrape_all_channel_info()
|
||||
controller.archive_unarchived_media_batch()
|
||||
|
||||
raw_posts = session.query(ScraperResult).all()
|
||||
raw_channel_info = session.query(RawChannelInfo).all()
|
||||
archived_urls = session.query(ScraperResult.archived_urls).order_by(ScraperResult.date_archived.desc()).first()
|
||||
|
||||
assert len(raw_posts) > 0
|
||||
assert len(raw_channel_info) > 0
|
||||
assert len(archived_urls) > 0
|
||||
|
||||
controller.remove_all_scrapers()
|
||||
|
||||
transformer = CONTROLLERS[platform]['transformer']
|
||||
|
||||
etl_controller.register_transformer(transformer())
|
||||
etl_controller.transform_all_untransformed()
|
||||
etl_controller.transform_all_untransformed_info()
|
||||
etl_controller.transform_all_untransformed_media()
|
||||
|
||||
posts = session.query(Post).all()
|
||||
channel_info = session.query(ChannelInfo).all()
|
||||
media = session.query(Media).all()
|
||||
|
||||
assert len(posts) > 0
|
||||
assert len(channel_info) > 0
|
||||
assert len(media) > 0
|
||||
@@ -2,6 +2,7 @@ import os
|
||||
import pytest
|
||||
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
from cisticola.scraper import ScraperController
|
||||
from cisticola.transformer import ETLController
|
||||
@@ -105,12 +106,12 @@ RUMBLE_CHANNEL_KWARGS = {
|
||||
'source': 'researcher'}
|
||||
|
||||
TELEGRAM_CHANNEL_KWARGS = {
|
||||
'name': 'Star Game (test)',
|
||||
'platform_id': "-1001866374682",
|
||||
'name': 'Бутылка (test)',
|
||||
'platform_id': "-1001760492118",
|
||||
'category': 'test',
|
||||
'platform': 'Telegram',
|
||||
'url': 'https://t.me/stargameinfo',
|
||||
'screenname': 'stargameinfo',
|
||||
'url': 'https://t.me/butylka1488',
|
||||
'screenname': 'butylka1488',
|
||||
'country': 'RU',
|
||||
'influencer': None,
|
||||
'public': True,
|
||||
@@ -170,6 +171,13 @@ def engine(tmpdir_factory):
|
||||
|
||||
return engine
|
||||
|
||||
@pytest.fixture(scope='package')
|
||||
def session(engine):
|
||||
"""Initialize a SQLAlchemy session to be used for all tests in the package"""
|
||||
|
||||
sessionfactory = sessionmaker()
|
||||
sessionfactory.configure(bind=engine)
|
||||
return sessionfactory()
|
||||
|
||||
@pytest.fixture(scope='package')
|
||||
def controller(engine):
|
||||
|
||||
@@ -1,33 +0,0 @@
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import BitchuteScraper
|
||||
|
||||
@pytest.mark.unarchived
|
||||
def test_scrape_bitchute_channel_no_media(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['bitchute'])]
|
||||
controller.register_scraper(scraper = BitchuteScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
@pytest.mark.media
|
||||
@pytest.mark.unarchived
|
||||
def test_scrape_bitchute_channel_unarchived_media(controller):
|
||||
|
||||
controller.archive_unarchived_media()
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_bitchute_channel(controller, channel_kwargs):
|
||||
|
||||
controller.reset_db()
|
||||
|
||||
channels = [Channel(**channel_kwargs['bitchute'])]
|
||||
controller.register_scraper(scraper = BitchuteScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@pytest.mark.profile
|
||||
def test_scrape_bitchute_profile(channel_kwargs):
|
||||
|
||||
scraper = BitchuteScraper()
|
||||
channel = Channel(**channel_kwargs['bitchute'])
|
||||
scraper.get_profile(channel=channel)
|
||||
@@ -1,55 +0,0 @@
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import GabScraper
|
||||
|
||||
@pytest.mark.unarchived
|
||||
def test_scrape_gab_channel_no_media(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['gab'])]
|
||||
controller.register_scraper(scraper = GabScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
@pytest.mark.media
|
||||
@pytest.mark.unarchived
|
||||
def test_scrape_gab_channel_unarchived_media(controller):
|
||||
|
||||
controller.archive_unarchived_media()
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_gab_channel(controller, channel_kwargs):
|
||||
|
||||
controller.reset_db()
|
||||
|
||||
channels = [Channel(**channel_kwargs['gab'])]
|
||||
controller.register_scraper(scraper = GabScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@pytest.mark.profile
|
||||
def test_scrape_gab_profile(channel_kwargs):
|
||||
|
||||
scraper = GabScraper()
|
||||
channel = Channel(**channel_kwargs['gab'])
|
||||
scraper.get_profile(channel=channel)
|
||||
|
||||
def test_scrape_gab_group_no_media(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['gab_group'])]
|
||||
controller.register_scraper(scraper = GabScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_gab_group(controller, channel_kwargs):
|
||||
|
||||
controller.reset_db()
|
||||
|
||||
channels = [Channel(**channel_kwargs['gab_group'])]
|
||||
controller.register_scraper(scraper = GabScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@pytest.mark.profile
|
||||
def test_scrape_gab_group_profile(channel_kwargs):
|
||||
|
||||
scraper = GabScraper()
|
||||
channel = Channel(**channel_kwargs['gab_group'])
|
||||
scraper.get_profile(channel=channel)
|
||||
@@ -1,33 +0,0 @@
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import GettrScraper
|
||||
|
||||
@pytest.mark.unarchived
|
||||
def test_scrape_gettr_channel_no_media(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['gettr'])]
|
||||
controller.register_scraper(scraper = GettrScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
@pytest.mark.media
|
||||
@pytest.mark.unarchived
|
||||
def test_scrape_gettr_channel_unarchived_media(controller):
|
||||
|
||||
controller.archive_unarchived_media()
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_gettr_channel(controller, channel_kwargs):
|
||||
|
||||
controller.reset_db()
|
||||
|
||||
channels = [Channel(**channel_kwargs['gettr'])]
|
||||
controller.register_scraper(scraper = GettrScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@pytest.mark.profile
|
||||
def test_scrape_gettr_profile(channel_kwargs):
|
||||
|
||||
scraper = GettrScraper()
|
||||
channel = Channel(**channel_kwargs['gettr'])
|
||||
scraper.get_profile(channel=channel)
|
||||
@@ -1,33 +0,0 @@
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import InstagramScraper
|
||||
|
||||
@pytest.mark.unarchived
|
||||
def test_scrape_instagram_channel_no_media(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['instagram'])]
|
||||
controller.register_scraper(scraper = InstagramScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
@pytest.mark.media
|
||||
@pytest.mark.unarchived
|
||||
def test_scrape_instagram_channel_unarchived_media(controller):
|
||||
|
||||
controller.archive_unarchived_media()
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_instagram_channel(controller, channel_kwargs):
|
||||
|
||||
controller.reset_db()
|
||||
|
||||
channels = [Channel(**channel_kwargs['instagram'])]
|
||||
controller.register_scraper(scraper = InstagramScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@pytest.mark.profile
|
||||
def test_scrape_instagram_profile(channel_kwargs):
|
||||
|
||||
scraper = InstagramScraper()
|
||||
channel = Channel(**channel_kwargs['instagram'])
|
||||
scraper.get_profile(channel=channel)
|
||||
@@ -1,33 +0,0 @@
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import OdyseeScraper
|
||||
|
||||
@pytest.mark.unarchived
|
||||
def test_scrape_odysee_channel_no_media(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['odysee'])]
|
||||
controller.register_scraper(scraper = OdyseeScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
@pytest.mark.media
|
||||
@pytest.mark.unarchived
|
||||
def test_scrape_odysee_channel_unarchived_media(controller):
|
||||
|
||||
controller.archive_unarchived_media()
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_odysee_channel(controller, channel_kwargs):
|
||||
|
||||
controller.reset_db()
|
||||
|
||||
channels = [Channel(**channel_kwargs['odysee'])]
|
||||
controller.register_scraper(scraper = OdyseeScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@pytest.mark.profile
|
||||
def test_scrape_odysee_profile(channel_kwargs):
|
||||
|
||||
scraper = OdyseeScraper()
|
||||
channel = Channel(**channel_kwargs['odysee'])
|
||||
scraper.get_profile(channel=channel)
|
||||
@@ -1,33 +0,0 @@
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import RumbleScraper
|
||||
|
||||
@pytest.mark.unarchived
|
||||
def test_scrape_rumble_channel_no_media(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['rumble'])]
|
||||
controller.register_scraper(scraper = RumbleScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
@pytest.mark.media
|
||||
@pytest.mark.unarchived
|
||||
def test_scrape_rumble_channel_unarchived_media(controller):
|
||||
|
||||
controller.archive_unarchived_media()
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_rumble_channel(controller, channel_kwargs):
|
||||
|
||||
controller.reset_db()
|
||||
|
||||
channels = [Channel(**channel_kwargs['rumble'])]
|
||||
controller.register_scraper(scraper = RumbleScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@pytest.mark.profile
|
||||
def test_scrape_rumble_profile(channel_kwargs):
|
||||
|
||||
scraper = RumbleScraper()
|
||||
channel = Channel(**channel_kwargs['rumble'])
|
||||
scraper.get_profile(channel=channel)
|
||||
@@ -1,38 +0,0 @@
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import TelegramTelethonScraper
|
||||
|
||||
@pytest.mark.unarchived
|
||||
def test_scrape_telegram_telethon_channel_no_media(controller, channel_kwargs):
|
||||
controller.remove_all_scrapers()
|
||||
|
||||
channels = [Channel(**channel_kwargs['telegram'])]
|
||||
controller.register_scraper(scraper = TelegramTelethonScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
@pytest.mark.media
|
||||
@pytest.mark.unarchived
|
||||
def test_scrape_telegram_telethon_unarchived_media(controller):
|
||||
|
||||
controller.archive_unarchived_media_batch()
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_telegram_telethon_channel(controller, channel_kwargs):
|
||||
|
||||
controller.reset_db()
|
||||
controller.remove_all_scrapers()
|
||||
|
||||
channels = [Channel(**channel_kwargs['telegram'])]
|
||||
controller.register_scraper(scraper = TelegramTelethonScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@pytest.mark.profile
|
||||
def test_scrape_telegram_telethon_profile(controller, channel_kwargs):
|
||||
|
||||
controller.reset_db()
|
||||
controller.remove_all_scrapers()
|
||||
|
||||
scraper = TelegramTelethonScraper()
|
||||
channel = Channel(**channel_kwargs['telegram'])
|
||||
scraper.get_profile(channel=channel)
|
||||
@@ -1,33 +0,0 @@
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import TwitterScraper
|
||||
|
||||
@pytest.mark.unarchived
|
||||
def test_scrape_twitter_channel_no_media(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['twitter'])]
|
||||
controller.register_scraper(scraper = TwitterScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
@pytest.mark.media
|
||||
@pytest.mark.unarchived
|
||||
def test_scrape_twitter_channel_unarchived_media(controller):
|
||||
|
||||
controller.archive_unarchived_media()
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_twitter_channel(controller, channel_kwargs):
|
||||
|
||||
controller.reset_db()
|
||||
|
||||
channels = [Channel(**channel_kwargs['twitter'])]
|
||||
controller.register_scraper(scraper = TwitterScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@pytest.mark.profile
|
||||
def test_scrape_twitter_profile(channel_kwargs):
|
||||
|
||||
scraper = TwitterScraper()
|
||||
channel = Channel(**channel_kwargs['twitter'])
|
||||
scraper.get_profile(channel=channel)
|
||||
@@ -1,33 +0,0 @@
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import VkontakteScraper
|
||||
|
||||
@pytest.mark.unarchived
|
||||
def test_scrape_vkontakte_channel_no_media(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['vkontakte'])]
|
||||
controller.register_scraper(scraper = VkontakteScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
@pytest.mark.media
|
||||
@pytest.mark.unarchived
|
||||
def test_scrape_vkontakte_channel_unarchived_media(controller):
|
||||
|
||||
controller.archive_unarchived_media()
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_vkontakte_channel(controller, channel_kwargs):
|
||||
|
||||
controller.reset_db()
|
||||
|
||||
channels = [Channel(**channel_kwargs['vkontakte'])]
|
||||
controller.register_scraper(scraper = VkontakteScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@pytest.mark.profile
|
||||
def test_scrape_vkontakte_profile(channel_kwargs):
|
||||
|
||||
scraper = VkontakteScraper()
|
||||
channel = Channel(**channel_kwargs['vkontakte'])
|
||||
scraper.get_profile(channel=channel)
|
||||
@@ -1,33 +0,0 @@
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import YoutubeScraper
|
||||
|
||||
@pytest.mark.unarchived
|
||||
def test_scrape_youtube_channel_no_media(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['youtube'])]
|
||||
controller.register_scraper(scraper = YoutubeScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
@pytest.mark.media
|
||||
@pytest.mark.unarchived
|
||||
def test_scrape_youtube_channel_unarchived_media(controller):
|
||||
|
||||
controller.archive_unarchived_media()
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_youtube_channel(controller, channel_kwargs):
|
||||
|
||||
controller.reset_db()
|
||||
|
||||
channels = [Channel(**channel_kwargs['youtube'])]
|
||||
controller.register_scraper(scraper = YoutubeScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@pytest.mark.profile
|
||||
def test_scrape_youtube_profile(channel_kwargs):
|
||||
|
||||
scraper = YoutubeScraper()
|
||||
channel = Channel(**channel_kwargs['youtube'])
|
||||
scraper.get_profile(channel=channel)
|
||||
@@ -1,35 +0,0 @@
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import BitchuteScraper
|
||||
from cisticola.transformer import BitchuteTransformer
|
||||
from cisticola.base import Post, Media
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_etl_bitchute(engine, controller, etl_controller, channel_kwargs):
|
||||
controller.reset_db()
|
||||
|
||||
channels = [Channel(**channel_kwargs['bitchute'])]
|
||||
controller.register_scraper(scraper = BitchuteScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
controller.scrape_all_channel_info()
|
||||
|
||||
etl_controller.register_transformer(BitchuteTransformer())
|
||||
etl_controller.transform_all_untransformed()
|
||||
etl_controller.transform_all_untransformed_info()
|
||||
|
||||
sessionfactory = sessionmaker()
|
||||
sessionfactory.configure(bind=engine)
|
||||
session = sessionfactory()
|
||||
|
||||
posts = session.query(Post).all()
|
||||
media = session.query(Media).all()
|
||||
|
||||
assert len(posts) == 5
|
||||
# assert len(media) == 0
|
||||
|
||||
assert 'Pendant are something that the advanced ladies can fuse in her every day look' in posts[0].content
|
||||
# assert json.loads(media[0].exif)['Composite:ImageSize'] == "826 728"
|
||||
@@ -1,35 +0,0 @@
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import GettrScraper
|
||||
from cisticola.transformer import GettrTransformer
|
||||
from cisticola.base import Post, Media
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_etl_gettr(engine, controller, etl_controller, channel_kwargs):
|
||||
controller.reset_db()
|
||||
|
||||
channels = [Channel(**channel_kwargs['gettr'])]
|
||||
controller.register_scraper(scraper = GettrScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
controller.scrape_all_channel_info()
|
||||
|
||||
etl_controller.register_transformer(GettrTransformer())
|
||||
etl_controller.transform_all_untransformed()
|
||||
etl_controller.transform_all_untransformed_info()
|
||||
|
||||
sessionfactory = sessionmaker()
|
||||
sessionfactory.configure(bind=engine)
|
||||
session = sessionfactory()
|
||||
|
||||
posts = session.query(Post).all()
|
||||
media = session.query(Media).all()
|
||||
|
||||
assert len(posts) == 23
|
||||
# assert len(media) == 0
|
||||
|
||||
assert 'Nigerian gender studies' in posts[-1].content
|
||||
# assert json.loads(media[0].exif)['Composite:ImageSize'] == "826 728"
|
||||
@@ -1,35 +0,0 @@
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import RumbleScraper
|
||||
from cisticola.transformer import RumbleTransformer
|
||||
from cisticola.base import Post, Media
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_etl_rumble(engine, controller, etl_controller, channel_kwargs):
|
||||
controller.reset_db()
|
||||
|
||||
channels = [Channel(**channel_kwargs['rumble'])]
|
||||
controller.register_scraper(scraper = RumbleScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
controller.scrape_all_channel_info()
|
||||
|
||||
etl_controller.register_transformer(RumbleTransformer())
|
||||
etl_controller.transform_all_untransformed()
|
||||
etl_controller.transform_all_untransformed_info()
|
||||
|
||||
sessionfactory = sessionmaker()
|
||||
sessionfactory.configure(bind=engine)
|
||||
session = sessionfactory()
|
||||
|
||||
posts = session.query(Post).all()
|
||||
media = session.query(Media).all()
|
||||
|
||||
assert len(posts) == 7
|
||||
# assert len(media) == 0
|
||||
|
||||
assert '#whitegold #icedoutcuban' in posts[0].content
|
||||
# assert json.loads(media[0].exif)['Composite:ImageSize'] == "826 728"
|
||||
@@ -1,35 +0,0 @@
|
||||
from sqlalchemy.orm import sessionmaker, with_polymorphic
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import TelegramTelethonScraper
|
||||
from cisticola.transformer import TelegramTelethonTransformer
|
||||
from cisticola.base import Post, Media
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_etl_telegram_telethon(engine, controller, etl_controller, channel_kwargs):
|
||||
controller.reset_db()
|
||||
|
||||
channels = [Channel(**channel_kwargs['telegram'])]
|
||||
controller.register_scraper(scraper = TelegramTelethonScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
controller.scrape_all_channel_info()
|
||||
|
||||
etl_controller.register_transformer(TelegramTelethonTransformer())
|
||||
etl_controller.transform_all_untransformed()
|
||||
etl_controller.transform_all_untransformed_info()
|
||||
|
||||
sessionfactory = sessionmaker()
|
||||
sessionfactory.configure(bind=engine)
|
||||
session = sessionfactory()
|
||||
|
||||
posts = session.query(Post).all()
|
||||
media = session.query(Media).all()
|
||||
|
||||
assert len(posts) == 19
|
||||
# assert len(media) == 13
|
||||
|
||||
assert posts[16].content == "Taking pre-orders now"
|
||||
# assert json.loads(media[0].exif)['Composite:ImageSize'] == "1028 1280"
|
||||
@@ -1,35 +0,0 @@
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import TwitterScraper
|
||||
from cisticola.transformer import TwitterTransformer
|
||||
from cisticola.base import Post, Media
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_etl_twitter(engine, controller, etl_controller, channel_kwargs):
|
||||
controller.reset_db()
|
||||
|
||||
channels = [Channel(**channel_kwargs['twitter'])]
|
||||
controller.register_scraper(scraper = TwitterScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
controller.scrape_all_channel_info()
|
||||
|
||||
etl_controller.register_transformer(TwitterTransformer())
|
||||
etl_controller.transform_all_untransformed()
|
||||
etl_controller.transform_all_untransformed_info()
|
||||
|
||||
sessionfactory = sessionmaker()
|
||||
sessionfactory.configure(bind=engine)
|
||||
session = sessionfactory()
|
||||
|
||||
posts = session.query(Post).all()
|
||||
media = session.query(Media).all()
|
||||
|
||||
assert len(posts) == 12
|
||||
assert len(media) == 8
|
||||
|
||||
assert posts[2].content == "BARN"
|
||||
assert json.loads(media[0].exif)['Composite:ImageSize'] == "826 728"
|
||||
@@ -1,35 +0,0 @@
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import VkontakteScraper
|
||||
from cisticola.transformer import VkontakteTransformer
|
||||
from cisticola.base import Post, Media
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_etl_vkontakte(engine, controller, etl_controller, channel_kwargs):
|
||||
controller.reset_db()
|
||||
|
||||
channels = [Channel(**channel_kwargs['vkontakte'])]
|
||||
controller.register_scraper(scraper = VkontakteScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
controller.scrape_all_channel_info()
|
||||
|
||||
etl_controller.register_transformer(VkontakteTransformer())
|
||||
etl_controller.transform_all_untransformed()
|
||||
etl_controller.transform_all_untransformed_info()
|
||||
|
||||
sessionfactory = sessionmaker()
|
||||
sessionfactory.configure(bind=engine)
|
||||
session = sessionfactory()
|
||||
|
||||
posts = session.query(Post).all()
|
||||
media = session.query(Media).all()
|
||||
|
||||
assert len(posts) == 23
|
||||
# assert len(media) == 0
|
||||
|
||||
assert 'Nigerian gender studies' in posts[-1].content
|
||||
# assert json.loads(media[0].exif)['Composite:ImageSize'] == "826 728"
|
||||
Reference in New Issue
Block a user