updated transformer tests

This commit is contained in:
Tristan Lee
2022-05-19 16:34:19 -05:00
parent 424c063ef2
commit f0414a4f4d
7 changed files with 187 additions and 1253 deletions

View File

@@ -0,0 +1,34 @@
from sqlalchemy.orm import sessionmaker, with_polymorphic
import json
import pytest
from cisticola.base import Channel
from cisticola.scraper import TelegramTelethonScraper
from cisticola.transformer import TelegramTelethonTransformer
from cisticola.base import Post, Media
@pytest.mark.media
def test_scrape_etl_telegram_telethon(engine, controller, etl_controller, channel_kwargs):
    """Scrape the Telegram test channel, run the ETL step, and verify stored rows.

    The test resets the database, scrapes the channel described by
    ``channel_kwargs['telegram']`` (archiving media), runs the Telethon
    transformer over everything that has not been transformed yet, and then
    checks the resulting ``Post`` and ``Media`` rows through a fresh session
    bound to ``engine``.

    All four parameters are pytest fixtures defined outside this file.
    """
    controller.reset_db()

    # Scrape: register the Telethon scraper and pull the channel with media.
    channels = [Channel(**channel_kwargs['telegram'])]
    controller.register_scraper(scraper = TelegramTelethonScraper())
    controller.scrape_channels(channels = channels, archive_media = True)

    # Transform: convert the raw scraped data into Post/Media rows.
    etl_controller.register_transformer(TelegramTelethonTransformer())
    etl_controller.transform_all_untransformed()
    etl_controller.transform_all_untransformed_info()

    sessionfactory = sessionmaker()
    sessionfactory.configure(bind=engine)
    session = sessionfactory()

    # Close the session even when an assertion fails, so the test never
    # leaves a connection checked out of the engine's pool.
    try:
        posts = session.query(Post).all()
        media = session.query(Media).all()

        # NOTE(review): the expected counts and contents presumably mirror the
        # live state of the test channel; confirm against the fixture data.
        assert len(posts) == 19
        assert len(media) == 13

        # NOTE(review): these index-based checks rely on the order in which
        # the unordered queries return rows.
        assert posts[16].content == "Taking pre-orders now"
        assert json.loads(media[0].exif)['Composite:ImageSize'] == "1028 1280"
    finally:
        session.close()

View File

@@ -1,4 +1,4 @@
from sqlalchemy.orm import sessionmaker, with_polymorphic
from sqlalchemy.orm import sessionmaker
import json
import pytest
@@ -18,6 +18,7 @@ def test_scrape_etl_twitter(engine, controller, etl_controller, channel_kwargs):
etl_controller.register_transformer(TwitterTransformer())
etl_controller.transform_all_untransformed()
etl_controller.transform_all_untransformed_info()
sessionfactory = sessionmaker()
sessionfactory.configure(bind=engine)
@@ -26,8 +27,8 @@ def test_scrape_etl_twitter(engine, controller, etl_controller, channel_kwargs):
posts = session.query(Post).all()
media = session.query(Media).all()
assert len(posts) == 10
assert len(media) == 7
assert len(posts) == 12
assert len(media) == 4
assert posts[-1].content == "BARN"
assert json.loads(media[-1].exif)['Composite:ImageSize'] == "826 728"
assert posts[2].content == "BARN"
assert json.loads(media[0].exif)['Composite:ImageSize'] == "826 728"