mirror of
https://github.com/bellingcat/cisticola.git
synced 2026-06-12 21:38:33 +03:00
updated transformer tests
This commit is contained in:
34
tests/transformer/telegram_telethon.py
Normal file
34
tests/transformer/telegram_telethon.py
Normal file
@@ -0,0 +1,34 @@
|
||||
from sqlalchemy.orm import sessionmaker, with_polymorphic
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import TelegramTelethonScraper
|
||||
from cisticola.transformer import TelegramTelethonTransformer
|
||||
from cisticola.base import Post, Media
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_etl_telegram_telethon(engine, controller, etl_controller, channel_kwargs):
|
||||
controller.reset_db()
|
||||
|
||||
channels = [Channel(**channel_kwargs['telegram'])]
|
||||
controller.register_scraper(scraper = TelegramTelethonScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
etl_controller.register_transformer(TelegramTelethonTransformer())
|
||||
etl_controller.transform_all_untransformed()
|
||||
etl_controller.transform_all_untransformed_info()
|
||||
|
||||
sessionfactory = sessionmaker()
|
||||
sessionfactory.configure(bind=engine)
|
||||
session = sessionfactory()
|
||||
|
||||
posts = session.query(Post).all()
|
||||
media = session.query(Media).all()
|
||||
|
||||
assert len(posts) == 19
|
||||
assert len(media) == 13
|
||||
|
||||
assert posts[16].content == "Taking pre-orders now"
|
||||
assert json.loads(media[0].exif)['Composite:ImageSize'] == "1028 1280"
|
||||
@@ -1,4 +1,4 @@
|
||||
from sqlalchemy.orm import sessionmaker, with_polymorphic
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
import json
|
||||
|
||||
import pytest
|
||||
@@ -18,6 +18,7 @@ def test_scrape_etl_twitter(engine, controller, etl_controller, channel_kwargs):
|
||||
|
||||
etl_controller.register_transformer(TwitterTransformer())
|
||||
etl_controller.transform_all_untransformed()
|
||||
etl_controller.transform_all_untransformed_info()
|
||||
|
||||
sessionfactory = sessionmaker()
|
||||
sessionfactory.configure(bind=engine)
|
||||
@@ -26,8 +27,8 @@ def test_scrape_etl_twitter(engine, controller, etl_controller, channel_kwargs):
|
||||
posts = session.query(Post).all()
|
||||
media = session.query(Media).all()
|
||||
|
||||
assert len(posts) == 10
|
||||
assert len(media) == 7
|
||||
assert len(posts) == 12
|
||||
assert len(media) == 4
|
||||
|
||||
assert posts[-1].content == "BARN"
|
||||
assert json.loads(media[-1].exif)['Composite:ImageSize'] == "826 728"
|
||||
assert posts[2].content == "BARN"
|
||||
assert json.loads(media[0].exif)['Composite:ImageSize'] == "826 728"
|
||||
Reference in New Issue
Block a user