diff --git a/.gitignore b/.gitignore index a5b32c9..65dd3d7 100644 --- a/.gitignore +++ b/.gitignore @@ -15,7 +15,7 @@ service_account.json # Unit test / coverage reports reports -.coverage +.coverage* .cache .pytest_cache/ cover/ diff --git a/app.py b/app.py index e7f9969..a55faab 100644 --- a/app.py +++ b/app.py @@ -1,12 +1,12 @@ import argparse from loguru import logger import gspread -from sqlalchemy import create_engine +from sqlalchemy import create_engine, func from sqlalchemy.orm import sessionmaker import os import time -from cisticola.base import Channel, mapper_registry +from cisticola.base import Channel, RawChannelInfo, mapper_registry from cisticola.scraper import ( ScraperController, BitchuteScraper, @@ -82,7 +82,7 @@ def get_scraper_controller(): controller.connect_to_db(engine) scrapers = [ - # TelegramTelethonScraper(), + TelegramTelethonScraper(), TwitterScraper()] controller.register_scrapers(scrapers) diff --git a/cisticola/scraper/base.py b/cisticola/scraper/base.py index 4ab84b5..fb25b58 100644 --- a/cisticola/scraper/base.py +++ b/cisticola/scraper/base.py @@ -303,6 +303,9 @@ class ScraperController: """ self.scrapers.extend(scraper) + def remove_all_scrapers(self): + self.scrapers = [] + def scrape_all_channels(self, archive_media: bool = True): if self.session is None: logger.error("No DB session") diff --git a/cisticola/scraper/twitter.py b/cisticola/scraper/twitter.py index b44c794..6ed37db 100644 --- a/cisticola/scraper/twitter.py +++ b/cisticola/scraper/twitter.py @@ -37,8 +37,6 @@ class TwitterScraper(Scraper): if tweet.media: media_list += tweet.media - print(tweet.json()) - if tweet.retweetedTweet and hasattr(tweet.retweetedTweet, 'media') and tweet.retweetedTweet.media: media_list += tweet.retweetedTweet.media diff --git a/tests/scraper/telegram_telethon.py b/tests/scraper/telegram_telethon.py index c6fb399..ee994eb 100644 --- a/tests/scraper/telegram_telethon.py +++ b/tests/scraper/telegram_telethon.py @@ -4,6 +4,7 @@ from cisticola.base import Channel from cisticola.scraper import TelegramTelethonScraper def test_scrape_telegram_telethon_channel_no_media(controller, channel_kwargs): + controller.remove_all_scrapers() channels = [Channel(**channel_kwargs['telegram'])] controller.register_scraper(scraper = TelegramTelethonScraper()) @@ -13,6 +14,7 @@ def test_scrape_telegram_telethon_channel_no_media(controller, channel_kwargs): def test_scrape_telegram_telethon_channel(controller, channel_kwargs): controller.reset_db() + controller.remove_all_scrapers() channels = [Channel(**channel_kwargs['telegram'])] controller.register_scraper(scraper = TelegramTelethonScraper())