Add option to clear registered scrapers, necessary for tests

This commit is contained in:
Logan Williams
2022-03-31 16:17:35 +02:00
parent c8d1b96e3f
commit 7f87b03de5
5 changed files with 9 additions and 6 deletions

2
.gitignore vendored
View File

@@ -15,7 +15,7 @@ service_account.json
# Unit test / coverage reports
reports
.coverage
.coverage*
.cache
.pytest_cache/
cover/

6
app.py
View File

@@ -1,12 +1,12 @@
import argparse
from loguru import logger
import gspread
from sqlalchemy import create_engine
from sqlalchemy import create_engine, func
from sqlalchemy.orm import sessionmaker
import os
import time
from cisticola.base import Channel, mapper_registry
from cisticola.base import Channel, RawChannelInfo, mapper_registry
from cisticola.scraper import (
ScraperController,
BitchuteScraper,
@@ -82,7 +82,7 @@ def get_scraper_controller():
controller.connect_to_db(engine)
scrapers = [
# TelegramTelethonScraper(),
TelegramTelethonScraper(),
TwitterScraper()]
controller.register_scrapers(scrapers)

View File

@@ -303,6 +303,9 @@ class ScraperController:
"""
self.scrapers.extend(scraper)
def remove_all_scrapers(self):
    """Forget every scraper currently registered on this controller.

    ``self.scrapers`` is rebound to a brand-new empty list; the list
    object that was previously referenced is not mutated, so any other
    holder of that list still sees its old contents.
    """
    self.scrapers = list()
def scrape_all_channels(self, archive_media: bool = True):
if self.session is None:
logger.error("No DB session")

View File

@@ -37,8 +37,6 @@ class TwitterScraper(Scraper):
if tweet.media:
media_list += tweet.media
print(tweet.json())
if tweet.retweetedTweet and hasattr(tweet.retweetedTweet, 'media') and tweet.retweetedTweet.media:
media_list += tweet.retweetedTweet.media

View File

@@ -4,6 +4,7 @@ from cisticola.base import Channel
from cisticola.scraper import TelegramTelethonScraper
def test_scrape_telegram_telethon_channel_no_media(controller, channel_kwargs):
controller.remove_all_scrapers()
channels = [Channel(**channel_kwargs['telegram'])]
controller.register_scraper(scraper = TelegramTelethonScraper())
@@ -13,6 +14,7 @@ def test_scrape_telegram_telethon_channel_no_media(controller, channel_kwargs):
def test_scrape_telegram_telethon_channel(controller, channel_kwargs):
controller.reset_db()
controller.remove_all_scrapers()
channels = [Channel(**channel_kwargs['telegram'])]
controller.register_scraper(scraper = TelegramTelethonScraper())