From 6cf3b8842d0e9d7205edf149fbbd51db4a124aa5 Mon Sep 17 00:00:00 2001 From: Tristan Lee Date: Wed, 9 Mar 2022 13:19:35 -0600 Subject: [PATCH] renamed 'archive_media' and 'media' to avoid name collision, changed scope of test fixture controller to 'function' so that db is fresh for each executed test --- cisticola/scraper/base.py | 8 +-- cisticola/scraper/bitchute.py | 10 +-- cisticola/scraper/gab.py | 12 ++-- cisticola/scraper/gettr.py | 16 ++--- cisticola/scraper/odysee.py | 10 +-- cisticola/scraper/rumble.py | 12 ++-- cisticola/scraper/telegram_snscrape.py | 8 +-- cisticola/scraper/telegram_telethon.py | 6 +- cisticola/scraper/twitter.py | 40 ++++++------ russian_telegram_ingest.py | 86 +++++++++++++------------- test.py | 9 ++- tests/conftest.py | 2 +- tests/scraper/bitchute.py | 6 +- tests/scraper/gab.py | 4 +- tests/scraper/gettr.py | 4 +- tests/scraper/odysee.py | 4 +- tests/scraper/rumble.py | 4 +- tests/scraper/telegram_snscrape.py | 4 +- tests/scraper/telegram_telethon.py | 6 +- tests/scraper/twitter.py | 4 +- 20 files changed, 130 insertions(+), 125 deletions(-) diff --git a/cisticola/scraper/base.py b/cisticola/scraper/base.py index 9b717d5..ea68f70 100644 --- a/cisticola/scraper/base.py +++ b/cisticola/scraper/base.py @@ -69,7 +69,7 @@ class Scraper: return blob, content_type, key - def archive_media(self, blob: bytes, content_type: str, key: str) -> str: + def archive_blob(self, blob: bytes, content_type: str, key: str) -> str: filename = self.__version__.replace(' ', '_') + '/' + key @@ -83,7 +83,7 @@ class Scraper: def can_handle(self, channel: Channel) -> bool: raise NotImplementedError - def get_posts(self, channel: Channel, since: ScraperResult = None, media: bool = True) -> Generator[ScraperResult, None, None]: + def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]: raise NotImplementedError @@ -103,7 +103,7 @@ class ScraperController: self.scrapers.extend(scraper) @logger.catch - def scrape_channels(self, channels: List[Channel], media: bool = True): + def scrape_channels(self, channels: List[Channel], archive_media: bool = True): if self.session is None: logger.error("No DB session") return @@ -128,7 +128,7 @@ class ScraperController: else: since = None - posts = scraper.get_posts(channel, since=since, media=media) + posts = scraper.get_posts(channel, since=since, archive_media=archive_media) for post in posts: session.add(post) diff --git a/cisticola/scraper/bitchute.py b/cisticola/scraper/bitchute.py index 9e0957b..a5292aa 100644 --- a/cisticola/scraper/bitchute.py +++ b/cisticola/scraper/bitchute.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import datetime, timezone import time import re from html.parser import HTMLParser @@ -22,7 +22,7 @@ class BitchuteScraper(Scraper): return username - def get_posts(self, channel: Channel, since: ScraperResult = None, media: bool = True) -> Generator[ScraperResult, None, None]: + def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]: session = requests.Session() session.headers.update(self.headers) @@ -43,11 +43,11 @@ class BitchuteScraper(Scraper): archived_urls = {} - if media: + if archive_media: if 'video_url' in post: url = post['video_url'] media_blob, content_type, key = self.url_to_blob(url) - archived_url = self.archive_media(media_blob, content_type, key) + archived_url = self.archive_blob(media_blob, content_type, key) archived_urls[url] = archived_url yield ScraperResult( @@ -56,7 +56,7 @@ class BitchuteScraper(Scraper): channel=channel.id, platform_id=post['id'], date=datetime.fromtimestamp(post['timestamp']), - date_archived=datetime.now(), + date_archived=datetime.now(timezone.utc), raw_data=json.dumps(post), archived_urls=archived_urls) diff --git a/cisticola/scraper/gab.py b/cisticola/scraper/gab.py index 16f058a..910ebc2 100644 --- a/cisticola/scraper/gab.py +++ b/cisticola/scraper/gab.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import datetime, timezone import json from typing import Generator @@ -16,7 +16,7 @@ class GabScraper(Scraper): return username - def get_posts(self, channel: Channel, since: ScraperResult = None, media: bool = True) -> Generator[ScraperResult, None, None]: + def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]: client = Garc(profile = 'main') username = GabScraper.get_username_from_url(channel.url) @@ -29,7 +29,7 @@ class GabScraper(Scraper): media_urls = [] archived_urls = {} - if media: + if archive_media: media_urls.extend([p['url'] for p in post['media_attachments']]) @@ -38,7 +38,7 @@ class GabScraper(Scraper): for url in media_urls: media_blob, content_type, key = self.url_to_blob(url) - archived_url = self.archive_media(media_blob, content_type, key) + archived_url = self.archive_blob(media_blob, content_type, key) archived_urls[url] = archived_url yield ScraperResult( @@ -46,8 +46,8 @@ class GabScraper(Scraper): platform="Gab", channel=channel.id, platform_id=post['id'], - date=datetime.fromisoformat(post['created_at'].replace("Z", "+00:00")).replace(tzinfo = None), - date_archived=datetime.now(), + date=datetime.fromisoformat(post['created_at'].replace("Z", "+00:00")).replace(tzinfo=timezone.utc), + date_archived=datetime.now(timezone.utc), raw_data=json.dumps(post), archived_urls=archived_urls) diff --git a/cisticola/scraper/gettr.py b/cisticola/scraper/gettr.py index 8f4cc19..3cd069e 100644 --- a/cisticola/scraper/gettr.py +++ b/cisticola/scraper/gettr.py @@ -1,6 +1,6 @@ -from datetime import datetime +from datetime import datetime, timezone import json -from typing import Generator, Tuple +from typing import Generator from urllib.parse import urlparse from gogettr import PublicClient @@ -19,7 +19,7 @@ class GettrScraper(Scraper): return username - def get_posts(self, channel: Channel, since: ScraperResult = None, media: bool = True) -> Generator[ScraperResult, None, None]: + def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]: client = PublicClient() username = GettrScraper.get_username_from_url(channel.url) scraper = client.user_activity(username=username, type="posts") @@ -30,25 +30,25 @@ class GettrScraper(Scraper): archived_urls = {} - if media: + if archive_media: if 'imgs' in post: for img in post['imgs']: url = "https://media.gettr.com/" + img media_blob, content_type, key = self.url_to_blob(url) - archived_url = self.archive_media(media_blob, content_type, key) + archived_url = self.archive_blob(media_blob, content_type, key) archived_urls[img] = archived_url if 'main' in post: url = "https://media.gettr.com/" + post['main'] media_blob, content_type, key = self.url_to_blob(url) - archived_url = self.archive_media(media_blob, content_type, key) + archived_url = self.archive_blob(media_blob, content_type, key) archived_urls[post['main']] = archived_url if 'vid' in post: url = "https://media.gettr.com/" + post['vid'] media_blob, content_type, key = self.m3u8_url_to_blob(url) - archived_url = self.archive_media(media_blob, content_type, key) + archived_url = self.archive_blob(media_blob, content_type, key) archived_urls[post['vid']] = archived_url yield ScraperResult( @@ -57,7 +57,7 @@ class GettrScraper(Scraper): channel=channel.id, platform_id=post['_id'], date=datetime.fromtimestamp(post['cdate']/1000.), - date_archived=datetime.now(), + date_archived=datetime.now(timezone.utc), raw_data=json.dumps(post), archived_urls=archived_urls) diff --git a/cisticola/scraper/odysee.py b/cisticola/scraper/odysee.py index 27921bd..61ed9ca 100644 --- a/cisticola/scraper/odysee.py +++ b/cisticola/scraper/odysee.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import datetime, timezone import json from typing import Generator from urllib.parse import urlparse @@ -19,7 +19,7 @@ class OdyseeScraper(Scraper): return username - def get_posts(self, channel: Channel, since: ScraperResult = None, media: bool = True) -> Generator[ScraperResult, None, None]: + def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]: username = OdyseeScraper.get_username_from_url(channel.url) odysee_channel = OdyseeChannel(channel_name = username) @@ -32,7 +32,7 @@ class OdyseeScraper(Scraper): archived_urls = {} - if media: + if archive_media: url = video.info['streaming_url'] # Check if file is a video file or an m3u8 file @@ -42,7 +42,7 @@ class OdyseeScraper(Scraper): else: media_blob, content_type, key = self.url_to_blob(url) - archived_url = self.archive_media(media_blob, content_type, key) + archived_url = self.archive_blob(media_blob, content_type, key) archived_urls[url] = archived_url all_comments = video.get_all_comments() @@ -53,7 +53,7 @@ class OdyseeScraper(Scraper): channel=channel.id, platform_id=video.info['claim_id'], date=datetime.fromtimestamp(video.info['created']), - date_archived=datetime.now(), + date_archived=datetime.now(timezone.utc), raw_data=json.dumps(video.info), archived_urls=archived_urls) diff --git a/cisticola/scraper/rumble.py b/cisticola/scraper/rumble.py index e4316d5..dbb4194 100644 --- a/cisticola/scraper/rumble.py +++ b/cisticola/scraper/rumble.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import datetime, timezone import json from typing import Generator, Tuple import tempfile @@ -22,7 +22,7 @@ class RumbleScraper(Scraper): return username - def get_posts(self, channel: Channel, since: ScraperResult = None, media: bool = True) -> Generator[ScraperResult, None, None]: + def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]: username = RumbleScraper.get_username_from_url(channel.url) scraper = get_channel_videos(username) @@ -33,12 +33,12 @@ class RumbleScraper(Scraper): archived_urls = {} - if media: + if archive_media: url = post['media_url'] media_blob, content_type, key = self.url_to_blob(url) - archived_url = self.archive_media(media_blob, content_type, key) + archived_url = self.archive_blob(media_blob, content_type, key) archived_urls[post['media_url']] = archived_url yield ScraperResult( @@ -46,8 +46,8 @@ class RumbleScraper(Scraper): platform="Rumble", channel=channel.id, platform_id=post['media_url'].split('/')[-2], - date=datetime.fromisoformat(post['datetime']).replace(tzinfo=None), - date_archived=datetime.now(), + date=datetime.fromisoformat(post['datetime']).replace(tzinfo=timezone.utc), + date_archived=datetime.now(timezone.utc), raw_data=json.dumps(post), archived_urls=archived_urls) diff --git a/cisticola/scraper/telegram_snscrape.py b/cisticola/scraper/telegram_snscrape.py index 458f726..3f3f45d 100644 --- a/cisticola/scraper/telegram_snscrape.py +++ b/cisticola/scraper/telegram_snscrape.py @@ -14,7 +14,7 @@ class TelegramSnscrapeScraper(Scraper): if channel.platform == "Telegram" and channel.public and not channel.chat: return True - def get_posts(self, channel: Channel, since: ScraperResult = None, media: bool = True) -> Generator[ScraperResult, None, None]: + def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]: scr = snscrape.modules.telegram.TelegramChannelScraper( channel.screenname) @@ -29,18 +29,18 @@ class TelegramSnscrapeScraper(Scraper): archived_urls = {} - if media: + if archive_media: for image_url in post.images: logger.debug(f'Archiving image: {image_url}') media_blob, content_type, key = self.url_to_blob(image_url) - archived_url = self.archive_media(media_blob, content_type, key) + archived_url = self.archive_blob(media_blob, content_type, key) archived_urls[image_url] = archived_url if post.video: logger.debug(f'Archiving video: {post.video}') media_blob, content_type, key = self.url_to_blob(post.video) - archived_url = self.archive_media(media_blob, content_type, key) + archived_url = self.archive_blob(media_blob, content_type, key) archived_urls[post.video] = archived_url yield ScraperResult( diff --git a/cisticola/scraper/telegram_telethon.py b/cisticola/scraper/telegram_telethon.py index 5eb17e7..76d68f2 100644 --- a/cisticola/scraper/telegram_telethon.py +++ b/cisticola/scraper/telegram_telethon.py @@ -26,7 +26,7 @@ class TelegramTelethonScraper(Scraper): if channel.platform == "Telegram" and channel.public and not channel.chat: return True - def get_posts(self, channel: Channel, since: ScraperResult = None, media: bool = True) -> Generator[ScraperResult, None, None]: + def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]: username = self.get_username_from_url(channel.url) @@ -47,7 +47,7 @@ class TelegramTelethonScraper(Scraper): archived_urls = {} - if media: + if archive_media: if post.media is not None: with tempfile.TemporaryDirectory() as temp_dir: @@ -61,7 +61,7 @@ class TelegramTelethonScraper(Scraper): blob = f.read() # TODO specify Content-Type - archived_url = self.archive_media(blob = blob, content_type = '', key = output_file_with_ext) + archived_url = self.archive_blob(blob = blob, content_type = '', key = output_file_with_ext) archived_urls[post_url] = archived_url yield ScraperResult( diff --git a/cisticola/scraper/twitter.py b/cisticola/scraper/twitter.py index 19eb33c..b528383 100644 --- a/cisticola/scraper/twitter.py +++ b/cisticola/scraper/twitter.py @@ -12,7 +12,7 @@ class TwitterScraper(Scraper): """An implementation of a Scraper for Twitter, using snscrape library""" __version__ = "TwitterScraper 0.0.1" - def get_posts(self, channel: Channel, since: ScraperResult = None, media: bool = True) -> Generator[ScraperResult, None, None]: + def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]: scraper = TwitterProfileScraper(channel.platform_id) first = True @@ -28,24 +28,26 @@ class TwitterScraper(Scraper): archived_urls = {} - if tweet.media: - for media in tweet.media: - if type(media) == Video: - variant = max( - [v for v in media.variants if v.bitrate], key=lambda v: v.bitrate) - url = variant.url - elif type(media) == Gif: - url = media.variants[0].url - elif type(media) == Photo: - url = media.fullUrl - else: - logger.warning(f"Could not get media URL of {media}") - url = None + if archive_media: - if url is not None: - media_blob, content_type, key = self.url_to_blob(url) - archived_url = self.archive_media(media_blob, content_type, key) - archived_urls[url] = archived_url + if tweet.media: + for media in tweet.media: + if type(media) == Video: + variant = max( + [v for v in media.variants if v.bitrate], key=lambda v: v.bitrate) + url = variant.url + elif type(media) == Gif: + url = media.variants[0].url + elif type(media) == Photo: + url = media.fullUrl + else: + logger.warning(f"Could not get media URL of {media}") + url = None + + if url is not None: + media_blob, content_type, key = self.url_to_blob(url) + archived_url = self.archive_blob(media_blob, content_type, key) + archived_urls[url] = archived_url yield ScraperResult( scraper=self.__version__, @@ -53,7 +55,7 @@ class TwitterScraper(Scraper): channel=channel.id, platform_id=tweet.id, date=tweet.date, - date_archived=datetime.now(), + date_archived=datetime.now(timezone.utc), raw_data=tweet.json(), archived_urls=archived_urls) diff --git a/russian_telegram_ingest.py b/russian_telegram_ingest.py index 7f490ca..23d3094 100644 --- a/russian_telegram_ingest.py +++ b/russian_telegram_ingest.py @@ -13,48 +13,48 @@ logger.add(sys.stderr, level="INFO") logger.add("../russian_telegram_ingest.log") test_channels = [ - # Channel( - # id=0, - # name="QAnon Россия", - # platform_id=-1001319637748, - # category="Qanon", - # followers=94048, - # platform="Telegram", - # url="https://t.me/qanonrus", - # screenname="qanonrus", - # country="RU", - # influencer=None, - # public=True, - # chat=False, - # notes=""), - # Channel( - # id=1, - # name="The Great Awakening | Q", - # platform_id=-1001325597521, - # category="Qanon", - # followers=5715, - # platform="Telegram", - # url="https://t.me/greatawakin", - # screenname="greatawakin", - # country="RU", - # influencer=None, - # public=True, - # chat=False, - # notes=""), - # Channel( - # id=2, - # name="Великое Пробуждение", - # platform_id=-1001285898079, - # category="Qanon", - # followers=5861, - # platform="Telegram", - # url="https://t.me/greatawakeningrus", - # screenname="greatawakeningrus", - # country="RU", - # influencer=None, - # public=True, - # chat=False, - # notes=""), + Channel( + id=0, + name="QAnon Россия", + platform_id=-1001319637748, + category="Qanon", + followers=94048, + platform="Telegram", + url="https://t.me/qanonrus", + screenname="qanonrus", + country="RU", + influencer=None, + public=True, + chat=False, + notes=""), + Channel( + id=1, + name="The Great Awakening | Q", + platform_id=-1001325597521, + category="Qanon", + followers=5715, + platform="Telegram", + url="https://t.me/greatawakin", + screenname="greatawakin", + country="RU", + influencer=None, + public=True, + chat=False, + notes=""), + Channel( + id=2, + name="Великое Пробуждение", + platform_id=-1001285898079, + category="Qanon", + followers=5861, + platform="Telegram", + url="https://t.me/greatawakeningrus", + screenname="greatawakeningrus", + country="RU", + influencer=None, + public=True, + chat=False, + notes=""), Channel( id=3, name="T🕊Редакция Президент Гордон🕊", @@ -134,5 +134,5 @@ controller.register_scraper(telegram) engine = create_engine('sqlite:///russian_telegram.db') controller.connect_to_db(engine) -controller.scrape_channels(test_channels) +controller.scrape_channels(test_channels, archive_media = False) diff --git a/test.py b/test.py index 21add12..8a2d624 100644 --- a/test.py +++ b/test.py @@ -1,4 +1,5 @@ from sqlalchemy import create_engine +from loguru import logger from cisticola.base import Channel from cisticola.scraper import ( @@ -12,6 +13,8 @@ from cisticola.scraper import ( TelegramTelethonScraper, TwitterScraper) +logger.add("../test.log") + test_channels = [ Channel( id=0, @@ -118,12 +121,12 @@ scrapers = [ OdyseeScraper(), RumbleScraper(), TelegramSnscrapeScraper(), - TwitterScraper() - TelegramTelethonScraper()] + TelegramTelethonScraper(), + TwitterScraper()] controller.register_scrapers(scrapers) engine = create_engine('sqlite:///test3.db') controller.connect_to_db(engine) -controller.scrape_channels(test_channels, media = True) \ No newline at end of file +controller.scrape_channels(test_channels, archive_media = False) \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index 161439d..0608903 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -113,7 +113,7 @@ TWITTER_CHANNEL_KWARGS = { #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++# -@pytest.fixture(scope='package') +@pytest.fixture(scope='function') def controller(tmpdir_factory): """Initialize ScraperController and SQLite database file to be used for all diff --git a/tests/scraper/bitchute.py b/tests/scraper/bitchute.py index 2071568..bc64c4b 100644 --- a/tests/scraper/bitchute.py +++ b/tests/scraper/bitchute.py @@ -5,10 +5,10 @@ def test_scrape_bitchute_channel_no_media(controller, channel_kwargs): channels = [Channel(**channel_kwargs['bitchute'])] controller.register_scraper(scraper = BitchuteScraper()) - controller.scrape_channels(channels = channels, media = False) - + controller.scrape_channels(channels = channels, archive_media = False) + def test_scrape_bitchute_channel(controller, channel_kwargs): channels = [Channel(**channel_kwargs['bitchute'])] controller.register_scraper(scraper = BitchuteScraper()) - controller.scrape_channels(channels = channels, media = True) + controller.scrape_channels(channels = channels, archive_media = True) diff --git a/tests/scraper/gab.py b/tests/scraper/gab.py index daf100a..29fa34a 100644 --- a/tests/scraper/gab.py +++ b/tests/scraper/gab.py @@ -5,10 +5,10 @@ def test_scrape_gab_channel_no_media(controller, channel_kwargs): channels = [Channel(**channel_kwargs['gab'])] controller.register_scraper(scraper = GabScraper()) - controller.scrape_channels(channels = channels, media = False) + controller.scrape_channels(channels = channels, archive_media = False) def test_scrape_gab_channel(controller, channel_kwargs): channels = [Channel(**channel_kwargs['gab'])] controller.register_scraper(scraper = GabScraper()) - controller.scrape_channels(channels = channels, media = True) + controller.scrape_channels(channels = channels, archive_media = True) diff --git a/tests/scraper/gettr.py b/tests/scraper/gettr.py index 13800c2..186b74c 100644 --- a/tests/scraper/gettr.py +++ b/tests/scraper/gettr.py @@ -5,10 +5,10 @@ def test_scrape_gettr_channel_no_media(controller, channel_kwargs): channels = [Channel(**channel_kwargs['gettr'])] controller.register_scraper(scraper = GettrScraper()) - controller.scrape_channels(channels = channels, media = False) + controller.scrape_channels(channels = channels, archive_media = False) def test_scrape_gettr_channel(controller, channel_kwargs): channels = [Channel(**channel_kwargs['gettr'])] controller.register_scraper(scraper = GettrScraper()) - controller.scrape_channels(channels = channels, media = True) + controller.scrape_channels(channels = channels, archive_media = True) diff --git a/tests/scraper/odysee.py b/tests/scraper/odysee.py index 0fda0a7..8b9f89a 100644 --- a/tests/scraper/odysee.py +++ b/tests/scraper/odysee.py @@ -5,10 +5,10 @@ def test_scrape_odysee_channel_no_media(controller, channel_kwargs): channels = [Channel(**channel_kwargs['odysee'])] controller.register_scraper(scraper = OdyseeScraper()) - controller.scrape_channels(channels = channels, media = False) + controller.scrape_channels(channels = channels, archive_media = False) def test_scrape_odysee_channel(controller, channel_kwargs): channels = [Channel(**channel_kwargs['odysee'])] controller.register_scraper(scraper = OdyseeScraper()) - controller.scrape_channels(channels = channels, media = True) + controller.scrape_channels(channels = channels, archive_media = True) diff --git a/tests/scraper/rumble.py b/tests/scraper/rumble.py index 0f43463..daf59f6 100644 --- a/tests/scraper/rumble.py +++ b/tests/scraper/rumble.py @@ -5,10 +5,10 @@ def test_scrape_rumble_channel_no_media(controller, channel_kwargs): channels = [Channel(**channel_kwargs['rumble'])] controller.register_scraper(scraper = RumbleScraper()) - controller.scrape_channels(channels = channels, media = False) + controller.scrape_channels(channels = channels, archive_media = False) def test_scrape_rumble_channel(controller, channel_kwargs): channels = [Channel(**channel_kwargs['rumble'])] controller.register_scraper(scraper = RumbleScraper()) - controller.scrape_channels(channels = channels, media = True) + controller.scrape_channels(channels = channels, archive_media = True) diff --git a/tests/scraper/telegram_snscrape.py b/tests/scraper/telegram_snscrape.py index 677d949..af25ed7 100644 --- a/tests/scraper/telegram_snscrape.py +++ b/tests/scraper/telegram_snscrape.py @@ -5,10 +5,10 @@ def test_scrape_telegram_snscrape_channel_no_media(controller, channel_kwargs): channels = [Channel(**channel_kwargs['telegram'])] controller.register_scraper(scraper = TelegramSnscrapeScraper()) - controller.scrape_channels(channels = channels, media = False) + controller.scrape_channels(channels = channels, archive_media = False) def test_scrape_telegram_snscrape_channel(controller, channel_kwargs): channels = [Channel(**channel_kwargs['telegram'])] controller.register_scraper(scraper = TelegramSnscrapeScraper()) - controller.scrape_channels(channels = channels, media = True) + controller.scrape_channels(channels = channels, archive_media = True) diff --git a/tests/scraper/telegram_telethon.py b/tests/scraper/telegram_telethon.py index 3590c2e..1cfc529 100644 --- a/tests/scraper/telegram_telethon.py +++ b/tests/scraper/telegram_telethon.py @@ -5,10 +5,10 @@ def test_scrape_telegram_telethon_channel_no_media(controller, channel_kwargs): channels = [Channel(**channel_kwargs['telegram'])] controller.register_scraper(scraper = TelegramTelethonScraper()) - controller.scrape_channels(channels = channels, media = False) - + controller.scrape_channels(channels = channels, archive_media = False) + def test_scrape_telegram_telethon_channel(controller, channel_kwargs): channels = [Channel(**channel_kwargs['telegram'])] controller.register_scraper(scraper = TelegramTelethonScraper()) - controller.scrape_channels(channels = channels, media = True) + controller.scrape_channels(channels = channels, archive_media = True) diff --git a/tests/scraper/twitter.py b/tests/scraper/twitter.py index cb03045..ef375b7 100644 --- a/tests/scraper/twitter.py +++ b/tests/scraper/twitter.py @@ -5,10 +5,10 @@ def test_scrape_twitter_channel_no_media(controller, channel_kwargs): channels = [Channel(**channel_kwargs['twitter'])] controller.register_scraper(scraper = TwitterScraper()) - controller.scrape_channels(channels = channels, media = False) + controller.scrape_channels(channels = channels, archive_media = False) def test_scrape_twitter_channel(controller, channel_kwargs): channels = [Channel(**channel_kwargs['twitter'])] controller.register_scraper(scraper = TwitterScraper()) - controller.scrape_channels(channels = channels, media = True) + controller.scrape_channels(channels = channels, archive_media = True)