mirror of
https://github.com/bellingcat/cisticola.git
synced 2026-06-11 04:48:33 +03:00
Renamed the method 'archive_media' to 'archive_blob' and the parameter 'media' to 'archive_media' to avoid a name collision; changed the scope of the test fixture 'controller' to 'function' so that the DB is fresh for each executed test.
This commit is contained in:
@@ -69,7 +69,7 @@ class Scraper:
|
||||
|
||||
return blob, content_type, key
|
||||
|
||||
def archive_media(self, blob: bytes, content_type: str, key: str) -> str:
|
||||
def archive_blob(self, blob: bytes, content_type: str, key: str) -> str:
|
||||
|
||||
filename = self.__version__.replace(' ', '_') + '/' + key
|
||||
|
||||
@@ -83,7 +83,7 @@ class Scraper:
|
||||
def can_handle(self, channel: Channel) -> bool:
|
||||
raise NotImplementedError
|
||||
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None, media: bool = True) -> Generator[ScraperResult, None, None]:
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
@@ -103,7 +103,7 @@ class ScraperController:
|
||||
self.scrapers.extend(scraper)
|
||||
|
||||
@logger.catch
|
||||
def scrape_channels(self, channels: List[Channel], media: bool = True):
|
||||
def scrape_channels(self, channels: List[Channel], archive_media: bool = True):
|
||||
if self.session is None:
|
||||
logger.error("No DB session")
|
||||
return
|
||||
@@ -128,7 +128,7 @@ class ScraperController:
|
||||
else:
|
||||
since = None
|
||||
|
||||
posts = scraper.get_posts(channel, since=since, media=media)
|
||||
posts = scraper.get_posts(channel, since=since, archive_media=archive_media)
|
||||
|
||||
for post in posts:
|
||||
session.add(post)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timezone
|
||||
import time
|
||||
import re
|
||||
from html.parser import HTMLParser
|
||||
@@ -22,7 +22,7 @@ class BitchuteScraper(Scraper):
|
||||
|
||||
return username
|
||||
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None, media: bool = True) -> Generator[ScraperResult, None, None]:
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
|
||||
|
||||
session = requests.Session()
|
||||
session.headers.update(self.headers)
|
||||
@@ -43,11 +43,11 @@ class BitchuteScraper(Scraper):
|
||||
|
||||
archived_urls = {}
|
||||
|
||||
if media:
|
||||
if archive_media:
|
||||
if 'video_url' in post:
|
||||
url = post['video_url']
|
||||
media_blob, content_type, key = self.url_to_blob(url)
|
||||
archived_url = self.archive_media(media_blob, content_type, key)
|
||||
archived_url = self.archive_blob(media_blob, content_type, key)
|
||||
archived_urls[url] = archived_url
|
||||
|
||||
yield ScraperResult(
|
||||
@@ -56,7 +56,7 @@ class BitchuteScraper(Scraper):
|
||||
channel=channel.id,
|
||||
platform_id=post['id'],
|
||||
date=datetime.fromtimestamp(post['timestamp']),
|
||||
date_archived=datetime.now(),
|
||||
date_archived=datetime.now(timezone.utc),
|
||||
raw_data=json.dumps(post),
|
||||
archived_urls=archived_urls)
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timezone
|
||||
import json
|
||||
from typing import Generator
|
||||
|
||||
@@ -16,7 +16,7 @@ class GabScraper(Scraper):
|
||||
|
||||
return username
|
||||
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None, media: bool = True) -> Generator[ScraperResult, None, None]:
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
|
||||
client = Garc(profile = 'main')
|
||||
username = GabScraper.get_username_from_url(channel.url)
|
||||
|
||||
@@ -29,7 +29,7 @@ class GabScraper(Scraper):
|
||||
media_urls = []
|
||||
archived_urls = {}
|
||||
|
||||
if media:
|
||||
if archive_media:
|
||||
|
||||
media_urls.extend([p['url'] for p in post['media_attachments']])
|
||||
|
||||
@@ -38,7 +38,7 @@ class GabScraper(Scraper):
|
||||
|
||||
for url in media_urls:
|
||||
media_blob, content_type, key = self.url_to_blob(url)
|
||||
archived_url = self.archive_media(media_blob, content_type, key)
|
||||
archived_url = self.archive_blob(media_blob, content_type, key)
|
||||
archived_urls[url] = archived_url
|
||||
|
||||
yield ScraperResult(
|
||||
@@ -46,8 +46,8 @@ class GabScraper(Scraper):
|
||||
platform="Gab",
|
||||
channel=channel.id,
|
||||
platform_id=post['id'],
|
||||
date=datetime.fromisoformat(post['created_at'].replace("Z", "+00:00")).replace(tzinfo = None),
|
||||
date_archived=datetime.now(),
|
||||
date=datetime.fromisoformat(post['created_at'].replace("Z", "+00:00")).replace(tzinfo=timezone.utc),
|
||||
date_archived=datetime.now(timezone.utc),
|
||||
raw_data=json.dumps(post),
|
||||
archived_urls=archived_urls)
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timezone
|
||||
import json
|
||||
from typing import Generator, Tuple
|
||||
from typing import Generator
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from gogettr import PublicClient
|
||||
@@ -19,7 +19,7 @@ class GettrScraper(Scraper):
|
||||
|
||||
return username
|
||||
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None, media: bool = True) -> Generator[ScraperResult, None, None]:
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
|
||||
client = PublicClient()
|
||||
username = GettrScraper.get_username_from_url(channel.url)
|
||||
scraper = client.user_activity(username=username, type="posts")
|
||||
@@ -30,25 +30,25 @@ class GettrScraper(Scraper):
|
||||
|
||||
archived_urls = {}
|
||||
|
||||
if media:
|
||||
if archive_media:
|
||||
|
||||
if 'imgs' in post:
|
||||
for img in post['imgs']:
|
||||
url = "https://media.gettr.com/" + img
|
||||
media_blob, content_type, key = self.url_to_blob(url)
|
||||
archived_url = self.archive_media(media_blob, content_type, key)
|
||||
archived_url = self.archive_blob(media_blob, content_type, key)
|
||||
archived_urls[img] = archived_url
|
||||
|
||||
if 'main' in post:
|
||||
url = "https://media.gettr.com/" + post['main']
|
||||
media_blob, content_type, key = self.url_to_blob(url)
|
||||
archived_url = self.archive_media(media_blob, content_type, key)
|
||||
archived_url = self.archive_blob(media_blob, content_type, key)
|
||||
archived_urls[post['main']] = archived_url
|
||||
|
||||
if 'vid' in post:
|
||||
url = "https://media.gettr.com/" + post['vid']
|
||||
media_blob, content_type, key = self.m3u8_url_to_blob(url)
|
||||
archived_url = self.archive_media(media_blob, content_type, key)
|
||||
archived_url = self.archive_blob(media_blob, content_type, key)
|
||||
archived_urls[post['vid']] = archived_url
|
||||
|
||||
yield ScraperResult(
|
||||
@@ -57,7 +57,7 @@ class GettrScraper(Scraper):
|
||||
channel=channel.id,
|
||||
platform_id=post['_id'],
|
||||
date=datetime.fromtimestamp(post['cdate']/1000.),
|
||||
date_archived=datetime.now(),
|
||||
date_archived=datetime.now(timezone.utc),
|
||||
raw_data=json.dumps(post),
|
||||
archived_urls=archived_urls)
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timezone
|
||||
import json
|
||||
from typing import Generator
|
||||
from urllib.parse import urlparse
|
||||
@@ -19,7 +19,7 @@ class OdyseeScraper(Scraper):
|
||||
|
||||
return username
|
||||
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None, media: bool = True) -> Generator[ScraperResult, None, None]:
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
|
||||
|
||||
username = OdyseeScraper.get_username_from_url(channel.url)
|
||||
odysee_channel = OdyseeChannel(channel_name = username)
|
||||
@@ -32,7 +32,7 @@ class OdyseeScraper(Scraper):
|
||||
|
||||
archived_urls = {}
|
||||
|
||||
if media:
|
||||
if archive_media:
|
||||
url = video.info['streaming_url']
|
||||
|
||||
# Check if file is a video file or an m3u8 file
|
||||
@@ -42,7 +42,7 @@ class OdyseeScraper(Scraper):
|
||||
else:
|
||||
media_blob, content_type, key = self.url_to_blob(url)
|
||||
|
||||
archived_url = self.archive_media(media_blob, content_type, key)
|
||||
archived_url = self.archive_blob(media_blob, content_type, key)
|
||||
archived_urls[url] = archived_url
|
||||
|
||||
all_comments = video.get_all_comments()
|
||||
@@ -53,7 +53,7 @@ class OdyseeScraper(Scraper):
|
||||
channel=channel.id,
|
||||
platform_id=video.info['claim_id'],
|
||||
date=datetime.fromtimestamp(video.info['created']),
|
||||
date_archived=datetime.now(),
|
||||
date_archived=datetime.now(timezone.utc),
|
||||
raw_data=json.dumps(video.info),
|
||||
archived_urls=archived_urls)
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timezone
|
||||
import json
|
||||
from typing import Generator, Tuple
|
||||
import tempfile
|
||||
@@ -22,7 +22,7 @@ class RumbleScraper(Scraper):
|
||||
|
||||
return username
|
||||
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None, media: bool = True) -> Generator[ScraperResult, None, None]:
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
|
||||
|
||||
username = RumbleScraper.get_username_from_url(channel.url)
|
||||
scraper = get_channel_videos(username)
|
||||
@@ -33,12 +33,12 @@ class RumbleScraper(Scraper):
|
||||
|
||||
archived_urls = {}
|
||||
|
||||
if media:
|
||||
if archive_media:
|
||||
|
||||
url = post['media_url']
|
||||
|
||||
media_blob, content_type, key = self.url_to_blob(url)
|
||||
archived_url = self.archive_media(media_blob, content_type, key)
|
||||
archived_url = self.archive_blob(media_blob, content_type, key)
|
||||
archived_urls[post['media_url']] = archived_url
|
||||
|
||||
yield ScraperResult(
|
||||
@@ -46,8 +46,8 @@ class RumbleScraper(Scraper):
|
||||
platform="Rumble",
|
||||
channel=channel.id,
|
||||
platform_id=post['media_url'].split('/')[-2],
|
||||
date=datetime.fromisoformat(post['datetime']).replace(tzinfo=None),
|
||||
date_archived=datetime.now(),
|
||||
date=datetime.fromisoformat(post['datetime']).replace(tzinfo=timezone.utc),
|
||||
date_archived=datetime.now(timezone.utc),
|
||||
raw_data=json.dumps(post),
|
||||
archived_urls=archived_urls)
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ class TelegramSnscrapeScraper(Scraper):
|
||||
if channel.platform == "Telegram" and channel.public and not channel.chat:
|
||||
return True
|
||||
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None, media: bool = True) -> Generator[ScraperResult, None, None]:
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
|
||||
scr = snscrape.modules.telegram.TelegramChannelScraper(
|
||||
channel.screenname)
|
||||
|
||||
@@ -29,18 +29,18 @@ class TelegramSnscrapeScraper(Scraper):
|
||||
|
||||
archived_urls = {}
|
||||
|
||||
if media:
|
||||
if archive_media:
|
||||
|
||||
for image_url in post.images:
|
||||
logger.debug(f'Archiving image: {image_url}')
|
||||
media_blob, content_type, key = self.url_to_blob(image_url)
|
||||
archived_url = self.archive_media(media_blob, content_type, key)
|
||||
archived_url = self.archive_blob(media_blob, content_type, key)
|
||||
archived_urls[image_url] = archived_url
|
||||
|
||||
if post.video:
|
||||
logger.debug(f'Archiving video: {post.video}')
|
||||
media_blob, content_type, key = self.url_to_blob(post.video)
|
||||
archived_url = self.archive_media(media_blob, content_type, key)
|
||||
archived_url = self.archive_blob(media_blob, content_type, key)
|
||||
archived_urls[post.video] = archived_url
|
||||
|
||||
yield ScraperResult(
|
||||
|
||||
@@ -26,7 +26,7 @@ class TelegramTelethonScraper(Scraper):
|
||||
if channel.platform == "Telegram" and channel.public and not channel.chat:
|
||||
return True
|
||||
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None, media: bool = True) -> Generator[ScraperResult, None, None]:
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
|
||||
|
||||
username = self.get_username_from_url(channel.url)
|
||||
|
||||
@@ -47,7 +47,7 @@ class TelegramTelethonScraper(Scraper):
|
||||
|
||||
archived_urls = {}
|
||||
|
||||
if media:
|
||||
if archive_media:
|
||||
|
||||
if post.media is not None:
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
@@ -61,7 +61,7 @@ class TelegramTelethonScraper(Scraper):
|
||||
blob = f.read()
|
||||
|
||||
# TODO specify Content-Type
|
||||
archived_url = self.archive_media(blob = blob, content_type = '', key = output_file_with_ext)
|
||||
archived_url = self.archive_blob(blob = blob, content_type = '', key = output_file_with_ext)
|
||||
archived_urls[post_url] = archived_url
|
||||
|
||||
yield ScraperResult(
|
||||
|
||||
@@ -12,7 +12,7 @@ class TwitterScraper(Scraper):
|
||||
"""An implementation of a Scraper for Twitter, using snscrape library"""
|
||||
__version__ = "TwitterScraper 0.0.1"
|
||||
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None, media: bool = True) -> Generator[ScraperResult, None, None]:
|
||||
def get_posts(self, channel: Channel, since: ScraperResult = None, archive_media: bool = True) -> Generator[ScraperResult, None, None]:
|
||||
scraper = TwitterProfileScraper(channel.platform_id)
|
||||
|
||||
first = True
|
||||
@@ -28,24 +28,26 @@ class TwitterScraper(Scraper):
|
||||
|
||||
archived_urls = {}
|
||||
|
||||
if tweet.media:
|
||||
for media in tweet.media:
|
||||
if type(media) == Video:
|
||||
variant = max(
|
||||
[v for v in media.variants if v.bitrate], key=lambda v: v.bitrate)
|
||||
url = variant.url
|
||||
elif type(media) == Gif:
|
||||
url = media.variants[0].url
|
||||
elif type(media) == Photo:
|
||||
url = media.fullUrl
|
||||
else:
|
||||
logger.warning(f"Could not get media URL of {media}")
|
||||
url = None
|
||||
if archive_media:
|
||||
|
||||
if url is not None:
|
||||
media_blob, content_type, key = self.url_to_blob(url)
|
||||
archived_url = self.archive_media(media_blob, content_type, key)
|
||||
archived_urls[url] = archived_url
|
||||
if tweet.media:
|
||||
for media in tweet.media:
|
||||
if type(media) == Video:
|
||||
variant = max(
|
||||
[v for v in media.variants if v.bitrate], key=lambda v: v.bitrate)
|
||||
url = variant.url
|
||||
elif type(media) == Gif:
|
||||
url = media.variants[0].url
|
||||
elif type(media) == Photo:
|
||||
url = media.fullUrl
|
||||
else:
|
||||
logger.warning(f"Could not get media URL of {media}")
|
||||
url = None
|
||||
|
||||
if url is not None:
|
||||
media_blob, content_type, key = self.url_to_blob(url)
|
||||
archived_url = self.archive_blob(media_blob, content_type, key)
|
||||
archived_urls[url] = archived_url
|
||||
|
||||
yield ScraperResult(
|
||||
scraper=self.__version__,
|
||||
@@ -53,7 +55,7 @@ class TwitterScraper(Scraper):
|
||||
channel=channel.id,
|
||||
platform_id=tweet.id,
|
||||
date=tweet.date,
|
||||
date_archived=datetime.now(),
|
||||
date_archived=datetime.now(timezone.utc),
|
||||
raw_data=tweet.json(),
|
||||
archived_urls=archived_urls)
|
||||
|
||||
|
||||
@@ -13,48 +13,48 @@ logger.add(sys.stderr, level="INFO")
|
||||
logger.add("../russian_telegram_ingest.log")
|
||||
|
||||
test_channels = [
|
||||
# Channel(
|
||||
# id=0,
|
||||
# name="QAnon Россия",
|
||||
# platform_id=-1001319637748,
|
||||
# category="Qanon",
|
||||
# followers=94048,
|
||||
# platform="Telegram",
|
||||
# url="https://t.me/qanonrus",
|
||||
# screenname="qanonrus",
|
||||
# country="RU",
|
||||
# influencer=None,
|
||||
# public=True,
|
||||
# chat=False,
|
||||
# notes=""),
|
||||
# Channel(
|
||||
# id=1,
|
||||
# name="The Great Awakening | Q",
|
||||
# platform_id=-1001325597521,
|
||||
# category="Qanon",
|
||||
# followers=5715,
|
||||
# platform="Telegram",
|
||||
# url="https://t.me/greatawakin",
|
||||
# screenname="greatawakin",
|
||||
# country="RU",
|
||||
# influencer=None,
|
||||
# public=True,
|
||||
# chat=False,
|
||||
# notes=""),
|
||||
# Channel(
|
||||
# id=2,
|
||||
# name="Великое Пробуждение",
|
||||
# platform_id=-1001285898079,
|
||||
# category="Qanon",
|
||||
# followers=5861,
|
||||
# platform="Telegram",
|
||||
# url="https://t.me/greatawakeningrus",
|
||||
# screenname="greatawakeningrus",
|
||||
# country="RU",
|
||||
# influencer=None,
|
||||
# public=True,
|
||||
# chat=False,
|
||||
# notes=""),
|
||||
Channel(
|
||||
id=0,
|
||||
name="QAnon Россия",
|
||||
platform_id=-1001319637748,
|
||||
category="Qanon",
|
||||
followers=94048,
|
||||
platform="Telegram",
|
||||
url="https://t.me/qanonrus",
|
||||
screenname="qanonrus",
|
||||
country="RU",
|
||||
influencer=None,
|
||||
public=True,
|
||||
chat=False,
|
||||
notes=""),
|
||||
Channel(
|
||||
id=1,
|
||||
name="The Great Awakening | Q",
|
||||
platform_id=-1001325597521,
|
||||
category="Qanon",
|
||||
followers=5715,
|
||||
platform="Telegram",
|
||||
url="https://t.me/greatawakin",
|
||||
screenname="greatawakin",
|
||||
country="RU",
|
||||
influencer=None,
|
||||
public=True,
|
||||
chat=False,
|
||||
notes=""),
|
||||
Channel(
|
||||
id=2,
|
||||
name="Великое Пробуждение",
|
||||
platform_id=-1001285898079,
|
||||
category="Qanon",
|
||||
followers=5861,
|
||||
platform="Telegram",
|
||||
url="https://t.me/greatawakeningrus",
|
||||
screenname="greatawakeningrus",
|
||||
country="RU",
|
||||
influencer=None,
|
||||
public=True,
|
||||
chat=False,
|
||||
notes=""),
|
||||
Channel(
|
||||
id=3,
|
||||
name="T🕊Редакция Президент Гордон🕊",
|
||||
@@ -134,5 +134,5 @@ controller.register_scraper(telegram)
|
||||
engine = create_engine('sqlite:///russian_telegram.db')
|
||||
controller.connect_to_db(engine)
|
||||
|
||||
controller.scrape_channels(test_channels)
|
||||
controller.scrape_channels(test_channels, archive_media = False)
|
||||
|
||||
|
||||
9
test.py
9
test.py
@@ -1,4 +1,5 @@
|
||||
from sqlalchemy import create_engine
|
||||
from loguru import logger
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import (
|
||||
@@ -12,6 +13,8 @@ from cisticola.scraper import (
|
||||
TelegramTelethonScraper,
|
||||
TwitterScraper)
|
||||
|
||||
logger.add("../test.log")
|
||||
|
||||
test_channels = [
|
||||
Channel(
|
||||
id=0,
|
||||
@@ -118,12 +121,12 @@ scrapers = [
|
||||
OdyseeScraper(),
|
||||
RumbleScraper(),
|
||||
TelegramSnscrapeScraper(),
|
||||
TwitterScraper()
|
||||
TelegramTelethonScraper()]
|
||||
TelegramTelethonScraper(),
|
||||
TwitterScraper()]
|
||||
|
||||
controller.register_scrapers(scrapers)
|
||||
|
||||
engine = create_engine('sqlite:///test3.db')
|
||||
controller.connect_to_db(engine)
|
||||
|
||||
controller.scrape_channels(test_channels, media = True)
|
||||
controller.scrape_channels(test_channels, archive_media = False)
|
||||
@@ -113,7 +113,7 @@ TWITTER_CHANNEL_KWARGS = {
|
||||
|
||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||
|
||||
@pytest.fixture(scope='package')
|
||||
@pytest.fixture(scope='function')
|
||||
def controller(tmpdir_factory):
|
||||
|
||||
"""Initialize ScraperController and SQLite database file to be used for all
|
||||
|
||||
@@ -5,10 +5,10 @@ def test_scrape_bitchute_channel_no_media(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['bitchute'])]
|
||||
controller.register_scraper(scraper = BitchuteScraper())
|
||||
controller.scrape_channels(channels = channels, media = False)
|
||||
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
def test_scrape_bitchute_channel(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['bitchute'])]
|
||||
controller.register_scraper(scraper = BitchuteScraper())
|
||||
controller.scrape_channels(channels = channels, media = True)
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@@ -5,10 +5,10 @@ def test_scrape_gab_channel_no_media(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['gab'])]
|
||||
controller.register_scraper(scraper = GabScraper())
|
||||
controller.scrape_channels(channels = channels, media = False)
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
def test_scrape_gab_channel(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['gab'])]
|
||||
controller.register_scraper(scraper = GabScraper())
|
||||
controller.scrape_channels(channels = channels, media = True)
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@@ -5,10 +5,10 @@ def test_scrape_gettr_channel_no_media(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['gettr'])]
|
||||
controller.register_scraper(scraper = GettrScraper())
|
||||
controller.scrape_channels(channels = channels, media = False)
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
def test_scrape_gettr_channel(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['gettr'])]
|
||||
controller.register_scraper(scraper = GettrScraper())
|
||||
controller.scrape_channels(channels = channels, media = True)
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@@ -5,10 +5,10 @@ def test_scrape_odysee_channel_no_media(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['odysee'])]
|
||||
controller.register_scraper(scraper = OdyseeScraper())
|
||||
controller.scrape_channels(channels = channels, media = False)
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
def test_scrape_odysee_channel(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['odysee'])]
|
||||
controller.register_scraper(scraper = OdyseeScraper())
|
||||
controller.scrape_channels(channels = channels, media = True)
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@@ -5,10 +5,10 @@ def test_scrape_rumble_channel_no_media(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['rumble'])]
|
||||
controller.register_scraper(scraper = RumbleScraper())
|
||||
controller.scrape_channels(channels = channels, media = False)
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
def test_scrape_rumble_channel(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['rumble'])]
|
||||
controller.register_scraper(scraper = RumbleScraper())
|
||||
controller.scrape_channels(channels = channels, media = True)
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@@ -5,10 +5,10 @@ def test_scrape_telegram_snscrape_channel_no_media(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['telegram'])]
|
||||
controller.register_scraper(scraper = TelegramSnscrapeScraper())
|
||||
controller.scrape_channels(channels = channels, media = False)
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
def test_scrape_telegram_snscrape_channel(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['telegram'])]
|
||||
controller.register_scraper(scraper = TelegramSnscrapeScraper())
|
||||
controller.scrape_channels(channels = channels, media = True)
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@@ -5,10 +5,10 @@ def test_scrape_telegram_telethon_channel_no_media(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['telegram'])]
|
||||
controller.register_scraper(scraper = TelegramTelethonScraper())
|
||||
controller.scrape_channels(channels = channels, media = False)
|
||||
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
def test_scrape_telegram_telethon_channel(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['telegram'])]
|
||||
controller.register_scraper(scraper = TelegramTelethonScraper())
|
||||
controller.scrape_channels(channels = channels, media = True)
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@@ -5,10 +5,10 @@ def test_scrape_twitter_channel_no_media(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['twitter'])]
|
||||
controller.register_scraper(scraper = TwitterScraper())
|
||||
controller.scrape_channels(channels = channels, media = False)
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
def test_scrape_twitter_channel(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['twitter'])]
|
||||
controller.register_scraper(scraper = TwitterScraper())
|
||||
controller.scrape_channels(channels = channels, media = True)
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
Reference in New Issue
Block a user