fixed some issues with Telegram tests

This commit is contained in:
Tristan Lee
2023-07-27 13:07:44 -05:00
parent 99cc4d80b2
commit 249f411a1d
4 changed files with 12 additions and 65 deletions

View File

@@ -120,8 +120,6 @@ class BitchuteScraper(Scraper):
raw_data=json.dumps(profile, default = str),
date_archived=datetime.now(timezone.utc))
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
def strip_tags(html, convert_newlines=True):
r"""
Strip HTML from a string
@@ -157,8 +155,6 @@ def strip_tags(html, convert_newlines=True):
stripper.feed(html)
return stripper.get_data()
#-----------------------------------------------------------------------------#
def request_from_bitchute(session, method, url, headers=None, data=None):
"""
Request something via the BitChute API (or non-API)
@@ -203,8 +199,6 @@ def request_from_bitchute(session, method, url, headers=None, data=None):
return response
#-----------------------------------------------------------------------------#
def append_details(video, detail):
"""
Append extra metadata to video data
@@ -395,8 +389,6 @@ def append_details(video, detail):
time.sleep(0.25)
return (video, comments)
#-----------------------------------------------------------------------------#
def get_videos_user(session, user, csrftoken, detail):
"""
Scrape videos for given BitChute user
@@ -470,8 +462,6 @@ def get_videos_user(session, user, csrftoken, detail):
# before the video, which is weird
yield comment
#-----------------------------------------------------------------------------#
def decode_cfemail(cfemail):
"""https://stackoverflow.com/questions/36911296/scraping-of-protected-email

View File

@@ -80,8 +80,6 @@ class RumbleScraper(Scraper):
raw_data=json.dumps(profile),
date_archived=datetime.now(timezone.utc))
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
def get_media_url(url):
r = make_request(url = url)
@@ -182,6 +180,4 @@ def get_channel_profile(url):
'cover': cover_soup.get('src') if cover_soup else None,
'subscribers': soup.find('span', {'class' : 'subscribe-button-count'}).text}
return profile
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
return profile

View File

@@ -1,3 +1,4 @@
import os
import pytest
from sqlalchemy import create_engine
@@ -5,8 +6,6 @@ from sqlalchemy import create_engine
from cisticola.scraper import ScraperController
from cisticola.transformer import ETLController
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
BITCHUTE_CHANNEL_KWARGS = {
'name': 'bestonlinejewelrystoresusa@gmail.com (test)',
'platform_id': 'bestonlinejewelrystoresusagmailcom',
@@ -37,7 +36,7 @@ GAB_CHANNEL_KWARGS = {
GAB_GROUP_KWARGS = {
'name': 'iran group (test)',
'platform_id': 10001,
'platform_id': "10001",
'category': 'test',
'platform': 'Gab',
'url': 'https://gab.com/groups/10001',
@@ -92,7 +91,7 @@ ODYSEE_CHANNEL_KWARGS = {
'source': 'researcher'}
RUMBLE_CHANNEL_KWARGS = {
'name': 'we are uploading videos wow products',
'name': 'we are uploading videos wow products (test)',
'platform_id': 'c-916305',
'category': 'test',
'platform': 'Rumble',
@@ -106,13 +105,13 @@ RUMBLE_CHANNEL_KWARGS = {
'source': 'researcher'}
TELEGRAM_CHANNEL_KWARGS = {
'name': 'South West Ohio Proud Boys (test)',
'platform_id': -1001276612436,
'name': 'Star Game (test)',
'platform_id': "-1001866374682",
'category': 'test',
'platform': 'Telegram',
'url': 'https://t.me/SouthwestOhioPB',
'screenname': 'SouthwestOhioPB',
'country': 'US',
'url': 'https://t.me/stargameinfo',
'screenname': 'stargameinfo',
'country': 'RU',
'influencer': None,
'public': True,
'chat': False,
@@ -121,7 +120,7 @@ TELEGRAM_CHANNEL_KWARGS = {
TWITTER_CHANNEL_KWARGS = {
'name': 'L Weber (test)',
'platform_id': 1424979017749442595,
'platform_id': "1424979017749442595",
'category': 'test',
'platform': 'Twitter',
'url': 'https://twitter.com/LWeber33662141',
@@ -161,16 +160,13 @@ YOUTUBE_CHANNEL_KWARGS = {
'notes': '',
'source': 'researcher'}
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
@pytest.fixture(scope='package')
def engine(tmpdir_factory):
    """Create the SQLAlchemy engine shared by all tests in the package.

    The database URL is read from the ``TEST_DB`` environment variable; a
    ``KeyError`` is raised if that variable is not set.

    ``tmpdir_factory`` stays in the signature so the fixture's interface is
    unchanged, but it is not used: the database location comes from the
    environment rather than from a temporary directory.
    """
    # Build the engine once from the configured URL. A throwaway temp-file
    # SQLite engine is deliberately not created first, since it would be
    # discarded immediately.
    return create_engine(os.environ["TEST_DB"])
@@ -210,6 +206,4 @@ def channel_kwargs():
'telegram' : TELEGRAM_CHANNEL_KWARGS,
'twitter' : TWITTER_CHANNEL_KWARGS,
'vkontakte' : VKONTAKTE_CHANNEL_KWARGS,
'youtube' : YOUTUBE_CHANNEL_KWARGS}
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
'youtube' : YOUTUBE_CHANNEL_KWARGS}

View File

@@ -1,33 +0,0 @@
import pytest
from cisticola.base import Channel
from cisticola.scraper import TelegramSnscrapeScraper
@pytest.mark.unarchived
def test_scrape_telegram_snscrape_channel_no_media(controller, channel_kwargs):
    """Scrape the Telegram test channel with the snscrape scraper, passing
    ``archive_media=False`` to the controller."""
    telegram_channel = Channel(**channel_kwargs['telegram'])
    snscrape_scraper = TelegramSnscrapeScraper()

    controller.register_scraper(scraper=snscrape_scraper)
    controller.scrape_channels(
        channels=[telegram_channel],
        archive_media=False,
    )
@pytest.mark.media
@pytest.mark.unarchived
def test_scrape_telegram_snscrape_channel_unarchived_media(controller):
    """Invoke the controller's ``archive_unarchived_media`` step.

    NOTE(review): presumably meant to run after the no-media scrape test so
    that there is unarchived media to process -- confirm test ordering.
    """
    controller.archive_unarchived_media()
@pytest.mark.media
def test_scrape_telegram_snscrape_channel(controller, channel_kwargs):
    """Reset the database, then scrape the Telegram test channel with the
    snscrape scraper, passing ``archive_media=True`` to the controller."""
    controller.reset_db()

    telegram_channel = Channel(**channel_kwargs['telegram'])
    snscrape_scraper = TelegramSnscrapeScraper()

    controller.register_scraper(scraper=snscrape_scraper)
    controller.scrape_channels(
        channels=[telegram_channel],
        archive_media=True,
    )
@pytest.mark.profile
def test_scrape_telegram_snscrape_profile(channel_kwargs):
    """Call ``get_profile`` on the snscrape scraper for the Telegram test
    channel; the test passes if no exception is raised."""
    snscrape_scraper = TelegramSnscrapeScraper()
    telegram_kwargs = channel_kwargs['telegram']
    snscrape_scraper.get_profile(channel=Channel(**telegram_kwargs))