From 249f411a1d5583d72fc3d242d4c2b946f3aa53ad Mon Sep 17 00:00:00 2001 From: Tristan Lee Date: Thu, 27 Jul 2023 13:07:44 -0500 Subject: [PATCH] fixed some issues with Telegram tests --- cisticola/scraper/bitchute.py | 10 --------- cisticola/scraper/rumble.py | 6 +----- tests/conftest.py | 28 ++++++++++--------------- tests/scraper/telegram_snscrape.py | 33 ------------------------------ 4 files changed, 12 insertions(+), 65 deletions(-) delete mode 100644 tests/scraper/telegram_snscrape.py diff --git a/cisticola/scraper/bitchute.py b/cisticola/scraper/bitchute.py index 284b300..f5c79fb 100644 --- a/cisticola/scraper/bitchute.py +++ b/cisticola/scraper/bitchute.py @@ -120,8 +120,6 @@ class BitchuteScraper(Scraper): raw_data=json.dumps(profile, default = str), date_archived=datetime.now(timezone.utc)) -#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++# - def strip_tags(html, convert_newlines=True): r""" Strip HTML from a string @@ -157,8 +155,6 @@ def strip_tags(html, convert_newlines=True): stripper.feed(html) return stripper.get_data() -#-----------------------------------------------------------------------------# - def request_from_bitchute(session, method, url, headers=None, data=None): """ Request something via the BitChute API (or non-API) @@ -203,8 +199,6 @@ def request_from_bitchute(session, method, url, headers=None, data=None): return response -#-----------------------------------------------------------------------------# - def append_details(video, detail): """ Append extra metadata to video data @@ -395,8 +389,6 @@ def append_details(video, detail): time.sleep(0.25) return (video, comments) -#-----------------------------------------------------------------------------# - def get_videos_user(session, user, csrftoken, detail): """ Scrape videos for given BitChute user @@ -470,8 +462,6 @@ def get_videos_user(session, user, csrftoken, detail): # before the video, which is weird yield comment -#-----------------------------------------------------------------------------# - def decode_cfemail(cfemail): """https://stackoverflow.com/questions/36911296/scraping-of-protected-email diff --git a/cisticola/scraper/rumble.py b/cisticola/scraper/rumble.py index fe7adb3..b044a5f 100644 --- a/cisticola/scraper/rumble.py +++ b/cisticola/scraper/rumble.py @@ -80,8 +80,6 @@ class RumbleScraper(Scraper): raw_data=json.dumps(profile), date_archived=datetime.now(timezone.utc)) -#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++# - def get_media_url(url): r = make_request(url = url) @@ -182,6 +180,4 @@ def get_channel_profile(url): 'cover': cover_soup.get('src') if cover_soup else None, 'subscribers': soup.find('span', {'class' : 'subscribe-button-count'}).text} - return profile - -#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++# \ No newline at end of file + return profile \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index 27e6180..26f8a94 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,3 +1,4 @@ +import os import pytest from sqlalchemy import create_engine @@ -5,8 +6,6 @@ from sqlalchemy import create_engine from cisticola.scraper import ScraperController from cisticola.transformer import ETLController -#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++# - BITCHUTE_CHANNEL_KWARGS = { 'name': 'bestonlinejewelrystoresusa@gmail.com (test)', 'platform_id': 'bestonlinejewelrystoresusagmailcom', @@ -37,7 +36,7 @@ GAB_CHANNEL_KWARGS = { GAB_GROUP_KWARGS = { 'name': 'iran group (test)', - 'platform_id': 10001, + 'platform_id': "10001", 'category': 'test', 'platform': 'Gab', 'url': 'https://gab.com/groups/10001', @@ -92,7 +91,7 @@ ODYSEE_CHANNEL_KWARGS = { 'source': 'researcher'} RUMBLE_CHANNEL_KWARGS = { - 'name': 'we are uploading videos wow products', + 'name': 'we are uploading videos wow products (test)', 'platform_id': 'c-916305', 'category': 'test', 'platform': 'Rumble', @@ -106,13 +105,13 @@ RUMBLE_CHANNEL_KWARGS = { 'source': 'researcher'} TELEGRAM_CHANNEL_KWARGS = { - 'name': 'South West Ohio Proud Boys (test)', - 'platform_id': -1001276612436, + 'name': 'Star Game (test)', + 'platform_id': "-1001866374682", 'category': 'test', 'platform': 'Telegram', - 'url': 'https://t.me/SouthwestOhioPB', - 'screenname': 'SouthwestOhioPB', - 'country': 'US', + 'url': 'https://t.me/stargameinfo', + 'screenname': 'stargameinfo', + 'country': 'RU', 'influencer': None, 'public': True, 'chat': False, @@ -121,7 +120,7 @@ TELEGRAM_CHANNEL_KWARGS = { TWITTER_CHANNEL_KWARGS = { 'name': 'L Weber (test)', - 'platform_id': 1424979017749442595, + 'platform_id': "1424979017749442595", 'category': 'test', 'platform': 'Twitter', 'url': 'https://twitter.com/LWeber33662141', @@ -161,16 +160,13 @@ YOUTUBE_CHANNEL_KWARGS = { 'notes': '', 'source': 'researcher'} -#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++# - @pytest.fixture(scope='package') def engine(tmpdir_factory): """Initialize a SQLite database and SQLAlchemy engine to be used for all tests in the package""" - file = tmpdir_factory.mktemp('test_data').join('test.db') - engine = create_engine(f'sqlite:///{file}') + engine = create_engine(os.environ["TEST_DB"]) return engine @@ -210,6 +206,4 @@ def channel_kwargs(): 'telegram' : TELEGRAM_CHANNEL_KWARGS, 'twitter' : TWITTER_CHANNEL_KWARGS, 'vkontakte' : VKONTAKTE_CHANNEL_KWARGS, - 'youtube' : YOUTUBE_CHANNEL_KWARGS} - -#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++# + 'youtube' : YOUTUBE_CHANNEL_KWARGS} \ No newline at end of file diff --git a/tests/scraper/telegram_snscrape.py b/tests/scraper/telegram_snscrape.py deleted file mode 100644 index 5dbe151..0000000 --- a/tests/scraper/telegram_snscrape.py +++ /dev/null @@ -1,33 +0,0 @@ -import pytest - -from cisticola.base import Channel -from cisticola.scraper import TelegramSnscrapeScraper - -@pytest.mark.unarchived -def test_scrape_telegram_snscrape_channel_no_media(controller, channel_kwargs): - - channels = [Channel(**channel_kwargs['telegram'])] - controller.register_scraper(scraper = TelegramSnscrapeScraper()) - controller.scrape_channels(channels = channels, archive_media = False) - -@pytest.mark.media -@pytest.mark.unarchived -def test_scrape_telegram_snscrape_channel_unarchived_media(controller): - - controller.archive_unarchived_media() - -@pytest.mark.media -def test_scrape_telegram_snscrape_channel(controller, channel_kwargs): - - controller.reset_db() - - channels = [Channel(**channel_kwargs['telegram'])] - controller.register_scraper(scraper = TelegramSnscrapeScraper()) - controller.scrape_channels(channels = channels, archive_media = True) - -@pytest.mark.profile -def test_scrape_telegram_snscrape_profile(channel_kwargs): - - scraper = TelegramSnscrapeScraper() - channel = Channel(**channel_kwargs['telegram']) - scraper.get_profile(channel=channel) \ No newline at end of file