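Fixed some issues with Telegram tests: switched the Telegram test channel, made platform_id values strings, read the test database URL from the TEST_DB environment variable, and removed the snscrape Telegram tests and separator comments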

2026-06-11 12:58:33 +03:00 · 2023-07-27 13:07:44 -05:00
parent 99cc4d80b2
commit 249f411a1d
4 changed files with 12 additions and 65 deletions
--- a/cisticola/scraper/bitchute.py
+++ b/cisticola/scraper/bitchute.py
@@ -120,8 +120,6 @@ class BitchuteScraper(Scraper):
            raw_data=json.dumps(profile, default = str),
            date_archived=datetime.now(timezone.utc))
            
-#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
-
 def strip_tags(html, convert_newlines=True):
    r"""
    Strip HTML from a string
@@ -157,8 +155,6 @@ def strip_tags(html, convert_newlines=True):
    stripper.feed(html)
    return stripper.get_data()

-#-----------------------------------------------------------------------------#
-
 def request_from_bitchute(session, method, url, headers=None, data=None):
    """
    Request something via the BitChute API (or non-API)
@@ -203,8 +199,6 @@ def request_from_bitchute(session, method, url, headers=None, data=None):

    return response

-#-----------------------------------------------------------------------------#
-
 def append_details(video, detail):
    """
    Append extra metadata to video data
@@ -395,8 +389,6 @@ def append_details(video, detail):
    time.sleep(0.25)
    return (video, comments)

-#-----------------------------------------------------------------------------#
-
 def get_videos_user(session, user, csrftoken, detail):
    """
    Scrape videos for given BitChute user
@@ -470,8 +462,6 @@ def get_videos_user(session, user, csrftoken, detail):
                # before the video, which is weird
                yield comment

-#-----------------------------------------------------------------------------#
-
 def decode_cfemail(cfemail):
    
    """https://stackoverflow.com/questions/36911296/scraping-of-protected-email
--- a/cisticola/scraper/rumble.py
+++ b/cisticola/scraper/rumble.py
@@ -80,8 +80,6 @@ class RumbleScraper(Scraper):
            raw_data=json.dumps(profile),
            date_archived=datetime.now(timezone.utc))

-#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
-
 def get_media_url(url):
    
    r = make_request(url = url)
@@ -182,6 +180,4 @@ def get_channel_profile(url):
        'cover':  cover_soup.get('src') if cover_soup else None,
        'subscribers': soup.find('span', {'class' : 'subscribe-button-count'}).text}
        
-    return profile
-
-#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
+    return profile
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,3 +1,4 @@
+import os
 import pytest

 from sqlalchemy import create_engine
@@ -5,8 +6,6 @@ from sqlalchemy import create_engine
 from cisticola.scraper import ScraperController
 from cisticola.transformer import ETLController

-#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
-
 BITCHUTE_CHANNEL_KWARGS = {
    'name': 'bestonlinejewelrystoresusa@gmail.com (test)',
    'platform_id': 'bestonlinejewelrystoresusagmailcom',
@@ -37,7 +36,7 @@ GAB_CHANNEL_KWARGS = {

 GAB_GROUP_KWARGS = {
    'name': 'iran group (test)',
-    'platform_id': 10001,
+    'platform_id': "10001",
    'category': 'test',
    'platform': 'Gab',
    'url': 'https://gab.com/groups/10001',
@@ -92,7 +91,7 @@ ODYSEE_CHANNEL_KWARGS = {
    'source': 'researcher'}

 RUMBLE_CHANNEL_KWARGS = {
-    'name': 'we are uploading videos wow products',
+    'name': 'we are uploading videos wow products (test)',
    'platform_id': 'c-916305',
    'category': 'test',
    'platform': 'Rumble',
@@ -106,13 +105,13 @@ RUMBLE_CHANNEL_KWARGS = {
    'source': 'researcher'}

 TELEGRAM_CHANNEL_KWARGS = {
-    'name': 'South West Ohio Proud Boys (test)',
-    'platform_id': -1001276612436,
+    'name': 'Star Game (test)',
+    'platform_id': "-1001866374682",
    'category': 'test',
    'platform': 'Telegram',
-    'url': 'https://t.me/SouthwestOhioPB',
-    'screenname': 'SouthwestOhioPB',
-    'country': 'US',
+    'url': 'https://t.me/stargameinfo',
+    'screenname': 'stargameinfo',
+    'country': 'RU',
    'influencer': None,
    'public': True,
    'chat': False,
@@ -121,7 +120,7 @@ TELEGRAM_CHANNEL_KWARGS = {
    
 TWITTER_CHANNEL_KWARGS = {
    'name': 'L Weber (test)',
-    'platform_id': 1424979017749442595,
+    'platform_id': "1424979017749442595",
    'category': 'test',
    'platform': 'Twitter',
    'url': 'https://twitter.com/LWeber33662141',
@@ -161,16 +160,13 @@ YOUTUBE_CHANNEL_KWARGS = {
    'notes': '',
    'source': 'researcher'}

-#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
-

@pytest.fixture(scope='package')
 def engine(tmpdir_factory):
    """Initialize a SQLite database and SQLAlchemy engine to be used for all
    tests in the package"""

-    file = tmpdir_factory.mktemp('test_data').join('test.db')
-    engine = create_engine(f'sqlite:///{file}')
+    engine = create_engine(os.environ["TEST_DB"])
    
    return engine

@@ -210,6 +206,4 @@ def channel_kwargs():
        'telegram' : TELEGRAM_CHANNEL_KWARGS,
        'twitter' : TWITTER_CHANNEL_KWARGS,
        'vkontakte' : VKONTAKTE_CHANNEL_KWARGS,
-        'youtube' : YOUTUBE_CHANNEL_KWARGS}
-
-#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
+        'youtube' : YOUTUBE_CHANNEL_KWARGS}
--- a/tests/scraper/telegram_snscrape.py
+++ b/tests/scraper/telegram_snscrape.py
@@ -1,33 +0,0 @@
-import pytest
-
-from cisticola.base import Channel
-from cisticola.scraper import TelegramSnscrapeScraper
-
-@pytest.mark.unarchived
-def test_scrape_telegram_snscrape_channel_no_media(controller, channel_kwargs):
-
-    channels = [Channel(**channel_kwargs['telegram'])]
-    controller.register_scraper(scraper = TelegramSnscrapeScraper())
-    controller.scrape_channels(channels = channels, archive_media = False)
-
-@pytest.mark.media
-@pytest.mark.unarchived
-def test_scrape_telegram_snscrape_channel_unarchived_media(controller):
-
-    controller.archive_unarchived_media()
-
-@pytest.mark.media
-def test_scrape_telegram_snscrape_channel(controller, channel_kwargs):
-
-    controller.reset_db()
-
-    channels = [Channel(**channel_kwargs['telegram'])]
-    controller.register_scraper(scraper = TelegramSnscrapeScraper())
-    controller.scrape_channels(channels = channels, archive_media = True)
-
-@pytest.mark.profile
-def test_scrape_telegram_snscrape_profile(channel_kwargs):
-
-    scraper = TelegramSnscrapeScraper()
-    channel = Channel(**channel_kwargs['telegram'])
-    scraper.get_profile(channel=channel)