mirror of
https://github.com/bellingcat/cisticola.git
synced 2026-06-11 12:58:33 +03:00
fixed some issues with Telegram tests
This commit is contained in:
@@ -120,8 +120,6 @@ class BitchuteScraper(Scraper):
|
||||
raw_data=json.dumps(profile, default = str),
|
||||
date_archived=datetime.now(timezone.utc))
|
||||
|
||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||
|
||||
def strip_tags(html, convert_newlines=True):
|
||||
r"""
|
||||
Strip HTML from a string
|
||||
@@ -157,8 +155,6 @@ def strip_tags(html, convert_newlines=True):
|
||||
stripper.feed(html)
|
||||
return stripper.get_data()
|
||||
|
||||
#-----------------------------------------------------------------------------#
|
||||
|
||||
def request_from_bitchute(session, method, url, headers=None, data=None):
|
||||
"""
|
||||
Request something via the BitChute API (or non-API)
|
||||
@@ -203,8 +199,6 @@ def request_from_bitchute(session, method, url, headers=None, data=None):
|
||||
|
||||
return response
|
||||
|
||||
#-----------------------------------------------------------------------------#
|
||||
|
||||
def append_details(video, detail):
|
||||
"""
|
||||
Append extra metadata to video data
|
||||
@@ -395,8 +389,6 @@ def append_details(video, detail):
|
||||
time.sleep(0.25)
|
||||
return (video, comments)
|
||||
|
||||
#-----------------------------------------------------------------------------#
|
||||
|
||||
def get_videos_user(session, user, csrftoken, detail):
|
||||
"""
|
||||
Scrape videos for given BitChute user
|
||||
@@ -470,8 +462,6 @@ def get_videos_user(session, user, csrftoken, detail):
|
||||
# before the video, which is weird
|
||||
yield comment
|
||||
|
||||
#-----------------------------------------------------------------------------#
|
||||
|
||||
def decode_cfemail(cfemail):
|
||||
|
||||
"""https://stackoverflow.com/questions/36911296/scraping-of-protected-email
|
||||
|
||||
@@ -80,8 +80,6 @@ class RumbleScraper(Scraper):
|
||||
raw_data=json.dumps(profile),
|
||||
date_archived=datetime.now(timezone.utc))
|
||||
|
||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||
|
||||
def get_media_url(url):
|
||||
|
||||
r = make_request(url = url)
|
||||
@@ -182,6 +180,4 @@ def get_channel_profile(url):
|
||||
'cover': cover_soup.get('src') if cover_soup else None,
|
||||
'subscribers': soup.find('span', {'class' : 'subscribe-button-count'}).text}
|
||||
|
||||
return profile
|
||||
|
||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||
return profile
|
||||
@@ -1,3 +1,4 @@
|
||||
import os
|
||||
import pytest
|
||||
|
||||
from sqlalchemy import create_engine
|
||||
@@ -5,8 +6,6 @@ from sqlalchemy import create_engine
|
||||
from cisticola.scraper import ScraperController
|
||||
from cisticola.transformer import ETLController
|
||||
|
||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||
|
||||
BITCHUTE_CHANNEL_KWARGS = {
|
||||
'name': 'bestonlinejewelrystoresusa@gmail.com (test)',
|
||||
'platform_id': 'bestonlinejewelrystoresusagmailcom',
|
||||
@@ -37,7 +36,7 @@ GAB_CHANNEL_KWARGS = {
|
||||
|
||||
GAB_GROUP_KWARGS = {
|
||||
'name': 'iran group (test)',
|
||||
'platform_id': 10001,
|
||||
'platform_id': "10001",
|
||||
'category': 'test',
|
||||
'platform': 'Gab',
|
||||
'url': 'https://gab.com/groups/10001',
|
||||
@@ -92,7 +91,7 @@ ODYSEE_CHANNEL_KWARGS = {
|
||||
'source': 'researcher'}
|
||||
|
||||
RUMBLE_CHANNEL_KWARGS = {
|
||||
'name': 'we are uploading videos wow products',
|
||||
'name': 'we are uploading videos wow products (test)',
|
||||
'platform_id': 'c-916305',
|
||||
'category': 'test',
|
||||
'platform': 'Rumble',
|
||||
@@ -106,13 +105,13 @@ RUMBLE_CHANNEL_KWARGS = {
|
||||
'source': 'researcher'}
|
||||
|
||||
TELEGRAM_CHANNEL_KWARGS = {
|
||||
'name': 'South West Ohio Proud Boys (test)',
|
||||
'platform_id': -1001276612436,
|
||||
'name': 'Star Game (test)',
|
||||
'platform_id': "-1001866374682",
|
||||
'category': 'test',
|
||||
'platform': 'Telegram',
|
||||
'url': 'https://t.me/SouthwestOhioPB',
|
||||
'screenname': 'SouthwestOhioPB',
|
||||
'country': 'US',
|
||||
'url': 'https://t.me/stargameinfo',
|
||||
'screenname': 'stargameinfo',
|
||||
'country': 'RU',
|
||||
'influencer': None,
|
||||
'public': True,
|
||||
'chat': False,
|
||||
@@ -121,7 +120,7 @@ TELEGRAM_CHANNEL_KWARGS = {
|
||||
|
||||
TWITTER_CHANNEL_KWARGS = {
|
||||
'name': 'L Weber (test)',
|
||||
'platform_id': 1424979017749442595,
|
||||
'platform_id': "1424979017749442595",
|
||||
'category': 'test',
|
||||
'platform': 'Twitter',
|
||||
'url': 'https://twitter.com/LWeber33662141',
|
||||
@@ -161,16 +160,13 @@ YOUTUBE_CHANNEL_KWARGS = {
|
||||
'notes': '',
|
||||
'source': 'researcher'}
|
||||
|
||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||
|
||||
|
||||
@pytest.fixture(scope='package')
|
||||
def engine(tmpdir_factory):
|
||||
"""Initialize a SQLite database and SQLAlchemy engine to be used for all
|
||||
tests in the package"""
|
||||
|
||||
file = tmpdir_factory.mktemp('test_data').join('test.db')
|
||||
engine = create_engine(f'sqlite:///{file}')
|
||||
engine = create_engine(os.environ["TEST_DB"])
|
||||
|
||||
return engine
|
||||
|
||||
@@ -210,6 +206,4 @@ def channel_kwargs():
|
||||
'telegram' : TELEGRAM_CHANNEL_KWARGS,
|
||||
'twitter' : TWITTER_CHANNEL_KWARGS,
|
||||
'vkontakte' : VKONTAKTE_CHANNEL_KWARGS,
|
||||
'youtube' : YOUTUBE_CHANNEL_KWARGS}
|
||||
|
||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||
'youtube' : YOUTUBE_CHANNEL_KWARGS}
|
||||
@@ -1,33 +0,0 @@
|
||||
import pytest
|
||||
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import TelegramSnscrapeScraper
|
||||
|
||||
@pytest.mark.unarchived
|
||||
def test_scrape_telegram_snscrape_channel_no_media(controller, channel_kwargs):
|
||||
|
||||
channels = [Channel(**channel_kwargs['telegram'])]
|
||||
controller.register_scraper(scraper = TelegramSnscrapeScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
@pytest.mark.media
|
||||
@pytest.mark.unarchived
|
||||
def test_scrape_telegram_snscrape_channel_unarchived_media(controller):
|
||||
|
||||
controller.archive_unarchived_media()
|
||||
|
||||
@pytest.mark.media
|
||||
def test_scrape_telegram_snscrape_channel(controller, channel_kwargs):
|
||||
|
||||
controller.reset_db()
|
||||
|
||||
channels = [Channel(**channel_kwargs['telegram'])]
|
||||
controller.register_scraper(scraper = TelegramSnscrapeScraper())
|
||||
controller.scrape_channels(channels = channels, archive_media = True)
|
||||
|
||||
@pytest.mark.profile
|
||||
def test_scrape_telegram_snscrape_profile(channel_kwargs):
|
||||
|
||||
scraper = TelegramSnscrapeScraper()
|
||||
channel = Channel(**channel_kwargs['telegram'])
|
||||
scraper.get_profile(channel=channel)
|
||||
Reference in New Issue
Block a user