mirror of
https://github.com/bellingcat/cisticola.git
synced 2026-06-13 05:48:33 +03:00
Added YouTube scraper; moved from the official youtube-dl repo to yt-dlp because its download speed for YouTube videos is much better.
This commit is contained in:
@@ -126,6 +126,21 @@ VKONTAKTE_CHANNEL_KWARGS = {
|
||||
'chat': False,
|
||||
'notes': ''}
|
||||
|
||||
# Keyword arguments describing the YouTube channel used by the scraper tests.
# The tests expand this mapping into ``Channel(**kwargs)``, so the keys are
# presumably the field names of ``cisticola.base.Channel`` -- confirm there.
YOUTUBE_CHANNEL_KWARGS = {
    'id': 7,
    'name': 'AnEs87 (test)',
    # ``url`` below is the channel page for this same platform id.
    'platform_id': 'UCP6exBqGoxGLv_pM9Dxk2pA',
    'category': 'test',
    'followers': None,
    'platform': 'Youtube',
    'url': 'https://www.youtube.com/channel/UCP6exBqGoxGLv_pM9Dxk2pA',
    'screenname': 'AnEs87',
    'country': 'SV',
    'influencer': None,
    'public': True,
    'chat': False,
    'notes': '',
}
|
||||
|
||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||
|
||||
@pytest.fixture(scope='package')
|
||||
@@ -158,6 +173,7 @@ def channel_kwargs():
|
||||
'rumble' : RUMBLE_CHANNEL_KWARGS,
|
||||
'telegram' : TELEGRAM_CHANNEL_KWARGS,
|
||||
'twitter' : TWITTER_CHANNEL_KWARGS,
|
||||
'vkontakte' : VKONTAKTE_CHANNEL_KWARGS}
|
||||
'vkontakte' : VKONTAKTE_CHANNEL_KWARGS,
|
||||
'youtube' : YOUTUBE_CHANNEL_KWARGS}
|
||||
|
||||
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#
|
||||
16
tests/scraper/youtube.py
Normal file
16
tests/scraper/youtube.py
Normal file
@@ -0,0 +1,16 @@
|
||||
from cisticola.base import Channel
|
||||
from cisticola.scraper import YoutubeScraper
|
||||
|
||||
def test_scrape_youtube_channel_no_media(controller, channel_kwargs):
    """Scrape the YouTube test channel with ``archive_media = False``.

    Builds a single ``Channel`` from ``channel_kwargs['youtube']``, registers
    a ``YoutubeScraper`` on ``controller`` and runs ``scrape_channels``.
    There are no explicit assertions: the test passes when the scrape
    completes without raising.

    Args:
        controller: Fixture object exposing ``register_scraper`` and
            ``scrape_channels``.
        channel_kwargs: Fixture mapping of platform name to ``Channel``
            keyword arguments.
    """
    channels = [Channel(**channel_kwargs['youtube'])]
    controller.register_scraper(scraper = YoutubeScraper())
    # NOTE(review): archive_media = False presumably skips downloading and
    # archiving media files -- confirm against the controller implementation.
    controller.scrape_channels(channels = channels, archive_media = False)
|
||||
|
||||
def test_scrape_youtube_channel(controller, channel_kwargs):
    """Scrape the YouTube test channel with ``archive_media = True``.

    Calls ``controller.reset_db()`` first, then builds a single ``Channel``
    from ``channel_kwargs['youtube']``, registers a ``YoutubeScraper`` and
    runs ``scrape_channels``. There are no explicit assertions: the test
    passes when the scrape completes without raising.

    Args:
        controller: Fixture object exposing ``reset_db``,
            ``register_scraper`` and ``scrape_channels``.
        channel_kwargs: Fixture mapping of platform name to ``Channel``
            keyword arguments.
    """
    # NOTE(review): presumably clears results left by earlier tests so the
    # channel is scraped from scratch -- confirm what reset_db() removes.
    controller.reset_db()

    channels = [Channel(**channel_kwargs['youtube'])]
    controller.register_scraper(scraper = YoutubeScraper())
    controller.scrape_channels(channels = channels, archive_media = True)
|
||||
Reference in New Issue
Block a user