mirror of
https://github.com/bellingcat/cisticola.git
synced 2026-06-08 03:18:34 +03:00
Add RumbleScraper, YoutubeScraper, and BitchuteScraper to the active scrapers
This commit is contained in:
874
Pipfile.lock
generated
874
Pipfile.lock
generated
File diff suppressed because it is too large
Load Diff
10
app.py
10
app.py
@@ -13,6 +13,9 @@ from cisticola.scraper import (
|
||||
VkontakteScraper,
|
||||
TelegramTelethonScraper,
|
||||
GettrScraper,
|
||||
BitchuteScraper,
|
||||
YoutubeScraper,
|
||||
RumbleScraper,
|
||||
)
|
||||
|
||||
|
||||
@@ -92,7 +95,12 @@ def get_scraper_controller():
|
||||
controller = ScraperController()
|
||||
controller.connect_to_db(engine)
|
||||
|
||||
scrapers = [TelegramTelethonScraper(), VkontakteScraper(), GettrScraper()]
|
||||
scrapers = [VkontakteScraper(),
|
||||
TelegramTelethonScraper(),
|
||||
GettrScraper(),
|
||||
BitchuteScraper(),
|
||||
RumbleScraper(),
|
||||
YoutubeScraper()]
|
||||
|
||||
controller.register_scrapers(scrapers)
|
||||
|
||||
|
||||
@@ -10,7 +10,8 @@ from loguru import logger
|
||||
import ffmpeg
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
import yt_dlp
|
||||
from sqlalchemy.sql.expression import func
|
||||
from sqlalchemy.sql.expression import func
|
||||
from pathlib import Path
|
||||
|
||||
from cisticola.base import Channel, ScraperResult, mapper_registry
|
||||
from cisticola.utils import make_request
|
||||
@@ -181,6 +182,10 @@ class Scraper:
|
||||
content_type = 'video/mp4'
|
||||
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
cookiefile = Path(temp_dir)/self.cookiefilename
|
||||
with open(cookiefile, 'w') as f:
|
||||
f.write(self.cookiestring)
|
||||
|
||||
ydl_opts = {
|
||||
"format": "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best",
|
||||
"merge_output_format": "mp4",
|
||||
@@ -188,7 +193,9 @@ class Scraper:
|
||||
"noplaylist": True,
|
||||
"quiet": True,
|
||||
"verbose": False,
|
||||
"retries": 5}
|
||||
"retries": 5,
|
||||
"cookiefile": cookiefile}
|
||||
|
||||
ydl = yt_dlp.YoutubeDL(ydl_opts)
|
||||
|
||||
try:
|
||||
|
||||
@@ -16,7 +16,7 @@ from cisticola.scraper.base import Scraper
|
||||
class BitchuteScraper(Scraper):
|
||||
"""An implementation of a Scraper for Bitchute, using classes from the 4cat
|
||||
library"""
|
||||
__version__ = "BitchuteScraper 0.0.0"
|
||||
__version__ = "BitchuteScraper 0.0.1"
|
||||
|
||||
def get_username_from_url(self, url):
|
||||
username = url.split('bitchute.com/channel/')[-1].strip('/')
|
||||
|
||||
@@ -13,7 +13,7 @@ from cisticola.scraper import Scraper
|
||||
|
||||
class YoutubeScraper(Scraper):
|
||||
"""An implementation of a Scraper for Youtube, using youtube-dl"""
|
||||
__version__ = "YoutubeScraper 0.0.0"
|
||||
__version__ = "YoutubeScraper 0.0.1"
|
||||
|
||||
cookiestring = os.environ["YOUTUBE_COOKIESTRING"].replace(r'\n', '\n').replace(r'\t', '\t')
|
||||
cookiefilename = 'cookiefile.txt'
|
||||
|
||||
Reference in New Issue
Block a user