Add RumbleScraper, YoutubeScraper, and BitchuteScraper to the active scrapers

This commit is contained in:
Logan Williams
2022-04-12 14:55:45 +02:00
parent 6f11b88f94
commit bbb9d283d5
5 changed files with 53 additions and 846 deletions

874
Pipfile.lock generated

File diff suppressed because it is too large. Load Diff

10
app.py
View File

@@ -13,6 +13,9 @@ from cisticola.scraper import (
VkontakteScraper,
TelegramTelethonScraper,
GettrScraper,
BitchuteScraper,
YoutubeScraper,
RumbleScraper,
)
@@ -92,7 +95,12 @@ def get_scraper_controller():
controller = ScraperController()
controller.connect_to_db(engine)
scrapers = [TelegramTelethonScraper(), VkontakteScraper(), GettrScraper()]
scrapers = [VkontakteScraper(),
TelegramTelethonScraper(),
GettrScraper(),
BitchuteScraper(),
RumbleScraper(),
YoutubeScraper()]
controller.register_scrapers(scrapers)

View File

@@ -10,7 +10,8 @@ from loguru import logger
import ffmpeg
from sqlalchemy.orm import sessionmaker
import yt_dlp
from sqlalchemy.sql.expression import func
from sqlalchemy.sql.expression import func
from pathlib import Path
from cisticola.base import Channel, ScraperResult, mapper_registry
from cisticola.utils import make_request
@@ -181,6 +182,10 @@ class Scraper:
content_type = 'video/mp4'
with tempfile.TemporaryDirectory() as temp_dir:
cookiefile = Path(temp_dir)/self.cookiefilename
with open(cookiefile, 'w') as f:
f.write(self.cookiestring)
ydl_opts = {
"format": "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best",
"merge_output_format": "mp4",
@@ -188,7 +193,9 @@ class Scraper:
"noplaylist": True,
"quiet": True,
"verbose": False,
"retries": 5}
"retries": 5,
"cookiefile": cookiefile}
ydl = yt_dlp.YoutubeDL(ydl_opts)
try:

View File

@@ -16,7 +16,7 @@ from cisticola.scraper.base import Scraper
class BitchuteScraper(Scraper):
"""An implementation of a Scraper for Bitchute, using classes from the 4cat
library"""
__version__ = "BitchuteScraper 0.0.0"
__version__ = "BitchuteScraper 0.0.1"
def get_username_from_url(self, url):
username = url.split('bitchute.com/channel/')[-1].strip('/')

View File

@@ -13,7 +13,7 @@ from cisticola.scraper import Scraper
class YoutubeScraper(Scraper):
"""An implementation of a Scraper for Youtube, using youtube-dl"""
__version__ = "YoutubeScraper 0.0.0"
__version__ = "YoutubeScraper 0.0.1"
cookiestring = os.environ["YOUTUBE_COOKIESTRING"].replace(r'\n', '\n').replace(r'\t', '\t')
cookiefilename = 'cookiefile.txt'