vk and yt-dlp

This commit is contained in:
msramalho
2023-01-18 23:15:25 +00:00
parent 176ce7e8da
commit 9bbc13e9be
4 changed files with 74 additions and 4 deletions

View File

@@ -5,7 +5,7 @@ from .archiver import Archiverv2
# from .telethon_archiver import TelethonArchiver
# from .tiktok_archiver import TiktokArchiver
from .wayback_archiver import WaybackArchiver
from .youtubedl_archiver import YoutubeDLArchiver
# from .youtubedl_archiver import YoutubeDLArchiver
# from .twitter_archiver import TwitterArchiver
# from .vk_archiver import VkArchiver
# from .twitter_api_archiver import TwitterApiArchiver
@@ -17,4 +17,5 @@ from .twitter_api_archiverv2 import TwitterApiArchiver
from .instagram_archiverv2 import InstagramArchiver
from .tiktok_archiverv2 import TiktokArchiver
from .telegram_archiverv2 import TelegramArchiver
from .vk_archiverv2 import VkArchiver
from .vk_archiverv2 import VkArchiver
from .youtubedl_archiverv2 import YoutubeDLArchiver

View File

@@ -1,9 +1,9 @@
from loguru import logger
from vk_url_scraper import VkScraper
from utils.misc import dump_payload
from metadata import Metadata
from media import Media
from utils.misc import dump_payload
from .archiver import Archiverv2

View File

@@ -0,0 +1,70 @@
import datetime
import os
import yt_dlp
from loguru import logger
from metadata import Metadata
from media import Media
from .archiver import Archiverv2
class YoutubeDLArchiver(Archiverv2):
    """Archiver that uses yt-dlp to download the video(s) found at a URL."""

    # NOTE(review): the name says "enricher" although this class is an archiver;
    # left untouched because configuration elsewhere may refer to this string.
    name = "youtubedl_enricher"

    def __init__(self, config: dict) -> None:
        super().__init__(config)

    @staticmethod
    def configs() -> dict:
        """Return the configuration options accepted by this archiver."""
        return {
            "facebook_cookie": {"default": None, "help": "optional facebook cookie to have more access to content, from browser, looks like 'cookie: datr= xxxx'"},
        }

    def download(self, item: Metadata) -> Metadata:
        """Download the media behind ``item``'s URL with yt-dlp.

        The URL is first probed without downloading so that live streams can
        be skipped; only then is the actual download performed.

        Args:
            item: metadata object providing the URL, its netloc and a
                temporary directory to download into.

        Returns:
            A new ``Metadata`` marked successful with one ``Media`` per
            downloaded entry, or ``False`` when nothing was archived (live
            stream, no video found, or yt-dlp failed while probing).
        """
        url = item.get_url()

        if item.netloc in ['facebook.com', 'www.facebook.com'] and self.facebook_cookie:
            logger.debug('Using Facebook cookie')
            # NOTE(review): this writes into a module-level dict of yt_dlp.utils,
            # so the value outlives this call - confirm that is intended.
            yt_dlp.utils.std_headers['cookie'] = self.facebook_cookie

        ydl = yt_dlp.YoutubeDL({'outtmpl': os.path.join(item.get_tmp_dir(), '%(id)s.%(ext)s'), 'quiet': False})

        try:
            # don't download yet since it can be a live stream
            info = ydl.extract_info(url, download=False)
            if info.get('is_live', False):
                logger.warning("Live streaming media, not archiving now")
                return False
        except yt_dlp.utils.DownloadError as e:
            logger.debug(f'No video - Youtube normal control flow: {e}')
            return False
        except Exception as e:
            logger.debug(f'ytdlp exception which is normal for example a facebook page with images only will cause a IndexError: list index out of range. Exception here is: \n {e}')
            return False

        # this time download
        info = ydl.extract_info(url, download=True)

        if "entries" in info:
            entries = info.get("entries", [])
            if not entries:
                logger.warning('YoutubeDLArchiver could not find any video')
                return False
        else:
            # a single video is handled as a one-element list of entries
            entries = [info]

        result = Metadata()
        result.set_title(info.get("title"))

        for entry in entries:
            filename = ydl.prepare_filename(entry)
            if not os.path.exists(filename):
                # The expected file is missing: fall back to the same path with
                # an .mkv extension (presumably the container yt-dlp produced
                # after merging formats - TODO confirm). splitext only strips
                # the final extension, so dots in the directory or id are safe.
                filename = os.path.splitext(filename)[0] + '.mkv'
            # use the entry's own duration; for playlists the top-level info
            # dict does not describe the individual video
            result.add_media(Media(filename).set("duration", entry.get("duration")))

        # These values come from the top-level info and do not depend on the
        # entry, so they are set once after the loop.
        if (timestamp := info.get("timestamp")):
            timestamp = datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc).isoformat()
            result.set_timestamp(timestamp)
        if (upload_date := info.get("upload_date")):
            upload_date = datetime.datetime.strptime(upload_date, '%Y%m%d').replace(tzinfo=datetime.timezone.utc)
            result.set("upload_date", upload_date)

        return result.success("yt-dlp")

View File

@@ -25,7 +25,6 @@ class ThumbnailEnricher(Enricher):
folder = os.path.join(to_enrich.get_tmp_dir(), str(uuid.uuid4()))
os.makedirs(folder, exist_ok=True)
for i, m in enumerate(to_enrich.media[::]):
logger.info(m)
if m.is_video():
logger.debug(f"generating thumbnails for {m.filename}")
fps, duration = 0.5, m.get("duration")