mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-11 12:48:28 +03:00
vk and yt-dlp
This commit is contained in:
@@ -5,7 +5,7 @@ from .archiver import Archiverv2
|
||||
# from .telethon_archiver import TelethonArchiver
|
||||
# from .tiktok_archiver import TiktokArchiver
|
||||
from .wayback_archiver import WaybackArchiver
|
||||
from .youtubedl_archiver import YoutubeDLArchiver
|
||||
# from .youtubedl_archiver import YoutubeDLArchiver
|
||||
# from .twitter_archiver import TwitterArchiver
|
||||
# from .vk_archiver import VkArchiver
|
||||
# from .twitter_api_archiver import TwitterApiArchiver
|
||||
@@ -17,4 +17,5 @@ from .twitter_api_archiverv2 import TwitterApiArchiver
|
||||
from .instagram_archiverv2 import InstagramArchiver
|
||||
from .tiktok_archiverv2 import TiktokArchiver
|
||||
from .telegram_archiverv2 import TelegramArchiver
|
||||
from .vk_archiverv2 import VkArchiver
|
||||
from .vk_archiverv2 import VkArchiver
|
||||
from .youtubedl_archiverv2 import YoutubeDLArchiver
|
||||
@@ -1,9 +1,9 @@
|
||||
from loguru import logger
|
||||
from vk_url_scraper import VkScraper
|
||||
|
||||
from utils.misc import dump_payload
|
||||
from metadata import Metadata
|
||||
from media import Media
|
||||
from utils.misc import dump_payload
|
||||
from .archiver import Archiverv2
|
||||
|
||||
|
||||
|
||||
70
src/archivers/youtubedl_archiverv2.py
Normal file
70
src/archivers/youtubedl_archiverv2.py
Normal file
@@ -0,0 +1,70 @@
|
||||
import datetime
|
||||
import os
|
||||
|
||||
import yt_dlp
|
||||
from loguru import logger
|
||||
|
||||
from metadata import Metadata
|
||||
from media import Media
|
||||
from .archiver import Archiverv2
|
||||
|
||||
|
||||
class YoutubeDLArchiver(Archiverv2):
    """Archiver that uses yt-dlp to download the video(s) found at a URL."""

    name = "youtubedl_enricher"

    def __init__(self, config: dict) -> None:
        super().__init__(config)

    @staticmethod
    def configs() -> dict:
        """Return the configuration options declared by this archiver."""
        return {
            "facebook_cookie": {"default": None, "help": "optional facebook cookie to have more access to content, from browser, looks like 'cookie: datr= xxxx'"},
        }

    def download(self, item: Metadata) -> Metadata:
        """Download the media behind ``item``'s URL with yt-dlp.

        The URL is probed first without downloading so that live streams can
        be skipped; only then is the actual download performed.

        Args:
            item: metadata of the item to archive; its URL, netloc and
                temporary directory are read.

        Returns:
            A new ``Metadata`` holding the title, one ``Media`` per
            downloaded entry (with its duration), and the timestamp /
            upload date when yt-dlp reports them. ``False`` is returned
            when the URL is a live stream, when yt-dlp finds no video, or
            when the probe raises.
        """
        url = item.get_url()

        if item.netloc in ['facebook.com', 'www.facebook.com'] and self.facebook_cookie:
            logger.debug('Using Facebook cookie')
            # NOTE(review): this mutates yt-dlp's module-level default
            # headers, so the cookie stays set for later calls in the same
            # process - confirm that is intended.
            yt_dlp.utils.std_headers['cookie'] = self.facebook_cookie

        ydl = yt_dlp.YoutubeDL({'outtmpl': os.path.join(item.get_tmp_dir(), '%(id)s.%(ext)s'), 'quiet': False})

        try:
            # don't download yet since it can be a live stream
            info = ydl.extract_info(url, download=False)
            if info.get('is_live', False):
                logger.warning("Live streaming media, not archiving now")
                return False
        except yt_dlp.utils.DownloadError as e:
            logger.debug(f'No video - Youtube normal control flow: {e}')
            return False
        except Exception as e:
            logger.debug(f'ytdlp exception which is normal for example a facebook page with images only will cause a IndexError: list index out of range. Exception here is: \n {e}')
            return False

        # this time download
        info = ydl.extract_info(url, download=True)
        if "entries" in info:
            # Materialise the entries (the value may be None or a lazy
            # iterable) and drop empty ones so len()/prepare_filename
            # cannot fail on them.
            entries = [entry for entry in (info.get("entries") or []) if entry]
            if not entries:
                logger.warning('YoutubeDLArchiver could not find any video')
                return False
        else:
            entries = [info]

        result = Metadata()
        result.set_title(info.get("title"))
        for entry in entries:
            filename = ydl.prepare_filename(entry)
            if not os.path.exists(filename):
                # Fall back to an .mkv file with the same stem. splitext
                # only strips the final extension, so dots in the temp
                # directory or in the video id no longer truncate the path.
                filename = os.path.splitext(filename)[0] + '.mkv'
            # Use the entry's own duration: for playlists the top-level
            # info describes the playlist, not the individual video.
            result.add_media(Media(filename).set("duration", entry.get("duration")))

        if (timestamp := info.get("timestamp")):
            # Timezone-aware UTC conversion; same value as the deprecated
            # utcfromtimestamp(...).replace(tzinfo=utc).
            timestamp = datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc).isoformat()
            result.set_timestamp(timestamp)
        if (upload_date := info.get("upload_date")):
            upload_date = datetime.datetime.strptime(upload_date, '%Y%m%d').replace(tzinfo=datetime.timezone.utc)
            result.set("upload_date", upload_date)

        return result.success("yt-dlp")
|
||||
@@ -25,7 +25,6 @@ class ThumbnailEnricher(Enricher):
|
||||
folder = os.path.join(to_enrich.get_tmp_dir(), str(uuid.uuid4()))
|
||||
os.makedirs(folder, exist_ok=True)
|
||||
for i, m in enumerate(to_enrich.media[::]):
|
||||
logger.info(m)
|
||||
if m.is_video():
|
||||
logger.debug(f"generating thumbnails for {m.filename}")
|
||||
fps, duration = 0.5, m.get("duration")
|
||||
|
||||
Reference in New Issue
Block a user