Files
auto-archiver/tests/archivers/test_youtubedl_archiver.py
2025-01-15 19:00:57 +01:00

71 lines
3.5 KiB
Python

import pytest
from pathlib import Path
from auto_archiver.archivers.youtubedl_archiver import YoutubeDLArchiver
from .test_archiver_base import TestArchiverBase
class TestYoutubeDLArchiver(TestArchiverBase):
    """Test suite for ``YoutubeDLArchiver``.

    ``archiver_class`` and ``config`` are class-level settings; the tests use
    ``self.archiver``, which is presumably built from them by
    ``TestArchiverBase`` (not visible here -- confirm against the base class).
    Tests marked ``download`` call ``self.archiver.download`` on real URLs.
    """

    archiver_class = YoutubeDLArchiver
    config = {
        "subtitles": False,
        "comments": False,
        "livestreams": False,
        "live_from_start": False,
        "end_means_success": True,
        "allow_playlist": False,
        "max_downloads": "inf",
        "proxy": None,
        "cookies_from_browser": False,
        "cookie_file": None,
    }

    @pytest.mark.parametrize(
        "url, is_suitable",
        [
            ("https://www.youtube.com/watch?v=5qap5aO4i9A", True),
            ("https://www.tiktok.com/@funnycats0ftiktok/video/7345101300750748970?lang=en", True),
            ("https://www.instagram.com/p/CU1J9JYJ9Zz/", True),
            ("https://www.facebook.com/nytimes/videos/10160796550110716", True),
            ("https://www.twitch.tv/videos/1167226570", True),
            ("https://bellingcat.com/news/2021/10/08/ukrainian-soldiers-are-being-killed-by-landmines-in-the-donbas/", True),
            ("https://google.com", True),
        ],
    )
    def test_suitable_urls(self, make_item, url, is_suitable):
        """Check ``suitable()`` against the expected flag for each URL.

        Note: every URL is currently expected to be suitable, because the
        YoutubeDL archiver should be able to handle all URLs. This may change
        in the future, e.g. if the archiver is restricted to URLs it has
        extractors for and only falls back to the generic archiver once all
        other archivers have failed.
        """
        item = make_item(url)
        assert self.archiver.suitable(item) == is_suitable

    @pytest.mark.download
    def test_download_tiktok(self, make_item):
        """Download a TikTok video and check the result keeps the item's URL."""
        url = "https://www.tiktok.com/@funnycats0ftiktok/video/7345101300750748970"
        result = self.archiver.download(make_item(url))
        assert result.get_url() == url

    @pytest.mark.download
    def test_download_youtube(self, make_item):
        """Download a YouTube video and check its metadata and media files."""
        # NOTE(review): the original comment here referenced
        # https://www.youtube.com/watch?v=5qap5aO4i9A, presumably a previously
        # used video -- confirm whether it is still relevant.
        url = "https://www.youtube.com/watch?v=J---aiyznGQ"
        expected_description = (
            "Buy NEW Keyboard Cat Merch! https://keyboardcat.creator-spring.com\n\n"
            "xo Keyboard Cat memes make your day better!\n"
            "http://www.keyboardcatstore.com/\n"
            "https://www.facebook.com/thekeyboardcat\n"
            "http://www.charlieschmidt.com/"
        )

        result = self.archiver.download(make_item(url))

        assert result.get_url() == url
        assert result.get_title() == "Keyboard Cat! - THE ORIGINAL!"
        assert result.get('description') == expected_description

        # Exactly two media entries are expected: the video, then its thumbnail.
        assert len(result.media) == 2
        media_names = [Path(entry.filename).name for entry in result.media]
        assert media_names[0] == "J---aiyznGQ.webm"
        assert media_names[1] == "hqdefault.jpg"

    @pytest.mark.skip(reason="ytdlp supports bluesky, but there's currently no way to extract info from pages without videos")
    @pytest.mark.download
    def test_download_bluesky_with_images(self, make_item):
        """Download a Bluesky post and require a result other than ``False`` (skipped)."""
        url = "https://bsky.app/profile/colborne.bsky.social/post/3lec2bqjc5s2y"
        result = self.archiver.download(make_item(url))
        assert result is not False

    @pytest.mark.skip(reason="ytdlp supports twitter, but there's currently no way to extract info from pages without videos")
    @pytest.mark.download
    def test_download_twitter_textonly(self, make_item):
        """Download an X/Twitter post and require a result other than ``False`` (skipped)."""
        url = "https://x.com/bellingcat/status/1874097816571961839"
        result = self.archiver.download(make_item(url))
        assert result is not False