mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-08 03:18:28 +03:00
71 lines
3.5 KiB
Python
71 lines
3.5 KiB
Python
import pytest
|
|
from pathlib import Path
|
|
|
|
from auto_archiver.archivers.youtubedl_archiver import YoutubeDLArchiver
|
|
|
|
from .test_archiver_base import TestArchiverBase
|
|
|
|
class TestYoutubeDLArchiver(TestArchiverBase):
    """Tests for the YoutubeDL archiver.

    ``archiver_class`` and ``config`` are declared as class attributes and the
    tests use ``self.archiver``; presumably ``TestArchiverBase`` builds that
    instance from them (the base class is not visible here -- confirm).

    Tests marked ``download`` call ``self.archiver.download`` on real URLs.
    """

    archiver_class = YoutubeDLArchiver

    # Configuration for the archiver under test.
    config = {
        'subtitles': False,
        'comments': False,
        'livestreams': False,
        'live_from_start': False,
        'end_means_success': True,
        'allow_playlist': False,
        'max_downloads': "inf",
        'proxy': None,
        'cookies_from_browser': False,
        'cookie_file': None,
    }

    @pytest.mark.parametrize("url, is_suitable", [
        ("https://www.youtube.com/watch?v=5qap5aO4i9A", True),
        ("https://www.tiktok.com/@funnycats0ftiktok/video/7345101300750748970?lang=en", True),
        ("https://www.instagram.com/p/CU1J9JYJ9Zz/", True),
        ("https://www.facebook.com/nytimes/videos/10160796550110716", True),
        ("https://www.twitch.tv/videos/1167226570", True),
        ("https://bellingcat.com/news/2021/10/08/ukrainian-soldiers-are-being-killed-by-landmines-in-the-donbas/", True),
        ("https://google.com", True),
    ])
    def test_suitable_urls(self, make_item, url, is_suitable):
        """Check that ``suitable`` returns the expected verdict for each URL.

        Note: the expected behaviour is to return True for all URLs, as
        YoutubeDLArchiver should be able to handle all URLs.

        This behaviour may be changed in the future (e.g. if we want the
        youtubedl archiver to just handle URLs it has extractors for, and
        then, if and only if all other archivers fail, fall back to the
        generic archiver).
        """
        assert self.archiver.suitable(make_item(url)) == is_suitable

    @pytest.mark.download
    def test_download_tiktok(self, make_item):
        """Download a TikTok video and check the result keeps the item URL."""
        url = "https://www.tiktok.com/@funnycats0ftiktok/video/7345101300750748970"
        result = self.archiver.download(make_item(url))

        # Same failure check the other download tests in this class use; it
        # gives a clear assertion error instead of an AttributeError below.
        assert result is not False
        assert result.get_url() == url

    @pytest.mark.download
    def test_download_youtube(self, make_item):
        """Download a YouTube video and check its metadata and media files."""
        # The expected title, description and filenames below are tied to
        # this specific video (J---aiyznGQ).
        url = "https://www.youtube.com/watch?v=J---aiyznGQ"
        expected_description = (
            "Buy NEW Keyboard Cat Merch! https://keyboardcat.creator-spring.com\n\n"
            "xo Keyboard Cat memes make your day better!\n"
            "http://www.keyboardcatstore.com/\n"
            "https://www.facebook.com/thekeyboardcat\n"
            "http://www.charlieschmidt.com/"
        )

        result = self.archiver.download(make_item(url))

        # Same failure check the other download tests in this class use; it
        # gives a clear assertion error instead of an AttributeError below.
        assert result is not False
        assert result.get_url() == url
        assert result.get_title() == "Keyboard Cat! - THE ORIGINAL!"
        assert result.get('description') == expected_description

        # Exactly two media entries are expected: the video, then the thumbnail.
        assert len(result.media) == 2
        assert Path(result.media[0].filename).name == "J---aiyznGQ.webm"
        assert Path(result.media[1].filename).name == "hqdefault.jpg"

    @pytest.mark.skip(reason="ytdlp supports bluesky, but there's currently no way to extract info from pages without videos")
    @pytest.mark.download
    def test_download_bluesky_with_images(self, make_item):
        """Download a Bluesky post (skipped; see the skip reason)."""
        item = make_item("https://bsky.app/profile/colborne.bsky.social/post/3lec2bqjc5s2y")
        result = self.archiver.download(item)
        assert result is not False

    @pytest.mark.skip(reason="ytdlp supports twitter, but there's currently no way to extract info from pages without videos")
    @pytest.mark.download
    def test_download_twitter_textonly(self, make_item):
        """Download an X/Twitter post (skipped; see the skip reason)."""
        item = make_item("https://x.com/bellingcat/status/1874097816571961839")
        result = self.archiver.download(item)
        assert result is not False