diff --git a/src/auto_archiver/modules/generic_extractor/generic_extractor.py b/src/auto_archiver/modules/generic_extractor/generic_extractor.py index 71bdbf2..dae9381 100644 --- a/src/auto_archiver/modules/generic_extractor/generic_extractor.py +++ b/src/auto_archiver/modules/generic_extractor/generic_extractor.py @@ -4,6 +4,7 @@ import datetime import os import importlib import subprocess +import traceback import zipfile from typing import Generator, Type @@ -305,7 +306,7 @@ class GenericExtractor(Extractor): result.set_url(url) if "description" in video_data and not result.get("content"): - result.set_content(video_data.get("description")) + result.set_content(video_data.pop("description")) # extract comments if enabled if self.comments and video_data.get("comments", None) is not None: result.set( @@ -406,9 +407,9 @@ class GenericExtractor(Extractor): logger.error(f"Error loading subtitle file {val.get('filepath')}: {e}") result.add_media(new_media) except Exception as e: - logger.error(f"Error processing entry {entry}: {e}") + logger.error(f"Error processing entry {str(entry)[:256]}: {e} {traceback.format_exc()}") if not len(result.media): - logger.info(f"No media found for entry {entry}, skipping.") + logger.info(f"No media found for entry {str(entry)[:256]}, skipping.") return False return self.add_metadata(data, info_extractor, url, result) @@ -604,9 +605,9 @@ class GenericExtractor(Extractor): validated_options ) # allsubtitles and subtitleslangs not working as expected, so default lang is always "en" + result: Metadata = None for info_extractor in self.suitable_extractors(url): - result = self.download_for_extractor(info_extractor, url, ydl) - if result: - return result - - return False + local_result: Metadata = self.download_for_extractor(info_extractor, url, ydl) + if local_result: + result = result.merge(local_result) if result else local_result + return result if result else False diff --git a/tests/extractors/test_generic_extractor.py b/tests/extractors/test_generic_extractor.py index fe3baf5..e5d2f3a 100644 --- a/tests/extractors/test_generic_extractor.py +++ b/tests/extractors/test_generic_extractor.py @@ -48,8 +48,6 @@ class TestGenericExtractor(TestExtractorBase): ("https://www.youtube.com/watch?v=5qap5aO4i9A", ["youtube"]), ("https://www.tiktok.com/@funnycats0ftiktok/video/7345101300750748970?lang=en", ["tiktok"]), ("https://www.instagram.com/p/CU1J9JYJ9Zz/", ["instagram"]), - ("https://www.facebook.com/nytimes/videos/10160796550110716", ["facebook"]), - ("https://www.facebook.com/BylineFest/photos/t.100057299682816/927879487315946/", ["facebook"]), ], ) def test_suitable_extractors(self, url, suitable_extractors): @@ -148,6 +146,7 @@ class TestGenericExtractor(TestExtractorBase): def test_bluesky_download_video(self, make_item): item = make_item("https://bsky.app/profile/bellingcat.com/post/3le2l4gsxlk2i") result = self.extractor.download(item) + assert result.get_url() == "https://bsky.app/profile/bellingcat.com/post/3le2l4gsxlk2i" assert result is not False @pytest.mark.skipif(not TEST_TRUTH_SOCIAL, reason="Truth social download tests disabled in environment variables.")