From d60d02c16ebfded91c9b6d88652de77c1227b1d1 Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Wed, 11 Jun 2025 16:50:31 +0100 Subject: [PATCH] improves download_from_url --- src/auto_archiver/core/extractor.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/auto_archiver/core/extractor.py b/src/auto_archiver/core/extractor.py index ca3359d..5dca928 100644 --- a/src/auto_archiver/core/extractor.py +++ b/src/auto_archiver/core/extractor.py @@ -77,6 +77,8 @@ class Extractor(BaseModule): downloads a URL to provided filename, or inferred from URL, returns local filename Warning: if try_best_quality is True, it will return a tuple of (filename, best_quality_url) if the download was successful. """ + if any(url.startswith(x) for x in ["blob:", "data:"]): + return (None, url) if try_best_quality else None if try_best_quality: with suppress(Exception): @@ -116,6 +118,8 @@ class Extractor(BaseModule): except requests.RequestException as e: logger.warning(f"Failed to fetch the Media URL: {str(e)[:250]}") + if try_best_quality: + return None, url @abstractmethod def download(self, item: Metadata) -> Metadata | False: