generic_extractor: add `proxy_on_failure_only` config so that, when a proxy is set, it is used only if the initial non-proxied download fails (avoids overzealous proxy use)

This commit is contained in:
msramalho
2025-07-05 16:54:58 +01:00
parent 52ed8196a5
commit a8c1ef3912
2 changed files with 20 additions and 2 deletions

View File

@@ -60,6 +60,10 @@ If you are having issues with the extractor, you can review the version of `yt-d
"default": "",
"help": "http/https/socks proxy to use for the webdriver, eg https://proxy-user:password@proxy-ip:port",
},
"proxy_on_failure_only": {
"default": True,
"help": "Applies only if a proxy is set. In that case if this setting is True, the extractor will only use the proxy if the initial request fails; if it is False, the extractor will always use the proxy.",
},
"end_means_success": {
"default": True,
"help": "if True, any archived content will mean a 'success', if False this extractor will not return a 'success' stage; this is useful for cases when the yt-dlp will archive a video but ignore other types of content like images or text only pages that the subsequent extractors can retrieve.",

View File

@@ -502,6 +502,9 @@ class GenericExtractor(Extractor):
try:
result = self.get_metadata_for_post(info_extractor, url, ydl)
except (yt_dlp.utils.DownloadError, yt_dlp.utils.ExtractorError) as post_e:
if "NSFW tweet requires authentication." in str(post_e):
logger.warning(str(post_e))
return False
logger.error("Error downloading metadata for post: {error}", error=str(post_e))
return False
except Exception as generic_e:
@@ -525,13 +528,24 @@ class GenericExtractor(Extractor):
return result
def download(self, item: Metadata) -> Metadata:
def download(self, item: Metadata, skip_proxy: bool = False) -> Metadata:
url = item.get_url()
# TODO: this is a temporary hack until this issue is closed: https://github.com/yt-dlp/yt-dlp/issues/11025
if url.startswith("https://ya.ru"):
url = url.replace("https://ya.ru", "https://yandex.ru")
item.set("replaced_url", url)
logger.debug(f"{skip_proxy=}, {self.proxy_on_failure_only=}, {self.proxy=}")
# proxy_on_failure_only logic
if self.proxy and self.proxy_on_failure_only and not skip_proxy:
# when proxy_on_failure_only is True, we first try to download without a proxy and only continue with execution if that fails
try:
if without_proxy := self.download(item, skip_proxy=True):
logger.info("Downloaded successfully without proxy.")
return without_proxy
except Exception:
logger.debug("Download without proxy failed, trying with proxy...")
ydl_options = [
"-o",
@@ -546,7 +560,7 @@ class GenericExtractor(Extractor):
]
# proxy handling
if self.proxy:
if self.proxy and not skip_proxy:
ydl_options.extend(["--proxy", self.proxy])
# max_downloads handling