removes exclude_media_extensions option

This commit is contained in:
msramalho
2025-06-10 18:34:33 +01:00
parent 6279610a43
commit 8314833ae8
4 changed files with 3 additions and 14 deletions

View File

@@ -17,10 +17,6 @@
"default": 50,
"help": "maximum number of videos to download from the page (0 = no download, inf = no limit).",
},
"exclude_media_extensions": {
"default": ".svg,.ico,.gif",
"help": "CSV of media (image/video) file extensions to exclude from download",
},
"user_data_dir": {
"default": "secrets/antibot_user_data",
"help": "Path to the user data directory for the webdriver. This is used to persist browser state, such as cookies and local storage. If you use the docker deployment, this path will be appended with `_docker` that is because the folder cannot be shared between the host and the container due to user permissions.",

View File

@@ -1,6 +1,5 @@
import base64
import math
import mimetypes
import os
import sys
import traceback
@@ -26,10 +25,6 @@ class AntibotExtractorEnricher(Extractor, Enricher):
self.agent = None # Use the default UserAgent
# parse configuration options
self.exclude_media_mimetypes = set(
[mimetypes.guess_type(f"file{m}")[0] for m in self.exclude_media_extensions.split(",")]
) - {None}
if self.max_download_images == "inf":
self.max_download_images = math.inf
else:
@@ -292,9 +287,6 @@ class AntibotExtractorEnricher(Extractor, Enricher):
break
if not is_relevant_url(src):
continue
mimetype = mimetypes.guess_type(src)[0]
if mimetype in self.exclude_media_mimetypes:
continue
full_src = urljoin(url, src)
if full_src not in all_urls:
filename, full_src = self.download_from_url(full_src, try_best_quality=True)

View File

@@ -111,12 +111,14 @@ def is_relevant_url(url: str) -> bool:
("emoji.redditmedia.com",),
]
# TODO: make these globally configurable
IRRELEVANT_ENDS_WITH = [
".svg", # ignore SVGs
".ico", # ignore icons
# ignore index files for videos; these should be handled by ytdlp
".m3u8",
".mpd",
".ism", # ignore index files for videos, these should be handled by ytdlp
".ism",
]
for end in IRRELEVANT_ENDS_WITH:

View File

@@ -34,7 +34,6 @@ class TestAntibotExtractorEnricher(TestExtractorBase):
"save_to_pdf": False,
"max_download_images": 0,
"max_download_videos": 0,
"exclude_media_extensions": ".svg,.ico,.gif",
"proxy": None,
}