mirror of
https://github.com/bellingcat/auto-archiver.git
synced 2026-06-08 03:18:28 +03:00
Merge pull request #262 from bellingcat/generic_extractor_args
Add flexible extractor_args to generic_extractor.py.

This allows users to pass any of the options listed [here](https://github.com/yt-dlp/yt-dlp/blob/master/README.md#extractor-arguments) to yt-dlp's extractor_args. Example usage:

```
generic_extractor:
  facebook_cookie: ...
  extractor_args:
    youtube:
      player_client: web,tv
    generic:
      is_live: true
```
This commit is contained in:
@@ -74,6 +74,11 @@ If you are having issues with the extractor, you can review the version of `yt-d
|
||||
"default": "inf",
|
||||
"help": "Use to limit the number of videos to download when a channel or long page is being extracted. 'inf' means no limit.",
|
||||
},
|
||||
"extractor_args": {
|
||||
"default": {},
|
||||
"help": "Additional arguments to pass to the yt-dlp extractor. See https://github.com/yt-dlp/yt-dlp/blob/master/README.md#extractor-arguments.",
|
||||
"type": "json_loader",
|
||||
},
|
||||
"ytdlp_update_interval": {
|
||||
"default": 5,
|
||||
"help": "How often to check for yt-dlp updates (days). If positive, will check and update yt-dlp every [num] days. Set it to -1 to disable, or 0 to always update on every run.",
|
||||
|
||||
@@ -422,16 +422,20 @@ class GenericExtractor(Extractor):
|
||||
"--write-subs" if self.subtitles else "--no-write-subs",
|
||||
"--write-auto-subs" if self.subtitles else "--no-write-auto-subs",
|
||||
"--live-from-start" if self.live_from_start else "--no-live-from-start",
|
||||
"--proxy",
|
||||
self.proxy if self.proxy else "",
|
||||
f"--max-downloads {self.max_downloads}" if self.max_downloads != "inf" else "",
|
||||
f"--playlist-end {self.max_downloads}" if self.max_downloads != "inf" else "",
|
||||
]
|
||||
|
||||
# proxy handling
|
||||
if self.proxy:
|
||||
ydl_options.extend(["--proxy", self.proxy])
|
||||
|
||||
# max_downloads handling
|
||||
if self.max_downloads != "inf":
|
||||
ydl_options.extend(["--max-downloads", str(self.max_downloads)])
|
||||
ydl_options.extend(["--playlist-end", str(self.max_downloads)])
|
||||
|
||||
# set up auth
|
||||
auth = self.auth_for_site(url, extract_cookies=False)
|
||||
|
||||
# order of importance: username/pasword -> api_key -> cookie -> cookies_from_browser -> cookies_file
|
||||
# order of importance: username/password -> api_key -> cookie -> cookies_from_browser -> cookies_file
|
||||
if auth:
|
||||
if "username" in auth and "password" in auth:
|
||||
logger.debug(f"Using provided auth username and password for {url}")
|
||||
@@ -447,6 +451,16 @@ class GenericExtractor(Extractor):
|
||||
logger.debug(f"Using cookies from file {auth['cookies_file']} for {url}")
|
||||
ydl_options.extend(("--cookies", auth["cookies_file"]))
|
||||
|
||||
# Applying user-defined extractor_args
|
||||
if self.extractor_args:
|
||||
for key, args in self.extractor_args.items():
|
||||
logger.debug(f"Setting extractor_args: {key}")
|
||||
if isinstance(args, dict):
|
||||
arg_str = ";".join(f"{k}={v}" for k, v in args.items())
|
||||
else:
|
||||
arg_str = str(args)
|
||||
ydl_options.extend(["--extractor-args", f"{key}:{arg_str}"])
|
||||
|
||||
if self.ytdlp_args:
|
||||
logger.debug("Adding additional ytdlp arguments: {self.ytdlp_args}")
|
||||
ydl_options += self.ytdlp_args.split(" ")
|
||||
|
||||
@@ -81,8 +81,20 @@ def test_load_modules(module_name):
|
||||
# check that default settings are applied
|
||||
default_config = module.configs
|
||||
assert loaded_module.name in loaded_module.config.keys()
|
||||
defaults = {k for k in default_config}
|
||||
assert defaults in [loaded_module.config[module_name].keys()]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("module_name", ["local_storage", "generic_extractor", "html_formatter", "csv_db"])
|
||||
def test_config_defaults(module_name):
|
||||
# test the values of the default config values are set
|
||||
# Note: some modules can alter values in the setup() method, this test checks cases that don't
|
||||
module = ModuleFactory().get_module_lazy(module_name)
|
||||
loaded_module = module.load({})
|
||||
# check that default config values are set
|
||||
default_config = module.configs
|
||||
defaults = {k: v.get("default") for k, v in default_config.items()}
|
||||
assert loaded_module.config[module_name] == defaults
|
||||
assert defaults == loaded_module.config[module_name]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("module_name", ["local_storage", "generic_extractor", "html_formatter", "csv_db"])
|
||||
|
||||
Reference in New Issue
Block a user