From 2921061fde756cf9f04a5e994546c6afdd386770 Mon Sep 17 00:00:00 2001 From: erinhmclark Date: Wed, 19 Mar 2025 19:19:28 +0000 Subject: [PATCH] Add flexible extractor_args to generic_extractor.py --- .../modules/generic_extractor/__manifest__.py | 5 ++++ .../generic_extractor/generic_extractor.py | 26 ++++++++++++++----- tests/test_modules.py | 2 +- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/src/auto_archiver/modules/generic_extractor/__manifest__.py b/src/auto_archiver/modules/generic_extractor/__manifest__.py index 128b006..9ef1cb3 100644 --- a/src/auto_archiver/modules/generic_extractor/__manifest__.py +++ b/src/auto_archiver/modules/generic_extractor/__manifest__.py @@ -74,6 +74,11 @@ If you are having issues with the extractor, you can review the version of `yt-d "default": "inf", "help": "Use to limit the number of videos to download when a channel or long page is being extracted. 'inf' means no limit.", }, + "extractor_args": { + "default": {}, + "help": "Additional arguments to pass to the yt-dlp extractor. See https://github.com/yt-dlp/yt-dlp/blob/master/README.md#extractor-arguments.", + "type": "json_loader", + }, "ytdlp_update_interval": { "default": 5, "help": "How often to check for yt-dlp updates (days). If positive, will check and update yt-dlp every [num] days. Set it to -1 to disable, or 0 to always update on every run.", diff --git a/src/auto_archiver/modules/generic_extractor/generic_extractor.py b/src/auto_archiver/modules/generic_extractor/generic_extractor.py index 6a9e28f..c2bf054 100644 --- a/src/auto_archiver/modules/generic_extractor/generic_extractor.py +++ b/src/auto_archiver/modules/generic_extractor/generic_extractor.py @@ -422,16 +422,20 @@ class GenericExtractor(Extractor): "--write-subs" if self.subtitles else "--no-write-subs", "--write-auto-subs" if self.subtitles else "--no-write-auto-subs", "--live-from-start" if self.live_from_start else "--no-live-from-start", - "--proxy", - self.proxy if self.proxy else "", - f"--max-downloads {self.max_downloads}" if self.max_downloads != "inf" else "", - f"--playlist-end {self.max_downloads}" if self.max_downloads != "inf" else "", ] + # proxy handling + if self.proxy: + ydl_options.extend(["--proxy", self.proxy]) + + # max_downloads handling + if self.max_downloads != "inf": + ydl_options.extend(["--max-downloads", str(self.max_downloads)]) + ydl_options.extend(["--playlist-end", str(self.max_downloads)]) + # set up auth auth = self.auth_for_site(url, extract_cookies=False) - - # order of importance: username/pasword -> api_key -> cookie -> cookies_from_browser -> cookies_file + # order of importance: username/password -> api_key -> cookie -> cookies_from_browser -> cookies_file if auth: if "username" in auth and "password" in auth: logger.debug(f"Using provided auth username and password for {url}") @@ -447,6 +451,16 @@ class GenericExtractor(Extractor): logger.debug(f"Using cookies from file {auth['cookies_file']} for {url}") ydl_options.extend(("--cookies", auth["cookies_file"])) + # Applying user-defined extractor_args + if self.extractor_args: + for key, args in self.extractor_args.items(): + logger.debug(f"Setting extractor_args: {key}") + if isinstance(args, dict): + arg_str = ";".join(f"{k}={v}" for k, v in args.items()) + else: + arg_str = str(args) + ydl_options.extend(["--extractor-args", f"{key}:{arg_str}"]) + if self.ytdlp_args: logger.debug("Adding additional ytdlp arguments: {self.ytdlp_args}") ydl_options += self.ytdlp_args.split(" ") diff --git a/tests/test_modules.py b/tests/test_modules.py index f672ca6..248e16d 100644 --- a/tests/test_modules.py +++ b/tests/test_modules.py @@ -82,7 +82,7 @@ def test_load_modules(module_name): default_config = module.configs assert loaded_module.name in loaded_module.config.keys() defaults = {k: v.get("default") for k, v in default_config.items()} - assert loaded_module.config[module_name] == defaults + assert defaults.keys() in [loaded_module.config[module_name].keys()] @pytest.mark.parametrize("module_name", ["local_storage", "generic_extractor", "html_formatter", "csv_db"])