kopia lustrzana https://github.com/bellingcat/auto-archiver
Add flexible extractor_args to generic_extractor.py
rodzic
90932a7bc8
commit
2921061fde
|
@ -74,6 +74,11 @@ If you are having issues with the extractor, you can review the version of `yt-d
|
|||
"default": "inf",
|
||||
"help": "Use to limit the number of videos to download when a channel or long page is being extracted. 'inf' means no limit.",
|
||||
},
|
||||
"extractor_args": {
|
||||
"default": {},
|
||||
"help": "Additional arguments to pass to the yt-dlp extractor. See https://github.com/yt-dlp/yt-dlp/blob/master/README.md#extractor-arguments.",
|
||||
"type": "json_loader",
|
||||
},
|
||||
"ytdlp_update_interval": {
|
||||
"default": 5,
|
||||
"help": "How often to check for yt-dlp updates (days). If positive, will check and update yt-dlp every [num] days. Set it to -1 to disable, or 0 to always update on every run.",
|
||||
|
|
|
@ -422,16 +422,20 @@ class GenericExtractor(Extractor):
|
|||
"--write-subs" if self.subtitles else "--no-write-subs",
|
||||
"--write-auto-subs" if self.subtitles else "--no-write-auto-subs",
|
||||
"--live-from-start" if self.live_from_start else "--no-live-from-start",
|
||||
"--proxy",
|
||||
self.proxy if self.proxy else "",
|
||||
f"--max-downloads {self.max_downloads}" if self.max_downloads != "inf" else "",
|
||||
f"--playlist-end {self.max_downloads}" if self.max_downloads != "inf" else "",
|
||||
]
|
||||
|
||||
# proxy handling
|
||||
if self.proxy:
|
||||
ydl_options.extend(["--proxy", self.proxy])
|
||||
|
||||
# max_downloads handling
|
||||
if self.max_downloads != "inf":
|
||||
ydl_options.extend(["--max-downloads", str(self.max_downloads)])
|
||||
ydl_options.extend(["--playlist-end", str(self.max_downloads)])
|
||||
|
||||
# set up auth
|
||||
auth = self.auth_for_site(url, extract_cookies=False)
|
||||
|
||||
# order of importance: username/pasword -> api_key -> cookie -> cookies_from_browser -> cookies_file
|
||||
# order of importance: username/password -> api_key -> cookie -> cookies_from_browser -> cookies_file
|
||||
if auth:
|
||||
if "username" in auth and "password" in auth:
|
||||
logger.debug(f"Using provided auth username and password for {url}")
|
||||
|
@ -447,6 +451,16 @@ class GenericExtractor(Extractor):
|
|||
logger.debug(f"Using cookies from file {auth['cookies_file']} for {url}")
|
||||
ydl_options.extend(("--cookies", auth["cookies_file"]))
|
||||
|
||||
# Applying user-defined extractor_args
|
||||
if self.extractor_args:
|
||||
for key, args in self.extractor_args.items():
|
||||
logger.debug(f"Setting extractor_args: {key}")
|
||||
if isinstance(args, dict):
|
||||
arg_str = ";".join(f"{k}={v}" for k, v in args.items())
|
||||
else:
|
||||
arg_str = str(args)
|
||||
ydl_options.extend(["--extractor-args", f"{key}:{arg_str}"])
|
||||
|
||||
if self.ytdlp_args:
|
||||
logger.debug("Adding additional ytdlp arguments: {self.ytdlp_args}")
|
||||
ydl_options += self.ytdlp_args.split(" ")
|
||||
|
|
|
@ -82,7 +82,7 @@ def test_load_modules(module_name):
|
|||
default_config = module.configs
|
||||
assert loaded_module.name in loaded_module.config.keys()
|
||||
defaults = {k: v.get("default") for k, v in default_config.items()}
|
||||
assert loaded_module.config[module_name] == defaults
|
||||
assert defaults.keys() in [loaded_module.config[module_name].keys()]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("module_name", ["local_storage", "generic_extractor", "html_formatter", "csv_db"])
|
||||
|
|
Ładowanie…
Reference in New Issue