Allow flexible extractor_args in generic_extractor.py.

pull/222/head
erinhmclark 2025-02-24 11:40:44 +00:00
rodzic 0eae2bee6a
commit dd07b0b830
2 zmienionych plików z 64 dodań i 16 usunięć

Wyświetl plik

@@ -64,5 +64,10 @@ via the command line using the `--dropins` option (TODO!).
"default": "inf", "default": "inf",
"help": "Use to limit the number of videos to download when a channel or long page is being extracted. 'inf' means no limit.", "help": "Use to limit the number of videos to download when a channel or long page is being extracted. 'inf' means no limit.",
}, },
"extractor_args": {
"default": {},
"help": "Additional arguments to pass to the yt-dlp extractor. See https://github.com/yt-dlp/yt-dlp/blob/master/README.md#extractor-arguments.",
"type": "json_loader",
},
}, },
} }

Wyświetl plik

@@ -170,8 +170,8 @@ class GenericExtractor(Extractor):
logger.error(f"Error processing entry {entry}: {e}") logger.error(f"Error processing entry {entry}: {e}")
return self.add_metadata(data, info_extractor, url, result) return self.add_metadata(data, info_extractor, url, result)
def dropin_for_name(self, dropin_name: str, additional_paths = [], package=__package__) -> Type[InfoExtractor]: def dropin_for_name(self, dropin_name: str, additional_paths=[], package=__package__) -> Type[InfoExtractor]:
dropin_name = dropin_name.lower() dropin_name = dropin_name.lower()
if dropin_name == "generic": if dropin_name == "generic":
@@ -179,6 +179,7 @@ class GenericExtractor(Extractor):
return None return None
dropin_class_name = dropin_name.title() dropin_class_name = dropin_name.title()
def _load_dropin(dropin): def _load_dropin(dropin):
dropin_class = getattr(dropin, dropin_class_name)() dropin_class = getattr(dropin, dropin_class_name)()
return self._dropins.setdefault(dropin_name, dropin_class) return self._dropins.setdefault(dropin_name, dropin_class)
@@ -202,7 +203,7 @@ class GenericExtractor(Extractor):
return _load_dropin(dropin) return _load_dropin(dropin)
except (FileNotFoundError, ModuleNotFoundError): except (FileNotFoundError, ModuleNotFoundError):
pass pass
# fallback to loading the dropins within auto-archiver # fallback to loading the dropins within auto-archiver
try: try:
return _load_dropin(importlib.import_module(f".{dropin_name}", package=package)) return _load_dropin(importlib.import_module(f".{dropin_name}", package=package))
@@ -241,7 +242,8 @@ class GenericExtractor(Extractor):
# don't clutter the logs with issues about the 'generic' extractor not having a dropin # don't clutter the logs with issues about the 'generic' extractor not having a dropin
return False return False
logger.debug(f'Issue using "{info_extractor.IE_NAME}" extractor to download video (error: {repr(e)}), attempting to use extractor to get post data instead') logger.debug(
f'Issue using "{info_extractor.IE_NAME}" extractor to download video (error: {repr(e)}), attempting to use extractor to get post data instead')
try: try:
result = self.get_metadata_for_post(info_extractor, url, ydl) result = self.get_metadata_for_post(info_extractor, url, ydl)
except (yt_dlp.utils.DownloadError, yt_dlp.utils.ExtractorError) as post_e: except (yt_dlp.utils.DownloadError, yt_dlp.utils.ExtractorError) as post_e:
@@ -273,15 +275,22 @@ class GenericExtractor(Extractor):
ydl_options = {'outtmpl': os.path.join(self.tmp_dir, f'%(id)s.%(ext)s'), ydl_options = {'outtmpl': os.path.join(self.tmp_dir, f'%(id)s.%(ext)s'),
'quiet': False, 'noplaylist': not self.allow_playlist , 'quiet': False,
'writesubtitles': self.subtitles,'writeautomaticsub': self.subtitles, 'noplaylist': not self.allow_playlist ,
"live_from_start": self.live_from_start, "proxy": self.proxy, 'writesubtitles': self.subtitles,
"max_downloads": self.max_downloads, "playlistend": self.max_downloads} 'writeautomaticsub': self.subtitles,
"live_from_start": self.live_from_start,
# set up auth "proxy": self.proxy,
auth = self.auth_for_site(url, extract_cookies=False) "max_downloads": self.max_downloads,
"playlistend": self.max_downloads,
# TODO
# "verbose": True,
# "print_traffic": True,
}
# order of importance: username/pasword -> api_key -> cookie -> cookies_from_browser -> cookies_file # Set up auth
auth = self.auth_for_site(url, extract_cookies=False)
# order of importance: username/password -> api_key -> cookie -> cookies_from_browser -> cookies_file
if auth: if auth:
if 'username' in auth and 'password' in auth: if 'username' in auth and 'password' in auth:
logger.debug(f'Using provided auth username and password for {url}') logger.debug(f'Using provided auth username and password for {url}')
@@ -297,12 +306,46 @@ class GenericExtractor(Extractor):
logger.debug(f'Using cookies from file {auth["cookies_file"]} for {url}') logger.debug(f'Using cookies from file {auth["cookies_file"]} for {url}')
ydl_options['cookiefile'] = auth['cookies_file'] ydl_options['cookiefile'] = auth['cookies_file']
# Applying user-defined extractor_args
if self.extractor_args:
logger.info(f"Applying user-defined extractor_args")
ydl_options.setdefault('extractor_args', {})
for key, args in self.extractor_args.items():
logger.debug(f"Setting extractor_args: {key}")
if isinstance(args, dict):
# Site specific arguments (e.g., youtube: somekey=value)
ydl_options['extractor_args'].setdefault(key, {}).update(args)
else:
# General extractor_args (e.g., somekey=value)
ydl_options['extractor_args'][key] = args
ydl = yt_dlp.YoutubeDL(ydl_options) # allsubtitles and subtitleslangs not working as expected, so default lang is always "en" ydl = yt_dlp.YoutubeDL(ydl_options) # allsubtitles and subtitleslangs not working as expected, so default lang is always "en"
for info_extractor in self.suitable_extractors(url): for info_extractor in self.suitable_extractors(url):
result = self.download_for_extractor(info_extractor, url, ydl) try:
if result: result = self.download_for_extractor(info_extractor, url, ydl)
return result if result:
return result
except yt_dlp.utils.ExtractorError as e:
# TODO Does this catch empty/ incomplete failures?
if self.extractor_args:
logger.warning(
f"Extraction with custom extractor_args failed for {url}. Retrying without extractor_args...")
# Remove extractor_args and try without
del ydl_options['extractor_args']
ydl = yt_dlp.YoutubeDL(ydl_options)
try:
result = self.download_for_extractor(info_extractor, url, ydl)
if result:
return result
except Exception as retry_error:
logger.error(f"Extraction failed for {url} after retrying: {retry_error}")
return False
else:
logger.error(f"Extraction failed for {url}: {e}")
return False
return False return False