From ac24fd8f49beba64836a124eba8c1a0939fe4a0c Mon Sep 17 00:00:00 2001 From: msramalho <19508417+msramalho@users.noreply.github.com> Date: Mon, 2 Jun 2025 12:03:51 +0100 Subject: [PATCH] improves generic extractor edge-cases and yt-dlp updates --- .../modules/generic_extractor/generic_extractor.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/auto_archiver/modules/generic_extractor/generic_extractor.py b/src/auto_archiver/modules/generic_extractor/generic_extractor.py index e56167a..1a4a0f1 100644 --- a/src/auto_archiver/modules/generic_extractor/generic_extractor.py +++ b/src/auto_archiver/modules/generic_extractor/generic_extractor.py @@ -156,7 +156,7 @@ class GenericExtractor(Extractor): logger.error("generate_once.js not found after transpilation.") return - self.extractor_args.setdefault("youtube", {})["getpot_bgutil_script"] = script_path + self.extractor_args.setdefault("youtubepot-bgutilscript", {})["script_path"] = script_path logger.info(f"PO Token script configured at: {script_path}") except Exception as e: @@ -301,7 +301,7 @@ class GenericExtractor(Extractor): result.set_url(url) if "description" in video_data and not result.get("content"): - result.set_content(video_data["description"]) + result.set_content(video_data.pop("description")) # extract comments if enabled if self.comments: result.set( @@ -370,7 +370,6 @@ class GenericExtractor(Extractor): return False else: entries = [data] - result = Metadata() for entry in entries: @@ -379,6 +378,10 @@ class GenericExtractor(Extractor): if not os.path.exists(filename): filename = filename.split(".")[0] + ".mkv" + if not os.path.exists(filename): + logger.warning(f"File {filename} does not exist (see yt-dlp logs), skipping this entry.") + continue + new_media = Media(filename) for x in ["duration", "original_url", "fulltitle", "description", "upload_date"]: if x in entry: @@ -396,6 +399,9 @@ class GenericExtractor(Extractor): result.add_media(new_media) except Exception as e: logger.error(f"Error processing entry {entry}: {e}") + if not len(result.media): + logger.warning(f"No media found for entry {entry}, skipping.") + return False return self.add_metadata(data, info_extractor, url, result)