Generalize the yt-dlp entry-to-filename helper into a reusable utility function

pull/313/head
msramalho 2025-06-07 18:14:08 +01:00
rodzic c0be41950d
commit c7a84bc97a
Nie znaleziono w bazie danych klucza dla tego podpisu
2 zmienionych plików z 27 dodań i 16 usunięć

Wyświetl plik

@ -20,6 +20,7 @@ from loguru import logger
from auto_archiver.core.extractor import Extractor
from auto_archiver.core import Metadata, Media
from auto_archiver.utils import get_datetime_from_str
from auto_archiver.utils.misc import ydl_entry_to_filename
from .dropin import GenericDropin
@ -382,27 +383,13 @@ class GenericExtractor(Extractor):
entries = [data]
result = Metadata()
# Resolve the on-disk file that was written for `entry`: returns the joined path of the
# first matching directory entry, or False when nothing in the directory matches.
# Relies on `ydl` from the enclosing scope to compute the expected output filename.
# NOTE(review): annotated `-> str`, but the fall-through returns False.
def _helper_get_filename(entry: dict) -> str:
entry_url = entry.get("url")
# expected output path according to ydl's filename template
filename = ydl.prepare_filename(entry)
base_filename, _ = os.path.splitext(filename) # '/get/path/to/file' ignore '.ext'
directory = os.path.dirname(base_filename) # '/get/path/to'
basename = os.path.basename(base_filename) # 'file'
# scan the target directory for a file sharing the expected stem, whatever its extension
# NOTE(review): os.listdir raises if `directory` is empty or does not exist - confirm the caller handles that
for f in os.listdir(directory):
# NOTE(review): `and` binds tighter than `or`, so this reads as:
# prefix match OR (file stem occurs in the entry URL AND the guessed MIME type is video/*)
if (
f.startswith(basename)
or (entry_url and os.path.splitext(f)[0] in entry_url)
and "video/" in (mimetypes.guess_type(f)[0] or "")
):
return os.path.join(directory, f)
# nothing in the directory matched
return False
for entry in entries:
try:
filename = _helper_get_filename(entry)
filename = ydl_entry_to_filename(ydl, entry)
if not filename or not os.path.exists(filename):
if not filename:
# file was not downloaded or could not be retrieved, example: sensitive videos on YT without using cookies.
continue

Wyświetl plik

@ -1,5 +1,6 @@
import hashlib
import json
import mimetypes
import os
import uuid
from datetime import datetime, timezone
@ -116,3 +117,26 @@ def get_timestamp(ts, utc=True, iso=True, dayfirst=True) -> str | datetime | Non
def get_current_timestamp() -> str:
    """Return the present moment as rendered by ``get_timestamp`` with its default options."""
    now = datetime.now()
    return get_timestamp(now)
def ydl_entry_to_filename(ydl, entry: dict) -> str:
import yt_dlp
ydl: yt_dlp.YoutubeDL
entry_url = entry.get("url")
filename = ydl.prepare_filename(entry)
if os.path.exists(filename):
return filename
base_filename, _ = os.path.splitext(filename) # '/get/path/to/file' ignore '.ext'
directory = os.path.dirname(base_filename) # '/get/path/to'
basename = os.path.basename(base_filename) # 'file'
for f in os.listdir(directory):
if (
f.startswith(basename)
or (entry_url and os.path.splitext(f)[0] in entry_url)
and "video/" in (mimetypes.guess_type(f)[0] or "")
):
return os.path.join(directory, f)
return False