adds proxy/end support and bug fix for yt-dlp

pull/129/head
msramalho 2024-02-20 12:14:47 +00:00
rodzic 80937c2388
commit e0a5c60dbd
1 zmienionych plików z 25 dodań i 18 usunięć

Wyświetl plik

@ -14,6 +14,7 @@ class YoutubeDLArchiver(Archiver):
self.comments = bool(self.comments)
self.livestreams = bool(self.livestreams)
self.live_from_start = bool(self.live_from_start)
self.end_means_success = bool(self.end_means_success)
@staticmethod
def configs() -> dict:
@ -23,17 +24,18 @@ class YoutubeDLArchiver(Archiver):
"comments": {"default": False, "help": "download all comments if available, may lead to large metadata"},
"livestreams": {"default": False, "help": "if set, will download live streams, otherwise will skip them; see --max-filesize for more control"},
"live_from_start": {"default": False, "help": "if set, will download live streams from their earliest available moment, otherwise starts now."},
"proxy": {"default": "", "help": "http/socks (https seems to not work atm) proxy to use for the webdriver, eg https://proxy-user:password@proxy-ip:port"},
"end_means_success": {"default": True, "help": "if True, any archived content will mean a 'success', if False this archiver will not return a 'success' stage; this is useful for cases when the yt-dlp will archive a video but ignore other types of content like images or text only pages that the subsequent archivers can retrieve."},
}
def download(self, item: Metadata) -> Metadata:
#TODO: yt-dlp for transcripts?
url = item.get_url()
if item.netloc in ['facebook.com', 'www.facebook.com'] and self.facebook_cookie:
logger.debug('Using Facebook cookie')
yt_dlp.utils.std_headers['cookie'] = self.facebook_cookie
ydl_options = {'outtmpl': os.path.join(ArchivingContext.get_tmp_dir(), f'%(id)s.%(ext)s'), 'quiet': False, 'noplaylist': True, 'writesubtitles': self.subtitles, 'writeautomaticsub': self.subtitles, "live_from_start": self.live_from_start}
ydl_options = {'outtmpl': os.path.join(ArchivingContext.get_tmp_dir(), f'%(id)s.%(ext)s'), 'quiet': False, 'noplaylist': True, 'writesubtitles': self.subtitles, 'writeautomaticsub': self.subtitles, "live_from_start": self.live_from_start, "proxy": self.proxy}
ydl = yt_dlp.YoutubeDL(ydl_options) # allsubtitles and subtitleslangs not working as expected, so default lang is always "en"
try:
@ -63,21 +65,24 @@ class YoutubeDLArchiver(Archiver):
result = Metadata()
result.set_title(info.get("title"))
for entry in entries:
filename = ydl.prepare_filename(entry)
if not os.path.exists(filename):
filename = filename.split('.')[0] + '.mkv'
new_media = Media(filename).set("duration", info.get("duration"))
# read text from subtitles if enabled
if self.subtitles:
for lang, val in info.get('requested_subtitles', {}).items():
try:
subs = pysubs2.load(val.get('filepath'), encoding="utf-8")
text = " ".join([line.text for line in subs])
new_media.set(f"subtitles_{lang}", text)
except Exception as e:
logger.error(f"Error loading subtitle file {val.get('filepath')}: {e}")
result.add_media(new_media)
try:
filename = ydl.prepare_filename(entry)
if not os.path.exists(filename):
filename = filename.split('.')[0] + '.mkv'
new_media = Media(filename).set("duration", info.get("duration"))
# read text from subtitles if enabled
if self.subtitles:
for lang, val in (info.get('requested_subtitles') or {}).items():
try:
subs = pysubs2.load(val.get('filepath'), encoding="utf-8")
text = " ".join([line.text for line in subs])
new_media.set(f"subtitles_{lang}", text)
except Exception as e:
logger.error(f"Error loading subtitle file {val.get('filepath')}: {e}")
result.add_media(new_media)
except Exception as e:
logger.error(f"Error processing entry {entry}: {e}")
# extract comments if enabled
if self.comments:
@ -94,4 +99,6 @@ class YoutubeDLArchiver(Archiver):
upload_date = datetime.datetime.strptime(upload_date, '%Y%m%d').replace(tzinfo=datetime.timezone.utc)
result.set("upload_date", upload_date)
return result.success("yt-dlp")
if self.end_means_success: result.success("yt-dlp")
else: result.status = "yt-dlp"
return result