Porównaj commity

...

6 commitów

Autor SHA1 Wiadomość Data
lauren n. liberda d630e14312
Merge 1a68959dc4 into e3b42d8b1b 2024-04-20 19:28:52 +02:00
bashonly e3b42d8b1b
[ie/facebook] Fix DASH formats extraction (#9734)
Closes #9720
Authored by: bashonly
2024-04-20 10:23:12 +00:00
bashonly c9ce57d9bf
[ie/patreon] Fix Vimeo embed extraction (#9712)
Fixes regression in 36b240f9a7

Closes #9709
Authored by: bashonly
2024-04-18 23:18:56 +00:00
bashonly 02483bea1c
[build] Normalize `curl_cffi` group to `curl-cffi` (#9698)
Closes #9682
Authored by: bashonly
2024-04-18 23:11:12 +00:00
lauren n. liberda 1a68959dc4
[extractor/polskieradio] cache the channel list 2024-03-16 00:53:22 +01:00
lauren n. liberda 1597bcf660
[extractor/polskieradio] fix live player 2024-03-16 00:20:38 +01:00
6 zmienionych plików z 59 dodaniami i 33 usunięciami

Wyświetl plik

@ -254,7 +254,7 @@ jobs:
# We need to fuse our own universal2 wheels for curl_cffi
python3 -m pip install -U --user delocate
mkdir curl_cffi_whls curl_cffi_universal2
python3 devscripts/install_deps.py --print -o --include curl_cffi > requirements.txt
python3 devscripts/install_deps.py --print -o --include curl-cffi > requirements.txt
for platform in "macosx_11_0_arm64" "macosx_11_0_x86_64"; do
python3 -m pip download \
--only-binary=:all: \
@ -362,7 +362,7 @@ jobs:
- name: Install Requirements
run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
python devscripts/install_deps.py -o --include build
python devscripts/install_deps.py --include py2exe --include curl_cffi
python devscripts/install_deps.py --include py2exe --include curl-cffi
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl"
- name: Prepare

Wyświetl plik

@ -202,7 +202,7 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly
The following provide support for impersonating browser requests. This may be required for some sites that employ TLS fingerprinting.
* [**curl_cffi**](https://github.com/yifeikong/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lwthiker/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/yifeikong/curl_cffi/blob/main/LICENSE)
* Can be installed with the `curl_cffi` group, e.g. `pip install yt-dlp[default,curl_cffi]`
* Can be installed with the `curl-cffi` group, e.g. `pip install yt-dlp[default,curl-cffi]`
* Currently only included in `yt-dlp.exe` and `yt-dlp_macos` builds

Wyświetl plik

@ -53,7 +53,7 @@ dependencies = [
[project.optional-dependencies]
default = []
curl_cffi = ["curl-cffi==0.5.10; implementation_name=='cpython'"]
curl-cffi = ["curl-cffi==0.5.10; implementation_name=='cpython'"]
secretstorage = [
"cffi",
"secretstorage",

Wyświetl plik

@ -560,7 +560,7 @@ class FacebookIE(InfoExtractor):
js_data, lambda x: x['jsmods']['instances'], list) or [])
def extract_dash_manifest(video, formats):
dash_manifest = video.get('dash_manifest')
dash_manifest = traverse_obj(video, 'dash_manifest', 'playlist', expected_type=str)
if dash_manifest:
formats.extend(self._parse_mpd_formats(
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),

Wyświetl plik

@ -1,8 +1,8 @@
import itertools
import urllib.parse
from .common import InfoExtractor
from .vimeo import VimeoIE
from ..compat import compat_urllib_parse_unquote
from ..networking.exceptions import HTTPError
from ..utils import (
KNOWN_EXTENSIONS,
@ -14,7 +14,6 @@ from ..utils import (
parse_iso8601,
str_or_none,
traverse_obj,
try_get,
url_or_none,
urljoin,
)
@ -199,6 +198,27 @@ class PatreonIE(PatreonBaseIE):
'channel_id': '2147162',
'uploader_url': 'https://www.patreon.com/yaboyroshi',
},
}, {
# NSFW vimeo embed URL
'url': 'https://www.patreon.com/posts/4k-spiderman-4k-96414599',
'info_dict': {
'id': '902250943',
'ext': 'mp4',
'title': '❤️(4K) Spiderman Girl Yeonhwas Gift ❤️(4K) 스파이더맨걸 연화의 선물',
'description': '❤️(4K) Spiderman Girl Yeonhwas Gift \n❤️(4K) 스파이더맨걸 연화의 선물',
'uploader': 'Npickyeonhwa',
'uploader_id': '90574422',
'uploader_url': 'https://www.patreon.com/Yeonhwa726',
'channel_id': '10237902',
'channel_url': 'https://www.patreon.com/Yeonhwa726',
'duration': 70,
'timestamp': 1705150153,
'upload_date': '20240113',
'comment_count': int,
'like_count': int,
'thumbnail': r're:^https?://.+',
},
'params': {'skip_download': 'm3u8'},
}]
def _real_extract(self, url):
@ -268,16 +288,19 @@ class PatreonIE(PatreonBaseIE):
})
# handle Vimeo embeds
if try_get(attributes, lambda x: x['embed']['provider']) == 'Vimeo':
embed_html = try_get(attributes, lambda x: x['embed']['html'])
v_url = url_or_none(compat_urllib_parse_unquote(
self._search_regex(r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)', embed_html, 'vimeo url', fatal=False)))
if v_url:
v_url = VimeoIE._smuggle_referrer(v_url, 'https://patreon.com')
if self._request_webpage(v_url, video_id, 'Checking Vimeo embed URL', fatal=False, errnote=False):
return self.url_result(v_url, VimeoIE, url_transparent=True, **info)
if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
v_url = urllib.parse.unquote(self._html_search_regex(
r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)',
traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '')
if url_or_none(v_url) and self._request_webpage(
v_url, video_id, 'Checking Vimeo embed URL',
headers={'Referer': 'https://patreon.com/'},
fatal=False, errnote=False):
return self.url_result(
VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
VimeoIE, url_transparent=True, **info)
embed_url = try_get(attributes, lambda x: x['embed']['url'])
embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))
if embed_url and self._request_webpage(embed_url, video_id, 'Checking embed URL', fatal=False, errnote=False):
return self.url_result(embed_url, **info)

Wyświetl plik

@ -459,7 +459,10 @@ class PolskieRadioPlayerIE(InfoExtractor):
'info_dict': {
'id': '3',
'ext': 'm4a',
'title': 'Trójka',
'title': r're:Trójka \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
'thumbnail': 'https://player.polskieradio.pl/images/trojka-color-logo.png',
'live_status': 'is_live',
'display_id': 'trojka',
},
'params': {
'format': 'bestaudio',
@ -468,12 +471,20 @@ class PolskieRadioPlayerIE(InfoExtractor):
}]
def _get_channel_list(self, channel_url='no_channel'):
webpage = self._download_webpage(self._BASE_URL, channel_url)
player_hash = self._search_regex(r'/main\.bundle\.js\?([a-f0-9]+)', webpage, 'player hash')
channel_list = self.cache.load('polskieradio-player-channel-list', player_hash)
if channel_list:
return channel_list
player_code = self._download_webpage(
self._PLAYER_URL, channel_url,
note='Downloading js player')
channel_list = js_to_json(self._search_regex(
r';var r="anteny",a=(\[.+?\])},', player_code, 'channel list'))
return self._parse_json(channel_list, channel_url)
channel_list = self._search_json(
r''';\s*var\s[a-zA-Z_]+\s*=\s*["']anteny["']\s*,\s*[a-zA-Z_]+\s*=\s*''',
player_code, 'channel list', channel_url, transform_source=js_to_json,
contains_pattern=r'\[{(?s:.+)}\]')
self.cache.store('polskieradio-player-channel-list', player_hash, channel_list)
return channel_list
def _real_extract(self, url):
channel_url = self._match_id(url)
@ -496,19 +507,11 @@ class PolskieRadioPlayerIE(InfoExtractor):
if not station:
raise ExtractorError('Station not found even though we extracted channel')
formats = []
for stream_url in station['Streams']:
stream_url = self._proto_relative_url(stream_url)
if stream_url.endswith('/playlist.m3u8'):
formats.extend(self._extract_m3u8_formats(stream_url, channel_url, live=True))
elif stream_url.endswith('/manifest.f4m'):
formats.extend(self._extract_mpd_formats(stream_url, channel_url))
elif stream_url.endswith('/Manifest'):
formats.extend(self._extract_ism_formats(stream_url, channel_url))
else:
formats.append({
'url': stream_url,
})
formats = self._extract_m3u8_formats(
next((
stream_url.replace('http:', 'https:') for stream_url in station['Streams'] if stream_url.endswith('.m3u8')
)),
channel_url, live=True)
return {
'id': compat_str(channel['id']),