Merge cc63951a83 into e3b42d8b1b

[ie/facebook] Fix DASH formats extraction (#9734)
Closes #9720 Authored by: bashonly
2024-04-20 20:43:50 +09:00 · 2024-04-20 10:23:12 +00:00 · 2024-04-18 23:18:56 +00:00 · 2024-04-18 23:11:12 +00:00 · 2024-03-21 19:49:24 +09:00 · 2024-03-21 19:14:13 +09:00
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -254,7 +254,7 @@ jobs:
          # We need to fuse our own universal2 wheels for curl_cffi
          python3 -m pip install -U --user delocate
          mkdir curl_cffi_whls curl_cffi_universal2
-          python3 devscripts/install_deps.py --print -o --include curl_cffi > requirements.txt
+          python3 devscripts/install_deps.py --print -o --include curl-cffi > requirements.txt
          for platform in "macosx_11_0_arm64" "macosx_11_0_x86_64"; do
            python3 -m pip download \
              --only-binary=:all: \
@@ -362,7 +362,7 @@ jobs:
      - name: Install Requirements
        run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
          python devscripts/install_deps.py -o --include build
-          python devscripts/install_deps.py --include py2exe --include curl_cffi
+          python devscripts/install_deps.py --include py2exe --include curl-cffi
          python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl"

      - name: Prepare
--- a/README.md
+++ b/README.md
@@ -202,7 +202,7 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly
 The following provide support for impersonating browser requests. This may be required for some sites that employ TLS fingerprinting. 

 * [**curl_cffi**](https://github.com/yifeikong/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lwthiker/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/yifeikong/curl_cffi/blob/main/LICENSE)
-  * Can be installed with the `curl_cffi` group, e.g. `pip install yt-dlp[default,curl_cffi]`
+  * Can be installed with the `curl-cffi` group, e.g. `pip install yt-dlp[default,curl-cffi]`
  * Currently only included in `yt-dlp.exe` and `yt-dlp_macos` builds


--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,7 +53,7 @@ dependencies = [

 [project.optional-dependencies]
 default = []
-curl_cffi = ["curl-cffi==0.5.10; implementation_name=='cpython'"]
+curl-cffi = ["curl-cffi==0.5.10; implementation_name=='cpython'"]
 secretstorage = [
    "cffi",
    "secretstorage",
--- a/yt_dlp/extractor/facebook.py
+++ b/yt_dlp/extractor/facebook.py
@@ -560,7 +560,7 @@ class FacebookIE(InfoExtractor):
                    js_data, lambda x: x['jsmods']['instances'], list) or [])

        def extract_dash_manifest(video, formats):
-            dash_manifest = video.get('dash_manifest')
+            dash_manifest = traverse_obj(video, 'dash_manifest', 'playlist', expected_type=str)
            if dash_manifest:
                formats.extend(self._parse_mpd_formats(
                    compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
--- a/yt_dlp/extractor/patreon.py
+++ b/yt_dlp/extractor/patreon.py
@@ -1,8 +1,8 @@
 import itertools
+import urllib.parse

 from .common import InfoExtractor
 from .vimeo import VimeoIE
-from ..compat import compat_urllib_parse_unquote
 from ..networking.exceptions import HTTPError
 from ..utils import (
    KNOWN_EXTENSIONS,
@@ -14,7 +14,6 @@ from ..utils import (
    parse_iso8601,
    str_or_none,
    traverse_obj,
-    try_get,
    url_or_none,
    urljoin,
 )
@@ -199,6 +198,27 @@ class PatreonIE(PatreonBaseIE):
            'channel_id': '2147162',
            'uploader_url': 'https://www.patreon.com/yaboyroshi',
        },
+    }, {
+        # NSFW vimeo embed URL
+        'url': 'https://www.patreon.com/posts/4k-spiderman-4k-96414599',
+        'info_dict': {
+            'id': '902250943',
+            'ext': 'mp4',
+            'title': '❤️(4K) Spiderman Girl Yeonhwa’s Gift ❤️(4K) 스파이더맨걸 연화의 선물',
+            'description': '❤️(4K) Spiderman Girl Yeonhwa’s Gift \n❤️(4K) 스파이더맨걸 연화의 선물',
+            'uploader': 'Npickyeonhwa',
+            'uploader_id': '90574422',
+            'uploader_url': 'https://www.patreon.com/Yeonhwa726',
+            'channel_id': '10237902',
+            'channel_url': 'https://www.patreon.com/Yeonhwa726',
+            'duration': 70,
+            'timestamp': 1705150153,
+            'upload_date': '20240113',
+            'comment_count': int,
+            'like_count': int,
+            'thumbnail': r're:^https?://.+',
+        },
+        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
@@ -268,16 +288,19 @@ class PatreonIE(PatreonBaseIE):
                })

        # handle Vimeo embeds
-        if try_get(attributes, lambda x: x['embed']['provider']) == 'Vimeo':
-            embed_html = try_get(attributes, lambda x: x['embed']['html'])
-            v_url = url_or_none(compat_urllib_parse_unquote(
-                self._search_regex(r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)', embed_html, 'vimeo url', fatal=False)))
-            if v_url:
-                v_url = VimeoIE._smuggle_referrer(v_url, 'https://patreon.com')
-                if self._request_webpage(v_url, video_id, 'Checking Vimeo embed URL', fatal=False, errnote=False):
-                    return self.url_result(v_url, VimeoIE, url_transparent=True, **info)
+        if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
+            v_url = urllib.parse.unquote(self._html_search_regex(
+                r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)',
+                traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '')
+            if url_or_none(v_url) and self._request_webpage(
+                    v_url, video_id, 'Checking Vimeo embed URL',
+                    headers={'Referer': 'https://patreon.com/'},
+                    fatal=False, errnote=False):
+                return self.url_result(
+                    VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
+                    VimeoIE, url_transparent=True, **info)

-        embed_url = try_get(attributes, lambda x: x['embed']['url'])
+        embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))
        if embed_url and self._request_webpage(embed_url, video_id, 'Checking embed URL', fatal=False, errnote=False):
            return self.url_result(embed_url, **info)

--- a/yt_dlp/extractor/tver.py
+++ b/yt_dlp/extractor/tver.py
@@ -21,6 +21,8 @@ class TVerIE(InfoExtractor):
            'episode': '売り場席巻のチーズSP＆財前直見×森泉親子の脱東京暮らし密着！',
            'alt_title': '売り場席巻のチーズSP＆財前直見×森泉親子の脱東京暮らし密着！',
            'channel': 'テレビ朝日',
+            'id': 'ep83nf3w4p',
+            'ext': 'mp4',
        },
        'add_ie': ['BrightcoveNew'],
    }, {
@@ -29,6 +31,9 @@ class TVerIE(InfoExtractor):
    }, {
        'url': 'https://tver.jp/lp/f0033031',
        'only_matching': True,
+    }, {
+        'url': 'https://tver.jp/series/srkq2shp9d',
+        'only_matching': True,
    }]
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
    _PLATFORM_UID = None
@@ -45,8 +50,29 @@ class TVerIE(InfoExtractor):
        self._PLATFORM_UID = traverse_obj(create_response, ('result', 'platform_uid'))
        self._PLATFORM_TOKEN = traverse_obj(create_response, ('result', 'platform_token'))

+    def _entries(self, series_id):
+        season_json = self._download_json(f'https://service-api.tver.jp/api/v1/callSeriesSeasons/{series_id}', series_id, headers={'x-tver-platform-type': 'web'})
+        seasons = traverse_obj(season_json, ('result', 'contents', lambda _, s: s['type'] == 'season', 'content', 'id'), default=[])
+        for season_id in seasons:
+            episode_json = self._download_json(
+                f'https://platform-api.tver.jp/service/api/v1/callSeasonEpisodes/{season_id}',
+                season_id,
+                headers={'x-tver-platform-type': 'web'},
+                query={
+                    'platform_uid': self._PLATFORM_UID,
+                    'platform_token': self._PLATFORM_TOKEN,
+                },
+            )
+            episodes = traverse_obj(episode_json, ('result', 'contents', lambda _, e: e['type'] == 'episode', 'content', 'id'), default=[])
+            for video_id in episodes:
+                yield self.url_result(f'https://tver.jp/episodes/{video_id}', TVerIE, video_id)
+
    def _real_extract(self, url):
        video_id, video_type = self._match_valid_url(url).group('id', 'type')
+
+        if video_type == 'series':
+            return self.playlist_result(self._entries(video_id), video_id)
+
        if video_type not in {'series', 'episodes'}:
            webpage = self._download_webpage(url, video_id, note='Resolving to new URL')
            video_id = self._match_id(self._search_regex(
@@ -65,11 +91,13 @@ class TVerIE(InfoExtractor):
        episode_content = traverse_obj(
            episode_info, ('result', 'episode', 'content')) or {}

+        version = str_or_none(episode_content.get('version')) or '5'
        video_info = self._download_json(
            f'https://statics.tver.jp/content/episode/{video_id}.json', video_id,
            query={
-                'v': str_or_none(episode_content.get('version')) or '5',
-            }, headers={
+                'v': version,
+            },
+            headers={
                'Origin': 'https://tver.jp',
                'Referer': 'https://tver.jp/',
            })
@@ -88,6 +116,21 @@ class TVerIE(InfoExtractor):
        provider = str_or_none(episode_content.get('productionProviderName'))
        onair_label = str_or_none(episode_content.get('broadcastDateLabel'))

+        thumbnails = [
+            {
+                'id': quality,
+                'url': f'https://statics.tver.jp/images/content/thumbnail/episode/{quality}/{video_id}.jpg?v={version}',
+                'width': width,
+                'height': height,
+            }
+            for quality, width, height in [
+                ('small', 480, 270),
+                ('medium', 640, 360),
+                ('large', 960, 540),
+                ('xlarge', 1280, 720),
+            ]
+        ]
+
        return {
            '_type': 'url_transparent',
            'title': title,
@@ -97,6 +140,7 @@ class TVerIE(InfoExtractor):
            'alt_title': join_nonempty(title, provider, onair_label, delim=' '),
            'channel': provider,
            'description': str_or_none(video_info.get('description')),
+            'thumbnails': thumbnails,
            'url': smuggle_url(
                self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id), {'geo_countries': ['JP']}),
            'ie_key': 'BrightcoveNew',
Autor	SHA1	Wiadomość	Data
vvto33	bcc649722f	Merge `cc63951a83` into `e3b42d8b1b`	2024-04-20 20:43:50 +09:00
bashonly	e3b42d8b1b	[ie/facebook] Fix DASH formats extraction (#9734). Closes #9720. Authored by: bashonly	2024-04-20 10:23:12 +00:00
bashonly	c9ce57d9bf	[ie/patreon] Fix Vimeo embed extraction (#9712). Fixes regression in `36b240f9a7`. Closes #9709. Authored by: bashonly	2024-04-18 23:18:56 +00:00
bashonly	02483bea1c	[build] Normalize `curl_cffi` group to `curl-cffi` (#9698). Closes #9682. Authored by: bashonly	2024-04-18 23:11:12 +00:00
vvto33	cc63951a83	Modify _TEST to pass extractor tests	2024-03-21 19:49:24 +09:00
vvto33	ca02368c30	Support for series URL	2024-03-21 19:14:13 +09:00
vvto33	e42f4a53e8	Add thumbnail URL support. For cases where the thumbnail obtained from Brightcove is the broadcaster's default image, this change ensures that the thumbnail hosted by TVer.jp is always used instead.	2024-03-21 15:44:37 +09:00