different solution for traversal issues

flake8 check
Making the parse_model function, address comments
2024-04-21 18:17:52 -07:00 · 2024-04-21 16:41:19 -07:00 · 2024-04-21 16:22:46 -07:00 · 2024-04-19 10:50:22 -07:00 · 2024-04-19 10:06:59 -07:00 · 2024-04-19 10:04:05 -07:00
--- a/yt_dlp/extractor/bbc.py
+++ b/yt_dlp/extractor/bbc.py
@@ -798,9 +798,11 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
            'id': 'p0hj0lq7',
            'ext': 'mp4',
            'title': 'Nasser Hospital doctor describes his treatment by IDF',
-            'description': 'Doctor Abu Sabha said he was detained by Israeli forces after the raid on Nasser Hospital and feared for his life.\n\nThe IDF said "during the activity, about 200 terrorists and suspects of terrorist activity were detained, including some who posed as medical teams, many weapons were found, as well as closed medicines intended for Israeli hostages."',
+            'description': r're:(?s)Doctor Abu Sabha said he was detained by Israeli forces after .{276} hostages\."$',
            'thumbnail': r're:https?://.+/.+\.jpg',
-            'timestamp': 1710270205000,
+            'timestamp': 1710188248,
+            'upload_date': '20240311',
+            'duration': 104,
        },
    }, {
        # single video article embedded with data-media-vpid
@@ -1266,44 +1268,45 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
                lambda s: self._parse_json(s, playlist_id, fatal=False),
                re.findall(pattern, webpage))))

-        # US accessed article with single embedded video (e.g.
-        # https://www.bbc.com/news/uk-68546268)
-        video_id = self._match_id(url)
-        next_data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['page']
-        video_data = None
-        timestamp = None
-        for key in next_data:
-            for item in (try_get(next_data, lambda x: x[key]['contents'], list) or []):
-                if item.get('type') == 'video':
-                    video_data = item
-                elif item.get('type') == 'timestamp':
-                    timestamp = item
-        if video_data:
-            for item in (try_get(video_data, lambda x: x['model']['blocks'], list) or []):
-                if item.get('type') == 'media':
-                    for subtype in (try_get(item, lambda x: x['model']['blocks'], list) or []):
-                        if subtype.get('type') == 'mediaMetadata':
-                            model = subtype.get('model')
-                            if model:
-                                item_id = try_get(model, lambda x: x['versions'][0]['versionId'])
-                                item_thumbnail = model.get('imageUrl')
-                                item_title = model.get('title')
-                                formats, subtitles = self._download_media_selector(item_id)
-                                synopses = model.get('synopses') or {}
-            item_time = None
-            if timestamp:
-                item_time = try_get(timestamp, lambda x: x['model']['timestamp'])
-            entries.append({
+        def parse_model(model):
+            '''Extract single video from model structure'''
+            item_id = traverse_obj(model, ('versions', 0, 'versionId', {str}))
+            if not item_id:
+                return
+            formats, subtitles = self._download_media_selector(item_id)
+            return {
                'id': item_id,
-                'title': item_title,
-                'thumbnail': item_thumbnail,
                'formats': formats,
                'subtitles': subtitles,
-                'timestamp': item_time,
-                'description': dict_get(synopses, ('long', 'medium', 'short'))
-            })
-            return self.playlist_result(
-                entries, playlist_id, playlist_title, playlist_description)
+                **traverse_obj(model, {
+                    'title': ('title', {str}),
+                    'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
+                    'description': (
+                        'synopses', ('long', 'medium', 'short'), {str}, any),
+                    'duration': ('versions', 0, 'duration', {int}),
+                    'timestamp': ('versions', 0, 'availableFrom', {lambda x: int_or_none(x, scale=1000)}),
+                })
+            }
+
+        # US accessed article with single embedded video (e.g.
+        # https://www.bbc.com/news/uk-68546268)
+        next_data = traverse_obj(self._search_nextjs_data(webpage, playlist_id, default={}), (
+            'props', 'pageProps', 'page'))
+        model = traverse_obj(next_data, (
+            ..., 'contents', lambda _, v: v['type'] == 'video',
+            'model', 'blocks', lambda _, v: v['type'] == 'media',
+            'model', 'blocks', lambda _, v: v['type'] == 'mediaMetadata',
+            'model', {dict}, any))
+        if model:
+            entry = parse_model(model)
+            if entry:
+                if entry.get('timestamp') is None:
+                    entry['timestamp'] = traverse_obj(next_data, (
+                        ..., 'contents', lambda _, v: v['type'] == 'timestamp',
+                        'model', 'timestamp', {functools.partial(int_or_none, scale=1000)}, any))
+                entries.append(entry)
+                return self.playlist_result(
+                    entries, playlist_id, playlist_title, playlist_description)

        # Multiple video article (e.g.
        # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
Autor	SHA1	Wiadomość	Data
Kyle Gonsalves	6ef8990320	different solution for traversal issues	2024-04-21 18:17:52 -07:00
Kyle Gonsalves	4b9a54b464	flake8 check	2024-04-21 16:41:19 -07:00
Kyle Gonsalves	e2ae76e84c	Making the parse_model function, address comments	2024-04-21 16:22:46 -07:00
Kyle Gonsalves	9dbd9fc873	more streamlining	2024-04-19 10:50:22 -07:00
Kyle Gonsalves	b9af6bf2ce	nit, style	2024-04-19 10:06:59 -07:00
Kyle Gonsalves	89eaee2ff8	one more tranverse	2024-04-19 10:04:05 -07:00
Kyle Gonsalves	5f35e17572	Using traverse_obj	2024-04-18 09:12:36 -07:00