Porównaj commity

...

7 commitów

Autor SHA1 Wiadomość Data
Kyle Gonsalves 6ef8990320 different solution for traversal issues 2024-04-21 18:17:52 -07:00
Kyle Gonsalves 4b9a54b464 flake8 check 2024-04-21 16:41:19 -07:00
Kyle Gonsalves e2ae76e84c Making the parse_model function, address comments 2024-04-21 16:22:46 -07:00
Kyle Gonsalves 9dbd9fc873 more streamlining 2024-04-19 10:50:22 -07:00
Kyle Gonsalves b9af6bf2ce nit, style 2024-04-19 10:06:59 -07:00
Kyle Gonsalves 89eaee2ff8 one more tranverse 2024-04-19 10:04:05 -07:00
Kyle Gonsalves 5f35e17572 Using traverse_obj 2024-04-18 09:12:36 -07:00
1 zmieniony plik: 40 dodań i 37 usunięć

Wyświetl plik

@@ -798,9 +798,11 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'id': 'p0hj0lq7',
'ext': 'mp4',
'title': 'Nasser Hospital doctor describes his treatment by IDF',
'description': 'Doctor Abu Sabha said he was detained by Israeli forces after the raid on Nasser Hospital and feared for his life.\n\nThe IDF said "during the activity, about 200 terrorists and suspects of terrorist activity were detained, including some who posed as medical teams, many weapons were found, as well as closed medicines intended for Israeli hostages."',
'description': r're:(?s)Doctor Abu Sabha said he was detained by Israeli forces after .{276} hostages\."$',
'thumbnail': r're:https?://.+/.+\.jpg',
'timestamp': 1710270205000,
'timestamp': 1710188248,
'upload_date': '20240311',
'duration': 104,
},
}, {
# single video article embedded with data-media-vpid
@@ -1266,44 +1268,45 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
lambda s: self._parse_json(s, playlist_id, fatal=False),
re.findall(pattern, webpage))))
# US accessed article with single embedded video (e.g.
# https://www.bbc.com/news/uk-68546268)
video_id = self._match_id(url)
next_data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['page']
video_data = None
timestamp = None
for key in next_data:
for item in (try_get(next_data, lambda x: x[key]['contents'], list) or []):
if item.get('type') == 'video':
video_data = item
elif item.get('type') == 'timestamp':
timestamp = item
if video_data:
for item in (try_get(video_data, lambda x: x['model']['blocks'], list) or []):
if item.get('type') == 'media':
for subtype in (try_get(item, lambda x: x['model']['blocks'], list) or []):
if subtype.get('type') == 'mediaMetadata':
model = subtype.get('model')
if model:
item_id = try_get(model, lambda x: x['versions'][0]['versionId'])
item_thumbnail = model.get('imageUrl')
item_title = model.get('title')
formats, subtitles = self._download_media_selector(item_id)
synopses = model.get('synopses') or {}
item_time = None
if timestamp:
item_time = try_get(timestamp, lambda x: x['model']['timestamp'])
entries.append({
def parse_model(model):
'''Extract single video from model structure'''
item_id = traverse_obj(model, ('versions', 0, 'versionId', {str}))
if not item_id:
return
formats, subtitles = self._download_media_selector(item_id)
return {
'id': item_id,
'title': item_title,
'thumbnail': item_thumbnail,
'formats': formats,
'subtitles': subtitles,
'timestamp': item_time,
'description': dict_get(synopses, ('long', 'medium', 'short'))
})
return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description)
**traverse_obj(model, {
'title': ('title', {str}),
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
'description': (
'synopses', ('long', 'medium', 'short'), {str}, any),
'duration': ('versions', 0, 'duration', {int}),
'timestamp': ('versions', 0, 'availableFrom', {lambda x: int_or_none(x, scale=1000)}),
})
}
# US accessed article with single embedded video (e.g.
# https://www.bbc.com/news/uk-68546268)
next_data = traverse_obj(self._search_nextjs_data(webpage, playlist_id, default={}), (
'props', 'pageProps', 'page'))
model = traverse_obj(next_data, (
..., 'contents', lambda _, v: v['type'] == 'video',
'model', 'blocks', lambda _, v: v['type'] == 'media',
'model', 'blocks', lambda _, v: v['type'] == 'mediaMetadata',
'model', {dict}, any))
if model:
entry = parse_model(model)
if entry:
if entry.get('timestamp') is None:
entry['timestamp'] = traverse_obj(next_data, (
..., 'contents', lambda _, v: v['type'] == 'timestamp',
'model', 'timestamp', {functools.partial(int_or_none, scale=1000)}, any))
entries.append(entry)
return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description)
# Multiple video article (e.g.
# http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)