[tvopengr] Add extractors (#2297)

Authored by: zmousm
pull/2402/head
Zenon Mousmoulas 2022-01-19 22:43:02 +02:00 zatwierdzone przez GitHub
rodzic f7085283e1
commit 1a20d29552
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 4AEE18F83AFDEB23
3 zmienionych plików z 169 dodań i 0 usunięć

Wyświetl plik

@ -1679,6 +1679,10 @@ from .tvnow import (
TVNowAnnualIE,
TVNowShowIE,
)
from .tvopengr import (
TVOpenGrWatchIE,
TVOpenGrEmbedIE,
)
from .tvp import (
TVPEmbedIE,
TVPIE,

Wyświetl plik

@ -139,6 +139,7 @@ from .arcpublishing import ArcPublishingIE
from .medialaan import MedialaanIE
from .simplecast import SimplecastIE
from .wimtv import WimTVIE
from .tvopengr import TVOpenGrEmbedIE
from .tvp import TVPEmbedIE
from .blogger import BloggerIE
from .mainstreaming import MainStreamingIE
@ -2227,6 +2228,22 @@ class GenericIE(InfoExtractor):
'skip_download': True,
},
},
{
# tvopengr:embed
'url': 'https://www.ethnos.gr/World/article/190604/hparosiaxekinoynoisynomiliessthgeneyhmethskiatoypolemoypanoapothnoykrania',
'md5': 'eb0c3995d0a6f18f6538c8e057865d7d',
'info_dict': {
'id': '101119',
'ext': 'mp4',
'display_id': 'oikarpoitondiapragmateyseonhparosias',
'title': 'md5:b979f4d640c568617d6547035528a149',
'description': 'md5:e54fc1977c7159b01cc11cd7d9d85550',
'timestamp': 1641772800,
'upload_date': '20220110',
'thumbnail': 'https://opentv-static.siliconweb.com/imgHandler/1920/70bc39fa-895b-4918-a364-c39d2135fc6d.jpg',
}
},
{
# blogger embed
'url': 'https://blog.tomeuvizoso.net/2019/01/a-panfrost-milestone.html',
@ -3671,6 +3688,11 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches(
rumble_urls, video_id, video_title, ie=RumbleEmbedIE.ie_key())
# Look for (tvopen|ethnos).gr embeds
tvopengr_urls = list(TVOpenGrEmbedIE._extract_urls(webpage))
if tvopengr_urls:
return self.playlist_from_matches(tvopengr_urls, video_id, video_title, ie=TVOpenGrEmbedIE.ie_key())
tvp_urls = TVPEmbedIE._extract_urls(webpage)
if tvp_urls:
return self.playlist_from_matches(tvp_urls, video_id, video_title, ie=TVPEmbedIE.ie_key())

Wyświetl plik

@ -0,0 +1,143 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
get_elements_text_and_html_by_attribute,
merge_dicts,
unescapeHTML,
)
class TVOpenGrBaseIE(InfoExtractor):
    """Common helper shared by the tvopen.gr / ethnos.gr extractors."""

    def _return_canonical_url(self, url, video_id):
        """Hand *url* over to TVOpenGrWatchIE using the page's Open Graph URL.

        The page at *url* is downloaded once; its og:url becomes the target of
        the returned url_result and its og:title is passed along as the title.
        """
        page = self._download_webpage(url, video_id)
        return self.url_result(
            self._og_search_url(page),
            ie=TVOpenGrWatchIE.ie_key(),
            video_id=video_id,
            video_title=self._og_search_title(page))
class TVOpenGrWatchIE(TVOpenGrBaseIE):
    """Extractor for ``/watch/<id>/<slug>`` pages on tvopen.gr and ethnos.gr."""

    IE_NAME = 'tvopengr:watch'
    IE_DESC = 'tvopen.gr (and ethnos.gr) videos'
    _VALID_URL = r'https?://(?P<netloc>(?:www\.)?(?:tvopen|ethnos)\.gr)/watch/(?P<id>\d+)/(?P<slug>[^/]+)'
    # Queried with ?cid=<video id>; the JSON response maps stream kinds to URLs
    _API_ENDPOINT = 'https://www.tvopen.gr/templates/data/player'
    _TESTS = [{
        'url': 'https://www.ethnos.gr/watch/101009/nikoskaprabelosdenexoymekanenanasthenhsemethmethmetallaxhomikron',
        'md5': '8728570e3a72e0f8d9475ba94859fdc1',
        'info_dict': {
            'id': '101009',
            'title': 'md5:51f68773dcb6c70498cd326f45fefdf0',
            'display_id': 'nikoskaprabelosdenexoymekanenanasthenhsemethmethmetallaxhomikron',
            'description': 'md5:78fff49f18fb3effe41b070e5c7685d6',
            'thumbnail': 'https://opentv-static.siliconweb.com/imgHandler/1920/d573ba71-ec5f-43c6-b4cb-d181f327d3a8.jpg',
            'ext': 'mp4',
            'upload_date': '20220109',
            'timestamp': 1641686400,
        },
    }, {
        'url': 'https://www.tvopen.gr/watch/100979/se28099agapaomenalla7cepeisodio267cmhthrargiapashskakias',
        'md5': '38f98a1be0c577db4ea2d1b1c0770c48',
        'info_dict': {
            'id': '100979',
            'title': 'md5:e021f3001e16088ee40fa79b20df305b',
            'display_id': 'se28099agapaomenalla7cepeisodio267cmhthrargiapashskakias',
            'description': 'md5:ba17db53954134eb8d625d199e2919fb',
            'thumbnail': 'https://opentv-static.siliconweb.com/imgHandler/1920/9bb71cf1-21da-43a9-9d65-367950fde4e3.jpg',
            'ext': 'mp4',
            'upload_date': '20220108',
            'timestamp': 1641600000,
        },
    }]

    def _extract_formats_and_subs(self, response, video_id):
        """Build formats and subtitles from the player API *response*.

        Only the 'stream', 'httpstream' and 'mpegdash' entries are used, and
        entries without a URL are ignored. HLS and DASH manifests are expanded
        non-fatally; any other URL is added as a single direct format.

        Returns a ``(formats, subtitles)`` tuple with the formats sorted.
        """
        formats, subs = [], {}
        for format_id, format_url in response.items():
            if format_id not in ('stream', 'httpstream', 'mpegdash') or not format_url:
                continue
            ext = determine_ext(format_url)
            if ext == 'm3u8':
                formats_, subs_ = self._extract_m3u8_formats_and_subtitles(
                    format_url, video_id, 'mp4', m3u8_id=format_id,
                    fatal=False)
            elif ext == 'mpd':
                # Unlike the m3u8 helper, the MPD helper takes no extension
                # argument: its third parameter is the manifest id.
                formats_, subs_ = self._extract_mpd_formats_and_subtitles(
                    format_url, video_id, mpd_id=format_id, fatal=False)
            else:
                formats.append({
                    'url': format_url,
                    'format_id': format_id,
                })
                continue
            formats.extend(formats_)
            self._merge_subtitles(subs_, target=subs)
        self._sort_formats(formats)
        return formats, subs

    @staticmethod
    def _scale_thumbnails_to_max_width(formats, thumbnails, url_width_re):
        """Rewrite thumbnail URLs to request the widest format's width.

        The largest ``(width, height)`` pair among *formats* is determined
        (tuple comparison, so width decides first). The part of every
        thumbnail URL matched by *url_width_re* is replaced with that width,
        and the pair is recorded as the thumbnail's dimensions. When no
        format reports a width, *thumbnails* is returned unchanged.
        """
        _keys = ('width', 'height')
        max_dimensions = max(
            (tuple(fmt.get(k) or 0 for k in _keys) for fmt in formats),
            default=(0, 0))
        if not max_dimensions[0]:
            return thumbnails
        return [
            merge_dicts(
                {'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])},
                dict(zip(_keys, max_dimensions)), thumbnail)
            for thumbnail in thumbnails
        ]

    def _real_extract(self, url):
        """Extract a watch page.

        URLs whose host is not tvopen.gr are resolved through the page's
        og:url and handed back to this extractor. Otherwise metadata comes
        from the page's JSON-LD VideoObject and formats from the player API.
        """
        netloc, video_id, display_id = self._match_valid_url(url).group('netloc', 'id', 'slug')
        if 'tvopen.gr' not in netloc:
            return self._return_canonical_url(url, video_id)
        webpage = self._download_webpage(url, video_id)
        info = self._search_json_ld(webpage, video_id, expected_type='VideoObject')
        info['formats'], info['subtitles'] = self._extract_formats_and_subs(
            self._download_json(self._API_ENDPOINT, video_id, query={'cid': video_id}),
            video_id)
        # JSON-LD may carry no thumbnails at all; only rescale what is there
        thumbnails = info.get('thumbnails')
        if thumbnails:
            info['thumbnails'] = self._scale_thumbnails_to_max_width(
                info['formats'], thumbnails, r'(?<=/imgHandler/)\d+')
        # A page without a class="description" element must not abort the
        # extraction, hence the explicit default for next()
        description, _html = next(
            get_elements_text_and_html_by_attribute('class', 'description', webpage),
            (None, None))
        # Only a <span> element is accepted as the description
        # NOTE(review): presumably other tags reuse this class name - confirm
        if description and _html.startswith('<span '):
            info['description'] = description
        info['id'] = video_id
        info['display_id'] = display_id
        return info
class TVOpenGrEmbedIE(TVOpenGrBaseIE):
    """Extractor for ``/embed/<id>`` player URLs on tvopen.gr and ethnos.gr."""

    IE_NAME = 'tvopengr:embed'
    IE_DESC = 'tvopen.gr embedded videos'
    # The dot before "gr" is escaped so that only the real domains match
    _VALID_URL = r'(?:https?:)?//(?:www\.|cdn\.|)(?:tvopen|ethnos)\.gr/embed/(?P<id>\d+)'
    # Matches an <iframe> whose quoted src attribute is exactly an embed URL
    _EMBED_RE = re.compile(rf'''<iframe[^>]+?src=(?P<_q1>["'])(?P<url>{_VALID_URL})(?P=_q1)''')
    _TESTS = [{
        'url': 'https://cdn.ethnos.gr/embed/100963',
        'md5': '2da147881f45571d81662d94d086628b',
        'info_dict': {
            'id': '100963',
            'display_id': 'koronoiosapotoysdieythyntestonsxoleionselftestgiaosoysdenbrhkan',
            'title': 'md5:2c71876fadf0cda6043da0da5fca2936',
            'description': 'md5:17482b4432e5ed30eccd93b05d6ea509',
            'thumbnail': 'https://opentv-static.siliconweb.com/imgHandler/1920/5804e07f-799a-4247-a696-33842c94ca37.jpg',
            'ext': 'mp4',
            'upload_date': '20220108',
            'timestamp': 1641600000,
        },
    }]

    @classmethod
    def _extract_urls(cls, webpage):
        """Yield the HTML-unescaped embed URL of every matching iframe in *webpage*."""
        for mobj in cls._EMBED_RE.finditer(webpage):
            yield unescapeHTML(mobj.group('url'))

    def _real_extract(self, url):
        """Resolve the embed URL to its canonical watch page."""
        video_id = self._match_id(url)
        return self._return_canonical_url(url, video_id)