[3qsdn] Add extractor

pull/8/head
Sergey M․ 2016-05-14 23:35:03 +06:00
rodzic 364cf465dd
commit 5c86bfe70f
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 2C393E0F18A9236D
2 zmienionych plików z 133 dodań i 0 usunięć
youtube_dl/extractor

Wyświetl plik

@ -766,6 +766,7 @@ from .thesixtyone import TheSixtyOneIE
from .thestar import TheStarIE
from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE
from .threeqsdn import ThreeQSDNIE
from .tinypic import TinyPicIE
from .tlc import TlcDeIE
from .tmz import (

Wyświetl plik

@ -0,0 +1,132 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
js_to_json,
mimetype2ext,
)
class ThreeQSDNIE(InfoExtractor):
IE_NAME = '3qsdn'
IE_DESC = '3Q SDN'
_VALID_URL = r'https?://playout\.3qsdn\.com/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
_TESTS = [{
# ondemand from http://www.philharmonie.tv/veranstaltung/26/
'url': 'http://playout.3qsdn.com/0280d6b9-1215-11e6-b427-0cc47a188158?protocol=http',
'md5': 'ab040e37bcfa2e0c079f92cb1dd7f6cd',
'info_dict': {
'id': '0280d6b9-1215-11e6-b427-0cc47a188158',
'ext': 'mp4',
'title': '0280d6b9-1215-11e6-b427-0cc47a188158',
'is_live': False,
},
'expected_warnings': ['Failed to download MPD manifest'],
}, {
# live video stream
'url': 'https://playout.3qsdn.com/d755d94b-4ab9-11e3-9162-0025907ad44f?js=true',
'info_dict': {
'id': 'd755d94b-4ab9-11e3-9162-0025907ad44f',
'ext': 'mp4',
'title': 'd755d94b-4ab9-11e3-9162-0025907ad44f',
'is_live': False,
},
}, {
# live audio stream
'url': 'http://playout.3qsdn.com/9edf36e0-6bf2-11e2-a16a-9acf09e2db48',
'only_matching': True,
}, {
# live audio stream with some 404 URLs
'url': 'http://playout.3qsdn.com/ac5c3186-777a-11e2-9c30-9acf09e2db48',
'only_matching': True,
}, {
# geo restricted with 'This content is not available in your country'
'url': 'http://playout.3qsdn.com/d63a3ffe-75e8-11e2-9c30-9acf09e2db48',
'only_matching': True,
}, {
# geo restricted with 'playout.3qsdn.com/forbidden'
'url': 'http://playout.3qsdn.com/8e330f26-6ae2-11e2-a16a-9acf09e2db48',
'only_matching': True,
}, {
# live video with rtmp link
'url': 'https://playout.3qsdn.com/6092bb9e-8f72-11e4-a173-002590c750be',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
js = self._download_webpage(
'http://playout.3qsdn.com/%s' % video_id, video_id,
query={'js': 'true'})
if any(p in js for p in (
'>This content is not available in your country',
'playout.3qsdn.com/forbidden')):
self.raise_geo_restricted()
stream_content = self._search_regex(
r'streamContent\s*:\s*(["\'])(?P<content>.+?)\1', js,
'stream content', default='demand', group='content')
live = stream_content == 'live'
stream_type = self._search_regex(
r'streamType\s*:\s*(["\'])(?P<type>audio|video)\1', js,
'stream type', default='video', group='type')
formats = []
urls = set()
def extract_formats(item_url, item={}):
if not item_url or item_url in urls:
return
urls.add(item_url)
type_ = item.get('type')
ext = determine_ext(item_url, default_ext=None)
if type_ == 'application/dash+xml' or ext == 'mpd':
formats.extend(self._extract_mpd_formats(
item_url, video_id, mpd_id='mpd', fatal=False))
elif type_ in ('application/vnd.apple.mpegURL', 'application/x-mpegurl') or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
item_url, video_id, 'mp4',
entry_protocol='m3u8' if live else 'm3u8_native',
m3u8_id='hls', fatal=False))
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
item_url, video_id, f4m_id='hds', fatal=False))
else:
if not self._is_valid_url(item_url, video_id):
return
formats.append({
'url': item_url,
'format_id': item.get('quality'),
'ext': 'mp4' if item_url.startswith('rtsp') else mimetype2ext(type_) or ext,
'vcodec': 'none' if stream_type == 'audio' else None,
})
for item_js in re.findall(r'({.*?\b(?:src|source)\s*:\s*["\'].+?})', js):
f = self._parse_json(
item_js, video_id, transform_source=js_to_json, fatal=False)
if not f:
continue
extract_formats(f.get('src'), f)
# More relaxed version to collect additional URLs and acting
# as a future-proof fallback
for _, src in re.findall(r'\b(?:src|source)\s*:\s*(["\'])((?:https?|rtsp)://.+?)\1', js):
extract_formats(src)
self._sort_formats(formats)
title = self._live_title(video_id) if live else video_id
return {
'id': video_id,
'title': title,
'is_live': live,
'formats': formats,
}