Add an extractor for Internet Movie Database trailers (closes #1832)

pull/1850/head
Jaime Marquínez Ferrándiz 2013-11-28 13:32:49 +01:00
rodzic 2be54167d0
commit d8d6148628
2 zmienionych plików z 60 dodań i 0 usunięć

Wyświetl plik

@ -71,6 +71,7 @@ from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE
from .hypem import HypemIE
from .ign import IGNIE, OneUPIE
from .imdb import ImdbIE
from .ina import InaIE
from .infoq import InfoQIE
from .instagram import InstagramIE

Wyświetl plik

@ -0,0 +1,59 @@
import re
import json
from .common import InfoExtractor
from ..utils import (
compat_urlparse,
get_element_by_attribute,
)
class ImdbIE(InfoExtractor):
IE_NAME = u'imdb'
IE_DESC = u'Internet Movie Database trailers'
_VALID_URL = r'http://www\.imdb\.com/video/imdb/vi(?P<id>\d+)'
_TEST = {
u'url': u'http://www.imdb.com/video/imdb/vi2524815897',
u'md5': u'9f34fa777ade3a6e57a054fdbcb3a068',
u'info_dict': {
u'id': u'2524815897',
u'ext': u'mp4',
u'title': u'Ice Age: Continental Drift Trailer (No. 2) - IMDb',
u'description': u'md5:9061c2219254e5d14e03c25c98e96a81',
u'duration': 151,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url,video_id)
descr = get_element_by_attribute('itemprop', 'description', webpage)
available_formats = re.findall(
r'case \'(?P<f_id>.*?)\' :$\s+url = \'(?P<path>.*?)\'', webpage,
flags=re.MULTILINE)
formats = []
for f_id, f_path in available_formats:
format_page = self._download_webpage(
compat_urlparse.urljoin(url, f_path),
u'Downloading info for %s format' % f_id)
json_data = get_element_by_attribute('class', 'imdb-player-data',
format_page)
info = json.loads(json_data)
format_info = info['videoPlayerObject']['video']
formats.append({
'format_id': f_id,
'url': format_info['url'],
'height': format_info['height'],
'width': format_info['width'],
})
return {
'id': video_id,
'title': self._og_search_title(webpage),
'formats': formats,
'description': descr,
'thumbnail': format_info['slate'],
'duration': int(info['titleObject']['title']['duration_seconds']),
}