[allocine] add extractor for allocine.fr (fixes #3189)

pull/8/head
Petr Půlpán 2014-07-05 14:42:26 +02:00
rodzic ba4133c9eb
commit 49cbe7c8e3
2 zmienionych plików z 90 dodań i 0 usunięć

Wyświetl plik

@ -3,6 +3,7 @@ from .addanime import AddAnimeIE
from .aftonbladet import AftonbladetIE
from .anitube import AnitubeIE
from .aol import AolIE
from .allocine import AllocineIE
from .aparat import AparatIE
from .appletrailers import AppleTrailersIE
from .archiveorg import ArchiveOrgIE

Wyświetl plik

@ -0,0 +1,89 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..utils import (
compat_str,
qualities,
determine_ext,
)
class AllocineIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?P<typ>article|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=)(?P<id>[0-9]+)(?:\.html)?'
_TESTS = [{
'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html',
'md5': '0c9fcf59a841f65635fa300ac43d8269',
'info_dict': {
'id': '19546517',
'ext': 'mp4',
'title': 'Astérix - Le Domaine des Dieux Teaser VF',
'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
'thumbnail': 're:http://.*\.jpg',
},
}, {
'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html',
'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0',
'info_dict': {
'id': '19540403',
'ext': 'mp4',
'title': 'Planes 2 Bande-annonce VF',
'description': 'md5:c4b1f7bd682a91de6491ada267ec0f4d',
'thumbnail': 're:http://.*\.jpg',
},
}, {
'url': 'http://www.allocine.fr/film/fichefilm_gen_cfilm=181290.html',
'md5': '101250fb127ef9ca3d73186ff22a47ce',
'info_dict': {
'id': '19544709',
'ext': 'mp4',
'title': 'Dragons 2 - Bande annonce finale VF',
'description': 'md5:e74a4dc750894bac300ece46c7036490',
'thumbnail': 're:http://.*\.jpg',
},
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
typ = mobj.group('typ')
display_id = mobj.group('id')
webpage = self._download_webpage(url, display_id)
if typ == 'film':
video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id')
else:
player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player')
player_data = json.loads(player)
video_id = compat_str(player_data['refMedia'])
xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id)
video = xml.find('.//AcVisionVideo').attrib
quality = qualities(['ld', 'md', 'hd'])
formats = []
for k, v in video.items():
if re.match(r'.+_path', k):
format_id = k.split('_')[0]
formats.append({
'format_id': format_id,
'quality': quality(format_id),
'url': v,
'ext': determine_ext(v),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': video['videoTitle'],
'thumbnail': self._og_search_thumbnail(webpage),
'formats': formats,
'description': self._og_search_description(webpage),
}