Merge branch 'master' into subtitles_rework

pull/1326/head
Ismael Mejia 2013-09-11 14:26:48 +02:00
commit cf1dd0c59e
12 zmienionych plików z 158 dodań i 20 usunięć

Wyświetl plik

@ -3,7 +3,8 @@
import json
import sys
import hashlib
import urllib.request
import os.path
if len(sys.argv) <= 1:
print('Specify the version number as parameter')
@ -25,6 +26,7 @@ filenames = {
'tar': 'youtube-dl-%s.tar.gz' % version}
build_dir = os.path.join('..', '..', 'build', version)
for key, filename in filenames.items():
url = 'https://yt-dl.org/downloads/%s/%s' % (version, filename)
fn = os.path.join(build_dir, filename)
with open(fn, 'rb') as f:
data = f.read()

Wyświetl plik

@ -14,7 +14,7 @@ def main():
template = tmplf.read()
ie_htmls = []
for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME):
for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()):
ie_html = '<b>{}</b>'.format(ie.IE_NAME)
try:
ie_html += ': {}'.format(ie.IE_DESC)

Wyświetl plik

@ -23,9 +23,9 @@ tests = [
# 86 - vfluy6kdb 2013/09/06
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
"yuioplkjhgfdsazxcvbnm12345678q0QWrRTYUIOELKJHGFD-AZXCVBNM!@#$%^&*()_<+={[|};?/>.S"),
# 85
# 85 - vflkuzxcs 2013/09/11
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
"T>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOvUY.REWQ0987654321mnbqcxzasdfghjklpoiuytr"),
# 84 - vflg0g8PQ 2013/08/29 (sporadic)
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),

Wyświetl plik

@ -72,10 +72,13 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))
def test_youtube_extract(self):
self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc')
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch_popup?v=BaW_jenozKc'), 'BaW_jenozKc')
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE()._extract_id(url), id)
assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc')
assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
def test_no_duplicates(self):
ies = gen_extractors()

Wyświetl plik

@ -66,7 +66,7 @@ class FileDownloader(object):
@staticmethod
def format_seconds(seconds):
(mins, secs) = divmod(seconds, 60)
(hours, eta_mins) = divmod(mins, 60)
(hours, mins) = divmod(mins, 60)
if hours > 99:
return '--:--:--'
if hours == 0:

Wyświetl plik

@ -29,6 +29,10 @@ from .escapist import EscapistIE
from .exfm import ExfmIE
from .facebook import FacebookIE
from .flickr import FlickrIE
from .francetv import (
PluzzIE,
FranceTvInfoIE,
)
from .freesound import FreesoundIE
from .funnyordie import FunnyOrDieIE
from .gamespot import GameSpotIE
@ -75,6 +79,7 @@ from .roxwel import RoxwelIE
from .rtlnow import RTLnowIE
from .sina import SinaIE
from .slashdot import SlashdotIE
from .slideshare import SlideshareIE
from .sohu import SohuIE
from .soundcloud import SoundcloudIE, SoundcloudSetIE
from .spiegel import SpiegelIE

Wyświetl plik

@ -5,7 +5,7 @@ from .common import InfoExtractor
class Canalc2IE(InfoExtractor):
_IE_NAME = 'canalc2.tv'
IE_NAME = 'canalc2.tv'
_VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui'
_TEST = {

Wyświetl plik

@ -0,0 +1,77 @@
# encoding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
compat_urlparse,
)
class FranceTVBaseInfoExtractor(InfoExtractor):
def _extract_video(self, video_id):
xml_desc = self._download_webpage(
'http://www.francetvinfo.fr/appftv/webservices/video/'
'getInfosOeuvre.php?id-diffusion='
+ video_id, video_id, 'Downloading XML config')
info = xml.etree.ElementTree.fromstring(xml_desc.encode('utf-8'))
manifest_url = info.find('videos/video/url').text
video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
video_url = video_url.replace('/z/', '/i/')
thumbnail_path = info.find('image').text
return {'id': video_id,
'ext': 'mp4',
'url': video_url,
'title': info.find('titre').text,
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
'description': info.find('synopsis').text,
}
class PluzzIE(FranceTVBaseInfoExtractor):
IE_NAME = u'pluzz.francetv.fr'
_VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html'
_TEST = {
u'url': u'http://pluzz.francetv.fr/videos/allo_rufo_saison5_,88439064.html',
u'file': u'88439064.mp4',
u'info_dict': {
u'title': u'Allô Rufo',
u'description': u'md5:d909f1ebdf963814b65772aea250400e',
},
u'params': {
u'skip_download': True,
},
}
def _real_extract(self, url):
title = re.match(self._VALID_URL, url).group(1)
webpage = self._download_webpage(url, title)
video_id = self._search_regex(
r'data-diffusion="(\d+)"', webpage, 'ID')
return self._extract_video(video_id)
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
IE_NAME = u'francetvinfo.fr'
_VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+).html'
_TEST = {
u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
u'file': u'84981923.mp4',
u'info_dict': {
u'title': u'Soir 3',
},
u'params': {
u'skip_download': True,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
page_title = mobj.group('title')
webpage = self._download_webpage(url, page_title)
video_id = self._search_regex(r'id-video=(\d+?)"', webpage, u'video id')
return self._extract_video(video_id)

Wyświetl plik

@ -0,0 +1,47 @@
import re
import json
from .common import InfoExtractor
from ..utils import (
compat_urlparse,
ExtractorError,
)
class SlideshareIE(InfoExtractor):
_VALID_URL = r'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'
_TEST = {
u'url': u'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
u'file': u'25665706.mp4',
u'info_dict': {
u'title': u'Managing Scale and Complexity',
u'description': u'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
page_title = mobj.group('title')
webpage = self._download_webpage(url, page_title)
slideshare_obj = self._search_regex(
r'var slideshare_object = ({.*?}); var user_info =',
webpage, u'slideshare object')
info = json.loads(slideshare_obj)
if info['slideshow']['type'] != u'video':
raise ExtractorError(u'Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True)
doc = info['doc']
bucket = info['jsplayer']['video_bucket']
ext = info['jsplayer']['video_extension']
video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
return {
'_type': 'video',
'id': info['slideshow']['id'],
'title': info['slideshow']['title'],
'ext': ext,
'url': video_url,
'thumbnail': info['slideshow']['pin_image_url'],
'description': self._og_search_description(webpage),
}

Wyświetl plik

@ -8,7 +8,7 @@ from ..utils import ExtractorError
class SohuIE(InfoExtractor):
_VALID_URL = r'https?://tv\.sohu\.com/\d+?/n(?P<id>\d+)\.shtml.*?'
_VALID_URL = r'https?://(?P<mytv>my\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P<id>\d+)\.shtml.*?'
_TEST = {
u'url': u'http://tv.sohu.com/20130724/n382479172.shtml#super',
@ -21,8 +21,11 @@ class SohuIE(InfoExtractor):
def _real_extract(self, url):
def _fetch_data(vid_id):
base_data_url = u'http://hot.vrs.sohu.com/vrs_flash.action?vid='
def _fetch_data(vid_id, mytv=False):
if mytv:
base_data_url = 'http://my.tv.sohu.com/play/videonew.do?vid='
else:
base_data_url = u'http://hot.vrs.sohu.com/vrs_flash.action?vid='
data_url = base_data_url + str(vid_id)
data_json = self._download_webpage(
data_url, video_id,
@ -31,15 +34,16 @@ class SohuIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
mytv = mobj.group('mytv') is not None
webpage = self._download_webpage(url, video_id)
raw_title = self._html_search_regex(r'(?s)<title>(.+?)</title>',
webpage, u'video title')
title = raw_title.partition('-')[0].strip()
vid = self._html_search_regex(r'var vid="(\d+)"', webpage,
vid = self._html_search_regex(r'var vid ?= ?["\'](\d+)["\']', webpage,
u'video path')
data = _fetch_data(vid)
data = _fetch_data(vid, mytv)
QUALITIES = ('ori', 'super', 'high', 'nor')
vid_ids = [data['data'][q + 'Vid']
@ -51,7 +55,7 @@ class SohuIE(InfoExtractor):
# For now, we just pick the highest available quality
vid_id = vid_ids[-1]
format_data = data if vid == vid_id else _fetch_data(vid_id)
format_data = data if vid == vid_id else _fetch_data(vid_id, mytv)
part_count = format_data['data']['totalBlocks']
allot = format_data['allot']
prot = format_data['prot']

Wyświetl plik

@ -209,7 +209,7 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
|youtu\.be/ # just youtu.be/xxxx
)
)? # all until now is optional -> you can pass the naked ID
([0-9A-Za-z_-]+) # here is it! the YouTube video ID
([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
(?(1).+)? # if we found the ID, everything can follow
$"""
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
@ -484,7 +484,7 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
elif len(s) == 86:
return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
elif len(s) == 85:
return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
return s[40] + s[82:43:-1] + s[22] + s[42:40:-1] + s[83] + s[39:22:-1] + s[0] + s[21:2:-1]
elif len(s) == 84:
return s[81:36:-1] + s[0] + s[35:2:-1]
elif len(s) == 83:
@ -583,7 +583,7 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
formats_urls = _get_urls(manifest)
for format_url in formats_urls:
itag = self._search_regex(r'itag%3D(\d+?)/', format_url, 'itag')
itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
url_map[itag] = format_url
return url_map

Wyświetl plik

@ -1,2 +1,2 @@
__version__ = '2013.09.06.1'
__version__ = '2013.11.09'