Merge branch 'master' of github.com:rg3/youtube-dl

pull/3655/head
Jaime Marquínez Ferrándiz 2014-08-31 23:47:26 +02:00
commit 9ea9b61448
4 zmienionych plików z 146 dodań i 2 usunięć

Wyświetl plik

@ -135,6 +135,7 @@ from .grooveshark import GroovesharkIE
from .hark import HarkIE
from .helsinki import HelsinkiIE
from .hentaistigma import HentaiStigmaIE
from .hornbunny import HornBunnyIE
from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE
from .howstuffworks import HowStuffWorksIE

Wyświetl plik

@ -5,6 +5,7 @@ import re
import json
import base64
import zlib
import xml.etree.ElementTree
from hashlib import sha1
from math import pow, sqrt, floor
@ -17,6 +18,7 @@ from ..utils import (
intlist_to_bytes,
unified_strdate,
clean_html,
urlencode_postdata,
)
from ..aes import (
aes_cbc_decrypt,
@ -51,6 +53,26 @@ class CrunchyrollIE(InfoExtractor):
'1080': ('80', '108'),
}
def _login(self):
(username, password) = self._get_login_info()
if username is None:
return
self.report_login()
login_url = 'https://www.crunchyroll.com/?a=formhandler'
data = urlencode_postdata({
'formname': 'RpcApiUser_Login',
'name': username,
'password': password,
})
login_request = compat_urllib_request.Request(login_url, data)
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
self._download_webpage(login_request, None, False, 'Wrong login info')
def _real_initialize(self):
self._login()
def _decrypt_subtitles(self, data, iv, id):
data = bytes_to_intlist(data)
iv = bytes_to_intlist(iv)
@ -97,6 +119,75 @@ class CrunchyrollIE(InfoExtractor):
output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
return output
def _convert_subtitles_to_ass(self, subtitles):
output = ''
def ass_bool(strvalue):
assvalue = '0'
if strvalue == '1':
assvalue = '-1'
return assvalue
sub_root = xml.etree.ElementTree.fromstring(subtitles)
if not sub_root:
return output
output = '[Script Info]\n'
output += 'Title: %s\n' % sub_root.attrib["title"]
output += 'ScriptType: v4.00+\n'
output += 'WrapStyle: %s\n' % sub_root.attrib["wrap_style"]
output += 'PlayResX: %s\n' % sub_root.attrib["play_res_x"]
output += 'PlayResY: %s\n' % sub_root.attrib["play_res_y"]
output += """ScaledBorderAndShadow: yes
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
"""
for style in sub_root.findall('./styles/style'):
output += 'Style: ' + style.attrib["name"]
output += ',' + style.attrib["font_name"]
output += ',' + style.attrib["font_size"]
output += ',' + style.attrib["primary_colour"]
output += ',' + style.attrib["secondary_colour"]
output += ',' + style.attrib["outline_colour"]
output += ',' + style.attrib["back_colour"]
output += ',' + ass_bool(style.attrib["bold"])
output += ',' + ass_bool(style.attrib["italic"])
output += ',' + ass_bool(style.attrib["underline"])
output += ',' + ass_bool(style.attrib["strikeout"])
output += ',' + style.attrib["scale_x"]
output += ',' + style.attrib["scale_y"]
output += ',' + style.attrib["spacing"]
output += ',' + style.attrib["angle"]
output += ',' + style.attrib["border_style"]
output += ',' + style.attrib["outline"]
output += ',' + style.attrib["shadow"]
output += ',' + style.attrib["alignment"]
output += ',' + style.attrib["margin_l"]
output += ',' + style.attrib["margin_r"]
output += ',' + style.attrib["margin_v"]
output += ',' + style.attrib["encoding"]
output += '\n'
output += """
[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
"""
for event in sub_root.findall('./events/event'):
output += 'Dialogue: 0'
output += ',' + event.attrib["start"]
output += ',' + event.attrib["end"]
output += ',' + event.attrib["style"]
output += ',' + event.attrib["name"]
output += ',' + event.attrib["margin_l"]
output += ',' + event.attrib["margin_r"]
output += ',' + event.attrib["margin_v"]
output += ',' + event.attrib["effect"]
output += ',' + event.attrib["text"]
output += '\n'
return output
def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
@ -158,6 +249,7 @@ class CrunchyrollIE(InfoExtractor):
})
subtitles = {}
sub_format = self._downloader.params.get('subtitlesformat', 'srt')
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
sub_page = self._download_webpage('http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id='+sub_id,\
video_id, note='Downloading subtitles for '+sub_name)
@ -174,7 +266,10 @@ class CrunchyrollIE(InfoExtractor):
lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
if not lang_code:
continue
subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle)
if sub_format == 'ass':
subtitles[lang_code] = self._convert_subtitles_to_ass(subtitle)
else:
subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle)
return {
'id': video_id,

Wyświetl plik

@ -0,0 +1,44 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import int_or_none
class HornBunnyIE(InfoExtractor):
_VALID_URL = r'http?://(?:www\.)?hornbunny\.com/videos/(?P<title_dash>[a-z-]+)-(?P<id>\d+)\.html'
_TEST = {
'url': 'http://hornbunny.com/videos/panty-slut-jerk-off-instruction-5227.html',
'md5': '95e40865aedd08eff60272b704852ad7',
'info_dict': {
'id': '5227',
'ext': 'flv',
'title': 'panty slut jerk off instruction',
'duration': 550
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'class="title">(.*?)</h2>', webpage, 'title')
redirect_url = self._html_search_regex(r'pg&settings=(.*?)\|0"\);', webpage, 'title')
webpage2 = self._download_webpage(redirect_url, video_id)
video_url = self._html_search_regex(r'flvMask:(.*?);', webpage2, 'video_url')
mobj = re.search(r'<strong>Runtime:</strong> (?P<minutes>\d+):(?P<seconds>\d+)</div>', webpage)
duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
view_count = self._html_search_regex(r'<strong>Views:</strong> (\d+)</div>', webpage, 'view count', fatal=False)
return {
'id': video_id,
'url': video_url,
'title': title,
'ext': 'flv',
'duration': duration,
'view_count': int_or_none(view_count),
}

Wyświetl plik

@ -316,6 +316,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
u"upload_date": u"20121002",
u"description": u"test chars: \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .",
u"categories": [u'Science & Technology'],
'like_count': int,
'dislike_count': int,
}
},
{
@ -784,7 +786,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
upload_date = unified_strdate(upload_date)
m_cat_container = get_element_by_id("eow-category", video_webpage)
m_cat_container = self._search_regex(
r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
video_webpage, 'categories', fatal=False)
if m_cat_container:
category = self._html_search_regex(
r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',