[pearvideo] Improve (closes )

pull/8/head
Sergey M․ 2017-07-16 03:06:04 +07:00
rodzic 94b817edeb
commit decf86044d
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 2C393E0F18A9236D
3 zmienionych plików z 64 dodań i 35 usunięć

Wyświetl plik

@@ -762,7 +762,7 @@ from .pandoratv import PandoraTVIE
from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE
from .pbs import PBSIE
from .pear import PearIE
from .pearvideo import PearVideoIE
from .people import PeopleIE
from .periscope import (
PeriscopeIE,

Wyświetl plik

@@ -1,34 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class PearIE(InfoExtractor):
    """Extractor for single-video pages on pearvideo.com.

    Scrapes the video page HTML for the title, the summary text and the
    ``hdUrl`` value and returns them as a single-format info dict.
    """

    _VALID_URL = r'https?://(?:www\.)?pearvideo\.com/video_(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.pearvideo.com/video_1076290',
        'info_dict': {
            'id': '1076290',
            'ext': 'mp4',
            'title': '小浣熊在主人家玻璃上滚石头:没砸',
            'description': '小浣熊找到一个小石头,仿佛发现了一个宝贝。它不停地用石头按在玻璃上,滚来滚去,吸引主人注意。',
            'url': 'http://video.pearvideo.com/mp4/short/20170508/cont-1076290-10438018-hd.mp4'
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video page at *url*.

        The title and the media URL are required fields, so their lookups
        use the helper's default (fatal) mode instead of silently yielding
        ``None``; the description remains optional.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Mandatory field: no fatal=False here. Non-greedy capture so a
        # second </h1> on the same line cannot be swallowed into the title.
        title = self._html_search_regex(
            r'<h1[^>]+class="video-tt">(.+?)</h1>', webpage, 'title')

        # Optional field: a missing summary must not abort extraction.
        description = self._html_search_regex(
            r'<div[^>]+class="summary"[^>]*>([^<]+)<', webpage,
            'description', fatal=False)

        # Mandatory field, and an empty hdUrl="" is not a usable URL.
        # Stored under a separate name so the page URL argument is not
        # shadowed.
        video_url = self._html_search_regex(
            r'hdUrl="([^"]+)"', webpage, 'url')

        return {
            'id': video_id,
            'ext': 'mp4',
            'title': title,
            'description': description,
            'url': video_url,
        }

Wyświetl plik

@@ -0,0 +1,63 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
qualities,
unified_timestamp,
)
class PearVideoIE(InfoExtractor):
    """Information extractor for pearvideo.com video pages.

    Collects every ``<name>Url = "<url>"`` assignment found in the page as
    a format and scrapes title, description and publication date from the
    surrounding markup.
    """

    _VALID_URL = r'https?://(?:www\.)?pearvideo\.com/video_(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.pearvideo.com/video_1076290',
        'info_dict': {
            'id': '1076290',
            'ext': 'mp4',
            'title': '小浣熊在主人家玻璃上滚石头:没砸',
            'description': 'md5:01d576b747de71be0ee85eb7cac25f9d',
            'timestamp': 1494275280,
            'upload_date': '20170508',
        }
    }

    def _real_extract(self, url):
        """Build the info dict (formats plus metadata) for *url*."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Quality ranking keyed by the prefix found in front of "Url".
        # NOTE(review): tuple order is presumably worst-to-best; confirm
        # against utils.qualities.
        rank = qualities(
            ('ldflv', 'ld', 'sdflv', 'sd', 'hdflv', 'hd', 'src'))

        formats = []
        for match in re.finditer(
                r'(?P<id>[a-zA-Z]+)Url\s*=\s*(["\'])(?P<url>(?:https?:)?//.+?)\2',
                webpage):
            format_id = match.group('id')
            formats.append({
                'url': match.group('url'),
                'format_id': format_id,
                'quality': rank(format_id),
            })
        self._sort_formats(formats)

        # Title: heading element first, data-title attribute as fallback.
        title_patterns = (
            r'<h1[^>]+\bclass=(["\'])video-tt\1[^>]*>(?P<value>[^<]+)',
            r'<[^>]+\bdata-title=(["\'])(?P<value>(?:(?!\1).)+)\1',
        )
        title = self._search_regex(
            title_patterns, webpage, 'title', group='value')

        # Description: summary block, then data-summary attribute, and
        # finally the page's Description meta tag when both come up empty.
        description_patterns = (
            r'<div[^>]+\bclass=(["\'])summary\1[^>]*>(?P<value>[^<]+)',
            r'<[^>]+\bdata-summary=(["\'])(?P<value>(?:(?!\1).)+)\1',
        )
        description = self._search_regex(
            description_patterns, webpage, 'description', default=None,
            group='value')
        if not description:
            description = self._html_search_meta('Description', webpage)

        # The date text is optional; the raw match is handed straight to
        # unified_timestamp.
        date_text = self._search_regex(
            r'<div[^>]+\bclass=["\']date["\'][^>]*>([^<]+)',
            webpage, 'timestamp', fatal=False)
        timestamp = unified_timestamp(date_text)

        info = {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'formats': formats,
        }
        return info