Merge branch 'Roman2K-pornovoisines'

pull/8/head
Sergey M․ 2015-04-04 19:14:01 +06:00
commit 3fde134791
4 zmienionych plików z 102 dodań i 1 usunięć

Wyświetl plik

@ -227,6 +227,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual( self.assertEqual(
unified_strdate('2/2/2015 6:47:40 PM', day_first=False), unified_strdate('2/2/2015 6:47:40 PM', day_first=False),
'20150202') '20150202')
self.assertEqual(unified_strdate('25-09-2014'), '20140925')
def test_find_xpath_attr(self): def test_find_xpath_attr(self):
testxml = '''<root> testxml = '''<root>

Wyświetl plik

@ -388,6 +388,7 @@ from .pornhub import (
PornHubPlaylistIE, PornHubPlaylistIE,
) )
from .pornotube import PornotubeIE from .pornotube import PornotubeIE
from .pornovoisines import PornoVoisinesIE
from .pornoxo import PornoXOIE from .pornoxo import PornoXOIE
from .primesharetv import PrimeShareTVIE from .primesharetv import PrimeShareTVIE
from .promptfile import PromptFileIE from .promptfile import PromptFileIE

Wyświetl plik

@ -0,0 +1,96 @@
# coding: utf-8
from __future__ import unicode_literals
import re
import random
from .common import InfoExtractor
from ..utils import (
int_or_none,
float_or_none,
unified_strdate,
)
class PornoVoisinesIE(InfoExtractor):
    """Information extractor for pornovoisines.com ``showvideo`` pages.

    The media URL is not read from the page: it is built from
    ``_VIDEO_URL_TEMPLATE`` using the numeric video id and one of the
    stream servers listed in ``_SERVER_NUMBERS``. The remaining metadata
    is scraped from the downloaded web page.
    """

    _VALID_URL = r'http://(?:www\.)?pornovoisines\.com/showvideo/(?P<id>\d+)/(?P<display_id>[^/]+)'

    # Filled with (server number, video id) by build_video_url().
    _VIDEO_URL_TEMPLATE = (
        'http://stream%d.pornovoisines.com'
        '/static/media/video/transcoded/%s-640x360-1000-trscded.mp4')

    # Server numbers substituted into the "stream%d" host name.
    _SERVER_NUMBERS = (1, 2)

    _TEST = {
        'url': 'http://www.pornovoisines.com/showvideo/1285/recherche-appartement/',
        'md5': '5ac670803bc12e9e7f9f662ce64cf1d1',
        'info_dict': {
            'id': '1285',
            'display_id': 'recherche-appartement',
            'ext': 'mp4',
            'title': 'Recherche appartement',
            'description': 'md5:819ea0b785e2a04667a1a01cdc89594e',
            # Raw string: "\." is a regex escape, not a string escape.
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20140925',
            'duration': 120,
            'view_count': int,
            'average_rating': float,
            'categories': ['Débutante', 'Scénario', 'Sodomie'],
            'age_limit': 18,
        }
    }

    @classmethod
    def build_video_url(cls, num):
        """Return the media URL for video id ``num``.

        The stream server is picked at random from ``_SERVER_NUMBERS``,
        so repeated calls may return different host names.
        """
        return cls._VIDEO_URL_TEMPLATE % (random.choice(cls._SERVER_NUMBERS), num)

    def _real_extract(self, url):
        """Download the page at ``url`` and return its info dictionary.

        ``url`` is expected to match ``_VALID_URL``; the ``id`` and
        ``display_id`` groups of that pattern are taken from it.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, video_id)

        video_url = self.build_video_url(video_id)

        # The title lookup uses the helper's default failure handling;
        # every other field below is requested with fatal=False.
        title = self._html_search_regex(
            r'<h1>(.+?)</h1>', webpage, 'title', flags=re.DOTALL)
        description = self._html_search_regex(
            r'<article id="descriptif">(.+?)</article>',
            webpage, "description", fatal=False, flags=re.DOTALL)

        # The pattern drops an optional leading slash from the image path,
        # so the site prefix below always ends up with exactly one "/".
        thumbnail = self._search_regex(
            r'<div id="mediaspace%s">\s*<img src="/?([^"]+)"' % video_id,
            webpage, 'thumbnail', fatal=False)
        if thumbnail:
            thumbnail = 'http://www.pornovoisines.com/%s' % thumbnail

        upload_date = unified_strdate(self._search_regex(
            r'Publié le ([\d-]+)', webpage, 'upload date', fatal=False))
        # Raw string: "\d" is a regex escape, not a string escape.
        duration = int_or_none(self._search_regex(
            r'Durée (\d+)', webpage, 'duration', fatal=False))
        view_count = int_or_none(self._search_regex(
            r'(\d+) vues', webpage, 'view count', fatal=False))

        # The rating is matched with a decimal comma ("\d+,\d+"); swap it
        # for a dot before converting to float.
        average_rating = self._search_regex(
            r'Note : (\d+,\d+)', webpage, 'average rating', fatal=False)
        if average_rating:
            average_rating = float_or_none(average_rating.replace(',', '.'))

        # Categories come from the comma-separated "keywords" meta tag.
        categories = self._html_search_meta(
            'keywords', webpage, 'categories', fatal=False)
        if categories:
            categories = [category.strip() for category in categories.split(',')]

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'duration': duration,
            'view_count': view_count,
            'average_rating': average_rating,
            'categories': categories,
            'age_limit': 18,
        }

Wyświetl plik

@ -730,7 +730,8 @@ def unified_strdate(date_str, day_first=True):
# Replace commas # Replace commas
date_str = date_str.replace(',', ' ') date_str = date_str.replace(',', ' ')
# %z (UTC offset) is only supported in python>=3.2 # %z (UTC offset) is only supported in python>=3.2
date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str) if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str):
date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
# Remove AM/PM + timezone # Remove AM/PM + timezone
date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str) date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
@ -759,6 +760,7 @@ def unified_strdate(date_str, day_first=True):
] ]
if day_first: if day_first:
format_expressions.extend([ format_expressions.extend([
'%d-%m-%Y',
'%d.%m.%Y', '%d.%m.%Y',
'%d/%m/%Y', '%d/%m/%Y',
'%d/%m/%y', '%d/%m/%y',
@ -766,6 +768,7 @@ def unified_strdate(date_str, day_first=True):
]) ])
else: else:
format_expressions.extend([ format_expressions.extend([
'%m-%d-%Y',
'%m.%d.%Y', '%m.%d.%Y',
'%m/%d/%Y', '%m/%d/%Y',
'%m/%d/%y', '%m/%d/%y',