Add CSpanIE (closes #312)

pull/672/merge
Jaime Marquínez Ferrándiz 2013-06-26 17:55:54 +02:00
rodzic 2e32528012
commit aa0c87391c
3 zmienionych plików z 56 dodań i 0 usunięć

Wyświetl plik

@ -695,5 +695,15 @@
"info_dict": {
"title": "卡马乔国足开大脚长传冲吊集锦"
}
},
{
"name": "CSpan",
"url": "http://www.c-spanvideo.org/program/HolderonV",
"file": "315139.flv",
"md5": "74a623266956f69e4df0068ab6c80fe4",
"info_dict": {
"title": "Attorney General Eric Holder on Voting Rights Act Decision"
},
"skip": "Requires rtmpdump"
}
]

Wyświetl plik

@ -6,6 +6,7 @@ from .bliptv import BlipTVIE, BlipTVUserIE
from .breakcom import BreakIE
from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE
from .cspan import CSpanIE
from .dailymotion import DailymotionIE
from .depositfiles import DepositFilesIE
from .eighttracks import EightTracksIE
@ -132,6 +133,7 @@ def gen_extractors():
VevoIE(),
JukeboxIE(),
TudouIE(),
CSpanIE(),
GenericIE()
]

Wyświetl plik

@ -0,0 +1,44 @@
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
)
class CSpanIE(InfoExtractor):
    """Information extractor for c-spanvideo.org program pages."""

    # The dots in the host name are escaped so that only the literal host
    # matches; the single capture group is whatever follows "/program/".
    _VALID_URL = r'http://www\.c-spanvideo\.org/program/(.*)'

    def _real_extract(self, url):
        """Build the info dictionary for a single program page.

        The program page is downloaded to find the numeric ``programid``;
        that id is then sent to the ``flashXml.php`` service, whose response
        supplies the title, the stream URL template and the RTMP play path.
        Description and thumbnail are read from the program page's meta tags.

        Args:
            url: Program page URL; expected to match ``_VALID_URL``.

        Returns:
            A dict with the keys ``id``, ``title``, ``ext``, ``url``,
            ``play_path``, ``description`` and ``thumbnail``.
        """
        # NOTE(review): assumes the caller has already checked that url
        # matches _VALID_URL; otherwise mobj is None and this raises.
        mobj = re.match(self._VALID_URL, url)
        prog_name = mobj.group(1)

        webpage = self._download_webpage(url, prog_name)
        video_id = self._search_regex(r'programid=(.*?)&', webpage, 'video id')

        # Second request: the XML service that describes the stream.
        data = compat_urllib_parse.urlencode({'programid': video_id,
                                              'dynamic': '1'})
        info_url = 'http://www.c-spanvideo.org/common/services/flashXml.php?' + data
        video_info = self._download_webpage(info_url, video_id, u'Downloading video info')

        self.report_extraction(video_id)

        title = self._html_search_regex(r'<string name="title">(.*?)</string>',
                                        video_info, 'title')
        description = self._html_search_regex(
            r'<meta (?:property="og:|name=")description" content="(.*?)"',
            webpage, 'description',
            flags=re.MULTILINE | re.DOTALL)
        thumbnail = self._html_search_regex(
            r'<meta property="og:image" content="(.*?)"',
            webpage, 'thumbnail')

        # Kept in its own local instead of rebinding the ``url`` parameter,
        # so the page URL and the stream URL are never confused.
        video_url = self._search_regex(r'<string name="URL">(.*?)</string>',
                                       video_info, 'video url')
        # The service returns a template; fill in the protocol and port
        # placeholders to obtain a concrete RTMP URL.
        video_url = video_url.replace('$(protocol)', 'rtmp').replace('$(port)', '443')
        path = self._search_regex(r'<string name="path">(.*?)</string>',
                                  video_info, 'rtmp play path')

        return {
            'id': video_id,
            'title': title,
            'ext': 'flv',
            'url': video_url,
            'play_path': path,
            'description': description,
            'thumbnail': thumbnail,
        }