[rtrfm] Add extractor (#1628)

Authored by: pabs3
pull/1686/head
Paul Wise 2021-11-19 06:14:38 +08:00 zatwierdzone przez GitHub
rodzic 402cd603a4
commit cfcaf64a4b
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 4AEE18F83AFDEB23
2 zmienionych plików z 68 dodań i 0 usunięć

Wyświetl plik

@ -1235,6 +1235,7 @@ from .rtl2 import (
RTL2YouSeriesIE, RTL2YouSeriesIE,
) )
from .rtp import RTPIE from .rtp import RTPIE
from .rtrfm import RTRFMIE
from .rts import RTSIE from .rts import RTSIE
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE
from .rtvnh import RTVNHIE from .rtvnh import RTVNHIE

Wyświetl plik

@ -0,0 +1,67 @@
from __future__ import unicode_literals
from .common import InfoExtractor
class RTRFMIE(InfoExtractor):
    """Extractor for RTRFM (rtrfm.com.au) show and show-episode pages.

    Both ``/shows/<name>/`` and ``/show-episode/<name>-<date>/`` pages embed
    a ``.playShow(...)`` / ``.playShowFrom(...)`` call carrying the show
    slug, the episode date and the show title. Those values are used to ask
    the restreams endpoint for the actual audio URL.
    """

    _VALID_URL = r'https?://(?:www\.)?rtrfm\.com\.au/(?:shows|show-episode)/(?P<id>[^/?\#&]+)'
    _TESTS = [
        {
            'url': 'https://rtrfm.com.au/shows/breakfast/',
            'md5': '46168394d3a5ce237cf47e85d0745413',
            'info_dict': {
                'id': 'breakfast-2021-11-16',
                'ext': 'mp3',
                'series': 'Breakfast with Taylah',
                'title': r're:^Breakfast with Taylah \d{4}-\d{2}-\d{2}$',
                'description': 'md5:0979c3ab1febfbec3f1ccb743633c611',
            },
            'skip': 'ID and md5 changes daily',
        },
        {
            'url': 'https://rtrfm.com.au/show-episode/breakfast-2021-11-11/',
            'md5': '396bedf1e40f96c62b30d4999202a790',
            'info_dict': {
                'id': 'breakfast-2021-11-11',
                'ext': 'mp3',
                'series': 'Breakfast with Taylah',
                'title': 'Breakfast with Taylah 2021-11-11',
                'description': 'md5:0979c3ab1febfbec3f1ccb743633c611',
                'release_date': '20211111',
            },
        },
        {
            'url': 'https://rtrfm.com.au/show-episode/breakfast-2020-06-01/',
            'md5': '594027f513ec36a24b15d65007a24dff',
            'info_dict': {
                'id': 'breakfast-2020-06-01',
                'ext': 'mp3',
                'series': 'Breakfast with Taylah',
                'title': 'Breakfast with Taylah 2020-06-01',
                'description': r're:^Breakfast with Taylah ',
                'release_date': '20200601',
            },
            'skip': 'This audio has expired',
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for a single RTRFM show episode.

        Downloads the page at ``url``, reads the show slug, the episode date
        (``YYYY-MM-DD``) and the show title from the embedded player call,
        then queries the restreams endpoint for the audio URL.

        Returns a dict with ``id``, ``title``, ``series``, ``url``,
        ``release_date`` and ``description``. ``url`` is None when the
        endpoint answers with an mp4 URL and ``raise_no_formats`` did not
        abort the extraction.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        show, date, title = self._search_regex(
            r'''\.playShow(?:From)?\(['"](?P<show>[^'"]+)['"],\s*['"](?P<date>[0-9]{4}-[0-9]{2}-[0-9]{2})['"],\s*['"](?P<title>[^'"]+)['"]''',
            webpage, 'details', group=('show', 'date', 'title'))
        # Kept in its own local so the ``url`` argument (the page URL) is not
        # overwritten by the stream URL.
        audio_url = self._download_json(
            'https://restreams.rtrfm.com.au/rzz',
            show, 'Downloading MP3 URL', query={'n': show, 'd': date})['u']
        # This is the only indicator of an error until trying to download the URL and
        # downloads of mp4 URLs always fail (403 for current episodes, 404 for missing).
        if '.mp4' in audio_url:
            audio_url = None
            self.raise_no_formats('Expired or no episode on this date', expected=True)
        return {
            'id': '%s-%s' % (show, date),
            'title': '%s %s' % (title, date),
            'series': title,
            'url': audio_url,
            # The info-dict contract documents release_date as YYYYMMDD; the
            # regex above guarantees ``date`` is YYYY-MM-DD, so dropping the
            # dashes is a safe conversion.
            'release_date': date.replace('-', ''),
            'description': self._og_search_description(webpage),
        }