From eb2969d32427624bd7adaf2d576a0ed5a129bd4b Mon Sep 17 00:00:00 2001 From: HobbyistDev Date: Sat, 6 Apr 2024 13:19:42 +0800 Subject: [PATCH 1/7] [extractor/godresource] Add GodResource extractor --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/godresource.py | 85 +++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 yt_dlp/extractor/godresource.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 2ad5801c4..f7b251a04 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -713,6 +713,7 @@ from .globo import ( from .gmanetwork import GMANetworkVideoIE from .go import GoIE from .godtube import GodTubeIE +from .godresource import GodResourceIE from .gofile import GofileIE from .golem import GolemIE from .goodgame import GoodGameIE diff --git a/yt_dlp/extractor/godresource.py b/yt_dlp/extractor/godresource.py new file mode 100644 index 000000000..602880bff --- /dev/null +++ b/yt_dlp/extractor/godresource.py @@ -0,0 +1,85 @@ +from .common import InfoExtractor +from ..utils import ( + determine_ext, + merge_dicts, + unified_timestamp, + url_or_none +) +from ..utils.traversal import traverse_obj + + +class GodResourceIE(InfoExtractor): + _VALID_URL = r'https?://new\.godresource\.com/video/(?P<id>\w+)' + _TESTS = [{ + # hls stream + 'url': 'https://new.godresource.com/video/A01mTKjyf6w', + 'info_dict': { + 'id': 'A01mTKjyf6w', + 'ext': 'mp4', + 'view_count': int, + 'timestamp': 1710978666, + 'channel_id': 5, + 'thumbnail': 'https://cdn-02.godresource.com/e42968ac-9e8b-4231-ab86-f4f9d775841f/thumbnail.jpg', + 'channel': 'Stedfast Baptist Church', + 'upload_date': '20240320', + 'title': 'GodResource', + } + }, { + # mp4 link + 'url': 'https://new.godresource.com/video/01DXmBbQv_X', + 'info_dict': { + 'id': '01DXmBbQv_X', + 'ext': 'mp4', + 'channel_id': 12, + 'view_count': int, + 'timestamp': 1687996800, + 'thumbnail': 
'https://cdn-02.godresource.com/sodomitedeception/thumbnail.jpg', + 'channel': 'Documentaries', + 'title': 'The Sodomite Deception', + 'upload_date': '20230629', + } + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + + # the website is oddly giving all request as 404 at first and then loaded with js + webpage, _ = self._download_webpage_handle(url, display_id, expected_status=404) + + api_data = self._download_json( + f'https://api.godresource.com/api/Streams/{display_id}', display_id) + + video_url = api_data['streamUrl'] + + # TODO: better name? + extraction_result = {} + if determine_ext(video_url) == 'm3u8': + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + api_data['streamUrl'], display_id) + + extraction_result = { + 'formats': formats, + 'subtitles': subtitles + } + elif determine_ext(video_url) == 'mp4': + extraction_result = { + 'url': video_url, + 'ext': 'mp4' + } + + return { + 'id': display_id, + **extraction_result, + **merge_dicts(traverse_obj(api_data, { + 'title': ('title', {str}), # can be None in API + 'thumbnail': ('thumbnail', {url_or_none}), + 'view_count': ('views', {int}), + 'channel': ('channelName', {str}), + 'channel_id': 'channelId', + 'timestamp': ('streamDateCreated', {unified_timestamp}), + 'modified_timestamp': ('streamDataModified', {unified_timestamp}) + }), { + # This title is not reliable (only give the site name) + 'title': self._html_extract_title(webpage), + }), + } From 6c311b14c6e93bf6d798420db38bb321f994bb88 Mon Sep 17 00:00:00 2001 From: HobbyistDev <105957301+HobbyistDev@users.noreply.github.com> Date: Thu, 11 Apr 2024 07:21:59 +0800 Subject: [PATCH 2/7] Use auto-generated title instead of `title` tag of the website Co-authored-by: pukkandan --- yt_dlp/extractor/godresource.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/godresource.py b/yt_dlp/extractor/godresource.py index 602880bff..de7fa996b 100644 --- 
a/yt_dlp/extractor/godresource.py +++ b/yt_dlp/extractor/godresource.py @@ -70,16 +70,14 @@ class GodResourceIE(InfoExtractor): return { 'id': display_id, **extraction_result, - **merge_dicts(traverse_obj(api_data, { - 'title': ('title', {str}), # can be None in API + 'title': '', + **traverse_obj(api_data, { + 'title': ('title', {str}), 'thumbnail': ('thumbnail', {url_or_none}), 'view_count': ('views', {int}), 'channel': ('channelName', {str}), - 'channel_id': 'channelId', + 'channel_id': ('channelId', {str_or_none}), 'timestamp': ('streamDateCreated', {unified_timestamp}), 'modified_timestamp': ('streamDataModified', {unified_timestamp}) - }), { - # This title is not reliable (only give the site name) - 'title': self._html_extract_title(webpage), - }), + }) } From 08563a1cad39da4b80c6c0d66a6b23ab0e76f6f5 Mon Sep 17 00:00:00 2001 From: HobbyistDev <105957301+HobbyistDev@users.noreply.github.com> Date: Thu, 11 Apr 2024 07:22:28 +0800 Subject: [PATCH 3/7] Remove unnecessary `webpage` extraction Co-authored-by: pukkandan --- yt_dlp/extractor/godresource.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/yt_dlp/extractor/godresource.py b/yt_dlp/extractor/godresource.py index de7fa996b..54f2e55c4 100644 --- a/yt_dlp/extractor/godresource.py +++ b/yt_dlp/extractor/godresource.py @@ -43,8 +43,6 @@ class GodResourceIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) - # the website is oddly giving all request as 404 at first and then loaded with js - webpage, _ = self._download_webpage_handle(url, display_id, expected_status=404) api_data = self._download_json( f'https://api.godresource.com/api/Streams/{display_id}', display_id) From 89cb7f7d98d18c2ed53a2ce0a0e048c7e02d36ff Mon Sep 17 00:00:00 2001 From: HobbyistDev <105957301+HobbyistDev@users.noreply.github.com> Date: Thu, 11 Apr 2024 07:22:56 +0800 Subject: [PATCH 4/7] Simplify extraction process Co-authored-by: pukkandan --- yt_dlp/extractor/godresource.py | 20 +++++++------------- 1 
file changed, 7 insertions(+), 13 deletions(-) diff --git a/yt_dlp/extractor/godresource.py b/yt_dlp/extractor/godresource.py index 54f2e55c4..723657270 100644 --- a/yt_dlp/extractor/godresource.py +++ b/yt_dlp/extractor/godresource.py @@ -48,22 +48,16 @@ class GodResourceIE(InfoExtractor): f'https://api.godresource.com/api/Streams/{display_id}', display_id) video_url = api_data['streamUrl'] - - # TODO: better name? - extraction_result = {} - if determine_ext(video_url) == 'm3u8': + if (ext := determine_ext(video_url)) == 'm3u8': formats, subtitles = self._extract_m3u8_formats_and_subtitles( api_data['streamUrl'], display_id) - - extraction_result = { - 'formats': formats, - 'subtitles': subtitles - } - elif determine_ext(video_url) == 'mp4': - extraction_result = { + elif ext == 'mp4': + formats, subtitles = [{ 'url': video_url, - 'ext': 'mp4' - } + 'ext': ext + }], {} + else: + raise ExtractorError(f'Unexpected video format {ext}') return { 'id': display_id, From f326e05e9f027466a9914a9724364979c3321256 Mon Sep 17 00:00:00 2001 From: HobbyistDev Date: Thu, 11 Apr 2024 07:25:54 +0800 Subject: [PATCH 5/7] Add `formats` and `subtitles` in return --- yt_dlp/extractor/godresource.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/godresource.py b/yt_dlp/extractor/godresource.py index 723657270..cc9b85a96 100644 --- a/yt_dlp/extractor/godresource.py +++ b/yt_dlp/extractor/godresource.py @@ -1,7 +1,8 @@ from .common import InfoExtractor from ..utils import ( + ExtractorError, determine_ext, - merge_dicts, + str_or_none, unified_timestamp, url_or_none ) @@ -43,7 +44,6 @@ class GodResourceIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) - api_data = self._download_json( f'https://api.godresource.com/api/Streams/{display_id}', display_id) @@ -61,7 +61,8 @@ class GodResourceIE(InfoExtractor): return { 'id': display_id, - **extraction_result, + 'formats': formats, + 'subtitles': subtitles, 'title': 
'', **traverse_obj(api_data, { 'title': ('title', {str}), From d493ce72b63483e20d8001f8eabcc286cd4eb3b2 Mon Sep 17 00:00:00 2001 From: HobbyistDev Date: Thu, 11 Apr 2024 07:29:12 +0800 Subject: [PATCH 6/7] Change test value --- yt_dlp/extractor/godresource.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/godresource.py b/yt_dlp/extractor/godresource.py index cc9b85a96..f8267d38f 100644 --- a/yt_dlp/extractor/godresource.py +++ b/yt_dlp/extractor/godresource.py @@ -19,11 +19,11 @@ class GodResourceIE(InfoExtractor): 'ext': 'mp4', 'view_count': int, 'timestamp': 1710978666, - 'channel_id': 5, + 'channel_id': '5', 'thumbnail': 'https://cdn-02.godresource.com/e42968ac-9e8b-4231-ab86-f4f9d775841f/thumbnail.jpg', 'channel': 'Stedfast Baptist Church', 'upload_date': '20240320', - 'title': 'GodResource', + 'title': 'GodResource video #A01mTKjyf6w', } }, { # mp4 link @@ -31,7 +31,7 @@ class GodResourceIE(InfoExtractor): 'info_dict': { 'id': '01DXmBbQv_X', 'ext': 'mp4', - 'channel_id': 12, + 'channel_id': '12', 'view_count': int, 'timestamp': 1687996800, 'thumbnail': 'https://cdn-02.godresource.com/sodomitedeception/thumbnail.jpg', From 4741aded6fda773eefc3b6d53335ea26b9d36b4a Mon Sep 17 00:00:00 2001 From: HobbyistDev Date: Thu, 11 Apr 2024 08:01:24 +0800 Subject: [PATCH 7/7] add `md5` key for mp4 link test case --- yt_dlp/extractor/godresource.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/godresource.py b/yt_dlp/extractor/godresource.py index f8267d38f..f3d6160d2 100644 --- a/yt_dlp/extractor/godresource.py +++ b/yt_dlp/extractor/godresource.py @@ -28,6 +28,7 @@ class GodResourceIE(InfoExtractor): }, { # mp4 link 'url': 'https://new.godresource.com/video/01DXmBbQv_X', + 'md5': '0e8f72aa89a106b9d5c011ba6f8717b7', 'info_dict': { 'id': '01DXmBbQv_X', 'ext': 'mp4',