From 73ac85678588b1c2997a94c0069ac0a9309adf19 Mon Sep 17 00:00:00 2001
From: Luc Ritchie <luc.ritchie@gmail.com>
Date: Tue, 10 Nov 2020 17:47:40 -0500
Subject: [PATCH] [youtube] max_pages=5 for search, unlimited for everything
 else

Also drop a few leftover methods in search that are no longer used.
---
 youtube_dlc/extractor/youtube.py | 39 ++++----------------------------
 1 file changed, 4 insertions(+), 35 deletions(-)

diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py
index d8d12a721..2fea11070 100644
--- a/youtube_dlc/extractor/youtube.py
+++ b/youtube_dlc/extractor/youtube.py
@@ -328,7 +328,7 @@ class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
 
         return entries, try_get(c, lambda x: x["continuation"])
 
-    def _entries(self, page, playlist_id, n=1):
+    def _entries(self, page, playlist_id, max_pages=None):
         seen = []
 
         yt_conf = {}
@@ -340,8 +340,7 @@ class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
 
         data_json = self._parse_json(self._search_regex(self._INITIAL_DATA_RE, page, 'ytInitialData'), None)
 
-        # for page_num in itertools.count(1):
-        for page_num in range(n):
+        for page_num in range(1, max_pages + 1) if max_pages is not None else itertools.count(1):
             entries, continuation = self._find_entries_in_json(data_json)
             processed = self._process_entries(entries, seen)
 
@@ -366,7 +365,7 @@ class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
                     data_json = self._download_json(
                         'https://www.youtube.com%s' % continuation_url,
                         playlist_id,
-                        'Downloading page #%s%s' % (page_num, ' (retry #%d)' % count if count else ''),
+                        'Downloading continuation page #%s%s' % (page_num, ' (retry #%d)' % count if count else ''),
 
                         transform_source=uppercase_escape,
                         query={
@@ -3418,41 +3417,11 @@ class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor):
             c["continuation"] = obj["nextContinuationData"]
             return
 
-    def extract_videos_from_page_impl(self, page, ids_in_page, titles_in_page):
-        search_response = self._parse_json(self._search_regex(self._SEARCH_DATA, page, 'ytInitialData'), None)
-
-        result_items = self._find_videos_in_json(search_response)
-
-        for renderer in result_items:
-            video_id = try_get(renderer, lambda x: x['videoId'])
-            video_title = try_get(renderer, lambda x: x['title']['runs'][0]['text']) or try_get(renderer, lambda x: x['title']['simpleText'])
-
-            if video_id is None or video_title is None:
-                # we do not have a videoRenderer or title extraction broke
-                continue
-
-            video_title = video_title.strip()
-
-            try:
-                idx = ids_in_page.index(video_id)
-                if video_title and not titles_in_page[idx]:
-                    titles_in_page[idx] = video_title
-            except ValueError:
-                ids_in_page.append(video_id)
-                titles_in_page.append(video_title)
-
-    def extract_videos_from_page(self, page):
-        ids_in_page = []
-        titles_in_page = []
-        self.extract_videos_from_page_impl(page, ids_in_page, titles_in_page)
-        return zip(ids_in_page, titles_in_page)
-
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         query = compat_urllib_parse_unquote_plus(mobj.group('query'))
         webpage = self._download_webpage(url, query)
-        # data_json = self._process_initial_data(webpage)
-        return self.playlist_result(self._entries(webpage, query, n=5), playlist_title=query)
+        return self.playlist_result(self._entries(webpage, query, max_pages=5), playlist_title=query)
 
 
 class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):