From 480d31eb5e68c0d90ab8e3f43034484bfc0e158a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Gaillard?= Date: Thu, 26 Nov 2020 07:22:47 +0100 Subject: [PATCH 1/3] fix: ytInitialData parsing with regex --- src/invidious/helpers/helpers.cr | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/invidious/helpers/helpers.cr b/src/invidious/helpers/helpers.cr index 620d2ec81..7ff68b323 100644 --- a/src/invidious/helpers/helpers.cr +++ b/src/invidious/helpers/helpers.cr @@ -598,12 +598,7 @@ def create_notification_stream(env, topics, connection_channel) end def extract_initial_data(body) : Hash(String, JSON::Any) - initial_data = body.match(/(window\["ytInitialData"\]|var\s+ytInitialData)\s*=\s*(?<info>.*?);+\s*\n/).try &.["info"] || "{}" - if initial_data.starts_with?("JSON.parse(\"") - return JSON.parse(JSON.parse(%({"initial_data":"#{initial_data[12..-3]}"}))["initial_data"].as_s).as_h - else - return JSON.parse(initial_data).as_h - end + return JSON.parse(body.match(/(window\["ytInitialData"\]|var\s*ytInitialData)\s*=\s*(?<info>\{.*?\});/).try &.["info"] || "{}").as_h end def proxy_file(response, env) From 1ba17a0e148b7cb52b34d756c945f5d8976b9913 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Gaillard?= Date: Thu, 26 Nov 2020 13:12:35 +0100 Subject: [PATCH 2/3] feat: centralize ytInitialData parsing --- src/invidious/channels.cr | 33 +++++++++++++------------------- src/invidious/helpers/helpers.cr | 2 +- src/invidious/videos.cr | 3 +-- 3 files changed, 15 insertions(+), 23 deletions(-) diff --git a/src/invidious/channels.cr b/src/invidious/channels.cr index 656b9953a..392c44ee7 100644 --- a/src/invidious/channels.cr +++ b/src/invidious/channels.cr @@ -775,41 +775,34 @@ def extract_channel_community_cursor(continuation) cursor end -INITDATA_PREQUERY = "window[\"ytInitialData\"] = {" - def get_about_info(ucid, locale) - about = YT_POOL.client &.get("/channel/#{ucid}/about?gl=US&hl=en") - if about.status_code != 200 - about = YT_POOL.client 
&.get("/user/#{ucid}/about?gl=US&hl=en") + result = YT_POOL.client &.get("/channel/#{ucid}/about?gl=US&hl=en") + if result.status_code != 200 + result = YT_POOL.client &.get("/user/#{ucid}/about?gl=US&hl=en") end - if md = about.headers["location"]?.try &.match(/\/channel\/(?<ucid>UC[a-zA-Z0-9_-]{22})/) + if md = result.headers["location"]?.try &.match(/\/channel\/(?<ucid>UC[a-zA-Z0-9_-]{22})/) raise ChannelRedirect.new(channel_id: md["ucid"]) end - if about.status_code != 200 + if result.status_code != 200 error_message = translate(locale, "This channel does not exist.") raise error_message end - initdata_pre = about.body.index(INITDATA_PREQUERY) - initdata_post = initdata_pre.nil? ? nil : about.body.index("};", initdata_pre) - if initdata_post.nil? - about = XML.parse_html(about.body) - error_message = about.xpath_node(%q(//div[@class="yt-alert-content"])).try &.content.strip - error_message ||= translate(locale, "Could not get channel info.") - raise error_message - end - initdata_pre = initdata_pre.not_nil! + INITDATA_PREQUERY.size - 1 - - initdata = JSON.parse(about.body[initdata_pre, initdata_post - initdata_pre + 1]) - about = XML.parse_html(about.body) - + about = XML.parse_html(result.body) if about.xpath_node(%q(//div[contains(@class, "channel-empty-message")])) error_message = translate(locale, "This channel does not exist.") raise error_message end + initdata = extract_initial_data(result.body) + if initdata.empty? 
+ error_message = about.xpath_node(%q(//div[@class="yt-alert-content"])).try &.content.strip + error_message ||= translate(locale, "Could not get channel info.") + raise error_message + end + author = about.xpath_node(%q(//meta[@name="title"])).not_nil!["content"] author_url = about.xpath_node(%q(//link[@rel="canonical"])).not_nil!["href"] author_thumbnail = about.xpath_node(%q(//link[@rel="image_src"])).not_nil!["href"] diff --git a/src/invidious/helpers/helpers.cr b/src/invidious/helpers/helpers.cr index 7ff68b323..5e4cd4ef1 100644 --- a/src/invidious/helpers/helpers.cr +++ b/src/invidious/helpers/helpers.cr @@ -598,7 +598,7 @@ def create_notification_stream(env, topics, connection_channel) end def extract_initial_data(body) : Hash(String, JSON::Any) - return JSON.parse(body.match(/(window\["ytInitialData"\]|var\s*ytInitialData)\s*=\s*(?<info>\{.*?\});/).try &.["info"] || "{}").as_h + return JSON.parse(body.match(/(window\["ytInitialData"\]|var\s*ytInitialData)\s*=\s*(?<info>\{.*?\});/m).try &.["info"] || "{}").as_h end def proxy_file(response, env) diff --git a/src/invidious/videos.cr b/src/invidious/videos.cr index 8e314fe0d..200484606 100644 --- a/src/invidious/videos.cr +++ b/src/invidious/videos.cr @@ -839,8 +839,7 @@ def extract_polymer_config(body) params[f] = player_response[f] if player_response[f]? end - yt_initial_data = body.match(/(window\["ytInitialData"\]|var\s+ytInitialData)\s*=\s*(?<info>.*?);\s*\n/) - .try { |r| JSON.parse(r["info"]).as_h } + yt_initial_data = extract_initial_data(body) params["relatedVideos"] = yt_initial_data.try &.["playerOverlays"]?.try &.["playerOverlayRenderer"]? 
.try &.["endScreen"]?.try &.["watchNextEndScreenRenderer"]?.try &.["results"]?.try &.as_a.compact_map { |r| From b41ca72d2b85c8d6ee5186873bf4be4c331e3798 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Gaillard?= Date: Thu, 26 Nov 2020 17:52:16 +0100 Subject: [PATCH 3/3] revert: remove 'JSON.parse("' --- src/invidious/helpers/helpers.cr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/invidious/helpers/helpers.cr b/src/invidious/helpers/helpers.cr index 5e4cd4ef1..7a0cb3d3b 100644 --- a/src/invidious/helpers/helpers.cr +++ b/src/invidious/helpers/helpers.cr @@ -598,7 +598,7 @@ def create_notification_stream(env, topics, connection_channel) end def extract_initial_data(body) : Hash(String, JSON::Any) - return JSON.parse(body.match(/(window\["ytInitialData"\]|var\s*ytInitialData)\s*=\s*(?<info>\{.*?\});/m).try &.["info"] || "{}").as_h + return JSON.parse(body.match(/(window\["ytInitialData"\]|var\s*ytInitialData)\s*=\s*(JSON\.parse\(")?(?<info>\{.*?\})("\))?;/m).try &.["info"] || "{}").as_h end def proxy_file(response, env)