From a7723e6ded3238c6b0d845da85bd9ae8777ab41f Mon Sep 17 00:00:00 2001 From: afrmtbl Date: Sat, 30 Mar 2019 21:18:34 -0400 Subject: [PATCH] Implement "fields" parameter from the YouTube Data API (#429) * Implement fields handling --- src/invidious/helpers/handlers.cr | 10 ++ src/invidious/helpers/json_filter.cr | 248 +++++++++++++++++++++++++++ 2 files changed, 258 insertions(+) create mode 100644 src/invidious/helpers/json_filter.cr diff --git a/src/invidious/helpers/handlers.cr b/src/invidious/helpers/handlers.cr index 9d5ebd239..b613488a2 100644 --- a/src/invidious/helpers/handlers.cr +++ b/src/invidious/helpers/handlers.cr @@ -97,6 +97,16 @@ class APIHandler < Kemal::Handler if env.response.headers["Content-Type"]?.try &.== "application/json" response = JSON.parse(response) + if env.params.query["fields"]? + fields_text = env.params.query["fields"] + begin + JSONFilter.filter(response, fields_text) + rescue ex + env.response.status_code = 400 + response = {"error" => ex.message} + end + end + if env.params.query["pretty"]? && env.params.query["pretty"] == "1" response = response.to_pretty_json else diff --git a/src/invidious/helpers/json_filter.cr b/src/invidious/helpers/json_filter.cr new file mode 100644 index 000000000..e4b57cea1 --- /dev/null +++ b/src/invidious/helpers/json_filter.cr @@ -0,0 +1,248 @@ +module JSONFilter + alias BracketIndex = Hash(Int64, Int64) + + alias GroupedFieldsValue = String | Array(GroupedFieldsValue) + alias GroupedFieldsList = Array(GroupedFieldsValue) + + class FieldsParser + class ParseError < Exception + end + + # Returns the `Regex` pattern used to match nest groups + def self.nest_group_pattern : Regex + # uses a '.' character to match json keys as they are allowed + # to contain any unicode codepoint + /(?:|,)(?[^,\n]*?)\(/ + end + + # Returns the `Regex` pattern used to check if there are any empty nest groups + def self.unnamed_nest_group_pattern : Regex + /^\(|\(\(|\/\(/ + end + + def self.parse_fields(fields_text : String) : Nil + if fields_text.empty? + raise FieldsParser::ParseError.new "Fields is empty" + end + + opening_bracket_count = fields_text.count('(') + closing_bracket_count = fields_text.count(')') + + if opening_bracket_count != closing_bracket_count + bracket_type = opening_bracket_count > closing_bracket_count ? "opening" : "closing" + raise FieldsParser::ParseError.new "There are too many #{bracket_type} brackets (#{opening_bracket_count}:#{closing_bracket_count})" + elsif match_result = unnamed_nest_group_pattern.match(fields_text) + raise FieldsParser::ParseError.new "Unnamed nest group at position #{match_result.begin}" + end + + # first, handle top-level single nested properties: items/id, playlistItems/snippet, etc + parse_single_nests(fields_text) { |nest_list| yield nest_list } + + # next, handle nest groups: items(id, etag, etc) + parse_nest_groups(fields_text) { |nest_list| yield nest_list } + end + + def self.parse_single_nests(fields_text : String) : Nil + single_nests = remove_nest_groups(fields_text) + + if !single_nests.empty? + property_nests = single_nests.split(',') + + property_nests.each do |nest| + nest_list = nest.split('/') + if nest_list.includes? "" + raise FieldsParser::ParseError.new "Empty key in nest list: #{nest_list}" + end + yield nest_list + end + # else + # raise FieldsParser::ParseError.new "Empty key in nest list 22: #{fields_text} | #{single_nests}" + end + end + + def self.parse_nest_groups(fields_text : String) : Nil + nest_stack = [] of NamedTuple(group_name: String, closing_bracket_index: Int64) + bracket_pairs = get_bracket_pairs(fields_text, true) + + text_index = 0 + regex_index = 0 + + while regex_result = self.nest_group_pattern.match(fields_text, regex_index) + raw_match = regex_result[0] + group_name = regex_result["groupname"] + + text_index = regex_result.begin + regex_index = regex_result.end + + if text_index.nil? || regex_index.nil? + raise FieldsParser::ParseError.new "Received invalid index while parsing nest groups: text_index: #{text_index} | regex_index: #{regex_index}" + end + + offset = raw_match.starts_with?(',') ? 1 : 0 + + opening_bracket_index = (text_index + group_name.size) + offset + closing_bracket_index = bracket_pairs[opening_bracket_index] + content_start = opening_bracket_index + 1 + + content = fields_text[content_start...closing_bracket_index] + + if content.empty? + raise FieldsParser::ParseError.new "Empty nest group at position #{content_start}" + else + content = remove_nest_groups(content) + end + + while nest_stack.size > 0 && closing_bracket_index > nest_stack[nest_stack.size - 1][:closing_bracket_index] + if nest_stack.size + nest_stack.pop + end + end + + group_name.split('/').each do |group_name| + nest_stack.push({ + group_name: group_name, + closing_bracket_index: closing_bracket_index, + }) + end + + if !content.empty? + properties = content.split(',') + + properties.each do |prop| + nest_list = nest_stack.map { |nest_prop| nest_prop[:group_name] } + + if !prop.empty? + if prop.includes?('/') + parse_single_nests(prop) { |list| nest_list += list } + else + nest_list.push prop + end + else + raise FieldsParser::ParseError.new "Empty key in nest list: #{nest_list << prop}" + end + + yield nest_list + end + end + end + end + + def self.remove_nest_groups(text : String) : String + content_bracket_pairs = get_bracket_pairs(text, false) + + content_bracket_pairs.each_key.to_a.reverse.each do |opening_bracket| + closing_bracket = content_bracket_pairs[opening_bracket] + last_comma = text.rindex(',', opening_bracket) || 0 + + text = text[0...last_comma] + text[closing_bracket + 1...text.size] + end + + return text.starts_with?(',') ? text[1...text.size] : text + end + + def self.get_bracket_pairs(text : String, recursive = true) : BracketIndex + istart = [] of Int64 + bracket_index = BracketIndex.new + + text.each_char_with_index do |char, index| + if char == '(' + istart.push(index.to_i64) + end + + if char == ')' + begin + opening = istart.pop + if recursive || (!recursive && istart.size == 0) + bracket_index[opening] = index.to_i64 + end + rescue + raise FieldsParser::ParseError.new "No matching opening parenthesis at: #{index}" + end + end + end + + if istart.size != 0 + idx = istart.pop + raise FieldsParser::ParseError.new "No matching closing parenthesis at: #{idx}" + end + + return bracket_index + end + end + + class FieldsGrouper + alias SkeletonValue = Hash(String, SkeletonValue) + + def self.create_json_skeleton(fields_text : String) : SkeletonValue + root_hash = {} of String => SkeletonValue + + FieldsParser.parse_fields(fields_text) do |nest_list| + current_item = root_hash + nest_list.each do |key| + if current_item[key]? + current_item = current_item[key] + else + current_item[key] = {} of String => SkeletonValue + current_item = current_item[key] + end + end + end + root_hash + end + + def self.create_grouped_fields_list(json_skeleton : SkeletonValue) : GroupedFieldsList + grouped_fields_list = GroupedFieldsList.new + json_skeleton.each do |key, value| + grouped_fields_list.push key + + nested_keys = create_grouped_fields_list(value) + grouped_fields_list.push nested_keys unless nested_keys.empty? + end + return grouped_fields_list + end + end + + class FilterError < Exception + end + + def self.filter(item : JSON::Any, fields_text : String, in_place : Bool = true) + skeleton = FieldsGrouper.create_json_skeleton(fields_text) + grouped_fields_list = FieldsGrouper.create_grouped_fields_list(skeleton) + filter(item, grouped_fields_list, in_place) + end + + def self.filter(item : JSON::Any, grouped_fields_list : GroupedFieldsList, in_place : Bool = true) : JSON::Any + item = item.clone unless in_place + + if !item.as_h? && !item.as_a? + raise FilterError.new "Can't filter '#{item}' by #{grouped_fields_list}" + end + + top_level_keys = Array(String).new + grouped_fields_list.each do |value| + if value.is_a? String + top_level_keys.push value + elsif value.is_a? Array + if !top_level_keys.empty? + key_to_filter = top_level_keys.last + + if item.as_h? + filter(item[key_to_filter], value, in_place: true) + elsif item.as_a? + item.as_a.each { |arr_item| filter(arr_item[key_to_filter], value, in_place: true) } + end + else + raise FilterError.new "Tried to filter while top level keys list is empty" + end + end + end + + if item.as_h? + item.as_h.select! top_level_keys + elsif item.as_a? + item.as_a.map { |value| filter(value, top_level_keys, in_place: true) } + end + + item + end +end