Require word boundaries before search query filters (CVE-2024-39317)

Requiring a word boundary before the start of a filter prevents backtracking, as substrings of the filter name are no longer valid filter names.

This also makes matching around an order of magnitude faster.
stable/5.2.x
Jake Howard 2024-06-27 16:36:36 +01:00 zatwierdzone przez Matt Westcott
rodzic d398c3f382
commit 31b1e8532d
2 zmienionych plików z 29 dodań i 11 usunięć
wagtail/search

Wyświetl plik

@ -258,6 +258,30 @@ class TestParseQueryString(SimpleTestCase):
self.assertDictEqual(filters.dict(), {"author": "foo bar", "bar": "beer"})
self.assertEqual(repr(query), repr(Phrase("hello world")))
def test_long_queries(self):
    """Parse 60,000-character inputs, with and without a filter.

    A plain run of digits must yield no filters and come back unchanged
    as a ``PlainText`` query; a 60,000-character filter name must be
    kept whole as the filter key.
    """
    digits = "0" * 60_000
    filters, query = parse_query_string(digits)
    self.assertEqual(filters.dict(), {})
    self.assertEqual(repr(query), repr(PlainText(digits)))

    long_name = "a" * 60_000
    filters, _ = parse_query_string(f'{long_name}:"foo bar"')
    self.assertEqual(filters.dict(), {long_name: "foo bar"})
def test_long_filter_value(self):
    """A filter value of 60,002 word characters is captured in full."""
    # Build the value once instead of nesting double quotes inside a
    # double-quoted f-string: reusing the enclosing quote in a replacement
    # field is only valid syntax from Python 3.12 (PEP 701) onwards.
    long_value = "ba" + "r" * 60_000
    filters, _ = parse_query_string(f"foo:{long_value}")
    self.assertEqual(filters.dict(), {"foo": long_value})
def test_joined_filters(self):
    """Check how colon-joined input splits into a filter and a query.

    For ``key:value:rest`` only ``key:value`` becomes a filter and
    ``:rest`` stays in the plain-text query, whether or not the value is
    quoted. A colon inside a quoted value belongs to that value.
    """
    for raw in ("foo:bar:baz", "foo:'bar':baz"):
        filters, query = parse_query_string(raw)
        self.assertEqual(filters.dict(), {"foo": "bar"})
        self.assertEqual(repr(query), repr(PlainText(":baz")))

    filters, _ = parse_query_string("foo:'bar:baz'")
    self.assertEqual(filters.dict(), {"foo": "bar:baz"})
def test_multiple_phrases(self):
filters, query = parse_query_string('"hello world" "hi earth"')

Wyświetl plik

@ -69,6 +69,8 @@ MUL = partial(balanced_reduce, operator.mul)
# Upper bound on query string length.
# NOTE(review): presumably the cap normalise_query_string applies when it
# truncates its input -- confirm against that function.
MAX_QUERY_STRING_LENGTH = 255
# Matches a single ``key:value`` search filter.
#   group 1: the key, one or more word characters, which must start at a
#            word boundary (so a match cannot begin part-way through a word)
#   group 2: the value, either bare word characters or a non-empty string
#            wrapped in double or single quotes (the quotes are captured too)
# Compiled once at import time so callers can reuse the pattern object.
filters_regexp = re.compile(r'\b(\w+):(\w+|"[^"]+"|\'[^\']+\')')
def normalise_query_string(query_string):
# Truncate query string
@ -83,20 +85,12 @@ def normalise_query_string(query_string):
def separate_filters_from_query(query_string):
    """Split ``key:value`` filters out of a search query string.

    Uses the module-level compiled ``filters_regexp`` both to collect the
    filters and to remove them from the text. The pattern must not be
    rebound locally to a plain string: ``str`` has no ``finditer``/``sub``
    methods, so doing that would raise ``AttributeError``.

    Args:
        query_string: The raw query text, possibly containing filters.

    Returns:
        A ``(filters, query_string)`` tuple: a mutable ``QueryDict`` mapping
        each filter key to its value with enclosing quote characters
        removed, and the query text with every filter removed and
        surrounding whitespace stripped.
    """
    filters = QueryDict(mutable=True)
    for match_object in filters_regexp.finditer(query_string):
        key, value = match_object.groups()
        # The value group still carries its enclosing quotes (if any);
        # strip both quote characters from either end.
        filters.update({key: value.strip("\"'")})

    query_string = filters_regexp.sub("", query_string).strip()
    return filters, query_string