Prevent syntax error on MySQL search when query includes symbols

Fixes #8614 and #12811

We match MySQL's behaviour of treating non-alphanumeric characters as token separators, equivalent to spaces. This way, the Lexeme instances consist of alphanumerics only, avoiding escaping issues that lead to these syntax errors.

(Note: the \w class in regular expressions also treats underscores as "alphanumeric", but this is consistent with MySQL's behaviour and presents no escaping issues.)
pull/12838/head
Matt Westcott 2025-01-28 17:11:43 +00:00
rodzic 88cbfb2875
commit 7722ffed1e
2 zmienionych plików z 61 dodań i 1 usunięć

Wyświetl plik

@ -1,3 +1,4 @@
import re
import warnings
from collections import OrderedDict
@ -344,7 +345,10 @@ class MySQLSearchQueryCompiler(BaseSearchQueryCompiler):
def build_search_query_content(self, query, invert=False):
if isinstance(query, PlainText):
terms = query.query_string.split()
# For Boolean full text search queries in MySQL,
# non-alphanumeric characters act as separators
terms = [term for term in re.split(r"\W+", query.query_string) if term]
if not terms:
return SearchQuery("")

Wyświetl plik

@ -87,6 +87,62 @@ class TestMySQLSearchBackend(BackendTests, TransactionTestCase):
set(),
)
def test_empty_autocomplete(self):
    """Autocomplete with empty, blank or symbol-only input returns no books."""
    # Each of these inputs is expected to produce an empty result set.
    for query_string in ("", " ", "*"):
        matches = self.backend.autocomplete(query_string, models.Book.objects.all())
        matched_titles = {match.title for match in matches}
        self.assertSetEqual(matched_titles, set())
def test_symbols_in_search_term(self):
    """Check which books are matched when the search string contains symbols."""
    expectations = [
        # symbols as their own tokens should be ignored
        ("javascript @ parts", {"JavaScript: The good parts"}),
        ("javascript parts @", {"JavaScript: The good parts"}),
        ("@ javascript parts", {"JavaScript: The good parts"}),
        # tokens containing both symbols and alphanumerics should not be
        # discarded or treated as equivalent to the same token without symbols
        ("java@script parts", set()),
    ]
    for query_string, expected_titles in expectations:
        found = self.backend.search(query_string, models.Book.objects.all())
        found_titles = {book.title for book in found}
        self.assertSetEqual(found_titles, expected_titles)
def test_autocomplete_with_symbols(self):
    """A stray ``*`` in an autocomplete query should not prevent a match."""
    # The asterisk plays no part in the autocomplete mechanism; if someone
    # types one anyway, we want it to be gracefully ignored.
    all_books = models.Book.objects.all()
    matches = self.backend.autocomplete("parts javasc*", all_books)
    matched_titles = {match.title for match in matches}
    self.assertSetEqual(matched_titles, {"JavaScript: The good parts"})
@skip(
"The MySQL backend doesn't support choosing individual fields for the search, only (body, title) or (autocomplete) fields may be searched."
)