diff --git a/wagtail/search/backends/database/mysql/mysql.py b/wagtail/search/backends/database/mysql/mysql.py
index 639b841ba4..769e5c69aa 100644
--- a/wagtail/search/backends/database/mysql/mysql.py
+++ b/wagtail/search/backends/database/mysql/mysql.py
@@ -1,3 +1,4 @@
+import re
 import warnings
 from collections import OrderedDict
 
@@ -336,7 +337,10 @@ class MySQLSearchQueryCompiler(BaseSearchQueryCompiler):
 
     def build_search_query_content(self, query, invert=False):
         if isinstance(query, PlainText):
-            terms = query.query_string.split()
+            # For Boolean full text search queries in MySQL,
+            # non-alphanumeric characters act as separators
+            terms = [term for term in re.split(r"\W+", query.query_string) if term]
+
             if not terms:
                 return SearchQuery("")
 
diff --git a/wagtail/search/tests/test_mysql_backend.py b/wagtail/search/tests/test_mysql_backend.py
index 9739f231ea..0acfb60a6f 100644
--- a/wagtail/search/tests/test_mysql_backend.py
+++ b/wagtail/search/tests/test_mysql_backend.py
@@ -87,6 +87,62 @@ class TestMySQLSearchBackend(BackendTests, TransactionTestCase):
             set(),
         )
 
+    def test_empty_autocomplete(self):
+        results = self.backend.autocomplete("", models.Book.objects.all())
+        self.assertSetEqual(
+            {r.title for r in results},
+            set(),
+        )
+
+        results = self.backend.autocomplete(" ", models.Book.objects.all())
+        self.assertSetEqual(
+            {r.title for r in results},
+            set(),
+        )
+
+        results = self.backend.autocomplete("*", models.Book.objects.all())
+        self.assertSetEqual(
+            {r.title for r in results},
+            set(),
+        )
+
+    def test_symbols_in_search_term(self):
+        # symbols as their own tokens should be ignored
+        results = self.backend.search("javascript @ parts", models.Book.objects.all())
+        self.assertSetEqual(
+            {r.title for r in results},
+            {"JavaScript: The good parts"},
+        )
+
+        results = self.backend.search("javascript parts @", models.Book.objects.all())
+        self.assertSetEqual(
+            {r.title for r in results},
+            {"JavaScript: The good parts"},
+        )
+
+        results = self.backend.search("@ javascript parts", models.Book.objects.all())
+        self.assertSetEqual(
+            {r.title for r in results},
+            {"JavaScript: The good parts"},
+        )
+
+        # tokens containing both symbols and alphanumerics should not be discarded
+        # or treated as equivalent to the same token without symbols
+        results = self.backend.search("java@script parts", models.Book.objects.all())
+        self.assertSetEqual(
+            {r.title for r in results},
+            set(),
+        )
+
+    def test_autocomplete_with_symbols(self):
+        # the * is not part of the autocomplete mechanism, but if someone includes it
+        # we want it to be gracefully ignored
+        results = self.backend.autocomplete("parts javasc*", models.Book.objects.all())
+        self.assertSetEqual(
+            {r.title for r in results},
+            {"JavaScript: The good parts"},
+        )
+
     @skip(
         "The MySQL backend doesn't support choosing individual fields for the search, only (body, title) or (autocomplete) fields may be searched."
     )