diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 44c87d355b..2cb372e4f8 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -8,6 +8,7 @@ Changelog * Implement `normalize` on `TypedTableBlock` to assist with setting `default` and `preview_value` (Sage Abdullah) * Apply normalization when modifying a `StreamBlock`'s value to assist with programmatic changes to `StreamField` (Matt Westcott) * Allow a custom image rendition model to define its unique constraint with `models.UniqueConstraint` instead of `unique_together` (Oliver Parker, Cynthia Kiser, Sage Abdullah) + * Default to the `standard` tokenizer on Elasticsearch, to correctly handle numbers as tokens (Matt Westcott) * Fix: Take preferred language into account for translatable strings in client-side code (Bernhard Bliem, Sage Abdullah) * Docs: Add missing `django.contrib.admin` to list of apps in "add to Django project" guide (Mohamed Rabiaa) diff --git a/docs/releases/6.5.md b/docs/releases/6.5.md index b44cc64932..cdf6cd0950 100644 --- a/docs/releases/6.5.md +++ b/docs/releases/6.5.md @@ -17,6 +17,7 @@ depth: 1 * Implement `normalize` on `TypedTableBlock` to assist with setting `default` and `preview_value` (Sage Abdullah) * Apply normalization when modifying a `StreamBlock`'s value to assist with programmatic changes to `StreamField` (Matt Westcott) * Allow a custom image rendition model to define its unique constraint with `models.UniqueConstraint` instead of `unique_together` (Oliver Parker, Cynthia Kiser, Sage Abdullah) + * Default to the `standard` tokenizer on Elasticsearch, to correctly handle numbers as tokens (Matt Westcott) ### Bug fixes diff --git a/wagtail/search/backends/elasticsearch7.py b/wagtail/search/backends/elasticsearch7.py index 0d16e8640b..6d5411e07c 100644 --- a/wagtail/search/backends/elasticsearch7.py +++ b/wagtail/search/backends/elasticsearch7.py @@ -1141,13 +1141,13 @@ class Elasticsearch7SearchBackend(BaseSearchBackend): "analyzer": { "ngram_analyzer": { "type": "custom", - "tokenizer": "lowercase", - "filter": ["asciifolding", "ngram"], + "tokenizer": "standard", + "filter": ["asciifolding", "lowercase", "ngram"], }, "edgengram_analyzer": { "type": "custom", - "tokenizer": "lowercase", - "filter": ["asciifolding", "edgengram"], + "tokenizer": "standard", + "filter": ["asciifolding", "lowercase", "edgengram"], }, }, "tokenizer": { diff --git a/wagtail/search/tests/elasticsearch_common_tests.py b/wagtail/search/tests/elasticsearch_common_tests.py index 7abe1743c2..7b3089e791 100644 --- a/wagtail/search/tests/elasticsearch_common_tests.py +++ b/wagtail/search/tests/elasticsearch_common_tests.py @@ -114,6 +114,49 @@ class ElasticsearchCommonSearchBackendTests(BackendTests): ], ) + def test_search_with_numeric_term(self): + book = models.Book.objects.create( + title="Harry Potter and the 31337 Goblets of Fire", + publication_date=date(2009, 7, 15), + number_of_pages=607, + ) + + index = self.backend.get_index_for_model(models.Book) + index.add_item(book) + index.refresh() + + results = self.backend.search("31337", models.Book) + self.assertUnsortedListEqual( + [r.title for r in results], + [ + "Harry Potter and the 31337 Goblets of Fire", + ], + ) + + results = self.backend.autocomplete("313", models.Book) + self.assertUnsortedListEqual( + [r.title for r in results], + [ + "Harry Potter and the 31337 Goblets of Fire", + ], + ) + + results = self.backend.search("31337 goblets", models.Book) + self.assertUnsortedListEqual( + [r.title for r in results], + [ + "Harry Potter and the 31337 Goblets of Fire", + ], + ) + + results = self.backend.autocomplete("31337 gob", models.Book) + self.assertUnsortedListEqual( + [r.title for r in results], + [ + "Harry Potter and the 31337 Goblets of Fire", + ], + ) + def test_and_operator_with_single_field(self): # Testing for bug #1859 results = self.backend.search(