kopia lustrzana https://github.com/wagtail/wagtail
Use standard tokenizer for Elasticsearch to preserve numeric tokens (#12851)
Since its inception the Elasticsearch backend has defaulted to the `lowercase` tokenizer (https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-tokenizers.html#_word_oriented_tokenizers), which treats non-letter characters as separators. This means that numbers within text (surrounded by whitespace) are skipped when indexing and cannot be searched. Change to the `standard` tokenizer, but apply the `lowercase` filter to keep searches case-insensitive.
pull/12784/head
rodzic
801b1bb067
commit
225d3f384c
|
@ -8,6 +8,7 @@ Changelog
|
|||
* Implement `normalize` on `TypedTableBlock` to assist with setting `default` and `preview_value` (Sage Abdullah)
|
||||
* Apply normalization when modifying a `StreamBlock`'s value to assist with programmatic changes to `StreamField` (Matt Westcott)
|
||||
* Allow a custom image rendition model to define its unique constraint with `models.UniqueConstraint` instead of `unique_together` (Oliver Parker, Cynthia Kiser, Sage Abdullah)
|
||||
* Default to the `standard` tokenizer on Elasticsearch, to correctly handle numbers as tokens (Matt Westcott)
|
||||
* Fix: Take preferred language into account for translatable strings in client-side code (Bernhard Bliem, Sage Abdullah)
|
||||
* Docs: Add missing `django.contrib.admin` to list of apps in "add to Django project" guide (Mohamed Rabiaa)
|
||||
|
||||
|
|
|
@ -17,6 +17,7 @@ depth: 1
|
|||
* Implement `normalize` on `TypedTableBlock` to assist with setting `default` and `preview_value` (Sage Abdullah)
|
||||
* Apply normalization when modifying a `StreamBlock`'s value to assist with programmatic changes to `StreamField` (Matt Westcott)
|
||||
* Allow a custom image rendition model to define its unique constraint with `models.UniqueConstraint` instead of `unique_together` (Oliver Parker, Cynthia Kiser, Sage Abdullah)
|
||||
* Default to the `standard` tokenizer on Elasticsearch, to correctly handle numbers as tokens (Matt Westcott)
|
||||
|
||||
### Bug fixes
|
||||
|
||||
|
|
|
@ -1141,13 +1141,13 @@ class Elasticsearch7SearchBackend(BaseSearchBackend):
|
|||
"analyzer": {
|
||||
"ngram_analyzer": {
|
||||
"type": "custom",
|
||||
"tokenizer": "lowercase",
|
||||
"filter": ["asciifolding", "ngram"],
|
||||
"tokenizer": "standard",
|
||||
"filter": ["asciifolding", "lowercase", "ngram"],
|
||||
},
|
||||
"edgengram_analyzer": {
|
||||
"type": "custom",
|
||||
"tokenizer": "lowercase",
|
||||
"filter": ["asciifolding", "edgengram"],
|
||||
"tokenizer": "standard",
|
||||
"filter": ["asciifolding", "lowercase", "edgengram"],
|
||||
},
|
||||
},
|
||||
"tokenizer": {
|
||||
|
|
|
@ -114,6 +114,49 @@ class ElasticsearchCommonSearchBackendTests(BackendTests):
|
|||
],
|
||||
)
|
||||
|
||||
def test_search_with_numeric_term(self):
|
||||
book = models.Book.objects.create(
|
||||
title="Harry Potter and the 31337 Goblets of Fire",
|
||||
publication_date=date(2009, 7, 15),
|
||||
number_of_pages=607,
|
||||
)
|
||||
|
||||
index = self.backend.get_index_for_model(models.Book)
|
||||
index.add_item(book)
|
||||
index.refresh()
|
||||
|
||||
results = self.backend.search("31337", models.Book)
|
||||
self.assertUnsortedListEqual(
|
||||
[r.title for r in results],
|
||||
[
|
||||
"Harry Potter and the 31337 Goblets of Fire",
|
||||
],
|
||||
)
|
||||
|
||||
results = self.backend.autocomplete("313", models.Book)
|
||||
self.assertUnsortedListEqual(
|
||||
[r.title for r in results],
|
||||
[
|
||||
"Harry Potter and the 31337 Goblets of Fire",
|
||||
],
|
||||
)
|
||||
|
||||
results = self.backend.search("31337 goblets", models.Book)
|
||||
self.assertUnsortedListEqual(
|
||||
[r.title for r in results],
|
||||
[
|
||||
"Harry Potter and the 31337 Goblets of Fire",
|
||||
],
|
||||
)
|
||||
|
||||
results = self.backend.autocomplete("31337 gob", models.Book)
|
||||
self.assertUnsortedListEqual(
|
||||
[r.title for r in results],
|
||||
[
|
||||
"Harry Potter and the 31337 Goblets of Fire",
|
||||
],
|
||||
)
|
||||
|
||||
def test_and_operator_with_single_field(self):
|
||||
# Testing for bug #1859
|
||||
results = self.backend.search(
|
||||
|
|
Ładowanie…
Reference in New Issue