Added AutocompleteField & autocomplete().

Squashes these commits:

Added AutocompleteField

Implemented AutocompleteField for Elasticsearch

Add autocomplete() method to search backends

Implement autocomplete API for Elasticsearch

Allow partial match to be disabled on search()

Refactor search/autocomplete methods on base search backend
pull/4686/head
Karl Hobley 2018-06-01 14:40:09 +01:00 zatwierdzone przez Bertrand Bordage
rodzic 1d517dd0d8
commit 90cc9c54ba
17 zmienionych plików z 190 dodań i 25 usunięć

Wyświetl plik

@ -1,3 +1,5 @@
import unittest
from django.test import TestCase
from wagtail.search.tests.test_backends import BackendTests
@ -35,3 +37,8 @@ class TestPostgresSearchBackend(BackendTests, TestCase):
[(6, 'A'), (4, 'B'), (2, 'C'), (0, 'D')])
self.assertListEqual(determine_boosts_weights([-2, -1, 0, 1, 2, 3, 4]),
[(4, 'A'), (2, 'B'), (0, 'C'), (-2, 'D')])
# Doesn't support autocomplete: the inherited test_autocomplete() is still
# run, but it is marked as an expected failure for this backend.
@unittest.expectedFailure
def test_autocomplete(self):
super().test_autocomplete()

Wyświetl plik

@ -329,6 +329,7 @@ class Page(AbstractPage, index.Indexed, ClusterableModel, metaclass=PageBase):
search_fields = [
index.SearchField('title', partial_match=True, boost=2),
index.AutocompleteField('title'),
index.FilterField('title'),
index.FilterField('id'),
index.FilterField('live'),

Wyświetl plik

@ -39,9 +39,11 @@ class AbstractDocument(CollectionMember, index.Indexed, models.Model):
search_fields = CollectionMember.search_fields + [
index.SearchField('title', partial_match=True, boost=10),
index.AutocompleteField('title'),
index.FilterField('title'),
index.RelatedFields('tags', [
index.SearchField('name', partial_match=True, boost=10),
index.AutocompleteField('name'),
]),
index.FilterField('uploaded_by_user'),
]

Wyświetl plik

@ -153,9 +153,11 @@ class AbstractImage(CollectionMember, index.Indexed, models.Model):
search_fields = CollectionMember.search_fields + [
index.SearchField('title', partial_match=True, boost=10),
index.AutocompleteField('title'),
index.FilterField('title'),
index.RelatedFields('tags', [
index.SearchField('name', partial_match=True, boost=10),
index.AutocompleteField('name'),
]),
index.FilterField('uploaded_by_user'),
]

Wyświetl plik

@ -33,7 +33,7 @@ class OrderByFieldError(FieldError):
class BaseSearchQueryCompiler:
DEFAULT_OPERATOR = 'or'
def __init__(self, queryset, query, fields=None, operator=None, order_by_relevance=True):
def __init__(self, queryset, query, fields=None, operator=None, order_by_relevance=True, partial_match=True):
self.queryset = queryset
if query is None:
warn('Querying `None` is deprecated, use `MATCH_ALL` instead.',
@ -45,6 +45,7 @@ class BaseSearchQueryCompiler:
self.query = query
self.fields = fields
self.order_by_relevance = order_by_relevance
self.partial_match = partial_match
def _get_filterable_field(self, field_attname):
# Get field
@ -273,6 +274,7 @@ class EmptySearchResults(BaseSearchResults):
class BaseSearchBackend:
query_compiler_class = None
autocomplete_query_compiler_class = None
results_class = None
rebuilder_class = None
@ -303,7 +305,7 @@ class BaseSearchBackend:
def delete(self, obj):
raise NotImplementedError
def search(self, query, model_or_queryset, fields=None, operator=None, order_by_relevance=True):
def _search(self, query_compiler_class, query, model_or_queryset, **kwargs):
# Find model/queryset
if isinstance(model_or_queryset, QuerySet):
model = model_or_queryset.model
@ -321,11 +323,36 @@ class BaseSearchBackend:
return EmptySearchResults()
# Search
search_query = self.query_compiler_class(
queryset, query, fields=fields, operator=operator, order_by_relevance=order_by_relevance
query_compiler_class = query_compiler_class
search_query = query_compiler_class(
queryset, query, **kwargs
)
# Check the query
search_query.check()
return self.results_class(self, search_query)
def search(self, query, model_or_queryset, fields=None, operator=None, order_by_relevance=True, partial_match=True):
    """
    Run a search using this backend's ``query_compiler_class``.

    All keyword options (``fields``, ``operator``, ``order_by_relevance``
    and ``partial_match``) are handed to ``_search()``, which forwards them
    to the query compiler. Returns whatever ``_search()`` returns.
    """
    compiler_options = dict(
        fields=fields,
        operator=operator,
        order_by_relevance=order_by_relevance,
        partial_match=partial_match,
    )
    return self._search(self.query_compiler_class, query, model_or_queryset, **compiler_options)
def autocomplete(self, query, model_or_queryset, fields=None, operator=None, order_by_relevance=True):
    """
    Run an autocomplete query using ``autocomplete_query_compiler_class``.

    Raises ``NotImplementedError`` when the backend has not set an
    autocomplete query compiler. Otherwise the call is delegated to
    ``_search()`` with the given ``fields``, ``operator`` and
    ``order_by_relevance`` options.
    """
    compiler_class = self.autocomplete_query_compiler_class
    if compiler_class is None:
        raise NotImplementedError("This search backend does not support the autocomplete API")

    compiler_options = {
        'fields': fields,
        'operator': operator,
        'order_by_relevance': order_by_relevance,
    }
    return self._search(compiler_class, query, model_or_queryset, **compiler_options)

Wyświetl plik

@ -13,7 +13,8 @@ from elasticsearch.helpers import bulk
from wagtail.search.backends.base import (
BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults, FilterFieldError)
from wagtail.search.index import FilterField, Indexed, RelatedFields, SearchField, class_is_indexed
from wagtail.search.index import (
AutocompleteField, FilterField, Indexed, RelatedFields, SearchField, class_is_indexed)
from wagtail.search.query import (
And, Boost, Filter, Fuzzy, MatchAll, Not, Or, PlainText, Prefix, Term)
from wagtail.utils.deprecation import RemovedInWagtail22Warning
@ -111,6 +112,8 @@ class Elasticsearch2Mapping:
if isinstance(field, FilterField):
return prefix + field.get_attname(self.model) + '_filter'
elif isinstance(field, AutocompleteField):
return prefix + field.get_attname(self.model) + '_edgengrams'
elif isinstance(field, SearchField):
return prefix + field.get_attname(self.model)
elif isinstance(field, RelatedFields):
@ -171,6 +174,11 @@ class Elasticsearch2Mapping:
mapping['include_in_all'] = True
if isinstance(field, AutocompleteField):
mapping['type'] = self.text_type
mapping['include_in_all'] = False
mapping.update(self.edgengram_analyzer_config)
elif isinstance(field, FilterField):
if mapping['type'] == 'string':
mapping['type'] = self.keyword_type
@ -218,7 +226,7 @@ class Elasticsearch2Mapping:
def _get_nested_document(self, fields, obj):
doc = {}
partials = []
edgengrams = []
model = type(obj)
mapping = type(self)(model)
@ -227,15 +235,15 @@ class Elasticsearch2Mapping:
doc[mapping.get_field_column_name(field)] = value
# Check if this field should be added into _edgengrams
if isinstance(field, SearchField) and field.partial_match:
partials.append(value)
if (isinstance(field, SearchField) and field.partial_match) or isinstance(field, AutocompleteField):
edgengrams.append(value)
return doc, partials
return doc, edgengrams
def get_document(self, obj):
# Build document
doc = dict(pk=str(obj.pk), content_type=self.get_all_content_types())
partials = []
edgengrams = []
for field in self.model.get_search_fields():
value = field.get_value(obj)
@ -246,12 +254,12 @@ class Elasticsearch2Mapping:
for nested_obj in value.all():
nested_doc, extra_edgengrams = self._get_nested_document(field.fields, nested_obj)
nested_docs.append(nested_doc)
partials.extend(extra_edgengrams)
edgengrams.extend(extra_edgengrams)
value = nested_docs
elif isinstance(value, models.Model):
value, extra_edgengrams = self._get_nested_document(field.fields, value)
partials.extend(extra_edgengrams)
edgengrams.extend(extra_edgengrams)
elif isinstance(field, FilterField):
if isinstance(value, (models.Manager, models.QuerySet)):
value = list(value.values_list('pk', flat=True))
@ -261,11 +269,11 @@ class Elasticsearch2Mapping:
doc[self.get_field_column_name(field)] = value
# Check if this field should be added into _edgengrams
if isinstance(field, SearchField) and field.partial_match:
partials.append(value)
if (isinstance(field, SearchField) and field.partial_match) or isinstance(field, AutocompleteField):
edgengrams.append(value)
# Add partials to document
doc[self.edgengrams_field_name] = partials
doc[self.edgengrams_field_name] = edgengrams
return doc
@ -496,7 +504,12 @@ class Elasticsearch2SearchQueryCompiler(BaseSearchQueryCompiler):
% query.__class__.__name__)
def get_inner_query(self):
fields = self.remapped_fields or [self.mapping.all_field_name, self.mapping.edgengrams_field_name]
if self.remapped_fields:
fields = self.remapped_fields
elif self.partial_match:
fields = [self.mapping.all_field_name, self.mapping.edgengrams_field_name]
else:
fields = [self.mapping.all_field_name]
if len(fields) == 0:
# No fields. Return a query that'll match nothing
@ -605,6 +618,43 @@ class Elasticsearch2SearchQueryCompiler(BaseSearchQueryCompiler):
return json.dumps(self.get_query())
class ElasticsearchAutocompleteQueryCompilerImpl:
    """
    Mixin that makes an Elasticsearch query compiler target autocomplete fields.

    It is meant to be combined with a concrete query compiler that provides
    ``fields``, ``queryset``, ``query``, ``mapping`` and
    ``_compile_plaintext_query()``.

    NOTE: Python resolves methods left-to-right across base classes, so the
    overrides below only take effect when this mixin is listed *before* any
    base that also defines ``__init__`` / ``get_inner_query``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Convert field names into index column names
        # Note: this overrides Elasticsearch2SearchQueryCompiler by using
        # autocomplete fields instead of searchable fields
        if self.fields:
            autocomplete_fields = {
                f.field_name: f
                for f in self.queryset.model.get_autocomplete_search_fields()
            }
            # Names that are not autocomplete fields are passed through as-is.
            self.remapped_fields = [
                self.mapping.get_field_column_name(autocomplete_fields[field_name])
                if field_name in autocomplete_fields else field_name
                for field_name in self.fields
            ]
        else:
            self.remapped_fields = None

    def get_inner_query(self):
        """
        Compile the query against the requested autocomplete columns, or
        against the mapping's edge-ngrams field when none were requested.
        """
        # The fallback list always holds exactly one entry, so ``fields`` can
        # never be empty here; the former "no fields" branch was unreachable
        # and has been removed.
        fields = self.remapped_fields or [self.mapping.edgengrams_field_name]
        return self._compile_plaintext_query(self.query, fields)
class Elasticsearch2AutocompleteQueryCompiler(ElasticsearchAutocompleteQueryCompilerImpl, Elasticsearch2SearchQueryCompiler):
    """
    Autocomplete query compiler for the Elasticsearch 2 backend.

    The autocomplete mixin is listed first so that its ``__init__`` and
    ``get_inner_query`` take precedence over the ones defined on
    ``Elasticsearch2SearchQueryCompiler``; with the bases the other way
    round the mixin's ``get_inner_query`` would be shadowed and never run.
    """
class Elasticsearch2SearchResults(BaseSearchResults):
fields_param_name = 'fields'
supports_facet = True
@ -968,6 +1018,7 @@ class ElasticsearchAtomicIndexRebuilder(ElasticsearchIndexRebuilder):
class Elasticsearch2SearchBackend(BaseSearchBackend):
index_class = Elasticsearch2Index
query_compiler_class = Elasticsearch2SearchQueryCompiler
autocomplete_query_compiler_class = Elasticsearch2AutocompleteQueryCompiler
results_class = Elasticsearch2SearchResults
mapping_class = Elasticsearch2Mapping
basic_rebuilder_class = ElasticsearchIndexRebuilder

Wyświetl plik

@ -1,6 +1,6 @@
from .elasticsearch2 import (
Elasticsearch2Index, Elasticsearch2Mapping, Elasticsearch2SearchBackend,
Elasticsearch2SearchQueryCompiler, Elasticsearch2SearchResults)
Elasticsearch2SearchQueryCompiler, Elasticsearch2SearchResults, ElasticsearchAutocompleteQueryCompilerImpl)
class Elasticsearch5Mapping(Elasticsearch2Mapping):
@ -89,6 +89,10 @@ class Elasticsearch5SearchQueryCompiler(Elasticsearch2SearchQueryCompiler):
return inner_query
class Elasticsearch5AutocompleteQueryCompiler(ElasticsearchAutocompleteQueryCompilerImpl, Elasticsearch5SearchQueryCompiler):
    """
    Autocomplete query compiler for the Elasticsearch 5 backend.

    The autocomplete mixin is listed first so that its overrides win in the
    method resolution order; placed after the concrete compiler, its
    ``get_inner_query`` would be shadowed by the inherited search version.
    """
class Elasticsearch5SearchResults(Elasticsearch2SearchResults):
fields_param_name = 'stored_fields'
@ -97,6 +101,7 @@ class Elasticsearch5SearchBackend(Elasticsearch2SearchBackend):
mapping_class = Elasticsearch5Mapping
index_class = Elasticsearch5Index
query_compiler_class = Elasticsearch5SearchQueryCompiler
autocomplete_query_compiler_class = Elasticsearch5AutocompleteQueryCompiler
results_class = Elasticsearch5SearchResults

Wyświetl plik

@ -1,3 +1,4 @@
from .elasticsearch2 import ElasticsearchAutocompleteQueryCompilerImpl
from .elasticsearch5 import (
Elasticsearch5Index, Elasticsearch5Mapping, Elasticsearch5SearchBackend,
Elasticsearch5SearchQueryCompiler, Elasticsearch5SearchResults)
@ -48,10 +49,15 @@ class Elasticsearch6SearchResults(Elasticsearch5SearchResults):
pass
class Elasticsearch6AutocompleteQueryCompiler(ElasticsearchAutocompleteQueryCompilerImpl, Elasticsearch6SearchQueryCompiler):
    """
    Autocomplete query compiler for the Elasticsearch 6 backend.

    Derives from ``Elasticsearch6SearchQueryCompiler`` (the compiler this
    module's backend uses for regular searches) rather than the ES5 one, and
    lists the autocomplete mixin first so that its overrides win in the
    method resolution order.

    NOTE(review): assumes ``Elasticsearch6SearchQueryCompiler`` is defined
    above this class in the module -- confirm.
    """
class Elasticsearch6SearchBackend(Elasticsearch5SearchBackend):
mapping_class = Elasticsearch6Mapping
index_class = Elasticsearch6Index
query_compiler_class = Elasticsearch6SearchQueryCompiler
autocomplete_query_compiler_class = Elasticsearch6AutocompleteQueryCompiler
results_class = Elasticsearch6SearchResults

Wyświetl plik

@ -60,6 +60,13 @@ class Indexed:
if isinstance(field, SearchField)
]
@classmethod
def get_autocomplete_search_fields(cls):
    """Return the entries of ``get_search_fields()`` that are ``AutocompleteField`` instances."""
    autocomplete_fields = []
    for candidate in cls.get_search_fields():
        if isinstance(candidate, AutocompleteField):
            autocomplete_fields.append(candidate)
    return autocomplete_fields
@classmethod
def get_filterable_search_fields(cls):
return [
@ -222,6 +229,10 @@ class SearchField(BaseField):
self.partial_match = partial_match
class AutocompleteField(BaseField):
    """
    Marks a model field as available to autocomplete queries.

    Adds no behaviour of its own on top of ``BaseField``; search backends
    recognise it with ``isinstance`` checks.
    """
class FilterField(BaseField):
pass

Wyświetl plik

@ -3,10 +3,19 @@ from wagtail.search.backends import get_search_backend
class SearchableQuerySetMixin:
def search(self, query, fields=None,
operator=None, order_by_relevance=True, backend='default'):
operator=None, order_by_relevance=True, partial_match=True, backend='default'):
"""
This runs a search query on all the items in the QuerySet
"""
search_backend = get_search_backend(backend)
return search_backend.search(query, self, fields=fields,
operator=operator, order_by_relevance=order_by_relevance)
operator=operator, order_by_relevance=order_by_relevance, partial_match=partial_match)
def autocomplete(self, query, fields=None,
                 operator=None, order_by_relevance=True, backend='default'):
    """
    Run an autocomplete query over the items in this QuerySet, using the
    search backend registered under ``backend``
    """
    options = {
        'fields': fields,
        'operator': operator,
        'order_by_relevance': order_by_relevance,
    }
    return get_search_backend(backend).autocomplete(query, self, **options)

Wyświetl plik

@ -36,6 +36,21 @@ class ElasticsearchCommonSearchBackendTests(BackendTests):
"JavaScript: The good parts"
])
# With partial_match=False, searching for the bare prefix "Java" is
# expected to return no books at all.
def test_disabled_partial_search(self):
results = self.backend.search("Java", models.Book, partial_match=False)
self.assertUnsortedListEqual([r.title for r in results], [])
# Control case for the partial_match=False behaviour: the complete term
# "JavaScript" must still find both JavaScript books.
def test_disabled_partial_search_with_whole_term(self):
# Making sure that there isn't a different reason why the above test
# returned no results
results = self.backend.search("JavaScript", models.Book, partial_match=False)
self.assertUnsortedListEqual([r.title for r in results], [
"JavaScript: The Definitive Guide",
"JavaScript: The good parts"
])
def test_child_partial_search(self):
# Note: Expands to "Westeros". Which is in a field on Novel.setting
results = self.backend.search("Wes", models.Book)

Wyświetl plik

@ -172,6 +172,17 @@ class BackendTests(WagtailTestUtils):
"Two Scoops of Django 1.11"
])
# AUTOCOMPLETE TESTS
# Calls backend.autocomplete() with the prefix "Py" and expects exactly one
# matching book, "Learning Python".
def test_autocomplete(self):
# This one shouldn't match "Django Two scoops" as "get_programming_language_display"
# isn't an autocomplete field
results = self.backend.autocomplete("Py", models.Book)
self.assertUnsortedListEqual([r.title for r in results], [
"Learning Python",
])
# FILTERING TESTS
def test_filter_exact_value(self):

Wyświetl plik

@ -8,6 +8,11 @@ from .test_backends import BackendTests
class TestDBBackend(BackendTests, TestCase):
backend_path = 'wagtail.search.backends.db'
# Doesn't support autocomplete: the inherited test_autocomplete() is still
# run, but it is marked as an expected failure for this backend.
@unittest.expectedFailure
def test_autocomplete(self):
super().test_autocomplete()
# Doesn't support ranking
@unittest.expectedFailure
def test_ranking(self):

Wyświetl plik

@ -522,6 +522,7 @@ class TestElasticsearch2Mapping(TestCase):
'content_type': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False},
'_partials': {'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard', 'include_in_all': False, 'type': 'string'},
'title': {'type': 'string', 'boost': 2.0, 'include_in_all': True, 'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard'},
'title_edgengrams': {'type': 'string', 'include_in_all': False, 'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard'},
'title_filter': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False},
'authors': {
'type': 'nested',
@ -561,8 +562,9 @@ class TestElasticsearch2Mapping(TestCase):
expected_result = {
'pk': '4',
'content_type': ["searchtests.Book"],
'_partials': ['The Fellowship of the Ring'],
'_partials': ['The Fellowship of the Ring', 'The Fellowship of the Ring'],
'title': 'The Fellowship of the Ring',
'title_edgengrams': 'The Fellowship of the Ring',
'title_filter': 'The Fellowship of the Ring',
'authors': [
{
@ -625,6 +627,7 @@ class TestElasticsearch2MappingInheritance(TestCase):
'content_type': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False},
'_partials': {'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard', 'include_in_all': False, 'type': 'string'},
'title': {'type': 'string', 'boost': 2.0, 'include_in_all': True, 'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard'},
'title_edgengrams': {'type': 'string', 'include_in_all': False, 'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard'},
'title_filter': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False},
'authors': {
'type': 'nested',
@ -688,11 +691,12 @@ class TestElasticsearch2MappingInheritance(TestCase):
# Changed
'content_type': ["searchtests.Novel", "searchtests.Book"],
'_partials': ['Middle Earth', 'The Fellowship of the Ring'],
'_partials': ['Middle Earth', 'The Fellowship of the Ring', 'The Fellowship of the Ring'],
# Inherited
'pk': '4',
'title': 'The Fellowship of the Ring',
'title_edgengrams': 'The Fellowship of the Ring',
'title_filter': 'The Fellowship of the Ring',
'authors': [
{

Wyświetl plik

@ -523,6 +523,7 @@ class TestElasticsearch5Mapping(TestCase):
'content_type': {'type': 'keyword', 'include_in_all': False},
'_partials': {'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard', 'include_in_all': False, 'type': 'text'},
'title': {'type': 'text', 'boost': 2.0, 'include_in_all': True, 'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard'},
'title_edgengrams': {'type': 'text', 'include_in_all': False, 'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard'},
'title_filter': {'type': 'keyword', 'include_in_all': False},
'authors': {
'type': 'nested',
@ -562,8 +563,9 @@ class TestElasticsearch5Mapping(TestCase):
expected_result = {
'pk': '4',
'content_type': ["searchtests.Book"],
'_partials': ['The Fellowship of the Ring'],
'_partials': ['The Fellowship of the Ring', 'The Fellowship of the Ring'],
'title': 'The Fellowship of the Ring',
'title_edgengrams': 'The Fellowship of the Ring',
'title_filter': 'The Fellowship of the Ring',
'authors': [
{
@ -626,6 +628,7 @@ class TestElasticsearch5MappingInheritance(TestCase):
'content_type': {'type': 'keyword', 'include_in_all': False},
'_partials': {'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard', 'include_in_all': False, 'type': 'text'},
'title': {'type': 'text', 'boost': 2.0, 'include_in_all': True, 'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard'},
'title_edgengrams': {'type': 'text', 'include_in_all': False, 'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard'},
'title_filter': {'type': 'keyword', 'include_in_all': False},
'authors': {
'type': 'nested',
@ -689,11 +692,12 @@ class TestElasticsearch5MappingInheritance(TestCase):
# Changed
'content_type': ["searchtests.Novel", "searchtests.Book"],
'_partials': ['Middle Earth', 'The Fellowship of the Ring'],
'_partials': ['Middle Earth', 'The Fellowship of the Ring', 'The Fellowship of the Ring'],
# Inherited
'pk': '4',
'title': 'The Fellowship of the Ring',
'title_edgengrams': 'The Fellowship of the Ring',
'title_filter': 'The Fellowship of the Ring',
'authors': [
{

Wyświetl plik

@ -524,6 +524,7 @@ class TestElasticsearch6Mapping(TestCase):
'_all_text': {'type': 'text'},
'_edgengrams': {'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard', 'type': 'text'},
'title': {'type': 'text', 'boost': 2.0, 'copy_to': '_all_text', 'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard'},
'title_edgengrams': {'type': 'text', 'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard'},
'title_filter': {'type': 'keyword'},
'authors': {
'type': 'nested',
@ -563,8 +564,9 @@ class TestElasticsearch6Mapping(TestCase):
expected_result = {
'pk': '4',
'content_type': ["searchtests.Book"],
'_edgengrams': ['The Fellowship of the Ring'],
'_edgengrams': ['The Fellowship of the Ring', 'The Fellowship of the Ring'],
'title': 'The Fellowship of the Ring',
'title_edgengrams': 'The Fellowship of the Ring',
'title_filter': 'The Fellowship of the Ring',
'authors': [
{
@ -628,6 +630,7 @@ class TestElasticsearch6MappingInheritance(TestCase):
'_all_text': {'type': 'text'},
'_edgengrams': {'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard', 'type': 'text'},
'title': {'type': 'text', 'boost': 2.0, 'copy_to': '_all_text', 'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard'},
'title_edgengrams': {'type': 'text', 'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard'},
'title_filter': {'type': 'keyword'},
'authors': {
'type': 'nested',
@ -691,11 +694,12 @@ class TestElasticsearch6MappingInheritance(TestCase):
# Changed
'content_type': ["searchtests.Novel", "searchtests.Book"],
'_edgengrams': ['Middle Earth', 'The Fellowship of the Ring'],
'_edgengrams': ['Middle Earth', 'The Fellowship of the Ring', 'The Fellowship of the Ring'],
# Inherited
'pk': '4',
'title': 'The Fellowship of the Ring',
'title_edgengrams': 'The Fellowship of the Ring',
'title_filter': 'The Fellowship of the Ring',
'authors': [
{

Wyświetl plik

@ -26,6 +26,7 @@ class Book(index.Indexed, models.Model):
search_fields = [
index.SearchField('title', partial_match=True, boost=2.0),
index.AutocompleteField('title'),
index.FilterField('title'),
index.RelatedFields('authors', Author.search_fields),
index.FilterField('publication_date'),