From 08ced063ff09d102922deaf279275e4c0fb5ff4d Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Sun, 22 Jun 2014 18:56:08 +0100 Subject: [PATCH 1/9] Moved search method into base search backend --- wagtail/wagtailcore/models.py | 2 +- wagtail/wagtailsearch/backends/base.py | 38 ++++++++++++++++++- wagtail/wagtailsearch/backends/db.py | 34 +++++------------ .../wagtailsearch/backends/elasticsearch.py | 34 +---------------- 4 files changed, 48 insertions(+), 60 deletions(-) diff --git a/wagtail/wagtailcore/models.py b/wagtail/wagtailcore/models.py index 4964966a3b..b6a6645487 100644 --- a/wagtail/wagtailcore/models.py +++ b/wagtail/wagtailcore/models.py @@ -514,7 +514,7 @@ class Page(six.with_metaclass(PageBase, MP_Node, ClusterableModel, indexed.Index # Search s = get_search_backend() - return s.search(query_string, model=cls, fields=fields, filters=filters, prefetch_related=prefetch_related) + return s.search(query_string, cls, fields=fields, filters=filters, prefetch_related=prefetch_related) @classmethod def clean_subpage_types(cls): diff --git a/wagtail/wagtailsearch/backends/base.py b/wagtail/wagtailsearch/backends/base.py index 393ecae1be..82e2e8d563 100644 --- a/wagtail/wagtailsearch/backends/base.py +++ b/wagtail/wagtailsearch/backends/base.py @@ -1,6 +1,9 @@ from django.db import models +from django.db.models.query import QuerySet +from django.core.exceptions import ImproperlyConfigured from wagtail.wagtailsearch.indexed import Indexed +from wagtail.wagtailsearch.utils import normalise_query_string class BaseSearch(object): @@ -32,5 +35,38 @@ class BaseSearch(object): def delete(self, obj): return NotImplemented - def search(self, query_string, model, fields=None, filters={}, prefetch_related=[]): + def _search(self, queryset, query_string, fields=None): return NotImplemented + + def search(self, query_string, model_or_queryset, fields=None, filters=None, prefetch_related=None): + # Find model/queryset + if isinstance(model_or_queryset, QuerySet): + model = 
model_or_queryset.model + queryset = model_or_queryset + else: + model = model_or_queryset + queryset = model_or_queryset.objects.all() + + # Model must be a descendant of Indexed and be a django model + if not issubclass(model, Indexed) or not issubclass(model, models.Model): + return [] + + # Normalise query string + if query_string is not None: + query_string = normalise_query_string(query_string) + + # Check that theres still a query string after the clean up + if query_string == "": + return [] + + # Apply filters to queryset + if filters: + queryset = queryset.filter(**filters) + + # Prefetch related + if prefetch_related: + for prefetch in prefetch_related: + queryset = queryset.prefetch_related(prefetch) + + # Search + return self._search(queryset, query_string, fields=fields) diff --git a/wagtail/wagtailsearch/backends/db.py b/wagtail/wagtailsearch/backends/db.py index 56536e3430..a94fe3c584 100644 --- a/wagtail/wagtailsearch/backends/db.py +++ b/wagtail/wagtailsearch/backends/db.py @@ -2,7 +2,6 @@ from django.db import models from wagtail.wagtailsearch.backends.base import BaseSearch from wagtail.wagtailsearch.indexed import Indexed -from wagtail.wagtailsearch.utils import normalise_query_string class DBSearch(BaseSearch): @@ -27,26 +26,16 @@ class DBSearch(BaseSearch): def delete(self, obj): pass # Not needed - def search(self, query_string, model, fields=None, filters=None, prefetch_related=None): - # Get fields - if fields is None: - fields = [field.field_name for field in model.get_searchable_search_fields()] - - # Start with all objects - query = model.objects.all() - - # Apply filters - if filters: - query = query.filter(**filters) - + def _search(self, queryset, query_string, fields=None): if query_string is not None: - # Normalise query string - query_string = normalise_query_string(query_string) + # Get fields + if fields is None: + fields = [field.field_name for field in queryset.model.get_searchable_search_fields()] # Get terms terms = 
query_string.split() if not terms: - return model.objects.none() + return queryset.model.objects.none() # Filter by terms for term in terms: @@ -54,21 +43,16 @@ class DBSearch(BaseSearch): for field_name in fields: # Check if the field exists (this will filter out indexed callables) try: - model._meta.get_field_by_name(field_name) + queryset.model._meta.get_field_by_name(field_name) except: continue # Filter on this field term_query |= models.Q(**{'%s__icontains' % field_name: term}) - query = query.filter(term_query) + queryset = queryset.filter(term_query) # Distinct - query = query.distinct() + queryset = queryset.distinct() - # Prefetch related - if prefetch_related: - for prefetch in prefetch_related: - query = query.prefetch_related(prefetch) - - return query + return queryset diff --git a/wagtail/wagtailsearch/backends/elasticsearch.py b/wagtail/wagtailsearch/backends/elasticsearch.py index ea0413eeda..8f17ba6ce1 100644 --- a/wagtail/wagtailsearch/backends/elasticsearch.py +++ b/wagtail/wagtailsearch/backends/elasticsearch.py @@ -3,14 +3,12 @@ from __future__ import absolute_import import json from django.db import models -from django.db.models.query import QuerySet from elasticsearch import Elasticsearch, NotFoundError, RequestError from elasticsearch.helpers import bulk from wagtail.wagtailsearch.backends.base import BaseSearch from wagtail.wagtailsearch.indexed import Indexed, SearchField, FilterField -from wagtail.wagtailsearch.utils import normalise_query_string class ElasticSearchMapping(object): @@ -576,35 +574,5 @@ class ElasticSearch(BaseSearch): except NotFoundError: pass # Document doesn't exist, ignore this exception - def search(self, query_string, model_or_queryset, fields=None, filters=None, prefetch_related=None): - # Find model/queryset - if isinstance(model_or_queryset, QuerySet): - model = model_or_queryset.model - queryset = model_or_queryset - else: - model = model_or_queryset - queryset = model_or_queryset.objects.all() - - # Model must 
be a descendant of Indexed and be a django model - if not issubclass(model, Indexed) or not issubclass(model, models.Model): - return [] - - # Normalise query string - if query_string is not None: - query_string = normalise_query_string(query_string) - - # Check that theres still a query string after the clean up - if query_string == "": - return [] - - # Apply filters to queryset - if filters: - queryset = queryset.filter(**filters) - - # Prefetch related - if prefetch_related: - for prefetch in prefetch_related: - queryset = queryset.prefetch_related(prefetch) - - # Return search results + def _search(self, queryset, query_string, fields=None): return ElasticSearchResults(self, ElasticSearchQuery(queryset, query_string, fields=fields)) From e35e20dd10e2be669addbde992b24b41d78d9f5a Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Sun, 22 Jun 2014 19:14:46 +0100 Subject: [PATCH 2/9] Added search method to PageQuerySet --- wagtail/wagtailcore/models.py | 3 +++ wagtail/wagtailcore/query.py | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/wagtail/wagtailcore/models.py b/wagtail/wagtailcore/models.py index b6a6645487..daee31fb3a 100644 --- a/wagtail/wagtailcore/models.py +++ b/wagtail/wagtailcore/models.py @@ -228,6 +228,9 @@ class PageManager(models.Manager): def not_type(self, model): return self.get_queryset().not_type(model) + def search(self, query_string, fields=None, backend='default'): + return self.get_queryset().search(query_string, fields=fields, backend=backend) + class PageBase(models.base.ModelBase): """Metaclass for Page""" diff --git a/wagtail/wagtailcore/query.py b/wagtail/wagtailcore/query.py index 57e8ffff33..422e46f47a 100644 --- a/wagtail/wagtailcore/query.py +++ b/wagtail/wagtailcore/query.py @@ -2,6 +2,8 @@ from django.db.models import Q from django.contrib.contenttypes.models import ContentType from treebeard.mp_tree import MP_NodeQuerySet +from wagtail.wagtailsearch.backends import get_search_backend + class 
PageQuerySet(MP_NodeQuerySet): """ @@ -107,3 +109,7 @@ class PageQuerySet(MP_NodeQuerySet): def not_type(self, model): return self.exclude(self.type_q(model)) + + def search(self, query_string, fields=None, backend='default'): + search_backend = get_search_backend(backend) + return search_backend.search(query_string, self, fields=None) From b78b6486829f748daf42d6240426abb44d8329a5 Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Sun, 22 Jun 2014 21:07:09 +0100 Subject: [PATCH 3/9] Implemented __in lookup in ElasticSearch backend Also added an error message if a user attempts to use a subquery with the __in lookup. --- wagtail/wagtailsearch/backends/elasticsearch.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/wagtail/wagtailsearch/backends/elasticsearch.py b/wagtail/wagtailsearch/backends/elasticsearch.py index 8f17ba6ce1..289b0f4b20 100644 --- a/wagtail/wagtailsearch/backends/elasticsearch.py +++ b/wagtail/wagtailsearch/backends/elasticsearch.py @@ -3,6 +3,7 @@ from __future__ import absolute_import import json from django.db import models +from django.db.models.sql.where import SubqueryConstraint from elasticsearch import Elasticsearch, NotFoundError, RequestError from elasticsearch.helpers import bulk @@ -201,7 +202,16 @@ class ElasticSearchQuery(object): } } - raise FilterError('Could not apply filter on ElasticSearch results "' + field_name + '__' + lookup + ' = ' + unicode(value) + '". Lookup "' + lookup + '"" not recognosed.') + if lookup == 'in': + return { + 'terms': { + field_index_name: value, + } + } + + raise FilterError('Could not apply filter on ElasticSearch results: "' + field_name + '__' + lookup + ' = ' + unicode(value) + '". 
Lookup "' + lookup + '"" not recognosed.') + elif isinstance(where_node, SubqueryConstraint): + raise FilterError('Could not apply filter on ElasticSearch results: Subqueries are not allowed.') # Get child filters connector = where_node.connector From 9e1bd2d601ab540eb0d2d383ddeec369ef7c94c1 Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Tue, 24 Jun 2014 14:27:40 +0100 Subject: [PATCH 4/9] Added get_indexed_objects method to Indexed class This gives developers control over the QuerySet used when the model is added to the index --- wagtail/wagtailsearch/indexed.py | 4 ++++ wagtail/wagtailsearch/management/commands/update_index.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/wagtail/wagtailsearch/indexed.py b/wagtail/wagtailsearch/indexed.py index a2cb54d14f..2e506d6900 100644 --- a/wagtail/wagtailsearch/indexed.py +++ b/wagtail/wagtailsearch/indexed.py @@ -118,6 +118,10 @@ class Indexed(object): def get_filterable_search_fields(cls): return filter(lambda field: isinstance(field, FilterField), cls.get_search_fields()) + @classmethod + def get_indexed_objects(cls): + return cls.objects.all() + indexed_fields = () diff --git a/wagtail/wagtailsearch/management/commands/update_index.py b/wagtail/wagtailsearch/management/commands/update_index.py index 2c15936351..c52b756398 100644 --- a/wagtail/wagtailsearch/management/commands/update_index.py +++ b/wagtail/wagtailsearch/management/commands/update_index.py @@ -24,7 +24,7 @@ class Command(BaseCommand): toplevel_content_type = model.indexed_get_toplevel_content_type() # Loop through objects - for obj in model.objects.all(): + for obj in model.get_indexed_objects(): # Get key for this object key = toplevel_content_type + ':' + str(obj.pk) From b9d3e44ebf1b146987338c137c9bb867a81f52fd Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Tue, 24 Jun 2014 14:28:40 +0100 Subject: [PATCH 5/9] Use get_indexed_objects to speed up indexing of Images/Documents Previously, this created a query for every single 
image and document to get the tags. This was very slow on RCA which has over 15000 images. This commit fixes this by adding a prefetch_related to the QuerySet used for indexing. --- wagtail/wagtailadmin/taggable.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/wagtail/wagtailadmin/taggable.py b/wagtail/wagtailadmin/taggable.py index 91c36ca7f8..3b7de479c4 100644 --- a/wagtail/wagtailadmin/taggable.py +++ b/wagtail/wagtailadmin/taggable.py @@ -21,7 +21,11 @@ class TagSearchable(indexed.Indexed): @property def get_tags(self): - return ' '.join([tag.name for tag in self.tags.all()]) + return ' '.join([tag.name for tag in self.prefetched_tags()]) + + @classmethod + def get_indexed_objects(cls): + return super(TagSearchable, cls).get_indexed_objects().prefetch_related('tagged_items__tag') @classmethod def search(cls, q, results_per_page=None, page=1, prefetch_tags=False, filters={}): From 532bb6241e7cbced94fb044306e3eba7a447be82 Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Thu, 3 Jul 2014 14:26:05 +0100 Subject: [PATCH 6/9] Added some more FilterFields to Page --- wagtail/wagtailcore/models.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/wagtail/wagtailcore/models.py b/wagtail/wagtailcore/models.py index daee31fb3a..75021d5846 100644 --- a/wagtail/wagtailcore/models.py +++ b/wagtail/wagtailcore/models.py @@ -285,8 +285,12 @@ class Page(six.with_metaclass(PageBase, MP_Node, ClusterableModel, indexed.Index search_fields = ( indexed.SearchField('title', partial_match=True, boost=100), + indexed.FilterField('id'), indexed.FilterField('live'), + indexed.FilterField('owner'), + indexed.FilterField('content_type'), indexed.FilterField('path'), + indexed.FilterField('depth'), ) def __init__(self, *args, **kwargs): From dcca9bd515f5bfbaed35c912236573d85a41d473 Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Fri, 4 Jul 2014 10:24:32 +0100 Subject: [PATCH 7/9] Use a multi_match query instead of a query_string query --- 
.../wagtailsearch/backends/elasticsearch.py | 4 +-- .../tests/test_elasticsearch_backend.py | 26 +++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/wagtail/wagtailsearch/backends/elasticsearch.py b/wagtail/wagtailsearch/backends/elasticsearch.py index 289b0f4b20..c28b676eec 100644 --- a/wagtail/wagtailsearch/backends/elasticsearch.py +++ b/wagtail/wagtailsearch/backends/elasticsearch.py @@ -258,14 +258,14 @@ class ElasticSearchQuery(object): # Query if self.query_string is not None: query = { - 'query_string': { + 'multi_match': { 'query': self.query_string, } } # Fields if self.fields: - query['query_string']['fields'] = self.fields + query['multi_match']['fields'] = self.fields else: query = { 'match_all': {} diff --git a/wagtail/wagtailsearch/tests/test_elasticsearch_backend.py b/wagtail/wagtailsearch/tests/test_elasticsearch_backend.py index 0c7906b393..6d2df3b4cf 100644 --- a/wagtail/wagtailsearch/tests/test_elasticsearch_backend.py +++ b/wagtail/wagtailsearch/tests/test_elasticsearch_backend.py @@ -88,7 +88,7 @@ class TestElasticSearchQuery(TestCase): query = self.ElasticSearchQuery(models.SearchTest.objects.all(), "Hello") # Check it - expected_result = {'filtered': {'filter': {'prefix': {'content_type': 'tests_searchtest'}}, 'query': {'query_string': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + expected_result = {'filtered': {'filter': {'prefix': {'content_type': 'tests_searchtest'}}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} self.assertDictEqual(query.to_es(), expected_result) def test_none_query_string(self): @@ -104,7 +104,7 @@ class TestElasticSearchQuery(TestCase): query = self.ElasticSearchQuery(models.SearchTest.objects.filter(title="Test"), "Hello") # Check it - expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'term': {'title_filter': 'Test'}}]}, 'query': {'query_string': {'query': 'Hello', 'fields': ['_all', 
'_partials']}}}} + expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'term': {'title_filter': 'Test'}}]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} self.assertDictEqual(query.to_es(), expected_result) def test_and_filter(self): @@ -112,7 +112,7 @@ class TestElasticSearchQuery(TestCase): query = self.ElasticSearchQuery(models.SearchTest.objects.filter(title="Test", live=True), "Hello") # Check it - expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'and': [{'term': {'live_filter': True}}, {'term': {'title_filter': 'Test'}}]}]}, 'query': {'query_string': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'and': [{'term': {'live_filter': True}}, {'term': {'title_filter': 'Test'}}]}]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} # Make sure field filters are sorted (as they can be in any order which may cause false positives) query = query.to_es() @@ -131,7 +131,7 @@ class TestElasticSearchQuery(TestCase): field_filters[:] = sorted(field_filters, key=lambda f: list(f['term'].keys())[0]) # Check it - expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'or': [{'term': {'live_filter': True}}, {'term': {'title_filter': 'Test'}}]}]}, 'query': {'query_string': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'or': [{'term': {'live_filter': True}}, {'term': {'title_filter': 'Test'}}]}]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} self.assertDictEqual(query, expected_result) def test_negated_filter(self): @@ -139,7 +139,7 @@ class TestElasticSearchQuery(TestCase): query = 
self.ElasticSearchQuery(models.SearchTest.objects.exclude(live=True), "Hello") # Check it - expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'not': {'term': {'live_filter': True}}}]}, 'query': {'query_string': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'not': {'term': {'live_filter': True}}}]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} self.assertDictEqual(query.to_es(), expected_result) def test_fields(self): @@ -147,7 +147,7 @@ class TestElasticSearchQuery(TestCase): query = self.ElasticSearchQuery(models.SearchTest.objects.all(), "Hello", fields=['title']) # Check it - expected_result = {'filtered': {'filter': {'prefix': {'content_type': 'tests_searchtest'}}, 'query': {'query_string': {'query': 'Hello', 'fields': ['title']}}}} + expected_result = {'filtered': {'filter': {'prefix': {'content_type': 'tests_searchtest'}}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['title']}}}} self.assertDictEqual(query.to_es(), expected_result) def test_exact_lookup(self): @@ -155,7 +155,7 @@ class TestElasticSearchQuery(TestCase): query = self.ElasticSearchQuery(models.SearchTest.objects.filter(title__exact="Test"), "Hello") # Check it - expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'term': {'title_filter': 'Test'}}]}, 'query': {'query_string': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'term': {'title_filter': 'Test'}}]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} self.assertDictEqual(query.to_es(), expected_result) def test_none_lookup(self): @@ -163,7 +163,7 @@ class TestElasticSearchQuery(TestCase): query = 
self.ElasticSearchQuery(models.SearchTest.objects.filter(title=None), "Hello") # Check it - expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'missing': {'field': 'title_filter'}}]}, 'query': {'query_string': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'missing': {'field': 'title_filter'}}]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} self.assertDictEqual(query.to_es(), expected_result) def test_isnull_true_lookup(self): @@ -171,7 +171,7 @@ class TestElasticSearchQuery(TestCase): query = self.ElasticSearchQuery(models.SearchTest.objects.filter(title__isnull=True), "Hello") # Check it - expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'missing': {'field': 'title_filter'}}]}, 'query': {'query_string': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'missing': {'field': 'title_filter'}}]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} self.assertDictEqual(query.to_es(), expected_result) def test_isnull_false_lookup(self): @@ -179,7 +179,7 @@ class TestElasticSearchQuery(TestCase): query = self.ElasticSearchQuery(models.SearchTest.objects.filter(title__isnull=False), "Hello") # Check it - expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'not': {'missing': {'field': 'title_filter'}}}]}, 'query': {'query_string': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'not': {'missing': {'field': 'title_filter'}}}]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} 
self.assertDictEqual(query.to_es(), expected_result) def test_startswith_lookup(self): @@ -187,7 +187,7 @@ class TestElasticSearchQuery(TestCase): query = self.ElasticSearchQuery(models.SearchTest.objects.filter(title__startswith="Test"), "Hello") # Check it - expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'prefix': {'title_filter': 'Test'}}]}, 'query': {'query_string': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'prefix': {'title_filter': 'Test'}}]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} self.assertDictEqual(query.to_es(), expected_result) def test_gt_lookup(self): @@ -198,7 +198,7 @@ class TestElasticSearchQuery(TestCase): query = self.ElasticSearchQuery(models.SearchTest.objects.filter(published_date__gt=datetime.datetime(2014, 4, 29)), "Hello") # Check it - expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'range': {'published_date_filter': {'gt': '2014-04-29'}}}]}, 'query': {'query_string': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'range': {'published_date_filter': {'gt': '2014-04-29'}}}]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} self.assertDictEqual(query.to_es(), expected_result) def test_range_lookup(self): @@ -209,7 +209,7 @@ class TestElasticSearchQuery(TestCase): query = self.ElasticSearchQuery(models.SearchTest.objects.filter(published_date__range=(start_date, end_date)), "Hello") # Check it - expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'range': {'published_date_filter': {'gte': '2014-04-29', 'lte': '2014-08-19'}}}]}, 'query': {'query_string': {'query': 'Hello', 'fields': ['_all', 
'_partials']}}}} + expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'range': {'published_date_filter': {'gte': '2014-04-29', 'lte': '2014-08-19'}}}]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} self.assertDictEqual(query.to_es(), expected_result) From 7fc10398ddc657cc1708be8ce01b5285118b8598 Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Fri, 4 Jul 2014 10:31:50 +0100 Subject: [PATCH 8/9] Minor optimisation when building single field queries --- .../wagtailsearch/backends/elasticsearch.py | 24 ++++++++++++------- .../tests/test_elasticsearch_backend.py | 2 +- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/wagtail/wagtailsearch/backends/elasticsearch.py b/wagtail/wagtailsearch/backends/elasticsearch.py index c28b676eec..55f1971a2e 100644 --- a/wagtail/wagtailsearch/backends/elasticsearch.py +++ b/wagtail/wagtailsearch/backends/elasticsearch.py @@ -121,7 +121,7 @@ class ElasticSearchQuery(object): def __init__(self, queryset, query_string, fields=None): self.queryset = queryset self.query_string = query_string - self.fields = fields or ['_all', '_partials'] + self.fields = fields def _get_filters_from_where(self, where_node): # Check if this is a leaf node @@ -257,15 +257,21 @@ class ElasticSearchQuery(object): def to_es(self): # Query if self.query_string is not None: - query = { - 'multi_match': { - 'query': self.query_string, - } - } + fields = self.fields or ['_all', '_partials'] - # Fields - if self.fields: - query['multi_match']['fields'] = self.fields + if len(fields) == 1: + query = { + 'match': { + fields[0]: self.query_string, + } + } + else: + query = { + 'multi_match': { + 'query': self.query_string, + 'fields': fields, + } + } else: query = { 'match_all': {} diff --git a/wagtail/wagtailsearch/tests/test_elasticsearch_backend.py b/wagtail/wagtailsearch/tests/test_elasticsearch_backend.py index 6d2df3b4cf..eeb23ff336 100644 --- 
a/wagtail/wagtailsearch/tests/test_elasticsearch_backend.py +++ b/wagtail/wagtailsearch/tests/test_elasticsearch_backend.py @@ -147,7 +147,7 @@ class TestElasticSearchQuery(TestCase): query = self.ElasticSearchQuery(models.SearchTest.objects.all(), "Hello", fields=['title']) # Check it - expected_result = {'filtered': {'filter': {'prefix': {'content_type': 'tests_searchtest'}}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['title']}}}} + expected_result = {'filtered': {'filter': {'prefix': {'content_type': 'tests_searchtest'}}, 'query': {'match': {'title': 'Hello'}}}} self.assertDictEqual(query.to_es(), expected_result) def test_exact_lookup(self): From 9b8ee979d211be83bd73691df893e9c915b65df0 Mon Sep 17 00:00:00 2001 From: Matt Westcott Date: Wed, 9 Jul 2014 23:29:37 +0100 Subject: [PATCH 9/9] fix tests for lt, gte, lte filters to expect a multi_match response --- wagtail/wagtailsearch/tests/test_elasticsearch_backend.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/wagtail/wagtailsearch/tests/test_elasticsearch_backend.py b/wagtail/wagtailsearch/tests/test_elasticsearch_backend.py index 18b3f7ce08..a95d442ff4 100644 --- a/wagtail/wagtailsearch/tests/test_elasticsearch_backend.py +++ b/wagtail/wagtailsearch/tests/test_elasticsearch_backend.py @@ -205,7 +205,7 @@ class TestElasticSearchQuery(TestCase): query = self.ElasticSearchQuery(models.SearchTest.objects.filter(published_date__lt=datetime.datetime(2014, 4, 29)), "Hello") # Check it - expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'range': {'published_date_filter': {'lt': '2014-04-29'}}}]}, 'query': {'query_string': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'range': {'published_date_filter': {'lt': '2014-04-29'}}}]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} 
self.assertDictEqual(query.to_es(), expected_result) def test_gte_lookup(self): @@ -213,7 +213,7 @@ class TestElasticSearchQuery(TestCase): query = self.ElasticSearchQuery(models.SearchTest.objects.filter(published_date__gte=datetime.datetime(2014, 4, 29)), "Hello") # Check it - expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'range': {'published_date_filter': {'gte': '2014-04-29'}}}]}, 'query': {'query_string': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'range': {'published_date_filter': {'gte': '2014-04-29'}}}]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} self.assertDictEqual(query.to_es(), expected_result) def test_lte_lookup(self): @@ -221,7 +221,7 @@ class TestElasticSearchQuery(TestCase): query = self.ElasticSearchQuery(models.SearchTest.objects.filter(published_date__lte=datetime.datetime(2014, 4, 29)), "Hello") # Check it - expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'range': {'published_date_filter': {'lte': '2014-04-29'}}}]}, 'query': {'query_string': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + expected_result = {'filtered': {'filter': {'and': [{'prefix': {'content_type': 'tests_searchtest'}}, {'range': {'published_date_filter': {'lte': '2014-04-29'}}}]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} self.assertDictEqual(query.to_es(), expected_result) def test_range_lookup(self):