From d794a52ff8b4cf0f342990be5997c6f56b8b18b1 Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Thu, 13 Apr 2017 10:00:53 +0100 Subject: [PATCH] Don't use scroll API when less than 100 items are requested --- .../wagtailsearch/backends/elasticsearch.py | 89 +++++++++++++------ .../tests/test_elasticsearch2_backend.py | 4 - .../tests/test_elasticsearch5_backend.py | 4 - .../tests/test_elasticsearch_backend.py | 4 - 4 files changed, 60 insertions(+), 41 deletions(-) diff --git a/wagtail/wagtailsearch/backends/elasticsearch.py b/wagtail/wagtailsearch/backends/elasticsearch.py index 67ce613b6c..a365acaecd 100644 --- a/wagtail/wagtailsearch/backends/elasticsearch.py +++ b/wagtail/wagtailsearch/backends/elasticsearch.py @@ -447,28 +447,71 @@ class ElasticsearchSearchResults(BaseSearchResults): else: limit = None - # Params for elasticsearch query - params = dict( - index=self.backend.get_index_for_model(self.query.queryset.model).name, - body=self._get_es_body(), - _source=False, - from_=self.start, - scroll='2m', - size=min(limit or PAGE_SIZE, PAGE_SIZE), - ) + use_scroll = limit is None or limit > PAGE_SIZE - params[self.fields_param_name] = 'pk' + params = { + 'index': self.backend.get_index_for_model(self.query.queryset.model).name, + 'body': self._get_es_body(), + '_source': False, + 'from_': self.start, + self.fields_param_name: 'pk', + } - # Send to Elasticsearch - page = self.backend.es.search(**params) + if use_scroll: + params.update({ + 'scroll': '2m', + 'size': PAGE_SIZE, + }) - while True: - if len(page['hits']['hits']) == 0: - return + # Send to Elasticsearch + page = self.backend.es.search(**params) + + while True: + if len(page['hits']['hits']) == 0: + return + + # Get pks from results + pks = [hit['fields']['pk'][0] for hit in page['hits']['hits']] + scores = {str(hit['fields']['pk'][0]): hit['_score'] for hit in page['hits']['hits']} + + # Initialise results dictionary + results = dict((str(pk), None) for pk in pks) + + # Find objects in database and add them to dict + queryset = self.query.queryset.filter(pk__in=pks) + for obj in queryset: + results[str(obj.pk)] = obj + + if self._score_field: + setattr(obj, self._score_field, scores.get(str(obj.pk))) + + # Yield results in order given by Elasticsearch + for pk in pks: + if results[str(pk)]: + yield results[str(pk)] + + if limit is not None: + limit -= 1 + + if limit == 0: + return + + # Fetch next page of results + if '_scroll_id' not in page: + return + + page = self.backend.es.scroll(scroll_id=page['_scroll_id'], scroll='2m') + else: + params.update({ + 'size': limit or PAGE_SIZE, + }) + + # Send to Elasticsearch + hits = self.backend.es.search(**params) # Get pks from results - pks = [hit['fields']['pk'][0] for hit in page['hits']['hits']] - scores = {str(hit['fields']['pk'][0]): hit['_score'] for hit in page['hits']['hits']} + pks = [hit['fields']['pk'][0] for hit in hits['hits']['hits']] + scores = {str(hit['fields']['pk'][0]): hit['_score'] for hit in hits['hits']['hits']} # Initialise results dictionary results = dict((str(pk), None) for pk in pks) @@ -486,18 +529,6 @@ class ElasticsearchSearchResults(BaseSearchResults): if results[str(pk)]: yield results[str(pk)] - if limit is not None: - limit -= 1 - - if limit == 0: - return - - # Fetch next page of results - if '_scroll_id' not in page: - return - - page = self.backend.es.scroll(scroll_id=page['_scroll_id'], scroll='2m') - def _do_count(self): # Get count hit_count = self.backend.es.count( diff --git a/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py b/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py index 370f3fad0b..c36016a5ba 100644 --- a/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py +++ b/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py @@ -427,7 +427,6 @@ class TestElasticsearch2SearchResults(TestCase): _source=False, fields='pk', index='wagtail__searchtests_book', - scroll='2m', size=1 ) @@ -444,7 +443,6 @@ class TestElasticsearch2SearchResults(TestCase): _source=False, fields='pk', index='wagtail__searchtests_book', - scroll='2m', size=3 ) @@ -461,7 +459,6 @@ class TestElasticsearch2SearchResults(TestCase): _source=False, fields='pk', index='wagtail__searchtests_book', - scroll='2m', size=10 ) @@ -479,7 +476,6 @@ class TestElasticsearch2SearchResults(TestCase): _source=False, fields='pk', index='wagtail__searchtests_book', - scroll='2m', size=1 ) diff --git a/wagtail/wagtailsearch/tests/test_elasticsearch5_backend.py b/wagtail/wagtailsearch/tests/test_elasticsearch5_backend.py index f2eb23bc28..2898aafa56 100644 --- a/wagtail/wagtailsearch/tests/test_elasticsearch5_backend.py +++ b/wagtail/wagtailsearch/tests/test_elasticsearch5_backend.py @@ -431,7 +431,6 @@ class TestElasticsearch5SearchResults(TestCase): _source=False, stored_fields='pk', index='wagtail__searchtests_book', - scroll='2m', size=1 ) @@ -448,7 +447,6 @@ class TestElasticsearch5SearchResults(TestCase): _source=False, stored_fields='pk', index='wagtail__searchtests_book', - scroll='2m', size=3 ) @@ -465,7 +463,6 @@ class TestElasticsearch5SearchResults(TestCase): _source=False, stored_fields='pk', index='wagtail__searchtests_book', - scroll='2m', size=10 ) @@ -483,7 +480,6 @@ class TestElasticsearch5SearchResults(TestCase): _source=False, stored_fields='pk', index='wagtail__searchtests_book', - scroll='2m', size=1 ) diff --git a/wagtail/wagtailsearch/tests/test_elasticsearch_backend.py b/wagtail/wagtailsearch/tests/test_elasticsearch_backend.py index 317f5880e8..f44aed4ad2 100644 --- a/wagtail/wagtailsearch/tests/test_elasticsearch_backend.py +++ b/wagtail/wagtailsearch/tests/test_elasticsearch_backend.py @@ -428,7 +428,6 @@ class TestElasticsearchSearchResults(TestCase): _source=False, fields='pk', index='wagtail', - scroll='2m', size=1 ) @@ -445,7 +444,6 @@ class TestElasticsearchSearchResults(TestCase): _source=False, fields='pk', index='wagtail', - scroll='2m', size=3 ) @@ -462,7 +460,6 @@ class TestElasticsearchSearchResults(TestCase): _source=False, fields='pk', index='wagtail', - scroll='2m', size=10 ) @@ -480,7 +477,6 @@ class TestElasticsearchSearchResults(TestCase): _source=False, fields='pk', index='wagtail', - scroll='2m', size=1 )