From 0149eff66a57c3ebe708fdbeff75a2efcf06d330 Mon Sep 17 00:00:00 2001 From: pySilver Date: Thu, 10 Oct 2019 12:00:14 +0200 Subject: [PATCH] Implements ES7 support (#5611) * Implements ES7 support * Linter fix * Adds docs * isort fix * Fixes typo --- .travis.yml | 4 + docs/topics/search/backends.rst | 8 +- runtests.py | 4 + scripts/travis/install_elasticsearch7.sh | 3 + tox.ini | 5 +- wagtail/search/backends/elasticsearch7.py | 98 +++ .../tests/test_elasticsearch7_backend.py | 813 ++++++++++++++++++ wagtail/tests/settings.py | 4 +- 8 files changed, 936 insertions(+), 3 deletions(-) create mode 100755 scripts/travis/install_elasticsearch7.sh create mode 100644 wagtail/search/backends/elasticsearch7.py create mode 100644 wagtail/search/tests/test_elasticsearch7_backend.py diff --git a/.travis.yml b/.travis.yml index e3c2548bac..bc87e5eeaf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -41,6 +41,8 @@ matrix: python: 3.6 - env: TOXENV=py37-dj22-postgres-elasticsearch6 INSTALL_ELASTICSEARCH6=yes python: 3.7 + - env: TOXENV=py37-dj22-postgres-elasticsearch7 INSTALL_ELASTICSEARCH7=yes + python: 3.7 allow_failures: # Ignore failures on Elasticsearch tests because ES on Travis is intermittently flaky - env: TOXENV=py36-dj20-sqlite-elasticsearch2 INSTALL_ELASTICSEARCH2=yes @@ -49,6 +51,7 @@ matrix: - env: TOXENV=py36-dj20-postgres-elasticsearch6 INSTALL_ELASTICSEARCH6=yes - env: TOXENV=py36-dj21-postgres-elasticsearch6 INSTALL_ELASTICSEARCH6=yes - env: TOXENV=py37-dj22-postgres-elasticsearch6 INSTALL_ELASTICSEARCH6=yes + - env: TOXENV=py37-dj22-postgres-elasticsearch7 INSTALL_ELASTICSEARCH7=yes # allow failures against Django 2.2.x stable branch - env: TOXENV=py37-dj22stable-postgres-noelasticsearch # allow failures against Django master @@ -67,6 +70,7 @@ install: - 'if [[ -n "$INSTALL_ELASTICSEARCH2" ]]; then ./scripts/travis/install_elasticsearch2.sh; fi' - 'if [[ -n "$INSTALL_ELASTICSEARCH5" ]]; then ./scripts/travis/install_elasticsearch5.sh; fi' - 'if [[ -n "$INSTALL_ELASTICSEARCH6" ]]; then ./scripts/travis/install_elasticsearch6.sh; fi' + - 'if [[ -n "$INSTALL_ELASTICSEARCH7" ]]; then ./scripts/travis/install_elasticsearch7.sh; fi' # Pre-test configuration before_script: diff --git a/docs/topics/search/backends.rst b/docs/topics/search/backends.rst index 26972c838b..2ef41a71b0 100644 --- a/docs/topics/search/backends.rst +++ b/docs/topics/search/backends.rst @@ -91,7 +91,7 @@ See :ref:`postgres_search` for more detail. Elasticsearch Backend --------------------- -Elasticsearch versions 2, 5 and 6 are supported. Use the appropriate backend for your version: +Elasticsearch versions 2, 5, 6 and 7 are supported. Use the appropriate backend for your version: ``wagtail.search.backends.elasticsearch2`` (Elasticsearch 2.x) @@ -99,6 +99,8 @@ Elasticsearch versions 2, 5 and 6 are supported. Use the appropriate backend for ``wagtail.search.backends.elasticsearch6`` (Elasticsearch 6.x) +``wagtail.search.backends.elasticsearch7`` (Elasticsearch 7.x) + Prerequisites are the `Elasticsearch`_ service itself and, via pip, the `elasticsearch-py`_ package. The major version of the package must match the installed version of Elasticsearch: .. _Elasticsearch: https://www.elastic.co/downloads/elasticsearch @@ -115,6 +117,10 @@ Prerequisites are the `Elasticsearch`_ service itself and, via pip, the `elastic pip install "elasticsearch>=6.4.0,<7.0.0" # for Elasticsearch 6.x +.. code-block:: sh + + pip install "elasticsearch>=7.0.0,<8.0.0" # for Elasticsearch 7.x + .. 
warning::
 
   | Version 6.3.1 of the Elasticsearch client library is incompatible with Wagtail. Use 6.4.0 or above.
diff --git a/runtests.py b/runtests.py
index dbbedd9960..df8e6d5afa 100755
--- a/runtests.py
+++ b/runtests.py
@@ -18,6 +18,7 @@ def make_parser():
     parser.add_argument('--elasticsearch2', action='store_true')
     parser.add_argument('--elasticsearch5', action='store_true')
     parser.add_argument('--elasticsearch6', action='store_true')
+    parser.add_argument('--elasticsearch7', action='store_true')
     parser.add_argument('--bench', action='store_true')
 
     return parser
@@ -57,6 +58,9 @@ def runtests():
     elif args.elasticsearch6:
         os.environ.setdefault('ELASTICSEARCH_URL', 'http://localhost:9200')
         os.environ.setdefault('ELASTICSEARCH_VERSION', '6')
+    elif args.elasticsearch7:
+        os.environ.setdefault('ELASTICSEARCH_URL', 'http://localhost:9200')
+        os.environ.setdefault('ELASTICSEARCH_VERSION', '7')
 
     elif 'ELASTICSEARCH_URL' in os.environ:
         # forcibly delete the ELASTICSEARCH_URL setting to skip those tests
diff --git a/scripts/travis/install_elasticsearch7.sh b/scripts/travis/install_elasticsearch7.sh
new file mode 100755
index 0000000000..9214a2b2b9
--- /dev/null
+++ b/scripts/travis/install_elasticsearch7.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+curl -O https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.4.0-amd64.deb && sudo dpkg -i --force-confnew elasticsearch-7.4.0-amd64.deb && sudo sed -i.old 's/-Xms1g/-Xms128m/' /etc/elasticsearch/jvm.options && sudo sed -i.old 's/-Xmx1g/-Xmx128m/' /etc/elasticsearch/jvm.options && echo -e '-XX:+DisableExplicitGC\n-Djdk.io.permissionsUseCanonicalPath=true\n-Dlog4j.skipJansi=true\n-server\n' | sudo tee -a /etc/elasticsearch/jvm.options && sudo chown -R elasticsearch:elasticsearch /etc/default/elasticsearch && sudo service elasticsearch restart
diff --git a/tox.ini b/tox.ini
index a8a72b46f9..29e9a2f197 100644
--- a/tox.ini
+++ b/tox.ini
@@ -2,7 +2,7 @@
 skipsdist = True
 usedevelop = True
 
-envlist = py{35,36,37}-dj{20,21,22,master}-{sqlite,postgres,mysql,mssql}-{elasticsearch6,elasticsearch5,elasticsearch2,noelasticsearch},
+envlist = py{35,36,37}-dj{20,21,22,master}-{sqlite,postgres,mysql,mssql}-{elasticsearch7,elasticsearch6,elasticsearch5,elasticsearch2,noelasticsearch},
 
 [flake8]
 # D100: Missing docstring in public module
@@ -30,6 +30,7 @@ commands =
     elasticsearch2: coverage run runtests.py wagtail.search wagtail.documents wagtail.images --elasticsearch2
     elasticsearch5: coverage run runtests.py wagtail.search wagtail.documents wagtail.images --elasticsearch5
     elasticsearch6: coverage run runtests.py wagtail.search wagtail.documents wagtail.images --elasticsearch6
+    elasticsearch7: coverage run runtests.py wagtail.search wagtail.documents wagtail.images --elasticsearch7
     noelasticsearch: coverage run runtests.py {posargs}
 
 basepython =
@@ -56,6 +57,8 @@ deps =
     elasticsearch5: certifi
     elasticsearch6: elasticsearch>=6.4.0,<7
     elasticsearch6: certifi
+    elasticsearch7: elasticsearch>=7,<8
+    elasticsearch7: certifi
 
 setenv =
     postgres: DATABASE_ENGINE=django.db.backends.postgresql
diff --git a/wagtail/search/backends/elasticsearch7.py b/wagtail/search/backends/elasticsearch7.py
new file mode 100644
index 0000000000..e70bb20df8
--- /dev/null
+++ b/wagtail/search/backends/elasticsearch7.py
@@ -0,0 +1,98 @@
+from elasticsearch import NotFoundError
+from elasticsearch.helpers import bulk
+
+from wagtail.search.backends.elasticsearch2 import ElasticsearchAutocompleteQueryCompilerImpl
+from wagtail.search.backends.elasticsearch6 import (
+    Elasticsearch6Index,
Elasticsearch6Mapping, Elasticsearch6SearchBackend, + Elasticsearch6SearchQueryCompiler, Elasticsearch6SearchResults) +from wagtail.search.index import class_is_indexed + + +class Elasticsearch7Mapping(Elasticsearch6Mapping): + def get_mapping(self): + mapping = super().get_mapping() + return mapping[self.get_document_type()] + + +class Elasticsearch7Index(Elasticsearch6Index): + def add_model(self, model): + # Get mapping + mapping = self.mapping_class(model) + + # Put mapping + self.es.indices.put_mapping(index=self.name, body=mapping.get_mapping()) + + def add_item(self, item): + # Make sure the object can be indexed + if not class_is_indexed(item.__class__): + return + + # Get mapping + mapping = self.mapping_class(item.__class__) + + # Add document to index + self.es.index( + self.name, mapping.get_document(item), id=mapping.get_document_id(item) + ) + + def add_items(self, model, items): + if not class_is_indexed(model): + return + + # Get mapping + mapping = self.mapping_class(model) + doc_type = "_doc" + + # Create list of actions + actions = [] + for item in items: + # Create the action + action = {"_type": doc_type, "_id": mapping.get_document_id(item)} + action.update(mapping.get_document(item)) + actions.append(action) + + # Run the actions + bulk(self.es, actions, index=self.name) + + def delete_item(self, item): + # Make sure the object can be indexed + if not class_is_indexed(item.__class__): + return + + # Get mapping + mapping = self.mapping_class(item.__class__) + + # Delete document + try: + self.es.delete(self.name, mapping.get_document_id(item)) + except NotFoundError: + pass # Document doesn't exist, ignore this exception + + +class Elasticsearch7SearchQueryCompiler(Elasticsearch6SearchQueryCompiler): + mapping_class = Elasticsearch7Mapping + + +class Elasticsearch7SearchResults(Elasticsearch6SearchResults): + pass + + +class Elasticsearch7AutocompleteQueryCompiler( + Elasticsearch6SearchQueryCompiler, ElasticsearchAutocompleteQueryCompilerImpl +): + pass + + +class Elasticsearch7SearchBackend(Elasticsearch6SearchBackend): + mapping_class = Elasticsearch7Mapping + index_class = Elasticsearch7Index + query_compiler_class = Elasticsearch7SearchQueryCompiler + autocomplete_query_compiler_class = Elasticsearch7AutocompleteQueryCompiler + results_class = Elasticsearch7SearchResults + + def __init__(self, params): + self.settings["settings"]["index"] = {"max_ngram_diff": 12} + super().__init__(params) + + +SearchBackend = Elasticsearch7SearchBackend diff --git a/wagtail/search/tests/test_elasticsearch7_backend.py b/wagtail/search/tests/test_elasticsearch7_backend.py new file mode 100644 index 0000000000..30e727f8e5 --- /dev/null +++ b/wagtail/search/tests/test_elasticsearch7_backend.py @@ -0,0 +1,813 @@ +# -*- coding: utf-8 -*- +import datetime +import json +from unittest import mock + +from django.db.models import Q +from django.test import TestCase +from elasticsearch.serializer import JSONSerializer + +from wagtail.search.backends.elasticsearch7 import Elasticsearch7SearchBackend +from wagtail.search.query import MATCH_ALL +from wagtail.tests.search import models + +from .elasticsearch_common_tests import ElasticsearchCommonSearchBackendTests + + +class TestElasticsearch7SearchBackend(ElasticsearchCommonSearchBackendTests, TestCase): + backend_path = 'wagtail.search.backends.elasticsearch7' + + +class TestElasticsearch7SearchQuery(TestCase): + def assertDictEqual(self, a, b): + default = JSONSerializer().default + self.assertEqual( + json.dumps(a, sort_keys=True, 
default=default), json.dumps(b, sort_keys=True, default=default) + ) + + query_compiler_class = Elasticsearch7SearchBackend.query_compiler_class + + def test_simple(self): + # Create a query + query = self.query_compiler_class(models.Book.objects.all(), "Hello") + + # Check it + expected_result = {'bool': { + 'filter': {'match': {'content_type': 'searchtests.Book'}}, + 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all_text', '_edgengrams']}} + }} + self.assertDictEqual(query.get_query(), expected_result) + + def test_none_query_string(self): + # Create a query + query = self.query_compiler_class(models.Book.objects.all(), MATCH_ALL) + + # Check it + expected_result = {'bool': { + 'filter': {'match': {'content_type': 'searchtests.Book'}}, + 'must': {'match_all': {}} + }} + self.assertDictEqual(query.get_query(), expected_result) + + def test_and_operator(self): + # Create a query + query = self.query_compiler_class(models.Book.objects.all(), "Hello", operator='and') + + # Check it + expected_result = {'bool': { + 'filter': {'match': {'content_type': 'searchtests.Book'}}, + 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all_text', '_edgengrams'], 'operator': 'and'}} + }} + self.assertDictEqual(query.get_query(), expected_result) + + def test_filter(self): + # Create a query + query = self.query_compiler_class(models.Book.objects.filter(title="Test"), "Hello") + + # Check it + expected_result = {'bool': {'filter': [ + {'match': {'content_type': 'searchtests.Book'}}, + {'term': {'title_filter': 'Test'}} + ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all_text', '_edgengrams']}}}} + self.assertDictEqual(query.get_query(), expected_result) + + def test_and_filter(self): + # Create a query + query = self.query_compiler_class(models.Book.objects.filter(title="Test", publication_date=datetime.date(2017, 10, 18)), "Hello") + + # Check it + expected_result = {'bool': {'filter': [ + {'match': {'content_type': 'searchtests.Book'}}, + {'bool': {'must': [{'term': {'publication_date_filter': '2017-10-18'}}, {'term': {'title_filter': 'Test'}}]}} + ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all_text', '_edgengrams']}}}} + + # Make sure field filters are sorted (as they can be in any order which may cause false positives) + query = query.get_query() + field_filters = query['bool']['filter'][1]['bool']['must'] + field_filters[:] = sorted(field_filters, key=lambda f: list(f['term'].keys())[0]) + + self.assertDictEqual(query, expected_result) + + def test_or_filter(self): + # Create a query + query = self.query_compiler_class(models.Book.objects.filter(Q(title="Test") | Q(publication_date=datetime.date(2017, 10, 18))), "Hello") + + # Make sure field filters are sorted (as they can be in any order which may cause false positives) + query = query.get_query() + field_filters = query['bool']['filter'][1]['bool']['should'] + field_filters[:] = sorted(field_filters, key=lambda f: list(f['term'].keys())[0]) + + # Check it + expected_result = {'bool': {'filter': [ + {'match': {'content_type': 'searchtests.Book'}}, + {'bool': {'should': [{'term': {'publication_date_filter': '2017-10-18'}}, {'term': {'title_filter': 'Test'}}]}} + ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all_text', '_edgengrams']}}}} + self.assertDictEqual(query, expected_result) + + def test_negated_filter(self): + # Create a query + query = self.query_compiler_class(models.Book.objects.exclude(publication_date=datetime.date(2017, 10, 18)), "Hello") + + # Check it + expected_result = 
{'bool': {'filter': [ + {'match': {'content_type': 'searchtests.Book'}}, + {'bool': {'mustNot': {'term': {'publication_date_filter': '2017-10-18'}}}} + ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all_text', '_edgengrams']}}}} + self.assertDictEqual(query.get_query(), expected_result) + + def test_fields(self): + # Create a query + query = self.query_compiler_class(models.Book.objects.all(), "Hello", fields=['title']) + + # Check it + expected_result = {'bool': { + 'filter': {'match': {'content_type': 'searchtests.Book'}}, + 'must': {'match': {'title': {'query': 'Hello'}}} + }} + self.assertDictEqual(query.get_query(), expected_result) + + def test_fields_with_and_operator(self): + # Create a query + query = self.query_compiler_class(models.Book.objects.all(), "Hello", fields=['title'], operator='and') + + # Check it + expected_result = {'bool': { + 'filter': {'match': {'content_type': 'searchtests.Book'}}, + 'must': {'match': {'title': {'query': 'Hello', 'operator': 'and'}}} + }} + self.assertDictEqual(query.get_query(), expected_result) + + def test_multiple_fields(self): + # Create a query + query = self.query_compiler_class(models.Book.objects.all(), "Hello", fields=['title', 'content']) + + # Check it + expected_result = {'bool': { + 'filter': {'match': {'content_type': 'searchtests.Book'}}, + 'must': {'multi_match': {'fields': ['title', 'content'], 'query': 'Hello'}} + }} + self.assertDictEqual(query.get_query(), expected_result) + + def test_multiple_fields_with_and_operator(self): + # Create a query + query = self.query_compiler_class( + models.Book.objects.all(), "Hello", fields=['title', 'content'], operator='and' + ) + + # Check it + expected_result = {'bool': { + 'filter': {'match': {'content_type': 'searchtests.Book'}}, + 'must': {'multi_match': {'fields': ['title', 'content'], 'query': 'Hello', 'operator': 'and'}} + }} + self.assertDictEqual(query.get_query(), expected_result) + + def test_exact_lookup(self): + # Create a query + query = self.query_compiler_class(models.Book.objects.filter(title__exact="Test"), "Hello") + + # Check it + expected_result = {'bool': {'filter': [ + {'match': {'content_type': 'searchtests.Book'}}, + {'term': {'title_filter': 'Test'}} + ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all_text', '_edgengrams']}}}} + self.assertDictEqual(query.get_query(), expected_result) + + def test_none_lookup(self): + # Create a query + query = self.query_compiler_class(models.Book.objects.filter(title=None), "Hello") + + # Check it + expected_result = {'bool': {'filter': [ + {'match': {'content_type': 'searchtests.Book'}}, + {'bool': {'mustNot': {'exists': {'field': 'title_filter'}}}} + ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all_text', '_edgengrams']}}}} + self.assertDictEqual(query.get_query(), expected_result) + + def test_isnull_true_lookup(self): + # Create a query + query = self.query_compiler_class(models.Book.objects.filter(title__isnull=True), "Hello") + + # Check it + expected_result = {'bool': {'filter': [ + {'match': {'content_type': 'searchtests.Book'}}, + {'bool': {'mustNot': {'exists': {'field': 'title_filter'}}}} + ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all_text', '_edgengrams']}}}} + self.assertDictEqual(query.get_query(), expected_result) + + def test_isnull_false_lookup(self): + # Create a query + query = self.query_compiler_class(models.Book.objects.filter(title__isnull=False), "Hello") + + # Check it + expected_result = {'bool': {'filter': [ + {'match': {'content_type': 
'searchtests.Book'}}, + {'exists': {'field': 'title_filter'}} + ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all_text', '_edgengrams']}}}} + self.assertDictEqual(query.get_query(), expected_result) + + def test_startswith_lookup(self): + # Create a query + query = self.query_compiler_class(models.Book.objects.filter(title__startswith="Test"), "Hello") + + # Check it + expected_result = {'bool': {'filter': [ + {'match': {'content_type': 'searchtests.Book'}}, + {'prefix': {'title_filter': 'Test'}} + ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all_text', '_edgengrams']}}}} + self.assertDictEqual(query.get_query(), expected_result) + + def test_gt_lookup(self): + # This also tests conversion of python dates to strings + + # Create a query + query = self.query_compiler_class( + models.Book.objects.filter(publication_date__gt=datetime.datetime(2014, 4, 29)), "Hello" + ) + + # Check it + expected_result = {'bool': {'filter': [ + {'match': {'content_type': 'searchtests.Book'}}, + {'range': {'publication_date_filter': {'gt': '2014-04-29'}}} + ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all_text', '_edgengrams']}}}} + self.assertDictEqual(query.get_query(), expected_result) + + def test_lt_lookup(self): + # Create a query + query = self.query_compiler_class( + models.Book.objects.filter(publication_date__lt=datetime.datetime(2014, 4, 29)), "Hello" + ) + + # Check it + expected_result = {'bool': {'filter': [ + {'match': {'content_type': 'searchtests.Book'}}, + {'range': {'publication_date_filter': {'lt': '2014-04-29'}}} + ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all_text', '_edgengrams']}}}} + self.assertDictEqual(query.get_query(), expected_result) + + def test_gte_lookup(self): + # Create a query + query = self.query_compiler_class( + models.Book.objects.filter(publication_date__gte=datetime.datetime(2014, 4, 29)), "Hello" + ) + + # Check it + expected_result = {'bool': {'filter': [ + {'match': {'content_type': 'searchtests.Book'}}, + {'range': {'publication_date_filter': {'gte': '2014-04-29'}}} + ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all_text', '_edgengrams']}}}} + self.assertDictEqual(query.get_query(), expected_result) + + def test_lte_lookup(self): + # Create a query + query = self.query_compiler_class( + models.Book.objects.filter(publication_date__lte=datetime.datetime(2014, 4, 29)), "Hello" + ) + + # Check it + expected_result = {'bool': {'filter': [ + {'match': {'content_type': 'searchtests.Book'}}, + {'range': {'publication_date_filter': {'lte': '2014-04-29'}}} + ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all_text', '_edgengrams']}}}} + self.assertDictEqual(query.get_query(), expected_result) + + def test_range_lookup(self): + start_date = datetime.datetime(2014, 4, 29) + end_date = datetime.datetime(2014, 8, 19) + + # Create a query + query = self.query_compiler_class( + models.Book.objects.filter(publication_date__range=(start_date, end_date)), "Hello" + ) + + # Check it + expected_result = {'bool': {'filter': [ + {'match': {'content_type': 'searchtests.Book'}}, + {'range': {'publication_date_filter': {'gte': '2014-04-29', 'lte': '2014-08-19'}}} + ], 'must': {'multi_match': {'query': 'Hello', 'fields': ['_all_text', '_edgengrams']}}}} + self.assertDictEqual(query.get_query(), expected_result) + + def test_custom_ordering(self): + # Create a query + query = self.query_compiler_class( + models.Book.objects.order_by('publication_date'), "Hello", order_by_relevance=False + ) + + # Check it + 
expected_result = [{'publication_date_filter': 'asc'}]
+        self.assertDictEqual(query.get_sort(), expected_result)
+
+    def test_custom_ordering_reversed(self):
+        # Create a query
+        query = self.query_compiler_class(
+            models.Book.objects.order_by('-publication_date'), "Hello", order_by_relevance=False
+        )
+
+        # Check it
+        expected_result = [{'publication_date_filter': 'desc'}]
+        self.assertDictEqual(query.get_sort(), expected_result)
+
+    def test_custom_ordering_multiple(self):
+        # Create a query
+        query = self.query_compiler_class(
+            models.Book.objects.order_by('publication_date', 'number_of_pages'), "Hello", order_by_relevance=False
+        )
+
+        # Check it
+        expected_result = [{'publication_date_filter': 'asc'}, {'number_of_pages_filter': 'asc'}]
+        self.assertDictEqual(query.get_sort(), expected_result)
+
+
+class TestElasticsearch7SearchResults(TestCase):
+    fixtures = ['search']
+
+    def assertDictEqual(self, a, b):
+        default = JSONSerializer().default
+        self.assertEqual(
+            json.dumps(a, sort_keys=True, default=default), json.dumps(b, sort_keys=True, default=default)
+        )
+
+    def get_results(self):
+        backend = Elasticsearch7SearchBackend({})
+        query = mock.MagicMock()
+        query.queryset = models.Book.objects.all()
+        query.get_query.return_value = 'QUERY'
+        query.get_sort.return_value = None
+        return backend.results_class(backend, query)
+
+    def construct_search_response(self, results):
+        return {
+            '_shards': {'failed': 0, 'successful': 5, 'total': 5},
+            'hits': {
+                'hits': [
+                    {
+                        '_id': 'searchtests_book:' + str(result),
+                        '_index': 'wagtail',
+                        '_score': 1,
+                        '_type': 'searchtests_book',
+                        'fields': {
+                            'pk': [str(result)],
+                        }
+                    }
+                    for result in results
+                ],
+                'max_score': 1,
+                'total': len(results)
+            },
+            'timed_out': False,
+            'took': 2
+        }
+
+    @mock.patch('elasticsearch.Elasticsearch.search')
+    def test_basic_search(self, search):
+        search.return_value = self.construct_search_response([])
+        results = self.get_results()
+
+        list(results)  # Performs search
+
+        search.assert_any_call(
+            body={'query': 'QUERY'},
+            _source=False,
+            stored_fields='pk',
+            index='wagtail__searchtests_book',
+            scroll='2m',
+            size=100
+        )
+
+    @mock.patch('elasticsearch.Elasticsearch.search')
+    def test_get_single_item(self, search):
+        # Need to return something to prevent index error
+        search.return_value = self.construct_search_response([1])
+        results = self.get_results()
+
+        results[10]  # Performs search
+
+        search.assert_any_call(
+            from_=10,
+            body={'query': 'QUERY'},
+            _source=False,
+            stored_fields='pk',
+            index='wagtail__searchtests_book',
+            size=1
+        )
+
+    @mock.patch('elasticsearch.Elasticsearch.search')
+    def test_slice_results(self, search):
+        search.return_value = self.construct_search_response([])
+        results = self.get_results()[1:4]
+
+        list(results)  # Performs search
+
+        search.assert_any_call(
+            from_=1,
+            body={'query': 'QUERY'},
+            _source=False,
+            stored_fields='pk',
+            index='wagtail__searchtests_book',
+            size=3
+        )
+
+    @mock.patch('elasticsearch.Elasticsearch.search')
+    def test_slice_results_multiple_times(self, search):
+        search.return_value = self.construct_search_response([])
+        results = self.get_results()[10:][:10]
+
+        list(results)  # Performs search
+
+        search.assert_any_call(
+            from_=10,
+            body={'query': 'QUERY'},
+            _source=False,
+            stored_fields='pk',
+            index='wagtail__searchtests_book',
+            size=10
+        )
+
+    @mock.patch('elasticsearch.Elasticsearch.search')
+    def test_slice_results_and_get_item(self, search):
+        # Need to return something to prevent index error
+        search.return_value = self.construct_search_response([1])
+        results
= self.get_results()[10:] + + results[10] # Performs search + + search.assert_any_call( + from_=20, + body={'query': 'QUERY'}, + _source=False, + stored_fields='pk', + index='wagtail__searchtests_book', + size=1 + ) + + @mock.patch('elasticsearch.Elasticsearch.search') + def test_result_returned(self, search): + search.return_value = self.construct_search_response([1]) + results = self.get_results() + + self.assertEqual(results[0], models.Book.objects.get(id=1)) + + @mock.patch('elasticsearch.Elasticsearch.search') + def test_len_1(self, search): + search.return_value = self.construct_search_response([1]) + results = self.get_results() + + self.assertEqual(len(results), 1) + + @mock.patch('elasticsearch.Elasticsearch.search') + def test_len_2(self, search): + search.return_value = self.construct_search_response([1, 2]) + results = self.get_results() + + self.assertEqual(len(results), 2) + + @mock.patch('elasticsearch.Elasticsearch.search') + def test_duplicate_results(self, search): # Duplicates will not be removed + search.return_value = self.construct_search_response([1, 1]) + results = list(self.get_results()) # Must cast to list so we only create one query + + self.assertEqual(len(results), 2) + self.assertEqual(results[0], models.Book.objects.get(id=1)) + self.assertEqual(results[1], models.Book.objects.get(id=1)) + + @mock.patch('elasticsearch.Elasticsearch.search') + def test_result_order(self, search): + search.return_value = self.construct_search_response( + [1, 2, 3] + ) + results = list(self.get_results()) # Must cast to list so we only create one query + + self.assertEqual(results[0], models.Book.objects.get(id=1)) + self.assertEqual(results[1], models.Book.objects.get(id=2)) + self.assertEqual(results[2], models.Book.objects.get(id=3)) + + @mock.patch('elasticsearch.Elasticsearch.search') + def test_result_order_2(self, search): + search.return_value = self.construct_search_response( + [3, 2, 1] + ) + results = list(self.get_results()) # Must cast to list so we only create one query + + self.assertEqual(results[0], models.Book.objects.get(id=3)) + self.assertEqual(results[1], models.Book.objects.get(id=2)) + self.assertEqual(results[2], models.Book.objects.get(id=1)) + + +class TestElasticsearch7Mapping(TestCase): + fixtures = ['search'] + + def assertDictEqual(self, a, b): + default = JSONSerializer().default + self.assertEqual( + json.dumps(a, sort_keys=True, default=default), json.dumps(b, sort_keys=True, default=default) + ) + + def setUp(self): + # Create ES mapping + self.es_mapping = Elasticsearch7SearchBackend.mapping_class(models.Book) + + # Create ES document + self.obj = models.Book.objects.get(id=4) + + def test_get_document_type(self): + self.assertEqual(self.es_mapping.get_document_type(), 'doc') + + def test_get_mapping(self): + # Build mapping + mapping = self.es_mapping.get_mapping() + + # Check + expected_result = { + 'properties': { + 'pk': {'type': 'keyword', 'store': True}, + 'content_type': {'type': 'keyword'}, + '_all_text': {'type': 'text'}, + '_edgengrams': {'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard', 'type': 'text'}, + 'title': {'type': 'text', 'boost': 2.0, 'copy_to': '_all_text', 'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard'}, + 'title_edgengrams': {'type': 'text', 'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard'}, + 'title_filter': {'type': 'keyword'}, + 'authors': { + 'type': 'nested', + 'properties': { + 'name': {'type': 'text', 'copy_to': '_all_text'}, + 'date_of_birth_filter': {'type': 
'date'}, + }, + }, + 'authors_filter': {'type': 'integer'}, + 'publication_date_filter': {'type': 'date'}, + 'number_of_pages_filter': {'type': 'integer'}, + 'tags': { + 'type': 'nested', + 'properties': { + 'name': {'type': 'text', 'copy_to': '_all_text'}, + 'slug_filter': {'type': 'keyword'}, + }, + }, + 'tags_filter': {'type': 'integer'} + } + } + + self.assertDictEqual(mapping, expected_result) + + def test_get_document_id(self): + self.assertEqual(self.es_mapping.get_document_id(self.obj), str(self.obj.pk)) + + def test_get_document(self): + # Get document + document = self.es_mapping.get_document(self.obj) + + # Sort edgengrams + if '_edgengrams' in document: + document['_edgengrams'].sort() + + # Check + expected_result = { + 'pk': '4', + 'content_type': ["searchtests.Book"], + '_edgengrams': ['The Fellowship of the Ring', 'The Fellowship of the Ring'], + 'title': 'The Fellowship of the Ring', + 'title_edgengrams': 'The Fellowship of the Ring', + 'title_filter': 'The Fellowship of the Ring', + 'authors': [ + { + 'name': 'J. R. R. Tolkien', + 'date_of_birth_filter': datetime.date(1892, 1, 3) + } + ], + 'authors_filter': [2], + 'publication_date_filter': datetime.date(1954, 7, 29), + 'number_of_pages_filter': 423, + 'tags': [], + 'tags_filter': [] + } + + self.assertDictEqual(document, expected_result) + + +class TestElasticsearch7MappingInheritance(TestCase): + fixtures = ['search'] + + def assertDictEqual(self, a, b): + default = JSONSerializer().default + self.assertEqual( + json.dumps(a, sort_keys=True, default=default), json.dumps(b, sort_keys=True, default=default) + ) + + def setUp(self): + # Create ES mapping + self.es_mapping = Elasticsearch7SearchBackend.mapping_class(models.Novel) + + self.obj = models.Novel.objects.get(id=4) + + def test_get_document_type(self): + self.assertEqual(self.es_mapping.get_document_type(), 'doc') + + def test_get_mapping(self): + # Build mapping + mapping = self.es_mapping.get_mapping() + + # Check + expected_result = { + 'properties': { + # New + 'searchtests_novel__setting': {'type': 'text', 'copy_to': '_all_text', 'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard'}, + 'searchtests_novel__protagonist': { + 'type': 'nested', + 'properties': { + 'name': {'type': 'text', 'boost': 0.5, 'copy_to': '_all_text'}, + 'novel_id_filter': {'type': 'integer'} + } + }, + 'searchtests_novel__protagonist_id_filter': {'type': 'integer'}, + 'searchtests_novel__characters': { + 'type': 'nested', + 'properties': { + 'name': {'type': 'text', 'boost': 0.25, 'copy_to': '_all_text'} + } + }, + + # Inherited + 'pk': {'type': 'keyword', 'store': True}, + 'content_type': {'type': 'keyword'}, + '_all_text': {'type': 'text'}, + '_edgengrams': {'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard', 'type': 'text'}, + 'title': {'type': 'text', 'boost': 2.0, 'copy_to': '_all_text', 'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard'}, + 'title_edgengrams': {'type': 'text', 'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard'}, + 'title_filter': {'type': 'keyword'}, + 'authors': { + 'type': 'nested', + 'properties': { + 'name': {'type': 'text', 'copy_to': '_all_text'}, + 'date_of_birth_filter': {'type': 'date'}, + }, + }, + 'authors_filter': {'type': 'integer'}, + 'publication_date_filter': {'type': 'date'}, + 'number_of_pages_filter': {'type': 'integer'}, + 'tags': { + 'type': 'nested', + 'properties': { + 'name': {'type': 'text', 'copy_to': '_all_text'}, + 'slug_filter': {'type': 'keyword'}, + }, + }, + 'tags_filter': {'type': 
'integer'} + } + } + + self.assertDictEqual(mapping, expected_result) + + def test_get_document_id(self): + # This must be tests_searchtest instead of 'tests_searchtest_tests_searchtestchild' + # as it uses the contents base content type name. + # This prevents the same object being accidentally indexed twice. + self.assertEqual(self.es_mapping.get_document_id(self.obj), str(self.obj.pk)) + + def test_get_document(self): + # Build document + document = self.es_mapping.get_document(self.obj) + + # Sort edgengrams + if '_edgengrams' in document: + document['_edgengrams'].sort() + + # Sort characters + if 'searchtests_novel__characters' in document: + document['searchtests_novel__characters'].sort(key=lambda c: c['name']) + + # Check + expected_result = { + # New + 'searchtests_novel__setting': "Middle Earth", + 'searchtests_novel__protagonist': { + 'name': "Frodo Baggins", + 'novel_id_filter': 4 + }, + 'searchtests_novel__protagonist_id_filter': 8, + 'searchtests_novel__characters': [ + { + 'name': "Bilbo Baggins" + }, + { + 'name': "Frodo Baggins" + }, + { + 'name': "Gandalf" + } + ], + + # Changed + 'content_type': ["searchtests.Novel", "searchtests.Book"], + '_edgengrams': ['Middle Earth', 'The Fellowship of the Ring', 'The Fellowship of the Ring'], + + # Inherited + 'pk': '4', + 'title': 'The Fellowship of the Ring', + 'title_edgengrams': 'The Fellowship of the Ring', + 'title_filter': 'The Fellowship of the Ring', + 'authors': [ + { + 'name': 'J. R. R. Tolkien', + 'date_of_birth_filter': datetime.date(1892, 1, 3) + } + ], + 'authors_filter': [2], + 'publication_date_filter': datetime.date(1954, 7, 29), + 'number_of_pages_filter': 423, + 'tags': [], + 'tags_filter': [] + } + + self.assertDictEqual(document, expected_result) + + +@mock.patch('wagtail.search.backends.elasticsearch2.Elasticsearch') +class TestBackendConfiguration(TestCase): + def test_default_settings(self, Elasticsearch): + Elasticsearch7SearchBackend(params={}) + + Elasticsearch.assert_called_with( + hosts=[ + { + 'host': 'localhost', + 'port': 9200, + 'url_prefix': '', + 'use_ssl': False, + 'verify_certs': False, + 'http_auth': None + } + ], + timeout=10 + ) + + def test_hosts(self, Elasticsearch): + Elasticsearch7SearchBackend(params={ + 'HOSTS': [ + { + 'host': '127.0.0.1', + 'port': 9300, + 'use_ssl': True, + 'verify_certs': True, + } + ] + }) + + Elasticsearch.assert_called_with( + hosts=[ + { + 'host': '127.0.0.1', + 'port': 9300, + 'use_ssl': True, + 'verify_certs': True, + } + ], + timeout=10 + ) + + def test_urls(self, Elasticsearch): + # This test backwards compatibility with old URLS setting + Elasticsearch7SearchBackend(params={ + 'URLS': [ + 'http://localhost:12345', + 'https://127.0.0.1:54321', + 'http://username:password@elasticsearch.mysite.com', + 'https://elasticsearch.mysite.com/hello', + ], + }) + + Elasticsearch.assert_called_with( + hosts=[ + { + 'host': 'localhost', + 'port': 12345, + 'url_prefix': '', + 'use_ssl': False, + 'verify_certs': False, + 'http_auth': None, + }, + { + 'host': '127.0.0.1', + 'port': 54321, + 'url_prefix': '', + 'use_ssl': True, + 'verify_certs': True, + 'http_auth': None, + }, + { + 'host': 'elasticsearch.mysite.com', + 'port': 80, + 'url_prefix': '', + 'use_ssl': False, + 'verify_certs': False, + 'http_auth': ('username', 'password') + }, + { + 'host': 'elasticsearch.mysite.com', + 'port': 443, + 'url_prefix': '/hello', + 'use_ssl': True, + 'verify_certs': True, + 'http_auth': None, + }, + ], + timeout=10 + ) diff --git a/wagtail/tests/settings.py 
b/wagtail/tests/settings.py index 3d11b93fca..a7718cc909 100644 --- a/wagtail/tests/settings.py +++ b/wagtail/tests/settings.py @@ -176,7 +176,9 @@ if os.environ.get('DATABASE_ENGINE') == 'django.db.backends.postgresql': } if 'ELASTICSEARCH_URL' in os.environ: - if os.environ.get('ELASTICSEARCH_VERSION') == '6': + if os.environ.get('ELASTICSEARCH_VERSION') == '7': + backend = 'wagtail.search.backends.elasticsearch7' + elif os.environ.get('ELASTICSEARCH_VERSION') == '6': backend = 'wagtail.search.backends.elasticsearch6' elif os.environ.get('ELASTICSEARCH_VERSION') == '5': backend = 'wagtail.search.backends.elasticsearch5'
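
For anyone trying the new backend, a project opts in through Wagtail's standard WAGTAILSEARCH_BACKENDS setting. The sketch below is illustrative only and is not part of this patch; the host URL and index name are placeholder assumptions, and (as the docs change above notes) the matching client library, "elasticsearch>=7.0.0,<8.0.0", must be installed alongside it.

    # settings.py: minimal sketch, not part of this patch; host and index name are assumed values
    WAGTAILSEARCH_BACKENDS = {
        'default': {
            'BACKEND': 'wagtail.search.backends.elasticsearch7',
            'URLS': ['http://localhost:9200'],  # assumed local Elasticsearch 7.x node
            'INDEX': 'wagtail',                 # placeholder index name
            'TIMEOUT': 5,
            'OPTIONS': {},
            'INDEX_SETTINGS': {},
        }
    }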