diff --git a/.travis.yml b/.travis.yml index ab64b75bf8..863d4d4f11 100644 --- a/.travis.yml +++ b/.travis.yml @@ -32,6 +32,15 @@ matrix: python: 2.7 - env: TOXENV=py34-dj19-sqlite-elasticsearch python: 3.5 + - env: TOXENV=py27-dj18-sqlite-elasticsearch2 INSTALL_ELASTICSEARCH2=yes + python: 2.7 + sudo: true + - env: TOXENV=py27-dj19-postgres-elasticsearch2 INSTALL_ELASTICSEARCH2=yes + python: 2.7 + sudo: true + - env: TOXENV=py34-dj19-sqlite-elasticsearch2 INSTALL_ELASTICSEARCH2=yes + python: 3.5 + sudo: true - env: TOXENV=py27-dj110-sqlite-noelasticsearch python: 2.7 - env: TOXENV=py27-dj110-postgres-noelasticsearch @@ -40,6 +49,9 @@ matrix: python: 2.7 - env: TOXENV=py27-dj110-mysql-elasticsearch python: 2.7 + - env: TOXENV=py27-dj110-mysql-elasticsearch2 INSTALL_ELASTICSEARCH2=yes + python: 2.7 + sudo: true - env: TOXENV=py34-dj110-postgres-noelasticsearch python: 3.4 - env: TOXENV=py34-dj110-sqlite-noelasticsearch @@ -54,12 +66,21 @@ matrix: python: 3.5 - env: TOXENV=py35-dj110-postgres-elasticsearch python: 3.5 + - env: TOXENV=py35-dj110-postgres-elasticsearch2 INSTALL_ELASTICSEARCH2=yes + python: 3.5 + sudo: true allow_failures: - env: TOXENV=py27-dj18-sqlite-elasticsearch - env: TOXENV=py27-dj19-postgres-elasticsearch - env: TOXENV=py34-dj19-sqlite-elasticsearch - env: TOXENV=py27-dj110-mysql-elasticsearch - env: TOXENV=py35-dj110-postgres-elasticsearch + - env: TOXENV=py27-dj18-sqlite-elasticsearch2 INSTALL_ELASTICSEARCH2=yes + - env: TOXENV=py27-dj19-postgres-elasticsearch2 INSTALL_ELASTICSEARCH2=yes + - env: TOXENV=py34-dj19-sqlite-elasticsearch2 INSTALL_ELASTICSEARCH2=yes + - env: TOXENV=py27-dj110-mysql-elasticsearch2 INSTALL_ELASTICSEARCH2=yes + - env: TOXENV=py35-dj110-postgres-elasticsearch2 INSTALL_ELASTICSEARCH2=yes + # Services services: @@ -68,6 +89,7 @@ services: # Package installation install: - pip install tox coveralls + - 'if [[ -n "$INSTALL_ELASTICSEARCH2" ]]; then ./scripts/travis/install_elasticsearch2.sh; fi' # Pre-test configuration before_script: diff --git a/docs/advanced_topics/settings.rst b/docs/advanced_topics/settings.rst index fd4f55931c..6b7e114fd1 100644 --- a/docs/advanced_topics/settings.rst +++ b/docs/advanced_topics/settings.rst @@ -185,7 +185,7 @@ Search # Replace the search backend WAGTAILSEARCH_BACKENDS = { 'default': { - 'BACKEND': 'wagtail.wagtailsearch.backends.elasticsearch', + 'BACKEND': 'wagtail.wagtailsearch.backends.elasticsearch2', 'INDEX': 'myapp' } } @@ -368,7 +368,7 @@ URL Patterns urlpatterns = [ url(r'^django-admin/', include(admin.site.urls)), - + url(r'^admin/', include(wagtailadmin_urls)), url(r'^search/', include(wagtailsearch_urls)), url(r'^documents/', include(wagtaildocs_urls)), @@ -578,7 +578,7 @@ These two files should reside in your project directory (``myproject/myproject/` # Replace the search backend #WAGTAILSEARCH_BACKENDS = { # 'default': { - # 'BACKEND': 'wagtail.wagtailsearch.backends.elasticsearch', + # 'BACKEND': 'wagtail.wagtailsearch.backends.elasticsearch2', # 'INDEX': 'myapp' # } #} diff --git a/docs/topics/search/backends.rst b/docs/topics/search/backends.rst index a1d8ecd376..057e5b61a7 100644 --- a/docs/topics/search/backends.rst +++ b/docs/topics/search/backends.rst @@ -82,20 +82,22 @@ If any of these features are important to you, we recommend using Elasticsearch Elasticsearch Backend --------------------- -``wagtail.wagtailsearch.backends.elasticsearch`` +``wagtail.wagtailsearch.backends.elasticsearch`` (Elasticsearch 1.x) + +``wagtail.wagtailsearch.backends.elasticsearch2`` (Elasticsearch 2.x) .. 
versionchanged:: 1.1 Before 1.1, the full path to the backend class had to be specified: ``wagtail.wagtailsearch.backends.elasticsearch.ElasticSearch`` +.. versionchanged:: 1.7 + + Support for Elasticsearch 2.x was added + Prerequisites are the `Elasticsearch`_ service itself and, via pip, the `elasticsearch-py`_ package: .. _Elasticsearch: https://www.elastic.co/downloads/past-releases/elasticsearch-1-7-3 -.. note:: - - Wagtail doesn't support Elasticsearch 2.0 yet; please use 1.x in the meantime. Elasticsearch 2.0 support is scheduled for a future release. - .. code-block:: sh pip install elasticsearch @@ -106,7 +108,7 @@ The backend is configured in settings: WAGTAILSEARCH_BACKENDS = { 'default': { - 'BACKEND': 'wagtail.wagtailsearch.backends.elasticsearch', + 'BACKEND': 'wagtail.wagtailsearch.backends.elasticsearch2', 'URLS': ['http://localhost:9200'], 'INDEX': 'wagtail', 'TIMEOUT': 5, diff --git a/runtests.py b/runtests.py index ae3d100556..817ae13d78 100755 --- a/runtests.py +++ b/runtests.py @@ -17,6 +17,7 @@ def make_parser(): parser.add_argument('--deprecation', choices=['all', 'pending', 'imminent', 'none'], default='pending') parser.add_argument('--postgres', action='store_true') parser.add_argument('--elasticsearch', action='store_true') + parser.add_argument('--elasticsearch2', action='store_true') parser.add_argument('rest', nargs='*') return parser @@ -49,6 +50,13 @@ def runtests(): if args.elasticsearch: os.environ.setdefault('ELASTICSEARCH_URL', 'http://localhost:9200') + os.environ.setdefault('ELASTICSEARCH_VERSION', '1') + + if args.elasticsearch2: + raise RuntimeError("You cannot test both Elasticsearch 1 and 2 together") + elif args.elasticsearch2: + os.environ.setdefault('ELASTICSEARCH_URL', 'http://localhost:9200') + os.environ.setdefault('ELASTICSEARCH_VERSION', '2') elif 'ELASTICSEARCH_URL' in os.environ: # forcibly delete the ELASTICSEARCH_URL setting to skip those tests del os.environ['ELASTICSEARCH_URL'] diff --git a/scripts/travis/install_elasticsearch2.sh b/scripts/travis/install_elasticsearch2.sh new file mode 100755 index 0000000000..8c28d31b5a --- /dev/null +++ b/scripts/travis/install_elasticsearch2.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +sudo apt-get autoremove --purge elasticsearch +wget -qO - https://packages.elastic.co/GPG-KEY-elasticsearch | sudo apt-key add - +echo "deb http://packages.elastic.co/elasticsearch/2.x/debian stable main" | sudo tee -a /etc/apt/sources.list.d/elk.list +sudo apt-get update && sudo apt-get install elasticsearch -y +sudo service elasticsearch start diff --git a/tox.ini b/tox.ini index f59aee2f72..9e1a957483 100644 --- a/tox.ini +++ b/tox.ini @@ -2,14 +2,15 @@ skipsdist = True usedevelop = True -envlist = py{27,33,34,35}-dj{18,19}-{sqlite,postgres,mysql}-{elasticsearch,noelasticsearch}, - py{27,34,35}-dj110-{sqlite,postgres,mysql}-{elasticsearch,noelasticsearch}, +envlist = py{27,33,34,35}-dj{18,19}-{sqlite,postgres,mysql}-{elasticsearch2,elasticsearch,noelasticsearch}, + py{27,34,35}-dj110-{sqlite,postgres,mysql}-{elasticsearch2,elasticsearch,noelasticsearch}, flake8 [testenv] install_command = pip install -e ".[testing]" -U {opts} {packages} commands = elasticsearch: coverage run runtests.py wagtail.wagtailsearch wagtail.wagtaildocs wagtail.wagtailimages --elasticsearch + elasticsearch2: coverage run runtests.py wagtail.wagtailsearch wagtail.wagtaildocs wagtail.wagtailimages --elasticsearch2 noelasticsearch: coverage run runtests.py basepython = @@ -27,6 +28,7 @@ deps = dj110: Django>=1.10a1,<1.11 postgres: psycopg2>=2.6 mysql: 
mysqlclient==1.3.6 + elasticsearch2: elasticsearch>=2,<3 setenv = postgres: DATABASE_ENGINE=django.db.backends.postgresql_psycopg2 diff --git a/wagtail/tests/search/migrations/0002_anothersearchtestchild.py b/wagtail/tests/search/migrations/0002_anothersearchtestchild.py new file mode 100644 index 0000000000..ca38ac5a99 --- /dev/null +++ b/wagtail/tests/search/migrations/0002_anothersearchtestchild.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.10 on 2016-08-25 15:17 +from __future__ import unicode_literals + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('searchtests', '0001_initial'), + ] + + operations = [ + migrations.CreateModel( + name='AnotherSearchTestChild', + fields=[ + ('searchtest_ptr', models.OneToOneField(auto_created=True, on_delete=django.db.models.deletion.CASCADE, parent_link=True, primary_key=True, serialize=False, to='searchtests.SearchTest')), + ('subtitle', models.CharField(blank=True, max_length=255, null=True)), + ], + bases=('searchtests.searchtest',), + ), + ] diff --git a/wagtail/tests/search/models.py b/wagtail/tests/search/models.py index b795d26c9b..84c57c1131 100644 --- a/wagtail/tests/search/models.py +++ b/wagtail/tests/search/models.py @@ -69,3 +69,13 @@ class SearchTestChild(SearchTest): index.FilterField('live'), ]), ] + + +class AnotherSearchTestChild(SearchTest): + # Checks that having the same field name in two child models with different + # search configuration doesn't give an error + subtitle = models.CharField(max_length=255, null=True, blank=True) + + search_fields = SearchTest.search_fields + [ + index.SearchField('subtitle', boost=10), + ] diff --git a/wagtail/tests/settings.py b/wagtail/tests/settings.py index c74815ce57..28906fcab2 100644 --- a/wagtail/tests/settings.py +++ b/wagtail/tests/settings.py @@ -168,8 +168,13 @@ WAGTAILSEARCH_BACKENDS = { AUTH_USER_MODEL = 'customuser.CustomUser' if 'ELASTICSEARCH_URL' in os.environ: + if os.environ.get('ELASTICSEARCH_VERSION') == '2': + backend = 'wagtail.wagtailsearch.backends.elasticsearch2' + else: + backend = 'wagtail.wagtailsearch.backends.elasticsearch' + WAGTAILSEARCH_BACKENDS['elasticsearch'] = { - 'BACKEND': 'wagtail.wagtailsearch.backends.elasticsearch', + 'BACKEND': backend, 'URLS': [os.environ['ELASTICSEARCH_URL']], 'TIMEOUT': 10, 'max_retries': 1, diff --git a/wagtail/wagtailsearch/backends/elasticsearch.py b/wagtail/wagtailsearch/backends/elasticsearch.py index 5381ec4ce9..6f68f7c723 100644 --- a/wagtail/wagtailsearch/backends/elasticsearch.py +++ b/wagtail/wagtailsearch/backends/elasticsearch.py @@ -12,7 +12,8 @@ from elasticsearch.helpers import bulk from wagtail.utils.deprecation import RemovedInWagtail18Warning from wagtail.wagtailsearch.backends.base import ( BaseSearchBackend, BaseSearchQuery, BaseSearchResults) -from wagtail.wagtailsearch.index import FilterField, RelatedFields, SearchField, class_is_indexed +from wagtail.wagtailsearch.index import ( + FilterField, Indexed, RelatedFields, SearchField, class_is_indexed) class ElasticsearchMapping(object): @@ -42,9 +43,21 @@ class ElasticsearchMapping(object): 'TimeField': 'date', } + # Contains the configuration required to use the edgengram_analyzer + # on a field. It's different in Elasticsearch 2 so it's been put in + # an attribute here to make it easier to override in a subclass. 
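+    # (The Elasticsearch 2 backend's mapping subclass overrides this with
+    # 'analyzer' and 'search_analyzer' keys, as Elasticsearch 2 no longer
+    # accepts 'index_analyzer' in field mappings.)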
+ edgengram_analyzer_config = { + 'index_analyzer': 'edgengram_analyzer', + } + def __init__(self, model): self.model = model + def get_parent(self): + for base in self.model.__bases__: + if issubclass(base, Indexed) and issubclass(base, models.Model): + return type(self)(base) + def get_document_type(self): return self.model.indexed_get_content_type() @@ -75,7 +88,7 @@ class ElasticsearchMapping(object): mapping['boost'] = field.boost if field.partial_match: - mapping['index_analyzer'] = 'edgengram_analyzer' + mapping.update(self.edgengram_analyzer_config) mapping['include_in_all'] = True @@ -94,8 +107,9 @@ class ElasticsearchMapping(object): fields = { 'pk': dict(type='string', index='not_analyzed', store='yes', include_in_all=False), 'content_type': dict(type='string', index='not_analyzed', include_in_all=False), - '_partials': dict(type='string', index_analyzer='edgengram_analyzer', include_in_all=False), + '_partials': dict(type='string', include_in_all=False), } + fields['_partials'].update(self.edgengram_analyzer_config) fields.update(dict( self.get_field_mapping(field) for field in self.model.get_search_fields() @@ -305,15 +319,18 @@ class ElasticsearchSearchQuery(BaseSearchQuery): return query + def get_content_type_filter(self): + return { + 'prefix': { + 'content_type': self.queryset.model.indexed_get_content_type() + } + } + def get_filters(self): filters = [] # Filter by content type - filters.append({ - 'prefix': { - 'content_type': self.queryset.model.indexed_get_content_type() - } - }) + filters.append(self.get_content_type_filter()) # Apply filters from queryset queryset_filters = self._get_filters_from_queryset() diff --git a/wagtail/wagtailsearch/backends/elasticsearch2.py b/wagtail/wagtailsearch/backends/elasticsearch2.py new file mode 100644 index 0000000000..6eb409e8c6 --- /dev/null +++ b/wagtail/wagtailsearch/backends/elasticsearch2.py @@ -0,0 +1,143 @@ +from __future__ import absolute_import, unicode_literals + +from wagtail.wagtailsearch.index import FilterField, RelatedFields, SearchField + +from .elasticsearch import ( + ElasticsearchIndex, ElasticsearchMapping, ElasticsearchSearchBackend, ElasticsearchSearchQuery, + ElasticsearchSearchResults) + + +def get_model_root(model): + """ + This function finds the root model for any given model. The root model is + the highest concrete model that it descends from. If the model doesn't + descend from another concrete model then the model is it's own root model so + it is returned. + + Examples: + >>> get_model_root(wagtailcore.Page) + wagtailcore.Page + + >>> get_model_root(myapp.HomePage) + wagtailcore.Page + + >>> get_model_root(wagtailimages.Image) + wagtailimages.Image + """ + if model._meta.parents: + parent_model = list(model._meta.parents.items())[0][0] + return get_model_root(parent_model) + + return model + + +class Elasticsearch2Mapping(ElasticsearchMapping): + edgengram_analyzer_config = { + 'analyzer': 'edgengram_analyzer', + 'search_analyzer': 'standard', + } + + def get_field_column_name(self, field): + # Fields in derived models get prefixed with their model name, fields + # in the root model don't get prefixed at all + # This is to prevent mapping clashes in cases where two page types have + # a field with the same name but a different type. 
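+        # For example, SearchTest.title (defined on the root model) keeps the
+        # column name "title", while SearchTestChild.subtitle becomes
+        # "searchtests_searchtestchild__subtitle".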
+ root_model = get_model_root(self.model) + definition_model = field.get_definition_model(self.model) + + if definition_model != root_model: + prefix = definition_model._meta.app_label.lower() + '_' + definition_model.__name__.lower() + '__' + else: + prefix = '' + + if isinstance(field, FilterField): + return prefix + field.get_attname(self.model) + '_filter' + elif isinstance(field, SearchField): + return prefix + field.get_attname(self.model) + elif isinstance(field, RelatedFields): + return prefix + field.field_name + + def get_content_type(self): + """ + Returns the content type as a string for the model. + + For example: "wagtailcore.Page" + "myapp.MyModel" + """ + return self.model._meta.app_label + '.' + self.model.__name__ + + def get_all_content_types(self): + """ + Returns all the content type strings that apply to this model. + This includes the models' content type and all concrete ancestor + models that inherit from Indexed. + + For example: ["myapp.MyPageModel", "wagtailcore.Page"] + ["myapp.MyModel"] + """ + # Add our content type + content_types = [self.get_content_type()] + + # Add all ancestor classes content types as well + ancestor = self.get_parent() + while ancestor: + content_types.append(ancestor.get_content_type()) + ancestor = ancestor.get_parent() + + return content_types + + def get_document(self, obj): + # In the Elasticsearch 2 backend, we use a more efficient way to + # represent the content type of a document. + + # Instead of using a long string of model names that is queried using a + # "prefix" query, we instead use a multi-value string field and query it + # using a simple "match" query. + + # The only reason why this isn't implemented in the Elasticsearch 1.x + # backend yet is backwards compatibility + doc = super(Elasticsearch2Mapping, self).get_document(obj) + doc['content_type'] = self.get_all_content_types() + return doc + + +class Elasticsearch2Index(ElasticsearchIndex): + pass + + +class Elasticsearch2SearchQuery(ElasticsearchSearchQuery): + mapping_class = Elasticsearch2Mapping + + def get_content_type_filter(self): + # Query content_type using a "match" query. See comment in + # Elasticsearch2Mapping.get_document for more details + content_type = self.mapping_class(self.queryset.model).get_content_type() + + return { + 'match': { + 'content_type': content_type + } + } + + +class Elasticsearch2SearchResults(ElasticsearchSearchResults): + pass + + +class Elasticsearch2SearchBackend(ElasticsearchSearchBackend): + mapping_class = Elasticsearch2Mapping + index_class = Elasticsearch2Index + query_class = Elasticsearch2SearchQuery + results_class = Elasticsearch2SearchResults + + def get_index_for_model(self, model): + # Split models up into separate indices based on their root model. + # For example, all page-derived models get put together in one index, + # while images and documents each have their own index. 
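+        # For example, with an index name of "wagtail", SearchTest and all of
+        # its subclasses are indexed in "wagtail__searchtests_searchtest".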
+ root_model = get_model_root(model) + index_suffix = '__' + root_model._meta.app_label.lower() + '_' + root_model.__name__.lower() + + return self.index_class(self, self.index_name + index_suffix) + + +SearchBackend = Elasticsearch2SearchBackend diff --git a/wagtail/wagtailsearch/index.py b/wagtail/wagtailsearch/index.py index 79a3557f70..dc650f5cf6 100644 --- a/wagtail/wagtailsearch/index.py +++ b/wagtail/wagtailsearch/index.py @@ -1,5 +1,6 @@ from __future__ import absolute_import, unicode_literals +import inspect import logging from django.apps import apps @@ -178,6 +179,16 @@ class BaseField(object): except models.fields.FieldDoesNotExist: return self.field_name + def get_definition_model(self, cls): + try: + field = self.get_field(cls) + return field.model + except models.fields.FieldDoesNotExist: + # Find where it was defined by walking the inheritance tree + for base_cls in inspect.getmro(cls): + if self.field_name in base_cls.__dict__: + return base_cls + def get_type(self, cls): if 'type' in self.kwargs: return self.kwargs['type'] @@ -224,6 +235,10 @@ class RelatedFields(object): def get_field(self, cls): return cls._meta.get_field(self.field_name) + def get_definition_model(self, cls): + field = self.get_field(cls) + return field.model + def get_value(self, obj): field = self.get_field(obj.__class__) diff --git a/wagtail/wagtailsearch/tests/test_backends.py b/wagtail/wagtailsearch/tests/test_backends.py index 88aa80ac13..45cd12b634 100644 --- a/wagtail/wagtailsearch/tests/test_backends.py +++ b/wagtail/wagtailsearch/tests/test_backends.py @@ -17,6 +17,7 @@ from wagtail.wagtailsearch.backends import ( InvalidSearchBackendError, get_search_backend, get_search_backends) from wagtail.wagtailsearch.backends.base import FieldError from wagtail.wagtailsearch.backends.db import DatabaseSearchBackend +from wagtail.wagtailsearch.management.commands.update_index import group_models_by_index class BackendTests(WagtailTestUtils): @@ -35,11 +36,22 @@ class BackendTests(WagtailTestUtils): self.load_test_data() + def reset_index(self): + if self.backend.rebuilder_class: + for index, indexed_models in group_models_by_index(self.backend, [models.SearchTest, models.SearchTestChild]).items(): + rebuilder = self.backend.rebuilder_class(index) + index = rebuilder.start() + for model in indexed_models: + index.add_model(model) + rebuilder.finish() + + def refresh_index(self): + index = self.backend.get_index_for_model(models.SearchTest) + if index: + index.refresh() + def load_test_data(self): - # Reset the index - self.backend.reset_index() - self.backend.add_type(models.SearchTest) - self.backend.add_type(models.SearchTestChild) + self.reset_index() # Create a test database testa = models.SearchTest() @@ -71,8 +83,7 @@ class BackendTests(WagtailTestUtils): self.backend.add(testd) self.testd = testd - # Refresh the index - self.backend.refresh_index() + self.refresh_index() def test_blank_search(self): results = self.backend.search("", models.SearchTest) @@ -148,14 +159,14 @@ class BackendTests(WagtailTestUtils): # Delete one of the objects self.backend.delete(self.testa) self.testa.delete() - self.backend.refresh_index() + self.refresh_index() results = self.backend.search(None, models.SearchTest) self.assertEqual(set(results), {self.testb, self.testc.searchtest_ptr, self.testd.searchtest_ptr}) def test_update_index_command(self): # Reset the index, this should clear out the index - self.backend.reset_index() + self.reset_index() # Give Elasticsearch some time to catch up... 
time.sleep(1) diff --git a/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py b/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py new file mode 100644 index 0000000000..0eded964e7 --- /dev/null +++ b/wagtail/wagtailsearch/tests/test_elasticsearch2_backend.py @@ -0,0 +1,1110 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import, unicode_literals + +import datetime +import json +import os +import time +import unittest + +import mock +from django.core import management +from django.db.models import Q +from django.test import TestCase +from django.utils.six import StringIO +from elasticsearch.serializer import JSONSerializer + +from wagtail.tests.search import models +from wagtail.wagtailsearch.backends import get_search_backend +from wagtail.wagtailsearch.backends.elasticsearch2 import ( + Elasticsearch2SearchBackend, get_model_root) + +from .test_backends import BackendTests + + +class TestElasticsearch2SearchBackend(BackendTests, TestCase): + backend_path = 'wagtail.wagtailsearch.backends.elasticsearch2' + + def test_search_with_spaces_only(self): + # Search for some space characters and hope it doesn't crash + results = self.backend.search(" ", models.SearchTest) + + # Queries are lazily evaluated, force it to run + list(results) + + # Didn't crash, yay! + + def test_filter_on_non_filterindex_field(self): + # id is not listed in the search_fields for SearchTest; this should raise a FieldError + from wagtail.wagtailsearch.backends.base import FieldError + + with self.assertRaises(FieldError): + list(self.backend.search("Hello", models.SearchTest, filters=dict(id=42))) + + def test_filter_with_unsupported_lookup_type(self): + from wagtail.wagtailsearch.backends.base import FilterError + + with self.assertRaises(FilterError): + list(self.backend.search("Hello", models.SearchTest, filters=dict(title__iregex='h(ea)llo'))) + + def test_partial_search(self): + # Reset the index + self.reset_index() + self.backend.add_type(models.SearchTest) + self.backend.add_type(models.SearchTestChild) + + # Add some test data + obj = models.SearchTest() + obj.title = "HelloWorld" + obj.live = True + obj.save() + self.backend.add(obj) + + # Refresh the index + self.refresh_index() + + # Search and check + results = self.backend.search("HelloW", models.SearchTest.objects.all()) + + self.assertEqual(len(results), 1) + self.assertEqual(results[0].id, obj.id) + + def test_child_partial_search(self): + # Reset the index + self.reset_index() + self.backend.add_type(models.SearchTest) + self.backend.add_type(models.SearchTestChild) + + obj = models.SearchTestChild() + obj.title = "WorldHello" + obj.subtitle = "HelloWorld" + obj.live = True + obj.save() + self.backend.add(obj) + + # Refresh the index + self.refresh_index() + + # Search and check + results = self.backend.search("HelloW", models.SearchTest.objects.all()) + + self.assertEqual(len(results), 1) + self.assertEqual(results[0].id, obj.id) + + def test_ascii_folding(self): + # Reset the index + self.reset_index() + self.backend.add_type(models.SearchTest) + self.backend.add_type(models.SearchTestChild) + + # Add some test data + obj = models.SearchTest() + obj.title = "Ĥéllø" + obj.live = True + obj.save() + self.backend.add(obj) + + # Refresh the index + self.refresh_index() + + # Search and check + results = self.backend.search("Hello", models.SearchTest.objects.all()) + + self.assertEqual(len(results), 1) + self.assertEqual(results[0].id, obj.id) + + def test_query_analyser(self): + """ + This is testing that fields that use 
edgengram_analyzer as their index analyser do not + have it also as their query analyser + """ + # Reset the index + self.reset_index() + self.backend.add_type(models.SearchTest) + self.backend.add_type(models.SearchTestChild) + + # Add some test data + obj = models.SearchTest() + obj.title = "Hello" + obj.live = True + obj.save() + self.backend.add(obj) + + # Refresh the index + self.refresh_index() + + # Test search for "Hello" + results = self.backend.search("Hello", models.SearchTest.objects.all()) + + # Should find the result + self.assertEqual(len(results), 1) + + # Test search for "Horse" + results = self.backend.search("Horse", models.SearchTest.objects.all()) + + # Even though they both start with the letter "H". This should not be considered a match + self.assertEqual(len(results), 0) + + def test_search_with_hyphen(self): + """ + This tests that punctuation characters are treated the same + way in both indexing and querying. + + See: https://github.com/torchbox/wagtail/issues/937 + """ + # Reset the index + self.reset_index() + self.backend.add_type(models.SearchTest) + self.backend.add_type(models.SearchTestChild) + + # Add some test data + obj = models.SearchTest() + obj.title = "Hello-World" + obj.live = True + obj.save() + self.backend.add(obj) + + # Refresh the index + self.refresh_index() + + # Test search for "Hello-World" + results = self.backend.search("Hello-World", models.SearchTest.objects.all()) + + # Should find the result + self.assertEqual(len(results), 1) + + def test_custom_ordering(self): + # Reset the index + self.reset_index() + self.backend.add_type(models.SearchTest) + + # Add some test data + # a is more relevant, but b is more recent + a = models.SearchTest() + a.title = "Hello Hello World" + a.live = True + a.published_date = datetime.date(2015, 10, 11) + a.save() + self.backend.add(a) + + b = models.SearchTest() + b.title = "Hello World" + b.live = True + b.published_date = datetime.date(2015, 10, 12) + b.save() + self.backend.add(b) + + # Refresh the index + self.refresh_index() + + # Do a search ordered by relevence + results = self.backend.search("Hello", models.SearchTest.objects.all()) + self.assertEqual(list(results), [a, b]) + + # Do a search ordered by published date + results = self.backend.search( + "Hello", models.SearchTest.objects.order_by('-published_date'), order_by_relevance=False + ) + self.assertEqual(list(results), [b, a]) + + def test_and_operator_with_single_field(self): + # Testing for bug #1859 + + # Reset the index + self.reset_index() + self.backend.add_type(models.SearchTest) + + a = models.SearchTest() + a.title = "Hello World" + a.live = True + a.published_date = datetime.date(2015, 10, 12) + a.save() + self.backend.add(a) + + # Refresh the index + self.refresh_index() + + # Run query with "and" operator and single field + results = self.backend.search("Hello World", models.SearchTest, operator='and', fields=['title']) + self.assertEqual(list(results), [a]) + + def test_update_index_command_schema_only(self): + # Reset the index, this should clear out the index + self.reset_index() + + # Give Elasticsearch some time to catch up... 
+ time.sleep(1) + + results = self.backend.search(None, models.SearchTest) + self.assertEqual(set(results), set()) + + # Run update_index command + with self.ignore_deprecation_warnings(): + # ignore any DeprecationWarnings thrown by models with old-style indexed_fields definitions + management.call_command( + 'update_index', backend_name=self.backend_name, schema_only=True, interactive=False, stdout=StringIO() + ) + + # Unlike the test_update_index_command test. This should not give any results + results = self.backend.search(None, models.SearchTest) + self.assertEqual(set(results), set()) + + +class TestElasticsearch2SearchQuery(TestCase): + def assertDictEqual(self, a, b): + default = JSONSerializer().default + self.assertEqual( + json.dumps(a, sort_keys=True, default=default), json.dumps(b, sort_keys=True, default=default) + ) + + query_class = Elasticsearch2SearchBackend.query_class + + def test_simple(self): + # Create a query + query = self.query_class(models.SearchTest.objects.all(), "Hello") + + # Check it + expected_result = {'filtered': { + 'filter': {'match': {'content_type': 'searchtests.SearchTest'}}, + 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}} + }} + self.assertDictEqual(query.get_query(), expected_result) + + def test_none_query_string(self): + # Create a query + query = self.query_class(models.SearchTest.objects.all(), None) + + # Check it + expected_result = {'filtered': { + 'filter': {'match': {'content_type': 'searchtests.SearchTest'}}, + 'query': {'match_all': {}} + }} + self.assertDictEqual(query.get_query(), expected_result) + + def test_and_operator(self): + # Create a query + query = self.query_class(models.SearchTest.objects.all(), "Hello", operator='and') + + # Check it + expected_result = {'filtered': { + 'filter': {'match': {'content_type': 'searchtests.SearchTest'}}, + 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials'], 'operator': 'and'}} + }} + self.assertDictEqual(query.get_query(), expected_result) + + def test_filter(self): + # Create a query + query = self.query_class(models.SearchTest.objects.filter(title="Test"), "Hello") + + # Check it + expected_result = {'filtered': {'filter': {'and': [ + {'match': {'content_type': 'searchtests.SearchTest'}}, + {'term': {'title_filter': 'Test'}} + ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + self.assertDictEqual(query.get_query(), expected_result) + + def test_and_filter(self): + # Create a query + query = self.query_class(models.SearchTest.objects.filter(title="Test", live=True), "Hello") + + # Check it + expected_result = {'filtered': {'filter': {'and': [ + {'match': {'content_type': 'searchtests.SearchTest'}}, + {'and': [{'term': {'live_filter': True}}, {'term': {'title_filter': 'Test'}}]} + ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + + # Make sure field filters are sorted (as they can be in any order which may cause false positives) + query = query.get_query() + field_filters = query['filtered']['filter']['and'][1]['and'] + field_filters[:] = sorted(field_filters, key=lambda f: list(f['term'].keys())[0]) + + self.assertDictEqual(query, expected_result) + + def test_or_filter(self): + # Create a query + query = self.query_class(models.SearchTest.objects.filter(Q(title="Test") | Q(live=True)), "Hello") + + # Make sure field filters are sorted (as they can be in any order which may cause false positives) + query = query.get_query() + field_filters = 
query['filtered']['filter']['and'][1]['or'] + field_filters[:] = sorted(field_filters, key=lambda f: list(f['term'].keys())[0]) + + # Check it + expected_result = {'filtered': {'filter': {'and': [ + {'match': {'content_type': 'searchtests.SearchTest'}}, + {'or': [{'term': {'live_filter': True}}, {'term': {'title_filter': 'Test'}}]} + ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + self.assertDictEqual(query, expected_result) + + def test_negated_filter(self): + # Create a query + query = self.query_class(models.SearchTest.objects.exclude(live=True), "Hello") + + # Check it + expected_result = {'filtered': {'filter': {'and': [ + {'match': {'content_type': 'searchtests.SearchTest'}}, + {'not': {'term': {'live_filter': True}}} + ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + self.assertDictEqual(query.get_query(), expected_result) + + def test_fields(self): + # Create a query + query = self.query_class(models.SearchTest.objects.all(), "Hello", fields=['title']) + + # Check it + expected_result = {'filtered': { + 'filter': {'match': {'content_type': 'searchtests.SearchTest'}}, + 'query': {'match': {'title': 'Hello'}} + }} + self.assertDictEqual(query.get_query(), expected_result) + + def test_fields_with_and_operator(self): + # Create a query + query = self.query_class(models.SearchTest.objects.all(), "Hello", fields=['title'], operator='and') + + # Check it + expected_result = {'filtered': { + 'filter': {'match': {'content_type': 'searchtests.SearchTest'}}, + 'query': {'match': {'title': {'query': 'Hello', 'operator': 'and'}}} + }} + self.assertDictEqual(query.get_query(), expected_result) + + def test_multiple_fields(self): + # Create a query + query = self.query_class(models.SearchTest.objects.all(), "Hello", fields=['title', 'content']) + + # Check it + expected_result = {'filtered': { + 'filter': {'match': {'content_type': 'searchtests.SearchTest'}}, + 'query': {'multi_match': {'fields': ['title', 'content'], 'query': 'Hello'}} + }} + self.assertDictEqual(query.get_query(), expected_result) + + def test_multiple_fields_with_and_operator(self): + # Create a query + query = self.query_class( + models.SearchTest.objects.all(), "Hello", fields=['title', 'content'], operator='and' + ) + + # Check it + expected_result = {'filtered': { + 'filter': {'match': {'content_type': 'searchtests.SearchTest'}}, + 'query': {'multi_match': {'fields': ['title', 'content'], 'query': 'Hello', 'operator': 'and'}} + }} + self.assertDictEqual(query.get_query(), expected_result) + + def test_exact_lookup(self): + # Create a query + query = self.query_class(models.SearchTest.objects.filter(title__exact="Test"), "Hello") + + # Check it + expected_result = {'filtered': {'filter': {'and': [ + {'match': {'content_type': 'searchtests.SearchTest'}}, + {'term': {'title_filter': 'Test'}} + ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + self.assertDictEqual(query.get_query(), expected_result) + + def test_none_lookup(self): + # Create a query + query = self.query_class(models.SearchTest.objects.filter(title=None), "Hello") + + # Check it + expected_result = {'filtered': {'filter': {'and': [ + {'match': {'content_type': 'searchtests.SearchTest'}}, + {'missing': {'field': 'title_filter'}} + ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + self.assertDictEqual(query.get_query(), expected_result) + + def test_isnull_true_lookup(self): + # Create a query + query = 
self.query_class(models.SearchTest.objects.filter(title__isnull=True), "Hello") + + # Check it + expected_result = {'filtered': {'filter': {'and': [ + {'match': {'content_type': 'searchtests.SearchTest'}}, + {'missing': {'field': 'title_filter'}} + ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + self.assertDictEqual(query.get_query(), expected_result) + + def test_isnull_false_lookup(self): + # Create a query + query = self.query_class(models.SearchTest.objects.filter(title__isnull=False), "Hello") + + # Check it + expected_result = {'filtered': {'filter': {'and': [ + {'match': {'content_type': 'searchtests.SearchTest'}}, + {'not': {'missing': {'field': 'title_filter'}}} + ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + self.assertDictEqual(query.get_query(), expected_result) + + def test_startswith_lookup(self): + # Create a query + query = self.query_class(models.SearchTest.objects.filter(title__startswith="Test"), "Hello") + + # Check it + expected_result = {'filtered': {'filter': {'and': [ + {'match': {'content_type': 'searchtests.SearchTest'}}, + {'prefix': {'title_filter': 'Test'}} + ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + self.assertDictEqual(query.get_query(), expected_result) + + def test_gt_lookup(self): + # This also tests conversion of python dates to strings + + # Create a query + query = self.query_class( + models.SearchTest.objects.filter(published_date__gt=datetime.datetime(2014, 4, 29)), "Hello" + ) + + # Check it + expected_result = {'filtered': {'filter': {'and': [ + {'match': {'content_type': 'searchtests.SearchTest'}}, + {'range': {'published_date_filter': {'gt': '2014-04-29'}}} + ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + self.assertDictEqual(query.get_query(), expected_result) + + def test_lt_lookup(self): + # Create a query + query = self.query_class( + models.SearchTest.objects.filter(published_date__lt=datetime.datetime(2014, 4, 29)), "Hello" + ) + + # Check it + expected_result = {'filtered': {'filter': {'and': [ + {'match': {'content_type': 'searchtests.SearchTest'}}, + {'range': {'published_date_filter': {'lt': '2014-04-29'}}} + ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + self.assertDictEqual(query.get_query(), expected_result) + + def test_gte_lookup(self): + # Create a query + query = self.query_class( + models.SearchTest.objects.filter(published_date__gte=datetime.datetime(2014, 4, 29)), "Hello" + ) + + # Check it + expected_result = {'filtered': {'filter': {'and': [ + {'match': {'content_type': 'searchtests.SearchTest'}}, + {'range': {'published_date_filter': {'gte': '2014-04-29'}}} + ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + self.assertDictEqual(query.get_query(), expected_result) + + def test_lte_lookup(self): + # Create a query + query = self.query_class( + models.SearchTest.objects.filter(published_date__lte=datetime.datetime(2014, 4, 29)), "Hello" + ) + + # Check it + expected_result = {'filtered': {'filter': {'and': [ + {'match': {'content_type': 'searchtests.SearchTest'}}, + {'range': {'published_date_filter': {'lte': '2014-04-29'}}} + ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + self.assertDictEqual(query.get_query(), expected_result) + + def test_range_lookup(self): + start_date = datetime.datetime(2014, 4, 29) + end_date = datetime.datetime(2014, 8, 19) + + # 
Create a query + query = self.query_class( + models.SearchTest.objects.filter(published_date__range=(start_date, end_date)), "Hello" + ) + + # Check it + expected_result = {'filtered': {'filter': {'and': [ + {'match': {'content_type': 'searchtests.SearchTest'}}, + {'range': {'published_date_filter': {'gte': '2014-04-29', 'lte': '2014-08-19'}}} + ]}, 'query': {'multi_match': {'query': 'Hello', 'fields': ['_all', '_partials']}}}} + self.assertDictEqual(query.get_query(), expected_result) + + def test_custom_ordering(self): + # Create a query + query = self.query_class( + models.SearchTest.objects.order_by('published_date'), "Hello", order_by_relevance=False + ) + + # Check it + expected_result = [{'published_date_filter': 'asc'}] + self.assertDictEqual(query.get_sort(), expected_result) + + def test_custom_ordering_reversed(self): + # Create a query + query = self.query_class( + models.SearchTest.objects.order_by('-published_date'), "Hello", order_by_relevance=False + ) + + # Check it + expected_result = [{'published_date_filter': 'desc'}] + self.assertDictEqual(query.get_sort(), expected_result) + + def test_custom_ordering_multiple(self): + # Create a query + query = self.query_class( + models.SearchTest.objects.order_by('published_date', 'live'), "Hello", order_by_relevance=False + ) + + # Check it + expected_result = [{'published_date_filter': 'asc'}, {'live_filter': 'asc'}] + self.assertDictEqual(query.get_sort(), expected_result) + + +class TestElasticsearch2SearchResults(TestCase): + def assertDictEqual(self, a, b): + default = JSONSerializer().default + self.assertEqual( + json.dumps(a, sort_keys=True, default=default), json.dumps + ) + + def setUp(self): + self.objects = [] + + for i in range(3): + self.objects.append(models.SearchTest.objects.create(title=str(i))) + + def get_results(self): + backend = Elasticsearch2SearchBackend({}) + query = mock.MagicMock() + query.queryset = models.SearchTest.objects.all() + query.get_query.return_value = 'QUERY' + query.get_sort.return_value = None + return backend.results_class(backend, query) + + def construct_search_response(self, results): + return { + '_shards': {'failed': 0, 'successful': 5, 'total': 5}, + 'hits': { + 'hits': [ + { + '_id': 'searchtests_searchtest:' + str(result), + '_index': 'wagtail', + '_score': 1, + '_type': 'searchtests_searchtest', + 'fields': { + 'pk': [str(result)], + } + } + for result in results + ], + 'max_score': 1, + 'total': len(results) + }, + 'timed_out': False, + 'took': 2 + } + + @mock.patch('elasticsearch.Elasticsearch.search') + def test_basic_search(self, search): + search.return_value = self.construct_search_response([]) + results = self.get_results() + + list(results) # Performs search + + search.assert_any_call( + from_=0, + body={'query': 'QUERY'}, + _source=False, + fields='pk', + index='wagtail__searchtests_searchtest' + ) + + @mock.patch('elasticsearch.Elasticsearch.search') + def test_get_single_item(self, search): + # Need to return something to prevent index error + search.return_value = self.construct_search_response([self.objects[0].id]) + results = self.get_results() + + results[10] # Performs search + + search.assert_any_call( + from_=10, + body={'query': 'QUERY'}, + _source=False, + fields='pk', + index='wagtail__searchtests_searchtest', + size=1 + ) + + @mock.patch('elasticsearch.Elasticsearch.search') + def test_slice_results(self, search): + search.return_value = self.construct_search_response([]) + results = self.get_results()[1:4] + + list(results) # Performs search + + 
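+        # Slicing the results [1:4] should be passed through to Elasticsearch
+        # as from_=1 with a page size of 3.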
search.assert_any_call( + from_=1, + body={'query': 'QUERY'}, + _source=False, + fields='pk', + index='wagtail__searchtests_searchtest', + size=3 + ) + + @mock.patch('elasticsearch.Elasticsearch.search') + def test_slice_results_multiple_times(self, search): + search.return_value = self.construct_search_response([]) + results = self.get_results()[10:][:10] + + list(results) # Performs search + + search.assert_any_call( + from_=10, + body={'query': 'QUERY'}, + _source=False, + fields='pk', + index='wagtail__searchtests_searchtest', + size=10 + ) + + @mock.patch('elasticsearch.Elasticsearch.search') + def test_slice_results_and_get_item(self, search): + # Need to return something to prevent index error + search.return_value = self.construct_search_response([self.objects[0].id]) + results = self.get_results()[10:] + + results[10] # Performs search + + search.assert_any_call( + from_=20, + body={'query': 'QUERY'}, + _source=False, + fields='pk', + index='wagtail__searchtests_searchtest', + size=1 + ) + + @mock.patch('elasticsearch.Elasticsearch.search') + def test_result_returned(self, search): + search.return_value = self.construct_search_response([self.objects[0].id]) + results = self.get_results() + + self.assertEqual(results[0], self.objects[0]) + + @mock.patch('elasticsearch.Elasticsearch.search') + def test_len_1(self, search): + search.return_value = self.construct_search_response([self.objects[0].id]) + results = self.get_results() + + self.assertEqual(len(results), 1) + + @mock.patch('elasticsearch.Elasticsearch.search') + def test_len_2(self, search): + search.return_value = self.construct_search_response([self.objects[0].id, self.objects[1].id]) + results = self.get_results() + + self.assertEqual(len(results), 2) + + @mock.patch('elasticsearch.Elasticsearch.search') + def test_duplicate_results(self, search): # Duplicates will not be removed + search.return_value = self.construct_search_response([self.objects[0].id, self.objects[0].id]) + results = list(self.get_results()) # Must cast to list so we only create one query + + self.assertEqual(len(results), 2) + self.assertEqual(results[0], self.objects[0]) + self.assertEqual(results[1], self.objects[0]) + + @mock.patch('elasticsearch.Elasticsearch.search') + def test_result_order(self, search): + search.return_value = self.construct_search_response( + [self.objects[0].id, self.objects[1].id, self.objects[2].id] + ) + results = list(self.get_results()) # Must cast to list so we only create one query + + self.assertEqual(results[0], self.objects[0]) + self.assertEqual(results[1], self.objects[1]) + self.assertEqual(results[2], self.objects[2]) + + @mock.patch('elasticsearch.Elasticsearch.search') + def test_result_order_2(self, search): + search.return_value = self.construct_search_response( + [self.objects[2].id, self.objects[1].id, self.objects[0].id] + ) + results = list(self.get_results()) # Must cast to list so we only create one query + + self.assertEqual(results[0], self.objects[2]) + self.assertEqual(results[1], self.objects[1]) + self.assertEqual(results[2], self.objects[0]) + + +class TestElasticsearch2Mapping(TestCase): + def assertDictEqual(self, a, b): + default = JSONSerializer().default + self.assertEqual( + json.dumps(a, sort_keys=True, default=default), json.dumps(b, sort_keys=True, default=default) + ) + + def setUp(self): + # Create ES mapping + self.es_mapping = Elasticsearch2SearchBackend.mapping_class(models.SearchTest) + + # Create ES document + self.obj = models.SearchTest(title="Hello") + self.obj.save() + 
self.obj.tags.add("a tag") + + def test_get_document_type(self): + self.assertEqual(self.es_mapping.get_document_type(), 'searchtests_searchtest') + + def test_get_mapping(self): + # Build mapping + mapping = self.es_mapping.get_mapping() + + # Check + expected_result = { + 'searchtests_searchtest': { + 'properties': { + 'pk': {'index': 'not_analyzed', 'type': 'string', 'store': 'yes', 'include_in_all': False}, + 'content_type': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + '_partials': {'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard', 'include_in_all': False, 'type': 'string'}, + 'live_filter': {'index': 'not_analyzed', 'type': 'boolean', 'include_in_all': False}, + 'published_date_filter': {'index': 'not_analyzed', 'type': 'date', 'include_in_all': False}, + 'title': {'type': 'string', 'include_in_all': True, 'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard'}, + 'title_filter': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + 'content': {'type': 'string', 'include_in_all': True}, + 'callable_indexed_field': {'type': 'string', 'include_in_all': True}, + 'tags': { + 'type': 'nested', + 'properties': { + 'name': {'type': 'string', 'include_in_all': True, 'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard'}, + 'slug_filter': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + } + }, + } + } + } + + self.assertDictEqual(mapping, expected_result) + + def test_get_document_id(self): + self.assertEqual(self.es_mapping.get_document_id(self.obj), 'searchtests_searchtest:' + str(self.obj.pk)) + + def test_get_document(self): + # Get document + document = self.es_mapping.get_document(self.obj) + + # Sort partials + if '_partials' in document: + document['_partials'].sort() + + # Check + expected_result = { + 'pk': str(self.obj.pk), + 'content_type': ['searchtests.SearchTest'], + '_partials': ['Hello', 'a tag'], + 'live_filter': False, + 'published_date_filter': None, + 'title': 'Hello', + 'title_filter': 'Hello', + 'callable_indexed_field': 'Callable', + 'content': '', + 'tags': [ + { + 'name': 'a tag', + 'slug_filter': 'a-tag', + } + ], + } + + self.assertDictEqual(document, expected_result) + + +class TestElasticsearch2MappingInheritance(TestCase): + def assertDictEqual(self, a, b): + default = JSONSerializer().default + self.assertEqual( + json.dumps(a, sort_keys=True, default=default), json.dumps(b, sort_keys=True, default=default) + ) + + def setUp(self): + # Create ES mapping + self.es_mapping = Elasticsearch2SearchBackend.mapping_class(models.SearchTestChild) + + # Create ES document + self.obj = models.SearchTestChild(title="Hello", subtitle="World", page_id=1) + self.obj.save() + self.obj.tags.add("a tag") + + def test_get_document_type(self): + self.assertEqual(self.es_mapping.get_document_type(), 'searchtests_searchtest_searchtests_searchtestchild') + + def test_get_mapping(self): + # Build mapping + mapping = self.es_mapping.get_mapping() + + # Check + expected_result = { + 'searchtests_searchtest_searchtests_searchtestchild': { + 'properties': { + # New + 'searchtests_searchtestchild__extra_content': {'type': 'string', 'include_in_all': True}, + 'searchtests_searchtestchild__subtitle': {'type': 'string', 'include_in_all': True, 'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard'}, + 'searchtests_searchtestchild__page': { + 'type': 'nested', + 'properties': { + 'title': {'type': 'string', 'include_in_all': True, 'analyzer': 'edgengram_analyzer', 'search_analyzer': 
'standard'}, + 'search_description': {'type': 'string', 'include_in_all': True}, + 'live_filter': {'index': 'not_analyzed', 'type': 'boolean', 'include_in_all': False}, + } + }, + + # Inherited + 'pk': {'index': 'not_analyzed', 'type': 'string', 'store': 'yes', 'include_in_all': False}, + 'content_type': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + '_partials': {'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard', 'include_in_all': False, 'type': 'string'}, + 'live_filter': {'index': 'not_analyzed', 'type': 'boolean', 'include_in_all': False}, + 'published_date_filter': {'index': 'not_analyzed', 'type': 'date', 'include_in_all': False}, + 'title': {'type': 'string', 'include_in_all': True, 'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard'}, + 'title_filter': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + 'content': {'type': 'string', 'include_in_all': True}, + 'callable_indexed_field': {'type': 'string', 'include_in_all': True}, + 'tags': { + 'type': 'nested', + 'properties': { + 'name': {'type': 'string', 'include_in_all': True, 'analyzer': 'edgengram_analyzer', 'search_analyzer': 'standard'}, + 'slug_filter': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, + } + }, + } + } + } + + self.assertDictEqual(mapping, expected_result) + + def test_get_document_id(self): + # This must be tests_searchtest instead of 'tests_searchtest_tests_searchtestchild' + # as it uses the contents base content type name. + # This prevents the same object being accidentally indexed twice. + self.assertEqual(self.es_mapping.get_document_id(self.obj), 'searchtests_searchtest:' + str(self.obj.pk)) + + def test_get_document(self): + # Build document + document = self.es_mapping.get_document(self.obj) + + # Sort partials + if '_partials' in document: + document['_partials'].sort() + + # Check + expected_result = { + # New + 'searchtests_searchtestchild__extra_content': '', + 'searchtests_searchtestchild__subtitle': 'World', + 'searchtests_searchtestchild__page': { + 'title': 'Root', + 'search_description': '', + 'live_filter': True, + }, + + # Changed + 'content_type': ['searchtests.SearchTestChild', 'searchtests.SearchTest'], + + # Inherited + 'pk': str(self.obj.pk), + '_partials': ['Hello', 'Root', 'World', 'a tag'], + 'live_filter': False, + 'published_date_filter': None, + 'title': 'Hello', + 'title_filter': 'Hello', + 'callable_indexed_field': 'Callable', + 'content': '', + 'tags': [ + { + 'name': 'a tag', + 'slug_filter': 'a-tag', + } + ], + } + + self.assertDictEqual(document, expected_result) + + +class TestBackendConfiguration(TestCase): + def test_default_settings(self): + backend = Elasticsearch2SearchBackend(params={}) + + self.assertEqual(len(backend.hosts), 1) + self.assertEqual(backend.hosts[0]['host'], 'localhost') + self.assertEqual(backend.hosts[0]['port'], 9200) + self.assertEqual(backend.hosts[0]['use_ssl'], False) + + def test_hosts(self): + # This tests that HOSTS goes to es_hosts + backend = Elasticsearch2SearchBackend(params={ + 'HOSTS': [ + { + 'host': '127.0.0.1', + 'port': 9300, + 'use_ssl': True, + 'verify_certs': True, + } + ] + }) + + self.assertEqual(len(backend.hosts), 1) + self.assertEqual(backend.hosts[0]['host'], '127.0.0.1') + self.assertEqual(backend.hosts[0]['port'], 9300) + self.assertEqual(backend.hosts[0]['use_ssl'], True) + + def test_urls(self): + # This test backwards compatibility with old URLS setting + backend = Elasticsearch2SearchBackend(params={ + 'URLS': [ + 
'http://localhost:12345', + 'https://127.0.0.1:54321', + 'http://username:password@elasticsearch.mysite.com', + 'https://elasticsearch.mysite.com/hello', + ], + }) + + self.assertEqual(len(backend.hosts), 4) + self.assertEqual(backend.hosts[0]['host'], 'localhost') + self.assertEqual(backend.hosts[0]['port'], 12345) + self.assertEqual(backend.hosts[0]['use_ssl'], False) + + self.assertEqual(backend.hosts[1]['host'], '127.0.0.1') + self.assertEqual(backend.hosts[1]['port'], 54321) + self.assertEqual(backend.hosts[1]['use_ssl'], True) + + self.assertEqual(backend.hosts[2]['host'], 'elasticsearch.mysite.com') + self.assertEqual(backend.hosts[2]['port'], 80) + self.assertEqual(backend.hosts[2]['use_ssl'], False) + self.assertEqual(backend.hosts[2]['http_auth'], ('username', 'password')) + + self.assertEqual(backend.hosts[3]['host'], 'elasticsearch.mysite.com') + self.assertEqual(backend.hosts[3]['port'], 443) + self.assertEqual(backend.hosts[3]['use_ssl'], True) + self.assertEqual(backend.hosts[3]['url_prefix'], '/hello') + + +@unittest.skipUnless(os.environ.get('ELASTICSEARCH_URL', False), "ELASTICSEARCH_URL not set") +@unittest.skipUnless(os.environ.get('ELASTICSEARCH_VERSION', '1') == '2', "ELASTICSEARCH_VERSION not set to 2") +class TestRebuilder(TestCase): + def assertDictEqual(self, a, b): + default = JSONSerializer().default + self.assertEqual( + json.dumps(a, sort_keys=True, default=default), json.dumps(b, sort_keys=True, default=default) + ) + + def setUp(self): + self.backend = get_search_backend('elasticsearch') + self.es = self.backend.es + self.rebuilder = self.backend.get_rebuilder() + + self.backend.reset_index() + + def test_start_creates_index(self): + # First, make sure the index is deleted + try: + self.es.indices.delete(self.backend.index_name) + except self.NotFoundError: + pass + + self.assertFalse(self.es.indices.exists(self.backend.index_name)) + + # Run start + self.rebuilder.start() + + # Check the index exists + self.assertTrue(self.es.indices.exists(self.backend.index_name)) + + def test_start_deletes_existing_index(self): + # Put an alias into the index so we can check it was deleted + self.es.indices.put_alias(name='this_index_should_be_deleted', index=self.backend.index_name) + self.assertTrue( + self.es.indices.exists_alias(name='this_index_should_be_deleted', index=self.backend.index_name) + ) + + # Run start + self.rebuilder.start() + + # The alias should be gone (proving the index was deleted and recreated) + self.assertFalse( + self.es.indices.exists_alias(name='this_index_should_be_deleted', index=self.backend.index_name) + ) + + +@unittest.skipUnless(os.environ.get('ELASTICSEARCH_URL', False), "ELASTICSEARCH_URL not set") +@unittest.skipUnless(os.environ.get('ELASTICSEARCH_VERSION', '1') == '2', "ELASTICSEARCH_VERSION not set to 2") +class TestAtomicRebuilder(TestCase): + def setUp(self): + self.backend = get_search_backend('elasticsearch') + self.backend.rebuilder_class = self.backend.atomic_rebuilder_class + self.es = self.backend.es + self.rebuilder = self.backend.get_rebuilder() + + self.backend.reset_index() + + def test_start_creates_new_index(self): + # Rebuilder should make up a new index name that doesn't currently exist + self.assertFalse(self.es.indices.exists(self.rebuilder.index.name)) + + # Run start + self.rebuilder.start() + + # Check the index exists + self.assertTrue(self.es.indices.exists(self.rebuilder.index.name)) + + def test_start_doesnt_delete_current_index(self): + # Get current index name + current_index_name = 
list(self.es.indices.get_alias(name=self.rebuilder.alias.name).keys())[0] + + # Run start + self.rebuilder.start() + + # The index should still exist + self.assertTrue(self.es.indices.exists(current_index_name)) + + # And the alias should still point to it + self.assertTrue(self.es.indices.exists_alias(name=self.rebuilder.alias.name, index=current_index_name)) + + def test_finish_updates_alias(self): + # Run start + self.rebuilder.start() + + # Check that the alias doesn't point to new index + self.assertFalse( + self.es.indices.exists_alias(name=self.rebuilder.alias.name, index=self.rebuilder.index.name) + ) + + # Run finish + self.rebuilder.finish() + + # Check that the alias now points to the new index + self.assertTrue(self.es.indices.exists_alias(name=self.rebuilder.alias.name, index=self.rebuilder.index.name)) + + def test_finish_deletes_old_index(self): + # Get current index name + current_index_name = list(self.es.indices.get_alias(name=self.rebuilder.alias.name).keys())[0] + + # Run start + self.rebuilder.start() + + # Index should still exist + self.assertTrue(self.es.indices.exists(current_index_name)) + + # Run finish + self.rebuilder.finish() + + # Index should be gone + self.assertFalse(self.es.indices.exists(current_index_name)) + + +class TestGetModelRoot(TestCase): + def test_root_model(self): + from wagtail.wagtailcore.models import Page + + self.assertEqual(get_model_root(Page), Page) + + def test_child_model(self): + from wagtail.wagtailcore.models import Page + from wagtail.tests.testapp.models import SimplePage + + self.assertEqual(get_model_root(SimplePage), Page) + + def test_grandchild_model(self): + # MTIChildPage inherits from MTIBasePage which inherits from Page + from wagtail.wagtailcore.models import Page + from wagtail.tests.testapp.models import MTIChildPage + + self.assertEqual(get_model_root(MTIChildPage), Page) diff --git a/wagtail/wagtailsearch/tests/test_elasticsearch_backend.py b/wagtail/wagtailsearch/tests/test_elasticsearch_backend.py index e0c6919a32..1e598f9d2e 100644 --- a/wagtail/wagtailsearch/tests/test_elasticsearch_backend.py +++ b/wagtail/wagtailsearch/tests/test_elasticsearch_backend.py @@ -50,7 +50,7 @@ class TestElasticsearchSearchBackend(BackendTests, TestCase): def test_partial_search(self): # Reset the index - self.backend.reset_index() + self.reset_index() self.backend.add_type(models.SearchTest) self.backend.add_type(models.SearchTestChild) @@ -62,7 +62,7 @@ class TestElasticsearchSearchBackend(BackendTests, TestCase): self.backend.add(obj) # Refresh the index - self.backend.refresh_index() + self.refresh_index() # Search and check results = self.backend.search("HelloW", models.SearchTest.objects.all()) @@ -72,7 +72,7 @@ class TestElasticsearchSearchBackend(BackendTests, TestCase): def test_child_partial_search(self): # Reset the index - self.backend.reset_index() + self.reset_index() self.backend.add_type(models.SearchTest) self.backend.add_type(models.SearchTestChild) @@ -84,7 +84,7 @@ class TestElasticsearchSearchBackend(BackendTests, TestCase): self.backend.add(obj) # Refresh the index - self.backend.refresh_index() + self.refresh_index() # Search and check results = self.backend.search("HelloW", models.SearchTest.objects.all()) @@ -94,7 +94,7 @@ class TestElasticsearchSearchBackend(BackendTests, TestCase): def test_ascii_folding(self): # Reset the index - self.backend.reset_index() + self.reset_index() self.backend.add_type(models.SearchTest) self.backend.add_type(models.SearchTestChild) @@ -106,7 +106,7 @@ class 
TestElasticsearchSearchBackend(BackendTests, TestCase): self.backend.add(obj) # Refresh the index - self.backend.refresh_index() + self.refresh_index() # Search and check results = self.backend.search("Hello", models.SearchTest.objects.all()) @@ -120,7 +120,7 @@ class TestElasticsearchSearchBackend(BackendTests, TestCase): have it also as their query analyser """ # Reset the index - self.backend.reset_index() + self.reset_index() self.backend.add_type(models.SearchTest) self.backend.add_type(models.SearchTestChild) @@ -132,7 +132,7 @@ class TestElasticsearchSearchBackend(BackendTests, TestCase): self.backend.add(obj) # Refresh the index - self.backend.refresh_index() + self.refresh_index() # Test search for "Hello" results = self.backend.search("Hello", models.SearchTest.objects.all()) @@ -154,7 +154,7 @@ class TestElasticsearchSearchBackend(BackendTests, TestCase): See: https://github.com/torchbox/wagtail/issues/937 """ # Reset the index - self.backend.reset_index() + self.reset_index() self.backend.add_type(models.SearchTest) self.backend.add_type(models.SearchTestChild) @@ -166,7 +166,7 @@ class TestElasticsearchSearchBackend(BackendTests, TestCase): self.backend.add(obj) # Refresh the index - self.backend.refresh_index() + self.refresh_index() # Test search for "Hello-World" results = self.backend.search("Hello-World", models.SearchTest.objects.all()) @@ -176,7 +176,7 @@ class TestElasticsearchSearchBackend(BackendTests, TestCase): def test_custom_ordering(self): # Reset the index - self.backend.reset_index() + self.reset_index() self.backend.add_type(models.SearchTest) # Add some test data @@ -196,7 +196,7 @@ class TestElasticsearchSearchBackend(BackendTests, TestCase): self.backend.add(b) # Refresh the index - self.backend.refresh_index() + self.refresh_index() # Do a search ordered by relevence results = self.backend.search("Hello", models.SearchTest.objects.all()) @@ -212,7 +212,7 @@ class TestElasticsearchSearchBackend(BackendTests, TestCase): # Testing for bug #1859 # Reset the index - self.backend.reset_index() + self.reset_index() self.backend.add_type(models.SearchTest) a = models.SearchTest() @@ -223,7 +223,7 @@ class TestElasticsearchSearchBackend(BackendTests, TestCase): self.backend.add(a) # Refresh the index - self.backend.refresh_index() + self.refresh_index() # Run query with "and" operator and single field results = self.backend.search("Hello World", models.SearchTest, operator='and', fields=['title']) @@ -231,7 +231,7 @@ class TestElasticsearchSearchBackend(BackendTests, TestCase): def test_update_index_command_schema_only(self): # Reset the index, this should clear out the index - self.backend.reset_index() + self.reset_index() # Give Elasticsearch some time to catch up... time.sleep(1) @@ -979,6 +979,7 @@ class TestBackendConfiguration(TestCase): @unittest.skipUnless(os.environ.get('ELASTICSEARCH_URL', False), "ELASTICSEARCH_URL not set") +@unittest.skipUnless(os.environ.get('ELASTICSEARCH_VERSION', '1') == '1', "ELASTICSEARCH_VERSION not set to 1") class TestRebuilder(TestCase): def assertDictEqual(self, a, b): default = JSONSerializer().default @@ -1025,6 +1026,7 @@ class TestRebuilder(TestCase): @unittest.skipUnless(os.environ.get('ELASTICSEARCH_URL', False), "ELASTICSEARCH_URL not set") +@unittest.skipUnless(os.environ.get('ELASTICSEARCH_VERSION', '1') == '1', "ELASTICSEARCH_VERSION not set to 1") class TestAtomicRebuilder(TestCase): def setUp(self): self.backend = get_search_backend('elasticsearch')