From fe76c11043424d95a8ffee68f1ad4f404dd8c6e9 Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Wed, 2 May 2018 14:11:09 +0100 Subject: [PATCH] Implemented facet() method on search results Add error handling for when facet field doesnt exist Count('id' -> 'pk') Use assertDictEqual Fix indexing related fields using FilterField --- docs/topics/search/searching.rst | 23 ++++++++++ wagtail/contrib/postgres_search/backend.py | 25 ++++++++++- wagtail/search/backends/base.py | 5 +++ wagtail/search/backends/db.py | 24 +++++++++- wagtail/search/backends/elasticsearch2.py | 45 ++++++++++++++++++- wagtail/search/tests/test_backends.py | 38 +++++++++++++++- .../tests/test_elasticsearch2_backend.py | 12 +++-- .../tests/test_elasticsearch5_backend.py | 12 +++-- wagtail/tests/search/models.py | 1 + 9 files changed, 171 insertions(+), 14 deletions(-) diff --git a/docs/topics/search/searching.rst b/docs/topics/search/searching.rst index 07c9559b22..c09a328e1a 100644 --- a/docs/topics/search/searching.rst +++ b/docs/topics/search/searching.rst @@ -97,6 +97,29 @@ This can be limited to a certain set of fields by using the ``fields`` keyword a >>> EventPage.objects.search("Event", fields=["title"]) [, ] +Faceted search +-------------- + +Wagtail supports faceted search which is a kind of filtering based on a taxonomy +field (such as category or page type). + +The ``.facet(field_name)`` method returns an ``OrderedDict``. The keys are the +IDs of the related objects that have been referenced by the field and the +values are the number of references to each ID. The results are ordered by number +of references descending. + +For example, to find the most common page types in the search results: + +.. 
code-block:: python + + >>> Page.objects.search("Test").facet("content_type_id") + + # Note: The keys correspond to the ID of a ContentType object, the values are the + # number of pages returned for that type + OrderedDict([ + ('2', 4), # 4 pages have content_type_id == 2 + ('1', 2), # 2 pages have content_type_id == 1 + ]) Changing search behaviour ------------------------- diff --git a/wagtail/contrib/postgres_search/backend.py b/wagtail/contrib/postgres_search/backend.py index 5ec932417c..ec9a28a912 100644 --- a/wagtail/contrib/postgres_search/backend.py +++ b/wagtail/contrib/postgres_search/backend.py @@ -1,15 +1,16 @@ +from collections import OrderedDict from warnings import warn from django.contrib.postgres.search import SearchQuery as PostgresSearchQuery from django.contrib.postgres.search import SearchRank, SearchVector from django.db import DEFAULT_DB_ALIAS, NotSupportedError, connections, transaction -from django.db.models import F, Manager, Q, TextField, Value +from django.db.models import Count, F, Manager, Q, TextField, Value from django.db.models.constants import LOOKUP_SEP from django.db.models.functions import Cast from django.utils.encoding import force_text from wagtail.search.backends.base import ( - BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults) + BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults, FilterFieldError) from wagtail.search.index import RelatedFields, SearchField, get_indexed_models from wagtail.search.query import And, MatchAll, Not, Or, Prefix, SearchQueryShortcut, Term from wagtail.search.utils import ADD, AND, OR @@ -316,6 +317,26 @@ class PostgresSearchResults(BaseSearchResults): self.backend.config, None, None, score_field=self._score_field).count() + supports_facet = True + + def facet(self, field_name): + # Get field + field = self.query_compiler._get_filterable_field(field_name) + if field is None: + raise FilterFieldError( + 'Cannot facet search results with field "' + field_name + '". 
Please add index.FilterField(\'' + + field_name + '\') to ' + self.query_compiler.queryset.model.__name__ + '.search_fields.', + field_name=field_name + ) + + query = self.query_compiler.search(self.backend.get_config(), None, None) + results = query.values(field_name).annotate(count=Count('pk')).order_by('-count') + + return OrderedDict([ + (result[field_name], result['count']) + for result in results + ]) + class PostgresSearchRebuilder: def __init__(self, index): diff --git a/wagtail/search/backends/base.py b/wagtail/search/backends/base.py index 8a2bc1b6ea..c33c6dd34a 100644 --- a/wagtail/search/backends/base.py +++ b/wagtail/search/backends/base.py @@ -161,6 +161,8 @@ class BaseSearchQueryCompiler: class BaseSearchResults: + supports_facet = False + def __init__(self, backend, query_compiler, prefetch_related=None): self.backend = backend self.query_compiler = query_compiler @@ -251,6 +253,9 @@ class BaseSearchResults: clone._score_field = field_name return clone + def facet(self, field_name): + raise NotImplementedError("This search backend does not support faceting") + class EmptySearchResults(BaseSearchResults): def __init__(self): diff --git a/wagtail/search/backends/db.py b/wagtail/search/backends/db.py index 31ac73f94e..dc420ba668 100644 --- a/wagtail/search/backends/db.py +++ b/wagtail/search/backends/db.py @@ -1,10 +1,12 @@ +from collections import OrderedDict from warnings import warn from django.db import models +from django.db.models import Count from django.db.models.expressions import Value from wagtail.search.backends.base import ( - BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults) + BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults, FilterFieldError) from wagtail.search.query import And, MatchAll, Not, Or, Prefix, SearchQueryShortcut, Term from wagtail.search.utils import AND, OR @@ -106,6 +108,26 @@ class DatabaseSearchResults(BaseSearchResults): def _do_count(self): return self.get_queryset().count() + supports_facet 
= True + + def facet(self, field_name): + # Get field + field = self.query_compiler._get_filterable_field(field_name) + if field is None: + raise FilterFieldError( + 'Cannot facet search results with field "' + field_name + '". Please add index.FilterField(\'' + + field_name + '\') to ' + self.query_compiler.queryset.model.__name__ + '.search_fields.', + field_name=field_name + ) + + query = self.get_queryset() + results = query.values(field_name).annotate(count=Count('pk')).order_by('-count') + + return OrderedDict([ + (result[field_name], result['count']) + for result in results + ]) + class DatabaseSearchBackend(BaseSearchBackend): query_compiler_class = DatabaseSearchQueryCompiler diff --git a/wagtail/search/backends/elasticsearch2.py b/wagtail/search/backends/elasticsearch2.py index 11b624f773..c0f852388b 100644 --- a/wagtail/search/backends/elasticsearch2.py +++ b/wagtail/search/backends/elasticsearch2.py @@ -1,6 +1,7 @@ import copy import json import warnings +from collections import OrderedDict from urllib.parse import urlparse from django.db import DEFAULT_DB_ALIAS, models @@ -11,7 +12,7 @@ from elasticsearch import Elasticsearch, NotFoundError from elasticsearch.helpers import bulk from wagtail.search.backends.base import ( - BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults) + BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults, FilterFieldError) from wagtail.search.index import FilterField, Indexed, RelatedFields, SearchField, class_is_indexed from wagtail.search.query import ( And, Boost, Filter, Fuzzy, MatchAll, Not, Or, PlainText, Prefix, Term) @@ -239,7 +240,7 @@ class Elasticsearch2Mapping: value = field.get_value(obj) if isinstance(field, RelatedFields): - if isinstance(value, models.Manager): + if isinstance(value, (models.Manager, models.QuerySet)): nested_docs = [] for nested_obj in value.all(): @@ -251,6 +252,11 @@ class Elasticsearch2Mapping: elif isinstance(value, models.Model): value, extra_edgengrams = 
self._get_nested_document(field.fields, value) partials.extend(extra_edgengrams) + elif isinstance(field, FilterField): + if isinstance(value, (models.Manager, models.QuerySet)): + value = list(value.values_list('pk', flat=True)) + elif isinstance(value, models.Model): + value = value.pk doc[self.get_field_column_name(field)] = value @@ -601,6 +607,41 @@ class Elasticsearch2SearchQueryCompiler(BaseSearchQueryCompiler): class Elasticsearch2SearchResults(BaseSearchResults): fields_param_name = 'fields' + supports_facet = True + + def facet(self, field_name): + # Get field + field = self.query_compiler._get_filterable_field(field_name) + if field is None: + raise FilterFieldError( + 'Cannot facet search results with field "' + field_name + '". Please add index.FilterField(\'' + + field_name + '\') to ' + self.query_compiler.queryset.model.__name__ + '.search_fields.', + field_name=field_name + ) + + # Build body + body = self._get_es_body() + column_name = self.query_compiler.mapping.get_field_column_name(field) + + body['aggregations'] = { + field_name: { + 'terms': { + 'field': column_name, + } + } + } + + # Send to Elasticsearch + response = self.backend.es.search( + index=self.backend.get_index_for_model(self.query_compiler.queryset.model).name, + body=body, + size=0, + ) + + return OrderedDict([ + (bucket['key'], bucket['doc_count']) + for bucket in response['aggregations'][field_name]['buckets'] + ]) def _get_es_body(self, for_count=False): body = { diff --git a/wagtail/search/tests/test_backends.py b/wagtail/search/tests/test_backends.py index e5181ead85..4cb3b61e26 100644 --- a/wagtail/search/tests/test_backends.py +++ b/wagtail/search/tests/test_backends.py @@ -1,6 +1,7 @@ # coding: utf-8 import unittest +from collections import OrderedDict from datetime import date from io import StringIO @@ -8,10 +9,11 @@ from django.conf import settings from django.core import management from django.test import TestCase from django.test.utils import override_settings +from 
taggit.models import Tag from wagtail.search.backends import ( InvalidSearchBackendError, get_search_backend, get_search_backends) -from wagtail.search.backends.base import FieldError +from wagtail.search.backends.base import FieldError, FilterFieldError from wagtail.search.backends.db import DatabaseSearchBackend from wagtail.search.query import MATCH_ALL, And, Boost, Filter, Not, Or, PlainText, Prefix, Term from wagtail.tests.search import models @@ -394,6 +396,40 @@ class BackendTests(WagtailTestUtils): "A Game of Thrones" ]) + # FACET TESTS + + def test_facet(self): + results = self.backend.search(MATCH_ALL, models.ProgrammingGuide).facet('programming_language') + + # Not testing ordering here as two of the items have the same count, so the ordering is undefined. + # See test_facet_tags for a test of the ordering + self.assertDictEqual(dict(results), {'js': 2, 'py': 2, 'rs': 1}) + + def test_facet_tags(self): + # The test data doesn't contain any tags, add some + FANTASY_BOOKS = [1, 2, 3, 4, 5, 6, 7] + SCIFI_BOOKS = [10] + for book_id in FANTASY_BOOKS: + models.Book.objects.get(id=book_id).tags.add('Fantasy') + for book_id in SCIFI_BOOKS: + models.Book.objects.get(id=book_id).tags.add('Science Fiction') + + fantasy_tag = Tag.objects.get(name='Fantasy') + scifi_tag = Tag.objects.get(name='Science Fiction') + + results = self.backend.search(MATCH_ALL, models.Book).facet('tags') + + self.assertEqual(results, OrderedDict([ + (fantasy_tag.id, 7), + (None, 5), + (scifi_tag.id, 1), + ])) + + def test_facet_with_nonexistent_field(self): + with self.assertRaises(FilterFieldError): + self.backend.search(MATCH_ALL, models.ProgrammingGuide).facet('foo') + + # MISC TESTS def test_same_rank_pages(self): diff --git a/wagtail/search/tests/test_elasticsearch2_backend.py b/wagtail/search/tests/test_elasticsearch2_backend.py index ad8f662bc3..052e192230 100644 --- a/wagtail/search/tests/test_elasticsearch2_backend.py +++ b/wagtail/search/tests/test_elasticsearch2_backend.py @@ 
-538,7 +538,8 @@ class TestElasticsearch2Mapping(TestCase): 'name': {'type': 'string', 'include_in_all': True}, 'slug_filter': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, }, - } + }, + 'tags_filter': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False} } } } @@ -571,7 +572,8 @@ class TestElasticsearch2Mapping(TestCase): ], 'publication_date_filter': datetime.date(1954, 7, 29), 'number_of_pages_filter': 423, - 'tags': [] + 'tags': [], + 'tags_filter': [] } self.assertDictEqual(document, expected_result) @@ -639,7 +641,8 @@ class TestElasticsearch2MappingInheritance(TestCase): 'name': {'type': 'string', 'include_in_all': True}, 'slug_filter': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, }, - } + }, + 'tags_filter': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False} } } } @@ -699,7 +702,8 @@ class TestElasticsearch2MappingInheritance(TestCase): ], 'publication_date_filter': datetime.date(1954, 7, 29), 'number_of_pages_filter': 423, - 'tags': [] + 'tags': [], + 'tags_filter': [] } self.assertDictEqual(document, expected_result) diff --git a/wagtail/search/tests/test_elasticsearch5_backend.py b/wagtail/search/tests/test_elasticsearch5_backend.py index 121b8bee27..ae87fd7987 100644 --- a/wagtail/search/tests/test_elasticsearch5_backend.py +++ b/wagtail/search/tests/test_elasticsearch5_backend.py @@ -539,7 +539,8 @@ class TestElasticsearch5Mapping(TestCase): 'name': {'type': 'text', 'include_in_all': True}, 'slug_filter': {'type': 'keyword', 'include_in_all': False}, }, - } + }, + 'tags_filter': {'type': 'keyword', 'include_in_all': False} } } } @@ -572,7 +573,8 @@ class TestElasticsearch5Mapping(TestCase): ], 'publication_date_filter': datetime.date(1954, 7, 29), 'number_of_pages_filter': 423, - 'tags': [] + 'tags': [], + 'tags_filter': [] } self.assertDictEqual(document, expected_result) @@ -640,7 +642,8 @@ class TestElasticsearch5MappingInheritance(TestCase): 'name': {'type': 'text', 
'include_in_all': True}, 'slug_filter': {'type': 'keyword', 'include_in_all': False}, }, - } + }, + 'tags_filter': {'type': 'keyword', 'include_in_all': False} } } } @@ -700,7 +703,8 @@ class TestElasticsearch5MappingInheritance(TestCase): ], 'publication_date_filter': datetime.date(1954, 7, 29), 'number_of_pages_filter': 423, - 'tags': [] + 'tags': [], + 'tags_filter': [] } self.assertDictEqual(document, expected_result) diff --git a/wagtail/tests/search/models.py b/wagtail/tests/search/models.py index cc81714485..24b05b7bee 100644 --- a/wagtail/tests/search/models.py +++ b/wagtail/tests/search/models.py @@ -34,6 +34,7 @@ class Book(index.Indexed, models.Model): index.SearchField('name'), index.FilterField('slug'), ]), + index.FilterField('tags'), ] @classmethod