Implemented facet() method on search results

Add error handling for when facet field doesn't exist

Count('id' -> 'pk')

Use assertDictEqual

Fix indexing related fields using FilterField
pull/4583/merge
Karl Hobley 2018-05-02 14:11:09 +01:00 zatwierdzone przez Bertrand Bordage
rodzic 3cd18f3c1f
commit fe76c11043
9 zmienionych plików z 171 dodań i 14 usunięć

Wyświetl plik

@ -97,6 +97,29 @@ This can be limited to a certain set of fields by using the ``fields`` keyword a
>>> EventPage.objects.search("Event", fields=["title"])
[<EventPage: Event 1>, <EventPage: Event 2>]
Faceted search
--------------
Wagtail supports faceted search which is a kind of filtering based on a taxonomy
field (such as category or page type).
The ``.facet(field_name)`` method returns an ``OrderedDict``. The keys are
the IDs of the related objects that have been referenced by the field and the
values are the number of references to each ID. The results are ordered by number
of references descending.
For example, to find the most common page types in the search results:
.. code-block:: python
>>> Page.objects.search("Test").facet("content_type_id")
# Note: The keys correspond to the ID of a ContentType object, the values are the
# number of pages returned for that type
OrderedDict([
('2', 4), # 4 pages have content_type_id == 2
('1', 2), # 2 pages have content_type_id == 1
])
Changing search behaviour
-------------------------

Wyświetl plik

@ -1,15 +1,16 @@
from collections import OrderedDict
from warnings import warn
from django.contrib.postgres.search import SearchQuery as PostgresSearchQuery
from django.contrib.postgres.search import SearchRank, SearchVector
from django.db import DEFAULT_DB_ALIAS, NotSupportedError, connections, transaction
from django.db.models import F, Manager, Q, TextField, Value
from django.db.models import Count, F, Manager, Q, TextField, Value
from django.db.models.constants import LOOKUP_SEP
from django.db.models.functions import Cast
from django.utils.encoding import force_text
from wagtail.search.backends.base import (
BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults)
BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults, FilterFieldError)
from wagtail.search.index import RelatedFields, SearchField, get_indexed_models
from wagtail.search.query import And, MatchAll, Not, Or, Prefix, SearchQueryShortcut, Term
from wagtail.search.utils import ADD, AND, OR
@ -316,6 +317,26 @@ class PostgresSearchResults(BaseSearchResults):
self.backend.config, None, None,
score_field=self._score_field).count()
supports_facet = True
def facet(self, field_name):
    """
    Count the search results grouped by the value of ``field_name``.

    Returns an ``OrderedDict`` whose keys are the values of ``field_name`` and
    whose values are the number of results with that value, ordered by count
    descending.

    Raises ``FilterFieldError`` when the query compiler has no filterable
    field called ``field_name``.
    """
    compiler = self.query_compiler

    # Only fields known to the compiler as filterable can be faceted on
    if compiler._get_filterable_field(field_name) is None:
        message = ''.join([
            'Cannot facet search results with field "', field_name,
            '". Please add index.FilterField(\'', field_name, '\') to ',
            compiler.queryset.model.__name__, '.search_fields.',
        ])
        raise FilterFieldError(message, field_name=field_name)

    # presumably the two ``None`` arguments are the start/stop bounds, i.e. the
    # whole result set is aggregated -- confirm against the compiler's search()
    matches = compiler.search(self.backend.get_config(), None, None)

    # Group by the facet field and count rows per group, largest group first
    counted_rows = (
        matches.values(field_name)
        .annotate(count=Count('pk'))
        .order_by('-count')
    )

    facets = OrderedDict()
    for row in counted_rows:
        facets[row[field_name]] = row['count']
    return facets
class PostgresSearchRebuilder:
def __init__(self, index):

Wyświetl plik

@ -161,6 +161,8 @@ class BaseSearchQueryCompiler:
class BaseSearchResults:
supports_facet = False
def __init__(self, backend, query_compiler, prefetch_related=None):
self.backend = backend
self.query_compiler = query_compiler
@ -251,6 +253,9 @@ class BaseSearchResults:
clone._score_field = field_name
return clone
def facet(self, field_name):
    """
    Fallback implementation of faceting: always raises ``NotImplementedError``.
    """
    message = "This search backend does not support faceting"
    raise NotImplementedError(message)
class EmptySearchResults(BaseSearchResults):
def __init__(self):

Wyświetl plik

@ -1,10 +1,12 @@
from collections import OrderedDict
from warnings import warn
from django.db import models
from django.db.models import Count
from django.db.models.expressions import Value
from wagtail.search.backends.base import (
BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults)
BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults, FilterFieldError)
from wagtail.search.query import And, MatchAll, Not, Or, Prefix, SearchQueryShortcut, Term
from wagtail.search.utils import AND, OR
@ -106,6 +108,26 @@ class DatabaseSearchResults(BaseSearchResults):
def _do_count(self):
return self.get_queryset().count()
supports_facet = True
def facet(self, field_name):
    """
    Count the search results grouped by the value of ``field_name``.

    Returns an ``OrderedDict`` whose keys are the values of ``field_name`` and
    whose values are the number of results with that value, ordered by count
    descending.

    Raises ``FilterFieldError`` when the query compiler has no filterable
    field called ``field_name``.
    """
    compiler = self.query_compiler

    # Only fields known to the compiler as filterable can be faceted on
    if compiler._get_filterable_field(field_name) is None:
        message = ''.join([
            'Cannot facet search results with field "', field_name,
            '". Please add index.FilterField(\'', field_name, '\') to ',
            compiler.queryset.model.__name__, '.search_fields.',
        ])
        raise FilterFieldError(message, field_name=field_name)

    # Group the result queryset by the facet field and count rows per group,
    # largest group first
    counted_rows = (
        self.get_queryset()
        .values(field_name)
        .annotate(count=Count('pk'))
        .order_by('-count')
    )

    facets = OrderedDict()
    for row in counted_rows:
        facets[row[field_name]] = row['count']
    return facets
class DatabaseSearchBackend(BaseSearchBackend):
query_compiler_class = DatabaseSearchQueryCompiler

Wyświetl plik

@ -1,6 +1,7 @@
import copy
import json
import warnings
from collections import OrderedDict
from urllib.parse import urlparse
from django.db import DEFAULT_DB_ALIAS, models
@ -11,7 +12,7 @@ from elasticsearch import Elasticsearch, NotFoundError
from elasticsearch.helpers import bulk
from wagtail.search.backends.base import (
BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults)
BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults, FilterFieldError)
from wagtail.search.index import FilterField, Indexed, RelatedFields, SearchField, class_is_indexed
from wagtail.search.query import (
And, Boost, Filter, Fuzzy, MatchAll, Not, Or, PlainText, Prefix, Term)
@ -239,7 +240,7 @@ class Elasticsearch2Mapping:
value = field.get_value(obj)
if isinstance(field, RelatedFields):
if isinstance(value, models.Manager):
if isinstance(value, (models.Manager, models.QuerySet)):
nested_docs = []
for nested_obj in value.all():
@ -251,6 +252,11 @@ class Elasticsearch2Mapping:
elif isinstance(value, models.Model):
value, extra_edgengrams = self._get_nested_document(field.fields, value)
partials.extend(extra_edgengrams)
elif isinstance(field, FilterField):
if isinstance(value, (models.Manager, models.QuerySet)):
value = list(value.values_list('pk', flat=True))
elif isinstance(value, models.Model):
value = value.pk
doc[self.get_field_column_name(field)] = value
@ -601,6 +607,41 @@ class Elasticsearch2SearchQueryCompiler(BaseSearchQueryCompiler):
class Elasticsearch2SearchResults(BaseSearchResults):
fields_param_name = 'fields'
supports_facet = True
def facet(self, field_name):
    """
    Count the search results grouped by the value of ``field_name``.

    Adds a ``terms`` aggregation on the field's index column to the search body
    and returns an ``OrderedDict`` mapping each bucket key to its document
    count, in the order the buckets are returned by Elasticsearch.

    Raises ``FilterFieldError`` when the query compiler has no filterable
    field called ``field_name``.
    """
    compiler = self.query_compiler

    # Only fields known to the compiler as filterable can be faceted on
    field = compiler._get_filterable_field(field_name)
    if field is None:
        message = ''.join([
            'Cannot facet search results with field "', field_name,
            '". Please add index.FilterField(\'', field_name, '\') to ',
            compiler.queryset.model.__name__, '.search_fields.',
        ])
        raise FilterFieldError(message, field_name=field_name)

    # Start from the regular search body and attach the aggregation to it.
    # NOTE(review): a terms aggregation returns a limited number of buckets
    # unless a size is given in the aggregation itself -- confirm this is the
    # intended behaviour for fields with many distinct values.
    body = self._get_es_body()
    terms = {'field': compiler.mapping.get_field_column_name(field)}
    body['aggregations'] = {field_name: {'terms': terms}}

    # size=0: no hits are requested, only the aggregation part of the response is read
    index = self.backend.get_index_for_model(compiler.queryset.model)
    response = self.backend.es.search(index=index.name, body=body, size=0)

    facets = OrderedDict()
    for bucket in response['aggregations'][field_name]['buckets']:
        facets[bucket['key']] = bucket['doc_count']
    return facets
def _get_es_body(self, for_count=False):
body = {

Wyświetl plik

@ -1,6 +1,7 @@
# coding: utf-8
import unittest
from collections import OrderedDict
from datetime import date
from io import StringIO
@ -8,10 +9,11 @@ from django.conf import settings
from django.core import management
from django.test import TestCase
from django.test.utils import override_settings
from taggit.models import Tag
from wagtail.search.backends import (
InvalidSearchBackendError, get_search_backend, get_search_backends)
from wagtail.search.backends.base import FieldError
from wagtail.search.backends.base import FieldError, FilterFieldError
from wagtail.search.backends.db import DatabaseSearchBackend
from wagtail.search.query import MATCH_ALL, And, Boost, Filter, Not, Or, PlainText, Prefix, Term
from wagtail.tests.search import models
@ -394,6 +396,40 @@ class BackendTests(WagtailTestUtils):
"A Game of Thrones"
])
# FACET TESTS
def test_facet(self):
    # Facet every ProgrammingGuide on its programming language
    search_results = self.backend.search(MATCH_ALL, models.ProgrammingGuide)
    facets = search_results.facet('programming_language')

    # Two of the items have the same count, so their relative order is undefined:
    # compare as a plain dict here. See test_facet_tags for a test of the ordering
    expected_counts = {'js': 2, 'py': 2, 'rs': 1}
    self.assertDictEqual(dict(facets), expected_counts)
def test_facet_tags(self):
    # The test data doesn't contain any tags, so attach some before searching
    books_by_tag = [
        ('Fantasy', [1, 2, 3, 4, 5, 6, 7]),
        ('Science Fiction', [10]),
    ]
    for tag_name, book_ids in books_by_tag:
        for book_id in book_ids:
            models.Book.objects.get(id=book_id).tags.add(tag_name)

    fantasy_tag = Tag.objects.get(name='Fantasy')
    scifi_tag = Tag.objects.get(name='Science Fiction')

    facets = self.backend.search(MATCH_ALL, models.Book).facet('tags')

    # Ordering matters here: highest count first, with a None key between the two tags
    expected = OrderedDict()
    expected[fantasy_tag.id] = 7
    expected[None] = 5
    expected[scifi_tag.id] = 1
    self.assertEqual(facets, expected)
def test_facet_with_nonexistent_field(self):
    # Faceting on the field name 'foo' must raise FilterFieldError
    with self.assertRaises(FilterFieldError):
        search_results = self.backend.search(MATCH_ALL, models.ProgrammingGuide)
        search_results.facet('foo')
# MISC TESTS
def test_same_rank_pages(self):

Wyświetl plik

@ -538,7 +538,8 @@ class TestElasticsearch2Mapping(TestCase):
'name': {'type': 'string', 'include_in_all': True},
'slug_filter': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False},
},
}
},
'tags_filter': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}
}
}
}
@ -571,7 +572,8 @@ class TestElasticsearch2Mapping(TestCase):
],
'publication_date_filter': datetime.date(1954, 7, 29),
'number_of_pages_filter': 423,
'tags': []
'tags': [],
'tags_filter': []
}
self.assertDictEqual(document, expected_result)
@ -639,7 +641,8 @@ class TestElasticsearch2MappingInheritance(TestCase):
'name': {'type': 'string', 'include_in_all': True},
'slug_filter': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False},
},
}
},
'tags_filter': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}
}
}
}
@ -699,7 +702,8 @@ class TestElasticsearch2MappingInheritance(TestCase):
],
'publication_date_filter': datetime.date(1954, 7, 29),
'number_of_pages_filter': 423,
'tags': []
'tags': [],
'tags_filter': []
}
self.assertDictEqual(document, expected_result)

Wyświetl plik

@ -539,7 +539,8 @@ class TestElasticsearch5Mapping(TestCase):
'name': {'type': 'text', 'include_in_all': True},
'slug_filter': {'type': 'keyword', 'include_in_all': False},
},
}
},
'tags_filter': {'type': 'keyword', 'include_in_all': False}
}
}
}
@ -572,7 +573,8 @@ class TestElasticsearch5Mapping(TestCase):
],
'publication_date_filter': datetime.date(1954, 7, 29),
'number_of_pages_filter': 423,
'tags': []
'tags': [],
'tags_filter': []
}
self.assertDictEqual(document, expected_result)
@ -640,7 +642,8 @@ class TestElasticsearch5MappingInheritance(TestCase):
'name': {'type': 'text', 'include_in_all': True},
'slug_filter': {'type': 'keyword', 'include_in_all': False},
},
}
},
'tags_filter': {'type': 'keyword', 'include_in_all': False}
}
}
}
@ -700,7 +703,8 @@ class TestElasticsearch5MappingInheritance(TestCase):
],
'publication_date_filter': datetime.date(1954, 7, 29),
'number_of_pages_filter': 423,
'tags': []
'tags': [],
'tags_filter': []
}
self.assertDictEqual(document, expected_result)

Wyświetl plik

@ -34,6 +34,7 @@ class Book(index.Indexed, models.Model):
index.SearchField('name'),
index.FilterField('slug'),
]),
index.FilterField('tags'),
]
@classmethod