kopia lustrzana https://github.com/wagtail/wagtail
Implemented facet() method on search results
Add error handling for when facet field doesn't exist; Count('id' -> 'pk'); Use assertDictEqual; Fix indexing related fields using FilterField pull/4583/merge
rodzic
3cd18f3c1f
commit
fe76c11043
|
@ -97,6 +97,29 @@ This can be limited to a certain set of fields by using the ``fields`` keyword a
|
|||
>>> EventPage.objects.search("Event", fields=["title"])
|
||||
[<EventPage: Event 1>, <EventPage: Event 2>]
|
||||
|
||||
Faceted search
|
||||
--------------
|
||||
|
||||
Wagtail supports faceted search, which is a kind of filtering based on a taxonomy
|
||||
field (such as category or page type).
|
||||
|
||||
The ``.facet(field_name)`` method returns an ``OrderedDict``. The keys are the
|
||||
IDs of the related objects that have been referenced by the field and the
|
||||
values are the number of references to each ID. The results are ordered by number
|
||||
of references descending.
|
||||
|
||||
For example, to find the most common page types in the search results:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
>>> Page.objects.search("Test").facet("content_type_id")
|
||||
|
||||
# Note: The keys correspond to the ID of a ContentType object, the values are the
|
||||
# number of pages returned for that type
|
||||
OrderedDict([
|
||||
('2', 4), # 4 pages have content_type_id == 2
|
||||
('1', 2), # 2 pages have content_type_id == 1
|
||||
])
|
||||
|
||||
Changing search behaviour
|
||||
-------------------------
|
||||
|
|
|
@ -1,15 +1,16 @@
|
|||
from collections import OrderedDict
|
||||
from warnings import warn
|
||||
|
||||
from django.contrib.postgres.search import SearchQuery as PostgresSearchQuery
|
||||
from django.contrib.postgres.search import SearchRank, SearchVector
|
||||
from django.db import DEFAULT_DB_ALIAS, NotSupportedError, connections, transaction
|
||||
from django.db.models import F, Manager, Q, TextField, Value
|
||||
from django.db.models import Count, F, Manager, Q, TextField, Value
|
||||
from django.db.models.constants import LOOKUP_SEP
|
||||
from django.db.models.functions import Cast
|
||||
from django.utils.encoding import force_text
|
||||
|
||||
from wagtail.search.backends.base import (
|
||||
BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults)
|
||||
BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults, FilterFieldError)
|
||||
from wagtail.search.index import RelatedFields, SearchField, get_indexed_models
|
||||
from wagtail.search.query import And, MatchAll, Not, Or, Prefix, SearchQueryShortcut, Term
|
||||
from wagtail.search.utils import ADD, AND, OR
|
||||
|
@ -316,6 +317,26 @@ class PostgresSearchResults(BaseSearchResults):
|
|||
self.backend.config, None, None,
|
||||
score_field=self._score_field).count()
|
||||
|
||||
supports_facet = True
|
||||
|
||||
def facet(self, field_name):
    """
    Count the search results grouped by their value for ``field_name``.

    Returns an ``OrderedDict`` mapping each value of ``field_name`` to the
    number of matching rows, ordered by that number descending.

    Raises ``FilterFieldError`` when the query compiler has no filterable
    field called ``field_name``.
    """
    compiler = self.query_compiler

    # Only fields the query compiler reports as filterable may be faceted on
    if compiler._get_filterable_field(field_name) is None:
        raise FilterFieldError(
            'Cannot facet search results with field "' + field_name + '". Please add index.FilterField(\'' +
            field_name + '\') to ' + compiler.queryset.model.__name__ + '.search_fields.',
            field_name=field_name
        )

    # Run the search without start/stop bounds, then group and count per value
    matches = compiler.search(self.backend.get_config(), None, None)
    grouped = matches.values(field_name).annotate(count=Count('pk'))
    rows = grouped.order_by('-count')

    counts = OrderedDict()
    for row in rows:
        counts[row[field_name]] = row['count']
    return counts
|
||||
|
||||
|
||||
class PostgresSearchRebuilder:
|
||||
def __init__(self, index):
|
||||
|
|
|
@ -161,6 +161,8 @@ class BaseSearchQueryCompiler:
|
|||
|
||||
|
||||
class BaseSearchResults:
|
||||
supports_facet = False
|
||||
|
||||
def __init__(self, backend, query_compiler, prefetch_related=None):
|
||||
self.backend = backend
|
||||
self.query_compiler = query_compiler
|
||||
|
@ -251,6 +253,9 @@ class BaseSearchResults:
|
|||
clone._score_field = field_name
|
||||
return clone
|
||||
|
||||
def facet(self, field_name):
    """
    Default implementation: always raises ``NotImplementedError``.
    """
    message = "This search backend does not support faceting"
    raise NotImplementedError(message)
|
||||
|
||||
|
||||
class EmptySearchResults(BaseSearchResults):
|
||||
def __init__(self):
|
||||
|
|
|
@ -1,10 +1,12 @@
|
|||
from collections import OrderedDict
|
||||
from warnings import warn
|
||||
|
||||
from django.db import models
|
||||
from django.db.models import Count
|
||||
from django.db.models.expressions import Value
|
||||
|
||||
from wagtail.search.backends.base import (
|
||||
BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults)
|
||||
BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults, FilterFieldError)
|
||||
from wagtail.search.query import And, MatchAll, Not, Or, Prefix, SearchQueryShortcut, Term
|
||||
from wagtail.search.utils import AND, OR
|
||||
|
||||
|
@ -106,6 +108,26 @@ class DatabaseSearchResults(BaseSearchResults):
|
|||
def _do_count(self):
|
||||
return self.get_queryset().count()
|
||||
|
||||
supports_facet = True
|
||||
|
||||
def facet(self, field_name):
    """
    Count the search results grouped by their value for ``field_name``.

    Returns an ``OrderedDict`` mapping each value of ``field_name`` to the
    number of matching rows, ordered by that number descending.

    Raises ``FilterFieldError`` when the query compiler has no filterable
    field called ``field_name``.
    """
    compiler = self.query_compiler

    # Only fields the query compiler reports as filterable may be faceted on
    if compiler._get_filterable_field(field_name) is None:
        raise FilterFieldError(
            'Cannot facet search results with field "' + field_name + '". Please add index.FilterField(\'' +
            field_name + '\') to ' + compiler.queryset.model.__name__ + '.search_fields.',
            field_name=field_name
        )

    # Group the result queryset by the facet field and count rows per value
    grouped = self.get_queryset().values(field_name).annotate(count=Count('pk'))
    rows = grouped.order_by('-count')

    counts = OrderedDict()
    for row in rows:
        counts[row[field_name]] = row['count']
    return counts
|
||||
|
||||
|
||||
class DatabaseSearchBackend(BaseSearchBackend):
|
||||
query_compiler_class = DatabaseSearchQueryCompiler
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import copy
|
||||
import json
|
||||
import warnings
|
||||
from collections import OrderedDict
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from django.db import DEFAULT_DB_ALIAS, models
|
||||
|
@ -11,7 +12,7 @@ from elasticsearch import Elasticsearch, NotFoundError
|
|||
from elasticsearch.helpers import bulk
|
||||
|
||||
from wagtail.search.backends.base import (
|
||||
BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults)
|
||||
BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults, FilterFieldError)
|
||||
from wagtail.search.index import FilterField, Indexed, RelatedFields, SearchField, class_is_indexed
|
||||
from wagtail.search.query import (
|
||||
And, Boost, Filter, Fuzzy, MatchAll, Not, Or, PlainText, Prefix, Term)
|
||||
|
@ -239,7 +240,7 @@ class Elasticsearch2Mapping:
|
|||
value = field.get_value(obj)
|
||||
|
||||
if isinstance(field, RelatedFields):
|
||||
if isinstance(value, models.Manager):
|
||||
if isinstance(value, (models.Manager, models.QuerySet)):
|
||||
nested_docs = []
|
||||
|
||||
for nested_obj in value.all():
|
||||
|
@ -251,6 +252,11 @@ class Elasticsearch2Mapping:
|
|||
elif isinstance(value, models.Model):
|
||||
value, extra_edgengrams = self._get_nested_document(field.fields, value)
|
||||
partials.extend(extra_edgengrams)
|
||||
elif isinstance(field, FilterField):
|
||||
if isinstance(value, (models.Manager, models.QuerySet)):
|
||||
value = list(value.values_list('pk', flat=True))
|
||||
elif isinstance(value, models.Model):
|
||||
value = value.pk
|
||||
|
||||
doc[self.get_field_column_name(field)] = value
|
||||
|
||||
|
@ -601,6 +607,41 @@ class Elasticsearch2SearchQueryCompiler(BaseSearchQueryCompiler):
|
|||
|
||||
class Elasticsearch2SearchResults(BaseSearchResults):
|
||||
fields_param_name = 'fields'
|
||||
supports_facet = True
|
||||
|
||||
def facet(self, field_name):
    """
    Count the search results grouped by their value for ``field_name``.

    Adds a ``terms`` aggregation on the field's index column to the body
    from ``_get_es_body()`` and returns an ``OrderedDict`` mapping each
    returned bucket key to its ``doc_count``, in the order the buckets
    appear in the response.

    Raises ``FilterFieldError`` when the query compiler has no filterable
    field called ``field_name``.
    """
    compiler = self.query_compiler

    # Only fields the query compiler reports as filterable may be faceted on
    field = compiler._get_filterable_field(field_name)
    if field is None:
        raise FilterFieldError(
            'Cannot facet search results with field "' + field_name + '". Please add index.FilterField(\'' +
            field_name + '\') to ' + compiler.queryset.model.__name__ + '.search_fields.',
            field_name=field_name
        )

    # Start from the regular search body and attach the aggregation to it
    body = self._get_es_body()
    column_name = compiler.mapping.get_field_column_name(field)
    terms_aggregation = {'terms': {'field': column_name}}
    body['aggregations'] = {field_name: terms_aggregation}

    # NOTE(review): size=0 presumably suppresses the hits so only the
    # aggregation comes back -- confirm against the Elasticsearch docs
    index = self.backend.get_index_for_model(compiler.queryset.model)
    response = self.backend.es.search(
        index=index.name,
        body=body,
        size=0,
    )

    counts = OrderedDict()
    for bucket in response['aggregations'][field_name]['buckets']:
        counts[bucket['key']] = bucket['doc_count']
    return counts
|
||||
|
||||
def _get_es_body(self, for_count=False):
|
||||
body = {
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# coding: utf-8
|
||||
|
||||
import unittest
|
||||
from collections import OrderedDict
|
||||
from datetime import date
|
||||
from io import StringIO
|
||||
|
||||
|
@ -8,10 +9,11 @@ from django.conf import settings
|
|||
from django.core import management
|
||||
from django.test import TestCase
|
||||
from django.test.utils import override_settings
|
||||
from taggit.models import Tag
|
||||
|
||||
from wagtail.search.backends import (
|
||||
InvalidSearchBackendError, get_search_backend, get_search_backends)
|
||||
from wagtail.search.backends.base import FieldError
|
||||
from wagtail.search.backends.base import FieldError, FilterFieldError
|
||||
from wagtail.search.backends.db import DatabaseSearchBackend
|
||||
from wagtail.search.query import MATCH_ALL, And, Boost, Filter, Not, Or, PlainText, Prefix, Term
|
||||
from wagtail.tests.search import models
|
||||
|
@ -394,6 +396,40 @@ class BackendTests(WagtailTestUtils):
|
|||
"A Game of Thrones"
|
||||
])
|
||||
|
||||
# FACET TESTS
|
||||
|
||||
def test_facet(self):
    """Facet ProgrammingGuide results on ``programming_language`` and check the counts."""
    search_results = self.backend.search(MATCH_ALL, models.ProgrammingGuide)
    facet_counts = search_results.facet('programming_language')

    # Two of the entries share the same count, so their relative order is
    # undefined; compare as a plain dict instead. Ordering is covered by
    # test_facet_tags.
    expected_counts = {'js': 2, 'py': 2, 'rs': 1}
    self.assertDictEqual(dict(facet_counts), expected_counts)
|
||||
|
||||
def test_facet_tags(self):
    """Facet Book results on ``tags`` and check both the counts and their order."""
    # The test data doesn't contain any tags, so attach some first
    tagged_book_ids = [
        ('Fantasy', [1, 2, 3, 4, 5, 6, 7]),
        ('Science Fiction', [10]),
    ]
    for tag_name, book_ids in tagged_book_ids:
        for book_id in book_ids:
            models.Book.objects.get(id=book_id).tags.add(tag_name)

    fantasy_tag = Tag.objects.get(name='Fantasy')
    scifi_tag = Tag.objects.get(name='Science Fiction')

    facet_counts = self.backend.search(MATCH_ALL, models.Book).facet('tags')

    # Expected order is by count descending; the None key presumably
    # collects the books that were left untagged -- verify against the fixture
    expected_counts = OrderedDict()
    expected_counts[fantasy_tag.id] = 7
    expected_counts[None] = 5
    expected_counts[scifi_tag.id] = 1

    self.assertEqual(facet_counts, expected_counts)
|
||||
|
||||
def test_facet_with_nonexistent_field(self):
    """Faceting on the field name 'foo' must raise ``FilterFieldError``."""
    with self.assertRaises(FilterFieldError):
        search_results = self.backend.search(MATCH_ALL, models.ProgrammingGuide)
        search_results.facet('foo')
|
||||
|
||||
|
||||
# MISC TESTS
|
||||
|
||||
def test_same_rank_pages(self):
|
||||
|
|
|
@ -538,7 +538,8 @@ class TestElasticsearch2Mapping(TestCase):
|
|||
'name': {'type': 'string', 'include_in_all': True},
|
||||
'slug_filter': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False},
|
||||
},
|
||||
}
|
||||
},
|
||||
'tags_filter': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -571,7 +572,8 @@ class TestElasticsearch2Mapping(TestCase):
|
|||
],
|
||||
'publication_date_filter': datetime.date(1954, 7, 29),
|
||||
'number_of_pages_filter': 423,
|
||||
'tags': []
|
||||
'tags': [],
|
||||
'tags_filter': []
|
||||
}
|
||||
|
||||
self.assertDictEqual(document, expected_result)
|
||||
|
@ -639,7 +641,8 @@ class TestElasticsearch2MappingInheritance(TestCase):
|
|||
'name': {'type': 'string', 'include_in_all': True},
|
||||
'slug_filter': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False},
|
||||
},
|
||||
}
|
||||
},
|
||||
'tags_filter': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -699,7 +702,8 @@ class TestElasticsearch2MappingInheritance(TestCase):
|
|||
],
|
||||
'publication_date_filter': datetime.date(1954, 7, 29),
|
||||
'number_of_pages_filter': 423,
|
||||
'tags': []
|
||||
'tags': [],
|
||||
'tags_filter': []
|
||||
}
|
||||
|
||||
self.assertDictEqual(document, expected_result)
|
||||
|
|
|
@ -539,7 +539,8 @@ class TestElasticsearch5Mapping(TestCase):
|
|||
'name': {'type': 'text', 'include_in_all': True},
|
||||
'slug_filter': {'type': 'keyword', 'include_in_all': False},
|
||||
},
|
||||
}
|
||||
},
|
||||
'tags_filter': {'type': 'keyword', 'include_in_all': False}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -572,7 +573,8 @@ class TestElasticsearch5Mapping(TestCase):
|
|||
],
|
||||
'publication_date_filter': datetime.date(1954, 7, 29),
|
||||
'number_of_pages_filter': 423,
|
||||
'tags': []
|
||||
'tags': [],
|
||||
'tags_filter': []
|
||||
}
|
||||
|
||||
self.assertDictEqual(document, expected_result)
|
||||
|
@ -640,7 +642,8 @@ class TestElasticsearch5MappingInheritance(TestCase):
|
|||
'name': {'type': 'text', 'include_in_all': True},
|
||||
'slug_filter': {'type': 'keyword', 'include_in_all': False},
|
||||
},
|
||||
}
|
||||
},
|
||||
'tags_filter': {'type': 'keyword', 'include_in_all': False}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -700,7 +703,8 @@ class TestElasticsearch5MappingInheritance(TestCase):
|
|||
],
|
||||
'publication_date_filter': datetime.date(1954, 7, 29),
|
||||
'number_of_pages_filter': 423,
|
||||
'tags': []
|
||||
'tags': [],
|
||||
'tags_filter': []
|
||||
}
|
||||
|
||||
self.assertDictEqual(document, expected_result)
|
||||
|
|
|
@ -34,6 +34,7 @@ class Book(index.Indexed, models.Model):
|
|||
index.SearchField('name'),
|
||||
index.FilterField('slug'),
|
||||
]),
|
||||
index.FilterField('tags'),
|
||||
]
|
||||
|
||||
@classmethod
|
||||
|
|
Ładowanie…
Reference in New Issue