kopia lustrzana https://github.com/wagtail/wagtail
Implemented facet() method on search results
Add error handling for when facet field doesn't exist
Count('id' -> 'pk')
Use assertDictEqual
Fix indexing related fields using FilterField
pull/4583/merge
rodzic
3cd18f3c1f
commit
fe76c11043
|
|
@ -97,6 +97,29 @@ This can be limited to a certain set of fields by using the ``fields`` keyword a
|
||||||
>>> EventPage.objects.search("Event", fields=["title"])
|
>>> EventPage.objects.search("Event", fields=["title"])
|
||||||
[<EventPage: Event 1>, <EventPage: Event 2>]
|
[<EventPage: Event 1>, <EventPage: Event 2>]
|
||||||
|
|
||||||
|
Faceted search
|
||||||
|
--------------
|
||||||
|
|
||||||
|
Wagtail supports faceted search, which is a kind of filtering based on a taxonomy
|
||||||
|
field (such as category or page type).
|
||||||
|
|
||||||
|
The ``.facet(field_name)`` method returns an ``OrderedDict``. The keys are the
|
||||||
|
IDs of the related objects that have been referenced by the field and the
|
||||||
|
values are the number of references to each ID. The results are ordered by the number
|
||||||
|
of references descending.
|
||||||
|
|
||||||
|
For example, to find the most common page types in the search results:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
>>> Page.objects.search("Test").facet("content_type_id")
|
||||||
|
|
||||||
|
# Note: The keys correspond to the ID of a ContentType object, the values are the
|
||||||
|
# number of pages returned for that type
|
||||||
|
OrderedDict([
|
||||||
|
('2', 4), # 4 pages have content_type_id == 2
|
||||||
|
('1', 2), # 2 pages have content_type_id == 1
|
||||||
|
])
|
||||||
|
|
||||||
Changing search behaviour
|
Changing search behaviour
|
||||||
-------------------------
|
-------------------------
|
||||||
|
|
|
||||||
|
|
@ -1,15 +1,16 @@
|
||||||
|
from collections import OrderedDict
|
||||||
from warnings import warn
|
from warnings import warn
|
||||||
|
|
||||||
from django.contrib.postgres.search import SearchQuery as PostgresSearchQuery
|
from django.contrib.postgres.search import SearchQuery as PostgresSearchQuery
|
||||||
from django.contrib.postgres.search import SearchRank, SearchVector
|
from django.contrib.postgres.search import SearchRank, SearchVector
|
||||||
from django.db import DEFAULT_DB_ALIAS, NotSupportedError, connections, transaction
|
from django.db import DEFAULT_DB_ALIAS, NotSupportedError, connections, transaction
|
||||||
from django.db.models import F, Manager, Q, TextField, Value
|
from django.db.models import Count, F, Manager, Q, TextField, Value
|
||||||
from django.db.models.constants import LOOKUP_SEP
|
from django.db.models.constants import LOOKUP_SEP
|
||||||
from django.db.models.functions import Cast
|
from django.db.models.functions import Cast
|
||||||
from django.utils.encoding import force_text
|
from django.utils.encoding import force_text
|
||||||
|
|
||||||
from wagtail.search.backends.base import (
|
from wagtail.search.backends.base import (
|
||||||
BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults)
|
BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults, FilterFieldError)
|
||||||
from wagtail.search.index import RelatedFields, SearchField, get_indexed_models
|
from wagtail.search.index import RelatedFields, SearchField, get_indexed_models
|
||||||
from wagtail.search.query import And, MatchAll, Not, Or, Prefix, SearchQueryShortcut, Term
|
from wagtail.search.query import And, MatchAll, Not, Or, Prefix, SearchQueryShortcut, Term
|
||||||
from wagtail.search.utils import ADD, AND, OR
|
from wagtail.search.utils import ADD, AND, OR
|
||||||
|
|
@ -316,6 +317,26 @@ class PostgresSearchResults(BaseSearchResults):
|
||||||
self.backend.config, None, None,
|
self.backend.config, None, None,
|
||||||
score_field=self._score_field).count()
|
score_field=self._score_field).count()
|
||||||
|
|
||||||
|
supports_facet = True
|
||||||
|
|
||||||
|
def facet(self, field_name):
|
||||||
|
# Get field
|
||||||
|
field = self.query_compiler._get_filterable_field(field_name)
|
||||||
|
if field is None:
|
||||||
|
raise FilterFieldError(
|
||||||
|
'Cannot facet search results with field "' + field_name + '". Please add index.FilterField(\'' +
|
||||||
|
field_name + '\') to ' + self.query_compiler.queryset.model.__name__ + '.search_fields.',
|
||||||
|
field_name=field_name
|
||||||
|
)
|
||||||
|
|
||||||
|
query = self.query_compiler.search(self.backend.get_config(), None, None)
|
||||||
|
results = query.values(field_name).annotate(count=Count('pk')).order_by('-count')
|
||||||
|
|
||||||
|
return OrderedDict([
|
||||||
|
(result[field_name], result['count'])
|
||||||
|
for result in results
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
class PostgresSearchRebuilder:
|
class PostgresSearchRebuilder:
|
||||||
def __init__(self, index):
|
def __init__(self, index):
|
||||||
|
|
|
||||||
|
|
@ -161,6 +161,8 @@ class BaseSearchQueryCompiler:
|
||||||
|
|
||||||
|
|
||||||
class BaseSearchResults:
|
class BaseSearchResults:
|
||||||
|
supports_facet = False
|
||||||
|
|
||||||
def __init__(self, backend, query_compiler, prefetch_related=None):
|
def __init__(self, backend, query_compiler, prefetch_related=None):
|
||||||
self.backend = backend
|
self.backend = backend
|
||||||
self.query_compiler = query_compiler
|
self.query_compiler = query_compiler
|
||||||
|
|
@ -251,6 +253,9 @@ class BaseSearchResults:
|
||||||
clone._score_field = field_name
|
clone._score_field = field_name
|
||||||
return clone
|
return clone
|
||||||
|
|
||||||
|
def facet(self, field_name):
|
||||||
|
raise NotImplementedError("This search backend does not support faceting")
|
||||||
|
|
||||||
|
|
||||||
class EmptySearchResults(BaseSearchResults):
|
class EmptySearchResults(BaseSearchResults):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,12 @@
|
||||||
|
from collections import OrderedDict
|
||||||
from warnings import warn
|
from warnings import warn
|
||||||
|
|
||||||
from django.db import models
|
from django.db import models
|
||||||
|
from django.db.models import Count
|
||||||
from django.db.models.expressions import Value
|
from django.db.models.expressions import Value
|
||||||
|
|
||||||
from wagtail.search.backends.base import (
|
from wagtail.search.backends.base import (
|
||||||
BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults)
|
BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults, FilterFieldError)
|
||||||
from wagtail.search.query import And, MatchAll, Not, Or, Prefix, SearchQueryShortcut, Term
|
from wagtail.search.query import And, MatchAll, Not, Or, Prefix, SearchQueryShortcut, Term
|
||||||
from wagtail.search.utils import AND, OR
|
from wagtail.search.utils import AND, OR
|
||||||
|
|
||||||
|
|
@ -106,6 +108,26 @@ class DatabaseSearchResults(BaseSearchResults):
|
||||||
def _do_count(self):
|
def _do_count(self):
|
||||||
return self.get_queryset().count()
|
return self.get_queryset().count()
|
||||||
|
|
||||||
|
supports_facet = True
|
||||||
|
|
||||||
|
def facet(self, field_name):
|
||||||
|
# Get field
|
||||||
|
field = self.query_compiler._get_filterable_field(field_name)
|
||||||
|
if field is None:
|
||||||
|
raise FilterFieldError(
|
||||||
|
'Cannot facet search results with field "' + field_name + '". Please add index.FilterField(\'' +
|
||||||
|
field_name + '\') to ' + self.query_compiler.queryset.model.__name__ + '.search_fields.',
|
||||||
|
field_name=field_name
|
||||||
|
)
|
||||||
|
|
||||||
|
query = self.get_queryset()
|
||||||
|
results = query.values(field_name).annotate(count=Count('pk')).order_by('-count')
|
||||||
|
|
||||||
|
return OrderedDict([
|
||||||
|
(result[field_name], result['count'])
|
||||||
|
for result in results
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
class DatabaseSearchBackend(BaseSearchBackend):
|
class DatabaseSearchBackend(BaseSearchBackend):
|
||||||
query_compiler_class = DatabaseSearchQueryCompiler
|
query_compiler_class = DatabaseSearchQueryCompiler
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
import copy
|
import copy
|
||||||
import json
|
import json
|
||||||
import warnings
|
import warnings
|
||||||
|
from collections import OrderedDict
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
from django.db import DEFAULT_DB_ALIAS, models
|
from django.db import DEFAULT_DB_ALIAS, models
|
||||||
|
|
@ -11,7 +12,7 @@ from elasticsearch import Elasticsearch, NotFoundError
|
||||||
from elasticsearch.helpers import bulk
|
from elasticsearch.helpers import bulk
|
||||||
|
|
||||||
from wagtail.search.backends.base import (
|
from wagtail.search.backends.base import (
|
||||||
BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults)
|
BaseSearchBackend, BaseSearchQueryCompiler, BaseSearchResults, FilterFieldError)
|
||||||
from wagtail.search.index import FilterField, Indexed, RelatedFields, SearchField, class_is_indexed
|
from wagtail.search.index import FilterField, Indexed, RelatedFields, SearchField, class_is_indexed
|
||||||
from wagtail.search.query import (
|
from wagtail.search.query import (
|
||||||
And, Boost, Filter, Fuzzy, MatchAll, Not, Or, PlainText, Prefix, Term)
|
And, Boost, Filter, Fuzzy, MatchAll, Not, Or, PlainText, Prefix, Term)
|
||||||
|
|
@ -239,7 +240,7 @@ class Elasticsearch2Mapping:
|
||||||
value = field.get_value(obj)
|
value = field.get_value(obj)
|
||||||
|
|
||||||
if isinstance(field, RelatedFields):
|
if isinstance(field, RelatedFields):
|
||||||
if isinstance(value, models.Manager):
|
if isinstance(value, (models.Manager, models.QuerySet)):
|
||||||
nested_docs = []
|
nested_docs = []
|
||||||
|
|
||||||
for nested_obj in value.all():
|
for nested_obj in value.all():
|
||||||
|
|
@ -251,6 +252,11 @@ class Elasticsearch2Mapping:
|
||||||
elif isinstance(value, models.Model):
|
elif isinstance(value, models.Model):
|
||||||
value, extra_edgengrams = self._get_nested_document(field.fields, value)
|
value, extra_edgengrams = self._get_nested_document(field.fields, value)
|
||||||
partials.extend(extra_edgengrams)
|
partials.extend(extra_edgengrams)
|
||||||
|
elif isinstance(field, FilterField):
|
||||||
|
if isinstance(value, (models.Manager, models.QuerySet)):
|
||||||
|
value = list(value.values_list('pk', flat=True))
|
||||||
|
elif isinstance(value, models.Model):
|
||||||
|
value = value.pk
|
||||||
|
|
||||||
doc[self.get_field_column_name(field)] = value
|
doc[self.get_field_column_name(field)] = value
|
||||||
|
|
||||||
|
|
@ -601,6 +607,41 @@ class Elasticsearch2SearchQueryCompiler(BaseSearchQueryCompiler):
|
||||||
|
|
||||||
class Elasticsearch2SearchResults(BaseSearchResults):
|
class Elasticsearch2SearchResults(BaseSearchResults):
|
||||||
fields_param_name = 'fields'
|
fields_param_name = 'fields'
|
||||||
|
supports_facet = True
|
||||||
|
|
||||||
|
def facet(self, field_name):
|
||||||
|
# Get field
|
||||||
|
field = self.query_compiler._get_filterable_field(field_name)
|
||||||
|
if field is None:
|
||||||
|
raise FilterFieldError(
|
||||||
|
'Cannot facet search results with field "' + field_name + '". Please add index.FilterField(\'' +
|
||||||
|
field_name + '\') to ' + self.query_compiler.queryset.model.__name__ + '.search_fields.',
|
||||||
|
field_name=field_name
|
||||||
|
)
|
||||||
|
|
||||||
|
# Build body
|
||||||
|
body = self._get_es_body()
|
||||||
|
column_name = self.query_compiler.mapping.get_field_column_name(field)
|
||||||
|
|
||||||
|
body['aggregations'] = {
|
||||||
|
field_name: {
|
||||||
|
'terms': {
|
||||||
|
'field': column_name,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Send to Elasticsearch
|
||||||
|
response = self.backend.es.search(
|
||||||
|
index=self.backend.get_index_for_model(self.query_compiler.queryset.model).name,
|
||||||
|
body=body,
|
||||||
|
size=0,
|
||||||
|
)
|
||||||
|
|
||||||
|
return OrderedDict([
|
||||||
|
(bucket['key'], bucket['doc_count'])
|
||||||
|
for bucket in response['aggregations'][field_name]['buckets']
|
||||||
|
])
|
||||||
|
|
||||||
def _get_es_body(self, for_count=False):
|
def _get_es_body(self, for_count=False):
|
||||||
body = {
|
body = {
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
|
||||||
import unittest
|
import unittest
|
||||||
|
from collections import OrderedDict
|
||||||
from datetime import date
|
from datetime import date
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
|
|
||||||
|
|
@ -8,10 +9,11 @@ from django.conf import settings
|
||||||
from django.core import management
|
from django.core import management
|
||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
from django.test.utils import override_settings
|
from django.test.utils import override_settings
|
||||||
|
from taggit.models import Tag
|
||||||
|
|
||||||
from wagtail.search.backends import (
|
from wagtail.search.backends import (
|
||||||
InvalidSearchBackendError, get_search_backend, get_search_backends)
|
InvalidSearchBackendError, get_search_backend, get_search_backends)
|
||||||
from wagtail.search.backends.base import FieldError
|
from wagtail.search.backends.base import FieldError, FilterFieldError
|
||||||
from wagtail.search.backends.db import DatabaseSearchBackend
|
from wagtail.search.backends.db import DatabaseSearchBackend
|
||||||
from wagtail.search.query import MATCH_ALL, And, Boost, Filter, Not, Or, PlainText, Prefix, Term
|
from wagtail.search.query import MATCH_ALL, And, Boost, Filter, Not, Or, PlainText, Prefix, Term
|
||||||
from wagtail.tests.search import models
|
from wagtail.tests.search import models
|
||||||
|
|
@ -394,6 +396,40 @@ class BackendTests(WagtailTestUtils):
|
||||||
"A Game of Thrones"
|
"A Game of Thrones"
|
||||||
])
|
])
|
||||||
|
|
||||||
|
# FACET TESTS
|
||||||
|
|
||||||
|
def test_facet(self):
|
||||||
|
results = self.backend.search(MATCH_ALL, models.ProgrammingGuide).facet('programming_language')
|
||||||
|
|
||||||
|
# Not testing ordering here as two of the items have the same count, so the ordering is undefined.
|
||||||
|
# See test_facet_tags for a test of the ordering
|
||||||
|
self.assertDictEqual(dict(results), {'js': 2, 'py': 2, 'rs': 1})
|
||||||
|
|
||||||
|
def test_facet_tags(self):
|
||||||
|
# The test data doesn't contain any tags, add some
|
||||||
|
FANTASY_BOOKS = [1, 2, 3, 4, 5, 6, 7]
|
||||||
|
SCIFI_BOOKS = [10]
|
||||||
|
for book_id in FANTASY_BOOKS:
|
||||||
|
models.Book.objects.get(id=book_id).tags.add('Fantasy')
|
||||||
|
for book_id in SCIFI_BOOKS:
|
||||||
|
models.Book.objects.get(id=book_id).tags.add('Science Fiction')
|
||||||
|
|
||||||
|
fantasy_tag = Tag.objects.get(name='Fantasy')
|
||||||
|
scifi_tag = Tag.objects.get(name='Science Fiction')
|
||||||
|
|
||||||
|
results = self.backend.search(MATCH_ALL, models.Book).facet('tags')
|
||||||
|
|
||||||
|
self.assertEqual(results, OrderedDict([
|
||||||
|
(fantasy_tag.id, 7),
|
||||||
|
(None, 5),
|
||||||
|
(scifi_tag.id, 1),
|
||||||
|
]))
|
||||||
|
|
||||||
|
def test_facet_with_nonexistent_field(self):
|
||||||
|
with self.assertRaises(FilterFieldError):
|
||||||
|
self.backend.search(MATCH_ALL, models.ProgrammingGuide).facet('foo')
|
||||||
|
|
||||||
|
|
||||||
# MISC TESTS
|
# MISC TESTS
|
||||||
|
|
||||||
def test_same_rank_pages(self):
|
def test_same_rank_pages(self):
|
||||||
|
|
|
||||||
|
|
@ -538,7 +538,8 @@ class TestElasticsearch2Mapping(TestCase):
|
||||||
'name': {'type': 'string', 'include_in_all': True},
|
'name': {'type': 'string', 'include_in_all': True},
|
||||||
'slug_filter': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False},
|
'slug_filter': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False},
|
||||||
},
|
},
|
||||||
}
|
},
|
||||||
|
'tags_filter': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -571,7 +572,8 @@ class TestElasticsearch2Mapping(TestCase):
|
||||||
],
|
],
|
||||||
'publication_date_filter': datetime.date(1954, 7, 29),
|
'publication_date_filter': datetime.date(1954, 7, 29),
|
||||||
'number_of_pages_filter': 423,
|
'number_of_pages_filter': 423,
|
||||||
'tags': []
|
'tags': [],
|
||||||
|
'tags_filter': []
|
||||||
}
|
}
|
||||||
|
|
||||||
self.assertDictEqual(document, expected_result)
|
self.assertDictEqual(document, expected_result)
|
||||||
|
|
@ -639,7 +641,8 @@ class TestElasticsearch2MappingInheritance(TestCase):
|
||||||
'name': {'type': 'string', 'include_in_all': True},
|
'name': {'type': 'string', 'include_in_all': True},
|
||||||
'slug_filter': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False},
|
'slug_filter': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False},
|
||||||
},
|
},
|
||||||
}
|
},
|
||||||
|
'tags_filter': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -699,7 +702,8 @@ class TestElasticsearch2MappingInheritance(TestCase):
|
||||||
],
|
],
|
||||||
'publication_date_filter': datetime.date(1954, 7, 29),
|
'publication_date_filter': datetime.date(1954, 7, 29),
|
||||||
'number_of_pages_filter': 423,
|
'number_of_pages_filter': 423,
|
||||||
'tags': []
|
'tags': [],
|
||||||
|
'tags_filter': []
|
||||||
}
|
}
|
||||||
|
|
||||||
self.assertDictEqual(document, expected_result)
|
self.assertDictEqual(document, expected_result)
|
||||||
|
|
|
||||||
|
|
@ -539,7 +539,8 @@ class TestElasticsearch5Mapping(TestCase):
|
||||||
'name': {'type': 'text', 'include_in_all': True},
|
'name': {'type': 'text', 'include_in_all': True},
|
||||||
'slug_filter': {'type': 'keyword', 'include_in_all': False},
|
'slug_filter': {'type': 'keyword', 'include_in_all': False},
|
||||||
},
|
},
|
||||||
}
|
},
|
||||||
|
'tags_filter': {'type': 'keyword', 'include_in_all': False}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -572,7 +573,8 @@ class TestElasticsearch5Mapping(TestCase):
|
||||||
],
|
],
|
||||||
'publication_date_filter': datetime.date(1954, 7, 29),
|
'publication_date_filter': datetime.date(1954, 7, 29),
|
||||||
'number_of_pages_filter': 423,
|
'number_of_pages_filter': 423,
|
||||||
'tags': []
|
'tags': [],
|
||||||
|
'tags_filter': []
|
||||||
}
|
}
|
||||||
|
|
||||||
self.assertDictEqual(document, expected_result)
|
self.assertDictEqual(document, expected_result)
|
||||||
|
|
@ -640,7 +642,8 @@ class TestElasticsearch5MappingInheritance(TestCase):
|
||||||
'name': {'type': 'text', 'include_in_all': True},
|
'name': {'type': 'text', 'include_in_all': True},
|
||||||
'slug_filter': {'type': 'keyword', 'include_in_all': False},
|
'slug_filter': {'type': 'keyword', 'include_in_all': False},
|
||||||
},
|
},
|
||||||
}
|
},
|
||||||
|
'tags_filter': {'type': 'keyword', 'include_in_all': False}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -700,7 +703,8 @@ class TestElasticsearch5MappingInheritance(TestCase):
|
||||||
],
|
],
|
||||||
'publication_date_filter': datetime.date(1954, 7, 29),
|
'publication_date_filter': datetime.date(1954, 7, 29),
|
||||||
'number_of_pages_filter': 423,
|
'number_of_pages_filter': 423,
|
||||||
'tags': []
|
'tags': [],
|
||||||
|
'tags_filter': []
|
||||||
}
|
}
|
||||||
|
|
||||||
self.assertDictEqual(document, expected_result)
|
self.assertDictEqual(document, expected_result)
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,7 @@ class Book(index.Indexed, models.Model):
|
||||||
index.SearchField('name'),
|
index.SearchField('name'),
|
||||||
index.FilterField('slug'),
|
index.FilterField('slug'),
|
||||||
]),
|
]),
|
||||||
|
index.FilterField('tags'),
|
||||||
]
|
]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|
|
||||||
Ładowanie…
Reference in New Issue