Merge branch 'kaedroho-searchchanges/searchonqueryset'

pull/443/head
Matt Westcott 2014-07-08 20:26:17 +01:00
commit 747aeb9f78
5 zmienionych plików z 203 dodań i 109 usunięć

Wyświetl plik

@ -521,7 +521,7 @@ class Page(six.with_metaclass(PageBase, MP_Node, ClusterableModel, indexed.Index
# Search # Search
s = get_search_backend() s = get_search_backend()
return s.search(query_string, model=cls, fields=fields, filters=filters, prefetch_related=prefetch_related) return s.search(query_string, cls, fields=fields, filters=filters, prefetch_related=prefetch_related)
@classmethod @classmethod
def clean_subpage_types(cls): def clean_subpage_types(cls):

Wyświetl plik

@ -27,43 +27,44 @@ class DBSearch(BaseSearch):
def delete(self, obj): def delete(self, obj):
pass # Not needed pass # Not needed
def search(self, query_string, model, fields=None, filters={}, prefetch_related=[]): def search(self, query_string, model, fields=None, filters=None, prefetch_related=None):
# Normalise query string
query_string = normalise_query_string(query_string)
# Get terms
terms = query_string.split()
if not terms:
return model.objects.none()
# Get fields # Get fields
if fields is None: if fields is None:
fields = [field.field_name for field in model.get_searchable_search_fields()] fields = [field.field_name for field in model.get_searchable_search_fields()]
# Start will all objects # Start with all objects
query = model.objects.all() query = model.objects.all()
# Apply filters # Apply filters
if filters: if filters:
query = query.filter(**filters) query = query.filter(**filters)
# Filter by terms if query_string is not None:
for term in terms: # Normalise query string
term_query = models.Q() query_string = normalise_query_string(query_string)
for field_name in fields:
# Check if the field exists (this will filter out indexed callables)
try:
model._meta.get_field_by_name(field_name)
except:
continue
# Filter on this field # Get terms
term_query |= models.Q(**{'%s__icontains' % field_name: term}) terms = query_string.split()
if not terms:
return model.objects.none()
query = query.filter(term_query) # Filter by terms
for term in terms:
term_query = models.Q()
for field_name in fields:
# Check if the field exists (this will filter out indexed callables)
try:
model._meta.get_field_by_name(field_name)
except:
continue
# Distinct # Filter on this field
query = query.distinct() term_query |= models.Q(**{'%s__icontains' % field_name: term})
query = query.filter(term_query)
# Distinct
query = query.distinct()
# Prefetch related # Prefetch related
if prefetch_related: if prefetch_related:

Wyświetl plik

@ -3,6 +3,7 @@ from __future__ import absolute_import
import json import json
from django.db import models from django.db import models
from django.db.models.query import QuerySet
from elasticsearch import Elasticsearch, NotFoundError, RequestError from elasticsearch import Elasticsearch, NotFoundError, RequestError
from elasticsearch.helpers import bulk from elasticsearch.helpers import bulk
@ -109,12 +110,123 @@ class ElasticSearchMapping(object):
return '<ElasticSearchMapping: %s>' % (self.model.__name__, ) return '<ElasticSearchMapping: %s>' % (self.model.__name__, )
class FilterError(Exception):
    """Raised when a queryset lookup has no ElasticSearch filter equivalent.

    ``ElasticSearchQuery._get_filters_from_where`` raises this for any lookup
    type it does not recognise.
    """
    pass
class FieldError(Exception):
    """Raised when a queryset filters on a field that is not filterable.

    ``ElasticSearchQuery._get_filters_from_where`` raises this when the
    filtered column is not among the model's filterable search fields.
    """
    pass
class ElasticSearchQuery(object): class ElasticSearchQuery(object):
def __init__(self, model, query_string, fields=None, filters={}): def __init__(self, queryset, query_string, fields=None):
self.model = model self.queryset = queryset
self.query_string = query_string self.query_string = query_string
self.fields = fields or ['_all', '_partials'] self.fields = fields or ['_all', '_partials']
self.filters = filters
def _get_filters_from_where(self, where_node):
# Check if this is a leaf node
if isinstance(where_node, tuple):
field_name = where_node[0].col
lookup = where_node[1]
value = where_node[3]
# Get field
field = dict(
(field.get_attname(self.queryset.model), field)
for field in self.queryset.model.get_filterable_search_fields()
).get(field_name, None)
# Give error if the field doesn't exist
if field is None:
raise FieldError('Cannot filter ElasticSearch results with field "' + field_name + '". Please add FilterField(\'' + field_name + '\') to ' + self.queryset.model.__name__ + '.search_fields.')
# Get the name of the field in the index
field_index_name = field.get_index_name(self.queryset.model)
# Find lookup
if lookup == 'exact':
if value is None:
return {
'missing': {
'field': field_index_name,
}
}
else:
return {
'term': {
field_index_name: value,
}
}
if lookup == 'isnull':
if value:
return {
'missing': {
'field': field_index_name,
}
}
else:
return {
'not': {
'missing': {
'field': field_index_name,
}
}
}
if lookup in ['startswith', 'prefix']:
return {
'prefix': {
field_index_name: value,
}
}
if lookup in ['gt', 'gte', 'lt', 'lte']:
return {
'range': {
field_index_name: {
lookup: value,
}
}
}
if lookup == 'range':
lower, upper = value
return {
'range': {
field_index_name: {
'gte': lower,
'lte': upper,
}
}
}
raise FilterError('Could not apply filter on ElasticSearch results "' + field_name + '__' + lookup + ' = ' + unicode(value) + '". Lookup "' + lookup + '"" not recognosed.')
# Get child filters
connector = where_node.connector
child_filters = [self._get_filters_from_where(child) for child in where_node.children]
child_filters = [child_filter for child_filter in child_filters if child_filter]
# Connect them
if child_filters:
if len(child_filters) == 1:
filter_out = child_filters[0]
else:
filter_out = {
connector.lower(): [
fil for fil in child_filters if fil is not None
]
}
if where_node.negated:
filter_out = {
'not': filter_out
}
return filter_out
def _get_filters(self): def _get_filters(self):
# Filters # Filters
@ -123,85 +235,54 @@ class ElasticSearchQuery(object):
# Filter by content type # Filter by content type
filters.append({ filters.append({
'prefix': { 'prefix': {
'content_type': self.model.indexed_get_content_type() 'content_type': self.queryset.model.indexed_get_content_type()
} }
}) })
# Extra filters # Apply filters from queryset
if self.filters: queryset_filters = self._get_filters_from_where(self.queryset.query.where)
for key, value in self.filters.items(): if queryset_filters:
if '__' in key: filters.append(queryset_filters)
field, lookup = key.split('__')
else:
field = key
lookup = None
if lookup is None:
if value is None:
filters.append({
'missing': {
'field': field,
}
})
else:
filters.append({
'term': {
field: value
}
})
if lookup in ['startswith', 'prefix']:
filters.append({
'prefix': {
field: value
}
})
if lookup in ['gt', 'gte', 'lt', 'lte']:
filters.append({
'range': {
field: {
lookup: value,
}
}
})
if lookup == 'range':
lower, upper = value
filters.append({
'range': {
field: {
'gte': lower,
'lte': upper,
}
}
})
return filters return filters
def to_es(self): def to_es(self):
# Query # Query
query = { if self.query_string is not None:
'query_string': { query = {
'query': self.query_string, 'query_string': {
'query': self.query_string,
}
} }
}
# Fields # Fields
if self.fields: if self.fields:
query['query_string']['fields'] = self.fields query['query_string']['fields'] = self.fields
else:
query = {
'match_all': {}
}
# Filters # Filters
filters = self._get_filters() filters = self._get_filters()
if len(filters) == 1:
return { query = {
'filtered': { 'filtered': {
'query': query, 'query': query,
'filter': { 'filter': filters[0],
'and': filters,
} }
} }
} elif len(filters) > 1:
query = {
'filtered': {
'query': query,
'filter': {
'and': filters,
}
}
}
return query
def __repr__(self): def __repr__(self):
return json.dumps(self.to_es()) return json.dumps(self.to_es())
@ -263,15 +344,8 @@ class ElasticSearchResults(object):
# Initialise results dictionary # Initialise results dictionary
results = dict((str(pk), None) for pk in pks) results = dict((str(pk), None) for pk in pks)
# Get queryset
queryset = self.query.model.objects.filter(pk__in=pks)
# Add prefetch related
if self.prefetch_related:
for prefetch in self.prefetch_related:
queryset = queryset.prefetch_related(prefetch)
# Find objects in database and add them to dict # Find objects in database and add them to dict
queryset = self.query.queryset.filter(pk__in=pks)
for obj in queryset: for obj in queryset:
results[str(obj.pk)] = obj results[str(obj.pk)] = obj
@ -502,17 +576,35 @@ class ElasticSearch(BaseSearch):
except NotFoundError: except NotFoundError:
pass # Document doesn't exist, ignore this exception pass # Document doesn't exist, ignore this exception
def search(self, query_string, model, fields=None, filters={}, prefetch_related=[]): def search(self, query_string, model_or_queryset, fields=None, filters=None, prefetch_related=None):
# Find model/queryset
if isinstance(model_or_queryset, QuerySet):
model = model_or_queryset.model
queryset = model_or_queryset
else:
model = model_or_queryset
queryset = model_or_queryset.objects.all()
# Model must be a descendant of Indexed and be a django model # Model must be a descendant of Indexed and be a django model
if not issubclass(model, Indexed) or not issubclass(model, models.Model): if not issubclass(model, Indexed) or not issubclass(model, models.Model):
return [] return []
# Normalise query string # Normalise query string
query_string = normalise_query_string(query_string) if query_string is not None:
query_string = normalise_query_string(query_string)
# Check that theres still a query string after the clean up # Check that theres still a query string after the clean up
if not query_string: if query_string == "":
return [] return []
# Apply filters to queryset
if filters:
queryset = queryset.filter(**filters)
# Prefetch related
if prefetch_related:
for prefetch in prefetch_related:
queryset = queryset.prefetch_related(prefetch)
# Return search results # Return search results
return ElasticSearchResults(self, ElasticSearchQuery(model, query_string, fields=fields, filters=filters), prefetch_related=prefetch_related) return ElasticSearchResults(self, ElasticSearchQuery(queryset, query_string, fields=fields))

Wyświetl plik

@ -112,13 +112,16 @@ class Indexed(object):
@classmethod @classmethod
def get_searchable_search_fields(cls): def get_searchable_search_fields(cls):
return filter(lambda field: field.searchable, cls.get_search_fields()) return filter(lambda field: isinstance(field, SearchField), cls.get_search_fields())
@classmethod
def get_filterable_search_fields(cls):
return filter(lambda field: isinstance(field, FilterField), cls.get_search_fields())
indexed_fields = () indexed_fields = ()
class BaseField(object): class BaseField(object):
searchable = False
suffix = '' suffix = ''
def __init__(self, field_name, **kwargs): def __init__(self, field_name, **kwargs):
@ -163,8 +166,6 @@ class BaseField(object):
class SearchField(BaseField): class SearchField(BaseField):
searchable = True
def __init__(self, field_name, boost=None, partial_match=False, **kwargs): def __init__(self, field_name, boost=None, partial_match=False, **kwargs):
super(SearchField, self).__init__(field_name, **kwargs) super(SearchField, self).__init__(field_name, **kwargs)
self.boost = boost self.boost = boost

Wyświetl plik

@ -94,7 +94,7 @@ class SearchTest(models.Model, indexed.Indexed):
indexed.SearchField('title'), indexed.SearchField('title'),
indexed.SearchField('content'), indexed.SearchField('content'),
indexed.SearchField('callable_indexed_field'), indexed.SearchField('callable_indexed_field'),
indexed.SearchField('live'), indexed.FilterField('live'),
) )
def callable_indexed_field(self): def callable_indexed_field(self):