kopia lustrzana https://github.com/wagtail/wagtail
Merge pull request #3940 from BertrandBordage/postgres_search_improvements
Postgres_search simplification.pull/3965/head
commit
6514650aa4
|
|
@ -33,7 +33,7 @@ def pytest_configure(config):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
if config.getoption('postgres'):
|
if config.getoption('postgres'):
|
||||||
os.environ['DATABASE_ENGINE'] = 'django.db.backends.postgresql_psycopg2'
|
os.environ['DATABASE_ENGINE'] = 'django.db.backends.postgresql'
|
||||||
|
|
||||||
# Setup django after processing the pytest arguments so that the env
|
# Setup django after processing the pytest arguments so that the env
|
||||||
# variables are available in the settings
|
# variables are available in the settings
|
||||||
|
|
|
||||||
|
|
@ -560,7 +560,7 @@ These two files should reside in your project directory (``myproject/myproject/`
|
||||||
|
|
||||||
DATABASES = {
|
DATABASES = {
|
||||||
'default': {
|
'default': {
|
||||||
'ENGINE': 'django.db.backends.postgresql_psycopg2',
|
'ENGINE': 'django.db.backends.postgresql',
|
||||||
'NAME': 'myprojectdb',
|
'NAME': 'myprojectdb',
|
||||||
'USER': 'postgres',
|
'USER': 'postgres',
|
||||||
'PASSWORD': '',
|
'PASSWORD': '',
|
||||||
|
|
|
||||||
|
|
@ -47,7 +47,7 @@ def runtests():
|
||||||
pass
|
pass
|
||||||
|
|
||||||
if args.postgres:
|
if args.postgres:
|
||||||
os.environ['DATABASE_ENGINE'] = 'django.db.backends.postgresql_psycopg2'
|
os.environ['DATABASE_ENGINE'] = 'django.db.backends.postgresql'
|
||||||
|
|
||||||
if args.elasticsearch:
|
if args.elasticsearch:
|
||||||
os.environ.setdefault('ELASTICSEARCH_URL', 'http://localhost:9200')
|
os.environ.setdefault('ELASTICSEARCH_URL', 'http://localhost:9200')
|
||||||
|
|
|
||||||
|
|
@ -16,25 +16,13 @@ from wagtail.wagtailsearch.index import RelatedFields, SearchField
|
||||||
|
|
||||||
from .models import IndexEntry
|
from .models import IndexEntry
|
||||||
from .utils import (
|
from .utils import (
|
||||||
ADD, AND, OR, WEIGHTS_VALUES, get_content_types_pks, get_postgresql_connections, get_weight,
|
ADD, AND, OR, WEIGHTS_VALUES, get_content_types_pk, get_descendants_content_types_pks,
|
||||||
keyword_split, unidecode)
|
get_postgresql_connections, get_weight, keyword_split, unidecode)
|
||||||
|
|
||||||
|
|
||||||
# TODO: Add autocomplete.
|
# TODO: Add autocomplete.
|
||||||
|
|
||||||
|
|
||||||
def get_db_alias(queryset):
|
|
||||||
return queryset._db or DEFAULT_DB_ALIAS
|
|
||||||
|
|
||||||
|
|
||||||
def get_sql(queryset):
|
|
||||||
return queryset.query.get_compiler(get_db_alias(queryset)).as_sql()
|
|
||||||
|
|
||||||
|
|
||||||
def get_pk_column(model):
|
|
||||||
return model._meta.pk.get_attname_column()[1]
|
|
||||||
|
|
||||||
|
|
||||||
@python_2_unicode_compatible
|
@python_2_unicode_compatible
|
||||||
class Index(object):
|
class Index(object):
|
||||||
def __init__(self, backend, model, db_alias=None):
|
def __init__(self, backend, model, db_alias=None):
|
||||||
|
|
@ -64,14 +52,13 @@ class Index(object):
|
||||||
existing_pks = (self.model._default_manager.using(self.db_alias)
|
existing_pks = (self.model._default_manager.using(self.db_alias)
|
||||||
.annotate(object_id=Cast('pk', TextField()))
|
.annotate(object_id=Cast('pk', TextField()))
|
||||||
.values('object_id'))
|
.values('object_id'))
|
||||||
stale_entries = (IndexEntry._default_manager.using(self.db_alias)
|
content_type_ids = get_descendants_content_types_pks(self.model)
|
||||||
.for_models(self.model)
|
stale_entries = (
|
||||||
.exclude(object_id__in=existing_pks))
|
IndexEntry._default_manager.using(self.db_alias)
|
||||||
|
.filter(content_type_id__in=content_type_ids)
|
||||||
|
.exclude(object_id__in=existing_pks))
|
||||||
stale_entries.delete()
|
stale_entries.delete()
|
||||||
|
|
||||||
def get_config(self):
|
|
||||||
return self.backend.params.get('SEARCH_CONFIG')
|
|
||||||
|
|
||||||
def prepare_value(self, value):
|
def prepare_value(self, value):
|
||||||
if isinstance(value, string_types):
|
if isinstance(value, string_types):
|
||||||
return value
|
return value
|
||||||
|
|
@ -134,9 +121,8 @@ class Index(object):
|
||||||
ids_and_objs = {}
|
ids_and_objs = {}
|
||||||
for obj in objs:
|
for obj in objs:
|
||||||
obj._search_vector = (
|
obj._search_vector = (
|
||||||
ADD([
|
ADD([SearchVector(Value(text), weight=weight, config=config)
|
||||||
SearchVector(Value(text), weight=weight, config=config)
|
for text, weight in obj._body_])
|
||||||
for text, weight in obj._body_])
|
|
||||||
if obj._body_ else SearchVector(Value('')))
|
if obj._body_ else SearchVector(Value('')))
|
||||||
ids_and_objs[obj._object_id] = obj
|
ids_and_objs[obj._object_id] = obj
|
||||||
index_entries = IndexEntry._default_manager.using(self.db_alias)
|
index_entries = IndexEntry._default_manager.using(self.db_alias)
|
||||||
|
|
@ -160,8 +146,8 @@ class Index(object):
|
||||||
index_entries.bulk_create(to_be_created)
|
index_entries.bulk_create(to_be_created)
|
||||||
|
|
||||||
def add_items(self, model, objs):
|
def add_items(self, model, objs):
|
||||||
content_type_pk = get_content_types_pks((model,), self.db_alias)[0]
|
content_type_pk = get_content_types_pk(model)
|
||||||
config = self.get_config()
|
config = self.backend.get_config()
|
||||||
for obj in objs:
|
for obj in objs:
|
||||||
obj._object_id = force_text(obj.pk)
|
obj._object_id = force_text(obj.pk)
|
||||||
obj._body_ = self.prepare_body(obj)
|
obj._body_ = self.prepare_body(obj)
|
||||||
|
|
@ -189,27 +175,6 @@ class PostgresSearchQuery(BaseSearchQuery):
|
||||||
return SearchQuery('')
|
return SearchQuery('')
|
||||||
return combine(SearchQuery(q, config=config) for q in search_terms)
|
return combine(SearchQuery(q, config=config) for q in search_terms)
|
||||||
|
|
||||||
def get_base_queryset(self):
|
|
||||||
# Removes order for performance’s sake.
|
|
||||||
return self.queryset.order_by()
|
|
||||||
|
|
||||||
def get_in_index_queryset(self, queryset, search_query):
|
|
||||||
return (IndexEntry._default_manager.using(get_db_alias(queryset))
|
|
||||||
.for_models(queryset.model).filter(body_search=search_query))
|
|
||||||
|
|
||||||
def get_in_index_count(self, queryset, search_query):
|
|
||||||
index_sql, index_params = get_sql(
|
|
||||||
self.get_in_index_queryset(queryset, search_query).pks())
|
|
||||||
model_sql, model_params = get_sql(queryset)
|
|
||||||
sql = """
|
|
||||||
SELECT COUNT(*)
|
|
||||||
FROM (%s) AS index_entry
|
|
||||||
INNER JOIN (%s) AS obj ON obj."%s" = index_entry.typed_pk;
|
|
||||||
""" % (index_sql, model_sql, get_pk_column(queryset.model))
|
|
||||||
with connections[get_db_alias(queryset)].cursor() as cursor:
|
|
||||||
cursor.execute(sql, index_params + model_params)
|
|
||||||
return cursor.fetchone()[0]
|
|
||||||
|
|
||||||
def get_boost(self, field_name, fields=None):
|
def get_boost(self, field_name, fields=None):
|
||||||
if fields is None:
|
if fields is None:
|
||||||
fields = self.search_fields
|
fields = self.search_fields
|
||||||
|
|
@ -226,78 +191,43 @@ class PostgresSearchQuery(BaseSearchQuery):
|
||||||
return self.get_boost(sub_field_name, field.fields)
|
return self.get_boost(sub_field_name, field.fields)
|
||||||
return field.boost
|
return field.boost
|
||||||
|
|
||||||
def get_in_fields_queryset(self, queryset, search_query):
|
|
||||||
if not self.fields:
|
|
||||||
return queryset.none()
|
|
||||||
return (
|
|
||||||
queryset.annotate(
|
|
||||||
_search_=ADD(
|
|
||||||
SearchVector(field, config=search_query.config,
|
|
||||||
weight=get_weight(self.get_boost(field)))
|
|
||||||
for field in self.fields))
|
|
||||||
.filter(_search_=search_query))
|
|
||||||
|
|
||||||
def search_count(self, config):
|
|
||||||
queryset = self.get_base_queryset()
|
|
||||||
search_query = self.get_search_query(config=config)
|
|
||||||
if self.fields is None:
|
|
||||||
return self.get_in_index_count(queryset, search_query)
|
|
||||||
return self.get_in_fields_queryset(queryset, search_query).count()
|
|
||||||
|
|
||||||
def search_in_index(self, queryset, search_query, start, stop):
|
|
||||||
index_entries = self.get_in_index_queryset(queryset, search_query)
|
|
||||||
values = ['typed_pk']
|
|
||||||
if self.order_by_relevance:
|
|
||||||
index_entries = index_entries.rank(search_query)
|
|
||||||
values.append('rank')
|
|
||||||
order_sql = 'index_entry.rank DESC, id ASC'
|
|
||||||
else:
|
|
||||||
order_sql = 'id ASC'
|
|
||||||
index_sql, index_params = get_sql(
|
|
||||||
index_entries.annotate_typed_pk()
|
|
||||||
.values(*values)
|
|
||||||
)
|
|
||||||
model_sql, model_params = get_sql(queryset)
|
|
||||||
model = queryset.model
|
|
||||||
sql = """
|
|
||||||
SELECT obj.*
|
|
||||||
FROM (%s) AS index_entry
|
|
||||||
INNER JOIN (%s) AS obj ON obj."%s" = index_entry.typed_pk
|
|
||||||
ORDER BY %s
|
|
||||||
OFFSET %%s LIMIT %%s;
|
|
||||||
""" % (index_sql, model_sql, get_pk_column(model), order_sql)
|
|
||||||
limits = (start, None if stop is None else stop - start)
|
|
||||||
return model._default_manager.using(get_db_alias(queryset)).raw(
|
|
||||||
sql, index_params + model_params + limits)
|
|
||||||
|
|
||||||
def search_in_fields(self, queryset, search_query, start, stop):
|
|
||||||
return (self.get_in_fields_queryset(queryset, search_query)
|
|
||||||
.annotate(_rank_=SearchRank(F('_search_'), search_query,
|
|
||||||
weights=WEIGHTS_VALUES))
|
|
||||||
.order_by('-_rank_'))[start:stop]
|
|
||||||
|
|
||||||
def search(self, config, start, stop):
|
def search(self, config, start, stop):
|
||||||
queryset = self.get_base_queryset()
|
|
||||||
if self.query_string is None:
|
if self.query_string is None:
|
||||||
return queryset[start:stop]
|
return self.queryset[start:stop]
|
||||||
search_query = self.get_search_query(config=config)
|
search_query = self.get_search_query(config=config)
|
||||||
|
queryset = self.queryset
|
||||||
|
query = queryset.query
|
||||||
if self.fields is None:
|
if self.fields is None:
|
||||||
return self.search_in_index(queryset, search_query, start, stop)
|
vector = F('index_entries__body_search')
|
||||||
return self.search_in_fields(queryset, search_query, start, stop)
|
else:
|
||||||
|
vector = ADD(
|
||||||
|
SearchVector(field, config=search_query.config,
|
||||||
|
weight=get_weight(self.get_boost(field)))
|
||||||
|
for field in self.fields)
|
||||||
|
vector = vector.resolve_expression(query)
|
||||||
|
search_query = search_query.resolve_expression(query)
|
||||||
|
lookup = IndexEntry._meta.get_field('body_search').get_lookup('exact')(
|
||||||
|
vector, search_query)
|
||||||
|
query.where.add(lookup, 'AND')
|
||||||
|
if self.order_by_relevance:
|
||||||
|
# Due to a Django bug, arrays are not automatically converted here.
|
||||||
|
converted_weights = '{' + ','.join(map(str, WEIGHTS_VALUES)) + '}'
|
||||||
|
queryset = queryset.order_by(SearchRank(vector, search_query,
|
||||||
|
weights=converted_weights).desc(),
|
||||||
|
'-pk')
|
||||||
|
elif not queryset.query.order_by:
|
||||||
|
# Adds a default ordering to avoid issue #3729.
|
||||||
|
queryset = queryset.order_by('-pk')
|
||||||
|
return queryset[start:stop]
|
||||||
|
|
||||||
|
|
||||||
class PostgresSearchResults(BaseSearchResults):
|
class PostgresSearchResults(BaseSearchResults):
|
||||||
def get_config(self):
|
|
||||||
queryset = self.query.queryset
|
|
||||||
return self.backend.get_index_for_model(
|
|
||||||
queryset.model, queryset._db).get_config()
|
|
||||||
|
|
||||||
def _do_search(self):
|
def _do_search(self):
|
||||||
return list(self.query.search(self.get_config(),
|
return list(self.query.search(self.backend.get_config(),
|
||||||
self.start, self.stop))
|
self.start, self.stop))
|
||||||
|
|
||||||
def _do_count(self):
|
def _do_count(self):
|
||||||
return self.query.search_count(self.get_config())
|
return self.query.search(self.backend.get_config(), None, None).count()
|
||||||
|
|
||||||
|
|
||||||
class PostgresSearchRebuilder:
|
class PostgresSearchRebuilder:
|
||||||
|
|
@ -345,6 +275,10 @@ class PostgresSearchBackend(BaseSearchBackend):
|
||||||
self.params = params
|
self.params = params
|
||||||
if params.get('ATOMIC_REBUILD', False):
|
if params.get('ATOMIC_REBUILD', False):
|
||||||
self.rebuilder_class = self.atomic_rebuilder_class
|
self.rebuilder_class = self.atomic_rebuilder_class
|
||||||
|
IndexEntry.add_generic_relations()
|
||||||
|
|
||||||
|
def get_config(self):
|
||||||
|
return self.params.get('SEARCH_CONFIG')
|
||||||
|
|
||||||
def get_index_for_model(self, model, db_alias=None):
|
def get_index_for_model(self, model, db_alias=None):
|
||||||
return Index(self, model, db_alias)
|
return Index(self, model, db_alias)
|
||||||
|
|
@ -370,7 +304,7 @@ class PostgresSearchBackend(BaseSearchBackend):
|
||||||
self.get_index_for_object(obj_list[0]).add_items(model, obj_list)
|
self.get_index_for_object(obj_list[0]).add_items(model, obj_list)
|
||||||
|
|
||||||
def delete(self, obj):
|
def delete(self, obj):
|
||||||
IndexEntry._default_manager.for_object(obj).delete()
|
obj.index_entries.all().delete()
|
||||||
|
|
||||||
|
|
||||||
SearchBackend = PostgresSearchBackend
|
SearchBackend = PostgresSearchBackend
|
||||||
|
|
|
||||||
|
|
@ -44,5 +44,11 @@ class Migration(migrations.Migration):
|
||||||
'CREATE INDEX {0}_body_search ON {0} '
|
'CREATE INDEX {0}_body_search ON {0} '
|
||||||
'USING GIN(body_search);'.format(table),
|
'USING GIN(body_search);'.format(table),
|
||||||
'DROP INDEX {}_body_search;'.format(table),
|
'DROP INDEX {}_body_search;'.format(table),
|
||||||
|
state_operations=[migrations.AddIndex(
|
||||||
|
model_name='indexentry',
|
||||||
|
index=django.contrib.postgres.indexes.GinIndex(
|
||||||
|
fields=['body_search'],
|
||||||
|
name='postgres_se_body_se_70ba1a_gin'),
|
||||||
|
)],
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -1,50 +1,43 @@
|
||||||
from __future__ import absolute_import, unicode_literals
|
from __future__ import absolute_import, unicode_literals
|
||||||
|
|
||||||
from django.contrib.contenttypes.fields import GenericForeignKey
|
from django.apps import apps
|
||||||
|
from django.contrib.contenttypes.fields import GenericForeignKey, GenericRelation
|
||||||
from django.contrib.contenttypes.models import ContentType
|
from django.contrib.contenttypes.models import ContentType
|
||||||
from django.contrib.postgres.search import SearchRank, SearchVectorField
|
from django.contrib.postgres.indexes import GinIndex
|
||||||
from django.db.models import (
|
from django.contrib.postgres.search import SearchVectorField
|
||||||
CASCADE, AutoField, BigAutoField, BigIntegerField, F, ForeignKey, IntegerField, Model, QuerySet,
|
from django.db.models import CASCADE, ForeignKey, Model, TextField
|
||||||
TextField)
|
|
||||||
from django.db.models.functions import Cast
|
from django.db.models.functions import Cast
|
||||||
from django.utils.encoding import force_text, python_2_unicode_compatible
|
from django.utils.encoding import python_2_unicode_compatible
|
||||||
from django.utils.translation import ugettext_lazy as _
|
from django.utils.translation import ugettext_lazy as _
|
||||||
|
|
||||||
from .utils import WEIGHTS_VALUES, get_descendants_content_types_pks
|
from ...wagtailsearch.index import class_is_indexed
|
||||||
|
from .utils import get_descendants_content_types_pks
|
||||||
|
|
||||||
|
|
||||||
class IndexQuerySet(QuerySet):
|
class TextIDGenericRelation(GenericRelation):
|
||||||
def for_models(self, *models):
|
def get_content_type_lookup(self, alias, remote_alias):
|
||||||
if not models:
|
field = self.remote_field.model._meta.get_field(
|
||||||
return self.none()
|
self.content_type_field_name)
|
||||||
return self.filter(
|
return field.get_lookup('in')(
|
||||||
content_type_id__in=get_descendants_content_types_pks(models,
|
field.get_col(remote_alias),
|
||||||
self._db))
|
get_descendants_content_types_pks(self.model))
|
||||||
|
|
||||||
def for_object(self, obj):
|
def get_object_id_lookup(self, alias, remote_alias):
|
||||||
db_alias = obj._state.db
|
from_field = self.remote_field.model._meta.get_field(
|
||||||
return (self.using(db_alias).for_models(obj._meta.model)
|
self.object_id_field_name)
|
||||||
.filter(object_id=force_text(obj.pk)))
|
to_field = self.model._meta.pk
|
||||||
|
return from_field.get_lookup('exact')(
|
||||||
|
from_field.get_col(remote_alias),
|
||||||
|
Cast(to_field.get_col(alias), from_field))
|
||||||
|
|
||||||
def add_rank(self, search_query):
|
def get_extra_restriction(self, where_class, alias, remote_alias):
|
||||||
return self.annotate(
|
cond = where_class()
|
||||||
rank=SearchRank(
|
cond.add(self.get_content_type_lookup(alias, remote_alias), 'AND')
|
||||||
F('body_search'), search_query,
|
cond.add(self.get_object_id_lookup(alias, remote_alias), 'AND')
|
||||||
weights='{' + ','.join(map(str, WEIGHTS_VALUES)) + '}'))
|
return cond
|
||||||
|
|
||||||
def rank(self, search_query):
|
def resolve_related_fields(self):
|
||||||
return self.add_rank(search_query).order_by('-rank')
|
return []
|
||||||
|
|
||||||
def annotate_typed_pk(self):
|
|
||||||
cast_field = self.model._meta.pk
|
|
||||||
if isinstance(cast_field, BigAutoField):
|
|
||||||
cast_field = BigIntegerField()
|
|
||||||
elif isinstance(cast_field, AutoField):
|
|
||||||
cast_field = IntegerField()
|
|
||||||
return self.annotate(typed_pk=Cast('object_id', cast_field))
|
|
||||||
|
|
||||||
def pks(self):
|
|
||||||
return self.annotate_typed_pk().values_list('typed_pk', flat=True)
|
|
||||||
|
|
||||||
|
|
||||||
@python_2_unicode_compatible
|
@python_2_unicode_compatible
|
||||||
|
|
@ -57,13 +50,11 @@ class IndexEntry(Model):
|
||||||
# TODO: Add per-object boosting.
|
# TODO: Add per-object boosting.
|
||||||
body_search = SearchVectorField()
|
body_search = SearchVectorField()
|
||||||
|
|
||||||
objects = IndexQuerySet.as_manager()
|
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
unique_together = ('content_type', 'object_id')
|
unique_together = ('content_type', 'object_id')
|
||||||
verbose_name = _('index entry')
|
verbose_name = _('index entry')
|
||||||
verbose_name_plural = _('index entries')
|
verbose_name_plural = _('index entries')
|
||||||
# TODO: Move here the GIN index from the migration.
|
indexes = [GinIndex(['body_search'])]
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return '%s: %s' % (self.content_type.name, self.content_object)
|
return '%s: %s' % (self.content_type.name, self.content_object)
|
||||||
|
|
@ -71,3 +62,10 @@ class IndexEntry(Model):
|
||||||
@property
|
@property
|
||||||
def model(self):
|
def model(self):
|
||||||
return self.content_type.model
|
return self.content_type.model
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def add_generic_relations(cls):
|
||||||
|
for model in apps.get_models():
|
||||||
|
if class_is_indexed(model):
|
||||||
|
TextIDGenericRelation(cls).contribute_to_class(model,
|
||||||
|
'index_entries')
|
||||||
|
|
|
||||||
|
|
@ -60,17 +60,17 @@ def get_descendant_models(model):
|
||||||
return descendant_models
|
return descendant_models
|
||||||
|
|
||||||
|
|
||||||
def get_descendants_content_types_pks(models, db_alias):
|
def get_descendants_content_types_pks(model):
|
||||||
return get_content_types_pks(
|
from django.contrib.contenttypes.models import ContentType
|
||||||
tuple(descendant_model for model in models
|
return [ct.pk for ct in
|
||||||
for descendant_model in get_descendant_models(model)), db_alias)
|
ContentType.objects.get_for_models(*get_descendant_models(model))
|
||||||
|
.values()]
|
||||||
|
|
||||||
|
|
||||||
def get_content_types_pks(models, db_alias):
|
def get_content_types_pk(model):
|
||||||
# We import it locally because this file is loaded before apps are ready.
|
# We import it locally because this file is loaded before apps are ready.
|
||||||
from django.contrib.contenttypes.models import ContentType
|
from django.contrib.contenttypes.models import ContentType
|
||||||
content_types_dict = ContentType.objects.db_manager(db_alias).get_for_models(*models)
|
return ContentType.objects.get_for_model(model).pk
|
||||||
return [ct.pk for ct in content_types_dict.values()]
|
|
||||||
|
|
||||||
|
|
||||||
def get_search_fields(search_fields):
|
def get_search_fields(search_fields):
|
||||||
|
|
|
||||||
Ładowanie…
Reference in New Issue