kopia lustrzana https://github.com/wagtail/wagtail
Merge pull request #3940 from BertrandBordage/postgres_search_improvements
Postgres_search simplification.pull/3965/head
commit
6514650aa4
|
|
@ -33,7 +33,7 @@ def pytest_configure(config):
|
|||
pass
|
||||
|
||||
if config.getoption('postgres'):
|
||||
os.environ['DATABASE_ENGINE'] = 'django.db.backends.postgresql_psycopg2'
|
||||
os.environ['DATABASE_ENGINE'] = 'django.db.backends.postgresql'
|
||||
|
||||
# Setup django after processing the pytest arguments so that the env
|
||||
# variables are available in the settings
|
||||
|
|
|
|||
|
|
@ -560,7 +560,7 @@ These two files should reside in your project directory (``myproject/myproject/`
|
|||
|
||||
DATABASES = {
|
||||
'default': {
|
||||
'ENGINE': 'django.db.backends.postgresql_psycopg2',
|
||||
'ENGINE': 'django.db.backends.postgresql',
|
||||
'NAME': 'myprojectdb',
|
||||
'USER': 'postgres',
|
||||
'PASSWORD': '',
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ def runtests():
|
|||
pass
|
||||
|
||||
if args.postgres:
|
||||
os.environ['DATABASE_ENGINE'] = 'django.db.backends.postgresql_psycopg2'
|
||||
os.environ['DATABASE_ENGINE'] = 'django.db.backends.postgresql'
|
||||
|
||||
if args.elasticsearch:
|
||||
os.environ.setdefault('ELASTICSEARCH_URL', 'http://localhost:9200')
|
||||
|
|
|
|||
|
|
@ -16,25 +16,13 @@ from wagtail.wagtailsearch.index import RelatedFields, SearchField
|
|||
|
||||
from .models import IndexEntry
|
||||
from .utils import (
|
||||
ADD, AND, OR, WEIGHTS_VALUES, get_content_types_pks, get_postgresql_connections, get_weight,
|
||||
keyword_split, unidecode)
|
||||
ADD, AND, OR, WEIGHTS_VALUES, get_content_types_pk, get_descendants_content_types_pks,
|
||||
get_postgresql_connections, get_weight, keyword_split, unidecode)
|
||||
|
||||
|
||||
# TODO: Add autocomplete.
|
||||
|
||||
|
||||
def get_db_alias(queryset):
|
||||
return queryset._db or DEFAULT_DB_ALIAS
|
||||
|
||||
|
||||
def get_sql(queryset):
|
||||
return queryset.query.get_compiler(get_db_alias(queryset)).as_sql()
|
||||
|
||||
|
||||
def get_pk_column(model):
|
||||
return model._meta.pk.get_attname_column()[1]
|
||||
|
||||
|
||||
@python_2_unicode_compatible
|
||||
class Index(object):
|
||||
def __init__(self, backend, model, db_alias=None):
|
||||
|
|
@ -64,14 +52,13 @@ class Index(object):
|
|||
existing_pks = (self.model._default_manager.using(self.db_alias)
|
||||
.annotate(object_id=Cast('pk', TextField()))
|
||||
.values('object_id'))
|
||||
stale_entries = (IndexEntry._default_manager.using(self.db_alias)
|
||||
.for_models(self.model)
|
||||
.exclude(object_id__in=existing_pks))
|
||||
content_type_ids = get_descendants_content_types_pks(self.model)
|
||||
stale_entries = (
|
||||
IndexEntry._default_manager.using(self.db_alias)
|
||||
.filter(content_type_id__in=content_type_ids)
|
||||
.exclude(object_id__in=existing_pks))
|
||||
stale_entries.delete()
|
||||
|
||||
def get_config(self):
|
||||
return self.backend.params.get('SEARCH_CONFIG')
|
||||
|
||||
def prepare_value(self, value):
|
||||
if isinstance(value, string_types):
|
||||
return value
|
||||
|
|
@ -134,9 +121,8 @@ class Index(object):
|
|||
ids_and_objs = {}
|
||||
for obj in objs:
|
||||
obj._search_vector = (
|
||||
ADD([
|
||||
SearchVector(Value(text), weight=weight, config=config)
|
||||
for text, weight in obj._body_])
|
||||
ADD([SearchVector(Value(text), weight=weight, config=config)
|
||||
for text, weight in obj._body_])
|
||||
if obj._body_ else SearchVector(Value('')))
|
||||
ids_and_objs[obj._object_id] = obj
|
||||
index_entries = IndexEntry._default_manager.using(self.db_alias)
|
||||
|
|
@ -160,8 +146,8 @@ class Index(object):
|
|||
index_entries.bulk_create(to_be_created)
|
||||
|
||||
def add_items(self, model, objs):
|
||||
content_type_pk = get_content_types_pks((model,), self.db_alias)[0]
|
||||
config = self.get_config()
|
||||
content_type_pk = get_content_types_pk(model)
|
||||
config = self.backend.get_config()
|
||||
for obj in objs:
|
||||
obj._object_id = force_text(obj.pk)
|
||||
obj._body_ = self.prepare_body(obj)
|
||||
|
|
@ -189,27 +175,6 @@ class PostgresSearchQuery(BaseSearchQuery):
|
|||
return SearchQuery('')
|
||||
return combine(SearchQuery(q, config=config) for q in search_terms)
|
||||
|
||||
def get_base_queryset(self):
|
||||
# Removes order for performance’s sake.
|
||||
return self.queryset.order_by()
|
||||
|
||||
def get_in_index_queryset(self, queryset, search_query):
|
||||
return (IndexEntry._default_manager.using(get_db_alias(queryset))
|
||||
.for_models(queryset.model).filter(body_search=search_query))
|
||||
|
||||
def get_in_index_count(self, queryset, search_query):
|
||||
index_sql, index_params = get_sql(
|
||||
self.get_in_index_queryset(queryset, search_query).pks())
|
||||
model_sql, model_params = get_sql(queryset)
|
||||
sql = """
|
||||
SELECT COUNT(*)
|
||||
FROM (%s) AS index_entry
|
||||
INNER JOIN (%s) AS obj ON obj."%s" = index_entry.typed_pk;
|
||||
""" % (index_sql, model_sql, get_pk_column(queryset.model))
|
||||
with connections[get_db_alias(queryset)].cursor() as cursor:
|
||||
cursor.execute(sql, index_params + model_params)
|
||||
return cursor.fetchone()[0]
|
||||
|
||||
def get_boost(self, field_name, fields=None):
|
||||
if fields is None:
|
||||
fields = self.search_fields
|
||||
|
|
@ -226,78 +191,43 @@ class PostgresSearchQuery(BaseSearchQuery):
|
|||
return self.get_boost(sub_field_name, field.fields)
|
||||
return field.boost
|
||||
|
||||
def get_in_fields_queryset(self, queryset, search_query):
|
||||
if not self.fields:
|
||||
return queryset.none()
|
||||
return (
|
||||
queryset.annotate(
|
||||
_search_=ADD(
|
||||
SearchVector(field, config=search_query.config,
|
||||
weight=get_weight(self.get_boost(field)))
|
||||
for field in self.fields))
|
||||
.filter(_search_=search_query))
|
||||
|
||||
def search_count(self, config):
|
||||
queryset = self.get_base_queryset()
|
||||
search_query = self.get_search_query(config=config)
|
||||
if self.fields is None:
|
||||
return self.get_in_index_count(queryset, search_query)
|
||||
return self.get_in_fields_queryset(queryset, search_query).count()
|
||||
|
||||
def search_in_index(self, queryset, search_query, start, stop):
|
||||
index_entries = self.get_in_index_queryset(queryset, search_query)
|
||||
values = ['typed_pk']
|
||||
if self.order_by_relevance:
|
||||
index_entries = index_entries.rank(search_query)
|
||||
values.append('rank')
|
||||
order_sql = 'index_entry.rank DESC, id ASC'
|
||||
else:
|
||||
order_sql = 'id ASC'
|
||||
index_sql, index_params = get_sql(
|
||||
index_entries.annotate_typed_pk()
|
||||
.values(*values)
|
||||
)
|
||||
model_sql, model_params = get_sql(queryset)
|
||||
model = queryset.model
|
||||
sql = """
|
||||
SELECT obj.*
|
||||
FROM (%s) AS index_entry
|
||||
INNER JOIN (%s) AS obj ON obj."%s" = index_entry.typed_pk
|
||||
ORDER BY %s
|
||||
OFFSET %%s LIMIT %%s;
|
||||
""" % (index_sql, model_sql, get_pk_column(model), order_sql)
|
||||
limits = (start, None if stop is None else stop - start)
|
||||
return model._default_manager.using(get_db_alias(queryset)).raw(
|
||||
sql, index_params + model_params + limits)
|
||||
|
||||
def search_in_fields(self, queryset, search_query, start, stop):
|
||||
return (self.get_in_fields_queryset(queryset, search_query)
|
||||
.annotate(_rank_=SearchRank(F('_search_'), search_query,
|
||||
weights=WEIGHTS_VALUES))
|
||||
.order_by('-_rank_'))[start:stop]
|
||||
|
||||
def search(self, config, start, stop):
|
||||
queryset = self.get_base_queryset()
|
||||
if self.query_string is None:
|
||||
return queryset[start:stop]
|
||||
return self.queryset[start:stop]
|
||||
search_query = self.get_search_query(config=config)
|
||||
queryset = self.queryset
|
||||
query = queryset.query
|
||||
if self.fields is None:
|
||||
return self.search_in_index(queryset, search_query, start, stop)
|
||||
return self.search_in_fields(queryset, search_query, start, stop)
|
||||
vector = F('index_entries__body_search')
|
||||
else:
|
||||
vector = ADD(
|
||||
SearchVector(field, config=search_query.config,
|
||||
weight=get_weight(self.get_boost(field)))
|
||||
for field in self.fields)
|
||||
vector = vector.resolve_expression(query)
|
||||
search_query = search_query.resolve_expression(query)
|
||||
lookup = IndexEntry._meta.get_field('body_search').get_lookup('exact')(
|
||||
vector, search_query)
|
||||
query.where.add(lookup, 'AND')
|
||||
if self.order_by_relevance:
|
||||
# Due to a Django bug, arrays are not automatically converted here.
|
||||
converted_weights = '{' + ','.join(map(str, WEIGHTS_VALUES)) + '}'
|
||||
queryset = queryset.order_by(SearchRank(vector, search_query,
|
||||
weights=converted_weights).desc(),
|
||||
'-pk')
|
||||
elif not queryset.query.order_by:
|
||||
# Adds a default ordering to avoid issue #3729.
|
||||
queryset = queryset.order_by('-pk')
|
||||
return queryset[start:stop]
|
||||
|
||||
|
||||
class PostgresSearchResults(BaseSearchResults):
|
||||
def get_config(self):
|
||||
queryset = self.query.queryset
|
||||
return self.backend.get_index_for_model(
|
||||
queryset.model, queryset._db).get_config()
|
||||
|
||||
def _do_search(self):
|
||||
return list(self.query.search(self.get_config(),
|
||||
return list(self.query.search(self.backend.get_config(),
|
||||
self.start, self.stop))
|
||||
|
||||
def _do_count(self):
|
||||
return self.query.search_count(self.get_config())
|
||||
return self.query.search(self.backend.get_config(), None, None).count()
|
||||
|
||||
|
||||
class PostgresSearchRebuilder:
|
||||
|
|
@ -345,6 +275,10 @@ class PostgresSearchBackend(BaseSearchBackend):
|
|||
self.params = params
|
||||
if params.get('ATOMIC_REBUILD', False):
|
||||
self.rebuilder_class = self.atomic_rebuilder_class
|
||||
IndexEntry.add_generic_relations()
|
||||
|
||||
def get_config(self):
|
||||
return self.params.get('SEARCH_CONFIG')
|
||||
|
||||
def get_index_for_model(self, model, db_alias=None):
|
||||
return Index(self, model, db_alias)
|
||||
|
|
@ -370,7 +304,7 @@ class PostgresSearchBackend(BaseSearchBackend):
|
|||
self.get_index_for_object(obj_list[0]).add_items(model, obj_list)
|
||||
|
||||
def delete(self, obj):
|
||||
IndexEntry._default_manager.for_object(obj).delete()
|
||||
obj.index_entries.all().delete()
|
||||
|
||||
|
||||
SearchBackend = PostgresSearchBackend
|
||||
|
|
|
|||
|
|
@ -44,5 +44,11 @@ class Migration(migrations.Migration):
|
|||
'CREATE INDEX {0}_body_search ON {0} '
|
||||
'USING GIN(body_search);'.format(table),
|
||||
'DROP INDEX {}_body_search;'.format(table),
|
||||
state_operations=[migrations.AddIndex(
|
||||
model_name='indexentry',
|
||||
index=django.contrib.postgres.indexes.GinIndex(
|
||||
fields=['body_search'],
|
||||
name='postgres_se_body_se_70ba1a_gin'),
|
||||
)],
|
||||
),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -1,50 +1,43 @@
|
|||
from __future__ import absolute_import, unicode_literals
|
||||
|
||||
from django.contrib.contenttypes.fields import GenericForeignKey
|
||||
from django.apps import apps
|
||||
from django.contrib.contenttypes.fields import GenericForeignKey, GenericRelation
|
||||
from django.contrib.contenttypes.models import ContentType
|
||||
from django.contrib.postgres.search import SearchRank, SearchVectorField
|
||||
from django.db.models import (
|
||||
CASCADE, AutoField, BigAutoField, BigIntegerField, F, ForeignKey, IntegerField, Model, QuerySet,
|
||||
TextField)
|
||||
from django.contrib.postgres.indexes import GinIndex
|
||||
from django.contrib.postgres.search import SearchVectorField
|
||||
from django.db.models import CASCADE, ForeignKey, Model, TextField
|
||||
from django.db.models.functions import Cast
|
||||
from django.utils.encoding import force_text, python_2_unicode_compatible
|
||||
from django.utils.encoding import python_2_unicode_compatible
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
from .utils import WEIGHTS_VALUES, get_descendants_content_types_pks
|
||||
from ...wagtailsearch.index import class_is_indexed
|
||||
from .utils import get_descendants_content_types_pks
|
||||
|
||||
|
||||
class IndexQuerySet(QuerySet):
|
||||
def for_models(self, *models):
|
||||
if not models:
|
||||
return self.none()
|
||||
return self.filter(
|
||||
content_type_id__in=get_descendants_content_types_pks(models,
|
||||
self._db))
|
||||
class TextIDGenericRelation(GenericRelation):
|
||||
def get_content_type_lookup(self, alias, remote_alias):
|
||||
field = self.remote_field.model._meta.get_field(
|
||||
self.content_type_field_name)
|
||||
return field.get_lookup('in')(
|
||||
field.get_col(remote_alias),
|
||||
get_descendants_content_types_pks(self.model))
|
||||
|
||||
def for_object(self, obj):
|
||||
db_alias = obj._state.db
|
||||
return (self.using(db_alias).for_models(obj._meta.model)
|
||||
.filter(object_id=force_text(obj.pk)))
|
||||
def get_object_id_lookup(self, alias, remote_alias):
|
||||
from_field = self.remote_field.model._meta.get_field(
|
||||
self.object_id_field_name)
|
||||
to_field = self.model._meta.pk
|
||||
return from_field.get_lookup('exact')(
|
||||
from_field.get_col(remote_alias),
|
||||
Cast(to_field.get_col(alias), from_field))
|
||||
|
||||
def add_rank(self, search_query):
|
||||
return self.annotate(
|
||||
rank=SearchRank(
|
||||
F('body_search'), search_query,
|
||||
weights='{' + ','.join(map(str, WEIGHTS_VALUES)) + '}'))
|
||||
def get_extra_restriction(self, where_class, alias, remote_alias):
|
||||
cond = where_class()
|
||||
cond.add(self.get_content_type_lookup(alias, remote_alias), 'AND')
|
||||
cond.add(self.get_object_id_lookup(alias, remote_alias), 'AND')
|
||||
return cond
|
||||
|
||||
def rank(self, search_query):
|
||||
return self.add_rank(search_query).order_by('-rank')
|
||||
|
||||
def annotate_typed_pk(self):
|
||||
cast_field = self.model._meta.pk
|
||||
if isinstance(cast_field, BigAutoField):
|
||||
cast_field = BigIntegerField()
|
||||
elif isinstance(cast_field, AutoField):
|
||||
cast_field = IntegerField()
|
||||
return self.annotate(typed_pk=Cast('object_id', cast_field))
|
||||
|
||||
def pks(self):
|
||||
return self.annotate_typed_pk().values_list('typed_pk', flat=True)
|
||||
def resolve_related_fields(self):
|
||||
return []
|
||||
|
||||
|
||||
@python_2_unicode_compatible
|
||||
|
|
@ -57,13 +50,11 @@ class IndexEntry(Model):
|
|||
# TODO: Add per-object boosting.
|
||||
body_search = SearchVectorField()
|
||||
|
||||
objects = IndexQuerySet.as_manager()
|
||||
|
||||
class Meta:
|
||||
unique_together = ('content_type', 'object_id')
|
||||
verbose_name = _('index entry')
|
||||
verbose_name_plural = _('index entries')
|
||||
# TODO: Move here the GIN index from the migration.
|
||||
indexes = [GinIndex(['body_search'])]
|
||||
|
||||
def __str__(self):
|
||||
return '%s: %s' % (self.content_type.name, self.content_object)
|
||||
|
|
@ -71,3 +62,10 @@ class IndexEntry(Model):
|
|||
@property
|
||||
def model(self):
|
||||
return self.content_type.model
|
||||
|
||||
@classmethod
|
||||
def add_generic_relations(cls):
|
||||
for model in apps.get_models():
|
||||
if class_is_indexed(model):
|
||||
TextIDGenericRelation(cls).contribute_to_class(model,
|
||||
'index_entries')
|
||||
|
|
|
|||
|
|
@ -60,17 +60,17 @@ def get_descendant_models(model):
|
|||
return descendant_models
|
||||
|
||||
|
||||
def get_descendants_content_types_pks(models, db_alias):
|
||||
return get_content_types_pks(
|
||||
tuple(descendant_model for model in models
|
||||
for descendant_model in get_descendant_models(model)), db_alias)
|
||||
def get_descendants_content_types_pks(model):
|
||||
from django.contrib.contenttypes.models import ContentType
|
||||
return [ct.pk for ct in
|
||||
ContentType.objects.get_for_models(*get_descendant_models(model))
|
||||
.values()]
|
||||
|
||||
|
||||
def get_content_types_pks(models, db_alias):
|
||||
def get_content_types_pk(model):
|
||||
# We import it locally because this file is loaded before apps are ready.
|
||||
from django.contrib.contenttypes.models import ContentType
|
||||
content_types_dict = ContentType.objects.db_manager(db_alias).get_for_models(*models)
|
||||
return [ct.pk for ct in content_types_dict.values()]
|
||||
return ContentType.objects.get_for_model(model).pk
|
||||
|
||||
|
||||
def get_search_fields(search_fields):
|
||||
|
|
|
|||
Ładowanie…
Reference in New Issue