kopia lustrzana https://github.com/wagtail/wagtail
				
				
				
			Merge pull request #3940 from BertrandBordage/postgres_search_improvements
Postgres_search simplification.
pull/3965/head
						commit
						6514650aa4
					
				|  | @ -33,7 +33,7 @@ def pytest_configure(config): | |||
|         pass | ||||
| 
 | ||||
|     if config.getoption('postgres'): | ||||
|         os.environ['DATABASE_ENGINE'] = 'django.db.backends.postgresql_psycopg2' | ||||
|         os.environ['DATABASE_ENGINE'] = 'django.db.backends.postgresql' | ||||
| 
 | ||||
|     # Setup django after processing the pytest arguments so that the env | ||||
|     # variables are available in the settings | ||||
|  |  | |||
|  | @ -560,7 +560,7 @@ These two files should reside in your project directory (``myproject/myproject/` | |||
| 
 | ||||
|   DATABASES = { | ||||
|       'default': { | ||||
|           'ENGINE': 'django.db.backends.postgresql_psycopg2', | ||||
|           'ENGINE': 'django.db.backends.postgresql', | ||||
|           'NAME': 'myprojectdb', | ||||
|           'USER': 'postgres', | ||||
|           'PASSWORD': '', | ||||
|  |  | |||
|  | @ -47,7 +47,7 @@ def runtests(): | |||
|         pass | ||||
| 
 | ||||
|     if args.postgres: | ||||
|         os.environ['DATABASE_ENGINE'] = 'django.db.backends.postgresql_psycopg2' | ||||
|         os.environ['DATABASE_ENGINE'] = 'django.db.backends.postgresql' | ||||
| 
 | ||||
|     if args.elasticsearch: | ||||
|         os.environ.setdefault('ELASTICSEARCH_URL', 'http://localhost:9200') | ||||
|  |  | |||
|  | @ -16,25 +16,13 @@ from wagtail.wagtailsearch.index import RelatedFields, SearchField | |||
| 
 | ||||
| from .models import IndexEntry | ||||
| from .utils import ( | ||||
|     ADD, AND, OR, WEIGHTS_VALUES, get_content_types_pks, get_postgresql_connections, get_weight, | ||||
|     keyword_split, unidecode) | ||||
|     ADD, AND, OR, WEIGHTS_VALUES, get_content_types_pk, get_descendants_content_types_pks, | ||||
|     get_postgresql_connections, get_weight, keyword_split, unidecode) | ||||
| 
 | ||||
| 
 | ||||
| # TODO: Add autocomplete. | ||||
| 
 | ||||
| 
 | ||||
| def get_db_alias(queryset): | ||||
|     return queryset._db or DEFAULT_DB_ALIAS | ||||
| 
 | ||||
| 
 | ||||
| def get_sql(queryset): | ||||
|     return queryset.query.get_compiler(get_db_alias(queryset)).as_sql() | ||||
| 
 | ||||
| 
 | ||||
| def get_pk_column(model): | ||||
|     return model._meta.pk.get_attname_column()[1] | ||||
| 
 | ||||
| 
 | ||||
| @python_2_unicode_compatible | ||||
| class Index(object): | ||||
|     def __init__(self, backend, model, db_alias=None): | ||||
|  | @ -64,14 +52,13 @@ class Index(object): | |||
|         existing_pks = (self.model._default_manager.using(self.db_alias) | ||||
|                         .annotate(object_id=Cast('pk', TextField())) | ||||
|                         .values('object_id')) | ||||
|         stale_entries = (IndexEntry._default_manager.using(self.db_alias) | ||||
|                          .for_models(self.model) | ||||
|                          .exclude(object_id__in=existing_pks)) | ||||
|         content_type_ids = get_descendants_content_types_pks(self.model) | ||||
|         stale_entries = ( | ||||
|             IndexEntry._default_manager.using(self.db_alias) | ||||
|             .filter(content_type_id__in=content_type_ids) | ||||
|             .exclude(object_id__in=existing_pks)) | ||||
|         stale_entries.delete() | ||||
| 
 | ||||
|     def get_config(self): | ||||
|         return self.backend.params.get('SEARCH_CONFIG') | ||||
| 
 | ||||
|     def prepare_value(self, value): | ||||
|         if isinstance(value, string_types): | ||||
|             return value | ||||
|  | @ -134,9 +121,8 @@ class Index(object): | |||
|         ids_and_objs = {} | ||||
|         for obj in objs: | ||||
|             obj._search_vector = ( | ||||
|                 ADD([ | ||||
|                     SearchVector(Value(text), weight=weight, config=config) | ||||
|                     for text, weight in obj._body_]) | ||||
|                 ADD([SearchVector(Value(text), weight=weight, config=config) | ||||
|                      for text, weight in obj._body_]) | ||||
|                 if obj._body_ else SearchVector(Value(''))) | ||||
|             ids_and_objs[obj._object_id] = obj | ||||
|         index_entries = IndexEntry._default_manager.using(self.db_alias) | ||||
|  | @ -160,8 +146,8 @@ class Index(object): | |||
|         index_entries.bulk_create(to_be_created) | ||||
| 
 | ||||
|     def add_items(self, model, objs): | ||||
|         content_type_pk = get_content_types_pks((model,), self.db_alias)[0] | ||||
|         config = self.get_config() | ||||
|         content_type_pk = get_content_types_pk(model) | ||||
|         config = self.backend.get_config() | ||||
|         for obj in objs: | ||||
|             obj._object_id = force_text(obj.pk) | ||||
|             obj._body_ = self.prepare_body(obj) | ||||
|  | @ -189,27 +175,6 @@ class PostgresSearchQuery(BaseSearchQuery): | |||
|             return SearchQuery('') | ||||
|         return combine(SearchQuery(q, config=config) for q in search_terms) | ||||
| 
 | ||||
|     def get_base_queryset(self): | ||||
|         # Removes order for performance’s sake. | ||||
|         return self.queryset.order_by() | ||||
| 
 | ||||
|     def get_in_index_queryset(self, queryset, search_query): | ||||
|         return (IndexEntry._default_manager.using(get_db_alias(queryset)) | ||||
|                 .for_models(queryset.model).filter(body_search=search_query)) | ||||
| 
 | ||||
|     def get_in_index_count(self, queryset, search_query): | ||||
|         index_sql, index_params = get_sql( | ||||
|             self.get_in_index_queryset(queryset, search_query).pks()) | ||||
|         model_sql, model_params = get_sql(queryset) | ||||
|         sql = """ | ||||
|             SELECT COUNT(*) | ||||
|             FROM (%s) AS index_entry | ||||
|             INNER JOIN (%s) AS obj ON obj."%s" = index_entry.typed_pk; | ||||
|             """ % (index_sql, model_sql, get_pk_column(queryset.model)) | ||||
|         with connections[get_db_alias(queryset)].cursor() as cursor: | ||||
|             cursor.execute(sql, index_params + model_params) | ||||
|             return cursor.fetchone()[0] | ||||
| 
 | ||||
|     def get_boost(self, field_name, fields=None): | ||||
|         if fields is None: | ||||
|             fields = self.search_fields | ||||
|  | @ -226,78 +191,43 @@ class PostgresSearchQuery(BaseSearchQuery): | |||
|                     return self.get_boost(sub_field_name, field.fields) | ||||
|                 return field.boost | ||||
| 
 | ||||
|     def get_in_fields_queryset(self, queryset, search_query): | ||||
|         if not self.fields: | ||||
|             return queryset.none() | ||||
|         return ( | ||||
|             queryset.annotate( | ||||
|                 _search_=ADD( | ||||
|                     SearchVector(field, config=search_query.config, | ||||
|                                  weight=get_weight(self.get_boost(field))) | ||||
|                     for field in self.fields)) | ||||
|             .filter(_search_=search_query)) | ||||
| 
 | ||||
|     def search_count(self, config): | ||||
|         queryset = self.get_base_queryset() | ||||
|         search_query = self.get_search_query(config=config) | ||||
|         if self.fields is None: | ||||
|             return self.get_in_index_count(queryset, search_query) | ||||
|         return self.get_in_fields_queryset(queryset, search_query).count() | ||||
| 
 | ||||
|     def search_in_index(self, queryset, search_query, start, stop): | ||||
|         index_entries = self.get_in_index_queryset(queryset, search_query) | ||||
|         values = ['typed_pk'] | ||||
|         if self.order_by_relevance: | ||||
|             index_entries = index_entries.rank(search_query) | ||||
|             values.append('rank') | ||||
|             order_sql = 'index_entry.rank DESC, id ASC' | ||||
|         else: | ||||
|             order_sql = 'id ASC' | ||||
|         index_sql, index_params = get_sql( | ||||
|             index_entries.annotate_typed_pk() | ||||
|             .values(*values) | ||||
|         ) | ||||
|         model_sql, model_params = get_sql(queryset) | ||||
|         model = queryset.model | ||||
|         sql = """ | ||||
|             SELECT obj.* | ||||
|             FROM (%s) AS index_entry | ||||
|             INNER JOIN (%s) AS obj ON obj."%s" = index_entry.typed_pk | ||||
|             ORDER BY %s | ||||
|             OFFSET %%s LIMIT %%s; | ||||
|             """ % (index_sql, model_sql, get_pk_column(model), order_sql) | ||||
|         limits = (start, None if stop is None else stop - start) | ||||
|         return model._default_manager.using(get_db_alias(queryset)).raw( | ||||
|             sql, index_params + model_params + limits) | ||||
| 
 | ||||
|     def search_in_fields(self, queryset, search_query, start, stop): | ||||
|         return (self.get_in_fields_queryset(queryset, search_query) | ||||
|                 .annotate(_rank_=SearchRank(F('_search_'), search_query, | ||||
|                                             weights=WEIGHTS_VALUES)) | ||||
|                 .order_by('-_rank_'))[start:stop] | ||||
| 
 | ||||
|     def search(self, config, start, stop): | ||||
|         queryset = self.get_base_queryset() | ||||
|         if self.query_string is None: | ||||
|             return queryset[start:stop] | ||||
|             return self.queryset[start:stop] | ||||
|         search_query = self.get_search_query(config=config) | ||||
|         queryset = self.queryset | ||||
|         query = queryset.query | ||||
|         if self.fields is None: | ||||
|             return self.search_in_index(queryset, search_query, start, stop) | ||||
|         return self.search_in_fields(queryset, search_query, start, stop) | ||||
|             vector = F('index_entries__body_search') | ||||
|         else: | ||||
|             vector = ADD( | ||||
|                 SearchVector(field, config=search_query.config, | ||||
|                              weight=get_weight(self.get_boost(field))) | ||||
|                 for field in self.fields) | ||||
|         vector = vector.resolve_expression(query) | ||||
|         search_query = search_query.resolve_expression(query) | ||||
|         lookup = IndexEntry._meta.get_field('body_search').get_lookup('exact')( | ||||
|             vector, search_query) | ||||
|         query.where.add(lookup, 'AND') | ||||
|         if self.order_by_relevance: | ||||
|             # Due to a Django bug, arrays are not automatically converted here. | ||||
|             converted_weights = '{' + ','.join(map(str, WEIGHTS_VALUES)) + '}' | ||||
|             queryset = queryset.order_by(SearchRank(vector, search_query, | ||||
|                                                     weights=converted_weights).desc(), | ||||
|                                          '-pk') | ||||
|         elif not queryset.query.order_by: | ||||
|             # Adds a default ordering to avoid issue #3729. | ||||
|             queryset = queryset.order_by('-pk') | ||||
|         return queryset[start:stop] | ||||
| 
 | ||||
| 
 | ||||
| class PostgresSearchResults(BaseSearchResults): | ||||
|     def get_config(self): | ||||
|         queryset = self.query.queryset | ||||
|         return self.backend.get_index_for_model( | ||||
|             queryset.model, queryset._db).get_config() | ||||
| 
 | ||||
|     def _do_search(self): | ||||
|         return list(self.query.search(self.get_config(), | ||||
|         return list(self.query.search(self.backend.get_config(), | ||||
|                                       self.start, self.stop)) | ||||
| 
 | ||||
|     def _do_count(self): | ||||
|         return self.query.search_count(self.get_config()) | ||||
|         return self.query.search(self.backend.get_config(), None, None).count() | ||||
| 
 | ||||
| 
 | ||||
| class PostgresSearchRebuilder: | ||||
|  | @ -345,6 +275,10 @@ class PostgresSearchBackend(BaseSearchBackend): | |||
|         self.params = params | ||||
|         if params.get('ATOMIC_REBUILD', False): | ||||
|             self.rebuilder_class = self.atomic_rebuilder_class | ||||
|         IndexEntry.add_generic_relations() | ||||
| 
 | ||||
|     def get_config(self): | ||||
|         return self.params.get('SEARCH_CONFIG') | ||||
| 
 | ||||
|     def get_index_for_model(self, model, db_alias=None): | ||||
|         return Index(self, model, db_alias) | ||||
|  | @ -370,7 +304,7 @@ class PostgresSearchBackend(BaseSearchBackend): | |||
|             self.get_index_for_object(obj_list[0]).add_items(model, obj_list) | ||||
| 
 | ||||
|     def delete(self, obj): | ||||
|         IndexEntry._default_manager.for_object(obj).delete() | ||||
|         obj.index_entries.all().delete() | ||||
| 
 | ||||
| 
 | ||||
| SearchBackend = PostgresSearchBackend | ||||
|  |  | |||
|  | @ -44,5 +44,11 @@ class Migration(migrations.Migration): | |||
|             'CREATE INDEX {0}_body_search ON {0} ' | ||||
|             'USING GIN(body_search);'.format(table), | ||||
|             'DROP INDEX {}_body_search;'.format(table), | ||||
|             state_operations=[migrations.AddIndex( | ||||
|                 model_name='indexentry', | ||||
|                 index=django.contrib.postgres.indexes.GinIndex( | ||||
|                     fields=['body_search'], | ||||
|                     name='postgres_se_body_se_70ba1a_gin'), | ||||
|             )], | ||||
|         ), | ||||
|     ] | ||||
|  |  | |||
|  | @ -1,50 +1,43 @@ | |||
| from __future__ import absolute_import, unicode_literals | ||||
| 
 | ||||
| from django.contrib.contenttypes.fields import GenericForeignKey | ||||
| from django.apps import apps | ||||
| from django.contrib.contenttypes.fields import GenericForeignKey, GenericRelation | ||||
| from django.contrib.contenttypes.models import ContentType | ||||
| from django.contrib.postgres.search import SearchRank, SearchVectorField | ||||
| from django.db.models import ( | ||||
|     CASCADE, AutoField, BigAutoField, BigIntegerField, F, ForeignKey, IntegerField, Model, QuerySet, | ||||
|     TextField) | ||||
| from django.contrib.postgres.indexes import GinIndex | ||||
| from django.contrib.postgres.search import SearchVectorField | ||||
| from django.db.models import CASCADE, ForeignKey, Model, TextField | ||||
| from django.db.models.functions import Cast | ||||
| from django.utils.encoding import force_text, python_2_unicode_compatible | ||||
| from django.utils.encoding import python_2_unicode_compatible | ||||
| from django.utils.translation import ugettext_lazy as _ | ||||
| 
 | ||||
| from .utils import WEIGHTS_VALUES, get_descendants_content_types_pks | ||||
| from ...wagtailsearch.index import class_is_indexed | ||||
| from .utils import get_descendants_content_types_pks | ||||
| 
 | ||||
| 
 | ||||
| class IndexQuerySet(QuerySet): | ||||
|     def for_models(self, *models): | ||||
|         if not models: | ||||
|             return self.none() | ||||
|         return self.filter( | ||||
|             content_type_id__in=get_descendants_content_types_pks(models, | ||||
|                                                                   self._db)) | ||||
| class TextIDGenericRelation(GenericRelation): | ||||
|     def get_content_type_lookup(self, alias, remote_alias): | ||||
|         field = self.remote_field.model._meta.get_field( | ||||
|             self.content_type_field_name) | ||||
|         return field.get_lookup('in')( | ||||
|             field.get_col(remote_alias), | ||||
|             get_descendants_content_types_pks(self.model)) | ||||
| 
 | ||||
|     def for_object(self, obj): | ||||
|         db_alias = obj._state.db | ||||
|         return (self.using(db_alias).for_models(obj._meta.model) | ||||
|                 .filter(object_id=force_text(obj.pk))) | ||||
|     def get_object_id_lookup(self, alias, remote_alias): | ||||
|         from_field = self.remote_field.model._meta.get_field( | ||||
|             self.object_id_field_name) | ||||
|         to_field = self.model._meta.pk | ||||
|         return from_field.get_lookup('exact')( | ||||
|             from_field.get_col(remote_alias), | ||||
|             Cast(to_field.get_col(alias), from_field)) | ||||
| 
 | ||||
|     def add_rank(self, search_query): | ||||
|         return self.annotate( | ||||
|             rank=SearchRank( | ||||
|                 F('body_search'), search_query, | ||||
|                 weights='{' + ','.join(map(str, WEIGHTS_VALUES)) + '}')) | ||||
|     def get_extra_restriction(self, where_class, alias, remote_alias): | ||||
|         cond = where_class() | ||||
|         cond.add(self.get_content_type_lookup(alias, remote_alias), 'AND') | ||||
|         cond.add(self.get_object_id_lookup(alias, remote_alias), 'AND') | ||||
|         return cond | ||||
| 
 | ||||
|     def rank(self, search_query): | ||||
|         return self.add_rank(search_query).order_by('-rank') | ||||
| 
 | ||||
|     def annotate_typed_pk(self): | ||||
|         cast_field = self.model._meta.pk | ||||
|         if isinstance(cast_field, BigAutoField): | ||||
|             cast_field = BigIntegerField() | ||||
|         elif isinstance(cast_field, AutoField): | ||||
|             cast_field = IntegerField() | ||||
|         return self.annotate(typed_pk=Cast('object_id', cast_field)) | ||||
| 
 | ||||
|     def pks(self): | ||||
|         return self.annotate_typed_pk().values_list('typed_pk', flat=True) | ||||
|     def resolve_related_fields(self): | ||||
|         return [] | ||||
| 
 | ||||
| 
 | ||||
| @python_2_unicode_compatible | ||||
|  | @ -57,13 +50,11 @@ class IndexEntry(Model): | |||
|     # TODO: Add per-object boosting. | ||||
|     body_search = SearchVectorField() | ||||
| 
 | ||||
|     objects = IndexQuerySet.as_manager() | ||||
| 
 | ||||
|     class Meta: | ||||
|         unique_together = ('content_type', 'object_id') | ||||
|         verbose_name = _('index entry') | ||||
|         verbose_name_plural = _('index entries') | ||||
|         # TODO: Move here the GIN index from the migration. | ||||
|         indexes = [GinIndex(['body_search'])] | ||||
| 
 | ||||
|     def __str__(self): | ||||
|         return '%s: %s' % (self.content_type.name, self.content_object) | ||||
|  | @ -71,3 +62,10 @@ class IndexEntry(Model): | |||
|     @property | ||||
|     def model(self): | ||||
|         return self.content_type.model | ||||
| 
 | ||||
|     @classmethod | ||||
|     def add_generic_relations(cls): | ||||
|         for model in apps.get_models(): | ||||
|             if class_is_indexed(model): | ||||
|                 TextIDGenericRelation(cls).contribute_to_class(model, | ||||
|                                                                'index_entries') | ||||
|  |  | |||
|  | @ -60,17 +60,17 @@ def get_descendant_models(model): | |||
|     return descendant_models | ||||
| 
 | ||||
| 
 | ||||
| def get_descendants_content_types_pks(models, db_alias): | ||||
|     return get_content_types_pks( | ||||
|         tuple(descendant_model for model in models | ||||
|               for descendant_model in get_descendant_models(model)), db_alias) | ||||
| def get_descendants_content_types_pks(model): | ||||
|     from django.contrib.contenttypes.models import ContentType | ||||
|     return [ct.pk for ct in | ||||
|             ContentType.objects.get_for_models(*get_descendant_models(model)) | ||||
|             .values()] | ||||
| 
 | ||||
| 
 | ||||
| def get_content_types_pks(models, db_alias): | ||||
| def get_content_types_pk(model): | ||||
|     # We import it locally because this file is loaded before apps are ready. | ||||
|     from django.contrib.contenttypes.models import ContentType | ||||
|     content_types_dict = ContentType.objects.db_manager(db_alias).get_for_models(*models) | ||||
|     return [ct.pk for ct in content_types_dict.values()] | ||||
|     return ContentType.objects.get_for_model(model).pk | ||||
| 
 | ||||
| 
 | ||||
| def get_search_fields(search_fields): | ||||
|  |  | |||
		Ładowanie…
	
		Reference in New Issue
	
	 Karl Hobley
						Karl Hobley