From e62d67bd730a1358124b821d291c272f9b955797 Mon Sep 17 00:00:00 2001
From: Bertrand Bordage
Date: Wed, 12 Apr 2017 17:16:16 +0200
Subject: [PATCH] Adds PostgreSQL search backend. (#3515)

* Adds PostgreSQL search backend.
* Isort nitpicks.
* Fixes PostgreSQL versions incompatibilities.
* Uses Django lru_cache instead of building our own.
* Fixes PostgreSQL search index on some empty vector & query cases.
* Never sets the PostgreSQL search vector to NULL.
* Simplification + removes caching on two fast enough functions.
* Rewrites stale entries deletion to use the ORM.
---
 docs/reference/contrib/index.rst               |   1 +
 docs/reference/contrib/postgres_search.rst     | 131 +++++++
 docs/topics/search/backends.rst                |  10 +
 docs/topics/search/searching.rst               |   2 +
 wagtail/contrib/postgres_search/__init__.py    |   1 +
 wagtail/contrib/postgres_search/apps.py        |  29 ++
 wagtail/contrib/postgres_search/backend.py     | 367 ++++++++++++++++++
 .../migrations/0001_initial.py                 |  48 +++
 .../postgres_search/migrations/__init__.py     |   0
 wagtail/contrib/postgres_search/models.py      |  71 ++++
 .../contrib/postgres_search/tests/__init__.py  |   0
 .../postgres_search/tests/test_backend.py      |  42 ++
 wagtail/contrib/postgres_search/utils.py       | 118 ++++++
 wagtail/tests/settings.py                      |   9 +
 14 files changed, 829 insertions(+)
 create mode 100644 docs/reference/contrib/postgres_search.rst
 create mode 100644 wagtail/contrib/postgres_search/__init__.py
 create mode 100644 wagtail/contrib/postgres_search/apps.py
 create mode 100644 wagtail/contrib/postgres_search/backend.py
 create mode 100644 wagtail/contrib/postgres_search/migrations/0001_initial.py
 create mode 100644 wagtail/contrib/postgres_search/migrations/__init__.py
 create mode 100644 wagtail/contrib/postgres_search/models.py
 create mode 100644 wagtail/contrib/postgres_search/tests/__init__.py
 create mode 100644 wagtail/contrib/postgres_search/tests/test_backend.py
 create mode 100644 wagtail/contrib/postgres_search/utils.py

diff --git a/docs/reference/contrib/index.rst b/docs/reference/contrib/index.rst
index bac4346839..98703861a2 100644
--- a/docs/reference/contrib/index.rst
+++ b/docs/reference/contrib/index.rst
@@ -14,6 +14,7 @@ Wagtail ships with a variety of extra optional modules.
     routablepage
     api/index
     modeladmin/index
+    postgres_search
     searchpromotions
     table_block

diff --git a/docs/reference/contrib/postgres_search.rst b/docs/reference/contrib/postgres_search.rst
new file mode 100644
index 0000000000..47fe10e8ec
--- /dev/null
+++ b/docs/reference/contrib/postgres_search.rst
@@ -0,0 +1,131 @@
+.. _postgres_search:
+
+========================
+PostgreSQL search engine
+========================
+
+This contrib module provides a search engine backend for Wagtail using
+`PostgreSQL full-text search capabilities `_.
+
+.. warning::
+
+   | You need to use Django 1.10 or later to be able to use this backend.
+   | You can only use this module to index data from a PostgreSQL database.
+
+**Features**:
+
+- Supports all the search features available in Wagtail.
+- Easy to install and adds no external dependency or service.
+- Excellent performance for sites with up to 200,000 pages,
+  and still decent performance for sites of up to a million pages.
+- Faster to reindex than Elasticsearch if you use PostgreSQL 9.5 or later.
+
+**Downsides**:
+
+- ``SearchField(partial_match=True)`` is not handled.
+- Due to a PostgreSQL limitation, ``SearchField(boost=…)`` is only partially
+  respected: boost values are mapped onto at most 4 different weights.
+  If you define 4 or fewer different boosts, ranking follows them exactly.
+  If you define more than 4, nearby boost values are grouped together, so
+  ranking will be slightly less precise, but still close to what you specified.
+- When :ref:`wagtailsearch_specifying_fields`, the index is not used,
+  so searching will be slow on large sites.
+- Also when :ref:`wagtailsearch_specifying_fields`, you cannot search
+  on a specific method.
+
+
+Installation
+============
+
+Add ``'wagtail.contrib.postgres_search',`` anywhere in your ``INSTALLED_APPS``:
+
+.. code-block:: python
+
+    INSTALLED_APPS = [
+        ...
+        'wagtail.contrib.postgres_search',
+        ...
+    ]
+
+Then configure Wagtail to use it as a search backend.
+Give it the alias ``'default'`` if you want it to be the default search backend:
+
+.. code-block:: python
+
+    WAGTAILSEARCH_BACKENDS = {
+        'default': {
+            'BACKEND': 'wagtail.contrib.postgres_search.backend',
+        },
+    }
+
+You then need to index existing data in this backend using
+the :ref:`update_index` command. You can rerun this command whenever
+you want, but it should not be needed after the first run, since
+the index is automatically kept up to date when data is modified.
+To disable this behaviour, see :ref:`wagtailsearch_backends_auto_update`.
+
+
+Configuration
+=============
+
+Language / PostgreSQL search configuration
+------------------------------------------
+
+Use the additional ``'SEARCH_CONFIG'`` key to define which PostgreSQL
+search configuration should be used. For example:
+
+.. code-block:: python
+
+    WAGTAILSEARCH_BACKENDS = {
+        'default': {
+            'BACKEND': 'wagtail.contrib.postgres_search.backend',
+            'SEARCH_CONFIG': 'english',
+        }
+    }
+
+A PostgreSQL search configuration mostly defines rules for a language,
+English in this case. It consists of a set of algorithms
+(parsers & analysers) and language specifications (stop words, stems,
+dictionaries, synonyms, thesauruses, etc.).
+
+A few search configurations are already defined by default in PostgreSQL.
+You can list them using ``sudo -u postgres psql -c "\dF"`` in a Unix shell
+or by using this SQL query: ``SELECT cfgname FROM pg_catalog.pg_ts_config``.
+
+These predefined search configurations are decent, but basic
+compared to commercial search engines.
+If you want better support for your language, you will have to create
+your own PostgreSQL search configuration. See the PostgreSQL documentation for
+`an example `_,
+`the list of parsers `_
+and `a guide to use dictionaries `_.
+
+Atomic rebuild
+--------------
+
+Like the Elasticsearch backend, this backend supports
+:ref:`wagtailsearch_backends_atomic_rebuild`:
+
+.. code-block:: python
+
+    WAGTAILSEARCH_BACKENDS = {
+        'default': {
+            'BACKEND': 'wagtail.contrib.postgres_search.backend',
+            'ATOMIC_REBUILD': True,
+        }
+    }
+
+Atomic rebuild is rarely needed with this backend. In Elasticsearch, all data
+is removed before rebuilding the index, but in this PostgreSQL backend,
+only objects that no longer exist in the database are removed. The index is
+then updated progressively, so there is never a moment when it is empty.
+
+However, if you want to be extra sure that nothing goes wrong while updating
+the index, you can use atomic rebuild. The index is then rebuilt inside a
+transaction and nobody has access to it until reindexing is complete. If any
+error occurs during the operation, all changes to the index are reverted
+as if reindexing had never happened.
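For illustration, here is a sketch of how a site might rely on the boost
behaviour described above. The ``BlogPage`` model and its fields are
hypothetical and not part of this patch; the point is that a couple of
explicit boosts, plus the defaults inherited from ``Page.search_fields``,
stay within the four available PostgreSQL weights, so ranking follows them
closely:

.. code-block:: python

    from django.db import models

    from wagtail.wagtailcore.models import Page
    from wagtail.wagtailsearch import index


    class BlogPage(Page):
        intro = models.TextField(blank=True)
        body = models.TextField(blank=True)

        # Distinct boost values are bucketed into PostgreSQL's four
        # weights (A, B, C, D); with few distinct values the mapping
        # is exact.
        search_fields = Page.search_fields + [
            index.SearchField('intro', boost=2),
            index.SearchField('body', boost=1),
        ]

A query such as ``BlogPage.objects.live().search("hello world")`` would then
rank matches in ``intro`` above equivalent matches in ``body``.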
diff --git a/docs/topics/search/backends.rst b/docs/topics/search/backends.rst index 712d50a591..4df1d33101 100644 --- a/docs/topics/search/backends.rst +++ b/docs/topics/search/backends.rst @@ -70,6 +70,16 @@ It also doesn't support: If any of these features are important to you, we recommend using Elasticsearch instead. +PostgreSQL Backend +------------------ + +``wagtail.contrib.postgres_search.backend`` + +If you use PostgreSQL for your database and your site has less than +a million pages, you probably want to use this backend. + +See :ref:`postgres_search` for more detail. + .. _wagtailsearch_backends_elasticsearch: diff --git a/docs/topics/search/searching.rst b/docs/topics/search/searching.rst index 61e11359a9..a9f510230f 100644 --- a/docs/topics/search/searching.rst +++ b/docs/topics/search/searching.rst @@ -82,6 +82,8 @@ You can also pass a QuerySet into the ``search`` method which allows you to add [] +.. _wagtailsearch_specifying_fields: + Specifying the fields to search ------------------------------- diff --git a/wagtail/contrib/postgres_search/__init__.py b/wagtail/contrib/postgres_search/__init__.py new file mode 100644 index 0000000000..a4dcfe6e66 --- /dev/null +++ b/wagtail/contrib/postgres_search/__init__.py @@ -0,0 +1 @@ +default_app_config = 'wagtail.contrib.postgres_search.apps.PostgresSearchConfig' diff --git a/wagtail/contrib/postgres_search/apps.py b/wagtail/contrib/postgres_search/apps.py new file mode 100644 index 0000000000..8d0b93fbce --- /dev/null +++ b/wagtail/contrib/postgres_search/apps.py @@ -0,0 +1,29 @@ +from __future__ import absolute_import, unicode_literals + +from django.apps import AppConfig +from django.core.checks import Error, Tags, register + +from .utils import ( + BOOSTS_WEIGHTS, WEIGHTS_COUNT, WEIGHTS_VALUES, determine_boosts_weights, + get_postgresql_connections) + + +class PostgresSearchConfig(AppConfig): + name = 'wagtail.contrib.postgres_search' + + def ready(self): + @register(Tags.compatibility, Tags.database) + def check_if_postgresql(app_configs, **kwargs): + if get_postgresql_connections(): + return [] + return [Error('You must use a PostgreSQL database ' + 'to use PostgreSQL search.', + id='wagtail.contrib.postgres_search.E001')] + + BOOSTS_WEIGHTS.extend(determine_boosts_weights()) + sorted_boosts_weights = sorted(BOOSTS_WEIGHTS, key=lambda t: t[0]) + max_weight = sorted_boosts_weights[-1][0] + WEIGHTS_VALUES.extend([v / max_weight + for v, w in sorted_boosts_weights]) + for _ in range(WEIGHTS_COUNT - len(WEIGHTS_VALUES)): + WEIGHTS_VALUES.insert(0, 0) diff --git a/wagtail/contrib/postgres_search/backend.py b/wagtail/contrib/postgres_search/backend.py new file mode 100644 index 0000000000..dcec8f7afb --- /dev/null +++ b/wagtail/contrib/postgres_search/backend.py @@ -0,0 +1,367 @@ +# coding: utf-8 + +from __future__ import absolute_import, unicode_literals + +from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector +from django.db import DEFAULT_DB_ALIAS, NotSupportedError, connections, transaction +from django.db.models import F, Manager, TextField, Value +from django.db.models.constants import LOOKUP_SEP +from django.db.models.functions import Cast +from django.utils.encoding import force_text, python_2_unicode_compatible +from django.utils.six import string_types + +from wagtail.wagtailsearch.backends.base import ( + BaseSearchBackend, BaseSearchQuery, BaseSearchResults) +from wagtail.wagtailsearch.index import RelatedFields, SearchField + +from .models import IndexEntry +from .utils import ( + ADD, AND, 
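+    # ADD, AND and OR reduce an iterable of expressions with +, & and |
+    # respectively; the remaining helpers handle content type lookups,
+    # search weights and keyword parsing (defined in utils.py below).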
OR, WEIGHTS_VALUES, get_content_types_pks, get_postgresql_connections, get_weight, + keyword_split, unidecode) + + +# TODO: Add autocomplete. + + +def get_db_alias(queryset): + return queryset._db or DEFAULT_DB_ALIAS + + +def get_sql(queryset): + return queryset.query.get_compiler(get_db_alias(queryset)).as_sql() + + +def get_pk_column(model): + return model._meta.pk.get_attname_column()[1] + + +@python_2_unicode_compatible +class Index(object): + def __init__(self, backend, model, db_alias=None): + self.backend = backend + self.model = model + if db_alias is None: + db_alias = DEFAULT_DB_ALIAS + if connections[db_alias].vendor != 'postgresql': + raise NotSupportedError( + 'You must select a PostgreSQL database ' + 'to use PostgreSQL search.') + self.db_alias = db_alias + self.name = model._meta.label + self.search_fields = self.model.get_search_fields() + + def add_model(self, model): + pass + + def refresh(self): + pass + + def delete_stale_entries(self): + if self.model._meta.parents: + # We don’t need to delete stale entries for non-root models, + # since we already delete them by deleting roots. + return + existing_pks = (self.model._default_manager.using(self.db_alias) + .annotate(object_id=Cast('pk', TextField())) + .values('object_id')) + stale_entries = (IndexEntry._default_manager.using(self.db_alias) + .for_models(self.model) + .exclude(object_id__in=existing_pks)) + stale_entries.delete() + + def get_config(self): + return self.backend.params.get('SEARCH_CONFIG') + + def prepare_value(self, value): + if isinstance(value, string_types): + return value + if isinstance(value, list): + return ', '.join(self.prepare_value(item) for item in value) + if isinstance(value, dict): + return ', '.join(self.prepare_value(item) + for item in value.values()) + return force_text(value) + + def prepare_field(self, obj, field): + if isinstance(field, SearchField): + yield (unidecode(self.prepare_value(field.get_value(obj))), + get_weight(field.boost)) + elif isinstance(field, RelatedFields): + sub_obj = getattr(obj, field.field_name) + if sub_obj is None: + return + if callable(sub_obj): + sub_obj = sub_obj() + if isinstance(sub_obj, Manager): + sub_objs = sub_obj.all() + else: + sub_objs = [sub_obj] + for sub_obj in sub_objs: + for sub_field in field.fields: + for value in self.prepare_field(sub_obj, sub_field): + yield value + + def prepare_body(self, obj): + return [(value, boost) for field in self.search_fields + for value, boost in self.prepare_field(obj, field)] + + def add_item(self, obj): + self.add_items(self.model, [obj]) + + def add_items_upsert(self, connection, content_type_pk, objs, config): + vectors_sql = [] + data_params = [] + sql_template = ('to_tsvector(%s)' if config is None + else "to_tsvector('%s', %%s)" % config) + sql_template = 'setweight(%s, %%s)' % sql_template + for obj in objs: + data_params.extend((content_type_pk, obj._object_id)) + if obj._body_: + vectors_sql.append('||'.join(sql_template for _ in obj._body_)) + data_params.extend([v for t in obj._body_ for v in t]) + else: + vectors_sql.append("''::tsvector") + data_sql = ', '.join(['(%%s, %%s, %s)' % s for s in vectors_sql]) + with connection.cursor() as cursor: + cursor.execute(""" + INSERT INTO %s(content_type_id, object_id, body_search) + (VALUES %s) + ON CONFLICT (content_type_id, object_id) + DO UPDATE SET body_search = EXCLUDED.body_search + """ % (IndexEntry._meta.db_table, data_sql), data_params) + + def add_items_update_then_create(self, content_type_pk, objs, config): + ids_and_objs = {} + for obj in 
objs: + obj._search_vector = ( + ADD([ + SearchVector(Value(text), weight=weight, config=config) + for text, weight in obj._body_]) + if obj._body_ else SearchVector(Value(''))) + ids_and_objs[obj._object_id] = obj + index_entries = IndexEntry._default_manager.using(self.db_alias) + index_entries_for_ct = index_entries.filter( + content_type_id=content_type_pk) + indexed_ids = frozenset( + index_entries_for_ct.filter(object_id__in=ids_and_objs) + .values_list('object_id', flat=True)) + for indexed_id in indexed_ids: + obj = ids_and_objs[indexed_id] + index_entries_for_ct.filter(object_id=obj._object_id) \ + .update(body_search=obj._search_vector) + to_be_created = [] + for object_id in ids_and_objs: + if object_id not in indexed_ids: + to_be_created.append(IndexEntry( + content_type_id=content_type_pk, + object_id=object_id, + body_search=ids_and_objs[object_id]._search_vector, + )) + index_entries.bulk_create(to_be_created) + + def add_items(self, model, objs): + content_type_pk = get_content_types_pks((model,), self.db_alias)[0] + config = self.get_config() + for obj in objs: + obj._object_id = force_text(obj.pk) + obj._body_ = self.prepare_body(obj) + connection = connections[self.db_alias] + if connection.pg_version >= 90500: # PostgreSQL >= 9.5 + self.add_items_upsert(connection, content_type_pk, objs, config) + else: + self.add_items_update_then_create(content_type_pk, objs, config) + + def __str__(self): + return self.name + + +class PostgresSearchQuery(BaseSearchQuery): + DEFAULT_OPERATOR = 'and' + + def __init__(self, *args, **kwargs): + super(PostgresSearchQuery, self).__init__(*args, **kwargs) + self.search_fields = self.queryset.model.get_search_fields() + + def get_search_query(self, config): + combine = OR if self.operator == 'or' else AND + search_terms = keyword_split(unidecode(self.query_string)) + if not search_terms: + return SearchQuery('') + return combine(SearchQuery(q, config=config) for q in search_terms) + + def get_base_queryset(self): + # Removes order for performance’s sake. + return self.queryset.order_by() + + def get_in_index_queryset(self, queryset, search_query): + return (IndexEntry._default_manager.using(get_db_alias(queryset)) + .for_models(queryset.model).filter(body_search=search_query)) + + def get_in_index_count(self, queryset, search_query): + index_sql, index_params = get_sql( + self.get_in_index_queryset(queryset, search_query).pks()) + model_sql, model_params = get_sql(queryset) + sql = """ + SELECT COUNT(*) + FROM (%s) AS index_entry + INNER JOIN (%s) AS obj ON obj."%s" = index_entry.typed_pk; + """ % (index_sql, model_sql, get_pk_column(queryset.model)) + with connections[get_db_alias(queryset)].cursor() as cursor: + cursor.execute(sql, index_params + model_params) + return cursor.fetchone()[0] + + def get_boost(self, field_name, fields=None): + if fields is None: + fields = self.search_fields + if LOOKUP_SEP in field_name: + field_name, sub_field_name = field_name.split(LOOKUP_SEP, 1) + else: + sub_field_name = None + for field in fields: + if field.field_name == field_name: + # Note: Searching on a specific related field using + # `.search(fields=…)` is not yet supported by Wagtail. + # This method anticipates by already implementing it. 
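+                # For example, a call like .search('foo', fields=['authors__name'])
+                # (with a hypothetical 'authors' RelatedFields entry) would be
+                # split into ('authors', 'name') above, then recurse below into
+                # the related fields to find the boost of the 'name' sub-field.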
+ if isinstance(field, RelatedFields): + return self.get_boost(sub_field_name, field.fields) + return field.boost + + def get_in_fields_queryset(self, queryset, search_query): + if not self.fields: + return queryset.none() + return ( + queryset.annotate( + _search_=ADD( + SearchVector(field, config=search_query.config, + weight=get_weight(self.get_boost(field))) + for field in self.fields)) + .filter(_search_=search_query)) + + def search_count(self, config): + queryset = self.get_base_queryset() + search_query = self.get_search_query(config=config) + if self.fields is None: + return self.get_in_index_count(queryset, search_query) + return self.get_in_fields_queryset(queryset, search_query).count() + + def search_in_index(self, queryset, search_query, start, stop): + index_entries = self.get_in_index_queryset(queryset, search_query) + if self.order_by_relevance: + index_entries = index_entries.rank(search_query) + index_sql, index_params = get_sql(index_entries.pks()) + model_sql, model_params = get_sql(queryset) + model = queryset.model + sql = """ + SELECT obj.* + FROM (%s) AS index_entry + INNER JOIN (%s) AS obj ON obj."%s" = index_entry.typed_pk + OFFSET %%s LIMIT %%s; + """ % (index_sql, model_sql, get_pk_column(model)) + limits = (start, None if stop is None else stop - start) + return model._default_manager.using(get_db_alias(queryset)).raw( + sql, index_params + model_params + limits) + + def search_in_fields(self, queryset, search_query, start, stop): + return (self.get_in_fields_queryset(queryset, search_query) + .annotate(_rank_=SearchRank(F('_search_'), search_query, + weights=WEIGHTS_VALUES)) + .order_by('-_rank_'))[start:stop] + + def search(self, config, start, stop): + queryset = self.get_base_queryset() + if self.query_string is None: + return queryset[start:stop] + search_query = self.get_search_query(config=config) + if self.fields is None: + return self.search_in_index(queryset, search_query, start, stop) + return self.search_in_fields(queryset, search_query, start, stop) + + +class PostgresSearchResult(BaseSearchResults): + def get_config(self): + queryset = self.query.queryset + return self.backend.get_index_for_model( + queryset.model, queryset._db).get_config() + + def _do_search(self): + return list(self.query.search(self.get_config(), + self.start, self.stop)) + + def _do_count(self): + return self.query.search_count(self.get_config()) + + +class PostgresSearchRebuilder: + def __init__(self, index): + self.index = index + + def start(self): + self.index.delete_stale_entries() + return self.index + + def finish(self): + pass + + +class PostgresSearchAtomicRebuilder(PostgresSearchRebuilder): + def __init__(self, index): + super(PostgresSearchAtomicRebuilder, self).__init__(index) + self.transaction = transaction.atomic(using=index.db_alias) + self.transaction_opened = False + + def start(self): + self.transaction.__enter__() + self.transaction_opened = True + return super(PostgresSearchAtomicRebuilder, self).start() + + def finish(self): + self.transaction.__exit__(None, None, None) + self.transaction_opened = False + + def __del__(self): + # TODO: Implement a cleaner way to close the connection on failure. 
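+        # If the rebuilder is garbage-collected while its transaction is
+        # still open, finish() was never reached, most likely because an
+        # error interrupted the rebuild; flag the transaction for rollback
+        # so that a half-built index is not committed.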
+ if self.transaction_opened: + self.transaction.needs_rollback = True + self.finish() + + +class PostgresSearchBackend(BaseSearchBackend): + query_class = PostgresSearchQuery + results_class = PostgresSearchResult + rebuilder_class = PostgresSearchRebuilder + atomic_rebuilder_class = PostgresSearchAtomicRebuilder + + def __init__(self, params): + super(PostgresSearchBackend, self).__init__(params) + self.params = params + if params.get('ATOMIC_REBUILD', False): + self.rebuilder_class = self.atomic_rebuilder_class + + def get_index_for_model(self, model, db_alias=None): + return Index(self, model, db_alias) + + def get_index_for_object(self, obj): + return self.get_index_for_model(obj._meta.model, obj._state.db) + + def reset_index(self): + for connection in get_postgresql_connections(): + IndexEntry._default_manager.using(connection.alias).delete() + + def add_type(self, model): + pass # Not needed. + + def refresh_index(self): + pass # Not needed. + + def add(self, obj): + self.get_index_for_object(obj).add_item(obj) + + def add_bulk(self, model, obj_list): + if obj_list: + self.get_index_for_object(obj_list[0]).add_items(model, obj_list) + + def delete(self, obj): + IndexEntry._default_manager.for_object(obj).delete() + + +SearchBackend = PostgresSearchBackend diff --git a/wagtail/contrib/postgres_search/migrations/0001_initial.py b/wagtail/contrib/postgres_search/migrations/0001_initial.py new file mode 100644 index 0000000000..685526071d --- /dev/null +++ b/wagtail/contrib/postgres_search/migrations/0001_initial.py @@ -0,0 +1,48 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.10.1 on 2017-03-22 14:53 +from __future__ import unicode_literals + +import django.db.models.deletion + +from django.db import migrations, models + +import django.contrib.postgres.fields.jsonb +import django.contrib.postgres.search +from ..models import IndexEntry + + +table = IndexEntry._meta.db_table + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ('contenttypes', '0002_remove_content_type_name'), + ] + + operations = [ + migrations.CreateModel( + name='IndexEntry', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('object_id', models.TextField()), + ('body_search', django.contrib.postgres.search.SearchVectorField()), + ('content_type', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='contenttypes.ContentType')), + ], + options={ + 'verbose_name_plural': 'index entries', + 'verbose_name': 'index entry', + }, + ), + migrations.AlterUniqueTogether( + name='indexentry', + unique_together=set([('content_type', 'object_id')]), + ), + migrations.RunSQL( + 'CREATE INDEX {0}_body_search ON {0} ' + 'USING GIN(body_search);'.format(table), + 'DROP INDEX {}_body_search;'.format(table), + ), + ] diff --git a/wagtail/contrib/postgres_search/migrations/__init__.py b/wagtail/contrib/postgres_search/migrations/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/wagtail/contrib/postgres_search/models.py b/wagtail/contrib/postgres_search/models.py new file mode 100644 index 0000000000..8826d91288 --- /dev/null +++ b/wagtail/contrib/postgres_search/models.py @@ -0,0 +1,71 @@ +from __future__ import absolute_import, unicode_literals + +from django.contrib.contenttypes.fields import GenericForeignKey +from django.contrib.contenttypes.models import ContentType +from django.contrib.postgres.search import SearchRank, SearchVectorField +from django.db.models import ( + CASCADE, AutoField, 
BigAutoField, BigIntegerField, F, ForeignKey, IntegerField, Model, QuerySet, + TextField) +from django.db.models.functions import Cast +from django.utils.encoding import force_text, python_2_unicode_compatible +from django.utils.translation import ugettext_lazy as _ + +from .utils import WEIGHTS_VALUES, get_descendants_content_types_pks + + +class IndexQuerySet(QuerySet): + def for_models(self, *models): + if not models: + return self.none() + return self.filter( + content_type_id__in=get_descendants_content_types_pks(models, + self._db)) + + def for_object(self, obj): + db_alias = obj._state.db + return (self.using(db_alias).for_models(obj._meta.model) + .filter(object_id=force_text(obj.pk))) + + def add_rank(self, search_query): + return self.annotate( + rank=SearchRank( + F('body_search'), search_query, + weights='{' + ','.join(map(str, WEIGHTS_VALUES)) + '}')) + + def rank(self, search_query): + return self.add_rank(search_query).order_by('-rank') + + def pks(self): + cast_field = self.model._meta.pk + if isinstance(cast_field, BigAutoField): + cast_field = BigIntegerField() + elif isinstance(cast_field, AutoField): + cast_field = IntegerField() + return (self.annotate(typed_pk=Cast('object_id', cast_field)) + .values_list('typed_pk', flat=True)) + + +@python_2_unicode_compatible +class IndexEntry(Model): + content_type = ForeignKey(ContentType, on_delete=CASCADE) + # We do not use an IntegerField since primary keys are not always integers. + object_id = TextField() + content_object = GenericForeignKey() + + # TODO: Add per-object boosting. + body_search = SearchVectorField() + + objects = IndexQuerySet.as_manager() + + class Meta: + unique_together = ('content_type', 'object_id') + verbose_name = _('index entry') + verbose_name_plural = _('index entries') + # TODO: Move here the GIN index from the migration. + + def __str__(self): + return '%s: %s' % (self.content_type.name, self.content_object) + + @property + def model(self): + return self.content_type.model diff --git a/wagtail/contrib/postgres_search/tests/__init__.py b/wagtail/contrib/postgres_search/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/wagtail/contrib/postgres_search/tests/test_backend.py b/wagtail/contrib/postgres_search/tests/test_backend.py new file mode 100644 index 0000000000..9ce885f0c5 --- /dev/null +++ b/wagtail/contrib/postgres_search/tests/test_backend.py @@ -0,0 +1,42 @@ +from __future__ import absolute_import, unicode_literals + +from django.core.management import call_command +from django.test import TestCase +from django.utils.six import StringIO + +from wagtail.tests.search.models import SearchTest +from wagtail.wagtailsearch.tests.test_backends import BackendTests + + +class TestPostgresSearchBackend(BackendTests, TestCase): + backend_path = 'wagtail.contrib.postgres_search.backend' + + def test_update_index_command(self): + self.backend.reset_index() + + results = self.backend.search(None, SearchTest) + # We find results anyway because we searched for nothing. + self.assertSetEqual(set(results), + {self.testa, self.testb, self.testc.searchtest_ptr, + self.testd.searchtest_ptr}) + + # But now, we can't find anything because the index is empty. 
+ results = self.backend.search('hello', SearchTest) + self.assertSetEqual(set(results), set()) + results = self.backend.search('world', SearchTest) + self.assertSetEqual(set(results), set()) + + # Run update_index command + with self.ignore_deprecation_warnings(): + # ignore any DeprecationWarnings thrown by models with old-style + # indexed_fields definitions + call_command('update_index', backend_name=self.backend_name, + interactive=False, stdout=StringIO()) + + # And now we can finally find results. + results = self.backend.search('hello', SearchTest) + self.assertSetEqual(set(results), {self.testa, self.testb, + self.testc.searchtest_ptr}) + results = self.backend.search('world', SearchTest) + self.assertSetEqual(set(results), {self.testa, + self.testd.searchtest_ptr}) diff --git a/wagtail/contrib/postgres_search/utils.py b/wagtail/contrib/postgres_search/utils.py new file mode 100644 index 0000000000..0d8393c0d4 --- /dev/null +++ b/wagtail/contrib/postgres_search/utils.py @@ -0,0 +1,118 @@ +from __future__ import absolute_import, unicode_literals + +import operator +import re +from functools import partial, reduce + +from django.apps import apps +from django.db import connections +from django.db.models import Q +from django.utils.lru_cache import lru_cache + +from wagtail.wagtailsearch.index import Indexed, RelatedFields, SearchField + +try: + # Only use the GPLv2 licensed unidecode if it's installed. + from unidecode import unidecode +except ImportError: + def unidecode(value): + return value + + +def get_postgresql_connections(): + return [connection for connection in connections.all() + if connection.vendor == 'postgresql'] + + +# Reduce any iterable to a single value using a logical OR e.g. (a | b | ...) +OR = partial(reduce, operator.or_) +# Reduce any iterable to a single value using a logical AND e.g. (a & b & ...) +AND = partial(reduce, operator.and_) +# Reduce any iterable to a single value using an addition +ADD = partial(reduce, operator.add) + + +def keyword_split(keywords): + """ + Return all the keywords in a keyword string. + + Keeps keywords surrounded by quotes together, removing the surrounding quotes: + + >>> keyword_split('Hello I\\'m looking for "something special"') + ['Hello', "I'm", 'looking', 'for', 'something special'] + + Nested quoted strings are returned as is: + + >>> keyword_split("He said \\"I'm looking for 'something special'\\" so I've given him the 'special item'") + ['He', 'said', "I'm looking for 'something special'", 'so', "I've", 'given', 'him', 'the', 'special item'] + + """ + matches = re.findall(r'"([^"]+)"|\'([^\']+)\'|(\S+)', keywords) + return [match[0] or match[1] or match[2] for match in matches] + + +def get_descendant_models(model): + """ + Returns all descendants of a model, including the model itself. + """ + descendant_models = {other_model for other_model in apps.get_models() + if issubclass(other_model, model)} + descendant_models.add(model) + return descendant_models + + +def get_descendants_content_types_pks(models, db_alias): + return get_content_types_pks( + tuple(descendant_model for model in models + for descendant_model in get_descendant_models(model)), db_alias) + + +@lru_cache() +def get_content_types_pks(models, db_alias): + # We import it locally because this file is loaded before apps are ready. 
+ from django.contrib.contenttypes.models import ContentType + return list(ContentType._default_manager.using(db_alias) + .filter(OR([Q(app_label=model._meta.app_label, + model=model._meta.model_name) + for model in models])) + .values_list('pk', flat=True)) + + +def get_search_fields(search_fields): + for search_field in search_fields: + if isinstance(search_field, SearchField): + yield search_field + elif isinstance(search_field, RelatedFields): + for sub_field in get_search_fields(search_field.fields): + yield sub_field + + +WEIGHTS = 'ABCD' +WEIGHTS_COUNT = len(WEIGHTS) +# These are filled when apps are ready. +BOOSTS_WEIGHTS = [] +WEIGHTS_VALUES = [] + + +def determine_boosts_weights(): + boosts = set() + for model in apps.get_models(): + if issubclass(model, Indexed): + for search_field in get_search_fields(model.get_search_fields()): + boost = search_field.boost + boosts.add(0 if boost is None else boost) + if len(boosts) <= WEIGHTS_COUNT: + return zip(reversed(sorted(boosts)), WEIGHTS) + min_boost = min(boosts) + max_boost = max(boosts) + boost_step = (max_boost - min_boost) / WEIGHTS_COUNT + return [(min_boost + (i * boost_step), weight) + for i, weight in zip(range(WEIGHTS_COUNT), WEIGHTS)] + + +def get_weight(boost): + if boost is None: + boost = 0 + for max_boost, weight in BOOSTS_WEIGHTS: + if boost >= max_boost: + return weight diff --git a/wagtail/tests/settings.py b/wagtail/tests/settings.py index 008361d7fa..5302e6a65d 100644 --- a/wagtail/tests/settings.py +++ b/wagtail/tests/settings.py @@ -178,6 +178,15 @@ WAGTAILSEARCH_BACKENDS = { AUTH_USER_MODEL = 'customuser.CustomUser' +if django.VERSION >= (1, 10) and os.environ.get('DATABASE_ENGINE') in ( + # Remove next line when Django 1.8 support is dropped. + 'django.db.backends.postgresql_psycopg2', + 'django.db.backends.postgresql'): + INSTALLED_APPS += ('wagtail.contrib.postgres_search',) + WAGTAILSEARCH_BACKENDS['postgresql'] = { + 'BACKEND': 'wagtail.contrib.postgres_search.backend', + } + if 'ELASTICSEARCH_URL' in os.environ: if os.environ.get('ELASTICSEARCH_VERSION') == '5': backend = 'wagtail.wagtailsearch.backends.elasticsearch5'
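To make the weighting machinery above easier to follow, here is a small
self-contained sketch (illustrative boost values only, limited to the case of
at most 4 distinct boosts, which ``determine_boosts_weights`` maps directly)
of how boosts become PostgreSQL weights and the ``SearchRank`` weights array:

.. code-block:: python

    WEIGHTS = 'ABCD'  # PostgreSQL tsvector weights; 'A' ranks highest.


    def boosts_to_weights(boosts):
        # Highest boost gets 'A', the next 'B', and so on, mirroring
        # determine_boosts_weights() when there are at most 4 distinct boosts.
        return list(zip(sorted(boosts, reverse=True), WEIGHTS))


    def weight_for(boost, boosts_weights):
        # First bucket whose threshold the boost reaches, as in get_weight().
        for max_boost, weight in boosts_weights:
            if boost >= max_boost:
                return weight


    boosts_weights = boosts_to_weights({0, 1, 2, 10})
    print(boosts_weights)                  # [(10, 'A'), (2, 'B'), (1, 'C'), (0, 'D')]
    print(weight_for(10, boosts_weights))  # 'A', e.g. a title-like field
    print(weight_for(1, boosts_weights))   # 'C'

    # On startup, apps.py also normalises the boosts into the 4-element
    # weights array handed to SearchRank, in the D-to-A order ts_rank expects:
    max_boost = max(boost for boost, _ in boosts_weights)
    print([boost / max_boost for boost, _ in sorted(boosts_weights)])
    # [0.0, 0.1, 0.2, 1.0]

With more than 4 distinct boosts, ``utils.py`` instead splits the boost range
into 4 buckets, which is why the documentation above warns that ranking then
becomes slightly less precise.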