Separated `body` and `title` `FULLTEXT` indexes

2021-10-08 10:36:03 +02:00 · 2021-10-08 10:36:03 +02:00 · 121212c6d0
commit 121212c6d0
--- a/wagtail/search/backends/database/mysql/mysql.py
+++ b/wagtail/search/backends/database/mysql/mysql.py
@ -407,7 +407,7 @@ class MySQLSearchQueryCompiler(BaseSearchQueryCompiler):
            negated = False

        search_query = self.build_search_query(query)
-        match_expression = MatchExpression(search_query, output_field=BooleanField())  # For example: MATCH (`title`, `body`) AGAINST ('+query' IN BOOLEAN MODE)
+        match_expression = MatchExpression(search_query, columns=['title', 'body'], output_field=BooleanField())  # For example: MATCH (`title`, `body`) AGAINST ('+query' IN BOOLEAN MODE)
        score_expression = (
            MatchExpression(search_query, columns=['title'], output_field=FloatField()) * F('title_norm')
            + MatchExpression(search_query, columns=['body'], output_field=FloatField())
--- a/wagtail/search/backends/database/mysql/query.py
+++ b/wagtail/search/backends/database/mysql/query.py
@ -198,7 +198,7 @@ class MatchExpression(Expression):
    def __init__(self, query: SearchQueryCombinable, columns: List[str] = None, output_field: Field = BooleanField()) -> None:
        super().__init__(output_field=output_field)
        self.query = query
-        self.columns = columns or ['title', 'body']
+        self.columns = columns or ['title', 'body']  # We need to provide a default list of columns if the user doesn't specify one. We have a joint index for 'title' and 'body' (see wagtail.search.migrations.0006_customise_indexentry), so we'll pick that one.

    def as_sql(self, compiler, connection):
        compiled_query = compiler.compile(self.query)  # Compile the query to a string
--- a/wagtail/search/migrations/0006_customise_indexentry.py
+++ b/wagtail/search/migrations/0006_customise_indexentry.py
@ -107,6 +107,35 @@ class Migration(migrations.Migration):
        # that forces to specify the max length of the TextFields that get referenced by the
        # FULLTEXT index. If we do it manually, it works, because Django can't check that we are
        # defining a new index.
+        operations.append(
+            migrations.RunSQL(
+                sql="""
+                ALTER TABLE wagtailsearch_indexentry
+                    ADD FULLTEXT INDEX `fulltext_body` (`body`)
+                """,
+                reverse_sql="""
+                ALTER TABLE wagtailsearch_indexentry
+                    DROP INDEX `fulltext_body`
+                """
+            )
+        )
+
+        # We create two separate FULLTEXT indexes for the 'body' and 'title' columns, so that we are able to handle them separately afterwards.
+        # We handle them separately, for example, when we do scoring: there, we multiply the 'title' score by the value of the 'title_norm' column. This can't be done if we index 'title' and 'body' in the same index, because MySQL doesn't allow searching on a subset of the columns of a defined index (we need to search all the columns of the index at the same time).
+        operations.append(
+            migrations.RunSQL(
+                sql="""
+                ALTER TABLE wagtailsearch_indexentry
+                    ADD FULLTEXT INDEX `fulltext_title` (`title`)
+                """,
+                reverse_sql="""
+                ALTER TABLE wagtailsearch_indexentry
+                    DROP INDEX `fulltext_title`
+                """
+            )
+        )
+
+        # We also need to create a joint index on 'title' and 'body', to be able to query both at the same time. If we don't have this, some queries may return wrong results. For example, if we match 'A AND (NOT B)' against 'A, B', it returns False, but if we do (match 'A AND (NOT B)' against 'A') or (match 'A AND (NOT B)' against 'B'), the first one would return True, and the whole expression would be True (wrong result). That's the same as saying that testing subsets does not necessarily produce the same result as testing the whole set.
        operations.append(
            migrations.RunSQL(
                sql="""