From 121212c6d073679c9b2c1461940ca13c231b19da Mon Sep 17 00:00:00 2001 From: ACMCMC <20495460+ACMCMC@users.noreply.github.com> Date: Fri, 8 Oct 2021 10:36:03 +0200 Subject: [PATCH] Separated `body` and `title` `FULLTEXT` indexes --- .../search/backends/database/mysql/mysql.py | 2 +- .../search/backends/database/mysql/query.py | 2 +- .../migrations/0006_customise_indexentry.py | 29 +++++++++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/wagtail/search/backends/database/mysql/mysql.py b/wagtail/search/backends/database/mysql/mysql.py index a8f7d4f7c5..5761aaef47 100644 --- a/wagtail/search/backends/database/mysql/mysql.py +++ b/wagtail/search/backends/database/mysql/mysql.py @@ -407,7 +407,7 @@ class MySQLSearchQueryCompiler(BaseSearchQueryCompiler): negated = False search_query = self.build_search_query(query) - match_expression = MatchExpression(search_query, output_field=BooleanField()) # For example: MATCH (`title`, `body`) AGAINST ('+query' IN BOOLEAN MODE) + match_expression = MatchExpression(search_query, columns=['title', 'body'], output_field=BooleanField()) # For example: MATCH (`title`, `body`) AGAINST ('+query' IN BOOLEAN MODE) score_expression = ( MatchExpression(search_query, columns=['title'], output_field=FloatField()) * F('title_norm') + MatchExpression(search_query, columns=['body'], output_field=FloatField()) diff --git a/wagtail/search/backends/database/mysql/query.py b/wagtail/search/backends/database/mysql/query.py index d012bed1ba..888ca8368c 100644 --- a/wagtail/search/backends/database/mysql/query.py +++ b/wagtail/search/backends/database/mysql/query.py @@ -198,7 +198,7 @@ class MatchExpression(Expression): def __init__(self, query: SearchQueryCombinable, columns: List[str] = None, output_field: Field = BooleanField()) -> None: super().__init__(output_field=output_field) self.query = query - self.columns = columns or ['title', 'body'] + self.columns = columns or ['title', 'body'] # We need to provide a default list 
of columns if the user doesn't specify one. We have a joint index for 'title' and 'body' (see wagtail.search.migrations.0006_customise_indexentry), so we'll pick that one. def as_sql(self, compiler, connection): compiled_query = compiler.compile(self.query) # Compile the query to a string diff --git a/wagtail/search/migrations/0006_customise_indexentry.py b/wagtail/search/migrations/0006_customise_indexentry.py index f986a9c569..d87f641e94 100644 --- a/wagtail/search/migrations/0006_customise_indexentry.py +++ b/wagtail/search/migrations/0006_customise_indexentry.py @@ -107,6 +107,35 @@ class Migration(migrations.Migration): # that forces to specify the max length of the TextFields that get referenced by the # FULLTEXT index. If we do it manually, it works, because Django can't check that we are # defining a new index. + operations.append( + migrations.RunSQL( + sql=""" + ALTER TABLE wagtailsearch_indexentry + ADD FULLTEXT INDEX `fulltext_body` (`body`) + """, + reverse_sql=""" + ALTER TABLE wagtailsearch_indexentry + DROP INDEX `fulltext_body` + """ + ) + ) + + # We create two separate FULLTEXT indexes for the 'body' and 'title' columns, so that we are able to handle them separately afterwards. + # We handle them separately, for example, when we do scoring: there, we multiply the 'title' score by the value of the 'title_norm' column. This can't be done if we index 'title' and 'body' in the same index, because MySQL doesn't allow searching on a subset of the columns of a defined index (we need to search all the columns of the index at the same time). + operations.append( + migrations.RunSQL( + sql=""" + ALTER TABLE wagtailsearch_indexentry + ADD FULLTEXT INDEX `fulltext_title` (`title`) + """, + reverse_sql=""" + ALTER TABLE wagtailsearch_indexentry + DROP INDEX `fulltext_title` + """ + ) + ) + + # We also need to create a joint index on 'title' and 'body', to be able to query both at the same time. If we don't have this, some queries may return wrong results. 
For example, if we match 'A AND (NOT B)' against 'A, B', it returns False, but if we do (match 'A AND (NOT B)' against 'A') or (match 'A AND (NOT B)' against 'B'), the first one would return True, and the whole expression would be True (wrong result). That's the same as saying that testing subsets does not necessarily produce the same result as testing the whole set. operations.append( migrations.RunSQL( sql="""