From b3d8d6a4dac979cd2eef52488264d7e2da21b40d Mon Sep 17 00:00:00 2001
From: Eliot Berriot
Date: Mon, 16 Dec 2019 17:46:32 +0100
Subject: [PATCH] Fix #994: use PostgreSQL full-text-search

---
 api/config/settings/common.py                 |   1 +
 api/funkwhale_api/common/search.py            |  25 ++++
 .../music/migrations/0044_full_text_search.py | 118 ++++++++++++++++++
 api/funkwhale_api/music/models.py             |  10 +-
 api/funkwhale_api/music/utils.py              |   6 +-
 api/funkwhale_api/music/views.py              |  15 ++-
 6 files changed, 170 insertions(+), 5 deletions(-)
 create mode 100644 api/funkwhale_api/music/migrations/0044_full_text_search.py

diff --git a/api/config/settings/common.py b/api/config/settings/common.py
index 2fb7b496c..307f09329 100644
--- a/api/config/settings/common.py
+++ b/api/config/settings/common.py
@@ -928,3 +928,4 @@ MODERATION_EMAIL_NOTIFICATIONS_ENABLED = env.bool(
 # Delay in days after signup before we show the "support us" messages
 INSTANCE_SUPPORT_MESSAGE_DELAY = env.int("INSTANCE_SUPPORT_MESSAGE_DELAY", default=15)
 FUNKWHALE_SUPPORT_MESSAGE_DELAY = env.int("FUNKWHALE_SUPPORT_MESSAGE_DELAY", default=15)
+USE_FULL_TEXT_SEARCH = env.bool("USE_FULL_TEXT_SEARCH", default=False)
diff --git a/api/funkwhale_api/common/search.py b/api/funkwhale_api/common/search.py
index 4e42fd346..b43342594 100644
--- a/api/funkwhale_api/common/search.py
+++ b/api/funkwhale_api/common/search.py
@@ -1,5 +1,6 @@
 import re
 
+from django.contrib.postgres.search import SearchQuery
 from django.db.models import Q
 
 
@@ -56,6 +57,30 @@ def get_query(query_string, search_fields):
     return query
 
 
+def get_fts_query(query_string):
+    """
+    Build a ``Q`` filter on ``body_text`` from a user search string.
+
+    A string wrapped in double quotes on both sides is passed to
+    ``SearchQuery`` untouched, as a raw query. Any other string is split on
+    spaces; each non-empty word becomes a prefix match (``word:*``) and the
+    words are joined with ``&``. Without any word, ``Q(pk=None)`` is returned.
+    """
+    is_quoted = query_string.startswith('"') and query_string.endswith('"')
+    if not is_quoted:
+        # A quote on one side only does not make a raw query: handle the
+        # string as plain words rather than forwarding it unprocessed.
+        # NOTE(review): words are not escaped, and & | ! ( ) : are operators
+        # in a raw tsquery -- confirm how such input should be handled.
+        parts = query_string.split(" ")
+        parts = ["{}:*".format(p) for p in parts if p]
+        if not parts:
+            return Q(pk=None)
+
+        query_string = "&".join(parts)
+    return Q(body_text=SearchQuery(query_string, search_type="raw"))
+
+
 def filter_tokens(tokens, valid):
     return [t for t in tokens if t["key"] in valid]
 
diff --git a/api/funkwhale_api/music/migrations/0044_full_text_search.py b/api/funkwhale_api/music/migrations/0044_full_text_search.py
new file mode 100644
index 000000000..e44df90d9
--- /dev/null
+++ b/api/funkwhale_api/music/migrations/0044_full_text_search.py
@@ -0,0 +1,118 @@
+# Generated by Django 2.2.7 on 2019-12-16 15:06
+
+import django.contrib.postgres.search
+import django.contrib.postgres.indexes
+from django.db import migrations, models
+import django.db.models.deletion
+from django.db import connection
+
+# Per model: the text columns indexed in ``body_text`` and the name of the
+# database trigger keeping that column up to date.
+FIELDS = {
+    "music.Artist": {
+        "fields": [
+            'name',
+        ],
+        "trigger_name": "music_artist_update_body_text"
+    },
+    "music.Track": {
+        "fields": ['title', 'copyright'],
+        "trigger_name": "music_track_update_body_text"
+    },
+    "music.Album": {
+        "fields": ['title'],
+        "trigger_name": "music_album_update_body_text"
+    },
+}
+
+def populate_body_text(apps, schema_editor):
+    """Fill ``body_text`` on every existing row of the models in FIELDS."""
+    # Stay on the database being migrated instead of the default one.
+    db_alias = schema_editor.connection.alias
+    for label, search_config in FIELDS.items():
+        model = apps.get_model(*label.split('.'))
+        print('Populating search index for {}…'.format(model.__name__))
+        vector = django.contrib.postgres.search.SearchVector(*search_config['fields'])
+        model.objects.using(db_alias).update(body_text=vector)
+
+def rewind(apps, schema_editor):
+    """No-op reverse step for ``populate_body_text``."""
+    pass
+
+def setup_triggers(apps, schema_editor):
+    """Create one trigger per model in FIELDS that recomputes ``body_text``."""
+    # Use the connection of the database being migrated (not the global
+    # default one) and let the context manager close the cursor.
+    with schema_editor.connection.cursor() as cursor:
+        for label, search_config in FIELDS.items():
+            model = apps.get_model(*label.split('.'))
+            table = model._meta.db_table
+            print('Creating database trigger {} on {}…'.format(search_config['trigger_name'], table))
+            sql = """
+                CREATE TRIGGER {trigger_name}
+                    BEFORE INSERT OR UPDATE
+                    ON {table}
+                    FOR EACH ROW
+                    EXECUTE PROCEDURE
+                        tsvector_update_trigger(body_text, 'pg_catalog.english', {fields})
+            """.format(
+                trigger_name=search_config['trigger_name'],
+                table=table,
+                fields=', '.join(search_config['fields']),
+            )
+            cursor.execute(sql)
+
+def rewind_triggers(apps, schema_editor):
+    """Drop the triggers created by ``setup_triggers``."""
+    with schema_editor.connection.cursor() as cursor:
+        for label, search_config in FIELDS.items():
+            model = apps.get_model(*label.split('.'))
+            table = model._meta.db_table
+            print('Dropping database trigger {} on {}…'.format(search_config['trigger_name'], table))
+            sql = """
+                DROP TRIGGER IF EXISTS {trigger_name} ON {table}
+            """.format(
+                trigger_name=search_config['trigger_name'],
+                table=table,
+            )
+
+            cursor.execute(sql)
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('music', '0043_album_cover_attachment'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='album',
+            name='body_text',
+            field=django.contrib.postgres.search.SearchVectorField(blank=True),
+        ),
+        migrations.AddField(
+            model_name='artist',
+            name='body_text',
+            field=django.contrib.postgres.search.SearchVectorField(blank=True),
+        ),
+        migrations.AddField(
+            model_name='track',
+            name='body_text',
+            field=django.contrib.postgres.search.SearchVectorField(blank=True),
+        ),
+        migrations.AddIndex(
+            model_name='album',
+            index=django.contrib.postgres.indexes.GinIndex(fields=['body_text'], name='music_album_body_te_0ec97a_gin'),
+        ),
+        migrations.AddIndex(
+            model_name='artist',
+            index=django.contrib.postgres.indexes.GinIndex(fields=['body_text'], name='music_artis_body_te_5c408d_gin'),
+        ),
+        migrations.AddIndex(
+            model_name='track',
+            index=django.contrib.postgres.indexes.GinIndex(fields=['body_text'], name='music_track_body_te_da0a66_gin'),
+        ),
+
+        migrations.RunPython(setup_triggers, rewind_triggers),
+        migrations.RunPython(populate_body_text, rewind),
+    ]
diff --git a/api/funkwhale_api/music/models.py b/api/funkwhale_api/music/models.py
index f53e8e463..3f8c0ffdf 100644
--- a/api/funkwhale_api/music/models.py
+++ b/api/funkwhale_api/music/models.py
@@ -11,6 +11,8 @@ import pydub
 from django.conf import settings
 from django.contrib.contenttypes.fields import GenericRelation
 from django.contrib.postgres.fields import JSONField
+from django.contrib.postgres.search import SearchVectorField
+from django.contrib.postgres.indexes import GinIndex
 from django.core.exceptions import ObjectDoesNotExist
 from django.core.files.base import ContentFile
 from django.core.serializers.json import DjangoJSONEncoder
@@ -19,7 +21,6 @@ from django.db.models.signals import post_save, pre_save
 from django.dispatch import receiver
 from django.urls import reverse
 from django.utils import timezone
-
 from versatileimagefield.fields import VersatileImageField
 
 from funkwhale_api import musicbrainz
@@ -56,10 +57,14 @@ class APIModelMixin(models.Model):
     api_includes = []
     creation_date = models.DateTimeField(default=timezone.now, db_index=True)
     import_hooks = []
+    body_text = SearchVectorField(blank=True)
 
     class Meta:
         abstract = True
         ordering = ["-creation_date"]
+        indexes = [
+            GinIndex(fields=["body_text"]),
+        ]
 
     @classmethod
     def get_or_create_from_api(cls, mbid):
@@ -524,6 +529,9 @@ class Track(APIModelMixin):
 
     class Meta:
         ordering = ["album", "disc_number", "position"]
+        indexes = [
+            GinIndex(fields=["body_text"]),
+        ]
 
     def __str__(self):
         return self.title
diff --git a/api/funkwhale_api/music/utils.py b/api/funkwhale_api/music/utils.py
index 09c8cbd12..b728549d7 100644
--- a/api/funkwhale_api/music/utils.py
+++ b/api/funkwhale_api/music/utils.py
@@ -4,7 +4,11 @@ import magic
 import mutagen
 import pydub
 
-from funkwhale_api.common.search import normalize_query, get_query  # noqa
+from funkwhale_api.common.search import (
+    normalize_query,
+    get_query,
+    get_fts_query,
+)  # noqa
 
 
 def guess_mimetype(f):
diff --git a/api/funkwhale_api/music/views.py b/api/funkwhale_api/music/views.py
index 57d009e99..3b692cb4f 100644
--- a/api/funkwhale_api/music/views.py
+++ b/api/funkwhale_api/music/views.py
@@ -629,7 +629,10 @@ class Search(views.APIView):
             "album__title__unaccent",
             "artist__name__unaccent",
         ]
-        query_obj = utils.get_query(query, search_fields)
+        if settings.USE_FULL_TEXT_SEARCH:
+            query_obj = utils.get_fts_query(query)
+        else:
+            query_obj = utils.get_query(query, search_fields)
         qs = (
             models.Track.objects.all()
             .filter(query_obj)
@@ -639,7 +642,10 @@ class Search(views.APIView):
 
     def get_albums(self, query):
         search_fields = ["mbid", "title__unaccent", "artist__name__unaccent"]
-        query_obj = utils.get_query(query, search_fields)
+        if settings.USE_FULL_TEXT_SEARCH:
+            query_obj = utils.get_fts_query(query)
+        else:
+            query_obj = utils.get_query(query, search_fields)
         qs = (
             models.Album.objects.all()
             .filter(query_obj)
@@ -649,6 +655,9 @@ class Search(views.APIView):
 
     def get_artists(self, query):
         search_fields = ["mbid", "name__unaccent"]
-        query_obj = utils.get_query(query, search_fields)
+        if settings.USE_FULL_TEXT_SEARCH:
+            query_obj = utils.get_fts_query(query)
+        else:
+            query_obj = utils.get_query(query, search_fields)
         qs = models.Artist.objects.all().filter(query_obj).with_albums()
         return common_utils.order_for_search(qs, "name")[: self.max_results]