From eada1e96eef7b4aa42aed910b7020677ef4a5c79 Mon Sep 17 00:00:00 2001 From: Greyson Parrelli Date: Wed, 21 Dec 2022 09:26:30 -0500 Subject: [PATCH] Improve emoji search rankings. --- .../securesms/database/EmojiSearchTable.kt | 73 +++++++++++++++---- .../helpers/SignalDatabaseMigrations.kt | 7 +- .../migration/V169_EmojiSearchIndexRank.kt | 27 +++++++ .../database/model/EmojiSearchData.java | 18 +++++ .../emoji/search/EmojiSearchRepository.kt | 2 +- .../org/signal/core/util/CursorExtensions.kt | 11 +++ 6 files changed, 123 insertions(+), 15 deletions(-) create mode 100644 app/src/main/java/org/thoughtcrime/securesms/database/helpers/migration/V169_EmojiSearchIndexRank.kt diff --git a/app/src/main/java/org/thoughtcrime/securesms/database/EmojiSearchTable.kt b/app/src/main/java/org/thoughtcrime/securesms/database/EmojiSearchTable.kt index fa5f9073f..be2a28bc9 100644 --- a/app/src/main/java/org/thoughtcrime/securesms/database/EmojiSearchTable.kt +++ b/app/src/main/java/org/thoughtcrime/securesms/database/EmojiSearchTable.kt @@ -3,12 +3,13 @@ package org.thoughtcrime.securesms.database import android.content.Context import android.text.TextUtils import androidx.core.content.contentValuesOf +import org.signal.core.util.readToSingleInt +import org.signal.core.util.requireInt import org.signal.core.util.requireNonNullString import org.signal.core.util.select import org.signal.core.util.withinTransaction import org.thoughtcrime.securesms.database.model.EmojiSearchData import kotlin.math.max -import kotlin.math.roundToInt /** * Contains all info necessary for full-text search of emoji tags. @@ -17,9 +18,24 @@ class EmojiSearchTable(context: Context, databaseHelper: SignalDatabase) : Datab companion object { const val TABLE_NAME = "emoji_search" + const val ID = "_id" const val LABEL = "label" const val EMOJI = "emoji" - const val CREATE_TABLE = "CREATE VIRTUAL TABLE $TABLE_NAME USING fts5($LABEL, $EMOJI UNINDEXED)" + const val RANK = "rank" + + //language=sql + const val CREATE_TABLE = """ + CREATE TABLE $TABLE_NAME ( + $ID INTEGER PRIMARY KEY, + $LABEL TEXT NOT NULL, + $EMOJI TEXT NOT NULL, + $RANK INTEGER DEFAULT ${Int.MAX_VALUE} + ) + """ + + val CREATE_INDEXES = arrayOf( + "CREATE INDEX emoji_search_rank_covering ON $TABLE_NAME ($RANK, $LABEL, $EMOJI)" + ) } /** @@ -33,27 +49,41 @@ class EmojiSearchTable(context: Context, databaseHelper: SignalDatabase) : Datab return emptyList() } - val limit: Int = max(originalLimit, 100) + val limit: Int = max(originalLimit, 200) val entries = mutableListOf() + val maxRank = readableDatabase + .select("MAX($RANK) AS max") + .from(TABLE_NAME) + .where("$RANK != ${Int.MAX_VALUE}") + .run() + .readToSingleInt() + readableDatabase - .select(LABEL, EMOJI) + .select(LABEL, EMOJI, RANK) .from(TABLE_NAME) .where("$LABEL LIKE ?", "%$query%") + .orderBy("$RANK ASC") .limit(limit) .run() .use { cursor -> while (cursor.moveToNext()) { entries += Entry( label = cursor.requireNonNullString(LABEL), - emoji = cursor.requireNonNullString(EMOJI) + emoji = cursor.requireNonNullString(EMOJI), + rank = cursor.requireInt(RANK) ) } } return entries .sortedWith { lhs, rhs -> - similarityScore(query, lhs.label) - similarityScore(query, rhs.label) + val result = similarityScore(query, lhs, maxRank) - similarityScore(query, rhs, maxRank) + when { + result < 0 -> -1 + result > 0 -> 1 + else -> 0 + } } .distinctBy { it.emoji } .take(originalLimit) @@ -73,7 +103,8 @@ class EmojiSearchTable(context: Context, databaseHelper: SignalDatabase) : Datab for (label in searchData.tags) { val values = contentValuesOf( LABEL to label, - EMOJI to searchData.emoji + EMOJI to searchData.emoji, + RANK to if (searchData.rank == 0) Int.MAX_VALUE else searchData.rank ) db.insert(TABLE_NAME, null, values) } @@ -89,9 +120,11 @@ class EmojiSearchTable(context: Context, databaseHelper: SignalDatabase) : Datab * We determine similarity by how many letters appear before or after the `searchTerm` in the `match`. * We give letters that come before the term a bigger weight than those that come after as a way to prefer matches that are prefixed by the `searchTerm`. */ - private fun similarityScore(searchTerm: String, match: String): Int { + private fun similarityScore(searchTerm: String, entry: Entry, maxRank: Int): Float { + val match: String = entry.label + if (searchTerm == match) { - return 0 + return entry.scaledRank(maxRank) } val startIndex = match.indexOf(searchTerm) @@ -99,11 +132,25 @@ class EmojiSearchTable(context: Context, databaseHelper: SignalDatabase) : Datab val prefixCount = startIndex val suffixCount = match.length - (startIndex + searchTerm.length) - val prefixRankWeight = 1.5f - val suffixRankWeight = 1f + val prefixRankWeight = 1.75f + val suffixRankWeight = 0.75f + val notExactMatchPenalty = 2f - return ((prefixCount * prefixRankWeight) + (suffixCount * suffixRankWeight)).roundToInt() + return notExactMatchPenalty + + (prefixCount * prefixRankWeight) + + (suffixCount * suffixRankWeight) + + entry.scaledRank(maxRank) } - private data class Entry(val label: String, val emoji: String) + private data class Entry(val label: String, val emoji: String, val rank: Int) { + fun scaledRank(maxRank: Int): Float { + val unranked = 2f + val scaleFactor: Float = unranked / maxRank + return if (rank == Int.MAX_VALUE) { + unranked + } else { + rank * scaleFactor + } + } + } } diff --git a/app/src/main/java/org/thoughtcrime/securesms/database/helpers/SignalDatabaseMigrations.kt b/app/src/main/java/org/thoughtcrime/securesms/database/helpers/SignalDatabaseMigrations.kt index b60a64a69..dc140ea2f 100644 --- a/app/src/main/java/org/thoughtcrime/securesms/database/helpers/SignalDatabaseMigrations.kt +++ b/app/src/main/java/org/thoughtcrime/securesms/database/helpers/SignalDatabaseMigrations.kt @@ -24,6 +24,7 @@ import org.thoughtcrime.securesms.database.helpers.migration.V165_MmsMessageBoxP import org.thoughtcrime.securesms.database.helpers.migration.V166_ThreadAndMessageForeignKeys import org.thoughtcrime.securesms.database.helpers.migration.V167_RecreateReactionTriggers import org.thoughtcrime.securesms.database.helpers.migration.V168_SingleMessageTableMigration +import org.thoughtcrime.securesms.database.helpers.migration.V169_EmojiSearchIndexRank /** * Contains all of the database migrations for [SignalDatabase]. Broken into a separate file for cleanliness. @@ -32,7 +33,7 @@ object SignalDatabaseMigrations { val TAG: String = Log.tag(SignalDatabaseMigrations.javaClass) - const val DATABASE_VERSION = 168 + const val DATABASE_VERSION = 169 @JvmStatic fun migrate(context: Application, db: SQLiteDatabase, oldVersion: Int, newVersion: Int) { @@ -115,6 +116,10 @@ object SignalDatabaseMigrations { if (oldVersion < 168) { V168_SingleMessageTableMigration.migrate(context, db, oldVersion, newVersion) } + + if (oldVersion < 169) { + V169_EmojiSearchIndexRank.migrate(context, db, oldVersion, newVersion) + } } @JvmStatic diff --git a/app/src/main/java/org/thoughtcrime/securesms/database/helpers/migration/V169_EmojiSearchIndexRank.kt b/app/src/main/java/org/thoughtcrime/securesms/database/helpers/migration/V169_EmojiSearchIndexRank.kt new file mode 100644 index 000000000..40fafa0ed --- /dev/null +++ b/app/src/main/java/org/thoughtcrime/securesms/database/helpers/migration/V169_EmojiSearchIndexRank.kt @@ -0,0 +1,27 @@ +package org.thoughtcrime.securesms.database.helpers.migration + +import android.app.Application +import net.zetetic.database.sqlcipher.SQLiteDatabase + +/** + * We want to add a new `rank` column to the emoji_search table, and we no longer use it as an FTS + * table, so we can get rid of that too. + */ +object V169_EmojiSearchIndexRank : SignalDatabaseMigration { + override fun migrate(context: Application, db: SQLiteDatabase, oldVersion: Int, newVersion: Int) { + db.execSQL( + """ + CREATE TABLE emoji_search_tmp ( + _id INTEGER PRIMARY KEY, + label TEXT NOT NULL, + emoji TEXT NOT NULL, + rank INTEGER DEFAULT ${Int.MAX_VALUE} + ) + """ + ) + db.execSQL("INSERT INTO emoji_search_tmp (label, emoji) SELECT label, emoji from emoji_search") + db.execSQL("DROP TABLE emoji_search") + db.execSQL("ALTER TABLE emoji_search_tmp RENAME TO emoji_search") + db.execSQL("CREATE INDEX emoji_search_rank_covering ON emoji_search (rank, label, emoji)") + } +} diff --git a/app/src/main/java/org/thoughtcrime/securesms/database/model/EmojiSearchData.java b/app/src/main/java/org/thoughtcrime/securesms/database/model/EmojiSearchData.java index ad5cfe94b..59f9b02a7 100644 --- a/app/src/main/java/org/thoughtcrime/securesms/database/model/EmojiSearchData.java +++ b/app/src/main/java/org/thoughtcrime/securesms/database/model/EmojiSearchData.java @@ -1,6 +1,7 @@ package org.thoughtcrime.securesms.database.model; import androidx.annotation.NonNull; +import androidx.annotation.Nullable; import com.fasterxml.jackson.annotation.JsonProperty; @@ -16,6 +17,12 @@ public final class EmojiSearchData { @JsonProperty private List tags; + @JsonProperty + private String shortName; + + @JsonProperty + private int rank; + public EmojiSearchData() {} public @NonNull String getEmoji() { @@ -25,4 +32,15 @@ public final class EmojiSearchData { public @NonNull List getTags() { return tags; } + + public @Nullable String getShortName() { + return shortName; + } + + /** + * A value representing how popular an emoji is, with 1 being the best rank. A value of 0 means this emoji has no rank at all. + */ + public int getRank() { + return rank; + } } diff --git a/app/src/main/java/org/thoughtcrime/securesms/keyboard/emoji/search/EmojiSearchRepository.kt b/app/src/main/java/org/thoughtcrime/securesms/keyboard/emoji/search/EmojiSearchRepository.kt index de8eb00d3..7716cf0a6 100644 --- a/app/src/main/java/org/thoughtcrime/securesms/keyboard/emoji/search/EmojiSearchRepository.kt +++ b/app/src/main/java/org/thoughtcrime/securesms/keyboard/emoji/search/EmojiSearchRepository.kt @@ -16,7 +16,7 @@ import java.util.function.Consumer private const val MINIMUM_QUERY_THRESHOLD = 1 private const val MINIMUM_INLINE_QUERY_THRESHOLD = 2 -private const val EMOJI_SEARCH_LIMIT = 20 +private const val EMOJI_SEARCH_LIMIT = 50 private val NOT_PUNCTUATION = "[^\\p{Punct}]".toRegex() diff --git a/core-util/src/main/java/org/signal/core/util/CursorExtensions.kt b/core-util/src/main/java/org/signal/core/util/CursorExtensions.kt index c2cf2a25e..97bf03356 100644 --- a/core-util/src/main/java/org/signal/core/util/CursorExtensions.kt +++ b/core-util/src/main/java/org/signal/core/util/CursorExtensions.kt @@ -78,6 +78,17 @@ fun Cursor.readToSingleLong(defaultValue: Long = 0): Long { } } +@JvmOverloads +fun Cursor.readToSingleInt(defaultValue: Int = 0): Int { + return use { + if (it.moveToFirst()) { + it.getInt(0) + } else { + defaultValue + } + } +} + @JvmOverloads inline fun Cursor.readToList(predicate: (T) -> Boolean = { true }, mapper: (Cursor) -> T): List { val list = mutableListOf()