Improve emoji search rankings.

main
Greyson Parrelli 2022-12-21 09:26:30 -05:00
rodzic 91fbc236ce
commit eada1e96ee
6 zmienionych plików z 123 dodań i 15 usunięć

Wyświetl plik

@ -3,12 +3,13 @@ package org.thoughtcrime.securesms.database
import android.content.Context import android.content.Context
import android.text.TextUtils import android.text.TextUtils
import androidx.core.content.contentValuesOf import androidx.core.content.contentValuesOf
import org.signal.core.util.readToSingleInt
import org.signal.core.util.requireInt
import org.signal.core.util.requireNonNullString import org.signal.core.util.requireNonNullString
import org.signal.core.util.select import org.signal.core.util.select
import org.signal.core.util.withinTransaction import org.signal.core.util.withinTransaction
import org.thoughtcrime.securesms.database.model.EmojiSearchData import org.thoughtcrime.securesms.database.model.EmojiSearchData
import kotlin.math.max import kotlin.math.max
import kotlin.math.roundToInt
/** /**
* Contains all info necessary for full-text search of emoji tags. * Contains all info necessary for full-text search of emoji tags.
@ -17,9 +18,24 @@ class EmojiSearchTable(context: Context, databaseHelper: SignalDatabase) : Datab
companion object { companion object {
const val TABLE_NAME = "emoji_search" const val TABLE_NAME = "emoji_search"
const val ID = "_id"
const val LABEL = "label" const val LABEL = "label"
const val EMOJI = "emoji" const val EMOJI = "emoji"
const val CREATE_TABLE = "CREATE VIRTUAL TABLE $TABLE_NAME USING fts5($LABEL, $EMOJI UNINDEXED)" const val RANK = "rank"
//language=sql
const val CREATE_TABLE = """
CREATE TABLE $TABLE_NAME (
$ID INTEGER PRIMARY KEY,
$LABEL TEXT NOT NULL,
$EMOJI TEXT NOT NULL,
$RANK INTEGER DEFAULT ${Int.MAX_VALUE}
)
"""
val CREATE_INDEXES = arrayOf(
"CREATE INDEX emoji_search_rank_covering ON $TABLE_NAME ($RANK, $LABEL, $EMOJI)"
)
} }
/** /**
@ -33,27 +49,41 @@ class EmojiSearchTable(context: Context, databaseHelper: SignalDatabase) : Datab
return emptyList() return emptyList()
} }
val limit: Int = max(originalLimit, 100) val limit: Int = max(originalLimit, 200)
val entries = mutableListOf<Entry>() val entries = mutableListOf<Entry>()
val maxRank = readableDatabase
.select("MAX($RANK) AS max")
.from(TABLE_NAME)
.where("$RANK != ${Int.MAX_VALUE}")
.run()
.readToSingleInt()
readableDatabase readableDatabase
.select(LABEL, EMOJI) .select(LABEL, EMOJI, RANK)
.from(TABLE_NAME) .from(TABLE_NAME)
.where("$LABEL LIKE ?", "%$query%") .where("$LABEL LIKE ?", "%$query%")
.orderBy("$RANK ASC")
.limit(limit) .limit(limit)
.run() .run()
.use { cursor -> .use { cursor ->
while (cursor.moveToNext()) { while (cursor.moveToNext()) {
entries += Entry( entries += Entry(
label = cursor.requireNonNullString(LABEL), label = cursor.requireNonNullString(LABEL),
emoji = cursor.requireNonNullString(EMOJI) emoji = cursor.requireNonNullString(EMOJI),
rank = cursor.requireInt(RANK)
) )
} }
} }
return entries return entries
.sortedWith { lhs, rhs -> .sortedWith { lhs, rhs ->
similarityScore(query, lhs.label) - similarityScore(query, rhs.label) val result = similarityScore(query, lhs, maxRank) - similarityScore(query, rhs, maxRank)
when {
result < 0 -> -1
result > 0 -> 1
else -> 0
}
} }
.distinctBy { it.emoji } .distinctBy { it.emoji }
.take(originalLimit) .take(originalLimit)
@ -73,7 +103,8 @@ class EmojiSearchTable(context: Context, databaseHelper: SignalDatabase) : Datab
for (label in searchData.tags) { for (label in searchData.tags) {
val values = contentValuesOf( val values = contentValuesOf(
LABEL to label, LABEL to label,
EMOJI to searchData.emoji EMOJI to searchData.emoji,
RANK to if (searchData.rank == 0) Int.MAX_VALUE else searchData.rank
) )
db.insert(TABLE_NAME, null, values) db.insert(TABLE_NAME, null, values)
} }
@ -89,9 +120,11 @@ class EmojiSearchTable(context: Context, databaseHelper: SignalDatabase) : Datab
* We determine similarity by how many letters appear before or after the `searchTerm` in the `match`. * We determine similarity by how many letters appear before or after the `searchTerm` in the `match`.
* We give letters that come before the term a bigger weight than those that come after as a way to prefer matches that are prefixed by the `searchTerm`. * We give letters that come before the term a bigger weight than those that come after as a way to prefer matches that are prefixed by the `searchTerm`.
*/ */
private fun similarityScore(searchTerm: String, match: String): Int { private fun similarityScore(searchTerm: String, entry: Entry, maxRank: Int): Float {
val match: String = entry.label
if (searchTerm == match) { if (searchTerm == match) {
return 0 return entry.scaledRank(maxRank)
} }
val startIndex = match.indexOf(searchTerm) val startIndex = match.indexOf(searchTerm)
@ -99,11 +132,25 @@ class EmojiSearchTable(context: Context, databaseHelper: SignalDatabase) : Datab
val prefixCount = startIndex val prefixCount = startIndex
val suffixCount = match.length - (startIndex + searchTerm.length) val suffixCount = match.length - (startIndex + searchTerm.length)
val prefixRankWeight = 1.5f val prefixRankWeight = 1.75f
val suffixRankWeight = 1f val suffixRankWeight = 0.75f
val notExactMatchPenalty = 2f
return ((prefixCount * prefixRankWeight) + (suffixCount * suffixRankWeight)).roundToInt() return notExactMatchPenalty +
(prefixCount * prefixRankWeight) +
(suffixCount * suffixRankWeight) +
entry.scaledRank(maxRank)
} }
private data class Entry(val label: String, val emoji: String) private data class Entry(val label: String, val emoji: String, val rank: Int) {
/**
 * Converts this entry's popularity rank into a score contribution, where lower is better.
 *
 * A ranked entry is scaled linearly so that a rank equal to [maxRank] scores the same as an
 * unranked entry (2f). Entries carrying the `Int.MAX_VALUE` sentinel are treated as unranked.
 *
 * @param maxRank the largest real rank to scale against. A non-positive value means there is
 *   nothing to scale against, in which case every entry is scored as unranked.
 * @return the scaled rank; for ranks between 1 and [maxRank] this falls between 0 and 2.
 */
fun scaledRank(maxRank: Int): Float {
  val unranked = 2f

  // Guard the divisor: dividing by a zero maxRank would produce Infinity (or NaN for a rank of 0),
  // and NaN scores make the comparison-based sort inconsistent.
  if (rank == Int.MAX_VALUE || maxRank <= 0) {
    return unranked
  }

  val scaleFactor: Float = unranked / maxRank
  return rank * scaleFactor
}
}
} }

Wyświetl plik

@ -24,6 +24,7 @@ import org.thoughtcrime.securesms.database.helpers.migration.V165_MmsMessageBoxP
import org.thoughtcrime.securesms.database.helpers.migration.V166_ThreadAndMessageForeignKeys import org.thoughtcrime.securesms.database.helpers.migration.V166_ThreadAndMessageForeignKeys
import org.thoughtcrime.securesms.database.helpers.migration.V167_RecreateReactionTriggers import org.thoughtcrime.securesms.database.helpers.migration.V167_RecreateReactionTriggers
import org.thoughtcrime.securesms.database.helpers.migration.V168_SingleMessageTableMigration import org.thoughtcrime.securesms.database.helpers.migration.V168_SingleMessageTableMigration
import org.thoughtcrime.securesms.database.helpers.migration.V169_EmojiSearchIndexRank
/** /**
* Contains all of the database migrations for [SignalDatabase]. Broken into a separate file for cleanliness. * Contains all of the database migrations for [SignalDatabase]. Broken into a separate file for cleanliness.
@ -32,7 +33,7 @@ object SignalDatabaseMigrations {
val TAG: String = Log.tag(SignalDatabaseMigrations.javaClass) val TAG: String = Log.tag(SignalDatabaseMigrations.javaClass)
const val DATABASE_VERSION = 168 const val DATABASE_VERSION = 169
@JvmStatic @JvmStatic
fun migrate(context: Application, db: SQLiteDatabase, oldVersion: Int, newVersion: Int) { fun migrate(context: Application, db: SQLiteDatabase, oldVersion: Int, newVersion: Int) {
@ -115,6 +116,10 @@ object SignalDatabaseMigrations {
if (oldVersion < 168) { if (oldVersion < 168) {
V168_SingleMessageTableMigration.migrate(context, db, oldVersion, newVersion) V168_SingleMessageTableMigration.migrate(context, db, oldVersion, newVersion)
} }
if (oldVersion < 169) {
V169_EmojiSearchIndexRank.migrate(context, db, oldVersion, newVersion)
}
} }
@JvmStatic @JvmStatic

Wyświetl plik

@ -0,0 +1,27 @@
package org.thoughtcrime.securesms.database.helpers.migration
import android.app.Application
import net.zetetic.database.sqlcipher.SQLiteDatabase
/**
* We want to add a new `rank` column to the emoji_search table, and we no longer use it as an FTS
* table, so we can get rid of that too.
*
* The table is rebuilt rather than altered: a regular table is created under a temporary name,
* the existing label/emoji rows are copied into it, the old table is dropped, and the new table
* is renamed into place before its covering index is created.
*/
object V169_EmojiSearchIndexRank : SignalDatabaseMigration {
// Only [db] is used here; the remaining parameters are part of the migration interface.
override fun migrate(context: Application, db: SQLiteDatabase, oldVersion: Int, newVersion: Int) {
// Step 1: create the replacement table under a temporary name. `rank` defaults to Int.MAX_VALUE.
db.execSQL(
"""
CREATE TABLE emoji_search_tmp (
_id INTEGER PRIMARY KEY,
label TEXT NOT NULL,
emoji TEXT NOT NULL,
rank INTEGER DEFAULT ${Int.MAX_VALUE}
)
"""
)
// Step 2: copy only label and emoji, so every migrated row receives the default rank.
db.execSQL("INSERT INTO emoji_search_tmp (label, emoji) SELECT label, emoji from emoji_search")
// Step 3: swap the new table in for the old one.
db.execSQL("DROP TABLE emoji_search")
db.execSQL("ALTER TABLE emoji_search_tmp RENAME TO emoji_search")
// Step 4: create the index only after the rename so it is attached to the final table name.
db.execSQL("CREATE INDEX emoji_search_rank_covering ON emoji_search (rank, label, emoji)")
}
}

Wyświetl plik

@ -1,6 +1,7 @@
package org.thoughtcrime.securesms.database.model; package org.thoughtcrime.securesms.database.model;
import androidx.annotation.NonNull; import androidx.annotation.NonNull;
import androidx.annotation.Nullable;
import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonProperty;
@ -16,6 +17,12 @@ public final class EmojiSearchData {
@JsonProperty @JsonProperty
private List<String> tags; private List<String> tags;
@JsonProperty
private String shortName;
@JsonProperty
private int rank;
public EmojiSearchData() {} public EmojiSearchData() {}
public @NonNull String getEmoji() { public @NonNull String getEmoji() {
@ -25,4 +32,15 @@ public final class EmojiSearchData {
public @NonNull List<String> getTags() { public @NonNull List<String> getTags() {
return tags; return tags;
} }
/**
 * Returns the short name held in the {@code shortName} JSON property. May be {@code null}.
 */
public @Nullable String getShortName() {
  return this.shortName;
}
/**
 * Popularity rank of this emoji. Rank 1 is the best possible rank; a value of 0 means the emoji
 * has not been ranked at all.
 */
public int getRank() {
  return this.rank;
}
} }

Wyświetl plik

@ -16,7 +16,7 @@ import java.util.function.Consumer
private const val MINIMUM_QUERY_THRESHOLD = 1 private const val MINIMUM_QUERY_THRESHOLD = 1
private const val MINIMUM_INLINE_QUERY_THRESHOLD = 2 private const val MINIMUM_INLINE_QUERY_THRESHOLD = 2
private const val EMOJI_SEARCH_LIMIT = 20 private const val EMOJI_SEARCH_LIMIT = 50
private val NOT_PUNCTUATION = "[^\\p{Punct}]".toRegex() private val NOT_PUNCTUATION = "[^\\p{Punct}]".toRegex()

Wyświetl plik

@ -78,6 +78,17 @@ fun Cursor.readToSingleLong(defaultValue: Long = 0): Long {
} }
} }
/**
 * Reads column 0 of the cursor's first row as an [Int], closing the cursor afterwards.
 *
 * @param defaultValue the value returned when the cursor has no rows.
 * @return the first row's first column, or [defaultValue] if the cursor is empty.
 */
@JvmOverloads
fun Cursor.readToSingleInt(defaultValue: Int = 0): Int = use { cursor ->
  when {
    cursor.moveToFirst() -> cursor.getInt(0)
    else -> defaultValue
  }
}
@JvmOverloads @JvmOverloads
inline fun <T> Cursor.readToList(predicate: (T) -> Boolean = { true }, mapper: (Cursor) -> T): List<T> { inline fun <T> Cursor.readToList(predicate: (T) -> Boolean = { true }, mapper: (Cursor) -> T): List<T> {
val list = mutableListOf<T>() val list = mutableListOf<T>()