kopia lustrzana https://github.com/ryukoposting/Signal-Android
Improve emoji search rankings.
rodzic
91fbc236ce
commit
eada1e96ee
|
@ -3,12 +3,13 @@ package org.thoughtcrime.securesms.database
|
||||||
import android.content.Context
|
import android.content.Context
|
||||||
import android.text.TextUtils
|
import android.text.TextUtils
|
||||||
import androidx.core.content.contentValuesOf
|
import androidx.core.content.contentValuesOf
|
||||||
|
import org.signal.core.util.readToSingleInt
|
||||||
|
import org.signal.core.util.requireInt
|
||||||
import org.signal.core.util.requireNonNullString
|
import org.signal.core.util.requireNonNullString
|
||||||
import org.signal.core.util.select
|
import org.signal.core.util.select
|
||||||
import org.signal.core.util.withinTransaction
|
import org.signal.core.util.withinTransaction
|
||||||
import org.thoughtcrime.securesms.database.model.EmojiSearchData
|
import org.thoughtcrime.securesms.database.model.EmojiSearchData
|
||||||
import kotlin.math.max
|
import kotlin.math.max
|
||||||
import kotlin.math.roundToInt
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Contains all info necessary for full-text search of emoji tags.
|
* Contains all info necessary for full-text search of emoji tags.
|
||||||
|
@ -17,9 +18,24 @@ class EmojiSearchTable(context: Context, databaseHelper: SignalDatabase) : Datab
|
||||||
|
|
||||||
companion object {
|
companion object {
|
||||||
const val TABLE_NAME = "emoji_search"
|
const val TABLE_NAME = "emoji_search"
|
||||||
|
const val ID = "_id"
|
||||||
const val LABEL = "label"
|
const val LABEL = "label"
|
||||||
const val EMOJI = "emoji"
|
const val EMOJI = "emoji"
|
||||||
const val CREATE_TABLE = "CREATE VIRTUAL TABLE $TABLE_NAME USING fts5($LABEL, $EMOJI UNINDEXED)"
|
const val RANK = "rank"
|
||||||
|
|
||||||
|
//language=sql
|
||||||
|
const val CREATE_TABLE = """
|
||||||
|
CREATE TABLE $TABLE_NAME (
|
||||||
|
$ID INTEGER PRIMARY KEY,
|
||||||
|
$LABEL TEXT NOT NULL,
|
||||||
|
$EMOJI TEXT NOT NULL,
|
||||||
|
$RANK INTEGER DEFAULT ${Int.MAX_VALUE}
|
||||||
|
)
|
||||||
|
"""
|
||||||
|
|
||||||
|
val CREATE_INDEXES = arrayOf(
|
||||||
|
"CREATE INDEX emoji_search_rank_covering ON $TABLE_NAME ($RANK, $LABEL, $EMOJI)"
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -33,27 +49,41 @@ class EmojiSearchTable(context: Context, databaseHelper: SignalDatabase) : Datab
|
||||||
return emptyList()
|
return emptyList()
|
||||||
}
|
}
|
||||||
|
|
||||||
val limit: Int = max(originalLimit, 100)
|
val limit: Int = max(originalLimit, 200)
|
||||||
val entries = mutableListOf<Entry>()
|
val entries = mutableListOf<Entry>()
|
||||||
|
|
||||||
|
val maxRank = readableDatabase
|
||||||
|
.select("MAX($RANK) AS max")
|
||||||
|
.from(TABLE_NAME)
|
||||||
|
.where("$RANK != ${Int.MAX_VALUE}")
|
||||||
|
.run()
|
||||||
|
.readToSingleInt()
|
||||||
|
|
||||||
readableDatabase
|
readableDatabase
|
||||||
.select(LABEL, EMOJI)
|
.select(LABEL, EMOJI, RANK)
|
||||||
.from(TABLE_NAME)
|
.from(TABLE_NAME)
|
||||||
.where("$LABEL LIKE ?", "%$query%")
|
.where("$LABEL LIKE ?", "%$query%")
|
||||||
|
.orderBy("$RANK ASC")
|
||||||
.limit(limit)
|
.limit(limit)
|
||||||
.run()
|
.run()
|
||||||
.use { cursor ->
|
.use { cursor ->
|
||||||
while (cursor.moveToNext()) {
|
while (cursor.moveToNext()) {
|
||||||
entries += Entry(
|
entries += Entry(
|
||||||
label = cursor.requireNonNullString(LABEL),
|
label = cursor.requireNonNullString(LABEL),
|
||||||
emoji = cursor.requireNonNullString(EMOJI)
|
emoji = cursor.requireNonNullString(EMOJI),
|
||||||
|
rank = cursor.requireInt(RANK)
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return entries
|
return entries
|
||||||
.sortedWith { lhs, rhs ->
|
.sortedWith { lhs, rhs ->
|
||||||
similarityScore(query, lhs.label) - similarityScore(query, rhs.label)
|
val result = similarityScore(query, lhs, maxRank) - similarityScore(query, rhs, maxRank)
|
||||||
|
when {
|
||||||
|
result < 0 -> -1
|
||||||
|
result > 0 -> 1
|
||||||
|
else -> 0
|
||||||
|
}
|
||||||
}
|
}
|
||||||
.distinctBy { it.emoji }
|
.distinctBy { it.emoji }
|
||||||
.take(originalLimit)
|
.take(originalLimit)
|
||||||
|
@ -73,7 +103,8 @@ class EmojiSearchTable(context: Context, databaseHelper: SignalDatabase) : Datab
|
||||||
for (label in searchData.tags) {
|
for (label in searchData.tags) {
|
||||||
val values = contentValuesOf(
|
val values = contentValuesOf(
|
||||||
LABEL to label,
|
LABEL to label,
|
||||||
EMOJI to searchData.emoji
|
EMOJI to searchData.emoji,
|
||||||
|
RANK to if (searchData.rank == 0) Int.MAX_VALUE else searchData.rank
|
||||||
)
|
)
|
||||||
db.insert(TABLE_NAME, null, values)
|
db.insert(TABLE_NAME, null, values)
|
||||||
}
|
}
|
||||||
|
@ -89,9 +120,11 @@ class EmojiSearchTable(context: Context, databaseHelper: SignalDatabase) : Datab
|
||||||
* We determine similarity by how many letters appear before or after the `searchTerm` in the `match`.
|
* We determine similarity by how many letters appear before or after the `searchTerm` in the `match`.
|
||||||
* We give letters that come before the term a bigger weight than those that come after as a way to prefer matches that are prefixed by the `searchTerm`.
|
* We give letters that come before the term a bigger weight than those that come after as a way to prefer matches that are prefixed by the `searchTerm`.
|
||||||
*/
|
*/
|
||||||
private fun similarityScore(searchTerm: String, match: String): Int {
|
private fun similarityScore(searchTerm: String, entry: Entry, maxRank: Int): Float {
|
||||||
|
val match: String = entry.label
|
||||||
|
|
||||||
if (searchTerm == match) {
|
if (searchTerm == match) {
|
||||||
return 0
|
return entry.scaledRank(maxRank)
|
||||||
}
|
}
|
||||||
|
|
||||||
val startIndex = match.indexOf(searchTerm)
|
val startIndex = match.indexOf(searchTerm)
|
||||||
|
@ -99,11 +132,25 @@ class EmojiSearchTable(context: Context, databaseHelper: SignalDatabase) : Datab
|
||||||
val prefixCount = startIndex
|
val prefixCount = startIndex
|
||||||
val suffixCount = match.length - (startIndex + searchTerm.length)
|
val suffixCount = match.length - (startIndex + searchTerm.length)
|
||||||
|
|
||||||
val prefixRankWeight = 1.5f
|
val prefixRankWeight = 1.75f
|
||||||
val suffixRankWeight = 1f
|
val suffixRankWeight = 0.75f
|
||||||
|
val notExactMatchPenalty = 2f
|
||||||
|
|
||||||
return ((prefixCount * prefixRankWeight) + (suffixCount * suffixRankWeight)).roundToInt()
|
return notExactMatchPenalty +
|
||||||
|
(prefixCount * prefixRankWeight) +
|
||||||
|
(suffixCount * suffixRankWeight) +
|
||||||
|
entry.scaledRank(maxRank)
|
||||||
}
|
}
|
||||||
|
|
||||||
private data class Entry(val label: String, val emoji: String)
|
/**
 * A single row read from the emoji search table.
 *
 * @property label value of the label column for this row
 * @property emoji value of the emoji column for this row
 * @property rank value of the rank column; [Int.MAX_VALUE] is treated as "unranked"
 */
private data class Entry(val label: String, val emoji: String, val rank: Int) {
  /**
   * Scales [rank] so that a rank equal to [maxRank] maps to the same value an unranked entry gets (2.0),
   * with smaller ranks mapping to proportionally smaller values.
   *
   * @param maxRank the largest rank among the ranked entries
   * @return the scaled rank, or the unranked value when this entry is unranked or [maxRank] is not positive
   */
  fun scaledRank(maxRank: Int): Float {
    val unranked = 2f

    // A non-positive maxRank would make the scale factor infinite or negative, which in turn can produce
    // Infinity/NaN scores and an inconsistent sort order. Fall back to the unranked value instead.
    if (rank == Int.MAX_VALUE || maxRank <= 0) {
      return unranked
    }

    return rank * (unranked / maxRank)
  }
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,6 +24,7 @@ import org.thoughtcrime.securesms.database.helpers.migration.V165_MmsMessageBoxP
|
||||||
import org.thoughtcrime.securesms.database.helpers.migration.V166_ThreadAndMessageForeignKeys
|
import org.thoughtcrime.securesms.database.helpers.migration.V166_ThreadAndMessageForeignKeys
|
||||||
import org.thoughtcrime.securesms.database.helpers.migration.V167_RecreateReactionTriggers
|
import org.thoughtcrime.securesms.database.helpers.migration.V167_RecreateReactionTriggers
|
||||||
import org.thoughtcrime.securesms.database.helpers.migration.V168_SingleMessageTableMigration
|
import org.thoughtcrime.securesms.database.helpers.migration.V168_SingleMessageTableMigration
|
||||||
|
import org.thoughtcrime.securesms.database.helpers.migration.V169_EmojiSearchIndexRank
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Contains all of the database migrations for [SignalDatabase]. Broken into a separate file for cleanliness.
|
* Contains all of the database migrations for [SignalDatabase]. Broken into a separate file for cleanliness.
|
||||||
|
@ -32,7 +33,7 @@ object SignalDatabaseMigrations {
|
||||||
|
|
||||||
val TAG: String = Log.tag(SignalDatabaseMigrations.javaClass)
|
val TAG: String = Log.tag(SignalDatabaseMigrations.javaClass)
|
||||||
|
|
||||||
const val DATABASE_VERSION = 168
|
const val DATABASE_VERSION = 169
|
||||||
|
|
||||||
@JvmStatic
|
@JvmStatic
|
||||||
fun migrate(context: Application, db: SQLiteDatabase, oldVersion: Int, newVersion: Int) {
|
fun migrate(context: Application, db: SQLiteDatabase, oldVersion: Int, newVersion: Int) {
|
||||||
|
@ -115,6 +116,10 @@ object SignalDatabaseMigrations {
|
||||||
if (oldVersion < 168) {
|
if (oldVersion < 168) {
|
||||||
V168_SingleMessageTableMigration.migrate(context, db, oldVersion, newVersion)
|
V168_SingleMessageTableMigration.migrate(context, db, oldVersion, newVersion)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (oldVersion < 169) {
|
||||||
|
V169_EmojiSearchIndexRank.migrate(context, db, oldVersion, newVersion)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@JvmStatic
|
@JvmStatic
|
||||||
|
|
|
@ -0,0 +1,27 @@
|
||||||
|
package org.thoughtcrime.securesms.database.helpers.migration
|
||||||
|
|
||||||
|
import android.app.Application
|
||||||
|
import net.zetetic.database.sqlcipher.SQLiteDatabase
|
||||||
|
|
||||||
|
/**
 * Rebuilds the emoji_search table as a regular table that has an additional `rank` column.
 * The table is no longer used as an FTS table, so the replacement is a plain table.
 */
object V169_EmojiSearchIndexRank : SignalDatabaseMigration {
  override fun migrate(context: Application, db: SQLiteDatabase, oldVersion: Int, newVersion: Int) {
    val createTmpTable =
      """
      CREATE TABLE emoji_search_tmp (
        _id INTEGER PRIMARY KEY,
        label TEXT NOT NULL,
        emoji TEXT NOT NULL,
        rank INTEGER DEFAULT ${Int.MAX_VALUE}
      )
      """

    val statements = listOf(
      createTmpTable,
      // Copy the existing rows; rank is not listed, so it takes the column default.
      "INSERT INTO emoji_search_tmp (label, emoji) SELECT label, emoji from emoji_search",
      // Swap the new table in under the original name.
      "DROP TABLE emoji_search",
      "ALTER TABLE emoji_search_tmp RENAME TO emoji_search",
      "CREATE INDEX emoji_search_rank_covering ON emoji_search (rank, label, emoji)"
    )

    // Order matters: each statement depends on the one before it.
    for (sql in statements) {
      db.execSQL(sql)
    }
  }
}
|
|
@ -1,6 +1,7 @@
|
||||||
package org.thoughtcrime.securesms.database.model;
|
package org.thoughtcrime.securesms.database.model;
|
||||||
|
|
||||||
import androidx.annotation.NonNull;
|
import androidx.annotation.NonNull;
|
||||||
|
import androidx.annotation.Nullable;
|
||||||
|
|
||||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
|
|
||||||
|
@ -16,6 +17,12 @@ public final class EmojiSearchData {
|
||||||
@JsonProperty
|
@JsonProperty
|
||||||
private List<String> tags;
|
private List<String> tags;
|
||||||
|
|
||||||
|
@JsonProperty
|
||||||
|
private String shortName;
|
||||||
|
|
||||||
|
@JsonProperty
|
||||||
|
private int rank;
|
||||||
|
|
||||||
public EmojiSearchData() {}
|
public EmojiSearchData() {}
|
||||||
|
|
||||||
public @NonNull String getEmoji() {
|
public @NonNull String getEmoji() {
|
||||||
|
@ -25,4 +32,15 @@ public final class EmojiSearchData {
|
||||||
public @NonNull List<String> getTags() {
|
public @NonNull List<String> getTags() {
|
||||||
return tags;
|
return tags;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public @Nullable String getShortName() {
|
||||||
|
return shortName;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A value representing how popular an emoji is, with 1 being the best rank. A value of 0 means this emoji has no rank at all.
|
||||||
|
*/
|
||||||
|
public int getRank() {
|
||||||
|
return rank;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,7 +16,7 @@ import java.util.function.Consumer
|
||||||
|
|
||||||
private const val MINIMUM_QUERY_THRESHOLD = 1
|
private const val MINIMUM_QUERY_THRESHOLD = 1
|
||||||
private const val MINIMUM_INLINE_QUERY_THRESHOLD = 2
|
private const val MINIMUM_INLINE_QUERY_THRESHOLD = 2
|
||||||
private const val EMOJI_SEARCH_LIMIT = 20
|
private const val EMOJI_SEARCH_LIMIT = 50
|
||||||
|
|
||||||
private val NOT_PUNCTUATION = "[^\\p{Punct}]".toRegex()
|
private val NOT_PUNCTUATION = "[^\\p{Punct}]".toRegex()
|
||||||
|
|
||||||
|
|
|
@ -78,6 +78,17 @@ fun Cursor.readToSingleLong(defaultValue: Long = 0): Long {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reads column 0 of the cursor's first row as an Int, closing the cursor afterwards.
 *
 * @param defaultValue the value returned when the cursor has no rows
 * @return the Int in the first column of the first row, or [defaultValue] if there is no first row
 */
@JvmOverloads
fun Cursor.readToSingleInt(defaultValue: Int = 0): Int = use { cursor ->
  when {
    cursor.moveToFirst() -> cursor.getInt(0)
    else -> defaultValue
  }
}
|
||||||
|
|
||||||
@JvmOverloads
|
@JvmOverloads
|
||||||
inline fun <T> Cursor.readToList(predicate: (T) -> Boolean = { true }, mapper: (Cursor) -> T): List<T> {
|
inline fun <T> Cursor.readToList(predicate: (T) -> Boolean = { true }, mapper: (Cursor) -> T): List<T> {
|
||||||
val list = mutableListOf<T>()
|
val list = mutableListOf<T>()
|
||||||
|
|
Ładowanie…
Reference in New Issue