Avoids breaking tags and image/previews in translation

pull/150/head
Vitor Pamplona 2023-02-22 14:36:10 -05:00
rodzic da877d5f79
commit da949fa9fd
3 zmienionych plików z 123 dodań i 3 usunięć

Wyświetl plik

@ -119,6 +119,9 @@ dependencies {
implementation "com.google.accompanist:accompanist-pager:$accompanist_version" // Pager
implementation "com.google.accompanist:accompanist-pager-indicators:$accompanist_version"
// Parses URLs from Text:
implementation "io.github.url-detector:url-detector:0.1.23"
// For QR generation
implementation 'com.google.zxing:core:3.5.1'
implementation "androidx.camera:camera-camera2:1.2.1"

Wyświetl plik

@ -0,0 +1,67 @@
package com.vitorpamplona.amethyst
import androidx.test.ext.junit.runners.AndroidJUnit4
import com.google.android.gms.tasks.Tasks
import com.vitorpamplona.amethyst.service.lang.LanguageTranslatorService
import org.junit.Assert.assertEquals
import org.junit.Assert.assertTrue
import org.junit.Test
import org.junit.runner.RunWith
/**
 * Instrumented tests for [LanguageTranslatorService.autoTranslate].
 *
 * Every case translates an English input to Portuguese ("pt") and checks that
 * tokens which must not be altered by translation (tags such as `#[0]`, URLs,
 * e-mail addresses, lightning invoices) are still present in the output.
 */
@RunWith(AndroidJUnit4::class)
class TranslationsTest {
    /**
     * Translates [text] to Portuguese and waits for the translation task to finish.
     *
     * @return the translated text, or null when the service produced no result.
     */
    fun translate(text: String): String? {
        val task = LanguageTranslatorService.autoTranslate(text, emptySet(), "pt")
        return Tasks.await(task).result
    }

    /** Asserts that translating [input] yields exactly [expected]. */
    fun assertTranslate(expected: String, input: String) {
        assertEquals("Unexpected translation of \"$input\"", expected, translate(input))
    }

    /** Asserts that the translation of [input] contains [expected] verbatim. */
    fun assertTranslateContains(expected: String, input: String) {
        val translated = translate(input)
        // A null result is reported as a regular assertion failure (with context)
        // instead of surfacing as a bare NullPointerException.
        assertTrue(
            "Translation of \"$input\" should contain \"$expected\" but was \"$translated\"",
            translated != null && translated.contains(expected)
        )
    }

    @Test
    fun testTranslation() {
        assertTranslate("Olá mundo", "Hello World")
    }

    @Test
    fun testTranslationName() {
        assertTranslate("Olá Vitor, como você está?", "Hello Vitor, how are you doing?")
    }

    @Test
    fun testTranslationTag() {
        assertTranslate("Você já viu isso, #[0]", "Have you seen this, #[0]")
    }

    @Test
    fun testTranslationUrl() {
        assertTranslateContains("https://t.me/mygroup", "Have you seen this https://t.me/mygroup")
        assertTranslateContains("http://bananas.com", "Have you seen this http://bananas.com")
        assertTranslateContains("http://bananas.com/myimage.jpg", "Have you seen this http://bananas.com/myimage.jpg")
        assertTranslateContains("http://bananas.com?search=true&image=myimage.jpg", "Have you seen this http://bananas.com?search=true&image=myimage.jpg")
        // Inputs that consist of a URL only must come back untouched.
        assertTranslate("https://i.imgur.com/EZ3QPsw.jpg", "https://i.imgur.com/EZ3QPsw.jpg")
        assertTranslate("https://HaveYouSeenThis.com", "https://HaveYouSeenThis.com")
        assertTranslate("https://haveyouseenthis.com", "https://haveyouseenthis.com")
        assertTranslate("https://i.imgur.com/asdEZ3QPsw.jpg", "https://i.imgur.com/asdEZ3QPsw.jpg")
        assertTranslateContains("https://i.imgur.com/asdEZ3QPswadfj2389rioasdjf9834riofaj9834aKLL.jpg", "Hi there! \n How are you doing? \n https://i.imgur.com/asdEZ3QPswadfj2389rioasdjf9834riofaj9834aKLL.jpg")
    }

    @Test
    fun testTranslationEmail() {
        assertTranslateContains("vitor@amethyst.social", "Have you seen this vitor@amethyst.social")
    }

    @Test
    fun testTranslationLnInvoice() {
        assertTranslateContains(
            "lnbc12u1p3lvjeupp5a5ecgp45k6pa8tu7rnkgzfuwdy3l5ylv3k5tdzrg4cr8rj2f364sdq5g9kxy7fqd9h8vmmfvdjscqzpgxqyz5vqsp5zuzyetf33aphetf0e80w7tztw6dfsjs4lmvya4cyk8umfsx00qts9qyyssqke9hphcr36zvcav8wr502g0mhfhxpy8m9tt36zttg8vldm2qxw039ulccr8nwy3hjg2sw5vk65e99lwuhrhw0nuya2u57qszltvx7egp74jydn",
            "Have you seen this: lnbc12u1p3lvjeupp5a5ecgp45k6pa8tu7rnkgzfuwdy3l5ylv3k5tdzrg4cr8rj2f364sdq5g9kxy7fqd9h8vmmfvdjscqzpgxqyz5vqsp5zuzyetf33aphetf0e80w7tztw6dfsjs4lmvya4cyk8umfsx00qts9qyyssqke9hphcr36zvcav8wr502g0mhfhxpy8m9tt36zttg8vldm2qxw039ulccr8nwy3hjg2sw5vk65e99lwuhrhw0nuya2u57qszltvx7egp74jydn I think I have to pay"
        )
    }
}

Wyświetl plik

@ -8,7 +8,9 @@ import com.google.mlkit.nl.translate.TranslateLanguage
import com.google.mlkit.nl.translate.Translation
import com.google.mlkit.nl.translate.Translator
import com.google.mlkit.nl.translate.TranslatorOptions
import java.util.ArrayList
import com.linkedin.urls.detection.UrlDetector
import com.linkedin.urls.detection.UrlDetectorOptions
import java.util.regex.Pattern
class ResultOrError(
var result: String?,
@ -19,6 +21,7 @@ class ResultOrError(
object LanguageTranslatorService {
private val languageIdentification = LanguageIdentification.getClient()
val lnRegex = Pattern.compile("\\blnbc[a-z0-9]+\\b")
private val translators =
object : LruCache<TranslatorOptions, Translator>(10) {
@ -43,6 +46,7 @@ object LanguageTranslatorService {
fun translate(text: String, source: String, target: String): Task<ResultOrError> {
val sourceLangCode = TranslateLanguage.fromLanguageTag(source)
val targetLangCode = TranslateLanguage.fromLanguageTag(target)
if (sourceLangCode == null || targetLangCode == null) {
return Tasks.forCanceled()
}
@ -56,20 +60,66 @@ object LanguageTranslatorService {
return translator.downloadModelIfNeeded().onSuccessTask {
val tasks = mutableListOf<Task<String>>()
for (paragraph in text.split("\n")) {
val dict = lnDictionary(text) + urlDictionary(text)
for (paragraph in encodeDictionary(text, dict).split("\n")) {
tasks.add(translator.translate(paragraph))
}
Tasks.whenAll(tasks).continueWith {
val results: MutableList<String> = ArrayList()
for (task in tasks) {
results.add(task.result)
var fixedText = task.result.replace("# [","#[") // fixes tags that always return with a space
results.add(decodeDictionary(fixedText, dict))
}
ResultOrError(results.joinToString("\n"), source, target, null)
}
}
}
/**
 * Replaces every occurrence of each dictionary value found in [text] with its key.
 *
 * Matching is case-insensitive. Values are substituted longest-first so that a
 * value which is a prefix/substring of another one (e.g. `http://a.com` and
 * `http://a.com/img.jpg`) cannot break the longer value before it is replaced.
 *
 * @param text the text to encode.
 * @param dict map of placeholder key -> original fragment.
 * @return [text] with the original fragments replaced by their placeholder keys.
 */
private fun encodeDictionary(text: String, dict: Map<String, String>): String {
    // sortedByDescending is stable, so equally long values keep the map's order.
    return dict.entries
        .sortedByDescending { it.value.length }
        .fold(text) { encoded, (placeholder, original) ->
            encoded.replace(original, placeholder, true)
        }
}
/**
 * Replaces every occurrence of each dictionary key found in [text] with its value.
 *
 * Matching is case-insensitive. Keys are substituted longest-first so that a key
 * which is a prefix of another one (e.g. `Key1` and `Key10`) cannot consume part
 * of the longer key and leave a stray suffix behind.
 *
 * @param text the text to decode.
 * @param dict map of placeholder key -> original fragment.
 * @return [text] with the placeholder keys replaced by their original fragments.
 */
private fun decodeDictionary(text: String, dict: Map<String, String>): String {
    // sortedByDescending is stable, so equally long keys keep the map's order.
    return dict.entries
        .sortedByDescending { it.key.length }
        .fold(text) { decoded, (placeholder, original) ->
            decoded.replace(placeholder, original, true)
        }
}
/**
 * Builds a dictionary of shortened key -> full match for every `lnRegex` match in [text].
 *
 * The key is the first 8 and the last 8 characters of the match. Matches shorter
 * than 16 characters are skipped: their head and tail would overlap, which is the
 * case that used to raise an exception that was silently swallowed.
 *
 * @param text the text to scan.
 * @return map of shortened key -> full matched string, in order of appearance.
 */
private fun lnDictionary(text: String): Map<String, String> {
    val keptChars = 8
    val matcher = lnRegex.matcher(text)
    val invoices = mutableMapOf<String, String>()

    while (matcher.find()) {
        val lnInvoice = matcher.group()
        // Explicit guard instead of catching the IndexOutOfBoundsException that
        // replaceRange(8, length - 8, "") throws for too-short matches.
        if (lnInvoice.length < 2 * keptChars) continue

        val short = lnInvoice.take(keptChars) + lnInvoice.takeLast(keptChars)
        invoices[short] = lnInvoice
    }

    return invoices
}
/**
 * Builds a dictionary of placeholder key -> URL for every URL detected in [text].
 *
 * Each distinct URL gets its own key (`Amethystindexer0`, `Amethystindexer1`, ...).
 * Previously the index was never incremented, so all URLs shared the key
 * `Amethystindexer0` and only the last detected URL survived in the map.
 *
 * @param text the text to scan for URLs.
 * @return map of placeholder key -> URL exactly as it was written in [text],
 *         in order of first appearance.
 */
private fun urlDictionary(text: String): Map<String, String> {
    val parser = UrlDetector(text, UrlDetectorOptions.Default)
    val urlsInText = parser.detect()

    return urlsInText
        .map { it.originalUrl }
        // One placeholder per unique URL: replacing a URL already substitutes
        // all of its occurrences, so duplicates would only add unused keys.
        .distinct()
        .withIndex()
        .associate { (index, url) -> "Amethystindexer$index" to url }
}
fun autoTranslate(text: String, dontTranslateFrom: Set<String>, translateTo: String): Task<ResultOrError> {
return identifyLanguage(text).onSuccessTask {
if (it == translateTo) {