kopia lustrzana https://github.com/vitorpamplona/amethyst
374 wiersze
14 KiB
Kotlin
374 wiersze
14 KiB
Kotlin
/**
 * Copyright (c) 2024 Vitor Pamplona
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the
 * Software, and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
|
|
package com.vitorpamplona.amethyst.service
|
|
|
|
import android.util.Log
|
|
import android.util.LruCache
|
|
import android.util.Patterns
|
|
import androidx.compose.runtime.Immutable
|
|
import com.linkedin.urls.detection.UrlDetector
|
|
import com.linkedin.urls.detection.UrlDetectorOptions
|
|
import com.vitorpamplona.amethyst.ui.components.ZoomableUrlContent
|
|
import com.vitorpamplona.amethyst.ui.components.ZoomableUrlImage
|
|
import com.vitorpamplona.amethyst.ui.components.ZoomableUrlVideo
|
|
import com.vitorpamplona.amethyst.ui.components.hashTagsPattern
|
|
import com.vitorpamplona.amethyst.ui.components.imageExtensions
|
|
import com.vitorpamplona.amethyst.ui.components.removeQueryParamsForExtensionComparison
|
|
import com.vitorpamplona.amethyst.ui.components.tagIndex
|
|
import com.vitorpamplona.amethyst.ui.components.videoExtensions
|
|
import com.vitorpamplona.quartz.encoders.Nip54
|
|
import com.vitorpamplona.quartz.encoders.Nip92
|
|
import com.vitorpamplona.quartz.events.FileHeaderEvent
|
|
import com.vitorpamplona.quartz.events.ImmutableListOfLists
|
|
import kotlinx.collections.immutable.ImmutableList
|
|
import kotlinx.collections.immutable.ImmutableMap
|
|
import kotlinx.collections.immutable.ImmutableSet
|
|
import kotlinx.collections.immutable.persistentListOf
|
|
import kotlinx.collections.immutable.toImmutableList
|
|
import kotlinx.collections.immutable.toImmutableMap
|
|
import kotlinx.collections.immutable.toImmutableSet
|
|
import kotlinx.collections.immutable.toPersistentList
|
|
import kotlinx.coroutines.CancellationException
|
|
import java.util.regex.Pattern
|
|
|
|
/**
 * Result of parsing the text of a note, as assembled by [RichTextParser.parseText].
 *
 * @property urlSet URLs detected in the content that survived the parser's filtering.
 * @property imagesForPager media descriptors for the URLs recognised as images or videos,
 *   keyed by URL.
 * @property imageList the values of [imagesForPager] as a list.
 * @property customEmoji custom emojis taken from the "emoji" tags, keyed by `:shortcode:`.
 * @property paragraphs the content split on line breaks, each line broken into segments.
 */
@Immutable
data class RichTextViewerState(
    val urlSet: ImmutableSet<String>,
    val imagesForPager: ImmutableMap<String, ZoomableUrlContent>,
    val imageList: ImmutableList<ZoomableUrlContent>,
    val customEmoji: ImmutableMap<String, String>,
    val paragraphs: ImmutableList<ParagraphState>,
)
|
|
|
|
/**
 * One line of the parsed content.
 *
 * @property words the segments that make up the line, in order.
 * @property isRTL whether the parser flagged the line as right-to-left text.
 */
data class ParagraphState(
    val words: ImmutableList<Segment>,
    val isRTL: Boolean,
)
|
|
|
|
/**
 * Memoizes [RichTextParser.parseText] results in an [LruCache] created with a max size of 200.
 *
 * NOTE(review): the cache key is the content string only. A cached state is returned as-is even
 * when the same content is later parsed with a different `tags` list — confirm that callers never
 * rely on tag-dependent output (custom emojis, `#[n]` references) differing for equal content.
 */
object CachedRichTextParser {
    val richTextCache = LruCache<String, RichTextViewerState>(200)

    /**
     * Returns the cached parse of [content], parsing and caching it first when absent.
     *
     * The cache is read exactly once: checking for the entry and then reading it again would
     * allow an eviction in between to hand back `null` from a function that promises a value.
     *
     * @param content the raw text to parse; also used as the cache key.
     * @param tags the event tags forwarded to [RichTextParser.parseText] on a cache miss.
     * @return the cached or freshly parsed state.
     */
    fun parseText(
        content: String,
        tags: ImmutableListOfLists<String>,
    ): RichTextViewerState =
        richTextCache[content]
            ?: RichTextParser().parseText(content, tags).also { parsed ->
                richTextCache.put(content, parsed)
            }
}
|
|
|
|
/**
 * Pattern used to spot URLs written without a protocol.
 *
 * Group 1 holds the URL and group 4 holds any additional characters that follow it.
 *
 * The preferred expression excludes Han, Hiragana and Katakana characters from the tail of the
 * URL. Android 9 seems to have an issue starting that regex, so if compiling it throws, the
 * simpler expression without the script exclusions is used instead.
 */
val noProtocolUrlValidator: Pattern =
    try {
        "(([\\w\\d-]+\\.)*[a-zA-Z][\\w-]+[\\.\\:]\\w+([\\/\\?\\=\\&\\#\\.]?[\\w-]+[^\\p{IsHan}\\p{IsHiragana}\\p{IsKatakana}])*\\/?)(.*)"
            .toPattern()
    } catch (compileFailure: Exception) {
        "(([\\w\\d-]+\\.)*[a-zA-Z][\\w-]+[\\.\\:]\\w+([\\/\\?\\=\\&\\#\\.]?[\\w-]+)*\\/?)(.*)"
            .toPattern()
    }
|
|
|
|
/**
 * Case-insensitive expression for a web address: an optional `http://` or `https://` prefix, a
 * dotted host name, then optional port, path, query string and fragment.
 */
val HTTPRegex: Regex =
    Regex(
        "^((http|https)://)?([A-Za-z0-9-_]+(\\.[A-Za-z0-9-_]+)+)(:[0-9]+)?(/[^?#]*)?(\\?[^#]*)?(#.*)?",
        RegexOption.IGNORE_CASE,
    )
|
|
|
|
/**
 * Turns the raw text of a note into a [RichTextViewerState]: the URLs and media found in it, the
 * custom emojis declared by its tags, and the text split into paragraphs of typed [Segment]s.
 */
class RichTextParser() {
    /**
     * Builds the media descriptor for [fullUrl] when it points at an image or a video.
     *
     * The URL, as normalised by `removeQueryParamsForExtensionComparison`, is compared against
     * `imageExtensions` first and `videoExtensions` second. Metadata is read from the result of
     * `Nip54().parse(fullUrl)` and, when missing there, from `Nip92().parse(fullUrl, tags)`.
     *
     * @param fullUrl the URL exactly as it appears in the content.
     * @param eventTags the tags of the event the URL was found in.
     * @return a [ZoomableUrlImage], a [ZoomableUrlVideo], or `null` when neither extension list
     *   matches.
     */
    fun parseMediaUrl(
        fullUrl: String,
        eventTags: ImmutableListOfLists<String>,
    ): ZoomableUrlContent? {
        val removedParamsFromUrl = removeQueryParamsForExtensionComparison(fullUrl)

        val isImage = imageExtensions.any { removedParamsFromUrl.endsWith(it) }
        val isVideo = !isImage && videoExtensions.any { removedParamsFromUrl.endsWith(it) }
        if (!isImage && !isVideo) return null

        val frags = Nip54().parse(fullUrl)
        val tags = Nip92().parse(fullUrl, eventTags.lists)

        // Values found by the Nip54 parse win over the ones found by the Nip92 parse.
        fun metadata(key: String) = frags[key] ?: tags[key]

        return if (isImage) {
            ZoomableUrlImage(
                url = fullUrl,
                description = metadata(FileHeaderEvent.ALT),
                hash = metadata(FileHeaderEvent.HASH),
                blurhash = metadata(FileHeaderEvent.BLUR_HASH),
                dim = metadata(FileHeaderEvent.DIMENSION),
                contentWarning = metadata("content-warning"),
            )
        } else {
            ZoomableUrlVideo(
                url = fullUrl,
                description = metadata(FileHeaderEvent.ALT),
                hash = metadata(FileHeaderEvent.HASH),
                blurhash = metadata(FileHeaderEvent.BLUR_HASH),
                dim = metadata(FileHeaderEvent.DIMENSION),
                contentWarning = metadata("content-warning"),
            )
        }
    }

    /**
     * Parses [content] into a [RichTextViewerState].
     *
     * @param content the raw text of the note.
     * @param tags the tags of the event; used for media metadata, custom emojis and `#[n]`
     *   references.
     * @return the detected URLs, media, custom emojis and paragraph segments.
     */
    fun parseText(
        content: String,
        tags: ImmutableListOfLists<String>,
    ): RichTextViewerState {
        val urls = UrlDetector(content, UrlDetectorOptions.Default).detect()

        // Keeps detection order. Drops e-mails, plain numbers, anything containing an
        // ideographic full stop, and whatever does not look like a web address.
        val urlSet =
            urls.mapNotNullTo(LinkedHashSet(urls.size)) { detected ->
                val candidate = detected.originalUrl
                when {
                    Patterns.EMAIL_ADDRESS.matcher(candidate).matches() -> null
                    isNumber(candidate) -> null
                    candidate.contains("。") -> null
                    HTTPRegex.matches(candidate) -> candidate
                    else -> null
                }
            }

        val imagesForPager =
            urlSet.mapNotNull { fullUrl -> parseMediaUrl(fullUrl, tags) }.associateBy { it.url }
        val imageList = imagesForPager.values.toList()

        // ["emoji", shortcode, value] tags become ":shortcode:" -> value.
        val emojiMap =
            tags.lists.filter { it.size > 2 && it[0] == "emoji" }.associate { ":${it[1]}:" to it[2] }

        val segments = findTextSegments(content, imagesForPager.keys, urlSet, emojiMap, tags)

        return RichTextViewerState(
            urlSet = urlSet.toImmutableSet(),
            imagesForPager = imagesForPager.toImmutableMap(),
            imageList = imageList.toImmutableList(),
            customEmoji = emojiMap.toImmutableMap(),
            paragraphs = segments,
        )
    }

    /**
     * Splits [content] on line breaks and classifies the space-separated words of each line.
     *
     * A line whose words are all plain text is collapsed into a single [RegularTextSegment]
     * carrying the untouched line; otherwise every word keeps its own segment.
     */
    private fun findTextSegments(
        content: String,
        images: Set<String>,
        urls: Set<String>,
        emojis: Map<String, String>,
        tags: ImmutableListOfLists<String>,
    ): ImmutableList<ParagraphState> =
        content
            .split('\n')
            .map { paragraph ->
                val isRTL = isArabic(paragraph)

                val segments =
                    paragraph
                        .trimEnd()
                        .split(' ')
                        .map { word -> wordIdentifier(word, images, urls, emojis, tags) }

                if (segments.any { it !is RegularTextSegment }) {
                    ParagraphState(segments.toPersistentList(), isRTL)
                } else {
                    ParagraphState(persistentListOf<Segment>(RegularTextSegment(paragraph)), isRTL)
                }
            }.toImmutableList()

    /** Whether [word] matches [numberPattern] in full. */
    fun isNumber(word: String): Boolean = numberPattern.matcher(word).matches()

    /** Whether [word] matches [shortDatePattern] or [longDatePattern] in full. */
    fun isDate(word: String): Boolean =
        shortDatePattern.matcher(word).matches() || longDatePattern.matcher(word).matches()

    /** Whether [text] has any character in the ranges U+0600..U+06FF or U+0750..U+077F. */
    private fun isArabic(text: String): Boolean =
        text.any { it in '\u0600'..'\u06FF' || it in '\u0750'..'\u077F' }

    /**
     * Classifies a single word. The checks run top to bottom and the first hit wins, so their
     * order is part of the behaviour. Matchers are only created by the branch that needs them.
     */
    private fun wordIdentifier(
        word: String,
        images: Set<String>,
        urls: Set<String>,
        emojis: Map<String, String>,
        tags: ImmutableListOfLists<String>,
    ): Segment =
        when {
            word.isEmpty() -> RegularTextSegment(word)
            images.contains(word) -> ImageSegment(word)
            urls.contains(word) -> LinkSegment(word)
            emojis.any { word.contains(it.key) } -> EmojiSegment(word)
            word.startsWith("lnbc", true) -> InvoiceSegment(word)
            word.startsWith("lnurl", true) -> WithdrawSegment(word)
            word.startsWith("cashuA", true) -> CashuSegment(word)
            Patterns.EMAIL_ADDRESS.matcher(word).matches() -> EmailSegment(word)
            word.length in 7..14 && !isDate(word) && Patterns.PHONE.matcher(word).matches() ->
                PhoneSegment(word)
            startsWithNIP19Scheme(word) -> BechSegment(word)
            word.startsWith("#") -> parseHash(word, tags)
            word.contains(".") -> parseSchemelessUrl(word)
            else -> RegularTextSegment(word)
        }

    /**
     * Handles a word containing a dot that no earlier check claimed: it becomes a
     * [SchemelessUrlSegment] when both [noProtocolUrlValidator] and [schemelessUrlPattern] find
     * a match in it, and plain text otherwise.
     */
    private fun parseSchemelessUrl(word: String): Segment {
        val schemelessMatcher = noProtocolUrlValidator.matcher(word)
        if (!schemelessMatcher.find() || schemelessUrlPattern.find(word) == null) {
            return RegularTextSegment(word)
        }

        val url = schemelessMatcher.group(1) // url
        val additionalChars = schemelessMatcher.group(4)?.ifEmpty { null } // additional chars
        return SchemelessUrlSegment(word, url, additionalChars)
    }

    /**
     * Classifies a word starting with `#`.
     *
     * First tries an indexed reference through `tagIndex`: when the index points at an existing
     * tag with at least two elements, a "p" tag yields a [HashIndexUserSegment] and an "e" or
     * "a" tag yields a [HashIndexEventSegment]. Then tries `hashTagsPattern` for a
     * [HashTagSegment]. Falls back to plain text.
     */
    private fun parseHash(
        word: String,
        tags: ImmutableListOfLists<String>,
    ): Segment {
        // First #[n]
        val matcher = tagIndex.matcher(word)
        try {
            if (matcher.find()) {
                // toIntOrNull: an index too large for an Int is simply not a tag reference.
                val index = matcher.group(1)?.toIntOrNull()
                val suffix = matcher.group(2)

                if (index != null && index >= 0 && index < tags.lists.size) {
                    val tag = tags.lists[index]

                    if (tag.size > 1) {
                        when (tag[0]) {
                            "p" -> return HashIndexUserSegment(word, tag[1], suffix)
                            "e", "a" -> return HashIndexEventSegment(word, tag[1], suffix)
                        }
                    }
                }
            }
        } catch (e: Exception) {
            if (e is CancellationException) throw e
            Log.w("Tag Parser", "Couldn't link tag $word", e)
        }

        // Second #Amethyst
        val hashtagMatcher = hashTagsPattern.matcher(word)
        try {
            if (hashtagMatcher.find()) {
                val hashtag = hashtagMatcher.group(1)
                if (hashtag != null) {
                    return HashTagSegment(word, hashtag, hashtagMatcher.group(2)?.ifEmpty { null })
                }
            }
        } catch (e: Exception) {
            if (e is CancellationException) throw e
            Log.e("Hashtag Parser", "Couldn't link hashtag $word", e)
        }

        return RegularTextSegment(word)
    }

    companion object {
        /** Four digits, two digits, two digits, separated by dashes. */
        val longDatePattern: Pattern = Pattern.compile("^\\d{4}-\\d{2}-\\d{2}$")

        /** Three groups of two digits separated by dashes. */
        val shortDatePattern: Pattern = Pattern.compile("^\\d{2}-\\d{2}-\\d{2}$")

        /** An optional minus sign, digits and dots, then an optional run of letters or `%`. */
        val numberPattern: Pattern = Pattern.compile("^(-?[\\d.]+)([a-zA-Z%]*)$")

        /**
         * Host, optional port, path, query and fragment with no protocol in front. Compiled
         * once here instead of once per candidate word.
         */
        private val schemelessUrlPattern =
            """^([A-Za-z0-9-_]+(\.[A-Za-z0-9-_]+)+)(:[0-9]+)?(/[^?#]*)?(\?[^#]*)?(#.*)?"""
                .toRegex(RegexOption.IGNORE_CASE)
    }
}
|
|
|
|
/**
 * Base type for a piece of parsed text.
 *
 * @property segmentText the text this segment was created from.
 */
@Immutable
open class Segment(
    val segmentText: String,
)
|
|
|
|
/** A word that equals one of the media URLs recognised in the content. */
@Immutable
class ImageSegment(
    segment: String,
) : Segment(segment)
|
|
|
|
/** A word that equals one of the URLs detected in the content. */
@Immutable
class LinkSegment(
    segment: String,
) : Segment(segment)
|
|
|
|
/** A word that contains at least one of the custom emoji keys. */
@Immutable
class EmojiSegment(
    segment: String,
) : Segment(segment)
|
|
|
|
/** A word that starts with "lnbc", ignoring case. */
@Immutable
class InvoiceSegment(
    segment: String,
) : Segment(segment)
|
|
|
|
/** A word that starts with "lnurl", ignoring case. */
@Immutable
class WithdrawSegment(
    segment: String,
) : Segment(segment)
|
|
|
|
/** A word that starts with "cashuA", ignoring case. */
@Immutable
class CashuSegment(
    segment: String,
) : Segment(segment)
|
|
|
|
/** A word that matches the platform's e-mail address pattern in full. */
@Immutable
class EmailSegment(
    segment: String,
) : Segment(segment)
|
|
|
|
/** A word of 7 to 14 characters that is not a date and matches the platform's phone pattern. */
@Immutable
class PhoneSegment(
    segment: String,
) : Segment(segment)
|
|
|
|
/** A word accepted by [startsWithNIP19Scheme]. */
@Immutable
class BechSegment(
    segment: String,
) : Segment(segment)
|
|
|
|
/**
 * Base type for a `#[n]` reference that was resolved against the event tags.
 *
 * @property hex the second element of the referenced tag.
 * @property extras whatever followed the reference inside the same word, if anything.
 */
@Immutable
open class HashIndexSegment(
    segment: String,
    val hex: String,
    val extras: String?,
) : Segment(segment)
|
|
|
|
/** A `#[n]` reference whose tag is a "p" tag. */
@Immutable
class HashIndexUserSegment(
    segment: String,
    hex: String,
    extras: String?,
) : HashIndexSegment(segment, hex, extras)
|
|
|
|
/** A `#[n]` reference whose tag is an "e" or an "a" tag. */
@Immutable
class HashIndexEventSegment(
    segment: String,
    hex: String,
    extras: String?,
) : HashIndexSegment(segment, hex, extras)
|
|
|
|
/**
 * A word recognised as a hashtag.
 *
 * @property hashtag the first group captured by the hashtag pattern.
 * @property extras the second captured group, or `null` when it is empty.
 */
@Immutable
class HashTagSegment(
    segment: String,
    val hashtag: String,
    val extras: String?,
) : Segment(segment)
|
|
|
|
/**
 * A word recognised as a URL written without a protocol.
 *
 * @property url the URL part captured from the word.
 * @property extras the characters that followed the URL in the word, or `null` when none.
 */
@Immutable
class SchemelessUrlSegment(
    segment: String,
    val url: String,
    val extras: String?,
) : Segment(segment)
|
|
|
|
/** Plain text that none of the parser's checks claimed. */
@Immutable
class RegularTextSegment(
    segment: String,
) : Segment(segment)
|
|
|
|
/**
 * Tells whether [word] begins with one of the prefixes "npub1", "naddr1", "note1", "nprofile1"
 * or "nevent1".
 *
 * The comparison ignores case. Before comparing, one leading "@", then one leading "nostr:",
 * then one more leading "@" are stripped, in that order.
 *
 * @param word the word to inspect.
 * @return `true` when the stripped, lower-cased word starts with one of the prefixes.
 */
fun startsWithNIP19Scheme(word: String): Boolean {
    val normalized = word.lowercase()
    val withoutMention = normalized.removePrefix("@")
    val withoutScheme = withoutMention.removePrefix("nostr:")
    val candidate = withoutScheme.removePrefix("@")

    return when {
        candidate.startsWith("npub1") -> true
        candidate.startsWith("naddr1") -> true
        candidate.startsWith("note1") -> true
        candidate.startsWith("nprofile1") -> true
        candidate.startsWith("nevent1") -> true
        else -> false
    }
}
|