Merge pull request #819 from jiftechnify/meta-parser-benchmark

Add benchmark for MetaTagsParser
pull/821/head
Vitor Pamplona 2024-03-29 17:46:35 -04:00 zatwierdzone przez GitHub
commit 7fc43c96d6
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: B5690EEEBB952194
5 zmienionych plików z 7959 dodań i 44 usunięć

Wyświetl plik

@ -20,6 +20,8 @@
*/
package com.vitorpamplona.amethyst.service.previews
import com.vitorpamplona.amethyst.commons.preview.MetaTag
import com.vitorpamplona.amethyst.commons.preview.MetaTagsParser
import com.vitorpamplona.amethyst.service.HttpClientManager
import com.vitorpamplona.amethyst.service.checkNotInMainThread
import kotlinx.coroutines.Dispatchers

Wyświetl plik

@ -0,0 +1,55 @@
/**
* Copyright (c) 2024 Vitor Pamplona
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal in
* the Software without restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the
* Software, and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package com.vitorpamplona.amethyst.benchmark
import androidx.benchmark.junit4.BenchmarkRule
import androidx.benchmark.junit4.measureRepeated
import androidx.test.ext.junit.runners.AndroidJUnit4
import androidx.test.platform.app.InstrumentationRegistry.getInstrumentation
import com.vitorpamplona.amethyst.commons.preview.MetaTagsParser
import org.junit.Assert.assertEquals
import org.junit.Assert.assertNotNull
import org.junit.Rule
import org.junit.Test
import org.junit.runner.RunWith
import java.nio.charset.Charset
/**
 * Microbenchmark for [MetaTagsParser.parse], run against the HTML fixture
 * `github_amethyst.html` read from the instrumentation context's assets.
 */
@RunWith(AndroidJUnit4::class)
class MetaTagsParserBenchmark {
    // Read in the property initializer, i.e. outside the measured block.
    // `use` closes the asset stream as soon as its bytes have been read.
    private val html =
        getInstrumentation().context.assets.open("github_amethyst.html")
            .use { it.readBytes() }
            .toString(Charset.forName("utf-8"))

    @get:Rule
    val benchmarkRule = BenchmarkRule()

    /**
     * Measures parsing the fixture and looking up its `og:title` meta tag, asserting on each
     * iteration that the tag was found and carries the expected content.
     */
    @Test
    fun parseMetaTags() {
        benchmarkRule.measureRepeated {
            val metaOgTitle = MetaTagsParser.parse(html).find { it.attr("property") == "og:title" }
            assertNotNull(metaOgTitle)
            assertEquals(
                "GitHub - vitorpamplona/amethyst: Nostr client for Android",
                // Safe call instead of `!!`: a missing tag surfaces as an assertion failure.
                metaOgTitle?.attr("content"),
            )
        }
    }
}

Wyświetl plik

@ -18,7 +18,7 @@
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package com.vitorpamplona.amethyst.service.previews
package com.vitorpamplona.amethyst.commons.preview
import org.junit.Assert.assertEquals
import org.junit.Test

Wyświetl plik

@ -18,26 +18,30 @@
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package com.vitorpamplona.amethyst.service.previews
package com.vitorpamplona.amethyst.commons.preview
import kotlinx.collections.immutable.toImmutableMap
import java.lang.StringBuilder
internal data class MetaTag(private val attrs: Map<String, String>) {
data class MetaTag(private val attrs: Map<String, String>) {
/**
* Returns a value of an attribute specified by its name (case insensitive), or empty string if it doesn't exist.
*/
fun attr(name: String): String = attrs[name.lowercase()] ?: ""
}
// parse a partial HTML document and extract meta tags
internal object MetaTagsParser {
object MetaTagsParser {
private val NON_ATTR_NAME_CHARS = setOf(Char(0x0), '"', '\'', '>', '/')
private val NON_UNQUOTED_ATTR_VALUE_CHARS = setOf('"', '\'', '=', '>', '<', '`')
/**
* Lazily parse a partial HTML document and extract meta tags.
*/
fun parse(input: String): Sequence<MetaTag> =
sequence {
val s = TagScanner(input)
while (!s.exhausted()) {
val t = s.nextTag() ?: continue
if (t.name == "/head") {
if (t.name == "head" && t.isEnd) {
break
}
if (t.name == "meta") {
@ -47,61 +51,45 @@ internal object MetaTagsParser {
}
}
private data class RawTag(val name: String, val attrPart: String)
private data class RawTag(val isEnd: Boolean, val name: String, val attrPart: String)
private class TagScanner(private val input: String) {
var p = 0
private var p = 0
fun exhausted(): Boolean = p >= input.length
private fun peek(): Char = input[p]
private fun consume(): Char {
return input[p++]
}
private fun consume(): Char = input[p++]
private fun consumeChar(c: Char): Boolean {
if (this.peek() == c) {
private fun skipWhile(pred: (Char) -> Boolean) {
while (!this.exhausted() && pred(this.peek())) {
this.consume()
return true
}
return false
}
private fun skipSpaces() {
while (!this.exhausted() && this.peek().isWhitespace()) {
this.consume()
}
}
private fun skipUntil(c: Char) {
while (!this.exhausted() && this.peek() != c) {
this.consume()
}
}
private fun readWhile(pred: (Char) -> Boolean): String {
val sb = StringBuilder()
while (!this.exhausted() && pred(this.peek())) {
sb.append(this.consume())
}
return sb.toString()
this.skipWhile { it.isWhitespace() }
}
fun nextTag(): RawTag? {
skipUntil('<')
skipWhile { it != '<' }
consume()
// read tag name
val name = StringBuilder()
if (consumeChar('/')) {
name.append('/')
val isEnd = peek() == '/'
if (isEnd) {
consume()
}
val n = readWhile { !it.isWhitespace() && it != '>' }
skipSpaces()
val nameStart = p
skipWhile { !it.isWhitespace() && it != '>' }
val nameEnd = p
// read until end of tag
val attrsPart = StringBuilder()
// seek to start of attrs part
skipSpaces()
val attrsStart = p
// skip until end of tag
var quote: Char? = null
while (!exhausted()) {
val c = consume()
@ -124,13 +112,15 @@ internal object MetaTagsParser {
quote = null
}
}
attrsPart.append(c)
}
val attrsEnd = p - 1
if (!n.matches(Regex("""[0-9a-zA-Z]+"""))) {
val name = input.slice(nameStart..<nameEnd)
if (!name.matches(Regex("""[0-9a-zA-Z]+"""))) {
return null
}
return RawTag(name.append(n).toString().lowercase(), attrsPart.toString())
val attrsPart = input.slice(attrsStart..<attrsEnd)
return RawTag(isEnd, name.lowercase(), attrsPart)
}
}

File diff suppressed because one or more lines are too long