From 1afb5de820bc7b7ffd394204f8ea0178890c0ebc Mon Sep 17 00:00:00 2001 From: Brandon Liu Date: Mon, 23 Jan 2023 18:58:23 +0800 Subject: [PATCH] add VarInt encoder for unsigned longs [#98] (#449) --- NOTICE.md | 1 + .../benchmarks/BenchmarkVarInt.java | 50 +++++++ .../onthegomap/planetiler/util/VarInt.java | 125 ++++++++++++++++++ .../planetiler/util/VarIntTest.java | 40 ++++++ 4 files changed, 216 insertions(+) create mode 100644 planetiler-benchmarks/src/main/java/com/onthegomap/planetiler/benchmarks/BenchmarkVarInt.java create mode 100644 planetiler-core/src/main/java/com/onthegomap/planetiler/util/VarInt.java create mode 100644 planetiler-core/src/test/java/com/onthegomap/planetiler/util/VarIntTest.java diff --git a/NOTICE.md b/NOTICE.md index 0261f32f..10d1f2ea 100644 --- a/NOTICE.md +++ b/NOTICE.md @@ -41,6 +41,7 @@ The `planetiler-core` module includes the following software: from [github.com/rawrunprotected/hilbert_curves](https://github.com/rawrunprotected/hilbert_curves) (Public Domain) - `osmformat.proto` and `fileformat.proto` (generates `Osmformat.java` and `Fileformat.java`) from [openstreetmap/OSM-binary](https://github.com/openstreetmap/OSM-binary/tree/master/osmpbf) (MIT License) + - `VarInt` from [Bazel](https://github.com/bazelbuild/bazel) (Apache license) - Maven Dependencies: - org.yaml:snakeyaml (Apache license) - org.snakeyaml:snakeyaml-engine (Apache license) diff --git a/planetiler-benchmarks/src/main/java/com/onthegomap/planetiler/benchmarks/BenchmarkVarInt.java b/planetiler-benchmarks/src/main/java/com/onthegomap/planetiler/benchmarks/BenchmarkVarInt.java new file mode 100644 index 00000000..ed077bc3 --- /dev/null +++ b/planetiler-benchmarks/src/main/java/com/onthegomap/planetiler/benchmarks/BenchmarkVarInt.java @@ -0,0 +1,50 @@ +package com.onthegomap.planetiler.benchmarks; + +import static io.prometheus.client.Collector.NANOSECONDS_PER_SECOND; + +import com.onthegomap.planetiler.stats.Timer; +import com.onthegomap.planetiler.util.Format; 
+import com.onthegomap.planetiler.util.VarInt; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; + +public class BenchmarkVarInt { + + public static void main(String[] args) throws IOException { + + long num = 80000000; + + for (int i = 0; i < 3; i++) { + ByteArrayOutputStream stream = new ByteArrayOutputStream(); + var timer = Timer.start(); + + long sum = 0; + + for (long l = 0; l < num; l++) { + VarInt.putVarLong(l, stream); + sum += l; + } + + ByteBuffer buf = ByteBuffer.wrap(stream.toByteArray()); + + long acc = 0; + for (long l = 0; l < num; l++) { + acc += VarInt.getVarLong(buf); + } + + if (sum != acc) { + System.err.println("Sums do not match"); + } + + + System.err.println( + num + " varints took " + + Format.defaultInstance().duration(timer.stop().elapsed().wall()) + " (" + + Format.defaultInstance() + .numeric(num * 1d / (timer.stop().elapsed().wall().toNanos() / NANOSECONDS_PER_SECOND)) + + "/s)" + ); + } + } +} diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/VarInt.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/VarInt.java new file mode 100644 index 00000000..a7991a9e --- /dev/null +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/VarInt.java @@ -0,0 +1,125 @@ +/* +Copyright 2014 The Bazel Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package com.onthegomap.planetiler.util; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; + +/** + * Encode and decode Protocol Buffer-style VarInts. + * <p>
+ * getVarLong and putVarLong are adapted from Bazel. + */ +public class VarInt { + private VarInt() {} + + public static int varLongSize(long v) { + int result = 0; + do { + result++; + v >>>= 7; + } while (v != 0); + return result; + } + + /** + * Reads an up to 64 bit long varint from the current position of the given ByteBuffer and returns the decoded value + * as long. + * + * <p>
+ * The position of the buffer is advanced to the first byte after the decoded varint. + * + * @param src the ByteBuffer to get the var int from + * @return The integer value of the decoded long varint + */ + public static long getVarLong(ByteBuffer src) { + long tmp; + if ((tmp = src.get()) >= 0) { + return tmp; + } + long result = tmp & 0x7f; + if ((tmp = src.get()) >= 0) { + result |= tmp << 7; + } else { + result |= (tmp & 0x7f) << 7; + if ((tmp = src.get()) >= 0) { + result |= tmp << 14; + } else { + result |= (tmp & 0x7f) << 14; + if ((tmp = src.get()) >= 0) { + result |= tmp << 21; + } else { + result |= (tmp & 0x7f) << 21; + if ((tmp = src.get()) >= 0) { + result |= tmp << 28; + } else { + result |= (tmp & 0x7f) << 28; + if ((tmp = src.get()) >= 0) { + result |= tmp << 35; + } else { + result |= (tmp & 0x7f) << 35; + if ((tmp = src.get()) >= 0) { + result |= tmp << 42; + } else { + result |= (tmp & 0x7f) << 42; + if ((tmp = src.get()) >= 0) { + result |= tmp << 49; + } else { + result |= (tmp & 0x7f) << 49; + if ((tmp = src.get()) >= 0) { + result |= tmp << 56; + } else { + result |= (tmp & 0x7f) << 56; + result |= ((long) src.get()) << 63; + } + } + } + } + } + } + } + } + return result; + } + + public static void putVarLong(long v, ByteBuffer sink) { + while (true) { + int bits = ((int) v) & 0x7f; + v >>>= 7; + if (v == 0) { + sink.put((byte) bits); + return; + } + sink.put((byte) (bits | 0x80)); + } + } + + /** + * Encodes a long integer in a variable-length encoding, 7 bits per byte. 
+ * + * @param v the value to encode + * @param outputStream the OutputStream to add the encoded value + */ + public static void putVarLong(long v, OutputStream outputStream) throws IOException { + byte[] bytes = new byte[varLongSize(v)]; + ByteBuffer sink = ByteBuffer.wrap(bytes); + putVarLong(v, sink); + outputStream.write(bytes); + } +} diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/VarIntTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/VarIntTest.java new file mode 100644 index 00000000..31717b5c --- /dev/null +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/VarIntTest.java @@ -0,0 +1,40 @@ +package com.onthegomap.planetiler.util; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import org.junit.jupiter.api.Test; + +class VarIntTest { + + @Test + void testRoundTrip() throws IOException { + ByteArrayOutputStream stream = new ByteArrayOutputStream(); + VarInt.putVarLong(0, stream); + VarInt.putVarLong(1, stream); + VarInt.putVarLong(Long.MAX_VALUE, stream); + VarInt.putVarLong(Long.MIN_VALUE, stream); + ByteBuffer output = ByteBuffer.wrap(stream.toByteArray()); + assertEquals(0, VarInt.getVarLong(output)); + assertEquals(1, VarInt.getVarLong(output)); + assertEquals(Long.MAX_VALUE, VarInt.getVarLong(output)); + assertEquals(Long.MIN_VALUE, VarInt.getVarLong(output)); + } + + @Test + void testUnsignedEncoding() throws IOException { + byte[] rawbytes = {0, 1, 127, (byte) 0xe5, (byte) 0x8e, (byte) 0x26}; + ByteBuffer buf = ByteBuffer.wrap(rawbytes); + + assertEquals(0, VarInt.getVarLong(buf)); + assertEquals(1, VarInt.getVarLong(buf)); + assertEquals(127, VarInt.getVarLong(buf)); + assertEquals(624485, VarInt.getVarLong(buf)); + + byte[] max_safe_js_integer = + {(byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, 0xf}; + 
assertEquals(9007199254740991L, VarInt.getVarLong(ByteBuffer.wrap(max_safe_js_integer))); + } +}