From 88daeb4d0b7a9b9d5d15e3d670771b2bc58e9fb3 Mon Sep 17 00:00:00 2001 From: Brandon Liu Date: Fri, 27 Jan 2023 21:12:54 +0800 Subject: [PATCH] Speed up VarInt encoding, remove emitTilesInOrder option [#98] (#460) --- .../benchmarks/BenchmarkVarInt.java | 8 +-- .../planetiler/config/PlanetilerConfig.java | 2 - .../onthegomap/planetiler/util/VarInt.java | 37 ++++-------- .../planetiler/writer/TileArchiveWriter.java | 58 +++++++------------ .../planetiler/util/VarIntTest.java | 14 ++--- planetiler-custommap/README.md | 1 - planetiler-custommap/planetiler.schema.json | 11 ---- .../planetiler/custommap/Contexts.java | 1 - 8 files changed, 43 insertions(+), 89 deletions(-) diff --git a/planetiler-benchmarks/src/main/java/com/onthegomap/planetiler/benchmarks/BenchmarkVarInt.java b/planetiler-benchmarks/src/main/java/com/onthegomap/planetiler/benchmarks/BenchmarkVarInt.java index ed077bc3..e49a0b4d 100644 --- a/planetiler-benchmarks/src/main/java/com/onthegomap/planetiler/benchmarks/BenchmarkVarInt.java +++ b/planetiler-benchmarks/src/main/java/com/onthegomap/planetiler/benchmarks/BenchmarkVarInt.java @@ -2,10 +2,10 @@ package com.onthegomap.planetiler.benchmarks; import static io.prometheus.client.Collector.NANOSECONDS_PER_SECOND; +import com.carrotsearch.hppc.ByteArrayList; import com.onthegomap.planetiler.stats.Timer; import com.onthegomap.planetiler.util.Format; import com.onthegomap.planetiler.util.VarInt; -import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.ByteBuffer; @@ -13,10 +13,10 @@ public class BenchmarkVarInt { public static void main(String[] args) throws IOException { - long num = 80000000; + long num = 100000000; for (int i = 0; i < 3; i++) { - ByteArrayOutputStream stream = new ByteArrayOutputStream(); + ByteArrayList stream = new ByteArrayList(); var timer = Timer.start(); long sum = 0; @@ -26,7 +26,7 @@ public class BenchmarkVarInt { sum += l; } - ByteBuffer buf = ByteBuffer.wrap(stream.toByteArray()); + ByteBuffer buf = ByteBuffer.wrap(stream.toArray()); long acc = 0; for (long l = 0; l < num; l++) { diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/config/PlanetilerConfig.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/config/PlanetilerConfig.java index 4e4f397a..6a3cda65 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/config/PlanetilerConfig.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/config/PlanetilerConfig.java @@ -25,7 +25,6 @@ public record PlanetilerConfig( int maxzoomForRendering, boolean skipIndexCreation, boolean optimizeDb, - boolean emitTilesInOrder, boolean force, boolean gzipTempStorage, boolean mmapTempStorage, @@ -125,7 +124,6 @@ public record PlanetilerConfig( renderMaxzoom, arguments.getBoolean("skip_mbtiles_index_creation", "skip adding index to mbtiles file", false), arguments.getBoolean("optimize_db", "Vacuum analyze mbtiles after writing", false), - arguments.getBoolean("emit_tiles_in_order", "emit tiles in index order", true), arguments.getBoolean("force", "overwriting output file and ignore disk/RAM warnings", false), arguments.getBoolean("gzip_temp", "gzip temporary feature storage (uses more CPU, but less disk space)", false), arguments.getBoolean("mmap_temp", "use memory-mapped IO for temp feature files", true), diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/VarInt.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/VarInt.java index a7991a9e..6d89bfbe 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/VarInt.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/VarInt.java @@ -16,8 +16,7 @@ limitations under the License. package com.onthegomap.planetiler.util; -import java.io.IOException; -import java.io.OutputStream; +import com.carrotsearch.hppc.ByteArrayList; import java.nio.ByteBuffer; /** @@ -29,15 +28,6 @@ import java.nio.ByteBuffer; public class VarInt { private VarInt() {} - public static int varLongSize(long v) { - int result = 0; - do { - result++; - v >>>= 7; - } while (v != 0); - return result; - } - /** * Reads an up to 64 bit long varint from the current position of the given ByteBuffer and returns the decoded value * as long. @@ -98,28 +88,21 @@ public class VarInt { return result; } - public static void putVarLong(long v, ByteBuffer sink) { + /** + * Encodes a long integer in a variable-length encoding, 7 bits per byte. + * + * @param v the value to encode + * @param byteArrayList the bytes to add the encoded value + */ + public static void putVarLong(long v, ByteArrayList byteArrayList) { while (true) { int bits = ((int) v) & 0x7f; v >>>= 7; if (v == 0) { - sink.put((byte) bits); + byteArrayList.add((byte) bits); return; } - sink.put((byte) (bits | 0x80)); + byteArrayList.add((byte) (bits | 0x80)); } } - - /** - * Encodes a long integer in a variable-length encoding, 7 bits per byte. - * - * @param v the value to encode - * @param outputStream the OutputStream to add the encoded value - */ - public static void putVarLong(long v, OutputStream outputStream) throws IOException { - byte[] bytes = new byte[varLongSize(v)]; - ByteBuffer sink = ByteBuffer.wrap(bytes); - putVarLong(v, sink); - outputStream.write(bytes); - } } diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/writer/TileArchiveWriter.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/writer/TileArchiveWriter.java index d2fd7921..fad9f06b 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/writer/TileArchiveWriter.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/writer/TileArchiveWriter.java @@ -125,44 +125,30 @@ public class TileArchiveWriter { ); WorkerPipeline encodeBranch, writeBranch = null; - if (config.emitTilesInOrder()) { - /* - * To emit tiles in order, fork the input queue and send features to both the encoder and writer. The writer - * waits on them to be encoded in the order they were received, and the encoder processes them in parallel. - * One batch might take a long time to process, so make the queues very big to avoid idle encoding CPUs. - */ - WorkQueue writerQueue = new WorkQueue<>("archive_writer_queue", queueSize, 1, stats); - encodeBranch = pipeline - .fromGenerator(secondStageName, next -> { - var writerEnqueuer = writerQueue.threadLocalWriter(); - writer.readFeaturesAndBatch(batch -> { - next.accept(batch); - writerEnqueuer.accept(batch); // also send immediately to writer - }); - writerQueue.close(); - // use only 1 thread since readFeaturesAndBatch needs to be single-threaded - }, 1) - .addBuffer("reader_queue", queueSize) - .sinkTo("encode", processThreads, writer::tileEncoderSink); - // the tile writer will wait on the result of each batch to ensure tiles are written in order - writeBranch = pipeline.readFromQueue(writerQueue) - // use only 1 thread since tileWriter needs to be single-threaded - .sinkTo("write", 1, writer::tileWriter); - } else { - /* - * If we don't need to emit tiles in order, just send the features to the encoder, and when it finishes with - * a tile send that to the writer. - */ - encodeBranch = pipeline + /* + * To emit tiles in order, fork the input queue and send features to both the encoder and writer. The writer + * waits on them to be encoded in the order they were received, and the encoder processes them in parallel. + * One batch might take a long time to process, so make the queues very big to avoid idle encoding CPUs. + */ + WorkQueue writerQueue = new WorkQueue<>("archive_writer_queue", queueSize, 1, stats); + encodeBranch = pipeline + .fromGenerator(secondStageName, next -> { + var writerEnqueuer = writerQueue.threadLocalWriter(); + writer.readFeaturesAndBatch(batch -> { + next.accept(batch); + writerEnqueuer.accept(batch); // also send immediately to writer + }); + writerQueue.close(); // use only 1 thread since readFeaturesAndBatch needs to be single-threaded - .fromGenerator(secondStageName, writer::readFeaturesAndBatch, 1) - .addBuffer("reader_queue", queueSize) - .addWorker("encoder", processThreads, writer::tileEncoder) - .addBuffer("writer_queue", queueSize) - // use only 1 thread since tileWriter needs to be single-threaded - .sinkTo("write", 1, writer::tileWriter); - } + }, 1) + .addBuffer("reader_queue", queueSize) + .sinkTo("encode", processThreads, writer::tileEncoderSink); + + // the tile writer will wait on the result of each batch to ensure tiles are written in order + writeBranch = pipeline.readFromQueue(writerQueue) + // use only 1 thread since tileWriter needs to be single-threaded + .sinkTo("write", 1, writer::tileWriter); var loggers = ProgressLoggers.create() .addRatePercentCounter("features", features.numFeaturesWritten(), writer.featuresProcessed, true) diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/VarIntTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/VarIntTest.java index 31717b5c..067e8db4 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/VarIntTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/VarIntTest.java @@ -2,7 +2,7 @@ package com.onthegomap.planetiler.util; import static org.junit.jupiter.api.Assertions.assertEquals; -import java.io.ByteArrayOutputStream; +import com.carrotsearch.hppc.ByteArrayList; import java.io.IOException; import java.nio.ByteBuffer; import org.junit.jupiter.api.Test; @@ -11,12 +11,12 @@ class VarIntTest { @Test void testRoundTrip() throws IOException { - ByteArrayOutputStream stream = new ByteArrayOutputStream(); - VarInt.putVarLong(0, stream); - VarInt.putVarLong(1, stream); - VarInt.putVarLong(Long.MAX_VALUE, stream); - VarInt.putVarLong(Long.MIN_VALUE, stream); - ByteBuffer output = ByteBuffer.wrap(stream.toByteArray()); + ByteArrayList dir = new ByteArrayList(); + VarInt.putVarLong(0, dir); + VarInt.putVarLong(1, dir); + VarInt.putVarLong(Long.MAX_VALUE, dir); + VarInt.putVarLong(Long.MIN_VALUE, dir); + ByteBuffer output = ByteBuffer.wrap(dir.toArray()); assertEquals(0, VarInt.getVarLong(output)); assertEquals(1, VarInt.getVarLong(output)); assertEquals(Long.MAX_VALUE, VarInt.getVarLong(output)); diff --git a/planetiler-custommap/README.md b/planetiler-custommap/README.md index e38e219c..7da4d729 100644 --- a/planetiler-custommap/README.md +++ b/planetiler-custommap/README.md @@ -153,7 +153,6 @@ cat planetiler-custommap/planetiler.schema.json | jq -r '.properties.args.proper - `render_maxzoom` - Maximum rendering zoom level up to - `skip_mbtiles_index_creation` - Skip adding index to mbtiles file - `optimize_db` - Vacuum analyze mbtiles file after writing -- `emit_tiles_in_order` - Emit vector tiles in index order - `force` - Overwriting output file and ignore warnings - `gzip_temp` - Gzip temporary feature storage (uses more CPU, but less disk space) - `mmap_temp` - Use memory-mapped IO for temp feature files diff --git a/planetiler-custommap/planetiler.schema.json b/planetiler-custommap/planetiler.schema.json index d6faf868..bea2cdd6 100644 --- a/planetiler-custommap/planetiler.schema.json +++ b/planetiler-custommap/planetiler.schema.json @@ -160,17 +160,6 @@ } ] }, - "emit_tiles_in_order": { - "description": "Emit vector tiles in index order", - "anyOf": [ - { - "type": "string" - }, - { - "type": "boolean" - } - ] - }, "force": { "description": "Overwriting output file and ignore warnings", "anyOf": [ diff --git a/planetiler-custommap/src/main/java/com/onthegomap/planetiler/custommap/Contexts.java b/planetiler-custommap/src/main/java/com/onthegomap/planetiler/custommap/Contexts.java index 73a9e74d..1c15de8d 100644 --- a/planetiler-custommap/src/main/java/com/onthegomap/planetiler/custommap/Contexts.java +++ b/planetiler-custommap/src/main/java/com/onthegomap/planetiler/custommap/Contexts.java @@ -189,7 +189,6 @@ public class Contexts { argumentValues.put("render_maxzoom", config.maxzoomForRendering()); argumentValues.put("skip_mbtiles_index_creation", config.skipIndexCreation()); argumentValues.put("optimize_db", config.optimizeDb()); - argumentValues.put("emit_tiles_in_order", config.emitTilesInOrder()); argumentValues.put("force", config.force()); argumentValues.put("gzip_temp", config.gzipTempStorage()); argumentValues.put("mmap_temp", config.mmapTempStorage());