Speed up VarInt encoding, remove emitTilesInOrder option [#98] (#460)

pull/464/head
Brandon Liu 2023-01-27 21:12:54 +08:00 zatwierdzone przez GitHub
rodzic def8625d0a
commit 88daeb4d0b
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 4AEE18F83AFDEB23
8 zmienionych plików z 43 dodań i 89 usunięć

Wyświetl plik

@ -2,10 +2,10 @@ package com.onthegomap.planetiler.benchmarks;
import static io.prometheus.client.Collector.NANOSECONDS_PER_SECOND;
import com.carrotsearch.hppc.ByteArrayList;
import com.onthegomap.planetiler.stats.Timer;
import com.onthegomap.planetiler.util.Format;
import com.onthegomap.planetiler.util.VarInt;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
@ -13,10 +13,10 @@ public class BenchmarkVarInt {
public static void main(String[] args) throws IOException {
long num = 80000000;
long num = 100000000;
for (int i = 0; i < 3; i++) {
ByteArrayOutputStream stream = new ByteArrayOutputStream();
ByteArrayList stream = new ByteArrayList();
var timer = Timer.start();
long sum = 0;
@ -26,7 +26,7 @@ public class BenchmarkVarInt {
sum += l;
}
ByteBuffer buf = ByteBuffer.wrap(stream.toByteArray());
ByteBuffer buf = ByteBuffer.wrap(stream.toArray());
long acc = 0;
for (long l = 0; l < num; l++) {

Wyświetl plik

@ -25,7 +25,6 @@ public record PlanetilerConfig(
int maxzoomForRendering,
boolean skipIndexCreation,
boolean optimizeDb,
boolean emitTilesInOrder,
boolean force,
boolean gzipTempStorage,
boolean mmapTempStorage,
@ -125,7 +124,6 @@ public record PlanetilerConfig(
renderMaxzoom,
arguments.getBoolean("skip_mbtiles_index_creation", "skip adding index to mbtiles file", false),
arguments.getBoolean("optimize_db", "Vacuum analyze mbtiles after writing", false),
arguments.getBoolean("emit_tiles_in_order", "emit tiles in index order", true),
arguments.getBoolean("force", "overwriting output file and ignore disk/RAM warnings", false),
arguments.getBoolean("gzip_temp", "gzip temporary feature storage (uses more CPU, but less disk space)", false),
arguments.getBoolean("mmap_temp", "use memory-mapped IO for temp feature files", true),

Wyświetl plik

@ -16,8 +16,7 @@ limitations under the License.
package com.onthegomap.planetiler.util;
import java.io.IOException;
import java.io.OutputStream;
import com.carrotsearch.hppc.ByteArrayList;
import java.nio.ByteBuffer;
/**
@ -29,15 +28,6 @@ import java.nio.ByteBuffer;
public class VarInt {
private VarInt() {}
public static int varLongSize(long v) {
int result = 0;
do {
result++;
v >>>= 7;
} while (v != 0);
return result;
}
/**
* Reads an up to 64 bit long varint from the current position of the given ByteBuffer and returns the decoded value
* as long.
@ -98,28 +88,21 @@ public class VarInt {
return result;
}
public static void putVarLong(long v, ByteBuffer sink) {
/**
* Encodes a long integer in a variable-length encoding, 7 bits per byte.
*
* @param v the value to encode
* @param byteArrayList the bytes to add the encoded value
*/
public static void putVarLong(long v, ByteArrayList byteArrayList) {
while (true) {
int bits = ((int) v) & 0x7f;
v >>>= 7;
if (v == 0) {
sink.put((byte) bits);
byteArrayList.add((byte) bits);
return;
}
sink.put((byte) (bits | 0x80));
byteArrayList.add((byte) (bits | 0x80));
}
}
/**
* Encodes a long integer in a variable-length encoding, 7 bits per byte.
*
* @param v the value to encode
* @param outputStream the OutputStream to add the encoded value
*/
public static void putVarLong(long v, OutputStream outputStream) throws IOException {
byte[] bytes = new byte[varLongSize(v)];
ByteBuffer sink = ByteBuffer.wrap(bytes);
putVarLong(v, sink);
outputStream.write(bytes);
}
}

Wyświetl plik

@ -125,44 +125,30 @@ public class TileArchiveWriter {
);
WorkerPipeline<TileBatch> encodeBranch, writeBranch = null;
if (config.emitTilesInOrder()) {
/*
* To emit tiles in order, fork the input queue and send features to both the encoder and writer. The writer
* waits on them to be encoded in the order they were received, and the encoder processes them in parallel.
* One batch might take a long time to process, so make the queues very big to avoid idle encoding CPUs.
*/
WorkQueue<TileBatch> writerQueue = new WorkQueue<>("archive_writer_queue", queueSize, 1, stats);
encodeBranch = pipeline
.<TileBatch>fromGenerator(secondStageName, next -> {
var writerEnqueuer = writerQueue.threadLocalWriter();
writer.readFeaturesAndBatch(batch -> {
next.accept(batch);
writerEnqueuer.accept(batch); // also send immediately to writer
});
writerQueue.close();
// use only 1 thread since readFeaturesAndBatch needs to be single-threaded
}, 1)
.addBuffer("reader_queue", queueSize)
.sinkTo("encode", processThreads, writer::tileEncoderSink);
// the tile writer will wait on the result of each batch to ensure tiles are written in order
writeBranch = pipeline.readFromQueue(writerQueue)
// use only 1 thread since tileWriter needs to be single-threaded
.sinkTo("write", 1, writer::tileWriter);
} else {
/*
* If we don't need to emit tiles in order, just send the features to the encoder, and when it finishes with
* a tile send that to the writer.
*/
encodeBranch = pipeline
/*
* To emit tiles in order, fork the input queue and send features to both the encoder and writer. The writer
* waits on them to be encoded in the order they were received, and the encoder processes them in parallel.
* One batch might take a long time to process, so make the queues very big to avoid idle encoding CPUs.
*/
WorkQueue<TileBatch> writerQueue = new WorkQueue<>("archive_writer_queue", queueSize, 1, stats);
encodeBranch = pipeline
.<TileBatch>fromGenerator(secondStageName, next -> {
var writerEnqueuer = writerQueue.threadLocalWriter();
writer.readFeaturesAndBatch(batch -> {
next.accept(batch);
writerEnqueuer.accept(batch); // also send immediately to writer
});
writerQueue.close();
// use only 1 thread since readFeaturesAndBatch needs to be single-threaded
.fromGenerator(secondStageName, writer::readFeaturesAndBatch, 1)
.addBuffer("reader_queue", queueSize)
.addWorker("encoder", processThreads, writer::tileEncoder)
.addBuffer("writer_queue", queueSize)
// use only 1 thread since tileWriter needs to be single-threaded
.sinkTo("write", 1, writer::tileWriter);
}
}, 1)
.addBuffer("reader_queue", queueSize)
.sinkTo("encode", processThreads, writer::tileEncoderSink);
// the tile writer will wait on the result of each batch to ensure tiles are written in order
writeBranch = pipeline.readFromQueue(writerQueue)
// use only 1 thread since tileWriter needs to be single-threaded
.sinkTo("write", 1, writer::tileWriter);
var loggers = ProgressLoggers.create()
.addRatePercentCounter("features", features.numFeaturesWritten(), writer.featuresProcessed, true)

Wyświetl plik

@ -2,7 +2,7 @@ package com.onthegomap.planetiler.util;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.ByteArrayOutputStream;
import com.carrotsearch.hppc.ByteArrayList;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.junit.jupiter.api.Test;
@ -11,12 +11,12 @@ class VarIntTest {
@Test
void testRoundTrip() throws IOException {
ByteArrayOutputStream stream = new ByteArrayOutputStream();
VarInt.putVarLong(0, stream);
VarInt.putVarLong(1, stream);
VarInt.putVarLong(Long.MAX_VALUE, stream);
VarInt.putVarLong(Long.MIN_VALUE, stream);
ByteBuffer output = ByteBuffer.wrap(stream.toByteArray());
ByteArrayList dir = new ByteArrayList();
VarInt.putVarLong(0, dir);
VarInt.putVarLong(1, dir);
VarInt.putVarLong(Long.MAX_VALUE, dir);
VarInt.putVarLong(Long.MIN_VALUE, dir);
ByteBuffer output = ByteBuffer.wrap(dir.toArray());
assertEquals(0, VarInt.getVarLong(output));
assertEquals(1, VarInt.getVarLong(output));
assertEquals(Long.MAX_VALUE, VarInt.getVarLong(output));

Wyświetl plik

@ -153,7 +153,6 @@ cat planetiler-custommap/planetiler.schema.json | jq -r '.properties.args.proper
- `render_maxzoom` - Maximum rendering zoom level up to
- `skip_mbtiles_index_creation` - Skip adding index to mbtiles file
- `optimize_db` - Vacuum analyze mbtiles file after writing
- `emit_tiles_in_order` - Emit vector tiles in index order
- `force` - Overwriting output file and ignore warnings
- `gzip_temp` - Gzip temporary feature storage (uses more CPU, but less disk space)
- `mmap_temp` - Use memory-mapped IO for temp feature files

Wyświetl plik

@ -160,17 +160,6 @@
}
]
},
"emit_tiles_in_order": {
"description": "Emit vector tiles in index order",
"anyOf": [
{
"type": "string"
},
{
"type": "boolean"
}
]
},
"force": {
"description": "Overwriting output file and ignore warnings",
"anyOf": [

Wyświetl plik

@ -189,7 +189,6 @@ public class Contexts {
argumentValues.put("render_maxzoom", config.maxzoomForRendering());
argumentValues.put("skip_mbtiles_index_creation", config.skipIndexCreation());
argumentValues.put("optimize_db", config.optimizeDb());
argumentValues.put("emit_tiles_in_order", config.emitTilesInOrder());
argumentValues.put("force", config.force());
argumentValues.put("gzip_temp", config.gzipTempStorage());
argumentValues.put("mmap_temp", config.mmapTempStorage());