Pmtiles package low level header and directory serialization/deserialization [#98] (#466)

pull/480/head
Brandon Liu 2023-02-08 19:44:04 +08:00 zatwierdzone przez GitHub
rodzic 7cb9b5f670
commit 9bf62d5644
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 4AEE18F83AFDEB23
3 zmienionych plików z 511 dodań i 0 usunięć

Wyświetl plik

@ -0,0 +1,45 @@
package com.onthegomap.planetiler.benchmarks;
import static io.prometheus.client.Collector.NANOSECONDS_PER_SECOND;
import com.onthegomap.planetiler.pmtiles.Pmtiles;
import com.onthegomap.planetiler.stats.Timer;
import com.onthegomap.planetiler.util.Format;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Random;
/**
 * Micro-benchmark that times a serialize/deserialize round trip of a large synthetic PMTiles directory.
 */
public class BenchmarkPmtiles {

  /**
   * Builds 60 million synthetic entries, round-trips them through {@link Pmtiles#serializeDirectory(java.util.List)}
   * and {@link Pmtiles#deserializeDirectory(byte[])}, and prints the wall time and throughput to stderr. The whole
   * procedure is repeated three times.
   *
   * @param args ignored
   * @throws IOException declared for callers; nothing in this body throws it directly
   */
  public static void main(String[] args) throws IOException {
    long num = 60_000_000;
    // fixed seed so every invocation benchmarks the same data
    var random = new Random(0);
    for (int i = 0; i < 3; i++) {
      // presize: the element count is known up front, so avoid repeated growth copies during setup
      var entries = new ArrayList<Pmtiles.Entry>((int) num);
      long offset = 0;
      for (int j = 0; j < num; j++) {
        int len = 200 + random.nextInt(64000);
        entries.add(new Pmtiles.Entry(j, offset, len, 1));
        offset += len;
      }
      var timer = Timer.start();
      var result = Pmtiles.deserializeDirectory(Pmtiles.serializeDirectory(entries));
      assert (result.size() == entries.size());
      // stop the timer exactly once so the duration and the rate are derived from the same measurement
      var wall = timer.stop().elapsed().wall();
      var format = Format.defaultInstance();
      double seconds = wall.toNanos() / NANOSECONDS_PER_SECOND;
      System.err.println(
        num + " entries took " +
          format.duration(wall) + " (" +
          format.numeric(num * 1d / seconds) +
          "/s)"
      );
    }
  }
}

Wyświetl plik

@ -0,0 +1,327 @@
package com.onthegomap.planetiler.pmtiles;
import com.carrotsearch.hppc.ByteArrayList;
import com.onthegomap.planetiler.reader.FileFormatException;
import com.onthegomap.planetiler.util.VarInt;
import java.nio.BufferUnderflowException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
/**
* PMTiles is a single-file tile archive format designed for efficient access on cloud storage.
*
* @see <a href="https://github.com/protomaps/PMTiles/blob/main/spec/v3/spec.md">PMTiles Specification</a>
*/
public final class Pmtiles {
static final int HEADER_LEN = 127;
/**
 * One row of a PMTiles directory: a tile ID plus the offset, length and run length recorded for it.
 *
 * <p>
 * Equality and hashing use all four fields, while {@link #compareTo(Entry)} orders by {@code tileId} only, so the
 * natural ordering is not consistent with {@code equals}.
 */
public static final class Entry implements Comparable<Entry> {
  // Not final: deserializeDirectory creates entries from the tile IDs first and fills in the rest afterwards.
  private long tileId;
  private long offset;
  private int length;
  private int runLength;

  /**
   * @param tileId    the tile ID of this entry
   * @param offset    the offset recorded for this entry
   * @param length    the length recorded for this entry
   * @param runLength the run length recorded for this entry
   */
  public Entry(long tileId, long offset, int length, int runLength) {
    this.tileId = tileId;
    this.offset = offset;
    this.length = length;
    this.runLength = runLength;
  }

  /** Returns the tile ID. */
  public long tileId() {
    return tileId;
  }

  /** Returns the offset. */
  public long offset() {
    return offset;
  }

  /** Returns the length, widened from the {@code int} it is stored as. */
  public long length() {
    return length;
  }

  /** Returns the run length, widened from the {@code int} it is stored as. */
  public long runLength() {
    return runLength;
  }

  @Override
  public boolean equals(Object o) {
    return this == o || (o instanceof Entry other &&
      tileId == other.tileId &&
      offset == other.offset &&
      length == other.length &&
      runLength == other.runLength);
  }

  @Override
  public int hashCode() {
    return Objects.hash(tileId, offset, length, runLength);
  }

  /** Renders all four fields so that failed comparisons and log output are readable. */
  @Override
  public String toString() {
    return "Entry{tileId=" + tileId +
      ", offset=" + offset +
      ", length=" + length +
      ", runLength=" + runLength +
      '}';
  }

  /** Orders entries by ascending {@code tileId}; the other fields are ignored. */
  @Override
  public int compareTo(Entry that) {
    return Long.compare(this.tileId, that.tileId);
  }
}
public enum Compression {
UNKNOWN((byte) 0),
NONE((byte) 1),
GZIP((byte) 2);
private final byte value;
Compression(byte value) {
this.value = value;
}
public byte getValue() {
return this.value;
}
public static Compression fromByte(byte b) {
for (var entry : values()) {
if (entry.value == b) {
return entry;
}
}
return UNKNOWN;
}
}
public enum TileType {
UNKNOWN((byte) 0),
MVT((byte) 1);
private final byte value;
TileType(byte value) {
this.value = value;
}
public byte getValue() {
return this.value;
}
public static TileType fromByte(byte b) {
for (var entry : values()) {
if (entry.value == b) {
return entry;
}
}
return UNKNOWN;
}
}
/**
 * The fixed-size PMTiles header.
 *
 * <p>
 * {@link #toBytes()} writes the 7-byte magic string {@code "PMTiles"} followed by every component in declaration
 * order, little-endian, into a buffer of {@code HEADER_LEN} bytes; {@link #fromBytes(byte[])} reads them back in the
 * same order.
 */
public record Header(
  byte specVersion,
  long rootDirOffset,
  long rootDirLength,
  long jsonMetadataOffset,
  long jsonMetadataLength,
  long leafDirectoriesOffset,
  long leafDirectoriesLength,
  long tileDataOffset,
  long tileDataLength,
  long numAddressedTiles,
  long numTileEntries,
  long numTileContents,
  boolean clustered,
  Compression internalCompression,
  Compression tileCompression,
  TileType tileType,
  byte minZoom,
  byte maxZoom,
  int minLonE7, // Store a decimal longitude as a signed 32-bit integer by multiplying by 10,000,000.
  int minLatE7,
  int maxLonE7,
  int maxLatE7,
  byte centerZoom,
  int centerLonE7,
  int centerLatE7) {

  /**
   * Serializes this header.
   *
   * @return a new array of {@code HEADER_LEN} bytes holding the magic string and all components, little-endian
   */
  public byte[] toBytes() {
    ByteBuffer buf = ByteBuffer.allocate(HEADER_LEN).order(ByteOrder.LITTLE_ENDIAN);
    String magic = "PMTiles";
    buf.put(magic.getBytes(StandardCharsets.UTF_8));
    buf.put(specVersion);
    buf.putLong(rootDirOffset);
    buf.putLong(rootDirLength);
    buf.putLong(jsonMetadataOffset);
    buf.putLong(jsonMetadataLength);
    buf.putLong(leafDirectoriesOffset);
    buf.putLong(leafDirectoriesLength);
    buf.putLong(tileDataOffset);
    buf.putLong(tileDataLength);
    buf.putLong(numAddressedTiles);
    buf.putLong(numTileEntries);
    buf.putLong(numTileContents);
    // booleans and enums are each stored as a single byte
    buf.put((byte) (clustered ? 1 : 0));
    buf.put(internalCompression.getValue());
    buf.put(tileCompression.getValue());
    buf.put(tileType.getValue());
    buf.put(minZoom);
    buf.put(maxZoom);
    buf.putInt(minLonE7);
    buf.putInt(minLatE7);
    buf.putInt(maxLonE7);
    buf.putInt(maxLatE7);
    buf.put(centerZoom);
    buf.putInt(centerLonE7);
    buf.putInt(centerLatE7);
    return buf.array();
  }

  /**
   * Parses a header from the start of {@code bytes}.
   *
   * @param bytes the serialized header; bytes beyond the header fields are ignored
   * @return the decoded header
   * @throws FileFormatException if the data does not start with the {@code "PMTiles"} magic string or ends before
   *                             every field has been read
   */
  public static Header fromBytes(byte[] bytes) {
    ByteBuffer buffer = ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN);
    byte[] magic = new byte[7];
    try {
      buffer.get(magic);
      if (!(new String(magic, StandardCharsets.UTF_8).equals("PMTiles"))) {
        throw new FileFormatException("Incorrect magic number for PMTiles archive.");
      }
      byte specVersion = buffer.get();
      long rootDirOffset = buffer.getLong();
      long rootDirLength = buffer.getLong();
      long jsonMetadataOffset = buffer.getLong();
      long jsonMetadataLength = buffer.getLong();
      long leafDirectoriesOffset = buffer.getLong();
      long leafDirectoriesLength = buffer.getLong();
      long tileDataOffset = buffer.getLong();
      long tileDataLength = buffer.getLong();
      long numAddressedTiles = buffer.getLong();
      long numTileEntries = buffer.getLong();
      long numTileContents = buffer.getLong();
      // only the exact value 1 counts as clustered
      boolean clustered = (buffer.get() == 0x1);
      // unrecognized bytes decode to the UNKNOWN constant of each enum rather than failing
      Compression internalCompression = Compression.fromByte(buffer.get());
      Compression tileCompression = Compression.fromByte(buffer.get());
      TileType tileType = TileType.fromByte(buffer.get());
      byte minZoom = buffer.get();
      byte maxZoom = buffer.get();
      int minLonE7 = buffer.getInt();
      int minLatE7 = buffer.getInt();
      int maxLonE7 = buffer.getInt();
      int maxLatE7 = buffer.getInt();
      byte centerZoom = buffer.get();
      int centerLonE7 = buffer.getInt();
      int centerLatE7 = buffer.getInt();
      return new Header(
        specVersion,
        rootDirOffset,
        rootDirLength,
        jsonMetadataOffset,
        jsonMetadataLength,
        leafDirectoriesOffset,
        leafDirectoriesLength,
        tileDataOffset,
        tileDataLength,
        numAddressedTiles,
        numTileEntries,
        numTileContents,
        clustered,
        internalCompression,
        tileCompression,
        tileType,
        minZoom,
        maxZoom,
        minLonE7,
        minLatE7,
        maxLonE7,
        maxLatE7,
        centerZoom,
        centerLonE7,
        centerLatE7
      );
    } catch (BufferUnderflowException e) {
      // keep the underflow as the cause so the stack trace still shows which read ran out of bytes
      FileFormatException failure = new FileFormatException("Failed to read enough bytes for PMTiles header.");
      failure.initCause(e);
      throw failure;
    }
  }
}
/**
 * Serialize the entries of a directory that fall in the index range {@code [start, end)}.
 *
 * @param slice a list of entries sorted by ascending {@code tileId} with size > 0.
 * @param start index of the first entry to serialize, inclusive.
 * @param end   index one past the last entry to serialize, exclusive.
 * @return the uncompressed bytes of the selected part of the directory.
 */
public static byte[] serializeDirectory(List<Entry> slice, int start, int end) {
  // when the range already spans the entire list there is no need to wrap it in a sub-list view
  boolean coversWholeList = start == 0 && end == slice.size();
  if (coversWholeList) {
    return serializeDirectory(slice);
  }
  return serializeDirectory(slice.subList(start, end));
}
/**
 * Serialize a directory of entries.
 *
 * <p>
 * The output is a sequence of varints: the entry count, then one column per field across all entries — tile IDs
 * (each as the difference from the previous tile ID, starting from 0), run lengths, lengths and finally offsets. An
 * offset is written as {@code 0} when the entry starts exactly where the previous one ends, and as
 * {@code offset + 1} otherwise.
 *
 * @param slice a list of entries sorted by ascending {@code tileId} with size > 0.
 * @return the uncompressed bytes of the directory.
 */
public static byte[] serializeDirectory(List<Entry> slice) {
  ByteArrayList out = new ByteArrayList();
  VarInt.putVarLong(slice.size(), out);

  // column 1: delta-encoded tile IDs
  long previousId = 0;
  for (Entry current : slice) {
    long id = current.tileId;
    VarInt.putVarLong(id - previousId, out);
    previousId = id;
  }

  // column 2: run lengths
  for (Entry current : slice) {
    VarInt.putVarLong(current.runLength, out);
  }

  // column 3: lengths
  for (Entry current : slice) {
    VarInt.putVarLong(current.length, out);
  }

  // column 4: offsets, with 0 standing for "directly after the previous entry"
  boolean hasPrevious = false;
  long endOfPrevious = 0;
  for (Entry current : slice) {
    boolean contiguous = hasPrevious && current.offset == endOfPrevious;
    VarInt.putVarLong(contiguous ? 0 : current.offset + 1, out);
    endOfPrevious = current.offset + current.length;
    hasPrevious = true;
  }

  return out.toArray();
}
/**
 * Parse the uncompressed bytes of a directory, as produced by {@link #serializeDirectory(List)}, back into entries.
 *
 * <p>
 * The input is read as a sequence of varints: the entry count, then the delta-encoded tile IDs, the run lengths, the
 * lengths and the offsets. An offset of {@code 0} on any entry but the first means the entry starts where the
 * previous one ends; any other value is the real offset plus one.
 *
 * @param bytes the uncompressed bytes of the directory.
 * @return the decoded entries, in the order they appear in {@code bytes}.
 * @throws FileFormatException if the declared entry count is negative or larger than the remaining bytes could
 *                             possibly hold, or if the data ends before all declared entries have been read.
 */
public static List<Entry> deserializeDirectory(byte[] bytes) {
  ByteBuffer buffer = ByteBuffer.wrap(bytes);
  try {
    long declaredEntries = VarInt.getVarLong(buffer);
    // Each entry contributes one varint (at least one byte) to each of the four columns, so a count above
    // remaining/4 cannot be valid. Rejecting it here avoids pre-allocating a huge list for corrupt input.
    if (declaredEntries < 0 || declaredEntries > buffer.remaining() / 4) {
      throw new FileFormatException("Invalid number of entries in PMTiles directory: " + declaredEntries);
    }
    int numEntries = (int) declaredEntries;
    ArrayList<Entry> result = new ArrayList<>(numEntries);

    // column 1: tile IDs are stored as deltas from the previous tile ID
    long lastId = 0;
    for (int i = 0; i < numEntries; i++) {
      long tileId = lastId + VarInt.getVarLong(buffer);
      result.add(new Entry(tileId, 0, 0, 0));
      lastId = tileId;
    }

    // column 2: run lengths
    for (int i = 0; i < numEntries; i++) {
      result.get(i).runLength = (int) VarInt.getVarLong(buffer);
    }

    // column 3: lengths
    for (int i = 0; i < numEntries; i++) {
      result.get(i).length = (int) VarInt.getVarLong(buffer);
    }

    // column 4: offsets
    for (int i = 0; i < numEntries; i++) {
      long encoded = VarInt.getVarLong(buffer);
      Entry entry = result.get(i);
      if (i > 0 && encoded == 0) {
        Entry previous = result.get(i - 1);
        entry.offset = previous.offset + previous.length;
      } else {
        entry.offset = encoded - 1;
      }
    }
    return result;
  } catch (BufferUnderflowException e) {
    // report truncated data the same way Header.fromBytes does, keeping the underflow as the cause
    FileFormatException failure = new FileFormatException("Failed to read enough bytes for PMTiles directory.");
    failure.initCause(e);
    throw failure;
  }
}
}

Wyświetl plik

@ -0,0 +1,139 @@
package com.onthegomap.planetiler.pmtiles;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import com.onthegomap.planetiler.reader.FileFormatException;
import java.util.ArrayList;
import java.util.List;
import org.junit.jupiter.api.Test;
class PmtilesTest {
@Test
void testRoundtripHeader() {
  // Serialize a header and parse it back; every component must survive unchanged.
  Pmtiles.Header expected = new Pmtiles.Header(
    (byte) 3, // specVersion
    1, // rootDirOffset
    2, // rootDirLength
    3, // jsonMetadataOffset
    4, // jsonMetadataLength
    5, // leafDirectoriesOffset
    6, // leafDirectoriesLength
    7, // tileDataOffset
    8, // tileDataLength
    9, // numAddressedTiles
    10, // numTileEntries
    11, // numTileContents
    true, // clustered
    Pmtiles.Compression.GZIP, // internalCompression
    Pmtiles.Compression.GZIP, // tileCompression
    Pmtiles.TileType.MVT, // tileType
    (byte) 1, // minZoom
    (byte) 3, // maxZoom
    -10_000_000, // minLonE7
    -20_000_000, // minLatE7
    10_000_000, // maxLonE7
    20_000_000, // maxLatE7
    (byte) 2, // centerZoom
    -5_000_000, // centerLonE7
    -6_000_000 // centerLatE7
  );

  Pmtiles.Header actual = Pmtiles.Header.fromBytes(expected.toBytes());

  assertEquals(expected.specVersion(), actual.specVersion());
  assertEquals(expected.rootDirOffset(), actual.rootDirOffset());
  assertEquals(expected.rootDirLength(), actual.rootDirLength());
  assertEquals(expected.jsonMetadataOffset(), actual.jsonMetadataOffset());
  assertEquals(expected.jsonMetadataLength(), actual.jsonMetadataLength());
  assertEquals(expected.leafDirectoriesOffset(), actual.leafDirectoriesOffset());
  assertEquals(expected.leafDirectoriesLength(), actual.leafDirectoriesLength());
  assertEquals(expected.tileDataOffset(), actual.tileDataOffset());
  assertEquals(expected.tileDataLength(), actual.tileDataLength());
  assertEquals(expected.numAddressedTiles(), actual.numAddressedTiles());
  assertEquals(expected.numTileEntries(), actual.numTileEntries());
  assertEquals(expected.numTileContents(), actual.numTileContents());
  assertEquals(expected.clustered(), actual.clustered());
  assertEquals(expected.internalCompression(), actual.internalCompression());
  assertEquals(expected.tileCompression(), actual.tileCompression());
  assertEquals(expected.tileType(), actual.tileType());
  assertEquals(expected.minZoom(), actual.minZoom());
  assertEquals(expected.maxZoom(), actual.maxZoom());
  assertEquals(expected.minLonE7(), actual.minLonE7());
  assertEquals(expected.minLatE7(), actual.minLatE7());
  assertEquals(expected.maxLonE7(), actual.maxLonE7());
  assertEquals(expected.maxLatE7(), actual.maxLatE7());
  assertEquals(expected.centerZoom(), actual.centerZoom());
  assertEquals(expected.centerLonE7(), actual.centerLonE7());
  assertEquals(expected.centerLatE7(), actual.centerLatE7());
}
@Test
void testBadHeader() {
  // too short to even hold the magic string
  byte[] empty = new byte[0];
  // long enough, but all zeros instead of starting with the magic string
  byte[] zeroed = new byte[127];
  assertThrows(FileFormatException.class, () -> Pmtiles.Header.fromBytes(empty));
  assertThrows(FileFormatException.class, () -> Pmtiles.Header.fromBytes(zeroed));
}
@Test
void testRoundtripDirectoryMinimal() {
  // smallest possible directory: exactly one entry
  List<Pmtiles.Entry> expected = List.of(new Pmtiles.Entry(0, 0, 1, 1));
  byte[] serialized = Pmtiles.serializeDirectory(expected);
  List<Pmtiles.Entry> actual = Pmtiles.deserializeDirectory(serialized);
  assertEquals(expected, actual);
}
@Test
void testRoundtripDirectorySimple() {
  // The second entry starts right where the first ends (contiguous); the third leaves a gap (non-contiguous).
  List<Pmtiles.Entry> expected = List.of(
    new Pmtiles.Entry(0, 0, 1, 0),
    new Pmtiles.Entry(1, 1, 1, 1),
    new Pmtiles.Entry(2, 3, 1, 1)
  );

  // single-argument overload
  byte[] viaWholeList = Pmtiles.serializeDirectory(expected);
  assertEquals(expected, Pmtiles.deserializeDirectory(viaWholeList));

  // range overload spanning the entire list
  byte[] viaFullRange = Pmtiles.serializeDirectory(expected, 0, expected.size());
  assertEquals(expected, Pmtiles.deserializeDirectory(viaFullRange));
}
@Test
void testRoundtripDirectorySlice() {
  List<Pmtiles.Entry> in = new ArrayList<>();
  in.add(new Pmtiles.Entry(0, 0, 1, 0));
  in.add(new Pmtiles.Entry(1, 1, 1, 1));
  in.add(new Pmtiles.Entry(2, 3, 1, 1));
  // serialize only the middle entry
  List<Pmtiles.Entry> out = Pmtiles.deserializeDirectory(Pmtiles.serializeDirectory(in, 1, 2));
  assertEquals(1, out.size());
  // checking the size alone would let a wrongly decoded entry through, so compare the contents as well
  assertEquals(in.subList(1, 2), out);
}
}