package com.onthegomap.planetiler.pmtiles;

import com.onthegomap.planetiler.archive.ReadableTileArchive;
import com.onthegomap.planetiler.archive.Tile;
import com.onthegomap.planetiler.archive.TileArchiveMetadata;
import com.onthegomap.planetiler.archive.TileCompression;
import com.onthegomap.planetiler.geo.TileCoord;
import com.onthegomap.planetiler.util.CloseableIterator;
import com.onthegomap.planetiler.util.Gzip;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.SeekableByteChannel;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import org.locationtech.jts.geom.Coordinate;
/**
 * Reads tiles and metadata from a PMTiles archive through a {@link SeekableByteChannel}.
 * <p>
 * All channel access goes through the synchronized {@code getBytes}, so a single instance may be
 * shared across threads.
 */
public class ReadablePmtiles implements ReadableTileArchive {
  // single shared channel; its position is mutated on every read
  private final SeekableByteChannel channel;
  // fixed-size header parsed eagerly from the start of the archive
  private final Pmtiles.Header header;

  /**
   * Creates a reader over {@code channel}, immediately parsing the header at offset 0.
   *
   * @throws IOException if the header bytes cannot be read
   */
  public ReadablePmtiles(SeekableByteChannel channel) throws IOException {
    this.channel = channel;
    this.header = Pmtiles.Header.fromBytes(getBytes(0, Pmtiles.HEADER_LEN));
  }
/** Opens the PMTiles archive at {@code path} for reading. */
public static ReadableTileArchive newReadFromFile(Path path) throws IOException {
  var fileChannel = FileChannel.open(path, StandardOpenOption.READ);
  return new ReadablePmtiles(fileChannel);
}
/**
 * Reads {@code length} bytes starting at absolute offset {@code start} from the underlying channel.
 * <p>
 * Synchronized because the channel position is shared mutable state. Loops until the buffer is
 * full (or EOF) since a single {@link SeekableByteChannel#read} call may legally return fewer
 * bytes than requested; the original single-read version could silently return a zero-padded
 * tail on a short read.
 *
 * @throws IOException if the channel cannot be positioned or read
 */
private synchronized byte[] getBytes(long start, int length) throws IOException {
  channel.position(start);
  var buf = ByteBuffer.allocate(length);
  // keep reading until the buffer is full; stop early only at end-of-channel
  while (buf.hasRemaining() && channel.read(buf) >= 0) {
    // intentionally empty: read() advances the buffer
  }
  return buf.array();
}
/**
 * Finds the relevant entry for a tileId in a list of entries.
 * <p>
 * If there is an exact match for tileId, return that. Else if the tileId matches an entry's tileId + runLength,
 * return that. Else if the preceding entry is a directory (runLength = 0), return that. Else return null.
 *
 * @param entries directory entries sorted by ascending tileId
 * @param tileId  hilbert-encoded tile id to look up
 * @return the matching or covering entry, or {@code null} if none covers {@code tileId}
 */
public static Pmtiles.Entry findTile(List<Pmtiles.Entry> entries, long tileId) {
  int m = 0;
  int n = entries.size() - 1;
  while (m <= n) {
    // unsigned shift: overflow-proof midpoint even for very large directories
    int k = (n + m) >>> 1;
    // Long.compare avoids overflow that raw subtraction could hit on extreme ids
    int cmp = Long.compare(tileId, entries.get(k).tileId());
    if (cmp > 0) {
      m = k + 1;
    } else if (cmp < 0) {
      n = k - 1;
    } else {
      return entries.get(k);
    }
  }
  // no exact match: n is the last entry with tileId < target; it covers the target if it is a
  // leaf directory (runLength == 0) or its run extends past the target
  if (n >= 0 && (entries.get(n).runLength() == 0 || tileId - entries.get(n).tileId() < entries.get(n).runLength())) {
    return entries.get(n);
  }
  return null;
}
@Override
@SuppressWarnings("java:S1168")
public byte[] getTile(int x, int y, int z) {
  try {
    var tileId = TileCoord.ofXYZ(x, y, z).hilbertEncoded();
    // start the search at the root directory described by the header
    long dirOffset = header.rootDirOffset();
    int dirLength = (int) header.rootDirLength();
    // bounded descent through nested leaf directories (presumably the max nesting the PMTiles
    // spec allows — TODO confirm against the spec)
    for (int depth = 0; depth <= 3; depth++) {
      byte[] dirBytes = getBytes(dirOffset, dirLength);
      if (header.internalCompression() == Pmtiles.Compression.GZIP) {
        dirBytes = Gzip.gunzip(dirBytes);
      }
      var dir = Pmtiles.directoryFromBytes(dirBytes);
      var entry = findTile(dir, tileId);
      if (entry != null) {
        if (entry.runLength() > 0) {
          // entry points directly at tile data
          return getBytes(header.tileDataOffset() + entry.offset(), entry.length());
        } else {
          // runLength == 0 means the entry points at a leaf directory: descend into it
          dirOffset = header.leafDirectoriesOffset() + entry.offset();
          dirLength = entry.length();
        }
      } else {
        // no entry covers this tile id: tile is absent
        return null;
      }
    }
  } catch (IOException e) {
    throw new IllegalStateException("Could not get tile", e);
  }
  // exhausted the maximum directory depth without resolving a tile
  return null;
}
/** Returns the PMTiles header parsed from the start of the archive. */
public Pmtiles.Header getHeader() {
  return header;
}
/**
 * Reads and parses the JSON metadata section of the archive.
 *
 * @throws IOException if the metadata bytes cannot be read or decompressed
 */
public Pmtiles.JsonMetadata getJsonMetadata() throws IOException {
  byte[] raw = getBytes(header.jsonMetadataOffset(), (int) header.jsonMetadataLength());
  boolean gzipped = header.internalCompression() == Pmtiles.Compression.GZIP;
  return Pmtiles.JsonMetadata.fromBytes(gzipped ? Gzip.gunzip(raw) : raw);
}
@Override
public TileArchiveMetadata metadata() {
TileCompression tileCompression = switch (header.tileCompression()) {
case GZIP -> TileCompression.GZIP;
case NONE -> TileCompression.NONE;
2024-01-10 10:21:03 +00:00
case UNKNOWN -> TileCompression.UNKNOWN;
};
String format = switch (header.tileType()) {
case MVT -> TileArchiveMetadata.MVT_FORMAT;
default -> null;
};
2023-03-18 18:38:04 +00:00
try {
var jsonMetadata = getJsonMetadata();
var map = new LinkedHashMap<>(jsonMetadata.otherMetadata());
return new TileArchiveMetadata(
map.remove(TileArchiveMetadata.NAME_KEY),
map.remove(TileArchiveMetadata.DESCRIPTION_KEY),
map.remove(TileArchiveMetadata.ATTRIBUTION_KEY),
map.remove(TileArchiveMetadata.VERSION_KEY),
map.remove(TileArchiveMetadata.TYPE_KEY),
format,
2023-03-18 18:38:04 +00:00
header.bounds(),
Add support for "files"-archive (#761) * Add support for "files"-archive i.e. write individual pbf-files to disk in the format <base>/z/x/y.pbf in order to use that format it must be passed as "--ouput=/path/to/tiles?format=files" Fixes #536 * default to files format ...if no explict format query param given, path ends with a slash, or no extension given * output metadata.json in files-archive and refactor TileArchiveMetadata 1. put zoom into center (CoordinateXY->Coordinate) - in sync with mbtiles-format 2. add (De-)Serializer for Coordinate+Envelop => avoid duplication and cleaner 3. change the json and proto output for TileArchiveMetadata to be (more) in sync with mbtiles-format * add support for custom tile scheme in files-archive {z}/{x}/{y}.pbf is the default and can be configured as needed - e.g.: - different order: {x}/{y}/{z}.pbf - with intermediate dirs: {x}/a/{y}/b/{z}.pbf - with different extension: {z}/{y}/{y}.pbf.gz instead of {x} and {y}, {xs} and {xy} can be used which breaks up x and y into 2 directories each and ensures that each directory has <1000 children * fix issues with multiple writers 1. call finish archive only once after all writers are finished ...and not every time a writer finishes 2. log "zoom-progress" for the first tile write only (Finished z11 ... now starting z12) 3. remove file/dir-size progress logger bottleneck for files archive => each archive now reports the bytes written, which also fixes the issues of stream-archives reporting the size incorrectly 4. introduce printStats-hook on archive-level * add async file write support to files archive ...allow to use virtual threads ExecturService (bound only!) for tile writing also add some benchmark for writing tiles to disk: fixed, bound virtual, async, unbound virtual * Revert "add async file write support to files archive" This reverts commit b8cfa56977d98520aa8b62252c3a2726d440afe0. 
* few improvements - extract TileSchemeEncoding - use Counter.MultithreadCounter rather than LongAdder to count bytes written - add some JavaDoc * simplify files archive usage 1. allow to pass tile scheme directly via output: --output=tiles/{x}/{y}/{z}.pbf 2. auto-encode { (%7B) and } (%7D) => no need to encode it the URI on CLI * few more adjustments according to PR feeback 1. use WriteableTileArchive#bytesWritten in summmary as well 2. call WriteableTileArchive#init in a safer manner ..and a few more adjustments * more PR feedback
2024-01-03 01:37:49 +00:00
new Coordinate(
header.center().getX(),
header.center().getY(),
header.centerZoom()
),
2023-03-18 18:38:04 +00:00
(int) header.minZoom(),
(int) header.maxZoom(),
Add support for "files"-archive (#761) * Add support for "files"-archive i.e. write individual pbf-files to disk in the format <base>/z/x/y.pbf in order to use that format it must be passed as "--ouput=/path/to/tiles?format=files" Fixes #536 * default to files format ...if no explict format query param given, path ends with a slash, or no extension given * output metadata.json in files-archive and refactor TileArchiveMetadata 1. put zoom into center (CoordinateXY->Coordinate) - in sync with mbtiles-format 2. add (De-)Serializer for Coordinate+Envelop => avoid duplication and cleaner 3. change the json and proto output for TileArchiveMetadata to be (more) in sync with mbtiles-format * add support for custom tile scheme in files-archive {z}/{x}/{y}.pbf is the default and can be configured as needed - e.g.: - different order: {x}/{y}/{z}.pbf - with intermediate dirs: {x}/a/{y}/b/{z}.pbf - with different extension: {z}/{y}/{y}.pbf.gz instead of {x} and {y}, {xs} and {xy} can be used which breaks up x and y into 2 directories each and ensures that each directory has <1000 children * fix issues with multiple writers 1. call finish archive only once after all writers are finished ...and not every time a writer finishes 2. log "zoom-progress" for the first tile write only (Finished z11 ... now starting z12) 3. remove file/dir-size progress logger bottleneck for files archive => each archive now reports the bytes written, which also fixes the issues of stream-archives reporting the size incorrectly 4. introduce printStats-hook on archive-level * add async file write support to files archive ...allow to use virtual threads ExecturService (bound only!) for tile writing also add some benchmark for writing tiles to disk: fixed, bound virtual, async, unbound virtual * Revert "add async file write support to files archive" This reverts commit b8cfa56977d98520aa8b62252c3a2726d440afe0. 
* few improvements - extract TileSchemeEncoding - use Counter.MultithreadCounter rather than LongAdder to count bytes written - add some JavaDoc * simplify files archive usage 1. allow to pass tile scheme directly via output: --output=tiles/{x}/{y}/{z}.pbf 2. auto-encode { (%7B) and } (%7D) => no need to encode it the URI on CLI * few more adjustments according to PR feeback 1. use WriteableTileArchive#bytesWritten in summmary as well 2. call WriteableTileArchive#init in a safer manner ..and a few more adjustments * more PR feedback
2024-01-03 01:37:49 +00:00
TileArchiveMetadata.TileArchiveMetadataJson.create(jsonMetadata.vectorLayers()),
map,
tileCompression
2023-03-18 18:38:04 +00:00
);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}
/**
 * Reads and decodes the directory (root or leaf) of {@code length} bytes at absolute
 * {@code offset}, wrapping I/O failures as unchecked so callers can use it in stream pipelines.
 */
private List<Pmtiles.Entry> readDir(long offset, int length) {
  try {
    byte[] dirBytes = getBytes(offset, length);
    boolean gzipped = header.internalCompression() == Pmtiles.Compression.GZIP;
    return Pmtiles.directoryFromBytes(gzipped ? Gzip.gunzip(dirBytes) : dirBytes);
  } catch (IOException e) {
    throw new UncheckedIOException(e);
  }
}
// Warning: this will only work on z15 or less pmtiles which planetiler creates
private Stream<TileCoord> getTileCoords(List<Pmtiles.Entry> dir) {
2023-09-22 01:44:09 +00:00
return dir.stream().flatMap(entry -> entry.runLength() == 0 ?
getTileCoords(readDir(header.leafDirectoriesOffset() + entry.offset(), entry.length())) : IntStream
.range((int) entry.tileId(), (int) entry.tileId() + entry.runLength()).mapToObj(TileCoord::hilbertDecode));
}
private Stream<Tile> getTiles(List<Pmtiles.Entry> dir) {
return dir.stream().mapMulti((entry, next) -> {
try {
2023-09-22 01:44:09 +00:00
if (entry.runLength == 0) {
getTiles(readDir(header.leafDirectoriesOffset() + entry.offset(), entry.length())).forEach(next);
} else {
var data = getBytes(header.tileDataOffset() + entry.offset(), entry.length());
for (int i = 0; i < entry.runLength(); i++) {
next.accept(new Tile(TileCoord.hilbertDecode((int) (entry.tileId() + i)), data));
}
}
} catch (IOException e) {
2023-09-22 01:44:09 +00:00
throw new IllegalStateException("Failed to iterate through pmtiles archive ", e);
}
});
}
@Override
public CloseableIterator<TileCoord> getAllTileCoords() {
2023-09-22 01:44:09 +00:00
List<Pmtiles.Entry> rootDir = readDir(header.rootDirOffset(), (int) header.rootDirLength());
return CloseableIterator.of(getTileCoords(rootDir));
}
/** Returns an iterator over every tile (coordinate plus encoded data) in this archive. */
@Override
public CloseableIterator<Tile> getAllTiles() {
  var rootDir = readDir(header.rootDirOffset(), (int) header.rootDirLength());
  return CloseableIterator.of(getTiles(rootDir));
}
/** Closes the underlying channel; the archive cannot be read afterwards. */
@Override
public void close() throws IOException {
  channel.close();
}
}