From e7e18d5f174ad0756279cfe54c636d048359ecce Mon Sep 17 00:00:00 2001 From: Michael Barry Date: Tue, 26 Sep 2023 20:56:04 -0400 Subject: [PATCH] Sort merged vector tile features by hilbert order (#673) --- layerstats/README.md | 9 +-- .../onthegomap/planetiler/FeatureMerge.java | 26 +++++++-- .../com/onthegomap/planetiler/VectorTile.java | 55 ++++++++++++++++++- .../planetiler/util/TileSizeStats.java | 10 ++++ .../planetiler/PlanetilerTests.java | 1 + .../onthegomap/planetiler/VectorTileTest.java | 38 +++++++++++++ .../planetiler/util/TileSizeStatsTest.java | 10 ++-- .../util/TilesetSummaryStatisticsTest.java | 28 +++++----- 8 files changed, 149 insertions(+), 28 deletions(-) diff --git a/layerstats/README.md b/layerstats/README.md index dae2eb07..8baf345d 100644 --- a/layerstats/README.md +++ b/layerstats/README.md @@ -24,6 +24,7 @@ The output is a gzipped tsv with a row per layer on each tile and the following | layer | layer name | | layer_bytes | encoded size of this layer on this tile | | layer_features | number of features in this layer | +| layer_geometries | number of geometries in features in this layer, including inside multipoint/multipolygons/multilinestring features | | layer_attr_bytes | encoded size of the [attribute key/value pairs](https://github.com/mapbox/vector-tile-spec/tree/master/2.1#44-feature-attributes) in this layer | | layer_attr_keys | number of distinct attribute keys in this layer on this tile | | layer_attr_values | number of distinct attribute values in this layer on this tile | @@ -42,10 +43,10 @@ Then get the biggest layers: SELECT * FROM layerstats ORDER BY layer_bytes DESC LIMIT 2; ``` -| z | x | y | hilbert | archived_tile_bytes | layer | layer_bytes | layer_features | layer_attr_bytes | layer_attr_keys | layer_attr_values | -|----|-------|------|-----------|---------------------|-------------|-------------|----------------|------------------|-----------------|-------------------| -| 14 | 13722 | 7013 | 305278258 | 1261474 | 
housenumber | 2412464 | 108384 | 30764 | 1 | 3021 | -| 14 | 13723 | 7014 | 305278256 | 1064044 | housenumber | 1848990 | 83038 | 26022 | 1 | 2542 | +| z | x | y | hilbert | archived_tile_bytes | layer | layer_bytes | layer_features | layer_geometries | layer_attr_bytes | layer_attr_keys | layer_attr_values | +|----|-------|------|-----------|---------------------|-------------|-------------|----------------|------------------|------------------|-----------------|-------------------| +| 14 | 13722 | 7013 | 305278258 | 1260526 | housenumber | 2412589 | 108390 | 108390 | 30764 | 1 | 3021 | +| 14 | 13723 | 7014 | 305278256 | 1059752 | housenumber | 1850041 | 83038 | 83038 | 26022 | 1 | 2542 | To get a table of biggest layers by zoom: diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/FeatureMerge.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/FeatureMerge.java index 56fb3a17..202178d8 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/FeatureMerge.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/FeatureMerge.java @@ -14,6 +14,7 @@ import com.onthegomap.planetiler.stats.Stats; import java.util.ArrayList; import java.util.BitSet; import java.util.Collection; +import java.util.Comparator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -49,6 +50,9 @@ public class FeatureMerge { private static final Logger LOGGER = LoggerFactory.getLogger(FeatureMerge.class); private static final BufferParameters bufferOps = new BufferParameters(); + // this is slightly faster than Comparator.comparingInt + private static final Comparator> BY_HILBERT_INDEX = + (o1, o2) -> Integer.compare(o1.hilbert, o2.hilbert); static { bufferOps.setJoinStyle(BufferParameters.JOIN_MITRE); @@ -125,9 +129,11 @@ public class FeatureMerge { result.add(feature1); } else { VectorTile.VectorGeometryMerger combined = VectorTile.newMerger(geometryType); - for (var feature : groupedFeatures) { - 
combined.accept(feature.geometry()); - } + groupedFeatures.stream() + .map(f -> new WithIndex<>(f, f.geometry().hilbertIndex())) + .sorted(BY_HILBERT_INDEX) + .map(d -> d.feature.geometry()) + .forEachOrdered(combined); result.add(feature1.copyWithNewGeometry(combined.finish())); } } @@ -180,7 +186,7 @@ public class FeatureMerge { if (simplified instanceof LineString simpleLineString) { line = simpleLineString; } else { - LOGGER.warn("line string merge simplify emitted " + simplified.getGeometryType()); + LOGGER.warn("line string merge simplify emitted {}", simplified.getGeometryType()); } } if (buffer >= 0) { @@ -191,6 +197,7 @@ public class FeatureMerge { } } if (!outputSegments.isEmpty()) { + outputSegments = sortByHilbertIndex(outputSegments); Geometry newGeometry = GeoUtils.combineLineStrings(outputSegments); result.add(feature1.copyWithNewGeometry(newGeometry)); } @@ -332,6 +339,7 @@ public class FeatureMerge { extractPolygons(merged, outPolygons, minArea, minHoleArea); } if (!outPolygons.isEmpty()) { + outPolygons = sortByHilbertIndex(outPolygons); Geometry combined = GeoUtils.combinePolygons(outPolygons); result.add(feature1.copyWithNewGeometry(combined)); } @@ -339,6 +347,14 @@ public class FeatureMerge { return result; } + private static List sortByHilbertIndex(List geometries) { + return geometries.stream() + .map(p -> new WithIndex<>(p, VectorTile.hilbertIndex(p))) + .sorted(BY_HILBERT_INDEX) + .map(d -> d.feature) + .toList(); + } + public static List mergeNearbyPolygons(List features, double minArea, double minHoleArea, double minDist, double buffer) throws GeometryException { return mergeNearbyPolygons(features, minArea, minHoleArea, minDist, buffer, DefaultStats.get()); @@ -555,4 +571,6 @@ public class FeatureMerge { } return result; } + + private record WithIndex (T feature, int hilbert) {} } diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/VectorTile.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/VectorTile.java 
index 567f9bdc..811b5084 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/VectorTile.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/VectorTile.java @@ -26,6 +26,7 @@ import com.onthegomap.planetiler.geo.GeoUtils; import com.onthegomap.planetiler.geo.GeometryException; import com.onthegomap.planetiler.geo.GeometryType; import com.onthegomap.planetiler.geo.MutableCoordinateSequence; +import com.onthegomap.planetiler.util.Hilbert; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -38,6 +39,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.concurrent.NotThreadSafe; import org.locationtech.jts.algorithm.Orientation; +import org.locationtech.jts.geom.Coordinate; import org.locationtech.jts.geom.CoordinateSequence; import org.locationtech.jts.geom.Geometry; import org.locationtech.jts.geom.GeometryFactory; @@ -419,6 +421,41 @@ public class VectorTile { return new VectorGeometryMerger(geometryType); } + /** + * Returns the hilbert index of the zig-zag-encoded first point of {@code geometry}. + *
<p>
+ * This can be useful for sorting geometries to minimize encoded vector tile geometry command size since smaller + * offsets take fewer bytes using protobuf varint encoding. + */ + public static int hilbertIndex(Geometry geometry) { + Coordinate coord = geometry.getCoordinate(); + int x = zigZagEncode((int) Math.round(coord.x * 4096 / 256)); + int y = zigZagEncode((int) Math.round(coord.y * 4096 / 256)); + return Hilbert.hilbertXYToIndex(15, x, y); + } + + /** + * Returns the number of internal geometries in this feature including points/lines/polygons inside multigeometries. + */ + public static int countGeometries(VectorTileProto.Tile.Feature feature) { + int result = 0; + int idx = 0; + int geomCount = feature.getGeometryCount(); + while (idx < geomCount) { + int length = feature.getGeometry(idx); + int command = length & ((1 << 3) - 1); + length = length >> 3; + if (command == Command.MOVE_TO.value) { + result += length; + } + idx += 1; + if (command != Command.CLOSE_PATH.value) { + idx += length * 2; + } + } + return result; + } + /** * Adds features in a layer to this tile. * @@ -587,9 +624,9 @@ public class VectorTile { // the sequence private final GeometryType geometryType; + private final IntArrayList result = new IntArrayList(); private int overallX = 0; private int overallY = 0; - private final IntArrayList result = new IntArrayList(); private VectorGeometryMerger(GeometryType geometryType) { this.geometryType = geometryType; @@ -923,6 +960,22 @@ public class VectorTile { return this; } } + + /** + * Returns the hilbert index of the zig-zag-encoded first point of this feature. + *
<p>
+ * This can be useful for sorting geometries to minimize encoded vector tile geometry command size since smaller + * offsets take fewer bytes using protobuf varint encoding. + */ + public int hilbertIndex() { + if (commands.length < 3) { + return 0; + } + int x = commands[1]; + int y = commands[2]; + return Hilbert.hilbertXYToIndex(15, x >> scale, y >> scale); + } + } /** diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TileSizeStats.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TileSizeStats.java index 9276ef7e..cb3f6d87 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TileSizeStats.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/TileSizeStats.java @@ -8,6 +8,7 @@ import com.fasterxml.jackson.databind.PropertyNamingStrategies; import com.fasterxml.jackson.databind.annotation.JsonNaming; import com.fasterxml.jackson.dataformat.csv.CsvMapper; import com.fasterxml.jackson.dataformat.csv.CsvSchema; +import com.onthegomap.planetiler.VectorTile; import com.onthegomap.planetiler.archive.Tile; import com.onthegomap.planetiler.archive.TileArchiveConfig; import com.onthegomap.planetiler.archive.TileArchives; @@ -175,6 +176,7 @@ public class TileSizeStats { layer.layer, layer.layerBytes, layer.layerFeatures, + layer.layerGeometries, layer.layerAttrBytes, layer.layerAttrKeys, layer.layerAttrValues @@ -220,10 +222,15 @@ public class TileSizeStats { for (var value : layer.getValuesList()) { attrSize += value.getSerializedSize(); } + int geomCount = 0; + for (var feature : layer.getFeaturesList()) { + geomCount += VectorTile.countGeometries(feature); + } result.add(new LayerStats( layer.getName(), layer.getSerializedSize(), layer.getFeaturesCount(), + geomCount, attrSize, layer.getKeysCount(), layer.getValuesCount() @@ -243,6 +250,7 @@ public class TileSizeStats { "layer", "layer_bytes", "layer_features", + "layer_geometries", "layer_attr_bytes", "layer_attr_keys", 
"layer_attr_values" @@ -257,6 +265,7 @@ public class TileSizeStats { String layer, int layerBytes, int layerFeatures, + int layerGeometries, int layerAttrBytes, int layerAttrKeys, int layerAttrValues @@ -267,6 +276,7 @@ public class TileSizeStats { String layer, int layerBytes, int layerFeatures, + int layerGeometries, int layerAttrBytes, int layerAttrKeys, int layerAttrValues diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/PlanetilerTests.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/PlanetilerTests.java index bc9f9525..d1964b29 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/PlanetilerTests.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/PlanetilerTests.java @@ -2028,6 +2028,7 @@ class PlanetilerTests { "layer", "layer_bytes", "layer_features", + "layer_geometries", "layer_attr_bytes", "layer_attr_keys", "layer_attr_values" diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/VectorTileTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/VectorTileTest.java index 46fea786..84a9f8b4 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/VectorTileTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/VectorTileTest.java @@ -595,6 +595,44 @@ class VectorTileTest { ); } + @TestFactory + Stream testCountInternalGeometries() { + record Case(int expected, Geometry geom) {} + return Stream.of( + new Case(1, newPoint(0, 0)), + new Case(2, newMultiPoint(newPoint(0, 0), newPoint(0, 1))), + new Case(3, newMultiPoint(newPoint(0, 0), newPoint(0, 1), newPoint(0, 2))), + new Case(1, newLineString(0, 0, 1, 1)), + new Case(2, newMultiLineString( + newLineString(0, 0, 1, 1), + newLineString(0, 0, 2, 2) + )), + new Case(3, newMultiLineString( + newLineString(0, 0, 1, 1), + newLineString(0, 0, 2, 2), + newLineString(0, 0, 2, 3) + )), + new Case(1, rectangle(0, 1)), + new Case(2, newMultiPolygon( + rectangle(0, 1), + rectangle(3, 4) + )), + new 
Case(3, newMultiPolygon( + rectangle(0, 1), + rectangle(3, 4), + rectangle(6, 8) + )) + ).map(test -> dynamicTest(test.toString(), + () -> { + var feature = new VectorTile.Feature( + "layer", 1, VectorTile.encodeGeometry(test.geom), Map.of() + ); + var tile = new VectorTile() + .addLayerFeatures("layer", List.of(feature)); + assertEquals(test.expected, VectorTile.countGeometries(tile.toProto().getLayers(0).getFeatures(0))); + })); + } + private static void assertArrayEquals(int[] a, int[] b) { assertEquals( IntStream.of(a).boxed().toList(), diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TileSizeStatsTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TileSizeStatsTest.java index a35c7adf..538af3df 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TileSizeStatsTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TileSizeStatsTest.java @@ -40,8 +40,8 @@ class TileSizeStatsTest { var formatted = TileSizeStats.formatOutputRows(TileCoord.ofXYZ(1, 2, 3), 999, stats); assertEquals( """ - z x y hilbert archived_tile_bytes layer layer_bytes layer_features layer_attr_bytes layer_attr_keys layer_attr_values - 3 1 2 34 999 layer 55 1 18 2 2 + z x y hilbert archived_tile_bytes layer layer_bytes layer_features layer_geometries layer_attr_bytes layer_attr_keys layer_attr_values + 3 1 2 34 999 layer 55 1 1 18 2 2 """ .trim(), (TileSizeStats.headerRow() + String.join("", formatted)).trim()); @@ -89,9 +89,9 @@ class TileSizeStatsTest { var formatted = TileSizeStats.formatOutputRows(TileCoord.ofXYZ(1, 2, 3), 999, stats); assertEquals( """ - z x y hilbert archived_tile_bytes layer layer_bytes layer_features layer_attr_bytes layer_attr_keys layer_attr_values - 3 1 2 34 999 a 72 2 20 2 3 - 3 1 2 34 999 b 19 1 0 0 0 + z x y hilbert archived_tile_bytes layer layer_bytes layer_features layer_geometries layer_attr_bytes layer_attr_keys layer_attr_values + 3 1 2 34 999 a 72 2 2 20 2 3 + 3 1 2 34 
999 b 19 1 1 0 0 0 """ .trim(), (TileSizeStats.headerRow() + String.join("", formatted)).trim()); diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TilesetSummaryStatisticsTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TilesetSummaryStatisticsTest.java index f41b00b3..6e7cc48a 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TilesetSummaryStatisticsTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/TilesetSummaryStatisticsTest.java @@ -15,12 +15,12 @@ class TilesetSummaryStatisticsTest { var updater1 = tileStats.threadLocalUpdater(); var updater2 = tileStats.threadLocalUpdater(); updater1.recordTile(TileCoord.ofXYZ(0, 0, 1), 123, List.of( - new TileSizeStats.LayerStats("a", 1, 2, 3, 4, 5), - new TileSizeStats.LayerStats("b", 6, 7, 8, 9, 10) + new TileSizeStats.LayerStats("a", 1, 2, 2, 3, 4, 5), + new TileSizeStats.LayerStats("b", 6, 7, 7, 8, 9, 10) )); updater2.recordTile(TileCoord.ofXYZ(0, 1, 1), 345, List.of( - new TileSizeStats.LayerStats("b", 1, 2, 3, 4, 5), - new TileSizeStats.LayerStats("c", 6, 7, 8, 9, 10) + new TileSizeStats.LayerStats("b", 1, 2, 2, 3, 4, 5), + new TileSizeStats.LayerStats("c", 6, 7, 7, 8, 9, 10) )); var summary = tileStats.summary(); assertEquals(Set.of("a", "b", "c"), Set.copyOf(summary.layers())); @@ -51,7 +51,7 @@ class TilesetSummaryStatisticsTest { assertEquals(2, summary.get().numTiles()); updater1.recordTile(TileCoord.ofXYZ(0, 0, 2), 0, List.of( - new TileSizeStats.LayerStats("c", 10, 7, 8, 9, 10) + new TileSizeStats.LayerStats("c", 10, 7, 7, 8, 9, 10) )); assertEquals(""" z1 z2 all @@ -101,8 +101,8 @@ class TilesetSummaryStatisticsTest { List summaries = new ArrayList<>(); for (int i = 0; i < 20; i++) { var summary = new TilesetSummaryStatistics.TileSummary(TileCoord.decode(i), i, List.of( - new TileSizeStats.LayerStats("a", i * 2, i, 0, 0, 0), - new TileSizeStats.LayerStats("b", i * 3, i, 0, 0, 0) + new 
TileSizeStats.LayerStats("a", i * 2, i, i * 2, 0, 0, 0), + new TileSizeStats.LayerStats("b", i * 3, i, i * 2, 0, 0, 0) )); summaries.add(0, summary); (i % 2 == 0 ? updater1 : updater2).recordTile(summary.coord(), summary.archivedSize(), summary.layers()); @@ -140,21 +140,21 @@ class TilesetSummaryStatisticsTest { updater1.recordTile( TileCoord.ofXYZ(0, 0, 0), 100, - List.of(new TileSizeStats.LayerStats("a", 10, 0, 0, 0, 0)) + List.of(new TileSizeStats.LayerStats("a", 10, 0, 0, 0, 0, 0)) ); updater2.recordTile( TileCoord.ofXYZ(0, 0, 1), 200, List.of( - new TileSizeStats.LayerStats("a", 20, 0, 0, 0, 0), - new TileSizeStats.LayerStats("b", 30, 0, 0, 0, 0) + new TileSizeStats.LayerStats("a", 20, 0, 0, 0, 0, 0), + new TileSizeStats.LayerStats("b", 30, 0, 0, 0, 0, 0) ) ); updater2.recordTile( TileCoord.ofXYZ(0, 0, 2), // no stats 400, List.of( - new TileSizeStats.LayerStats("c", 40, 0, 0, 0, 0) + new TileSizeStats.LayerStats("c", 40, 0, 0, 0, 0, 0) ) ); @@ -189,14 +189,14 @@ class TilesetSummaryStatisticsTest { updater1.recordTile( TileCoord.ofXYZ(0, 0, 0), 100, - List.of(new TileSizeStats.LayerStats("a", 10, 0, 0, 0, 0)) + List.of(new TileSizeStats.LayerStats("a", 10, 0, 0, 0, 0, 0)) ); updater2.recordTile( TileCoord.ofXYZ(0, 0, 1), 200, List.of( - new TileSizeStats.LayerStats("a", 20, 0, 0, 0, 0), - new TileSizeStats.LayerStats("b", 30, 0, 0, 0, 0) + new TileSizeStats.LayerStats("a", 20, 0, 0, 0, 0, 0), + new TileSizeStats.LayerStats("b", 30, 0, 0, 0, 0, 0) ) );