Sort merged vector tile features by hilbert order (#673)

pull/675/head
Michael Barry 2023-09-26 20:56:04 -04:00 zatwierdzone przez GitHub
rodzic 1b53493ac7
commit e7e18d5f17
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 4AEE18F83AFDEB23
8 zmienionych plików z 149 dodań i 28 usunięć

Wyświetl plik

@ -24,6 +24,7 @@ The output is a gzipped tsv with a row per layer on each tile and the following
| layer | layer name |
| layer_bytes | encoded size of this layer on this tile |
| layer_features | number of features in this layer |
| layer_geometries | number of geometries in features in this layer, including those inside multipoint/multipolygon/multilinestring features |
| layer_attr_bytes | encoded size of the [attribute key/value pairs](https://github.com/mapbox/vector-tile-spec/tree/master/2.1#44-feature-attributes) in this layer |
| layer_attr_keys | number of distinct attribute keys in this layer on this tile |
| layer_attr_values | number of distinct attribute values in this layer on this tile |
@ -42,10 +43,10 @@ Then get the biggest layers:
SELECT * FROM layerstats ORDER BY layer_bytes DESC LIMIT 2;
```
| z | x | y | hilbert | archived_tile_bytes | layer | layer_bytes | layer_features | layer_attr_bytes | layer_attr_keys | layer_attr_values |
|----|-------|------|-----------|---------------------|-------------|-------------|----------------|------------------|-----------------|-------------------|
| 14 | 13722 | 7013 | 305278258 | 1261474 | housenumber | 2412464 | 108384 | 30764 | 1 | 3021 |
| 14 | 13723 | 7014 | 305278256 | 1064044 | housenumber | 1848990 | 83038 | 26022 | 1 | 2542 |
| z | x | y | hilbert | archived_tile_bytes | layer | layer_bytes | layer_features | layer_geometries | layer_attr_bytes | layer_attr_keys | layer_attr_values |
|----|-------|------|-----------|---------------------|-------------|-------------|----------------|------------------|------------------|-----------------|-------------------|
| 14 | 13722 | 7013 | 305278258 | 1260526 | housenumber | 2412589 | 108390 | 108390 | 30764 | 1 | 3021 |
| 14 | 13723 | 7014 | 305278256 | 1059752 | housenumber | 1850041 | 83038 | 83038 | 26022 | 1 | 2542 |
To get a table of biggest layers by zoom:

Wyświetl plik

@ -14,6 +14,7 @@ import com.onthegomap.planetiler.stats.Stats;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collection;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@ -49,6 +50,9 @@ public class FeatureMerge {
private static final Logger LOGGER = LoggerFactory.getLogger(FeatureMerge.class);
private static final BufferParameters bufferOps = new BufferParameters();
// Orders WithIndex wrappers by ascending hilbert value.
// A hand-written lambda is used because this is slightly faster than Comparator.comparingInt
private static final Comparator<WithIndex<?>> BY_HILBERT_INDEX =
(o1, o2) -> Integer.compare(o1.hilbert, o2.hilbert);
static {
bufferOps.setJoinStyle(BufferParameters.JOIN_MITRE);
@ -125,9 +129,11 @@ public class FeatureMerge {
result.add(feature1);
} else {
VectorTile.VectorGeometryMerger combined = VectorTile.newMerger(geometryType);
for (var feature : groupedFeatures) {
combined.accept(feature.geometry());
}
groupedFeatures.stream()
.map(f -> new WithIndex<>(f, f.geometry().hilbertIndex()))
.sorted(BY_HILBERT_INDEX)
.map(d -> d.feature.geometry())
.forEachOrdered(combined);
result.add(feature1.copyWithNewGeometry(combined.finish()));
}
}
@ -180,7 +186,7 @@ public class FeatureMerge {
if (simplified instanceof LineString simpleLineString) {
line = simpleLineString;
} else {
LOGGER.warn("line string merge simplify emitted " + simplified.getGeometryType());
LOGGER.warn("line string merge simplify emitted {}", simplified.getGeometryType());
}
}
if (buffer >= 0) {
@ -191,6 +197,7 @@ public class FeatureMerge {
}
}
if (!outputSegments.isEmpty()) {
outputSegments = sortByHilbertIndex(outputSegments);
Geometry newGeometry = GeoUtils.combineLineStrings(outputSegments);
result.add(feature1.copyWithNewGeometry(newGeometry));
}
@ -332,6 +339,7 @@ public class FeatureMerge {
extractPolygons(merged, outPolygons, minArea, minHoleArea);
}
if (!outPolygons.isEmpty()) {
outPolygons = sortByHilbertIndex(outPolygons);
Geometry combined = GeoUtils.combinePolygons(outPolygons);
result.add(feature1.copyWithNewGeometry(combined));
}
@ -339,6 +347,14 @@ public class FeatureMerge {
return result;
}
/**
 * Returns a new unmodifiable list holding {@code geometries} ordered by ascending
 * {@link VectorTile#hilbertIndex(Geometry)}; the input list is left untouched.
 * <p>
 * Each hilbert index is computed once up front and carried alongside its geometry while sorting. The sort is stable,
 * so geometries with equal indexes keep their original relative order.
 */
private static <G extends Geometry> List<G> sortByHilbertIndex(List<G> geometries) {
  // decorate: pair every geometry with its hilbert index
  List<WithIndex<G>> indexed = new ArrayList<>(geometries.size());
  for (G geometry : geometries) {
    indexed.add(new WithIndex<>(geometry, VectorTile.hilbertIndex(geometry)));
  }
  // sort by the precomputed index
  indexed.sort(BY_HILBERT_INDEX);
  // undecorate: strip the index again
  List<G> ordered = new ArrayList<>(indexed.size());
  for (WithIndex<G> entry : indexed) {
    ordered.add(entry.feature());
  }
  return List.copyOf(ordered);
}
public static List<VectorTile.Feature> mergeNearbyPolygons(List<VectorTile.Feature> features, double minArea,
double minHoleArea, double minDist, double buffer) throws GeometryException {
return mergeNearbyPolygons(features, minArea, minHoleArea, minDist, buffer, DefaultStats.get());
@ -555,4 +571,6 @@ public class FeatureMerge {
}
return result;
}
/** Pairs an item ({@code feature}) with the hilbert index ({@code hilbert}) that it gets sorted by. */
private record WithIndex<T> (T feature, int hilbert) {}
}

Wyświetl plik

@ -26,6 +26,7 @@ import com.onthegomap.planetiler.geo.GeoUtils;
import com.onthegomap.planetiler.geo.GeometryException;
import com.onthegomap.planetiler.geo.GeometryType;
import com.onthegomap.planetiler.geo.MutableCoordinateSequence;
import com.onthegomap.planetiler.util.Hilbert;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
@ -38,6 +39,7 @@ import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.annotation.concurrent.NotThreadSafe;
import org.locationtech.jts.algorithm.Orientation;
import org.locationtech.jts.geom.Coordinate;
import org.locationtech.jts.geom.CoordinateSequence;
import org.locationtech.jts.geom.Geometry;
import org.locationtech.jts.geom.GeometryFactory;
@ -419,6 +421,41 @@ public class VectorTile {
return new VectorGeometryMerger(geometryType);
}
/**
 * Returns the hilbert index of the zig-zag-encoded first point of {@code geometry}.
 * <p>
 * This can be useful for sorting geometries to minimize encoded vector tile geometry command size since smaller
 * offsets take fewer bytes using protobuf varint encoding.
 *
 * @param geometry the geometry to index; its coordinate is multiplied by {@code 4096 / 256} and rounded to an integer
 *                 before being zig-zag encoded
 * @return the hilbert index of the geometry's coordinate, or {@code 0} when {@code geometry} is empty
 */
public static int hilbertIndex(Geometry geometry) {
  Coordinate coord = geometry.getCoordinate();
  if (coord == null) {
    // Geometry#getCoordinate returns null for an empty geometry: there is no point to index, so fall back to 0
    // rather than failing with a NullPointerException in the middle of a sort
    return 0;
  }
  int x = zigZagEncode((int) Math.round(coord.x * 4096 / 256));
  int y = zigZagEncode((int) Math.round(coord.y * 4096 / 256));
  return Hilbert.hilbertXYToIndex(15, x, y);
}
/**
 * Returns the number of internal geometries in this feature including points/lines/polygons inside multigeometries.
 * <p>
 * Walks the feature's encoded geometry integers: the low 3 bits of each command integer are the command id and the
 * remaining bits are its repeat count. The repeat count of every {@code MOVE_TO} command is added to the result.
 * <p>
 * NOTE(review): every {@code MOVE_TO} is counted, so a polygon ring that starts with its own {@code MOVE_TO} (for
 * example an interior ring) would presumably be counted as well - confirm this is intended.
 *
 * @param feature the encoded vector tile feature to inspect
 * @return the sum of the repeat counts of all {@code MOVE_TO} commands in {@code feature}
 */
public static int countGeometries(VectorTileProto.Tile.Feature feature) {
  final int total = feature.getGeometryCount();
  int count = 0;
  int pos = 0;
  while (pos < total) {
    int header = feature.getGeometry(pos++);
    int commandId = header & 0x7;
    int repeat = header >> 3;
    if (commandId == Command.MOVE_TO.value) {
      count += repeat;
    }
    if (commandId != Command.CLOSE_PATH.value) {
      // skip the x/y parameter pair that follows for each repeat; CLOSE_PATH has no parameters to skip
      pos += repeat * 2;
    }
  }
  return count;
}
/**
* Adds features in a layer to this tile.
*
@ -587,9 +624,9 @@ public class VectorTile {
// the sequence
private final GeometryType geometryType;
private final IntArrayList result = new IntArrayList();
private int overallX = 0;
private int overallY = 0;
private final IntArrayList result = new IntArrayList();
private VectorGeometryMerger(GeometryType geometryType) {
this.geometryType = geometryType;
@ -923,6 +960,22 @@ public class VectorTile {
return this;
}
}
/**
 * Returns the hilbert index of the zig-zag-encoded first point of this feature.
 * <p>
 * This can be useful for sorting geometries to minimize encoded vector tile geometry command size since smaller
 * offsets take fewer bytes using protobuf varint encoding.
 *
 * @return the hilbert index computed from {@code commands[1]} and {@code commands[2]}, each shifted right by
 *         {@code scale}, or {@code 0} when there are fewer than 3 commands
 */
public int hilbertIndex() {
  if (commands.length >= 3) {
    // presumably commands[0] is the leading command header and [1]/[2] are the first x/y values - confirm
    return Hilbert.hilbertXYToIndex(15, commands[1] >> scale, commands[2] >> scale);
  }
  return 0;
}
}
/**

Wyświetl plik

@ -8,6 +8,7 @@ import com.fasterxml.jackson.databind.PropertyNamingStrategies;
import com.fasterxml.jackson.databind.annotation.JsonNaming;
import com.fasterxml.jackson.dataformat.csv.CsvMapper;
import com.fasterxml.jackson.dataformat.csv.CsvSchema;
import com.onthegomap.planetiler.VectorTile;
import com.onthegomap.planetiler.archive.Tile;
import com.onthegomap.planetiler.archive.TileArchiveConfig;
import com.onthegomap.planetiler.archive.TileArchives;
@ -175,6 +176,7 @@ public class TileSizeStats {
layer.layer,
layer.layerBytes,
layer.layerFeatures,
layer.layerGeometries,
layer.layerAttrBytes,
layer.layerAttrKeys,
layer.layerAttrValues
@ -220,10 +222,15 @@ public class TileSizeStats {
for (var value : layer.getValuesList()) {
attrSize += value.getSerializedSize();
}
int geomCount = 0;
for (var feature : layer.getFeaturesList()) {
geomCount += VectorTile.countGeometries(feature);
}
result.add(new LayerStats(
layer.getName(),
layer.getSerializedSize(),
layer.getFeaturesCount(),
geomCount,
attrSize,
layer.getKeysCount(),
layer.getValuesCount()
@ -243,6 +250,7 @@ public class TileSizeStats {
"layer",
"layer_bytes",
"layer_features",
"layer_geometries",
"layer_attr_bytes",
"layer_attr_keys",
"layer_attr_values"
@ -257,6 +265,7 @@ public class TileSizeStats {
String layer,
int layerBytes,
int layerFeatures,
int layerGeometries,
int layerAttrBytes,
int layerAttrKeys,
int layerAttrValues
@ -267,6 +276,7 @@ public class TileSizeStats {
String layer,
int layerBytes,
int layerFeatures,
int layerGeometries,
int layerAttrBytes,
int layerAttrKeys,
int layerAttrValues

Wyświetl plik

@ -2028,6 +2028,7 @@ class PlanetilerTests {
"layer",
"layer_bytes",
"layer_features",
"layer_geometries",
"layer_attr_bytes",
"layer_attr_keys",
"layer_attr_values"

Wyświetl plik

@ -595,6 +595,44 @@ class VectorTileTest {
);
}
/**
 * Checks that {@code VectorTile.countGeometries} reports the expected number of internal geometries for single and
 * multi points, lines and polygons after each geometry is encoded into a tile and read back from its proto form.
 */
@TestFactory
Stream<DynamicTest> testCountInternalGeometries() {
  // an input geometry together with the count expected for its encoded feature
  record Case(int expected, Geometry geom) {}
  Stream<Case> cases = Stream.of(
    // points
    new Case(1, newPoint(0, 0)),
    new Case(2, newMultiPoint(newPoint(0, 0), newPoint(0, 1))),
    new Case(3, newMultiPoint(newPoint(0, 0), newPoint(0, 1), newPoint(0, 2))),
    // lines
    new Case(1, newLineString(0, 0, 1, 1)),
    new Case(2, newMultiLineString(
      newLineString(0, 0, 1, 1),
      newLineString(0, 0, 2, 2)
    )),
    new Case(3, newMultiLineString(
      newLineString(0, 0, 1, 1),
      newLineString(0, 0, 2, 2),
      newLineString(0, 0, 2, 3)
    )),
    // polygons
    new Case(1, rectangle(0, 1)),
    new Case(2, newMultiPolygon(
      rectangle(0, 1),
      rectangle(3, 4)
    )),
    new Case(3, newMultiPolygon(
      rectangle(0, 1),
      rectangle(3, 4),
      rectangle(6, 8)
    ))
  );
  return cases.map(testCase -> dynamicTest(testCase.toString(), () -> {
    var encodedGeometry = VectorTile.encodeGeometry(testCase.geom());
    var feature = new VectorTile.Feature("layer", 1, encodedGeometry, Map.of());
    var tile = new VectorTile().addLayerFeatures("layer", List.of(feature));
    var protoFeature = tile.toProto().getLayers(0).getFeatures(0);
    assertEquals(testCase.expected(), VectorTile.countGeometries(protoFeature));
  }));
}
private static void assertArrayEquals(int[] a, int[] b) {
assertEquals(
IntStream.of(a).boxed().toList(),

Wyświetl plik

@ -40,8 +40,8 @@ class TileSizeStatsTest {
var formatted = TileSizeStats.formatOutputRows(TileCoord.ofXYZ(1, 2, 3), 999, stats);
assertEquals(
"""
z x y hilbert archived_tile_bytes layer layer_bytes layer_features layer_attr_bytes layer_attr_keys layer_attr_values
3 1 2 34 999 layer 55 1 18 2 2
z x y hilbert archived_tile_bytes layer layer_bytes layer_features layer_geometries layer_attr_bytes layer_attr_keys layer_attr_values
3 1 2 34 999 layer 55 1 1 18 2 2
"""
.trim(),
(TileSizeStats.headerRow() + String.join("", formatted)).trim());
@ -89,9 +89,9 @@ class TileSizeStatsTest {
var formatted = TileSizeStats.formatOutputRows(TileCoord.ofXYZ(1, 2, 3), 999, stats);
assertEquals(
"""
z x y hilbert archived_tile_bytes layer layer_bytes layer_features layer_attr_bytes layer_attr_keys layer_attr_values
3 1 2 34 999 a 72 2 20 2 3
3 1 2 34 999 b 19 1 0 0 0
z x y hilbert archived_tile_bytes layer layer_bytes layer_features layer_geometries layer_attr_bytes layer_attr_keys layer_attr_values
3 1 2 34 999 a 72 2 2 20 2 3
3 1 2 34 999 b 19 1 1 0 0 0
"""
.trim(),
(TileSizeStats.headerRow() + String.join("", formatted)).trim());

Wyświetl plik

@ -15,12 +15,12 @@ class TilesetSummaryStatisticsTest {
var updater1 = tileStats.threadLocalUpdater();
var updater2 = tileStats.threadLocalUpdater();
updater1.recordTile(TileCoord.ofXYZ(0, 0, 1), 123, List.of(
new TileSizeStats.LayerStats("a", 1, 2, 3, 4, 5),
new TileSizeStats.LayerStats("b", 6, 7, 8, 9, 10)
new TileSizeStats.LayerStats("a", 1, 2, 2, 3, 4, 5),
new TileSizeStats.LayerStats("b", 6, 7, 7, 8, 9, 10)
));
updater2.recordTile(TileCoord.ofXYZ(0, 1, 1), 345, List.of(
new TileSizeStats.LayerStats("b", 1, 2, 3, 4, 5),
new TileSizeStats.LayerStats("c", 6, 7, 8, 9, 10)
new TileSizeStats.LayerStats("b", 1, 2, 2, 3, 4, 5),
new TileSizeStats.LayerStats("c", 6, 7, 7, 8, 9, 10)
));
var summary = tileStats.summary();
assertEquals(Set.of("a", "b", "c"), Set.copyOf(summary.layers()));
@ -51,7 +51,7 @@ class TilesetSummaryStatisticsTest {
assertEquals(2, summary.get().numTiles());
updater1.recordTile(TileCoord.ofXYZ(0, 0, 2), 0, List.of(
new TileSizeStats.LayerStats("c", 10, 7, 8, 9, 10)
new TileSizeStats.LayerStats("c", 10, 7, 7, 8, 9, 10)
));
assertEquals("""
z1 z2 all
@ -101,8 +101,8 @@ class TilesetSummaryStatisticsTest {
List<TilesetSummaryStatistics.TileSummary> summaries = new ArrayList<>();
for (int i = 0; i < 20; i++) {
var summary = new TilesetSummaryStatistics.TileSummary(TileCoord.decode(i), i, List.of(
new TileSizeStats.LayerStats("a", i * 2, i, 0, 0, 0),
new TileSizeStats.LayerStats("b", i * 3, i, 0, 0, 0)
new TileSizeStats.LayerStats("a", i * 2, i, i * 2, 0, 0, 0),
new TileSizeStats.LayerStats("b", i * 3, i, i * 2, 0, 0, 0)
));
summaries.add(0, summary);
(i % 2 == 0 ? updater1 : updater2).recordTile(summary.coord(), summary.archivedSize(), summary.layers());
@ -140,21 +140,21 @@ class TilesetSummaryStatisticsTest {
updater1.recordTile(
TileCoord.ofXYZ(0, 0, 0),
100,
List.of(new TileSizeStats.LayerStats("a", 10, 0, 0, 0, 0))
List.of(new TileSizeStats.LayerStats("a", 10, 0, 0, 0, 0, 0))
);
updater2.recordTile(
TileCoord.ofXYZ(0, 0, 1),
200,
List.of(
new TileSizeStats.LayerStats("a", 20, 0, 0, 0, 0),
new TileSizeStats.LayerStats("b", 30, 0, 0, 0, 0)
new TileSizeStats.LayerStats("a", 20, 0, 0, 0, 0, 0),
new TileSizeStats.LayerStats("b", 30, 0, 0, 0, 0, 0)
)
);
updater2.recordTile(
TileCoord.ofXYZ(0, 0, 2), // no stats
400,
List.of(
new TileSizeStats.LayerStats("c", 40, 0, 0, 0, 0)
new TileSizeStats.LayerStats("c", 40, 0, 0, 0, 0, 0)
)
);
@ -189,14 +189,14 @@ class TilesetSummaryStatisticsTest {
updater1.recordTile(
TileCoord.ofXYZ(0, 0, 0),
100,
List.of(new TileSizeStats.LayerStats("a", 10, 0, 0, 0, 0))
List.of(new TileSizeStats.LayerStats("a", 10, 0, 0, 0, 0, 0))
);
updater2.recordTile(
TileCoord.ofXYZ(0, 0, 1),
200,
List.of(
new TileSizeStats.LayerStats("a", 20, 0, 0, 0, 0),
new TileSizeStats.LayerStats("b", 30, 0, 0, 0, 0)
new TileSizeStats.LayerStats("a", 20, 0, 0, 0, 0, 0),
new TileSizeStats.LayerStats("b", 30, 0, 0, 0, 0, 0)
)
);