kopia lustrzana https://github.com/onthegomap/planetiler
440 wiersze
16 KiB
Java
440 wiersze
16 KiB
Java
|
package com.onthegomap.planetiler.util;
|
||
|
|
||
|
import com.onthegomap.planetiler.config.PlanetilerConfig;
|
||
|
import com.onthegomap.planetiler.geo.TileCoord;
|
||
|
import java.util.Comparator;
|
||
|
import java.util.HashMap;
|
||
|
import java.util.List;
|
||
|
import java.util.LongSummaryStatistics;
|
||
|
import java.util.Map;
|
||
|
import java.util.PriorityQueue;
|
||
|
import java.util.concurrent.CopyOnWriteArrayList;
|
||
|
import java.util.function.Function;
|
||
|
import java.util.stream.Collectors;
|
||
|
import java.util.stream.IntStream;
|
||
|
import org.slf4j.Logger;
|
||
|
import org.slf4j.LoggerFactory;
|
||
|
|
||
|
/**
|
||
|
* Utility that computes min/max/average sizes for each vector tile layer at each zoom level, then computes combined
|
||
|
* summary statistics at the end.
|
||
|
* <p>
|
||
|
* Provide a {@link TileWeights} instance to compute weighted average tile sizes based on actual tile traffic.
|
||
|
*/
|
||
|
public class TilesetSummaryStatistics {
|
||
|
|
||
|
private static final Logger LOGGER = LoggerFactory.getLogger(TilesetSummaryStatistics.class);

/** Maximum number of biggest tiles that each {@link Cell} retains. */
private static final int TOP_N_TILES = 10;
/** Sizes above this many bytes are highlighted in yellow and qualify a tile for the "large layers" report. */
private static final int WARN_BYTES = 100_000;
/** Sizes above this many bytes are highlighted in red. */
private static final int ERROR_BYTES = 500_000;

// Rather than having threads update shared concurrent data structures, every Updater registers its own
// Summary in this list and updates it without contention; the summaries are only combined at the end.
private final List<Summary> summaries = new CopyOnWriteArrayList<>();

/** Source of the per-tile and per-zoom weights used to compute weighted averages. */
private final TileWeights tileWeights;

/**
 * Creates statistics whose weighted averages are based on {@code tileWeights}.
 *
 * @param tileWeights weights consulted for each recorded tile and for each zoom level
 */
public TilesetSummaryStatistics(TileWeights tileWeights) {
  this.tileWeights = tileWeights;
}

/** Creates statistics backed by a newly constructed {@link TileWeights} instance. */
public TilesetSummaryStatistics() {
  this(new TileWeights());
}
|
||
|
|
||
|
private static String tileBiggestLayers(Format formatter, TileSummary tile) {
|
||
|
int minSize = tile.layers.stream().mapToInt(l -> l.layerBytes()).max().orElse(0);
|
||
|
return tile.layers.stream()
|
||
|
.filter(d -> d.layerBytes() >= minSize)
|
||
|
.sorted(Comparator.comparingInt(d -> -d.layerBytes()))
|
||
|
.map(d -> d.layer() + ":" + formatter.storage(d.layerBytes()))
|
||
|
.collect(Collectors.joining(", "));
|
||
|
}
|
||
|
|
||
|
/** Returns a combined {@link Summary} from each thread's {@link Updater}. */
|
||
|
public Summary summary() {
|
||
|
return summaries.stream().reduce(new Summary(), Summary::mergeIn);
|
||
|
}
|
||
|
|
||
|
/** Logs biggest tiles, max layer size by zoom, and weighted average tile sizes. */
|
||
|
@SuppressWarnings("java:S2629")
|
||
|
public void printStats(String debugUrlPattern) {
|
||
|
LOGGER.debug("Tile stats:");
|
||
|
Summary result = summary();
|
||
|
var overallStats = result.get();
|
||
|
var formatter = Format.defaultInstance();
|
||
|
LOGGER.debug("Biggest tiles (gzipped)\n{}", overallStats.formatBiggestTiles(debugUrlPattern));
|
||
|
var alreadyListed = overallStats.biggestTiles().stream()
|
||
|
.map(TileSummary::coord)
|
||
|
.collect(Collectors.toSet());
|
||
|
var otherTiles = result.layers().stream()
|
||
|
.flatMap(layer -> result.get(layer).biggestTiles().stream().limit(1))
|
||
|
.filter(tile -> !alreadyListed.contains(tile.coord) && tile.archivedSize > WARN_BYTES)
|
||
|
.toList();
|
||
|
if (!otherTiles.isEmpty()) {
|
||
|
LOGGER.info("Other tiles with large layers\n{}",
|
||
|
otherTiles.stream()
|
||
|
.map(tile -> "%d/%d/%d (%s) %s (%s)".formatted(
|
||
|
tile.coord.z(),
|
||
|
tile.coord.x(),
|
||
|
tile.coord.y(),
|
||
|
formatter.storage(tile.archivedSize),
|
||
|
tile.coord.getDebugUrl(debugUrlPattern),
|
||
|
tileBiggestLayers(formatter, tile)
|
||
|
)).collect(Collectors.joining("\n")));
|
||
|
}
|
||
|
|
||
|
LOGGER.debug("Max tile sizes\n{}\n{}\n{}",
|
||
|
result.formatTable(n -> {
|
||
|
String string = " " + formatter.storage(n, true);
|
||
|
return n.intValue() > ERROR_BYTES ? AnsiColors.red(string) :
|
||
|
n.intValue() > WARN_BYTES ? AnsiColors.yellow(string) :
|
||
|
string;
|
||
|
}, Cell::maxSize),
|
||
|
result.formatRow("full tile",
|
||
|
formatter::storage,
|
||
|
z -> result.get(z).maxSize(),
|
||
|
result.get().maxSize()
|
||
|
),
|
||
|
result.formatRow("gzipped",
|
||
|
formatter::storage,
|
||
|
z -> result.get(z).maxArchivedSize(),
|
||
|
result.get().maxArchivedSize()
|
||
|
)
|
||
|
);
|
||
|
LOGGER.debug(" Max tile: {} (gzipped: {})",
|
||
|
formatter.storage(overallStats.maxSize()),
|
||
|
formatter.storage(overallStats.maxArchivedSize()));
|
||
|
LOGGER.debug(" Avg tile: {} (gzipped: {}) {}",
|
||
|
formatter.storage(overallStats.weightedAverageSize()),
|
||
|
formatter.storage(overallStats.weightedAverageArchivedSize()),
|
||
|
tileWeights.isEmpty() ?
|
||
|
"no tile weights, use --download-osm-tile-weights for weighted average" :
|
||
|
"using weighted average based on OSM traffic");
|
||
|
LOGGER.debug(" # tiles: {}", formatter.integer(overallStats.numTiles()));
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Returns an {@link Updater} that accepts individual tile layer stats from a thread that will eventually be combined
|
||
|
* into the final tileset report.
|
||
|
*/
|
||
|
public Updater threadLocalUpdater() {
|
||
|
return new Updater();
|
||
|
}
|
||
|
|
||
|
/** Aggregated statistics for a layer/zoom, layer, zoom, or entire tileset. */
|
||
|
public static class Cell {
|
||
|
private final LongSummaryStatistics archivedBytes = new LongSummaryStatistics();
|
||
|
private final LongSummaryStatistics bytes = new LongSummaryStatistics();
|
||
|
private final PriorityQueue<TileSummary> topTiles = new PriorityQueue<>();
|
||
|
private long weightedBytesSum;
|
||
|
private long weightedArchivedBytesSum;
|
||
|
private long totalWeight;
|
||
|
private int bigTileCutoff = 0;
|
||
|
|
||
|
private static Cell combine(Cell a, Cell b) {
|
||
|
return new Cell().mergeIn(a).mergeIn(b);
|
||
|
}
|
||
|
|
||
|
/** Max raw layer bytes (or tile size when aggregated over all layers). */
|
||
|
public long maxSize() {
|
||
|
return Math.max(0, bytes.getMax());
|
||
|
}
|
||
|
|
||
|
/** Max gzipped tile bytes (or 0 when broken-out by layer). */
|
||
|
public long maxArchivedSize() {
|
||
|
return Math.max(0, archivedBytes.getMax());
|
||
|
}
|
||
|
|
||
|
/** Total tiles included in this aggregation. */
|
||
|
public long numTiles() {
|
||
|
return bytes.getCount();
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Returns the biggest tiles in this aggregation by gzipped size (when aggregated over all layers) or raw size
|
||
|
* within an individual layer.
|
||
|
*/
|
||
|
public List<TileSummary> biggestTiles() {
|
||
|
return topTiles.stream().sorted(Comparator.comparingLong(s -> -s.archivedSize)).toList();
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Returns average gzipped tile size in this aggregation, weighted by the {@link TileWeights} instance provided.
|
||
|
* <p>
|
||
|
* When multiple zoom-levels are combined, the weighted average respects the weight-per-zoom-level from
|
||
|
* {@link TileWeights} so that low zoom tiles are not overweighted when analyzing a small extract.
|
||
|
*/
|
||
|
public double weightedAverageArchivedSize() {
|
||
|
return totalWeight == 0 ? archivedBytes.getAverage() : (weightedArchivedBytesSum * 1d / totalWeight);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Returns average raw (not gzipped) tile size in this aggregation, weighted by the {@link TileWeights} instance
|
||
|
* provided.
|
||
|
*
|
||
|
* @see #weightedAverageArchivedSize()
|
||
|
*/
|
||
|
public double weightedAverageSize() {
|
||
|
return totalWeight == 0 ? bytes.getAverage() : (weightedBytesSum * 1d / totalWeight);
|
||
|
}
|
||
|
|
||
|
private Cell mergeIn(Cell other) {
|
||
|
return mergeIn(other, 1);
|
||
|
}
|
||
|
|
||
|
private Cell mergeIn(Cell other, double weight) {
|
||
|
totalWeight += other.totalWeight * weight;
|
||
|
weightedBytesSum += other.weightedBytesSum * weight;
|
||
|
weightedArchivedBytesSum += other.weightedArchivedBytesSum * weight;
|
||
|
archivedBytes.combine(other.archivedBytes);
|
||
|
bytes.combine(other.bytes);
|
||
|
for (var bigTile : other.topTiles) {
|
||
|
acceptBigTile(bigTile.coord, bigTile.archivedSize, bigTile.layers);
|
||
|
}
|
||
|
return this;
|
||
|
}
|
||
|
|
||
|
private void acceptBigTile(TileCoord coord, int archivedBytes, List<TileSizeStats.LayerStats> layerStats) {
|
||
|
if (archivedBytes >= bigTileCutoff) {
|
||
|
topTiles.offer(new TileSummary(coord, archivedBytes, layerStats));
|
||
|
while (topTiles.size() > TOP_N_TILES) {
|
||
|
topTiles.poll();
|
||
|
var min = topTiles.peek();
|
||
|
if (min != null) {
|
||
|
bigTileCutoff = min.archivedSize();
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
String formatBiggestTiles(String debugUrlPattern) {
|
||
|
var biggestTiles = biggestTiles();
|
||
|
var formatter = Format.defaultInstance();
|
||
|
return IntStream.range(0, biggestTiles.size())
|
||
|
.mapToObj(index -> {
|
||
|
var tile = biggestTiles.get(index);
|
||
|
return "%d. %d/%d/%d (%s) %s (%s)".formatted(
|
||
|
index + 1,
|
||
|
tile.coord.z(),
|
||
|
tile.coord.x(),
|
||
|
tile.coord.y(),
|
||
|
formatter.storage(tile.archivedSize),
|
||
|
tile.coord.getDebugUrl(debugUrlPattern),
|
||
|
tileBiggestLayers(formatter, tile)
|
||
|
);
|
||
|
}).collect(Collectors.joining("\n"));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** Statistics for a tile and its layers. */
|
||
|
public record TileSummary(TileCoord coord, int archivedSize, List<TileSizeStats.LayerStats> layers)
|
||
|
implements Comparable<TileSummary> {
|
||
|
|
||
|
@Override
|
||
|
public int compareTo(TileSummary o) {
|
||
|
int result = Integer.compare(archivedSize, o.archivedSize);
|
||
|
if (result == 0) {
|
||
|
result = Integer.compare(coord.encoded(), o.coord.encoded());
|
||
|
}
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
TileSummary withSize(int newSize) {
|
||
|
return new TileSummary(coord, newSize, layers);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** Overall summary statistics for a tileset, aggregated from all {@link Updater Updaters}. */
|
||
|
public class Summary {
|
||
|
|
||
|
private final List<Cell> byTile =
|
||
|
IntStream.rangeClosed(PlanetilerConfig.MIN_MINZOOM, PlanetilerConfig.MAX_MAXZOOM)
|
||
|
.mapToObj(i -> new Cell())
|
||
|
.toList();
|
||
|
|
||
|
private final List<Map<String, Cell>> byLayer =
|
||
|
IntStream.rangeClosed(PlanetilerConfig.MIN_MINZOOM, PlanetilerConfig.MAX_MAXZOOM)
|
||
|
.<Map<String, Cell>>mapToObj(i -> new HashMap<>())
|
||
|
.toList();
|
||
|
|
||
|
/** All the layers that appear in the tileset. */
|
||
|
public List<String> layers() {
|
||
|
return byLayer.stream().flatMap(e -> e.keySet().stream()).distinct().sorted().toList();
|
||
|
}
|
||
|
|
||
|
/** Returns the summary statistics for a layer at a zoom level. */
|
||
|
public Cell get(int z, String layer) {
|
||
|
return byLayer.get(z).getOrDefault(layer, new Cell());
|
||
|
}
|
||
|
|
||
|
/** Returns the summary statistics for a layer from all zoom levels. */
|
||
|
public Cell get(String layer) {
|
||
|
return combineZooms(byLayer.stream()
|
||
|
.map(e -> e.getOrDefault(layer, new Cell()))
|
||
|
.toList());
|
||
|
}
|
||
|
|
||
|
/** Returns the summary statistics for a zoom level from all layers. */
|
||
|
public Cell get(int z) {
|
||
|
return byTile.get(z);
|
||
|
}
|
||
|
|
||
|
/** Returns the summary statistics for the entire dataset by aggregating all layers and zoom-levels. */
|
||
|
public Cell get() {
|
||
|
return combineZooms(byTile);
|
||
|
}
|
||
|
|
||
|
/** Returns the minimum zoom a tile appears at in the tileset. */
|
||
|
public int minZoomWithData() {
|
||
|
return IntStream.range(0, byTile.size())
|
||
|
.filter(i -> byTile.get(i).numTiles() > 0)
|
||
|
.min()
|
||
|
.orElse(PlanetilerConfig.MAX_MAXZOOM);
|
||
|
}
|
||
|
|
||
|
/** Returns the maximum zoom a tile appears at in the tileset. */
|
||
|
public int maxZoomWithData() {
|
||
|
return IntStream.range(0, byTile.size())
|
||
|
.filter(i -> byTile.get(i).numTiles() > 0)
|
||
|
.max()
|
||
|
.orElse(PlanetilerConfig.MAX_MAXZOOM);
|
||
|
}
|
||
|
|
||
|
/** Returns the minimum zoom a specific layer appears at in the tileset. */
|
||
|
public int minZoomWithData(String layer) {
|
||
|
return IntStream.range(0, byLayer.size())
|
||
|
.filter(i -> byLayer.get(i).containsKey(layer))
|
||
|
.min()
|
||
|
.orElse(PlanetilerConfig.MAX_MAXZOOM);
|
||
|
}
|
||
|
|
||
|
private Summary mergeIn(Summary other) {
|
||
|
for (int z = PlanetilerConfig.MIN_MINZOOM; z <= PlanetilerConfig.MAX_MAXZOOM; z++) {
|
||
|
byTile.get(z).mergeIn(other.byTile.get(z));
|
||
|
}
|
||
|
for (int z = PlanetilerConfig.MIN_MINZOOM; z <= PlanetilerConfig.MAX_MAXZOOM; z++) {
|
||
|
var ourMap = byLayer.get(z);
|
||
|
var theirMap = other.byLayer.get(z);
|
||
|
theirMap.forEach((layer, stats) -> ourMap.merge(layer, stats, Cell::combine));
|
||
|
}
|
||
|
return this;
|
||
|
}
|
||
|
|
||
|
private Cell combineZooms(List<Cell> byTile) {
|
||
|
// aggregate Cells over zoom levels, but respect the global zoom-level weights
|
||
|
// from TileWeights
|
||
|
double sumWeight = 0;
|
||
|
double preSumWeight = 0;
|
||
|
for (int z = 0; z < byTile.size(); z++) {
|
||
|
var cell = byTile.get(z);
|
||
|
long zoomWeight = tileWeights.getZoomWeight(z);
|
||
|
if (cell.numTiles() > 0 && zoomWeight > 0) {
|
||
|
sumWeight += zoomWeight;
|
||
|
preSumWeight += cell.totalWeight;
|
||
|
}
|
||
|
}
|
||
|
boolean noData = sumWeight == 0 || preSumWeight == 0;
|
||
|
Cell result = new Cell();
|
||
|
for (int z = 0; z < byTile.size(); z++) {
|
||
|
var cell = byTile.get(z);
|
||
|
long zoomWeight = tileWeights.getZoomWeight(z);
|
||
|
if ((cell.numTiles() > 0 && zoomWeight > 0) || noData) {
|
||
|
double weight = noData ? 1 : (zoomWeight / sumWeight) / (cell.totalWeight / preSumWeight);
|
||
|
result.mergeIn(cell, weight);
|
||
|
}
|
||
|
}
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
String formatRow(
|
||
|
String firstColumn,
|
||
|
Function<Number, String> formatter,
|
||
|
Function<Integer, Number> extractCells,
|
||
|
Number lastColumn
|
||
|
) {
|
||
|
return formatRow(firstColumn, extractCells.andThen(formatter), formatter.apply(lastColumn));
|
||
|
}
|
||
|
|
||
|
String formatRow(
|
||
|
String firstColumn,
|
||
|
Function<Integer, String> extractStat,
|
||
|
String lastColumn
|
||
|
) {
|
||
|
StringBuilder builder = new StringBuilder();
|
||
|
int minZoom = minZoomWithData();
|
||
|
int maxZoom = maxZoomWithData();
|
||
|
List<String> layers = layers().stream()
|
||
|
.sorted(Comparator.comparingInt(this::minZoomWithData))
|
||
|
.toList();
|
||
|
int maxLayerLength = Math.max(9, layers.stream().mapToInt(String::length).max().orElse(0));
|
||
|
String cellFormat = "%1$5s";
|
||
|
String layerFormat = "%1$" + maxLayerLength + "s";
|
||
|
|
||
|
builder.append(layerFormat.formatted(firstColumn));
|
||
|
for (int z = minZoom; z <= maxZoom; z++) {
|
||
|
builder.append(cellFormat.formatted(extractStat.apply(z)));
|
||
|
builder.append(' ');
|
||
|
}
|
||
|
builder.append(cellFormat.formatted(lastColumn));
|
||
|
return builder.toString();
|
||
|
}
|
||
|
|
||
|
String formatTable(Function<Number, String> formatter,
|
||
|
Function<Cell, Number> extractStat) {
|
||
|
StringBuilder builder = new StringBuilder();
|
||
|
List<String> layers = layers().stream()
|
||
|
.sorted(Comparator.comparingInt(this::minZoomWithData))
|
||
|
.toList();
|
||
|
|
||
|
// header: 0 1 2 3 4 ... 15
|
||
|
builder.append(formatRow("", z -> "z" + z, "all")).append('\n');
|
||
|
|
||
|
// each row: layer
|
||
|
for (var layer : layers) {
|
||
|
builder.append(formatRow(
|
||
|
layer,
|
||
|
formatter,
|
||
|
z -> extractStat.apply(get(z, layer)),
|
||
|
extractStat.apply(get(layer))
|
||
|
)).append('\n');
|
||
|
}
|
||
|
return builder.toString().stripTrailing();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** Thread local updater that accepts individual statistics for each tile. */
|
||
|
public class Updater {
|
||
|
private final Summary summary = new Summary();
|
||
|
|
||
|
private Updater() {
|
||
|
summaries.add(summary);
|
||
|
}
|
||
|
|
||
|
private static Cell newCell(String layer) {
|
||
|
return new Cell();
|
||
|
}
|
||
|
|
||
|
public void recordTile(TileCoord coord, int archivedBytes, List<TileSizeStats.LayerStats> layerStats) {
|
||
|
var tileStat = summary.byTile.get(coord.z());
|
||
|
var layerStat = summary.byLayer.get(coord.z());
|
||
|
tileStat.archivedBytes.accept(archivedBytes);
|
||
|
tileStat.acceptBigTile(coord, archivedBytes, layerStats);
|
||
|
long weight = tileWeights.getWeight(coord);
|
||
|
tileStat.totalWeight += weight;
|
||
|
tileStat.weightedArchivedBytesSum += weight * archivedBytes;
|
||
|
|
||
|
int sum = 0;
|
||
|
for (var layer : layerStats) {
|
||
|
var cell = layerStat.computeIfAbsent(layer.layer(), Updater::newCell);
|
||
|
cell.bytes.accept(layer.layerBytes());
|
||
|
cell.acceptBigTile(coord, layer.layerBytes(), layerStats);
|
||
|
sum += layer.layerBytes();
|
||
|
cell.weightedBytesSum += weight * layer.layerBytes();
|
||
|
cell.totalWeight += weight;
|
||
|
}
|
||
|
tileStat.weightedBytesSum += weight * sum;
|
||
|
tileStat.bytes.accept(sum);
|
||
|
}
|
||
|
}
|
||
|
}
|