kopia lustrzana https://github.com/onthegomap/planetiler
190 wiersze
6.9 KiB
Java
190 wiersze
6.9 KiB
Java
package com.onthegomap.planetiler.util;
|
|
|
|
import static com.onthegomap.planetiler.util.Exceptions.throwFatalException;
|
|
|
|
import com.google.common.io.LineReader;
|
|
import com.onthegomap.planetiler.config.Arguments;
|
|
import com.onthegomap.planetiler.config.PlanetilerConfig;
|
|
import com.onthegomap.planetiler.geo.TileCoord;
|
|
import com.onthegomap.planetiler.stats.ProgressLoggers;
|
|
import com.onthegomap.planetiler.stats.Stats;
|
|
import com.onthegomap.planetiler.worker.WorkerPipeline;
|
|
import java.io.BufferedInputStream;
|
|
import java.io.FileNotFoundException;
|
|
import java.io.IOException;
|
|
import java.io.InputStreamReader;
|
|
import java.io.Reader;
|
|
import java.nio.file.Files;
|
|
import java.nio.file.Path;
|
|
import java.time.LocalDate;
|
|
import java.time.Period;
|
|
import java.time.ZoneOffset;
|
|
import java.util.ArrayList;
|
|
import java.util.Comparator;
|
|
import java.util.HashMap;
|
|
import java.util.List;
|
|
import java.util.Map;
|
|
import java.util.concurrent.CompletableFuture;
|
|
import java.util.concurrent.ExecutionException;
|
|
import java.util.concurrent.atomic.AtomicLong;
|
|
import java.util.regex.Pattern;
|
|
import java.util.stream.IntStream;
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.LoggerFactory;
|
|
import org.tukaani.xz.XZInputStream;
|
|
|
|
/**
 * A utility for computing {@link TileWeights} from historic openstreetmap.org tile traffic.
 * <p>
 * To download raw data from OSM tile logs, run with:
 *
 * <pre>
 * {@code
 * java -jar planetiler.jar top-osm-tiles --days=<# days to fetch> --top=<# tiles to include> --output=output.tsv.gz
 * }
 * </pre>
 * <p>
 * You can also fetch precomputed top-1m tile stats from summer 2023 using
 * {@link #downloadPrecomputed(PlanetilerConfig, Stats)}
 */
|
|
public class TopOsmTiles {
|
|
|
|
private static final String DOWLOAD_URL =
|
|
"https://raw.githubusercontent.com/onthegomap/planetiler/main/layerstats/top_osm_tiles.tsv.gz";
|
|
private static final Logger LOGGER = LoggerFactory.getLogger(TopOsmTiles.class);
|
|
private final Stats stats;
|
|
private final PlanetilerConfig config;
|
|
private final Downloader downloader;
|
|
|
|
/**
 * Creates an instance whose {@link Downloader} is built from the same {@code config} and {@code stats}.
 *
 * @param config configuration to read settings from
 * @param stats  stats instance that stage timings are reported to
 */
TopOsmTiles(PlanetilerConfig config, Stats stats) {
  this.downloader = Downloader.create(config, stats);
  this.stats = stats;
  this.config = config;
}
|
|
|
|
Reader fetch(LocalDate date) throws IOException {
|
|
String url = "https://planet.openstreetmap.org/tile_logs/tiles-%4d-%02d-%02d.txt.xz".formatted(
|
|
date.getYear(),
|
|
date.getMonthValue(),
|
|
date.getDayOfMonth()
|
|
);
|
|
return new InputStreamReader(new XZInputStream(new BufferedInputStream(downloader.openStream(url))));
|
|
}
|
|
|
|
@SuppressWarnings("java:S2142")
|
|
TileWeights run(int threads, int topN, int maxZoom, List<LocalDate> toDownload) {
|
|
CompletableFuture<TileWeights> result = new CompletableFuture<>();
|
|
var timer = stats.startStage("top-osm-tiles");
|
|
|
|
AtomicLong downloaded = new AtomicLong();
|
|
|
|
var pipeline = WorkerPipeline.start("top-osm-tiles", stats)
|
|
.readFromTiny("urls", toDownload).<Map.Entry<Integer, Long>>addWorker("download", threads,
|
|
(prev, next) -> {
|
|
for (var date : prev) {
|
|
for (var line : readFile(maxZoom, date)) {
|
|
next.accept(line);
|
|
}
|
|
downloaded.incrementAndGet();
|
|
}
|
|
})
|
|
.addBuffer("lines", 100_000, 1_000)
|
|
.sinkTo("collect", 1, lines -> {
|
|
Map<Integer, Long> counts = new HashMap<>();
|
|
for (var line : lines) {
|
|
counts.merge(line.getKey(), line.getValue(), Long::sum);
|
|
}
|
|
LOGGER.info("Extracting top {} tiles from {} tiles", topN, counts.size());
|
|
var tileWeights = new TileWeights();
|
|
counts.entrySet().stream()
|
|
.sorted(Comparator.comparingLong(e -> -e.getValue()))
|
|
.limit(topN)
|
|
.forEach(entry -> tileWeights.put(TileCoord.decode(entry.getKey()), entry.getValue()));
|
|
result.complete(tileWeights);
|
|
});
|
|
|
|
ProgressLoggers progress = ProgressLoggers.create()
|
|
.addPercentCounter("files", toDownload.size(), downloaded)
|
|
.newLine()
|
|
.addPipelineStats(pipeline)
|
|
.newLine()
|
|
.addProcessStats();
|
|
|
|
pipeline.awaitAndLog(progress, config.logInterval());
|
|
timer.stop();
|
|
stats.printSummary();
|
|
try {
|
|
return result.get();
|
|
} catch (InterruptedException | ExecutionException e) {
|
|
return throwFatalException(e);
|
|
}
|
|
}
|
|
|
|
private List<Map.Entry<Integer, Long>> readFile(int maxZoom, LocalDate date) {
|
|
var splitter = Pattern.compile("[/ ]");
|
|
for (int i = 0; i <= config.httpRetries(); i++) {
|
|
List<Map.Entry<Integer, Long>> result = new ArrayList<>();
|
|
try (var reader = fetch(date)) {
|
|
LineReader lines = new LineReader(reader);
|
|
String line;
|
|
while ((line = lines.readLine()) != null) {
|
|
String[] parts = splitter.split(line);
|
|
if (parts.length == 4) {
|
|
// adjust osm tiles (256x256px) to vector (512x512px) by moving up one zoom level
|
|
int z = Integer.parseInt(parts[0]) - 1;
|
|
if (z >= 0 && z <= maxZoom) {
|
|
int x = Integer.parseInt(parts[1]) >> 1;
|
|
int y = Integer.parseInt(parts[2]) >> 1;
|
|
long loads = Long.parseLong(parts[3]);
|
|
result.add(Map.entry(TileCoord.ofXYZ(x, y, z).encoded(), loads));
|
|
}
|
|
}
|
|
}
|
|
return result;
|
|
} catch (FileNotFoundException e) {
|
|
LOGGER.info("No data for {}", date);
|
|
break;
|
|
} catch (IOException e) {
|
|
if (i == config.httpRetries()) {
|
|
LOGGER.warn("Failed getting {} {}", date, e);
|
|
}
|
|
}
|
|
}
|
|
return List.of();
|
|
}
|
|
|
|
public static void main(String[] args) throws IOException {
|
|
Arguments arguments = Arguments.fromArgsOrConfigFile(args).orElse(Arguments.of(Map.of(
|
|
"http-retries", "3"
|
|
)));
|
|
var config = PlanetilerConfig.from(arguments);
|
|
var stats = arguments.getStats();
|
|
var days = arguments.getInteger("days", "number of days into the past to look", 90);
|
|
var maxZoom = arguments.getInteger("maxzoom", "max zoom", 15);
|
|
var topN = arguments.getInteger("top", "top n", 1_000_000);
|
|
var output = arguments.file("output", "output", Path.of("top_osm_tiles.tsv.gz"));
|
|
var threads = arguments.getInteger("download-threads", "number of threads to use for downloading/parsing",
|
|
Math.min(10, arguments.threads()));
|
|
|
|
var date = LocalDate.now(ZoneOffset.UTC);
|
|
var toDownload = IntStream.range(0, days)
|
|
.mapToObj(i -> date.minus(Period.ofDays(i)))
|
|
.toList();
|
|
|
|
new TopOsmTiles(config, stats)
|
|
.run(threads, topN, maxZoom, toDownload)
|
|
.writeToFile(output);
|
|
}
|
|
|
|
/**
|
|
* Download precomputed top-1m tile stats from 90 days of openstreetmap.org tile logs to
|
|
* {@link PlanetilerConfig#tileWeights()} path if they don't already exist.
|
|
*/
|
|
public static void downloadPrecomputed(PlanetilerConfig config, Stats stats) {
|
|
if (!Files.exists(config.tileWeights())) {
|
|
Downloader.create(config, stats)
|
|
.downloadIfNecessary(new Downloader.ResourceToDownload("osm-tile-weights", DOWLOAD_URL, config.tileWeights()));
|
|
}
|
|
}
|
|
}
|