planetiler/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java

846 wiersze
37 KiB
Java

package com.onthegomap.planetiler;
import com.onthegomap.planetiler.archive.TileArchiveMetadata;
import com.onthegomap.planetiler.archive.TileArchiveWriter;
import com.onthegomap.planetiler.archive.WriteableTileArchive;
import com.onthegomap.planetiler.collection.FeatureGroup;
import com.onthegomap.planetiler.collection.LongLongMap;
import com.onthegomap.planetiler.collection.LongLongMultimap;
import com.onthegomap.planetiler.config.Arguments;
import com.onthegomap.planetiler.config.PlanetilerConfig;
import com.onthegomap.planetiler.mbtiles.Mbtiles;
import com.onthegomap.planetiler.reader.GeoPackageReader;
import com.onthegomap.planetiler.reader.NaturalEarthReader;
import com.onthegomap.planetiler.reader.ShapefileReader;
import com.onthegomap.planetiler.reader.osm.OsmInputFile;
import com.onthegomap.planetiler.reader.osm.OsmNodeBoundsProvider;
import com.onthegomap.planetiler.reader.osm.OsmReader;
import com.onthegomap.planetiler.stats.ProcessInfo;
import com.onthegomap.planetiler.stats.Stats;
import com.onthegomap.planetiler.stats.Timers;
import com.onthegomap.planetiler.util.BuildInfo;
import com.onthegomap.planetiler.util.ByteBufferUtil;
import com.onthegomap.planetiler.util.Downloader;
import com.onthegomap.planetiler.util.FileUtils;
import com.onthegomap.planetiler.util.Format;
import com.onthegomap.planetiler.util.Geofabrik;
import com.onthegomap.planetiler.util.LogUtil;
import com.onthegomap.planetiler.util.ResourceUsage;
import com.onthegomap.planetiler.util.Translations;
import com.onthegomap.planetiler.util.Wikidata;
import com.onthegomap.planetiler.worker.RunnableThatThrows;
import java.io.IOException;
import java.nio.file.FileSystem;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Function;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* High-level API for creating a new map that ties together lower-level utilities in a way that is suitable for the most
* common use-cases.
* <p>
* For example:
*
* <pre>
* <code>
* public static void main(String[] args) {
* Planetiler.create(arguments)
* .setProfile(new CustomProfile())
* .addShapefileSource("shapefile", Path.of("shapefile.zip"))
* .addNaturalEarthSource("natural_earth", Path.of("natural_earth.zip"))
* .addOsmSource("osm", Path.of("source.osm.pbf"))
* .setOutput("mbtiles", Path.of("output.mbtiles"))
* .run();
* }</code>
* </pre>
* <p>
* Each call to a builder API mutates the runner instance and returns it for more chaining.
* <p>
* See {@code ToiletsOverlayLowLevelApi} or unit tests for examples using the low-level API.
*/
@SuppressWarnings("UnusedReturnValue")
public class Planetiler {
private static final Logger LOGGER = LoggerFactory.getLogger(Planetiler.class);
private final List<Stage> stages = new ArrayList<>();
private final List<ToDownload> toDownload = new ArrayList<>();
private final List<InputPath> inputPaths = new ArrayList<>();
private final Timers.Finishable overallTimer;
private final Arguments arguments;
private final Stats stats;
private final Path tmpDir;
private final Path nodeDbPath;
private final Path multipolygonPath;
private final Path featureDbPath;
private final boolean downloadSources;
private final boolean onlyDownloadSources;
private final boolean parseNodeBounds;
private Profile profile = null;
private Function<Planetiler, Profile> profileProvider = null;
private final PlanetilerConfig config;
private FeatureGroup featureGroup;
private OsmInputFile osmInputFile;
private Path output;
private boolean overwrite = false;
private boolean ran = false;
// most common OSM languages
private List<String> languages = List.of(
"en", "ru", "ar", "zh", "ja", "ko", "fr",
"de", "fi", "pl", "es", "be", "br", "he"
);
private Translations translations;
private Path wikidataNamesFile;
private boolean useWikidata = false;
private boolean onlyFetchWikidata = false;
private boolean fetchWikidata = false;
private TileArchiveMetadata tileArchiveMetadata;
private Planetiler(Arguments arguments) {
this.arguments = arguments;
stats = arguments.getStats();
overallTimer = stats.startStageQuietly("overall");
config = PlanetilerConfig.from(arguments);
tmpDir = arguments.file("tmpdir", "temp directory", Path.of("data", "tmp"));
onlyDownloadSources = arguments.getBoolean("only_download", "download source data then exit", false);
downloadSources = onlyDownloadSources || arguments.getBoolean("download", "download sources", false);
nodeDbPath = arguments.file("temp_nodes", "temp node db location", tmpDir.resolve("node.db"));
multipolygonPath =
arguments.file("temp_multipolygons", "temp multipolygon db location", tmpDir.resolve("multipolygon.db"));
featureDbPath = arguments.file("temp_features", "temp feature db location", tmpDir.resolve("feature.db"));
parseNodeBounds =
arguments.getBoolean("osm_parse_node_bounds", "parse bounds from OSM nodes instead of header", false);
}
/** Returns a new empty runner that will get configuration from {@code arguments}. */
public static Planetiler create(Arguments arguments) {
return new Planetiler(arguments);
}
/**
* Adds a new {@code .osm.pbf} source that will be processed when {@link #run()} is called.
* <p>
* To override the location of the {@code .osm.pbf} file, set {@code name_path=newpath.osm.pbf} in the arguments.
*
* @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name_path} argument is not set
* @return this runner instance for chaining
* @see OsmInputFile
* @see OsmReader
*/
public Planetiler addOsmSource(String name, Path defaultPath) {
return addOsmSource(name, defaultPath, null);
}
/**
* Adds a new {@code .osm.pbf} source that will be processed when {@link #run()} is called.
* <p>
* If the file does not exist and {@code download=true} argument is set, then the file will first be downloaded from
* {@code defaultUrl}.
* <p>
* To override the location of the {@code .osm.pbf} file, set {@code name_path=newpath.osm.pbf} in the arguments and
* to override the download URL set {@code name_url=http://url/of/osm.pbf}.
*
* @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name_path} argument is not set
* @param defaultUrl remote URL that the file to download if {@code download=true} argument is set and {@code
* name_url} argument is not set. As a shortcut, can use "geofabrik:monaco" or
* "geofabrik:australia" shorthand to find an extract by name from
* <a href="https://download.geofabrik.de/">Geofabrik download site</a> or "aws:latest" to download
* the latest {@code planet.osm.pbf} file from <a href="https://registry.opendata.aws/osm/">AWS
* Open Data Registry</a>.
* @return this runner instance for chaining
* @see OsmInputFile
* @see OsmReader
* @see Downloader
* @see Geofabrik
*/
public Planetiler addOsmSource(String name, Path defaultPath, String defaultUrl) {
if (osmInputFile != null) {
// TODO: support more than one input OSM file
throw new IllegalArgumentException("Currently only one OSM input file is supported");
}
Path path = getPath(name, "OSM input file", defaultPath, defaultUrl);
var thisInputFile = new OsmInputFile(path, config.osmLazyReads());
osmInputFile = thisInputFile;
// fail fast if there is some issue with madvise on this system
if (config.nodeMapMadvise() || config.multipolygonGeometryMadvise()) {
ByteBufferUtil.init();
}
return appendStage(new Stage(
name,
List.of(
name + "_pass1: Pre-process OpenStreetMap input (store node locations then relation members)",
name + "_pass2: Process OpenStreetMap nodes, ways, then relations"
),
ifSourceUsed(name, () -> {
var header = osmInputFile.getHeader();
tileArchiveMetadata.set("planetiler:" + name + ":osmosisreplicationtime", header.instant());
tileArchiveMetadata.set("planetiler:" + name + ":osmosisreplicationseq",
header.osmosisReplicationSequenceNumber());
tileArchiveMetadata.set("planetiler:" + name + ":osmosisreplicationurl", header.osmosisReplicationBaseUrl());
try (
var nodeLocations =
LongLongMap.from(config.nodeMapType(), config.nodeMapStorage(), nodeDbPath, config.nodeMapMadvise());
var multipolygonGeometries = LongLongMultimap.newReplaceableMultimap(
config.multipolygonGeometryStorage(), multipolygonPath, config.multipolygonGeometryMadvise());
var osmReader = new OsmReader(name, thisInputFile, nodeLocations, multipolygonGeometries, profile(), stats)
) {
osmReader.pass1(config);
osmReader.pass2(featureGroup, config);
} finally {
FileUtils.delete(nodeDbPath);
FileUtils.delete(multipolygonPath);
}
}))
);
}
/**
* Adds a new ESRI shapefile source that will be processed using a projection inferred from the shapefile when
* {@link #run()} is called.
* <p>
* To override the location of the {@code shapefile} file, set {@code name_path=newpath.shp.zip} in the arguments.
*
* @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name_path} key is not set through arguments. Can be a
* {@code .shp} file with other shapefile components in the same directory, or a {@code .zip} file
* containing the shapefile components.
* @return this runner instance for chaining
* @see ShapefileReader
*/
public Planetiler addShapefileSource(String name, Path defaultPath) {
return addShapefileSource(null, name, defaultPath);
}
/**
* Adds a new ESRI shapefile source that will be processed using an explicit projection when {@link #run()} is called.
* <p>
* To override the location of the {@code shapefile} file, set {@code name_path=newpath.shp.zip} in the arguments.
*
* @param projection the Coordinate Reference System authority code to use, parsed with
* {@link org.geotools.referencing.CRS#decode(String)}
* @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name_path} key is not set through arguments. Can be a
* {@code .shp} file with other shapefile components in the same directory, or a {@code .zip} file
* containing the shapefile components.
* @return this runner instance for chaining
* @see ShapefileReader
*/
public Planetiler addShapefileSource(String projection, String name, Path defaultPath) {
return addShapefileSource(projection, name, defaultPath, null);
}
/**
* Adds a new ESRI shapefile source that will be processed with a projection inferred from the shapefile when
* {@link #run()} is called.
* <p>
* If the file does not exist and {@code download=true} argument is set, then the file will first be downloaded from
* {@code defaultUrl}.
* <p>
* To override the location of the {@code shapefile} file, set {@code name_path=newpath.shp.zip} in the arguments and
* to override the download URL set {@code name_url=http://url/of/shapefile.zip}.
*
* @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name_path} key is not set through arguments. Can be a
* {@code .shp} file with other shapefile components in the same directory, or a {@code .zip} file
* containing the shapefile components.
* @param defaultUrl remote URL that the file to download if {@code download=true} argument is set and {@code
* name_url} argument is not set
* @return this runner instance for chaining
* @see ShapefileReader
* @see Downloader
*/
public Planetiler addShapefileSource(String name, Path defaultPath, String defaultUrl) {
return addShapefileSource(null, name, defaultPath, defaultUrl);
}
/**
* Adds a new ESRI shapefile glob source that will process all files under {@param basePath} matching
* {@param globPattern}. {@param basePath} may be a directory or ZIP archive.
*
* @param sourceName string to use in stats and logs to identify this stage
* @param basePath path to the directory containing shapefiles to process
* @param globPattern string to match filenames against, as described in {@link FileSystem#getPathMatcher(String)}.
* @return this runner instance for chaining
* @see ShapefileReader
*/
public Planetiler addShapefileGlobSource(String sourceName, Path basePath, String globPattern) {
return addShapefileGlobSource(null, sourceName, basePath, globPattern, null);
}
/**
* Adds a new ESRI shapefile glob source that will process all files under {@param basePath} matching
* {@param globPattern} using an explicit projection. {@param basePath} may be a directory or ZIP archive.
* <p>
* If {@param globPattern} matches a ZIP archive, all files ending in {@code .shp} within the archive will be used for
* this source.
* <p>
* If the file does not exist and {@code download=true} argument is set, then the file will first be downloaded from
* {@code defaultUrl}.
* <p>
*
* @param projection the Coordinate Reference System authority code to use, parsed with
* {@link org.geotools.referencing.CRS#decode(String)}
* @param sourceName string to use in stats and logs to identify this stage
* @param basePath path to the directory or zip file containing shapefiles to process
* @param globPattern string to match filenames against, as described in {@link FileSystem#getPathMatcher(String)}.
* @param defaultUrl remote URL that the file to download if {@code download=true} argument is set and
* {@code name_url} argument is not set
* @return this runner instance for chaining
* @see ShapefileReader
*/
public Planetiler addShapefileGlobSource(String projection, String sourceName, Path basePath,
String globPattern, String defaultUrl) {
Path dirPath = getPath(sourceName, "shapefile glob", basePath, defaultUrl);
return addStage(sourceName, "Process all files matching " + dirPath + "/" + globPattern,
ifSourceUsed(sourceName, () -> {
var sourcePaths = FileUtils.walkPathWithPattern(basePath, globPattern,
zipPath -> FileUtils.walkPathWithPattern(zipPath, "*.shp"));
ShapefileReader.processWithProjection(projection, sourceName, sourcePaths, featureGroup, config,
profile, stats);
}));
}
/**
* Adds a new ESRI shapefile source that will be processed with an explicit projection when {@link #run()} is called.
* <p>
* If the file does not exist and {@code download=true} argument is set, then the file will first be downloaded from
* {@code defaultUrl}.
* <p>
* To override the location of the {@code shapefile} file, set {@code name_path=newpath.shp.zip} in the arguments and
* to override the download URL set {@code name_url=http://url/of/shapefile.zip}.
*
* @param projection the Coordinate Reference System authority code to use, parsed with
* {@link org.geotools.referencing.CRS#decode(String)}
* @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name_path} key is not set through arguments. Can be a
* {@code .shp} file with other shapefile components in the same directory, or a {@code .zip} file
* containing the shapefile components.
* @param defaultUrl remote URL that the file to download if {@code download=true} argument is set and {@code
* name_url} argument is not set
* @return this runner instance for chaining
* @see ShapefileReader
* @see Downloader
*/
public Planetiler addShapefileSource(String projection, String name, Path defaultPath, String defaultUrl) {
Path path = getPath(name, "shapefile", defaultPath, defaultUrl);
return addStage(name, "Process features in " + path,
ifSourceUsed(name, () -> {
List<Path> sourcePaths = List.of(path);
if (FileUtils.hasExtension(path, "zip") || Files.isDirectory(path)) {
sourcePaths = FileUtils.walkPathWithPattern(path, "*.shp");
}
ShapefileReader.processWithProjection(projection, name, sourcePaths, featureGroup, config, profile, stats);
}));
}
/**
* Adds a new OGC GeoPackage source that will be processed when {@link #run()} is called.
* <p>
* If the file does not exist and {@code download=true} argument is set, then the file will first be downloaded from
* {@code defaultUrl}.
* <p>
* To override the location of the {@code geopackage} file, set {@code name_path=newpath.gpkg} in the arguments and to
* override the download URL set {@code name_url=http://url/of/file.gpkg}.
* <p>
* If given a path to a ZIP file containing one or more GeoPackages, each {@code .gpkg} file within will be extracted
* to a temporary directory at runtime.
*
* @param projection the Coordinate Reference System authority code to use, parsed with
* {@link org.geotools.referencing.CRS#decode(String)}
* @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name_path} key is not set through arguments
* @param defaultUrl remote URL that the file to download if {@code download=true} argument is set and {@code
* name_url} argument is not set
* @return this runner instance for chaining
* @see GeoPackageReader
* @see Downloader
*/
public Planetiler addGeoPackageSource(String projection, String name, Path defaultPath, String defaultUrl) {
Path path = getPath(name, "geopackage", defaultPath, defaultUrl);
return addStage(name, "Process features in " + path,
ifSourceUsed(name, () -> {
List<Path> sourcePaths = List.of(path);
if (FileUtils.hasExtension(path, "zip")) {
sourcePaths = FileUtils.walkPathWithPattern(path, "*.gpkg");
}
if (sourcePaths.isEmpty()) {
throw new IllegalArgumentException("No .gpkg files found in " + path);
}
GeoPackageReader.process(projection, name, sourcePaths, tmpDir, featureGroup, config, profile, stats);
}));
}
/**
* Adds a new OGC GeoPackage source that will be processed when {@link #run()} is called.
* <p>
* If the file does not exist and {@code download=true} argument is set, then the file will first be downloaded from
* {@code defaultUrl}.
* <p>
* To override the location of the {@code geopackage} file, set {@code name_path=newpath.gpkg} in the arguments and to
* override the download URL set {@code name_url=http://url/of/file.gpkg}.
* <p>
* If given a path to a ZIP file containing one or more GeoPackages, each {@code .gpkg} file within will be extracted
* to a temporary directory at runtime.
*
* @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name_path} key is not set through arguments
* @param defaultUrl remote URL that the file to download if {@code download=true} argument is set and {@code
* name_url} argument is not set
* @return this runner instance for chaining
* @see GeoPackageReader
* @see Downloader
*/
public Planetiler addGeoPackageSource(String name, Path defaultPath, String defaultUrl) {
return addGeoPackageSource(null, name, defaultPath, defaultUrl);
}
/**
* Adds a new Natural Earth sqlite file source that will be processed when {@link #run()} is called.
* <p>
* To override the location of the {@code sqlite} file, set {@code name_path=newpath.zip} in the arguments and to
* override the download URL set {@code name_url=http://url/of/natural_earth.zip}.
*
* @deprecated can be replaced by {@link #addGeoPackageSource(String, Path, String)}.
* @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name} key is not set through arguments. Can be the
* {@code .sqlite} file or a {@code .zip} file containing the sqlite file.
* @return this runner instance for chaining
* @see NaturalEarthReader
*/
@Deprecated(forRemoval = true)
public Planetiler addNaturalEarthSource(String name, Path defaultPath) {
return addNaturalEarthSource(name, defaultPath, null);
}
/**
* Adds a new Natural Earth sqlite file source that will be processed when {@link #run()} is called.
* <p>
* If the file does not exist and {@code download=true} argument is set, then the file will first be downloaded from
* {@code defaultUrl}.
* <p>
* To override the location of the {@code sqlite} file, set {@code name_path=newpath.zip} in the arguments and to
* override the download URL set {@code name_url=http://url/of/natural_earth.zip}.
*
* @deprecated can be replaced by {@link #addGeoPackageSource(String, Path, String)}.
*
* @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name} key is not set through arguments. Can be the
* {@code .sqlite} file or a {@code .zip} file containing the sqlite file.
* @param defaultUrl remote URL that the file to download if {@code download=true} argument is set and {@code
* name_url} argument is not set
* @return this runner instance for chaining
* @see NaturalEarthReader
* @see Downloader
*/
@Deprecated(forRemoval = true)
public Planetiler addNaturalEarthSource(String name, Path defaultPath, String defaultUrl) {
Path path = getPath(name, "sqlite db", defaultPath, defaultUrl);
return addStage(name, "Process features in " + path, ifSourceUsed(name, () -> NaturalEarthReader
.process(name, path, tmpDir.resolve("natearth.sqlite"), featureGroup, config, profile, stats)));
}
/**
* Adds a new stage that will be invoked when {@link #run()} is called.
*
* @param name string to use in stats and logs to identify this stage
* @param description details to print when logging what stages will run
* @param task the task to run
* @return this runner instance for chaining
*/
public Planetiler addStage(String name, String description, RunnableThatThrows task) {
return appendStage(new Stage(name, description, task));
}
/**
* Sets the default languages that will be used by {@link #translations()} when not overridden by {@code languages}
* argument.
*
* @param languages the list of languages to use when {@code name} argument is not set
* @return this runner instance for chaining
*/
public Planetiler setDefaultLanguages(List<String> languages) {
this.languages = languages;
return this;
}
/**
* Updates {@link #translations()} to use name translations fetched from wikidata based on the
* <a href="https://www.wikidata.org/wiki/Wikidata:OpenStreetMap">wikidata tag</a> on OSM elements.
* <p>
* When either {@code only_fetch_wikidata} or {@code fetch_wikidata} arguments are set to true, this downloads
* translations for every OSM element that the profile cares about and stores them to {@code defaultWikidataCache} (or
* the value of the {@code wikidata_cache} argument) before processing any sources.
* <p>
* As long as {@code use_wikidata} is not set to false, then previously-downloaded wikidata translations will be
* loaded from the cache file, so you can run with {@code fetch_wikidata=true} once, then without it each subsequent
* run to only download translations once.
*
* @param defaultWikidataCache Path to store downloaded wikidata name translations to, and to read them from on
* subsequent runs. Overridden by {@code wikidata_cache} argument value.
* @return this runner for chaining
* @see Wikidata
*/
public Planetiler fetchWikidataNameTranslations(Path defaultWikidataCache) {
onlyFetchWikidata = arguments
.getBoolean("only_fetch_wikidata", "fetch wikidata translations then quit", onlyFetchWikidata);
fetchWikidata =
onlyFetchWikidata || arguments.getBoolean("fetch_wikidata", "fetch wikidata translations then continue",
fetchWikidata);
useWikidata = fetchWikidata || arguments.getBoolean("use_wikidata", "use wikidata translations", true);
wikidataNamesFile = arguments.file("wikidata_cache", "wikidata cache file", defaultWikidataCache);
return this;
}
public Translations translations() {
if (translations == null) {
boolean transliterate = arguments.getBoolean("transliterate", "attempt to transliterate latin names", true);
List<String> languages = arguments.getList("languages", "languages to use", this.languages);
translations = Translations.defaultProvider(languages).setShouldTransliterate(transliterate);
}
return translations;
}
private Planetiler appendStage(Stage stage) {
if (stages.stream().anyMatch(other -> stage.id.equals(other.id))) {
throw new IllegalArgumentException("Duplicate stage name: " + stage.id);
}
stages.add(stage);
return this;
}
/** Sets the profile implementation that controls how source feature map to output map elements. */
public Planetiler setProfile(Profile profile) {
this.profile = profile;
return this;
}
/**
* Sets a profile that needs information from this runner to be instantiated.
* <p>
* Construction will be deferred until all inputs are read.
*/
public Planetiler setProfile(Function<Planetiler, Profile> profileProvider) {
this.profileProvider = profileProvider;
return this;
}
/**
* Sets the location of the output archive to write rendered tiles to. Fails if the archive already exists.
* <p>
* To override the location of the file, set {@code argument=newpath} in the arguments.
*
* @param argument the argument key to check for an override to {@code fallback}
* @param fallback the fallback value if {@code argument} is not set in arguments
* @return this runner instance for chaining
* @see TileArchiveWriter
*/
public Planetiler setOutput(String argument, Path fallback) {
this.output = arguments.file(argument, "output tile archive", fallback);
return this;
}
/**
* Sets the location of the output archive to write rendered tiles to. Overwrites file if it already exists.
* <p>
* To override the location of the file, set {@code argument=newpath} in the arguments.
*
* @param argument the argument key to check for an override to {@code fallback}
* @param fallback the fallback value if {@code argument} is not set in arguments
* @return this runner instance for chaining
* @see TileArchiveWriter
*/
public Planetiler overwriteOutput(String argument, Path fallback) {
this.overwrite = true;
return setOutput(argument, fallback);
}
/**
* Reads all elements from all sourced that have been added, generates map features according to the profile, and
* writes the rendered tiles to the output archive.
*
* @throws IllegalArgumentException if expected inputs have not been provided
* @throws Exception if an error occurs while processing
*/
public void run() throws Exception {
var showVersion = arguments.getBoolean("version", "show version then exit", false);
var buildInfo = BuildInfo.get();
if (buildInfo != null && LOGGER.isInfoEnabled()) {
LOGGER.info("Planetiler build git hash: {}", buildInfo.githash());
LOGGER.info("Planetiler build version: {}", buildInfo.version());
LOGGER.info("Planetiler build timestamp: {}", buildInfo.buildTimeString());
}
if (showVersion) {
System.exit(0);
}
if (profile() == null) {
throw new IllegalArgumentException("No profile specified");
}
if (output == null) {
throw new IllegalArgumentException("No output specified");
}
if (stages.isEmpty()) {
throw new IllegalArgumentException("No sources specified");
}
if (ran) {
throw new IllegalArgumentException("Can only run once");
}
ran = true;
tileArchiveMetadata = new TileArchiveMetadata(profile, config.arguments());
if (arguments.getBoolean("help", "show arguments then exit", false)) {
System.exit(0);
} else if (onlyDownloadSources) {
// don't check files if not generating map
} else if (overwrite || config.force()) {
FileUtils.deleteFile(output);
} else if (Files.exists(output)) {
throw new IllegalArgumentException(output + " already exists, use the --force argument to overwrite.");
}
LOGGER.info("Building {} profile into {} in these phases:", profile.getClass().getSimpleName(), output);
if (!toDownload.isEmpty()) {
LOGGER.info(" download: Download sources {}", toDownload.stream().map(d -> d.id).toList());
}
if (!onlyDownloadSources && fetchWikidata) {
LOGGER.info(" wikidata: Fetch translations from wikidata query service");
}
if (!onlyDownloadSources && !onlyFetchWikidata) {
for (Stage stage : stages) {
for (String details : stage.details) {
LOGGER.info(" {}", details);
}
}
LOGGER.info(" sort: Sort rendered features by tile ID");
LOGGER.info(" archive: Encode each tile and write to {}", output);
}
// in case any temp files are left from a previous run...
FileUtils.delete(tmpDir, nodeDbPath, featureDbPath, multipolygonPath);
Files.createDirectories(tmpDir);
FileUtils.createParentDirectories(nodeDbPath, featureDbPath, multipolygonPath, output);
if (!toDownload.isEmpty()) {
download();
}
ensureInputFilesExist();
if (fetchWikidata) {
Wikidata.fetch(osmInputFile(), wikidataNamesFile, config(), profile(), stats());
}
if (useWikidata) {
translations().addFallbackTranslationProvider(Wikidata.load(wikidataNamesFile));
}
if (onlyDownloadSources || onlyFetchWikidata) {
return; // exit only if just fetching wikidata or downloading sources
}
if (osmInputFile != null) {
checkDiskSpace();
checkMemory();
var bounds = config.bounds();
if (!parseNodeBounds) {
bounds.addFallbackProvider(osmInputFile);
}
bounds.addFallbackProvider(new OsmNodeBoundsProvider(osmInputFile, config, stats));
}
try (WriteableTileArchive archive = Mbtiles.newWriteToFileDatabase(output, config.compactDb())) {
featureGroup =
FeatureGroup.newDiskBackedFeatureGroup(archive.tileOrder(), featureDbPath, profile, config, stats);
stats.monitorFile("nodes", nodeDbPath);
stats.monitorFile("features", featureDbPath);
stats.monitorFile("multipolygons", multipolygonPath);
stats.monitorFile("archive", output);
for (Stage stage : stages) {
stage.task.run();
}
LOGGER.info("Deleting node.db to make room for output file");
profile.release();
for (var inputPath : inputPaths) {
if (inputPath.freeAfterReading()) {
LOGGER.info("Deleting {} ({}) to make room for output file", inputPath.id, inputPath.path);
FileUtils.delete(inputPath.path());
}
}
featureGroup.prepare();
TileArchiveWriter.writeOutput(featureGroup, archive, () -> FileUtils.fileSize(output), tileArchiveMetadata,
config,
stats);
} catch (IOException e) {
throw new IllegalStateException("Unable to write to " + output, e);
}
overallTimer.stop();
LOGGER.info("FINISHED!");
stats.printSummary();
stats.close();
}
private void checkDiskSpace() {
ResourceUsage readPhase = new ResourceUsage("read phase disk");
ResourceUsage writePhase = new ResourceUsage("write phase disk");
long osmSize = osmInputFile.diskUsageBytes();
long nodeMapSize =
OsmReader.estimateNodeLocationUsage(config.nodeMapType(), config.nodeMapStorage(), osmSize, tmpDir).diskUsage();
long multipolygonGeometrySize =
OsmReader.estimateMultipolygonGeometryUsage(config.multipolygonGeometryStorage(), osmSize, tmpDir).diskUsage();
long featureSize = profile.estimateIntermediateDiskBytes(osmSize);
long outputSize = profile.estimateOutputBytes(osmSize);
// node locations and multipolygon geometries only needed while reading inputs
readPhase.addDisk(nodeDbPath, nodeMapSize, "temporary node location cache");
readPhase.addDisk(multipolygonPath, multipolygonGeometrySize, "temporary multipolygon geometry cache");
// feature db persists across read/write phase
readPhase.addDisk(featureDbPath, featureSize, "temporary feature storage");
writePhase.addDisk(featureDbPath, featureSize, "temporary feature storage");
// output only needed during write phase
writePhase.addDisk(output, outputSize, "archive output");
// if the user opts to remove an input source after reading to free up additional space for the output...
for (var input : inputPaths) {
if (input.freeAfterReading()) {
writePhase.addDisk(input.path, -FileUtils.size(input.path), "delete " + input.id + " source after reading");
}
}
readPhase.checkAgainstLimits(config.force(), true);
writePhase.checkAgainstLimits(config.force(), true);
}
private void checkMemory() {
Format format = Format.defaultInstance();
ResourceUsage check = new ResourceUsage("read phase");
ResourceUsage nodeMapUsages = OsmReader.estimateNodeLocationUsage(config.nodeMapType(), config.nodeMapStorage(),
osmInputFile.diskUsageBytes(), tmpDir);
ResourceUsage multipolygonGeometryUsages =
OsmReader.estimateMultipolygonGeometryUsage(config.nodeMapStorage(), osmInputFile.diskUsageBytes(), tmpDir);
long memoryMappedFiles = nodeMapUsages.diskUsage() + multipolygonGeometryUsages.diskUsage();
check
.addAll(nodeMapUsages)
.addAll(multipolygonGeometryUsages)
.addMemory(profile().estimateRamRequired(osmInputFile.diskUsageBytes()), "temporary profile storage");
check.checkAgainstLimits(config().force(), true);
// check off-heap memory if we can get it
ProcessInfo.getSystemFreeMemoryBytes().ifPresent(extraMemory -> {
if (extraMemory < memoryMappedFiles) {
LOGGER.warn(
"""
Planetiler will use ~%s memory-mapped files for node locations and multipolygon geometries but the OS only
has %s available to cache pages, this may slow the import down. To speed up, run on a machine with more
memory or reduce the -Xmx setting.
"""
.formatted(
format.storage(memoryMappedFiles),
format.storage(extraMemory)
));
} else {
LOGGER.debug("✓ %s temporary files and %s of free memory for OS to cache them".formatted(
format.storage(memoryMappedFiles),
format.storage(extraMemory)
));
}
});
}
public Arguments arguments() {
return arguments;
}
public OsmInputFile osmInputFile() {
return osmInputFile;
}
public PlanetilerConfig config() {
return config;
}
public Profile profile() {
if (profile == null && profileProvider != null) {
profile = profileProvider.apply(this);
}
return profile;
}
public Stats stats() {
return stats;
}
private RunnableThatThrows ifSourceUsed(String name, RunnableThatThrows task) {
return () -> {
if (profile.caresAboutSource(name)) {
task.run();
} else {
LogUtil.setStage(name);
LOGGER.info("Skipping since profile does not use it");
LogUtil.clearStage();
}
};
}
private Path getPath(String name, String type, Path defaultPath, String defaultUrl) {
Path path = arguments.file(name + "_path", name + " " + type + " path", defaultPath);
boolean freeAfterReading = arguments.getBoolean("free_" + name + "_after_read",
"delete " + name + " input file after reading to make space for output (reduces peak disk usage)", false);
if (downloadSources) {
String url = arguments.getString(name + "_url", name + " " + type + " url", defaultUrl);
if (!Files.exists(path) && url != null) {
toDownload.add(new ToDownload(name, url, path));
}
}
inputPaths.add(new InputPath(name, path, freeAfterReading));
return path;
}
private void download() {
var timer = stats.startStage("download");
Downloader downloader = Downloader.create(config(), stats());
for (ToDownload toDownload : toDownload) {
if (profile.caresAboutSource(toDownload.id)) {
downloader.add(toDownload.id, toDownload.url, toDownload.path);
}
}
downloader.run();
timer.stop();
}
private void ensureInputFilesExist() {
for (InputPath inputPath : inputPaths) {
if (profile.caresAboutSource(inputPath.id) && !Files.exists(inputPath.path)) {
throw new IllegalArgumentException(inputPath.path + " does not exist");
}
}
}
private record Stage(String id, List<String> details, RunnableThatThrows task) {
Stage(String id, String description, RunnableThatThrows task) {
this(id, List.of(id + ": " + description), task);
}
}
private record ToDownload(String id, String url, Path path) {}
private record InputPath(String id, Path path, boolean freeAfterReading) {}
}