Warn when running with insufficient memory or disk space (#73)

pull/107/head
Michael Barry 2022-03-03 07:25:24 -05:00 committed by GitHub
parent 27bfb0f819
commit 20c7a05caa
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 317 additions and 23 deletions

View file

@@ -119,6 +119,10 @@
# osm_path=path/to/monaco.osm.pbf
# osm_url=https://url/for/monaco.osm.pbf
# To delete an input file before writing the output file (and reduce peak disk requirements):
# free_osm_after_read: true
# free_natural_earth_after_read: true
#### Layer-specific overrides:
#### "boundary" layer

View file

@@ -182,6 +182,24 @@ public class BasemapProfile extends ForwardingProfile {
return OpenMapTilesSchema.VERSION;
}
@Override
public long estimateIntermediateDiskBytes(long osmFileSize) {
// in late 2021, a 60GB OSM file used 200GB for intermediate storage
return osmFileSize * 200 / 60;
}
@Override
public long estimateOutputBytes(long osmFileSize) {
// in late 2021, a 60GB OSM file generated a 100GB output file
return osmFileSize * 100 / 60;
}
@Override
public long estimateRamRequired(long osmFileSize) {
// 30GB for a 60GB OSM file is generally safe, although less might be OK too
return osmFileSize / 2;
}
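For a sense of scale, these ratios applied to the ~62 GB planet file referenced later in this change work out roughly as follows (back-of-the-envelope arithmetic only, not the output of any Planetiler API):

long osmFileSize = 62_000_000_000L;     // assumed ~62 GB planet.osm.pbf
long tmpBytes = osmFileSize * 200 / 60; // ≈ 207 GB of intermediate feature storage
long outBytes = osmFileSize * 100 / 60; // ≈ 103 GB output file
long ramBytes = osmFileSize / 2;        // ≈ 31 GB of RAM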
/**
* Layers should implement this interface to subscribe to elements from <a href="https://www.naturalearthdata.com/">natural
* earth</a>.

View file

@@ -10,19 +10,26 @@ import com.onthegomap.planetiler.reader.NaturalEarthReader;
import com.onthegomap.planetiler.reader.ShapefileReader;
import com.onthegomap.planetiler.reader.osm.OsmInputFile;
import com.onthegomap.planetiler.reader.osm.OsmReader;
import com.onthegomap.planetiler.stats.ProcessInfo;
import com.onthegomap.planetiler.stats.Stats;
import com.onthegomap.planetiler.stats.Timers;
import com.onthegomap.planetiler.util.Downloader;
import com.onthegomap.planetiler.util.FileUtils;
import com.onthegomap.planetiler.util.Format;
import com.onthegomap.planetiler.util.Geofabrik;
import com.onthegomap.planetiler.util.LogUtil;
import com.onthegomap.planetiler.util.Translations;
import com.onthegomap.planetiler.util.Wikidata;
import com.onthegomap.planetiler.worker.RunnableThatThrows;
import java.io.IOException;
import java.nio.file.FileStore;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.function.Function;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -444,7 +451,7 @@ public class Planetiler {
System.exit(0);
} else if (onlyDownloadSources) {
// don't check files if not generating map
} else if (overwrite || config.forceOverwrite()) {
} else if (overwrite || config.force()) {
FileUtils.deleteFile(output);
} else if (Files.exists(output)) {
throw new IllegalArgumentException(output + " already exists, use the --force argument to overwrite.");
@@ -475,17 +482,17 @@ public class Planetiler {
download();
}
ensureInputFilesExist();
if (onlyDownloadSources) {
return; // exit only if just downloading
}
Files.createDirectories(tmpDir);
checkDiskSpace();
checkMemory();
if (fetchWikidata) {
Wikidata.fetch(osmInputFile(), wikidataNamesFile, config(), profile(), stats());
}
if (useWikidata) {
translations().addTranslationProvider(Wikidata.load(wikidataNamesFile));
}
if (onlyFetchWikidata) {
return; // exit only if just fetching wikidata
if (onlyDownloadSources || onlyFetchWikidata) {
return; // exit only if just fetching wikidata or downloading sources
}
if (osmInputFile != null) {
config.bounds().setFallbackProvider(osmInputFile);
@@ -503,8 +510,13 @@ public class Planetiler {
stage.task.run();
}
LOGGER.info("Deleting node.db to make room for mbtiles");
LOGGER.info("Deleting node.db to make room for output file");
profile.release();
for (var inputPath : inputPaths) {
if (inputPath.freeAfterReading()) {
LOGGER.info("Deleting " + inputPath.id + "(" + inputPath.path + ") to make room for output file");
}
}
featureGroup.prepare();
@@ -516,6 +528,78 @@ public class Planetiler {
stats.close();
}
private void checkDiskSpace() {
Map<FileStore, Long> readPhaseBytes = new HashMap<>();
Map<FileStore, Long> writePhaseBytes = new HashMap<>();
long osmSize = osmInputFile.diskUsageBytes();
long nodeMapSize = LongLongMap.estimateDiskUsage(config.nodeMapType(), config.nodeMapStorage(), osmSize);
long featureSize = profile.estimateIntermediateDiskBytes(osmSize);
long outputSize = profile.estimateOutputBytes(osmSize);
try {
// node locations only needed while reading inputs
readPhaseBytes.merge(Files.getFileStore(tmpDir), nodeMapSize, Long::sum);
// feature db persists across read/write phase
readPhaseBytes.merge(Files.getFileStore(tmpDir), featureSize, Long::sum);
writePhaseBytes.merge(Files.getFileStore(tmpDir), featureSize, Long::sum);
// output only needed during write phase
writePhaseBytes.merge(Files.getFileStore(output.toAbsolutePath().getParent()), outputSize, Long::sum);
// if the user opts to remove an input source after reading to free up additional space for the output...
for (var input : inputPaths) {
if (input.freeAfterReading()) {
writePhaseBytes.merge(Files.getFileStore(input.path), -Files.size(input.path), Long::sum);
}
}
checkDiskSpaceOnDevices(readPhaseBytes, "read");
checkDiskSpaceOnDevices(writePhaseBytes, "write");
} catch (IOException e) {
LOGGER.warn("Unable to check disk space requirements, may run out of room " + e);
}
}
private void checkDiskSpaceOnDevices(Map<FileStore, Long> phaseBytes, String phase) throws IOException {
for (var entry : phaseBytes.entrySet()) {
var fs = entry.getKey();
var requested = entry.getValue();
long available = fs.getUnallocatedSpace();
if (available < requested) {
var format = Format.defaultInstance();
String warning =
"Planetiler needs ~" + format.storage(requested) + " on " + fs + " during " + phase
+ " phase, which only has "
+ format.storage(available) + " available";
if (config.force() || requested < available * 1.25) {
LOGGER.warn(warning + ", may fail.");
} else {
throw new IllegalArgumentException(warning + ", use the --force argument to continue anyway.");
}
}
}
}
private void checkMemory() {
var format = Format.defaultInstance();
long nodeMap = LongLongMap.estimateMemoryUsage(config.nodeMapType(), config.nodeMapStorage(),
osmInputFile.diskUsageBytes());
long profile = profile().estimateRamRequired(osmInputFile.diskUsageBytes());
long requested = nodeMap + profile;
long jvmMemory = ProcessInfo.getMaxMemoryBytes();
if (jvmMemory < requested) {
String warning =
"Planetiler needs ~" + format.storage(requested) + " memory for the JVM, but only "
+ format.storage(jvmMemory) + " is available, try setting -Xmx" + format.storage(requested).toLowerCase(
Locale.ROOT);
if (config.force() || requested < jvmMemory * 1.25) {
LOGGER.warn(warning + ", may fail.");
} else {
throw new IllegalArgumentException(warning + ", use the --force argument to continue anyway.");
}
}
}
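Both checks share the same tolerance rule: when an estimate exceeds what is available, an overrun of less than 25% (or any overrun when --force is set) is only logged as a warning, while a larger shortfall aborts the run. A minimal standalone sketch of that decision, with hypothetical numbers:

long requested = 30_000_000_000L; // hypothetical estimate, ~30 GB
long available = 28_000_000_000L; // hypothetical free disk space or max JVM heap
boolean force = false;            // value of the --force argument
if (available < requested) {
  if (force || requested < available * 1.25) {
    // within ~25% of what's available: the estimates are heuristics, so warn and continue
    System.err.println("may fail");
  } else {
    // clearly short on resources: fail fast with a hint to use --force
    throw new IllegalArgumentException("use the --force argument to continue anyway");
  }
}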
public Arguments arguments() {
return arguments;
}
@@ -553,13 +637,15 @@ public class Planetiler {
private Path getPath(String name, String type, Path defaultPath, String defaultUrl) {
Path path = arguments.file(name + "_path", name + " " + type + " path", defaultPath);
boolean freeAfterReading = arguments.getBoolean("free_" + name + "_after_read",
"delete " + name + " input file after reading to make space for output (reduces peak disk usage)", false);
if (downloadSources) {
String url = arguments.getString(name + "_url", name + " " + type + " url", defaultUrl);
if (!Files.exists(path) && url != null) {
toDownload.add(new ToDownload(name, url, path));
}
}
inputPaths.add(new InputPath(name, path));
inputPaths.add(new InputPath(name, path, freeAfterReading));
return path;
}
@@ -592,5 +678,5 @@ public class Planetiler {
private record ToDownload(String id, String url, Path path) {}
private record InputPath(String id, Path path) {}
private record InputPath(String id, Path path, boolean freeAfterReading) {}
}

View file

@@ -194,6 +194,30 @@ public interface Profile {
return true;
}
/**
* Returns an estimate of how many bytes of disk this profile will use for intermediate feature storage, used to
* warn when running with insufficient disk space.
*/
default long estimateIntermediateDiskBytes(long osmFileSize) {
return 0L;
}
/**
* Returns an estimate of how large the output file will be, used to warn when running with insufficient disk space.
*/
default long estimateOutputBytes(long osmFileSize) {
return 0L;
}
/**
* Returns an estimate of how many bytes of RAM this profile will use, used to warn when running with insufficient memory.
* <p>
* This should include memory for things the profile stores in memory, as well as relations and multipolygons.
*/
default long estimateRamRequired(long osmFileSize) {
return 0L;
}
/**
* A default implementation of {@link Profile} that emits no output elements.
*/

View file

@@ -44,11 +44,7 @@ public interface LongLongMap extends Closeable, MemoryEstimator.HasEstimate, Dis
* @throws IllegalArgumentException if {@code name} or {@code storage} is not valid
*/
static LongLongMap from(String name, String storage, Path path) {
boolean ram = switch (storage) {
case "ram" -> true;
case "mmap" -> false;
default -> throw new IllegalArgumentException("Unexpected storage value: " + storage);
};
boolean ram = isRam(storage);
return switch (name) {
case "noop" -> noop();
@@ -58,6 +54,47 @@
};
}
/** Estimates the number of bytes of RAM this nodemap will use for a given OSM input file. */
static long estimateMemoryUsage(String name, String storage, long osmFileSize) {
boolean ram = isRam(storage);
long nodes = estimateNumNodes(osmFileSize);
return switch (name) {
case "noop" -> 0;
case "sortedtable" -> 300_000_000L + (ram ? 12 * nodes : 0L);
case "sparsearray" -> 300_000_000L + (ram ? 9 * nodes : 0L);
default -> throw new IllegalArgumentException("Unexpected value: " + name);
};
}
/** Estimates the number of bytes of disk this nodemap will use for a given OSM input file. */
static long estimateDiskUsage(String name, String storage, long osmFileSize) {
if (isRam(storage)) {
return 0;
} else {
long nodes = estimateNumNodes(osmFileSize);
return switch (name) {
case "noop" -> 0;
case "sortedtable" -> 12 * nodes;
case "sparsearray" -> 9 * nodes;
default -> throw new IllegalArgumentException("Unexpected value: " + name);
};
}
}
private static boolean isRam(String storage) {
return switch (storage) {
case "ram" -> true;
case "mmap" -> false;
default -> throw new IllegalArgumentException("Unexpected storage value: " + storage);
};
}
private static long estimateNumNodes(long osmFileSize) {
// In February 2022, planet.pbf was 62GB with 750m nodes, so scale from there
return Math.round(750_000_000d * (osmFileSize / 62_000_000_000d));
}
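Plugging the same planet-scale calibration back into these heuristics gives a feel for the magnitudes involved (a rough sketch using the February 2022 figures above):

long planetSize = 62_000_000_000L; // ~62 GB planet.pbf
long nodes = Math.round(750_000_000d * (planetSize / 62_000_000_000d)); // ≈ 750 million nodes
// RAM estimate: ~300 MB fixed overhead, plus the per-node bytes only when storage is "ram"
// Disk estimate: zero for "ram" storage, otherwise the per-node bytes land on disk instead
long sortedTableBytes = 12 * nodes; // ≈ 9 GB for "sortedtable"
long sparseArrayBytes = 9 * nodes;  // ≈ 6.75 GB for "sparsearray"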
/** Returns a longlong map that stores no data and throws on read */
static LongLongMap noop() {
return new LongLongMap() {

View file

@@ -15,7 +15,7 @@ public record PlanetilerConfig(
boolean deferIndexCreation,
boolean optimizeDb,
boolean emitTilesInOrder,
boolean forceOverwrite,
boolean force,
boolean gzipTempStorage,
int sortMaxReaders,
int sortMaxWriters,
@@ -62,7 +62,7 @@ public record PlanetilerConfig(
arguments.getBoolean("defer_mbtiles_index_creation", "skip adding index to mbtiles file", false),
arguments.getBoolean("optimize_db", "optimize mbtiles after writing", false),
arguments.getBoolean("emit_tiles_in_order", "emit tiles in index order", true),
arguments.getBoolean("force", "force overwriting output file", false),
arguments.getBoolean("force", "overwriting output file and ignore disk/RAM warnings", false),
arguments.getBoolean("gzip_temp", "gzip temporary feature storage (uses more CPU, but less disk space)", false),
arguments.getInteger("sort_max_readers", "maximum number of concurrent read threads to use when sorting chunks",
6),

View file

@@ -1,6 +1,8 @@
package com.onthegomap.planetiler.reader.osm;
import com.onthegomap.planetiler.config.Bounds;
import com.onthegomap.planetiler.util.DiskBacked;
import com.onthegomap.planetiler.util.FileUtils;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.ByteBuffer;
@@ -20,7 +22,7 @@ import org.slf4j.LoggerFactory;
*
* @see <a href="https://wiki.openstreetmap.org/wiki/PBF_Format">OSM PBF Format</a>
*/
public class OsmInputFile implements Bounds.Provider, Supplier<OsmBlockSource> {
public class OsmInputFile implements Bounds.Provider, Supplier<OsmBlockSource>, DiskBacked {
private static final Logger LOGGER = LoggerFactory.getLogger(OsmInputFile.class);
@@ -123,6 +125,11 @@ public class OsmInputFile implements Bounds.Provider, Supplier<OsmBlockSource> {
return lazy ? new LazyReader() : new EagerReader();
}
@Override
public long diskUsageBytes() {
return FileUtils.size(path);
}
private FileChannel openChannel() {
try {
return FileChannel.open(path, StandardOpenOption.READ);

View file

@@ -21,12 +21,15 @@ import java.nio.ByteBuffer;
import java.nio.channels.Channels;
import java.nio.channels.FileChannel;
import java.nio.channels.ReadableByteChannel;
import java.nio.file.FileStore;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@@ -47,7 +50,7 @@ import org.slf4j.LoggerFactory;
* Downloader.create(PlanetilerConfig.defaults())
* .add("natural_earth", "http://url/of/natural_earth.zip", Path.of("natural_earth.zip"))
* .add("osm", "http://url/of/file.osm.pbf", Path.of("file.osm.pbf"))
* .start();
* .run();
* }</pre>
* <p>
* As a shortcut to find the URL of a file to download from the <a href="https://download.geofabrik.de/">Geofabrik
@@ -71,6 +74,7 @@ public class Downloader {
private final ExecutorService executor;
private final Stats stats;
private final long chunkSizeBytes;
private final ConcurrentMap<FileStore, Long> bytesToDownload = new ConcurrentHashMap<>();
Downloader(PlanetilerConfig config, Stats stats, long chunkSizeBytes) {
this.chunkSizeBytes = chunkSizeBytes;
@@ -202,6 +206,7 @@ public class Downloader {
Path tmpPath = resourceToDownload.tmpPath();
FileUtils.delete(tmpPath);
FileUtils.deleteOnExit(tmpPath);
checkDiskSpace(tmpPath, metadata.size);
return httpDownload(resourceToDownload, tmpPath)
.thenCompose(result -> {
try {
@@ -223,6 +228,27 @@
}, executor);
}
private void checkDiskSpace(Path destination, long size) {
try {
var fs = Files.getFileStore(destination.toAbsolutePath().getParent());
var totalPendingBytes = bytesToDownload.merge(fs, size, Long::sum);
var availableBytes = fs.getUnallocatedSpace();
if (totalPendingBytes > availableBytes) {
var format = Format.defaultInstance();
String warning =
"Attempting to download " + format.storage(totalPendingBytes) + " to " + fs + " which only has "
+ format.storage(availableBytes) + " available";
if (config.force()) {
LOGGER.warn(warning + ", will probably fail.");
} else {
throw new IllegalArgumentException(warning + ", use the --force argument to continue anyway.");
}
}
} catch (IOException e) {
LOGGER.warn("Unable to check file size for download, you may run out of space: " + e, e);
}
}
private CompletableFuture<ResourceMetadata> httpHeadFollowRedirects(String url, int redirects) {
if (redirects > MAX_REDIRECTS) {
throw new IllegalStateException("Exceeded " + redirects + " redirects for " + url);

View file

@@ -133,7 +133,10 @@ public class FileUtils {
if (Files.isDirectory(path)) {
Files.createDirectories(path);
} else {
Files.createDirectories(path.getParent());
Path parent = path.getParent();
if (parent != null) {
Files.createDirectories(parent);
}
}
} catch (IOException e) {
throw new IllegalStateException("Unable to create parent directories " + path, e);

View file

@@ -79,6 +79,11 @@ public class Format {
return format(num, pad, STORAGE_SUFFIXES);
}
/** Alias for {@link #storage(Number, boolean)} where {@code pad=false}. */
public String storage(Number num) {
return storage(num, false);
}
/** Returns a number formatted like "123" "1.2k" "2.5B", etc. */
public String numeric(Number num, boolean pad) {
return format(num, pad, NUMERIC_SUFFIXES);

View file

@@ -1606,6 +1606,63 @@ public class PlanetilerTests {
}
}
private void runWithProfile(Path tempDir, Profile profile, boolean force) throws Exception {
Planetiler.create(Arguments.of("tmpdir", tempDir, "force", Boolean.toString(force)))
.setProfile(profile)
.addOsmSource("osm", TestUtils.pathToResource("monaco-latest.osm.pbf"))
.addNaturalEarthSource("ne", TestUtils.pathToResource("natural_earth_vector.sqlite"))
.addShapefileSource("shapefile", TestUtils.pathToResource("shapefile.zip"))
.setOutput("mbtiles", tempDir.resolve("output.mbtiles"))
.run();
}
@Test
public void testPlanetilerMemoryCheck(@TempDir Path tempDir) {
assertThrows(Exception.class, () -> runWithProfile(tempDir, new Profile.NullProfile() {
@Override
public long estimateIntermediateDiskBytes(long osmSize) {
return Long.MAX_VALUE / 10L;
}
}, false)
);
assertThrows(Exception.class, () -> runWithProfile(tempDir, new Profile.NullProfile() {
@Override
public long estimateOutputBytes(long osmSize) {
return Long.MAX_VALUE / 10L;
}
}, false)
);
assertThrows(Exception.class, () -> runWithProfile(tempDir, new Profile.NullProfile() {
@Override
public long estimateRamRequired(long osmSize) {
return Long.MAX_VALUE / 10L;
}
}, false)
);
}
@Test
public void testPlanetilerMemoryCheckForce(@TempDir Path tempDir) throws Exception {
runWithProfile(tempDir, new Profile.NullProfile() {
@Override
public long estimateIntermediateDiskBytes(long osmSize) {
return Long.MAX_VALUE / 10L;
}
}, true);
runWithProfile(tempDir, new Profile.NullProfile() {
@Override
public long estimateOutputBytes(long osmSize) {
return Long.MAX_VALUE / 10L;
}
}, true);
runWithProfile(tempDir, new Profile.NullProfile() {
@Override
public long estimateRamRequired(long osmSize) {
return Long.MAX_VALUE / 10L;
}
}, true);
}
@Test
public void testHandleProfileException() throws Exception {
var results = runWithOsmElements(

View file

@@ -1,9 +1,6 @@
package com.onthegomap.planetiler.util;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.*;
import com.onthegomap.planetiler.config.PlanetilerConfig;
import com.onthegomap.planetiler.stats.Stats;
@@ -18,6 +15,7 @@ import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.CsvSource;
@@ -122,4 +120,33 @@ public class DownloaderTest {
assertEquals(FileUtils.size(path), FileUtils.size(dest));
assertEquals(5, resource4.bytesDownloaded());
}
@Test
public void testDownloadFailsIfTooBig() {
var downloader = new Downloader(config, stats, 2L) {
@Override
InputStream openStream(String url) {
throw new AssertionError("Shouldn't get here");
}
@Override
InputStream openStreamRange(String url, long start, long end) {
throw new AssertionError("Shouldn't get here");
}
@Override
CompletableFuture<ResourceMetadata> httpHead(String url) {
return CompletableFuture.completedFuture(new ResourceMetadata(Optional.empty(), url, Long.MAX_VALUE, true));
}
};
Path dest = path.resolve("out");
String url = "http://url";
var resource1 = new Downloader.ResourceToDownload("resource", url, dest);
var exception = assertThrows(ExecutionException.class, () -> downloader.downloadIfNecessary(resource1).get());
assertInstanceOf(IllegalArgumentException.class, exception.getCause());
assertTrue(exception.getMessage().contains("--force"), exception.getMessage());
}
}