From 97231d18ef32000965a8e7564ddf4336e92de5fa Mon Sep 17 00:00:00 2001 From: Michael Barry Date: Sun, 19 Mar 2023 14:01:17 -0400 Subject: [PATCH] Add --keep-unzipped option to avoid unzipping sources each time (#522) --- .github/workflows/performance.yml | 4 +- .gitignore | 1 + .../com/onthegomap/planetiler/Planetiler.java | 14 +++++- .../planetiler/config/PlanetilerConfig.java | 7 ++- .../planetiler/reader/GeoPackageReader.java | 30 ++++++++----- .../planetiler/reader/NaturalEarthReader.java | 44 ++++++++++++------- .../onthegomap/planetiler/util/FileUtils.java | 25 +++++++++++ .../reader/GeoPackageReaderTest.java | 10 +++-- .../reader/NaturalEarthReaderTest.java | 14 ++++-- 9 files changed, 107 insertions(+), 42 deletions(-) diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 639c37ff..1fb682bb 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -76,14 +76,14 @@ jobs: run: | rm -rf data/out.mbtiles data/tmp cp branch/planetiler-dist/target/*with-deps.jar run.jar - java -Xms${{ env.RAM }} -Xmx${{ env.RAM }} -jar run.jar --area="${{ env.AREA }}" "${{ env.BOUNDS_ARG }}" --output=data/out.mbtiles 2>&1 | tee log + java -Xms${{ env.RAM }} -Xmx${{ env.RAM }} -jar run.jar --area="${{ env.AREA }}" "${{ env.BOUNDS_ARG }}" --output=data/out.mbtiles --keep-unzipped 2>&1 | tee log ls -alh run.jar | tee -a log cat log | strip-ansi > build-info/branchlogs.txt - name: 'Run base' run: | rm -rf data/out.mbtiles data/tmp cp base/planetiler-dist/target/*with-deps.jar run.jar - java -Xms${{ env.RAM }} -Xmx${{ env.RAM }} -jar run.jar --area="${{ env.AREA }}" "${{ env.BOUNDS_ARG }}" --output=data/out.mbtiles 2>&1 | tee log + java -Xms${{ env.RAM }} -Xmx${{ env.RAM }} -jar run.jar --area="${{ env.AREA }}" "${{ env.BOUNDS_ARG }}" --output=data/out.mbtiles --keep-unzipped 2>&1 | tee log ls -alh run.jar | tee -a log cat log | strip-ansi > build-info/baselogs.txt diff --git a/.gitignore b/.gitignore index 1053fae1..9fe443ea 100644 --- a/.gitignore +++ b/.gitignore @@ -24,3 +24,4 @@ bin/ TODO data/ +*-unzipped/ diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java index 596942be..c44c7c95 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java @@ -372,6 +372,7 @@ public class Planetiler { */ public Planetiler addGeoPackageSource(String projection, String name, Path defaultPath, String defaultUrl) { Path path = getPath(name, "geopackage", defaultPath, defaultUrl); + boolean keepUnzipped = getKeepUnzipped(name); return addStage(name, "Process features in " + path, ifSourceUsed(name, () -> { List sourcePaths = List.of(path); @@ -383,7 +384,9 @@ public class Planetiler { throw new IllegalArgumentException("No .gpkg files found in " + path); } - GeoPackageReader.process(projection, name, sourcePaths, tmpDir, featureGroup, config, profile, stats); + GeoPackageReader.process(projection, name, sourcePaths, + keepUnzipped ? path.resolveSibling(path.getFileName() + "-unzipped") : tmpDir, featureGroup, config, profile, + stats, keepUnzipped); })); } @@ -451,8 +454,10 @@ public class Planetiler { @Deprecated(forRemoval = true) public Planetiler addNaturalEarthSource(String name, Path defaultPath, String defaultUrl) { Path path = getPath(name, "sqlite db", defaultPath, defaultUrl); + boolean keepUnzipped = getKeepUnzipped(name); return addStage(name, "Process features in " + path, ifSourceUsed(name, () -> NaturalEarthReader - .process(name, path, tmpDir.resolve("natearth.sqlite"), featureGroup, config, profile, stats))); + .process(name, path, keepUnzipped ? path.resolveSibling(path.getFileName() + "-unzipped") : tmpDir, featureGroup, + config, profile, stats, keepUnzipped))); } /** @@ -524,6 +529,11 @@ public class Planetiler { return this; } + private boolean getKeepUnzipped(String name) { + return arguments.getBoolean(name + "_keep_unzipped", + "keep unzipped " + name + " after reading", config.keepUnzippedSources()); + } + /** Sets the profile implementation that controls how source feature map to output map elements. */ public Planetiler setProfile(Profile profile) { this.profile = profile; diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/config/PlanetilerConfig.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/config/PlanetilerConfig.java index ba50f9db..e7ea24a4 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/config/PlanetilerConfig.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/config/PlanetilerConfig.java @@ -47,7 +47,8 @@ public record PlanetilerConfig( boolean osmLazyReads, boolean skipFilledTiles, int tileWarningSizeBytes, - Boolean color + Boolean color, + boolean keepUnzippedSources ) { public static final int MIN_MINZOOM = 0; @@ -169,7 +170,9 @@ public record PlanetilerConfig( (int) (arguments.getDouble("tile_warning_size_mb", "Maximum size in megabytes of a tile to emit a warning about", 1d) * 1024 * 1024), - arguments.getBooleanObject("color", "Color the terminal output") + arguments.getBooleanObject("color", "Color the terminal output"), + arguments.getBoolean("keep_unzipped", + "keep unzipped sources by default after reading", false) ); } diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/GeoPackageReader.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/GeoPackageReader.java index b03ed1b6..e4568a9b 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/GeoPackageReader.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/GeoPackageReader.java @@ -7,6 +7,8 @@ import com.onthegomap.planetiler.stats.Stats; import com.onthegomap.planetiler.util.FileUtils; import java.io.IOException; import java.io.UncheckedIOException; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; @@ -29,12 +31,14 @@ import org.opengis.referencing.operation.MathTransform; */ public class GeoPackageReader extends SimpleReader { + private final boolean keepUnzipped; private Path extractedPath = null; private final GeoPackage geoPackage; private final MathTransform coordinateTransform; - GeoPackageReader(String sourceProjection, String sourceName, Path input, Path tmpDir) { + GeoPackageReader(String sourceProjection, String sourceName, Path input, Path tmpDir, boolean keepUnzipped) { super(sourceName); + this.keepUnzipped = keepUnzipped; if (sourceProjection != null) { try { @@ -57,14 +61,18 @@ public class GeoPackageReader extends SimpleReader { /** * Create a {@link GeoPackageManager} for the given path. If {@code input} refers to a file within a ZIP archive, - * first extract it to a temporary location. + * first extract it. */ - private GeoPackage openGeopackage(Path input, Path tmpDir) throws IOException { + private GeoPackage openGeopackage(Path input, Path unzippedDir) throws IOException { var inputUri = input.toUri(); if ("jar".equals(inputUri.getScheme())) { - extractedPath = Files.createTempFile(tmpDir, "", ".gpkg"); - try (var inputStream = inputUri.toURL().openStream()) { - FileUtils.safeCopy(inputStream, extractedPath); + extractedPath = keepUnzipped ? unzippedDir.resolve(URLEncoder.encode(input.toString(), StandardCharsets.UTF_8)) : + Files.createTempFile(unzippedDir, "", ".gpkg"); + FileUtils.createParentDirectories(extractedPath); + if (!keepUnzipped || FileUtils.isNewer(input, extractedPath)) { + try (var inputStream = inputUri.toURL().openStream()) { + FileUtils.safeCopy(inputStream, extractedPath); + } } return GeoPackageManager.open(false, extractedPath.toFile()); } @@ -86,15 +94,15 @@ public class GeoPackageReader extends SimpleReader { * @param config user-defined parameters controlling number of threads and log interval * @param profile logic that defines what map features to emit for each source feature * @param stats to keep track of counters and timings + * @param keepUnzipped to keep unzipped files around after running (speeds up subsequent runs, but uses more disk) * @throws IllegalArgumentException if a problem occurs reading the input file */ public static void process(String sourceProjection, String sourceName, List sourcePaths, Path tmpDir, - FeatureGroup writer, PlanetilerConfig config, - Profile profile, Stats stats) { + FeatureGroup writer, PlanetilerConfig config, Profile profile, Stats stats, boolean keepUnzipped) { SourceFeatureProcessor.processFiles( sourceName, sourcePaths, - path -> new GeoPackageReader(sourceProjection, sourceName, path, tmpDir), + path -> new GeoPackageReader(sourceProjection, sourceName, path, tmpDir, keepUnzipped), writer, config, profile, stats ); } @@ -154,8 +162,8 @@ public class GeoPackageReader extends SimpleReader { public void close() throws IOException { geoPackage.close(); - if (extractedPath != null) { - Files.deleteIfExists(extractedPath); + if (!keepUnzipped && extractedPath != null) { + FileUtils.delete(extractedPath); } } } diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/NaturalEarthReader.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/NaturalEarthReader.java index 02342566..32585a19 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/NaturalEarthReader.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/NaturalEarthReader.java @@ -10,6 +10,8 @@ import com.onthegomap.planetiler.stats.Stats; import com.onthegomap.planetiler.util.FileUtils; import com.onthegomap.planetiler.util.LogUtil; import java.io.IOException; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; import java.nio.file.FileSystems; import java.nio.file.Files; import java.nio.file.Path; @@ -40,6 +42,7 @@ public class NaturalEarthReader extends SimpleReader { private static final Logger LOGGER = LoggerFactory.getLogger(NaturalEarthReader.class); private final Connection conn; + private final boolean keepUnzipped; private Path extracted; static { @@ -51,8 +54,9 @@ public class NaturalEarthReader extends SimpleReader { } } - NaturalEarthReader(String sourceName, Path input, Path tmpDir) { + NaturalEarthReader(String sourceName, Path input, Path tmpDir, boolean keepUnzipped) { super(sourceName); + this.keepUnzipped = keepUnzipped; LogUtil.setStage(sourceName); try { @@ -66,41 +70,47 @@ public class NaturalEarthReader extends SimpleReader { * Renders map features for all elements from a Natural Earth sqlite file, or zip file containing a sqlite file, based * on the mapping logic defined in {@code profile}. * - * @param sourceName string ID for this reader to use in logs and stats - * @param sourcePath path to the sqlite or zip file - * @param tmpDir directory to extract the sqlite file into (if input is a zip file) - * @param writer consumer for rendered features - * @param config user-defined parameters controlling number of threads and log interval - * @param profile logic that defines what map features to emit for each source feature - * @param stats to keep track of counters and timings + * @param sourceName string ID for this reader to use in logs and stats + * @param sourcePath path to the sqlite or zip file + * @param tmpDir directory to extract the sqlite file into (if input is a zip file). + * @param writer consumer for rendered features + * @param config user-defined parameters controlling number of threads and log interval + * @param profile logic that defines what map features to emit for each source feature + * @param stats to keep track of counters and timings + * @param keepUnzipped to keep unzipped files around after running (speeds up subsequent runs, but uses more disk) * @throws IllegalArgumentException if a problem occurs reading the input file */ public static void process(String sourceName, Path sourcePath, Path tmpDir, FeatureGroup writer, - PlanetilerConfig config, Profile profile, Stats stats) { + PlanetilerConfig config, Profile profile, Stats stats, boolean keepUnzipped) { SourceFeatureProcessor.processFiles( sourceName, List.of(sourcePath), - path -> new NaturalEarthReader(sourceName, path, tmpDir), + path -> new NaturalEarthReader(sourceName, path, tmpDir, keepUnzipped), writer, config, profile, stats ); } /** Returns a JDBC connection to the sqlite file. Input can be the sqlite file itself or a zip file containing it. */ - private Connection open(Path path, Path tmpLocation) throws IOException, SQLException { + private Connection open(Path path, Path unzippedDir) throws IOException, SQLException { String uri = "jdbc:sqlite:" + path.toAbsolutePath(); if (FileUtils.hasExtension(path, "zip")) { - extracted = tmpLocation; try (var zipFs = FileSystems.newFileSystem(path)) { var zipEntry = FileUtils.walkFileSystem(zipFs) .filter(Files::isRegularFile) .filter(entry -> FileUtils.hasExtension(entry, "sqlite")) .findFirst() .orElseThrow(() -> new IllegalArgumentException("No .sqlite file found inside " + path)); - LOGGER.info("unzipping {} to {}", path.toAbsolutePath(), extracted); - Files.copy(Files.newInputStream(zipEntry), extracted, StandardCopyOption.REPLACE_EXISTING); - extracted.toFile().deleteOnExit(); + extracted = unzippedDir.resolve(URLEncoder.encode(zipEntry.toString(), StandardCharsets.UTF_8)); + FileUtils.createParentDirectories(extracted); + if (!keepUnzipped || FileUtils.isNewer(path, extracted)) { + LOGGER.error("unzipping {} to {}", path.toAbsolutePath(), extracted); + Files.copy(Files.newInputStream(zipEntry), extracted, StandardCopyOption.REPLACE_EXISTING); + } + if (!keepUnzipped) { + extracted.toFile().deleteOnExit(); + } } - uri = "jdbc:sqlite:" + tmpLocation.toAbsolutePath(); + uri = "jdbc:sqlite:" + extracted.toAbsolutePath(); } return DriverManager.getConnection(uri); } @@ -190,7 +200,7 @@ public class NaturalEarthReader extends SimpleReader { } catch (SQLException e) { LOGGER.error("Error closing sqlite file", e); } - if (extracted != null) { + if (!keepUnzipped && extracted != null) { FileUtils.deleteFile(extracted); } } diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/FileUtils.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/FileUtils.java index e78c7a64..cbd60373 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/FileUtils.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/FileUtils.java @@ -3,6 +3,7 @@ package com.onthegomap.planetiler.util; import java.io.IOException; import java.io.InputStream; import java.io.UncheckedIOException; +import java.nio.file.ClosedFileSystemException; import java.nio.file.FileStore; import java.nio.file.FileSystem; import java.nio.file.FileSystems; @@ -11,6 +12,7 @@ import java.nio.file.NoSuchFileException; import java.nio.file.Path; import java.nio.file.PathMatcher; import java.nio.file.StandardOpenOption; +import java.nio.file.attribute.FileTime; import java.util.Comparator; import java.util.List; import java.util.Objects; @@ -341,4 +343,27 @@ public class FileUtils { throw new UncheckedIOException(e); } } + + public static long getLastModifiedTime(Path path) throws IOException { + try { + FileTime time; + if ("jar".equals(path.toUri().getScheme())) { + time = Files.getLastModifiedTime(Path.of(path.getFileSystem().toString())); + } else { + time = Files.getLastModifiedTime(path); + } + return time.toMillis(); + } catch (ClosedFileSystemException e) { + throw new IOException("File system closed", e); + } + } + + /** Returns {@code true} if src is newer than dest, or if dest does not exist. Defaults to true if an error occurs. */ + public static boolean isNewer(Path src, Path dest) { + try { + return Files.notExists(dest) || getLastModifiedTime(src) > getLastModifiedTime(dest); + } catch (IOException e) { + return true; + } + } } diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/reader/GeoPackageReaderTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/reader/GeoPackageReaderTest.java index 3ba212b5..1957a262 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/reader/GeoPackageReaderTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/reader/GeoPackageReaderTest.java @@ -14,18 +14,20 @@ import java.nio.file.Path; import java.util.ArrayList; import java.util.List; import java.util.function.Consumer; -import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; import org.locationtech.jts.geom.Geometry; class GeoPackageReaderTest { @TempDir static Path tmpDir; - @Test + @ParameterizedTest + @ValueSource(booleans = {true, false}) @Timeout(30) - void testReadGeoPackage() throws IOException { + void testReadGeoPackage(boolean keepUnzipped) throws IOException { Path pathOutsideZip = TestUtils.pathToResource("geopackage.gpkg"); Path zipPath = TestUtils.pathToResource("geopackage.gpkg.zip"); Path pathInZip = FileUtils.walkPathWithPattern(zipPath, "*.gpkg").get(0); @@ -35,7 +37,7 @@ class GeoPackageReaderTest { for (var path : List.of(pathOutsideZip, pathInZip)) { for (var proj : projections) { try ( - var reader = new GeoPackageReader(proj, "test", path, tmpDir) + var reader = new GeoPackageReader(proj, "test", path, tmpDir, keepUnzipped) ) { for (int iter = 0; iter < 2; iter++) { String id = "path=" + path + " proj=" + proj + " iter=" + iter; diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/reader/NaturalEarthReaderTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/reader/NaturalEarthReaderTest.java index 31fb1c1d..95d9a844 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/reader/NaturalEarthReaderTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/reader/NaturalEarthReaderTest.java @@ -17,17 +17,23 @@ import java.util.function.Consumer; import org.junit.jupiter.api.Timeout; import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.ValueSource; +import org.junit.jupiter.params.provider.CsvSource; import org.locationtech.jts.geom.Geometry; class NaturalEarthReaderTest { + @TempDir + Path tempDir; @ParameterizedTest - @ValueSource(strings = {"natural_earth_vector.sqlite", "natural_earth_vector.sqlite.zip"}) + @CsvSource({ + "natural_earth_vector.sqlite,false", + "natural_earth_vector.sqlite.zip,false", + "natural_earth_vector.sqlite.zip,true", + }) @Timeout(30) - void testReadNaturalEarth(String filename, @TempDir Path tempDir) { + void testReadNaturalEarth(String filename, boolean keepUnzipped) { var path = TestUtils.pathToResource(filename); - try (var reader = new NaturalEarthReader("test", path, tempDir)) { + try (var reader = new NaturalEarthReader("test", path, tempDir, keepUnzipped)) { for (int i = 1; i <= 2; i++) { assertEquals(7_679, reader.getFeatureCount(), "iter " + i);