Add --keep-unzipped option to avoid unzipping sources each time (#522)

pull/524/head
Michael Barry 2023-03-19 14:01:17 -04:00 zatwierdzone przez GitHub
rodzic 74db638dbc
commit 97231d18ef
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 4AEE18F83AFDEB23
9 zmienionych plików z 107 dodań i 42 usunięć

Wyświetl plik

@ -76,14 +76,14 @@ jobs:
run: |
rm -rf data/out.mbtiles data/tmp
cp branch/planetiler-dist/target/*with-deps.jar run.jar
java -Xms${{ env.RAM }} -Xmx${{ env.RAM }} -jar run.jar --area="${{ env.AREA }}" "${{ env.BOUNDS_ARG }}" --output=data/out.mbtiles 2>&1 | tee log
java -Xms${{ env.RAM }} -Xmx${{ env.RAM }} -jar run.jar --area="${{ env.AREA }}" "${{ env.BOUNDS_ARG }}" --output=data/out.mbtiles --keep-unzipped 2>&1 | tee log
ls -alh run.jar | tee -a log
cat log | strip-ansi > build-info/branchlogs.txt
- name: 'Run base'
run: |
rm -rf data/out.mbtiles data/tmp
cp base/planetiler-dist/target/*with-deps.jar run.jar
java -Xms${{ env.RAM }} -Xmx${{ env.RAM }} -jar run.jar --area="${{ env.AREA }}" "${{ env.BOUNDS_ARG }}" --output=data/out.mbtiles 2>&1 | tee log
java -Xms${{ env.RAM }} -Xmx${{ env.RAM }} -jar run.jar --area="${{ env.AREA }}" "${{ env.BOUNDS_ARG }}" --output=data/out.mbtiles --keep-unzipped 2>&1 | tee log
ls -alh run.jar | tee -a log
cat log | strip-ansi > build-info/baselogs.txt

1
.gitignore vendored
Wyświetl plik

@ -24,3 +24,4 @@ bin/
TODO
data/
*-unzipped/

Wyświetl plik

@ -372,6 +372,7 @@ public class Planetiler {
*/
public Planetiler addGeoPackageSource(String projection, String name, Path defaultPath, String defaultUrl) {
Path path = getPath(name, "geopackage", defaultPath, defaultUrl);
boolean keepUnzipped = getKeepUnzipped(name);
return addStage(name, "Process features in " + path,
ifSourceUsed(name, () -> {
List<Path> sourcePaths = List.of(path);
@ -383,7 +384,9 @@ public class Planetiler {
throw new IllegalArgumentException("No .gpkg files found in " + path);
}
GeoPackageReader.process(projection, name, sourcePaths, tmpDir, featureGroup, config, profile, stats);
GeoPackageReader.process(projection, name, sourcePaths,
keepUnzipped ? path.resolveSibling(path.getFileName() + "-unzipped") : tmpDir, featureGroup, config, profile,
stats, keepUnzipped);
}));
}
@ -451,8 +454,10 @@ public class Planetiler {
@Deprecated(forRemoval = true)
public Planetiler addNaturalEarthSource(String name, Path defaultPath, String defaultUrl) {
Path path = getPath(name, "sqlite db", defaultPath, defaultUrl);
boolean keepUnzipped = getKeepUnzipped(name);
return addStage(name, "Process features in " + path, ifSourceUsed(name, () -> NaturalEarthReader
.process(name, path, tmpDir.resolve("natearth.sqlite"), featureGroup, config, profile, stats)));
.process(name, path, keepUnzipped ? path.resolveSibling(path.getFileName() + "-unzipped") : tmpDir, featureGroup,
config, profile, stats, keepUnzipped)));
}
/**
@ -524,6 +529,11 @@ public class Planetiler {
return this;
}
/**
 * Reads the per-source {@code <name>_keep_unzipped} boolean argument, falling back to
 * {@code config.keepUnzippedSources()} when it is not set.
 *
 * @param name ID of the source the argument applies to
 * @return the value returned by the argument lookup
 */
private boolean getKeepUnzipped(String name) {
  String argumentKey = name + "_keep_unzipped";
  String argumentDescription = "keep unzipped " + name + " after reading";
  boolean fallback = config.keepUnzippedSources();
  return arguments.getBoolean(argumentKey, argumentDescription, fallback);
}
/** Sets the profile implementation that controls how source feature map to output map elements. */
public Planetiler setProfile(Profile profile) {
this.profile = profile;

Wyświetl plik

@ -47,7 +47,8 @@ public record PlanetilerConfig(
boolean osmLazyReads,
boolean skipFilledTiles,
int tileWarningSizeBytes,
Boolean color
Boolean color,
boolean keepUnzippedSources
) {
public static final int MIN_MINZOOM = 0;
@ -169,7 +170,9 @@ public record PlanetilerConfig(
(int) (arguments.getDouble("tile_warning_size_mb",
"Maximum size in megabytes of a tile to emit a warning about",
1d) * 1024 * 1024),
arguments.getBooleanObject("color", "Color the terminal output")
arguments.getBooleanObject("color", "Color the terminal output"),
arguments.getBoolean("keep_unzipped",
"keep unzipped sources by default after reading", false)
);
}

Wyświetl plik

@ -7,6 +7,8 @@ import com.onthegomap.planetiler.stats.Stats;
import com.onthegomap.planetiler.util.FileUtils;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
@ -29,12 +31,14 @@ import org.opengis.referencing.operation.MathTransform;
*/
public class GeoPackageReader extends SimpleReader<SimpleFeature> {
private final boolean keepUnzipped;
private Path extractedPath = null;
private final GeoPackage geoPackage;
private final MathTransform coordinateTransform;
GeoPackageReader(String sourceProjection, String sourceName, Path input, Path tmpDir) {
GeoPackageReader(String sourceProjection, String sourceName, Path input, Path tmpDir, boolean keepUnzipped) {
super(sourceName);
this.keepUnzipped = keepUnzipped;
if (sourceProjection != null) {
try {
@ -57,14 +61,18 @@ public class GeoPackageReader extends SimpleReader<SimpleFeature> {
/**
* Create a {@link GeoPackageManager} for the given path. If {@code input} refers to a file within a ZIP archive,
* first extract it to a temporary location.
* first extract it.
*/
private GeoPackage openGeopackage(Path input, Path tmpDir) throws IOException {
private GeoPackage openGeopackage(Path input, Path unzippedDir) throws IOException {
var inputUri = input.toUri();
if ("jar".equals(inputUri.getScheme())) {
extractedPath = Files.createTempFile(tmpDir, "", ".gpkg");
try (var inputStream = inputUri.toURL().openStream()) {
FileUtils.safeCopy(inputStream, extractedPath);
extractedPath = keepUnzipped ? unzippedDir.resolve(URLEncoder.encode(input.toString(), StandardCharsets.UTF_8)) :
Files.createTempFile(unzippedDir, "", ".gpkg");
FileUtils.createParentDirectories(extractedPath);
if (!keepUnzipped || FileUtils.isNewer(input, extractedPath)) {
try (var inputStream = inputUri.toURL().openStream()) {
FileUtils.safeCopy(inputStream, extractedPath);
}
}
return GeoPackageManager.open(false, extractedPath.toFile());
}
@ -86,15 +94,15 @@ public class GeoPackageReader extends SimpleReader<SimpleFeature> {
* @param config user-defined parameters controlling number of threads and log interval
* @param profile logic that defines what map features to emit for each source feature
* @param stats to keep track of counters and timings
* @param keepUnzipped to keep unzipped files around after running (speeds up subsequent runs, but uses more disk)
* @throws IllegalArgumentException if a problem occurs reading the input file
*/
public static void process(String sourceProjection, String sourceName, List<Path> sourcePaths, Path tmpDir,
FeatureGroup writer, PlanetilerConfig config,
Profile profile, Stats stats) {
FeatureGroup writer, PlanetilerConfig config, Profile profile, Stats stats, boolean keepUnzipped) {
SourceFeatureProcessor.processFiles(
sourceName,
sourcePaths,
path -> new GeoPackageReader(sourceProjection, sourceName, path, tmpDir),
path -> new GeoPackageReader(sourceProjection, sourceName, path, tmpDir, keepUnzipped),
writer, config, profile, stats
);
}
@ -154,8 +162,8 @@ public class GeoPackageReader extends SimpleReader<SimpleFeature> {
public void close() throws IOException {
geoPackage.close();
if (extractedPath != null) {
Files.deleteIfExists(extractedPath);
if (!keepUnzipped && extractedPath != null) {
FileUtils.delete(extractedPath);
}
}
}

Wyświetl plik

@ -10,6 +10,8 @@ import com.onthegomap.planetiler.stats.Stats;
import com.onthegomap.planetiler.util.FileUtils;
import com.onthegomap.planetiler.util.LogUtil;
import java.io.IOException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
@ -40,6 +42,7 @@ public class NaturalEarthReader extends SimpleReader<SimpleFeature> {
private static final Logger LOGGER = LoggerFactory.getLogger(NaturalEarthReader.class);
private final Connection conn;
private final boolean keepUnzipped;
private Path extracted;
static {
@ -51,8 +54,9 @@ public class NaturalEarthReader extends SimpleReader<SimpleFeature> {
}
}
NaturalEarthReader(String sourceName, Path input, Path tmpDir) {
NaturalEarthReader(String sourceName, Path input, Path tmpDir, boolean keepUnzipped) {
super(sourceName);
this.keepUnzipped = keepUnzipped;
LogUtil.setStage(sourceName);
try {
@ -66,41 +70,47 @@ public class NaturalEarthReader extends SimpleReader<SimpleFeature> {
* Renders map features for all elements from a Natural Earth sqlite file, or zip file containing a sqlite file, based
* on the mapping logic defined in {@code profile}.
*
* @param sourceName string ID for this reader to use in logs and stats
* @param sourcePath path to the sqlite or zip file
* @param tmpDir directory to extract the sqlite file into (if input is a zip file)
* @param writer consumer for rendered features
* @param config user-defined parameters controlling number of threads and log interval
* @param profile logic that defines what map features to emit for each source feature
* @param stats to keep track of counters and timings
* @param sourceName string ID for this reader to use in logs and stats
* @param sourcePath path to the sqlite or zip file
* @param tmpDir directory to extract the sqlite file into (if input is a zip file).
* @param writer consumer for rendered features
* @param config user-defined parameters controlling number of threads and log interval
* @param profile logic that defines what map features to emit for each source feature
* @param stats to keep track of counters and timings
* @param keepUnzipped to keep unzipped files around after running (speeds up subsequent runs, but uses more disk)
* @throws IllegalArgumentException if a problem occurs reading the input file
*/
public static void process(String sourceName, Path sourcePath, Path tmpDir, FeatureGroup writer,
PlanetilerConfig config, Profile profile, Stats stats) {
PlanetilerConfig config, Profile profile, Stats stats, boolean keepUnzipped) {
SourceFeatureProcessor.processFiles(
sourceName,
List.of(sourcePath),
path -> new NaturalEarthReader(sourceName, path, tmpDir),
path -> new NaturalEarthReader(sourceName, path, tmpDir, keepUnzipped),
writer, config, profile, stats
);
}
/** Returns a JDBC connection to the sqlite file. Input can be the sqlite file itself or a zip file containing it. */
private Connection open(Path path, Path tmpLocation) throws IOException, SQLException {
private Connection open(Path path, Path unzippedDir) throws IOException, SQLException {
String uri = "jdbc:sqlite:" + path.toAbsolutePath();
if (FileUtils.hasExtension(path, "zip")) {
extracted = tmpLocation;
try (var zipFs = FileSystems.newFileSystem(path)) {
var zipEntry = FileUtils.walkFileSystem(zipFs)
.filter(Files::isRegularFile)
.filter(entry -> FileUtils.hasExtension(entry, "sqlite"))
.findFirst()
.orElseThrow(() -> new IllegalArgumentException("No .sqlite file found inside " + path));
LOGGER.info("unzipping {} to {}", path.toAbsolutePath(), extracted);
Files.copy(Files.newInputStream(zipEntry), extracted, StandardCopyOption.REPLACE_EXISTING);
extracted.toFile().deleteOnExit();
extracted = unzippedDir.resolve(URLEncoder.encode(zipEntry.toString(), StandardCharsets.UTF_8));
FileUtils.createParentDirectories(extracted);
if (!keepUnzipped || FileUtils.isNewer(path, extracted)) {
LOGGER.error("unzipping {} to {}", path.toAbsolutePath(), extracted);
Files.copy(Files.newInputStream(zipEntry), extracted, StandardCopyOption.REPLACE_EXISTING);
}
if (!keepUnzipped) {
extracted.toFile().deleteOnExit();
}
}
uri = "jdbc:sqlite:" + tmpLocation.toAbsolutePath();
uri = "jdbc:sqlite:" + extracted.toAbsolutePath();
}
return DriverManager.getConnection(uri);
}
@ -190,7 +200,7 @@ public class NaturalEarthReader extends SimpleReader<SimpleFeature> {
} catch (SQLException e) {
LOGGER.error("Error closing sqlite file", e);
}
if (extracted != null) {
if (!keepUnzipped && extracted != null) {
FileUtils.deleteFile(extracted);
}
}

Wyświetl plik

@ -3,6 +3,7 @@ package com.onthegomap.planetiler.util;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.nio.file.ClosedFileSystemException;
import java.nio.file.FileStore;
import java.nio.file.FileSystem;
import java.nio.file.FileSystems;
@ -11,6 +12,7 @@ import java.nio.file.NoSuchFileException;
import java.nio.file.Path;
import java.nio.file.PathMatcher;
import java.nio.file.StandardOpenOption;
import java.nio.file.attribute.FileTime;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
@ -341,4 +343,27 @@ public class FileUtils {
throw new UncheckedIOException(e);
}
}
/**
 * Returns the last-modified timestamp for {@code path}, in milliseconds since the epoch.
 * <p>
 * When the path's URI uses the {@code jar} scheme, the timestamp is read from the path named by the string form of
 * the path's file system instead of from {@code path} itself (presumably the archive that contains the entry; this
 * relies on what that file system's {@code toString()} returns).
 *
 * @param path the file to inspect
 * @return last-modified time in epoch milliseconds
 * @throws IOException if the timestamp can't be read, or if the file system has been closed
 */
public static long getLastModifiedTime(Path path) throws IOException {
  try {
    boolean usesJarScheme = "jar".equals(path.toUri().getScheme());
    Path target = usesJarScheme ? Path.of(path.getFileSystem().toString()) : path;
    return Files.getLastModifiedTime(target).toMillis();
  } catch (ClosedFileSystemException e) {
    // surface a closed file system as a checked exception so callers only need to handle IOException
    throw new IOException("File system closed", e);
  }
}
/**
 * Tests whether {@code src} has a later last-modified time than {@code dest}.
 *
 * @param src  the file whose timestamp is compared against {@code dest}
 * @param dest the file to compare against, which may not exist
 * @return {@code true} if {@code dest} does not exist, if {@code src} was modified more recently than {@code dest},
 *         or if either timestamp can't be read
 */
public static boolean isNewer(Path src, Path dest) {
  if (Files.notExists(dest)) {
    return true;
  }
  try {
    long srcMillis = getLastModifiedTime(src);
    long destMillis = getLastModifiedTime(dest);
    return srcMillis > destMillis;
  } catch (IOException e) {
    // an unreadable timestamp defaults to "newer"
    return true;
  }
}
}

Wyświetl plik

@ -14,18 +14,20 @@ import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Consumer;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.Timeout;
import org.junit.jupiter.api.io.TempDir;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import org.locationtech.jts.geom.Geometry;
class GeoPackageReaderTest {
@TempDir
static Path tmpDir;
@Test
@ParameterizedTest
@ValueSource(booleans = {true, false})
@Timeout(30)
void testReadGeoPackage() throws IOException {
void testReadGeoPackage(boolean keepUnzipped) throws IOException {
Path pathOutsideZip = TestUtils.pathToResource("geopackage.gpkg");
Path zipPath = TestUtils.pathToResource("geopackage.gpkg.zip");
Path pathInZip = FileUtils.walkPathWithPattern(zipPath, "*.gpkg").get(0);
@ -35,7 +37,7 @@ class GeoPackageReaderTest {
for (var path : List.of(pathOutsideZip, pathInZip)) {
for (var proj : projections) {
try (
var reader = new GeoPackageReader(proj, "test", path, tmpDir)
var reader = new GeoPackageReader(proj, "test", path, tmpDir, keepUnzipped)
) {
for (int iter = 0; iter < 2; iter++) {
String id = "path=" + path + " proj=" + proj + " iter=" + iter;

Wyświetl plik

@ -17,17 +17,23 @@ import java.util.function.Consumer;
import org.junit.jupiter.api.Timeout;
import org.junit.jupiter.api.io.TempDir;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import org.junit.jupiter.params.provider.CsvSource;
import org.locationtech.jts.geom.Geometry;
class NaturalEarthReaderTest {
@TempDir
Path tempDir;
@ParameterizedTest
@ValueSource(strings = {"natural_earth_vector.sqlite", "natural_earth_vector.sqlite.zip"})
@CsvSource({
"natural_earth_vector.sqlite,false",
"natural_earth_vector.sqlite.zip,false",
"natural_earth_vector.sqlite.zip,true",
})
@Timeout(30)
void testReadNaturalEarth(String filename, @TempDir Path tempDir) {
void testReadNaturalEarth(String filename, boolean keepUnzipped) {
var path = TestUtils.pathToResource(filename);
try (var reader = new NaturalEarthReader("test", path, tempDir)) {
try (var reader = new NaturalEarthReader("test", path, tempDir, keepUnzipped)) {
for (int i = 1; i <= 2; i++) {
assertEquals(7_679, reader.getFeatureCount(), "iter " + i);