Support unzipping GeoPackage sources at runtime (#430)

pull/459/head
Erik Price 2023-01-25 17:56:30 -08:00 zatwierdzone przez GitHub
rodzic ae1317c341
commit a0f8c67c78
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 4AEE18F83AFDEB23
8 zmienionych plików z 199 dodań i 52 usunięć

Wyświetl plik

@ -350,6 +350,48 @@ public class Planetiler {
* <p> * <p>
* To override the location of the {@code geopackage} file, set {@code name_path=newpath.gpkg} in the arguments and to * To override the location of the {@code geopackage} file, set {@code name_path=newpath.gpkg} in the arguments and to
* override the download URL set {@code name_url=http://url/of/file.gpkg}. * override the download URL set {@code name_url=http://url/of/file.gpkg}.
* <p>
* If given a path to a ZIP file containing one or more GeoPackages, each {@code .gpkg} file within will be extracted
* to a temporary directory at runtime.
*
* @param projection the Coordinate Reference System authority code to use, parsed with
* {@link org.geotools.referencing.CRS#decode(String)}
* @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name_path} key is not set through arguments
* @param defaultUrl remote URL to download the file from if {@code download=true} argument is set and {@code
* name_url} argument is not set
* @return this runner instance for chaining
* @see GeoPackageReader
* @see Downloader
*/
public Planetiler addGeoPackageSource(String projection, String name, Path defaultPath, String defaultUrl) {
  Path path = getPath(name, "geopackage", defaultPath, defaultUrl);
  String stageDescription = "Process features in " + path;
  return addStage(name, stageDescription, ifSourceUsed(name, () -> {
    // A zip archive is searched for every *.gpkg entry; any other path is processed as a single file.
    List<Path> geoPackages = FileUtils.hasExtension(path, "zip") ?
      FileUtils.walkPathWithPattern(path, "*.gpkg") :
      List.of(path);
    // Fail loudly instead of silently running a stage that reads nothing.
    if (geoPackages.isEmpty()) {
      throw new IllegalArgumentException("No .gpkg files found in " + path);
    }
    GeoPackageReader.process(projection, name, geoPackages, tmpDir, featureGroup, config, profile, stats);
  }));
}
/**
* Adds a new OGC GeoPackage source that will be processed when {@link #run()} is called.
* <p>
* If the file does not exist and {@code download=true} argument is set, then the file will first be downloaded from
* {@code defaultUrl}.
* <p>
* To override the location of the {@code geopackage} file, set {@code name_path=newpath.gpkg} in the arguments and to
* override the download URL set {@code name_url=http://url/of/file.gpkg}.
* <p>
* If given a path to a ZIP file containing one or more GeoPackages, each {@code .gpkg} file within will be extracted
* to a temporary directory at runtime.
* *
* @param name string to use in stats and logs to identify this stage * @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name_path} key is not set through arguments * @param defaultPath path to the input file to use if {@code name_path} key is not set through arguments
@ -360,25 +402,23 @@ public class Planetiler {
* @see Downloader * @see Downloader
*/ */
public Planetiler addGeoPackageSource(String name, Path defaultPath, String defaultUrl) { public Planetiler addGeoPackageSource(String name, Path defaultPath, String defaultUrl) {
Path path = getPath(name, "geopackage", defaultPath, defaultUrl); return addGeoPackageSource(null, name, defaultPath, defaultUrl);
return addStage(name, "Process features in " + path,
ifSourceUsed(name,
() -> GeoPackageReader.process(name, List.of(path), featureGroup, config, profile, stats)));
} }
/** /**
* Adds a new Natural Earth sqlite file source that will be processed when {@link #run()} is called. * Adds a new Natural Earth sqlite file source that will be processed when {@link #run()} is called.
* <p> * <p>
* To override the location of the {@code sqlite} file, set {@code name_path=newpath.zip} in the arguments and to * To override the location of the {@code sqlite} file, set {@code name_path=newpath.zip} in the arguments and to
* override the download URL set {@code name_url=http://url/of/natural_earth.zip}. * override the download URL set {@code name_url=http://url/of/natural_earth.zip}.
* *
* @deprecated can be replaced by {@link #addGeoPackageSource(String, Path, String)}.
* @param name string to use in stats and logs to identify this stage * @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name} key is not set through arguments. Can be the * @param defaultPath path to the input file to use if {@code name} key is not set through arguments. Can be the
* {@code .sqlite} file or a {@code .zip} file containing the sqlite file. * {@code .sqlite} file or a {@code .zip} file containing the sqlite file.
* @return this runner instance for chaining * @return this runner instance for chaining
* @see NaturalEarthReader * @see NaturalEarthReader
*/ */
@Deprecated(forRemoval = true)
public Planetiler addNaturalEarthSource(String name, Path defaultPath) { public Planetiler addNaturalEarthSource(String name, Path defaultPath) {
return addNaturalEarthSource(name, defaultPath, null); return addNaturalEarthSource(name, defaultPath, null);
} }
@ -392,6 +432,8 @@ public class Planetiler {
* To override the location of the {@code sqlite} file, set {@code name_path=newpath.zip} in the arguments and to * To override the location of the {@code sqlite} file, set {@code name_path=newpath.zip} in the arguments and to
* override the download URL set {@code name_url=http://url/of/natural_earth.zip}. * override the download URL set {@code name_url=http://url/of/natural_earth.zip}.
* *
* @deprecated can be replaced by {@link #addGeoPackageSource(String, Path, String)}.
*
* @param name string to use in stats and logs to identify this stage * @param name string to use in stats and logs to identify this stage
* @param defaultPath path to the input file to use if {@code name} key is not set through arguments. Can be the * @param defaultPath path to the input file to use if {@code name} key is not set through arguments. Can be the
* {@code .sqlite} file or a {@code .zip} file containing the sqlite file. * {@code .sqlite} file or a {@code .zip} file containing the sqlite file.
@ -401,6 +443,7 @@ public class Planetiler {
* @see NaturalEarthReader * @see NaturalEarthReader
* @see Downloader * @see Downloader
*/ */
@Deprecated(forRemoval = true)
public Planetiler addNaturalEarthSource(String name, Path defaultPath, String defaultUrl) { public Planetiler addNaturalEarthSource(String name, Path defaultPath, String defaultUrl) {
Path path = getPath(name, "sqlite db", defaultPath, defaultUrl); Path path = getPath(name, "sqlite db", defaultPath, defaultUrl);
return addStage(name, "Process features in " + path, ifSourceUsed(name, () -> NaturalEarthReader return addStage(name, "Process features in " + path, ifSourceUsed(name, () -> NaturalEarthReader

Wyświetl plik

@ -4,6 +4,10 @@ import com.onthegomap.planetiler.Profile;
import com.onthegomap.planetiler.collection.FeatureGroup; import com.onthegomap.planetiler.collection.FeatureGroup;
import com.onthegomap.planetiler.config.PlanetilerConfig; import com.onthegomap.planetiler.config.PlanetilerConfig;
import com.onthegomap.planetiler.stats.Stats; import com.onthegomap.planetiler.stats.Stats;
import com.onthegomap.planetiler.util.FileUtils;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
@ -17,7 +21,7 @@ import org.geotools.geometry.jts.JTS;
import org.geotools.geometry.jts.WKBReader; import org.geotools.geometry.jts.WKBReader;
import org.geotools.referencing.CRS; import org.geotools.referencing.CRS;
import org.locationtech.jts.geom.Geometry; import org.locationtech.jts.geom.Geometry;
import org.opengis.referencing.crs.CoordinateReferenceSystem; import org.opengis.referencing.FactoryException;
import org.opengis.referencing.operation.MathTransform; import org.opengis.referencing.operation.MathTransform;
/** /**
@ -25,32 +29,72 @@ import org.opengis.referencing.operation.MathTransform;
*/ */
public class GeoPackageReader extends SimpleReader<SimpleFeature> { public class GeoPackageReader extends SimpleReader<SimpleFeature> {
private Path extractedPath = null;
private final GeoPackage geoPackage; private final GeoPackage geoPackage;
private final MathTransform coordinateTransform;
GeoPackageReader(String sourceName, Path input) { GeoPackageReader(String sourceProjection, String sourceName, Path input, Path tmpDir) {
super(sourceName); super(sourceName);
geoPackage = GeoPackageManager.open(false, input.toFile()); if (sourceProjection != null) {
try {
var sourceCRS = CRS.decode(sourceProjection);
var latLonCRS = CRS.decode("EPSG:4326");
coordinateTransform = CRS.findMathTransform(sourceCRS, latLonCRS);
} catch (FactoryException e) {
throw new FileFormatException("Bad reference system", e);
}
} else {
coordinateTransform = null;
}
try {
geoPackage = openGeopackage(input, tmpDir);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
} }
/**
 * Opens the {@link GeoPackage} at the given path. If {@code input} refers to a file within a ZIP archive (its URI
 * has the {@code jar} scheme), the file is first copied to a temporary {@code .gpkg} file inside {@code tmpDir} and
 * opened from there; that temporary file is remembered in {@code extractedPath} so it can be removed later.
 * <p>
 * If copying or opening the extracted file fails, the temporary file is deleted before the failure is propagated so
 * that a failed open does not leave files behind in {@code tmpDir}.
 *
 * @param input  path to the GeoPackage, either a regular file or an entry inside a ZIP archive
 * @param tmpDir directory in which to create the temporary copy of an archived GeoPackage
 * @return the opened GeoPackage
 * @throws IOException if the temporary file cannot be created or the archived entry cannot be read
 */
private GeoPackage openGeopackage(Path input, Path tmpDir) throws IOException {
  var inputUri = input.toUri();
  if (!"jar".equals(inputUri.getScheme())) {
    return GeoPackageManager.open(false, input.toFile());
  }

  Path extracted = Files.createTempFile(tmpDir, "", ".gpkg");
  try {
    try (var inputStream = inputUri.toURL().openStream()) {
      FileUtils.safeCopy(inputStream, extracted);
    }
    GeoPackage opened = GeoPackageManager.open(false, extracted.toFile());
    // Only record the temporary file once it is actually in use by an open GeoPackage.
    extractedPath = extracted;
    return opened;
  } catch (IOException | RuntimeException e) {
    // The caller never gets an instance to close on failure, so clean up the partial copy here.
    try {
      Files.deleteIfExists(extracted);
    } catch (IOException cleanupFailure) {
      e.addSuppressed(cleanupFailure);
    }
    throw e;
  }
}
/** /**
* Renders map features for all elements from an OGC GeoPackage based on the mapping logic defined in {@code * Renders map features for all elements from an OGC GeoPackage based on the mapping logic defined in {@code
* profile}. * profile}.
* *
* @param sourceName string ID for this reader to use in logs and stats * @param sourceProjection code for the coordinate reference system of the input data, to be parsed by
* @param sourcePaths paths to the {@code .gpkg} files on disk * {@link CRS#decode(String)}
* @param writer consumer for rendered features * @param sourceName string ID for this reader to use in logs and stats
* @param config user-defined parameters controlling number of threads and log interval * @param sourcePaths paths to the {@code .gpkg} files on disk
* @param profile logic that defines what map features to emit for each source feature * @param tmpDir path to temporary directory for extracting data from zip files
* @param stats to keep track of counters and timings * @param writer consumer for rendered features
* @param config user-defined parameters controlling number of threads and log interval
* @param profile logic that defines what map features to emit for each source feature
* @param stats to keep track of counters and timings
* @throws IllegalArgumentException if a problem occurs reading the input file * @throws IllegalArgumentException if a problem occurs reading the input file
*/ */
public static void process(String sourceName, List<Path> sourcePaths, FeatureGroup writer, PlanetilerConfig config, public static void process(String sourceProjection, String sourceName, List<Path> sourcePaths, Path tmpDir,
FeatureGroup writer, PlanetilerConfig config,
Profile profile, Stats stats) { Profile profile, Stats stats) {
SourceFeatureProcessor.processFiles( SourceFeatureProcessor.processFiles(
sourceName, sourceName,
sourcePaths, sourcePaths,
path -> new GeoPackageReader(sourceName, path), path -> new GeoPackageReader(sourceProjection, sourceName, path, tmpDir),
writer, config, profile, stats writer, config, profile, stats
); );
} }
@ -68,15 +112,19 @@ public class GeoPackageReader extends SimpleReader<SimpleFeature> {
@Override @Override
public void readFeatures(Consumer<SimpleFeature> next) throws Exception { public void readFeatures(Consumer<SimpleFeature> next) throws Exception {
CoordinateReferenceSystem latLonCRS = CRS.decode("EPSG:4326"); var latLonCRS = CRS.decode("EPSG:4326");
long id = 0; long id = 0;
for (var featureName : geoPackage.getFeatureTables()) { for (var featureName : geoPackage.getFeatureTables()) {
FeatureDao features = geoPackage.getFeatureDao(featureName); FeatureDao features = geoPackage.getFeatureDao(featureName);
MathTransform transform = CRS.findMathTransform( // GeoPackage spec allows this to be 0 (undefined geographic CRS) or
CRS.decode("EPSG:" + features.getSrsId()), // -1 (undefined cartesian CRS). Both cases will throw when trying to
latLonCRS); // call CRS.decode
long srsId = features.getSrsId();
MathTransform transform = (coordinateTransform != null) ? coordinateTransform :
CRS.findMathTransform(CRS.decode("EPSG:" + srsId), latLonCRS);
for (var feature : features.queryForAll()) { for (var feature : features.queryForAll()) {
GeoPackageGeometryData geometryData = feature.getGeometry(); GeoPackageGeometryData geometryData = feature.getGeometry();
@ -103,7 +151,11 @@ public class GeoPackageReader extends SimpleReader<SimpleFeature> {
} }
@Override @Override
public void close() { public void close() throws IOException {
geoPackage.close(); geoPackage.close();
if (extractedPath != null) {
Files.deleteIfExists(extractedPath);
}
} }
} }

Wyświetl plik

@ -253,6 +253,31 @@ public class FileUtils {
} }
} }
/**
 * Copies bytes from {@code inputStream} to {@code destPath}, ensuring that the size is limited to a reasonable value.
 * <p>
 * {@code destPath} is created if it does not exist and any previous content is discarded. {@code inputStream} is not
 * closed by this method.
 *
 * @param inputStream stream to read from until {@code read} returns no more bytes
 * @param destPath    file that receives the copied bytes
 * @throws UncheckedIOException if an IO exception occurs, or if more than {@code ZIP_THRESHOLD_SIZE} bytes are
 *                              copied
 */
public static void safeCopy(InputStream inputStream, Path destPath) {
  // TRUNCATE_EXISTING matters when destPath already exists: with only CREATE + WRITE the new bytes overwrite the
  // start of the file and any longer previous content would remain as a stale tail.
  try (
    var outputStream = Files.newOutputStream(destPath, StandardOpenOption.CREATE,
      StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)
  ) {
    int totalSize = 0;
    int nBytes;
    byte[] buffer = new byte[2048];
    while ((nBytes = inputStream.read(buffer)) > 0) {
      outputStream.write(buffer, 0, nBytes);
      totalSize += nBytes;
      // Checked after each chunk is written, so at most one buffer beyond the limit reaches the disk.
      if (totalSize > ZIP_THRESHOLD_SIZE) {
        throw new IOException("The uncompressed data size " + FORMAT.storage(totalSize) +
          "B is too much for the application resource capacity");
      }
    }
  } catch (IOException e) {
    throw new UncheckedIOException(e);
  }
}
/** /**
* Unzips a zip file from an input stream to {@code destDir}. * Unzips a zip file from an input stream to {@code destDir}.
* *
@ -304,7 +329,7 @@ public class FileUtils {
} }
if (totalEntryArchive > ZIP_THRESHOLD_ENTRIES) { if (totalEntryArchive > ZIP_THRESHOLD_ENTRIES) {
throw new IOException("Too much entries in this archive " + FORMAT.integer(totalEntryArchive) + throw new IOException("Too many entries in this archive " + FORMAT.integer(totalEntryArchive) +
", can lead to inodes exhaustion of the system"); ", can lead to inodes exhaustion of the system");
} }
} }

Wyświetl plik

@ -1668,7 +1668,7 @@ class PlanetilerTests {
.addOsmSource("osm", tempOsm) .addOsmSource("osm", tempOsm)
.addNaturalEarthSource("ne", TestUtils.pathToResource("natural_earth_vector.sqlite")) .addNaturalEarthSource("ne", TestUtils.pathToResource("natural_earth_vector.sqlite"))
.addShapefileSource("shapefile", TestUtils.pathToResource("shapefile.zip")) .addShapefileSource("shapefile", TestUtils.pathToResource("shapefile.zip"))
.addGeoPackageSource("geopackage", TestUtils.pathToResource("geopackage.gpkg"), null) .addGeoPackageSource("geopackage", TestUtils.pathToResource("geopackage.gpkg.zip"), null)
.setOutput("mbtiles", mbtiles) .setOutput("mbtiles", mbtiles)
.run(); .run();
@ -1749,9 +1749,11 @@ class PlanetilerTests {
@ValueSource(strings = { @ValueSource(strings = {
"", "",
"--write-threads=2 --process-threads=2 --feature-read-threads=2 --threads=4", "--write-threads=2 --process-threads=2 --feature-read-threads=2 --threads=4",
"--input-file=geopackage.gpkg"
}) })
void testPlanetilerRunnerGeoPackage(String args) throws Exception { void testPlanetilerRunnerGeoPackage(String args) throws Exception {
Path mbtiles = tempDir.resolve("output.mbtiles"); Path mbtiles = tempDir.resolve("output.mbtiles");
String inputFile = Arguments.fromArgs(args).getString("input-file", "", "geopackage.gpkg.zip");
Planetiler.create(Arguments.fromArgs((args + " --tmpdir=" + tempDir.resolve("data")).split("\\s+"))) Planetiler.create(Arguments.fromArgs((args + " --tmpdir=" + tempDir.resolve("data")).split("\\s+")))
.setProfile(new Profile.NullProfile() { .setProfile(new Profile.NullProfile() {
@ -1762,7 +1764,7 @@ class PlanetilerTests {
.setAttr("name", source.getString("name")); .setAttr("name", source.getString("name"));
} }
}) })
.addGeoPackageSource("geopackage", TestUtils.pathToResource("geopackage.gpkg"), null) .addGeoPackageSource("geopackage", TestUtils.pathToResource(inputFile), null)
.setOutput("mbtiles", mbtiles) .setOutput("mbtiles", mbtiles)
.run(); .run();
@ -1790,7 +1792,7 @@ class PlanetilerTests {
.addOsmSource("osm", TestUtils.pathToResource("monaco-latest.osm.pbf")) .addOsmSource("osm", TestUtils.pathToResource("monaco-latest.osm.pbf"))
.addNaturalEarthSource("ne", TestUtils.pathToResource("natural_earth_vector.sqlite")) .addNaturalEarthSource("ne", TestUtils.pathToResource("natural_earth_vector.sqlite"))
.addShapefileSource("shapefile", TestUtils.pathToResource("shapefile.zip")) .addShapefileSource("shapefile", TestUtils.pathToResource("shapefile.zip"))
.addGeoPackageSource("geopackage", TestUtils.pathToResource("geopackage.gpkg"), null) .addGeoPackageSource("geopackage", TestUtils.pathToResource("geopackage.gpkg.zip"), null)
.setOutput("mbtiles", tempDir.resolve("output.mbtiles")) .setOutput("mbtiles", tempDir.resolve("output.mbtiles"))
.run(); .run();
} }

Wyświetl plik

@ -7,46 +7,61 @@ import com.onthegomap.planetiler.TestUtils;
import com.onthegomap.planetiler.collection.IterableOnce; import com.onthegomap.planetiler.collection.IterableOnce;
import com.onthegomap.planetiler.geo.GeoUtils; import com.onthegomap.planetiler.geo.GeoUtils;
import com.onthegomap.planetiler.stats.Stats; import com.onthegomap.planetiler.stats.Stats;
import com.onthegomap.planetiler.util.FileUtils;
import com.onthegomap.planetiler.worker.WorkerPipeline; import com.onthegomap.planetiler.worker.WorkerPipeline;
import java.io.IOException;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.function.Consumer; import java.util.function.Consumer;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.Timeout; import org.junit.jupiter.api.Timeout;
import org.junit.jupiter.api.io.TempDir;
import org.locationtech.jts.geom.Geometry; import org.locationtech.jts.geom.Geometry;
class GeoPackageReaderTest { class GeoPackageReaderTest {
@TempDir
static Path tmpDir;
@Test @Test
@Timeout(30) @Timeout(30)
void testReadGeoPackage() { void testReadGeoPackage() throws IOException {
Path path = TestUtils.pathToResource("geopackage.gpkg"); Path pathOutsideZip = TestUtils.pathToResource("geopackage.gpkg");
Path zipPath = TestUtils.pathToResource("geopackage.gpkg.zip");
Path pathInZip = FileUtils.walkPathWithPattern(zipPath, "*.gpkg").get(0);
try ( var projections = new String[]{null, "EPSG:4326"};
var reader = new GeoPackageReader("test", path)
) { for (var path : List.of(pathOutsideZip, pathInZip)) {
for (int i = 1; i <= 2; i++) { for (var proj : projections) {
assertEquals(86, reader.getFeatureCount()); try (
List<Geometry> points = new ArrayList<>(); var reader = new GeoPackageReader(proj, "test", path, tmpDir)
List<String> names = new ArrayList<>(); ) {
WorkerPipeline.start("test", Stats.inMemory()) for (int iter = 0; iter < 2; iter++) {
.readFromTiny("files", List.of(Path.of("dummy-path"))) String id = "path=" + path + " proj=" + proj + " iter=" + iter;
.addWorker("geopackage", 1, (IterableOnce<Path> p, Consumer<SimpleFeature> next) -> reader.readFeatures(next)) assertEquals(86, reader.getFeatureCount(), id);
.addBuffer("reader_queue", 100, 1) List<Geometry> points = new ArrayList<>();
.sinkToConsumer("counter", 1, elem -> { List<String> names = new ArrayList<>();
assertTrue(elem.getTag("name") instanceof String); WorkerPipeline.start("test", Stats.inMemory())
assertEquals("test", elem.getSource()); .readFromTiny("files", List.of(Path.of("dummy-path")))
assertEquals("stations", elem.getSourceLayer()); .addWorker("geopackage", 1,
points.add(elem.latLonGeometry()); (IterableOnce<Path> p, Consumer<SimpleFeature> next) -> reader.readFeatures(next))
names.add(elem.getTag("name").toString()); .addBuffer("reader_queue", 100, 1)
}).await(); .sinkToConsumer("counter", 1, elem -> {
assertEquals(86, points.size()); assertTrue(elem.getTag("name") instanceof String);
assertTrue(names.contains("Van Dörn Street")); assertEquals("test", elem.getSource());
var gc = GeoUtils.JTS_FACTORY.createGeometryCollection(points.toArray(new Geometry[0])); assertEquals("stations", elem.getSourceLayer());
var centroid = gc.getCentroid(); points.add(elem.latLonGeometry());
assertEquals(-77.0297995, centroid.getX(), 5, "iter " + i); names.add(elem.getTag("name").toString());
assertEquals(38.9119684, centroid.getY(), 5, "iter " + i); }).await();
assertEquals(86, points.size(), id);
assertTrue(names.contains("Van Dörn Street"), id);
var gc = GeoUtils.JTS_FACTORY.createGeometryCollection(points.toArray(new Geometry[0]));
var centroid = gc.getCentroid();
assertEquals(-77.0297995, centroid.getX(), 5, id);
assertEquals(38.9119684, centroid.getY(), 5, id);
}
}
} }
} }
} }

Wyświetl plik

@ -5,7 +5,9 @@ import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.assertTrue;
import com.onthegomap.planetiler.TestUtils; import com.onthegomap.planetiler.TestUtils;
import java.io.ByteArrayInputStream;
import java.io.IOException; import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.List; import java.util.List;
@ -90,6 +92,14 @@ class FileUtilsTest {
); );
} }
@Test
void testSafeCopy() throws IOException {
  // Round-trip a 2400-character ASCII payload through safeCopy and read it back from disk.
  String expected = "a1".repeat(1200);
  byte[] payload = expected.getBytes(StandardCharsets.UTF_8);
  Path target = tmpDir.resolve("unzipped");

  FileUtils.safeCopy(new ByteArrayInputStream(payload), target);

  assertEquals(expected, Files.readString(target));
}
@Test @Test
void testWalkPathWithPatternDirectory() throws IOException { void testWalkPathWithPatternDirectory() throws IOException {
Path parent = tmpDir.resolve(Path.of("a", "b", "c")); Path parent = tmpDir.resolve(Path.of("a", "b", "c"));

Plik binarny nie jest wyświetlany.

Wyświetl plik

@ -11,7 +11,7 @@ sources:
url: geofabrik:rhode-island url: geofabrik:rhode-island
gpkg: gpkg:
type: geopackage type: geopackage
url: https://example.com/geopackage.gpkg url: https://example.com/geopackage.gpkg.zip
tag_mappings: tag_mappings:
bridge: boolean # input=bridge, output=bridge, type=boolean bridge: boolean # input=bridge, output=bridge, type=boolean
layer: long layer: long