diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java
index 2a0bf048..ba9ba66e 100644
--- a/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java
+++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java
@@ -29,7 +29,6 @@ import java.io.IOException;
import java.nio.file.FileSystem;
import java.nio.file.Files;
import java.nio.file.Path;
-import java.nio.file.PathMatcher;
import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
@@ -253,48 +252,53 @@ public class Planetiler {
}
/**
- * Adds a new ESRI shapefile directory source that will process all files under {@param basePath} matching
- * {@param globPattern} using an explicit projection.
+ * Adds a new ESRI shapefile glob source that will process all files under {@param basePath} matching
+ * {@param globPattern}. {@param basePath} may be a directory or ZIP archive.
+ *
+ * @param sourceName string to use in stats and logs to identify this stage
+ * @param basePath path to the directory or zip file containing shapefiles to process
+ * @param globPattern string to match filenames against, as described in {@link FileSystem#getPathMatcher(String)}.
+ * @return this runner instance for chaining
+ * @see ShapefileReader
+ */
+ public Planetiler addShapefileGlobSource(String sourceName, Path basePath, String globPattern) {
+ return addShapefileGlobSource(null, sourceName, basePath, globPattern, null); // no explicit projection, no default URL
+ }
+
+ /**
+ * Adds a new ESRI shapefile glob source that will process all files under {@param basePath} matching
+ * {@param globPattern} using an explicit projection. {@param basePath} may be a directory or ZIP archive.
+ *
+ * If {@param globPattern} matches a ZIP archive, all files ending in {@code .shp} within the archive will be used for
+ * this source.
+ *
+ * If the file does not exist and {@code download=true} argument is set, then the file will first be downloaded from
+ * {@code defaultUrl}.
+ *
*
* @param projection the Coordinate Reference System authority code to use, parsed with
* {@link org.geotools.referencing.CRS#decode(String)}
* @param sourceName string to use in stats and logs to identify this stage
- * @param basePath path to the directory containing shapefiles to process
+ * @param basePath path to the directory or zip file containing shapefiles to process
* @param globPattern string to match filenames against, as described in {@link FileSystem#getPathMatcher(String)}.
+ * @param defaultUrl remote URL to download the file from if {@code download=true} argument is set and
+ * {@code name_url} argument is not set
* @return this runner instance for chaining
* @see ShapefileReader
*/
- public Planetiler addShapefileDirectorySource(String projection, String sourceName, Path basePath,
- String globPattern) {
- Path dirPath = getPath(sourceName, "shapefile directory", basePath, null);
- PathMatcher matcher = dirPath.getFileSystem().getPathMatcher("glob:" + globPattern);
+ public Planetiler addShapefileGlobSource(String projection, String sourceName, Path basePath,
+ String globPattern, String defaultUrl) {
+ Path dirPath = getPath(sourceName, "shapefile glob", basePath, defaultUrl); // resolved location: reported below and walked
return addStage(sourceName, "Process all files matching " + dirPath + "/" + globPattern,
ifSourceUsed(sourceName, () -> {
- try (
- var walk = Files.walk(dirPath);
- var sourcePaths = walk.filter(path -> matcher.matches(path.getFileName()))
- ) {
- ShapefileReader.processWithProjection(projection, sourceName, sourcePaths.toList(), featureGroup, config,
- profile, stats);
- }
+ var sourcePaths = FileUtils.walkPathWithPattern(dirPath, globPattern,
+ zipPath -> FileUtils.walkPathWithPattern(zipPath, "*.shp"));
+ ShapefileReader.processWithProjection(projection, sourceName, sourcePaths, featureGroup, config,
+ profile, stats);
}));
}
- /**
- * Adds a new ESRI shapefile directory source that will process all files under {@param basePath} matching
- * {@param globPattern}.
- *
- * @param sourceName string to use in stats and logs to identify this stage
- * @param basePath path to the directory containing shapefiles to process
- * @param globPattern string to match filenames against, as described in {@link FileSystem#getPathMatcher(String)}.
- * @return this runner instance for chaining
- * @see ShapefileReader
- */
- public Planetiler addShapefileDirectorySource(String sourceName, Path basePath, String globPattern) {
- return addShapefileDirectorySource(null, sourceName, basePath, globPattern);
- }
-
/**
* Adds a new ESRI shapefile source that will be processed with an explicit projection when {@link #run()} is called.
@@ -320,9 +324,14 @@ public class Planetiler {
public Planetiler addShapefileSource(String projection, String name, Path defaultPath, String defaultUrl) {
Path path = getPath(name, "shapefile", defaultPath, defaultUrl);
return addStage(name, "Process features in " + path,
- ifSourceUsed(name,
- () -> ShapefileReader.processWithProjection(projection, name, List.of(path), featureGroup, config, profile,
- stats)));
+ ifSourceUsed(name, () -> {
+ List<Path> sourcePaths = List.of(path);
+ if (FileUtils.hasExtension(path, "zip") || Files.isDirectory(path)) { // expand archives/directories to their .shp files
+ sourcePaths = FileUtils.walkPathWithPattern(path, "*.shp");
+ }
+
+ ShapefileReader.processWithProjection(projection, name, sourcePaths, featureGroup, config, profile, stats);
+ }));
}
/**
diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/ShapefileReader.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/ShapefileReader.java
index 4d62cb48..5b516247 100644
--- a/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/ShapefileReader.java
+++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/ShapefileReader.java
@@ -4,17 +4,12 @@ import com.onthegomap.planetiler.Profile;
import com.onthegomap.planetiler.collection.FeatureGroup;
import com.onthegomap.planetiler.config.PlanetilerConfig;
import com.onthegomap.planetiler.stats.Stats;
-import com.onthegomap.planetiler.util.FileUtils;
import java.io.IOException;
import java.io.UncheckedIOException;
-import java.net.URI;
-import java.nio.file.FileSystems;
-import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.List;
import java.util.function.Consumer;
-import java.util.stream.Stream;
import org.geotools.data.FeatureSource;
import org.geotools.data.shapefile.ShapefileDataStore;
import org.geotools.feature.FeatureCollection;
@@ -96,34 +91,9 @@ public class ShapefileReader extends SimpleReader {
);
}
- private static URI findShpFile(Path path, Stream<Path> walkStream) {
- return walkStream
- .filter(z -> FileUtils.hasExtension(z, "shp"))
- .findFirst()
- .orElseThrow(() -> new IllegalArgumentException("No .shp file found inside " + path))
- .toUri();
- }
-
private ShapefileDataStore open(Path path) {
try {
- URI uri;
- if (Files.isDirectory(path)) {
- try (var walkStream = Files.walk(path)) {
- uri = findShpFile(path, walkStream);
- }
- } else if (FileUtils.hasExtension(path, "zip")) {
- try (
- var zipFs = FileSystems.newFileSystem(path);
- var walkStream = FileUtils.walkFileSystem(zipFs)
- ) {
- uri = findShpFile(path, walkStream);
- }
- } else if (FileUtils.hasExtension(path, "shp")) {
- uri = path.toUri();
- } else {
- throw new IllegalArgumentException("Invalid shapefile input: " + path + " must be zip or shp");
- }
- var store = new ShapefileDataStore(uri.toURL());
+ var store = new ShapefileDataStore(path.toUri().toURL());
store.setTryCPGFile(true);
return store;
} catch (IOException e) {
diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/FileUtils.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/FileUtils.java
index 602563de..d3c3b4e9 100644
--- a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/FileUtils.java
+++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/FileUtils.java
@@ -5,12 +5,16 @@ import java.io.InputStream;
import java.io.UncheckedIOException;
import java.nio.file.FileStore;
import java.nio.file.FileSystem;
+import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.NoSuchFileException;
import java.nio.file.Path;
+import java.nio.file.PathMatcher;
import java.nio.file.StandardOpenOption;
import java.util.Comparator;
+import java.util.List;
import java.util.Objects;
+import java.util.function.Function;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import java.util.zip.ZipEntry;
@@ -22,6 +26,7 @@ import org.slf4j.LoggerFactory;
* Convenience methods for working with files on disk.
*/
public class FileUtils {
+
private static final Format FORMAT = Format.defaultInstance();
// Prevent zip-bomb attack, see https://rules.sonarsource.com/java/RSPEC-5042
private static final int ZIP_THRESHOLD_ENTRIES = 10_000;
@@ -45,6 +50,61 @@ public class FileUtils {
});
}
+ /**
+ * Returns list of paths matching {@param pattern} within {@param basePath}.
+ *
+ * If {@param basePath} is a directory, then {@param walkZipFile} will be invoked for each matching {@code .zip} file
+ * found. This function should return paths of interest within the zip file.
+ *
+ * @param basePath file path to recursively walk, either a directory or ZIP archive.
+ * @param pattern pattern to match filenames against, as described in {@link FileSystem#getPathMatcher(String)}.
+ * @param walkZipFile callback function to recurse into matching {@code .zip} files.
+ */
+ public static List<Path> walkPathWithPattern(Path basePath, String pattern,
+ Function<Path, List<Path>> walkZipFile) {
+ PathMatcher matcher = basePath.getFileSystem().getPathMatcher("glob:" + pattern);
+
+ try {
+ if (FileUtils.hasExtension(basePath, "zip")) { // a ZIP archive is opened as its own file system and walked
+ try (
+ var zipFs = FileSystems.newFileSystem(basePath);
+ var walkStream = FileUtils.walkFileSystem(zipFs)
+ ) {
+ return walkStream
+ .filter(p -> p.getFileName() != null && matcher.matches(p.getFileName()))
+ .toList();
+ }
+ } else if (Files.isDirectory(basePath)) {
+ try (var walk = Files.walk(basePath)) {
+ return walk // a root path has no file name, so guard before matching (same as the zip branch)
+ .filter(path -> path.getFileName() != null && matcher.matches(path.getFileName()))
+ .flatMap(path -> {
+ if (FileUtils.hasExtension(path, "zip")) {
+ return walkZipFile.apply(path).stream();
+ } else {
+ return Stream.of(path);
+ }
+ })
+ .toList();
+ }
+ } else {
+ throw new IllegalArgumentException("No files matching " + basePath + "/" + pattern);
+ }
+ } catch (IOException exc) {
+ throw new UncheckedIOException(exc);
+ }
+ }
+
+ /**
+ * Returns list of paths matching {@param pattern} within {@param basePath}.
+ *
+ * @param basePath file path to recursively walk, either a directory or ZIP archive.
+ * @param pattern pattern to match filenames against, as described in {@link FileSystem#getPathMatcher(String)}.
+ */
+ public static List<Path> walkPathWithPattern(Path basePath, String pattern) {
+ return walkPathWithPattern(basePath, pattern, zipPath -> List.of(zipPath)); // zip files are returned as-is, not expanded
+ }
+
/** Returns true if {@code path} ends with ".extension" (case-insensitive). */
public static boolean hasExtension(Path path, String extension) {
return path.toString().toLowerCase().endsWith("." + extension.toLowerCase());
diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/PlanetilerTests.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/PlanetilerTests.java
index 2ee843ff..faf435b7 100644
--- a/planetiler-core/src/test/java/com/onthegomap/planetiler/PlanetilerTests.java
+++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/PlanetilerTests.java
@@ -1700,14 +1700,17 @@ class PlanetilerTests {
.setAttr("source", source.getSource());
}
})
- .addShapefileDirectorySource("shapefile-dir", resourceDir, "shape*.zip")
+ // Match *.shp within [shapefile.zip, shapefile-copy.zip]
+ .addShapefileGlobSource("shapefile-glob", resourceDir, "shape*.zip")
+ // Match *.shp within shapefile.zip
+ .addShapefileGlobSource("shapefile-glob-zip", resourceDir.resolve("shapefile.zip"), "*.shp")
+ // Match *.shp within shapefile.zip
.addShapefileSource("shapefile", resourceDir.resolve("shapefile.zip"))
.setOutput("mbtiles", mbtiles)
.run();
try (Mbtiles db = Mbtiles.newReadOnlyDatabase(mbtiles)) {
- long fileCount = 0;
- long dirCount = 0;
+ long fileCount = 0, globCount = 0, globZipCount = 0;
var tileMap = TestUtils.getTileMap(db);
for (var tile : tileMap.values()) {
for (var feature : tile) {
@@ -1715,15 +1718,17 @@ class PlanetilerTests {
switch ((String) feature.attrs().get("source")) {
case "shapefile" -> fileCount++;
- case "shapefile-dir" -> dirCount++;
+ case "shapefile-glob" -> globCount++;
+ case "shapefile-glob-zip" -> globZipCount++;
}
}
}
- // Input file was copied twice into test directory, directory source should have
- // 2x the number of features.
assertTrue(fileCount > 0);
- assertEquals(2 * fileCount, dirCount);
+ // `shapefile` and `shapefile-glob-zip` both match only one file.
+ assertEquals(fileCount, globZipCount);
+ // `shapefile-glob` matches two input files, should have 2x number of features of `shapefile`.
+ assertEquals(2 * fileCount, globCount);
}
}
diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/reader/ShapefileReaderTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/reader/ShapefileReaderTest.java
index 7008b327..47e66a3c 100644
--- a/planetiler-core/src/test/java/com/onthegomap/planetiler/reader/ShapefileReaderTest.java
+++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/reader/ShapefileReaderTest.java
@@ -27,18 +27,11 @@ class ShapefileReaderTest {
@TempDir
private Path tempDir;
- @Test
- @Timeout(30)
- void testReadShapefile() {
- testReadShapefile(TestUtils.pathToResource("shapefile.zip"));
- }
-
@Test
@Timeout(30)
@DisabledOnOs(OS.WINDOWS) // the zip file doesn't fully close, which causes trouble running test on windows
void testReadShapefileExtracted() throws IOException {
var extracted = TestUtils.extractPathToResource(tempDir, "shapefile.zip");
- testReadShapefile(extracted);
try (var fs = FileSystems.newFileSystem(extracted)) {
var path = fs.getPath("shapefile", "stations.shp");
testReadShapefile(path);
@@ -50,7 +43,6 @@ class ShapefileReaderTest {
void testReadShapefileUnzipped() throws IOException {
var dest = tempDir.resolve("shapefile.zip");
FileUtils.unzipResource("/shapefile.zip", dest);
- testReadShapefile(dest);
testReadShapefile(dest.resolve("shapefile").resolve("stations.shp"));
}
diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/FileUtilsTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/FileUtilsTest.java
index 63babeed..8a94fc13 100644
--- a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/FileUtilsTest.java
+++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/FileUtilsTest.java
@@ -4,11 +4,15 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
+import com.onthegomap.planetiler.TestUtils;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
+import java.util.List;
import java.util.Set;
+import java.util.function.Function;
import java.util.stream.Collectors;
+import java.util.stream.Stream;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
@@ -85,4 +89,57 @@ class FileUtilsTest {
Files.readString(dest.resolve("shapefile").resolve("stations.cpg"))
);
}
+
+ @Test
+ void testWalkPathWithPatternDirectory() throws IOException {
+ Path parent = tmpDir.resolve(Path.of("a", "b", "c"));
+ FileUtils.createDirectory(parent);
+
+ List<Path> txtFiles = Stream.of("1.txt", "2.txt").map(parent::resolve).toList();
+
+ for (var file : txtFiles) {
+ Files.write(file, new byte[]{});
+ }
+
+ Files.write(parent.resolve("something-that-doesnt-match.blah"), new byte[]{});
+
+ var matchingPaths = FileUtils.walkPathWithPattern(parent, "*.txt");
+
+ assertEquals(
+ txtFiles.stream().sorted().toList(),
+ matchingPaths.stream().sorted().toList()
+ );
+ }
+
+ @Test
+ void testWalkPathWithPatternDirectoryZip() throws IOException {
+ Path parent = tmpDir.resolve(Path.of("a", "b", "c"));
+ FileUtils.createDirectory(parent);
+
+ Path zipFile = parent.resolve("fake-zip-file.zip");
+
+ Files.write(zipFile, new byte[]{});
+ Files.write(parent.resolve("something-that-doesnt-match.blah"), new byte[]{});
+
+ Function<Path, List<Path>> mockWalkZipFile = zipPath -> List.of(zipPath.resolve("inner.txt"));
+
+ // When we don't provide a callback to recurse into zip files, the path to the zip
+ // itself should be returned.
+ assertEquals(List.of(zipFile), FileUtils.walkPathWithPattern(parent, "*.zip"));
+
+ // Otherwise, the files inside the zip should be returned.
+ assertEquals(List.of(zipFile.resolve("inner.txt")),
+ FileUtils.walkPathWithPattern(parent, "*.zip", mockWalkZipFile));
+ }
+
+ @Test
+ void testWalkPathWithPatternSingleZip() {
+ // The glob's character class selects both the .shp and the .shx entry inside the archive.
+ var found = FileUtils.walkPathWithPattern(TestUtils.pathToResource("shapefile.zip"), "stations.sh[px]");
+ List<String> names = found.stream().map(Path::toString).sorted().toList();
+
+ assertEquals(
+ List.of("/shapefile/stations.shp", "/shapefile/stations.shx"),
+ names);
+ }
}