kopia lustrzana https://github.com/onthegomap/planetiler
Improve shapefile zip reads (#314)
rodzic
ffbffb577b
commit
93fe75782e
|
@ -8,10 +8,13 @@ import com.onthegomap.planetiler.util.FileUtils;
|
|||
import com.onthegomap.planetiler.worker.WorkerPipeline;
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.io.UncheckedIOException;
|
||||
import java.net.URI;
|
||||
import java.nio.file.FileSystems;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.HashMap;
|
||||
import java.util.stream.Stream;
|
||||
import org.geotools.data.FeatureSource;
|
||||
import org.geotools.data.shapefile.ShapefileDataStore;
|
||||
import org.geotools.feature.FeatureCollection;
|
||||
|
@ -60,8 +63,10 @@ public class ShapefileReader extends SimpleReader implements Closeable {
|
|||
for (int i = 0; i < attributeNames.length; i++) {
|
||||
attributeNames[i] = inputSource.getSchema().getDescriptor(i).getLocalName();
|
||||
}
|
||||
} catch (IOException | FactoryException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch (IOException e) {
|
||||
throw new UncheckedIOException(e);
|
||||
} catch (FactoryException e) {
|
||||
throw new FileFormatException("Bad reference system", e);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -109,16 +114,27 @@ public class ShapefileReader extends SimpleReader implements Closeable {
|
|||
processWithProjection(null, sourceName, input, writer, config, profile, stats);
|
||||
}
|
||||
|
||||
private static URI findShpFile(Path path, Stream<Path> walkStream) {
|
||||
return walkStream
|
||||
.filter(z -> FileUtils.hasExtension(z, "shp"))
|
||||
.findFirst()
|
||||
.orElseThrow(() -> new IllegalArgumentException("No .shp file found inside " + path))
|
||||
.toUri();
|
||||
}
|
||||
|
||||
private ShapefileDataStore open(Path path) {
|
||||
try {
|
||||
URI uri;
|
||||
if (FileUtils.hasExtension(path, "zip")) {
|
||||
try (var zipFs = FileSystems.newFileSystem(path)) {
|
||||
Path shapeFileInZip = FileUtils.walkFileSystem(zipFs)
|
||||
.filter(z -> FileUtils.hasExtension(z, "shp"))
|
||||
.findFirst()
|
||||
.orElseThrow(() -> new IllegalArgumentException("No .shp file found inside " + path));
|
||||
uri = shapeFileInZip.toUri();
|
||||
if (Files.isDirectory(path)) {
|
||||
try (var walkStream = Files.walk(path)) {
|
||||
uri = findShpFile(path, walkStream);
|
||||
}
|
||||
} else if (FileUtils.hasExtension(path, "zip")) {
|
||||
try (
|
||||
var zipFs = FileSystems.newFileSystem(path);
|
||||
var walkStream = FileUtils.walkFileSystem(zipFs)
|
||||
) {
|
||||
uri = findShpFile(path, walkStream);
|
||||
}
|
||||
} else if (FileUtils.hasExtension(path, "shp")) {
|
||||
uri = path.toUri();
|
||||
|
|
|
@ -1,15 +1,20 @@
|
|||
package com.onthegomap.planetiler.util;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.UncheckedIOException;
|
||||
import java.nio.file.FileStore;
|
||||
import java.nio.file.FileSystem;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.NoSuchFileException;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.Comparator;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Stream;
|
||||
import java.util.stream.StreamSupport;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipInputStream;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
@ -17,6 +22,11 @@ import org.slf4j.LoggerFactory;
|
|||
* Convenience methods for working with files on disk.
|
||||
*/
|
||||
public class FileUtils {
|
||||
private static final Format FORMAT = Format.defaultInstance();
|
||||
// Prevent zip-bomb attack, see https://rules.sonarsource.com/java/RSPEC-5042
|
||||
private static final int ZIP_THRESHOLD_ENTRIES = 10_000;
|
||||
private static final int ZIP_THRESHOLD_SIZE = 1_000_000_000;
|
||||
private static final double ZIP_THRESHOLD_RATIO = 1_000;
|
||||
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(FileUtils.class);
|
||||
|
||||
|
@ -168,4 +178,80 @@ public class FileUtils {
|
|||
public static void deleteOnExit(Path path) {
|
||||
path.toFile().deleteOnExit();
|
||||
}
|
||||
|
||||
/**
|
||||
* Unzips a zip file on the classpath to {@code destDir}.
|
||||
*
|
||||
* @throws UncheckedIOException if an IO exception occurs
|
||||
*/
|
||||
public static void unzipResource(String resource, Path dest) {
|
||||
try (var is = FileUtils.class.getResourceAsStream(resource)) {
|
||||
Objects.requireNonNull(is, "Resource not found on classpath: " + resource);
|
||||
unzip(is, dest);
|
||||
} catch (IOException e) {
|
||||
throw new UncheckedIOException(e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Unzips a zip file from an input stream to {@code destDir}.
|
||||
*
|
||||
* @throws UncheckedIOException if an IO exception occurs
|
||||
*/
|
||||
public static void unzip(InputStream input, Path destDir) {
|
||||
int totalSizeArchive = 0;
|
||||
int totalEntryArchive = 0;
|
||||
try (var zip = new ZipInputStream(input)) {
|
||||
ZipEntry entry;
|
||||
while ((entry = zip.getNextEntry()) != null) {
|
||||
Path targetDirResolved = destDir.resolve(entry.getName());
|
||||
Path destination = targetDirResolved.normalize();
|
||||
if (!destination.startsWith(destDir)) {
|
||||
throw new IOException("Bad zip entry: " + entry.getName());
|
||||
}
|
||||
if (entry.isDirectory()) {
|
||||
FileUtils.createDirectory(destDir);
|
||||
} else {
|
||||
createParentDirectories(destination);
|
||||
|
||||
// Instead of Files.copy, read 2kB at a time to prevent zip bomb attack, see https://rules.sonarsource.com/java/RSPEC-5042
|
||||
int nBytes;
|
||||
byte[] buffer = new byte[2048];
|
||||
int totalSizeEntry = 0;
|
||||
|
||||
try (
|
||||
var out = Files.newOutputStream(destination, StandardOpenOption.CREATE_NEW,
|
||||
StandardOpenOption.WRITE)
|
||||
) {
|
||||
totalEntryArchive++;
|
||||
while ((nBytes = zip.read(buffer)) > 0) {
|
||||
out.write(buffer, 0, nBytes);
|
||||
totalSizeEntry += nBytes;
|
||||
totalSizeArchive += nBytes;
|
||||
|
||||
double compressionRatio = totalSizeEntry * 1d / entry.getCompressedSize();
|
||||
if (compressionRatio > ZIP_THRESHOLD_RATIO) {
|
||||
throw new IOException(
|
||||
"Ratio between compressed and uncompressed data is highly suspicious " +
|
||||
FORMAT.numeric(compressionRatio) +
|
||||
"x, looks like a Zip Bomb Attack");
|
||||
}
|
||||
}
|
||||
|
||||
if (totalSizeArchive > ZIP_THRESHOLD_SIZE) {
|
||||
throw new IOException("The uncompressed data size " + FORMAT.storage(totalSizeArchive) +
|
||||
"B is too much for the application resource capacity");
|
||||
}
|
||||
|
||||
if (totalEntryArchive > ZIP_THRESHOLD_ENTRIES) {
|
||||
throw new IOException("Too much entries in this archive " + FORMAT.integer(totalEntryArchive) +
|
||||
", can lead to inodes exhaustion of the system");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new UncheckedIOException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,6 +24,7 @@ import com.onthegomap.planetiler.mbtiles.Verify;
|
|||
import com.onthegomap.planetiler.reader.SourceFeature;
|
||||
import com.onthegomap.planetiler.stats.Stats;
|
||||
import java.io.IOException;
|
||||
import java.io.UncheckedIOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.sql.ResultSet;
|
||||
|
@ -35,6 +36,7 @@ import java.util.Comparator;
|
|||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
|
@ -317,6 +319,23 @@ public class TestUtils {
|
|||
return cwd.resolveSibling(pathFromRoot);
|
||||
}
|
||||
|
||||
public static Path extractPathToResource(Path tempDir, String resource) {
|
||||
return extractPathToResource(tempDir, resource, resource);
|
||||
}
|
||||
|
||||
public static Path extractPathToResource(Path tempDir, String resource, String local) {
|
||||
var path = tempDir.resolve(resource);
|
||||
try (
|
||||
var input = TestUtils.class.getResourceAsStream("/" + resource);
|
||||
var output = Files.newOutputStream(path);
|
||||
) {
|
||||
Objects.requireNonNull(input, "Could not find " + resource + " on classpath").transferTo(output);
|
||||
} catch (IOException e) {
|
||||
throw new UncheckedIOException(e);
|
||||
}
|
||||
return path;
|
||||
}
|
||||
|
||||
public interface GeometryComparision {
|
||||
|
||||
Geometry geom();
|
||||
|
|
|
@ -8,56 +8,78 @@ import com.onthegomap.planetiler.Profile;
|
|||
import com.onthegomap.planetiler.TestUtils;
|
||||
import com.onthegomap.planetiler.geo.GeoUtils;
|
||||
import com.onthegomap.planetiler.stats.Stats;
|
||||
import com.onthegomap.planetiler.util.FileUtils;
|
||||
import com.onthegomap.planetiler.worker.WorkerPipeline;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.FileSystems;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.Timeout;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
import org.locationtech.jts.geom.Geometry;
|
||||
|
||||
class ShapefileReaderTest {
|
||||
|
||||
private final ShapefileReader reader = new ShapefileReader(
|
||||
"test",
|
||||
TestUtils.pathToResource("shapefile.zip"),
|
||||
new Profile.NullProfile(),
|
||||
Stats.inMemory()
|
||||
);
|
||||
|
||||
@AfterEach
|
||||
public void close() {
|
||||
reader.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
void testCount() {
|
||||
assertEquals(86, reader.getCount());
|
||||
assertEquals(86, reader.getCount());
|
||||
}
|
||||
@TempDir
|
||||
private Path tempDir;
|
||||
|
||||
@Test
|
||||
@Timeout(30)
|
||||
void testReadShapefile() {
|
||||
for (int i = 1; i <= 2; i++) {
|
||||
List<Geometry> points = new ArrayList<>();
|
||||
List<String> names = new ArrayList<>();
|
||||
WorkerPipeline.start("test", Stats.inMemory())
|
||||
.fromGenerator("shapefile", reader.read())
|
||||
.addBuffer("reader_queue", 100, 1)
|
||||
.sinkToConsumer("counter", 1, elem -> {
|
||||
assertTrue(elem.getTag("name") instanceof String);
|
||||
assertEquals("test", elem.getSource());
|
||||
assertNull(elem.getSourceLayer());
|
||||
points.add(elem.latLonGeometry());
|
||||
names.add(elem.getTag("name").toString());
|
||||
}).await();
|
||||
assertEquals(86, points.size());
|
||||
assertTrue(names.contains("Van Dörn Street"));
|
||||
var gc = GeoUtils.JTS_FACTORY.createGeometryCollection(points.toArray(new Geometry[0]));
|
||||
var centroid = gc.getCentroid();
|
||||
assertEquals(-77.0297995, centroid.getX(), 5, "iter " + i);
|
||||
assertEquals(38.9119684, centroid.getY(), 5, "iter " + i);
|
||||
testReadShapefile(TestUtils.pathToResource("shapefile.zip"));
|
||||
}
|
||||
|
||||
@Test
|
||||
@Timeout(30)
|
||||
void testReadShapefileExtracted() throws IOException {
|
||||
var extracted = TestUtils.extractPathToResource(tempDir, "shapefile.zip");
|
||||
testReadShapefile(extracted);
|
||||
try (var fs = FileSystems.newFileSystem(extracted)) {
|
||||
var path = fs.getPath("shapefile", "stations.shp");
|
||||
testReadShapefile(path);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@Timeout(30)
|
||||
void testReadShapefileUnzipped() throws IOException {
|
||||
var dest = tempDir.resolve("shapefile.zip");
|
||||
FileUtils.unzipResource("/shapefile.zip", dest);
|
||||
testReadShapefile(dest);
|
||||
testReadShapefile(dest.resolve("shapefile").resolve("stations.shp"));
|
||||
}
|
||||
|
||||
private static void testReadShapefile(Path path) {
|
||||
try (
|
||||
var reader = new ShapefileReader(
|
||||
"test",
|
||||
path,
|
||||
new Profile.NullProfile(),
|
||||
Stats.inMemory()
|
||||
)
|
||||
) {
|
||||
for (int i = 1; i <= 2; i++) {
|
||||
assertEquals(86, reader.getCount());
|
||||
List<Geometry> points = new ArrayList<>();
|
||||
List<String> names = new ArrayList<>();
|
||||
WorkerPipeline.start("test", Stats.inMemory())
|
||||
.fromGenerator("shapefile", reader.read())
|
||||
.addBuffer("reader_queue", 100, 1)
|
||||
.sinkToConsumer("counter", 1, elem -> {
|
||||
assertTrue(elem.getTag("name") instanceof String);
|
||||
assertEquals("test", elem.getSource());
|
||||
assertNull(elem.getSourceLayer());
|
||||
points.add(elem.latLonGeometry());
|
||||
names.add(elem.getTag("name").toString());
|
||||
}).await();
|
||||
assertEquals(86, points.size());
|
||||
assertTrue(names.contains("Van Dörn Street"));
|
||||
var gc = GeoUtils.JTS_FACTORY.createGeometryCollection(points.toArray(new Geometry[0]));
|
||||
var centroid = gc.getCentroid();
|
||||
assertEquals(-77.0297995, centroid.getX(), 5, "iter " + i);
|
||||
assertEquals(38.9119684, centroid.getY(), 5, "iter " + i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,6 +7,8 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
|
|||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
|
||||
|
@ -44,4 +46,43 @@ class FileUtilsTest {
|
|||
FileUtils.createParentDirectories(nested);
|
||||
assertEquals(filestore, FileUtils.getFileStore(nested));
|
||||
}
|
||||
|
||||
@Test
|
||||
void testUnzip() throws IOException {
|
||||
var dest = tmpDir.resolve("unzipped");
|
||||
FileUtils.unzipResource("/shapefile.zip", dest);
|
||||
try (var walkStream = Files.walk(dest)) {
|
||||
var all = walkStream.toList();
|
||||
var directories = all.stream()
|
||||
.filter(Files::isDirectory)
|
||||
.map(tmpDir::relativize)
|
||||
.collect(Collectors.toSet());
|
||||
var files = all.stream()
|
||||
.filter(Files::isRegularFile)
|
||||
.map(tmpDir::relativize)
|
||||
.collect(Collectors.toSet());
|
||||
assertEquals(Set.of(
|
||||
Path.of("unzipped"),
|
||||
Path.of("unzipped", "shapefile")
|
||||
), directories);
|
||||
assertEquals(Set.of(
|
||||
Path.of("unzipped", "shapefile", "stations.shx"),
|
||||
Path.of("unzipped", "shapefile", "stations.cpg"),
|
||||
Path.of("unzipped", "shapefile", "stations.shp"),
|
||||
Path.of("unzipped", "shapefile", "stations.dbf"),
|
||||
Path.of("unzipped", "shapefile", "stations.prj")
|
||||
), files);
|
||||
}
|
||||
assertEquals(
|
||||
"""
|
||||
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]
|
||||
"""
|
||||
.strip(),
|
||||
Files.readString(dest.resolve("shapefile").resolve("stations.prj"))
|
||||
);
|
||||
assertEquals(
|
||||
"UTF8",
|
||||
Files.readString(dest.resolve("shapefile").resolve("stations.cpg"))
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
Ładowanie…
Reference in New Issue