Improve shapefile zip reads (#314)

pull/316/head
Michael Barry 2022-07-31 07:17:42 -04:00 zatwierdzone przez GitHub
rodzic ffbffb577b
commit 93fe75782e
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 4AEE18F83AFDEB23
5 zmienionych plików z 231 dodań i 47 usunięć

Wyświetl plik

@ -8,10 +8,13 @@ import com.onthegomap.planetiler.util.FileUtils;
import com.onthegomap.planetiler.worker.WorkerPipeline;
import java.io.Closeable;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.net.URI;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.stream.Stream;
import org.geotools.data.FeatureSource;
import org.geotools.data.shapefile.ShapefileDataStore;
import org.geotools.feature.FeatureCollection;
@ -60,8 +63,10 @@ public class ShapefileReader extends SimpleReader implements Closeable {
for (int i = 0; i < attributeNames.length; i++) {
attributeNames[i] = inputSource.getSchema().getDescriptor(i).getLocalName();
}
} catch (IOException | FactoryException e) {
throw new RuntimeException(e);
} catch (IOException e) {
throw new UncheckedIOException(e);
} catch (FactoryException e) {
throw new FileFormatException("Bad reference system", e);
}
}
@ -109,16 +114,27 @@ public class ShapefileReader extends SimpleReader implements Closeable {
processWithProjection(null, sourceName, input, writer, config, profile, stats);
}
// Returns the URI of the first path in walkStream that has a "shp" extension.
// The caller owns walkStream and is responsible for closing it; path is only
// used to build the error message when nothing matches.
private static URI findShpFile(Path path, Stream<Path> walkStream) {
  var firstShapefile = walkStream
    .filter(candidate -> FileUtils.hasExtension(candidate, "shp"))
    .findFirst();
  if (firstShapefile.isEmpty()) {
    throw new IllegalArgumentException("No .shp file found inside " + path);
  }
  return firstShapefile.get().toUri();
}
private ShapefileDataStore open(Path path) {
try {
URI uri;
if (FileUtils.hasExtension(path, "zip")) {
try (var zipFs = FileSystems.newFileSystem(path)) {
Path shapeFileInZip = FileUtils.walkFileSystem(zipFs)
.filter(z -> FileUtils.hasExtension(z, "shp"))
.findFirst()
.orElseThrow(() -> new IllegalArgumentException("No .shp file found inside " + path));
uri = shapeFileInZip.toUri();
if (Files.isDirectory(path)) {
try (var walkStream = Files.walk(path)) {
uri = findShpFile(path, walkStream);
}
} else if (FileUtils.hasExtension(path, "zip")) {
try (
var zipFs = FileSystems.newFileSystem(path);
var walkStream = FileUtils.walkFileSystem(zipFs)
) {
uri = findShpFile(path, walkStream);
}
} else if (FileUtils.hasExtension(path, "shp")) {
uri = path.toUri();

Wyświetl plik

@ -1,15 +1,20 @@
package com.onthegomap.planetiler.util;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.nio.file.FileStore;
import java.nio.file.FileSystem;
import java.nio.file.Files;
import java.nio.file.NoSuchFileException;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Comparator;
import java.util.Objects;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -17,6 +22,11 @@ import org.slf4j.LoggerFactory;
* Convenience methods for working with files on disk.
*/
public class FileUtils {
// Formats the numbers embedded in the zip-bomb error messages thrown by unzip.
private static final Format FORMAT = Format.defaultInstance();
// Prevent zip-bomb attack, see https://rules.sonarsource.com/java/RSPEC-5042
// Maximum number of file entries unzip will extract from a single archive.
private static final int ZIP_THRESHOLD_ENTRIES = 10_000;
// Maximum total number of uncompressed bytes unzip will write for a single archive.
private static final int ZIP_THRESHOLD_SIZE = 1_000_000_000;
// Maximum allowed ratio of an entry's uncompressed bytes to its compressed size.
private static final double ZIP_THRESHOLD_RATIO = 1_000;
private static final Logger LOGGER = LoggerFactory.getLogger(FileUtils.class);
@ -168,4 +178,80 @@ public class FileUtils {
public static void deleteOnExit(Path path) {
  // Path has no delete-on-exit hook of its own, so go through the legacy File view.
  var legacyFile = path.toFile();
  legacyFile.deleteOnExit();
}
/**
 * Extracts a zip archive found on the classpath into {@code dest}.
 *
 * @param resource classpath location of the zip file, resolved relative to {@code FileUtils.class}
 * @param dest     directory that the archive contents are extracted into
 * @throws NullPointerException if {@code resource} cannot be found on the classpath
 * @throws UncheckedIOException if an IO exception occurs
 */
public static void unzipResource(String resource, Path dest) {
  try (var resourceStream = FileUtils.class.getResourceAsStream(resource)) {
    // getResourceAsStream returns null for a missing resource; fail with a descriptive message
    unzip(Objects.requireNonNull(resourceStream, "Resource not found on classpath: " + resource), dest);
  } catch (IOException e) {
    throw new UncheckedIOException(e);
  }
}
/**
 * Unzips a zip file from an input stream to {@code destDir}.
 * <p>
 * Entries that would resolve outside of {@code destDir} are rejected, and extraction is aborted when the archive
 * exceeds the entry-count, total-size or compression-ratio thresholds (zip-bomb protection, see
 * https://rules.sonarsource.com/java/RSPEC-5042). Files already written before a failure are left in place.
 *
 * @param input   stream of zip-formatted bytes; it is closed when this method returns
 * @param destDir directory that entries are extracted into
 * @throws UncheckedIOException if an IO exception occurs, an entry escapes {@code destDir}, a file to extract
 *                              already exists, or one of the zip-bomb thresholds is exceeded
 */
public static void unzip(InputStream input, Path destDir) {
  int totalSizeArchive = 0;
  int totalEntryArchive = 0;
  // Normalize once so the containment check below compares like with like even if destDir contains "." or ".."
  Path root = destDir.normalize();
  // Read 2kB at a time instead of Files.copy so the thresholds can be enforced while data is being inflated
  byte[] buffer = new byte[2048];
  try (var zip = new ZipInputStream(input)) {
    ZipEntry entry;
    while ((entry = zip.getNextEntry()) != null) {
      Path destination = root.resolve(entry.getName()).normalize();
      if (!destination.startsWith(root)) {
        // zip-slip: entry name is absolute or climbs out of the destination with ".."
        throw new IOException("Bad zip entry: " + entry.getName());
      }
      if (entry.isDirectory()) {
        // create the entry's own directory so that empty directories in the archive are preserved
        Files.createDirectories(destination);
      } else {
        totalEntryArchive++;
        // check before creating the file so that the entry over the limit is never materialized
        if (totalEntryArchive > ZIP_THRESHOLD_ENTRIES) {
          throw new IOException("Too much entries in this archive " + FORMAT.integer(totalEntryArchive) +
            ", can lead to inodes exhaustion of the system");
        }
        createParentDirectories(destination);
        int nBytes;
        int totalSizeEntry = 0;
        try (
          var out = Files.newOutputStream(destination, StandardOpenOption.CREATE_NEW,
            StandardOpenOption.WRITE)
        ) {
          while ((nBytes = zip.read(buffer)) > 0) {
            out.write(buffer, 0, nBytes);
            totalSizeEntry += nBytes;
            totalSizeArchive += nBytes;
            // getCompressedSize() is -1 when unknown, which yields a negative ratio that never trips this check
            double compressionRatio = totalSizeEntry * 1d / entry.getCompressedSize();
            if (compressionRatio > ZIP_THRESHOLD_RATIO) {
              throw new IOException(
                "Ratio between compressed and uncompressed data is highly suspicious " +
                  FORMAT.numeric(compressionRatio) +
                  "x, looks like a Zip Bomb Attack");
            }
            // Enforce the size limit per chunk: a single huge entry is stopped at most one buffer past the
            // threshold, which also keeps the int counters far away from overflow
            if (totalSizeArchive > ZIP_THRESHOLD_SIZE) {
              throw new IOException("The uncompressed data size " + FORMAT.storage(totalSizeArchive) +
                "B is too much for the application resource capacity");
            }
          }
        }
      }
    }
  } catch (IOException e) {
    throw new UncheckedIOException(e);
  }
}
}

Wyświetl plik

@ -24,6 +24,7 @@ import com.onthegomap.planetiler.mbtiles.Verify;
import com.onthegomap.planetiler.reader.SourceFeature;
import com.onthegomap.planetiler.stats.Stats;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.ResultSet;
@ -35,6 +36,7 @@ import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
@ -317,6 +319,23 @@ public class TestUtils {
return cwd.resolveSibling(pathFromRoot);
}
/**
 * Copies the classpath resource {@code resource} into {@code tempDir}, using the resource name as the local file
 * name as well.
 */
public static Path extractPathToResource(Path tempDir, String resource) {
  String sameNameAsResource = resource;
  return extractPathToResource(tempDir, resource, sameNameAsResource);
}
/**
 * Copies the classpath resource {@code resource} to a file named {@code local} inside {@code tempDir}.
 *
 * @param tempDir  directory to write the copy into
 * @param resource name of the resource, looked up from the classpath root ({@code "/" + resource})
 * @param local    file name, relative to {@code tempDir}, that the resource is written to
 * @return path of the extracted file
 * @throws NullPointerException if the resource does not exist on the classpath
 * @throws UncheckedIOException if copying fails
 */
public static Path extractPathToResource(Path tempDir, String resource, String local) {
  // honor the requested local name (it was previously ignored in favor of the resource name)
  var path = tempDir.resolve(local);
  try (var input = TestUtils.class.getResourceAsStream("/" + resource)) {
    // verify the resource exists before opening the output so a missing resource leaves no empty file behind
    Objects.requireNonNull(input, "Could not find " + resource + " on classpath");
    try (var output = Files.newOutputStream(path)) {
      input.transferTo(output);
    }
  } catch (IOException e) {
    throw new UncheckedIOException(e);
  }
  return path;
}
public interface GeometryComparision {
Geometry geom();

Wyświetl plik

@ -8,56 +8,78 @@ import com.onthegomap.planetiler.Profile;
import com.onthegomap.planetiler.TestUtils;
import com.onthegomap.planetiler.geo.GeoUtils;
import com.onthegomap.planetiler.stats.Stats;
import com.onthegomap.planetiler.util.FileUtils;
import com.onthegomap.planetiler.worker.WorkerPipeline;
import java.io.IOException;
import java.nio.file.FileSystems;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.Timeout;
import org.junit.jupiter.api.io.TempDir;
import org.locationtech.jts.geom.Geometry;
class ShapefileReaderTest {
private final ShapefileReader reader = new ShapefileReader(
"test",
TestUtils.pathToResource("shapefile.zip"),
new Profile.NullProfile(),
Stats.inMemory()
);
@AfterEach
public void close() {
reader.close();
}
@Test
void testCount() {
assertEquals(86, reader.getCount());
assertEquals(86, reader.getCount());
}
@TempDir
private Path tempDir;
@Test
@Timeout(30)
void testReadShapefile() {
for (int i = 1; i <= 2; i++) {
List<Geometry> points = new ArrayList<>();
List<String> names = new ArrayList<>();
WorkerPipeline.start("test", Stats.inMemory())
.fromGenerator("shapefile", reader.read())
.addBuffer("reader_queue", 100, 1)
.sinkToConsumer("counter", 1, elem -> {
assertTrue(elem.getTag("name") instanceof String);
assertEquals("test", elem.getSource());
assertNull(elem.getSourceLayer());
points.add(elem.latLonGeometry());
names.add(elem.getTag("name").toString());
}).await();
assertEquals(86, points.size());
assertTrue(names.contains("Van Dörn Street"));
var gc = GeoUtils.JTS_FACTORY.createGeometryCollection(points.toArray(new Geometry[0]));
var centroid = gc.getCentroid();
assertEquals(-77.0297995, centroid.getX(), 5, "iter " + i);
assertEquals(38.9119684, centroid.getY(), 5, "iter " + i);
testReadShapefile(TestUtils.pathToResource("shapefile.zip"));
}
@Test
@Timeout(30)
void testReadShapefileExtracted() throws IOException {
  // Copy the zip out of the classpath, then read it both as a zip path and as a .shp path inside the zip filesystem
  Path zipOnDisk = TestUtils.extractPathToResource(tempDir, "shapefile.zip");
  testReadShapefile(zipOnDisk);
  try (var zipFileSystem = FileSystems.newFileSystem(zipOnDisk)) {
    Path shpInsideZip = zipFileSystem.getPath("shapefile", "stations.shp");
    testReadShapefile(shpInsideZip);
  }
}
@Test
@Timeout(30)
void testReadShapefileUnzipped() throws IOException {
  // Unzip into a directory (deliberately named like a zip file), then read the directory and the .shp file itself
  Path unzippedDir = tempDir.resolve("shapefile.zip");
  FileUtils.unzipResource("/shapefile.zip", unzippedDir);
  testReadShapefile(unzippedDir);
  Path shpFile = unzippedDir.resolve("shapefile").resolve("stations.shp");
  testReadShapefile(shpFile);
}
// Opens a ShapefileReader on path and reads it twice, checking the feature count, tags and centroid each time.
private static void testReadShapefile(Path path) {
  try (var shapefileReader = new ShapefileReader("test", path, new Profile.NullProfile(), Stats.inMemory())) {
    // two passes to verify that the same reader instance can be read repeatedly
    for (int i = 1; i <= 2; i++) {
      assertEquals(86, shapefileReader.getCount());
      List<Geometry> geometries = new ArrayList<>();
      List<String> streetNames = new ArrayList<>();
      WorkerPipeline.start("test", Stats.inMemory())
        .fromGenerator("shapefile", shapefileReader.read())
        .addBuffer("reader_queue", 100, 1)
        .sinkToConsumer("counter", 1, feature -> {
          assertTrue(feature.getTag("name") instanceof String);
          assertEquals("test", feature.getSource());
          assertNull(feature.getSourceLayer());
          geometries.add(feature.latLonGeometry());
          streetNames.add(feature.getTag("name").toString());
        }).await();
      assertEquals(86, geometries.size());
      assertTrue(streetNames.contains("Van Dörn Street"));
      Geometry[] geometryArray = geometries.toArray(new Geometry[0]);
      var center = GeoUtils.JTS_FACTORY.createGeometryCollection(geometryArray).getCentroid();
      assertEquals(-77.0297995, center.getX(), 5, "iter " + i);
      assertEquals(38.9119684, center.getY(), 5, "iter " + i);
    }
  }
}
}

Wyświetl plik

@ -7,6 +7,8 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Set;
import java.util.stream.Collectors;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
@ -44,4 +46,43 @@ class FileUtilsTest {
FileUtils.createParentDirectories(nested);
assertEquals(filestore, FileUtils.getFileStore(nested));
}
@Test
void testUnzip() throws IOException {
  Path unzipRoot = tmpDir.resolve("unzipped");
  FileUtils.unzipResource("/shapefile.zip", unzipRoot);
  Path shapefileDir = unzipRoot.resolve("shapefile");

  // expected layout, expressed relative to tmpDir
  Set<Path> expectedDirectories = Set.of(
    Path.of("unzipped"),
    Path.of("unzipped", "shapefile")
  );
  Set<Path> expectedFiles = Set.of(
    Path.of("unzipped", "shapefile", "stations.shx"),
    Path.of("unzipped", "shapefile", "stations.cpg"),
    Path.of("unzipped", "shapefile", "stations.shp"),
    Path.of("unzipped", "shapefile", "stations.dbf"),
    Path.of("unzipped", "shapefile", "stations.prj")
  );

  try (var extractedPaths = Files.walk(unzipRoot)) {
    var everything = extractedPaths.toList();
    Set<Path> actualDirectories = everything.stream()
      .filter(Files::isDirectory)
      .map(tmpDir::relativize)
      .collect(Collectors.toSet());
    Set<Path> actualFiles = everything.stream()
      .filter(Files::isRegularFile)
      .map(tmpDir::relativize)
      .collect(Collectors.toSet());
    assertEquals(expectedDirectories, actualDirectories);
    assertEquals(expectedFiles, actualFiles);
  }

  // spot-check that file contents survived extraction intact
  String expectedProjection = """
    GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]
    """
    .strip();
  assertEquals(expectedProjection, Files.readString(shapefileDir.resolve("stations.prj")));
  assertEquals("UTF8", Files.readString(shapefileDir.resolve("stations.cpg")));
}
}