kopia lustrzana https://github.com/onthegomap/planetiler
Tile hashing fix (#436)
rodzic
9fbf952239
commit
4a622a8ef0
|
@ -523,6 +523,24 @@ public class VectorTile {
|
|||
return !empty;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine whether a tile is likely to be a duplicate of some other tile hence it makes sense to calculate a hash
|
||||
* for it.
|
||||
* <p>
|
||||
* Deduplication code is aiming for a balance between filtering-out all duplicates and not spending too much CPU on
|
||||
* hash calculations: calculating hashes for all tiles costs too much CPU, not calculating hashes at all means
|
||||
* generating mbtiles which are too big. This method is responsible for achieving that balance.
|
||||
* <p>
|
||||
* Current understanding is, that for the whole planet, there are 267m total tiles and 38m unique tiles. The
|
||||
* {@link #containsOnlyFillsOrEdges()} heuristic catches >99.9% of repeated tiles and cuts down the number of tile
|
||||
* hashes we need to track by 98% (38m to 735k). So it is considered a good tradeoff.
|
||||
*
|
||||
* @return {@code true} if the tile might have duplicates hence we want to calculate a hash for it
|
||||
*/
|
||||
public boolean likelyToBeDuplicated() {
|
||||
return layers.values().stream().allMatch(v -> v.encodedFeatures.isEmpty()) || containsOnlyFillsOrEdges();
|
||||
}
|
||||
|
||||
private enum Command {
|
||||
MOVE_TO(1),
|
||||
LINE_TO(2),
|
||||
|
|
|
@ -12,7 +12,6 @@ import com.onthegomap.planetiler.stats.Stats;
|
|||
import com.onthegomap.planetiler.util.CloseableConusmer;
|
||||
import com.onthegomap.planetiler.util.CommonStringEncoder;
|
||||
import com.onthegomap.planetiler.util.DiskBacked;
|
||||
import com.onthegomap.planetiler.util.Hashing;
|
||||
import com.onthegomap.planetiler.util.LayerStats;
|
||||
import com.onthegomap.planetiler.worker.Worker;
|
||||
import java.io.Closeable;
|
||||
|
@ -368,22 +367,6 @@ public final class FeatureGroup implements Iterable<FeatureGroup.TileFeatures>,
|
|||
return tileCoord;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a hash over the feature's relevant data: layer, geometry, and attributes. The coordinates are
|
||||
* <b>not</b> part of the hash.
|
||||
* <p>
|
||||
* Used as an optimization to avoid writing the same (ocean) tiles over and over again.
|
||||
*/
|
||||
public long generateContentHash() {
|
||||
long hash = Hashing.FNV1_64_INIT;
|
||||
for (var feature : entries) {
|
||||
byte layerId = extractLayerIdFromKey(feature.key());
|
||||
hash = Hashing.fnv1a64(hash, layerId);
|
||||
hash = Hashing.fnv1a64(hash, feature.value());
|
||||
}
|
||||
return hash;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if {@code other} contains features with the same layer, geometry, and attributes as this tile -
|
||||
* even if the tiles have separate coordinates.
|
||||
|
|
|
@ -16,6 +16,7 @@ import com.onthegomap.planetiler.stats.Timer;
|
|||
import com.onthegomap.planetiler.util.DiskBacked;
|
||||
import com.onthegomap.planetiler.util.FileUtils;
|
||||
import com.onthegomap.planetiler.util.Format;
|
||||
import com.onthegomap.planetiler.util.Hashing;
|
||||
import com.onthegomap.planetiler.util.LayerStats;
|
||||
import com.onthegomap.planetiler.worker.WorkQueue;
|
||||
import com.onthegomap.planetiler.worker.Worker;
|
||||
|
@ -289,8 +290,8 @@ public class MbtilesWriter {
|
|||
lastEncoded = encoded;
|
||||
lastBytes = bytes;
|
||||
last = tileFeatures;
|
||||
if (compactDb && en.containsOnlyFillsOrEdges()) {
|
||||
tileDataHash = tileFeatures.generateContentHash();
|
||||
if (compactDb && en.likelyToBeDuplicated() && bytes != null) {
|
||||
tileDataHash = generateContentHash(bytes);
|
||||
} else {
|
||||
tileDataHash = null;
|
||||
}
|
||||
|
@ -412,6 +413,15 @@ public class MbtilesWriter {
|
|||
return Stream.of(tilesByZoom).mapToLong(c -> c.get()).sum();
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a hash over encoded and compressed tile.
|
||||
* <p>
|
||||
* Used as an optimization to avoid writing the same (mostly ocean) tiles over and over again.
|
||||
*/
|
||||
public static long generateContentHash(byte[] bytes) {
|
||||
return Hashing.fnv1a64(bytes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Container for a batch of tiles to be processed together in the encoder and writer threads.
|
||||
* <p>
|
||||
|
|
|
@ -27,10 +27,12 @@ import java.io.IOException;
|
|||
import java.io.UncheckedIOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.sql.Connection;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.Statement;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashSet;
|
||||
|
@ -42,6 +44,7 @@ import java.util.TreeMap;
|
|||
import java.util.TreeSet;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.commons.lang3.reflect.FieldUtils;
|
||||
import org.locationtech.jts.algorithm.Orientation;
|
||||
import org.locationtech.jts.geom.Coordinate;
|
||||
import org.locationtech.jts.geom.CoordinateSequence;
|
||||
|
@ -685,4 +688,33 @@ public class TestUtils {
|
|||
fail(e);
|
||||
}
|
||||
}
|
||||
|
||||
public static void assertTileDuplicates(Mbtiles db, int expected) {
|
||||
try {
|
||||
Connection connection = (Connection) FieldUtils.readField(db, "connection", true);
|
||||
Statement statement = connection.createStatement();
|
||||
ResultSet rs = statement.executeQuery("SELECT tile_data FROM tiles_data");
|
||||
ArrayList<byte[]> tilesList = new ArrayList<>();
|
||||
while (rs.next()) {
|
||||
tilesList.add(rs.getBytes("tile_data"));
|
||||
}
|
||||
|
||||
var tiles = tilesList.toArray(new byte[0][0]);
|
||||
Set<Integer> dups = new HashSet<>();
|
||||
for (int i = 0; i < tiles.length; i++) {
|
||||
for (int j = i + 1; j < tiles.length; j++) {
|
||||
if (Arrays.equals(tiles[i], tiles[j])) {
|
||||
if (!dups.contains(j)) {
|
||||
dups.add(j);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int dupCount = dups.size();
|
||||
assertEquals(expected, dupCount, "%d duplicates expected, %d found".formatted(expected, dupCount));
|
||||
} catch (IllegalAccessException | SQLException e) {
|
||||
fail(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -12,9 +12,12 @@ import com.onthegomap.planetiler.Profile;
|
|||
import com.onthegomap.planetiler.VectorTile;
|
||||
import com.onthegomap.planetiler.geo.GeometryType;
|
||||
import com.onthegomap.planetiler.geo.TileCoord;
|
||||
import com.onthegomap.planetiler.mbtiles.MbtilesWriter;
|
||||
import com.onthegomap.planetiler.render.RenderedFeature;
|
||||
import com.onthegomap.planetiler.stats.Stats;
|
||||
import com.onthegomap.planetiler.util.CloseableConusmer;
|
||||
import com.onthegomap.planetiler.util.Gzip;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
@ -356,17 +359,22 @@ class FeatureGroupTest {
|
|||
|
||||
/**
 * Parameterized test: puts two tiles, sorts, and checks that their content hashes are equal when
 * {@code expectSame} is {@code true} and different otherwise.
 * <p>
 * NOTE(review): this span is a rendered diff hunk, so what appear to be the pre-change lines (the one-line
 * signature, {@code tile0}/{@code tile1}, and the assertions on {@code tileN.generateContentHash()}) sit next to
 * the post-change lines (the signature with {@code throws IOException}, {@code tileHash0}/{@code tileHash1}, and
 * the assertions on those hashes). It is not compilable as shown - confirm against the real file before editing.
 */
@ParameterizedTest(name = "{0}")
|
||||
@ArgumentsSource(SameFeatureGroupTestArgs.class)
|
||||
void testGenerateContentHash(String testName, boolean expectSame, PuTileArgs args0, PuTileArgs args1) {
|
||||
void testGenerateContentHash(String testName, boolean expectSame, PuTileArgs args0, PuTileArgs args1)
|
||||
throws IOException {
|
||||
put(args0);
|
||||
put(args1);
|
||||
sorter.sort();
|
||||
var iter = features.iterator();
|
||||
var tile0 = iter.next();
|
||||
var tile1 = iter.next();
|
||||
// hash the gzipped, encoded form of each tile via MbtilesWriter.generateContentHash
|
||||
var tileHash0 = MbtilesWriter.generateContentHash(
|
||||
Gzip.gzip(iter.next().getVectorTileEncoder().encode())
|
||||
);
|
||||
var tileHash1 = MbtilesWriter.generateContentHash(
|
||||
Gzip.gzip(iter.next().getVectorTileEncoder().encode())
|
||||
);
|
||||
if (expectSame) {
|
||||
assertEquals(tile0.generateContentHash(), tile1.generateContentHash());
|
||||
assertEquals(tileHash0, tileHash1);
|
||||
} else {
|
||||
assertNotEquals(tile0.generateContentHash(), tile1.generateContentHash());
|
||||
assertNotEquals(tileHash0, tileHash1);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -110,6 +110,8 @@ class BikeRouteOverlayTest {
|
|||
"name", "EuroVelo 8 - Mediterranean Route - part Monaco",
|
||||
"ref", "EV8"
|
||||
), GeoUtils.WORLD_LAT_LON_BOUNDS, 25, LineString.class);
|
||||
|
||||
TestUtils.assertTileDuplicates(mbtiles, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Ładowanie…
Reference in New Issue