fixes from running

pull/1/head
Mike Barry 2021-07-20 08:55:48 -04:00
rodzic 5e0d4b20ba
commit 36e2aaa22a
9 zmienionych plików z 256 dodań i 14 usunięć

Wyświetl plik

@ -1,17 +1,29 @@
package com.onthegomap.flatmap.collections;
import com.carrotsearch.hppc.LongArrayList;
import com.graphhopper.coll.GHLongLongHashMap;
import com.onthegomap.flatmap.FileUtils;
import com.onthegomap.flatmap.Format;
import java.io.BufferedOutputStream;
import java.io.Closeable;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.function.LongSupplier;
import org.mapdb.Serializer;
import org.mapdb.SortedTableMap;
import org.mapdb.volume.ByteArrayVol;
import org.mapdb.volume.MappedFileVol;
import org.mapdb.volume.Volume;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public interface LongLongMap extends Closeable {
@ -55,6 +67,18 @@ public interface LongLongMap extends Closeable {
return new MapdbSortedTable(volume, () -> 0);
}
/**
 * Creates a {@link SparseArray}-backed map that stores its values in the file at {@code path}, using the
 * sparse array's default segment size and gap limit.
 *
 * @param path file the values are written to
 * @return a new file-backed sparse array map
 */
static LongLongMap newFileBackedSparseArray(Path path) {
  LongLongMap sparseArray = new SparseArray(path);
  return sparseArray;
}
/**
 * Creates a {@link SparseArray}-backed map that stores its values in the file at {@code path} with explicit
 * tuning parameters.
 *
 * @param path        file the values are written to
 * @param segmentSize size in bytes of each segment the file is split into when it is read back
 * @param gapLimit    largest distance between consecutive keys that is still stored as one contiguous run
 * @return a new file-backed sparse array map
 */
static LongLongMap newFileBackedSparseArray(Path path, int segmentSize, int gapLimit) {
  LongLongMap sparseArray = new SparseArray(path, segmentSize, gapLimit);
  return sparseArray;
}
/**
 * Creates a map backed by an in-memory {@link Array}.
 *
 * @return a new, empty array-backed map
 */
static LongLongMap newArrayBacked() {
  LongLongMap arrayBacked = new Array();
  return arrayBacked;
}
/**
 * Creates a map backed by a new {@code HppcMap} instance.
 *
 * @return a new, empty map
 */
static LongLongMap newInMemoryHashMap() {
  LongLongMap hashMap = new HppcMap();
  return hashMap;
}
@ -129,4 +153,175 @@ public interface LongLongMap extends Closeable {
}
}
}
/**
 * In-memory {@link LongLongMap} that uses the key itself as an index into lazily-allocated, fixed-size
 * {@code long[]} segments.
 *
 * <p>Only keys in the range {@code [0, MAX_SEGMENTS * SEGMENT_MAX_ENTRIES)} can be stored. Looking up a key
 * outside that range returns {@code MISSING_VALUE}; storing one throws
 * {@link ArrayIndexOutOfBoundsException}.
 */
class Array implements LongLongMap {

  private static final long MAX_MEM_USAGE = 100_000_000_000L; // 100GB
  private static final long SEGMENT_SIZE = 1_000_000; // 1MB
  /** Number of {@code long} slots in each segment. */
  private static final long SEGMENT_MAX_ENTRIES = SEGMENT_SIZE / 8 + 1;
  /** Number of segment slots in the top-level array. */
  private static final long MAX_SEGMENTS = MAX_MEM_USAGE / SEGMENT_SIZE;
  /** Exclusive upper bound on the keys that fit into {@code MAX_SEGMENTS} segments. */
  private static final long MAX_KEY_EXCLUSIVE = MAX_SEGMENTS * SEGMENT_MAX_ENTRIES;

  /** Number of segments that have been allocated so far. */
  int used = 0;
  private long[][] longs = new long[(int) MAX_SEGMENTS][];

  /** Returns true when {@code key} maps onto a slot of the segment table. */
  private static boolean inRange(long key) {
    return key >= 0 && key < MAX_KEY_EXCLUSIVE;
  }

  /**
   * Stores {@code value} under {@code key}, allocating the containing segment on first use.
   *
   * @throws ArrayIndexOutOfBoundsException if {@code key} is negative or too large for the segment table
   */
  @Override
  public void put(long key, long value) {
    // Check the range on the long value: narrowing an oversized quotient to int could otherwise wrap around
    // onto a valid segment and silently overwrite the slot of an unrelated key.
    if (!inRange(key)) {
      throw new ArrayIndexOutOfBoundsException(
        "Key " + key + " is outside the supported range [0, " + MAX_KEY_EXCLUSIVE + ")");
    }
    int segment = (int) (key / SEGMENT_MAX_ENTRIES);
    long[] seg = longs[segment];
    if (seg == null) {
      seg = longs[segment] = new long[(int) SEGMENT_MAX_ENTRIES];
      // A fresh segment must read as "no value" for every key that has not been put yet.
      Arrays.fill(seg, MISSING_VALUE);
      used++;
    }
    seg[(int) (key % SEGMENT_MAX_ENTRIES)] = value;
  }

  /**
   * Returns the value stored under {@code key}, or {@code MISSING_VALUE} when nothing was stored for it,
   * including when {@code key} is outside the range this map can hold.
   */
  @Override
  public long get(long key) {
    if (!inRange(key)) {
      return MISSING_VALUE;
    }
    long[] segment = longs[(int) (key / SEGMENT_MAX_ENTRIES)];
    return segment == null ? MISSING_VALUE : segment[(int) (key % SEGMENT_MAX_ENTRIES)];
  }

  /**
   * Returns a size estimate computed from the segment table length and the number of allocated segments:
   * 8 bytes per table slot and per segment entry, plus 24 bytes per array (presumably the array object
   * overhead - TODO confirm).
   */
  @Override
  public long fileSize() {
    return 24L + 8L * longs.length + ((long) used) * (24L + 8L * SEGMENT_MAX_ENTRIES);
  }

  /** Drops the reference to the segment table; the map must not be used afterwards. */
  @Override
  public void close() throws IOException {
    longs = null;
  }
}
/**
 * File-backed {@link LongLongMap} for keys that are inserted in increasing order.
 *
 * <p>Values are appended to a file as consecutive 8-byte longs. A key that follows the previous key by more
 * than {@code gapLimit} starts a new run, and the run's first key and its position in the file are recorded in
 * memory. Smaller gaps are filled with {@code MISSING_VALUE} placeholders in the file, so the position of any
 * key inside a run is the run's start position plus the key's offset from the run's first key.
 *
 * <p>The first call to {@link #get(long)} closes the output stream and memory-maps the file in segments for
 * reading.
 */
class SparseArray implements LongLongMap {

  private static final Logger LOGGER = LoggerFactory.getLogger(SparseArray.class);
  private static final int DEFAULT_GAP_LIMIT = 100;
  private static final int DEFAULT_SEGMENT_SIZE_BYTES = 1 << 30; // 1GB
  private static final int BYTES_PER_LONG = 8;

  private final long gapLimit;
  /** Number of longs (not bytes) held by each memory-mapped segment. */
  private final long segmentSize;
  private final Path path;
  private final DataOutputStream outputStream;
  private long lastKey;
  /** Number of longs written to the file so far, i.e. the file position of the next value. */
  private long outIdx = 0;
  private FileChannel channel = null;
  /** First key of each run, in increasing order. */
  private final LongArrayList keys = new LongArrayList();
  /** File position (in longs) of the first value of each run, parallel to {@link #keys}. */
  private final LongArrayList values = new LongArrayList();
  /** Memory-mapped view of the file; null until the first read triggers {@link #build()}. */
  private volatile List<MappedByteBuffer> segments;

  SparseArray(Path path) {
    this(path, DEFAULT_SEGMENT_SIZE_BYTES, DEFAULT_GAP_LIMIT);
  }

  /**
   * @param path        file the values are written to
   * @param segmentSize size in bytes of each memory-mapped segment, at least 8
   * @param gapLimit    largest distance between consecutive keys that is still stored as one run
   * @throws IllegalArgumentException if {@code segmentSize} cannot hold a single long
   * @throws IllegalStateException    if the output file cannot be opened
   */
  public SparseArray(Path path, int segmentSize, int gapLimit) {
    // A segment smaller than one long would become 0 longs per segment and break the read path.
    if (segmentSize < BYTES_PER_LONG) {
      throw new IllegalArgumentException(
        "segmentSize must be at least " + BYTES_PER_LONG + " bytes but was " + segmentSize);
    }
    this.path = path;
    this.segmentSize = segmentSize / BYTES_PER_LONG;
    this.gapLimit = gapLimit;
    // Start far enough below zero that the first key always exceeds the gap limit and opens a new run.
    lastKey = -2 * this.gapLimit;
    try {
      this.outputStream = new DataOutputStream(new BufferedOutputStream(Files.newOutputStream(path), 50_000));
    } catch (IOException e) {
      throw new IllegalStateException("Could not create compact array output stream", e);
    }
  }

  /**
   * Appends {@code value} for {@code key}. Keys must be strictly increasing (checked by an assertion only).
   *
   * @throws IllegalStateException if writing to the file fails
   */
  @Override
  public void put(long key, long value) {
    assert key > lastKey;
    long gap = key - lastKey;
    lastKey = key;
    try {
      if (gap > gapLimit) {
        // Too far from the previous key: start a new run at the current file position.
        keys.add(key);
        values.add(outIdx);
      } else {
        // Close enough: pad the skipped keys so offsets inside the run stay aligned with the keys.
        for (long i = 1; i < gap; i++) {
          appendValue(MISSING_VALUE);
        }
      }
      appendValue(value);
    } catch (IOException e) {
      throw new IllegalStateException("Could not put value", e);
    }
  }

  /** Writes one long to the file and advances the file position. */
  private void appendValue(long value) throws IOException {
    outIdx++;
    outputStream.writeLong(value);
  }

  /**
   * Returns the value stored for {@code key}, or {@code MISSING_VALUE} if there is none. The first call
   * switches the map from writing to reading.
   *
   * @throws IllegalStateException if the file cannot be memory-mapped
   */
  @Override
  public long get(long key) {
    if (segments == null) {
      synchronized (this) {
        if (segments == null) {
          build();
        }
      }
    }
    if (key > lastKey) {
      return MISSING_VALUE;
    }
    int idx = binarySearch(key);
    long fileIdx;
    if (idx == -1) {
      // Insertion point 0: the key is smaller than the first key of the first run.
      return MISSING_VALUE;
    }
    if (idx >= 0) {
      // The key is exactly the first key of a run.
      fileIdx = values.get(idx);
    } else {
      // idx == -(insertionPoint) - 1, so the run that could contain the key is at insertionPoint - 1.
      int beforeIdx = -idx - 2;
      long beforeKey = keys.get(beforeIdx);
      fileIdx = values.get(beforeIdx) + (key - beforeKey);
      // The computed position must stay inside that run: before the next run's start, or before the end of
      // the file when it is the last run.
      if (beforeIdx < values.size() - 1 ? fileIdx >= values.get(beforeIdx + 1) : fileIdx >= outIdx) {
        return MISSING_VALUE;
      }
    }
    return getValue(fileIdx);
  }

  /** Closes the output stream and memory-maps the written file in chunks of {@code segmentSize} longs. */
  private void build() {
    try {
      outputStream.close();
      channel = FileChannel.open(path, StandardOpenOption.READ);
      var segmentCount = (int) (outIdx / segmentSize + 1);
      List<MappedByteBuffer> result = new ArrayList<>(segmentCount);
      LOGGER.info("LongLongMap.SparseArray gaps={} segments={}",
        Format.formatInteger(keys.size()), Format.formatInteger(segmentCount));
      for (long offset = 0; offset < outIdx; offset += segmentSize) {
        // Offsets and lengths are tracked in longs; shift by 3 to convert to bytes.
        long lengthInLongs = Math.min(segmentSize, outIdx - offset);
        result.add(channel.map(FileChannel.MapMode.READ_ONLY, offset << 3, lengthInLongs << 3));
      }
      segments = result;
    } catch (IOException e) {
      throw new IllegalStateException("Could not create segments", e);
    }
  }

  /** Reads the long at file position {@code fileIdx} (counted in longs) from the mapped segments. */
  private long getValue(long fileIdx) {
    int segNum = (int) (fileIdx / segmentSize);
    int segOffset = (int) (fileIdx % segmentSize);
    return segments.get(segNum).getLong(segOffset << 3);
  }

  /** Searches the run start keys; the result follows the {@link Arrays#binarySearch(long[], int, int, long)} contract. */
  private int binarySearch(long key) {
    return Arrays.binarySearch(keys.buffer, 0, keys.elementsCount, key);
  }

  /** Returns the size of the backing file as reported by {@code FileUtils.size}. */
  @Override
  public long fileSize() {
    return FileUtils.size(path);
  }

  /**
   * Closes the output stream and, if the map was ever read, the read channel. Safe to call on a map that was
   * never read.
   */
  @Override
  public void close() throws IOException {
    // Take the same lock as the lazy build in get() so a channel opened by another thread is visible here.
    synchronized (this) {
      try {
        outputStream.close();
      } finally {
        // The channel only exists once the first get() has run.
        if (channel != null) {
          channel.close();
        }
      }
    }
  }
}
}

Wyświetl plik

@ -1,6 +1,5 @@
package com.onthegomap.flatmap.monitoring;
import com.onthegomap.flatmap.Format;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;
@ -14,9 +13,8 @@ public class Timers {
/**
 * Logs a summary of {@code timers}: a separator line of 50 dashes followed by output for every entry, in the
 * map's iteration order.
 */
public void printSummary() {
LOGGER.info("-".repeat(50));
// Pad width is one more than the longest key; with no entries it falls back to the length of "# features".
int pad = 1 + timers.keySet().stream().mapToInt(String::length).max().orElse("# features".length());
for (var entry : timers.entrySet()) {
// NOTE(review): two log statements per entry are visible here (a padded "key: value" line and a
// tab-separated line using elapsed()); this looks like the old and new line of a diff shown together -
// confirm which one is current.
LOGGER.info(Format.padLeft(entry.getKey(), pad) + ": " + entry.getValue());
LOGGER.info("\t" + entry.getKey() + "\t" + entry.getValue().elapsed());
}
}

Wyświetl plik

@ -83,9 +83,9 @@ public class MbtilesWriter {
var topology = Topology.start("mbtiles", stats)
.fromGenerator("reader", writer::readFeatures, 1)
.addBuffer("reader_queue", 50_000, 1_000)
.addBuffer("reader_queue", 10_000, 500)
.addWorker("encoder", config.threads(), writer::tileEncoder)
.addBuffer("writer_queue", 50_000, 1_000)
.addBuffer("writer_queue", 10_000, 500)
.sinkTo("writer", 1, writer::tileWriter);
var loggers = new ProgressLoggers("mbtiles")

Wyświetl plik

@ -22,16 +22,39 @@ public abstract class LongLongMapTest {
map.put(1, 1);
assertEquals(Long.MIN_VALUE, map.get(0));
assertEquals(1, map.get(1));
assertEquals(Long.MIN_VALUE, map.get(2));
}
/**
 * Inserts four keys with differently sized gaps between them, then checks that each stored key returns its
 * value and that keys before, between and after them return {@code Long.MIN_VALUE}.
 */
@Test
public void insertWithGaps() {
  long[][] inserted = {{1, 2}, {50, 3}, {500, 4}, {505, 5}};
  for (long[] keyValue : inserted) {
    map.put(keyValue[0], keyValue[1]);
  }

  final long missing = Long.MIN_VALUE;
  // {key, expected value}, looked up in this order
  long[][] lookups = {
    {0, missing},
    {1, 2},
    {2, missing},
    {49, missing},
    {50, 3},
    {51, missing},
    {300, missing},
    {499, missing},
    {500, 4},
    {501, missing},
    {505, 5},
    {506, missing},
    {1_000, missing},
  };
  for (long[] lookup : lookups) {
    assertEquals(lookup[1], map.get(lookup[0]));
  }
}
@Test
public void insertMultiLookup() {
map.put(1, 3);
map.put(2, 4);
map.put(Long.MAX_VALUE, Long.MAX_VALUE);
map.put(1_000_000_000, Long.MAX_VALUE);
assertEquals(Long.MIN_VALUE, map.get(0));
assertEquals(Long.MIN_VALUE, map.get(3));
assertArrayEquals(new long[]{3, 4, Long.MAX_VALUE, Long.MIN_VALUE},
map.multiGet(new long[]{1, 2, Long.MAX_VALUE, 3}));
map.multiGet(new long[]{1, 2, 1_000_000_000, 3}));
}
@Test
@ -64,4 +87,20 @@ public abstract class LongLongMapTest {
this.map = LongLongMap.newInMemorySortedTable();
}
}
/** Runs the inherited {@link LongLongMapTest} cases against a file-backed sparse array in a temp directory. */
public static class SparseArrayTest extends LongLongMapTest {

  @BeforeEach
  public void setup(@TempDir Path dir) {
    Path backingFile = dir.resolve("test-sparse-array");
    // 80-byte segments and a gap limit of 100
    this.map = LongLongMap.newFileBackedSparseArray(backingFile, 80, 100);
  }
}
/** Runs the inherited {@link LongLongMapTest} cases against the array-backed map. */
public static class ArrayTest extends LongLongMapTest {

  @BeforeEach
  public void setup() {
    LongLongMap arrayBacked = LongLongMap.newArrayBacked();
    this.map = arrayBacked;
  }
}
}

Wyświetl plik

@ -31,6 +31,11 @@
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
<archive>
<manifestEntries>
<Multi-Release>true</Multi-Release>
</manifestEntries>
</archive>
</configuration>
<executions>

Wyświetl plik

@ -51,6 +51,11 @@
<version>3.3.0</version>
<configuration>
<archive>
<manifestEntries>
<Multi-Release>true</Multi-Release>
</manifestEntries>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>

Wyświetl plik

@ -4,4 +4,4 @@ set -o errexit
set -o pipefail
set -o nounset
mvn -DskipTests=true --projects openmaptiles -am clean package
mvn -DskipTests=true --projects openmaptiles -am package

Wyświetl plik

@ -9,6 +9,6 @@ cd "$(git rev-parse --show-cdup)"
mkdir -p data/sources
cd data/sources
wget -nc https://naciscdn.org/naturalearth/packages/natural_earth_vector.sqlite.zip
wget -nc https://osmdata.openstreetmap.de/download/water-polygons-split-3857.zip
wget -nc https://github.com/lukasmartinelli/osm-lakelines/releases/download/v0.9/lake_centerline.shp.zip
wget --progress=bar:force -nc https://naciscdn.org/naturalearth/packages/natural_earth_vector.sqlite.zip
wget --progress=bar:force -nc https://osmdata.openstreetmap.de/download/water-polygons-split-3857.zip
wget --progress=bar:force -nc https://github.com/lukasmartinelli/osm-lakelines/releases/download/v0.9/lake_centerline.shp.zip

6
scripts/ubuntu_vm_setup.sh 100644 → 100755
Wyświetl plik

@ -12,13 +12,13 @@ fi
"$(dirname "$0")"/build.sh
scp openmaptiles/target/flatmap-openmaptiles-0.1-SNAPSHOT-jar-with-dependencies.jar "${1}":flatmap.jar
rsync -avzP openmaptiles/target/flatmap-openmaptiles-0.1-SNAPSHOT-jar-with-dependencies.jar "${1}":flatmap.jar
scp scripts/download-other-sources.sh "${1}":download-other-sources.sh
scp scripts/download-osm.sh "${1}":download-osm.sh
ssh "${1}" "
ssh "${1}" "bash -s" <<EOF
wget -qO - https://adoptopenjdk.jfrog.io/adoptopenjdk/api/gpg/key/public | sudo apt-key add - && \
add-apt-repository --yes https://adoptopenjdk.jfrog.io/adoptopenjdk/deb/ && \
apt-get update -y && \
apt-get install adoptopenjdk-16-hotspot-jre -y && \
./download-other-sources.sh
"
EOF