2021-12-23 10:42:24 +00:00
|
|
|
package com.onthegomap.planetiler.collection;
|
2021-04-10 09:25:42 +00:00
|
|
|
|
2021-12-23 10:42:24 +00:00
|
|
|
import static com.onthegomap.planetiler.util.MemoryEstimator.estimateSize;
|
2021-09-10 00:46:20 +00:00
|
|
|
|
2021-05-04 11:07:16 +00:00
|
|
|
import com.carrotsearch.hppc.LongArrayList;
|
|
|
|
import com.carrotsearch.hppc.LongIntHashMap;
|
2022-03-01 13:43:19 +00:00
|
|
|
import com.onthegomap.planetiler.stats.Timer;
|
2022-03-23 00:34:54 +00:00
|
|
|
import com.onthegomap.planetiler.util.DiskBacked;
|
2021-12-23 10:42:24 +00:00
|
|
|
import com.onthegomap.planetiler.util.MemoryEstimator;
|
2022-03-23 00:34:54 +00:00
|
|
|
import java.io.IOException;
|
|
|
|
import java.io.UncheckedIOException;
|
|
|
|
import java.nio.file.Path;
|
2021-05-04 11:07:16 +00:00
|
|
|
import java.util.Arrays;
|
|
|
|
import org.slf4j.Logger;
|
|
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
|
2021-09-10 00:46:20 +00:00
|
|
|
/**
|
|
|
|
* An in-memory map that stores multiple {@code long} values for each {@code long} key.
|
2022-03-23 00:34:54 +00:00
|
|
|
* <p>
|
|
|
|
* Implementations extend {@link Replaceable} if they support replacing the previous set of values for a key and/or
|
|
|
|
* {@link Appendable} if they support adding new values for a key.
|
2021-09-10 00:46:20 +00:00
|
|
|
*/
|
2022-03-23 00:34:54 +00:00
|
|
|
public interface LongLongMultimap extends MemoryEstimator.HasEstimate, DiskBacked, AutoCloseable {
|
|
|
|
|
2022-03-27 09:49:58 +00:00
|
|
|
/** Returns a {@link Noop} implementation that does nothing on put and throws an exception if you try to get. */
|
2022-03-23 00:34:54 +00:00
|
|
|
static Noop noop() {
|
|
|
|
return new Noop();
|
|
|
|
}
|
|
|
|
|
|
|
|
/** Returns a new multimap where each write sets the list of values for a key, and that order is preserved on read. */
|
|
|
|
static Replaceable newReplaceableMultimap(Storage storage, Storage.Params params) {
|
|
|
|
return new DenseOrderedMultimap(storage, params);
|
|
|
|
}
|
|
|
|
|
|
|
|
/** Returns a new replaceable multimap held in-memory. */
|
|
|
|
static Replaceable newInMemoryReplaceableMultimap() {
|
|
|
|
return newReplaceableMultimap(Storage.RAM, null);
|
|
|
|
}
|
|
|
|
|
|
|
|
/** Returns a new multimap where each write adds a value for the given key. */
|
|
|
|
static Appendable newAppendableMultimap() {
|
|
|
|
return new SparseUnorderedBinarySearchMultimap();
|
|
|
|
}
|
2021-04-10 09:25:42 +00:00
|
|
|
|
2021-09-10 00:46:20 +00:00
|
|
|
/**
|
2022-03-23 00:34:54 +00:00
|
|
|
* Returns a new longlong multimap from config strings.
|
|
|
|
*
|
|
|
|
* @param storage name of the {@link Storage} implementation to use
|
|
|
|
* @param path where to store data (if mmap)
|
|
|
|
* @param madvise whether to use linux madvise random to improve read performance
|
|
|
|
* @return A longlong map instance
|
|
|
|
* @throws IllegalArgumentException if {@code name} or {@code storage} is not valid
|
2021-09-10 00:46:20 +00:00
|
|
|
*/
|
2022-03-23 00:34:54 +00:00
|
|
|
static Replaceable newReplaceableMultimap(String storage, Path path, boolean madvise) {
|
|
|
|
return newReplaceableMultimap(Storage.from(storage), new Storage.Params(path, madvise));
|
|
|
|
}
|
2021-04-12 10:05:32 +00:00
|
|
|
|
2021-09-10 00:46:20 +00:00
|
|
|
/**
|
|
|
|
* Returns the values for a key. Safe to be called by multiple threads after all values have been written. After the
|
|
|
|
* first read, all writes will fail.
|
|
|
|
*/
|
2021-05-04 11:07:16 +00:00
|
|
|
LongArrayList get(long key);
|
|
|
|
|
2022-03-23 00:34:54 +00:00
|
|
|
@Override
|
|
|
|
void close();
|
|
|
|
|
|
|
|
@Override
|
|
|
|
default long diskUsageBytes() {
|
|
|
|
return 0L;
|
2021-05-04 11:07:16 +00:00
|
|
|
}
|
|
|
|
|
2022-03-23 00:34:54 +00:00
|
|
|
/**
|
|
|
|
* A map from long to list of longs where you can use {@link #replaceValues(long, LongArrayList)} to set replace the
|
|
|
|
* previous list of values with a new one.
|
|
|
|
*/
|
|
|
|
interface Replaceable extends LongLongMultimap {
|
|
|
|
|
|
|
|
/** Replaces the previous list of values for {@code key} with {@code values}. */
|
|
|
|
void replaceValues(long key, LongArrayList values);
|
2021-05-04 11:07:16 +00:00
|
|
|
}
|
|
|
|
|
2022-03-23 00:34:54 +00:00
|
|
|
/**
|
|
|
|
* A map from long to list of longs where you can use {@link #put(long, long)} or {@link #putAll(long, LongArrayList)}
|
|
|
|
* to append values for a key.
|
|
|
|
*/
|
|
|
|
interface Appendable extends LongLongMultimap {
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Writes the value for a key. Not thread safe!
|
|
|
|
*/
|
|
|
|
void put(long key, long value);
|
|
|
|
|
|
|
|
default void putAll(long key, LongArrayList vals) {
|
|
|
|
for (int i = 0; i < vals.size(); i++) {
|
|
|
|
put(key, vals.get(i));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/** Dummy implementation of a map that throws an exception from {@link #get(long)}. */
|
|
|
|
class Noop implements Replaceable, Appendable {
|
|
|
|
|
|
|
|
@Override
|
2022-04-23 09:58:49 +00:00
|
|
|
public void put(long key, long value) {
|
|
|
|
// do nothing on update
|
|
|
|
}
|
2022-03-23 00:34:54 +00:00
|
|
|
|
|
|
|
@Override
|
|
|
|
public LongArrayList get(long key) {
|
|
|
|
throw new UnsupportedOperationException("get(key) not implemented");
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public long estimateMemoryUsageBytes() {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2022-04-23 09:58:49 +00:00
|
|
|
public void close() {
|
|
|
|
// nothing to close
|
|
|
|
}
|
2022-03-23 00:34:54 +00:00
|
|
|
|
|
|
|
@Override
|
2022-04-23 09:58:49 +00:00
|
|
|
public void replaceValues(long key, LongArrayList values) {
|
|
|
|
// do nothing on update
|
|
|
|
}
|
2021-05-04 11:07:16 +00:00
|
|
|
}
|
|
|
|
|
2021-09-10 00:46:20 +00:00
|
|
|
/**
|
|
|
|
* A map from {@code long} to {@code long} stored as a list of keys and values that uses binary search to find the
|
|
|
|
* values for a key. Inserts do not need to be ordered, the first read will sort the array.
|
|
|
|
*/
|
2022-03-23 00:34:54 +00:00
|
|
|
class SparseUnorderedBinarySearchMultimap implements Appendable {
|
2021-05-04 11:07:16 +00:00
|
|
|
|
|
|
|
private static final Logger LOGGER = LoggerFactory.getLogger(SparseUnorderedBinarySearchMultimap.class);
|
|
|
|
|
|
|
|
private static final LongArrayList EMPTY_LIST = new LongArrayList();
|
|
|
|
private final LongArrayList keys = new LongArrayList();
|
|
|
|
private final LongArrayList values = new LongArrayList();
|
|
|
|
private volatile boolean prepared = false;
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public void put(long key, long val) {
|
|
|
|
if (val <= 0) {
|
|
|
|
throw new IllegalArgumentException("Invalid value: " + val + " must be >0");
|
|
|
|
}
|
|
|
|
if (prepared) {
|
|
|
|
throw new IllegalArgumentException("Cannot insert after preparing");
|
|
|
|
}
|
|
|
|
keys.add(key);
|
|
|
|
values.add(val);
|
|
|
|
}
|
|
|
|
|
|
|
|
private void prepare() {
|
|
|
|
if (!prepared) {
|
|
|
|
synchronized (this) {
|
|
|
|
if (!prepared) {
|
|
|
|
doPrepare();
|
|
|
|
prepared = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-09-10 00:46:20 +00:00
|
|
|
/** Sort the keys and values arrays by key */
|
2021-05-04 11:07:16 +00:00
|
|
|
private void doPrepare() {
|
2022-03-01 13:43:19 +00:00
|
|
|
Timer timer = Timer.start();
|
2021-05-04 11:07:16 +00:00
|
|
|
|
2021-08-10 10:55:30 +00:00
|
|
|
LOGGER.debug("Sorting long long multimap...");
|
2021-05-04 11:07:16 +00:00
|
|
|
long[] sortedKeys = keys.toArray();
|
2021-08-10 10:55:30 +00:00
|
|
|
|
|
|
|
// this happens in a worker thread, but it's OK to use parallel sort because
|
|
|
|
// all other threads will block while we prepare the multimap.
|
2021-05-04 11:07:16 +00:00
|
|
|
Arrays.parallelSort(sortedKeys);
|
|
|
|
|
2021-09-10 00:46:20 +00:00
|
|
|
// after sorting keys, sort values by iterating through each unordered key/value pair and
|
|
|
|
// using binary search to find where to insert the result in sorted values.
|
2021-05-04 11:07:16 +00:00
|
|
|
long[] sortedValues = new long[sortedKeys.length];
|
|
|
|
int from = 0;
|
|
|
|
while (from < keys.size()) {
|
|
|
|
long key = keys.get(from);
|
|
|
|
int to = Arrays.binarySearch(sortedKeys, key);
|
|
|
|
if (to < 0) {
|
|
|
|
throw new IllegalStateException("Key not found: " + key);
|
|
|
|
}
|
|
|
|
// skip back to the first entry for this key
|
|
|
|
while (to >= 0 && sortedKeys[to] == key) {
|
|
|
|
to--;
|
|
|
|
}
|
|
|
|
// skip ahead past values we've already added for this key
|
|
|
|
do {
|
|
|
|
to++;
|
|
|
|
} while (sortedValues[to] != 0);
|
|
|
|
while (from < keys.size() && keys.get(from) == key) {
|
|
|
|
sortedValues[to++] = values.get(from++);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
keys.buffer = sortedKeys;
|
|
|
|
values.buffer = sortedValues;
|
2022-04-23 09:58:49 +00:00
|
|
|
LOGGER.debug("Sorted long long multimap {}", timer.stop());
|
2021-05-04 11:07:16 +00:00
|
|
|
}
|
2021-04-12 10:05:32 +00:00
|
|
|
|
|
|
|
@Override
|
2021-05-04 11:07:16 +00:00
|
|
|
public LongArrayList get(long key) {
|
|
|
|
prepare();
|
|
|
|
if (keys.isEmpty()) {
|
|
|
|
return EMPTY_LIST;
|
|
|
|
}
|
|
|
|
int size = keys.size();
|
|
|
|
int index = Arrays.binarySearch(keys.buffer, 0, size, key);
|
2021-09-10 00:46:20 +00:00
|
|
|
LongArrayList result = new LongArrayList();
|
2021-05-04 11:07:16 +00:00
|
|
|
if (index >= 0) {
|
2021-09-10 00:46:20 +00:00
|
|
|
// binary search might drop us in the middle of repeated values, so look forwards...
|
2021-05-04 11:07:16 +00:00
|
|
|
for (int i = index; i < size && keys.get(i) == key; i++) {
|
|
|
|
result.add(values.get(i));
|
|
|
|
}
|
2021-09-10 00:46:20 +00:00
|
|
|
// ... and backwards to get all the matches
|
2021-05-04 11:07:16 +00:00
|
|
|
for (int i = index - 1; i >= 0 && keys.get(i) == key; i--) {
|
|
|
|
result.add(values.get(i));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
2021-04-12 10:05:32 +00:00
|
|
|
|
2021-05-04 11:07:16 +00:00
|
|
|
@Override
|
2021-05-04 12:02:22 +00:00
|
|
|
public long estimateMemoryUsageBytes() {
|
2021-09-10 00:46:20 +00:00
|
|
|
return estimateSize(keys) + estimateSize(values);
|
2021-04-12 10:05:32 +00:00
|
|
|
}
|
2022-03-23 00:34:54 +00:00
|
|
|
|
|
|
|
@Override
|
|
|
|
public void close() {
|
|
|
|
keys.release();
|
|
|
|
values.release();
|
|
|
|
}
|
2021-04-12 10:05:32 +00:00
|
|
|
}
|
|
|
|
|
2021-09-10 00:46:20 +00:00
|
|
|
/**
|
|
|
|
* A map from {@code long} to {@code long} where each putAll replaces previous values and results are returned in the
|
|
|
|
* same order they were inserted.
|
|
|
|
*/
|
2022-03-23 00:34:54 +00:00
|
|
|
class DenseOrderedMultimap implements Replaceable {
|
2021-05-04 11:07:16 +00:00
|
|
|
|
|
|
|
private static final LongArrayList EMPTY_LIST = new LongArrayList();
|
2022-03-01 13:43:19 +00:00
|
|
|
private final LongIntHashMap keyToValuesIndex = Hppc.newLongIntHashMap();
|
2021-09-10 00:46:20 +00:00
|
|
|
// each block starts with a "length" header then contains that number of entries
|
2022-03-23 00:34:54 +00:00
|
|
|
private final AppendStore.Longs values;
|
2021-04-12 10:05:32 +00:00
|
|
|
|
2022-03-23 00:34:54 +00:00
|
|
|
public DenseOrderedMultimap(Storage storage, Storage.Params params) {
|
|
|
|
values = switch (storage) {
|
|
|
|
case MMAP -> new AppendStoreMmap.Longs(params);
|
|
|
|
case RAM -> new AppendStoreRam.Longs(false);
|
|
|
|
case DIRECT -> new AppendStoreRam.Longs(true);
|
|
|
|
};
|
2021-05-04 11:07:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2022-03-23 00:34:54 +00:00
|
|
|
public void replaceValues(long key, LongArrayList values) {
|
|
|
|
if (values.isEmpty()) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
keyToValuesIndex.put(key, (int) this.values.size());
|
|
|
|
this.values.appendLong(values.size());
|
|
|
|
for (int i = 0; i < values.size(); i++) {
|
|
|
|
this.values.appendLong(values.get(i));
|
|
|
|
}
|
2021-05-04 11:07:16 +00:00
|
|
|
}
|
2021-04-12 10:05:32 +00:00
|
|
|
|
2021-05-04 11:07:16 +00:00
|
|
|
@Override
|
|
|
|
public LongArrayList get(long key) {
|
2021-09-10 00:46:20 +00:00
|
|
|
int index = keyToValuesIndex.getOrDefault(key, -1);
|
2021-05-04 11:07:16 +00:00
|
|
|
if (index >= 0) {
|
|
|
|
LongArrayList result = new LongArrayList();
|
2022-03-23 00:34:54 +00:00
|
|
|
int num = (int) values.getLong(index);
|
|
|
|
for (int i = 0; i < num; i++) {
|
2022-04-23 09:58:49 +00:00
|
|
|
result.add(values.getLong(i + index + 1L));
|
2022-03-23 00:34:54 +00:00
|
|
|
}
|
2021-05-04 11:07:16 +00:00
|
|
|
return result;
|
|
|
|
} else {
|
|
|
|
return EMPTY_LIST;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2021-05-04 12:02:22 +00:00
|
|
|
public long estimateMemoryUsageBytes() {
|
2021-09-10 00:46:20 +00:00
|
|
|
return estimateSize(keyToValuesIndex) + estimateSize(values);
|
2021-04-12 10:05:32 +00:00
|
|
|
}
|
2022-03-23 00:34:54 +00:00
|
|
|
|
|
|
|
@Override
|
|
|
|
public long diskUsageBytes() {
|
|
|
|
return values.diskUsageBytes();
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public void close() {
|
|
|
|
keyToValuesIndex.release();
|
|
|
|
try {
|
|
|
|
values.close();
|
|
|
|
} catch (IOException e) {
|
|
|
|
throw new UncheckedIOException(e);
|
|
|
|
}
|
|
|
|
}
|
2021-04-12 10:05:32 +00:00
|
|
|
}
|
2021-04-10 09:25:42 +00:00
|
|
|
}
|