diff --git a/src/main/java/com/onthegomap/flatmap/collections/LongLongMultimap.java b/src/main/java/com/onthegomap/flatmap/collections/LongLongMultimap.java
index a3885614..a7fce845 100644
--- a/src/main/java/com/onthegomap/flatmap/collections/LongLongMultimap.java
+++ b/src/main/java/com/onthegomap/flatmap/collections/LongLongMultimap.java
@@ -1,22 +1,171 @@
 package com.onthegomap.flatmap.collections;
 
+import com.carrotsearch.hppc.LongArrayList;
+import com.carrotsearch.hppc.LongIntHashMap;
+import com.graphhopper.util.StopWatch;
+import java.util.Arrays;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 public interface LongLongMultimap {
 
   void put(long key, long value);
 
-  class FewUnorderedBinarySearchMultimap implements LongLongMultimap {
+  LongArrayList get(long key); // values stored under key; both implementations below return an empty list when there are none
 
-    @Override
-    public void put(long key, long value) {
+  long estimatedMemoryUsageBytes(); // rough size estimate derived from the lengths of the backing arrays
 
+  default void putAll(long key, LongArrayList vals) { // default bulk insert: one put() per value, in list order
+    for (int i = 0; i < vals.size(); i++) {
+      put(key, vals.get(i));
     }
   }
 
-  class ManyOrderedBinarySearchMultimap implements LongLongMultimap {
+  static LongLongMultimap newDensedOrderedMultimap() { // factory for the hash-indexed implementation (DenseOrderedHppcMultimap)
+    return new DenseOrderedHppcMultimap();
+  }
+
+  static LongLongMultimap newSparseUnorderedMultimap() { // factory for the sort-then-binary-search implementation
+    return new SparseUnorderedBinarySearchMultimap();
+  }
+
+  class SparseUnorderedBinarySearchMultimap implements LongLongMultimap { // appends (key, value) pairs, sorts them by key on the first get()
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(SparseUnorderedBinarySearchMultimap.class);
+
+    private static final LongArrayList EMPTY_LIST = new LongArrayList(); // shared instance returned by get() when the map has no entries; callers must not mutate it
+    private final LongArrayList keys = new LongArrayList(); // keys[i] pairs with values[i]
+    private final LongArrayList values = new LongArrayList();
+    private volatile boolean prepared = false; // true once doPrepare() has sorted the data; volatile because prepare() reads it outside the lock
+    private static final ThreadLocal<LongArrayList> resultHolder = ThreadLocal.withInitial(LongArrayList::new); // typed (not raw) so get() yields a LongArrayList; one scratch list per thread
+
+    protected LongArrayList getResultHolder() { // returns the calling thread's scratch list, emptied for reuse
+      LongArrayList res = resultHolder.get();
+      res.elementsCount = 0; // logically clear the list while keeping its allocated buffer
+      return res;
+    }
 
     @Override
-    public void put(long key, long value) {
+    public void put(long key, long val) { // append one pair; throws IllegalArgumentException for val <= 0 or once the map has been prepared
+      if (val <= 0) { // 0 is reserved: doPrepare() treats a 0 in sortedValues as "slot not filled yet"
+        throw new IllegalArgumentException("Invalid value: " + val + " must be >0");
+      }
+      if (prepared) { // data is already sorted, so an append would break the binary search in get()
+        throw new IllegalArgumentException("Cannot insert after preparing");
+      }
+      keys.add(key);
+      values.add(val);
+    }
 
+    private void prepare() { // sorts lazily; the second check under the lock keeps doPrepare() from running twice
+      if (!prepared) {
+        synchronized (this) {
+          if (!prepared) {
+            doPrepare();
+            prepared = true; // published only after the sorted buffers are in place
+          }
+        }
+      }
+    }
+
+    private void doPrepare() { // rebuilds keys/values so keys are ascending and each value stays paired with its key
+      StopWatch watch = new StopWatch().start();
+
+      LOGGER.info("Sorting long long multimap keys...");
+      long[] sortedKeys = keys.toArray(); // snapshot of the inserted keys, sorted independently of the values
+      Arrays.parallelSort(sortedKeys);
+
+      LOGGER.info("Sorted keys, now values...");
+      long[] sortedValues = new long[sortedKeys.length]; // zero-filled; a 0 means the slot has not been assigned yet
+      int from = 0; // read cursor over the original insertion-order entries
+      while (from < keys.size()) { // each pass places one run of consecutive entries that share a key
+        long key = keys.get(from);
+        int to = Arrays.binarySearch(sortedKeys, key); // lands on some slot holding this key, not necessarily the first
+        if (to < 0) {
+          throw new IllegalStateException("Key not found: " + key);
+        }
+        // skip back to the first entry for this key
+        while (to >= 0 && sortedKeys[to] == key) {
+          to--;
+        }
+        // skip ahead past values we've already added for this key
+        do {
+          to++;
+        } while (sortedValues[to] != 0);
+        while (from < keys.size() && keys.get(from) == key) { // copy the run into the free slots for this key
+          sortedValues[to++] = values.get(from++);
+        }
+      }
+      keys.buffer = sortedKeys; // swap the sorted arrays in as the lists' backing buffers
+      values.buffer = sortedValues;
+      LOGGER.info("Sorted long long multimap " + watch.stop());
+    }
+
+    @Override
+    public LongArrayList get(long key) { // NOTE: a non-empty-map result is the calling thread's reused scratch list, overwritten by that thread's next get()
+      prepare();
+      if (keys.isEmpty()) {
+        return EMPTY_LIST;
+      }
+      int size = keys.size();
+      int index = Arrays.binarySearch(keys.buffer, 0, size, key); // search only the first size slots of the buffer
+      LongArrayList result = getResultHolder();
+      if (index >= 0) { // the hit may be anywhere inside a run of equal keys, so scan both directions
+        for (int i = index; i < size && keys.get(i) == key; i++) { // the hit itself and everything to its right
+          result.add(values.get(i));
+        }
+        for (int i = index - 1; i >= 0 && keys.get(i) == key; i--) { // then everything to its left
+          result.add(values.get(i));
+        }
+      }
+      return result;
+    }
+
+    @Override
+    public long estimatedMemoryUsageBytes() { // 8 bytes per long slot plus 24 per list (presumably object/array overhead - confirm)
+      return 24L + 8L * keys.buffer.length + 24L + 8L * values.buffer.length;
+    }
+  }
+
+  class DenseOrderedHppcMultimap implements LongLongMultimap { // hash map from key to the position of its values inside one flat list
+
+    private static final LongArrayList EMPTY_LIST = new LongArrayList(); // shared instance returned for unknown keys; callers must not mutate it
+    private final LongIntHashMap keys = new LongIntHashMap(); // key -> index in values of that key's count slot
+    private final LongArrayList values = new LongArrayList(); // flat layout per key: [count, v1, ..., vcount]
+
+    @Override
+    public void putAll(long key, LongArrayList others) { // NOTE(review): a repeated key is re-pointed at the new run, so the earlier run looks unreachable - confirm callers insert each key once
+      if (others.isEmpty()) { // nothing is recorded for an empty list, so get() reports the key as absent
+        return;
+      }
+      keys.put(key, values.size()); // remember where this key's run begins
+      values.add(others.size()); // count slot first
+      values.add(others.buffer, 0, others.size()); // then the values, in input order
+    }
+
+    @Override
+    public void put(long key, long val) { // single-value form of putAll: stores a run of length 1
+      putAll(key, LongArrayList.from(val));
+    }
+
+    @Override
+    public LongArrayList get(long key) { // returns a fresh copy of the key's values in insertion order, or EMPTY_LIST if the key is unknown
+      int index = keys.getOrDefault(key, -1); // -1 marks "absent"; real indexes are >= 0
+      if (index >= 0) {
+        LongArrayList result = new LongArrayList();
+        int num = (int) values.get(index); // the count slot written by putAll
+        result.add(values.buffer, index + 1, num); // copy the num values that follow the count slot
+        return result;
+      } else {
+        return EMPTY_LIST;
+      }
+    }
+
+    @Override
+    public long estimatedMemoryUsageBytes() { // 8 bytes per long slot, 4 per int slot, plus 24 per array (presumably header overhead - confirm)
+      return 24L + 8L * keys.keys.length +
+        24L + 4L * keys.values.length +
+        24L + 8L * values.buffer.length;
     }
   }
 }
diff --git a/src/main/java/com/onthegomap/flatmap/read/OpenStreetMapReader.java b/src/main/java/com/onthegomap/flatmap/read/OpenStreetMapReader.java
index 1c7be447..071d74ba 100644
--- a/src/main/java/com/onthegomap/flatmap/read/OpenStreetMapReader.java
+++ b/src/main/java/com/onthegomap/flatmap/read/OpenStreetMapReader.java
@@ -47,12 +47,12 @@ public class OpenStreetMapReader implements Closeable {
   private GHLongObjectHashMap relationInfo = new GHLongObjectHashMap<>();
   private final AtomicLong relationInfoSizes = new AtomicLong(0);
   // ~800mb, ~1.6GB when sorting
-  private LongLongMultimap wayToRelations = new LongLongMultimap.FewUnorderedBinarySearchMultimap();
+  private LongLongMultimap wayToRelations = LongLongMultimap.newSparseUnorderedMultimap();
   // for multipolygons need to store way info (20m ways, 800m nodes) to use when processing relations (4.5m)
   // ~300mb
   private LongHashSet waysInMultipolygon = new GHLongHashSet();
   // ~7GB
-  private LongLongMultimap multipolygonWayGeometries = new LongLongMultimap.ManyOrderedBinarySearchMultimap();
+  private LongLongMultimap multipolygonWayGeometries = LongLongMultimap.newDensedOrderedMultimap();
 
   public OpenStreetMapReader(OsmInputFile osmInputFile, LongLongMap nodeDb, Profile profile, Stats stats) {
     this.osmInputFile = osmInputFile;
diff --git a/src/test/java/com/onthegomap/flatmap/collections/LongLongMultimapTest.java b/src/test/java/com/onthegomap/flatmap/collections/LongLongMultimapTest.java
new file mode 100644
index 00000000..837a8a1d
--- /dev/null
+++ b/src/test/java/com/onthegomap/flatmap/collections/LongLongMultimapTest.java
@@ -0,0 +1,164 @@
+package com.onthegomap.flatmap.collections;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import com.carrotsearch.hppc.LongArrayList;
+import java.util.Arrays;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+public abstract class LongLongMultimapTest { // shared test cases; the nested subclasses supply the implementation under test
+
+  protected LongLongMultimap map; // assigned by each subclass's setup()
+  protected boolean retainInputOrder = false; // when false, assertResultLists sorts both lists before comparing
+
+  @Test
+  public void missingValue() {
+    assertTrue(map.get(0).isEmpty());
+  }
+
+  @Test
+  public void oneValue() {
+    map.put(1, 1);
+    assertResultLists(LongArrayList.from(), map.get(0));
+    assertResultLists(LongArrayList.from(1), map.get(1));
+    assertResultLists(LongArrayList.from(), map.get(2));
+  }
+
+  @Test
+  public void twoConsecutiveValues() {
+    map.put(1, 1);
+    map.put(2, 2);
+    assertResultLists(LongArrayList.from(), map.get(0));
+    assertResultLists(LongArrayList.from(1), map.get(1));
+    assertResultLists(LongArrayList.from(2), map.get(2));
+    assertResultLists(LongArrayList.from(), map.get(3));
+  }
+
+  @Test
+  public void twoNonconsecutiveValues() {
+    map.put(1, 1);
+    map.put(3, 3);
+    assertResultLists(LongArrayList.from(), map.get(0));
+    assertResultLists(LongArrayList.from(1), map.get(1));
+    assertResultLists(LongArrayList.from(), map.get(2));
+    assertResultLists(LongArrayList.from(3), map.get(3));
+    assertResultLists(LongArrayList.from(), map.get(4));
+  }
+
+  @Test
+  public void returnToFirstKey() { // interleaves puts for the same keys; only exercised when retainInputOrder is false
+    if (retainInputOrder) {
+      return;
+    }
+    map.put(3, 31);
+    map.put(2, 21);
+    map.put(1, 11);
+    map.put(1, 12);
+    map.put(2, 22);
+    map.put(3, 32);
+    map.put(3, 33);
+    map.put(2, 23);
+    map.put(1, 13);
+    assertResultLists(LongArrayList.from(11, 12, 13), map.get(1));
+    assertResultLists(LongArrayList.from(21, 22, 23), map.get(2));
+    assertResultLists(LongArrayList.from(31, 32, 33), map.get(3));
+    assertResultLists(LongArrayList.from(), map.get(4));
+  }
+
+  @Test
+  public void manyInsertsOrdered() { // keys 0..99 in ascending order, ten ascending values each
+    long[] toInsert = new long[10];
+    for (int i = 0; i < 100; i++) {
+      for (int j = 0; j < 10; j++) {
+        toInsert[j] = i * 10 + j + 1; // +1 keeps every value > 0, which the sparse implementation requires
+      }
+      map.putAll(i, LongArrayList.from(toInsert));
+    }
+    for (int i = 0; i < 100; i++) {
+      assertResultLists(LongArrayList.from(
+        i * 10 + 1,
+        i * 10 + 2,
+        i * 10 + 3,
+        i * 10 + 4,
+        i * 10 + 5,
+        i * 10 + 6,
+        i * 10 + 7,
+        i * 10 + 8,
+        i * 10 + 9,
+        i * 10 + 10
+      ), map.get(i));
+    }
+  }
+
+  private void assertResultLists(LongArrayList expected, LongArrayList actual) { // compares as sorted lists unless retainInputOrder is set
+    if (!retainInputOrder) {
+      if (!expected.isEmpty()) {
+        Arrays.sort(expected.buffer, 0, expected.size()); // sorts in place, only the populated prefix of the buffer
+      }
+      if (!actual.isEmpty()) {
+        Arrays.sort(actual.buffer, 0, actual.size()); // also in place: this reorders the list returned by map.get()
+      }
+    }
+    assertEquals(expected, actual);
+  }
+
+  @Test
+  public void manyInsertsUnordered() { // keys 99..0 in descending order, ten descending values each
+    for (long i = 99; i >= 0; i--) {
+      map.putAll(i, LongArrayList.from(
+        i * 10 + 10,
+        i * 10 + 9,
+        i * 10 + 8,
+        i * 10 + 7,
+        i * 10 + 6,
+        i * 10 + 5,
+        i * 10 + 4,
+        i * 10 + 3,
+        i * 10 + 2,
+        i * 10 + 1
+      ));
+    }
+    for (int i = 0; i < 100; i++) {
+      assertResultLists(LongArrayList.from(
+        i * 10 + 10,
+        i * 10 + 9,
+        i * 10 + 8,
+        i * 10 + 7,
+        i * 10 + 6,
+        i * 10 + 5,
+        i * 10 + 4,
+        i * 10 + 3,
+        i * 10 + 2,
+        i * 10 + 1
+      ), map.get(i));
+    }
+  }
+
+  @Test
+  public void multiInsert() { // mixes putAll and put, inserting the larger key first
+    map.putAll(1, LongArrayList.from(1, 2, 3));
+    map.put(0, 3);
+    assertResultLists(LongArrayList.from(3), map.get(0));
+    assertResultLists(LongArrayList.from(1, 2, 3), map.get(1));
+    assertResultLists(LongArrayList.from(), map.get(2));
+  }
+
+  public static class SparseUnorderedTest extends LongLongMultimapTest { // runs the shared cases against the sparse implementation
+
+    @BeforeEach
+    public void setup() {
+      this.map = LongLongMultimap.newSparseUnorderedMultimap();
+    }
+  }
+
+  public static class DenseOrderedTest extends LongLongMultimapTest { // runs the shared cases against the dense implementation, comparing in input order
+
+    @BeforeEach
+    public void setup() {
+      retainInputOrder = true;
+      this.map = LongLongMultimap.newDensedOrderedMultimap();
+    }
+  }
+}