planetiler/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/FeatureSort.java

147 wiersze
4.5 KiB
Java

package com.onthegomap.planetiler.collection;
import com.onthegomap.planetiler.stats.Stats;
import com.onthegomap.planetiler.util.CloseableConsumer;
import com.onthegomap.planetiler.util.DiskBacked;
import com.onthegomap.planetiler.util.MemoryEstimator;
import com.onthegomap.planetiler.worker.WeightedHandoffQueue;
import com.onthegomap.planetiler.worker.Worker;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.stream.IntStream;
import javax.annotation.concurrent.NotThreadSafe;
/**
 * Collects {@link SortableFeature} instances written in arbitrary order and exposes them for iteration ordered by
 * {@link SortableFeature#key()}.
 * <p>
 * Only supports single-threaded writes and reads.
 */
@NotThreadSafe
interface FeatureSort extends Iterable<SortableFeature>, DiskBacked, MemoryEstimator.HasEstimate {

  /*
   * Idea graveyard (all too slow):
   * - sqlite (close runner-up to external merge sort - only 50% slower)
   * - mapdb b-tree
   * - rocksdb
   * - berkeley db
   */

  /** Creates a sorter that keeps every feature in memory. Intended for toy examples such as unit tests. */
  static FeatureSort newInMemory() {
    List<SortableFeature> features = Collections.synchronizedList(new ArrayList<>());
    return new FeatureSort() {

      @Override
      public CloseableConsumer<SortableFeature> writerForThread() {
        // Every writer appends straight to the shared synchronized list.
        return feature -> features.add(feature);
      }

      @Override
      public void sort() {
        Comparator<SortableFeature> byNaturalOrder = Comparator.naturalOrder();
        features.sort(byNaturalOrder);
      }

      @Override
      public Iterator<SortableFeature> iterator() {
        return features.iterator();
      }

      @Override
      public Iterator<SortableFeature> iterator(int shard, int shards) {
        boolean validShard = shard >= 0 && shard < shards;
        if (!validShard) {
          throw new IllegalArgumentException("Bad shard params: shard=%d shards=%d".formatted(shard, shards));
        }
        // Shard k receives the elements at positions k, k + shards, k + 2 * shards, ...
        int total = features.size();
        IntStream shardIndexes = IntStream.range(0, total).filter(index -> index % shards == shard);
        return shardIndexes.mapToObj(features::get).iterator();
      }

      @Override
      public long numFeaturesWritten() {
        return features.size();
      }

      @Override
      public int chunksToRead() {
        // The in-memory implementation reports one chunk per stored feature.
        return features.size();
      }

      @Override
      public long estimateMemoryUsageBytes() {
        // The in-memory implementation does not track its footprint and always reports 0.
        return 0;
      }

      @Override
      public long diskUsageBytes() {
        // Nothing is written to disk by the in-memory implementation.
        return 0;
      }
    };
  }

  /** Sorts the features that have been written (the in-memory implementation sorts by natural order). */
  void sort();

  /** Returns the number of features written (the in-memory implementation reports its list size). */
  long numFeaturesWritten();

  /** Copies every element into a new list. WARNING: this will materialize all elements in-memory. */
  default List<SortableFeature> toList() {
    List<SortableFeature> result = new ArrayList<>();
    Iterator<SortableFeature> remaining = iterator();
    while (remaining.hasNext()) {
      result.add(remaining.next());
    }
    return result;
  }

  /**
   * Returns a new writer that can be used to write features from a single thread independent of writers used from
   * other threads.
   */
  CloseableConsumer<SortableFeature> writerForThread();

  /** Iterates over all features by requesting the only shard of a one-shard split. */
  @Override
  default Iterator<SortableFeature> iterator() {
    return iterator(0, 1);
  }

  /**
   * Returns an iterator over a subset of the features.
   *
   * @param shard  The index of this iterator
   * @param shards The total number of iterators that will be used
   * @return An iterator over a subset of features that when combined with all other iterators will iterate over the
   *         full set.
   */
  Iterator<SortableFeature> iterator(int shard, int shards);

  /**
   * Reads features using {@code threads} parallel reader threads and merges their output into one iterator.
   * <p>
   * Each reader thread walks its own shard via {@link #iterator(int, int)} and hands every feature to a dedicated
   * {@link WeightedHandoffQueue}; the queues are then combined with {@code LongMerger.mergeSuppliers} using
   * {@code SortableFeature.COMPARE_BYTES}.
   *
   * @param stats   Stat tracker
   * @param threads The number of parallel read threads to spawn
   * @return a {@link ParallelIterator} with a handle to the new read threads that were spawned, and an
   *         {@link Iterable} that can be used to iterate over the results.
   */
  default ParallelIterator parallelIterator(Stats stats, int threads) {
    // One hand-off queue per reader thread, frozen into an unmodifiable list before the readers start.
    List<WeightedHandoffQueue<SortableFeature>> pending = new ArrayList<>();
    for (int i = 0; i < threads; i++) {
      pending.add(new WeightedHandoffQueue<SortableFeature>(500, 10_000));
    }
    List<WeightedHandoffQueue<SortableFeature>> queues = List.copyOf(pending);

    Worker reader = new Worker("read", stats, threads, shardIndex -> {
      // Closing the queue when the shard is exhausted (or on failure) is handled by try-with-resources.
      try (var queue = queues.get(shardIndex)) {
        for (Iterator<SortableFeature> it = iterator(shardIndex, threads); it.hasNext();) {
          queue.accept(it.next(), 1);
        }
      }
    });

    Iterator<SortableFeature> merged = LongMerger.mergeSuppliers(queues, SortableFeature.COMPARE_BYTES);
    return new ParallelIterator(reader, merged);
  }

  /**
   * Returns the number of chunks to read.
   * <p>
   * NOTE(review): only the in-memory implementation is visible here (it returns its element count); confirm the
   * meaning for other implementations.
   */
  int chunksToRead();

  /** Pairs the {@link Worker} running the reader threads with the iterator over the merged results. */
  record ParallelIterator(Worker reader, @Override Iterator<SortableFeature> iterator)
    implements Iterable<SortableFeature> {}
}