package com.onthegomap.planetiler.worker; import static com.onthegomap.planetiler.worker.Worker.joinFutures; import com.onthegomap.planetiler.collection.IterableOnce; import com.onthegomap.planetiler.stats.ProgressLoggers; import com.onthegomap.planetiler.stats.Stats; import java.time.Duration; import java.util.Collection; import java.util.Iterator; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; import java.util.function.Consumer; /** * A mini-framework for chaining sequential steps that run in dedicated threads with a queue between each. *

* For example: * *

 * {@code
 * WorkerPipeline.start("name", stats)
 *   .readFrom("reader", List.of(1, 2, 3))
 *   .addBuffer("reader_queue", 10)
 *   .addWorker("process", 2, (i, next) -> next.accept(doExpensiveWork(i))
 *   .addBuffer("writer_queue", 10)
 *   .sinkToConsumer("writer", 1, result -> writeToDisk(result))
 *   .await();
 * }
 * 
*

* NOTE: to do any forking/joining, you must construct and wire-up queues and each sequence of steps manually. * * @param input type of this pipeline */ public record WorkerPipeline ( String name, WorkerPipeline previous, WorkQueue inputQueue, Worker worker, CompletableFuture done ) { /* * Empty/Bufferable/Builder are used to provide a fluent API for building a model of the steps to run (and keep * pointers to workers and queues) and Builder.build converts to the top-level WorkerPipeline that clients * can use to wait on results. */ /** Returns a new pipeline builder where all worker and queue names will start with {@code prefix_}. */ public static Empty start(String prefix, Stats stats) { return new Empty(prefix, stats); } /** * Blocks until all work has been completed by all steps of this pipeline, logging progress at a fixed {@code * logInterval}. * * @throws RuntimeException if interrupted or if any of the threads die with an exception. */ public void awaitAndLog(ProgressLoggers loggers, Duration logInterval) { loggers.awaitAndLog(done, logInterval); loggers.log(); } /** * Blocks until all work has been completed by all steps of this pipeline. * * @throws RuntimeException if interrupted or if any of the threads die with an exception. */ public void await() { try { done.get(); } catch (InterruptedException | ExecutionException e) { throw new RuntimeException(e); } } /** * Work that happens in a thread at the start of a pipeline to provide the initial elements to process. * * @param type of items that this step will emit */ @FunctionalInterface public interface SourceStep { /** * Called inside each worker thread to emit elements until there are no more, then return. * * @param next call {@code next.accept} to pass elements to the next step of the pipeline * @throws Exception if an error occurs, will be rethrown by {@link #await()} as a {@link RuntimeException} */ void run(Consumer next) throws Exception; } /** * Work that happens in a thread in the middle of a pipeline that accepts elements from previous steps and emits * elements to the next step. * * @param type of items that this step consumes * @param type of items that this step emits */ @FunctionalInterface public interface WorkerStep { /** * Called inside each worker thread to process elements until there are no more, then return. * * @param prev get elements from the previous step using {@code prev.get}, will return null when there are no more * elements to process * @param next call {@code next.accept} to pass items to the next step of the pipeline * @throws Exception if an error occurs, will be rethrown by {@link #await()} as a {@link RuntimeException} */ void run(IterableOnce prev, Consumer next) throws Exception; } /** * Work that happens in a thread at the end of a pipeline that accepts elements from the previous step. * * @param type of items that this step consumes */ @FunctionalInterface public interface SinkStep { /** * Called inside each worker thread to consume elements until there are no more, then return. * * @param prev get elements from the previous step using {@code prev.get}, will return null when there are no more * elements to process * @throws Exception if an error occurs, will be rethrown by {@link #await()} as a {@link RuntimeException} */ void run(IterableOnce prev) throws Exception; } /** * An intermediate step while building a pipeline that requires the user to add a queue to buffer items before adding * the next step. * * @param type of elements coming out of the previous step that need to be stored in a queue */ public interface Bufferable { /** * Adds a {@link WorkQueue} that groups items into batches before enqueueing them to reduce contention when many * threads are reading or writing to the queue simultaneously. */ Builder addBuffer(String name, int size, int batchSize); /** * Adds a {@link WorkQueue} with batching disabled. */ default Builder addBuffer(String name, int size) { return addBuffer(name, size, 1); } } /** Builder for a new topology that does not yet have any steps. */ public record Empty(String prefix, Stats stats) { /** * Adds an initial step that runs {@code producer} in {@code threads} worker threads to produce items for this * queue. */ public Bufferable fromGenerator(String name, SourceStep producer, int threads) { return (queueName, size, batchSize) -> { var nextQueue = new WorkQueue(prefix + "_" + queueName, size, batchSize, stats); Worker worker = new Worker(prefix + "_" + name, stats, threads, () -> producer.run(nextQueue.threadLocalWriter())); return new Builder<>(prefix, name, nextQueue, worker, stats); }; } /** * Adds an initial step that runs {@code producer} in 1 worker thread to produce items for this queue. */ public Bufferable fromGenerator(String name, SourceStep producer) { return fromGenerator(name, producer, 1); } /** * Adds an initial step that reads all items from {@code iterable} in a single worker thread to produce items for * this queue. */ public Bufferable readFrom(String name, Iterable iterable) { Iterator iter = iterable.iterator(); return fromGenerator(name, next -> { while (iter.hasNext()) { next.accept(iter.next()); } }, 1); } /** * Populates an initial queue with {@code items} for subsequent steps to process but does not kick off a worker * thread. */ public Builder readFromTiny(String name, Collection items) { WorkQueue queue = new WorkQueue<>(prefix + "_" + name, items.size(), 1, stats); Consumer writer = queue.threadLocalWriter(); for (T item : items) { writer.accept(item); } queue.close(); return readFromQueue(queue); } /** * Starts the pipeline from a queue that is populated externally, and does not kick off any threads. */ public Builder readFromQueue(WorkQueue input) { return new Builder<>(prefix, input, stats); } } /** * A step while building a pipeline that needs a subsequent step to process elements. * * @param type of elements that the next step must process */ public record Builder ( String prefix, String name, // keep track of previous elements so that build can wire-up the computation graph WorkerPipeline.Builder previous, WorkQueue inputQueue, WorkQueue outputQueue, Worker worker, Stats stats ) { private Builder(String prefix, String name, WorkQueue outputQueue, Worker worker, Stats stats) { this(prefix, name, null, null, outputQueue, worker, stats); } private Builder(String prefix, WorkQueue outputQueue, Stats stats) { this(prefix, null, outputQueue, null, stats); } /** * Runs {@code step} simultaneously in {@code threads} threads which consumes items and emits new ones that must be * buffered. * * @param type of element that this step emits */ public Bufferable addWorker(String name, int threads, WorkerStep step) { Builder curr = this; return (queueName, size, batchSize) -> { var nextOutputQueue = new WorkQueue(prefix + "_" + queueName, size, batchSize, stats); var worker = new Worker(prefix + "_" + name, stats, threads, () -> step.run(outputQueue.threadLocalReader(), nextOutputQueue.threadLocalWriter())); return new Builder<>(prefix, name, curr, outputQueue, nextOutputQueue, worker, stats); }; } private WorkerPipeline build() { var previousPipeline = previous == null || previous.worker == null ? null : previous.build(); var doneFuture = worker != null ? worker.done() : CompletableFuture.completedFuture(true); if (previousPipeline != null) { doneFuture = joinFutures(doneFuture, previousPipeline.done); } if (worker != null && outputQueue != null) { doneFuture = doneFuture.thenRun(outputQueue::close); } return new WorkerPipeline<>(name, previousPipeline, inputQueue, worker, doneFuture); } /** * Runs {@code step} simultaneously in {@code threads} threads that consumes items but does not emit any. */ public WorkerPipeline sinkTo(String name, int threads, SinkStep step) { var previousPipeline = build(); var worker = new Worker(prefix + "_" + name, stats, threads, () -> step.run(outputQueue.threadLocalReader())); var doneFuture = joinFutures(worker.done(), previousPipeline.done); return new WorkerPipeline<>(name, previousPipeline, outputQueue, worker, doneFuture); } /** * Runs {@code threads} simultaneous worker threads that consume items from previous step and invoke {@code * consumer.accept} for each one. */ public WorkerPipeline sinkToConsumer(String name, int threads, Consumer consumer) { return sinkTo(name, threads, (prev) -> { for (O item : prev) { consumer.accept(item); } }); } } }