kopia lustrzana https://github.com/onthegomap/planetiler
347 wiersze
13 KiB
Java
347 wiersze
13 KiB
Java
package com.onthegomap.planetiler.stats;
|
|
|
|
import com.onthegomap.planetiler.util.FileUtils;
|
|
import com.onthegomap.planetiler.util.MemoryEstimator;
|
|
import io.prometheus.client.Collector;
|
|
import io.prometheus.client.CollectorRegistry;
|
|
import io.prometheus.client.CounterMetricFamily;
|
|
import io.prometheus.client.GaugeMetricFamily;
|
|
import io.prometheus.client.Histogram;
|
|
import io.prometheus.client.exporter.BasicAuthHttpConnectionFactory;
|
|
import io.prometheus.client.exporter.PushGateway;
|
|
import io.prometheus.client.exporter.common.TextFormat;
|
|
import io.prometheus.client.hotspot.DefaultExports;
|
|
import java.io.IOException;
|
|
import java.io.StringWriter;
|
|
import java.lang.management.ManagementFactory;
|
|
import java.lang.management.OperatingSystemMXBean;
|
|
import java.net.URL;
|
|
import java.nio.file.FileStore;
|
|
import java.nio.file.Files;
|
|
import java.nio.file.Path;
|
|
import java.time.Duration;
|
|
import java.util.ArrayList;
|
|
import java.util.List;
|
|
import java.util.Map;
|
|
import java.util.concurrent.ConcurrentSkipListMap;
|
|
import java.util.concurrent.Executors;
|
|
import java.util.concurrent.ScheduledExecutorService;
|
|
import java.util.concurrent.TimeUnit;
|
|
import java.util.function.Supplier;
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
/**
|
|
* A {@link Stats} implementation that pushes metrics to a <a href="https://prometheus.io/">prometheus</a> instance
|
|
* through a <a href="https://github.com/prometheus/pushgateway">push gateway</a>.
|
|
* <p>
|
|
* See {@code grafana.json} for an example grafana dashboard you can use to monitor progress.
|
|
*/
|
|
class PrometheusStats implements Stats {
|
|
|
|
private static final Logger LOGGER = LoggerFactory.getLogger(PrometheusStats.class);
|
|
|
|
private final CollectorRegistry registry = new CollectorRegistry();
|
|
private final Timers timers = new Timers();
|
|
private static final String BASE = "planetiler_";
|
|
private PushGateway pg;
|
|
private ScheduledExecutorService executor;
|
|
private final String job;
|
|
private final Map<String, Path> filesToMonitor = new ConcurrentSkipListMap<>();
|
|
private final Map<String, MemoryEstimator.HasEstimate> heapObjectsToMonitor = new ConcurrentSkipListMap<>();
|
|
|
|
/** Constructs a new instance but does not start polling (for tests). */
|
|
PrometheusStats(String job) {
|
|
this.job = job;
|
|
DefaultExports.register(registry);
|
|
new ThreadDetailsExports().register(registry);
|
|
new InProgressTasks().register(registry);
|
|
new FileSizeCollector().register(registry);
|
|
new HeapObjectSizeCollector().register(registry);
|
|
new PostGcMemoryCollector().register(registry);
|
|
}
|
|
|
|
private PrometheusStats(String destination, String job, Duration interval) {
|
|
this(job);
|
|
try {
|
|
URL url = new URL(destination);
|
|
pg = new PushGateway(url);
|
|
if (url.getUserInfo() != null) {
|
|
String[] parts = url.getUserInfo().split(":");
|
|
if (parts.length == 2) {
|
|
pg.setConnectionFactory(new BasicAuthHttpConnectionFactory(parts[0], parts[1]));
|
|
}
|
|
}
|
|
this.push();
|
|
executor = Executors.newScheduledThreadPool(1, r -> {
|
|
Thread thread = new Thread(r);
|
|
thread.setDaemon(true);
|
|
thread.setName("prometheus-pusher");
|
|
return thread;
|
|
});
|
|
executor.scheduleAtFixedRate(this::push, 0, Math.max(interval.getSeconds(), 5), TimeUnit.SECONDS);
|
|
} catch (IOException e) {
|
|
throw new RuntimeException(e);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Returns a new {@code PrometheusStats} that and schedules it to push to {@code destination} every {@code interval}.
|
|
*/
|
|
static PrometheusStats createAndStartPushing(String destination, String job, Duration interval) {
|
|
return new PrometheusStats(destination, job, interval);
|
|
}
|
|
|
|
private void push() {
|
|
try {
|
|
pg.push(registry, job);
|
|
} catch (IOException e) {
|
|
LOGGER.error("Error pushing stats to prometheus", e);
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public void gauge(String name, Supplier<Number> value) {
|
|
new Collector() {
|
|
@Override
|
|
public List<MetricFamilySamples> collect() {
|
|
return List.of(new GaugeMetricFamily(BASE + sanitizeMetricName(name), "", value.get().doubleValue()));
|
|
}
|
|
}.register(registry);
|
|
}
|
|
|
|
private final io.prometheus.client.Counter processedElements = io.prometheus.client.Counter
|
|
.build(BASE + "renderer_elements_processed", "Number of source elements processed")
|
|
.labelNames("type", "layer")
|
|
.register(registry);
|
|
|
|
@Override
|
|
public void processedElement(String elemType, String layer) {
|
|
processedElements.labels(elemType, layer).inc();
|
|
}
|
|
|
|
private final io.prometheus.client.Counter dataErrors = io.prometheus.client.Counter
|
|
.build(BASE + "bad_input_data", "Number of data inconsistencies encountered in source data")
|
|
.labelNames("type")
|
|
.register(registry);
|
|
|
|
@Override
|
|
public void dataError(String errorCode) {
|
|
dataErrors.labels(errorCode).inc();
|
|
}
|
|
|
|
private final io.prometheus.client.Counter emittedFeatures = io.prometheus.client.Counter
|
|
.build(BASE + "renderer_features_emitted", "Features enqueued for writing to feature DB")
|
|
.labelNames("zoom", "layer")
|
|
.register(registry);
|
|
|
|
@Override
|
|
public void emittedFeatures(int z, String layer, int numFeatures) {
|
|
emittedFeatures.labels(Integer.toString(z), layer).inc(numFeatures);
|
|
}
|
|
|
|
/** Returns the full payload that we would send to push gateway for a poll right way. */
|
|
public String getMetricsAsString() {
|
|
try (StringWriter writer = new StringWriter()) {
|
|
TextFormat.write004(writer, registry.metricFamilySamples());
|
|
return writer.toString();
|
|
} catch (IOException e) {
|
|
throw new IllegalStateException(e);
|
|
}
|
|
}
|
|
|
|
private final Histogram tilesWrittenBytes = Histogram
|
|
.build(BASE + "mbtiles_tile_written_bytes", "Written tile sizes by zoom level")
|
|
.buckets(1_000, 10_000, 100_000, 500_000)
|
|
.labelNames("zoom")
|
|
.register(registry);
|
|
|
|
@Override
|
|
public void wroteTile(int zoom, int bytes) {
|
|
tilesWrittenBytes.labels(Integer.toString(zoom)).observe(bytes);
|
|
}
|
|
|
|
@Override
|
|
public Timers timers() {
|
|
return timers;
|
|
}
|
|
|
|
@Override
|
|
public Map<String, Path> monitoredFiles() {
|
|
return filesToMonitor;
|
|
}
|
|
|
|
@Override
|
|
public void monitorInMemoryObject(String name, MemoryEstimator.HasEstimate object) {
|
|
heapObjectsToMonitor.put(name, object);
|
|
}
|
|
|
|
@Override
|
|
public void counter(String name, Supplier<Number> supplier) {
|
|
new Collector() {
|
|
@Override
|
|
public List<MetricFamilySamples> collect() {
|
|
return List.of(new CounterMetricFamily(BASE + sanitizeMetricName(name), "", supplier.get().doubleValue()));
|
|
}
|
|
}.register(registry);
|
|
}
|
|
|
|
@Override
|
|
public void counter(String name, String label, Supplier<Map<String, Counter.Readable>> values) {
|
|
new Collector() {
|
|
@Override
|
|
public List<MetricFamilySamples> collect() {
|
|
List<MetricFamilySamples> result = new ArrayList<>();
|
|
CounterMetricFamily family = new CounterMetricFamily(BASE + sanitizeMetricName(name), "", List.of(label));
|
|
result.add(family);
|
|
for (var entry : values.get().entrySet()) {
|
|
family.addMetric(List.of(entry.getKey()), entry.getValue().get());
|
|
}
|
|
return result;
|
|
}
|
|
}.register(registry);
|
|
}
|
|
|
|
@Override
|
|
public void close() {
|
|
executor.shutdown();
|
|
push();
|
|
}
|
|
|
|
private static GaugeMetricFamily gaugeMetric(String name, double value) {
|
|
return new GaugeMetricFamily(BASE + name, BASE + name + " value", value);
|
|
}
|
|
|
|
/** Reports stats on all tasks being timed through {@link #timers()}. */
|
|
private class InProgressTasks extends Collector {
|
|
|
|
@Override
|
|
public List<MetricFamilySamples> collect() {
|
|
List<MetricFamilySamples> result = new ArrayList<>();
|
|
for (var entry : timers.all().entrySet()) {
|
|
String name = entry.getKey();
|
|
Timer timer = entry.getValue().timer();
|
|
result.add(gaugeMetric(name + "_running", timer.running() ? 1 : 0));
|
|
ProcessTime time = timer.elapsed();
|
|
result.add(gaugeMetric(name + "_elapsed_time_seconds", time.wall().toNanos() / NANOSECONDS_PER_SECOND));
|
|
result.add(gaugeMetric(name + "_cpu_time_seconds",
|
|
time.cpu().orElse(Duration.ZERO).toNanos() / NANOSECONDS_PER_SECOND));
|
|
}
|
|
return result;
|
|
}
|
|
}
|
|
|
|
/** Reports stats on all file sizes being monitored through {@link #monitorFile(String, Path)}. */
|
|
private class FileSizeCollector extends Collector {
|
|
|
|
private boolean logged = false;
|
|
|
|
@Override
|
|
public List<MetricFamilySamples> collect() {
|
|
List<Collector.MetricFamilySamples> results = new ArrayList<>();
|
|
for (var file : filesToMonitor.entrySet()) {
|
|
String name = sanitizeMetricName(file.getKey());
|
|
Path path = file.getValue();
|
|
results.add(new GaugeMetricFamily(BASE + "file_" + name + "_size_bytes", "Size of " + name + " in bytes",
|
|
FileUtils.size(path)));
|
|
if (Files.exists(path)) {
|
|
try {
|
|
FileStore fileStore = Files.getFileStore(path);
|
|
results
|
|
.add(
|
|
new GaugeMetricFamily(BASE + "file_" + name + "_total_space_bytes", "Total space available on disk",
|
|
fileStore.getTotalSpace()));
|
|
results.add(
|
|
new GaugeMetricFamily(BASE + "file_" + name + "_unallocated_space_bytes", "Unallocated space on disk",
|
|
fileStore.getUnallocatedSpace()));
|
|
results
|
|
.add(new GaugeMetricFamily(BASE + "file_" + name + "_usable_space_bytes", "Usable space on disk",
|
|
fileStore.getUsableSpace()));
|
|
} catch (IOException e) {
|
|
// let the user know once
|
|
if (!logged) {
|
|
LOGGER.warn("unable to get usable space on device", e);
|
|
logged = true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return results;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Reports stats on all in-memory objects sizes being monitored through
|
|
* {@link #monitorInMemoryObject(String, MemoryEstimator.HasEstimate)}.
|
|
*/
|
|
private class HeapObjectSizeCollector extends Collector {
|
|
|
|
@Override
|
|
public List<MetricFamilySamples> collect() {
|
|
List<Collector.MetricFamilySamples> results = new ArrayList<>();
|
|
for (var entry : heapObjectsToMonitor.entrySet()) {
|
|
String name = sanitizeMetricName(entry.getKey());
|
|
MemoryEstimator.HasEstimate heapObject = entry.getValue();
|
|
results
|
|
.add(new GaugeMetricFamily(BASE + "heap_object_" + name + "_size_bytes", "Bytes of memory used by " + name,
|
|
heapObject.estimateMemoryUsageBytes()));
|
|
}
|
|
return results;
|
|
}
|
|
}
|
|
|
|
/** Reports stats on post-GC memory consumption of each memory pool. */
|
|
private static class PostGcMemoryCollector extends Collector {
|
|
|
|
@Override
|
|
public List<MetricFamilySamples> collect() {
|
|
GaugeMetricFamily postGcPoolSizes = new GaugeMetricFamily(
|
|
"jvm_memory_pool_post_gc_bytes_total",
|
|
"Memory used by each pool after last GC",
|
|
List.of("pool")
|
|
);
|
|
for (var entry : ProcessInfo.getPostGcPoolSizes().entrySet()) {
|
|
postGcPoolSizes.addMetric(List.of(entry.getKey()), entry.getValue());
|
|
}
|
|
return List.of(postGcPoolSizes);
|
|
}
|
|
}
|
|
|
|
/** Reports more detailed stats on CPU usage statistics by thread than prometheus collects by default. */
|
|
private static class ThreadDetailsExports extends Collector {
|
|
|
|
private final OperatingSystemMXBean osBean;
|
|
|
|
public ThreadDetailsExports() {
|
|
this.osBean = ManagementFactory.getOperatingSystemMXBean();
|
|
}
|
|
|
|
private final Map<Long, ProcessInfo.ThreadState> threads = new ConcurrentSkipListMap<>();
|
|
|
|
public List<MetricFamilySamples> collect() {
|
|
|
|
List<MetricFamilySamples> mfs = new ArrayList<>(List.of(
|
|
new GaugeMetricFamily("jvm_available_processors", "Result of Runtime.getRuntime().availableProcessors()",
|
|
Runtime.getRuntime().availableProcessors()),
|
|
new GaugeMetricFamily("jvm_system_load_avg", "Result of OperatingSystemMXBean.getSystemLoadAverage()",
|
|
osBean.getSystemLoadAverage())
|
|
));
|
|
|
|
CounterMetricFamily threadCpuTimes = new CounterMetricFamily("jvm_thread_cpu_time_seconds",
|
|
"CPU time used by each thread", List.of("name", "id"));
|
|
mfs.add(threadCpuTimes);
|
|
CounterMetricFamily threadUserTimes = new CounterMetricFamily("jvm_thread_user_time_seconds",
|
|
"User time used by each thread", List.of("name", "id"));
|
|
mfs.add(threadUserTimes);
|
|
threads.putAll(ProcessInfo.getThreadStats());
|
|
for (ProcessInfo.ThreadState thread : threads.values()) {
|
|
var labels = List.of(thread.name(), Long.toString(thread.id()));
|
|
threadUserTimes.addMetric(labels, thread.userTime().toNanos() / NANOSECONDS_PER_SECOND);
|
|
threadCpuTimes.addMetric(labels, thread.cpuTime().toNanos() / NANOSECONDS_PER_SECOND);
|
|
}
|
|
|
|
return mfs;
|
|
}
|
|
}
|
|
}
|