
372 wiersze
15 KiB
Czysty Zwykły widok Historia

package com.onthegomap.planetiler.util;
2021-09-10 00:46:20 +00:00
import static*;
2021-10-20 01:57:47 +00:00
import static java.nio.file.StandardOpenOption.WRITE;
2021-09-10 00:46:20 +00:00
import com.onthegomap.planetiler.config.PlanetilerConfig;
import com.onthegomap.planetiler.stats.ProgressLoggers;
import com.onthegomap.planetiler.stats.Stats;
import com.onthegomap.planetiler.worker.WorkerPipeline;
2021-09-10 00:46:20 +00:00
2021-10-20 01:57:47 +00:00
2021-09-10 00:46:20 +00:00
2021-10-20 01:57:47 +00:00
2021-09-10 00:46:20 +00:00
2021-10-20 01:57:47 +00:00
2021-09-10 00:46:20 +00:00
2021-10-20 01:57:47 +00:00
import java.nio.ByteBuffer;
import java.nio.channels.Channels;
import java.nio.channels.FileChannel;
import java.nio.channels.ReadableByteChannel;
2021-09-10 00:46:20 +00:00
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
2021-09-10 00:46:20 +00:00
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
2021-10-20 01:57:47 +00:00
import java.util.concurrent.atomic.AtomicLong;
2021-09-10 00:46:20 +00:00
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * A utility for downloading files to disk in parallel over HTTP.
 * <p>
 * After downloading a file once, it won't be downloaded again unless the {@code Content-Length} of the resource
 * changes.
 * <p>
 * For example:
 * <pre>{@code
 * Downloader.create(PlanetilerConfig.defaults())
 *   .add("natural_earth", "http://url/of/natural_earth.zip", Path.of("natural_earth.zip"))
 *   .add("osm", "http://url/of/file.osm.pbf", Path.of("file.osm.pbf"))
 *   .start();
 * }</pre>
 * <p>
 * As a shortcut to find the URL of a file to download from the <a href="https://download.geofabrik.de/">Geofabrik
 * download site</a>, you can use "geofabrik:extract name" (e.g. "geofabrik:monaco" or "geofabrik:australia") to look
 * up a {@code .osm.pbf} download URL in the <a href="https://download.geofabrik.de/index-v1.json">Geofabrik JSON
 * index</a>.
 * <p>
 * You can also use "aws:latest" to download the latest {@code planet.osm.pbf} file from the <a
 * href="https://registry.opendata.aws/osm/">AWS Open Data Registry</a>.
 */
2021-09-10 00:46:20 +00:00
public class Downloader {
private static final int MAX_REDIRECTS = 5;
2021-09-10 00:46:20 +00:00
private static final Logger LOGGER = LoggerFactory.getLogger(Downloader.class);
private final PlanetilerConfig config;
2021-09-10 00:46:20 +00:00
private final List<ResourceToDownload> toDownloadList = new ArrayList<>();
private final HttpClient client = HttpClient.newBuilder()
// explicitly follow redirects to capture final redirect url
2021-09-10 00:46:20 +00:00
private final ExecutorService executor;
2021-10-20 01:57:47 +00:00
private final Stats stats;
private final long chunkSizeBytes;
2021-09-10 00:46:20 +00:00
Downloader(PlanetilerConfig config, Stats stats, long chunkSizeBytes) {
2021-10-20 01:57:47 +00:00
this.chunkSizeBytes = chunkSizeBytes;
2021-09-10 00:46:20 +00:00
this.config = config;
2021-10-20 01:57:47 +00:00
this.stats = stats;
2021-09-10 00:46:20 +00:00
this.executor = Executors.newSingleThreadExecutor((runnable) -> {
Thread thread = new Thread(() -> {
return thread;
public static Downloader create(PlanetilerConfig config, Stats stats) {
2021-10-20 01:57:47 +00:00
return new Downloader(config, stats, config.downloadChunkSizeMB() * 1_000_000L);
2021-09-10 00:46:20 +00:00
private static URLConnection getUrlConnection(String urlString, PlanetilerConfig config) throws IOException {
2021-10-20 01:57:47 +00:00
var url = new URL(urlString);
var connection = url.openConnection();
connection.setConnectTimeout((int) config.httpTimeout().toMillis());
connection.setReadTimeout((int) config.httpTimeout().toMillis());
connection.setRequestProperty(USER_AGENT, config.httpUserAgent());
return connection;
* Returns an input stream reading from a remote URL with timeout and user-agent set from planetiler config.
2021-10-20 01:57:47 +00:00
* @param urlString remote URL
* @param config planetiler config containing the user agent and timeout parameter
2021-10-20 01:57:47 +00:00
* @return an input stream that will read from the remote URL
* @throws IOException if an error occurs making the network request
public static InputStream openStream(String urlString, PlanetilerConfig config) throws IOException {
2021-10-20 01:57:47 +00:00
return getUrlConnection(urlString, config).getInputStream();
private static InputStream openStreamRange(String urlString, PlanetilerConfig config, long start, long end)
2021-10-20 01:57:47 +00:00
throws IOException {
URLConnection connection = getUrlConnection(urlString, config);
connection.setRequestProperty(RANGE, "bytes=%d-%d".formatted(start, end));
return connection.getInputStream();
InputStream openStream(String url) throws IOException {
return openStream(url, config);
InputStream openStreamRange(String url, long start, long end) throws IOException {
return openStreamRange(url, config, start, end);
2021-09-10 00:46:20 +00:00
* Adds a new resource to download but does not start downloading it until {@link #run()} is called.
* <p>
* The resource won't be downloaded if size on disk is the same as {@code Content-Length} header reported from a
* {@code HEAD} request to the resource.
* @param id short name to use for this download when logging progress
2021-10-20 01:57:47 +00:00
* @param url the external resource to fetch, "aws:latest" (for the latest planet .osm.pbf), or "geofabrik:extract
* name" as a shortcut to use {@link Geofabrik#getDownloadUrl(String, PlanetilerConfig)} to lookup a
* {@code .osm.pbf} <a href="">Geofabrik</a> extract URL by partial match
* on area name
2021-09-10 00:46:20 +00:00
* @param output where to download the file to
* @return {@code this} for chaining
public Downloader add(String id, String url, Path output) {
if (url.startsWith("geofabrik:")) {
2021-10-20 01:57:47 +00:00
url = Geofabrik.getDownloadUrl(url.replaceFirst("^geofabrik:", ""), config);
} else if (url.startsWith("aws:")) {
url = AwsOsm.getDownloadUrl(url.replaceFirst("^aws:", ""), config);
2021-09-10 00:46:20 +00:00
toDownloadList.add(new ResourceToDownload(id, url, output));
return this;
* Starts downloading all resources in parallel, logging progress until complete.
* @throws IllegalStateException if an error occurs downloading any resource, will be thrown after all resources
* finish
public void run() {
var downloads = CompletableFuture
ProgressLoggers loggers = ProgressLoggers.create();
for (var toDownload : toDownloadList) {
try {
2021-10-20 01:57:47 +00:00
long size = toDownload.metadata.get(10, TimeUnit.SECONDS).size;
loggers.addStorageRatePercentCounter(, size, toDownload::bytesDownloaded);
2021-09-10 00:46:20 +00:00
} catch (InterruptedException | ExecutionException | TimeoutException e) {
throw new IllegalStateException("Error getting size of " + toDownload.url, e);
loggers.awaitAndLog(downloads, config.logInterval());
2021-10-20 01:57:47 +00:00
CompletableFuture<?> downloadIfNecessary(ResourceToDownload resourceToDownload) {
2021-09-10 00:46:20 +00:00
long existingSize = FileUtils.size(resourceToDownload.output);
return httpHeadFollowRedirects(resourceToDownload.url, 0)
2021-10-20 01:57:47 +00:00
.whenComplete((metadata, err) -> {
if (metadata != null) {
2021-09-10 00:46:20 +00:00
} else {
2021-10-20 01:57:47 +00:00
2021-09-10 00:46:20 +00:00
2021-10-20 01:57:47 +00:00
.thenComposeAsync(metadata -> {
if (metadata.size == existingSize) {
2021-09-10 00:46:20 +00:00"Skipping " + + ": " + resourceToDownload.output + " already up-to-date");
return CompletableFuture.completedFuture(null);
} else {
String redirectInfo = metadata.canonicalUrl.equals(resourceToDownload.url)
? ""
: " (redirected to " + metadata.canonicalUrl + ")";"Downloading " + resourceToDownload.url + redirectInfo + " to " + resourceToDownload.output);
2021-09-10 00:46:20 +00:00
Path tmpPath = resourceToDownload.tmpPath();
2021-10-20 01:57:47 +00:00
return httpDownload(resourceToDownload, tmpPath)
2021-09-10 00:46:20 +00:00
.thenCompose(result -> {
try {
Files.move(tmpPath, resourceToDownload.output);
return CompletableFuture.completedFuture(result);
} catch (IOException e) {
return CompletableFuture.failedFuture(e);
.whenCompleteAsync((result, error) -> {
2021-10-20 01:57:47 +00:00
if (error != null) {
2021-09-10 00:46:20 +00:00
LOGGER.error("Error downloading " + resourceToDownload.url + " to " + resourceToDownload.output, error);
2021-10-20 01:57:47 +00:00
} else {"Finished downloading " + resourceToDownload.url + " to " + resourceToDownload.output);
2021-09-10 00:46:20 +00:00
}, executor);
}, executor);
private CompletableFuture<ResourceMetadata> httpHeadFollowRedirects(String url, int redirects) {
if (redirects > MAX_REDIRECTS) {
throw new IllegalStateException("Exceeded " + redirects + " redirects for " + url);
return httpHead(url).thenComposeAsync(response -> response.redirect.isPresent()
? httpHeadFollowRedirects(response.redirect.get(), redirects + 1)
: CompletableFuture.completedFuture(response));
CompletableFuture<ResourceMetadata> httpHead(String url) {
2021-09-10 00:46:20 +00:00
return client
.sendAsync(newHttpRequest(url).method("HEAD", HttpRequest.BodyPublishers.noBody()).build(),
2021-09-10 00:46:20 +00:00
responseInfo -> {
int status = responseInfo.statusCode();
Optional<String> location = Optional.empty();
long contentLength = 0;
2021-10-20 01:57:47 +00:00
HttpHeaders headers = responseInfo.headers();
if (status >= 300 && status < 400) {
location = responseInfo.headers().firstValue(LOCATION);
if (location.isEmpty()) {
throw new IllegalStateException("Received " + status + " but no location header from " + url);
} else if (responseInfo.statusCode() != 200) {
throw new IllegalStateException("Bad response: " + responseInfo.statusCode());
} else {
contentLength = headers.firstValueAsLong(CONTENT_LENGTH).orElseThrow();
2021-10-20 01:57:47 +00:00
boolean supportsRangeRequest = headers.allValues(ACCEPT_RANGES).contains("bytes");
ResourceMetadata metadata = new ResourceMetadata(location, url, contentLength, supportsRangeRequest);
2021-10-20 01:57:47 +00:00
return HttpResponse.BodyHandlers.replacing(metadata).apply(responseInfo);
2021-09-10 00:46:20 +00:00
2021-10-20 01:57:47 +00:00
private CompletableFuture<?> httpDownload(ResourceToDownload resource, Path tmpPath) {
* Alternative using async HTTP client:
* return client.sendAsync(newHttpRequest(url).GET().build(), responseInfo -> {
* assertOK(responseInfo);
* return HttpResponse.BodyHandlers.ofFile(path).apply(responseInfo);
* But it is slower on large files
return resource.metadata.thenCompose(metadata -> {
String canonicalUrl = metadata.canonicalUrl;
2021-10-20 01:57:47 +00:00
record Range(long start, long end) {
long size() {
return end - start;
List<Range> chunks = new ArrayList<>();
boolean ranges = metadata.acceptRange && config.downloadThreads() > 1;
long chunkSize = ranges ? chunkSizeBytes : metadata.size;
for (long start = 0; start < metadata.size; start += chunkSize) {
long end = Math.min(start + chunkSize, metadata.size);
chunks.add(new Range(start, end));
// create an empty file
try {
} catch (IOException e) {
return CompletableFuture.failedFuture(new IOException("Failed to create " + resource.output, e));
return WorkerPipeline.start("download-" +, stats)
.readFromTiny("chunks", chunks)
.sinkToConsumer("chunk-downloader", Math.min(config.downloadThreads(), chunks.size()), range -> {
try (var fileChannel =, WRITE)) {
while (range.size() > 0) {
try (
var inputStream = (ranges || range.start > 0)
? openStreamRange(canonicalUrl, range.start, range.end)
: openStream(canonicalUrl);
2021-10-20 01:57:47 +00:00
var input = new ProgressChannel(Channels.newChannel(inputStream), resource.progress);
) {
// ensure this file has been allocated up to the start of this block
fileChannel.write(ByteBuffer.allocate(1), range.start);
long transferred = fileChannel.transferFrom(input, range.start, range.size());
if (transferred == 0) {
throw new IOException("Transferred 0 bytes but " + range.size() + " expected: " + canonicalUrl);
2021-10-20 01:57:47 +00:00
} else if (transferred != range.size() && !metadata.acceptRange) {
throw new IOException(
"Transferred " + transferred + " bytes but " + range.size() + " expected: " + canonicalUrl
2021-10-20 01:57:47 +00:00
+ " and server does not support range requests");
range = new Range(range.start + transferred, range.end);
} catch (IOException e) {
throw new UncheckedIOException(e);
2021-09-10 00:46:20 +00:00
private HttpRequest.Builder newHttpRequest(String url) {
return HttpRequest.newBuilder(URI.create(url))
2021-10-20 01:57:47 +00:00
2021-09-10 00:46:20 +00:00
.header(USER_AGENT, config.httpUserAgent());
/**
 * What a {@code HEAD} request reported about a resource.
 *
 * @param redirect     value of the {@code Location} header when the response status was 3xx, otherwise empty
 * @param canonicalUrl the URL the {@code HEAD} request was sent to
 * @param size         {@code Content-Length} of a 200 response, or {@code 0} for a redirect response
 * @param acceptRange  whether the {@code Accept-Ranges} header values contain {@code bytes}
 */
record ResourceMetadata(
  Optional<String> redirect,
  String canonicalUrl,
  long size,
  boolean acceptRange
) {}
2021-10-20 01:57:47 +00:00
static record ResourceToDownload(
String id, String url, Path output, CompletableFuture<ResourceMetadata> metadata, AtomicLong progress
) {
2021-09-10 00:46:20 +00:00
ResourceToDownload(String id, String url, Path output) {
2021-10-20 01:57:47 +00:00
this(id, url, output, new CompletableFuture<>(), new AtomicLong(0));
2021-09-10 00:46:20 +00:00
public Path tmpPath() {
return output.resolveSibling(output.getFileName() + "_inprogress");
public long bytesDownloaded() {
2021-10-20 01:57:47 +00:00
return progress.get();
* Wrapper for a {@link ReadableByteChannel} that captures progress information.
private static record ProgressChannel(ReadableByteChannel inner, AtomicLong progress) implements ReadableByteChannel {
public int read(ByteBuffer dst) throws IOException {
int n =;
if (n > 0) {
return n;
public boolean isOpen() {
return inner.isOpen();
public void close() throws IOException {
2021-09-10 00:46:20 +00:00