package com.onthegomap.planetiler.util; import static com.google.common.net.HttpHeaders.*; import static java.nio.file.StandardOpenOption.WRITE; import com.onthegomap.planetiler.config.PlanetilerConfig; import com.onthegomap.planetiler.stats.ProgressLoggers; import com.onthegomap.planetiler.stats.Stats; import com.onthegomap.planetiler.worker.WorkerPipeline; import java.io.IOException; import java.io.InputStream; import java.io.UncheckedIOException; import java.net.URI; import java.net.URL; import java.net.URLConnection; import java.net.http.HttpClient; import java.net.http.HttpHeaders; import java.net.http.HttpRequest; import java.net.http.HttpResponse; import java.nio.ByteBuffer; import java.nio.channels.Channels; import java.nio.channels.FileChannel; import java.nio.channels.ReadableByteChannel; import java.nio.file.FileStore; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; import java.util.List; import java.util.Optional; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicLong; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * A utility for downloading files to disk in parallel over HTTP. *
* After downloading a file once, it won't be downloaded again unless the {@code Content-Length} of the resource * changes. *
* For example:
*
* <pre>
* {@code
* Downloader.create(PlanetilerConfig.defaults())
*   .add("natural_earth", "http://url/of/natural_earth.zip", Path.of("natural_earth.zip"))
*   .add("osm", "http://url/of/file.osm.pbf", Path.of("file.osm.pbf"))
*   .run();
* }
* </pre>
*
* As a shortcut to find the URL of a file to download from the Geofabrik
* download site, you can use "geofabrik:extract name" (e.g. "geofabrik:monaco" or "geofabrik:australia") to look up
* a {@code .osm.pbf} download URL in the Geofabrik JSON
* index.
*
* You can also use "aws:latest" to download the latest {@code planet.osm.pbf} file from the
* AWS Open Data Registry.
*/
@SuppressWarnings("UnusedReturnValue")
public class Downloader {
// NOTE(review): presumably the cap on HTTP redirects followed when resolving a resource; its use is not visible
// in this part of the file - confirm.
private static final int MAX_REDIRECTS = 5;
private static final Logger LOGGER = LoggerFactory.getLogger(Downloader.class);
// Settings read by this class: the progress log interval, the force flag, and the argument passed to the
// Geofabrik/AWS shortcut URL lookups.
private final PlanetilerConfig config;
private final List
* The resource won't be downloaded if size on disk is the same as {@code Content-Length} header reported from a
* {@code HEAD} request to the resource.
*
* @param id short name to use for this download when logging progress
* @param url the external resource to fetch, "aws:latest" (for the latest planet .osm.pbf), or "geofabrik:extract
* name" as a shortcut to use {@link Geofabrik#getDownloadUrl(String, PlanetilerConfig)} to look up a
* {@code .osm.pbf} Geofabrik extract URL by partial match
* on area name
* @param output where to download the file to
* @return {@code this} for chaining
*/
public Downloader add(String id, String url, Path output) {
  // Shortcut schemes are expanded into a concrete download URL; any other value is used verbatim.
  final String resolvedUrl;
  if (url.startsWith("geofabrik:")) {
    String extractName = url.replaceFirst("^geofabrik:", "");
    resolvedUrl = Geofabrik.getDownloadUrl(extractName, config);
  } else if (url.startsWith("aws:")) {
    String version = url.replaceFirst("^aws:", "");
    resolvedUrl = AwsOsm.getDownloadUrl(version, config);
  } else {
    resolvedUrl = url;
  }

  // Only queued here; nothing is fetched until run() is called.
  ResourceToDownload queued = new ResourceToDownload(id, resolvedUrl, output);
  toDownloadList.add(queued);
  return this;
}
/**
 * Starts downloading all resources in parallel, logging progress until complete.
 * <p>
 * Before progress logging starts, this waits up to 10 seconds per resource for its metadata so that the expected
 * size of each download can be registered with the progress logger.
 *
 * @throws IllegalStateException if an error occurs downloading any resource, will be thrown after all resources
 *                               finish; also thrown if the metadata of a resource cannot be obtained because the
 *                               lookup failed, timed out, or the calling thread was interrupted while waiting (in
 *                               which case the thread's interrupt status is restored before throwing)
 */
public void run() {
  // Kick off every download up front; the combined future completes once all of them have finished.
  var downloads = CompletableFuture
    .allOf(toDownloadList.stream()
      .map(this::downloadIfNecessary)
      .toArray(CompletableFuture[]::new)
    );

  ProgressLoggers loggers = ProgressLoggers.create();

  for (var toDownload : toDownloadList) {
    try {
      // The size comes from the metadata future that downloadIfNecessary completes for each resource.
      long size = toDownload.metadata.get(10, TimeUnit.SECONDS).size;
      loggers.addStorageRatePercentCounter(toDownload.id, size, toDownload::bytesDownloaded, true);
    } catch (InterruptedException e) {
      // Wrapping the exception would otherwise hide the interruption from code further up the stack.
      Thread.currentThread().interrupt();
      throw new IllegalStateException("Error getting size of " + toDownload.url, e);
    } catch (ExecutionException | TimeoutException e) {
      throw new IllegalStateException("Error getting size of " + toDownload.url, e);
    }
  }

  loggers.add(" ").addProcessStats()
    .awaitAndLog(downloads, config.logInterval());
  executor.shutdown();
}
/**
 * Downloads {@code resourceToDownload} unless the file already on disk has the same size as the remote resource.
 * <p>
 * The result of the metadata lookup (or its failure) is also published to {@code resourceToDownload.metadata}.
 * When a download is needed, the existing output file is deleted, the content is fetched to a temporary path, and
 * the temporary file is moved to the output path once the download has succeeded.
 *
 * @param resourceToDownload the resource to check and, if needed, fetch
 * @return a future that completes once the resource has been skipped or downloaded, or completes exceptionally if
 *         any step fails
 */
CompletableFuture> downloadIfNecessary(ResourceToDownload resourceToDownload) {
// Captured before the metadata request is issued; compared against the remote size below.
long existingSize = FileUtils.size(resourceToDownload.output);
// NOTE(review): the 0 is presumably the starting redirect count - confirm against httpHeadFollowRedirects.
return httpHeadFollowRedirects(resourceToDownload.url, 0)
// Publish the outcome on the resource's own metadata future, which run() waits on to learn the download size.
.whenComplete((metadata, err) -> {
if (metadata != null) {
resourceToDownload.metadata.complete(metadata);
} else {
resourceToDownload.metadata.completeExceptionally(err);
}
})
.thenComposeAsync(metadata -> {
// Only the sizes are compared; the content of the existing file is not inspected.
if (metadata.size == existingSize) {
LOGGER.info("Skipping " + resourceToDownload.id + ": " + resourceToDownload.output + " already up-to-date");
return CompletableFuture.completedFuture(null);
} else {
String redirectInfo = metadata.canonicalUrl.equals(resourceToDownload.url) ? "" :
" (redirected to " + metadata.canonicalUrl + ")";
LOGGER.info("Downloading " + resourceToDownload.url + redirectInfo + " to " + resourceToDownload.output);
// The old output is removed up front, before any new content has been fetched.
FileUtils.delete(resourceToDownload.output);
FileUtils.createParentDirectories(resourceToDownload.output);
Path tmpPath = resourceToDownload.tmpPath();
FileUtils.delete(tmpPath);
// NOTE(review): presumably a safety net that removes the temp file if the JVM exits mid-download - verify in
// FileUtils.
FileUtils.deleteOnExit(tmpPath);
// Throws when the file store looks too small and force is not set, which fails the returned future.
checkDiskSpace(tmpPath, metadata.size);
return httpDownload(resourceToDownload, tmpPath)
.thenCompose(result -> {
try {
// No copy options are passed, so the move relies on the output having been deleted above.
Files.move(tmpPath, resourceToDownload.output);
return CompletableFuture.completedFuture(result);
} catch (IOException e) {
// Report the I/O failure through the future instead of throwing a checked exception from the lambda.
return CompletableFuture.failedFuture(e);
}
})
.whenCompleteAsync((result, error) -> {
if (error != null) {
LOGGER.error("Error downloading " + resourceToDownload.url + " to " + resourceToDownload.output, error);
} else {
LOGGER.info("Finished downloading " + resourceToDownload.url + " to " + resourceToDownload.output);
}
// Runs on both success and failure; after a successful move the temp path should already be gone.
FileUtils.delete(tmpPath);
}, executor);
}
}, executor);
}
/**
 * Checks that the file store holding {@code destination} has room for {@code size} more bytes on top of everything
 * already registered for that file store through earlier calls.
 * <p>
 * The size is added to the running total for the file store before the comparison is made. A failure to query the
 * file store is logged as a warning and otherwise ignored.
 *
 * @param destination path that is about to be written; the file store is looked up from its parent directory
 * @param size        number of bytes about to be written to {@code destination}
 * @throws IllegalArgumentException if the pending total exceeds the unallocated space and the force option is not
 *                                  set
 */
private void checkDiskSpace(Path destination, long size) {
  try {
    Path parentDir = destination.toAbsolutePath().getParent();
    FileStore store = Files.getFileStore(parentDir);
    var pendingBytes = bytesToDownload.merge(store, size, Long::sum);
    long freeBytes = store.getUnallocatedSpace();
    if (pendingBytes <= freeBytes) {
      // Everything registered so far still fits.
      return;
    }

    var formatter = Format.defaultInstance();
    String warning =
      "Attempting to download " + formatter.storage(pendingBytes) + " to " + store + " which only has " +
        formatter.storage(freeBytes) + " available";
    if (!config.force()) {
      throw new IllegalArgumentException(warning + ", use the --force argument to continue anyway.");
    }
    LOGGER.warn(warning + ", will probably fail.");
  } catch (IOException e) {
    LOGGER.warn("Unable to check file size for download, you may run out of space: " + e, e);
  }
}
private CompletableFuture