kopia lustrzana https://github.com/onthegomap/planetiler
rodzic
54ab067378
commit
690d87f53e
|
@ -1,9 +1,6 @@
|
|||
package com.onthegomap.flatmap.util;
|
||||
|
||||
import static com.google.common.net.HttpHeaders.ACCEPT_RANGES;
|
||||
import static com.google.common.net.HttpHeaders.CONTENT_LENGTH;
|
||||
import static com.google.common.net.HttpHeaders.RANGE;
|
||||
import static com.google.common.net.HttpHeaders.USER_AGENT;
|
||||
import static com.google.common.net.HttpHeaders.*;
|
||||
import static java.nio.file.StandardOpenOption.WRITE;
|
||||
|
||||
import com.onthegomap.flatmap.config.FlatmapConfig;
|
||||
|
@ -28,6 +25,7 @@ import java.nio.file.Files;
|
|||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
|
@ -63,10 +61,13 @@ import org.slf4j.LoggerFactory;
|
|||
@SuppressWarnings("UnusedReturnValue")
|
||||
public class Downloader {
|
||||
|
||||
// Redirect limit: httpHeadFollowRedirects refuses to continue once its redirect counter is greater than this value.
private static final int MAX_REDIRECTS = 5;
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(Downloader.class);
|
||||
private final FlatmapConfig config;
|
||||
private final List<ResourceToDownload> toDownloadList = new ArrayList<>();
|
||||
// NOTE(review): two declarations of `client` follow (Redirect.NORMAL and Redirect.NEVER); a compilable class can
// keep only one of them. The redirect-following helper in this class relies on the NEVER variant, because it reads
// the Location header of each 3xx response itself.
private final HttpClient client = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.NORMAL).build();
|
||||
private final HttpClient client = HttpClient.newBuilder()
|
||||
// explicitly follow redirects to capture final redirect url
|
||||
.followRedirects(HttpClient.Redirect.NEVER).build();
|
||||
private final ExecutorService executor;
|
||||
private final Stats stats;
|
||||
// Chunk size in bytes; the factory visible in this file passes config.downloadChunkSizeMB() * 1_000_000L.
private final long chunkSizeBytes;
|
||||
|
@ -89,12 +90,6 @@ public class Downloader {
|
|||
return new Downloader(config, stats, config.downloadChunkSizeMB() * 1_000_000L);
|
||||
}
|
||||
|
||||
private static void assertOK(HttpResponse.ResponseInfo responseInfo) {
|
||||
if (responseInfo.statusCode() != 200) {
|
||||
throw new IllegalStateException("Bad response: " + responseInfo.statusCode());
|
||||
}
|
||||
}
|
||||
|
||||
private static URLConnection getUrlConnection(String urlString, FlatmapConfig config) throws IOException {
|
||||
var url = new URL(urlString);
|
||||
var connection = url.openConnection();
|
||||
|
@ -185,7 +180,7 @@ public class Downloader {
|
|||
CompletableFuture<?> downloadIfNecessary(ResourceToDownload resourceToDownload) {
|
||||
long existingSize = FileUtils.size(resourceToDownload.output);
|
||||
|
||||
return httpHead(resourceToDownload)
|
||||
return httpHeadFollowRedirects(resourceToDownload.url, 0)
|
||||
.whenComplete((metadata, err) -> {
|
||||
if (metadata != null) {
|
||||
resourceToDownload.metadata.complete(metadata);
|
||||
|
@ -198,7 +193,10 @@ public class Downloader {
|
|||
LOGGER.info("Skipping " + resourceToDownload.id + ": " + resourceToDownload.output + " already up-to-date");
|
||||
return CompletableFuture.completedFuture(null);
|
||||
} else {
|
||||
LOGGER.info("Downloading " + resourceToDownload.url + " to " + resourceToDownload.output);
|
||||
String redirectInfo = metadata.canonicalUrl.equals(resourceToDownload.url)
|
||||
? ""
|
||||
: " (redirected to " + metadata.canonicalUrl + ")";
|
||||
LOGGER.info("Downloading " + resourceToDownload.url + redirectInfo + " to " + resourceToDownload.output);
|
||||
FileUtils.delete(resourceToDownload.output);
|
||||
FileUtils.createParentDirectories(resourceToDownload.output);
|
||||
Path tmpPath = resourceToDownload.tmpPath();
|
||||
|
@ -225,15 +223,35 @@ public class Downloader {
|
|||
}, executor);
|
||||
}
|
||||
|
||||
CompletableFuture<ResourceMetadata> httpHead(ResourceToDownload resourceToDownload) {
|
||||
private CompletableFuture<ResourceMetadata> httpHeadFollowRedirects(String url, int redirects) {
|
||||
if (redirects > MAX_REDIRECTS) {
|
||||
throw new IllegalStateException("Exceeded " + redirects + " redirects for " + url);
|
||||
}
|
||||
return httpHead(url).thenComposeAsync(response -> response.redirect.isPresent()
|
||||
? httpHeadFollowRedirects(response.redirect.get(), redirects + 1)
|
||||
: CompletableFuture.completedFuture(response));
|
||||
}
|
||||
|
||||
/**
 * Sends a single HEAD request to {@code url} and condenses the response status and headers into a
 * {@link ResourceMetadata}. Redirects are not followed here: a 3xx response is reported through the metadata's
 * redirect target so the caller can decide what to request next.
 *
 * <p>The response handler throws {@link IllegalStateException} for a 3xx status without a Location header and for
 * any other status that is not 200.
 *
 * <p>NOTE(review): this span contains alternative versions of some statements next to each other (two
 * {@code .sendAsync(} lines, two {@code long contentLength} declarations, two {@code ResourceMetadata metadata}
 * declarations, and an {@code assertOK} call ahead of the explicit status checks). The lines that reference
 * {@code resourceToDownload} or the two-argument {@code ResourceMetadata} constructor do not match this signature.
 *
 * @param url URL to send the HEAD request to; it is also stored in the returned metadata
 * @return future holding the metadata built from the response headers
 */
CompletableFuture<ResourceMetadata> httpHead(String url) {
|
||||
return client
|
||||
.sendAsync(newHttpRequest(resourceToDownload.url).method("HEAD", HttpRequest.BodyPublishers.noBody()).build(),
|
||||
.sendAsync(newHttpRequest(url).method("HEAD", HttpRequest.BodyPublishers.noBody()).build(),
|
||||
responseInfo -> {
|
||||
assertOK(responseInfo);
|
||||
int status = responseInfo.statusCode();
|
||||
Optional<String> location = Optional.empty();
|
||||
// stays 0 for redirect responses; only a 200 response supplies a real length below
long contentLength = 0;
|
||||
HttpHeaders headers = responseInfo.headers();
|
||||
long contentLength = headers.firstValueAsLong(CONTENT_LENGTH).orElseThrow();
|
||||
// 3xx: capture the redirect target instead of treating the response as an error
if (status >= 300 && status < 400) {
|
||||
location = responseInfo.headers().firstValue(LOCATION);
|
||||
if (location.isEmpty()) {
|
||||
throw new IllegalStateException("Received " + status + " but no location header from " + url);
|
||||
}
|
||||
} else if (responseInfo.statusCode() != 200) {
|
||||
throw new IllegalStateException("Bad response: " + responseInfo.statusCode());
|
||||
} else {
|
||||
// orElseThrow(): a 200 response without a Content-Length header is rejected
contentLength = headers.firstValueAsLong(CONTENT_LENGTH).orElseThrow();
|
||||
}
|
||||
// range support is advertised by an Accept-Ranges header value equal to "bytes"
boolean supportsRangeRequest = headers.allValues(ACCEPT_RANGES).contains("bytes");
|
||||
ResourceMetadata metadata = new ResourceMetadata(contentLength, supportsRangeRequest);
|
||||
ResourceMetadata metadata = new ResourceMetadata(location, url, contentLength, supportsRangeRequest);
|
||||
// the precomputed metadata becomes the response "body", which thenApply unwraps below
return HttpResponse.BodyHandlers.replacing(metadata).apply(responseInfo);
|
||||
}).thenApply(HttpResponse::body);
|
||||
}
|
||||
|
@ -249,6 +267,7 @@ public class Downloader {
|
|||
* But it is slower on large files
|
||||
*/
|
||||
return resource.metadata.thenCompose(metadata -> {
|
||||
String canonicalUrl = metadata.canonicalUrl;
|
||||
record Range(long start, long end) {
|
||||
|
||||
long size() {
|
||||
|
@ -275,8 +294,8 @@ public class Downloader {
|
|||
while (range.size() > 0) {
|
||||
try (
|
||||
var inputStream = (ranges || range.start > 0)
|
||||
? openStreamRange(resource.url, range.start, range.end)
|
||||
: openStream(resource.url);
|
||||
? openStreamRange(canonicalUrl, range.start, range.end)
|
||||
: openStream(canonicalUrl);
|
||||
var input = new ProgressChannel(Channels.newChannel(inputStream), resource.progress);
|
||||
) {
|
||||
// ensure this file has been allocated up to the start of this block
|
||||
|
@ -284,10 +303,10 @@ public class Downloader {
|
|||
fileChannel.position(range.start);
|
||||
long transferred = fileChannel.transferFrom(input, range.start, range.size());
|
||||
if (transferred == 0) {
|
||||
throw new IOException("Transferred 0 bytes but " + range.size() + " expected: " + resource.url);
|
||||
throw new IOException("Transferred 0 bytes but " + range.size() + " expected: " + canonicalUrl);
|
||||
} else if (transferred != range.size() && !metadata.acceptRange) {
|
||||
throw new IOException(
|
||||
"Transferred " + transferred + " bytes but " + range.size() + " expected: " + resource.url
|
||||
"Transferred " + transferred + " bytes but " + range.size() + " expected: " + canonicalUrl
|
||||
+ " and server does not support range requests");
|
||||
}
|
||||
range = new Range(range.start + transferred, range.end);
|
||||
|
@ -306,7 +325,7 @@ public class Downloader {
|
|||
.header(USER_AGENT, config.httpUserAgent());
|
||||
}
|
||||
|
||||
/**
 * Two-component form of the metadata record.
 *
 * @param size        length passed by the HEAD handler (taken from the Content-Length header)
 * @param acceptRange whether the response advertised byte-range support
 */
static record ResourceMetadata(long size, boolean acceptRange) {}
|
||||
/**
 * Four-component form of the metadata record, as constructed by {@code httpHead(String)}.
 *
 * @param redirect     Location header value when the response was a 3xx redirect, otherwise empty
 * @param canonicalUrl URL the HEAD request was sent to; after redirects are followed this is the final URL
 * @param size         Content-Length of a 200 response; 0 when the response was a redirect
 * @param acceptRange  whether the Accept-Ranges header values contain {@code "bytes"}
 */
static record ResourceMetadata(Optional<String> redirect, String canonicalUrl, long size, boolean acceptRange) {}
|
||||
|
||||
static record ResourceToDownload(
|
||||
String id, String url, Path output, CompletableFuture<ResourceMetadata> metadata, AtomicLong progress
|
||||
|
|
|
@ -14,6 +14,7 @@ import java.nio.file.Files;
|
|||
import java.nio.file.Path;
|
||||
import java.util.Arrays;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
|
@ -54,36 +55,48 @@ public class DownloaderTest {
|
|||
}
|
||||
|
||||
/**
 * Test double for the HEAD request: answers from the in-memory {@code resources} map instead of the network.
 *
 * <p>Redirects are simulated through the URL fragment. A URL of the form {@code base#N} yields metadata that
 * redirects to {@code base#(N-1)}, or to plain {@code base} once {@code N <= 1}. A URL without {@code #} is looked
 * up in {@code resources} and reported with the stored byte length.
 *
 * <p>NOTE(review): two signatures appear in this span ({@code httpHead(ResourceToDownload)} and
 * {@code httpHead(String)}) with a single closing brace; only one of them can be the actual override.
 */
@Override
|
||||
CompletableFuture<ResourceMetadata> httpHead(ResourceToDownload resource) {
|
||||
byte[] bytes = resources.get(resource.url());
|
||||
return CompletableFuture.supplyAsync(() -> new ResourceMetadata(bytes.length, supportsRange));
|
||||
CompletableFuture<ResourceMetadata> httpHead(String url) {
|
||||
// "base#N" encodes "N more redirects before reaching base"
String[] parts = url.split("#");
|
||||
if (parts.length > 1) {
|
||||
int redirectNum = Integer.parseInt(parts[1]);
|
||||
// count down by one per hop; the last hop drops the fragment entirely
String next = redirectNum <= 1 ? parts[0] : (parts[0] + "#" + (redirectNum - 1));
|
||||
return CompletableFuture.supplyAsync(
|
||||
() -> new ResourceMetadata(Optional.of(next), url, 0, supportsRange));
|
||||
}
|
||||
// null when nothing is registered for url; bytes.length inside the supplier then fails the future
byte[] bytes = resources.get(url);
|
||||
return CompletableFuture.supplyAsync(
|
||||
() -> new ResourceMetadata(Optional.empty(), url, bytes.length, supportsRange));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@CsvSource({
|
||||
"false,100",
|
||||
"true,100",
|
||||
"true,2",
|
||||
"false,100,0",
|
||||
"true,100,0",
|
||||
"true,2,0",
|
||||
"false,100,1",
|
||||
"false,100,2",
|
||||
"true,2,4",
|
||||
})
|
||||
public void testDownload(boolean range, int maxLength) throws Exception {
|
||||
public void testDownload(boolean range, int maxLength, int redirects) throws Exception {
|
||||
Path dest = path.resolve("out");
|
||||
String string = "0123456789";
|
||||
String url = "http://url";
|
||||
String initialUrl = url + (redirects > 0 ? "#" + redirects : "");
|
||||
Map<String, byte[]> resources = new ConcurrentHashMap<>();
|
||||
|
||||
byte[] bytes = string.getBytes(StandardCharsets.UTF_8);
|
||||
Downloader downloader = mockDownloader(resources, range, maxLength);
|
||||
|
||||
// fails if no data
|
||||
var resource1 = new Downloader.ResourceToDownload("resource", url, dest);
|
||||
var resource1 = new Downloader.ResourceToDownload("resource", initialUrl, dest);
|
||||
assertThrows(ExecutionException.class, () -> downloader.downloadIfNecessary(resource1).get());
|
||||
assertFalse(Files.exists(dest));
|
||||
assertEquals(0, resource1.bytesDownloaded());
|
||||
|
||||
// succeeds with data
|
||||
var resource2 = new Downloader.ResourceToDownload("resource", url, dest);
|
||||
var resource2 = new Downloader.ResourceToDownload("resource", initialUrl, dest);
|
||||
resources.put(url, bytes);
|
||||
downloader.downloadIfNecessary(resource2).get();
|
||||
assertEquals(string, Files.readString(dest));
|
||||
|
@ -92,7 +105,7 @@ public class DownloaderTest {
|
|||
|
||||
// does not re-request if size is the same
|
||||
downloads = 0;
|
||||
var resource3 = new Downloader.ResourceToDownload("resource", url, dest);
|
||||
var resource3 = new Downloader.ResourceToDownload("resource", initialUrl, dest);
|
||||
downloader.downloadIfNecessary(resource3).get();
|
||||
assertEquals(0, downloads);
|
||||
assertEquals(string, Files.readString(dest));
|
||||
|
@ -100,7 +113,7 @@ public class DownloaderTest {
|
|||
assertEquals(0, resource3.bytesDownloaded());
|
||||
|
||||
// does re-download if size changes
|
||||
var resource4 = new Downloader.ResourceToDownload("resource", url, dest);
|
||||
var resource4 = new Downloader.ResourceToDownload("resource", initialUrl, dest);
|
||||
String newContent = "54321";
|
||||
resources.put(url, newContent.getBytes(StandardCharsets.UTF_8));
|
||||
downloader.downloadIfNecessary(resource4).get();
|
||||
|
|
Ładowanie…
Reference in New Issue