Follow redirects from downloader (#23)

Fixes #21
pull/24/head
Michael Barry 2021-11-29 07:41:57 -05:00 zatwierdzone przez GitHub
rodzic 54ab067378
commit 690d87f53e
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 4AEE18F83AFDEB23
2 zmienionych plików z 66 dodań i 34 usunięć

Wyświetl plik

@ -1,9 +1,6 @@
package com.onthegomap.flatmap.util;
import static com.google.common.net.HttpHeaders.ACCEPT_RANGES;
import static com.google.common.net.HttpHeaders.CONTENT_LENGTH;
import static com.google.common.net.HttpHeaders.RANGE;
import static com.google.common.net.HttpHeaders.USER_AGENT;
import static com.google.common.net.HttpHeaders.*;
import static java.nio.file.StandardOpenOption.WRITE;
import com.onthegomap.flatmap.config.FlatmapConfig;
@ -28,6 +25,7 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
@ -63,10 +61,13 @@ import org.slf4j.LoggerFactory;
@SuppressWarnings("UnusedReturnValue")
public class Downloader {
private static final int MAX_REDIRECTS = 5;
private static final Logger LOGGER = LoggerFactory.getLogger(Downloader.class);
private final FlatmapConfig config;
private final List<ResourceToDownload> toDownloadList = new ArrayList<>();
private final HttpClient client = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.NORMAL).build();
private final HttpClient client = HttpClient.newBuilder()
// explicitly follow redirects to capture final redirect url
.followRedirects(HttpClient.Redirect.NEVER).build();
private final ExecutorService executor;
private final Stats stats;
private final long chunkSizeBytes;
@ -89,12 +90,6 @@ public class Downloader {
return new Downloader(config, stats, config.downloadChunkSizeMB() * 1_000_000L);
}
/**
 * Rejects any response whose HTTP status code is not 200.
 *
 * @param responseInfo status line and headers of the response being checked
 * @throws IllegalStateException if the status code is anything other than 200
 */
private static void assertOK(HttpResponse.ResponseInfo responseInfo) {
  final int status = responseInfo.statusCode();
  if (status == 200) {
    return;
  }
  throw new IllegalStateException("Bad response: " + status);
}
private static URLConnection getUrlConnection(String urlString, FlatmapConfig config) throws IOException {
var url = new URL(urlString);
var connection = url.openConnection();
@ -185,7 +180,7 @@ public class Downloader {
CompletableFuture<?> downloadIfNecessary(ResourceToDownload resourceToDownload) {
long existingSize = FileUtils.size(resourceToDownload.output);
return httpHead(resourceToDownload)
return httpHeadFollowRedirects(resourceToDownload.url, 0)
.whenComplete((metadata, err) -> {
if (metadata != null) {
resourceToDownload.metadata.complete(metadata);
@ -198,7 +193,10 @@ public class Downloader {
LOGGER.info("Skipping " + resourceToDownload.id + ": " + resourceToDownload.output + " already up-to-date");
return CompletableFuture.completedFuture(null);
} else {
LOGGER.info("Downloading " + resourceToDownload.url + " to " + resourceToDownload.output);
String redirectInfo = metadata.canonicalUrl.equals(resourceToDownload.url)
? ""
: " (redirected to " + metadata.canonicalUrl + ")";
LOGGER.info("Downloading " + resourceToDownload.url + redirectInfo + " to " + resourceToDownload.output);
FileUtils.delete(resourceToDownload.output);
FileUtils.createParentDirectories(resourceToDownload.output);
Path tmpPath = resourceToDownload.tmpPath();
@ -225,15 +223,35 @@ public class Downloader {
}, executor);
}
CompletableFuture<ResourceMetadata> httpHead(ResourceToDownload resourceToDownload) {
/**
 * Issues a HEAD request for {@code url} and, while the response is a redirect, repeats the request
 * against the redirect target until a non-redirect response is received.
 *
 * @param url       URL to request
 * @param redirects number of redirects that have already been followed to reach {@code url}
 * @return future that completes with the metadata of the first non-redirect response
 * @throws IllegalStateException if more than {@code MAX_REDIRECTS} redirects have been followed
 */
private CompletableFuture<ResourceMetadata> httpHeadFollowRedirects(String url, int redirects) {
  if (redirects > MAX_REDIRECTS) {
    // report the limit that was exceeded, not the running counter
    throw new IllegalStateException("Exceeded " + MAX_REDIRECTS + " redirects for " + url);
  }
  return httpHead(url).thenComposeAsync(response -> response.redirect
    // a Location header may hold a relative reference, so resolve it against the URL that
    // produced the redirect; an absolute Location is returned unchanged by resolve()
    .map(location -> httpHeadFollowRedirects(
      java.net.URI.create(url).resolve(location).toString(), redirects + 1))
    .orElseGet(() -> CompletableFuture.completedFuture(response)));
}
CompletableFuture<ResourceMetadata> httpHead(String url) {
return client
.sendAsync(newHttpRequest(resourceToDownload.url).method("HEAD", HttpRequest.BodyPublishers.noBody()).build(),
.sendAsync(newHttpRequest(url).method("HEAD", HttpRequest.BodyPublishers.noBody()).build(),
responseInfo -> {
assertOK(responseInfo);
int status = responseInfo.statusCode();
Optional<String> location = Optional.empty();
long contentLength = 0;
HttpHeaders headers = responseInfo.headers();
long contentLength = headers.firstValueAsLong(CONTENT_LENGTH).orElseThrow();
if (status >= 300 && status < 400) {
location = responseInfo.headers().firstValue(LOCATION);
if (location.isEmpty()) {
throw new IllegalStateException("Received " + status + " but no location header from " + url);
}
} else if (responseInfo.statusCode() != 200) {
throw new IllegalStateException("Bad response: " + responseInfo.statusCode());
} else {
contentLength = headers.firstValueAsLong(CONTENT_LENGTH).orElseThrow();
}
boolean supportsRangeRequest = headers.allValues(ACCEPT_RANGES).contains("bytes");
ResourceMetadata metadata = new ResourceMetadata(contentLength, supportsRangeRequest);
ResourceMetadata metadata = new ResourceMetadata(location, url, contentLength, supportsRangeRequest);
return HttpResponse.BodyHandlers.replacing(metadata).apply(responseInfo);
}).thenApply(HttpResponse::body);
}
@ -249,6 +267,7 @@ public class Downloader {
* But it is slower on large files
*/
return resource.metadata.thenCompose(metadata -> {
String canonicalUrl = metadata.canonicalUrl;
record Range(long start, long end) {
long size() {
@ -275,8 +294,8 @@ public class Downloader {
while (range.size() > 0) {
try (
var inputStream = (ranges || range.start > 0)
? openStreamRange(resource.url, range.start, range.end)
: openStream(resource.url);
? openStreamRange(canonicalUrl, range.start, range.end)
: openStream(canonicalUrl);
var input = new ProgressChannel(Channels.newChannel(inputStream), resource.progress);
) {
// ensure this file has been allocated up to the start of this block
@ -284,10 +303,10 @@ public class Downloader {
fileChannel.position(range.start);
long transferred = fileChannel.transferFrom(input, range.start, range.size());
if (transferred == 0) {
throw new IOException("Transferred 0 bytes but " + range.size() + " expected: " + resource.url);
throw new IOException("Transferred 0 bytes but " + range.size() + " expected: " + canonicalUrl);
} else if (transferred != range.size() && !metadata.acceptRange) {
throw new IOException(
"Transferred " + transferred + " bytes but " + range.size() + " expected: " + resource.url
"Transferred " + transferred + " bytes but " + range.size() + " expected: " + canonicalUrl
+ " and server does not support range requests");
}
range = new Range(range.start + transferred, range.end);
@ -306,7 +325,7 @@ public class Downloader {
.header(USER_AGENT, config.httpUserAgent());
}
static record ResourceMetadata(long size, boolean acceptRange) {}
/**
 * Result of a single HEAD request.
 *
 * @param redirect     value of the {@code Location} header when the response was a 3xx redirect,
 *                     otherwise empty
 * @param canonicalUrl URL that was requested to obtain this response
 * @param size         value of the {@code Content-Length} header for a 200 response, 0 for a redirect
 * @param acceptRange  whether the {@code Accept-Ranges} header of the response contained {@code bytes}
 */
record ResourceMetadata(
  Optional<String> redirect,
  String canonicalUrl,
  long size,
  boolean acceptRange
) {}
static record ResourceToDownload(
String id, String url, Path output, CompletableFuture<ResourceMetadata> metadata, AtomicLong progress

Wyświetl plik

@ -14,6 +14,7 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
@ -54,36 +55,48 @@ public class DownloaderTest {
}
@Override
CompletableFuture<ResourceMetadata> httpHead(ResourceToDownload resource) {
byte[] bytes = resources.get(resource.url());
return CompletableFuture.supplyAsync(() -> new ResourceMetadata(bytes.length, supportsRange));
// Fake HEAD request: a URL of the form "base#N" answers with a redirect, and a URL without "#"
// answers with the size of the bytes registered for it in the resources map.
CompletableFuture<ResourceMetadata> httpHead(String url) {
  String[] urlAndHops = url.split("#");
  if (urlAndHops.length <= 1) {
    // looked up eagerly; a missing entry only fails once the supplier dereferences it
    byte[] content = resources.get(url);
    return CompletableFuture.supplyAsync(
      () -> new ResourceMetadata(Optional.empty(), url, content.length, supportsRange));
  }
  String base = urlAndHops[0];
  int hopsLeft = Integer.parseInt(urlAndHops[1]);
  // count down one hop per redirect; the last hop points at the bare URL
  String target;
  if (hopsLeft > 1) {
    target = base + "#" + (hopsLeft - 1);
  } else {
    target = base;
  }
  return CompletableFuture.supplyAsync(
    () -> new ResourceMetadata(Optional.of(target), url, 0, supportsRange));
}
};
}
@ParameterizedTest
@CsvSource({
"false,100",
"true,100",
"true,2",
"false,100,0",
"true,100,0",
"true,2,0",
"false,100,1",
"false,100,2",
"true,2,4",
})
public void testDownload(boolean range, int maxLength) throws Exception {
public void testDownload(boolean range, int maxLength, int redirects) throws Exception {
Path dest = path.resolve("out");
String string = "0123456789";
String url = "http://url";
String initialUrl = url + (redirects > 0 ? "#" + redirects : "");
Map<String, byte[]> resources = new ConcurrentHashMap<>();
byte[] bytes = string.getBytes(StandardCharsets.UTF_8);
Downloader downloader = mockDownloader(resources, range, maxLength);
// fails if no data
var resource1 = new Downloader.ResourceToDownload("resource", url, dest);
var resource1 = new Downloader.ResourceToDownload("resource", initialUrl, dest);
assertThrows(ExecutionException.class, () -> downloader.downloadIfNecessary(resource1).get());
assertFalse(Files.exists(dest));
assertEquals(0, resource1.bytesDownloaded());
// succeeds with data
var resource2 = new Downloader.ResourceToDownload("resource", url, dest);
var resource2 = new Downloader.ResourceToDownload("resource", initialUrl, dest);
resources.put(url, bytes);
downloader.downloadIfNecessary(resource2).get();
assertEquals(string, Files.readString(dest));
@ -92,7 +105,7 @@ public class DownloaderTest {
// does not re-request if size is the same
downloads = 0;
var resource3 = new Downloader.ResourceToDownload("resource", url, dest);
var resource3 = new Downloader.ResourceToDownload("resource", initialUrl, dest);
downloader.downloadIfNecessary(resource3).get();
assertEquals(0, downloads);
assertEquals(string, Files.readString(dest));
@ -100,7 +113,7 @@ public class DownloaderTest {
assertEquals(0, resource3.bytesDownloaded());
// does re-download if size changes
var resource4 = new Downloader.ResourceToDownload("resource", url, dest);
var resource4 = new Downloader.ResourceToDownload("resource", initialUrl, dest);
String newContent = "54321";
resources.put(url, newContent.getBytes(StandardCharsets.UTF_8));
downloader.downloadIfNecessary(resource4).get();