diff --git a/build.gradle b/build.gradle
index e10c7bc..057b3ac 100644
--- a/build.gradle
+++ b/build.gradle
@@ -1,7 +1,7 @@
 plugins {
 	id 'com.github.johnrengelman.shadow' version '8.1.1'
 	id 'java'
-	id 'com.github.ben-manes.versions' version '0.47.0'
+	id 'com.github.ben-manes.versions' version '0.48.0'
 }
 
 repositories {
@@ -10,7 +10,7 @@ repositories {
 
 base {
 	archivesName = 'jortage-poolmgr'
-	version = '1.4.2'
+	version = '1.5.0'
 }
 
 compileJava {
@@ -36,7 +36,7 @@ dependencies {
 	implementation 'com.squareup.okhttp3:okhttp:4.11.0'
 	implementation 'com.squareup.okhttp3:okhttp-brotli:4.11.0'
 
-	implementation 'org.mariadb.jdbc:mariadb-java-client:3.1.4'
+	implementation 'org.mariadb.jdbc:mariadb-java-client:3.2.0'
 	implementation 'com.zaxxer:HikariCP:5.0.1'
 
 	implementation 'org.apache.jclouds:jclouds-blobstore:2.5.0'
@@ -44,7 +44,7 @@ dependencies {
 	implementation 'org.apache.jclouds.api:filesystem:2.5.0'
 	implementation 'org.apache.jclouds.driver:jclouds-slf4j:2.5.0'
 
-	implementation 'org.eclipse.jetty:jetty-server:11.0.15'
+	implementation 'org.eclipse.jetty:jetty-server:11.0.16'
 
 	implementation 'org.slf4j:slf4j-api:1.7.36'
 	implementation 'org.slf4j:slf4j-simple:1.7.36'
@@ -83,5 +83,6 @@ tasks.named("dependencyUpdates").configure {
 	rejectVersionIf {
 		it.candidate.version.contains("alpha") || it.candidate.version.contains("beta")
 		|| (it.candidate.group == 'org.slf4j' && it.candidate.version.startsWith("2."))
+		|| (it.candidate.group == 'org.eclipse.jetty' && it.candidate.version.startsWith("12."))
 	}
 }
diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties
index 84a0b92..db9a6b8 100644
--- a/gradle/wrapper/gradle-wrapper.properties
+++ b/gradle/wrapper/gradle-wrapper.properties
@@ -1,5 +1,5 @@
 distributionBase=GRADLE_USER_HOME
 distributionPath=wrapper/dists
-distributionUrl=https\://services.gradle.org/distributions/gradle-8.2.1-bin.zip
+distributionUrl=https\://services.gradle.org/distributions/gradle-8.3-bin.zip
 zipStoreBase=GRADLE_USER_HOME
 zipStorePath=wrapper/dists
diff --git a/src/main/java/com/jortage/poolmgr/FileFormatUtils.java b/src/main/java/com/jortage/poolmgr/FileFormatUtils.java
new file mode 100644
index 0000000..fbd15fa
--- /dev/null
+++ b/src/main/java/com/jortage/poolmgr/FileFormatUtils.java
@@ -0,0 +1,150 @@
+package com.jortage.poolmgr;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import com.jortage.poolmgr.PngSurgeon.CRCException;
+import com.jortage.poolmgr.PngSurgeon.Chunk;
+
+import com.google.common.base.Charsets;
+import com.google.common.primitives.Longs;
+
+/**
+ * Rewrites uploaded files into a canonical form before they are hashed, so that copies of the
+ * same content which differ only in volatile metadata still deduplicate.
+ *
+ * Only PNG is understood; anything else is passed through byte-for-byte.
+ */
+public class FileFormatUtils {
+
+	/** tEXt chunks of this size or larger are copied through without being parsed. */
+	private static final int MAX_PARSED_TEXT_LENGTH = 16384;
+
+	/**
+	 * How many bytes to search for the NUL that ends a tEXt keyword. PNG caps keywords at 79
+	 * bytes, so a valid keyword and its terminator always fit.
+	 */
+	private static final int KEYWORD_WINDOW = 80;
+
+	/**
+	 * Copies {@code in} to {@code out}, stripping metadata that varies between otherwise
+	 * identical PNGs: {@code tIME} chunks and date-stamp entries of {@code tEXt} chunks.
+	 *
+	 * Input that does not start with the PNG signature is copied unmodified and the streams are
+	 * left open. For PNG input, copying stops after the {@code IEND} chunk and both streams are
+	 * closed before returning.
+	 *
+	 * @param in the data to normalize
+	 * @param out where the normalized data is written
+	 * @throws IOException if reading or writing fails, or if a PNG ends before {@code IEND}
+	 */
+	public static void reprocess(InputStream in, OutputStream out) throws IOException {
+		byte[] magic = new byte[8];
+		int count = in.readNBytes(magic, 0, 8);
+		if (count != 8 || Longs.fromByteArray(magic) != PngSurgeon.PNG_MAGIC) {
+			// not a PNG, or too short to be one: hand back exactly what was read
+			out.write(magic, 0, count);
+			in.transferTo(out);
+			return;
+		}
+		// PngSurgeon only deals in chunks, so the signature consumed above is re-emitted here;
+		// without it the output is not a PNG anymore
+		out.write(magic);
+		try (var ps = new PngSurgeon(in, out)) {
+			while (true) {
+				int type = ps.readChunkType();
+				if (type == Chunk.tIME) {
+					// useless chunk that destroys dedupe
+					ps.skipChunkData();
+				} else if (type == Chunk.tEXt && ps.getChunkLength() < MAX_PARSED_TEXT_LENGTH) {
+					rewriteTextChunk(ps);
+				} else {
+					// this includes oversized tEXt chunks. alright have fun with that
+					ps.copyChunk();
+					if (type == Chunk.IEND) break;
+				}
+			}
+		}
+	}
+
+	/**
+	 * Consumes the pending tEXt chunk of {@code ps} and writes back only the entries worth
+	 * keeping. A chunk left without entries is dropped; a chunk that cannot be parsed, or whose
+	 * CRC does not match, is written back as it was read.
+	 */
+	private static void rewriteTextChunk(PngSurgeon ps) throws IOException {
+		byte[] data;
+		try {
+			data = ps.readChunkData();
+		} catch (CRCException e) {
+			// uhh, okay. sure, you can enjoy that one
+			// (the chunk is already consumed by now, so copyChunk is no longer an option)
+			ps.writeRawChunk(Chunk.tEXt, e.getData(), e.getStoredCrc());
+			return;
+		}
+		byte[] filtered = filterTextEntries(data);
+		if (filtered == null) {
+			// corrupted tEXt chunk
+			ps.writeChunk(Chunk.tEXt, data);
+		} else if (filtered.length != 0) {
+			ps.writeChunk(Chunk.tEXt, filtered);
+		}
+	}
+
+	/**
+	 * Removes the volatile entries from a tEXt payload.
+	 *
+	 * The payload is read as a run of {@code keyword NUL text} entries. A standard chunk holds
+	 * one entry whose text runs to the end of the payload; additional NUL-terminated entries are
+	 * accepted too. Kept entries are copied byte-for-byte, terminator included if there was one.
+	 * An empty keyword ends parsing.
+	 *
+	 * @param data the chunk payload
+	 * @return the payload without its volatile entries (possibly empty), or {@code null} if a
+	 *         keyword terminator is missing and the payload should be kept as-is
+	 */
+	static byte[] filterTextEntries(byte[] data) {
+		var kept = new ByteArrayOutputStream(data.length);
+		int pos = 0;
+		while (pos < data.length) {
+			int keyEnd = indexOfNul(data, pos, Math.min(data.length, pos + KEYWORD_WINDOW));
+			if (keyEnd == -1) return null;
+			if (keyEnd == pos) break;
+			String key = new String(data, pos, keyEnd - pos, Charsets.ISO_8859_1);
+			int textEnd = indexOfNul(data, keyEnd + 1, data.length);
+			// the last entry normally has no terminator; its text simply runs out
+			int next = textEnd == -1 ? data.length : textEnd + 1;
+			if (!isVolatileKey(key)) {
+				kept.write(data, pos, next - pos);
+			}
+			pos = next;
+		}
+		return kept.toByteArray();
+	}
+
+	/** Returns whether {@code key} names an entry that only records when the file was touched. */
+	private static boolean isVolatileKey(String key) {
+		switch (key) {
+			case "date:timestamp":
+			case "date:modify":
+			case "date:create":
+				// useless entries that destroy dedupe
+				// (create is the closest to useful, but imagemagick will inject it in files that are missing a timestamp)
+				return true;
+			default:
+				return false;
+		}
+	}
+
+	/** Returns the index of the first NUL in {@code data[from, to)}, or -1 if there is none. */
+	private static int indexOfNul(byte[] data, int from, int to) {
+		for (int i = from; i < to; i++) {
+			if (data[i] == 0) return i;
+		}
+		return -1;
+	}
+
+}
diff --git a/src/main/java/com/jortage/poolmgr/JortageBlobStore.java b/src/main/java/com/jortage/poolmgr/JortageBlobStore.java
index 96bcc70..e247ff1 100644
--- a/src/main/java/com/jortage/poolmgr/JortageBlobStore.java
+++ b/src/main/java/com/jortage/poolmgr/JortageBlobStore.java
@@ -213,7 +213,7 @@ public class JortageBlobStore extends ForwardingBlobStore {
 				try (InputStream is = blob.getPayload().openStream();
 						FileOutputStream fos = new FileOutputStream(f)) {
 					HashingOutputStream hos = new HashingOutputStream(Hashing.sha512(), fos);
-					ByteStreams.copy(is, hos);
+					FileFormatUtils.reprocess(is, hos);
 					hash = hos.hash();
 				}
 				String hashString = hash.toString();
@@ -314,7 +314,7 @@ public class JortageBlobStore extends ForwardingBlobStore {
 			try (InputStream stream = delegate().getBlob(mpu.containerName(), mpu.blobName()).getPayload().openStream()) {
 				CountingOutputStream counter = new CountingOutputStream(ByteStreams.nullOutputStream());
 				HashingOutputStream hos = new HashingOutputStream(Hashing.sha512(), counter);
-				ByteStreams.copy(stream, hos);
+				FileFormatUtils.reprocess(stream, hos);
 				HashCode hash = hos.hash();
 				String hashStr = hash.toString();
 				String path = Poolmgr.hashToPath(hashStr);
diff --git a/src/main/java/com/jortage/poolmgr/PngSurgeon.java b/src/main/java/com/jortage/poolmgr/PngSurgeon.java
new file mode 100644
index 0000000..71e4e5b
--- /dev/null
+++ b/src/main/java/com/jortage/poolmgr/PngSurgeon.java
@@ -0,0 +1,224 @@
+package com.jortage.poolmgr;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.Closeable;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Locale;
+import java.util.zip.CRC32;
+import java.util.zip.CheckedOutputStream;
+
+import com.google.common.base.Charsets;
+import com.google.common.io.ByteStreams;
+import com.google.common.primitives.Ints;
+
+/**
+ * Reads a PNG chunk by chunk from one stream while writing a (possibly edited) PNG to another.
+ *
+ * Usage is a loop of {@link #readChunkType()} followed by exactly one of
+ * {@link #readChunkData()}, {@link #skipChunkData()} or {@link #copyChunk()}. The 8-byte
+ * signature is not handled here; the caller consumes and emits it.
+ */
+public class PngSurgeon implements Closeable {
+
+	/** Thrown by {@link #readChunkData()} when a chunk's stored CRC does not match its contents. */
+	public static class CRCException extends IOException {
+		private final byte[] data;
+		private final int storedCrc;
+
+		public CRCException(String msg) {
+			this(msg, new byte[0], 0);
+		}
+
+		public CRCException(String msg, byte[] data, int storedCrc) {
+			super(msg);
+			this.data = data;
+			this.storedCrc = storedCrc;
+		}
+
+		/** Returns the chunk data as it was read, so the chunk can still be passed along. */
+		public byte[] getData() {
+			return data;
+		}
+
+		/** Returns the CRC that followed the data in the stream. */
+		public int getStoredCrc() {
+			return storedCrc;
+		}
+	}
+
+	/** Chunk types as big-endian fourcc values, comparable to {@link #readChunkType()}. */
+	public static final class Chunk {
+		public static final int IHDR = fourcc("IHDR");
+		public static final int PLTE = fourcc("PLTE");
+		public static final int IDAT = fourcc("IDAT");
+		public static final int IEND = fourcc("IEND");
+		public static final int tRNS = fourcc("tRNS");
+		public static final int cHRM = fourcc("cHRM");
+		public static final int gAMA = fourcc("gAMA");
+		public static final int iCCP = fourcc("iCCP");
+		public static final int sBIT = fourcc("sBIT");
+		public static final int sRGB = fourcc("sRGB");
+		public static final int cICP = fourcc("cICP");
+		public static final int mDCv = fourcc("mDCv");
+		public static final int cLLi = fourcc("cLLi");
+		public static final int tEXt = fourcc("tEXt");
+		public static final int zTXt = fourcc("zTXt");
+		public static final int iTXt = fourcc("iTXt");
+		public static final int bKGD = fourcc("bKGD");
+		public static final int hIST = fourcc("hIST");
+		public static final int pHYs = fourcc("pHYs");
+		public static final int sPLT = fourcc("sPLT");
+		public static final int eXIf = fourcc("eXIf");
+		public static final int tIME = fourcc("tIME");
+		public static final int acTL = fourcc("acTL");
+		public static final int fcTL = fourcc("fcTL");
+		public static final int fdAT = fourcc("fdAT");
+	}
+
+	public static final long PNG_MAGIC = 0x89504E470D0A1A0AL;
+
+	private final DataInputStream in;
+	private final DataOutputStream out, crcOut;
+	private final CRC32 crc = new CRC32();
+
+	private int chunkLength = -1;
+	private int chunkType;
+
+	/**
+	 * Wraps the given streams; both are buffered internally and closed by {@link #close()}.
+	 */
+	public PngSurgeon(InputStream in, OutputStream out) throws IOException {
+		this.in = new DataInputStream(new BufferedInputStream(in));
+		OutputStream bout = new BufferedOutputStream(out);
+		this.out = new DataOutputStream(bout);
+		// shares bout with out, so bytes written through either land in call order
+		this.crcOut = new DataOutputStream(new CheckedOutputStream(bout, crc));
+	}
+
+	/**
+	 * Reads the header of the next chunk.
+	 *
+	 * @return the chunk type, see {@link Chunk}
+	 * @throws IOException if the stream ends or the chunk length is out of range
+	 * @throws IllegalStateException if the previous chunk has not been consumed yet
+	 */
+	public int readChunkType() throws IOException {
+		if (chunkLength != -1) throw new IllegalStateException("Current chunk has not been processed");
+		int length = in.readInt();
+		// PNG lengths never exceed 2^31-1, and a negative value could be mistaken for the -1
+		// "no pending chunk" marker
+		if (length < 0) throw new IOException("Invalid chunk length "+toHexString(length));
+		chunkLength = length;
+		chunkType = in.readInt();
+		return chunkType;
+	}
+
+	/** Returns the data length of the pending chunk. */
+	public int getChunkLength() {
+		if (chunkLength == -1) throw new IllegalStateException("Data has already been read or no chunk has been read yet");
+		return chunkLength;
+	}
+
+	/**
+	 * Reads the data of the pending chunk and verifies its CRC.
+	 *
+	 * @return the chunk data
+	 * @throws CRCException if the stored CRC is wrong; the chunk is consumed regardless, and the
+	 *         exception carries what was read
+	 */
+	public byte[] readChunkData() throws IOException {
+		if (chunkLength == -1) throw new IllegalStateException("Data has already been read or no chunk has been read yet");
+		byte[] data = new byte[chunkLength];
+		chunkLength = -1;
+		in.readFully(data);
+		crc.reset();
+		crc.update(Ints.toByteArray(chunkType));
+		crc.update(data);
+		int actual = in.readInt();
+		int expected = (int)crc.getValue();
+		if (actual != expected) {
+			throw new CRCException("Bad CRC ("+toHexString(actual)+" != "+toHexString(expected)+")", data, actual);
+		}
+		return data;
+	}
+
+	/** Discards the pending chunk, CRC included, without writing anything. */
+	public void skipChunkData() throws IOException {
+		if (chunkLength == -1) throw new IllegalStateException("Data has already been read or no chunk has been read yet");
+		// skipBytes is allowed to skip fewer bytes than asked, which would desync the parser
+		ByteStreams.skipFully(in, chunkLength+4L);
+		chunkLength = -1;
+	}
+
+	/** Copies the pending chunk to the output unchanged, without checking its CRC. */
+	public void copyChunk() throws IOException {
+		if (chunkLength == -1) throw new IllegalStateException("Data has already been read or no chunk has been read yet");
+		out.writeInt(chunkLength);
+		out.writeInt(chunkType);
+		// data plus the trailing CRC; long math so a huge length cannot overflow
+		ByteStreams.limit(in, chunkLength+4L).transferTo(out);
+		chunkLength = -1;
+	}
+
+	/** Writes a new chunk with a freshly computed CRC. */
+	public void writeChunk(int chunkType, byte[] data) throws IOException {
+		out.writeInt(data.length);
+		crc.reset();
+		crcOut.writeInt(chunkType);
+		crcOut.write(data);
+		out.writeInt((int)crc.getValue());
+	}
+
+	/** Writes a new chunk with a freshly computed CRC. */
+	public void writeChunk(int chunkType, ByteArrayOutputStream data) throws IOException {
+		out.writeInt(data.size());
+		crc.reset();
+		crcOut.writeInt(chunkType);
+		data.writeTo(crcOut);
+		out.writeInt((int)crc.getValue());
+	}
+
+	/** Writes a chunk that has no data. */
+	public void writeEmptyChunk(int chunkType) throws IOException {
+		out.writeInt(0);
+		crc.reset();
+		crcOut.writeInt(chunkType);
+		out.writeInt((int)crc.getValue());
+	}
+
+	/**
+	 * Writes a chunk using the given CRC verbatim instead of computing one, for passing along a
+	 * chunk whose stored CRC was wrong to begin with.
+	 */
+	public void writeRawChunk(int chunkType, byte[] data, int storedCrc) throws IOException {
+		out.writeInt(data.length);
+		out.writeInt(chunkType);
+		out.write(data);
+		out.writeInt(storedCrc);
+	}
+
+	/** Closes both streams, flushing buffered output; the output is closed even if the input fails. */
+	@Override
+	public void close() throws IOException {
+		try {
+			in.close();
+		} finally {
+			out.close();
+		}
+	}
+
+	private static int fourcc(String str) {
+		return Ints.fromByteArray(str.getBytes(Charsets.ISO_8859_1));
+	}
+
+	private static String toHexString(int i) {
+		return Long.toHexString(((i)&0xFFFFFFFFL)|0xF00000000L).substring(1).toUpperCase(Locale.ROOT);
+	}
+
+}
diff --git a/src/main/java/com/jortage/poolmgr/RivetHandler.java b/src/main/java/com/jortage/poolmgr/RivetHandler.java
index 5720941..c0a17e2 100644
--- a/src/main/java/com/jortage/poolmgr/RivetHandler.java
+++ b/src/main/java/com/jortage/poolmgr/RivetHandler.java
@@ -136,7 +136,7 @@ public final class RivetHandler extends AbstractHandler {
 				OutputStream sinkOut = bss.getSink().openStream();
 				HashingOutputStream hos = new HashingOutputStream(Hashing.sha512(), sinkOut);
 				try (InputStream in = getRes.body().byteStream()) {
-					ByteStreams.copy(in, hos);
+					FileFormatUtils.reprocess(in, hos);
 				}
 				hos.close();
 				HashCode hash = hos.hash();