Skip to content

Commit

Permalink
GzipCompressorOutputStream no longer percent-encodes in US-ASCII a file
Browse files Browse the repository at this point in the history
name or comment that the Charset in
GzipParameters.setFileNameCharset(Charset) cannot encode
  • Loading branch information
garydgregory committed Nov 7, 2024
1 parent ec7bc35 commit d0e5f6c
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 38 deletions.
6 changes: 4 additions & 2 deletions src/changes/changes.xml
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,16 @@ The <action> type attribute can be add,update,fix,remove.
<action type="add" dev="ggregory" due-to="Gary Gregory">Add GzipParameters.setModificationInstant(Instant).</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add GzipParameters.OS, setOS(OS), getOS().</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add GzipParameters.toString().</action>
<action type="add" dev="ggregory" due-to="vincexjl, Gary Gregory, Piotr P. Karwasz">Add GzipParameters.setFileNameCharset(Charset) and getFileNameCharset().</action>
<action type="add" dev="ggregory" issue="COMPRESS-638" due-to="vincexjl, Gary Gregory, Piotr P. Karwasz">Add GzipParameters.setFileNameCharset(Charset) and getFileNameCharset() to override the default ISO-8859-1 Charset #602.</action>
<!-- UPDATE -->
<action type="update" dev="ggregory" due-to="Dependabot, Gary Gregory">Bump org.apache.commons:commons-parent from 72 to 78 #563, #567, #574, #582, #587, #595.</action>
<action type="update" dev="ggregory" due-to="Dependabot, Gary Gregory">Bump com.github.luben:zstd-jni from 1.5.6-4 to 1.5.6-7 #565, #578, #601.</action>
<action type="update" dev="ggregory" due-to="Dependabot, Gary Gregory">Bump org.apache.commons:commons-lang3 from 3.16.0 to 3.17.0 #568.</action>
<action type="update" dev="ggregory" due-to="Dependabot, Gary Gregory">Bump commons-io:commons-io from 2.16.1 to 2.17.0 #575.</action>
<action type="update" dev="ggregory" due-to="Dependabot, Gary Gregory">Bump com.github.marschall:memoryfilesystem from 2.8.0 to 2.8.1 #577.</action>
<action type="update" dev="ggregory" due-to="Dependabot, Gary Gregory">Bump org.ow2.asm:asm from 9.7 to 9.7.1 #586.</action>
<action type="update" dev="ggregory" due-to="Dependabot, Gary Gregory">Bump org.ow2.asm:asm from 9.7 to 9.7.1 #586.</action>
<!-- REMOVE -->
<action type="remove" dev="ggregory" issue="COMPRESS-638" due-to="vincexjl, Gary Gregory, Piotr P. Karwasz">GzipCompressorOutputStream no longer percent-encodes in US-ASCII a file name or comment that the Charset in GzipParameters.setFileNameCharset(Charset) cannot encode.</action>
</release>
<release version="1.27.1" date="2024-08-16" description="This is a feature and maintenance release. Java 8 or later is required.">
<action type="fix" issue="COMPRESS-686" dev="ggregory" due-to="Richard Blank, Gary Gregory">Compression into BZip2 format has unexpected end of file when using a BufferedOutputStream.</action>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,9 @@

import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.zip.CRC32;
import java.util.zip.Deflater;
import java.util.zip.GZIPInputStream;
Expand Down Expand Up @@ -122,29 +119,6 @@ public void finish() throws IOException {
}
}

/**
* Gets the bytes of a string encoded with the given Charset, falling back to URI-style percent-encoding.
* <p>
* If the given {@code charset} can encode the whole string, the string is encoded directly with it. Otherwise the string is used as the path component of a
* {@link URI}, and the {@link URI#toASCIIString()} form of that URI is encoded as {@link StandardCharsets#US_ASCII}.
* </p>
*
* @param string The string to encode.
* @param charset The Charset to try first; it is dereferenced without a null check, so it must not be null.
* @return bytes encoded with the given charset when it can encode the string, otherwise the US-ASCII bytes of the percent-encoded form.
* @throws IOException When the URI used for the percent-encoded fallback cannot be built; the message is the input string and the cause is the
*         {@link URISyntaxException}.
*/
private byte[] getBytes(final String string, final Charset charset) throws IOException {
// Fast path: the requested charset can represent every character of the string.
if (charset.newEncoder().canEncode(string)) {
return string.getBytes(charset);
}
try {
// Fallback: only the path argument is set (scheme, host and fragment are null); toASCIIString() yields an ASCII-only form.
return new URI(null, null, string, null).toASCIIString().getBytes(StandardCharsets.US_ASCII);
} catch (final URISyntaxException e) {
// Surface the failure as an I/O error while preserving the original cause.
throw new IOException(string, e);
}
}
/**
* {@inheritDoc}
*
Expand All @@ -167,11 +141,9 @@ public void write(final byte[] buffer, final int offset, final int length) throw
}
if (length > 0) {
deflater.setInput(buffer, offset, length);

while (!deflater.needsInput()) {
deflate();
}

crc.update(buffer, offset, length);
}
}
Expand All @@ -182,15 +154,15 @@ public void write(final int b) throws IOException {
}

/**
* Writes a NUL-terminated String.
* Writes a NUL-terminated String encoded with the {@code charset}.
*
* @param value The String to write.
* @param parameters Specifies the Charset to use.
* @param charset Specifies the Charset to use.
* @throws IOException if an I/O error occurs.
*/
private void write(final String value, final GzipParameters parameters) throws IOException {
private void write(final String value, final Charset charset) throws IOException {
if (value != null) {
out.write(getBytes(value, parameters.getFileNameCharset()));
out.write(value.getBytes(charset));
out.write(0);
}
}
Expand All @@ -215,8 +187,8 @@ private void writeHeader(final GzipParameters parameters) throws IOException {
}
buffer.put((byte) parameters.getOperatingSystem());
out.write(buffer.array());
write(fileName, parameters);
write(comment, parameters);
write(fileName, parameters.getFileNameCharset());
write(comment, parameters.getFileNameCharset());
}

private void writeTrailer() throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ public void testFileNameAscii() throws IOException {
}

/**
* Tests COMPRESS-638.
* Tests COMPRESS-638. Use {@link GzipParameters#setFileNameCharset(Charset)} if you want non-ISO-8859-1 characters.
*
* GZip RFC requires ISO 8859-1 (LATIN-1).
*
Expand All @@ -116,6 +116,6 @@ public void testFileNameAscii() throws IOException {
@Test
public void testFileNameChinesePercentEncoded() throws IOException {
// "Test Chinese name"
testFileName("%E6%B5%8B%E8%AF%95%E4%B8%AD%E6%96%87%E5%90%8D%E7%A7%B0.xml", EXPECTED_FILE_NAME);
testFileName("??????.xml", EXPECTED_FILE_NAME);
}
}

0 comments on commit d0e5f6c

Please sign in to comment.