Skip to content

Commit

Permalink
Add GzipParameters.setFileNameCharset(Charset) and getFileNameCharset()
Browse files Browse the repository at this point in the history
  • Loading branch information
garydgregory committed Nov 6, 2024
1 parent 8a86434 commit 132f574
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 28 deletions.
1 change: 1 addition & 0 deletions src/changes/changes.xml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ The <action> type attribute can be add,update,fix,remove.
<action type="add" dev="ggregory" due-to="Gary Gregory">Add GzipParameters.setModificationInstant(Instant).</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add GzipParameters.OS, setOS(OS), getOS().</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add GzipParameters.toString().</action>
<action type="add" dev="ggregory" due-to="vincexjl, Gary Gregory, Piotr P. Karwasz">Add GzipParameters.setFileNameCharset(Charset) and getFileNameCharset().</action>
<!-- UPDATE -->
<action type="update" dev="ggregory" due-to="Dependabot, Gary Gregory">Bump org.apache.commons:commons-parent from 72 to 78 #563, #567, #574, #582, #587, #595.</action>
<action type="update" dev="ggregory" due-to="Dependabot, Gary Gregory">Bump com.github.luben:zstd-jni from 1.5.6-4 to 1.5.6-7 #565, #578, #601.</action>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.zip.CRC32;
import java.util.zip.Deflater;
Expand Down Expand Up @@ -114,11 +115,9 @@ private void deflate() throws IOException {
public void finish() throws IOException {
    // Nothing left to do once the deflater reports that it has finished.
    if (deflater.finished()) {
        return;
    }
    // Signal end of input, then keep deflating until all pending compressed output has been produced.
    deflater.finish();
    while (!deflater.finished()) {
        deflate();
    }
    // The trailer is only written after the compressed data is complete.
    writeTrailer();
}
Expand All @@ -129,13 +128,15 @@ public void finish() throws IOException {
* If the string cannot be encoded directly with the given charset, then use URI-style percent encoding.
* </p>
*
* @param string The string to encode.
* @return
* @throws IOException
* @param string The string to encode.
* @param charset Overrides the default charset
* @return bytes encoded with the given charset, or the {@link StandardCharsets#US_ASCII} bytes of the URI-style percent-encoded string if the given
* charset cannot encode it.
* @throws IOException When the percent-encoding fallback fails.
*/
private byte[] getBytes(final String string) throws IOException {
if (GzipUtils.GZIP_ENCODING.newEncoder().canEncode(string)) {
return string.getBytes(GzipUtils.GZIP_ENCODING);
private byte[] getBytes(final String string, final Charset charset) throws IOException {
if (charset.newEncoder().canEncode(string)) {
return string.getBytes(charset);
}
try {
return new URI(null, null, string, null).toASCIIString().getBytes(StandardCharsets.US_ASCII);
Expand Down Expand Up @@ -180,17 +181,29 @@ public void write(final int b) throws IOException {
write(new byte[] { (byte) (b & 0xff) }, 0, 1);
}

/**
 * Writes a String followed by a terminating NUL byte; a {@code null} value writes nothing.
 *
 * @param value      The String to write, may be {@code null}.
 * @param parameters Supplies the Charset used to encode the value, see {@link GzipParameters#getFileNameCharset()}.
 * @throws IOException if an I/O error occurs.
 */
private void write(final String value, final GzipParameters parameters) throws IOException {
    if (value == null) {
        return;
    }
    final byte[] encoded = getBytes(value, parameters.getFileNameCharset());
    out.write(encoded);
    // NUL terminator marking the end of the string.
    out.write(0);
}

private void writeHeader(final GzipParameters parameters) throws IOException {
final String fileName = parameters.getFileName();
final String comment = parameters.getComment();

final ByteBuffer buffer = ByteBuffer.allocate(10);
buffer.order(ByteOrder.LITTLE_ENDIAN);
buffer.putShort((short) GZIPInputStream.GZIP_MAGIC);
buffer.put((byte) Deflater.DEFLATED); // compression method (8: deflate)
buffer.put((byte) ((fileName != null ? FNAME : 0) | (comment != null ? FCOMMENT : 0))); // flags
buffer.putInt((int) (parameters.getModificationTime() / 1000));

// extra flags
final int compressionLevel = parameters.getCompressionLevel();
if (compressionLevel == Deflater.BEST_COMPRESSION) {
Expand All @@ -200,28 +213,17 @@ private void writeHeader(final GzipParameters parameters) throws IOException {
} else {
buffer.put((byte) 0);
}

buffer.put((byte) parameters.getOperatingSystem());

out.write(buffer.array());

if (fileName != null) {
out.write(getBytes(fileName));
out.write(0);
}

if (comment != null) {
out.write(getBytes(comment));
out.write(0);
}
write(fileName, parameters);
write(comment, parameters);
}

private void writeTrailer() throws IOException {
    // Two little-endian 32-bit values: the CRC value followed by the deflater's total input count.
    final ByteBuffer trailer = ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN);
    trailer.putInt((int) crc.getValue()).putInt(deflater.getTotalIn());
    out.write(trailer.array());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,12 @@
package org.apache.commons.compress.compressors.gzip;

import java.io.OutputStream;
import java.nio.charset.Charset;
import java.time.Instant;
import java.util.zip.Deflater;

import org.apache.commons.io.Charsets;

/**
* Parameters for the GZIP compressor.
*
Expand Down Expand Up @@ -287,6 +290,7 @@ public int type() {
*/
private Instant modificationTime = Instant.EPOCH;
private String fileName;
private Charset fileNameCharset = GzipUtils.GZIP_ENCODING;
private String comment;
private OS operatingSystem = OS.UNKNOWN; // Unknown OS by default
private int bufferSize = 512;
Expand Down Expand Up @@ -359,6 +363,19 @@ public String getFileName() {
}


/**
 * Gets the Charset used when writing file names and comments.
 * <p>
 * Unless changed, this is {@link GzipUtils#GZIP_ENCODING}.
 * </p>
 *
 * @return the Charset used when writing file names and comments.
 * @since 1.28.0
 */
public Charset getFileNameCharset() {
    return this.fileNameCharset;
}

/**
* Gets the most recent modification time (MTIME) of the original file being compressed.
*
Expand Down Expand Up @@ -471,6 +488,23 @@ public void setFileName(final String fileName) {
this.fileName = fileName;
}

/**
 * Sets the Charset used when writing file names and comments; a {@code null} argument selects {@link GzipUtils#GZIP_ENCODING}.
 * <p>
 * <em>Any value other than {@link GzipUtils#GZIP_ENCODING} is not compliant with the
 * <a href="https://datatracker.ietf.org/doc/html/rfc1952">RFC 1952 GZIP File Format Specification</a></em>. Use at your own risk of interoperability
 * issues.
 * </p>
 * <p>
 * The default value is {@link GzipUtils#GZIP_ENCODING}.
 * </p>
 *
 * @param charset the Charset used when writing file names and comments, {@code null} maps to {@link GzipUtils#GZIP_ENCODING}.
 * @since 1.28.0
 */
public void setFileNameCharset(final Charset charset) {
    final Charset effective = Charsets.toCharset(charset, GzipUtils.GZIP_ENCODING);
    this.fileNameCharset = effective;
}

/**
* Sets the modification time (MTIME) of the compressed file.
*
Expand Down Expand Up @@ -533,7 +567,7 @@ public void setOS(final OS os) {

@Override
public String toString() {
StringBuilder builder = new StringBuilder();
final StringBuilder builder = new StringBuilder();
builder.append("GzipParameters [compressionLevel=").append(compressionLevel).append(", modificationTime=").append(modificationTime)
.append(", fileName=").append(fileName).append(", comment=").append(comment).append(", operatingSystem=").append(operatingSystem)
.append(", bufferSize=").append(bufferSize).append(", deflateStrategy=").append(deflateStrategy).append("]");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import org.apache.commons.compress.compressors.FileNameUtil;

/**
* Utility code for the gzip compression format.
* Utility code for the GZIP compression format.
*
* @see <a href="https://datatracker.ietf.org/doc/html/rfc1952">RFC 1952 GZIP File Format Specification</a>
* @ThreadSafe
Expand Down Expand Up @@ -55,7 +55,7 @@ public class GzipUtils {
}

/**
* Encoding for file name and comments per the <a href="https://tools.ietf.org/html/rfc1952">GZIP File Format Specification</a>
* Encoding for file name and comments per the <a href="https://tools.ietf.org/html/rfc1952">GZIP File Format Specification</a>.
*/
static final Charset GZIP_ENCODING = StandardCharsets.ISO_8859_1;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
Expand All @@ -34,6 +35,42 @@
*/
public class GzipCompressorOutputStreamTest {

private static final String EXPECTED_BASE_NAME = "\u6D4B\u8BD5\u4E2D\u6587\u540D\u79F0";
private static final String EXPECTED_FILE_NAME = EXPECTED_BASE_NAME + ".xml";

/**
 * Writes a GZIP file whose header file name is {@link #EXPECTED_FILE_NAME} encoded with the given Charset, reads the header back, and asserts that the
 * file name can be recovered with that Charset.
 * <p>
 * Both temporary files created by this method are deleted before it returns, whether or not the assertions pass.
 * </p>
 *
 * @param expected        the file name expected after decoding the header bytes with {@code fileNameCharset}.
 * @param sourceFile      used as both prefix and suffix of the temporary source file.
 * @param fileNameCharset the Charset configured on the {@link GzipParameters} and used to decode the header bytes.
 * @throws IOException if an I/O error occurs.
 */
private void testChineseFileName(final String expected, final String sourceFile, final Charset fileNameCharset) throws IOException {
    final Path tempSourceFile = Files.createTempFile(sourceFile, sourceFile);
    Path targetFile = null;
    try {
        Files.write(tempSourceFile, "<text>Hello World!</text>".getBytes(StandardCharsets.ISO_8859_1));
        targetFile = Files.createTempFile(EXPECTED_BASE_NAME, ".gz");
        final GzipParameters parameters = new GzipParameters();
        // On a Chinese-locale Windows system with a Chinese file name, set the file name Charset to GBK;
        // otherwise the stored name differs from the one produced without GzipParameters.
        // The same applies on Linux, where the file name Charset should be UTF-8.
        parameters.setFileNameCharset(fileNameCharset);
        assertEquals(fileNameCharset, parameters.getFileNameCharset());
        parameters.setFileName(EXPECTED_FILE_NAME);
        try (OutputStream fos = Files.newOutputStream(targetFile);
                GzipCompressorOutputStream gos = new GzipCompressorOutputStream(fos, parameters)) {
            Files.copy(tempSourceFile, gos);
        }
        try (GzipCompressorInputStream gis = new GzipCompressorInputStream(Files.newInputStream(targetFile))) {
            // Map the reported name back to its ISO-8859-1 bytes, then decode those bytes with the Charset under test.
            final byte[] fileNameBytes = gis.getMetaData().getFileName().getBytes(StandardCharsets.ISO_8859_1);
            final String unicodeFileName = new String(fileNameBytes, fileNameCharset);
            assertEquals(expected, unicodeFileName);
        }
    } finally {
        // Do not leave temporary files behind after the test.
        Files.deleteIfExists(tempSourceFile);
        if (targetFile != null) {
            Files.deleteIfExists(targetFile);
        }
    }
}

/**
 * Tests writing and reading back a Chinese file name, first with UTF-8 and then with GBK as the file name Charset.
 *
 * @throws IOException When the test fails.
 */
@Test
public void testChineseFileName() throws IOException {
    final Charset utf8 = StandardCharsets.UTF_8;
    testChineseFileName(EXPECTED_FILE_NAME, EXPECTED_FILE_NAME, utf8);
    // GBK is looked up only after the UTF-8 round trip has completed.
    final Charset gbk = Charset.forName("GBK");
    testChineseFileName(EXPECTED_FILE_NAME, EXPECTED_FILE_NAME, gbk);
}

private void testFileName(final String expected, final String sourceFile) throws IOException {
final Path tempSourceFile = Files.createTempFile(sourceFile, sourceFile);
Files.write(tempSourceFile, "<text>Hello World!</text>".getBytes(StandardCharsets.ISO_8859_1));
Expand Down Expand Up @@ -68,7 +105,6 @@ public void testFileNameAscii() throws IOException {
@Test
public void testFileNameChinesePercentEncoded() throws IOException {
// "Test Chinese name": the expected value (first argument) is the UTF-8 percent-encoded form of the
// Chinese file name passed as the second argument; the ASCII ".xml" suffix is left unescaped.
testFileName("%E6%B5%8B%E8%AF%95%E4%B8%AD%E6%96%87%E5%90%8D%E7%A7%B0.xml", "\u6D4B\u8BD5\u4E2D\u6587\u540D\u79F0.xml");
testFileName("%E6%B5%8B%E8%AF%95%E4%B8%AD%E6%96%87%E5%90%8D%E7%A7%B0.xml", EXPECTED_FILE_NAME);
}

}

0 comments on commit 132f574

Please sign in to comment.