From dce910d23b4be4093eaa92fc7cb51419fd4c9cad Mon Sep 17 00:00:00 2001
From: Wing Yew Poon
Date: Tue, 1 Oct 2024 21:24:02 -0700
Subject: [PATCH 1/5] Fix indexing in dictionary encoded Parquet readers.

---
 ...dDictionaryEncodedParquetValuesReader.java | 24 +++++++++----------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDictionaryEncodedParquetValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDictionaryEncodedParquetValuesReader.java
index 55f1d3fd7908..c4098e9de7a0 100644
--- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDictionaryEncodedParquetValuesReader.java
+++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDictionaryEncodedParquetValuesReader.java
@@ -59,14 +59,10 @@ public void nextBatch(
       }
       int numValues = Math.min(left, currentCount);
       for (int i = 0; i < numValues; i++) {
-        int index = idx * typeWidth;
-        if (typeWidth == -1) {
-          index = idx;
-        }
         if (Mode.RLE.equals(mode)) {
-          nextVal(vector, dict, index, currentValue, typeWidth);
+          nextVal(vector, dict, idx, currentValue, typeWidth);
         } else if (Mode.PACKED.equals(mode)) {
-          nextVal(vector, dict, index, packedValuesBuffer[packedValuesBufferIdx++], typeWidth);
+          nextVal(vector, dict, idx, packedValuesBuffer[packedValuesBufferIdx++], typeWidth);
         }
         nullabilityHolder.setNotNull(idx);
         if (setArrowValidityVector) {
@@ -95,7 +91,7 @@ class LongDictEncodedReader extends BaseDictEncodedReader {
     @Override
     protected void nextVal(
         FieldVector vector, Dictionary dict, int idx, int currentVal, int typeWidth) {
-      vector.getDataBuffer().setLong(idx, dict.decodeToLong(currentVal));
+      vector.getDataBuffer().setLong(((long) idx) * typeWidth, dict.decodeToLong(currentVal));
     }
   }
 
@@ -103,7 +99,9 @@ class TimestampMillisDictEncodedReader extends BaseDictEncodedReader {
     @Override
     protected void nextVal(
         FieldVector vector, Dictionary dict, int idx, int currentVal, int typeWidth) {
-      vector.getDataBuffer().setLong(idx, dict.decodeToLong(currentVal) * 1000);
+      vector
+          .getDataBuffer()
+          .setLong(((long) idx) * typeWidth, dict.decodeToLong(currentVal) * 1000);
     }
   }
 
@@ -114,7 +112,7 @@ protected void nextVal(
       ByteBuffer buffer =
          dict.decodeToBinary(currentVal).toByteBuffer().order(ByteOrder.LITTLE_ENDIAN);
       long timestampInt96 = ParquetUtil.extractTimestampInt96(buffer);
-      vector.getDataBuffer().setLong(idx, timestampInt96);
+      vector.getDataBuffer().setLong(((long) idx) * typeWidth, timestampInt96);
     }
   }
 
@@ -122,7 +120,7 @@ class IntegerDictEncodedReader extends BaseDictEncodedReader {
     @Override
     protected void nextVal(
         FieldVector vector, Dictionary dict, int idx, int currentVal, int typeWidth) {
-      vector.getDataBuffer().setInt(idx, dict.decodeToInt(currentVal));
+      vector.getDataBuffer().setInt(((long) idx) * typeWidth, dict.decodeToInt(currentVal));
     }
   }
 
@@ -130,7 +128,7 @@ class FloatDictEncodedReader extends BaseDictEncodedReader {
     @Override
     protected void nextVal(
         FieldVector vector, Dictionary dict, int idx, int currentVal, int typeWidth) {
-      vector.getDataBuffer().setFloat(idx, dict.decodeToFloat(currentVal));
+      vector.getDataBuffer().setFloat(((long) idx) * typeWidth, dict.decodeToFloat(currentVal));
     }
   }
 
@@ -138,7 +136,7 @@ class DoubleDictEncodedReader extends BaseDictEncodedReader {
     @Override
     protected void nextVal(
         FieldVector vector, Dictionary dict, int idx, int currentVal, int typeWidth) {
-      vector.getDataBuffer().setDouble(idx, dict.decodeToDouble(currentVal));
+      vector.getDataBuffer().setDouble(((long) idx) * typeWidth, dict.decodeToDouble(currentVal));
     }
   }
 
@@ -147,7 +145,7 @@ class FixedWidthBinaryDictEncodedReader extends BaseDictEncodedReader {
     protected void nextVal(
         FieldVector vector, Dictionary dict, int idx, int currentVal, int typeWidth) {
       ByteBuffer buffer = dict.decodeToBinary(currentVal).toByteBuffer();
-      vector.getDataBuffer().setBytes(idx, buffer);
+      vector.getDataBuffer().setBytes(((long) idx) * typeWidth, buffer);
     }
   }
 
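Note on the fix above: ArrowBuf's setLong/setInt/setFloat/setDouble/setBytes address the
Arrow data buffer by raw byte offset, while vector-level setters such as set/setSafe take
a logical row index. Before this patch, nextBatch pre-scaled the index by typeWidth for
every fixed-width reader and passed the result to nextVal; after it, each reader that
writes through getDataBuffer() does its own ((long) idx) * typeWidth scaling, so nextVal
uniformly receives the logical row index. By implication, the dictionary-encoded readers
not shown in this diff — the ones that address the vector by row rather than by byte —
previously received a pre-scaled index and wrote to the wrong rows, which is the
corruption the tests added later in this series reproduce. A minimal standalone sketch of
the two addressing styles (illustrative only, not code from this patch; assumes Arrow
Java's arrow-vector plus a runtime memory module such as arrow-memory-netty on the
classpath):

    import org.apache.arrow.memory.BufferAllocator;
    import org.apache.arrow.memory.RootAllocator;
    import org.apache.arrow.vector.BigIntVector;

    public class ByteOffsetSketch {
      public static void main(String[] args) {
        int typeWidth = 8; // a BIGINT element is 8 bytes wide
        try (BufferAllocator allocator = new RootAllocator();
            BigIntVector vector = new BigIntVector("v", allocator)) {
          vector.allocateNew(4);
          for (int idx = 0; idx < 4; idx++) {
            // Byte-offset style, as in the readers above: scale the logical row
            // index by the element width, widening to long because ArrowBuf
            // offsets are longs.
            vector.getDataBuffer().setLong((long) idx * typeWidth, idx * 100L);
            // Logical-index style: the validity bit is addressed by row, not byte.
            vector.setIndexDefined(idx);
          }
          vector.setValueCount(4);
          System.out.println(vector); // expected: [0, 100, 200, 300]
        }
      }
    }

Mixing up the two styles does not fail fast; it silently writes values at the wrong
positions, which is why this regression surfaced only as wrong query results.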
From 60a596f0a6950a39a5e0c406ca57746253c5b76a Mon Sep 17 00:00:00 2001
From: Wing Yew Poon
Date: Sat, 5 Oct 2024 12:17:46 -0700
Subject: [PATCH 2/5] Add test.

---
 .../iceberg/spark/data/TestHelpers.java       | 15 ++++
 ...rquetDictionaryEncodedVectorizedReads.java | 65 ++++++++++++++++++
 .../decimal_dict_and_plain_encoding.parquet   | Bin 0 -> 3685 bytes
 3 files changed, 80 insertions(+)
 create mode 100644 spark/v3.5/spark/src/test/resources/decimal_dict_and_plain_encoding.parquet

diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestHelpers.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestHelpers.java
index c73ef630ac48..d3cfebbaf578 100644
--- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestHelpers.java
+++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestHelpers.java
@@ -116,6 +116,21 @@ public static void assertEqualsBatch(
     }
   }
 
+  public static void assertEqualsBatchWithRows(
+      Types.StructType struct, Iterator<Row> expected, ColumnarBatch batch) {
+    for (int rowId = 0; rowId < batch.numRows(); rowId++) {
+      List<Types.NestedField> fields = struct.fields();
+      InternalRow row = batch.getRow(rowId);
+      Row expectedRow = expected.next();
+      for (int i = 0; i < fields.size(); i += 1) {
+        Type fieldType = fields.get(i).type();
+        Object expectedValue = expectedRow.get(i);
+        Object actualValue = row.isNullAt(i) ? null : row.get(i, convert(fieldType));
+        assertEqualsUnsafe(fieldType, expectedValue, actualValue);
+      }
+    }
+  }
+
   private static void assertEqualsSafe(Types.ListType list, Collection expected, List actual) {
     Type elementType = list.elementType();
     List expectedElements = Lists.newArrayList(expected);

diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java
index eeed9d1a03ce..3f3d2ea13831 100644
--- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java
+++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java
@@ -23,8 +23,14 @@
 
 import java.io.File;
 import java.io.IOException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Iterator;
+import java.util.List;
 import org.apache.avro.generic.GenericData;
+import org.apache.iceberg.Files;
 import org.apache.iceberg.Schema;
+import org.apache.iceberg.io.CloseableIterable;
 import org.apache.iceberg.io.FileAppender;
 import org.apache.iceberg.parquet.Parquet;
 import org.apache.iceberg.relocated.com.google.common.base.Function;
@@ -33,11 +39,35 @@
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
 import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
 import org.apache.iceberg.spark.data.RandomData;
+import org.apache.iceberg.spark.data.TestHelpers;
+import org.apache.iceberg.spark.data.vectorized.VectorizedSparkParquetReaders;
+import org.apache.iceberg.types.Types;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.vectorized.ColumnarBatch;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
 
 public class TestParquetDictionaryEncodedVectorizedReads extends TestParquetVectorizedReads {
 
+  protected static SparkSession spark = null;
+
+  @BeforeAll
+  public static void startSpark() {
+    spark = SparkSession.builder().master("local[2]").getOrCreate();
+  }
+
+  @AfterAll
+  public static void stopSpark() {
+    if (spark != null) {
+      spark.stop();
+      spark = null;
+    }
+  }
+
   @Override
   Iterable<GenericData.Record> generateData(
       Schema schema,
@@ -93,4 +123,39 @@ public void testMixedDictionaryNonDictionaryReads() throws IOException {
         true,
         BATCH_SIZE);
   }
+
+  @Test
+  public void testDecimalNotAllPagesDictionaryEncoded() throws Exception {
+    Schema schema = new Schema(Types.NestedField.required(1, "id", Types.DecimalType.of(38, 0)));
+    Path path =
+        Paths.get(
+            getClass()
+                .getClassLoader()
+                .getResource("decimal_dict_and_plain_encoding.parquet")
+                .toURI());
+
+    Dataset<Row> df = spark.read().parquet(path.toString());
+    List<Row> expected = df.collectAsList();
+    long expectedSize = df.count();
+
+    Parquet.ReadBuilder readBuilder =
+        Parquet.read(Files.localInput(path.toFile()))
+            .project(schema)
+            .createBatchedReaderFunc(
+                type ->
+                    VectorizedSparkParquetReaders.buildReader(
+                        schema, type, ImmutableMap.of(), null));
+
+    try (CloseableIterable<ColumnarBatch> batchReader = readBuilder.build()) {
+      Iterator<Row> expectedIter = expected.iterator();
+      Iterator<ColumnarBatch> batches = batchReader.iterator();
+      int numRowsRead = 0;
+      while (batches.hasNext()) {
+        ColumnarBatch batch = batches.next();
+        numRowsRead += batch.numRows();
+        TestHelpers.assertEqualsBatchWithRows(schema.asStruct(), expectedIter, batch);
+      }
+      assertThat(numRowsRead).isEqualTo(expectedSize);
+    }
+  }
 }

diff --git a/spark/v3.5/spark/src/test/resources/decimal_dict_and_plain_encoding.parquet b/spark/v3.5/spark/src/test/resources/decimal_dict_and_plain_encoding.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..48b3bd1bf24f13150e2e283bf61f5d4776c3754a
GIT binary patch
literal 3685
[base85-encoded binary payload not reproduced here: a 3685-byte Parquet test resource]

literal 0
HcmV?d00001
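The elided binary above is the fixture for the next two patches: a single decimal(38, 0)
column chunk written as two 200-row pages, the first RLE dictionary encoded and the
second plain encoded (patch 3 below adds a comment documenting this). A file with that
shape can be approximated with the same helpers the varbinary test in patch 4 uses. The
fragment below is a hypothetical reconstruction, not the actual provenance of the
checked-in file; the dictionary-size and page-row-limit values are assumptions chosen to
force the writer's dictionary-to-plain fallback after the first page, and the imports and
static imports are those of the test class shown in patches 2 and 4:

    // Hypothetical sketch: force a dictionary-to-plain encoding fallback inside
    // one decimal column chunk. Property values are illustrative; verify the
    // resulting page layout before relying on it.
    Schema schema = new Schema(Types.NestedField.required(1, "id", Types.DecimalType.of(38, 0)));
    File parquetFile = File.createTempFile("decimal_dict_and_plain", ".parquet");
    // generateFallbackData (signature mirrored from the patch 4 test) produces
    // records whose early values are dictionary-friendly and whose later values
    // push the writer past its dictionary limit.
    Iterable<GenericData.Record> records = RandomData.generateFallbackData(schema, 400, 0L, 200);
    try (FileAppender<GenericData.Record> writer =
        Parquet.write(Files.localOutput(parquetFile))
            .schema(schema)
            .set(PARQUET_DICT_SIZE_BYTES, "4096") // small cap => early fallback (assumed value)
            .set(PARQUET_PAGE_ROW_LIMIT, "200") // two pages of 200 rows each (assumed value)
            .build()) {
      writer.addAll(records);
    }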
From 2c826487541cb94d284c4449bdc91497af6bbd94 Mon Sep 17 00:00:00 2001
From: Wing Yew Poon
Date: Sun, 6 Oct 2024 16:12:59 -0700
Subject: [PATCH 3/5] Add explanatory comment for test.

---
 .../TestParquetDictionaryEncodedVectorizedReads.java | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java
index 3f3d2ea13831..18c5700252ca 100644
--- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java
+++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java
@@ -124,6 +124,10 @@ public void testMixedDictionaryNonDictionaryReads() throws IOException {
         BATCH_SIZE);
   }
 
+  /**
+   * decimal_dict_and_plain_encoding.parquet contains one column chunk of decimal(38, 0) data in two
+   * pages, one RLE dictionary encoded and one plain encoded, each with 200 rows.
+   */
   @Test
   public void testDecimalNotAllPagesDictionaryEncoded() throws Exception {
     Schema schema = new Schema(Types.NestedField.required(1, "id", Types.DecimalType.of(38, 0)));
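Patch 4 below tightens the paren style of the fix (((long) idx) * typeWidth becomes
(long) idx * typeWidth, with no behavioral change) and adds coverage for variable-width
binary. For var-width columns the Iceberg readers use typeWidth == -1: there is no
element width to scale by, and Arrow's variable-width vectors are addressed by logical
row index through an offsets buffer, so a pre-scaled byte offset would land on the wrong
rows. A small illustrative sketch of that addressing style (not code from this series;
same Arrow classpath assumptions as the earlier sketch):

    import org.apache.arrow.memory.BufferAllocator;
    import org.apache.arrow.memory.RootAllocator;
    import org.apache.arrow.vector.VarBinaryVector;

    public class LogicalIndexSketch {
      public static void main(String[] args) {
        try (BufferAllocator allocator = new RootAllocator();
            VarBinaryVector vector = new VarBinaryVector("bytes", allocator)) {
          vector.allocateNew();
          for (int idx = 0; idx < 3; idx++) {
            // setSafe takes the logical row index and maintains the offsets
            // buffer itself; no typeWidth scaling is possible or needed.
            vector.setSafe(idx, new byte[] {(byte) idx, (byte) (idx + 1)});
          }
          vector.setValueCount(3);
          System.out.println(vector.getObject(1).length); // prints 2
        }
      }
    }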
From 70c4433b9168c50bbd6f38ce66d2ddeeee55d604 Mon Sep 17 00:00:00 2001
From: Wing Yew Poon
Date: Mon, 7 Oct 2024 16:40:02 -0700
Subject: [PATCH 4/5] Add a test for varbinary.

---
 ...dDictionaryEncodedParquetValuesReader.java | 16 ++++++-------
 ...rquetDictionaryEncodedVectorizedReads.java | 23 +++++++++++++++++++
 2 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDictionaryEncodedParquetValuesReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDictionaryEncodedParquetValuesReader.java
index c4098e9de7a0..56096f3f094d 100644
--- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDictionaryEncodedParquetValuesReader.java
+++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedDictionaryEncodedParquetValuesReader.java
@@ -91,7 +91,7 @@ class LongDictEncodedReader extends BaseDictEncodedReader {
     @Override
     protected void nextVal(
         FieldVector vector, Dictionary dict, int idx, int currentVal, int typeWidth) {
-      vector.getDataBuffer().setLong(((long) idx) * typeWidth, dict.decodeToLong(currentVal));
+      vector.getDataBuffer().setLong((long) idx * typeWidth, dict.decodeToLong(currentVal));
     }
   }
 
@@ -99,9 +99,7 @@ class TimestampMillisDictEncodedReader extends BaseDictEncodedReader {
     @Override
     protected void nextVal(
         FieldVector vector, Dictionary dict, int idx, int currentVal, int typeWidth) {
-      vector
-          .getDataBuffer()
-          .setLong(((long) idx) * typeWidth, dict.decodeToLong(currentVal) * 1000);
+      vector.getDataBuffer().setLong((long) idx * typeWidth, dict.decodeToLong(currentVal) * 1000);
     }
   }
 
@@ -112,7 +110,7 @@ protected void nextVal(
       ByteBuffer buffer =
          dict.decodeToBinary(currentVal).toByteBuffer().order(ByteOrder.LITTLE_ENDIAN);
       long timestampInt96 = ParquetUtil.extractTimestampInt96(buffer);
-      vector.getDataBuffer().setLong(((long) idx) * typeWidth, timestampInt96);
+      vector.getDataBuffer().setLong((long) idx * typeWidth, timestampInt96);
     }
   }
 
@@ -120,7 +118,7 @@ class IntegerDictEncodedReader extends BaseDictEncodedReader {
     @Override
     protected void nextVal(
         FieldVector vector, Dictionary dict, int idx, int currentVal, int typeWidth) {
-      vector.getDataBuffer().setInt(((long) idx) * typeWidth, dict.decodeToInt(currentVal));
+      vector.getDataBuffer().setInt((long) idx * typeWidth, dict.decodeToInt(currentVal));
     }
   }
 
@@ -128,7 +126,7 @@ class FloatDictEncodedReader extends BaseDictEncodedReader {
     @Override
     protected void nextVal(
         FieldVector vector, Dictionary dict, int idx, int currentVal, int typeWidth) {
-      vector.getDataBuffer().setFloat(((long) idx) * typeWidth, dict.decodeToFloat(currentVal));
+      vector.getDataBuffer().setFloat((long) idx * typeWidth, dict.decodeToFloat(currentVal));
     }
   }
 
@@ -136,7 +134,7 @@ class DoubleDictEncodedReader extends BaseDictEncodedReader {
     @Override
     protected void nextVal(
         FieldVector vector, Dictionary dict, int idx, int currentVal, int typeWidth) {
-      vector.getDataBuffer().setDouble(((long) idx) * typeWidth, dict.decodeToDouble(currentVal));
+      vector.getDataBuffer().setDouble((long) idx * typeWidth, dict.decodeToDouble(currentVal));
     }
   }
 
@@ -145,7 +143,7 @@ class FixedWidthBinaryDictEncodedReader extends BaseDictEncodedReader {
     protected void nextVal(
         FieldVector vector, Dictionary dict, int idx, int currentVal, int typeWidth) {
       ByteBuffer buffer = dict.decodeToBinary(currentVal).toByteBuffer();
-      vector.getDataBuffer().setBytes(((long) idx) * typeWidth, buffer);
+      vector.getDataBuffer().setBytes((long) idx * typeWidth, buffer);
     }
   }
 
diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java
index 18c5700252ca..9743c50c7117 100644
--- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java
+++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java
@@ -18,6 +18,8 @@
  */
 package org.apache.iceberg.spark.data.parquet.vectorized;
 
+import static org.apache.iceberg.TableProperties.PARQUET_DICT_SIZE_BYTES;
+import static org.apache.iceberg.TableProperties.PARQUET_PAGE_ROW_LIMIT;
 import static org.apache.iceberg.TableProperties.PARQUET_ROW_GROUP_SIZE_BYTES_DEFAULT;
 import static org.assertj.core.api.Assertions.assertThat;
 
@@ -124,6 +126,27 @@ public void testMixedDictionaryNonDictionaryReads() throws IOException {
         BATCH_SIZE);
   }
 
+  @Test
+  public void testBinaryNotAllPagesDictionaryEncoded() throws IOException {
+    Schema schema = new Schema(Types.NestedField.required(1, "bytes", Types.BinaryType.get()));
+    File parquetFile = File.createTempFile("junit", null, temp.toFile());
+    assertThat(parquetFile.delete()).as("Delete should succeed").isTrue();
+
+    Iterable<GenericData.Record> records = RandomData.generateFallbackData(schema, 500, 0L, 100);
+    try (FileAppender<GenericData.Record> writer =
+        Parquet.write(Files.localOutput(parquetFile))
+            .schema(schema)
+            .set(PARQUET_DICT_SIZE_BYTES, "4096")
+            .set(PARQUET_PAGE_ROW_LIMIT, "100")
+            .build()) {
+      writer.addAll(records);
+    }
+    // After this, parquetFile contains one column chunk of binary data in five pages,
+    // the first two RLE dictionary encoded, and the remaining three plain encoded.
+
+    assertRecordsMatch(schema, 500, records, parquetFile, true, BATCH_SIZE);
+  }
+
   /**
    * decimal_dict_and_plain_encoding.parquet contains one column chunk of decimal(38, 0) data in two
    * pages, one RLE dictionary encoded and one plain encoded, each with 200 rows.

From ef4d29beaad2e6423611c829e588dd0829d5d1c0 Mon Sep 17 00:00:00 2001
From: Wing Yew Poon
Date: Fri, 11 Oct 2024 15:58:56 -0700
Subject: [PATCH 5/5] Address nit.
---
 .../TestParquetDictionaryEncodedVectorizedReads.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java
index 9743c50c7117..bc4e722bc869 100644
--- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java
+++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java
@@ -141,9 +141,9 @@ public void testBinaryNotAllPagesDictionaryEncoded() throws IOException {
             .build()) {
       writer.addAll(records);
     }
-    // After this, parquetFile contains one column chunk of binary data in five pages,
-    // the first two RLE dictionary encoded, and the remaining three plain encoded.
 
+    // After the above, parquetFile contains one column chunk of binary data in five pages,
+    // the first two RLE dictionary encoded, and the remaining three plain encoded.
     assertRecordsMatch(schema, 500, records, parquetFile, true, BATCH_SIZE);
   }