From a90c7acca6cc8aec08362b8c193f89b980a8c63d Mon Sep 17 00:00:00 2001
From: Ryan Blue
Date: Tue, 5 Mar 2019 16:23:48 -0800
Subject: [PATCH] Fix handling of null partition values (#100)

* Fix StructLikeWrapper equals and hashCode null handling.
* Spark: Fix reading null partition values.
* Add test for null partition values.
---
 .../iceberg/util/StructLikeWrapper.java       |   5 +-
 .../netflix/iceberg/spark/source/Reader.java  |   7 +-
 .../spark/source/TestPartitionValues.java     | 130 ++++++++++++++++++
 3 files changed, 139 insertions(+), 3 deletions(-)
 create mode 100644 spark/src/test/java/com/netflix/iceberg/spark/source/TestPartitionValues.java

diff --git a/core/src/main/java/com/netflix/iceberg/util/StructLikeWrapper.java b/core/src/main/java/com/netflix/iceberg/util/StructLikeWrapper.java
index 7a76d58d6..983a8f168 100644
--- a/core/src/main/java/com/netflix/iceberg/util/StructLikeWrapper.java
+++ b/core/src/main/java/com/netflix/iceberg/util/StructLikeWrapper.java
@@ -20,6 +20,7 @@
 package com.netflix.iceberg.util;
 
 import com.netflix.iceberg.StructLike;
+import java.util.Objects;
 
 /**
  * Wrapper to adapt StructLike for use in maps and sets by implementing equals and hashCode.
@@ -68,7 +69,7 @@ public boolean equals(Object other) {
     }
 
     for (int i = 0; i < len; i += 1) {
-      if (!struct.get(i, Object.class).equals(that.struct.get(i, Object.class))) {
+      if (!Objects.equals(struct.get(i, Object.class), that.struct.get(i, Object.class))) {
         return false;
       }
     }
@@ -82,7 +83,7 @@ public int hashCode() {
     int len = struct.size();
     result = 41 * result + len;
     for (int i = 0; i < len; i += 1) {
-      result = 41 * result + struct.get(i, Object.class).hashCode();
+      result = 41 * result + Objects.hashCode(struct.get(i, Object.class));
     }
     return result;
   }
diff --git a/spark/src/main/java/com/netflix/iceberg/spark/source/Reader.java b/spark/src/main/java/com/netflix/iceberg/spark/source/Reader.java
index ed6ed21af..85628deee 100644
--- a/spark/src/main/java/com/netflix/iceberg/spark/source/Reader.java
+++ b/spark/src/main/java/com/netflix/iceberg/spark/source/Reader.java
@@ -488,7 +488,12 @@ private static class PartitionRowConverter implements Function<StructLike, InternalRow> {
[...]
diff --git a/spark/src/test/java/com/netflix/iceberg/spark/source/TestPartitionValues.java b/spark/src/test/java/com/netflix/iceberg/spark/source/TestPartitionValues.java
new file mode 100644
--- /dev/null
+++ b/spark/src/test/java/com/netflix/iceberg/spark/source/TestPartitionValues.java
@@ -0,0 +1,130 @@
[...]
+    List<SimpleRecord> expected = Lists.newArrayList(
+        new SimpleRecord(1, "a"),
+        new SimpleRecord(2, "b"),
+        new SimpleRecord(3, "c"),
+        new SimpleRecord(4, null)
+    );
+
+    Dataset<Row> df = spark.createDataFrame(expected, SimpleRecord.class);
+
+    try {
+      // TODO: incoming columns must be ordered according to the table's schema
+      df.select("id", "data").write()
+          .format("iceberg")
+          .mode("append")
+          .save(location.toString());
+
+      Dataset<Row> result = spark.read()
+          .format("iceberg")
+          .load(location.toString());
+
+      List<SimpleRecord> actual = result.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList();
+
+      Assert.assertEquals("Number of rows should match", expected.size(), actual.size());
+      Assert.assertEquals("Result rows should match", expected, actual);
+
+    } finally {
+      TestTables.clearTables();
+    }
+  }
+
+}
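
For reference, a minimal, self-contained sketch (not part of the patch) of the null-safe semantics that java.util.Objects provides. This is what the StructLikeWrapper change relies on: calling .equals() or .hashCode() directly on a null partition value throws NullPointerException, while the Objects helpers treat null as a legal value.

import java.util.Objects;

// Standalone demo of the null handling used by the StructLikeWrapper fix.
public class NullSafeEqualityDemo {
  public static void main(String[] args) {
    Object nullValue = null;
    Object partition = "c";

    // Objects.equals: true if both arguments are null, false if exactly
    // one is null, otherwise delegates to a.equals(b).
    System.out.println(Objects.equals(nullValue, partition)); // false, no NPE
    System.out.println(Objects.equals(nullValue, null));      // true

    // Objects.hashCode: 0 for null, otherwise o.hashCode().
    System.out.println(Objects.hashCode(nullValue));          // 0
    System.out.println(Objects.hashCode(partition));          // "c".hashCode()
  }
}

Because two nulls compare equal and hash to a stable 0, wrapped partition tuples containing null values can safely serve as map and set keys.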
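
The body of the Reader.java hunk is elided above. Judging from the hunk header (one line replaced by six inside PartitionRowConverter) and the commit message, the fix is presumably a null check before each partition value is converted and stored into the reused row. The following is a hedged, self-contained sketch of that shape only; the array-based row and the toy convert method are stand-ins, not the actual Iceberg or Spark types.

import java.util.Arrays;

// Hedged sketch of the null check likely introduced in PartitionRowConverter.
public class PartitionRowConverterSketch {
  // Stand-in for the reused Spark InternalRow that the converter fills.
  static Object[] reusedRow = new Object[2];

  // Stand-in for the reader's value conversion; it dereferences its input,
  // so passing a null straight through would throw NullPointerException.
  static Object convert(Object value) {
    return value.toString();
  }

  public static void main(String[] args) {
    // A partition tuple with a null value, e.g. identity("data") over (4, null).
    Object[] partitionTuple = {4, null};

    for (int i = 0; i < partitionTuple.length; i += 1) {
      Object value = partitionTuple[i];
      if (value != null) {
        reusedRow[i] = convert(value); // non-null: convert as before
      } else {
        reusedRow[i] = null;           // null: skip conversion entirely
      }
      // Unchecked equivalent: reusedRow[i] = convert(partitionTuple[i]);
      // which throws NPE on the null partition value.
    }

    System.out.println(Arrays.toString(reusedRow)); // [4, null]
  }
}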