forked from ZJONSSON/parquetjs
-
Notifications
You must be signed in to change notification settings - Fork 25
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Reference Tests and Breaking Change: Optional nullable fields are now…
… null instead of undefined (#114) Problem ======= We wanted to add tests for all the tests in https://github.com/apache/parquet-testing ### Discovered Bugs - We treated nulls as undefined, but others don't - We incorrectly processed dictionary_page_offset >= 0 instead of only > 0 Solution ======== - Added new test that automatically tests all files: `test/reference-test/read-all.test.ts` - Fixed found bugs with @shannonwells Steps to Verify: ---------------- 1. Run the tests 1. Comment out the bug fixes and see reference test files fail
- Loading branch information
Showing
60 changed files
with
92 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Reference Tests | ||
|
||
This is a set of tests that use the reference files from https://github.com/apache/parquet-testing/. | ||
|
||
## Updating the Reference Files | ||
|
||
This assumes that the parquetjs checkout and the parquet-testing clone are sibling directories (i.e. they share the same parent folder). | ||
|
||
1. `git clone git@github.com:apache/parquet-testing.git` | ||
1. `cd ../parquetjs` | ||
1. `cp ../parquet-testing/data/*.parquet ./test/reference-test/files/` | ||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added
BIN
+2.82 KB
test/reference-test/files/data_index_bloom_encoding_with_length.parquet
Binary file not shown.
Binary file not shown.
Binary file added
BIN
+3.3 KB
test/reference-test/files/datapage_v1-snappy-compressed-checksum.parquet
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added
BIN
+814 Bytes
test/reference-test/files/rle-dict-uncompressed-corrupt-checksum.parquet
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import { expect } from "chai"; | ||
import path from "node:path"; | ||
import fs from "node:fs"; | ||
|
||
import parquet from '../../parquet'; | ||
|
||
// Debug switch: set to a single file name to run only that file's test; leave null to run all files. Example: | ||
// const onlyTest = 'single_nan.parquet'; | ||
const onlyTest = null; | ||
|
||
// Reference files excluded from the read test, each annotated with the reason it is skipped. | ||
const unsupported = [ | ||
'byte_stream_split.zstd.parquet', // ZSTD unsupported | ||
'hadoop_lz4_compressed.parquet', // LZ4 unsupported | ||
'hadoop_lz4_compressed_larger.parquet', // LZ4 unsupported | ||
'lz4_raw_compressed.parquet', // LZ4_RAW unsupported | ||
'lz4_raw_compressed_larger.parquet', // LZ4_RAW unsupported | ||
'nested_structs.rust.parquet', // ZSTD unsupported | ||
'non_hadoop_lz4_compressed.parquet', // LZ4 unsupported (per file name; TODO confirm codec) | ||
'rle_boolean_encoding.parquet', // BUG?: https://github.com/LibertyDSNP/parquetjs/issues/113 | ||
'datapage_v2.snappy.parquet', // DELTA_BINARY_PACKED unsupported | ||
'delta_binary_packed.parquet', // DELTA_BINARY_PACKED unsupported | ||
'delta_byte_array.parquet', // DELTA_BYTE_ARRAY unsupported | ||
'delta_encoding_optional_column.parquet', // DELTA_BINARY_PACKED unsupported | ||
'delta_encoding_required_column.parquet', // DELTA_BINARY_PACKED unsupported | ||
'delta_length_byte_array.parquet', // ZSTD unsupported, DELTA_BINARY_PACKED unsupported | ||
'float16_nonzeros_and_nans.parquet', // missing option: typeLength (required for FIXED_LEN_BYTE_ARRAY) | ||
'float16_zeros_and_nans.parquet', // missing option: typeLength (required for FIXED_LEN_BYTE_ARRAY) | ||
'large_string_map.brotli.parquet', // BUG? | ||
]; | ||
|
||
// Smoke test: registers one `it` case per *.parquet file found in ./files that is not listed in `unsupported`. | ||
// Each case opens the file, checks the schema is non-empty, and checks the first record's keys against the schema. | ||
describe("Read Test for all files", function () { | ||
|
||
// The directory is listed synchronously so the test cases can be registered while the suite is being defined. | ||
const listOfFiles = fs.readdirSync(path.join(__dirname, 'files')) | ||
.filter(x => x.endsWith(".parquet") && !unsupported.includes(x)); | ||
|
||
for (const filename of listOfFiles) { | ||
// When onlyTest is set, skip every file except the one it names. | ||
if (onlyTest && onlyTest !== filename) continue; | ||
it(`Reading ${filename}`, async function () { | ||
// NOTE(review): the reader is never closed in this test; confirm whether a close() call is needed to release the file handle. | ||
const reader = await parquet.ParquetReader.openFile(path.join(__dirname, 'files', filename)); | ||
const schema = reader.getSchema(); | ||
expect(schema.fieldList).to.have.length.greaterThan(0); | ||
const cursor = reader.getCursor(); | ||
// Only the first record is read. | ||
// NOTE(review): presumably cursor.next() yields a null-ish value for a file with no rows, which would make Object.keys throw below; verify. | ||
const record = await cursor.next() as any; | ||
// Top-level fields are those whose path has exactly one element; the record's keys must equal their names, in the same order. | ||
const expectedRecordKeys = schema.fieldList.filter(x => x.path.length === 1).map(x => x.name); | ||
expect(Object.keys(record)).to.deep.equal(expectedRecordKeys); | ||
}) | ||
} | ||
}); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters