From f76ae06c17aa91372311db5c007573423aade942 Mon Sep 17 00:00:00 2001 From: shannonwells Date: Tue, 21 Nov 2023 11:51:13 -0800 Subject: [PATCH] cleanup --- lib/bloomFilterIO/bloomFilterReader.ts | 8 -------- lib/writer.ts | 21 --------------------- test/bloomFilterIntegration.ts | 9 +++++++-- 3 files changed, 7 insertions(+), 31 deletions(-) diff --git a/lib/bloomFilterIO/bloomFilterReader.ts b/lib/bloomFilterIO/bloomFilterReader.ts index 69719424..14def67b 100644 --- a/lib/bloomFilterIO/bloomFilterReader.ts +++ b/lib/bloomFilterIO/bloomFilterReader.ts @@ -3,14 +3,6 @@ import parquet_thrift from "../../gen-nodejs/parquet_types"; import { ParquetEnvelopeReader } from "../reader" import { ColumnChunkData } from "../declare"; import sbbf from "../bloom/sbbf"; -import SplitBlockBloomFilter from "../bloom/sbbf"; - -// TODO: maybe move this somewhere else? -export type BloomFilterColumnData = { - sbbf: SplitBlockBloomFilter, - columnName: string, - rowGroupIndex: number, -} const filterColumnChunksWithBloomFilters = ( columnChunkDataCollection: Array diff --git a/lib/writer.ts b/lib/writer.ts index 00930c1e..234c4d9d 100644 --- a/lib/writer.ts +++ b/lib/writer.ts @@ -259,7 +259,6 @@ export class ParquetEnvelopeWriter { writeBloomFilters() { this.rowGroups.forEach(group => { group.columns.forEach(column => { - // new if (!column.meta_data) { return } if (!column.meta_data.path_in_schema.length) { return } @@ -270,16 +269,6 @@ export class ParquetEnvelopeWriter { bloomFilterWriter.setFilterOffset(column, this.offset); - // old - // const columnName = column.meta_data?.path_in_schema[0]; - // if (!columnName || columnName in this.bloomFilters === false) return; - // - // const serializedBloomFilterData = - // bloomFilterWriter.getSerializedBloomFilterData(this.bloomFilters[columnName]); - // - // bloomFilterWriter.setFilterOffset(column, this.offset); - - // end changes this.writeSection(serializedBloomFilterData); }); }); @@ -430,7 +419,6 @@ async function encodePages(schema: ParquetSchema, rowBuffer: parquet_shredder.Re let page; - // new const columnPath = field.path.join(','); const values = rowBuffer.columnData![columnPath]; @@ -439,15 +427,6 @@ async function encodePages(schema: ParquetSchema, rowBuffer: parquet_shredder.Re values.values!.forEach(v => splitBlockBloomFilter.insert(v)); } - // old - // const values = rowBuffer.columnData![field.path.join(',')]; - // - // if (opts.bloomFilters && (field.name in opts.bloomFilters)) { - // const splitBlockBloomFilter = opts.bloomFilters[field.name]; - // values.values!.forEach(v => splitBlockBloomFilter.insert(v)); - // } - // end changes - let statistics: parquet_thrift.Statistics = {}; if (field.statistics !== false) { statistics = {}; diff --git a/test/bloomFilterIntegration.ts b/test/bloomFilterIntegration.ts index 160c1cc7..d66db4ed 100644 --- a/test/bloomFilterIntegration.ts +++ b/test/bloomFilterIntegration.ts @@ -1,14 +1,19 @@ import {assert} from "chai"; import parquet from "../parquet"; -import SplitBlockBloomFilter from "../lib/bloom/sbbf"; -import {BloomFilterColumnData} from "../lib/bloomFilterIO/bloomFilterReader"; import parquet_thrift from "../gen-nodejs/parquet_types"; import {decodeThrift} from "../lib/util"; +import SplitBlockBloomFilter from "../lib/bloom/sbbf"; const TEST_VTIME = new Date(); const TEST_FILE= '/tmp/fruits-bloomfilter.parquet' +type BloomFilterColumnData = { + sbbf: SplitBlockBloomFilter, + columnName: string, + rowGroupIndex: number, +} + const sampleColumnHeaders = async (filename: string) => { let reader = await parquet.ParquetReader.openFile(filename);