
Commit

cleanup

shannonwells committed Nov 21, 2023
1 parent 445f27e commit f76ae06
Showing 3 changed files with 7 additions and 31 deletions.
8 changes: 0 additions & 8 deletions lib/bloomFilterIO/bloomFilterReader.ts
@@ -3,14 +3,6 @@ import parquet_thrift from "../../gen-nodejs/parquet_types";
 import { ParquetEnvelopeReader } from "../reader"
 import { ColumnChunkData } from "../declare";
 import sbbf from "../bloom/sbbf";
-import SplitBlockBloomFilter from "../bloom/sbbf";
-
-// TODO: maybe move this somewhere else?
-export type BloomFilterColumnData = {
-  sbbf: SplitBlockBloomFilter,
-  columnName: string,
-  rowGroupIndex: number,
-}
 
 const filterColumnChunksWithBloomFilters = (
   columnChunkDataCollection: Array<ColumnChunkData>
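Note on the removed export: BloomFilterColumnData was unused inside this module (the TODO above it already questioned its placement), and its only consumer is the integration test, which now declares the type locally (see the third file below). For illustration, a minimal sketch of typing the reader's output with such a local declaration; the getBloomFiltersFor call and its result shape are assumptions modeled on that test, not guaranteed API:

    import parquet from "../parquet";
    import SplitBlockBloomFilter from "../lib/bloom/sbbf";

    // Local copy of the shape this commit stops exporting.
    type BloomFilterColumnData = {
      sbbf: SplitBlockBloomFilter,
      columnName: string,
      rowGroupIndex: number,
    };

    // Assumed: filters come back keyed by column name, one entry per row group.
    async function bloomFiltersOf(path: string, column: string): Promise<BloomFilterColumnData[]> {
      const reader = await parquet.ParquetReader.openFile(path);
      const filters = await reader.getBloomFiltersFor([column]);
      return filters[column] ?? [];
    }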
21 changes: 0 additions & 21 deletions lib/writer.ts
@@ -259,7 +259,6 @@ export class ParquetEnvelopeWriter
   writeBloomFilters() {
     this.rowGroups.forEach(group => {
       group.columns.forEach(column => {
-        // new
         if (!column.meta_data) { return }
         if (!column.meta_data.path_in_schema.length) { return }
 
@@ -270,16 +269,6 @@
 
         bloomFilterWriter.setFilterOffset(column, this.offset);
 
-        // old
-        // const columnName = column.meta_data?.path_in_schema[0];
-        // if (!columnName || columnName in this.bloomFilters === false) return;
-        //
-        // const serializedBloomFilterData =
-        //   bloomFilterWriter.getSerializedBloomFilterData(this.bloomFilters[columnName]);
-        //
-        // bloomFilterWriter.setFilterOffset(column, this.offset);
-
-        // end changes
         this.writeSection(serializedBloomFilterData);
       });
     });
@@ -430,7 +419,6 @@ async function encodePages(schema: ParquetSchema, rowBuffer: parquet_shredder.Re
 
   let page;
 
-  // new
   const columnPath = field.path.join(',');
   const values = rowBuffer.columnData![columnPath];
 
@@ -439,15 +427,6 @@
     values.values!.forEach(v => splitBlockBloomFilter.insert(v));
   }
 
-  // old
-  // const values = rowBuffer.columnData![field.path.join(',')];
-  //
-  // if (opts.bloomFilters && (field.name in opts.bloomFilters)) {
-  //   const splitBlockBloomFilter = opts.bloomFilters[field.name];
-  //   values.values!.forEach(v => splitBlockBloomFilter.insert(v));
-  // }
-  // end changes
-
   let statistics: parquet_thrift.Statistics = {};
   if (field.statistics !== false) {
     statistics = {};
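For orientation, a sketch of how the surviving writeBloomFilters() path plausibly reads after this cleanup. The collapsed lines between the two hunks above are not shown, so the derivation of columnName and serializedBloomFilterData is an assumption reconstructed from the deleted "old" comments (joining the full path_in_schema rather than taking only its first element):

    writeBloomFilters() {
      this.rowGroups.forEach(group => {
        group.columns.forEach(column => {
          if (!column.meta_data) { return }
          if (!column.meta_data.path_in_schema.length) { return }

          // Assumed: resolve the column key from the full schema path.
          const columnName = column.meta_data.path_in_schema.join(',');
          if (!(columnName in this.bloomFilters)) { return }

          // Assumed: serialize the in-memory filter tracked for this column.
          const serializedBloomFilterData =
            bloomFilterWriter.getSerializedBloomFilterData(this.bloomFilters[columnName]);

          bloomFilterWriter.setFilterOffset(column, this.offset);
          this.writeSection(serializedBloomFilterData);
        });
      });
    }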
9 changes: 7 additions & 2 deletions test/bloomFilterIntegration.ts
@@ -1,14 +1,19 @@
 import {assert} from "chai";
 import parquet from "../parquet";
-import SplitBlockBloomFilter from "../lib/bloom/sbbf";
-import {BloomFilterColumnData} from "../lib/bloomFilterIO/bloomFilterReader";
 
 import parquet_thrift from "../gen-nodejs/parquet_types";
 import {decodeThrift} from "../lib/util";
+import SplitBlockBloomFilter from "../lib/bloom/sbbf";
 const TEST_VTIME = new Date();
 
 const TEST_FILE= '/tmp/fruits-bloomfilter.parquet'
 
+type BloomFilterColumnData = {
+  sbbf: SplitBlockBloomFilter,
+  columnName: string,
+  rowGroupIndex: number,
+}
+
 const sampleColumnHeaders = async (filename: string) => {
   let reader = await parquet.ParquetReader.openFile(filename);
 
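The integration test exercises the full round trip that the writer changes feed into. A minimal end-to-end sketch, reusing the test file's imports and TEST_FILE constant; the bloomFilters writer option shape, getBloomFiltersFor, and the check() signature are assumptions based on the project's documented usage, not verified against this revision:

    it("finds a written value in the column's bloom filter", async function () {
      const schema = new parquet.ParquetSchema({
        name: { type: "UTF8" },
      });

      // Assumed option shape: one entry per column that should carry a filter.
      const writer = await parquet.ParquetWriter.openFile(schema, TEST_FILE, {
        bloomFilters: [{ column: "name", numFilterBytes: 1024 }],
      });
      await writer.appendRow({ name: "apple" });
      await writer.close();

      const reader = await parquet.ParquetReader.openFile(TEST_FILE);
      const filters = await reader.getBloomFiltersFor(["name"]);
      const { sbbf } = filters["name"][0];

      // Assumed check() signature. Bloom filters can report false positives
      // but never false negatives, so a written value must check true.
      assert.isTrue(await sbbf.check("apple"));
    });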
