Skip to content
This repository has been archived by the owner on Jul 7, 2020. It is now read-only.

Add cardinality support to BloomFilter. #133

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,12 @@ public boolean isPresent(byte[] key) {
return true;
}

/**
 * Estimates how many distinct keys have been added to this filter.
 *
 * <p>With {@code m = buckets()}, {@code X = filter_.cardinality()} and
 * {@code k = hashCount}, the value returned is
 * {@code round(-m * ln(1 - X/m) / k)}.
 *
 * <p>NOTE(review): {@code buckets()} is presumably the total number of bits
 * and {@code filter_.cardinality()} the number of bits currently set --
 * confirm against their definitions, which are outside this view.
 *
 * <p>When {@code X == m} (and {@code m}, {@code hashCount} are positive) the
 * logarithm is negative infinity, so {@link Math#round(double)} yields
 * {@link Long#MAX_VALUE}.
 *
 * @return the rounded estimate of the number of inserted elements
 */
public long cardinality() {
    final int bucketCount = buckets();
    // Promote to double before dividing so the ratio is not truncated.
    final double setRatio = filter_.cardinality() / (double) bucketCount;
    // log1p(-x) computes ln(1 - x) accurately when x is close to zero.
    final double scaledLog = -bucketCount * Math.log1p(-setRatio);
    return Math.round(scaledLog / hashCount);
}

/*
@param key -- value whose hash is used to fill
the filter_.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*
*/
package com.clearspring.analytics.stream.membership;

Expand Down Expand Up @@ -111,6 +111,27 @@ public void testSerialize() throws IOException {
FilterTest.testSerialize(bf);
}

/**
 * After adding a single key, the cardinality estimate must be exactly 1.
 * Uses the shared {@code bf} fixture (declared outside this view).
 */
@Test
public void testCardinalityOne() {
    bf.add("a");

    final long estimate = bf.cardinality();
    assertEquals(1L, estimate);
}

/**
 * After adding two different keys, the cardinality estimate must be exactly 2.
 * Uses the shared {@code bf} fixture (declared outside this view).
 */
@Test
public void testCardinalityTwo() {
    bf.add("a");
    bf.add("b");

    final long estimate = bf.cardinality();
    assertEquals(2L, estimate);
}

/**
 * Adds the decimal strings "0" through "99" and expects the cardinality
 * estimate to be exactly 100.
 * Uses the shared {@code bf} fixture (declared outside this view).
 */
@Test
public void testCardinalityHundred() {
    final int distinctKeys = 100;

    int next = 0;
    while (next < distinctKeys) {
        bf.add(Integer.toString(next));
        next++;
    }

    assertEquals(distinctKeys, bf.cardinality());
}

@Test
public void testGetFalsePositiveProbability() {
// These probabilities are taken from the bloom filter probability table at
Expand Down Expand Up @@ -204,7 +225,7 @@ public void testHashCount() {

instance = new BloomFilter(1, 7);
assertEquals(5, instance.getHashCount());

/*
* Although technically 8*ln(2) = 5.545...
* we round down here for speed
Expand Down