Skip to content
This repository has been archived by the owner on Jul 7, 2020. It is now read-only.

Commit

Permalink
Add cardinality support to BloomFilter.
Browse files Browse the repository at this point in the history
  • Loading branch information
b4hand committed May 16, 2017
1 parent dd912cc commit 0e2adba
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,12 @@ public boolean isPresent(byte[] key) {
return true;
}

/**
 * Estimates how many distinct keys have been added to this filter.
 *
 * <p>The estimate is computed as {@code -m * ln(1 - X / m) / k}, where
 * {@code m} is the value returned by {@code buckets()}, {@code X} is the
 * value returned by {@code filter_.cardinality()} (presumably the number of
 * bits currently set -- confirm against the bit set implementation), and
 * {@code k} is {@code hashCount}.
 *
 * <p>{@link Math#log1p(double)} is used for {@code ln(1 - x)}. When the
 * ratio {@code X / m} reaches 1 the logarithm is negative infinity, so the
 * rounded result is {@link Long#MAX_VALUE}.
 *
 * @return the estimated number of distinct keys, rounded to the nearest long
 */
public long cardinality() {
    final int bucketCount = buckets();
    final double setBits = (double) filter_.cardinality();
    final double fillRatio = setBits / bucketCount;
    // ln(1 - fillRatio) is never positive for ratios in [0, 1]; the leading
    // minus sign makes the estimate non-negative.
    final double estimate = -bucketCount * Math.log1p(-fillRatio) / hashCount;
    return Math.round(estimate);
}

/*
@param key -- value whose hash is used to fill
the filter_.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*
*/
package com.clearspring.analytics.stream.membership;

Expand Down Expand Up @@ -111,6 +111,27 @@ public void testSerialize() throws IOException {
FilterTest.testSerialize(bf);
}

/** A filter holding a single key must report an estimated cardinality of 1. */
@Test
public void testCardinalityOne() {
    bf.add("a");

    final long estimate = bf.cardinality();
    assertEquals(1, estimate);
}

/** A filter holding two distinct keys must report an estimated cardinality of 2. */
@Test
public void testCardinalityTwo() {
    bf.add("a");
    bf.add("b");

    final long estimate = bf.cardinality();
    assertEquals(2, estimate);
}

/**
 * Adds the decimal strings "0" through "99" and expects the estimated
 * cardinality to be exactly 100.
 */
@Test
public void testCardinalityHundred() {
    final int keyCount = 100;
    int next = 0;
    while (next < keyCount) {
        bf.add(String.valueOf(next));
        next++;
    }
    assertEquals(keyCount, bf.cardinality());
}

@Test
public void testGetFalsePositiveProbability() {
// These probabilities are taken from the bloom filter probability table at
Expand Down Expand Up @@ -204,7 +225,7 @@ public void testHashCount() {

instance = new BloomFilter(1, 7);
assertEquals(5, instance.getHashCount());

/*
* Although technically 8*ln(2) = 5.545...
* we round down here for speed
Expand Down

0 comments on commit 0e2adba

Please sign in to comment.