diff --git a/src/main/java/com/clearspring/analytics/stream/membership/BloomFilter.java b/src/main/java/com/clearspring/analytics/stream/membership/BloomFilter.java index a374b088a..f63764f0d 100644 --- a/src/main/java/com/clearspring/analytics/stream/membership/BloomFilter.java +++ b/src/main/java/com/clearspring/analytics/stream/membership/BloomFilter.java @@ -83,6 +83,12 @@ public boolean isPresent(byte[] key) { return true; } + /** Estimates how many keys were added, computed as round(-m * ln(1 - X/m) / hashCount) with m = buckets() and X = filter_.cardinality(). */ public long cardinality() { + int m = buckets(); + /* share of the m buckets counted by filter_.cardinality() (presumably the set bits -- TODO confirm) */ double fractionOfBits = ((double) filter_.cardinality()) / m; + /* NOTE(review): when fractionOfBits reaches 1, log1p(-1) is -Infinity, so for positive m and hashCount Math.round saturates at Long.MAX_VALUE */ return Math.round(-m * Math.log1p(-fractionOfBits) / hashCount); + } + /* @param key -- value whose hash is used to fill the filter_. diff --git a/src/test/java/com/clearspring/analytics/stream/membership/BloomFilterTest.java b/src/test/java/com/clearspring/analytics/stream/membership/BloomFilterTest.java index 178d53c6a..cda1794d4 100644 --- a/src/test/java/com/clearspring/analytics/stream/membership/BloomFilterTest.java +++ b/src/test/java/com/clearspring/analytics/stream/membership/BloomFilterTest.java @@ -15,7 +15,7 @@ * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. 
-* +* */ package com.clearspring.analytics.stream.membership; @@ -111,6 +111,27 @@ public void testSerialize() throws IOException { FilterTest.testSerialize(bf); } + /** Expects cardinality() to report exactly 1 after one key is added to bf (assumes bf starts empty -- confirm against the fixture setup). */ @Test + public void testCardinalityOne() { + bf.add("a"); + assertEquals(1, bf.cardinality()); + } + + /** Expects cardinality() to report exactly 2 after two different keys are added to bf (assumes bf starts empty -- confirm against the fixture setup). */ @Test + public void testCardinalityTwo() { + bf.add("a"); + bf.add("b"); + assertEquals(2, bf.cardinality()); + } + + /** Expects cardinality() to report exactly 100 after the decimal strings for 0 through 99 are added; NOTE(review): an exact match presumably depends on how bf is sized, which is set up outside this hunk. */ @Test + public void testCardinalityHundred() { + for (int i = 0; i < 100; ++i) { + bf.add(Integer.toString(i)); + } + assertEquals(100, bf.cardinality()); + } + @Test public void testGetFalsePositiveProbability() { // These probabilities are taken from the bloom filter probability table at @@ -204,7 +225,7 @@ public void testHashCount() { instance = new BloomFilter(1, 7); assertEquals(5, instance.getHashCount()); - + /* * Although technically 8*ln(2) = 5.545... * we round down here for speed