Skip to content

Commit

Permalink
Merge pull request #68 from robert-milan/add-compact
Browse files Browse the repository at this point in the history
add Shrink method
  • Loading branch information
lemire authored Jan 14, 2019
2 parents 4edb874 + bdac85d commit 3fb80c4
Show file tree
Hide file tree
Showing 2 changed files with 303 additions and 0 deletions.
64 changes: 64 additions & 0 deletions bitset.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,70 @@ func (b *BitSet) Flip(i uint) *BitSet {
return b
}

// Shrink shrinks the BitSet so that length is the index of the last bit it
// holds. All bits at an index greater than length are cleared, and the length
// of the set becomes length+1.
//
// Note that the argument is the highest retained bit index, not a bit count,
// so the set is never shrunk to zero bits by this method.
//
// Shrink never grows the set: when length+1 overflows uint, or when it is
// larger than the current length, the BitSet is returned unchanged.
//
// A new slice is allocated to store the retained words, so you may see an
// increase in memory usage until the GC runs. Normally this should not be a
// problem, but if you have an extremely large BitSet it's important to
// understand that the old backing slice will remain in memory until the GC
// frees it.
func (b *BitSet) Shrink(length uint) *BitSet {
	newLength := length + 1
	// newLength == 0 means length+1 wrapped around. A request beyond the
	// current length would make the set longer instead of shorter.
	if newLength == 0 || newLength > b.length {
		return b
	}

	words := wordsNeeded(newLength)
	// wordsNeeded is expected to return at least 1 for a non-zero bit count;
	// the lower bound only protects the words-1 index below.
	if words < 1 || words > len(b.set) {
		return b
	}

	shrunk := make([]uint64, words)
	copy(shrunk, b.set[:words])

	// Only the low (length mod wordSize)+1 bits of the final word belong to
	// the shrunk set; everything above them is cleared.
	keep := (length & (wordSize - 1)) + 1
	shrunk[words-1] &= allBits >> (wordSize - keep)

	b.set = shrunk
	b.length = newLength
	return b
}

// InsertAt opens up a new bit at position idx: every bit at idx or above
// moves one position higher, the bit at idx becomes 0, and the length of the
// set grows by one.
//
// The cost depends on the size of the BitSet and on where the bit is inserted:
// every word above the insertion point is rewritten, and when the current
// length is an exact multiple of the word size an extra word is appended
// first, which might cause the entire BitSet to be recopied.
func (b *BitSet) InsertAt(idx uint) *BitSet {
	targetWord := idx >> log2WordSize

	// With the last word completely used there is no room for the bit that is
	// pushed out at the top, so make space for it up front.
	if b.isLenExactMultiple() {
		b.set = append(b.set, 0)
	}

	// Walk from the highest word down to the one just above the insertion
	// point. Each of these words moves up by one bit and takes the most
	// significant bit of its lower neighbour, which has not been shifted yet,
	// as its new least significant bit.
	w := uint(len(b.set) - 1)
	for ; w > targetWord; w-- {
		b.set[w] = b.set[w]<<1 | b.set[w-1]>>63
	}

	// Inside the word that receives the new bit, the bits below the insertion
	// offset stay where they are while the remaining bits move up by one,
	// which leaves a 0 at the insertion offset.
	lowMask := uint64(1)<<uint64(idx&(wordSize-1)) - 1
	word := b.set[w]
	b.set[w] = (word & lowMask) | ((word &^ lowMask) << 1)

	// the set now holds one more bit
	b.length++

	return b
}

// String creates a string representation of the Bitmap
func (b *BitSet) String() string {
// follows code from https://github.com/RoaringBitmap/roaring
Expand Down
239 changes: 239 additions & 0 deletions bitset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,245 @@ func TestAll(t *testing.T) {
}
}

// TestShrink drives a single BitSet through a sequence of Set and Shrink
// calls and, after every Shrink, checks which bits survived and (for the large
// indexes) how many words the backing slice holds. The test stops at the first
// failed expectation.
func TestShrink(t *testing.T) {
	b := New(0)

	// bitState pairs a bit index with the state it is expected to be in.
	type bitState struct {
		idx uint
		set bool
	}

	// verify walks the expectations in order, reports the first mismatch and
	// returns false so the caller can stop right away.
	verify := func(states ...bitState) bool {
		for _, s := range states {
			if b.Test(s.idx) == s.set {
				continue
			}
			if s.set {
				t.Errorf("%d should be set", s.idx)
			} else {
				t.Errorf("%d should not be set", s.idx)
			}
			return false
		}
		return true
	}

	// verifyWords checks that the backing slice has exactly the number of
	// words required to hold bit index last.
	verifyWords := func(last int) bool {
		if len(b.set) == last/64+1 {
			return true
		}
		t.Error("Wrong length of BitSet.set")
		return false
	}

	// shrink within the first word and drop the second word entirely
	for _, i := range []uint{0, 1, 2, 3, 64} {
		b.Set(i)
	}
	b.Shrink(2)
	if !verify(bitState{0, true}, bitState{1, true}, bitState{2, true}, bitState{3, false}, bitState{64, false}) {
		return
	}

	// a target beyond the allocated words must leave the set alone
	b.Set(24)
	b.Shrink(100)
	if !verify(bitState{24, true}) {
		return
	}

	// shrink to the first bit of the third word
	for _, i := range []uint{127, 128, 129} {
		b.Set(i)
	}
	b.Shrink(128)
	if !verify(bitState{127, true}, bitState{128, true}, bitState{129, false}) {
		return
	}

	// shrinking to the current last index keeps that bit
	b.Set(129)
	b.Shrink(129)
	if !verify(bitState{129, true}) {
		return
	}

	// large indexes: check the slice size as well as the bits
	for _, i := range []uint{1000, 2000, 3000} {
		b.Set(i)
	}
	b.Shrink(3000)
	if !verifyWords(3000) {
		return
	}
	if !verify(bitState{3000, true}) {
		return
	}

	b.Shrink(2000)
	if !verifyWords(2000) {
		return
	}
	verify(bitState{3000, false}, bitState{2000, true}, bitState{1000, true}, bitState{24, true})
}

// TestInsertAtWithSet sets a handful of bits around the first word boundary,
// inserts a bit at index 3 and checks the resulting state of selected bits.
// The test stops at the first failed expectation.
func TestInsertAtWithSet(t *testing.T) {
	b := New(0)
	for _, i := range []uint{0, 1, 63, 64, 65} {
		b.Set(i)
	}

	b.InsertAt(3)

	// Bits below the insertion point keep their place, index 3 holds the
	// inserted 0, and the bits formerly at 63, 64 and 65 now sit at 64, 65
	// and 66.
	expectations := []struct {
		idx uint
		set bool
	}{
		{0, true},
		{1, true},
		{3, false},
		{64, true},
		{65, true},
		{66, true},
	}

	for _, e := range expectations {
		if b.Test(e.idx) == e.set {
			continue
		}
		if e.set {
			t.Errorf("%d should be set", e.idx)
		} else {
			t.Errorf("%d should not be set", e.idx)
		}
		return
	}
}

// TestInsertAt checks InsertAt against hand-written word-level fixtures.
//
// Every fixture word is a 64-character binary string parsed with
// strconv.ParseUint, so its leftmost character is the most significant bit
// and its rightmost character is bit 0. Words are listed in slice order, and
// the expected slice is compared element by element with the BitSet's backing
// slice after the insertion. The test stops at the first mismatch.
func TestInsertAt(t *testing.T) {
	type testCase struct {
		input     []string
		insertIdx uint
		expected  []string
	}

	testCases := []testCase{
		{
			input: []string{
				"1111111111111111111111111111111111111111111111111111111111111111",
			},
			insertIdx: uint(62),
			expected: []string{
				"1011111111111111111111111111111111111111111111111111111111111111",
				"0000000000000000000000000000000000000000000000000000000000000001",
			},
		},
		{
			input: []string{
				"1111111111111111111111111111111111111111111111111111111111111111",
			},
			insertIdx: uint(63),
			expected: []string{
				"0111111111111111111111111111111111111111111111111111111111111111",
				"0000000000000000000000000000000000000000000000000000000000000001",
			},
		},
		{
			input: []string{
				"1111111111111111111111111111111111111111111111111111111111111111",
			},
			insertIdx: uint(0),
			expected: []string{
				"1111111111111111111111111111111111111111111111111111111111111110",
				"0000000000000000000000000000000000000000000000000000000000000001",
			},
		},
		{
			input: []string{
				"1111111111111111111111111111111111111111111111111111111111111111",
				"1111111111111111111111111111111111111111111111111111111111111111",
				"1111111111111111111111111111111111111111111111111111111111111111",
			},
			insertIdx: uint(70),
			expected: []string{
				"1111111111111111111111111111111111111111111111111111111111111111",
				"1111111111111111111111111111111111111111111111111111111110111111",
				"1111111111111111111111111111111111111111111111111111111111111111",
				"0000000000000000000000000000000000000000000000000000000000000001",
			},
		},
		{
			input: []string{
				"1111111111111111111111111111111111111111111111111111111111111111",
				"1111111111111111111111111111111111111111111111111111111111111111",
				"1111111111111111111111111111111111111111111111111111111111110000",
			},
			insertIdx: uint(70),
			expected: []string{
				"1111111111111111111111111111111111111111111111111111111111111111",
				"1111111111111111111111111111111111111111111111111111111110111111",
				"1111111111111111111111111111111111111111111111111111111111100001",
				"0000000000000000000000000000000000000000000000000000000000000001",
			},
		},
		{
			input: []string{
				"1111111111111111111111111111111111111111111111111111111111110000",
			},
			insertIdx: uint(10),
			expected: []string{
				"1111111111111111111111111111111111111111111111111111101111110000",
				"0000000000000000000000000000000000000000000000000000000000000001",
			},
		},
	}

	// parseWords converts binary fixture strings into words. A malformed
	// fixture aborts the test instead of silently turning into a wrong word
	// that could hide or fake a failure.
	parseWords := func(words []string) []uint64 {
		var parsed []uint64
		for _, w := range words {
			v, err := strconv.ParseUint(w, 2, 64)
			if err != nil {
				t.Fatalf("invalid fixture word %q: %v", w, err)
			}
			parsed = append(parsed, v)
		}
		return parsed
	}

	for _, tc := range testCases {
		input := parseWords(tc.input)
		expected := parseWords(tc.expected)

		b := From(input)
		b.InsertAt(tc.insertIdx)
		if len(b.set) != len(expected) {
			t.Error("Length of sets should be equal")
			return
		}
		for i := range b.set {
			if b.set[i] != expected[i] {
				t.Error("Unexpected results found in set")
				return
			}
		}
	}
}

func TestNone(t *testing.T) {
v := New(0)
if !v.None() {
Expand Down

0 comments on commit 3fb80c4

Please sign in to comment.