From 5df2e1c2d298c5632df9e6eab7a4814cc636b98d Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Thu, 12 Oct 2023 11:51:27 -0400 Subject: [PATCH 1/3] Simplifies readFrom and WriteTo, and improves their performance. --- bitset.go | 48 ++++++++++++++++++++++-------------------------- 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/bitset.go b/bitset.go index 4eb1733..3d2e05f 100644 --- a/bitset.go +++ b/bitset.go @@ -920,22 +920,24 @@ func (b *BitSet) BinaryStorageSize() int { // f, err := os.Create("myfile") // w := bufio.NewWriter(f) func (b *BitSet) WriteTo(stream io.Writer) (int64, error) { - buf := make([]byte, wordBytes) + length := uint64(b.length) // Write length - binaryOrder.PutUint64(buf, length) - n, err := stream.Write(buf) + err := binary.Write(stream, binaryOrder, &length) if err != nil { - return int64(n), err + // Upon failure, we do not guarantee that we + // return the number of bytes written. + return int64(0), err } nWords := b.wordCount() - for i := range b.set[:nWords] { - binaryOrder.PutUint64(buf, b.set[i]) - if nn, err := stream.Write(buf); err != nil { - return int64(i*int(wordBytes) + nn + n), err - } + + err = binary.Write(stream, binaryOrder, b.set[:nWords]) + if err != nil { + // Upon failure, we do not guarantee that we + // return the number of bytes written. + return int64(wordBytes), err } return int64(b.BinaryStorageSize()), nil @@ -958,11 +960,8 @@ func (b *BitSet) WriteTo(stream io.Writer) (int64, error) { // f, err := os.Open("myfile") // r := bufio.NewReader(f) func (b *BitSet) ReadFrom(stream io.Reader) (int64, error) { - buf := make([]byte, wordBytes) - - // Read length first - _, err := io.ReadFull(stream, buf[:]) - length := binaryOrder.Uint64(buf) + var length uint64 + err := binary.Read(stream, binaryOrder, &length) if err != nil { if err == io.EOF { err = io.ErrUnexpectedEOF @@ -982,19 +981,16 @@ func (b *BitSet) ReadFrom(stream io.Reader) (int64, error) { } b.length = newlength - - for i := 0; i < nWords; i++ { - if _, err := io.ReadFull(stream, buf); err != nil { - if err == io.EOF { - err = io.ErrUnexpectedEOF - } - // We do not want to leave the BitSet partially filled as - // it is error prone. - b.set = b.set[:0] - b.length = 0 - return 0, err + err = binary.Read(stream, binaryOrder, b.set) + if err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF } - b.set[i] = binaryOrder.Uint64(buf) + // We do not want to leave the BitSet partially filled as + // it is error prone. + b.set = b.set[:0] + b.length = 0 + return 0, err } return int64(b.BinaryStorageSize()), nil From 6c9ae5ac032cb03b2ccd4ee8afc332617cb2a5db Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Thu, 12 Oct 2023 15:39:40 -0400 Subject: [PATCH 2/3] tweak --- bitset.go | 42 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/bitset.go b/bitset.go index 3d2e05f..0e40dce 100644 --- a/bitset.go +++ b/bitset.go @@ -908,6 +908,37 @@ func (b *BitSet) BinaryStorageSize() int { return int(wordBytes + wordBytes*uint(b.wordCount())) } +func readUint64Array(reader io.Reader, data []uint64) error { + length := len(data) + for i := 0; i < length; i += 512 { + end := i + 512 + if end > length { + end = length + } + chunk := data[i:end] + err := binary.Read(reader, binaryOrder, chunk) + if err != nil { + return err + } + } + return nil +} + +func writeUint64Array(writer io.Writer, data []uint64) error { + for i := 0; i < len(data); i += 512 { + end := i + 512 + if end > len(data) { + end = len(data) + } + chunk := data[i:end] + err := binary.Write(writer, binaryOrder, chunk) + if err != nil { + return err + } + } + return nil +} + // WriteTo writes a BitSet to a stream. The format is: // 1. uint64 length // 2. []uint64 set @@ -920,9 +951,7 @@ func (b *BitSet) BinaryStorageSize() int { // f, err := os.Create("myfile") // w := bufio.NewWriter(f) func (b *BitSet) WriteTo(stream io.Writer) (int64, error) { - length := uint64(b.length) - // Write length err := binary.Write(stream, binaryOrder, &length) if err != nil { @@ -930,16 +959,12 @@ func (b *BitSet) WriteTo(stream io.Writer) (int64, error) { // return the number of bytes written. return int64(0), err } - - nWords := b.wordCount() - - err = binary.Write(stream, binaryOrder, b.set[:nWords]) + err = writeUint64Array(stream, b.set[:b.wordCount()]) if err != nil { // Upon failure, we do not guarantee that we // return the number of bytes written. return int64(wordBytes), err } - return int64(b.BinaryStorageSize()), nil } @@ -981,7 +1006,8 @@ func (b *BitSet) ReadFrom(stream io.Reader) (int64, error) { } b.length = newlength - err = binary.Read(stream, binaryOrder, b.set) + + err = readUint64Array(stream, b.set) if err != nil { if err == io.EOF { err = io.ErrUnexpectedEOF From 9a5bfb7035ab177db71d53434545b753377a2cc8 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Thu, 12 Oct 2023 15:57:12 -0400 Subject: [PATCH 3/3] tweak --- bitset.go | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/bitset.go b/bitset.go index 0e40dce..97712e9 100644 --- a/bitset.go +++ b/bitset.go @@ -910,28 +910,39 @@ func (b *BitSet) BinaryStorageSize() int { func readUint64Array(reader io.Reader, data []uint64) error { length := len(data) - for i := 0; i < length; i += 512 { - end := i + 512 + bufferSize := 128 + buffer := make([]byte, bufferSize*int(wordBytes)) + for i := 0; i < length; i += bufferSize { + end := i + bufferSize if end > length { end = length + buffer = buffer[:wordBytes*uint(end-i)] } chunk := data[i:end] - err := binary.Read(reader, binaryOrder, chunk) - if err != nil { + if _, err := io.ReadFull(reader, buffer); err != nil { return err } + for i := range chunk { + chunk[i] = uint64(binaryOrder.Uint64(buffer[8*i:])) + } } return nil } func writeUint64Array(writer io.Writer, data []uint64) error { - for i := 0; i < len(data); i += 512 { - end := i + 512 + bufferSize := 128 + buffer := make([]byte, bufferSize*int(wordBytes)) + for i := 0; i < len(data); i += bufferSize { + end := i + bufferSize if end > len(data) { end = len(data) + buffer = buffer[:wordBytes*uint(end-i)] } chunk := data[i:end] - err := binary.Write(writer, binaryOrder, chunk) + for i, x := range chunk { + binaryOrder.PutUint64(buffer[8*i:], x) + } + _, err := writer.Write(buffer) if err != nil { return err }