0
0
Fork 0

scorch zap optimize writeRoaringWithLen()

Before this change, writeRoaringWithLen() would leverage a reused
bytes.Buffer (#A) and invoke the roaring.WriteTo() API.

But, it turns out the roaring.WriteTo() API has a suboptimal
implementation, in that underneath-the-hood it converts the roaring
bitmap to a byte buffer (using roaring.ToBytes()), and then calls
Write().  But, that Write() turns out to be an additional memcpy into
the provided bytes.Buffer (#A).

By directly invoking roaring.ToBytes(), this change to
writeRoaringWithLen() avoids the extra memory allocation and memcpy.
This commit is contained in:
Steve Yen 2018-03-06 14:59:20 -08:00
parent ae81806435
commit dde6c2e01b
3 changed files with 14 additions and 17 deletions

View File

@ -394,13 +394,12 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac
func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint64, err error) {
rv = make([]uint64, 0, len(memSegment.PostingsLocs))
var reuseBuf bytes.Buffer
reuseBufVarint := make([]byte, binary.MaxVarintLen64)
for postingID := range memSegment.PostingsLocs {
// record where we start this posting loc
rv = append(rv, uint64(w.Count()))
// write out the length and bitmap
_, err = writeRoaringWithLen(memSegment.PostingsLocs[postingID], w, &reuseBuf, reuseBufVarint)
_, err = writeRoaringWithLen(memSegment.PostingsLocs[postingID], w, reuseBufVarint)
if err != nil {
return nil, err
}
@ -411,7 +410,6 @@ func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint
func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter,
postingsListLocs, freqOffsets, locOffsets []uint64) (rv []uint64, err error) {
rv = make([]uint64, 0, len(memSegment.Postings))
var reuseBuf bytes.Buffer
reuseBufVarint := make([]byte, binary.MaxVarintLen64)
for postingID := range memSegment.Postings {
// record where we start this posting list
@ -425,7 +423,7 @@ func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter,
}
// write out the length and bitmap
_, err = writeRoaringWithLen(memSegment.Postings[postingID], w, &reuseBuf, reuseBufVarint)
_, err = writeRoaringWithLen(memSegment.Postings[postingID], w, reuseBufVarint)
if err != nil {
return nil, err
}

View File

@ -160,7 +160,6 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
newSegDocCount uint64, chunkFactor uint32,
w *CountHashWriter) ([]uint64, uint64, error) {
var bufReuse bytes.Buffer
var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64)
var postings *PostingsList
@ -247,7 +246,7 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
return err
}
postingLocOffset := uint64(w.Count())
_, err = writeRoaringWithLen(newRoaringLocs, w, &bufReuse, bufMaxVarintLen64)
_, err = writeRoaringWithLen(newRoaringLocs, w, bufMaxVarintLen64)
if err != nil {
return err
}
@ -271,7 +270,7 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
if err != nil {
return err
}
_, err = writeRoaringWithLen(newRoaring, w, &bufReuse, bufMaxVarintLen64)
_, err = writeRoaringWithLen(newRoaring, w, bufMaxVarintLen64)
if err != nil {
return err
}

View File

@ -15,7 +15,6 @@
package zap
import (
"bytes"
"encoding/binary"
"io"
@ -25,28 +24,29 @@ import (
// writes out the length of the roaring bitmap in bytes as varint
// then writes out the roaring bitmap itself
func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer,
reuseBuf *bytes.Buffer, reuseBufVarint []byte) (int, error) {
reuseBuf.Reset()
// write out postings list to memory so we know the len
postingsListLen, err := r.WriteTo(reuseBuf)
reuseBufVarint []byte) (int, error) {
buf, err := r.ToBytes()
if err != nil {
return 0, err
}
var tw int
// write out the length of this postings list
n := binary.PutUvarint(reuseBufVarint, uint64(postingsListLen))
// write out the length
n := binary.PutUvarint(reuseBufVarint, uint64(len(buf)))
nw, err := w.Write(reuseBufVarint[:n])
tw += nw
if err != nil {
return tw, err
}
// write out the postings list itself
nw, err = w.Write(reuseBuf.Bytes())
// write out the roaring bytes
nw, err = w.Write(buf)
tw += nw
if err != nil {
return tw, err
}
return tw, nil
}