From dde6c2e01b3b33828f33f28ad43af2147713f7c1 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 6 Mar 2018 14:59:20 -0800 Subject: [PATCH] scorch zap optimize writeRoaringWithLen() Before this change, writeRoaringWithLen() would leverage a reused bytes.Buffer (#A) and invoke the roaring.WriteTo() API. But, it turns out the roaring.WriteTo() API has a suboptimal implementation, in that underneath-the-hood it converts the roaring bitmap to a byte buffer (using roaring.ToBytes()), and then calls Write(). But, that Write() turns out to be an additional memcpy into the provided bytes.Buffer (#A). By directly invoking roaring.ToBytes(), this change to writeRoaringWithLen() avoids the extra memory allocation and memcpy. --- index/scorch/segment/zap/build.go | 6 ++---- index/scorch/segment/zap/merge.go | 5 ++--- index/scorch/segment/zap/write.go | 20 ++++++++++---------- 3 files changed, 14 insertions(+), 17 deletions(-) diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 361e56e5..404ec869 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -394,13 +394,12 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint64, err error) { rv = make([]uint64, 0, len(memSegment.PostingsLocs)) - var reuseBuf bytes.Buffer reuseBufVarint := make([]byte, binary.MaxVarintLen64) for postingID := range memSegment.PostingsLocs { // record where we start this posting loc rv = append(rv, uint64(w.Count())) // write out the length and bitmap - _, err = writeRoaringWithLen(memSegment.PostingsLocs[postingID], w, &reuseBuf, reuseBufVarint) + _, err = writeRoaringWithLen(memSegment.PostingsLocs[postingID], w, reuseBufVarint) if err != nil { return nil, err } @@ -411,7 +410,6 @@ func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter, postingsListLocs, freqOffsets, locOffsets []uint64) (rv []uint64, err error) { rv = make([]uint64, 0, len(memSegment.Postings)) - var reuseBuf bytes.Buffer reuseBufVarint := make([]byte, binary.MaxVarintLen64) for postingID := range memSegment.Postings { // record where we start this posting list @@ -425,7 +423,7 @@ func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter, } // write out the length and bitmap - _, err = writeRoaringWithLen(memSegment.Postings[postingID], w, &reuseBuf, reuseBufVarint) + _, err = writeRoaringWithLen(memSegment.Postings[postingID], w, reuseBufVarint) if err != nil { return nil, err } diff --git a/index/scorch/segment/zap/merge.go b/index/scorch/segment/zap/merge.go index 5066dfb9..3a0577cd 100644 --- a/index/scorch/segment/zap/merge.go +++ b/index/scorch/segment/zap/merge.go @@ -160,7 +160,6 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, newSegDocCount uint64, chunkFactor uint32, w *CountHashWriter) ([]uint64, uint64, error) { - var bufReuse bytes.Buffer var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64) var postings *PostingsList @@ -247,7 +246,7 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, return err } postingLocOffset := uint64(w.Count()) - _, err = writeRoaringWithLen(newRoaringLocs, w, &bufReuse, bufMaxVarintLen64) + _, err = writeRoaringWithLen(newRoaringLocs, w, bufMaxVarintLen64) if err != nil { return err } @@ -271,7 +270,7 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, if err != nil { return err } - _, err = writeRoaringWithLen(newRoaring, w, &bufReuse, bufMaxVarintLen64) + _, err = writeRoaringWithLen(newRoaring, w, bufMaxVarintLen64) if err != nil { return err } diff --git a/index/scorch/segment/zap/write.go b/index/scorch/segment/zap/write.go index c5316a99..7f4f5a88 100644 --- a/index/scorch/segment/zap/write.go +++ b/index/scorch/segment/zap/write.go @@ -15,7 +15,6 @@ package zap import ( - "bytes" "encoding/binary" "io" @@ -25,28 +24,29 @@ import ( // writes out the length of the roaring bitmap in bytes as varint // then writes out the roaring bitmap itself func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer, - reuseBuf *bytes.Buffer, reuseBufVarint []byte) (int, error) { - reuseBuf.Reset() - - // write out postings list to memory so we know the len - postingsListLen, err := r.WriteTo(reuseBuf) + reuseBufVarint []byte) (int, error) { + buf, err := r.ToBytes() if err != nil { return 0, err } + var tw int - // write out the length of this postings list - n := binary.PutUvarint(reuseBufVarint, uint64(postingsListLen)) + + // write out the length + n := binary.PutUvarint(reuseBufVarint, uint64(len(buf))) nw, err := w.Write(reuseBufVarint[:n]) tw += nw if err != nil { return tw, err } - // write out the postings list itself - nw, err = w.Write(reuseBuf.Bytes()) + + // write out the roaring bytes + nw, err = w.Write(buf) tw += nw if err != nil { return tw, err } + return tw, nil }