0
0
Fork 0

scorch zap optimize writeRoaringWithLen()

Before this change, writeRoaringWithLen() wrote the bitmap into a
reused bytes.Buffer (#A) by invoking the roaring.WriteTo() API.

However, the roaring.WriteTo() API has a suboptimal implementation:
under the hood, it first converts the roaring bitmap to a byte buffer
(using roaring.ToBytes()) and only then calls Write() on the
destination.  That Write() amounts to an additional memcpy into the
provided bytes.Buffer (#A).

By directly invoking roaring.ToBytes(), this change to
writeRoaringWithLen() avoids the extra memory allocation and memcpy.
This commit is contained in:
Steve Yen 2018-03-06 14:59:20 -08:00
parent ae81806435
commit dde6c2e01b
3 changed files with 14 additions and 17 deletions

View File

@ -394,13 +394,12 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac
func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint64, err error) { func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint64, err error) {
rv = make([]uint64, 0, len(memSegment.PostingsLocs)) rv = make([]uint64, 0, len(memSegment.PostingsLocs))
var reuseBuf bytes.Buffer
reuseBufVarint := make([]byte, binary.MaxVarintLen64) reuseBufVarint := make([]byte, binary.MaxVarintLen64)
for postingID := range memSegment.PostingsLocs { for postingID := range memSegment.PostingsLocs {
// record where we start this posting loc // record where we start this posting loc
rv = append(rv, uint64(w.Count())) rv = append(rv, uint64(w.Count()))
// write out the length and bitmap // write out the length and bitmap
_, err = writeRoaringWithLen(memSegment.PostingsLocs[postingID], w, &reuseBuf, reuseBufVarint) _, err = writeRoaringWithLen(memSegment.PostingsLocs[postingID], w, reuseBufVarint)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -411,7 +410,6 @@ func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint
func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter, func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter,
postingsListLocs, freqOffsets, locOffsets []uint64) (rv []uint64, err error) { postingsListLocs, freqOffsets, locOffsets []uint64) (rv []uint64, err error) {
rv = make([]uint64, 0, len(memSegment.Postings)) rv = make([]uint64, 0, len(memSegment.Postings))
var reuseBuf bytes.Buffer
reuseBufVarint := make([]byte, binary.MaxVarintLen64) reuseBufVarint := make([]byte, binary.MaxVarintLen64)
for postingID := range memSegment.Postings { for postingID := range memSegment.Postings {
// record where we start this posting list // record where we start this posting list
@ -425,7 +423,7 @@ func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter,
} }
// write out the length and bitmap // write out the length and bitmap
_, err = writeRoaringWithLen(memSegment.Postings[postingID], w, &reuseBuf, reuseBufVarint) _, err = writeRoaringWithLen(memSegment.Postings[postingID], w, reuseBufVarint)
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -160,7 +160,6 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
newSegDocCount uint64, chunkFactor uint32, newSegDocCount uint64, chunkFactor uint32,
w *CountHashWriter) ([]uint64, uint64, error) { w *CountHashWriter) ([]uint64, uint64, error) {
var bufReuse bytes.Buffer
var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64) var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64)
var postings *PostingsList var postings *PostingsList
@ -247,7 +246,7 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
return err return err
} }
postingLocOffset := uint64(w.Count()) postingLocOffset := uint64(w.Count())
_, err = writeRoaringWithLen(newRoaringLocs, w, &bufReuse, bufMaxVarintLen64) _, err = writeRoaringWithLen(newRoaringLocs, w, bufMaxVarintLen64)
if err != nil { if err != nil {
return err return err
} }
@ -271,7 +270,7 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
if err != nil { if err != nil {
return err return err
} }
_, err = writeRoaringWithLen(newRoaring, w, &bufReuse, bufMaxVarintLen64) _, err = writeRoaringWithLen(newRoaring, w, bufMaxVarintLen64)
if err != nil { if err != nil {
return err return err
} }

View File

@ -15,7 +15,6 @@
package zap package zap
import ( import (
"bytes"
"encoding/binary" "encoding/binary"
"io" "io"
@ -25,28 +24,29 @@ import (
// writes out the length of the roaring bitmap in bytes as varint // writes out the length of the roaring bitmap in bytes as varint
// then writes out the roaring bitmap itself // then writes out the roaring bitmap itself
func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer, func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer,
reuseBuf *bytes.Buffer, reuseBufVarint []byte) (int, error) { reuseBufVarint []byte) (int, error) {
reuseBuf.Reset() buf, err := r.ToBytes()
// write out postings list to memory so we know the len
postingsListLen, err := r.WriteTo(reuseBuf)
if err != nil { if err != nil {
return 0, err return 0, err
} }
var tw int var tw int
// write out the length of this postings list
n := binary.PutUvarint(reuseBufVarint, uint64(postingsListLen)) // write out the length
n := binary.PutUvarint(reuseBufVarint, uint64(len(buf)))
nw, err := w.Write(reuseBufVarint[:n]) nw, err := w.Write(reuseBufVarint[:n])
tw += nw tw += nw
if err != nil { if err != nil {
return tw, err return tw, err
} }
// write out the postings list itself
nw, err = w.Write(reuseBuf.Bytes()) // write out the roaring bytes
nw, err = w.Write(buf)
tw += nw tw += nw
if err != nil { if err != nil {
return tw, err return tw, err
} }
return tw, nil return tw, nil
} }