scorch zap optimize writeRoaringWithLen()
Before this change, writeRoaringWithLen() would leverage a reused bytes.Buffer (#A) and invoke the roaring.WriteTo() API. But, it turns out the roaring.WriteTo() API has a suboptimal implementation, in that underneath-the-hood it converts the roaring bitmap to a byte buffer (using roaring.ToBytes()), and then calls Write(). But, that Write() turns out to be an additional memcpy into the provided bytes.Buffer (#A). By directly invoking roaring.ToBytes(), this change to writeRoaringWithLen() avoids the extra memory allocation and memcpy.
This commit is contained in:
parent
ae81806435
commit
dde6c2e01b
|
@ -394,13 +394,12 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac
|
||||||
|
|
||||||
func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint64, err error) {
|
func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint64, err error) {
|
||||||
rv = make([]uint64, 0, len(memSegment.PostingsLocs))
|
rv = make([]uint64, 0, len(memSegment.PostingsLocs))
|
||||||
var reuseBuf bytes.Buffer
|
|
||||||
reuseBufVarint := make([]byte, binary.MaxVarintLen64)
|
reuseBufVarint := make([]byte, binary.MaxVarintLen64)
|
||||||
for postingID := range memSegment.PostingsLocs {
|
for postingID := range memSegment.PostingsLocs {
|
||||||
// record where we start this posting loc
|
// record where we start this posting loc
|
||||||
rv = append(rv, uint64(w.Count()))
|
rv = append(rv, uint64(w.Count()))
|
||||||
// write out the length and bitmap
|
// write out the length and bitmap
|
||||||
_, err = writeRoaringWithLen(memSegment.PostingsLocs[postingID], w, &reuseBuf, reuseBufVarint)
|
_, err = writeRoaringWithLen(memSegment.PostingsLocs[postingID], w, reuseBufVarint)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -411,7 +410,6 @@ func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint
|
||||||
func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter,
|
func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter,
|
||||||
postingsListLocs, freqOffsets, locOffsets []uint64) (rv []uint64, err error) {
|
postingsListLocs, freqOffsets, locOffsets []uint64) (rv []uint64, err error) {
|
||||||
rv = make([]uint64, 0, len(memSegment.Postings))
|
rv = make([]uint64, 0, len(memSegment.Postings))
|
||||||
var reuseBuf bytes.Buffer
|
|
||||||
reuseBufVarint := make([]byte, binary.MaxVarintLen64)
|
reuseBufVarint := make([]byte, binary.MaxVarintLen64)
|
||||||
for postingID := range memSegment.Postings {
|
for postingID := range memSegment.Postings {
|
||||||
// record where we start this posting list
|
// record where we start this posting list
|
||||||
|
@ -425,7 +423,7 @@ func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter,
|
||||||
}
|
}
|
||||||
|
|
||||||
// write out the length and bitmap
|
// write out the length and bitmap
|
||||||
_, err = writeRoaringWithLen(memSegment.Postings[postingID], w, &reuseBuf, reuseBufVarint)
|
_, err = writeRoaringWithLen(memSegment.Postings[postingID], w, reuseBufVarint)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
|
@ -160,7 +160,6 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
|
||||||
newSegDocCount uint64, chunkFactor uint32,
|
newSegDocCount uint64, chunkFactor uint32,
|
||||||
w *CountHashWriter) ([]uint64, uint64, error) {
|
w *CountHashWriter) ([]uint64, uint64, error) {
|
||||||
|
|
||||||
var bufReuse bytes.Buffer
|
|
||||||
var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64)
|
var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64)
|
||||||
|
|
||||||
var postings *PostingsList
|
var postings *PostingsList
|
||||||
|
@ -247,7 +246,7 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
postingLocOffset := uint64(w.Count())
|
postingLocOffset := uint64(w.Count())
|
||||||
_, err = writeRoaringWithLen(newRoaringLocs, w, &bufReuse, bufMaxVarintLen64)
|
_, err = writeRoaringWithLen(newRoaringLocs, w, bufMaxVarintLen64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -271,7 +270,7 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
_, err = writeRoaringWithLen(newRoaring, w, &bufReuse, bufMaxVarintLen64)
|
_, err = writeRoaringWithLen(newRoaring, w, bufMaxVarintLen64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,7 +15,6 @@
|
||||||
package zap
|
package zap
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
|
||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
"io"
|
"io"
|
||||||
|
|
||||||
|
@ -25,28 +24,29 @@ import (
|
||||||
// writes out the length of the roaring bitmap in bytes as varint
|
// writes out the length of the roaring bitmap in bytes as varint
|
||||||
// then writes out the roaring bitmap itself
|
// then writes out the roaring bitmap itself
|
||||||
func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer,
|
func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer,
|
||||||
reuseBuf *bytes.Buffer, reuseBufVarint []byte) (int, error) {
|
reuseBufVarint []byte) (int, error) {
|
||||||
reuseBuf.Reset()
|
buf, err := r.ToBytes()
|
||||||
|
|
||||||
// write out postings list to memory so we know the len
|
|
||||||
postingsListLen, err := r.WriteTo(reuseBuf)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
|
|
||||||
var tw int
|
var tw int
|
||||||
// write out the length of this postings list
|
|
||||||
n := binary.PutUvarint(reuseBufVarint, uint64(postingsListLen))
|
// write out the length
|
||||||
|
n := binary.PutUvarint(reuseBufVarint, uint64(len(buf)))
|
||||||
nw, err := w.Write(reuseBufVarint[:n])
|
nw, err := w.Write(reuseBufVarint[:n])
|
||||||
tw += nw
|
tw += nw
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return tw, err
|
return tw, err
|
||||||
}
|
}
|
||||||
// write out the postings list itself
|
|
||||||
nw, err = w.Write(reuseBuf.Bytes())
|
// write out the roaring bytes
|
||||||
|
nw, err = w.Write(buf)
|
||||||
tw += nw
|
tw += nw
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return tw, err
|
return tw, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return tw, nil
|
return tw, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue