Merge pull request #805 from steveyen/optimize-scorch-mem-processField
Optimize scorch processField() inner loop and writeRoaringWithLen()
This commit is contained in:
commit
15242af465
|
@ -111,13 +111,15 @@ func (s *Segment) initializeDict(results []*index.AnalysisResult) {
|
|||
}
|
||||
|
||||
processField := func(fieldID uint16, tfs analysis.TokenFrequencies) {
|
||||
dict := s.Dicts[fieldID]
|
||||
dictKeys := s.DictKeys[fieldID]
|
||||
for term, tf := range tfs {
|
||||
pidPlus1, exists := s.Dicts[fieldID][term]
|
||||
pidPlus1, exists := dict[term]
|
||||
if !exists {
|
||||
numPostingsLists++
|
||||
pidPlus1 = uint64(numPostingsLists)
|
||||
s.Dicts[fieldID][term] = pidPlus1
|
||||
s.DictKeys[fieldID] = append(s.DictKeys[fieldID], term)
|
||||
dict[term] = pidPlus1
|
||||
dictKeys = append(dictKeys, term)
|
||||
numTermsPerPostingsList = append(numTermsPerPostingsList, 0)
|
||||
numLocsPerPostingsList = append(numLocsPerPostingsList, 0)
|
||||
}
|
||||
|
@ -127,6 +129,7 @@ func (s *Segment) initializeDict(results []*index.AnalysisResult) {
|
|||
totLocs += len(tf.Locations)
|
||||
}
|
||||
numTokenFrequencies += len(tfs)
|
||||
s.DictKeys[fieldID] = dictKeys
|
||||
}
|
||||
|
||||
for _, result := range results {
|
||||
|
|
|
@ -394,13 +394,12 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac
|
|||
|
||||
func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint64, err error) {
|
||||
rv = make([]uint64, 0, len(memSegment.PostingsLocs))
|
||||
var reuseBuf bytes.Buffer
|
||||
reuseBufVarint := make([]byte, binary.MaxVarintLen64)
|
||||
for postingID := range memSegment.PostingsLocs {
|
||||
// record where we start this posting loc
|
||||
rv = append(rv, uint64(w.Count()))
|
||||
// write out the length and bitmap
|
||||
_, err = writeRoaringWithLen(memSegment.PostingsLocs[postingID], w, &reuseBuf, reuseBufVarint)
|
||||
_, err = writeRoaringWithLen(memSegment.PostingsLocs[postingID], w, reuseBufVarint)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -411,7 +410,6 @@ func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint
|
|||
func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter,
|
||||
postingsListLocs, freqOffsets, locOffsets []uint64) (rv []uint64, err error) {
|
||||
rv = make([]uint64, 0, len(memSegment.Postings))
|
||||
var reuseBuf bytes.Buffer
|
||||
reuseBufVarint := make([]byte, binary.MaxVarintLen64)
|
||||
for postingID := range memSegment.Postings {
|
||||
// record where we start this posting list
|
||||
|
@ -425,7 +423,7 @@ func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter,
|
|||
}
|
||||
|
||||
// write out the length and bitmap
|
||||
_, err = writeRoaringWithLen(memSegment.Postings[postingID], w, &reuseBuf, reuseBufVarint)
|
||||
_, err = writeRoaringWithLen(memSegment.Postings[postingID], w, reuseBufVarint)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
|
@ -160,7 +160,6 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
|
|||
newSegDocCount uint64, chunkFactor uint32,
|
||||
w *CountHashWriter) ([]uint64, uint64, error) {
|
||||
|
||||
var bufReuse bytes.Buffer
|
||||
var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64)
|
||||
|
||||
var postings *PostingsList
|
||||
|
@ -247,7 +246,7 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
|
|||
return err
|
||||
}
|
||||
postingLocOffset := uint64(w.Count())
|
||||
_, err = writeRoaringWithLen(newRoaringLocs, w, &bufReuse, bufMaxVarintLen64)
|
||||
_, err = writeRoaringWithLen(newRoaringLocs, w, bufMaxVarintLen64)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -271,7 +270,7 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap,
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = writeRoaringWithLen(newRoaring, w, &bufReuse, bufMaxVarintLen64)
|
||||
_, err = writeRoaringWithLen(newRoaring, w, bufMaxVarintLen64)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -15,7 +15,6 @@
|
|||
package zap
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"io"
|
||||
|
||||
|
@ -25,28 +24,29 @@ import (
|
|||
// writes out the length of the roaring bitmap in bytes as varint
|
||||
// then writes out the roaring bitmap itself
|
||||
func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer,
|
||||
reuseBuf *bytes.Buffer, reuseBufVarint []byte) (int, error) {
|
||||
reuseBuf.Reset()
|
||||
|
||||
// write out postings list to memory so we know the len
|
||||
postingsListLen, err := r.WriteTo(reuseBuf)
|
||||
reuseBufVarint []byte) (int, error) {
|
||||
buf, err := r.ToBytes()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
var tw int
|
||||
// write out the length of this postings list
|
||||
n := binary.PutUvarint(reuseBufVarint, uint64(postingsListLen))
|
||||
|
||||
// write out the length
|
||||
n := binary.PutUvarint(reuseBufVarint, uint64(len(buf)))
|
||||
nw, err := w.Write(reuseBufVarint[:n])
|
||||
tw += nw
|
||||
if err != nil {
|
||||
return tw, err
|
||||
}
|
||||
// write out the postings list itself
|
||||
nw, err = w.Write(reuseBuf.Bytes())
|
||||
|
||||
// write out the roaring bytes
|
||||
nw, err = w.Write(buf)
|
||||
tw += nw
|
||||
if err != nil {
|
||||
return tw, err
|
||||
}
|
||||
|
||||
return tw, nil
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue