Merge pull request #746 from steveyen/master
more scorch zap optimizations (array for docTermMap, etc)
This commit is contained in:
commit
a3b125508b
|
@ -368,7 +368,6 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac
|
|||
}
|
||||
|
||||
// put pos
|
||||
|
||||
err = locEncoder.Add(docNum, locpos[locOffset])
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
|
@ -386,10 +385,8 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac
|
|||
return nil, nil, err
|
||||
}
|
||||
|
||||
// put array positions
|
||||
num := len(locarraypos[locOffset])
|
||||
|
||||
// put the number of array positions to follow
|
||||
num := len(locarraypos[locOffset])
|
||||
err = locEncoder.Add(docNum, uint64(num))
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
|
|
|
@ -41,6 +41,7 @@ func newChunkedIntCoder(chunkSize uint64, maxDocNum uint64) *chunkedIntCoder {
|
|||
chunkSize: chunkSize,
|
||||
maxDocNum: maxDocNum,
|
||||
chunkLens: make([]uint64, total),
|
||||
final: make([]byte, 0, 64),
|
||||
}
|
||||
rv.encoder = govarint.NewU64Base128Encoder(&rv.chunkBuf)
|
||||
|
||||
|
|
|
@ -21,7 +21,6 @@ import (
|
|||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"sort"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/Smerity/govarint"
|
||||
|
@ -149,7 +148,11 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap,
|
|||
fieldDvLocs := make([]uint64, len(fieldsInv))
|
||||
fieldDvLocsOffset := uint64(fieldNotUninverted)
|
||||
|
||||
var docNumbers docIDRange
|
||||
// docTermMap is keyed by docNum, where the array impl provides
|
||||
// better memory usage behavior than a sparse-friendlier hashmap
|
||||
// for when docs have much structural similarity (i.e., every doc
|
||||
// has a given field)
|
||||
var docTermMap [][]byte
|
||||
|
||||
var vellumBuf bytes.Buffer
|
||||
|
||||
|
@ -193,7 +196,14 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap,
|
|||
tfEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1)
|
||||
locEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1)
|
||||
|
||||
docTermMap := make(map[uint64][]byte, newSegDocCount)
|
||||
if uint64(cap(docTermMap)) < newSegDocCount {
|
||||
docTermMap = make([][]byte, newSegDocCount)
|
||||
} else {
|
||||
docTermMap = docTermMap[0:newSegDocCount]
|
||||
for docNum := range docTermMap { // reset the docTermMap
|
||||
docTermMap[docNum] = docTermMap[docNum][:0]
|
||||
}
|
||||
}
|
||||
|
||||
for err == nil {
|
||||
term, _ := mergeItr.Current()
|
||||
|
@ -237,12 +247,12 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap,
|
|||
if cap(bufLoc) < 5+len(loc.ArrayPositions()) {
|
||||
bufLoc = make([]uint64, 0, 5+len(loc.ArrayPositions()))
|
||||
}
|
||||
args := bufLoc[0:0]
|
||||
args = append(args, uint64(fieldsMap[loc.Field()]))
|
||||
args = append(args, loc.Pos())
|
||||
args = append(args, loc.Start())
|
||||
args = append(args, loc.End())
|
||||
args = append(args, uint64(len(loc.ArrayPositions())))
|
||||
args := bufLoc[0:5]
|
||||
args[0] = uint64(fieldsMap[loc.Field()])
|
||||
args[1] = loc.Pos()
|
||||
args[2] = loc.Start()
|
||||
args[3] = loc.End()
|
||||
args[4] = uint64(len(loc.ArrayPositions()))
|
||||
args = append(args, loc.ArrayPositions()...)
|
||||
err = locEncoder.Add(hitNewDocNum, args...)
|
||||
if err != nil {
|
||||
|
@ -343,21 +353,14 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap,
|
|||
|
||||
rv[fieldID] = dictOffset
|
||||
|
||||
// update the doc nums
|
||||
if cap(docNumbers) < len(docTermMap) {
|
||||
docNumbers = make(docIDRange, 0, len(docTermMap))
|
||||
}
|
||||
docNumbers = docNumbers[:0]
|
||||
for k := range docTermMap {
|
||||
docNumbers = append(docNumbers, k)
|
||||
}
|
||||
sort.Sort(docNumbers)
|
||||
|
||||
// update the field doc values
|
||||
fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), newSegDocCount-1)
|
||||
for _, docNum := range docNumbers {
|
||||
err = fdvEncoder.Add(docNum, docTermMap[docNum])
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
for docNum, docTerms := range docTermMap {
|
||||
if len(docTerms) > 0 {
|
||||
err = fdvEncoder.Add(uint64(docNum), docTerms)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
}
|
||||
}
|
||||
err = fdvEncoder.Close()
|
||||
|
|
Loading…
Reference in New Issue