Merge pull request #758 from steveyen/scorch-optimizations-20180207
scorch optimizations via struct reuse
This commit is contained in:
commit
3d729c73c1
|
@ -76,6 +76,8 @@ type DictionaryIterator struct {
|
|||
prefix string
|
||||
end string
|
||||
offset int
|
||||
|
||||
dictEntry index.DictEntry // reused across Next()'s
|
||||
}
|
||||
|
||||
// Next returns the next entry in the dictionary
|
||||
|
@ -95,8 +97,7 @@ func (d *DictionaryIterator) Next() (*index.DictEntry, error) {
|
|||
|
||||
d.offset++
|
||||
postingID := d.d.segment.Dicts[d.d.fieldID][next]
|
||||
return &index.DictEntry{
|
||||
Term: next,
|
||||
Count: d.d.segment.Postings[postingID-1].GetCardinality(),
|
||||
}, nil
|
||||
d.dictEntry.Term = next
|
||||
d.dictEntry.Count = d.d.segment.Postings[postingID-1].GetCardinality()
|
||||
return &d.dictEntry, nil
|
||||
}
|
||||
|
|
|
@ -34,15 +34,18 @@ type Dictionary struct {
|
|||
|
||||
// PostingsList returns the postings list for the specified term
|
||||
func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap) (segment.PostingsList, error) {
|
||||
return d.postingsList([]byte(term), except)
|
||||
return d.postingsList([]byte(term), except, nil)
|
||||
}
|
||||
|
||||
func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap) (*PostingsList, error) {
|
||||
rv := &PostingsList{
|
||||
sb: d.sb,
|
||||
term: term,
|
||||
except: except,
|
||||
func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) {
|
||||
if rv == nil {
|
||||
rv = &PostingsList{}
|
||||
} else {
|
||||
*rv = PostingsList{} // clear the struct
|
||||
}
|
||||
rv.sb = d.sb
|
||||
rv.term = term
|
||||
rv.except = except
|
||||
|
||||
if d.fst != nil {
|
||||
postingsOffset, exists, err := d.fst.Get(term)
|
||||
|
|
|
@ -46,6 +46,11 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string,
|
|||
_ = os.Remove(path)
|
||||
}
|
||||
|
||||
segmentBases := make([]*SegmentBase, len(segments))
|
||||
for segmenti, segment := range segments {
|
||||
segmentBases[segmenti] = &segment.SegmentBase
|
||||
}
|
||||
|
||||
// buffer the output
|
||||
br := bufio.NewWriter(f)
|
||||
|
||||
|
@ -53,7 +58,7 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string,
|
|||
cr := NewCountHashWriter(br)
|
||||
|
||||
newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, err :=
|
||||
mergeToWriter(segments, drops, chunkFactor, cr)
|
||||
MergeToWriter(segmentBases, drops, chunkFactor, cr)
|
||||
if err != nil {
|
||||
cleanup()
|
||||
return nil, err
|
||||
|
@ -87,7 +92,7 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string,
|
|||
return newDocNums, nil
|
||||
}
|
||||
|
||||
func mergeToWriter(segments []*Segment, drops []*roaring.Bitmap,
|
||||
func MergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap,
|
||||
chunkFactor uint32, cr *CountHashWriter) (
|
||||
newDocNums [][]uint64,
|
||||
numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset uint64,
|
||||
|
@ -135,10 +140,10 @@ func mapFields(fields []string) map[string]uint16 {
|
|||
|
||||
// computeNewDocCount determines how many documents will be in the newly
|
||||
// merged segment when obsoleted docs are dropped
|
||||
func computeNewDocCount(segments []*Segment, drops []*roaring.Bitmap) uint64 {
|
||||
func computeNewDocCount(segments []*SegmentBase, drops []*roaring.Bitmap) uint64 {
|
||||
var newDocCount uint64
|
||||
for segI, segment := range segments {
|
||||
newDocCount += segment.NumDocs()
|
||||
newDocCount += segment.numDocs
|
||||
if drops[segI] != nil {
|
||||
newDocCount -= drops[segI].GetCardinality()
|
||||
}
|
||||
|
@ -146,7 +151,7 @@ func computeNewDocCount(segments []*Segment, drops []*roaring.Bitmap) uint64 {
|
|||
return newDocCount
|
||||
}
|
||||
|
||||
func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap,
|
||||
func persistMergedRest(segments []*SegmentBase, drops []*roaring.Bitmap,
|
||||
fieldsInv []string, fieldsMap map[string]uint16, newDocNums [][]uint64,
|
||||
newSegDocCount uint64, chunkFactor uint32,
|
||||
w *CountHashWriter) ([]uint64, uint64, error) {
|
||||
|
@ -155,6 +160,8 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap,
|
|||
var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64)
|
||||
var bufLoc []uint64
|
||||
|
||||
var postings *PostingsList
|
||||
|
||||
rv := make([]uint64, len(fieldsInv))
|
||||
fieldDvLocs := make([]uint64, len(fieldsInv))
|
||||
fieldDvLocsOffset := uint64(fieldNotUninverted)
|
||||
|
@ -231,7 +238,8 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap,
|
|||
if dict == nil {
|
||||
continue
|
||||
}
|
||||
postings, err2 := dict.postingsList(term, drops[dictI])
|
||||
var err2 error
|
||||
postings, err2 = dict.postingsList(term, drops[dictI], postings)
|
||||
if err2 != nil {
|
||||
return nil, 0, err2
|
||||
}
|
||||
|
@ -405,7 +413,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap,
|
|||
|
||||
const docDropped = math.MaxUint64
|
||||
|
||||
func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap,
|
||||
func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap,
|
||||
fieldsMap map[string]uint16, fieldsInv []string, newSegDocCount uint64,
|
||||
w *CountHashWriter) (uint64, [][]uint64, error) {
|
||||
var rv [][]uint64 // The remapped or newDocNums for each segment.
|
||||
|
@ -520,7 +528,7 @@ func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap,
|
|||
}
|
||||
|
||||
// mergeFields builds a unified list of fields used across all the input segments
|
||||
func mergeFields(segments []*Segment) []string {
|
||||
func mergeFields(segments []*SegmentBase) []string {
|
||||
fieldsMap := map[string]struct{}{}
|
||||
for _, segment := range segments {
|
||||
fields := segment.Fields()
|
||||
|
|
|
@ -310,8 +310,8 @@ func compareSegments(a, b *Segment) string {
|
|||
continue
|
||||
}
|
||||
|
||||
aplist, aerr := adict.(*Dictionary).postingsList([]byte(next.Term), nil)
|
||||
bplist, berr := bdict.(*Dictionary).postingsList([]byte(next.Term), nil)
|
||||
aplist, aerr := adict.(*Dictionary).postingsList([]byte(next.Term), nil, nil)
|
||||
bplist, berr := bdict.(*Dictionary).postingsList([]byte(next.Term), nil, nil)
|
||||
if aerr != berr {
|
||||
rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsList() errors different: %v %v",
|
||||
fieldName, next.Term, aerr, berr))
|
||||
|
|
|
@ -343,8 +343,9 @@ func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) {
|
|||
return nil, err
|
||||
}
|
||||
|
||||
var postings *PostingsList
|
||||
for _, id := range ids {
|
||||
postings, err := idDict.postingsList([]byte(id), nil)
|
||||
postings, err = idDict.postingsList([]byte(id), nil, postings)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue