
Merge pull request #758 from steveyen/scorch-optimizations-20180207

scorch optimizations via struct reuse
Steve Yen 2018-02-08 06:16:27 -08:00 committed by GitHub
commit 3d729c73c1
5 changed files with 34 additions and 21 deletions

View File

@@ -76,6 +76,8 @@ type DictionaryIterator struct {
 	prefix string
 	end    string
 	offset int
+
+	dictEntry index.DictEntry // reused across Next()'s
 }
 
 // Next returns the next entry in the dictionary
@@ -95,8 +97,7 @@ func (d *DictionaryIterator) Next() (*index.DictEntry, error) {
 	d.offset++
 	postingID := d.d.segment.Dicts[d.d.fieldID][next]
-	return &index.DictEntry{
-		Term:  next,
-		Count: d.d.segment.Postings[postingID-1].GetCardinality(),
-	}, nil
+	d.dictEntry.Term = next
+	d.dictEntry.Count = d.d.segment.Postings[postingID-1].GetCardinality()
+	return &d.dictEntry, nil
 }
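
A caveat worth noting with this hunk: Next() now returns a pointer to a field of the iterator itself, so the entry is only valid until the following Next() call, and callers that retain entries must copy them. A minimal, self-contained sketch of the reuse pattern (the iterator and names below are illustrative stand-ins, not bleve's types):

package main

import "fmt"

type DictEntry struct {
	Term  string
	Count uint64
}

type iter struct {
	terms []string
	pos   int
	entry DictEntry // reused across Next()'s, like d.dictEntry above
}

func (it *iter) Next() *DictEntry {
	if it.pos >= len(it.terms) {
		return nil
	}
	it.entry.Term = it.terms[it.pos]
	it.entry.Count = 1 // placeholder count
	it.pos++
	return &it.entry // valid only until the next call to Next()
}

func main() {
	it := &iter{terms: []string{"apple", "banana"}}
	for e := it.Next(); e != nil; e = it.Next() {
		fmt.Println(e.Term, e.Count) // copy the fields if retaining them
	}
}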

View File

@@ -34,15 +34,18 @@ type Dictionary struct {
 // PostingsList returns the postings list for the specified term
 func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap) (segment.PostingsList, error) {
-	return d.postingsList([]byte(term), except)
+	return d.postingsList([]byte(term), except, nil)
 }
 
-func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap) (*PostingsList, error) {
-	rv := &PostingsList{
-		sb:     d.sb,
-		term:   term,
-		except: except,
+func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) {
+	if rv == nil {
+		rv = &PostingsList{}
+	} else {
+		*rv = PostingsList{} // clear the struct
 	}
+	rv.sb = d.sb
+	rv.term = term
+	rv.except = except
 
 	if d.fst != nil {
 		postingsOffset, exists, err := d.fst.Get(term)
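
The new third parameter makes postingsList reusable: pass nil for a freshly allocated PostingsList, or pass a previously returned one to have it cleared (*rv = PostingsList{}) and overwritten in place, so tight loops allocate only once. A sketch of the same nil-or-reuse API shape (result and lookup are hypothetical stand-ins):

package main

import "fmt"

type result struct {
	key  string
	hits uint64
}

func lookup(key string, rv *result) (*result, error) {
	if rv == nil {
		rv = &result{} // first call: allocate
	} else {
		*rv = result{} // reuse: wipe state left by the previous call
	}
	rv.key = key
	rv.hits = uint64(len(key)) // placeholder "lookup"
	return rv, nil
}

func main() {
	var r *result // one allocation serves the whole loop
	for _, k := range []string{"alpha", "beta", "gamma"} {
		r, _ = lookup(k, r)
		fmt.Println(r.key, r.hits)
	}
}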

View File

@@ -46,6 +46,11 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string,
 		_ = os.Remove(path)
 	}
 
+	segmentBases := make([]*SegmentBase, len(segments))
+	for segmenti, segment := range segments {
+		segmentBases[segmenti] = &segment.SegmentBase
+	}
+
 	// buffer the output
 	br := bufio.NewWriter(f)
@@ -53,7 +58,7 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string,
 	cr := NewCountHashWriter(br)
 
 	newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, err :=
-		mergeToWriter(segments, drops, chunkFactor, cr)
+		MergeToWriter(segmentBases, drops, chunkFactor, cr)
 	if err != nil {
 		cleanup()
 		return nil, err
@@ -87,7 +92,7 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string,
 	return newDocNums, nil
 }
 
-func mergeToWriter(segments []*Segment, drops []*roaring.Bitmap,
+func MergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap,
 	chunkFactor uint32, cr *CountHashWriter) (
 	newDocNums [][]uint64,
 	numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset uint64,
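
Two things happen in these hunks: mergeToWriter is exported as MergeToWriter, presumably so callers can merge segments that live only in memory, and it now takes []*SegmentBase instead of []*Segment. The adapter loop in Merge works because Segment embeds SegmentBase, so &segment.SegmentBase is just a pointer into the existing struct, with no copying. A small sketch of that embedding pattern (the fields shown are illustrative):

package main

import "fmt"

type SegmentBase struct {
	numDocs uint64
}

type Segment struct {
	SegmentBase // embedded, so &seg.SegmentBase points inside seg
	path        string
}

func totalDocs(bases []*SegmentBase) (n uint64) {
	for _, b := range bases {
		n += b.numDocs
	}
	return n
}

func main() {
	segs := []*Segment{{SegmentBase{10}, "a.zap"}, {SegmentBase{5}, "b.zap"}}
	bases := make([]*SegmentBase, len(segs))
	for i, s := range segs {
		bases[i] = &s.SegmentBase // no copy: pointer to the embedded field
	}
	fmt.Println(totalDocs(bases)) // 15
}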
@@ -135,10 +140,10 @@ func mapFields(fields []string) map[string]uint16 {
 
 // computeNewDocCount determines how many documents will be in the newly
 // merged segment when obsoleted docs are dropped
-func computeNewDocCount(segments []*Segment, drops []*roaring.Bitmap) uint64 {
+func computeNewDocCount(segments []*SegmentBase, drops []*roaring.Bitmap) uint64 {
 	var newDocCount uint64
 	for segI, segment := range segments {
-		newDocCount += segment.NumDocs()
+		newDocCount += segment.numDocs
 		if drops[segI] != nil {
 			newDocCount -= drops[segI].GetCardinality()
 		}
@@ -146,7 +151,7 @@ func computeNewDocCount(segments []*Segment, drops []*roaring.Bitmap) uint64 {
 	return newDocCount
 }
 
-func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap,
+func persistMergedRest(segments []*SegmentBase, drops []*roaring.Bitmap,
 	fieldsInv []string, fieldsMap map[string]uint16, newDocNums [][]uint64,
 	newSegDocCount uint64, chunkFactor uint32,
 	w *CountHashWriter) ([]uint64, uint64, error) {
@@ -155,6 +160,8 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap,
 	var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64)
 	var bufLoc []uint64
 
+	var postings *PostingsList
+
 	rv := make([]uint64, len(fieldsInv))
 	fieldDvLocs := make([]uint64, len(fieldsInv))
 	fieldDvLocsOffset := uint64(fieldNotUninverted)
@@ -231,7 +238,8 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap,
 			if dict == nil {
 				continue
 			}
-			postings, err2 := dict.postingsList(term, drops[dictI])
+			var err2 error
+			postings, err2 = dict.postingsList(term, drops[dictI], postings)
 			if err2 != nil {
 				return nil, 0, err2
 			}
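
Hoisting var postings *PostingsList out of the term loop and threading it back into dict.postingsList means one PostingsList is allocated per merge rather than one per term. The var err2 error line is what makes that work: with :=, the statement would declare a new loop-local postings that shadows the hoisted one. A self-contained illustration of that shadowing trap (fetch is a hypothetical stand-in for dict.postingsList):

package main

import "fmt"

type PostingsList struct{ term string }

func fetch(term string, rv *PostingsList) (*PostingsList, error) {
	if rv == nil {
		rv = &PostingsList{}
	}
	rv.term = term
	return rv, nil
}

func main() {
	var postings *PostingsList
	for _, term := range []string{"a", "b", "c"} {
		// `postings, err := fetch(...)` would declare a NEW postings
		// scoped to this block, shadowing the outer one, so nothing
		// would be reused. Declaring only the error keeps the
		// assignment pointed at the outer variable:
		var err error
		postings, err = fetch(term, postings)
		if err != nil {
			return
		}
		fmt.Printf("%s -> %p\n", term, postings) // same address each time
	}
}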
@@ -405,7 +413,7 @@ func persistMergedRest(segments []*Segment, drops []*roaring.Bitmap,
 
 const docDropped = math.MaxUint64
 
-func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap,
+func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap,
 	fieldsMap map[string]uint16, fieldsInv []string, newSegDocCount uint64,
 	w *CountHashWriter) (uint64, [][]uint64, error) {
 	var rv [][]uint64 // The remapped or newDocNums for each segment.
@@ -520,7 +528,7 @@ func mergeStoredAndRemap(segments []*Segment, drops []*roaring.Bitmap,
 }
 
 // mergeFields builds a unified list of fields used across all the input segments
-func mergeFields(segments []*Segment) []string {
+func mergeFields(segments []*SegmentBase) []string {
 	fieldsMap := map[string]struct{}{}
 	for _, segment := range segments {
 		fields := segment.Fields()

View File

@@ -310,8 +310,8 @@ func compareSegments(a, b *Segment) string {
 				continue
 			}
 
-			aplist, aerr := adict.(*Dictionary).postingsList([]byte(next.Term), nil)
-			bplist, berr := bdict.(*Dictionary).postingsList([]byte(next.Term), nil)
+			aplist, aerr := adict.(*Dictionary).postingsList([]byte(next.Term), nil, nil)
+			bplist, berr := bdict.(*Dictionary).postingsList([]byte(next.Term), nil, nil)
 
 			if aerr != berr {
 				rv = append(rv, fmt.Sprintf("field %s, term: %s, postingsList() errors different: %v %v",
 					fieldName, next.Term, aerr, berr))

View File

@@ -343,8 +343,9 @@ func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) {
 			return nil, err
 		}
 
+		var postings *PostingsList
 		for _, id := range ids {
-			postings, err := idDict.postingsList([]byte(id), nil)
+			postings, err = idDict.postingsList([]byte(id), nil, postings)
 			if err != nil {
 				return nil, err
 			}
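
This is the same hoist-and-reuse pattern as in persistMergedRest: postings is declared once outside the loop, and the assignment uses plain = so it does not shadow it. The implicit contract is that each postings value is fully consumed within its iteration, because the next postingsList call clears and rewrites the same struct. Annotated copy of the loop above (comments are mine, code unchanged):

var postings *PostingsList // hoisted: one struct reused for every id
for _, id := range ids {
	// plain `=` rather than `:=`, assigning to the outer postings
	postings, err = idDict.postingsList([]byte(id), nil, postings)
	if err != nil {
		return nil, err
	}
	// consume postings here; do not retain the pointer past this
	// iteration, because the next call overwrites the same struct
}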