
Merge pull request #799 from steveyen/scorch-optimizations

more scorch optimizations
Steve Yen 2018-03-05 15:59:19 -08:00 committed by GitHub
commit 16174c589d
7 changed files with 64 additions and 19 deletions

View File

@@ -179,9 +179,19 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
 	filename := zapFileName(newSegmentID)
 	s.markIneligibleForRemoval(filename)
 	path := s.path + string(os.PathSeparator) + filename
+
+	fileMergeZapStartTime := time.Now()
+
 	atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1)
 	newDocNums, err := zap.Merge(segmentsToMerge, docsToDrop, path, 1024)
 	atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1)
+
+	fileMergeZapTime := uint64(time.Since(fileMergeZapStartTime))
+	atomic.AddUint64(&s.stats.TotFileMergeZapTime, fileMergeZapTime)
+	if atomic.LoadUint64(&s.stats.MaxFileMergeZapTime) < fileMergeZapTime {
+		atomic.StoreUint64(&s.stats.MaxFileMergeZapTime, fileMergeZapTime)
+	}
+
 	if err != nil {
 		s.unmarkIneligibleForRemoval(filename)
 		atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
@@ -258,11 +268,20 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
 	cr := zap.NewCountHashWriter(&br)

+	memMergeZapStartTime := time.Now()
+
 	atomic.AddUint64(&s.stats.TotMemMergeZapBeg, 1)
 	newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset,
 		docValueOffset, dictLocs, fieldsInv, fieldsMap, err :=
 		zap.MergeToWriter(sbs, sbsDrops, chunkFactor, cr)
 	atomic.AddUint64(&s.stats.TotMemMergeZapEnd, 1)
+
+	memMergeZapTime := uint64(time.Since(memMergeZapStartTime))
+	atomic.AddUint64(&s.stats.TotMemMergeZapTime, memMergeZapTime)
+	if atomic.LoadUint64(&s.stats.MaxMemMergeZapTime) < memMergeZapTime {
+		atomic.StoreUint64(&s.stats.MaxMemMergeZapTime, memMergeZapTime)
+	}
+
 	if err != nil {
 		atomic.AddUint64(&s.stats.TotMemMergeErr, 1)
 		return 0, nil, 0, err
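
Both hunks above follow the same instrumentation pattern: capture time.Now() before the zap merge, convert time.Since() to a uint64, then fold it into a running total and a high-water mark via sync/atomic so the merge path stays lock-free. A minimal, self-contained sketch of that pattern (the mergeStats type and timedMerge helper are illustrative, not bleve code):

package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

// mergeStats is an illustrative stand-in for the scorch Stats fields touched above.
type mergeStats struct {
	TotZapTime uint64 // cumulative merge time, in nanoseconds
	MaxZapTime uint64 // slowest single merge seen so far, in nanoseconds
}

// timedMerge wraps a merge with the same timing pattern as the diff:
// total via AddUint64, maximum via a load/compare/store.
func timedMerge(s *mergeStats, doMerge func()) {
	start := time.Now()
	doMerge()
	elapsed := uint64(time.Since(start)) // time.Duration is an int64 of nanoseconds

	atomic.AddUint64(&s.TotZapTime, elapsed)
	// two goroutines can race between the Load and the Store, so the maximum
	// is best-effort; the diff accepts that trade-off to stay lock-free
	if atomic.LoadUint64(&s.MaxZapTime) < elapsed {
		atomic.StoreUint64(&s.MaxZapTime, elapsed)
	}
}

func main() {
	var s mergeStats
	timedMerge(&s, func() { time.Sleep(5 * time.Millisecond) })
	fmt.Println(time.Duration(atomic.LoadUint64(&s.TotZapTime)),
		time.Duration(atomic.LoadUint64(&s.MaxZapTime)))
}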

View File

@@ -365,7 +365,7 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string,
 	introTime := uint64(time.Since(introStartTime))
 	atomic.AddUint64(&s.stats.TotBatchIntroTime, introTime)
 	if atomic.LoadUint64(&s.stats.MaxBatchIntroTime) < introTime {
-		atomic.AddUint64(&s.stats.MaxBatchIntroTime, introTime)
+		atomic.StoreUint64(&s.stats.MaxBatchIntroTime, introTime)
 	}

 	return err
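
The one-word change above is the substantive fix in this hunk: atomic.AddUint64 kept accumulating into MaxBatchIntroTime every time a new maximum was observed, whereas atomic.StoreUint64 records the maximum itself. The load-then-store check still leaves a small window where two goroutines can both pass the comparison; if an exact maximum were required, a compare-and-swap loop is the usual idiom, sketched here as an assumption rather than anything bleve does:

package main

import (
	"fmt"
	"sync/atomic"
)

// updateMax raises *max to v when v is larger, retrying the CAS until it wins
// or another goroutine has already published an equal or larger value.
func updateMax(max *uint64, v uint64) {
	for {
		cur := atomic.LoadUint64(max)
		if v <= cur {
			return
		}
		if atomic.CompareAndSwapUint64(max, cur, v) {
			return
		}
	}
}

func main() {
	var maxIntroTime uint64
	updateMax(&maxIntroTime, 120)
	updateMax(&maxIntroTime, 80) // a smaller value leaves the maximum untouched
	fmt.Println(maxIntroTime)    // 120
}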

View File

@@ -253,7 +253,7 @@ func (s *Segment) processDocument(result *index.AnalysisResult) {
 	// now that its been rolled up into docMap, walk that
 	for fieldID, tokenFrequencies := range docMap {
 		dict := s.Dicts[fieldID]
-		norm := float32(1.0/math.Sqrt(float64(fieldLens[fieldID])))
+		norm := float32(1.0 / math.Sqrt(float64(fieldLens[fieldID])))
 		for term, tokenFreq := range tokenFrequencies {
 			pid := dict[term] - 1
 			bs := s.Postings[pid]

View File

@@ -33,12 +33,20 @@ type Dictionary struct {
 // PostingsList returns the postings list for the specified term
 func (d *Dictionary) PostingsList(term string,
 	except *roaring.Bitmap) (segment.PostingsList, error) {
-	return &PostingsList{
-		dictionary: d,
-		term:       term,
-		postingsID: d.segment.Dicts[d.fieldID][term],
-		except:     except,
-	}, nil
+	return d.InitPostingsList(term, except, nil)
+}
+
+func (d *Dictionary) InitPostingsList(term string, except *roaring.Bitmap,
+	prealloc *PostingsList) (*PostingsList, error) {
+	rv := prealloc
+	if rv == nil {
+		rv = &PostingsList{}
+	}
+	rv.dictionary = d
+	rv.term = term
+	rv.postingsID = d.segment.Dicts[d.fieldID][term]
+	rv.except = except
+	return rv, nil
 }

 // Iterator returns an iterator for this dictionary
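
The refactor above keeps PostingsList as the public entry point but routes it through a new InitPostingsList, which fills in a caller-supplied value and only allocates when prealloc is nil. The payoff is in loops that fetch one postings list per term: the caller hoists a single *PostingsList outside the loop and passes it back in on each iteration, trading one allocation per term for one allocation total. A hedged, toy-sized sketch of the same shape (the types below only mirror the diff; they are not the mem package API):

package main

import "fmt"

// Toy stand-ins that only mirror the shape of the mem-segment types in the diff.
type Dictionary struct {
	postingsIDs map[string]uint64
}

type PostingsList struct {
	dict       *Dictionary
	term       string
	postingsID uint64
}

// InitPostingsList fills prealloc when the caller supplies one and allocates
// only when prealloc is nil — the same pattern as the method added above.
func (d *Dictionary) InitPostingsList(term string, prealloc *PostingsList) *PostingsList {
	rv := prealloc
	if rv == nil {
		rv = &PostingsList{}
	}
	rv.dict = d
	rv.term = term
	rv.postingsID = d.postingsIDs[term]
	return rv
}

func main() {
	d := &Dictionary{postingsIDs: map[string]uint64{"tea": 1, "team": 2}}

	var pl *PostingsList // hoisted outside the loop and reused every iteration
	for _, term := range []string{"tea", "team"} {
		pl = d.InitPostingsList(term, pl)
		fmt.Println(pl.term, pl.postingsID)
	}
}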

View File

@@ -46,9 +46,16 @@ func (p *PostingsList) Count() uint64 {
 // Iterator returns an iterator for this postings list
 func (p *PostingsList) Iterator() segment.PostingsIterator {
-	rv := &PostingsIterator{
-		postings: p,
+	return p.InitIterator(nil)
+}
+
+func (p *PostingsList) InitIterator(prealloc *PostingsIterator) *PostingsIterator {
+	rv := prealloc
+	if rv == nil {
+		rv = &PostingsIterator{postings: p}
+	} else {
+		*rv = PostingsIterator{postings: p}
 	}

 	if p.postingsID > 0 {
 		allbits := p.dictionary.segment.Postings[p.postingsID-1]
 		rv.locations = p.dictionary.segment.PostingsLocs[p.postingsID-1]
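
One detail worth calling out in InitIterator: when a preallocated iterator is passed in, it is reset with a whole-struct assignment (*rv = PostingsIterator{postings: p}) rather than by clearing fields one at a time, so any state left over from the previous postings list (locations, bitmap iterators, offsets) is zeroed in a single statement. A tiny sketch of the idiom on a placeholder type:

package main

import "fmt"

// iterator is a placeholder type; offset stands in for whatever state a real
// PostingsIterator accumulates while walking a postings list.
type iterator struct {
	src    string
	offset int
}

// reset reuses the allocation but discards all prior state by assigning a
// fresh struct value over it — the same idiom as *rv = PostingsIterator{postings: p}.
func reset(prealloc *iterator, src string) *iterator {
	if prealloc == nil {
		return &iterator{src: src}
	}
	*prealloc = iterator{src: src}
	return prealloc
}

func main() {
	it := reset(nil, "first")
	it.offset = 42 // pretend a previous traversal advanced the iterator

	it = reset(it, "second")
	fmt.Println(it.src, it.offset) // "second 0": no stale state carried over
}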

View File

@@ -308,7 +308,7 @@ func persistStoredFieldValues(fieldID int,
 }

 func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFactor uint32) ([]uint64, []uint64, error) {
-	var freqOffsets, locOfffsets []uint64
+	freqOffsets := make([]uint64, 0, len(memSegment.Postings))
 	tfEncoder := newChunkedIntCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1))
 	for postingID := range memSegment.Postings {
 		if postingID != 0 {
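
This hunk switches freqOffsets from a nil slice declaration to make([]uint64, 0, len(memSegment.Postings)): the final length is known up front, so append never has to grow and copy the backing array. A small illustration of the idea (placeholder data, not bleve code):

package main

import "fmt"

func main() {
	postings := make([]int, 1000) // placeholder for memSegment.Postings

	// capacity equals the known final length, so append never reallocates
	offsets := make([]uint64, 0, len(postings))
	for i := range postings {
		offsets = append(offsets, uint64(i)*16) // placeholder offset value
	}
	fmt.Println(len(offsets), cap(offsets))
}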
@@ -351,6 +351,7 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac
 	}

 	// now do it again for the locations
+	locOffsets := make([]uint64, 0, len(memSegment.Postings))
 	locEncoder := newChunkedIntCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1))
 	for postingID := range memSegment.Postings {
 		if postingID != 0 {
@@ -367,7 +368,8 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac
 			var locOffset int
 			for postingsListItr.HasNext() {
 				docNum := uint64(postingsListItr.Next())
-				for i := 0; i < int(freqs[offset]); i++ {
+				n := int(freqs[offset])
+				for i := 0; i < n; i++ {
 					if len(locfields) > 0 {
 						// put field
 						err := locEncoder.Add(docNum, uint64(locfields[locOffset]))
@@ -414,14 +416,15 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac
 		}

 		// record where this postings loc info starts
-		locOfffsets = append(locOfffsets, uint64(w.Count()))
+		locOffsets = append(locOffsets, uint64(w.Count()))
 		locEncoder.Close()
 		_, err := locEncoder.Write(w)
 		if err != nil {
 			return nil, nil, err
 		}
 	}
-	return freqOffsets, locOfffsets, nil
+
+	return freqOffsets, locOffsets, nil
 }

 func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint64, err error) {
@@ -532,6 +535,9 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter,
 	fieldChunkOffsets := make(map[uint16]uint64, len(memSegment.FieldsInv))
 	fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1))

+	var postings *mem.PostingsList
+	var postingsItr *mem.PostingsIterator
+
 	for fieldID := range memSegment.DocValueFields {
 		field := memSegment.FieldsInv[fieldID]
 		docTermMap := make(map[uint64][]byte, 0)
@@ -543,12 +549,13 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter,
 		dictItr := dict.Iterator()
 		next, err := dictItr.Next()
 		for err == nil && next != nil {
-			postings, err1 := dict.PostingsList(next.Term, nil)
+			var err1 error
+			postings, err1 = dict.(*mem.Dictionary).InitPostingsList(next.Term, nil, postings)
 			if err1 != nil {
 				return nil, err
 			}

-			postingsItr := postings.Iterator()
+			postingsItr = postings.InitIterator(postingsItr)
 			nextPosting, err2 := postingsItr.Next()
 			for err2 == nil && nextPosting != nil {
 				docNum := nextPosting.Number()
@@ -566,7 +573,7 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter,
 		}

 		// sort wrt to docIDs
-		var docNumbers docIDRange
+		docNumbers := make(docIDRange, 0, len(docTermMap))
 		for k := range docTermMap {
 			docNumbers = append(docNumbers, k)
 		}
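
These persistDocValues hunks are where the two new Init helpers pay off: postings and postingsItr are declared once before the field loop and re-filled for every term instead of being reallocated. The err1 change is what makes the reuse work in Go: writing `postings, err1 := ...` would declare a new postings variable that shadows the hoisted one inside the loop body, leaving the outer value permanently nil; declaring `var err1 error` first allows a plain `=` assignment into the hoisted variable. A compact, self-contained illustration of that shadowing trap (names are placeholders):

package main

import "fmt"

func produce() (*int, error) {
	v := 7
	return &v, nil
}

func main() {
	var outer *int

	for i := 0; i < 1; i++ {
		// ':=' declares a brand-new 'outer' scoped to the loop body,
		// so the hoisted variable above is never assigned
		outer, err := produce()
		_, _ = outer, err
	}
	fmt.Println(outer == nil) // true: the loop only touched its shadow

	for i := 0; i < 1; i++ {
		var err error
		outer, err = produce() // plain '=' fills the hoisted variable
		_ = err
	}
	fmt.Println(outer == nil) // false: reuse across iterations now works
}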

View File

@@ -88,8 +88,10 @@ type Stats struct {
 	TotFileMergeSegmentsEmpty uint64
 	TotFileMergeSegments uint64

 	TotFileMergeZapBeg uint64
 	TotFileMergeZapEnd uint64
+	TotFileMergeZapTime uint64
+	MaxFileMergeZapTime uint64

 	TotFileMergeIntroductions uint64
 	TotFileMergeIntroductionsDone uint64
@@ -99,6 +101,8 @@ type Stats struct {
 	TotMemMergeDone uint64
 	TotMemMergeZapBeg uint64
 	TotMemMergeZapEnd uint64
+	TotMemMergeZapTime uint64
+	MaxMemMergeZapTime uint64
 	TotMemMergeSegments uint64
 }
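
The new counters follow the existing Stats convention: Tot* fields accumulate and Max* fields hold a high-water mark, all updated through sync/atomic on the write side. Any reader should therefore go through atomic.LoadUint64 as well rather than reading the fields directly; a hedged sketch of a reporting helper (the trimmed-down struct and snapshot method are illustrative, not bleve API):

package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

// Stats is a trimmed-down stand-in for the struct extended above.
type Stats struct {
	TotFileMergeZapTime uint64
	MaxFileMergeZapTime uint64
}

// snapshot reads the counters with atomic loads so a concurrent merge
// updating them does not hand the reader a torn or stale value per field.
func (s *Stats) snapshot() (total, slowest time.Duration) {
	return time.Duration(atomic.LoadUint64(&s.TotFileMergeZapTime)),
		time.Duration(atomic.LoadUint64(&s.MaxFileMergeZapTime))
}

func main() {
	var s Stats
	atomic.AddUint64(&s.TotFileMergeZapTime, uint64(250*time.Millisecond))
	atomic.StoreUint64(&s.MaxFileMergeZapTime, uint64(250*time.Millisecond))

	total, slowest := s.snapshot()
	fmt.Println(total, slowest)
}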