From d44c5ad5682450baff48ead3345d69f9ec5399cc Mon Sep 17 00:00:00 2001
From: Steve Yen
Date: Sat, 3 Mar 2018 19:32:39 -0800
Subject: [PATCH 1/6] scorch stats MaxBatchIntroTime bug fix and more timing stats

Added timing stats for in-mem zap merging and file-based zap merging.
---
 index/scorch/merge.go  | 19 +++++++++++++++++++
 index/scorch/scorch.go |  2 +-
 index/scorch/stats.go  |  8 ++++++--
 3 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/index/scorch/merge.go b/index/scorch/merge.go
index 005d4f41..ee3ec46c 100644
--- a/index/scorch/merge.go
+++ b/index/scorch/merge.go
@@ -179,9 +179,19 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
 		filename := zapFileName(newSegmentID)
 		s.markIneligibleForRemoval(filename)
 		path := s.path + string(os.PathSeparator) + filename
+
+		fileMergeZapStartTime := time.Now()
+
 		atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1)
 		newDocNums, err := zap.Merge(segmentsToMerge, docsToDrop, path, 1024)
 		atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1)
+
+		fileMergeZapTime := uint64(time.Since(fileMergeZapStartTime))
+		atomic.AddUint64(&s.stats.TotFileMergeZapTime, fileMergeZapTime)
+		if atomic.LoadUint64(&s.stats.MaxFileMergeZapTime) < fileMergeZapTime {
+			atomic.StoreUint64(&s.stats.MaxFileMergeZapTime, fileMergeZapTime)
+		}
+
 		if err != nil {
 			s.unmarkIneligibleForRemoval(filename)
 			atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
@@ -258,11 +268,20 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
 	cr := zap.NewCountHashWriter(&br)
 
+	memMergeZapStartTime := time.Now()
+
 	atomic.AddUint64(&s.stats.TotMemMergeZapBeg, 1)
 	newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset,
 		dictLocs, fieldsInv, fieldsMap, err :=
 		zap.MergeToWriter(sbs, sbsDrops, chunkFactor, cr)
 	atomic.AddUint64(&s.stats.TotMemMergeZapEnd, 1)
+
+	memMergeZapTime := uint64(time.Since(memMergeZapStartTime))
+	atomic.AddUint64(&s.stats.TotMemMergeZapTime, memMergeZapTime)
+	if atomic.LoadUint64(&s.stats.MaxMemMergeZapTime) < memMergeZapTime {
+		atomic.StoreUint64(&s.stats.MaxMemMergeZapTime, memMergeZapTime)
+	}
+
 	if err != nil {
 		atomic.AddUint64(&s.stats.TotMemMergeErr, 1)
 		return 0, nil, 0, err
diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go
index 87372a32..a40f374a 100644
--- a/index/scorch/scorch.go
+++ b/index/scorch/scorch.go
@@ -365,7 +365,7 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string,
 	introTime := uint64(time.Since(introStartTime))
 	atomic.AddUint64(&s.stats.TotBatchIntroTime, introTime)
 	if atomic.LoadUint64(&s.stats.MaxBatchIntroTime) < introTime {
-		atomic.AddUint64(&s.stats.MaxBatchIntroTime, introTime)
+		atomic.StoreUint64(&s.stats.MaxBatchIntroTime, introTime)
 	}
 
 	return err
diff --git a/index/scorch/stats.go b/index/scorch/stats.go
index cd416a7c..3c978af7 100644
--- a/index/scorch/stats.go
+++ b/index/scorch/stats.go
@@ -88,8 +88,10 @@ type Stats struct {
 	TotFileMergeSegmentsEmpty uint64
 	TotFileMergeSegments      uint64
 
-	TotFileMergeZapBeg uint64
-	TotFileMergeZapEnd uint64
+	TotFileMergeZapBeg  uint64
+	TotFileMergeZapEnd  uint64
+	TotFileMergeZapTime uint64
+	MaxFileMergeZapTime uint64
 
 	TotFileMergeIntroductions     uint64
 	TotFileMergeIntroductionsDone uint64
@@ -99,6 +101,8 @@ type Stats struct {
 	TotMemMergeDone     uint64
 	TotMemMergeZapBeg   uint64
 	TotMemMergeZapEnd   uint64
+	TotMemMergeZapTime  uint64
+	MaxMemMergeZapTime  uint64
 	TotMemMergeSegments uint64
 }
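Note on patch 1 (editorial illustration, not part of the patch series): the new stats follow a total-plus-maximum pattern, and the bug being fixed is that the maximum was previously updated with atomic.AddUint64, which keeps growing by each observed duration instead of recording the largest one; atomic.StoreUint64 is what the load-and-compare branch needs. The sketch below uses hypothetical stat names, and it swaps the patch's simple load-then-store for a compare-and-swap loop, which also closes the small window where two goroutines could race on the maximum.

package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

// stats mirrors the shape of the scorch timing counters (names are made up).
type stats struct {
	TotZapTime uint64
	MaxZapTime uint64
}

// observe records one merge duration: the total always accumulates, while
// the maximum is raised with a CAS loop so concurrent writers cannot
// overwrite a larger value with a smaller one.
func (s *stats) observe(d time.Duration) {
	t := uint64(d)
	atomic.AddUint64(&s.TotZapTime, t)
	for {
		cur := atomic.LoadUint64(&s.MaxZapTime)
		if t <= cur || atomic.CompareAndSwapUint64(&s.MaxZapTime, cur, t) {
			return
		}
	}
}

func main() {
	var s stats
	s.observe(120 * time.Millisecond)
	s.observe(80 * time.Millisecond)
	fmt.Println(time.Duration(s.TotZapTime), time.Duration(s.MaxZapTime))
}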
From 85761c6a57987ece0b193042ddf953f3f360bad1 Mon Sep 17 00:00:00 2001
From: Steve Yen
Date: Sat, 3 Mar 2018 19:39:21 -0800
Subject: [PATCH 2/6] go fmt

---
 index/scorch/segment/mem/build.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go
index 643ae36e..57971aae 100644
--- a/index/scorch/segment/mem/build.go
+++ b/index/scorch/segment/mem/build.go
@@ -253,7 +253,7 @@ func (s *Segment) processDocument(result *index.AnalysisResult) {
 	// now that its been rolled up into docMap, walk that
 	for fieldID, tokenFrequencies := range docMap {
 		dict := s.Dicts[fieldID]
-		norm := float32(1.0/math.Sqrt(float64(fieldLens[fieldID])))
+		norm := float32(1.0 / math.Sqrt(float64(fieldLens[fieldID])))
 		for term, tokenFreq := range tokenFrequencies {
 			pid := dict[term] - 1
 			bs := s.Postings[pid]

From 8c0881eab2caef301835eb9b0660e0b727d06687 Mon Sep 17 00:00:00 2001
From: Steve Yen
Date: Sun, 4 Mar 2018 12:03:02 -0800
Subject: [PATCH 3/6] scorch zap build reuses mem postingsList/Iterator structs

---
 index/scorch/segment/mem/dict.go    | 20 ++++++++++++++------
 index/scorch/segment/mem/posting.go | 11 +++++++++--
 index/scorch/segment/zap/build.go   |  8 ++++++--
 3 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/index/scorch/segment/mem/dict.go b/index/scorch/segment/mem/dict.go
index cf92ef71..b564ed1f 100644
--- a/index/scorch/segment/mem/dict.go
+++ b/index/scorch/segment/mem/dict.go
@@ -33,12 +33,20 @@ type Dictionary struct {
 
 // PostingsList returns the postings list for the specified term
 func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap) (segment.PostingsList, error) {
-	return &PostingsList{
-		dictionary: d,
-		term:       term,
-		postingsID: d.segment.Dicts[d.fieldID][term],
-		except:     except,
-	}, nil
+	return d.InitPostingsList(term, except, nil)
+}
+
+func (d *Dictionary) InitPostingsList(term string, except *roaring.Bitmap,
+	prealloc *PostingsList) (*PostingsList, error) {
+	rv := prealloc
+	if rv == nil {
+		rv = &PostingsList{}
+	}
+	rv.dictionary = d
+	rv.term = term
+	rv.postingsID = d.segment.Dicts[d.fieldID][term]
+	rv.except = except
+	return rv, nil
 }
 
 // Iterator returns an iterator for this dictionary
diff --git a/index/scorch/segment/mem/posting.go b/index/scorch/segment/mem/posting.go
index 25cbeb45..2554333a 100644
--- a/index/scorch/segment/mem/posting.go
+++ b/index/scorch/segment/mem/posting.go
@@ -46,9 +46,16 @@ func (p *PostingsList) Count() uint64 {
 
 // Iterator returns an iterator for this postings list
 func (p *PostingsList) Iterator() segment.PostingsIterator {
-	rv := &PostingsIterator{
-		postings: p,
+	return p.InitIterator(nil)
+}
+func (p *PostingsList) InitIterator(prealloc *PostingsIterator) *PostingsIterator {
+	rv := prealloc
+	if rv == nil {
+		rv = &PostingsIterator{postings: p}
+	} else {
+		*rv = PostingsIterator{postings: p}
 	}
+
 	if p.postingsID > 0 {
 		allbits := p.dictionary.segment.Postings[p.postingsID-1]
 		rv.locations = p.dictionary.segment.PostingsLocs[p.postingsID-1]
diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go
index b075496c..7fbc995f 100644
--- a/index/scorch/segment/zap/build.go
+++ b/index/scorch/segment/zap/build.go
@@ -532,6 +532,9 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter,
 	fieldChunkOffsets := make(map[uint16]uint64, len(memSegment.FieldsInv))
 	fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1))
 
+	var postings *mem.PostingsList
+	var postingsItr *mem.PostingsIterator
+
 	for fieldID := range memSegment.DocValueFields {
 		field := memSegment.FieldsInv[fieldID]
 		docTermMap := make(map[uint64][]byte, 0)
@@ -543,12 +546,13 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter,
 		dictItr := dict.Iterator()
 		next, err := dictItr.Next()
 		for err == nil && next != nil {
-			postings, err1 := dict.PostingsList(next.Term, nil)
+			var err1 error
+			postings, err1 = dict.(*mem.Dictionary).InitPostingsList(next.Term, nil, postings)
 			if err1 != nil {
 				return nil, err
 			}
 
-			postingsItr := postings.Iterator()
+			postingsItr = postings.InitIterator(postingsItr)
 			nextPosting, err2 := postingsItr.Next()
 			for err2 == nil && nextPosting != nil {
 				docNum := nextPosting.Number()
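Note on patch 3 (editorial illustration, not part of the patch series): the Init* variants above take an optional preallocated receiver, so persistDocValues can reuse one PostingsList and one PostingsIterator across every term in the field dictionary instead of allocating fresh ones per iteration. The sketch below shows the same shape on an invented type; the reuse is only safe because each value is fully consumed before the next call overwrites it.

package main

import "fmt"

// PostingsList is a stand-in for the mem segment type of the same name.
type PostingsList struct {
	term       string
	postingsID uint64
}

// InitPostingsList fills prealloc when the caller supplies one, and only
// allocates when it is nil -- the pattern the patch adds to the mem package.
func InitPostingsList(term string, id uint64, prealloc *PostingsList) *PostingsList {
	rv := prealloc
	if rv == nil {
		rv = &PostingsList{}
	}
	rv.term = term
	rv.postingsID = id
	return rv
}

func main() {
	var postings *PostingsList // reused across loop iterations
	for i, term := range []string{"alpha", "beta", "gamma"} {
		postings = InitPostingsList(term, uint64(i+1), postings)
		fmt.Println(postings.term, postings.postingsID)
	}
}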
From 856778ad7bb4d4c8433a504de1302360ac4a5dbc Mon Sep 17 00:00:00 2001
From: Steve Yen
Date: Sun, 4 Mar 2018 12:06:45 -0800
Subject: [PATCH 4/6] scorch zap build prealloc docNumbers capacity

---
 index/scorch/segment/zap/build.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go
index 7fbc995f..4edd277d 100644
--- a/index/scorch/segment/zap/build.go
+++ b/index/scorch/segment/zap/build.go
@@ -570,7 +570,7 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter,
 		}
 
 		// sort wrt to docIDs
-		var docNumbers docIDRange
+		docNumbers := make(docIDRange, 0, len(docTermMap))
 		for k := range docTermMap {
 			docNumbers = append(docNumbers, k)
 		}

From a338386a038594f37d5b7901c4d3a73e53424787 Mon Sep 17 00:00:00 2001
From: Steve Yen
Date: Sun, 4 Mar 2018 12:56:33 -0800
Subject: [PATCH 5/6] scorch build optimize freq/loc slice capacity

---
 index/scorch/segment/zap/build.go | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go
index 4edd277d..eec9998b 100644
--- a/index/scorch/segment/zap/build.go
+++ b/index/scorch/segment/zap/build.go
@@ -308,7 +308,7 @@ func persistStoredFieldValues(fieldID int,
 }
 
 func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFactor uint32) ([]uint64, []uint64, error) {
-	var freqOffsets, locOfffsets []uint64
+	freqOffsets := make([]uint64, 0, len(memSegment.Postings))
 	tfEncoder := newChunkedIntCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1))
 	for postingID := range memSegment.Postings {
 		if postingID != 0 {
@@ -351,6 +351,7 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac
 	}
 
 	// now do it again for the locations
+	locOffsets := make([]uint64, 0, len(memSegment.Postings))
 	locEncoder := newChunkedIntCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1))
 	for postingID := range memSegment.Postings {
 		if postingID != 0 {
@@ -414,14 +415,15 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac
 		}
 
 		// record where this postings loc info starts
-		locOfffsets = append(locOfffsets, uint64(w.Count()))
+		locOffsets = append(locOffsets, uint64(w.Count()))
 		locEncoder.Close()
 		_, err := locEncoder.Write(w)
 		if err != nil {
 			return nil, nil, err
 		}
 	}
-	return freqOffsets, locOfffsets, nil
+
+	return freqOffsets, locOffsets, nil
 }
 
 func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint64, err error) {
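Note on patches 4 and 5 (editorial illustration, not part of the patch series): both replace a bare var slice declaration with make(..., 0, n), so the builder knows the final length up front and append never has to grow and copy the backing array. A throwaway example of the same capacity hint:

package main

import "fmt"

func main() {
	docTermMap := map[uint64][]byte{3: nil, 1: nil, 2: nil}

	// Sizing the slice to len(docTermMap) up front means the appends below
	// never trigger a reallocation, unlike a plain var declaration.
	docNumbers := make([]uint64, 0, len(docTermMap))
	for k := range docTermMap {
		docNumbers = append(docNumbers, k)
	}
	fmt.Println(len(docNumbers), cap(docNumbers))
}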
From 8f8fd511b7d2a6f221a5cc51641017b4be9c2a40 Mon Sep 17 00:00:00 2001
From: Steve Yen
Date: Sun, 4 Mar 2018 13:01:22 -0800
Subject: [PATCH 6/6] scorch zap access freqs[offset] outside loop

---
 index/scorch/segment/zap/build.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go
index eec9998b..237cc5f3 100644
--- a/index/scorch/segment/zap/build.go
+++ b/index/scorch/segment/zap/build.go
@@ -368,7 +368,8 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac
 		var locOffset int
 		for postingsListItr.HasNext() {
 			docNum := uint64(postingsListItr.Next())
-			for i := 0; i < int(freqs[offset]); i++ {
+			n := int(freqs[offset])
+			for i := 0; i < n; i++ {
 				if len(locfields) > 0 {
 					// put field
 					err := locEncoder.Add(docNum, uint64(locfields[locOffset]))
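Note on patch 6 (editorial illustration, not part of the patch series): freqs[offset] does not change while the inner loop runs, so reading it once into n avoids re-indexing (and bounds-checking) the slice on every pass and makes the trip count explicit. A stand-alone version of the same hoist, on made-up data:

package main

import "fmt"

func main() {
	freqs := []uint64{2, 5, 1}
	offset := 1

	// freqs[offset] is loop-invariant, so it is read exactly once.
	n := int(freqs[offset])
	visits := 0
	for i := 0; i < n; i++ {
		visits++
	}
	fmt.Println(n, visits)
}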