From 88c740095b7a5224c98e7538a0ee3c7bca574ee2 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Sat, 3 Mar 2018 10:59:53 -0800 Subject: [PATCH] scorch optimizations for mem.PostingsIterator.Next() & docTermMap Due to the usage rules of iterators, mem.PostingsIterator.Next() can reuse its returned Posting instance. Also, there's a micro optimization in persistDocValues() for one fewer access to the docTermMap in the inner loop. --- index/scorch/segment/mem/posting.go | 6 +++--- index/scorch/segment/zap/build.go | 5 ++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/index/scorch/segment/mem/posting.go b/index/scorch/segment/mem/posting.go index d91a0056..25cbeb45 100644 --- a/index/scorch/segment/mem/posting.go +++ b/index/scorch/segment/mem/posting.go @@ -73,6 +73,7 @@ type PostingsIterator struct { offset int locoffset int actual roaring.IntIterable + reuse Posting } // Next returns the next posting on the postings list, or nil at the end @@ -92,17 +93,16 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { i.offset++ allN = i.all.Next() } - rv := &Posting{ + i.reuse = Posting{ iterator: i, docNum: uint64(n), offset: i.offset, locoffset: i.locoffset, hasLoc: i.locations.Contains(n), } - i.locoffset += int(i.postings.dictionary.segment.Freqs[i.postings.postingsID-1][i.offset]) i.offset++ - return rv, nil + return &i.reuse, nil } // Posting is a single entry in a postings list diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 77f18b05..b075496c 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -552,8 +552,7 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter, nextPosting, err2 := postingsItr.Next() for err2 == nil && nextPosting != nil { docNum := nextPosting.Number() - docTermMap[docNum] = append(docTermMap[docNum], []byte(next.Term)...) 
- docTermMap[docNum] = append(docTermMap[docNum], termSeparator) + docTermMap[docNum] = append(append(docTermMap[docNum], []byte(next.Term)...), termSeparator) nextPosting, err2 = postingsItr.Next() } if err2 != nil { @@ -562,10 +561,10 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter, next, err = dictItr.Next() } - if err != nil { return nil, err } + // sort wrt to docIDs var docNumbers docIDRange for k := range docTermMap {