diff --git a/index/scorch/segment/mem/build.go b/index/scorch/segment/mem/build.go index 57d60dc8..643ae36e 100644 --- a/index/scorch/segment/mem/build.go +++ b/index/scorch/segment/mem/build.go @@ -222,12 +222,6 @@ func (s *Segment) processDocument(result *index.AnalysisResult) { } } - storeField := func(docNum uint64, field uint16, typ byte, val []byte, pos []uint64) { - s.Stored[docNum][field] = append(s.Stored[docNum][field], val) - s.StoredTypes[docNum][field] = append(s.StoredTypes[docNum][field], typ) - s.StoredPos[docNum][field] = append(s.StoredPos[docNum][field], pos) - } - // walk each composite field for _, field := range result.Document.CompositeFields { fieldID := uint16(s.getOrDefineField(field.Name())) @@ -235,6 +229,10 @@ func (s *Segment) processDocument(result *index.AnalysisResult) { processField(fieldID, field.Name(), l, tf) } + docStored := s.Stored[docNum] + docStoredTypes := s.StoredTypes[docNum] + docStoredPos := s.StoredPos[docNum] + // walk each field for i, field := range result.Document.Fields { fieldID := uint16(s.getOrDefineField(field.Name())) @@ -242,7 +240,9 @@ func (s *Segment) processDocument(result *index.AnalysisResult) { tf := result.Analyzed[i] processField(fieldID, field.Name(), l, tf) if field.Options().IsStored() { - storeField(docNum, fieldID, encodeFieldType(field), field.Value(), field.ArrayPositions()) + docStored[fieldID] = append(docStored[fieldID], field.Value()) + docStoredTypes[fieldID] = append(docStoredTypes[fieldID], encodeFieldType(field)) + docStoredPos[fieldID] = append(docStoredPos[fieldID], field.ArrayPositions()) } if field.Options().IncludeDocValues() { @@ -252,12 +252,14 @@ func (s *Segment) processDocument(result *index.AnalysisResult) { // now that its been rolled up into docMap, walk that for fieldID, tokenFrequencies := range docMap { + dict := s.Dicts[fieldID] + norm := float32(1.0/math.Sqrt(float64(fieldLens[fieldID]))) for term, tokenFreq := range tokenFrequencies { - pid := s.Dicts[fieldID][term] - 1 + pid := dict[term] - 1 bs := s.Postings[pid] bs.AddInt(int(docNum)) s.Freqs[pid] = append(s.Freqs[pid], uint64(tokenFreq.Frequency())) - s.Norms[pid] = append(s.Norms[pid], float32(1.0/math.Sqrt(float64(fieldLens[fieldID])))) + s.Norms[pid] = append(s.Norms[pid], norm) locationBS := s.PostingsLocs[pid] if len(tokenFreq.Locations) > 0 { locationBS.AddInt(int(docNum)) diff --git a/index/scorch/segment/mem/posting.go b/index/scorch/segment/mem/posting.go index d91a0056..25cbeb45 100644 --- a/index/scorch/segment/mem/posting.go +++ b/index/scorch/segment/mem/posting.go @@ -73,6 +73,7 @@ type PostingsIterator struct { offset int locoffset int actual roaring.IntIterable + reuse Posting } // Next returns the next posting on the postings list, or nil at the end @@ -92,17 +93,16 @@ func (i *PostingsIterator) Next() (segment.Posting, error) { i.offset++ allN = i.all.Next() } - rv := &Posting{ + i.reuse = Posting{ iterator: i, docNum: uint64(n), offset: i.offset, locoffset: i.locoffset, hasLoc: i.locations.Contains(n), } - i.locoffset += int(i.postings.dictionary.segment.Freqs[i.postings.postingsID-1][i.offset]) i.offset++ - return rv, nil + return &i.reuse, nil } // Posting is a single entry in a postings list diff --git a/index/scorch/segment/zap/build.go b/index/scorch/segment/zap/build.go index 77f18b05..b075496c 100644 --- a/index/scorch/segment/zap/build.go +++ b/index/scorch/segment/zap/build.go @@ -552,8 +552,7 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter, nextPosting, err2 := postingsItr.Next() for err2 == nil && nextPosting != nil { docNum := nextPosting.Number() - docTermMap[docNum] = append(docTermMap[docNum], []byte(next.Term)...) - docTermMap[docNum] = append(docTermMap[docNum], termSeparator) + docTermMap[docNum] = append(append(docTermMap[docNum], []byte(next.Term)...), termSeparator) nextPosting, err2 = postingsItr.Next() } if err2 != nil { @@ -562,10 +561,10 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter, next, err = dictItr.Next() } - if err != nil { return nil, err } + // sort wrt to docIDs var docNumbers docIDRange for k := range docTermMap {