
Merge pull request #795 from steveyen/scorch-mem-optimizations

More scorch micro optimizations when processing mem segments
Steve Yen 2018-03-05 12:02:15 -08:00 committed by GitHub
commit c5ab1f61d7
3 changed files with 16 additions and 15 deletions


@@ -222,12 +222,6 @@ func (s *Segment) processDocument(result *index.AnalysisResult) {
 		}
 	}
 
-	storeField := func(docNum uint64, field uint16, typ byte, val []byte, pos []uint64) {
-		s.Stored[docNum][field] = append(s.Stored[docNum][field], val)
-		s.StoredTypes[docNum][field] = append(s.StoredTypes[docNum][field], typ)
-		s.StoredPos[docNum][field] = append(s.StoredPos[docNum][field], pos)
-	}
-
 	// walk each composite field
 	for _, field := range result.Document.CompositeFields {
 		fieldID := uint16(s.getOrDefineField(field.Name()))
@@ -235,6 +229,10 @@ func (s *Segment) processDocument(result *index.AnalysisResult) {
 		processField(fieldID, field.Name(), l, tf)
 	}
 
+	docStored := s.Stored[docNum]
+	docStoredTypes := s.StoredTypes[docNum]
+	docStoredPos := s.StoredPos[docNum]
+
 	// walk each field
 	for i, field := range result.Document.Fields {
 		fieldID := uint16(s.getOrDefineField(field.Name()))
@@ -242,7 +240,9 @@ func (s *Segment) processDocument(result *index.AnalysisResult) {
 		tf := result.Analyzed[i]
 		processField(fieldID, field.Name(), l, tf)
 		if field.Options().IsStored() {
-			storeField(docNum, fieldID, encodeFieldType(field), field.Value(), field.ArrayPositions())
+			docStored[fieldID] = append(docStored[fieldID], field.Value())
+			docStoredTypes[fieldID] = append(docStoredTypes[fieldID], encodeFieldType(field))
+			docStoredPos[fieldID] = append(docStoredPos[fieldID], field.ArrayPositions())
 		}
 
 		if field.Options().IncludeDocValues() {
@@ -252,12 +252,14 @@ func (s *Segment) processDocument(result *index.AnalysisResult) {
 	// now that its been rolled up into docMap, walk that
 	for fieldID, tokenFrequencies := range docMap {
+		dict := s.Dicts[fieldID]
+		norm := float32(1.0/math.Sqrt(float64(fieldLens[fieldID])))
 		for term, tokenFreq := range tokenFrequencies {
-			pid := s.Dicts[fieldID][term] - 1
+			pid := dict[term] - 1
 			bs := s.Postings[pid]
 			bs.AddInt(int(docNum))
 			s.Freqs[pid] = append(s.Freqs[pid], uint64(tokenFreq.Frequency()))
-			s.Norms[pid] = append(s.Norms[pid], float32(1.0/math.Sqrt(float64(fieldLens[fieldID]))))
+			s.Norms[pid] = append(s.Norms[pid], norm)
 			locationBS := s.PostingsLocs[pid]
 			if len(tokenFreq.Locations) > 0 {
 				locationBS.AddInt(int(docNum))
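
The pattern in this hunk is hoisting loop-invariant work out of the inner term loop: the dictionary lookup s.Dicts[fieldID] and the norm computation depend only on fieldID, so they can be evaluated once per field rather than once per term. A minimal standalone sketch of the same idea, using hypothetical stand-in data (fieldLens, dicts, terms) rather than the scorch types:

package main

import (
	"fmt"
	"math"
)

func main() {
	fieldLens := map[uint16]int{0: 4}
	dicts := map[uint16]map[string]uint64{0: {"alpha": 1, "beta": 2}}
	terms := []string{"alpha", "beta"}

	for fieldID := range fieldLens {
		// Hoisted: both values are invariant across the inner loop,
		// so they are computed once per field instead of once per term.
		dict := dicts[fieldID]
		norm := float32(1.0 / math.Sqrt(float64(fieldLens[fieldID])))
		for _, term := range terms {
			pid := dict[term] - 1 // one map lookup per term instead of two
			fmt.Println(pid, norm)
		}
	}
}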


@@ -73,6 +73,7 @@ type PostingsIterator struct {
 	offset int
 	locoffset int
 	actual roaring.IntIterable
+	reuse Posting
 }
 
 // Next returns the next posting on the postings list, or nil at the end
@@ -92,17 +93,16 @@ func (i *PostingsIterator) Next() (segment.Posting, error) {
 		i.offset++
 		allN = i.all.Next()
 	}
-	rv := &Posting{
+	i.reuse = Posting{
 		iterator: i,
 		docNum: uint64(n),
 		offset: i.offset,
 		locoffset: i.locoffset,
 		hasLoc: i.locations.Contains(n),
 	}
 	i.locoffset += int(i.postings.dictionary.segment.Freqs[i.postings.postingsID-1][i.offset])
 	i.offset++
-	return rv, nil
+	return &i.reuse, nil
 }
 
 // Posting is a single entry in a postings list
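
With the reuse field added above, Next() overwrites one iterator-owned Posting and returns its address, so the hot loop no longer allocates a Posting per call. The trade-off is aliasing: the returned pointer is only valid until the next call to Next(), so a caller that needs to keep a posting must copy it. A minimal sketch of the same pattern, using illustrative Item/Iter types rather than the scorch API:

package main

import "fmt"

type Item struct{ n int }

type Iter struct {
	nums  []int
	pos   int
	reuse Item // overwritten on every call to Next
}

// Next returns a pointer into the iterator itself; the pointed-to value
// is only valid until the following call to Next.
func (it *Iter) Next() *Item {
	if it.pos >= len(it.nums) {
		return nil
	}
	it.reuse = Item{n: it.nums[it.pos]}
	it.pos++
	return &it.reuse
}

func main() {
	it := &Iter{nums: []int{1, 2, 3}}
	for p := it.Next(); p != nil; p = it.Next() {
		v := *p // copy if the value must outlive this iteration
		fmt.Println(v.n)
	}
}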


@@ -552,8 +552,7 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter,
 			nextPosting, err2 := postingsItr.Next()
 			for err2 == nil && nextPosting != nil {
 				docNum := nextPosting.Number()
-				docTermMap[docNum] = append(docTermMap[docNum], []byte(next.Term)...)
-				docTermMap[docNum] = append(docTermMap[docNum], termSeparator)
+				docTermMap[docNum] = append(append(docTermMap[docNum], []byte(next.Term)...), termSeparator)
 				nextPosting, err2 = postingsItr.Next()
 			}
 			if err2 != nil {
@@ -562,10 +561,10 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter,
 		next, err = dictItr.Next()
 	}
 
 	if err != nil {
 		return nil, err
 	}
 	// sort wrt to docIDs
 	var docNumbers docIDRange
 	for k := range docTermMap {
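
Folding the two appends into one nested call means the docTermMap[docNum] entry is read and written once per posting instead of twice, trimming two map accesses from the inner loop; the inner append may grow the slice, and the outer append then adds the separator to the (possibly reallocated) result before the single map store. A standalone sketch of the rewrite, where termSeparator is a hypothetical separator byte rather than the actual zap constant:

package main

import "fmt"

const termSeparator byte = 0xff // hypothetical separator value

func main() {
	docTermMap := map[uint64][]byte{}
	docNum, term := uint64(7), "alpha"

	// Before: two map reads and two map writes per posting.
	// docTermMap[docNum] = append(docTermMap[docNum], []byte(term)...)
	// docTermMap[docNum] = append(docTermMap[docNum], termSeparator)

	// After: the map entry is read once and written once.
	docTermMap[docNum] = append(append(docTermMap[docNum], []byte(term)...), termSeparator)

	fmt.Printf("%q\n", docTermMap[docNum]) // "alpha\xff"
}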