scorch fix for TestSortMatchSearch
The cachedDocs preparation has to happen for all docs in the field, not just for the currently requested docNum. Also, as part of this commit, there's a loop optimization: we no longer use bytes.Split() on the terms buffer, which avoids creating garbage.
This commit is contained in:
parent
5bca9f06b9
commit
a8884e1011
|
@ -413,7 +413,7 @@ func (i *IndexSnapshot) DocumentVisitFieldTerms(id index.IndexInternalID,
|
|||
|
||||
ss := i.segment[segmentIndex]
|
||||
|
||||
err = ss.cachedDocs.prepareFields(localDocNum, fields, ss)
|
||||
err = ss.cachedDocs.prepareFields(fields, ss)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -421,11 +421,13 @@ func (i *IndexSnapshot) DocumentVisitFieldTerms(id index.IndexInternalID,
|
|||
for _, field := range fields {
|
||||
if cachedFieldDocs, exists := ss.cachedDocs.cache[field]; exists {
|
||||
if tlist, exists := cachedFieldDocs.docs[localDocNum]; exists {
|
||||
terms := bytes.SplitN(tlist, TermSeparatorSplitSlice, -1)
|
||||
for _, term := range terms {
|
||||
if len(term) > 0 {
|
||||
visitor(field, term)
|
||||
for {
|
||||
i := bytes.Index(tlist, TermSeparatorSplitSlice)
|
||||
if i < 0 {
|
||||
break
|
||||
}
|
||||
visitor(field, tlist[0:i])
|
||||
tlist = tlist[i+1:]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -171,8 +171,7 @@ type cachedFieldDocs struct {
|
|||
docs map[uint64][]byte // Keyed by localDocNum, value is a list of terms delimited by 0xFF.
|
||||
}
|
||||
|
||||
func (cfd *cachedFieldDocs) prepareFields(docNum uint64, field string,
|
||||
ss *SegmentSnapshot) {
|
||||
func (cfd *cachedFieldDocs) prepareFields(field string, ss *SegmentSnapshot) {
|
||||
defer close(cfd.readyCh)
|
||||
|
||||
dict, err := ss.segment.Dictionary(field)
|
||||
|
@ -183,7 +182,7 @@ func (cfd *cachedFieldDocs) prepareFields(docNum uint64, field string,
|
|||
|
||||
dictItr := dict.Iterator()
|
||||
next, err := dictItr.Next()
|
||||
for next != nil && err == nil {
|
||||
for err == nil && next != nil {
|
||||
postings, err1 := dict.PostingsList(next.Term, nil)
|
||||
if err1 != nil {
|
||||
cfd.err = err1
|
||||
|
@ -192,12 +191,10 @@ func (cfd *cachedFieldDocs) prepareFields(docNum uint64, field string,
|
|||
|
||||
postingsItr := postings.Iterator()
|
||||
nextPosting, err2 := postingsItr.Next()
|
||||
for err2 == nil && nextPosting != nil && nextPosting.Number() <= docNum {
|
||||
if nextPosting.Number() == docNum {
|
||||
// got what we're looking for
|
||||
cfd.docs[docNum] = append(cfd.docs[docNum], []byte(next.Term)...)
|
||||
cfd.docs[docNum] = append(cfd.docs[docNum], TermSeparator)
|
||||
}
|
||||
for err2 == nil && nextPosting != nil {
|
||||
docNum := nextPosting.Number()
|
||||
cfd.docs[docNum] = append(cfd.docs[docNum], []byte(next.Term)...)
|
||||
cfd.docs[docNum] = append(cfd.docs[docNum], TermSeparator)
|
||||
nextPosting, err2 = postingsItr.Next()
|
||||
}
|
||||
|
||||
|
@ -220,8 +217,7 @@ type cachedDocs struct {
|
|||
cache map[string]*cachedFieldDocs // Keyed by field
|
||||
}
|
||||
|
||||
func (c *cachedDocs) prepareFields(docNum uint64, wantedFields []string,
|
||||
ss *SegmentSnapshot) error {
|
||||
func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) error {
|
||||
c.m.Lock()
|
||||
if c.cache == nil {
|
||||
c.cache = make(map[string]*cachedFieldDocs, len(ss.Fields()))
|
||||
|
@ -235,7 +231,7 @@ func (c *cachedDocs) prepareFields(docNum uint64, wantedFields []string,
|
|||
docs: make(map[uint64][]byte),
|
||||
}
|
||||
|
||||
go c.cache[field].prepareFields(docNum, field, ss)
|
||||
go c.cache[field].prepareFields(field, ss)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue