optimize DisjunctionSearcher.Next()

This change simplifies the searcher by removing the DisjunctionSearcher.currentID tracking, and instead utilizes the matching/matchingIdxs slices for tracking the required information. As the core of the optimization, the previous code used two loop passes to compare the internal IDs to the currentID field. This commit instead uses a single pass to both compare the internal IDs and maintain the matching/matchingIdxs slices. On my dev box, using a bleve-query benchmark on a wiki dataset, with a query-string of "text:afternoon text:coffee", the previous code had a throughput of 958qps, and this commit has 1174qps.
2016-09-20 19:22:37 -07:00 · 2016-09-20 19:22:37 -07:00 · e344582021
parent 75281a1f9f
commit e344582021
1 changed files with 33 additions and 23 deletions
--- a/search/searchers/search_disjunction.go
+++ b/search/searchers/search_disjunction.go
@@ -29,7 +29,6 @@ type DisjunctionSearcher struct {
 	searchers    OrderedSearcherList
 	queryNorm    float64
 	currs        []*search.DocumentMatch
-	currentID    index.IndexInternalID
 	scorer       *scorers.DisjunctionQueryScorer
 	min          int
 	matching     []*search.DocumentMatch
@@ -100,19 +99,38 @@ func (s *DisjunctionSearcher) initSearchers(ctx *search.SearchContext) error {
 		}
 	}

-	s.currentID = s.nextSmallestID()
+	s.updateMatches()
 	s.initialized = true
 	return nil
 }

-func (s *DisjunctionSearcher) nextSmallestID() index.IndexInternalID {
-	var rv index.IndexInternalID
-	for _, curr := range s.currs {
-		if curr != nil && (curr.IndexInternalID.Compare(rv) < 0 || rv == nil) {
-			rv = curr.IndexInternalID
+func (s *DisjunctionSearcher) updateMatches() {
+	matching := s.matching[:0]
+	matchingIdxs := s.matchingIdxs[:0]
+
+	for i, curr := range s.currs {
+		if curr == nil {
+			continue
 		}
+
+		if len(matching) > 0 {
+			cmp := curr.IndexInternalID.Compare(matching[0].IndexInternalID)
+			if cmp > 0 {
+				continue
+			}
+
+			if cmp < 0 {
+				matching = matching[:0]
+				matchingIdxs = matchingIdxs[:0]
+			}
+		}
+
+		matching = append(matching, curr)
+		matchingIdxs = append(matchingIdxs, i)
 	}
-	return rv
+
+	s.matching = matching
+	s.matchingIdxs = matchingIdxs
 }

 func (s *DisjunctionSearcher) Weight() float64 {
@@ -140,24 +158,15 @@ func (s *DisjunctionSearcher) Next(ctx *search.SearchContext) (*search.DocumentM
 	var rv *search.DocumentMatch

 	found := false
-	for !found && s.currentID != nil {
-		matching := s.matching[:0]
-		matchingIdxs := s.matchingIdxs[:0]
-		for i, curr := range s.currs {
-			if curr != nil && curr.IndexInternalID.Equals(s.currentID) {
-				matching = append(matching, curr)
-				matchingIdxs = append(matchingIdxs, i)
-			}
-		}
-
-		if len(matching) >= s.min {
+	for !found && len(s.matching) > 0 {
+		if len(s.matching) >= s.min {
 			found = true
 			// score this match
-			rv = s.scorer.Score(ctx, matching, len(matching), len(s.searchers))
+			rv = s.scorer.Score(ctx, s.matching, len(s.matching), len(s.searchers))
 		}

 		// invoke next on all the matching searchers
-		for _, i := range matchingIdxs {
+		for _, i := range s.matchingIdxs {
 			searcher := s.searchers[i]
 			if s.currs[i] != rv {
 				ctx.DocumentMatchPool.Put(s.currs[i])
@@ -167,7 +176,8 @@ func (s *DisjunctionSearcher) Next(ctx *search.SearchContext) (*search.DocumentM
 				return nil, err
 			}
 		}
-		s.currentID = s.nextSmallestID()
+
+		s.updateMatches()
 	}
 	return rv, nil
 }
@@ -191,7 +201,7 @@ func (s *DisjunctionSearcher) Advance(ctx *search.SearchContext, ID index.IndexI
 		}
 	}

-	s.currentID = s.nextSmallestID()
+	s.updateMatches()

 	return s.Next(ctx)
 }