improved implementation to address perf regressions

The primary change is going back to sort values being []string and not []interface{}; this avoids allocations converting into the interface{}. That sounds obvious, so why didn't we just do that first? Because a common (default) sort is score, which is naturally a number, not a string (like terms). Converting into the number was also expensive, and the common case. So, this solution also makes the change to NOT put the score into the sort value list. Instead you see the dummy value "_score". This is just a placeholder; the actual sort impl knows that field of the sort is the score, and will sort using the actual score. Also, several other aspects of the benchmark were cleaned up so that unnecessary allocations do not pollute the cpu profiles. Here are the updated benchmarks: $ go test -run=xxx -bench=. -benchmem -cpuprofile=cpu.out BenchmarkTop10of100000Scores-4 3000 465809 ns/op 2548 B/op 33 allocs/op BenchmarkTop100of100000Scores-4 2000 626488 ns/op 21484 B/op 213 allocs/op BenchmarkTop10of1000000Scores-4 300 5107658 ns/op 2560 B/op 33 allocs/op BenchmarkTop100of1000000Scores-4 300 5275403 ns/op 21624 B/op 213 allocs/op PASS ok github.com/blevesearch/bleve/search/collectors 7.188s Prior to this PR, master reported: $ go test -run=xxx -bench=. -benchmem BenchmarkTop10of100000Scores-4 3000 453269 ns/op 360161 B/op 42 allocs/op BenchmarkTop100of100000Scores-4 2000 519131 ns/op 388275 B/op 219 allocs/op BenchmarkTop10of1000000Scores-4 200 7459004 ns/op 4628236 B/op 52 allocs/op BenchmarkTop100of1000000Scores-4 200 8064864 ns/op 4656596 B/op 232 allocs/op PASS ok github.com/blevesearch/bleve/search/collectors 7.385s So, we're pretty close on the smaller datasets, and we scale better on the larger datasets. We also show fewer allocations and bytes in all cases (some of this is artificial due to test cleanup).
2016-08-25 15:47:07 -04:00 · 2016-08-25 15:47:07 -04:00 · 60750c1614
parent ce0b299d6f
commit 60750c1614
9 changed files with 64 additions and 91 deletions
--- a/index_alias_impl_test.go
+++ b/index_alias_impl_test.go
@ -469,7 +469,7 @@ func TestIndexAliasMulti(t *testing.T) {
 				{
 					ID:    "a",
 					Score: 1.0,
-					Sort:  []interface{}{string(score1)},
+					Sort:  []string{string(score1)},
 				},
 			},
 			MaxScore: 1.0,
@ -489,7 +489,7 @@ func TestIndexAliasMulti(t *testing.T) {
 				{
 					ID:    "b",
 					Score: 2.0,
-					Sort:  []interface{}{string(score2)},
+					Sort:  []string{string(score2)},
 				},
 			},
 			MaxScore: 2.0,
@ -577,12 +577,12 @@ func TestIndexAliasMulti(t *testing.T) {
 			{
 				ID:    "b",
 				Score: 2.0,
-				Sort:  []interface{}{string(score2)},
+				Sort:  []string{string(score2)},
 			},
 			{
 				ID:    "a",
 				Score: 1.0,
-				Sort:  []interface{}{string(score1)},
+				Sort:  []string{string(score1)},
 			},
 		},
 		MaxScore: 2.0,
@ -622,7 +622,7 @@ func TestMultiSearchNoError(t *testing.T) {
 				Index: "1",
 				ID:    "a",
 				Score: 1.0,
-				Sort:  []interface{}{string(score1)},
+				Sort:  []string{string(score1)},
 			},
 		},
 		MaxScore: 1.0,
@ -639,7 +639,7 @@ func TestMultiSearchNoError(t *testing.T) {
 				Index: "2",
 				ID:    "b",
 				Score: 2.0,
-				Sort:  []interface{}{string(score2)},
+				Sort:  []string{string(score2)},
 			},
 		},
 		MaxScore: 2.0,
@ -659,13 +659,13 @@ func TestMultiSearchNoError(t *testing.T) {
 				Index: "2",
 				ID:    "b",
 				Score: 2.0,
-				Sort:  []interface{}{string(score2)},
+				Sort:  []string{string(score2)},
 			},
 			{
 				Index: "1",
 				ID:    "a",
 				Score: 1.0,
-				Sort:  []interface{}{string(score1)},
+				Sort:  []string{string(score1)},
 			},
 		},
 		MaxScore: 2.0,
@ -818,7 +818,7 @@ func TestMultiSearchTimeout(t *testing.T) {
 					Index: "1",
 					ID:    "a",
 					Score: 1.0,
-					Sort:  []interface{}{string(score1)},
+					Sort:  []string{string(score1)},
 				},
 			},
 			MaxScore: 1.0,
@ -842,7 +842,7 @@ func TestMultiSearchTimeout(t *testing.T) {
 					Index: "2",
 					ID:    "b",
 					Score: 2.0,
-					Sort:  []interface{}{string(score2)},
+					Sort:  []string{string(score2)},
 				},
 			},
 			MaxScore: 2.0,
@ -944,7 +944,7 @@ func TestMultiSearchTimeoutPartial(t *testing.T) {
 					Index: "1",
 					ID:    "a",
 					Score: 1.0,
-					Sort:  []interface{}{string(score1)},
+					Sort:  []string{string(score1)},
 				},
 			},
 			MaxScore: 1.0,
@ -964,7 +964,7 @@ func TestMultiSearchTimeoutPartial(t *testing.T) {
 					Index: "2",
 					ID:    "b",
 					Score: 2.0,
-					Sort:  []interface{}{string(score2)},
+					Sort:  []string{string(score2)},
 				},
 			},
 			MaxScore: 2.0,
@ -989,7 +989,7 @@ func TestMultiSearchTimeoutPartial(t *testing.T) {
 					Index: "3",
 					ID:    "c",
 					Score: 3.0,
-					Sort:  []interface{}{string(score3)},
+					Sort:  []string{string(score3)},
 				},
 			},
 			MaxScore: 3.0,
@ -1016,13 +1016,13 @@ func TestMultiSearchTimeoutPartial(t *testing.T) {
 				Index: "2",
 				ID:    "b",
 				Score: 2.0,
-				Sort:  []interface{}{string(score2)},
+				Sort:  []string{string(score2)},
 			},
 			{
 				Index: "1",
 				ID:    "a",
 				Score: 1.0,
-				Sort:  []interface{}{string(score1)},
+				Sort:  []string{string(score1)},
 			},
 		},
 		MaxScore: 2.0,
@ -1058,7 +1058,7 @@ func TestIndexAliasMultipleLayer(t *testing.T) {
 					Index: "1",
 					ID:    "a",
 					Score: 1.0,
-					Sort:  []interface{}{string(score1)},
+					Sort:  []string{string(score1)},
 				},
 			},
 			MaxScore: 1.0,
@ -1082,7 +1082,7 @@ func TestIndexAliasMultipleLayer(t *testing.T) {
 					Index: "2",
 					ID:    "b",
 					Score: 2.0,
-					Sort:  []interface{}{string(score2)},
+					Sort:  []string{string(score2)},
 				},
 			},
 			MaxScore: 2.0,
@ -1107,7 +1107,7 @@ func TestIndexAliasMultipleLayer(t *testing.T) {
 					Index: "3",
 					ID:    "c",
 					Score: 3.0,
-					Sort:  []interface{}{string(score3)},
+					Sort:  []string{string(score3)},
 				},
 			},
 			MaxScore: 3.0,
@ -1128,7 +1128,7 @@ func TestIndexAliasMultipleLayer(t *testing.T) {
 					Index: "4",
 					ID:    "d",
 					Score: 4.0,
-					Sort:  []interface{}{string(score4)},
+					Sort:  []string{string(score4)},
 				},
 			},
 			MaxScore: 4.0,
@ -1162,13 +1162,13 @@ func TestIndexAliasMultipleLayer(t *testing.T) {
 				Index: "4",
 				ID:    "d",
 				Score: 4.0,
-				Sort:  []interface{}{string(score4)},
+				Sort:  []string{string(score4)},
 			},
 			{
 				Index: "1",
 				ID:    "a",
 				Score: 1.0,
-				Sort:  []interface{}{string(score1)},
+				Sort:  []string{string(score1)},
 			},
 		},
 		MaxScore: 4.0,
@ -1198,13 +1198,13 @@ func TestMultiSearchCustomSort(t *testing.T) {
 				Index: "1",
 				ID:    "a",
 				Score: 1.0,
-				Sort:  []interface{}{"albert"},
+				Sort:  []string{"albert"},
 			},
 			{
 				Index: "1",
 				ID:    "b",
 				Score: 2.0,
-				Sort:  []interface{}{"crown"},
+				Sort:  []string{"crown"},
 			},
 		},
 		MaxScore: 2.0,
@ -1221,13 +1221,13 @@ func TestMultiSearchCustomSort(t *testing.T) {
 				Index: "2",
 				ID:    "c",
 				Score: 2.5,
-				Sort:  []interface{}{"frank"},
+				Sort:  []string{"frank"},
 			},
 			{
 				Index: "2",
 				ID:    "d",
 				Score: 3.0,
-				Sort:  []interface{}{"zombie"},
+				Sort:  []string{"zombie"},
 			},
 		},
 		MaxScore: 3.0,
@ -1248,25 +1248,25 @@ func TestMultiSearchCustomSort(t *testing.T) {
 				Index: "1",
 				ID:    "a",
 				Score: 1.0,
-				Sort:  []interface{}{"albert"},
+				Sort:  []string{"albert"},
 			},
 			{
 				Index: "1",
 				ID:    "b",
 				Score: 2.0,
-				Sort:  []interface{}{"crown"},
+				Sort:  []string{"crown"},
 			},
 			{
 				Index: "2",
 				ID:    "c",
 				Score: 2.5,
-				Sort:  []interface{}{"frank"},
+				Sort:  []string{"frank"},
 			},
 			{
 				Index: "2",
 				ID:    "d",
 				Score: 3.0,
-				Sort:  []interface{}{"zombie"},
+				Sort:  []string{"zombie"},
 			},
 		},
 		MaxScore: 3.0,
--- a/search/collectors/bench_test.go
+++ b/search/collectors/bench_test.go
@ -13,13 +13,12 @@ import (
 type createCollector func() search.Collector

 func benchHelper(numOfMatches int, cc createCollector, b *testing.B) {
-	dp := search.NewDocumentMatchPool(numOfMatches, 1)
 	matches := make([]*search.DocumentMatch, 0, numOfMatches)
 	for i := 0; i < numOfMatches; i++ {
-		match := dp.Get()
-		match.IndexInternalID = index.IndexInternalID(strconv.Itoa(i))
-		match.Score = rand.Float64()
-		matches = append(matches, match)
+		matches = append(matches, &search.DocumentMatch{
+			IndexInternalID: index.IndexInternalID(strconv.Itoa(i)),
+			Score:           rand.Float64(),
+		})
 	}

 	b.ResetTimer()
--- a/search/collectors/collector_heap.go
+++ b/search/collectors/collector_heap.go
@ -38,7 +38,9 @@ var COLLECT_CHECK_DONE_EVERY = uint64(1024)

 func NewHeapCollector(size int, skip int, sort search.SortOrder) *HeapCollector {
 	hc := &HeapCollector{size: size, skip: skip, sort: sort}
-	hc.results = make(search.DocumentMatchCollection, 0, size+skip)
+	// pre-allocate space on the heap, we need size+skip results
+	// +1 additional while figuring out which to evict
+	hc.results = make(search.DocumentMatchCollection, 0, size+skip+1)
 	heap.Init(hc)

 	// these lookups traverse an interface, so do once up-front
--- a/search/collectors/search_test.go
+++ b/search/collectors/search_test.go
@ -22,7 +22,9 @@ type stubSearcher struct {

 func (ss *stubSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
 	if ss.index < len(ss.matches) {
-		rv := ss.matches[ss.index]
+		rv := ctx.DocumentMatchPool.Get()
+		rv.IndexInternalID = ss.matches[ss.index].IndexInternalID
+		rv.Score = ss.matches[ss.index].Score
 		ss.index++
 		return rv, nil
 	}
@ -35,7 +37,9 @@ func (ss *stubSearcher) Advance(ctx *search.SearchContext, ID index.IndexInterna
 		ss.index++
 	}
 	if ss.index < len(ss.matches) {
-		rv := ss.matches[ss.index]
+		rv := ctx.DocumentMatchPool.Get()
+		rv.IndexInternalID = ss.matches[ss.index].IndexInternalID
+		rv.Score = ss.matches[ss.index].Score
 		ss.index++
 		return rv, nil
 	}
--- a/search/pool.go
+++ b/search/pool.go
@ -37,7 +37,7 @@ func NewDocumentMatchPool(size, sortsize int) *DocumentMatchPool {
 	startBlock := make([]DocumentMatch, size)
 	// make these initial instances available
 	for i := range startBlock {
-		startBlock[i].Sort = make([]interface{}, 0, sortsize)
+		startBlock[i].Sort = make([]string, 0, sortsize)
 		avail = append(avail, &startBlock[i])
 	}
 	return &DocumentMatchPool{
--- a/search/scorers/scorer_constant_test.go
+++ b/search/scorers/scorer_constant_test.go
@ -47,7 +47,7 @@ func TestConstantScorer(t *testing.T) {
 					Value:   1.0,
 					Message: "ConstantScore()",
 				},
-				Sort: []interface{}{},
+				Sort: []string{},
 			},
 		},
 	}
@ -83,7 +83,7 @@ func TestConstantScorerWithQueryNorm(t *testing.T) {
 			result: &search.DocumentMatch{
 				IndexInternalID: index.IndexInternalID("one"),
 				Score:           2.0,
-				Sort:            []interface{}{},
+				Sort:            []string{},
 				Expl: &search.Explanation{
 					Value:   2.0,
 					Message: "weight(^1.000000), product of:",
--- a/search/scorers/scorer_term_test.go
+++ b/search/scorers/scorer_term_test.go
@ -50,7 +50,7 @@ func TestTermScorer(t *testing.T) {
 			result: &search.DocumentMatch{
 				IndexInternalID: index.IndexInternalID("one"),
 				Score:           math.Sqrt(1.0) * idf,
-				Sort:            []interface{}{},
+				Sort:            []string{},
 				Expl: &search.Explanation{
 					Value:   math.Sqrt(1.0) * idf,
 					Message: "fieldWeight(desc:beer in one), product of:",
@ -92,7 +92,7 @@ func TestTermScorer(t *testing.T) {
 			result: &search.DocumentMatch{
 				IndexInternalID: index.IndexInternalID("one"),
 				Score:           math.Sqrt(1.0) * idf,
-				Sort:            []interface{}{},
+				Sort:            []string{},
 				Expl: &search.Explanation{
 					Value:   math.Sqrt(1.0) * idf,
 					Message: "fieldWeight(desc:beer in one), product of:",
@ -123,7 +123,7 @@ func TestTermScorer(t *testing.T) {
 			result: &search.DocumentMatch{
 				IndexInternalID: index.IndexInternalID("one"),
 				Score:           math.Sqrt(65) * idf,
-				Sort:            []interface{}{},
+				Sort:            []string{},
 				Expl: &search.Explanation{
 					Value:   math.Sqrt(65) * idf,
 					Message: "fieldWeight(desc:beer in one), product of:",
@ -190,7 +190,7 @@ func TestTermScorerWithQueryNorm(t *testing.T) {
 			result: &search.DocumentMatch{
 				IndexInternalID: index.IndexInternalID("one"),
 				Score:           math.Sqrt(1.0) * idf * 3.0 * idf * 2.0,
-				Sort:            []interface{}{},
+				Sort:            []string{},
 				Expl: &search.Explanation{
 					Value:   math.Sqrt(1.0) * idf * 3.0 * idf * 2.0,
 					Message: "weight(desc:beer^3.000000 in one), product of:",
--- a/search/search.go
+++ b/search/search.go
@ -65,7 +65,7 @@ type DocumentMatch struct {
 	Expl            *Explanation          `json:"explanation,omitempty"`
 	Locations       FieldTermLocationMap  `json:"locations,omitempty"`
 	Fragments       FieldFragmentMap      `json:"fragments,omitempty"`
-	Sort            []interface{}         `json:"sort,omitempty"`
+	Sort            []string              `json:"sort,omitempty"`

 	// Fields contains the values for document fields listed in
 	// SearchRequest.Fields. Text fields are returned as strings, numeric
--- a/search/sort.go
+++ b/search/sort.go
@ -22,7 +22,7 @@ var HighTerm = strings.Repeat(string([]byte{0xff}), 10)
 var LowTerm = string([]byte{0x00})

 type SearchSort interface {
-	Value(a *DocumentMatch) interface{}
+	Value(a *DocumentMatch) string
 	Descending() bool

 	RequiresDocID() bool
@ -171,36 +171,17 @@ func (so SortOrder) Value(doc *DocumentMatch) {
 func (so SortOrder) Compare(i, j *DocumentMatch) int {
 	// compare the documents on all search sorts until a differences is found
 	for x, soi := range so {
-		iVal := i.Sort[x]
-		jVal := j.Sort[x]
 		c := 0
-		switch iVal := iVal.(type) {
-		case string:
-			switch jVal := jVal.(type) {
-			case string:
-				// both string
-				c = strings.Compare(iVal, jVal)
-			case float64:
-				// i is string, j is number, i sorts higher
-				ji := numeric_util.Float64ToInt64(jVal)
-				jt, _ := numeric_util.NewPrefixCodedInt64(ji, 0)
-				c = strings.Compare(iVal, string(jt))
-			}
-		case float64:
-			switch jVal := jVal.(type) {
-			case string:
-				// i is number, j is string
-				ii := numeric_util.Float64ToInt64(iVal)
-				it, _ := numeric_util.NewPrefixCodedInt64(ii, 0)
-				c = strings.Compare(string(it), jVal)
-			case float64:
-				// numeric comparison
-				if iVal < jVal {
-					c = -1
-				} else if iVal > jVal {
-					c = 1
-				}
+		if soi.RequiresScoring() {
+			if i.Score < j.Score {
+				c = -1
+			} else if i.Score > j.Score {
+				c = 1
 			}
+		} else {
+			iVal := i.Sort[x]
+			jVal := j.Sort[x]
+			c = strings.Compare(iVal, jVal)
 		}

 		if c == 0 {
@ -301,23 +282,10 @@ type SortField struct {
 }

 // Value returns the sort value of the DocumentMatch
-func (s *SortField) Value(i *DocumentMatch) interface{} {
+func (s *SortField) Value(i *DocumentMatch) string {
 	iTerms := i.CachedFieldTerms[s.Field]
 	iTerms = s.filterTermsByType(iTerms)
 	iTerm := s.filterTermsByMode(iTerms)
-	if s.Type == SortFieldAsNumber || s.Type == SortFieldAsDate {
-		// explicitly asked for numeric sort
-		rv, _ := numeric_util.PrefixCoded(iTerm).Int64()
-		return rv
-	} else if s.Type == SortFieldAuto {
-		// asked for auto, looks like a number
-		valid, shift := numeric_util.ValidPrefixCodedTerm(iTerm)
-		if valid && shift == 0 {
-			ri, _ := numeric_util.PrefixCoded(iTerm).Int64()
-			rv := numeric_util.Int64ToFloat64(ri)
-			return rv
-		}
-	}
 	return iTerm
 }

@ -447,7 +415,7 @@ type SortDocID struct {
 }

 // Value returns the sort value of the DocumentMatch
-func (s *SortDocID) Value(i *DocumentMatch) interface{} {
+func (s *SortDocID) Value(i *DocumentMatch) string {
 	return i.ID
 }

@ -478,8 +446,8 @@ type SortScore struct {
 }

 // Value returns the sort value of the DocumentMatch
-func (s *SortScore) Value(i *DocumentMatch) interface{} {
-	return i.Score
+func (s *SortScore) Value(i *DocumentMatch) string {
+	return "_score"
 }

 // Descending determines the order of the sort