0
0
Fork 0

improved implementation to address perf regressions

primary change is going back to sort values being []string
and not []interface{}, this avoids allocations converting
into the interface{}

that sounds obvious, so why didn't we just do that first?
because a common (default) sort is score, which is naturally
a number, not a string (like terms).  converting into the
number was also expensive, and the common case.

so, this solution also makes the change to NOT put the score
into the sort value list.  instead you see the dummy value
"_score".  this is just a placeholder, the actual sort impl
knows that field of the sort is the score, and will sort
using the actual score.

also, several other aspects of the benchmark were cleaned up
so that unnecessary allocations do not pollute the cpu profiles

Here are the updated benchmarks:

$ go test -run=xxx -bench=. -benchmem -cpuprofile=cpu.out
BenchmarkTop10of100000Scores-4     	    3000	    465809 ns/op	    2548 B/op	      33 allocs/op
BenchmarkTop100of100000Scores-4    	    2000	    626488 ns/op	   21484 B/op	     213 allocs/op
BenchmarkTop10of1000000Scores-4    	     300	   5107658 ns/op	    2560 B/op	      33 allocs/op
BenchmarkTop100of1000000Scores-4   	     300	   5275403 ns/op	   21624 B/op	     213 allocs/op
PASS
ok  	github.com/blevesearch/bleve/search/collectors	7.188s

Prior to this PR, master reported:

$ go test -run=xxx -bench=. -benchmem
BenchmarkTop10of100000Scores-4          3000        453269 ns/op      360161 B/op         42 allocs/op
BenchmarkTop100of100000Scores-4         2000        519131 ns/op      388275 B/op        219 allocs/op
BenchmarkTop10of1000000Scores-4          200       7459004 ns/op     4628236 B/op         52 allocs/op
BenchmarkTop100of1000000Scores-4         200       8064864 ns/op     4656596 B/op        232 allocs/op
PASS
ok      github.com/blevesearch/bleve/search/collectors  7.385s

So, we're pretty close on the smaller datasets, and we scale better on the larger datasets.
We also show fewer allocations and bytes in all cases (some of this is artificial due to test cleanup).
This commit is contained in:
Marty Schoch 2016-08-25 15:47:07 -04:00
parent ce0b299d6f
commit 60750c1614
9 changed files with 64 additions and 91 deletions

View File

@ -469,7 +469,7 @@ func TestIndexAliasMulti(t *testing.T) {
{
ID: "a",
Score: 1.0,
Sort: []interface{}{string(score1)},
Sort: []string{string(score1)},
},
},
MaxScore: 1.0,
@ -489,7 +489,7 @@ func TestIndexAliasMulti(t *testing.T) {
{
ID: "b",
Score: 2.0,
Sort: []interface{}{string(score2)},
Sort: []string{string(score2)},
},
},
MaxScore: 2.0,
@ -577,12 +577,12 @@ func TestIndexAliasMulti(t *testing.T) {
{
ID: "b",
Score: 2.0,
Sort: []interface{}{string(score2)},
Sort: []string{string(score2)},
},
{
ID: "a",
Score: 1.0,
Sort: []interface{}{string(score1)},
Sort: []string{string(score1)},
},
},
MaxScore: 2.0,
@ -622,7 +622,7 @@ func TestMultiSearchNoError(t *testing.T) {
Index: "1",
ID: "a",
Score: 1.0,
Sort: []interface{}{string(score1)},
Sort: []string{string(score1)},
},
},
MaxScore: 1.0,
@ -639,7 +639,7 @@ func TestMultiSearchNoError(t *testing.T) {
Index: "2",
ID: "b",
Score: 2.0,
Sort: []interface{}{string(score2)},
Sort: []string{string(score2)},
},
},
MaxScore: 2.0,
@ -659,13 +659,13 @@ func TestMultiSearchNoError(t *testing.T) {
Index: "2",
ID: "b",
Score: 2.0,
Sort: []interface{}{string(score2)},
Sort: []string{string(score2)},
},
{
Index: "1",
ID: "a",
Score: 1.0,
Sort: []interface{}{string(score1)},
Sort: []string{string(score1)},
},
},
MaxScore: 2.0,
@ -818,7 +818,7 @@ func TestMultiSearchTimeout(t *testing.T) {
Index: "1",
ID: "a",
Score: 1.0,
Sort: []interface{}{string(score1)},
Sort: []string{string(score1)},
},
},
MaxScore: 1.0,
@ -842,7 +842,7 @@ func TestMultiSearchTimeout(t *testing.T) {
Index: "2",
ID: "b",
Score: 2.0,
Sort: []interface{}{string(score2)},
Sort: []string{string(score2)},
},
},
MaxScore: 2.0,
@ -944,7 +944,7 @@ func TestMultiSearchTimeoutPartial(t *testing.T) {
Index: "1",
ID: "a",
Score: 1.0,
Sort: []interface{}{string(score1)},
Sort: []string{string(score1)},
},
},
MaxScore: 1.0,
@ -964,7 +964,7 @@ func TestMultiSearchTimeoutPartial(t *testing.T) {
Index: "2",
ID: "b",
Score: 2.0,
Sort: []interface{}{string(score2)},
Sort: []string{string(score2)},
},
},
MaxScore: 2.0,
@ -989,7 +989,7 @@ func TestMultiSearchTimeoutPartial(t *testing.T) {
Index: "3",
ID: "c",
Score: 3.0,
Sort: []interface{}{string(score3)},
Sort: []string{string(score3)},
},
},
MaxScore: 3.0,
@ -1016,13 +1016,13 @@ func TestMultiSearchTimeoutPartial(t *testing.T) {
Index: "2",
ID: "b",
Score: 2.0,
Sort: []interface{}{string(score2)},
Sort: []string{string(score2)},
},
{
Index: "1",
ID: "a",
Score: 1.0,
Sort: []interface{}{string(score1)},
Sort: []string{string(score1)},
},
},
MaxScore: 2.0,
@ -1058,7 +1058,7 @@ func TestIndexAliasMultipleLayer(t *testing.T) {
Index: "1",
ID: "a",
Score: 1.0,
Sort: []interface{}{string(score1)},
Sort: []string{string(score1)},
},
},
MaxScore: 1.0,
@ -1082,7 +1082,7 @@ func TestIndexAliasMultipleLayer(t *testing.T) {
Index: "2",
ID: "b",
Score: 2.0,
Sort: []interface{}{string(score2)},
Sort: []string{string(score2)},
},
},
MaxScore: 2.0,
@ -1107,7 +1107,7 @@ func TestIndexAliasMultipleLayer(t *testing.T) {
Index: "3",
ID: "c",
Score: 3.0,
Sort: []interface{}{string(score3)},
Sort: []string{string(score3)},
},
},
MaxScore: 3.0,
@ -1128,7 +1128,7 @@ func TestIndexAliasMultipleLayer(t *testing.T) {
Index: "4",
ID: "d",
Score: 4.0,
Sort: []interface{}{string(score4)},
Sort: []string{string(score4)},
},
},
MaxScore: 4.0,
@ -1162,13 +1162,13 @@ func TestIndexAliasMultipleLayer(t *testing.T) {
Index: "4",
ID: "d",
Score: 4.0,
Sort: []interface{}{string(score4)},
Sort: []string{string(score4)},
},
{
Index: "1",
ID: "a",
Score: 1.0,
Sort: []interface{}{string(score1)},
Sort: []string{string(score1)},
},
},
MaxScore: 4.0,
@ -1198,13 +1198,13 @@ func TestMultiSearchCustomSort(t *testing.T) {
Index: "1",
ID: "a",
Score: 1.0,
Sort: []interface{}{"albert"},
Sort: []string{"albert"},
},
{
Index: "1",
ID: "b",
Score: 2.0,
Sort: []interface{}{"crown"},
Sort: []string{"crown"},
},
},
MaxScore: 2.0,
@ -1221,13 +1221,13 @@ func TestMultiSearchCustomSort(t *testing.T) {
Index: "2",
ID: "c",
Score: 2.5,
Sort: []interface{}{"frank"},
Sort: []string{"frank"},
},
{
Index: "2",
ID: "d",
Score: 3.0,
Sort: []interface{}{"zombie"},
Sort: []string{"zombie"},
},
},
MaxScore: 3.0,
@ -1248,25 +1248,25 @@ func TestMultiSearchCustomSort(t *testing.T) {
Index: "1",
ID: "a",
Score: 1.0,
Sort: []interface{}{"albert"},
Sort: []string{"albert"},
},
{
Index: "1",
ID: "b",
Score: 2.0,
Sort: []interface{}{"crown"},
Sort: []string{"crown"},
},
{
Index: "2",
ID: "c",
Score: 2.5,
Sort: []interface{}{"frank"},
Sort: []string{"frank"},
},
{
Index: "2",
ID: "d",
Score: 3.0,
Sort: []interface{}{"zombie"},
Sort: []string{"zombie"},
},
},
MaxScore: 3.0,

View File

@ -13,13 +13,12 @@ import (
type createCollector func() search.Collector
func benchHelper(numOfMatches int, cc createCollector, b *testing.B) {
dp := search.NewDocumentMatchPool(numOfMatches, 1)
matches := make([]*search.DocumentMatch, 0, numOfMatches)
for i := 0; i < numOfMatches; i++ {
match := dp.Get()
match.IndexInternalID = index.IndexInternalID(strconv.Itoa(i))
match.Score = rand.Float64()
matches = append(matches, match)
matches = append(matches, &search.DocumentMatch{
IndexInternalID: index.IndexInternalID(strconv.Itoa(i)),
Score: rand.Float64(),
})
}
b.ResetTimer()

View File

@ -38,7 +38,9 @@ var COLLECT_CHECK_DONE_EVERY = uint64(1024)
func NewHeapCollector(size int, skip int, sort search.SortOrder) *HeapCollector {
hc := &HeapCollector{size: size, skip: skip, sort: sort}
hc.results = make(search.DocumentMatchCollection, 0, size+skip)
// pre-allocate space on the heap, we need size+skip results
// +1 additional while figuring out which to evict
hc.results = make(search.DocumentMatchCollection, 0, size+skip+1)
heap.Init(hc)
// these lookups traverse an interface, so do once up-front

View File

@ -22,7 +22,9 @@ type stubSearcher struct {
func (ss *stubSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
if ss.index < len(ss.matches) {
rv := ss.matches[ss.index]
rv := ctx.DocumentMatchPool.Get()
rv.IndexInternalID = ss.matches[ss.index].IndexInternalID
rv.Score = ss.matches[ss.index].Score
ss.index++
return rv, nil
}
@ -35,7 +37,9 @@ func (ss *stubSearcher) Advance(ctx *search.SearchContext, ID index.IndexInterna
ss.index++
}
if ss.index < len(ss.matches) {
rv := ss.matches[ss.index]
rv := ctx.DocumentMatchPool.Get()
rv.IndexInternalID = ss.matches[ss.index].IndexInternalID
rv.Score = ss.matches[ss.index].Score
ss.index++
return rv, nil
}

View File

@ -37,7 +37,7 @@ func NewDocumentMatchPool(size, sortsize int) *DocumentMatchPool {
startBlock := make([]DocumentMatch, size)
// make these initial instances available
for i := range startBlock {
startBlock[i].Sort = make([]interface{}, 0, sortsize)
startBlock[i].Sort = make([]string, 0, sortsize)
avail = append(avail, &startBlock[i])
}
return &DocumentMatchPool{

View File

@ -47,7 +47,7 @@ func TestConstantScorer(t *testing.T) {
Value: 1.0,
Message: "ConstantScore()",
},
Sort: []interface{}{},
Sort: []string{},
},
},
}
@ -83,7 +83,7 @@ func TestConstantScorerWithQueryNorm(t *testing.T) {
result: &search.DocumentMatch{
IndexInternalID: index.IndexInternalID("one"),
Score: 2.0,
Sort: []interface{}{},
Sort: []string{},
Expl: &search.Explanation{
Value: 2.0,
Message: "weight(^1.000000), product of:",

View File

@ -50,7 +50,7 @@ func TestTermScorer(t *testing.T) {
result: &search.DocumentMatch{
IndexInternalID: index.IndexInternalID("one"),
Score: math.Sqrt(1.0) * idf,
Sort: []interface{}{},
Sort: []string{},
Expl: &search.Explanation{
Value: math.Sqrt(1.0) * idf,
Message: "fieldWeight(desc:beer in one), product of:",
@ -92,7 +92,7 @@ func TestTermScorer(t *testing.T) {
result: &search.DocumentMatch{
IndexInternalID: index.IndexInternalID("one"),
Score: math.Sqrt(1.0) * idf,
Sort: []interface{}{},
Sort: []string{},
Expl: &search.Explanation{
Value: math.Sqrt(1.0) * idf,
Message: "fieldWeight(desc:beer in one), product of:",
@ -123,7 +123,7 @@ func TestTermScorer(t *testing.T) {
result: &search.DocumentMatch{
IndexInternalID: index.IndexInternalID("one"),
Score: math.Sqrt(65) * idf,
Sort: []interface{}{},
Sort: []string{},
Expl: &search.Explanation{
Value: math.Sqrt(65) * idf,
Message: "fieldWeight(desc:beer in one), product of:",
@ -190,7 +190,7 @@ func TestTermScorerWithQueryNorm(t *testing.T) {
result: &search.DocumentMatch{
IndexInternalID: index.IndexInternalID("one"),
Score: math.Sqrt(1.0) * idf * 3.0 * idf * 2.0,
Sort: []interface{}{},
Sort: []string{},
Expl: &search.Explanation{
Value: math.Sqrt(1.0) * idf * 3.0 * idf * 2.0,
Message: "weight(desc:beer^3.000000 in one), product of:",

View File

@ -65,7 +65,7 @@ type DocumentMatch struct {
Expl *Explanation `json:"explanation,omitempty"`
Locations FieldTermLocationMap `json:"locations,omitempty"`
Fragments FieldFragmentMap `json:"fragments,omitempty"`
Sort []interface{} `json:"sort,omitempty"`
Sort []string `json:"sort,omitempty"`
// Fields contains the values for document fields listed in
// SearchRequest.Fields. Text fields are returned as strings, numeric

View File

@ -22,7 +22,7 @@ var HighTerm = strings.Repeat(string([]byte{0xff}), 10)
var LowTerm = string([]byte{0x00})
type SearchSort interface {
Value(a *DocumentMatch) interface{}
Value(a *DocumentMatch) string
Descending() bool
RequiresDocID() bool
@ -171,36 +171,17 @@ func (so SortOrder) Value(doc *DocumentMatch) {
func (so SortOrder) Compare(i, j *DocumentMatch) int {
// compare the documents on all search sorts until a differences is found
for x, soi := range so {
iVal := i.Sort[x]
jVal := j.Sort[x]
c := 0
switch iVal := iVal.(type) {
case string:
switch jVal := jVal.(type) {
case string:
// both string
c = strings.Compare(iVal, jVal)
case float64:
// i is string, j is number, i sorts higher
ji := numeric_util.Float64ToInt64(jVal)
jt, _ := numeric_util.NewPrefixCodedInt64(ji, 0)
c = strings.Compare(iVal, string(jt))
}
case float64:
switch jVal := jVal.(type) {
case string:
// i is number, j is string
ii := numeric_util.Float64ToInt64(iVal)
it, _ := numeric_util.NewPrefixCodedInt64(ii, 0)
c = strings.Compare(string(it), jVal)
case float64:
// numeric comparison
if iVal < jVal {
c = -1
} else if iVal > jVal {
c = 1
}
if soi.RequiresScoring() {
if i.Score < j.Score {
c = -1
} else if i.Score > j.Score {
c = 1
}
} else {
iVal := i.Sort[x]
jVal := j.Sort[x]
c = strings.Compare(iVal, jVal)
}
if c == 0 {
@ -301,23 +282,10 @@ type SortField struct {
}
// Value returns the sort value of the DocumentMatch
func (s *SortField) Value(i *DocumentMatch) interface{} {
func (s *SortField) Value(i *DocumentMatch) string {
iTerms := i.CachedFieldTerms[s.Field]
iTerms = s.filterTermsByType(iTerms)
iTerm := s.filterTermsByMode(iTerms)
if s.Type == SortFieldAsNumber || s.Type == SortFieldAsDate {
// explicitly asked for numeric sort
rv, _ := numeric_util.PrefixCoded(iTerm).Int64()
return rv
} else if s.Type == SortFieldAuto {
// asked for auto, looks like a number
valid, shift := numeric_util.ValidPrefixCodedTerm(iTerm)
if valid && shift == 0 {
ri, _ := numeric_util.PrefixCoded(iTerm).Int64()
rv := numeric_util.Int64ToFloat64(ri)
return rv
}
}
return iTerm
}
@ -447,7 +415,7 @@ type SortDocID struct {
}
// Value returns the sort value of the DocumentMatch
func (s *SortDocID) Value(i *DocumentMatch) interface{} {
func (s *SortDocID) Value(i *DocumentMatch) string {
return i.ID
}
@ -478,8 +446,8 @@ type SortScore struct {
}
// Value returns the sort value of the DocumentMatch
func (s *SortScore) Value(i *DocumentMatch) interface{} {
return i.Score
func (s *SortScore) Value(i *DocumentMatch) string {
return "_score"
}
// Descending determines the order of the sort