bleve/search/scorer/scorer_constant_test.go

//  Copyright (c) 2013 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package scorer

import (
	"reflect"
	"testing"

	"github.com/blevesearch/bleve/index"
	"github.com/blevesearch/bleve/search"
)

func TestConstantScorer(t *testing.T) {

	scorer := NewConstantScorer(1, 1, search.SearcherOptions{Explain: true})

	tests := []struct {
		termMatch *index.TermFieldDoc
		result    *search.DocumentMatch
	}{
		// test some simple math
		{
			termMatch: &index.TermFieldDoc{
				ID:   index.IndexInternalID("one"),
				Freq: 1,
				Norm: 1.0,
				Vectors: []*index.TermFieldVector{
					{
						Field: "desc",
						Pos:   1,
						Start: 0,
						End:   4,
					},
				},
			},
			result: &search.DocumentMatch{
				IndexInternalID: index.IndexInternalID("one"),
				Score:           1.0,
				Expl: &search.Explanation{
					Value:   1.0,
					Message: "ConstantScore()",
				},
				Sort: []string{},
			},
		},
	}

	for _, test := range tests {
		ctx := &search.SearchContext{
			DocumentMatchPool: search.NewDocumentMatchPool(1, 0),
		}
		actual := scorer.Score(ctx, test.termMatch.ID)

		if !reflect.DeepEqual(actual, test.result) {
			t.Errorf("expected %#v got %#v for %#v", test.result, actual, test.termMatch)
		}
	}

}

func TestConstantScorerWithQueryNorm(t *testing.T) {

	scorer := NewConstantScorer(1, 1, search.SearcherOptions{Explain: true})
	scorer.SetQueryNorm(2.0)

	tests := []struct {
		termMatch *index.TermFieldDoc
		result    *search.DocumentMatch
	}{
		{
			termMatch: &index.TermFieldDoc{
				ID:   index.IndexInternalID("one"),
				Freq: 1,
				Norm: 1.0,
			},
			result: &search.DocumentMatch{
				IndexInternalID: index.IndexInternalID("one"),
				Score:           2.0,
				Sort:            []string{},
				Expl: &search.Explanation{
					Value:   2.0,
					Message: "weight(^1.000000), product of:",
					Children: []*search.Explanation{
						{
							Value:   2.0,
							Message: "ConstantScore()^1.000000, product of:",
							Children: []*search.Explanation{
								{
									Value:   1,
									Message: "boost",
								},
								{
									Value:   2,
									Message: "queryNorm",
								},
							},
						},
						{
							Value:   1.0,
							Message: "ConstantScore()",
						},
					},
				},
			},
		},
	}

	for _, test := range tests {
		ctx := &search.SearchContext{
			DocumentMatchPool: search.NewDocumentMatchPool(1, 0),
		}
		actual := scorer.Score(ctx, test.termMatch.ID)

		if !reflect.DeepEqual(actual, test.result) {
			t.Errorf("expected %#v got %#v for %#v", test.result, actual, test.termMatch)
		}
	}

}
added tests for facet builds and constant scorer 2014-11-27 03:09:00 +01:00			`// Copyright (c) 2013 Couchbase, Inc.`
nicer formatting of license header 2016-10-02 16:13:14 +02:00			`//`
			`// Licensed under the Apache License, Version 2.0 (the "License");`
			`// you may not use this file except in compliance with the License.`
			`// You may obtain a copy of the License at`
			`//`
			`// http://www.apache.org/licenses/LICENSE-2.0`
			`//`
			`// Unless required by applicable law or agreed to in writing, software`
			`// distributed under the License is distributed on an "AS IS" BASIS,`
			`// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`// See the License for the specific language governing permissions and`
			`// limitations under the License.`
added tests for facet builds and constant scorer 2014-11-27 03:09:00 +01:00
actually rename packages to singular, not just directory name 2016-10-02 16:29:39 +02:00			`package scorer`
added tests for facet builds and constant scorer 2014-11-27 03:09:00 +01:00
			`import (`
			`"reflect"`
			`"testing"`

			`"github.com/blevesearch/bleve/index"`
			`"github.com/blevesearch/bleve/search"`
			`)`

			`func TestConstantScorer(t *testing.T) {`

API change: optional SearchRequest.IncludeLocations flag This is a change in search result behavior in that location information is no longer provided by default with search results. Although this looks like a wide-ranging change, it's mostly a mechanical replacement of the explain bool flag with a new search.SearcherOptions struct, which holds both the Explain bool flag and the IncludeTermVectors bool flag. 2017-01-06 02:49:45 +01:00			`scorer := NewConstantScorer(1, 1, search.SearcherOptions{Explain: true})`
added tests for facet builds and constant scorer 2014-11-27 03:09:00 +01:00
			`tests := []struct {`
			`termMatch *index.TermFieldDoc`
switch back to single DocumentMatch struct instead of separate DocumentMatch/DocumentMatchInternal rules are simple, everything operates on the IndexInternalID field until the results are returned, then ID is set correctly the IndexInternalID field is not exported to JSON 2016-08-01 20:58:02 +02:00			`result *search.DocumentMatch`
added tests for facet builds and constant scorer 2014-11-27 03:09:00 +01:00			`}{`
			`// test some simple math`
			`{`
			`termMatch: &index.TermFieldDoc{`
changed approach IndexInternalID is now []byte this is still opaque, and should still work for any future index implementations as it is a least common denominator choice, all implementations must internally represent the id as []byte at some point for storage to disk 2016-08-01 20:26:50 +02:00			`ID: index.IndexInternalID("one"),`
added tests for facet builds and constant scorer 2014-11-27 03:09:00 +01:00			`Freq: 1,`
			`Norm: 1.0,`
			`Vectors: []*index.TermFieldVector{`
gofmt simplifications 2016-04-03 03:54:33 +02:00			`{`
added tests for facet builds and constant scorer 2014-11-27 03:09:00 +01:00			`Field: "desc",`
			`Pos: 1,`
			`Start: 0,`
			`End: 4,`
			`},`
			`},`
			`},`
switch back to single DocumentMatch struct instead of separate DocumentMatch/DocumentMatchInternal rules are simple, everything operates on the IndexInternalID field until the results are returned, then ID is set correctly the IndexInternalID field is not exported to JSON 2016-08-01 20:58:02 +02:00			`result: &search.DocumentMatch{`
			`IndexInternalID: index.IndexInternalID("one"),`
			`Score: 1.0,`
added tests for facet builds and constant scorer 2014-11-27 03:09:00 +01:00			`Expl: &search.Explanation{`
			`Value: 1.0,`
			`Message: "ConstantScore()",`
			`},`
improved implementation to address perf regressions primary change is going back to sort values be []string and not []interface{}, this avoids allocations converting into the interface{} that sounds obvious, so why didn't we just do that first? because a common (default) sort is score, which is naturally a number, not a string (like terms). converting into the number was also expensive, and the common case. so, this solution also makes the change to NOT put the score into the sort value list. instead you see the dummy value "_score". this is just a placeholder, the actual sort impl knows that field of the sort is the score, and will sort using the actual score. also, several other aspects of the benchmark were cleaned up so that unnecessary allocations do not pollute the cpu profiles Here are the updated benchmarks: $ go test -run=xxx -bench=. -benchmem -cpuprofile=cpu.out BenchmarkTop10of100000Scores-4 3000 465809 ns/op 2548 B/op 33 allocs/op BenchmarkTop100of100000Scores-4 2000 626488 ns/op 21484 B/op 213 allocs/op BenchmarkTop10of1000000Scores-4 300 5107658 ns/op 2560 B/op 33 allocs/op BenchmarkTop100of1000000Scores-4 300 5275403 ns/op 21624 B/op 213 allocs/op PASS ok github.com/blevesearch/bleve/search/collectors 7.188s Prior to this PR, master reported: $ go test -run=xxx -bench=. -benchmem BenchmarkTop10of100000Scores-4 3000 453269 ns/op 360161 B/op 42 allocs/op BenchmarkTop100of100000Scores-4 2000 519131 ns/op 388275 B/op 219 allocs/op BenchmarkTop10of1000000Scores-4 200 7459004 ns/op 4628236 B/op 52 allocs/op BenchmarkTop100of1000000Scores-4 200 8064864 ns/op 4656596 B/op 232 allocs/op PASS ok github.com/blevesearch/bleve/search/collectors 7.385s So, we're pretty close on the smaller datasets, and we scale better on the larger datasets. We also show fewer allocations and bytes in all cases (some of this is artificial due to test cleanup). 2016-08-25 21:47:07 +02:00			`Sort: []string{},`
added tests for facet builds and constant scorer 2014-11-27 03:09:00 +01:00			`},`
			`},`
			`}`

			`for _, test := range tests {`
refactor search package to reuse DocumentMatch and ID []byte's the motivation for this commit is long and detailed and has been documented externally here: https://gist.github.com/mschoch/5cc5c9cf4669a5fe8512cb7770d3c1a2 the core of the changes are: 1. recognize that collector/searcher need only a fixed number of DocumentMatch instances, and this number can be determined from the structure of the query, not the size of the data 2. knowing this, instances can be allocated in bulk, up front and they can be reused without locking (since all search operations take place in a single goroutine 3. combined with previous commits which enabled reuse of the IndexInternalID []byte, this allows for no allocation/copy of these bytes as well (by using DocumentMatch Reset() method when returning entries to the pool 2016-08-09 04:21:47 +02:00			`ctx := &search.SearchContext{`
switch sort impl to use interface this improves perf in the case where we're not doing any sorting as we avoid allocating memory and converting scores into numeric terms 2016-08-25 01:02:22 +02:00			`DocumentMatchPool: search.NewDocumentMatchPool(1, 0),`
refactor search package to reuse DocumentMatch and ID []byte's the motivation for this commit is long and detailed and has been documented externally here: https://gist.github.com/mschoch/5cc5c9cf4669a5fe8512cb7770d3c1a2 the core of the changes are: 1. recognize that collector/searcher need only a fixed number of DocumentMatch instances, and this number can be determined from the structure of the query, not the size of the data 2. knowing this, instances can be allocated in bulk, up front and they can be reused without locking (since all search operations take place in a single goroutine 3. combined with previous commits which enabled reuse of the IndexInternalID []byte, this allows for no allocation/copy of these bytes as well (by using DocumentMatch Reset() method when returning entries to the pool 2016-08-09 04:21:47 +02:00			`}`
			`actual := scorer.Score(ctx, test.termMatch.ID)`
added tests for facet builds and constant scorer 2014-11-27 03:09:00 +01:00
			`if !reflect.DeepEqual(actual, test.result) {`
			`t.Errorf("expected %#v got %#v for %#v", test.result, actual, test.termMatch)`
			`}`
			`}`

			`}`

			`func TestConstantScorerWithQueryNorm(t *testing.T) {`

API change: optional SearchRequest.IncludeLocations flag This is a change in search result behavior in that location information is no longer provided by default with search results. Although this looks like a wide-ranging change, it's mostly a mechanical replacement of the explain bool flag with a new search.SearcherOptions struct, which holds both the Explain bool flag and the IncludeTermVectors bool flag. 2017-01-06 02:49:45 +01:00			`scorer := NewConstantScorer(1, 1, search.SearcherOptions{Explain: true})`
added tests for facet builds and constant scorer 2014-11-27 03:09:00 +01:00			`scorer.SetQueryNorm(2.0)`

			`tests := []struct {`
			`termMatch *index.TermFieldDoc`
switch back to single DocumentMatch struct instead of separate DocumentMatch/DocumentMatchInternal rules are simple, everything operates on the IndexInternalID field until the results are returned, then ID is set correctly the IndexInternalID field is not exported to JSON 2016-08-01 20:58:02 +02:00			`result *search.DocumentMatch`
added tests for facet builds and constant scorer 2014-11-27 03:09:00 +01:00			`}{`
			`{`
			`termMatch: &index.TermFieldDoc{`
changed approach IndexInternalID is now []byte this is still opaque, and should still work for any future index implementations as it is a least common denominator choice, all implementations must internally represent the id as []byte at some point for storage to disk 2016-08-01 20:26:50 +02:00			`ID: index.IndexInternalID("one"),`
added tests for facet builds and constant scorer 2014-11-27 03:09:00 +01:00			`Freq: 1,`
			`Norm: 1.0,`
			`},`
switch back to single DocumentMatch struct instead of separate DocumentMatch/DocumentMatchInternal rules are simple, everything operates on the IndexInternalID field until the results are returned, then ID is set correctly the IndexInternalID field is not exported to JSON 2016-08-01 20:58:02 +02:00			`result: &search.DocumentMatch{`
			`IndexInternalID: index.IndexInternalID("one"),`
			`Score: 2.0,`
improved implementation to address perf regressions primary change is going back to sort values be []string and not []interface{}, this avoids allocations converting into the interface{} that sounds obvious, so why didn't we just do that first? because a common (default) sort is score, which is naturally a number, not a string (like terms). converting into the number was also expensive, and the common case. so, this solution also makes the change to NOT put the score into the sort value list. instead you see the dummy value "_score". this is just a placeholder, the actual sort impl knows that field of the sort is the score, and will sort using the actual score. also, several other aspects of the benchmark were cleaned up so that unnecessary allocations do not pollute the cpu profiles Here are the updated benchmarks: $ go test -run=xxx -bench=. -benchmem -cpuprofile=cpu.out BenchmarkTop10of100000Scores-4 3000 465809 ns/op 2548 B/op 33 allocs/op BenchmarkTop100of100000Scores-4 2000 626488 ns/op 21484 B/op 213 allocs/op BenchmarkTop10of1000000Scores-4 300 5107658 ns/op 2560 B/op 33 allocs/op BenchmarkTop100of1000000Scores-4 300 5275403 ns/op 21624 B/op 213 allocs/op PASS ok github.com/blevesearch/bleve/search/collectors 7.188s Prior to this PR, master reported: $ go test -run=xxx -bench=. -benchmem BenchmarkTop10of100000Scores-4 3000 453269 ns/op 360161 B/op 42 allocs/op BenchmarkTop100of100000Scores-4 2000 519131 ns/op 388275 B/op 219 allocs/op BenchmarkTop10of1000000Scores-4 200 7459004 ns/op 4628236 B/op 52 allocs/op BenchmarkTop100of1000000Scores-4 200 8064864 ns/op 4656596 B/op 232 allocs/op PASS ok github.com/blevesearch/bleve/search/collectors 7.385s So, we're pretty close on the smaller datasets, and we scale better on the larger datasets. We also show fewer allocations and bytes in all cases (some of this is artificial due to test cleanup). 2016-08-25 21:47:07 +02:00			`Sort: []string{},`
added tests for facet builds and constant scorer 2014-11-27 03:09:00 +01:00			`Expl: &search.Explanation{`
			`Value: 2.0,`
			`Message: "weight(^1.000000), product of:",`
			`Children: []*search.Explanation{`
gofmt simplifications 2016-04-03 03:54:33 +02:00			`{`
added tests for facet builds and constant scorer 2014-11-27 03:09:00 +01:00			`Value: 2.0,`
			`Message: "ConstantScore()^1.000000, product of:",`
			`Children: []*search.Explanation{`
gofmt simplifications 2016-04-03 03:54:33 +02:00			`{`
added tests for facet builds and constant scorer 2014-11-27 03:09:00 +01:00			`Value: 1,`
			`Message: "boost",`
			`},`
gofmt simplifications 2016-04-03 03:54:33 +02:00			`{`
added tests for facet builds and constant scorer 2014-11-27 03:09:00 +01:00			`Value: 2,`
			`Message: "queryNorm",`
			`},`
			`},`
			`},`
gofmt simplifications 2016-04-03 03:54:33 +02:00			`{`
added tests for facet builds and constant scorer 2014-11-27 03:09:00 +01:00			`Value: 1.0,`
			`Message: "ConstantScore()",`
			`},`
			`},`
			`},`
			`},`
			`},`
			`}`

			`for _, test := range tests {`
refactor search package to reuse DocumentMatch and ID []byte's the motivation for this commit is long and detailed and has been documented externally here: https://gist.github.com/mschoch/5cc5c9cf4669a5fe8512cb7770d3c1a2 the core of the changes are: 1. recognize that collector/searcher need only a fixed number of DocumentMatch instances, and this number can be determined from the structure of the query, not the size of the data 2. knowing this, instances can be allocated in bulk, up front and they can be reused without locking (since all search operations take place in a single goroutine 3. combined with previous commits which enabled reuse of the IndexInternalID []byte, this allows for no allocation/copy of these bytes as well (by using DocumentMatch Reset() method when returning entries to the pool 2016-08-09 04:21:47 +02:00			`ctx := &search.SearchContext{`
switch sort impl to use interface this improves perf in the case where we're not doing any sorting as we avoid allocating memory and converting scores into numeric terms 2016-08-25 01:02:22 +02:00			`DocumentMatchPool: search.NewDocumentMatchPool(1, 0),`
refactor search package to reuse DocumentMatch and ID []byte's the motivation for this commit is long and detailed and has been documented externally here: https://gist.github.com/mschoch/5cc5c9cf4669a5fe8512cb7770d3c1a2 the core of the changes are: 1. recognize that collector/searcher need only a fixed number of DocumentMatch instances, and this number can be determined from the structure of the query, not the size of the data 2. knowing this, instances can be allocated in bulk, up front and they can be reused without locking (since all search operations take place in a single goroutine 3. combined with previous commits which enabled reuse of the IndexInternalID []byte, this allows for no allocation/copy of these bytes as well (by using DocumentMatch Reset() method when returning entries to the pool 2016-08-09 04:21:47 +02:00			`}`
			`actual := scorer.Score(ctx, test.termMatch.ID)`
added tests for facet builds and constant scorer 2014-11-27 03:09:00 +01:00
			`if !reflect.DeepEqual(actual, test.result) {`
			`t.Errorf("expected %#v got %#v for %#v", test.result, actual, test.termMatch)`
			`}`
			`}`

			`}`