0
0
Fork 0

switch sort impl to use interface

This improves performance in the case where we're not doing any sorting,
as we avoid allocating memory and converting scores into
numeric terms.
This commit is contained in:
Marty Schoch 2016-08-24 19:02:22 -04:00
parent 5e94145cf4
commit ce0b299d6f
19 changed files with 134 additions and 72 deletions

View File

@ -469,7 +469,7 @@ func TestIndexAliasMulti(t *testing.T) {
{
ID: "a",
Score: 1.0,
Sort: []string{string(score1)},
Sort: []interface{}{string(score1)},
},
},
MaxScore: 1.0,
@ -489,7 +489,7 @@ func TestIndexAliasMulti(t *testing.T) {
{
ID: "b",
Score: 2.0,
Sort: []string{string(score2)},
Sort: []interface{}{string(score2)},
},
},
MaxScore: 2.0,
@ -577,12 +577,12 @@ func TestIndexAliasMulti(t *testing.T) {
{
ID: "b",
Score: 2.0,
Sort: []string{string(score2)},
Sort: []interface{}{string(score2)},
},
{
ID: "a",
Score: 1.0,
Sort: []string{string(score1)},
Sort: []interface{}{string(score1)},
},
},
MaxScore: 2.0,
@ -622,7 +622,7 @@ func TestMultiSearchNoError(t *testing.T) {
Index: "1",
ID: "a",
Score: 1.0,
Sort: []string{string(score1)},
Sort: []interface{}{string(score1)},
},
},
MaxScore: 1.0,
@ -639,7 +639,7 @@ func TestMultiSearchNoError(t *testing.T) {
Index: "2",
ID: "b",
Score: 2.0,
Sort: []string{string(score2)},
Sort: []interface{}{string(score2)},
},
},
MaxScore: 2.0,
@ -659,13 +659,13 @@ func TestMultiSearchNoError(t *testing.T) {
Index: "2",
ID: "b",
Score: 2.0,
Sort: []string{string(score2)},
Sort: []interface{}{string(score2)},
},
{
Index: "1",
ID: "a",
Score: 1.0,
Sort: []string{string(score1)},
Sort: []interface{}{string(score1)},
},
},
MaxScore: 2.0,
@ -818,7 +818,7 @@ func TestMultiSearchTimeout(t *testing.T) {
Index: "1",
ID: "a",
Score: 1.0,
Sort: []string{string(score1)},
Sort: []interface{}{string(score1)},
},
},
MaxScore: 1.0,
@ -842,7 +842,7 @@ func TestMultiSearchTimeout(t *testing.T) {
Index: "2",
ID: "b",
Score: 2.0,
Sort: []string{string(score2)},
Sort: []interface{}{string(score2)},
},
},
MaxScore: 2.0,
@ -944,7 +944,7 @@ func TestMultiSearchTimeoutPartial(t *testing.T) {
Index: "1",
ID: "a",
Score: 1.0,
Sort: []string{string(score1)},
Sort: []interface{}{string(score1)},
},
},
MaxScore: 1.0,
@ -964,7 +964,7 @@ func TestMultiSearchTimeoutPartial(t *testing.T) {
Index: "2",
ID: "b",
Score: 2.0,
Sort: []string{string(score2)},
Sort: []interface{}{string(score2)},
},
},
MaxScore: 2.0,
@ -989,7 +989,7 @@ func TestMultiSearchTimeoutPartial(t *testing.T) {
Index: "3",
ID: "c",
Score: 3.0,
Sort: []string{string(score3)},
Sort: []interface{}{string(score3)},
},
},
MaxScore: 3.0,
@ -1016,13 +1016,13 @@ func TestMultiSearchTimeoutPartial(t *testing.T) {
Index: "2",
ID: "b",
Score: 2.0,
Sort: []string{string(score2)},
Sort: []interface{}{string(score2)},
},
{
Index: "1",
ID: "a",
Score: 1.0,
Sort: []string{string(score1)},
Sort: []interface{}{string(score1)},
},
},
MaxScore: 2.0,
@ -1058,7 +1058,7 @@ func TestIndexAliasMultipleLayer(t *testing.T) {
Index: "1",
ID: "a",
Score: 1.0,
Sort: []string{string(score1)},
Sort: []interface{}{string(score1)},
},
},
MaxScore: 1.0,
@ -1082,7 +1082,7 @@ func TestIndexAliasMultipleLayer(t *testing.T) {
Index: "2",
ID: "b",
Score: 2.0,
Sort: []string{string(score2)},
Sort: []interface{}{string(score2)},
},
},
MaxScore: 2.0,
@ -1107,7 +1107,7 @@ func TestIndexAliasMultipleLayer(t *testing.T) {
Index: "3",
ID: "c",
Score: 3.0,
Sort: []string{string(score3)},
Sort: []interface{}{string(score3)},
},
},
MaxScore: 3.0,
@ -1128,7 +1128,7 @@ func TestIndexAliasMultipleLayer(t *testing.T) {
Index: "4",
ID: "d",
Score: 4.0,
Sort: []string{string(score4)},
Sort: []interface{}{string(score4)},
},
},
MaxScore: 4.0,
@ -1162,13 +1162,13 @@ func TestIndexAliasMultipleLayer(t *testing.T) {
Index: "4",
ID: "d",
Score: 4.0,
Sort: []string{string(score4)},
Sort: []interface{}{string(score4)},
},
{
Index: "1",
ID: "a",
Score: 1.0,
Sort: []string{string(score1)},
Sort: []interface{}{string(score1)},
},
},
MaxScore: 4.0,
@ -1198,13 +1198,13 @@ func TestMultiSearchCustomSort(t *testing.T) {
Index: "1",
ID: "a",
Score: 1.0,
Sort: []string{"albert"},
Sort: []interface{}{"albert"},
},
{
Index: "1",
ID: "b",
Score: 2.0,
Sort: []string{"crown"},
Sort: []interface{}{"crown"},
},
},
MaxScore: 2.0,
@ -1221,13 +1221,13 @@ func TestMultiSearchCustomSort(t *testing.T) {
Index: "2",
ID: "c",
Score: 2.5,
Sort: []string{"frank"},
Sort: []interface{}{"frank"},
},
{
Index: "2",
ID: "d",
Score: 3.0,
Sort: []string{"zombie"},
Sort: []interface{}{"zombie"},
},
},
MaxScore: 3.0,
@ -1248,25 +1248,25 @@ func TestMultiSearchCustomSort(t *testing.T) {
Index: "1",
ID: "a",
Score: 1.0,
Sort: []string{"albert"},
Sort: []interface{}{"albert"},
},
{
Index: "1",
ID: "b",
Score: 2.0,
Sort: []string{"crown"},
Sort: []interface{}{"crown"},
},
{
Index: "2",
ID: "c",
Score: 2.5,
Sort: []string{"frank"},
Sort: []interface{}{"frank"},
},
{
Index: "2",
ID: "d",
Score: 3.0,
Sort: []string{"zombie"},
Sort: []interface{}{"zombie"},
},
},
MaxScore: 3.0,

View File

@ -13,12 +13,13 @@ import (
type createCollector func() search.Collector
func benchHelper(numOfMatches int, cc createCollector, b *testing.B) {
dp := search.NewDocumentMatchPool(numOfMatches, 1)
matches := make([]*search.DocumentMatch, 0, numOfMatches)
for i := 0; i < numOfMatches; i++ {
matches = append(matches, &search.DocumentMatch{
IndexInternalID: index.IndexInternalID(strconv.Itoa(i)),
Score: rand.Float64(),
})
match := dp.Get()
match.IndexInternalID = index.IndexInternalID(strconv.Itoa(i))
match.Score = rand.Float64()
matches = append(matches, match)
}
b.ResetTimer()

View File

@ -28,6 +28,9 @@ type HeapCollector struct {
results search.DocumentMatchCollection
facetsBuilder *search.FacetsBuilder
needDocIds bool
neededFields []string
lowestMatchOutsideResults *search.DocumentMatch
}
@ -35,7 +38,15 @@ var COLLECT_CHECK_DONE_EVERY = uint64(1024)
// NewHeapCollector returns a HeapCollector configured with the given size,
// skip and sort order. The results collection is pre-allocated with a
// capacity of size+skip, and the sort order's doc-ID and field requirements
// are captured once here so later code can read plain struct fields.
func NewHeapCollector(size int, skip int, sort search.SortOrder) *HeapCollector {
	collector := &HeapCollector{
		size:    size,
		skip:    skip,
		sort:    sort,
		results: make(search.DocumentMatchCollection, 0, size+skip),
	}
	heap.Init(collector)

	// these lookups traverse an interface, so resolve them once up-front
	collector.needDocIds = sort.RequiresDocID()
	collector.neededFields = sort.RequiredFields()
	return collector
}
@ -49,7 +60,7 @@ func (hc *HeapCollector) Collect(ctx context.Context, searcher search.Searcher,
// plus possibly one extra for the lowestMatchOutsideResults
// plus the amount required by the searcher tree
searchContext := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(hc.size + hc.skip + 1 + searcher.DocumentMatchPoolSize()),
DocumentMatchPool: search.NewDocumentMatchPool(hc.size+hc.skip+1+searcher.DocumentMatchPoolSize(), len(hc.sort)),
}
select {
@ -105,7 +116,7 @@ func (hc *HeapCollector) collectSingle(ctx *search.SearchContext, reader index.I
var err error
// see if we need to load ID (at this early stage, for example to sort on it)
if hc.sort.RequiresDocID() {
if hc.needDocIds {
d.ID, err = reader.FinalizeDocID(d.IndexInternalID)
if err != nil {
return err
@ -113,9 +124,9 @@ func (hc *HeapCollector) collectSingle(ctx *search.SearchContext, reader index.I
}
// see if we need to load the stored fields
if len(hc.sort.RequiredFields()) > 0 {
if len(hc.neededFields) > 0 {
// find out which fields haven't been loaded yet
fieldsToLoad := d.CachedFieldTerms.FieldsNotYetCached(hc.sort.RequiredFields())
fieldsToLoad := d.CachedFieldTerms.FieldsNotYetCached(hc.neededFields)
// look them up
fieldTerms, err := reader.DocumentFieldTerms(d.IndexInternalID, fieldsToLoad)
if err != nil {
@ -129,7 +140,7 @@ func (hc *HeapCollector) collectSingle(ctx *search.SearchContext, reader index.I
}
// compute this hits sort value
d.Sort = hc.sort.Value(d)
hc.sort.Value(d)
// optimization, we track lowest sorting hit already removed from heap
// with this one comparison, we can avoid all heap operations if

View File

@ -31,12 +31,13 @@ func defaultDocumentMatchPoolTooSmall(p *DocumentMatchPool) *DocumentMatch {
// NewDocumentMatchPool will build a DocumentMatchPool with memory
// pre-allocated to accommodate the requested number of DocumentMatch
// instances
func NewDocumentMatchPool(size int) *DocumentMatchPool {
func NewDocumentMatchPool(size, sortsize int) *DocumentMatchPool {
avail := make(DocumentMatchCollection, 0, size)
// pre-allocate the expected number of instances
startBlock := make([]DocumentMatch, size)
// make these initial instances available
for i := range startBlock {
startBlock[i].Sort = make([]interface{}, 0, sortsize)
avail = append(avail, &startBlock[i])
}
return &DocumentMatchPool{

View File

@ -16,7 +16,7 @@ func TestDocumentMatchPool(t *testing.T) {
tooManyCalled := false
// create a pool
dmp := NewDocumentMatchPool(10)
dmp := NewDocumentMatchPool(10, 0)
dmp.TooSmall = func(inner *DocumentMatchPool) *DocumentMatch {
tooManyCalled = true
return &DocumentMatch{}

View File

@ -47,13 +47,14 @@ func TestConstantScorer(t *testing.T) {
Value: 1.0,
Message: "ConstantScore()",
},
Sort: []interface{}{},
},
},
}
for _, test := range tests {
ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(1),
DocumentMatchPool: search.NewDocumentMatchPool(1, 0),
}
actual := scorer.Score(ctx, test.termMatch.ID)
@ -82,6 +83,7 @@ func TestConstantScorerWithQueryNorm(t *testing.T) {
result: &search.DocumentMatch{
IndexInternalID: index.IndexInternalID("one"),
Score: 2.0,
Sort: []interface{}{},
Expl: &search.Explanation{
Value: 2.0,
Message: "weight(^1.000000), product of:",
@ -112,7 +114,7 @@ func TestConstantScorerWithQueryNorm(t *testing.T) {
for _, test := range tests {
ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(1),
DocumentMatchPool: search.NewDocumentMatchPool(1, 0),
}
actual := scorer.Score(ctx, test.termMatch.ID)

View File

@ -50,6 +50,7 @@ func TestTermScorer(t *testing.T) {
result: &search.DocumentMatch{
IndexInternalID: index.IndexInternalID("one"),
Score: math.Sqrt(1.0) * idf,
Sort: []interface{}{},
Expl: &search.Explanation{
Value: math.Sqrt(1.0) * idf,
Message: "fieldWeight(desc:beer in one), product of:",
@ -91,6 +92,7 @@ func TestTermScorer(t *testing.T) {
result: &search.DocumentMatch{
IndexInternalID: index.IndexInternalID("one"),
Score: math.Sqrt(1.0) * idf,
Sort: []interface{}{},
Expl: &search.Explanation{
Value: math.Sqrt(1.0) * idf,
Message: "fieldWeight(desc:beer in one), product of:",
@ -121,6 +123,7 @@ func TestTermScorer(t *testing.T) {
result: &search.DocumentMatch{
IndexInternalID: index.IndexInternalID("one"),
Score: math.Sqrt(65) * idf,
Sort: []interface{}{},
Expl: &search.Explanation{
Value: math.Sqrt(65) * idf,
Message: "fieldWeight(desc:beer in one), product of:",
@ -145,7 +148,7 @@ func TestTermScorer(t *testing.T) {
for _, test := range tests {
ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(1),
DocumentMatchPool: search.NewDocumentMatchPool(1, 0),
}
actual := scorer.Score(ctx, test.termMatch)
@ -187,6 +190,7 @@ func TestTermScorerWithQueryNorm(t *testing.T) {
result: &search.DocumentMatch{
IndexInternalID: index.IndexInternalID("one"),
Score: math.Sqrt(1.0) * idf * 3.0 * idf * 2.0,
Sort: []interface{}{},
Expl: &search.Explanation{
Value: math.Sqrt(1.0) * idf * 3.0 * idf * 2.0,
Message: "weight(desc:beer^3.000000 in one), product of:",
@ -235,7 +239,7 @@ func TestTermScorerWithQueryNorm(t *testing.T) {
for _, test := range tests {
ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(1),
DocumentMatchPool: search.NewDocumentMatchPool(1, 0),
}
actual := scorer.Score(ctx, test.termMatch)

View File

@ -65,7 +65,7 @@ type DocumentMatch struct {
Expl *Explanation `json:"explanation,omitempty"`
Locations FieldTermLocationMap `json:"locations,omitempty"`
Fragments FieldFragmentMap `json:"fragments,omitempty"`
Sort []string `json:"sort,omitempty"`
Sort []interface{} `json:"sort,omitempty"`
// Fields contains the values for document fields listed in
// SearchRequest.Fields. Text fields are returned as strings, numeric
@ -107,11 +107,15 @@ func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) {
// Reset allows an already allocated DocumentMatch to be reused.
// Every field is set back to its zero value, except IndexInternalID and
// Sort: those keep the slices they already held, truncated to length 0,
// so their backing storage can be reused by later appends.
// It returns dm itself.
func (dm *DocumentMatch) Reset() *DocumentMatch {
// remember the []byte used for the IndexInternalID
indexInternalId := dm.IndexInternalID
// NOTE(review): same statement as the line above with only the local's
// spelling changed; presumably the before/after pair of a rename as shown
// in this diff view. Confirm against the real file.
indexInternalID := dm.IndexInternalID
// remember the []interface{} used for sort
sort := dm.Sort
// idiom to copy over from empty DocumentMatch (0 allocations)
*dm = DocumentMatch{}
// reuse the []byte already allocated (and reset len to 0)
dm.IndexInternalID = indexInternalId[:0]
dm.IndexInternalID = indexInternalID[:0]
// reuse the []interface{} already allocated (and reset len to 0)
dm.Sort = sort[:0]
return dm
}

View File

@ -344,7 +344,7 @@ func TestBooleanSearch(t *testing.T) {
}()
ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize()),
DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0),
}
next, err := test.searcher.Next(ctx)
i := 0

View File

@ -189,7 +189,7 @@ func TestConjunctionSearch(t *testing.T) {
}()
ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(10),
DocumentMatchPool: search.NewDocumentMatchPool(10, 0),
}
next, err := test.searcher.Next(ctx)
i := 0

View File

@ -110,7 +110,7 @@ func TestDisjunctionSearch(t *testing.T) {
}()
ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize()),
DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0),
}
next, err := test.searcher.Next(ctx)
i := 0
@ -164,7 +164,7 @@ func TestDisjunctionAdvance(t *testing.T) {
}
ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(martyOrDustinSearcher.DocumentMatchPoolSize()),
DocumentMatchPool: search.NewDocumentMatchPool(martyOrDustinSearcher.DocumentMatchPoolSize(), 0),
}
match, err := martyOrDustinSearcher.Advance(ctx, index.IndexInternalID("3"))
if err != nil {

View File

@ -64,7 +64,7 @@ func testDocIDSearcher(t *testing.T, indexed, searched, wanted []string) {
}()
ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(searcher.DocumentMatchPoolSize()),
DocumentMatchPool: search.NewDocumentMatchPool(searcher.DocumentMatchPoolSize(), 0),
}
// Check the sequence

View File

@ -107,7 +107,7 @@ func TestFuzzySearch(t *testing.T) {
}()
ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize()),
DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0),
}
next, err := test.searcher.Next(ctx)
i := 0

View File

@ -111,7 +111,7 @@ func TestMatchAllSearch(t *testing.T) {
}()
ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize()),
DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0),
}
next, err := test.searcher.Next(ctx)
i := 0

View File

@ -52,7 +52,7 @@ func TestMatchNoneSearch(t *testing.T) {
}()
ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize()),
DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0),
}
next, err := test.searcher.Next(ctx)
i := 0

View File

@ -70,7 +70,7 @@ func TestPhraseSearch(t *testing.T) {
}()
ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize()),
DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0),
}
next, err := test.searcher.Next(ctx)
i := 0

View File

@ -87,7 +87,7 @@ func TestRegexpSearch(t *testing.T) {
}()
ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize()),
DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0),
}
next, err := test.searcher.Next(ctx)
i := 0

View File

@ -165,7 +165,7 @@ func TestTermSearcher(t *testing.T) {
}
ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(1),
DocumentMatchPool: search.NewDocumentMatchPool(1, 0),
}
docMatch, err := searcher.Next(ctx)
if err != nil {

View File

@ -22,7 +22,7 @@ var HighTerm = strings.Repeat(string([]byte{0xff}), 10)
var LowTerm = string([]byte{0x00})
type SearchSort interface {
Value(a *DocumentMatch) string
Value(a *DocumentMatch) interface{}
Descending() bool
RequiresDocID() bool
@ -160,27 +160,55 @@ func ParseSortOrderJSON(in []json.RawMessage) (SortOrder, error) {
type SortOrder []SearchSort
func (so SortOrder) Value(doc *DocumentMatch) []string {
rv := make([]string, len(so))
for i, soi := range so {
rv[i] = soi.Value(doc)
func (so SortOrder) Value(doc *DocumentMatch) {
for _, soi := range so {
doc.Sort = append(doc.Sort, soi.Value(doc))
}
return rv
}
// Compare will compare two document matches using the specified sort order
// if both are numbers, we avoid converting back to term
func (so SortOrder) Compare(i, j *DocumentMatch) int {
// compare the documents on all search sorts until a difference is found
for x, soi := range so {
iVal := i.Sort[x]
jVal := j.Sort[x]
c := strings.Compare(iVal, jVal)
c := 0
switch iVal := iVal.(type) {
case string:
switch jVal := jVal.(type) {
case string:
// both string
c = strings.Compare(iVal, jVal)
case float64:
// i is string, j is number, i sorts higher
ji := numeric_util.Float64ToInt64(jVal)
jt, _ := numeric_util.NewPrefixCodedInt64(ji, 0)
c = strings.Compare(iVal, string(jt))
}
case float64:
switch jVal := jVal.(type) {
case string:
// i is number, j is string
ii := numeric_util.Float64ToInt64(iVal)
it, _ := numeric_util.NewPrefixCodedInt64(ii, 0)
c = strings.Compare(string(it), jVal)
case float64:
// numeric comparison
if iVal < jVal {
c = -1
} else if iVal > jVal {
c = 1
}
}
}
if c == 0 {
continue
}
if soi.Descending() {
c = -c
}
//c := soi.Compare(i, j)
return c
}
// if they are the same at this point, impose order based on index natural sort order
@ -273,10 +301,23 @@ type SortField struct {
}
// Value returns the sort value of the DocumentMatch
func (s *SortField) Value(i *DocumentMatch) string {
func (s *SortField) Value(i *DocumentMatch) interface{} {
	// The returned value is always either a string (the raw term) or a
	// float64. Those are the only two dynamic types SortOrder.Compare
	// switches on; any other type (such as a bare int64) would make every
	// pair of values compare as equal and the sort would be ignored.
	iTerms := i.CachedFieldTerms[s.Field]
	iTerms = s.filterTermsByType(iTerms)
	iTerm := s.filterTermsByMode(iTerms)

	switch s.Type {
	case SortFieldAsNumber, SortFieldAsDate:
		// explicitly asked for numeric sort; decode the same way the auto
		// branch below does so both paths yield comparable float64 values
		if ri, err := numeric_util.PrefixCoded(iTerm).Int64(); err == nil {
			return numeric_util.Int64ToFloat64(ri)
		}
		// the term could not be decoded as a number: fall through and
		// return the raw term, which Compare handles as a string
	case SortFieldAuto:
		// asked for auto, looks like a number
		if valid, shift := numeric_util.ValidPrefixCodedTerm(iTerm); valid && shift == 0 {
			if ri, err := numeric_util.PrefixCoded(iTerm).Int64(); err == nil {
				return numeric_util.Int64ToFloat64(ri)
			}
		}
	}
	return iTerm
}
@ -406,7 +447,7 @@ type SortDocID struct {
}
// Value returns the sort value of the DocumentMatch
func (s *SortDocID) Value(i *DocumentMatch) string {
func (s *SortDocID) Value(i *DocumentMatch) interface{} {
// the sort value is simply the match's ID field, returned as-is
return i.ID
}
@ -437,10 +478,8 @@ type SortScore struct {
}
// Value returns the sort value of the DocumentMatch
func (s *SortScore) Value(i *DocumentMatch) string {
scoreInt := numeric_util.Float64ToInt64(i.Score)
prefixCodedScore, _ := numeric_util.NewPrefixCodedInt64(scoreInt, 0)
return string(prefixCodedScore)
func (s *SortScore) Value(i *DocumentMatch) interface{} {
// return the match's Score field unchanged; no prefix-coded term is built
// here, and SortOrder.Compare compares two float64 values numerically
return i.Score
}
// Descending determines the order of the sort