0
0
Fork 0

adjust new sort functionality to also work with MultiSearch

This commit is contained in:
Marty Schoch 2016-08-24 14:07:10 -04:00
parent 1ae938b781
commit 0322ecd441
7 changed files with 234 additions and 60 deletions

View File

@ -474,6 +474,7 @@ func createChildSearchRequest(req *SearchRequest) *SearchRequest {
Fields: req.Fields,
Facets: req.Facets,
Explain: req.Explain,
Sort: req.Sort,
}
return &rv
}
@ -568,8 +569,14 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se
}
}
// first sort it by score
sort.Sort(sr.Hits)
// sort all hits with the requested order
if len(req.Sort) > 0 {
sorter := &multiSearchHitSorter{
hits: sr.Hits,
sort: req.Sort,
}
sort.Sort(sorter)
}
// now skip over the correct From
if req.From > 0 && len(sr.Hits) > req.From {
@ -645,3 +652,15 @@ func (f *indexAliasImplFieldDict) Close() error {
defer f.index.mutex.RUnlock()
return f.fieldDict.Close()
}
// multiSearchHitSorter pairs a collection of hits with a sort order and
// implements sort.Interface, so the hits can be ordered with sort.Sort.
type multiSearchHitSorter struct {
	hits search.DocumentMatchCollection // hits reordered in place by Swap
	sort search.SortOrder               // ordering consulted by Less
}

// Len reports how many hits the sorter holds.
func (m *multiSearchHitSorter) Len() int {
	return len(m.hits)
}

// Swap exchanges the hits at positions i and j.
func (m *multiSearchHitSorter) Swap(i, j int) {
	m.hits[j], m.hits[i] = m.hits[i], m.hits[j]
}

// Less reports whether the hit at i orders strictly before the hit at j,
// i.e. whether the sort order's Compare yields a negative result.
func (m *multiSearchHitSorter) Less(i, j int) bool {
	return m.sort.Compare(m.hits[i], m.hits[j]) < 0
}

View File

@ -11,6 +11,7 @@ import (
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store"
"github.com/blevesearch/bleve/numeric_util"
"github.com/blevesearch/bleve/search"
)
@ -451,6 +452,8 @@ func TestIndexAliasEmpty(t *testing.T) {
}
func TestIndexAliasMulti(t *testing.T) {
score1, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(1.0), 0)
score2, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(2.0), 0)
ei1Count := uint64(7)
ei1 := &stubIndex{
err: nil,
@ -466,6 +469,7 @@ func TestIndexAliasMulti(t *testing.T) {
{
ID: "a",
Score: 1.0,
Sort: []string{string(score1)},
},
},
MaxScore: 1.0,
@ -485,6 +489,7 @@ func TestIndexAliasMulti(t *testing.T) {
{
ID: "b",
Score: 2.0,
Sort: []string{string(score2)},
},
},
MaxScore: 2.0,
@ -572,10 +577,12 @@ func TestIndexAliasMulti(t *testing.T) {
{
ID: "b",
Score: 2.0,
Sort: []string{string(score2)},
},
{
ID: "a",
Score: 1.0,
Sort: []string{string(score1)},
},
},
MaxScore: 2.0,
@ -601,6 +608,8 @@ func TestIndexAliasMulti(t *testing.T) {
// TestMultiSearchNoError
func TestMultiSearchNoError(t *testing.T) {
score1, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(1.0), 0)
score2, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(2.0), 0)
ei1 := &stubIndex{err: nil, searchResult: &SearchResult{
Status: &SearchStatus{
Total: 1,
@ -613,6 +622,7 @@ func TestMultiSearchNoError(t *testing.T) {
Index: "1",
ID: "a",
Score: 1.0,
Sort: []string{string(score1)},
},
},
MaxScore: 1.0,
@ -629,6 +639,7 @@ func TestMultiSearchNoError(t *testing.T) {
Index: "2",
ID: "b",
Score: 2.0,
Sort: []string{string(score2)},
},
},
MaxScore: 2.0,
@ -648,11 +659,13 @@ func TestMultiSearchNoError(t *testing.T) {
Index: "2",
ID: "b",
Score: 2.0,
Sort: []string{string(score2)},
},
{
Index: "1",
ID: "a",
Score: 1.0,
Sort: []string{string(score1)},
},
},
MaxScore: 2.0,
@ -784,6 +797,8 @@ func TestMultiSearchSecondPage(t *testing.T) {
// 2. no searchers finish before the timeout
// 3. no searches finish before cancellation
func TestMultiSearchTimeout(t *testing.T) {
score1, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(1.0), 0)
score2, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(2.0), 0)
ei1 := &stubIndex{
name: "ei1",
checkRequest: func(req *SearchRequest) error {
@ -803,6 +818,7 @@ func TestMultiSearchTimeout(t *testing.T) {
Index: "1",
ID: "a",
Score: 1.0,
Sort: []string{string(score1)},
},
},
MaxScore: 1.0,
@ -826,6 +842,7 @@ func TestMultiSearchTimeout(t *testing.T) {
Index: "2",
ID: "b",
Score: 2.0,
Sort: []string{string(score2)},
},
},
MaxScore: 2.0,
@ -909,6 +926,9 @@ func TestMultiSearchTimeout(t *testing.T) {
// TestMultiSearchTimeoutPartial tests the case where some indexes exceed
// the timeout, while others complete successfully
func TestMultiSearchTimeoutPartial(t *testing.T) {
score1, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(1.0), 0)
score2, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(2.0), 0)
score3, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(3.0), 0)
ei1 := &stubIndex{
name: "ei1",
err: nil,
@ -924,6 +944,7 @@ func TestMultiSearchTimeoutPartial(t *testing.T) {
Index: "1",
ID: "a",
Score: 1.0,
Sort: []string{string(score1)},
},
},
MaxScore: 1.0,
@ -943,6 +964,7 @@ func TestMultiSearchTimeoutPartial(t *testing.T) {
Index: "2",
ID: "b",
Score: 2.0,
Sort: []string{string(score2)},
},
},
MaxScore: 2.0,
@ -967,6 +989,7 @@ func TestMultiSearchTimeoutPartial(t *testing.T) {
Index: "3",
ID: "c",
Score: 3.0,
Sort: []string{string(score3)},
},
},
MaxScore: 3.0,
@ -993,11 +1016,13 @@ func TestMultiSearchTimeoutPartial(t *testing.T) {
Index: "2",
ID: "b",
Score: 2.0,
Sort: []string{string(score2)},
},
{
Index: "1",
ID: "a",
Score: 1.0,
Sort: []string{string(score1)},
},
},
MaxScore: 2.0,
@ -1014,6 +1039,10 @@ func TestMultiSearchTimeoutPartial(t *testing.T) {
}
func TestIndexAliasMultipleLayer(t *testing.T) {
score1, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(1.0), 0)
score2, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(2.0), 0)
score3, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(3.0), 0)
score4, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(4.0), 0)
ei1 := &stubIndex{
name: "ei1",
err: nil,
@ -1029,6 +1058,7 @@ func TestIndexAliasMultipleLayer(t *testing.T) {
Index: "1",
ID: "a",
Score: 1.0,
Sort: []string{string(score1)},
},
},
MaxScore: 1.0,
@ -1052,6 +1082,7 @@ func TestIndexAliasMultipleLayer(t *testing.T) {
Index: "2",
ID: "b",
Score: 2.0,
Sort: []string{string(score2)},
},
},
MaxScore: 2.0,
@ -1076,6 +1107,7 @@ func TestIndexAliasMultipleLayer(t *testing.T) {
Index: "3",
ID: "c",
Score: 3.0,
Sort: []string{string(score3)},
},
},
MaxScore: 3.0,
@ -1096,6 +1128,7 @@ func TestIndexAliasMultipleLayer(t *testing.T) {
Index: "4",
ID: "d",
Score: 4.0,
Sort: []string{string(score4)},
},
},
MaxScore: 4.0,
@ -1129,11 +1162,13 @@ func TestIndexAliasMultipleLayer(t *testing.T) {
Index: "4",
ID: "d",
Score: 4.0,
Sort: []string{string(score4)},
},
{
Index: "1",
ID: "a",
Score: 1.0,
Sort: []string{string(score1)},
},
},
MaxScore: 4.0,
@ -1149,6 +1184,105 @@ func TestIndexAliasMultipleLayer(t *testing.T) {
}
}
// TestMultiSearchCustomSort checks that MultiSearch, given a request sorted
// by a field ("name") instead of by score, merges the hits of two indexes
// according to each hit's precomputed Sort values. The stub hits carry
// ascending names and ascending scores, so a result ordered by descending
// score would not match the expected output.
func TestMultiSearchCustomSort(t *testing.T) {
	ei1 := &stubIndex{err: nil, searchResult: &SearchResult{
		Status: &SearchStatus{
			Total:      1,
			Successful: 1,
			Errors:     make(map[string]error),
		},
		Total: 2,
		Hits: search.DocumentMatchCollection{
			{
				Index: "1",
				ID:    "a",
				Score: 1.0,
				Sort:  []string{"albert"},
			},
			{
				Index: "1",
				ID:    "b",
				Score: 2.0,
				Sort:  []string{"crown"},
			},
		},
		MaxScore: 2.0,
	}}
	ei2 := &stubIndex{err: nil, searchResult: &SearchResult{
		Status: &SearchStatus{
			Total:      1,
			Successful: 1,
			Errors:     make(map[string]error),
		},
		Total: 2,
		Hits: search.DocumentMatchCollection{
			{
				Index: "2",
				ID:    "c",
				Score: 2.5,
				Sort:  []string{"frank"},
			},
			{
				Index: "2",
				ID:    "d",
				Score: 3.0,
				Sort:  []string{"zombie"},
			},
		},
		MaxScore: 3.0,
	}}

	sr := NewSearchRequest(NewTermQuery("test"))
	sr.SortBy([]string{"name"})
	expected := &SearchResult{
		Status: &SearchStatus{
			Total:      2,
			Successful: 2,
			Errors:     make(map[string]error),
		},
		Request: sr,
		Total:   4,
		Hits: search.DocumentMatchCollection{
			{
				Index: "1",
				ID:    "a",
				Score: 1.0,
				Sort:  []string{"albert"},
			},
			{
				Index: "1",
				ID:    "b",
				Score: 2.0,
				Sort:  []string{"crown"},
			},
			{
				Index: "2",
				ID:    "c",
				Score: 2.5,
				Sort:  []string{"frank"},
			},
			{
				Index: "2",
				ID:    "d",
				Score: 3.0,
				Sort:  []string{"zombie"},
			},
		},
		MaxScore: 3.0,
	}

	results, err := MultiSearch(context.Background(), sr, ei1, ei2)
	if err != nil {
		// Stop here: results is dereferenced below and may be nil when
		// the search fails.
		t.Fatal(err)
	}
	// cheat and ensure that Took field matches since it involves time
	expected.Took = results.Took
	if !reflect.DeepEqual(results, expected) {
		t.Errorf("expected %v, got %v", expected, results)
	}
}
// stubIndex is an Index impl for which all operations
// return the configured error value, unless the
// corresponding operation result value has been

View File

@ -262,7 +262,7 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
r.Size = *temp.Size
}
if temp.Sort == nil {
r.Sort = search.SortOrder{&search.SortScore{Descending: true}}
r.Sort = search.SortOrder{&search.SortScore{Desc: true}}
} else {
r.Sort, err = search.ParseSortOrderJSON(temp.Sort)
if err != nil {
@ -307,7 +307,7 @@ func NewSearchRequestOptions(q Query, size, from int, explain bool) *SearchReque
Size: size,
From: from,
Explain: explain,
Sort: search.SortOrder{&search.SortScore{Descending: true}},
Sort: search.SortOrder{&search.SortScore{Desc: true}},
}
}

View File

@ -128,6 +128,9 @@ func (hc *HeapCollector) collectSingle(ctx *search.SearchContext, reader index.I
d.CachedFieldTerms.Merge(fieldTerms)
}
// compute this hit's sort value
d.Sort = hc.sort.Value(d)
// optimization, we track lowest sorting hit already removed from heap
// with this one comparison, we can avoid all heap operations if
// this hit would have been added and then immediately removed

View File

@ -84,7 +84,7 @@ func TestTop10Scores(t *testing.T) {
},
}
collector := NewHeapCollector(10, 0, search.SortOrder{&search.SortScore{Descending: true}})
collector := NewHeapCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
err := collector.Collect(context.Background(), searcher, &stubReader{})
if err != nil {
t.Fatal(err)
@ -192,7 +192,7 @@ func TestTop10ScoresSkip10(t *testing.T) {
},
}
collector := NewHeapCollector(10, 10, search.SortOrder{&search.SortScore{Descending: true}})
collector := NewHeapCollector(10, 10, search.SortOrder{&search.SortScore{Desc: true}})
err := collector.Collect(context.Background(), searcher, &stubReader{})
if err != nil {
t.Fatal(err)
@ -289,7 +289,7 @@ func TestPaginationSameScores(t *testing.T) {
}
// first get first 5 hits
collector := NewHeapCollector(5, 0, search.SortOrder{&search.SortScore{Descending: true}})
collector := NewHeapCollector(5, 0, search.SortOrder{&search.SortScore{Desc: true}})
err := collector.Collect(context.Background(), searcher, &stubReader{})
if err != nil {
t.Fatal(err)
@ -375,7 +375,7 @@ func TestPaginationSameScores(t *testing.T) {
}
// now get next 5 hits
collector = NewHeapCollector(5, 5, search.SortOrder{&search.SortScore{Descending: true}})
collector = NewHeapCollector(5, 5, search.SortOrder{&search.SortScore{Desc: true}})
err = collector.Collect(context.Background(), searcher, &stubReader{})
if err != nil {
t.Fatal(err)
@ -401,17 +401,17 @@ func TestPaginationSameScores(t *testing.T) {
}
func BenchmarkTop10of100000Scores(b *testing.B) {
benchHelper(10000, NewHeapCollector(10, 0, search.SortOrder{&search.SortScore{Descending: true}}), b)
benchHelper(10000, NewHeapCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}}), b)
}
func BenchmarkTop100of100000Scores(b *testing.B) {
benchHelper(10000, NewHeapCollector(100, 0, search.SortOrder{&search.SortScore{Descending: true}}), b)
benchHelper(10000, NewHeapCollector(100, 0, search.SortOrder{&search.SortScore{Desc: true}}), b)
}
func BenchmarkTop10of1000000Scores(b *testing.B) {
benchHelper(100000, NewHeapCollector(10, 0, search.SortOrder{&search.SortScore{Descending: true}}), b)
benchHelper(100000, NewHeapCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}}), b)
}
func BenchmarkTop100of1000000Scores(b *testing.B) {
benchHelper(100000, NewHeapCollector(100, 0, search.SortOrder{&search.SortScore{Descending: true}}), b)
benchHelper(100000, NewHeapCollector(100, 0, search.SortOrder{&search.SortScore{Desc: true}}), b)
}

View File

@ -65,6 +65,7 @@ type DocumentMatch struct {
Expl *Explanation `json:"explanation,omitempty"`
Locations FieldTermLocationMap `json:"locations,omitempty"`
Fragments FieldFragmentMap `json:"fragments,omitempty"`
Sort []string `json:"sort,omitempty"`
// Fields contains the values for document fields listed in
// SearchRequest.Fields. Text fields are returned as strings, numeric

View File

@ -22,7 +22,8 @@ var HighTerm = strings.Repeat(string([]byte{0xff}), 10)
var LowTerm = string([]byte{0x00})
type SearchSort interface {
Compare(a, b *DocumentMatch) int
Value(a *DocumentMatch) string
Descending() bool
RequiresDocID() bool
RequiresScoring() bool
@ -38,11 +39,11 @@ func ParseSearchSortObj(input map[string]interface{}) (SearchSort, error) {
switch by {
case "id":
return &SortDocID{
Descending: descending,
Desc: descending,
}, nil
case "score":
return &SortScore{
Descending: descending,
Desc: descending,
}, nil
case "field":
field, ok := input["field"].(string)
@ -50,8 +51,8 @@ func ParseSearchSortObj(input map[string]interface{}) (SearchSort, error) {
return nil, fmt.Errorf("search sort mode field must specify field")
}
rv := &SortField{
Field: field,
Descending: descending,
Field: field,
Desc: descending,
}
typ, ok := input["type"].(string)
if ok {
@ -108,16 +109,16 @@ func ParseSearchSortString(input string) SearchSort {
}
if input == "_id" {
return &SortDocID{
Descending: descending,
Desc: descending,
}
} else if input == "_score" {
return &SortScore{
Descending: descending,
Desc: descending,
}
}
return &SortField{
Field: input,
Descending: descending,
Field: input,
Desc: descending,
}
}
@ -159,13 +160,27 @@ func ParseSortOrderJSON(in []json.RawMessage) (SortOrder, error) {
type SortOrder []SearchSort
// Value computes the sort value of doc under every SearchSort in the order.
// The returned slice has one entry per element of so, in the same positions.
func (so SortOrder) Value(doc *DocumentMatch) []string {
	keys := make([]string, 0, len(so))
	for _, criterion := range so {
		keys = append(keys, criterion.Value(doc))
	}
	return keys
}
func (so SortOrder) Compare(i, j *DocumentMatch) int {
// compare the documents on all search sorts until a difference is found
for _, soi := range so {
c := soi.Compare(i, j)
for x, soi := range so {
iVal := i.Sort[x]
jVal := j.Sort[x]
c := strings.Compare(iVal, jVal)
if c == 0 {
continue
}
if soi.Descending() {
c = -c
}
//c := soi.Compare(i, j)
return c
}
// if they are the same at this point, impose order based on index natural sort order
@ -250,26 +265,24 @@ const (
// Mode controls behavior for multi-values fields (default first)
// Missing controls behavior of missing values (default last)
type SortField struct {
Field string
Descending bool
Type SortFieldType
Mode SortFieldMode
Missing SortFieldMissing
Field string
Desc bool
Type SortFieldType
Mode SortFieldMode
Missing SortFieldMissing
}
// Compare orders DocumentMatch instances by stored field values
func (s *SortField) Compare(i, j *DocumentMatch) int {
// Value returns the sort value of the DocumentMatch
func (s *SortField) Value(i *DocumentMatch) string {
iTerms := i.CachedFieldTerms[s.Field]
iTerms = s.filterTermsByType(iTerms)
iTerm := s.filterTermsByMode(iTerms)
jTerms := j.CachedFieldTerms[s.Field]
jTerms = s.filterTermsByType(jTerms)
jTerm := s.filterTermsByMode(jTerms)
rv := strings.Compare(iTerm, jTerm)
if s.Descending {
rv = -rv
}
return rv
return iTerm
}
// Descending reports whether this field sort is in descending order,
// as recorded in the Desc field.
func (s *SortField) Descending() bool {
return s.Desc
}
func (s *SortField) filterTermsByMode(terms []string) string {
@ -288,12 +301,12 @@ func (s *SortField) filterTermsByMode(terms []string) string {
// handle missing terms
if s.Missing == SortFieldMissingLast {
if s.Descending {
if s.Desc {
return LowTerm
}
return HighTerm
}
if s.Descending {
if s.Desc {
return HighTerm
}
return LowTerm
@ -347,7 +360,7 @@ func (s *SortField) MarshalJSON() ([]byte, error) {
if s.Missing == SortFieldMissingLast &&
s.Mode == SortFieldDefault &&
s.Type == SortFieldAuto {
if s.Descending {
if s.Desc {
return json.Marshal("-" + s.Field)
}
return json.Marshal(s.Field)
@ -356,7 +369,7 @@ func (s *SortField) MarshalJSON() ([]byte, error) {
"by": "field",
"field": s.Field,
}
if s.Descending {
if s.Desc {
sfm["desc"] = true
}
if s.Missing > SortFieldMissingLast {
@ -389,15 +402,17 @@ func (s *SortField) MarshalJSON() ([]byte, error) {
// SortDocID will sort results by the document identifier
type SortDocID struct {
Descending bool
Desc bool
}
// Compare orders DocumentMatch instances by document identifiers
func (s *SortDocID) Compare(i, j *DocumentMatch) int {
if s.Descending {
return strings.Compare(j.ID, i.ID)
}
return strings.Compare(i.ID, j.ID)
// Value returns the sort value of the DocumentMatch, which for a
// document-identifier sort is the match's ID unchanged.
func (s *SortDocID) Value(i *DocumentMatch) string {
return i.ID
}
// Descending reports whether this document-identifier sort is in
// descending order, as recorded in the Desc field.
func (s *SortDocID) Descending() bool {
return s.Desc
}
// RequiresDocID says this SearchSort does require the DocID be loaded
@ -410,7 +425,7 @@ func (s *SortDocID) RequiresScoring() bool { return false }
func (s *SortDocID) RequiresFields() []string { return nil }
func (s *SortDocID) MarshalJSON() ([]byte, error) {
if s.Descending {
if s.Desc {
return json.Marshal("-_id")
}
return json.Marshal("_id")
@ -418,17 +433,19 @@ func (s *SortDocID) MarshalJSON() ([]byte, error) {
// SortScore will sort results by the document match score
type SortScore struct {
Descending bool
Desc bool
}
// Compare orders DocumentMatch instances by computed scores
func (s *SortScore) Compare(i, j *DocumentMatch) int {
if i.Score == j.Score {
return 0
} else if (i.Score < j.Score && !s.Descending) || (j.Score < i.Score && s.Descending) {
return -1
}
return 1
// Value returns the sort value of the DocumentMatch: its Score converted
// with numeric_util.Float64ToInt64, prefix-coded with a shift of 0, and
// returned as a string.
func (s *SortScore) Value(i *DocumentMatch) string {
scoreInt := numeric_util.Float64ToInt64(i.Score)
// NOTE(review): the error from NewPrefixCodedInt64 is discarded here;
// presumably it cannot fail for a shift of 0 — confirm against numeric_util.
prefixCodedScore, _ := numeric_util.NewPrefixCodedInt64(scoreInt, 0)
return string(prefixCodedScore)
}
// Descending reports whether this score sort is in descending order,
// as recorded in the Desc field.
func (s *SortScore) Descending() bool {
return s.Desc
}
// RequiresDocID says this SearchSort does not require the DocID be loaded
@ -441,7 +458,7 @@ func (s *SortScore) RequiresScoring() bool { return true }
func (s *SortScore) RequiresFields() []string { return nil }
func (s *SortScore) MarshalJSON() ([]byte, error) {
if s.Descending {
if s.Desc {
return json.Marshal("-_score")
}
return json.Marshal("_score")