diff --git a/document/document.go b/document/document.go index a5965135..db6a1e0c 100644 --- a/document/document.go +++ b/document/document.go @@ -9,9 +9,7 @@ package document -import ( - "fmt" -) +import "fmt" type Document struct { ID string `json:"id"` diff --git a/examples_test.go b/examples_test.go index f33968cf..a0e8e097 100644 --- a/examples_test.go +++ b/examples_test.go @@ -15,6 +15,7 @@ import ( "testing" "time" + "github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search/highlight/highlighters/ansi" ) @@ -61,11 +62,13 @@ func ExampleIndex_indexing() { data := struct { Name string Created time.Time - }{Name: "named one", Created: time.Now()} + Age int + }{Name: "named one", Created: time.Now(), Age: 50} data2 := struct { Name string Created time.Time - }{Name: "great nameless one", Created: time.Now()} + Age int + }{Name: "great nameless one", Created: time.Now(), Age: 25} // index some data err = example_index.Index("document id 1", data) @@ -504,3 +507,46 @@ func ExampleDocumentMapping_AddFieldMappingsAt() { // Output: // 1 } + +func ExampleSearchRequest_SortBy() { + // find docs containing "one", order by Age instead of score + query := NewMatchQuery("one") + searchRequest := NewSearchRequest(query) + searchRequest.SortBy([]string{"Age"}) + searchResults, err := example_index.Search(searchRequest) + if err != nil { + panic(err) + } + + fmt.Println(searchResults.Hits[0].ID) + fmt.Println(searchResults.Hits[1].ID) + // Output: + // document id 2 + // document id 1 +} + +func ExampleSearchRequest_SortByCustom() { + // find all docs, order by Age, with docs missing Age field first + query := NewMatchAllQuery() + searchRequest := NewSearchRequest(query) + searchRequest.SortByCustom(search.SortOrder{ + &search.SortField{ + Field: "Age", + Missing: search.SortFieldMissingFirst, + }, + }) + searchResults, err := example_index.Search(searchRequest) + if err != nil { + panic(err) + } + + fmt.Println(searchResults.Hits[0].ID) + 
fmt.Println(searchResults.Hits[1].ID) + fmt.Println(searchResults.Hits[2].ID) + fmt.Println(searchResults.Hits[3].ID) + // Output: + // document id 3 + // document id 4 + // document id 2 + // document id 1 +} diff --git a/index/index.go b/index/index.go index ac86f20a..0d1383a4 100644 --- a/index/index.go +++ b/index/index.go @@ -79,8 +79,7 @@ type IndexReader interface { FieldDictPrefix(field string, termPrefix []byte) (FieldDict, error) Document(id string) (*document.Document, error) - DocumentFieldTerms(id IndexInternalID) (FieldTerms, error) - DocumentFieldTermsForFields(id IndexInternalID, fields []string) (FieldTerms, error) + DocumentFieldTerms(id IndexInternalID, fields []string) (FieldTerms, error) Fields() ([]string, error) @@ -93,8 +92,29 @@ type IndexReader interface { Close() error } +// FieldTerms contains the terms used by a document, keyed by field type FieldTerms map[string][]string +// FieldsNotYetCached returns a list of fields not yet cached out of a larger list of fields +func (f FieldTerms) FieldsNotYetCached(fields []string) []string { + var rv []string + for _, field := range fields { + if _, ok := f[field]; !ok { + rv = append(rv, field) + } + } + return rv +} + +// Merge will combine two FieldTerms +// it assumes that the terms lists are complete (thus do not need to be merged) +// field terms from the other list always replace the ones in the receiver +func (f FieldTerms) Merge(other FieldTerms) { + for field, terms := range other { + f[field] = terms + } +} + type TermFieldVector struct { Field string ArrayPositions []uint64 diff --git a/index/upside_down/index_reader.go b/index/upside_down/index_reader.go index 181b8b14..2349b749 100644 --- a/index/upside_down/index_reader.go +++ b/index/upside_down/index_reader.go @@ -10,8 +10,6 @@ package upside_down import ( - "fmt" - "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/store" @@ -98,25 +96,7 @@ func (i *IndexReader) 
Document(id string) (doc *document.Document, err error) { return } -func (i *IndexReader) DocumentFieldTerms(id index.IndexInternalID) (index.FieldTerms, error) { - back, err := i.index.backIndexRowForDoc(i.kvreader, id) - if err != nil { - return nil, err - } - rv := make(index.FieldTerms, len(back.termEntries)) - for _, entry := range back.termEntries { - fieldName := i.index.fieldCache.FieldIndexed(uint16(*entry.Field)) - terms, ok := rv[fieldName] - if !ok { - terms = make([]string, 0) - } - terms = append(terms, *entry.Term) - rv[fieldName] = terms - } - return rv, nil -} - -func (i *IndexReader) DocumentFieldTermsForFields(id index.IndexInternalID, fields []string) (index.FieldTerms, error) { +func (i *IndexReader) DocumentFieldTerms(id index.IndexInternalID, fields []string) (index.FieldTerms, error) { back, err := i.index.backIndexRowForDoc(i.kvreader, id) if err != nil { return nil, err @@ -125,10 +105,9 @@ func (i *IndexReader) DocumentFieldTermsForFields(id index.IndexInternalID, fiel fieldsMap := make(map[uint16]string, len(fields)) for _, f := range fields { id, ok := i.index.fieldCache.FieldNamed(f, false) - if !ok { - return nil, fmt.Errorf("Field %s was not found in cache", f) + if ok { + fieldsMap[id] = f } - fieldsMap[id] = f } for _, entry := range back.termEntries { if field, ok := fieldsMap[uint16(*entry.Field)]; ok { diff --git a/index/upside_down/upside_down_test.go b/index/upside_down/upside_down_test.go index 099a2b0c..cbfdab54 100644 --- a/index/upside_down/upside_down_test.go +++ b/index/upside_down/upside_down_test.go @@ -1179,7 +1179,7 @@ func TestIndexDocumentFieldTerms(t *testing.T) { } }() - fieldTerms, err := indexReader.DocumentFieldTerms(index.IndexInternalID("1")) + fieldTerms, err := indexReader.DocumentFieldTerms(index.IndexInternalID("1"), []string{"name", "title"}) if err != nil { t.Error(err) } diff --git a/index_alias_impl.go b/index_alias_impl.go index 03f30f5a..4367f548 100644 --- a/index_alias_impl.go +++ 
b/index_alias_impl.go @@ -474,6 +474,7 @@ func createChildSearchRequest(req *SearchRequest) *SearchRequest { Fields: req.Fields, Facets: req.Facets, Explain: req.Explain, + Sort: req.Sort, } return &rv } @@ -568,8 +569,11 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se } } - // first sort it by score - sort.Sort(sr.Hits) + // sort all hits with the requested order + if len(req.Sort) > 0 { + sorter := newMultiSearchHitSorter(req.Sort, sr.Hits) + sort.Sort(sorter) + } // now skip over the correct From if req.From > 0 && len(sr.Hits) > req.From { @@ -645,3 +649,26 @@ func (f *indexAliasImplFieldDict) Close() error { defer f.index.mutex.RUnlock() return f.fieldDict.Close() } + +type multiSearchHitSorter struct { + hits search.DocumentMatchCollection + sort search.SortOrder + cachedScoring []bool + cachedDesc []bool +} + +func newMultiSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *multiSearchHitSorter { + return &multiSearchHitSorter{ + sort: sort, + hits: hits, + cachedScoring: sort.CacheIsScore(), + cachedDesc: sort.CacheDescending(), + } +} + +func (m *multiSearchHitSorter) Len() int { return len(m.hits) } +func (m *multiSearchHitSorter) Swap(i, j int) { m.hits[i], m.hits[j] = m.hits[j], m.hits[i] } +func (m *multiSearchHitSorter) Less(i, j int) bool { + c := m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j]) + return c < 0 +} diff --git a/index_alias_impl_test.go b/index_alias_impl_test.go index 1fb67647..7aa2dd46 100644 --- a/index_alias_impl_test.go +++ b/index_alias_impl_test.go @@ -11,6 +11,7 @@ import ( "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/store" + "github.com/blevesearch/bleve/numeric_util" "github.com/blevesearch/bleve/search" ) @@ -451,6 +452,8 @@ func TestIndexAliasEmpty(t *testing.T) { } func TestIndexAliasMulti(t *testing.T) { + score1, _ := 
numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(1.0), 0) + score2, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(2.0), 0) ei1Count := uint64(7) ei1 := &stubIndex{ err: nil, @@ -466,6 +469,7 @@ func TestIndexAliasMulti(t *testing.T) { { ID: "a", Score: 1.0, + Sort: []string{string(score1)}, }, }, MaxScore: 1.0, @@ -485,6 +489,7 @@ func TestIndexAliasMulti(t *testing.T) { { ID: "b", Score: 2.0, + Sort: []string{string(score2)}, }, }, MaxScore: 2.0, @@ -572,10 +577,12 @@ func TestIndexAliasMulti(t *testing.T) { { ID: "b", Score: 2.0, + Sort: []string{string(score2)}, }, { ID: "a", Score: 1.0, + Sort: []string{string(score1)}, }, }, MaxScore: 2.0, @@ -601,6 +608,8 @@ func TestIndexAliasMulti(t *testing.T) { // TestMultiSearchNoError func TestMultiSearchNoError(t *testing.T) { + score1, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(1.0), 0) + score2, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(2.0), 0) ei1 := &stubIndex{err: nil, searchResult: &SearchResult{ Status: &SearchStatus{ Total: 1, @@ -613,6 +622,7 @@ func TestMultiSearchNoError(t *testing.T) { Index: "1", ID: "a", Score: 1.0, + Sort: []string{string(score1)}, }, }, MaxScore: 1.0, @@ -629,6 +639,7 @@ func TestMultiSearchNoError(t *testing.T) { Index: "2", ID: "b", Score: 2.0, + Sort: []string{string(score2)}, }, }, MaxScore: 2.0, @@ -648,11 +659,13 @@ func TestMultiSearchNoError(t *testing.T) { Index: "2", ID: "b", Score: 2.0, + Sort: []string{string(score2)}, }, { Index: "1", ID: "a", Score: 1.0, + Sort: []string{string(score1)}, }, }, MaxScore: 2.0, @@ -784,6 +797,8 @@ func TestMultiSearchSecondPage(t *testing.T) { // 2. no searchers finish before the timeout // 3. 
no searches finish before cancellation func TestMultiSearchTimeout(t *testing.T) { + score1, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(1.0), 0) + score2, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(2.0), 0) ei1 := &stubIndex{ name: "ei1", checkRequest: func(req *SearchRequest) error { @@ -803,6 +818,7 @@ func TestMultiSearchTimeout(t *testing.T) { Index: "1", ID: "a", Score: 1.0, + Sort: []string{string(score1)}, }, }, MaxScore: 1.0, @@ -826,6 +842,7 @@ func TestMultiSearchTimeout(t *testing.T) { Index: "2", ID: "b", Score: 2.0, + Sort: []string{string(score2)}, }, }, MaxScore: 2.0, @@ -909,6 +926,9 @@ func TestMultiSearchTimeout(t *testing.T) { // TestMultiSearchTimeoutPartial tests the case where some indexes exceed // the timeout, while others complete successfully func TestMultiSearchTimeoutPartial(t *testing.T) { + score1, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(1.0), 0) + score2, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(2.0), 0) + score3, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(3.0), 0) ei1 := &stubIndex{ name: "ei1", err: nil, @@ -924,6 +944,7 @@ func TestMultiSearchTimeoutPartial(t *testing.T) { Index: "1", ID: "a", Score: 1.0, + Sort: []string{string(score1)}, }, }, MaxScore: 1.0, @@ -943,6 +964,7 @@ func TestMultiSearchTimeoutPartial(t *testing.T) { Index: "2", ID: "b", Score: 2.0, + Sort: []string{string(score2)}, }, }, MaxScore: 2.0, @@ -967,6 +989,7 @@ func TestMultiSearchTimeoutPartial(t *testing.T) { Index: "3", ID: "c", Score: 3.0, + Sort: []string{string(score3)}, }, }, MaxScore: 3.0, @@ -993,11 +1016,13 @@ func TestMultiSearchTimeoutPartial(t *testing.T) { Index: "2", ID: "b", Score: 2.0, + Sort: []string{string(score2)}, }, { Index: "1", ID: "a", Score: 1.0, + Sort: []string{string(score1)}, }, }, MaxScore: 2.0, @@ -1014,6 +1039,10 @@ func TestMultiSearchTimeoutPartial(t *testing.T) { } func TestIndexAliasMultipleLayer(t 
*testing.T) { + score1, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(1.0), 0) + score2, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(2.0), 0) + score3, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(3.0), 0) + score4, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(4.0), 0) ei1 := &stubIndex{ name: "ei1", err: nil, @@ -1029,6 +1058,7 @@ func TestIndexAliasMultipleLayer(t *testing.T) { Index: "1", ID: "a", Score: 1.0, + Sort: []string{string(score1)}, }, }, MaxScore: 1.0, @@ -1052,6 +1082,7 @@ func TestIndexAliasMultipleLayer(t *testing.T) { Index: "2", ID: "b", Score: 2.0, + Sort: []string{string(score2)}, }, }, MaxScore: 2.0, @@ -1076,6 +1107,7 @@ func TestIndexAliasMultipleLayer(t *testing.T) { Index: "3", ID: "c", Score: 3.0, + Sort: []string{string(score3)}, }, }, MaxScore: 3.0, @@ -1096,6 +1128,7 @@ func TestIndexAliasMultipleLayer(t *testing.T) { Index: "4", ID: "d", Score: 4.0, + Sort: []string{string(score4)}, }, }, MaxScore: 4.0, @@ -1129,11 +1162,13 @@ func TestIndexAliasMultipleLayer(t *testing.T) { Index: "4", ID: "d", Score: 4.0, + Sort: []string{string(score4)}, }, { Index: "1", ID: "a", Score: 1.0, + Sort: []string{string(score1)}, }, }, MaxScore: 4.0, @@ -1149,6 +1184,105 @@ func TestIndexAliasMultipleLayer(t *testing.T) { } } +// TestMultiSearchNoError +func TestMultiSearchCustomSort(t *testing.T) { + ei1 := &stubIndex{err: nil, searchResult: &SearchResult{ + Status: &SearchStatus{ + Total: 1, + Successful: 1, + Errors: make(map[string]error), + }, + Total: 2, + Hits: search.DocumentMatchCollection{ + { + Index: "1", + ID: "a", + Score: 1.0, + Sort: []string{"albert"}, + }, + { + Index: "1", + ID: "b", + Score: 2.0, + Sort: []string{"crown"}, + }, + }, + MaxScore: 2.0, + }} + ei2 := &stubIndex{err: nil, searchResult: &SearchResult{ + Status: &SearchStatus{ + Total: 1, + Successful: 1, + Errors: make(map[string]error), + }, + Total: 2, + Hits: 
search.DocumentMatchCollection{ + { + Index: "2", + ID: "c", + Score: 2.5, + Sort: []string{"frank"}, + }, + { + Index: "2", + ID: "d", + Score: 3.0, + Sort: []string{"zombie"}, + }, + }, + MaxScore: 3.0, + }} + + sr := NewSearchRequest(NewTermQuery("test")) + sr.SortBy([]string{"name"}) + expected := &SearchResult{ + Status: &SearchStatus{ + Total: 2, + Successful: 2, + Errors: make(map[string]error), + }, + Request: sr, + Total: 4, + Hits: search.DocumentMatchCollection{ + { + Index: "1", + ID: "a", + Score: 1.0, + Sort: []string{"albert"}, + }, + { + Index: "1", + ID: "b", + Score: 2.0, + Sort: []string{"crown"}, + }, + { + Index: "2", + ID: "c", + Score: 2.5, + Sort: []string{"frank"}, + }, + { + Index: "2", + ID: "d", + Score: 3.0, + Sort: []string{"zombie"}, + }, + }, + MaxScore: 3.0, + } + + results, err := MultiSearch(context.Background(), sr, ei1, ei2) + if err != nil { + t.Error(err) + } + // cheat and ensure that Took field matches since it invovles time + expected.Took = results.Took + if !reflect.DeepEqual(results, expected) { + t.Errorf("expected %v, got %v", expected, results) + } +} + // stubIndex is an Index impl for which all operations // return the configured error value, unless the // corresponding operation result value has been diff --git a/index_impl.go b/index_impl.go index 66c33f8c..6093ec97 100644 --- a/index_impl.go +++ b/index_impl.go @@ -384,7 +384,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr return nil, ErrorIndexClosed } - collector := collectors.NewTopScorerSkipCollector(req.Size, req.From) + collector := collectors.NewTopNCollector(req.Size, req.From, req.Sort) // open a reader for this search indexReader, err := i.i.Reader() diff --git a/index_test.go b/index_test.go index ae0f21ae..5e070366 100644 --- a/index_test.go +++ b/index_test.go @@ -715,6 +715,54 @@ func TestIndexMetadataRaceBug198(t *testing.T) { close(done) } +func TestSortMatchSearch(t *testing.T) { + defer func() { + err := 
os.RemoveAll("testidx") + if err != nil { + t.Fatal(err) + } + }() + + index, err := New("testidx", NewIndexMapping()) + if err != nil { + t.Fatal(err) + } + + names := []string{"Noam", "Uri", "David", "Yosef", "Eitan", "Itay", "Ariel", "Daniel", "Omer", "Yogev", "Yehonatan", "Moshe", "Mohammed", "Yusuf", "Omar"} + days := []string{"Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"} + numbers := []string{"One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine", "Ten", "Eleven", "Twelve"} + for i := 0; i < 200; i++ { + doc := make(map[string]interface{}) + doc["Name"] = names[i%len(names)] + doc["Day"] = days[i%len(days)] + doc["Number"] = numbers[i%len(numbers)] + err = index.Index(fmt.Sprintf("%d", i), doc) + if err != nil { + t.Fatal(err) + } + } + + req := NewSearchRequest(NewMatchQuery("One")) + req.SortBy([]string{"Day", "Name"}) + req.Fields = []string{"*"} + sr, err := index.Search(req) + if err != nil { + t.Fatal(err) + } + prev := "" + for _, hit := range sr.Hits { + val := hit.Fields["Day"].(string) + if prev > val { + t.Errorf("Hits must be sorted by 'Day'. 
Found '%s' before '%s'", prev, val) + } + prev = val + } + err = index.Close() + if err != nil { + t.Fatal(err) + } +} + func TestIndexCountMatchSearch(t *testing.T) { defer func() { err := os.RemoveAll("testidx") diff --git a/numeric_util/prefix_coded.go b/numeric_util/prefix_coded.go index 25931151..4cf36dec 100644 --- a/numeric_util/prefix_coded.go +++ b/numeric_util/prefix_coded.go @@ -9,9 +9,7 @@ package numeric_util -import ( - "fmt" -) +import "fmt" const ShiftStartInt64 byte = 0x20 @@ -72,3 +70,18 @@ func (p PrefixCoded) Int64() (int64, error) { } return int64(uint64((sortableBits << shift)) ^ 0x8000000000000000), nil } + +func ValidPrefixCodedTerm(p string) (bool, int) { + if len(p) > 0 { + if p[0] < ShiftStartInt64 || p[0] > ShiftStartInt64+63 { + return false, 0 + } + shift := p[0] - ShiftStartInt64 + nChars := ((63 - int(shift)) / 7) + 1 + if len(p) != nChars+1 { + return false, 0 + } + return true, int(shift) + } + return false, 0 +} diff --git a/numeric_util/prefix_coded_test.go b/numeric_util/prefix_coded_test.go index fe55475d..6f064fdd 100644 --- a/numeric_util/prefix_coded_test.go +++ b/numeric_util/prefix_coded_test.go @@ -98,6 +98,45 @@ func TestPrefixCoded(t *testing.T) { } } +func TestPrefixCodedValid(t *testing.T) { + // all of the shared tests should be valid + for _, test := range tests { + valid, _ := ValidPrefixCodedTerm(string(test.output)) + if !valid { + t.Errorf("expected %s to be valid prefix coded, is not", string(test.output)) + } + } + + invalidTests := []struct { + data PrefixCoded + }{ + // first byte invalid skip (too low) + { + data: PrefixCoded{0x19, 'c', 'a', 't'}, + }, + // first byte invalid skip (too high) + { + data: PrefixCoded{0x20 + 64, 'c'}, + }, + // length of trailing bytes wrong (too long) + { + data: PrefixCoded{0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1}, + }, + // length of trailing bytes wrong (too short) + { + data: PrefixCoded{0x20 + 63}, + }, + } + + // all of the shared tests should be 
valid + for _, test := range invalidTests { + valid, _ := ValidPrefixCodedTerm(string(test.data)) + if valid { + t.Errorf("expected %s to be invalid prefix coded, it is", string(test.data)) + } + } +} + func BenchmarkTestPrefixCoded(b *testing.B) { for i := 0; i < b.N; i++ { diff --git a/search.go b/search.go index e9ca34be..dc6bc26e 100644 --- a/search.go +++ b/search.go @@ -191,6 +191,7 @@ func (h *HighlightRequest) AddField(field string) { // Facets describe the set of facets to be computed. // Explain triggers inclusion of additional search // result score explanations. +// Sort describes the desired order for the results to be returned. // // A special field named "*" can be used to return all fields. type SearchRequest struct { @@ -201,6 +202,7 @@ type SearchRequest struct { Fields []string `json:"fields"` Facets FacetsRequest `json:"facets"` Explain bool `json:"explain"` + Sort search.SortOrder `json:"sort"` } func (sr *SearchRequest) Validate() error { @@ -220,6 +222,21 @@ func (r *SearchRequest) AddFacet(facetName string, f *FacetRequest) { r.Facets[facetName] = f } +// SortBy changes the request to use the requested sort order +// this form uses the simplified syntax with an array of strings +// each string can either be a field name +// or the magic value _id and _score which refer to the doc id and search score +// any of these values can optionally be prefixed with - to reverse the order +func (r *SearchRequest) SortBy(order []string) { + so := search.ParseSortOrderStrings(order) + r.Sort = so +} + +// SortByCustom changes the request to use the requested sort order +func (r *SearchRequest) SortByCustom(order search.SortOrder) { + r.Sort = order +} + // UnmarshalJSON deserializes a JSON representation of // a SearchRequest func (r *SearchRequest) UnmarshalJSON(input []byte) error { @@ -231,6 +248,7 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error { Fields []string `json:"fields"` Facets FacetsRequest `json:"facets"` Explain bool 
`json:"explain"` + Sort []json.RawMessage `json:"sort"` } err := json.Unmarshal(input, &temp) @@ -243,6 +261,14 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error { } else { r.Size = *temp.Size } + if temp.Sort == nil { + r.Sort = search.SortOrder{&search.SortScore{Desc: true}} + } else { + r.Sort, err = search.ParseSortOrderJSON(temp.Sort) + if err != nil { + return err + } + } r.From = temp.From r.Explain = temp.Explain r.Highlight = temp.Highlight @@ -274,12 +300,14 @@ func NewSearchRequest(q Query) *SearchRequest { // NewSearchRequestOptions creates a new SearchRequest // for the Query, with the requested size, from // and explanation search parameters. +// By default results are ordered by score, descending. func NewSearchRequestOptions(q Query, size, from int, explain bool) *SearchRequest { return &SearchRequest{ Query: q, Size: size, From: from, Explain: explain, + Sort: search.SortOrder{&search.SortScore{Desc: true}}, } } diff --git a/search/collectors/collector_top_score.go b/search/collectors/collector_top_score.go deleted file mode 100644 index 7d7131ce..00000000 --- a/search/collectors/collector_top_score.go +++ /dev/null @@ -1,187 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. 
- -package collectors - -import ( - "container/list" - "time" - - "golang.org/x/net/context" - - "github.com/blevesearch/bleve/index" - "github.com/blevesearch/bleve/search" -) - -type TopScoreCollector struct { - k int - skip int - results *list.List - took time.Duration - maxScore float64 - minScore float64 - total uint64 - facetsBuilder *search.FacetsBuilder - actualResults search.DocumentMatchCollection -} - -func NewTopScorerCollector(k int) *TopScoreCollector { - return &TopScoreCollector{ - k: k, - skip: 0, - results: list.New(), - } -} - -func NewTopScorerSkipCollector(k, skip int) *TopScoreCollector { - return &TopScoreCollector{ - k: k, - skip: skip, - results: list.New(), - } -} - -func (tksc *TopScoreCollector) Total() uint64 { - return tksc.total -} - -func (tksc *TopScoreCollector) MaxScore() float64 { - return tksc.maxScore -} - -func (tksc *TopScoreCollector) Took() time.Duration { - return tksc.took -} - -var COLLECT_CHECK_DONE_EVERY = uint64(1024) - -func (tksc *TopScoreCollector) Collect(ctx context.Context, searcher search.Searcher, reader index.IndexReader) error { - startTime := time.Now() - var err error - var next *search.DocumentMatch - - // search context with enough pre-allocated document matches - searchContext := &search.SearchContext{ - DocumentMatchPool: search.NewDocumentMatchPool(tksc.k + tksc.skip + searcher.DocumentMatchPoolSize()), - } - - select { - case <-ctx.Done(): - return ctx.Err() - default: - next, err = searcher.Next(searchContext) - } - for err == nil && next != nil { - if tksc.total%COLLECT_CHECK_DONE_EVERY == 0 { - select { - case <-ctx.Done(): - return ctx.Err() - default: - } - } - if tksc.facetsBuilder != nil { - err = tksc.facetsBuilder.Update(next) - if err != nil { - break - } - } - tksc.collectSingle(searchContext, next) - - next, err = searcher.Next(searchContext) - } - // finalize actual results - tksc.actualResults, err = tksc.finalizeResults(reader) - if err != nil { - return err - } - - // compute search 
duration - tksc.took = time.Since(startTime) - if err != nil { - return err - } - return nil -} - -func (tksc *TopScoreCollector) collectSingle(ctx *search.SearchContext, d *search.DocumentMatch) { - // increment total hits - tksc.total++ - - // update max score - if d.Score > tksc.maxScore { - tksc.maxScore = d.Score - } - - if d.Score <= tksc.minScore { - ctx.DocumentMatchPool.Put(d) - return - } - - for e := tksc.results.Front(); e != nil; e = e.Next() { - curr := e.Value.(*search.DocumentMatch) - if d.Score <= curr.Score { - - tksc.results.InsertBefore(d, e) - // if we just made the list too long - if tksc.results.Len() > (tksc.k + tksc.skip) { - // remove the head - removed := tksc.results.Remove(tksc.results.Front()).(*search.DocumentMatch) - tksc.minScore = removed.Score - ctx.DocumentMatchPool.Put(removed) - } - return - } - } - // if we got to the end, we still have to add it - tksc.results.PushBack(d) - if tksc.results.Len() > (tksc.k + tksc.skip) { - // remove the head - removed := tksc.results.Remove(tksc.results.Front()).(*search.DocumentMatch) - tksc.minScore = removed.Score - ctx.DocumentMatchPool.Put(removed) - } -} - -func (tksc *TopScoreCollector) Results() search.DocumentMatchCollection { - return tksc.actualResults -} - -func (tksc *TopScoreCollector) finalizeResults(r index.IndexReader) (search.DocumentMatchCollection, error) { - if tksc.results.Len()-tksc.skip > 0 { - rv := make(search.DocumentMatchCollection, tksc.results.Len()-tksc.skip) - i := 0 - skipped := 0 - for e := tksc.results.Back(); e != nil; e = e.Prev() { - if skipped < tksc.skip { - skipped++ - continue - } - var err error - rv[i] = e.Value.(*search.DocumentMatch) - rv[i].ID, err = r.FinalizeDocID(rv[i].IndexInternalID) - if err != nil { - return nil, err - } - i++ - } - return rv, nil - } - return search.DocumentMatchCollection{}, nil -} - -func (tksc *TopScoreCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) { - tksc.facetsBuilder = facetsBuilder -} - -func 
(tksc *TopScoreCollector) FacetResults() search.FacetResults { - if tksc.facetsBuilder != nil { - return tksc.facetsBuilder.Results() - } - return search.FacetResults{} -} diff --git a/search/collectors/heap.go b/search/collectors/heap.go new file mode 100644 index 00000000..1c834e9c --- /dev/null +++ b/search/collectors/heap.go @@ -0,0 +1,83 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. + +package collectors + +import ( + "container/heap" + + "github.com/blevesearch/bleve/search" +) + +type collectStoreHeap struct { + heap search.DocumentMatchCollection + compare collectorCompare +} + +func newStoreHeap(cap int, compare collectorCompare) *collectStoreHeap { + rv := &collectStoreHeap{ + heap: make(search.DocumentMatchCollection, 0, cap), + compare: compare, + } + heap.Init(rv) + return rv +} + +func (c *collectStoreHeap) Add(doc *search.DocumentMatch) { + heap.Push(c, doc) +} + +func (c *collectStoreHeap) RemoveLast() *search.DocumentMatch { + return heap.Pop(c).(*search.DocumentMatch) +} + +func (c *collectStoreHeap) Final(skip int, fixup collectorFixup) (search.DocumentMatchCollection, error) { + count := c.Len() + size := count - skip + rv := make(search.DocumentMatchCollection, size) + for count > 0 { + count-- + + if count >= skip { + size-- + doc := heap.Pop(c).(*search.DocumentMatch) + rv[size] = doc + err := fixup(doc) + if err != nil { + return nil, err + } + } + } + return rv, nil +} + +// heap interface implementation + 
+func (c *collectStoreHeap) Len() int { + return len(c.heap) +} + +func (c *collectStoreHeap) Less(i, j int) bool { + so := c.compare(c.heap[i], c.heap[j]) + return -so < 0 +} + +func (c *collectStoreHeap) Swap(i, j int) { + c.heap[i], c.heap[j] = c.heap[j], c.heap[i] +} + +func (c *collectStoreHeap) Push(x interface{}) { + c.heap = append(c.heap, x.(*search.DocumentMatch)) +} + +func (c *collectStoreHeap) Pop() interface{} { + var rv *search.DocumentMatch + rv, c.heap = c.heap[len(c.heap)-1], c.heap[:len(c.heap)-1] + return rv +} diff --git a/search/collectors/list.go b/search/collectors/list.go new file mode 100644 index 00000000..d3f49410 --- /dev/null +++ b/search/collectors/list.go @@ -0,0 +1,73 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+ +package collectors + +import ( + "container/list" + + "github.com/blevesearch/bleve/search" +) + +type collectStoreList struct { + results *list.List + compare collectorCompare +} + +func newStoreList(cap int, compare collectorCompare) *collectStoreList { + rv := &collectStoreList{ + results: list.New(), + compare: compare, + } + + return rv +} + +func (c *collectStoreList) Add(doc *search.DocumentMatch) { + for e := c.results.Front(); e != nil; e = e.Next() { + curr := e.Value.(*search.DocumentMatch) + if c.compare(doc, curr) >= 0 { + c.results.InsertBefore(doc, e) + return + } + } + // if we got to the end, we still have to add it + c.results.PushBack(doc) +} + +func (c *collectStoreList) RemoveLast() *search.DocumentMatch { + return c.results.Remove(c.results.Front()).(*search.DocumentMatch) +} + +func (c *collectStoreList) Final(skip int, fixup collectorFixup) (search.DocumentMatchCollection, error) { + if c.results.Len()-skip > 0 { + rv := make(search.DocumentMatchCollection, c.results.Len()-skip) + i := 0 + skipped := 0 + for e := c.results.Back(); e != nil; e = e.Prev() { + if skipped < skip { + skipped++ + continue + } + + rv[i] = e.Value.(*search.DocumentMatch) + err := fixup(rv[i]) + if err != nil { + return nil, err + } + i++ + } + return rv, nil + } + return search.DocumentMatchCollection{}, nil +} + +func (c *collectStoreList) Len() int { + return c.results.Len() +} diff --git a/search/collectors/search_test.go b/search/collectors/search_test.go index c5faa243..035486b5 100644 --- a/search/collectors/search_test.go +++ b/search/collectors/search_test.go @@ -22,7 +22,9 @@ type stubSearcher struct { func (ss *stubSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) { if ss.index < len(ss.matches) { - rv := ss.matches[ss.index] + rv := ctx.DocumentMatchPool.Get() + rv.IndexInternalID = ss.matches[ss.index].IndexInternalID + rv.Score = ss.matches[ss.index].Score ss.index++ return rv, nil } @@ -35,7 +37,9 @@ func (ss *stubSearcher) 
Advance(ctx *search.SearchContext, ID index.IndexInterna ss.index++ } if ss.index < len(ss.matches) { - rv := ss.matches[ss.index] + rv := ctx.DocumentMatchPool.Get() + rv.IndexInternalID = ss.matches[ss.index].IndexInternalID + rv.Score = ss.matches[ss.index].Score ss.index++ return rv, nil } @@ -95,11 +99,7 @@ func (sr *stubReader) Document(id string) (*document.Document, error) { return nil, nil } -func (sr *stubReader) DocumentFieldTerms(id index.IndexInternalID) (index.FieldTerms, error) { - return nil, nil -} - -func (sr *stubReader) DocumentFieldTermsForFields(id index.IndexInternalID, fields []string) (index.FieldTerms, error) { +func (sr *stubReader) DocumentFieldTerms(id index.IndexInternalID, fields []string) (index.FieldTerms, error) { return nil, nil } diff --git a/search/collectors/slice.go b/search/collectors/slice.go new file mode 100644 index 00000000..dee08c81 --- /dev/null +++ b/search/collectors/slice.go @@ -0,0 +1,60 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+ +package collectors + +import "github.com/blevesearch/bleve/search" + +type collectStoreSlice struct { + slice search.DocumentMatchCollection + compare collectorCompare +} + +func newStoreSlice(cap int, compare collectorCompare) *collectStoreSlice { + rv := &collectStoreSlice{ + slice: make(search.DocumentMatchCollection, 0, cap), + compare: compare, + } + return rv +} + +func (c *collectStoreSlice) Add(doc *search.DocumentMatch) { + // find where to insert, starting at end (lowest) + i := len(c.slice) + for ; i > 0; i-- { + cmp := c.compare(doc, c.slice[i-1]) + if cmp >= 0 { + break + } + } + // insert at i + c.slice = append(c.slice, nil) + copy(c.slice[i+1:], c.slice[i:]) + c.slice[i] = doc +} + +func (c *collectStoreSlice) RemoveLast() *search.DocumentMatch { + var rv *search.DocumentMatch + rv, c.slice = c.slice[len(c.slice)-1], c.slice[:len(c.slice)-1] + return rv +} + +func (c *collectStoreSlice) Final(skip int, fixup collectorFixup) (search.DocumentMatchCollection, error) { + for i := skip; i < len(c.slice); i++ { + err := fixup(c.slice[i]) + if err != nil { + return nil, err + } + } + return c.slice[skip:], nil +} + +func (c *collectStoreSlice) Len() int { + return len(c.slice) +} diff --git a/search/collectors/topn.go b/search/collectors/topn.go new file mode 100644 index 00000000..636b7593 --- /dev/null +++ b/search/collectors/topn.go @@ -0,0 +1,250 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+ +package collectors + +import ( + "time" + + "github.com/blevesearch/bleve/index" + "github.com/blevesearch/bleve/search" + "golang.org/x/net/context" +) + +type collectorCompare func(i, j *search.DocumentMatch) int + +type collectorFixup func(d *search.DocumentMatch) error + +// TopNCollector collects the top N hits, optionally skipping some results +type TopNCollector struct { + size int + skip int + total uint64 + maxScore float64 + took time.Duration + sort search.SortOrder + results search.DocumentMatchCollection + facetsBuilder *search.FacetsBuilder + + store *collectStoreSlice + + needDocIds bool + neededFields []string + cachedScoring []bool + cachedDesc []bool + + lowestMatchOutsideResults *search.DocumentMatch +} + +// CheckDoneEvery controls how frequently we check the context deadline +const CheckDoneEvery = uint64(1024) + +// NewTopNCollector builds a collector to find the top 'size' hits +// skipping over the first 'skip' hits +// ordering hits by the provided sort order +func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector { + hc := &TopNCollector{size: size, skip: skip, sort: sort} + // pre-allocate space on the heap, we need size+skip results + // +1 additional while figuring out which to evict + hc.store = newStoreSlice(size+skip+1, func(i, j *search.DocumentMatch) int { + return hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, i, j) + }) + + // these lookups traverse an interface, so do once up-front + if sort.RequiresDocID() { + hc.needDocIds = true + } + hc.neededFields = sort.RequiredFields() + hc.cachedScoring = sort.CacheIsScore() + hc.cachedDesc = sort.CacheDescending() + + return hc +} + +// Collect goes to the index to find the matching documents +func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, reader index.IndexReader) error { + startTime := time.Now() + var err error + var next *search.DocumentMatch + + // search context with enough pre-allocated document matches + // we keep 
references to size+skip ourselves + // plus possibly one extra for the highestMatchOutsideResults + // plus the amount required by the searcher tree + searchContext := &search.SearchContext{ + DocumentMatchPool: search.NewDocumentMatchPool(hc.size+hc.skip+1+searcher.DocumentMatchPoolSize(), len(hc.sort)), + } + + select { + case <-ctx.Done(): + return ctx.Err() + default: + next, err = searcher.Next(searchContext) + } + for err == nil && next != nil { + if hc.total%CheckDoneEvery == 0 { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + } + if hc.facetsBuilder != nil { + err = hc.facetsBuilder.Update(next) + if err != nil { + break + } + } + + err = hc.collectSingle(searchContext, reader, next) + if err != nil { + break + } + + next, err = searcher.Next(searchContext) + } + // compute search duration + hc.took = time.Since(startTime) + if err != nil { + return err + } + // finalize actual results + err = hc.finalizeResults(reader) + if err != nil { + return err + } + return nil +} + +var sortByScoreOpt = []string{"_score"} + +func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.IndexReader, d *search.DocumentMatch) error { + // increment total hits + hc.total++ + d.HitNumber = hc.total + + // update max score + if d.Score > hc.maxScore { + hc.maxScore = d.Score + } + + var err error + // see if we need to load ID (at this early stage, for example to sort on it) + if hc.needDocIds { + d.ID, err = reader.FinalizeDocID(d.IndexInternalID) + if err != nil { + return err + } + } + + // see if we need to load the stored fields + if len(hc.neededFields) > 0 { + // find out which fields haven't been loaded yet + fieldsToLoad := d.CachedFieldTerms.FieldsNotYetCached(hc.neededFields) + // look them up + fieldTerms, err := reader.DocumentFieldTerms(d.IndexInternalID, fieldsToLoad) + if err != nil { + return err + } + // cache these as well + if d.CachedFieldTerms == nil { + d.CachedFieldTerms = make(map[string][]string) + } + 
d.CachedFieldTerms.Merge(fieldTerms) + } + + // compute this hits sort value + if len(hc.sort) == 1 && hc.cachedScoring[0] { + d.Sort = sortByScoreOpt + } else { + hc.sort.Value(d) + } + + // optimization, we track lowest sorting hit already removed from heap + // with this one comparision, we can avoid all heap operations if + // this hit would have been added and then immediately removed + if hc.lowestMatchOutsideResults != nil { + cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d, hc.lowestMatchOutsideResults) + if cmp >= 0 { + // this hit can't possibly be in the result set, so avoid heap ops + ctx.DocumentMatchPool.Put(d) + return nil + } + } + + hc.store.Add(d) + if hc.store.Len() > hc.size+hc.skip { + removed := hc.store.RemoveLast() + if hc.lowestMatchOutsideResults == nil { + hc.lowestMatchOutsideResults = removed + } else { + cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, removed, hc.lowestMatchOutsideResults) + if cmp < 0 { + tmp := hc.lowestMatchOutsideResults + hc.lowestMatchOutsideResults = removed + ctx.DocumentMatchPool.Put(tmp) + } + } + } + + return nil +} + +// SetFacetsBuilder registers a facet builder for this collector +func (hc *TopNCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) { + hc.facetsBuilder = facetsBuilder +} + +// finalizeResults starts with the heap containing the final top size+skip +// it now throws away the results to be skipped +// and does final doc id lookup (if necessary) +func (hc *TopNCollector) finalizeResults(r index.IndexReader) error { + var err error + hc.results, err = hc.store.Final(hc.skip, func(doc *search.DocumentMatch) error { + if doc.ID == "" { + // look up the id since we need it for lookup + var err error + doc.ID, err = r.FinalizeDocID(doc.IndexInternalID) + if err != nil { + return err + } + } + return nil + }) + + return err +} + +// Results returns the collected hits +func (hc *TopNCollector) Results() search.DocumentMatchCollection { + return hc.results +} + +// Total 
returns the total number of hits +func (hc *TopNCollector) Total() uint64 { + return hc.total +} + +// MaxScore returns the maximum score seen across all the hits +func (hc *TopNCollector) MaxScore() float64 { + return hc.maxScore +} + +// Took returns the time spent collecting hits +func (hc *TopNCollector) Took() time.Duration { + return hc.took +} + +// FacetResults returns the computed facets results +func (hc *TopNCollector) FacetResults() search.FacetResults { + if hc.facetsBuilder != nil { + return hc.facetsBuilder.Results() + } + return search.FacetResults{} +} diff --git a/search/collectors/collector_top_score_test.go b/search/collectors/topn_test.go similarity index 91% rename from search/collectors/collector_top_score_test.go rename to search/collectors/topn_test.go index f5b0c6b8..4c7b25b0 100644 --- a/search/collectors/collector_top_score_test.go +++ b/search/collectors/topn_test.go @@ -84,7 +84,7 @@ func TestTop10Scores(t *testing.T) { }, } - collector := NewTopScorerCollector(10) + collector := NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}}) err := collector.Collect(context.Background(), searcher, &stubReader{}) if err != nil { t.Fatal(err) @@ -103,6 +103,7 @@ func TestTop10Scores(t *testing.T) { results := collector.Results() if len(results) != 10 { + t.Logf("results: %v", results) t.Fatalf("expected 10 results, got %d", len(results)) } @@ -192,7 +193,7 @@ func TestTop10ScoresSkip10(t *testing.T) { }, } - collector := NewTopScorerSkipCollector(10, 10) + collector := NewTopNCollector(10, 10, search.SortOrder{&search.SortScore{Desc: true}}) err := collector.Collect(context.Background(), searcher, &stubReader{}) if err != nil { t.Fatal(err) @@ -219,7 +220,7 @@ func TestTop10ScoresSkip10(t *testing.T) { } if results[0].Score != 9.5 { - t.Errorf("expected highest score to be 9.5ß, got %f", results[0].Score) + t.Errorf("expected highest score to be 9.5, got %f", results[0].Score) } } @@ -289,7 +290,7 @@ func 
TestPaginationSameScores(t *testing.T) { } // first get first 5 hits - collector := NewTopScorerSkipCollector(5, 0) + collector := NewTopNCollector(5, 0, search.SortOrder{&search.SortScore{Desc: true}}) err := collector.Collect(context.Background(), searcher, &stubReader{}) if err != nil { t.Fatal(err) @@ -375,7 +376,7 @@ func TestPaginationSameScores(t *testing.T) { } // now get next 5 hits - collector = NewTopScorerSkipCollector(5, 5) + collector = NewTopNCollector(5, 5, search.SortOrder{&search.SortScore{Desc: true}}) err = collector.Collect(context.Background(), searcher, &stubReader{}) if err != nil { t.Fatal(err) @@ -398,21 +399,28 @@ func TestPaginationSameScores(t *testing.T) { t.Errorf("doc ID %s is in top 5 and next 5 result sets", hit.ID) } } - } func BenchmarkTop10of100000Scores(b *testing.B) { - benchHelper(10000, func() search.Collector { return NewTopScorerCollector(10) }, b) + benchHelper(10000, func() search.Collector { + return NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}}) + }, b) } func BenchmarkTop100of100000Scores(b *testing.B) { - benchHelper(10000, func() search.Collector { return NewTopScorerCollector(100) }, b) + benchHelper(10000, func() search.Collector { + return NewTopNCollector(100, 0, search.SortOrder{&search.SortScore{Desc: true}}) + }, b) } func BenchmarkTop10of1000000Scores(b *testing.B) { - benchHelper(100000, func() search.Collector { return NewTopScorerCollector(10) }, b) + benchHelper(100000, func() search.Collector { + return NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}}) + }, b) } func BenchmarkTop100of1000000Scores(b *testing.B) { - benchHelper(100000, func() search.Collector { return NewTopScorerCollector(100) }, b) + benchHelper(100000, func() search.Collector { + return NewTopNCollector(100, 0, search.SortOrder{&search.SortScore{Desc: true}}) + }, b) } diff --git a/search/facets_builder.go b/search/facets_builder.go index 55cb33ab..66f96a6a 100644 --- 
a/search/facets_builder.go +++ b/search/facets_builder.go @@ -42,12 +42,23 @@ func (fb *FacetsBuilder) Update(docMatch *DocumentMatch) error { for _, facetBuilder := range fb.facets { fields = append(fields, facetBuilder.Field()) } - fieldTerms, err := fb.indexReader.DocumentFieldTermsForFields(docMatch.IndexInternalID, fields) - if err != nil { - return err + + if len(fields) > 0 { + // find out which fields haven't been loaded yet + fieldsToLoad := docMatch.CachedFieldTerms.FieldsNotYetCached(fields) + // look them up + fieldTerms, err := fb.indexReader.DocumentFieldTerms(docMatch.IndexInternalID, fieldsToLoad) + if err != nil { + return err + } + // cache these as well + if docMatch.CachedFieldTerms == nil { + docMatch.CachedFieldTerms = make(map[string][]string) + } + docMatch.CachedFieldTerms.Merge(fieldTerms) } for _, facetBuilder := range fb.facets { - facetBuilder.Update(fieldTerms) + facetBuilder.Update(docMatch.CachedFieldTerms) } return nil } diff --git a/search/pool.go b/search/pool.go index 108d494b..5600f488 100644 --- a/search/pool.go +++ b/search/pool.go @@ -31,12 +31,13 @@ func defaultDocumentMatchPoolTooSmall(p *DocumentMatchPool) *DocumentMatch { // NewDocumentMatchPool will build a DocumentMatchPool with memory // pre-allocated to accomodate the requested number of DocumentMatch // instances -func NewDocumentMatchPool(size int) *DocumentMatchPool { +func NewDocumentMatchPool(size, sortsize int) *DocumentMatchPool { avail := make(DocumentMatchCollection, 0, size) // pre-allocate the expected number of instances startBlock := make([]DocumentMatch, size) // make these initial instances available for i := range startBlock { + startBlock[i].Sort = make([]string, 0, sortsize) avail = append(avail, &startBlock[i]) } return &DocumentMatchPool{ diff --git a/search/pool_test.go b/search/pool_test.go index 875f607c..51001757 100644 --- a/search/pool_test.go +++ b/search/pool_test.go @@ -16,7 +16,7 @@ func TestDocumentMatchPool(t *testing.T) { tooManyCalled 
:= false // create a pool - dmp := NewDocumentMatchPool(10) + dmp := NewDocumentMatchPool(10, 0) dmp.TooSmall = func(inner *DocumentMatchPool) *DocumentMatch { tooManyCalled = true return &DocumentMatch{} diff --git a/search/scorers/scorer_constant_test.go b/search/scorers/scorer_constant_test.go index e1e6e9c6..8238b07b 100644 --- a/search/scorers/scorer_constant_test.go +++ b/search/scorers/scorer_constant_test.go @@ -47,13 +47,14 @@ func TestConstantScorer(t *testing.T) { Value: 1.0, Message: "ConstantScore()", }, + Sort: []string{}, }, }, } for _, test := range tests { ctx := &search.SearchContext{ - DocumentMatchPool: search.NewDocumentMatchPool(1), + DocumentMatchPool: search.NewDocumentMatchPool(1, 0), } actual := scorer.Score(ctx, test.termMatch.ID) @@ -82,6 +83,7 @@ func TestConstantScorerWithQueryNorm(t *testing.T) { result: &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("one"), Score: 2.0, + Sort: []string{}, Expl: &search.Explanation{ Value: 2.0, Message: "weight(^1.000000), product of:", @@ -112,7 +114,7 @@ func TestConstantScorerWithQueryNorm(t *testing.T) { for _, test := range tests { ctx := &search.SearchContext{ - DocumentMatchPool: search.NewDocumentMatchPool(1), + DocumentMatchPool: search.NewDocumentMatchPool(1, 0), } actual := scorer.Score(ctx, test.termMatch.ID) diff --git a/search/scorers/scorer_term_test.go b/search/scorers/scorer_term_test.go index 0241d163..592463a4 100644 --- a/search/scorers/scorer_term_test.go +++ b/search/scorers/scorer_term_test.go @@ -50,6 +50,7 @@ func TestTermScorer(t *testing.T) { result: &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("one"), Score: math.Sqrt(1.0) * idf, + Sort: []string{}, Expl: &search.Explanation{ Value: math.Sqrt(1.0) * idf, Message: "fieldWeight(desc:beer in one), product of:", @@ -91,6 +92,7 @@ func TestTermScorer(t *testing.T) { result: &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("one"), Score: math.Sqrt(1.0) * idf, + Sort: []string{}, Expl: 
&search.Explanation{ Value: math.Sqrt(1.0) * idf, Message: "fieldWeight(desc:beer in one), product of:", @@ -121,6 +123,7 @@ func TestTermScorer(t *testing.T) { result: &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("one"), Score: math.Sqrt(65) * idf, + Sort: []string{}, Expl: &search.Explanation{ Value: math.Sqrt(65) * idf, Message: "fieldWeight(desc:beer in one), product of:", @@ -145,7 +148,7 @@ func TestTermScorer(t *testing.T) { for _, test := range tests { ctx := &search.SearchContext{ - DocumentMatchPool: search.NewDocumentMatchPool(1), + DocumentMatchPool: search.NewDocumentMatchPool(1, 0), } actual := scorer.Score(ctx, test.termMatch) @@ -187,6 +190,7 @@ func TestTermScorerWithQueryNorm(t *testing.T) { result: &search.DocumentMatch{ IndexInternalID: index.IndexInternalID("one"), Score: math.Sqrt(1.0) * idf * 3.0 * idf * 2.0, + Sort: []string{}, Expl: &search.Explanation{ Value: math.Sqrt(1.0) * idf * 3.0 * idf * 2.0, Message: "weight(desc:beer^3.000000 in one), product of:", @@ -235,7 +239,7 @@ func TestTermScorerWithQueryNorm(t *testing.T) { for _, test := range tests { ctx := &search.SearchContext{ - DocumentMatchPool: search.NewDocumentMatchPool(1), + DocumentMatchPool: search.NewDocumentMatchPool(1, 0), } actual := scorer.Score(ctx, test.termMatch) diff --git a/search/search.go b/search/search.go index 22abc064..5e43b748 100644 --- a/search/search.go +++ b/search/search.go @@ -9,7 +9,12 @@ package search -import "github.com/blevesearch/bleve/index" +import ( + "fmt" + + "github.com/blevesearch/bleve/document" + "github.com/blevesearch/bleve/index" +) type Location struct { Pos float64 `json:"pos"` @@ -60,11 +65,22 @@ type DocumentMatch struct { Expl *Explanation `json:"explanation,omitempty"` Locations FieldTermLocationMap `json:"locations,omitempty"` Fragments FieldFragmentMap `json:"fragments,omitempty"` + Sort []string `json:"sort,omitempty"` // Fields contains the values for document fields listed in // SearchRequest.Fields. 
Text fields are returned as strings, numeric // fields as float64s and date fields as time.RFC3339 formatted strings. Fields map[string]interface{} `json:"fields,omitempty"` + + // as we learn field terms, we can cache important ones for later use + // for example, sorting and building facets need these values + CachedFieldTerms index.FieldTerms `json:"-"` + + // if we load the document for this hit, remember it so we dont load again + Document *document.Document `json:"-"` + + // used to maintain natural index order + HitNumber uint64 `json:"-"` } func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) { @@ -91,14 +107,22 @@ func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) { // Reset allows an already allocated DocumentMatch to be reused func (dm *DocumentMatch) Reset() *DocumentMatch { // remember the []byte used for the IndexInternalID - indexInternalId := dm.IndexInternalID + indexInternalID := dm.IndexInternalID + // remember the []interface{} used for sort + sort := dm.Sort // idiom to copy over from empty DocumentMatch (0 allocations) *dm = DocumentMatch{} // reuse the []byte already allocated (and reset len to 0) - dm.IndexInternalID = indexInternalId[:0] + dm.IndexInternalID = indexInternalID[:0] + // reuse the []interface{} already allocated (and reset len to 0) + dm.Sort = sort[:0] return dm } +func (dm *DocumentMatch) String() string { + return fmt.Sprintf("[%s-%f]", string(dm.IndexInternalID), dm.Score) +} + type DocumentMatchCollection []*DocumentMatch func (c DocumentMatchCollection) Len() int { return len(c) } diff --git a/search/searchers/search_boolean_test.go b/search/searchers/search_boolean_test.go index ddd31d2e..c9aea8a2 100644 --- a/search/searchers/search_boolean_test.go +++ b/search/searchers/search_boolean_test.go @@ -344,7 +344,7 @@ func TestBooleanSearch(t *testing.T) { }() ctx := &search.SearchContext{ - DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize()), + 
DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0), } next, err := test.searcher.Next(ctx) i := 0 diff --git a/search/searchers/search_conjunction_test.go b/search/searchers/search_conjunction_test.go index 227a70ba..8554e0bf 100644 --- a/search/searchers/search_conjunction_test.go +++ b/search/searchers/search_conjunction_test.go @@ -189,7 +189,7 @@ func TestConjunctionSearch(t *testing.T) { }() ctx := &search.SearchContext{ - DocumentMatchPool: search.NewDocumentMatchPool(10), + DocumentMatchPool: search.NewDocumentMatchPool(10, 0), } next, err := test.searcher.Next(ctx) i := 0 diff --git a/search/searchers/search_disjunction_test.go b/search/searchers/search_disjunction_test.go index 5f194065..9fe7cc62 100644 --- a/search/searchers/search_disjunction_test.go +++ b/search/searchers/search_disjunction_test.go @@ -110,7 +110,7 @@ func TestDisjunctionSearch(t *testing.T) { }() ctx := &search.SearchContext{ - DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize()), + DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0), } next, err := test.searcher.Next(ctx) i := 0 @@ -164,7 +164,7 @@ func TestDisjunctionAdvance(t *testing.T) { } ctx := &search.SearchContext{ - DocumentMatchPool: search.NewDocumentMatchPool(martyOrDustinSearcher.DocumentMatchPoolSize()), + DocumentMatchPool: search.NewDocumentMatchPool(martyOrDustinSearcher.DocumentMatchPoolSize(), 0), } match, err := martyOrDustinSearcher.Advance(ctx, index.IndexInternalID("3")) if err != nil { diff --git a/search/searchers/search_docid_test.go b/search/searchers/search_docid_test.go index 00fe3db9..56b2da97 100644 --- a/search/searchers/search_docid_test.go +++ b/search/searchers/search_docid_test.go @@ -64,7 +64,7 @@ func testDocIDSearcher(t *testing.T, indexed, searched, wanted []string) { }() ctx := &search.SearchContext{ - DocumentMatchPool: search.NewDocumentMatchPool(searcher.DocumentMatchPoolSize()), + 
DocumentMatchPool: search.NewDocumentMatchPool(searcher.DocumentMatchPoolSize(), 0), } // Check the sequence diff --git a/search/searchers/search_fuzzy_test.go b/search/searchers/search_fuzzy_test.go index b4469666..086c1f72 100644 --- a/search/searchers/search_fuzzy_test.go +++ b/search/searchers/search_fuzzy_test.go @@ -107,7 +107,7 @@ func TestFuzzySearch(t *testing.T) { }() ctx := &search.SearchContext{ - DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize()), + DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0), } next, err := test.searcher.Next(ctx) i := 0 diff --git a/search/searchers/search_match_all_test.go b/search/searchers/search_match_all_test.go index 26e8a3c2..04b9421a 100644 --- a/search/searchers/search_match_all_test.go +++ b/search/searchers/search_match_all_test.go @@ -111,7 +111,7 @@ func TestMatchAllSearch(t *testing.T) { }() ctx := &search.SearchContext{ - DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize()), + DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0), } next, err := test.searcher.Next(ctx) i := 0 diff --git a/search/searchers/search_match_none_test.go b/search/searchers/search_match_none_test.go index 90ec526c..e7c8876a 100644 --- a/search/searchers/search_match_none_test.go +++ b/search/searchers/search_match_none_test.go @@ -52,7 +52,7 @@ func TestMatchNoneSearch(t *testing.T) { }() ctx := &search.SearchContext{ - DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize()), + DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0), } next, err := test.searcher.Next(ctx) i := 0 diff --git a/search/searchers/search_phrase_test.go b/search/searchers/search_phrase_test.go index 8c9a76d2..2a6a485f 100644 --- a/search/searchers/search_phrase_test.go +++ b/search/searchers/search_phrase_test.go @@ -70,7 +70,7 @@ func TestPhraseSearch(t 
*testing.T) { }() ctx := &search.SearchContext{ - DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize()), + DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0), } next, err := test.searcher.Next(ctx) i := 0 diff --git a/search/searchers/search_regexp_test.go b/search/searchers/search_regexp_test.go index f2859cb9..8ea1c720 100644 --- a/search/searchers/search_regexp_test.go +++ b/search/searchers/search_regexp_test.go @@ -87,7 +87,7 @@ func TestRegexpSearch(t *testing.T) { }() ctx := &search.SearchContext{ - DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize()), + DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0), } next, err := test.searcher.Next(ctx) i := 0 diff --git a/search/searchers/search_term_test.go b/search/searchers/search_term_test.go index c94803e1..9220d725 100644 --- a/search/searchers/search_term_test.go +++ b/search/searchers/search_term_test.go @@ -165,7 +165,7 @@ func TestTermSearcher(t *testing.T) { } ctx := &search.SearchContext{ - DocumentMatchPool: search.NewDocumentMatchPool(1), + DocumentMatchPool: search.NewDocumentMatchPool(1, 0), } docMatch, err := searcher.Next(ctx) if err != nil { diff --git a/search/sort.go b/search/sort.go new file mode 100644 index 00000000..64287422 --- /dev/null +++ b/search/sort.go @@ -0,0 +1,488 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+ +package search + +import ( + "encoding/json" + "fmt" + "sort" + "strings" + + "github.com/blevesearch/bleve/numeric_util" +) + +var HighTerm = strings.Repeat(string([]byte{0xff}), 10) +var LowTerm = string([]byte{0x00}) + +type SearchSort interface { + Value(a *DocumentMatch) string + Descending() bool + + RequiresDocID() bool + RequiresScoring() bool + RequiresFields() []string +} + +func ParseSearchSortObj(input map[string]interface{}) (SearchSort, error) { + descending, ok := input["desc"].(bool) + by, ok := input["by"].(string) + if !ok { + return nil, fmt.Errorf("search sort must specify by") + } + switch by { + case "id": + return &SortDocID{ + Desc: descending, + }, nil + case "score": + return &SortScore{ + Desc: descending, + }, nil + case "field": + field, ok := input["field"].(string) + if !ok { + return nil, fmt.Errorf("search sort mode field must specify field") + } + rv := &SortField{ + Field: field, + Desc: descending, + } + typ, ok := input["type"].(string) + if ok { + switch typ { + case "auto": + rv.Type = SortFieldAuto + case "string": + rv.Type = SortFieldAsString + case "number": + rv.Type = SortFieldAsNumber + case "date": + rv.Type = SortFieldAsDate + default: + return nil, fmt.Errorf("unknown sort field type: %s", typ) + } + } + mode, ok := input["mode"].(string) + if ok { + switch mode { + case "default": + rv.Mode = SortFieldDefault + case "min": + rv.Mode = SortFieldMin + case "max": + rv.Mode = SortFieldMax + default: + return nil, fmt.Errorf("unknown sort field mode: %s", mode) + } + } + missing, ok := input["missing"].(string) + if ok { + switch missing { + case "first": + rv.Missing = SortFieldMissingFirst + case "last": + rv.Missing = SortFieldMissingLast + default: + return nil, fmt.Errorf("unknown sort field missing: %s", missing) + } + } + return rv, nil + } + + return nil, fmt.Errorf("unknown search sort by: %s", by) +} + +func ParseSearchSortString(input string) SearchSort { + descending := false + if strings.HasPrefix(input, 
"-") { + descending = true + input = input[1:] + } else if strings.HasPrefix(input, "+") { + input = input[1:] + } + if input == "_id" { + return &SortDocID{ + Desc: descending, + } + } else if input == "_score" { + return &SortScore{ + Desc: descending, + } + } + return &SortField{ + Field: input, + Desc: descending, + } +} + +func ParseSearchSortJSON(input json.RawMessage) (SearchSort, error) { + // first try to parse it as string + var sortString string + err := json.Unmarshal(input, &sortString) + if err != nil { + var sortObj map[string]interface{} + err = json.Unmarshal(input, &sortObj) + if err != nil { + return nil, err + } + return ParseSearchSortObj(sortObj) + } + return ParseSearchSortString(sortString), nil +} + +func ParseSortOrderStrings(in []string) SortOrder { + rv := make(SortOrder, 0, len(in)) + for _, i := range in { + ss := ParseSearchSortString(i) + rv = append(rv, ss) + } + return rv +} + +func ParseSortOrderJSON(in []json.RawMessage) (SortOrder, error) { + rv := make(SortOrder, 0, len(in)) + for _, i := range in { + ss, err := ParseSearchSortJSON(i) + if err != nil { + return nil, err + } + rv = append(rv, ss) + } + return rv, nil +} + +type SortOrder []SearchSort + +func (so SortOrder) Value(doc *DocumentMatch) { + for _, soi := range so { + doc.Sort = append(doc.Sort, soi.Value(doc)) + } +} + +// Compare will compare two document matches using the specified sort order +// if both are numbers, we avoid converting back to term +func (so SortOrder) Compare(cachedScoring, cachedDesc []bool, i, j *DocumentMatch) int { + // compare the documents on all search sorts until a differences is found + for x := range so { + c := 0 + if cachedScoring[x] { + if i.Score < j.Score { + c = -1 + } else if i.Score > j.Score { + c = 1 + } + } else { + iVal := i.Sort[x] + jVal := j.Sort[x] + c = strings.Compare(iVal, jVal) + } + + if c == 0 { + continue + } + if cachedDesc[x] { + c = -c + } + return c + } + // if they are the same at this point, impose order 
based on index natural sort order + if i.HitNumber == j.HitNumber { + return 0 + } else if i.HitNumber > j.HitNumber { + return 1 + } + return -1 +} + +func (so SortOrder) RequiresScore() bool { + rv := false + for _, soi := range so { + if soi.RequiresScoring() { + rv = true + } + } + return rv +} + +func (so SortOrder) RequiresDocID() bool { + rv := false + for _, soi := range so { + if soi.RequiresDocID() { + rv = true + } + } + return rv +} + +func (so SortOrder) RequiredFields() []string { + var rv []string + for _, soi := range so { + rv = append(rv, soi.RequiresFields()...) + } + return rv +} + +func (so SortOrder) CacheIsScore() []bool { + var rv []bool + for _, soi := range so { + rv = append(rv, soi.RequiresScoring()) + } + return rv +} + +func (so SortOrder) CacheDescending() []bool { + var rv []bool + for _, soi := range so { + rv = append(rv, soi.Descending()) + } + return rv +} + +// SortFieldType lets you control some internal sort behavior +// normally leaving this to the zero-value of SortFieldAuto is fine +type SortFieldType int + +const ( + // SortFieldAuto applies heuristics attempt to automatically sort correctly + SortFieldAuto SortFieldType = iota + // SortFieldAsString forces sort as string (no prefix coded terms removed) + SortFieldAsString + // SortFieldAsNumber forces sort as string (prefix coded terms with shift > 0 removed) + SortFieldAsNumber + // SortFieldAsDate forces sort as string (prefix coded terms with shift > 0 removed) + SortFieldAsDate +) + +// SortFieldMode describes the behavior if the field has multiple values +type SortFieldMode int + +const ( + // SortFieldDefault uses the first (or only) value, this is the default zero-value + SortFieldDefault SortFieldMode = iota // FIXME name is confusing + // SortFieldMin uses the minimum value + SortFieldMin + // SortFieldMax uses the maximum value + SortFieldMax +) + +// SortFieldMissing controls where documents missing a field value should be sorted +type SortFieldMissing int + 
+const ( + // SortFieldMissingLast sorts documents missing a field at the end + SortFieldMissingLast SortFieldMissing = iota + + // SortFieldMissingFirst sorts documents missing a field at the beginning + SortFieldMissingFirst +) + +// SortField will sort results by the value of a stored field +// Field is the name of the field +// Descending reverse the sort order (default false) +// Type allows forcing of string/number/date behavior (default auto) +// Mode controls behavior for multi-values fields (default first) +// Missing controls behavior of missing values (default last) +type SortField struct { + Field string + Desc bool + Type SortFieldType + Mode SortFieldMode + Missing SortFieldMissing +} + +// Value returns the sort value of the DocumentMatch +func (s *SortField) Value(i *DocumentMatch) string { + iTerms := i.CachedFieldTerms[s.Field] + iTerms = s.filterTermsByType(iTerms) + iTerm := s.filterTermsByMode(iTerms) + return iTerm +} + +// Descending determines the order of the sort +func (s *SortField) Descending() bool { + return s.Desc +} + +func (s *SortField) filterTermsByMode(terms []string) string { + if len(terms) == 1 || (len(terms) > 1 && s.Mode == SortFieldDefault) { + return terms[0] + } else if len(terms) > 1 { + switch s.Mode { + case SortFieldMin: + sort.Strings(terms) + return terms[0] + case SortFieldMax: + sort.Strings(terms) + return terms[len(terms)-1] + } + } + + // handle missing terms + if s.Missing == SortFieldMissingLast { + if s.Desc { + return LowTerm + } + return HighTerm + } + if s.Desc { + return HighTerm + } + return LowTerm +} + +// filterTermsByType attempts to make one pass on the terms +// if we are in auto-mode AND all the terms look like prefix-coded numbers +// return only the terms which had shift of 0 +// if we are in explicit number or date mode, return only valid +// prefix coded numbers with shift of 0 +func (s *SortField) filterTermsByType(terms []string) []string { + stype := s.Type + if stype == SortFieldAuto { + 
allTermsPrefixCoded := true
+		var termsWithShiftZero []string
+		for _, term := range terms {
+			valid, shift := numeric_util.ValidPrefixCodedTerm(term)
+			if valid && shift == 0 {
+				termsWithShiftZero = append(termsWithShiftZero, term)
+			} else if !valid {
+				allTermsPrefixCoded = false
+			}
+		}
+		if allTermsPrefixCoded {
+			terms = termsWithShiftZero
+		}
+	} else if stype == SortFieldAsNumber || stype == SortFieldAsDate {
+		var termsWithShiftZero []string
+		for _, term := range terms {
+			valid, shift := numeric_util.ValidPrefixCodedTerm(term)
+			if valid && shift == 0 {
+				termsWithShiftZero = append(termsWithShiftZero, term)
+			}
+		}
+		terms = termsWithShiftZero
+	}
+	return terms
+}
+
+// RequiresDocID says this SearchSort does not require the DocID be loaded
+func (s *SortField) RequiresDocID() bool { return false }
+
+// RequiresScoring says this SearchSort does not require scoring
+func (s *SortField) RequiresScoring() bool { return false }
+
+// RequiresFields says this SearchSort requires the specified stored field
+func (s *SortField) RequiresFields() []string { return []string{s.Field} }
+
+func (s *SortField) MarshalJSON() ([]byte, error) {
+	// see if simple format can be used
+	if s.Missing == SortFieldMissingLast &&
+		s.Mode == SortFieldDefault &&
+		s.Type == SortFieldAuto {
+		if s.Desc {
+			return json.Marshal("-" + s.Field)
+		}
+		return json.Marshal(s.Field)
+	}
+	sfm := map[string]interface{}{
+		"by":    "field",
+		"field": s.Field,
+	}
+	if s.Desc {
+		sfm["desc"] = true
+	}
+	if s.Missing > SortFieldMissingLast {
+		switch s.Missing {
+		case SortFieldMissingFirst:
+			sfm["missing"] = "first"
+		}
+	}
+	if s.Mode > SortFieldDefault {
+		switch s.Mode {
+		case SortFieldMin:
+			sfm["mode"] = "min"
+		case SortFieldMax:
+			sfm["mode"] = "max"
+		}
+	}
+	if s.Type > SortFieldAuto {
+		switch s.Type {
+		case SortFieldAsString:
+			sfm["type"] = "string"
+		case SortFieldAsNumber:
+			sfm["type"] = "number"
+		case SortFieldAsDate:
+			sfm["type"] = "date"
+		}
+	}
+
+	return 
json.Marshal(sfm)
+}
+
+// SortDocID will sort results by the document identifier
+type SortDocID struct {
+	Desc bool
+}
+
+// Value returns the sort value of the DocumentMatch
+func (s *SortDocID) Value(i *DocumentMatch) string {
+	return i.ID
+}
+
+// Descending determines the order of the sort
+func (s *SortDocID) Descending() bool {
+	return s.Desc
+}
+
+// RequiresDocID says this SearchSort does require the DocID be loaded
+func (s *SortDocID) RequiresDocID() bool { return true }
+
+// RequiresScoring says this SearchSort does not require scoring
+func (s *SortDocID) RequiresScoring() bool { return false }
+
+// RequiresFields says this SearchSort does not require any stored fields
+func (s *SortDocID) RequiresFields() []string { return nil }
+
+func (s *SortDocID) MarshalJSON() ([]byte, error) {
+	if s.Desc {
+		return json.Marshal("-_id")
+	}
+	return json.Marshal("_id")
+}
+
+// SortScore will sort results by the document match score
+type SortScore struct {
+	Desc bool
+}
+
+// Value returns the sort value of the DocumentMatch
+func (s *SortScore) Value(i *DocumentMatch) string {
+	return "_score"
+}
+
+// Descending determines the order of the sort
+func (s *SortScore) Descending() bool {
+	return s.Desc
+}
+
+// RequiresDocID says this SearchSort does not require the DocID be loaded
+func (s *SortScore) RequiresDocID() bool { return false }
+
+// RequiresScoring says this SearchSort does require scoring
+func (s *SortScore) RequiresScoring() bool { return true }
+
+// RequiresFields says this SearchSort does not require any stored fields
+func (s *SortScore) RequiresFields() []string { return nil }
+
+func (s *SortScore) MarshalJSON() ([]byte, error) {
+	if s.Desc {
+		return json.Marshal("-_score")
+	}
+	return json.Marshal("_score")
+}
diff --git a/test/tests/sort/data/a.json b/test/tests/sort/data/a.json
new file mode 100644
index 00000000..66ac5969
--- /dev/null
+++ b/test/tests/sort/data/a.json
@@ -0,0 +1,8 @@
+{
+	"id": "a",
+	"name": "marty",
	
"age": 19, + "born": "2014-11-25", + "title": "mista", + "tags": ["gopher", "belieber"] +} diff --git a/test/tests/sort/data/b.json b/test/tests/sort/data/b.json new file mode 100644 index 00000000..0b84ce89 --- /dev/null +++ b/test/tests/sort/data/b.json @@ -0,0 +1,8 @@ +{ + "id": "b", + "name": "steve", + "age": 21, + "born": "2000-09-11", + "title": "zebra", + "tags": ["thought-leader", "futurist"] +} diff --git a/test/tests/sort/data/c.json b/test/tests/sort/data/c.json new file mode 100644 index 00000000..a1b17b00 --- /dev/null +++ b/test/tests/sort/data/c.json @@ -0,0 +1,8 @@ +{ + "id": "c", + "name": "aster", + "age": 21, + "born": "1954-02-02", + "title": "blogger", + "tags": ["red", "blue", "green"] +} diff --git a/test/tests/sort/data/d.json b/test/tests/sort/data/d.json new file mode 100644 index 00000000..926869a8 --- /dev/null +++ b/test/tests/sort/data/d.json @@ -0,0 +1,7 @@ +{ + "id": "d", + "age": 65, + "born": "1978-12-02", + "title": "agent", + "tags": ["cats"] +} diff --git a/test/tests/sort/data/e.json b/test/tests/sort/data/e.json new file mode 100644 index 00000000..436f010b --- /dev/null +++ b/test/tests/sort/data/e.json @@ -0,0 +1,7 @@ +{ + "id": "e", + "name": "nancy", + "born": "1954-10-22", + "title": "rapstar", + "tags": ["pain"] +} diff --git a/test/tests/sort/data/f.json b/test/tests/sort/data/f.json new file mode 100644 index 00000000..14f0921a --- /dev/null +++ b/test/tests/sort/data/f.json @@ -0,0 +1,7 @@ +{ + "id": "f", + "name": "frank", + "age": 1, + "title": "taxman", + "tags": ["vitamin","purple"] +} diff --git a/test/tests/sort/mapping.json b/test/tests/sort/mapping.json new file mode 100644 index 00000000..0db3279e --- /dev/null +++ b/test/tests/sort/mapping.json @@ -0,0 +1,3 @@ +{ + +} diff --git a/test/tests/sort/searches.json b/test/tests/sort/searches.json new file mode 100644 index 00000000..a679f0b8 --- /dev/null +++ b/test/tests/sort/searches.json @@ -0,0 +1,443 @@ +[ + { + "comment": "default order, all have same 
score, then by natural index order", + "search": { + "from": 0, + "size": 10, + "query": { + "match_all":{} + } + }, + "result": { + "total_hits": 6, + "hits": [ + { + "id": "a" + }, + { + "id": "b" + }, + { + "id": "c" + }, + { + "id": "d" + }, + { + "id": "e" + }, + { + "id": "f" + } + ] + } + }, + { + "comment": "sort by name, ascending", + "search": { + "from": 0, + "size": 10, + "query": { + "match_all":{} + }, + "sort": ["name"] + }, + "result": { + "total_hits": 6, + "hits": [ + { + "id": "c" + }, + { + "id": "f" + }, + { + "id": "a" + }, + { + "id": "e" + }, + { + "id": "b" + }, + { + "id": "d" + } + ] + } + }, + { + "comment": "sort by name, descending", + "search": { + "from": 0, + "size": 10, + "query": { + "match_all":{} + }, + "sort": ["-name"] + }, + "result": { + "total_hits": 6, + "hits": [ + { + "id": "b" + }, + { + "id": "e" + }, + { + "id": "a" + }, + { + "id": "f" + }, + { + "id": "c" + }, + { + "id": "d" + } + ] + } + }, + { + "comment": "sort by name, descending, missing first", + "search": { + "from": 0, + "size": 10, + "query": { + "match_all":{} + }, + "sort": [{"by":"field","field":"name","missing":"first","desc":true}] + }, + "result": { + "total_hits": 6, + "hits": [ + { + "id": "d" + }, + { + "id": "b" + }, + { + "id": "e" + }, + { + "id": "a" + }, + { + "id": "f" + }, + { + "id": "c" + } + ] + } + }, + { + "comment": "sort by age, ascending", + "search": { + "from": 0, + "size": 10, + "query": { + "match_all":{} + }, + "sort": ["age"] + }, + "result": { + "total_hits": 6, + "hits": [ + { + "id": "f" + }, + { + "id": "a" + }, + { + "id": "b" + }, + { + "id": "c" + }, + { + "id": "d" + }, + { + "id": "e" + } + ] + } + }, + { + "comment": "sort by age, descending", + "search": { + "from": 0, + "size": 10, + "query": { + "match_all":{} + }, + "sort": ["-age"] + }, + "result": { + "total_hits": 6, + "hits": [ + { + "id": "d" + }, + { + "id": "b" + }, + { + "id": "c" + }, + { + "id": "a" + }, + { + "id": "f" + }, + { + "id": "e" + } + ] + } 
+ }, + { + "comment": "sort by age, descending, missing first", + "search": { + "from": 0, + "size": 10, + "query": { + "match_all":{} + }, + "sort": [{"by":"field","field":"age","missing":"first","desc":true}] + }, + "result": { + "total_hits": 6, + "hits": [ + { + "id": "e" + }, + { + "id": "d" + }, + { + "id": "b" + }, + { + "id": "c" + }, + { + "id": "a" + }, + { + "id": "f" + } + ] + } + }, + { + "comment": "sort by born, ascending", + "search": { + "from": 0, + "size": 10, + "query": { + "match_all":{} + }, + "sort": ["born"] + }, + "result": { + "total_hits": 6, + "hits": [ + { + "id": "c" + }, + { + "id": "e" + }, + { + "id": "d" + }, + { + "id": "b" + }, + { + "id": "a" + }, + { + "id": "f" + } + ] + } + }, + { + "comment": "sort by born, descending", + "search": { + "from": 0, + "size": 10, + "query": { + "match_all":{} + }, + "sort": ["-born"] + }, + "result": { + "total_hits": 6, + "hits": [ + { + "id": "a" + }, + { + "id": "b" + }, + { + "id": "d" + }, + { + "id": "e" + }, + { + "id": "c" + }, + { + "id": "f" + } + ] + } + }, + { + "comment": "sort by born, descending, missing first", + "search": { + "from": 0, + "size": 10, + "query": { + "match_all":{} + }, + "sort": [{"by":"field","field":"born","missing":"first","desc":true}] + }, + "result": { + "total_hits": 6, + "hits": [ + { + "id": "f" + }, + { + "id": "a" + }, + { + "id": "b" + }, + { + "id": "d" + }, + { + "id": "e" + }, + { + "id": "c" + } + ] + } + }, + { + "comment": "sort on multi-valued field", + "search": { + "from": 0, + "size": 10, + "query": { + "match_all":{} + }, + "sort": [{"by":"field","field":"tags","mode":"min"}] + }, + "result": { + "total_hits": 6, + "hits": [ + { + "id": "a" + }, + { + "id": "c" + }, + { + "id": "d" + }, + { + "id": "b" + }, + { + "id": "e" + }, + { + "id": "f" + } + ] + } + }, + { + "comment": "multi-column sort by age, ascending, name, ascending (flips b and c which have same age)", + "search": { + "from": 0, + "size": 10, + "query": { + "match_all":{} + 
}, + "sort": ["age", "name"] + }, + "result": { + "total_hits": 6, + "hits": [ + { + "id": "f" + }, + { + "id": "a" + }, + { + "id": "c" + }, + { + "id": "b" + }, + { + "id": "d" + }, + { + "id": "e" + } + ] + } + }, + { + "comment": "sort by docid descending", + "search": { + "from": 0, + "size": 10, + "query": { + "match_all":{} + }, + "sort": ["-_id"] + }, + "result": { + "total_hits": 6, + "hits": [ + { + "id": "f" + }, + { + "id": "e" + }, + { + "id": "d" + }, + { + "id": "c" + }, + { + "id": "b" + }, + { + "id": "a" + } + ] + } + } +]