0
0

Merge branch 'sort-by-field-try2'

This commit is contained in:
Marty Schoch 2016-08-26 17:58:38 -04:00
commit 4a25034ddd
44 changed files with 1906 additions and 266 deletions

View File

@ -9,9 +9,7 @@
package document package document
import ( import "fmt"
"fmt"
)
type Document struct { type Document struct {
ID string `json:"id"` ID string `json:"id"`

View File

@ -15,6 +15,7 @@ import (
"testing" "testing"
"time" "time"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/highlight/highlighters/ansi" "github.com/blevesearch/bleve/search/highlight/highlighters/ansi"
) )
@ -61,11 +62,13 @@ func ExampleIndex_indexing() {
data := struct { data := struct {
Name string Name string
Created time.Time Created time.Time
}{Name: "named one", Created: time.Now()} Age int
}{Name: "named one", Created: time.Now(), Age: 50}
data2 := struct { data2 := struct {
Name string Name string
Created time.Time Created time.Time
}{Name: "great nameless one", Created: time.Now()} Age int
}{Name: "great nameless one", Created: time.Now(), Age: 25}
// index some data // index some data
err = example_index.Index("document id 1", data) err = example_index.Index("document id 1", data)
@ -504,3 +507,46 @@ func ExampleDocumentMapping_AddFieldMappingsAt() {
// Output: // Output:
// 1 // 1
} }
// ExampleSearchRequest_SortBy demonstrates ordering hits by the value of
// the "Age" field instead of by relevance score.
func ExampleSearchRequest_SortBy() {
	// find docs containing "one", order by Age instead of score
	query := NewMatchQuery("one")
	searchRequest := NewSearchRequest(query)
	searchRequest.SortBy([]string{"Age"})
	searchResults, err := example_index.Search(searchRequest)
	if err != nil {
		panic(err)
	}
	// doc 2 (Age 25) sorts before doc 1 (Age 50)
	fmt.Println(searchResults.Hits[0].ID)
	fmt.Println(searchResults.Hits[1].ID)
	// Output:
	// document id 2
	// document id 1
}
// ExampleSearchRequest_SortByCustom demonstrates the explicit SortOrder
// form, here ordering by Age with documents missing the field first.
func ExampleSearchRequest_SortByCustom() {
	// find all docs, order by Age, with docs missing Age field first
	query := NewMatchAllQuery()
	searchRequest := NewSearchRequest(query)
	searchRequest.SortByCustom(search.SortOrder{
		&search.SortField{
			Field:   "Age",
			Missing: search.SortFieldMissingFirst,
		},
	})
	searchResults, err := example_index.Search(searchRequest)
	if err != nil {
		panic(err)
	}
	fmt.Println(searchResults.Hits[0].ID)
	fmt.Println(searchResults.Hits[1].ID)
	fmt.Println(searchResults.Hits[2].ID)
	fmt.Println(searchResults.Hits[3].ID)
	// Output:
	// document id 3
	// document id 4
	// document id 2
	// document id 1
}

View File

@ -79,8 +79,7 @@ type IndexReader interface {
FieldDictPrefix(field string, termPrefix []byte) (FieldDict, error) FieldDictPrefix(field string, termPrefix []byte) (FieldDict, error)
Document(id string) (*document.Document, error) Document(id string) (*document.Document, error)
DocumentFieldTerms(id IndexInternalID) (FieldTerms, error) DocumentFieldTerms(id IndexInternalID, fields []string) (FieldTerms, error)
DocumentFieldTermsForFields(id IndexInternalID, fields []string) (FieldTerms, error)
Fields() ([]string, error) Fields() ([]string, error)
@ -93,8 +92,29 @@ type IndexReader interface {
Close() error Close() error
} }
// FieldTerms contains the terms used by a document, keyed by field
type FieldTerms map[string][]string type FieldTerms map[string][]string
// FieldsNotYetCached filters the supplied field names down to those that
// do not yet have an entry in this FieldTerms map. Returns nil when every
// field is already present.
func (f FieldTerms) FieldsNotYetCached(fields []string) []string {
	var missing []string
	for _, name := range fields {
		if _, cached := f[name]; !cached {
			missing = append(missing, name)
		}
	}
	return missing
}
// Merge folds the entries of other into the receiver. Each terms list is
// assumed to be complete, so an entry from other replaces any existing
// entry for the same field outright rather than being combined with it.
func (f FieldTerms) Merge(other FieldTerms) {
	for k, v := range other {
		f[k] = v
	}
}
type TermFieldVector struct { type TermFieldVector struct {
Field string Field string
ArrayPositions []uint64 ArrayPositions []uint64

View File

@ -10,8 +10,6 @@
package upside_down package upside_down
import ( import (
"fmt"
"github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/index/store"
@ -98,25 +96,7 @@ func (i *IndexReader) Document(id string) (doc *document.Document, err error) {
return return
} }
func (i *IndexReader) DocumentFieldTerms(id index.IndexInternalID) (index.FieldTerms, error) { func (i *IndexReader) DocumentFieldTerms(id index.IndexInternalID, fields []string) (index.FieldTerms, error) {
back, err := i.index.backIndexRowForDoc(i.kvreader, id)
if err != nil {
return nil, err
}
rv := make(index.FieldTerms, len(back.termEntries))
for _, entry := range back.termEntries {
fieldName := i.index.fieldCache.FieldIndexed(uint16(*entry.Field))
terms, ok := rv[fieldName]
if !ok {
terms = make([]string, 0)
}
terms = append(terms, *entry.Term)
rv[fieldName] = terms
}
return rv, nil
}
func (i *IndexReader) DocumentFieldTermsForFields(id index.IndexInternalID, fields []string) (index.FieldTerms, error) {
back, err := i.index.backIndexRowForDoc(i.kvreader, id) back, err := i.index.backIndexRowForDoc(i.kvreader, id)
if err != nil { if err != nil {
return nil, err return nil, err
@ -125,10 +105,9 @@ func (i *IndexReader) DocumentFieldTermsForFields(id index.IndexInternalID, fiel
fieldsMap := make(map[uint16]string, len(fields)) fieldsMap := make(map[uint16]string, len(fields))
for _, f := range fields { for _, f := range fields {
id, ok := i.index.fieldCache.FieldNamed(f, false) id, ok := i.index.fieldCache.FieldNamed(f, false)
if !ok { if ok {
return nil, fmt.Errorf("Field %s was not found in cache", f) fieldsMap[id] = f
} }
fieldsMap[id] = f
} }
for _, entry := range back.termEntries { for _, entry := range back.termEntries {
if field, ok := fieldsMap[uint16(*entry.Field)]; ok { if field, ok := fieldsMap[uint16(*entry.Field)]; ok {

View File

@ -1179,7 +1179,7 @@ func TestIndexDocumentFieldTerms(t *testing.T) {
} }
}() }()
fieldTerms, err := indexReader.DocumentFieldTerms(index.IndexInternalID("1")) fieldTerms, err := indexReader.DocumentFieldTerms(index.IndexInternalID("1"), []string{"name", "title"})
if err != nil { if err != nil {
t.Error(err) t.Error(err)
} }

View File

@ -474,6 +474,7 @@ func createChildSearchRequest(req *SearchRequest) *SearchRequest {
Fields: req.Fields, Fields: req.Fields,
Facets: req.Facets, Facets: req.Facets,
Explain: req.Explain, Explain: req.Explain,
Sort: req.Sort,
} }
return &rv return &rv
} }
@ -568,8 +569,11 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se
} }
} }
// first sort it by score // sort all hits with the requested order
sort.Sort(sr.Hits) if len(req.Sort) > 0 {
sorter := newMultiSearchHitSorter(req.Sort, sr.Hits)
sort.Sort(sorter)
}
// now skip over the correct From // now skip over the correct From
if req.From > 0 && len(sr.Hits) > req.From { if req.From > 0 && len(sr.Hits) > req.From {
@ -645,3 +649,26 @@ func (f *indexAliasImplFieldDict) Close() error {
defer f.index.mutex.RUnlock() defer f.index.mutex.RUnlock()
return f.fieldDict.Close() return f.fieldDict.Close()
} }
// multiSearchHitSorter implements sort.Interface over the merged hits of
// a multi-index search, ordering them by a search.SortOrder.
type multiSearchHitSorter struct {
	hits          search.DocumentMatchCollection
	sort          search.SortOrder
	cachedScoring []bool
	cachedDesc    []bool
}

// newMultiSearchHitSorter precomputes the per-key scoring/descending
// flags once so they are not rebuilt on every comparison.
func newMultiSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *multiSearchHitSorter {
	return &multiSearchHitSorter{
		hits:          hits,
		sort:          sort,
		cachedScoring: sort.CacheIsScore(),
		cachedDesc:    sort.CacheDescending(),
	}
}

func (m *multiSearchHitSorter) Len() int { return len(m.hits) }

func (m *multiSearchHitSorter) Swap(i, j int) { m.hits[j], m.hits[i] = m.hits[i], m.hits[j] }

func (m *multiSearchHitSorter) Less(i, j int) bool {
	return m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j]) < 0
}

View File

@ -11,6 +11,7 @@ import (
"github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/index/store"
"github.com/blevesearch/bleve/numeric_util"
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
) )
@ -451,6 +452,8 @@ func TestIndexAliasEmpty(t *testing.T) {
} }
func TestIndexAliasMulti(t *testing.T) { func TestIndexAliasMulti(t *testing.T) {
score1, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(1.0), 0)
score2, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(2.0), 0)
ei1Count := uint64(7) ei1Count := uint64(7)
ei1 := &stubIndex{ ei1 := &stubIndex{
err: nil, err: nil,
@ -466,6 +469,7 @@ func TestIndexAliasMulti(t *testing.T) {
{ {
ID: "a", ID: "a",
Score: 1.0, Score: 1.0,
Sort: []string{string(score1)},
}, },
}, },
MaxScore: 1.0, MaxScore: 1.0,
@ -485,6 +489,7 @@ func TestIndexAliasMulti(t *testing.T) {
{ {
ID: "b", ID: "b",
Score: 2.0, Score: 2.0,
Sort: []string{string(score2)},
}, },
}, },
MaxScore: 2.0, MaxScore: 2.0,
@ -572,10 +577,12 @@ func TestIndexAliasMulti(t *testing.T) {
{ {
ID: "b", ID: "b",
Score: 2.0, Score: 2.0,
Sort: []string{string(score2)},
}, },
{ {
ID: "a", ID: "a",
Score: 1.0, Score: 1.0,
Sort: []string{string(score1)},
}, },
}, },
MaxScore: 2.0, MaxScore: 2.0,
@ -601,6 +608,8 @@ func TestIndexAliasMulti(t *testing.T) {
// TestMultiSearchNoError // TestMultiSearchNoError
func TestMultiSearchNoError(t *testing.T) { func TestMultiSearchNoError(t *testing.T) {
score1, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(1.0), 0)
score2, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(2.0), 0)
ei1 := &stubIndex{err: nil, searchResult: &SearchResult{ ei1 := &stubIndex{err: nil, searchResult: &SearchResult{
Status: &SearchStatus{ Status: &SearchStatus{
Total: 1, Total: 1,
@ -613,6 +622,7 @@ func TestMultiSearchNoError(t *testing.T) {
Index: "1", Index: "1",
ID: "a", ID: "a",
Score: 1.0, Score: 1.0,
Sort: []string{string(score1)},
}, },
}, },
MaxScore: 1.0, MaxScore: 1.0,
@ -629,6 +639,7 @@ func TestMultiSearchNoError(t *testing.T) {
Index: "2", Index: "2",
ID: "b", ID: "b",
Score: 2.0, Score: 2.0,
Sort: []string{string(score2)},
}, },
}, },
MaxScore: 2.0, MaxScore: 2.0,
@ -648,11 +659,13 @@ func TestMultiSearchNoError(t *testing.T) {
Index: "2", Index: "2",
ID: "b", ID: "b",
Score: 2.0, Score: 2.0,
Sort: []string{string(score2)},
}, },
{ {
Index: "1", Index: "1",
ID: "a", ID: "a",
Score: 1.0, Score: 1.0,
Sort: []string{string(score1)},
}, },
}, },
MaxScore: 2.0, MaxScore: 2.0,
@ -784,6 +797,8 @@ func TestMultiSearchSecondPage(t *testing.T) {
// 2. no searchers finish before the timeout // 2. no searchers finish before the timeout
// 3. no searches finish before cancellation // 3. no searches finish before cancellation
func TestMultiSearchTimeout(t *testing.T) { func TestMultiSearchTimeout(t *testing.T) {
score1, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(1.0), 0)
score2, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(2.0), 0)
ei1 := &stubIndex{ ei1 := &stubIndex{
name: "ei1", name: "ei1",
checkRequest: func(req *SearchRequest) error { checkRequest: func(req *SearchRequest) error {
@ -803,6 +818,7 @@ func TestMultiSearchTimeout(t *testing.T) {
Index: "1", Index: "1",
ID: "a", ID: "a",
Score: 1.0, Score: 1.0,
Sort: []string{string(score1)},
}, },
}, },
MaxScore: 1.0, MaxScore: 1.0,
@ -826,6 +842,7 @@ func TestMultiSearchTimeout(t *testing.T) {
Index: "2", Index: "2",
ID: "b", ID: "b",
Score: 2.0, Score: 2.0,
Sort: []string{string(score2)},
}, },
}, },
MaxScore: 2.0, MaxScore: 2.0,
@ -909,6 +926,9 @@ func TestMultiSearchTimeout(t *testing.T) {
// TestMultiSearchTimeoutPartial tests the case where some indexes exceed // TestMultiSearchTimeoutPartial tests the case where some indexes exceed
// the timeout, while others complete successfully // the timeout, while others complete successfully
func TestMultiSearchTimeoutPartial(t *testing.T) { func TestMultiSearchTimeoutPartial(t *testing.T) {
score1, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(1.0), 0)
score2, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(2.0), 0)
score3, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(3.0), 0)
ei1 := &stubIndex{ ei1 := &stubIndex{
name: "ei1", name: "ei1",
err: nil, err: nil,
@ -924,6 +944,7 @@ func TestMultiSearchTimeoutPartial(t *testing.T) {
Index: "1", Index: "1",
ID: "a", ID: "a",
Score: 1.0, Score: 1.0,
Sort: []string{string(score1)},
}, },
}, },
MaxScore: 1.0, MaxScore: 1.0,
@ -943,6 +964,7 @@ func TestMultiSearchTimeoutPartial(t *testing.T) {
Index: "2", Index: "2",
ID: "b", ID: "b",
Score: 2.0, Score: 2.0,
Sort: []string{string(score2)},
}, },
}, },
MaxScore: 2.0, MaxScore: 2.0,
@ -967,6 +989,7 @@ func TestMultiSearchTimeoutPartial(t *testing.T) {
Index: "3", Index: "3",
ID: "c", ID: "c",
Score: 3.0, Score: 3.0,
Sort: []string{string(score3)},
}, },
}, },
MaxScore: 3.0, MaxScore: 3.0,
@ -993,11 +1016,13 @@ func TestMultiSearchTimeoutPartial(t *testing.T) {
Index: "2", Index: "2",
ID: "b", ID: "b",
Score: 2.0, Score: 2.0,
Sort: []string{string(score2)},
}, },
{ {
Index: "1", Index: "1",
ID: "a", ID: "a",
Score: 1.0, Score: 1.0,
Sort: []string{string(score1)},
}, },
}, },
MaxScore: 2.0, MaxScore: 2.0,
@ -1014,6 +1039,10 @@ func TestMultiSearchTimeoutPartial(t *testing.T) {
} }
func TestIndexAliasMultipleLayer(t *testing.T) { func TestIndexAliasMultipleLayer(t *testing.T) {
score1, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(1.0), 0)
score2, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(2.0), 0)
score3, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(3.0), 0)
score4, _ := numeric_util.NewPrefixCodedInt64(numeric_util.Float64ToInt64(4.0), 0)
ei1 := &stubIndex{ ei1 := &stubIndex{
name: "ei1", name: "ei1",
err: nil, err: nil,
@ -1029,6 +1058,7 @@ func TestIndexAliasMultipleLayer(t *testing.T) {
Index: "1", Index: "1",
ID: "a", ID: "a",
Score: 1.0, Score: 1.0,
Sort: []string{string(score1)},
}, },
}, },
MaxScore: 1.0, MaxScore: 1.0,
@ -1052,6 +1082,7 @@ func TestIndexAliasMultipleLayer(t *testing.T) {
Index: "2", Index: "2",
ID: "b", ID: "b",
Score: 2.0, Score: 2.0,
Sort: []string{string(score2)},
}, },
}, },
MaxScore: 2.0, MaxScore: 2.0,
@ -1076,6 +1107,7 @@ func TestIndexAliasMultipleLayer(t *testing.T) {
Index: "3", Index: "3",
ID: "c", ID: "c",
Score: 3.0, Score: 3.0,
Sort: []string{string(score3)},
}, },
}, },
MaxScore: 3.0, MaxScore: 3.0,
@ -1096,6 +1128,7 @@ func TestIndexAliasMultipleLayer(t *testing.T) {
Index: "4", Index: "4",
ID: "d", ID: "d",
Score: 4.0, Score: 4.0,
Sort: []string{string(score4)},
}, },
}, },
MaxScore: 4.0, MaxScore: 4.0,
@ -1129,11 +1162,13 @@ func TestIndexAliasMultipleLayer(t *testing.T) {
Index: "4", Index: "4",
ID: "d", ID: "d",
Score: 4.0, Score: 4.0,
Sort: []string{string(score4)},
}, },
{ {
Index: "1", Index: "1",
ID: "a", ID: "a",
Score: 1.0, Score: 1.0,
Sort: []string{string(score1)},
}, },
}, },
MaxScore: 4.0, MaxScore: 4.0,
@ -1149,6 +1184,105 @@ func TestIndexAliasMultipleLayer(t *testing.T) {
} }
} }
// TestMultiSearchCustomSort verifies that MultiSearch merges hits from
// multiple indexes respecting a custom (non-score) sort order: the stub
// Sort keys interleave so the merged order differs from score order.
func TestMultiSearchCustomSort(t *testing.T) {
	ei1 := &stubIndex{err: nil, searchResult: &SearchResult{
		Status: &SearchStatus{
			Total:      1,
			Successful: 1,
			Errors:     make(map[string]error),
		},
		Total: 2,
		Hits: search.DocumentMatchCollection{
			{
				Index: "1",
				ID:    "a",
				Score: 1.0,
				Sort:  []string{"albert"},
			},
			{
				Index: "1",
				ID:    "b",
				Score: 2.0,
				Sort:  []string{"crown"},
			},
		},
		MaxScore: 2.0,
	}}
	ei2 := &stubIndex{err: nil, searchResult: &SearchResult{
		Status: &SearchStatus{
			Total:      1,
			Successful: 1,
			Errors:     make(map[string]error),
		},
		Total: 2,
		Hits: search.DocumentMatchCollection{
			{
				Index: "2",
				ID:    "c",
				Score: 2.5,
				Sort:  []string{"frank"},
			},
			{
				Index: "2",
				ID:    "d",
				Score: 3.0,
				Sort:  []string{"zombie"},
			},
		},
		MaxScore: 3.0,
	}}

	sr := NewSearchRequest(NewTermQuery("test"))
	sr.SortBy([]string{"name"})
	// expected hits are in Sort-key (name) order, not score order
	expected := &SearchResult{
		Status: &SearchStatus{
			Total:      2,
			Successful: 2,
			Errors:     make(map[string]error),
		},
		Request: sr,
		Total:   4,
		Hits: search.DocumentMatchCollection{
			{
				Index: "1",
				ID:    "a",
				Score: 1.0,
				Sort:  []string{"albert"},
			},
			{
				Index: "1",
				ID:    "b",
				Score: 2.0,
				Sort:  []string{"crown"},
			},
			{
				Index: "2",
				ID:    "c",
				Score: 2.5,
				Sort:  []string{"frank"},
			},
			{
				Index: "2",
				ID:    "d",
				Score: 3.0,
				Sort:  []string{"zombie"},
			},
		},
		MaxScore: 3.0,
	}

	results, err := MultiSearch(context.Background(), sr, ei1, ei2)
	if err != nil {
		t.Error(err)
	}
	// cheat and ensure that Took field matches since it involves time
	expected.Took = results.Took
	if !reflect.DeepEqual(results, expected) {
		t.Errorf("expected %v, got %v", expected, results)
	}
}
// stubIndex is an Index impl for which all operations // stubIndex is an Index impl for which all operations
// return the configured error value, unless the // return the configured error value, unless the
// corresponding operation result value has been // corresponding operation result value has been

View File

@ -384,7 +384,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
return nil, ErrorIndexClosed return nil, ErrorIndexClosed
} }
collector := collectors.NewTopScorerSkipCollector(req.Size, req.From) collector := collectors.NewTopNCollector(req.Size, req.From, req.Sort)
// open a reader for this search // open a reader for this search
indexReader, err := i.i.Reader() indexReader, err := i.i.Reader()

View File

@ -715,6 +715,54 @@ func TestIndexMetadataRaceBug198(t *testing.T) {
close(done) close(done)
} }
// TestSortMatchSearch indexes 200 synthetic documents and verifies that
// a match search sorted by ("Day", "Name") returns hits in
// non-decreasing Day order.
func TestSortMatchSearch(t *testing.T) {
	defer func() {
		err := os.RemoveAll("testidx")
		if err != nil {
			t.Fatal(err)
		}
	}()

	index, err := New("testidx", NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}
	names := []string{"Noam", "Uri", "David", "Yosef", "Eitan", "Itay", "Ariel", "Daniel", "Omer", "Yogev", "Yehonatan", "Moshe", "Mohammed", "Yusuf", "Omar"}
	days := []string{"Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"}
	numbers := []string{"One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine", "Ten", "Eleven", "Twelve"}
	// cycle through the fixture values so Day/Name combinations repeat
	for i := 0; i < 200; i++ {
		doc := make(map[string]interface{})
		doc["Name"] = names[i%len(names)]
		doc["Day"] = days[i%len(days)]
		doc["Number"] = numbers[i%len(numbers)]
		err = index.Index(fmt.Sprintf("%d", i), doc)
		if err != nil {
			t.Fatal(err)
		}
	}
	req := NewSearchRequest(NewMatchQuery("One"))
	req.SortBy([]string{"Day", "Name"})
	req.Fields = []string{"*"}
	sr, err := index.Search(req)
	if err != nil {
		t.Fatal(err)
	}
	// hits must arrive in non-decreasing Day order (plain string compare)
	prev := ""
	for _, hit := range sr.Hits {
		val := hit.Fields["Day"].(string)
		if prev > val {
			t.Errorf("Hits must be sorted by 'Day'. Found '%s' before '%s'", prev, val)
		}
		prev = val
	}
	err = index.Close()
	if err != nil {
		t.Fatal(err)
	}
}
func TestIndexCountMatchSearch(t *testing.T) { func TestIndexCountMatchSearch(t *testing.T) {
defer func() { defer func() {
err := os.RemoveAll("testidx") err := os.RemoveAll("testidx")

View File

@ -9,9 +9,7 @@
package numeric_util package numeric_util
import ( import "fmt"
"fmt"
)
const ShiftStartInt64 byte = 0x20 const ShiftStartInt64 byte = 0x20
@ -72,3 +70,18 @@ func (p PrefixCoded) Int64() (int64, error) {
} }
return int64(uint64((sortableBits << shift)) ^ 0x8000000000000000), nil return int64(uint64((sortableBits << shift)) ^ 0x8000000000000000), nil
} }
// ValidPrefixCodedTerm reports whether p is a structurally well-formed
// prefix-coded int64 term, and if so also returns its shift value.
func ValidPrefixCodedTerm(p string) (bool, int) {
	// empty input can never be a coded term
	if len(p) == 0 {
		return false, 0
	}
	// the leading byte must encode a shift in [0, 63]
	if p[0] < ShiftStartInt64 || p[0] > ShiftStartInt64+63 {
		return false, 0
	}
	shift := int(p[0] - ShiftStartInt64)
	// the payload length is fully determined by the shift
	nChars := (63-shift)/7 + 1
	if len(p) != nChars+1 {
		return false, 0
	}
	return true, shift
}

View File

@ -98,6 +98,45 @@ func TestPrefixCoded(t *testing.T) {
} }
} }
// TestPrefixCodedValid exercises ValidPrefixCodedTerm against the shared
// valid fixtures and a set of deliberately malformed terms.
func TestPrefixCodedValid(t *testing.T) {
	// all of the shared tests should be valid
	for _, test := range tests {
		valid, _ := ValidPrefixCodedTerm(string(test.output))
		if !valid {
			t.Errorf("expected %s to be valid prefix coded, is not", string(test.output))
		}
	}

	invalidTests := []struct {
		data PrefixCoded
	}{
		// first byte invalid skip (too low)
		{
			data: PrefixCoded{0x19, 'c', 'a', 't'},
		},
		// first byte invalid skip (too high)
		{
			data: PrefixCoded{0x20 + 64, 'c'},
		},
		// length of trailing bytes wrong (too long)
		{
			data: PrefixCoded{0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1},
		},
		// length of trailing bytes wrong (too short)
		{
			data: PrefixCoded{0x20 + 63},
		},
	}

	// all of these should be reported as invalid
	for _, test := range invalidTests {
		valid, _ := ValidPrefixCodedTerm(string(test.data))
		if valid {
			t.Errorf("expected %s to be invalid prefix coded, it is", string(test.data))
		}
	}
}
func BenchmarkTestPrefixCoded(b *testing.B) { func BenchmarkTestPrefixCoded(b *testing.B) {
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {

View File

@ -191,6 +191,7 @@ func (h *HighlightRequest) AddField(field string) {
// Facets describe the set of facets to be computed. // Facets describe the set of facets to be computed.
// Explain triggers inclusion of additional search // Explain triggers inclusion of additional search
// result score explanations. // result score explanations.
// Sort describes the desired order for the results to be returned.
// //
// A special field named "*" can be used to return all fields. // A special field named "*" can be used to return all fields.
type SearchRequest struct { type SearchRequest struct {
@ -201,6 +202,7 @@ type SearchRequest struct {
Fields []string `json:"fields"` Fields []string `json:"fields"`
Facets FacetsRequest `json:"facets"` Facets FacetsRequest `json:"facets"`
Explain bool `json:"explain"` Explain bool `json:"explain"`
Sort search.SortOrder `json:"sort"`
} }
func (sr *SearchRequest) Validate() error { func (sr *SearchRequest) Validate() error {
@ -220,6 +222,21 @@ func (r *SearchRequest) AddFacet(facetName string, f *FacetRequest) {
r.Facets[facetName] = f r.Facets[facetName] = f
} }
// SortBy replaces the request's sort order using the simplified string
// syntax: each entry is a field name, or the magic values "_id" /
// "_score" (referring to the document ID and the search score); any
// entry may be prefixed with '-' to reverse its direction.
func (r *SearchRequest) SortBy(order []string) {
	r.Sort = search.ParseSortOrderStrings(order)
}
// SortByCustom changes the request to use the given explicitly
// constructed sort order.
func (r *SearchRequest) SortByCustom(order search.SortOrder) {
	r.Sort = order
}
// UnmarshalJSON deserializes a JSON representation of // UnmarshalJSON deserializes a JSON representation of
// a SearchRequest // a SearchRequest
func (r *SearchRequest) UnmarshalJSON(input []byte) error { func (r *SearchRequest) UnmarshalJSON(input []byte) error {
@ -231,6 +248,7 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
Fields []string `json:"fields"` Fields []string `json:"fields"`
Facets FacetsRequest `json:"facets"` Facets FacetsRequest `json:"facets"`
Explain bool `json:"explain"` Explain bool `json:"explain"`
Sort []json.RawMessage `json:"sort"`
} }
err := json.Unmarshal(input, &temp) err := json.Unmarshal(input, &temp)
@ -243,6 +261,14 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
} else { } else {
r.Size = *temp.Size r.Size = *temp.Size
} }
if temp.Sort == nil {
r.Sort = search.SortOrder{&search.SortScore{Desc: true}}
} else {
r.Sort, err = search.ParseSortOrderJSON(temp.Sort)
if err != nil {
return err
}
}
r.From = temp.From r.From = temp.From
r.Explain = temp.Explain r.Explain = temp.Explain
r.Highlight = temp.Highlight r.Highlight = temp.Highlight
@ -274,12 +300,14 @@ func NewSearchRequest(q Query) *SearchRequest {
// NewSearchRequestOptions creates a new SearchRequest // NewSearchRequestOptions creates a new SearchRequest
// for the Query, with the requested size, from // for the Query, with the requested size, from
// and explanation search parameters. // and explanation search parameters.
// By default results are ordered by score, descending.
func NewSearchRequestOptions(q Query, size, from int, explain bool) *SearchRequest { func NewSearchRequestOptions(q Query, size, from int, explain bool) *SearchRequest {
return &SearchRequest{ return &SearchRequest{
Query: q, Query: q,
Size: size, Size: size,
From: from, From: from,
Explain: explain, Explain: explain,
Sort: search.SortOrder{&search.SortScore{Desc: true}},
} }
} }

View File

@ -1,187 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package collectors
import (
"container/list"
"time"
"golang.org/x/net/context"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
// TopScoreCollector gathers the top k (plus skip) hits by score,
// maintained in a linked list kept in ascending score order.
type TopScoreCollector struct {
	k             int        // maximum number of results to return
	skip          int        // number of leading results to omit
	results       *list.List // retained hits, ascending by score (front = lowest)
	took          time.Duration
	maxScore      float64
	minScore      float64 // score of the current list head; cheap admission filter
	total         uint64  // total hits seen, not just retained
	facetsBuilder *search.FacetsBuilder
	actualResults search.DocumentMatchCollection // finalized results, set by Collect
}
// NewTopScorerCollector returns a collector retaining the top k hits by
// score, with no leading results skipped.
func NewTopScorerCollector(k int) *TopScoreCollector {
	return NewTopScorerSkipCollector(k, 0)
}
// NewTopScorerSkipCollector returns a collector retaining the top k hits
// by score after discarding the first skip hits in rank order.
func NewTopScorerSkipCollector(k, skip int) *TopScoreCollector {
	c := &TopScoreCollector{k: k, skip: skip}
	c.results = list.New()
	return c
}
// Total returns the number of hits seen by the collector.
func (tksc *TopScoreCollector) Total() uint64 {
	return tksc.total
}

// MaxScore returns the highest score observed across all hits.
func (tksc *TopScoreCollector) MaxScore() float64 {
	return tksc.maxScore
}

// Took reports how long the last Collect call ran.
func (tksc *TopScoreCollector) Took() time.Duration {
	return tksc.took
}
var COLLECT_CHECK_DONE_EVERY = uint64(1024)
// Collect drives the searcher to exhaustion (or context cancellation),
// feeding every hit through the optional facets builder and the bounded
// result list, then finalizes external document IDs via reader.
// Returns the first error from the searcher, facets builder, or
// finalization.
func (tksc *TopScoreCollector) Collect(ctx context.Context, searcher search.Searcher, reader index.IndexReader) error {
	startTime := time.Now()
	var err error
	var next *search.DocumentMatch

	// search context with enough pre-allocated document matches for the
	// retained list plus whatever the searcher needs in flight
	searchContext := &search.SearchContext{
		DocumentMatchPool: search.NewDocumentMatchPool(tksc.k + tksc.skip + searcher.DocumentMatchPoolSize()),
	}

	select {
	case <-ctx.Done():
		return ctx.Err()
	default:
		next, err = searcher.Next(searchContext)
	}
	for err == nil && next != nil {
		// only poll for cancellation periodically; checking every hit
		// would be needlessly expensive
		if tksc.total%COLLECT_CHECK_DONE_EVERY == 0 {
			select {
			case <-ctx.Done():
				return ctx.Err()
			default:
			}
		}
		if tksc.facetsBuilder != nil {
			err = tksc.facetsBuilder.Update(next)
			if err != nil {
				break
			}
		}
		tksc.collectSingle(searchContext, next)
		next, err = searcher.Next(searchContext)
	}
	// BUG FIX: previously a searcher/facets error that terminated the
	// loop was silently overwritten by the finalizeResults assignment
	// below (and the trailing duplicate err check was dead code);
	// surface it instead of returning partial results as success.
	if err != nil {
		return err
	}

	// finalize actual results
	tksc.actualResults, err = tksc.finalizeResults(reader)
	if err != nil {
		return err
	}

	// compute search duration
	tksc.took = time.Since(startTime)
	return nil
}
// collectSingle offers one hit to the bounded result list. The list is
// kept in ascending score order (front = lowest); hits that cannot beat
// the current minimum are returned to the pool immediately, and any
// overflow beyond k+skip evicts the list head.
func (tksc *TopScoreCollector) collectSingle(ctx *search.SearchContext, d *search.DocumentMatch) {
	// increment total hits
	tksc.total++

	// update max score
	if d.Score > tksc.maxScore {
		tksc.maxScore = d.Score
	}

	// cheap rejection: cannot make it into the current top k+skip
	if d.Score <= tksc.minScore {
		ctx.DocumentMatchPool.Put(d)
		return
	}

	// walk from the lowest score, inserting before the first element
	// with an equal or higher score
	for e := tksc.results.Front(); e != nil; e = e.Next() {
		curr := e.Value.(*search.DocumentMatch)
		if d.Score <= curr.Score {
			tksc.results.InsertBefore(d, e)
			// if we just made the list too long
			if tksc.results.Len() > (tksc.k + tksc.skip) {
				// remove the head (lowest score) and raise the bar
				removed := tksc.results.Remove(tksc.results.Front()).(*search.DocumentMatch)
				tksc.minScore = removed.Score
				ctx.DocumentMatchPool.Put(removed)
			}
			return
		}
	}
	// if we got to the end, we still have to add it
	tksc.results.PushBack(d)
	if tksc.results.Len() > (tksc.k + tksc.skip) {
		// remove the head (lowest score) and raise the bar
		removed := tksc.results.Remove(tksc.results.Front()).(*search.DocumentMatch)
		tksc.minScore = removed.Score
		ctx.DocumentMatchPool.Put(removed)
	}
}
// Results returns the finalized hits produced by the last Collect call.
func (tksc *TopScoreCollector) Results() search.DocumentMatchCollection {
	return tksc.actualResults
}
// finalizeResults converts the retained list into a collection ordered
// best-first (walking from the high-score back of the list), dropping
// the first skip entries and resolving each hit's external document ID.
func (tksc *TopScoreCollector) finalizeResults(r index.IndexReader) (search.DocumentMatchCollection, error) {
	if tksc.results.Len()-tksc.skip > 0 {
		rv := make(search.DocumentMatchCollection, tksc.results.Len()-tksc.skip)
		i := 0
		skipped := 0
		// back of the list holds the highest score, so iterate in reverse
		for e := tksc.results.Back(); e != nil; e = e.Prev() {
			if skipped < tksc.skip {
				skipped++
				continue
			}
			var err error
			rv[i] = e.Value.(*search.DocumentMatch)
			rv[i].ID, err = r.FinalizeDocID(rv[i].IndexInternalID)
			if err != nil {
				return nil, err
			}
			i++
		}
		return rv, nil
	}
	// nothing left after skipping: empty (non-nil) collection
	return search.DocumentMatchCollection{}, nil
}
// SetFacetsBuilder attaches a facets builder that will be fed every hit
// seen during Collect.
func (tksc *TopScoreCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
	tksc.facetsBuilder = facetsBuilder
}

// FacetResults returns the computed facets, or an empty set when no
// facets builder was configured.
func (tksc *TopScoreCollector) FacetResults() search.FacetResults {
	if tksc.facetsBuilder != nil {
		return tksc.facetsBuilder.Results()
	}
	return search.FacetResults{}
}

83
search/collectors/heap.go Normal file
View File

@ -0,0 +1,83 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package collectors
import (
"container/heap"
"github.com/blevesearch/bleve/search"
)
// collectStoreHeap stores collected hits in a heap ordered by compare,
// so the lowest-ranked hit can be removed cheaply.
type collectStoreHeap struct {
	heap    search.DocumentMatchCollection // backing storage for container/heap
	compare collectorCompare               // ranking function between two hits
}
// newStoreHeap builds a heap-backed collector store pre-sized to cap
// and ordered by compare.
func newStoreHeap(cap int, compare collectorCompare) *collectStoreHeap {
	h := &collectStoreHeap{
		compare: compare,
		heap:    make(search.DocumentMatchCollection, 0, cap),
	}
	heap.Init(h)
	return h
}
// Add pushes doc onto the heap, restoring heap order.
func (c *collectStoreHeap) Add(doc *search.DocumentMatch) {
	heap.Push(c, doc)
}

// RemoveLast pops and returns the hit at the top of the heap.
// NOTE(review): Less inverts compare, so the heap top appears to be the
// lowest-ranked retained hit — confirm against the collector's usage.
func (c *collectStoreHeap) RemoveLast() *search.DocumentMatch {
	return heap.Pop(c).(*search.DocumentMatch)
}
// Final drains the heap, skipping the skip lowest-priority pops, and
// returns the remaining hits filled from the end of the collection so
// they come out in rank order. Each returned hit is passed through
// fixup (e.g. to resolve its external document ID).
func (c *collectStoreHeap) Final(skip int, fixup collectorFixup) (search.DocumentMatchCollection, error) {
	count := c.Len()
	size := count - skip
	// BUG FIX: when skip >= count the original computed a negative size
	// and make() panicked; return an empty (non-nil) collection instead,
	// matching the list-backed store's behavior for the same case.
	if size <= 0 {
		return make(search.DocumentMatchCollection, 0), nil
	}
	rv := make(search.DocumentMatchCollection, size)
	for count > 0 {
		count--
		if count >= skip {
			size--
			doc := heap.Pop(c).(*search.DocumentMatch)
			rv[size] = doc
			err := fixup(doc)
			if err != nil {
				return nil, err
			}
		}
	}
	return rv, nil
}
// heap.Interface implementation

// Len reports the number of stored hits.
func (c *collectStoreHeap) Len() int { return len(c.heap) }

// Less orders the heap by the negated comparator result, so the element
// the comparator ranks after its peer sits nearer the heap top.
func (c *collectStoreHeap) Less(i, j int) bool {
	return -c.compare(c.heap[i], c.heap[j]) < 0
}

// Swap exchanges two stored hits.
func (c *collectStoreHeap) Swap(i, j int) {
	c.heap[j], c.heap[i] = c.heap[i], c.heap[j]
}
// Push appends x to the backing slice (called only via heap.Push).
func (c *collectStoreHeap) Push(x interface{}) {
	c.heap = append(c.heap, x.(*search.DocumentMatch))
}

// Pop removes and returns the final element of the backing slice
// (called only via heap.Pop).
func (c *collectStoreHeap) Pop() interface{} {
	last := len(c.heap) - 1
	doc := c.heap[last]
	c.heap = c.heap[:last]
	return doc
}

73
search/collectors/list.go Normal file
View File

@ -0,0 +1,73 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package collectors
import (
"container/list"
"github.com/blevesearch/bleve/search"
)
// collectStoreList stores collected hits in a linked list kept sorted
// by compare as elements are added.
type collectStoreList struct {
	results *list.List       // sorted hits; Final reads them back-to-front
	compare collectorCompare // ranking function between two hits
}
// newStoreList builds a list-backed collector store ordered by compare.
// cap is accepted for signature parity with newStoreHeap but is unused:
// a linked list cannot be pre-sized.
func newStoreList(cap int, compare collectorCompare) *collectStoreList {
	return &collectStoreList{
		results: list.New(),
		compare: compare,
	}
}
// Add inserts doc at its ordered position, scanning from the front
// (worst end) toward the back.
func (c *collectStoreList) Add(doc *search.DocumentMatch) {
	for e := c.results.Front(); e != nil; e = e.Next() {
		if c.compare(doc, e.Value.(*search.DocumentMatch)) >= 0 {
			c.results.InsertBefore(doc, e)
			return
		}
	}
	// doc sorts before everything currently stored; append at the back
	c.results.PushBack(doc)
}
// RemoveLast evicts and returns the lowest-priority hit, which Add keeps
// at the front of the list.
func (c *collectStoreList) RemoveLast() *search.DocumentMatch {
	return c.results.Remove(c.results.Front()).(*search.DocumentMatch)
}
// Final walks the list from the back (best hit first), omitting the first
// skip hits, applying fixup to each returned hit, and returning the rest
// in sort order. An empty collection is returned when skip consumes
// everything stored.
func (c *collectStoreList) Final(skip int, fixup collectorFixup) (search.DocumentMatchCollection, error) {
	size := c.results.Len() - skip
	if size <= 0 {
		return search.DocumentMatchCollection{}, nil
	}
	rv := make(search.DocumentMatchCollection, size)
	idx := 0
	remainingSkip := skip
	for e := c.results.Back(); e != nil; e = e.Prev() {
		if remainingSkip > 0 {
			remainingSkip--
			continue
		}
		doc := e.Value.(*search.DocumentMatch)
		if err := fixup(doc); err != nil {
			return nil, err
		}
		rv[idx] = doc
		idx++
	}
	return rv, nil
}
// Len reports the number of hits currently stored.
func (c *collectStoreList) Len() int {
	return c.results.Len()
}

View File

@ -22,7 +22,9 @@ type stubSearcher struct {
func (ss *stubSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) { func (ss *stubSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
if ss.index < len(ss.matches) { if ss.index < len(ss.matches) {
rv := ss.matches[ss.index] rv := ctx.DocumentMatchPool.Get()
rv.IndexInternalID = ss.matches[ss.index].IndexInternalID
rv.Score = ss.matches[ss.index].Score
ss.index++ ss.index++
return rv, nil return rv, nil
} }
@ -35,7 +37,9 @@ func (ss *stubSearcher) Advance(ctx *search.SearchContext, ID index.IndexInterna
ss.index++ ss.index++
} }
if ss.index < len(ss.matches) { if ss.index < len(ss.matches) {
rv := ss.matches[ss.index] rv := ctx.DocumentMatchPool.Get()
rv.IndexInternalID = ss.matches[ss.index].IndexInternalID
rv.Score = ss.matches[ss.index].Score
ss.index++ ss.index++
return rv, nil return rv, nil
} }
@ -95,11 +99,7 @@ func (sr *stubReader) Document(id string) (*document.Document, error) {
return nil, nil return nil, nil
} }
func (sr *stubReader) DocumentFieldTerms(id index.IndexInternalID) (index.FieldTerms, error) { func (sr *stubReader) DocumentFieldTerms(id index.IndexInternalID, fields []string) (index.FieldTerms, error) {
return nil, nil
}
func (sr *stubReader) DocumentFieldTermsForFields(id index.IndexInternalID, fields []string) (index.FieldTerms, error) {
return nil, nil return nil, nil
} }

View File

@ -0,0 +1,60 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package collectors
import "github.com/blevesearch/bleve/search"
// collectStoreSlice stores hits in a slice kept in sort order
// (best hit at index 0, worst at the end).
type collectStoreSlice struct {
	slice   search.DocumentMatchCollection // ordered hits
	compare collectorCompare               // sort-order comparison between two hits
}
// newStoreSlice builds a slice-backed hit store pre-allocated to hold
// capacity hits, ordered by the given comparison.
// (Parameter renamed from `cap`, which shadowed the builtin.)
func newStoreSlice(capacity int, compare collectorCompare) *collectStoreSlice {
	rv := &collectStoreSlice{
		slice:   make(search.DocumentMatchCollection, 0, capacity),
		compare: compare,
	}
	return rv
}
// Add inserts doc at its ordered position, scanning backward from the
// worst (last) hit for the insertion point.
func (c *collectStoreSlice) Add(doc *search.DocumentMatch) {
	pos := len(c.slice)
	for pos > 0 && c.compare(doc, c.slice[pos-1]) < 0 {
		pos--
	}
	// open a gap at pos by shifting the tail right one slot
	c.slice = append(c.slice, nil)
	copy(c.slice[pos+1:], c.slice[pos:])
	c.slice[pos] = doc
}
// RemoveLast evicts and returns the lowest-priority hit, stored at the
// end of the slice.
func (c *collectStoreSlice) RemoveLast() *search.DocumentMatch {
	last := len(c.slice) - 1
	doc := c.slice[last]
	c.slice = c.slice[:last]
	return doc
}
// Final applies fixup to every hit past the first skip and returns those
// hits, which are already in sort order.
func (c *collectStoreSlice) Final(skip int, fixup collectorFixup) (search.DocumentMatchCollection, error) {
	// guard: skipping past the end must not panic on the slice expression
	// below; mirror the empty result of the list store
	if skip >= len(c.slice) {
		return search.DocumentMatchCollection{}, nil
	}
	for i := skip; i < len(c.slice); i++ {
		err := fixup(c.slice[i])
		if err != nil {
			return nil, err
		}
	}
	return c.slice[skip:], nil
}
// Len reports the number of hits currently stored.
func (c *collectStoreSlice) Len() int {
	return len(c.slice)
}

250
search/collectors/topn.go Normal file
View File

@ -0,0 +1,250 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package collectors
import (
"time"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"golang.org/x/net/context"
)
type collectorCompare func(i, j *search.DocumentMatch) int
type collectorFixup func(d *search.DocumentMatch) error
// TopNCollector collects the top N hits, optionally skipping some results
type TopNCollector struct {
	size          int                            // number of hits to return
	skip          int                            // number of leading hits to omit
	total         uint64                         // total hits seen, also used as the natural-order tiebreak
	maxScore      float64                        // highest score seen across all hits
	took          time.Duration                  // time spent in Collect
	sort          search.SortOrder               // requested sort order
	results       search.DocumentMatchCollection // final hits, populated by finalizeResults
	facetsBuilder *search.FacetsBuilder          // optional facet accumulator

	store      *collectStoreSlice // ordered store of the best size+skip hits
	needDocIds bool               // true when the sort requires the external doc ID

	neededFields  []string // stored fields the sort needs per hit
	cachedScoring []bool   // per-sort: true when that sort is by score
	cachedDesc    []bool   // per-sort: true when that sort is descending

	// lowest-priority hit already evicted from the store; lets collectSingle
	// reject obviously-out-of-range hits without touching the store
	lowestMatchOutsideResults *search.DocumentMatch
}
// CheckDoneEvery controls how frequently we check the context deadline
const CheckDoneEvery = uint64(1024)
// NewTopNCollector builds a collector to find the top 'size' hits,
// skipping over the first 'skip' hits, ordering hits by the provided
// sort order.
func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector {
	rv := &TopNCollector{size: size, skip: skip, sort: sort}

	// pre-allocate space for size+skip results, plus one extra slot used
	// transiently while deciding which hit to evict
	rv.store = newStoreSlice(size+skip+1, func(a, b *search.DocumentMatch) int {
		return rv.sort.Compare(rv.cachedScoring, rv.cachedDesc, a, b)
	})

	// these lookups traverse an interface, so compute them once up front
	rv.needDocIds = sort.RequiresDocID()
	rv.neededFields = sort.RequiredFields()
	rv.cachedScoring = sort.CacheIsScore()
	rv.cachedDesc = sort.CacheDescending()

	return rv
}
// Collect goes to the index to find the matching documents, feeding each
// hit through the facet builder (if any) and collectSingle until the
// searcher is exhausted, an error occurs, or ctx is cancelled. On success
// it finalizes the results; Took/Results/etc. are valid afterwards.
func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, reader index.IndexReader) error {
	startTime := time.Now()
	var err error
	var next *search.DocumentMatch
	// search context with enough pre-allocated document matches:
	// we keep references to size+skip ourselves,
	// plus possibly one extra for the lowestMatchOutsideResults,
	// plus the amount required by the searcher tree
	searchContext := &search.SearchContext{
		DocumentMatchPool: search.NewDocumentMatchPool(hc.size+hc.skip+1+searcher.DocumentMatchPoolSize(), len(hc.sort)),
	}
	select {
	case <-ctx.Done():
		return ctx.Err()
	default:
		next, err = searcher.Next(searchContext)
	}
	for err == nil && next != nil {
		// only poll the context every CheckDoneEvery hits to keep the
		// hot loop cheap
		if hc.total%CheckDoneEvery == 0 {
			select {
			case <-ctx.Done():
				return ctx.Err()
			default:
			}
		}
		if hc.facetsBuilder != nil {
			err = hc.facetsBuilder.Update(next)
			if err != nil {
				break
			}
		}
		err = hc.collectSingle(searchContext, reader, next)
		if err != nil {
			break
		}
		next, err = searcher.Next(searchContext)
	}
	// compute search duration
	hc.took = time.Since(startTime)
	if err != nil {
		return err
	}
	// finalize actual results
	err = hc.finalizeResults(reader)
	if err != nil {
		return err
	}
	return nil
}
var sortByScoreOpt = []string{"_score"}
// collectSingle processes one hit: records totals and max score, loads the
// doc ID and any stored fields the sort needs, computes the hit's sort
// value, then inserts it into the store, evicting the worst hit when the
// store exceeds size+skip. Hits that cannot possibly make the result set
// are returned to the pool without touching the store.
func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.IndexReader, d *search.DocumentMatch) error {
	// increment total hits
	hc.total++
	d.HitNumber = hc.total
	// update max score
	if d.Score > hc.maxScore {
		hc.maxScore = d.Score
	}
	var err error
	// see if we need to load ID (at this early stage, for example to sort on it)
	if hc.needDocIds {
		d.ID, err = reader.FinalizeDocID(d.IndexInternalID)
		if err != nil {
			return err
		}
	}
	// see if we need to load the stored fields
	if len(hc.neededFields) > 0 {
		// find out which fields haven't been loaded yet
		fieldsToLoad := d.CachedFieldTerms.FieldsNotYetCached(hc.neededFields)
		// look them up
		fieldTerms, err := reader.DocumentFieldTerms(d.IndexInternalID, fieldsToLoad)
		if err != nil {
			return err
		}
		// cache these as well
		if d.CachedFieldTerms == nil {
			d.CachedFieldTerms = make(map[string][]string)
		}
		d.CachedFieldTerms.Merge(fieldTerms)
	}
	// compute this hit's sort value
	if len(hc.sort) == 1 && hc.cachedScoring[0] {
		d.Sort = sortByScoreOpt
	} else {
		hc.sort.Value(d)
	}
	// optimization: we track the lowest sorting hit already removed from
	// the store; with this one comparison, we can avoid all store
	// operations if this hit would have been added and then immediately
	// removed
	if hc.lowestMatchOutsideResults != nil {
		cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d, hc.lowestMatchOutsideResults)
		if cmp >= 0 {
			// this hit can't possibly be in the result set, so avoid store ops
			ctx.DocumentMatchPool.Put(d)
			return nil
		}
	}
	hc.store.Add(d)
	if hc.store.Len() > hc.size+hc.skip {
		removed := hc.store.RemoveLast()
		if hc.lowestMatchOutsideResults == nil {
			hc.lowestMatchOutsideResults = removed
		} else {
			cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, removed, hc.lowestMatchOutsideResults)
			if cmp < 0 {
				// removed sorts earlier than the current threshold; keep it
				// as the new threshold and recycle the old one
				tmp := hc.lowestMatchOutsideResults
				hc.lowestMatchOutsideResults = removed
				ctx.DocumentMatchPool.Put(tmp)
			}
		}
	}
	return nil
}
// SetFacetsBuilder registers a facet builder for this collector; it will
// be fed every hit during Collect.
func (hc *TopNCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
	hc.facetsBuilder = facetsBuilder
}
// finalizeResults starts with the store containing the final top size+skip
// hits, throws away the results to be skipped, and resolves the external
// doc ID for any hit that did not already have it looked up.
func (hc *TopNCollector) finalizeResults(r index.IndexReader) error {
	fixup := func(doc *search.DocumentMatch) error {
		if doc.ID != "" {
			// already resolved during collection (e.g. sorting on _id)
			return nil
		}
		id, err := r.FinalizeDocID(doc.IndexInternalID)
		if err != nil {
			return err
		}
		doc.ID = id
		return nil
	}
	var err error
	hc.results, err = hc.store.Final(hc.skip, fixup)
	return err
}
// Results returns the collected hits; valid only after Collect succeeds.
func (hc *TopNCollector) Results() search.DocumentMatchCollection {
	return hc.results
}
// Total returns the total number of hits seen, not just those returned.
func (hc *TopNCollector) Total() uint64 {
	return hc.total
}
// MaxScore returns the maximum score seen across all the hits
func (hc *TopNCollector) MaxScore() float64 {
	return hc.maxScore
}
// Took returns the time spent collecting hits
func (hc *TopNCollector) Took() time.Duration {
	return hc.took
}
// FacetResults returns the computed facet results, or an empty set when
// no facet builder was registered.
func (hc *TopNCollector) FacetResults() search.FacetResults {
	if hc.facetsBuilder == nil {
		return search.FacetResults{}
	}
	return hc.facetsBuilder.Results()
}

View File

@ -84,7 +84,7 @@ func TestTop10Scores(t *testing.T) {
}, },
} }
collector := NewTopScorerCollector(10) collector := NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
err := collector.Collect(context.Background(), searcher, &stubReader{}) err := collector.Collect(context.Background(), searcher, &stubReader{})
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
@ -103,6 +103,7 @@ func TestTop10Scores(t *testing.T) {
results := collector.Results() results := collector.Results()
if len(results) != 10 { if len(results) != 10 {
t.Logf("results: %v", results)
t.Fatalf("expected 10 results, got %d", len(results)) t.Fatalf("expected 10 results, got %d", len(results))
} }
@ -192,7 +193,7 @@ func TestTop10ScoresSkip10(t *testing.T) {
}, },
} }
collector := NewTopScorerSkipCollector(10, 10) collector := NewTopNCollector(10, 10, search.SortOrder{&search.SortScore{Desc: true}})
err := collector.Collect(context.Background(), searcher, &stubReader{}) err := collector.Collect(context.Background(), searcher, &stubReader{})
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
@ -219,7 +220,7 @@ func TestTop10ScoresSkip10(t *testing.T) {
} }
if results[0].Score != 9.5 { if results[0].Score != 9.5 {
t.Errorf("expected highest score to be 9.5ß, got %f", results[0].Score) t.Errorf("expected highest score to be 9.5, got %f", results[0].Score)
} }
} }
@ -289,7 +290,7 @@ func TestPaginationSameScores(t *testing.T) {
} }
// first get first 5 hits // first get first 5 hits
collector := NewTopScorerSkipCollector(5, 0) collector := NewTopNCollector(5, 0, search.SortOrder{&search.SortScore{Desc: true}})
err := collector.Collect(context.Background(), searcher, &stubReader{}) err := collector.Collect(context.Background(), searcher, &stubReader{})
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
@ -375,7 +376,7 @@ func TestPaginationSameScores(t *testing.T) {
} }
// now get next 5 hits // now get next 5 hits
collector = NewTopScorerSkipCollector(5, 5) collector = NewTopNCollector(5, 5, search.SortOrder{&search.SortScore{Desc: true}})
err = collector.Collect(context.Background(), searcher, &stubReader{}) err = collector.Collect(context.Background(), searcher, &stubReader{})
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
@ -398,21 +399,28 @@ func TestPaginationSameScores(t *testing.T) {
t.Errorf("doc ID %s is in top 5 and next 5 result sets", hit.ID) t.Errorf("doc ID %s is in top 5 and next 5 result sets", hit.ID)
} }
} }
} }
func BenchmarkTop10of100000Scores(b *testing.B) { func BenchmarkTop10of100000Scores(b *testing.B) {
benchHelper(10000, func() search.Collector { return NewTopScorerCollector(10) }, b) benchHelper(10000, func() search.Collector {
return NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
} }
func BenchmarkTop100of100000Scores(b *testing.B) { func BenchmarkTop100of100000Scores(b *testing.B) {
benchHelper(10000, func() search.Collector { return NewTopScorerCollector(100) }, b) benchHelper(10000, func() search.Collector {
return NewTopNCollector(100, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
} }
func BenchmarkTop10of1000000Scores(b *testing.B) { func BenchmarkTop10of1000000Scores(b *testing.B) {
benchHelper(100000, func() search.Collector { return NewTopScorerCollector(10) }, b) benchHelper(100000, func() search.Collector {
return NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
} }
func BenchmarkTop100of1000000Scores(b *testing.B) { func BenchmarkTop100of1000000Scores(b *testing.B) {
benchHelper(100000, func() search.Collector { return NewTopScorerCollector(100) }, b) benchHelper(100000, func() search.Collector {
return NewTopNCollector(100, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
} }

View File

@ -42,12 +42,23 @@ func (fb *FacetsBuilder) Update(docMatch *DocumentMatch) error {
for _, facetBuilder := range fb.facets { for _, facetBuilder := range fb.facets {
fields = append(fields, facetBuilder.Field()) fields = append(fields, facetBuilder.Field())
} }
fieldTerms, err := fb.indexReader.DocumentFieldTermsForFields(docMatch.IndexInternalID, fields)
if err != nil { if len(fields) > 0 {
return err // find out which fields haven't been loaded yet
fieldsToLoad := docMatch.CachedFieldTerms.FieldsNotYetCached(fields)
// look them up
fieldTerms, err := fb.indexReader.DocumentFieldTerms(docMatch.IndexInternalID, fieldsToLoad)
if err != nil {
return err
}
// cache these as well
if docMatch.CachedFieldTerms == nil {
docMatch.CachedFieldTerms = make(map[string][]string)
}
docMatch.CachedFieldTerms.Merge(fieldTerms)
} }
for _, facetBuilder := range fb.facets { for _, facetBuilder := range fb.facets {
facetBuilder.Update(fieldTerms) facetBuilder.Update(docMatch.CachedFieldTerms)
} }
return nil return nil
} }

View File

@ -31,12 +31,13 @@ func defaultDocumentMatchPoolTooSmall(p *DocumentMatchPool) *DocumentMatch {
// NewDocumentMatchPool will build a DocumentMatchPool with memory // NewDocumentMatchPool will build a DocumentMatchPool with memory
// pre-allocated to accomodate the requested number of DocumentMatch // pre-allocated to accomodate the requested number of DocumentMatch
// instances // instances
func NewDocumentMatchPool(size int) *DocumentMatchPool { func NewDocumentMatchPool(size, sortsize int) *DocumentMatchPool {
avail := make(DocumentMatchCollection, 0, size) avail := make(DocumentMatchCollection, 0, size)
// pre-allocate the expected number of instances // pre-allocate the expected number of instances
startBlock := make([]DocumentMatch, size) startBlock := make([]DocumentMatch, size)
// make these initial instances available // make these initial instances available
for i := range startBlock { for i := range startBlock {
startBlock[i].Sort = make([]string, 0, sortsize)
avail = append(avail, &startBlock[i]) avail = append(avail, &startBlock[i])
} }
return &DocumentMatchPool{ return &DocumentMatchPool{

View File

@ -16,7 +16,7 @@ func TestDocumentMatchPool(t *testing.T) {
tooManyCalled := false tooManyCalled := false
// create a pool // create a pool
dmp := NewDocumentMatchPool(10) dmp := NewDocumentMatchPool(10, 0)
dmp.TooSmall = func(inner *DocumentMatchPool) *DocumentMatch { dmp.TooSmall = func(inner *DocumentMatchPool) *DocumentMatch {
tooManyCalled = true tooManyCalled = true
return &DocumentMatch{} return &DocumentMatch{}

View File

@ -47,13 +47,14 @@ func TestConstantScorer(t *testing.T) {
Value: 1.0, Value: 1.0,
Message: "ConstantScore()", Message: "ConstantScore()",
}, },
Sort: []string{},
}, },
}, },
} }
for _, test := range tests { for _, test := range tests {
ctx := &search.SearchContext{ ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(1), DocumentMatchPool: search.NewDocumentMatchPool(1, 0),
} }
actual := scorer.Score(ctx, test.termMatch.ID) actual := scorer.Score(ctx, test.termMatch.ID)
@ -82,6 +83,7 @@ func TestConstantScorerWithQueryNorm(t *testing.T) {
result: &search.DocumentMatch{ result: &search.DocumentMatch{
IndexInternalID: index.IndexInternalID("one"), IndexInternalID: index.IndexInternalID("one"),
Score: 2.0, Score: 2.0,
Sort: []string{},
Expl: &search.Explanation{ Expl: &search.Explanation{
Value: 2.0, Value: 2.0,
Message: "weight(^1.000000), product of:", Message: "weight(^1.000000), product of:",
@ -112,7 +114,7 @@ func TestConstantScorerWithQueryNorm(t *testing.T) {
for _, test := range tests { for _, test := range tests {
ctx := &search.SearchContext{ ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(1), DocumentMatchPool: search.NewDocumentMatchPool(1, 0),
} }
actual := scorer.Score(ctx, test.termMatch.ID) actual := scorer.Score(ctx, test.termMatch.ID)

View File

@ -50,6 +50,7 @@ func TestTermScorer(t *testing.T) {
result: &search.DocumentMatch{ result: &search.DocumentMatch{
IndexInternalID: index.IndexInternalID("one"), IndexInternalID: index.IndexInternalID("one"),
Score: math.Sqrt(1.0) * idf, Score: math.Sqrt(1.0) * idf,
Sort: []string{},
Expl: &search.Explanation{ Expl: &search.Explanation{
Value: math.Sqrt(1.0) * idf, Value: math.Sqrt(1.0) * idf,
Message: "fieldWeight(desc:beer in one), product of:", Message: "fieldWeight(desc:beer in one), product of:",
@ -91,6 +92,7 @@ func TestTermScorer(t *testing.T) {
result: &search.DocumentMatch{ result: &search.DocumentMatch{
IndexInternalID: index.IndexInternalID("one"), IndexInternalID: index.IndexInternalID("one"),
Score: math.Sqrt(1.0) * idf, Score: math.Sqrt(1.0) * idf,
Sort: []string{},
Expl: &search.Explanation{ Expl: &search.Explanation{
Value: math.Sqrt(1.0) * idf, Value: math.Sqrt(1.0) * idf,
Message: "fieldWeight(desc:beer in one), product of:", Message: "fieldWeight(desc:beer in one), product of:",
@ -121,6 +123,7 @@ func TestTermScorer(t *testing.T) {
result: &search.DocumentMatch{ result: &search.DocumentMatch{
IndexInternalID: index.IndexInternalID("one"), IndexInternalID: index.IndexInternalID("one"),
Score: math.Sqrt(65) * idf, Score: math.Sqrt(65) * idf,
Sort: []string{},
Expl: &search.Explanation{ Expl: &search.Explanation{
Value: math.Sqrt(65) * idf, Value: math.Sqrt(65) * idf,
Message: "fieldWeight(desc:beer in one), product of:", Message: "fieldWeight(desc:beer in one), product of:",
@ -145,7 +148,7 @@ func TestTermScorer(t *testing.T) {
for _, test := range tests { for _, test := range tests {
ctx := &search.SearchContext{ ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(1), DocumentMatchPool: search.NewDocumentMatchPool(1, 0),
} }
actual := scorer.Score(ctx, test.termMatch) actual := scorer.Score(ctx, test.termMatch)
@ -187,6 +190,7 @@ func TestTermScorerWithQueryNorm(t *testing.T) {
result: &search.DocumentMatch{ result: &search.DocumentMatch{
IndexInternalID: index.IndexInternalID("one"), IndexInternalID: index.IndexInternalID("one"),
Score: math.Sqrt(1.0) * idf * 3.0 * idf * 2.0, Score: math.Sqrt(1.0) * idf * 3.0 * idf * 2.0,
Sort: []string{},
Expl: &search.Explanation{ Expl: &search.Explanation{
Value: math.Sqrt(1.0) * idf * 3.0 * idf * 2.0, Value: math.Sqrt(1.0) * idf * 3.0 * idf * 2.0,
Message: "weight(desc:beer^3.000000 in one), product of:", Message: "weight(desc:beer^3.000000 in one), product of:",
@ -235,7 +239,7 @@ func TestTermScorerWithQueryNorm(t *testing.T) {
for _, test := range tests { for _, test := range tests {
ctx := &search.SearchContext{ ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(1), DocumentMatchPool: search.NewDocumentMatchPool(1, 0),
} }
actual := scorer.Score(ctx, test.termMatch) actual := scorer.Score(ctx, test.termMatch)

View File

@ -9,7 +9,12 @@
package search package search
import "github.com/blevesearch/bleve/index" import (
"fmt"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
)
type Location struct { type Location struct {
Pos float64 `json:"pos"` Pos float64 `json:"pos"`
@ -60,11 +65,22 @@ type DocumentMatch struct {
Expl *Explanation `json:"explanation,omitempty"` Expl *Explanation `json:"explanation,omitempty"`
Locations FieldTermLocationMap `json:"locations,omitempty"` Locations FieldTermLocationMap `json:"locations,omitempty"`
Fragments FieldFragmentMap `json:"fragments,omitempty"` Fragments FieldFragmentMap `json:"fragments,omitempty"`
Sort []string `json:"sort,omitempty"`
// Fields contains the values for document fields listed in // Fields contains the values for document fields listed in
// SearchRequest.Fields. Text fields are returned as strings, numeric // SearchRequest.Fields. Text fields are returned as strings, numeric
// fields as float64s and date fields as time.RFC3339 formatted strings. // fields as float64s and date fields as time.RFC3339 formatted strings.
Fields map[string]interface{} `json:"fields,omitempty"` Fields map[string]interface{} `json:"fields,omitempty"`
// as we learn field terms, we can cache important ones for later use
// for example, sorting and building facets need these values
CachedFieldTerms index.FieldTerms `json:"-"`
// if we load the document for this hit, remember it so we dont load again
Document *document.Document `json:"-"`
// used to maintain natural index order
HitNumber uint64 `json:"-"`
} }
func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) { func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) {
@ -91,14 +107,22 @@ func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) {
// Reset allows an already allocated DocumentMatch to be reused // Reset allows an already allocated DocumentMatch to be reused
func (dm *DocumentMatch) Reset() *DocumentMatch { func (dm *DocumentMatch) Reset() *DocumentMatch {
// remember the []byte used for the IndexInternalID // remember the []byte used for the IndexInternalID
indexInternalId := dm.IndexInternalID indexInternalID := dm.IndexInternalID
// remember the []interface{} used for sort
sort := dm.Sort
// idiom to copy over from empty DocumentMatch (0 allocations) // idiom to copy over from empty DocumentMatch (0 allocations)
*dm = DocumentMatch{} *dm = DocumentMatch{}
// reuse the []byte already allocated (and reset len to 0) // reuse the []byte already allocated (and reset len to 0)
dm.IndexInternalID = indexInternalId[:0] dm.IndexInternalID = indexInternalID[:0]
// reuse the []interface{} already allocated (and reset len to 0)
dm.Sort = sort[:0]
return dm return dm
} }
func (dm *DocumentMatch) String() string {
return fmt.Sprintf("[%s-%f]", string(dm.IndexInternalID), dm.Score)
}
type DocumentMatchCollection []*DocumentMatch type DocumentMatchCollection []*DocumentMatch
func (c DocumentMatchCollection) Len() int { return len(c) } func (c DocumentMatchCollection) Len() int { return len(c) }

View File

@ -344,7 +344,7 @@ func TestBooleanSearch(t *testing.T) {
}() }()
ctx := &search.SearchContext{ ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize()), DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0),
} }
next, err := test.searcher.Next(ctx) next, err := test.searcher.Next(ctx)
i := 0 i := 0

View File

@ -189,7 +189,7 @@ func TestConjunctionSearch(t *testing.T) {
}() }()
ctx := &search.SearchContext{ ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(10), DocumentMatchPool: search.NewDocumentMatchPool(10, 0),
} }
next, err := test.searcher.Next(ctx) next, err := test.searcher.Next(ctx)
i := 0 i := 0

View File

@ -110,7 +110,7 @@ func TestDisjunctionSearch(t *testing.T) {
}() }()
ctx := &search.SearchContext{ ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize()), DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0),
} }
next, err := test.searcher.Next(ctx) next, err := test.searcher.Next(ctx)
i := 0 i := 0
@ -164,7 +164,7 @@ func TestDisjunctionAdvance(t *testing.T) {
} }
ctx := &search.SearchContext{ ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(martyOrDustinSearcher.DocumentMatchPoolSize()), DocumentMatchPool: search.NewDocumentMatchPool(martyOrDustinSearcher.DocumentMatchPoolSize(), 0),
} }
match, err := martyOrDustinSearcher.Advance(ctx, index.IndexInternalID("3")) match, err := martyOrDustinSearcher.Advance(ctx, index.IndexInternalID("3"))
if err != nil { if err != nil {

View File

@ -64,7 +64,7 @@ func testDocIDSearcher(t *testing.T, indexed, searched, wanted []string) {
}() }()
ctx := &search.SearchContext{ ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(searcher.DocumentMatchPoolSize()), DocumentMatchPool: search.NewDocumentMatchPool(searcher.DocumentMatchPoolSize(), 0),
} }
// Check the sequence // Check the sequence

View File

@ -107,7 +107,7 @@ func TestFuzzySearch(t *testing.T) {
}() }()
ctx := &search.SearchContext{ ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize()), DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0),
} }
next, err := test.searcher.Next(ctx) next, err := test.searcher.Next(ctx)
i := 0 i := 0

View File

@ -111,7 +111,7 @@ func TestMatchAllSearch(t *testing.T) {
}() }()
ctx := &search.SearchContext{ ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize()), DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0),
} }
next, err := test.searcher.Next(ctx) next, err := test.searcher.Next(ctx)
i := 0 i := 0

View File

@ -52,7 +52,7 @@ func TestMatchNoneSearch(t *testing.T) {
}() }()
ctx := &search.SearchContext{ ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize()), DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0),
} }
next, err := test.searcher.Next(ctx) next, err := test.searcher.Next(ctx)
i := 0 i := 0

View File

@ -70,7 +70,7 @@ func TestPhraseSearch(t *testing.T) {
}() }()
ctx := &search.SearchContext{ ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize()), DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0),
} }
next, err := test.searcher.Next(ctx) next, err := test.searcher.Next(ctx)
i := 0 i := 0

View File

@ -87,7 +87,7 @@ func TestRegexpSearch(t *testing.T) {
}() }()
ctx := &search.SearchContext{ ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize()), DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0),
} }
next, err := test.searcher.Next(ctx) next, err := test.searcher.Next(ctx)
i := 0 i := 0

View File

@ -165,7 +165,7 @@ func TestTermSearcher(t *testing.T) {
} }
ctx := &search.SearchContext{ ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(1), DocumentMatchPool: search.NewDocumentMatchPool(1, 0),
} }
docMatch, err := searcher.Next(ctx) docMatch, err := searcher.Next(ctx)
if err != nil { if err != nil {

488
search/sort.go Normal file
View File

@ -0,0 +1,488 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"encoding/json"
"fmt"
"sort"
"strings"
"github.com/blevesearch/bleve/numeric_util"
)
// HighTerm sorts after any real indexed term; LowTerm sorts before any.
// They serve as sentinel sort values for hits missing a sorted field.
var HighTerm = strings.Repeat(string([]byte{0xff}), 10)
var LowTerm = string([]byte{0x00})
// SearchSort describes one component of a search result ordering.
type SearchSort interface {
	// Value computes this sort's key string for the given hit.
	Value(a *DocumentMatch) string
	// Descending reports whether the order is reversed.
	Descending() bool

	// RequiresDocID reports whether the external doc ID must be loaded.
	RequiresDocID() bool
	// RequiresScoring reports whether the hit's score is needed.
	RequiresScoring() bool
	// RequiresFields lists stored fields that must be loaded per hit.
	RequiresFields() []string
}
// ParseSearchSortObj interprets a JSON-style map describing a sort.
// The required "by" key selects the sort kind ("id", "score" or "field");
// "desc" optionally reverses it, and field sorts may further specify
// "field" (required), "type", "mode" and "missing".
func ParseSearchSortObj(input map[string]interface{}) (SearchSort, error) {
	// desc is optional; absence or a non-bool value means ascending
	descending, _ := input["desc"].(bool)
	by, ok := input["by"].(string)
	if !ok {
		return nil, fmt.Errorf("search sort must specify by")
	}
	switch by {
	case "id":
		return &SortDocID{
			Desc: descending,
		}, nil
	case "score":
		return &SortScore{
			Desc: descending,
		}, nil
	case "field":
		field, ok := input["field"].(string)
		if !ok {
			return nil, fmt.Errorf("search sort mode field must specify field")
		}
		rv := &SortField{
			Field: field,
			Desc:  descending,
		}
		typ, ok := input["type"].(string)
		if ok {
			switch typ {
			case "auto":
				rv.Type = SortFieldAuto
			case "string":
				rv.Type = SortFieldAsString
			case "number":
				rv.Type = SortFieldAsNumber
			case "date":
				rv.Type = SortFieldAsDate
			default:
				// typo fix: error string previously read "unkown"
				return nil, fmt.Errorf("unknown sort field type: %s", typ)
			}
		}
		mode, ok := input["mode"].(string)
		if ok {
			switch mode {
			case "default":
				rv.Mode = SortFieldDefault
			case "min":
				rv.Mode = SortFieldMin
			case "max":
				rv.Mode = SortFieldMax
			default:
				return nil, fmt.Errorf("unknown sort field mode: %s", mode)
			}
		}
		missing, ok := input["missing"].(string)
		if ok {
			switch missing {
			case "first":
				rv.Missing = SortFieldMissingFirst
			case "last":
				rv.Missing = SortFieldMissingLast
			default:
				return nil, fmt.Errorf("unknown sort field missing: %s", missing)
			}
		}
		return rv, nil
	}
	return nil, fmt.Errorf("unknown search sort by: %s", by)
}
// ParseSearchSortString interprets a compact sort spec: an optional
// leading '-' (descending) or '+' (ascending), followed by "_id",
// "_score", or a field name.
func ParseSearchSortString(input string) SearchSort {
	descending := strings.HasPrefix(input, "-")
	if descending {
		input = strings.TrimPrefix(input, "-")
	} else {
		input = strings.TrimPrefix(input, "+")
	}
	switch input {
	case "_id":
		return &SortDocID{
			Desc: descending,
		}
	case "_score":
		return &SortScore{
			Desc: descending,
		}
	}
	return &SortField{
		Field: input,
		Desc:  descending,
	}
}
// ParseSearchSortJSON parses one sort spec from raw JSON: a bare string is
// treated as the compact form, anything else as a sort object.
func ParseSearchSortJSON(input json.RawMessage) (SearchSort, error) {
	var compact string
	if err := json.Unmarshal(input, &compact); err == nil {
		return ParseSearchSortString(compact), nil
	}
	var obj map[string]interface{}
	if err := json.Unmarshal(input, &obj); err != nil {
		return nil, err
	}
	return ParseSearchSortObj(obj)
}
// ParseSortOrderStrings converts a list of simple sort strings into a
// SortOrder, preserving their precedence order.
func ParseSortOrderStrings(in []string) SortOrder {
	order := make(SortOrder, 0, len(in))
	for _, spec := range in {
		order = append(order, ParseSearchSortString(spec))
	}
	return order
}
// ParseSortOrderJSON converts a list of raw JSON sort specifications
// into a SortOrder; the first parse failure aborts the whole list.
func ParseSortOrderJSON(in []json.RawMessage) (SortOrder, error) {
	order := make(SortOrder, 0, len(in))
	for _, raw := range in {
		parsed, err := ParseSearchSortJSON(raw)
		if err != nil {
			return nil, err
		}
		order = append(order, parsed)
	}
	return order, nil
}
// SortOrder is an ordered list of SearchSort criteria; earlier entries
// take precedence when comparing two document matches.
type SortOrder []SearchSort
// Value appends the sort value of every criterion in the order to the
// document match's Sort slice, one entry per criterion.
func (so SortOrder) Value(doc *DocumentMatch) {
	for _, srt := range so {
		doc.Sort = append(doc.Sort, srt.Value(doc))
	}
}
// Compare will compare two document matches using the specified sort order
// if both are numbers, we avoid converting back to term
// cachedScoring and cachedDesc are the precomputed per-criterion flags
// from CacheIsScore and CacheDescending.
func (so SortOrder) Compare(cachedScoring, cachedDesc []bool, i, j *DocumentMatch) int {
	// walk the criteria in precedence order until one differentiates
	for idx := range so {
		var cmp int
		if cachedScoring[idx] {
			// score-based sorts compare the numeric score directly
			switch {
			case i.Score < j.Score:
				cmp = -1
			case i.Score > j.Score:
				cmp = 1
			}
		} else {
			cmp = strings.Compare(i.Sort[idx], j.Sort[idx])
		}
		if cmp != 0 {
			if cachedDesc[idx] {
				return -cmp
			}
			return cmp
		}
	}
	// all criteria tied: fall back to the index's natural hit order
	switch {
	case i.HitNumber == j.HitNumber:
		return 0
	case i.HitNumber > j.HitNumber:
		return 1
	default:
		return -1
	}
}
// RequiresScore reports whether any criterion in the order needs the
// document score to be computed.
func (so SortOrder) RequiresScore() bool {
	for _, srt := range so {
		if srt.RequiresScoring() {
			return true
		}
	}
	return false
}
// RequiresDocID reports whether any criterion in the order needs the
// document ID to be loaded for each hit.
func (so SortOrder) RequiresDocID() bool {
	for _, srt := range so {
		if srt.RequiresDocID() {
			return true
		}
	}
	return false
}
// RequiredFields collects the stored field names every criterion in the
// order depends on; duplicates are not removed.
func (so SortOrder) RequiredFields() []string {
	var fields []string
	for _, srt := range so {
		fields = append(fields, srt.RequiresFields()...)
	}
	return fields
}
// CacheIsScore precomputes, per criterion, whether it is score-based.
// The result is consumed by Compare to avoid repeated interface calls.
func (so SortOrder) CacheIsScore() []bool {
	var scoreFlags []bool
	for _, srt := range so {
		scoreFlags = append(scoreFlags, srt.RequiresScoring())
	}
	return scoreFlags
}
// CacheDescending precomputes, per criterion, whether it sorts in
// descending order. The result is consumed by Compare.
func (so SortOrder) CacheDescending() []bool {
	var descFlags []bool
	for _, srt := range so {
		descFlags = append(descFlags, srt.Descending())
	}
	return descFlags
}
// SortFieldType lets you control some internal sort behavior
// normally leaving this to the zero-value of SortFieldAuto is fine
type SortFieldType int
const (
	// SortFieldAuto applies heuristics to attempt to automatically sort correctly
	SortFieldAuto SortFieldType = iota
	// SortFieldAsString forces sort as string (no prefix coded terms removed)
	SortFieldAsString
	// SortFieldAsNumber forces sort as string, keeping only valid prefix coded terms with shift 0
	SortFieldAsNumber
	// SortFieldAsDate forces sort as string, keeping only valid prefix coded terms with shift 0
	SortFieldAsDate
)
// SortFieldMode describes the behavior if the field has multiple values
type SortFieldMode int
const (
	// SortFieldDefault uses the first (or only) value, this is the default zero-value
	SortFieldDefault SortFieldMode = iota // NOTE: name is confusing; "First" would describe the behavior better
	// SortFieldMin uses the minimum value
	SortFieldMin
	// SortFieldMax uses the maximum value
	SortFieldMax
)
// SortFieldMissing controls where documents missing a field value should be sorted
type SortFieldMissing int
const (
	// SortFieldMissingLast sorts documents missing a field at the end (default zero-value)
	SortFieldMissingLast SortFieldMissing = iota
	// SortFieldMissingFirst sorts documents missing a field at the beginning
	SortFieldMissingFirst
)
// SortField will sort results by the value of a stored field
// Field is the name of the field
// Desc reverses the sort order (default false)
// Type allows forcing of string/number/date behavior (default auto)
// Mode controls behavior for multi-valued fields (default first)
// Missing controls behavior of missing values (default last)
type SortField struct {
	Field   string
	Desc    bool
	Type    SortFieldType
	Mode    SortFieldMode
	Missing SortFieldMissing
}
// Value returns the sort value of the DocumentMatch: the cached terms
// for the field, filtered by type, then reduced to a single term by
// the multi-value mode (or a missing-value sentinel).
func (s *SortField) Value(i *DocumentMatch) string {
	terms := s.filterTermsByType(i.CachedFieldTerms[s.Field])
	return s.filterTermsByMode(terms)
}
// Descending determines the order of the sort (true means reversed)
func (s *SortField) Descending() bool {
	return s.Desc
}
// filterTermsByMode reduces the field's terms to the single term used
// for sorting. With one term (or SortFieldDefault) the first term wins;
// SortFieldMin/SortFieldMax pick the lexicographic extremes. When no
// usable term remains, a sentinel is substituted per the Missing policy,
// adjusted for sort direction so the document lands first or last.
func (s *SortField) filterTermsByMode(terms []string) string {
	if len(terms) > 0 {
		if len(terms) == 1 || s.Mode == SortFieldDefault {
			return terms[0]
		}
		switch s.Mode {
		case SortFieldMin:
			sort.Strings(terms)
			return terms[0]
		case SortFieldMax:
			sort.Strings(terms)
			return terms[len(terms)-1]
		}
	}
	// missing (or unhandled mode): pick the sentinel that realizes the
	// Missing policy under the current sort direction
	wantHigh := (s.Missing == SortFieldMissingLast) != s.Desc
	if wantHigh {
		return HighTerm
	}
	return LowTerm
}
// filterTermsByType attempts to make one pass on the terms
// if we are in auto-mode AND all the terms look like prefix-coded numbers
// return only the terms which had shift of 0
// if we are in explicit number or date mode, return only valid
// prefix coded numbers with shift of 0
func (s *SortField) filterTermsByType(terms []string) []string {
	stype := s.Type
	if stype == SortFieldAuto {
		allTermsPrefixCoded := true
		var termsWithShiftZero []string
		for _, term := range terms {
			valid, shift := numeric_util.ValidPrefixCodedTerm(term)
			if valid && shift == 0 {
				termsWithShiftZero = append(termsWithShiftZero, term)
			} else if !valid {
				allTermsPrefixCoded = false
			}
		}
		if allTermsPrefixCoded {
			terms = termsWithShiftZero
		}
	} else if stype == SortFieldAsNumber || stype == SortFieldAsDate {
		var termsWithShiftZero []string
		for _, term := range terms {
			valid, shift := numeric_util.ValidPrefixCodedTerm(term)
			if valid && shift == 0 {
				// BUG FIX: the original appended nothing here
				// (append(termsWithShiftZero) with no element), so explicit
				// number/date mode always produced an empty term list
				termsWithShiftZero = append(termsWithShiftZero, term)
			}
		}
		terms = termsWithShiftZero
	}
	return terms
}
// RequiresDocID says this SearchSort does not require the DocID be loaded
func (s *SortField) RequiresDocID() bool { return false }
// RequiresScoring says this SearchSort does not require scoring
func (s *SortField) RequiresScoring() bool { return false }
// RequiresFields says this SearchSort requires the specified stored field
func (s *SortField) RequiresFields() []string { return []string{s.Field} }
// MarshalJSON emits the compact string form ("field" or "-field") when
// every option is at its default, otherwise the full object form with
// only the non-default keys present.
func (s *SortField) MarshalJSON() ([]byte, error) {
	allDefaults := s.Type == SortFieldAuto &&
		s.Mode == SortFieldDefault &&
		s.Missing == SortFieldMissingLast
	if allDefaults {
		name := s.Field
		if s.Desc {
			name = "-" + name
		}
		return json.Marshal(name)
	}
	obj := map[string]interface{}{
		"by":    "field",
		"field": s.Field,
	}
	if s.Desc {
		obj["desc"] = true
	}
	switch s.Missing {
	case SortFieldMissingFirst:
		obj["missing"] = "first"
	}
	switch s.Mode {
	case SortFieldMin:
		obj["mode"] = "min"
	case SortFieldMax:
		obj["mode"] = "max"
	}
	switch s.Type {
	case SortFieldAsString:
		obj["type"] = "string"
	case SortFieldAsNumber:
		obj["type"] = "number"
	case SortFieldAsDate:
		obj["type"] = "date"
	}
	return json.Marshal(obj)
}
// SortDocID will sort results by the document identifier
type SortDocID struct {
	Desc bool // Desc reverses the sort order (default false, ascending)
}
// Value returns the sort value of the DocumentMatch: its document ID
func (s *SortDocID) Value(i *DocumentMatch) string {
	return i.ID
}
// Descending determines the order of the sort
func (s *SortDocID) Descending() bool {
	return s.Desc
}
// RequiresDocID says this SearchSort does require the DocID be loaded
func (s *SortDocID) RequiresDocID() bool { return true }
// RequiresScoring says this SearchSort does not require scoring
func (s *SortDocID) RequiresScoring() bool { return false }
// RequiresFields says this SearchSort does not require any stored fields
func (s *SortDocID) RequiresFields() []string { return nil }
// MarshalJSON emits the compact string form "_id" (or "-_id" when descending).
func (s *SortDocID) MarshalJSON() ([]byte, error) {
	repr := "_id"
	if s.Desc {
		repr = "-" + repr
	}
	return json.Marshal(repr)
}
// SortScore will sort results by the document match score
type SortScore struct {
	Desc bool // Desc reverses the sort order (default false, ascending)
}
// Value returns the placeholder string "_score"; the numeric score is
// not rendered as a term — SortOrder.Compare compares Score directly
// via its cachedScoring fast path
func (s *SortScore) Value(i *DocumentMatch) string {
	return "_score"
}
// Descending determines the order of the sort
func (s *SortScore) Descending() bool {
	return s.Desc
}
// RequiresDocID says this SearchSort does not require the DocID be loaded
func (s *SortScore) RequiresDocID() bool { return false }
// RequiresScoring says this SearchSort does require scoring
func (s *SortScore) RequiresScoring() bool { return true }
// RequiresFields says this SearchSort does not require any stored fields
func (s *SortScore) RequiresFields() []string { return nil }
// MarshalJSON emits the compact string form "_score" (or "-_score" when descending).
func (s *SortScore) MarshalJSON() ([]byte, error) {
	repr := "_score"
	if s.Desc {
		repr = "-" + repr
	}
	return json.Marshal(repr)
}

View File

@ -0,0 +1,8 @@
{
"id": "a",
"name": "marty",
"age": 19,
"born": "2014-11-25",
"title": "mista",
"tags": ["gopher", "belieber"]
}

View File

@ -0,0 +1,8 @@
{
"id": "b",
"name": "steve",
"age": 21,
"born": "2000-09-11",
"title": "zebra",
"tags": ["thought-leader", "futurist"]
}

View File

@ -0,0 +1,8 @@
{
"id": "c",
"name": "aster",
"age": 21,
"born": "1954-02-02",
"title": "blogger",
"tags": ["red", "blue", "green"]
}

View File

@ -0,0 +1,7 @@
{
"id": "d",
"age": 65,
"born": "1978-12-02",
"title": "agent",
"tags": ["cats"]
}

View File

@ -0,0 +1,7 @@
{
"id": "e",
"name": "nancy",
"born": "1954-10-22",
"title": "rapstar",
"tags": ["pain"]
}

View File

@ -0,0 +1,7 @@
{
"id": "f",
"name": "frank",
"age": 1,
"title": "taxman",
"tags": ["vitamin","purple"]
}

View File

@ -0,0 +1,3 @@
{
}

View File

@ -0,0 +1,443 @@
[
{
"comment": "default order, all have same score, then by natural index order",
"search": {
"from": 0,
"size": 10,
"query": {
"match_all":{}
}
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "a"
},
{
"id": "b"
},
{
"id": "c"
},
{
"id": "d"
},
{
"id": "e"
},
{
"id": "f"
}
]
}
},
{
"comment": "sort by name, ascending",
"search": {
"from": 0,
"size": 10,
"query": {
"match_all":{}
},
"sort": ["name"]
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "c"
},
{
"id": "f"
},
{
"id": "a"
},
{
"id": "e"
},
{
"id": "b"
},
{
"id": "d"
}
]
}
},
{
"comment": "sort by name, descending",
"search": {
"from": 0,
"size": 10,
"query": {
"match_all":{}
},
"sort": ["-name"]
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "b"
},
{
"id": "e"
},
{
"id": "a"
},
{
"id": "f"
},
{
"id": "c"
},
{
"id": "d"
}
]
}
},
{
"comment": "sort by name, descending, missing first",
"search": {
"from": 0,
"size": 10,
"query": {
"match_all":{}
},
"sort": [{"by":"field","field":"name","missing":"first","desc":true}]
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "d"
},
{
"id": "b"
},
{
"id": "e"
},
{
"id": "a"
},
{
"id": "f"
},
{
"id": "c"
}
]
}
},
{
"comment": "sort by age, ascending",
"search": {
"from": 0,
"size": 10,
"query": {
"match_all":{}
},
"sort": ["age"]
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "f"
},
{
"id": "a"
},
{
"id": "b"
},
{
"id": "c"
},
{
"id": "d"
},
{
"id": "e"
}
]
}
},
{
"comment": "sort by age, descending",
"search": {
"from": 0,
"size": 10,
"query": {
"match_all":{}
},
"sort": ["-age"]
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "d"
},
{
"id": "b"
},
{
"id": "c"
},
{
"id": "a"
},
{
"id": "f"
},
{
"id": "e"
}
]
}
},
{
"comment": "sort by age, descending, missing first",
"search": {
"from": 0,
"size": 10,
"query": {
"match_all":{}
},
"sort": [{"by":"field","field":"age","missing":"first","desc":true}]
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "e"
},
{
"id": "d"
},
{
"id": "b"
},
{
"id": "c"
},
{
"id": "a"
},
{
"id": "f"
}
]
}
},
{
"comment": "sort by born, ascending",
"search": {
"from": 0,
"size": 10,
"query": {
"match_all":{}
},
"sort": ["born"]
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "c"
},
{
"id": "e"
},
{
"id": "d"
},
{
"id": "b"
},
{
"id": "a"
},
{
"id": "f"
}
]
}
},
{
"comment": "sort by born, descending",
"search": {
"from": 0,
"size": 10,
"query": {
"match_all":{}
},
"sort": ["-born"]
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "a"
},
{
"id": "b"
},
{
"id": "d"
},
{
"id": "e"
},
{
"id": "c"
},
{
"id": "f"
}
]
}
},
{
"comment": "sort by born, descending, missing first",
"search": {
"from": 0,
"size": 10,
"query": {
"match_all":{}
},
"sort": [{"by":"field","field":"born","missing":"first","desc":true}]
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "f"
},
{
"id": "a"
},
{
"id": "b"
},
{
"id": "d"
},
{
"id": "e"
},
{
"id": "c"
}
]
}
},
{
"comment": "sort on multi-valued field",
"search": {
"from": 0,
"size": 10,
"query": {
"match_all":{}
},
"sort": [{"by":"field","field":"tags","mode":"min"}]
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "a"
},
{
"id": "c"
},
{
"id": "d"
},
{
"id": "b"
},
{
"id": "e"
},
{
"id": "f"
}
]
}
},
{
"comment": "multi-column sort by age, ascending, name, ascending (flips b and c which have same age)",
"search": {
"from": 0,
"size": 10,
"query": {
"match_all":{}
},
"sort": ["age", "name"]
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "f"
},
{
"id": "a"
},
{
"id": "c"
},
{
"id": "b"
},
{
"id": "d"
},
{
"id": "e"
}
]
}
},
{
"comment": "sort by docid descending",
"search": {
"from": 0,
"size": 10,
"query": {
"match_all":{}
},
"sort": ["-_id"]
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "f"
},
{
"id": "e"
},
{
"id": "d"
},
{
"id": "c"
},
{
"id": "b"
},
{
"id": "a"
}
]
}
}
]