0
0
Fork 0

Merge branch 'master' of https://github.com/dtylman/bleve into sort-by-field-try2

This commit is contained in:
Marty Schoch 2016-08-12 14:23:55 -04:00
commit 0bb69a9a1c
15 changed files with 984 additions and 202 deletions

View File

@ -9,9 +9,7 @@
package document
import (
"fmt"
)
import "fmt"
type Document struct {
ID string `json:"id"`
@ -38,6 +36,21 @@ func (d *Document) AddField(f Field) *Document {
return d
}
// FieldNamed returns the first field of the document whose Name() equals
// field, or nil when the document has no such field.
//
// A nil *Document is treated as a document without fields. Sorting code
// passes DocumentMatch.Document straight through to this method, so a hit
// whose stored document was not loaded is ordered as "missing field"
// instead of causing a nil-pointer panic.
func (d *Document) FieldNamed(field string) Field {
	if d == nil {
		return nil
	}
	for _, f := range d.Fields {
		if f.Name() == field {
			return f
		}
	}
	return nil
}
// CompareFieldsNamed orders this document relative to other using the
// value of the named field in each document. The lookup is done with
// FieldNamed and the ordering itself is delegated to CompareFieldValues.
func (d *Document) CompareFieldsNamed(other *Document, field string, descending bool) int {
	return CompareFieldValues(d.FieldNamed(field), other.FieldNamed(field), descending)
}
func (d *Document) GoString() string {
fields := ""
for i, field := range d.Fields {

View File

@ -32,3 +32,86 @@ type Field interface {
// the rate of indexing
NumPlainTextBytes() uint64
}
// CompareFieldValues provides ordering amongst stored field values.
// It returns 0 when i and j are considered equal; in ascending mode it
// returns -1 when i sorts before j and 1 when i sorts after j, and in
// descending mode those two results are swapped.
//
// When trying to compare field values of different types,
// we impose the following order:
// - nil (missing field)
// - boolean
// - number
// - text
// - date
func CompareFieldValues(i, j Field, descending bool) int {
	// before/after are the results for "i sorts before j" and
	// "i sorts after j" in the requested direction
	before, after := -1, 1
	if descending {
		before, after = 1, -1
	}

	switch left := i.(type) {
	case nil:
		if j == nil {
			return 0
		}
		return before
	case *BooleanField:
		switch right := j.(type) {
		case nil:
			return after
		case *BooleanField:
			return left.Compare(right, descending)
		}
		return before
	case *NumericField:
		switch right := j.(type) {
		case nil, *BooleanField:
			return after
		case *NumericField:
			return left.Compare(right, descending)
		}
		return before
	case *TextField:
		switch right := j.(type) {
		case nil, *BooleanField, *NumericField:
			return after
		case *TextField:
			return left.Compare(right, descending)
		}
		return before
	case *DateTimeField:
		switch right := j.(type) {
		case nil, *BooleanField, *NumericField, *TextField:
			return after
		case *DateTimeField:
			return left.Compare(right, descending)
		}
	}
	// reached when i has a type not listed above, or when i is a date
	// and j has a type not listed above: such pairs compare as equal
	return 0
}

View File

@ -71,6 +71,17 @@ func (b *BooleanField) NumPlainTextBytes() uint64 {
return b.numPlainTextBytes
}
// Compare orders two boolean fields, with false sorting before true in
// ascending mode. It returns 0 when both values are equal, -1 when b
// sorts before other and 1 otherwise; descending reverses the order.
// Errors returned while decoding either value are ignored.
func (b *BooleanField) Compare(other *BooleanField, descending bool) int {
	mine, _ := b.Boolean()
	theirs, _ := other.Boolean()
	switch {
	case mine == theirs:
		return 0
	case mine == descending:
		// the values differ here: ascending puts false first,
		// descending puts true first
		return -1
	default:
		return 1
	}
}
func NewBooleanFieldFromBytes(name string, arrayPositions []uint64, value []byte) *BooleanField {
return &BooleanField{
name: name,

View File

@ -100,6 +100,17 @@ func (n *DateTimeField) NumPlainTextBytes() uint64 {
return n.numPlainTextBytes
}
// Compare orders two datetime fields chronologically. It returns 0 when
// both instants are equal, -1 when n sorts before other and 1 otherwise;
// descending reverses the order. Errors returned while decoding either
// value are ignored.
func (n *DateTimeField) Compare(other *DateTimeField, descending bool) int {
	mine, _ := n.DateTime()
	theirs, _ := other.DateTime()
	switch {
	case mine.Equal(theirs):
		return 0
	case !descending && mine.Before(theirs), descending && theirs.Before(mine):
		return -1
	}
	return 1
}
func NewDateTimeFieldFromBytes(name string, arrayPositions []uint64, value []byte) *DateTimeField {
return &DateTimeField{
name: name,

View File

@ -96,6 +96,17 @@ func (n *NumericField) NumPlainTextBytes() uint64 {
return n.numPlainTextBytes
}
// Compare orders two numeric fields by value. It returns 0 when both
// numbers are equal, -1 when n sorts before other and 1 otherwise;
// descending reverses the order. Errors returned while decoding either
// value are ignored.
func (n *NumericField) Compare(other *NumericField, descending bool) int {
	mine, _ := n.Number()
	theirs, _ := other.Number()
	switch {
	case mine == theirs:
		return 0
	case !descending && mine < theirs:
		return -1
	case descending && mine > theirs:
		return -1
	}
	return 1
}
func NewNumericFieldFromBytes(name string, arrayPositions []uint64, value []byte) *NumericField {
return &NumericField{
name: name,

383
document/field_test.go Normal file
View File

@ -0,0 +1,383 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package document
import (
"testing"
"time"
)
// TestCompareFieldValues checks CompareFieldValues for every pairing of
// nil, boolean, numeric, text and datetime fields, in both ascending and
// descending mode.
func TestCompareFieldValues(t *testing.T) {
	t1 := time.Now()
	t2 := t1.Add(1 * time.Hour)
	dtf1, _ := NewDateTimeField("", nil, t1)
	dtf2, _ := NewDateTimeField("", nil, t2)

	// constructors for the unnamed fields used throughout the table;
	// each call builds a fresh field
	boolean := func(v bool) Field { return NewBooleanField("", nil, v) }
	number := func(v float64) Field { return NewNumericField("", nil, v) }
	text := func(v string) Field { return NewTextField("", nil, []byte(v)) }

	type compareCase struct {
		l    Field
		r    Field
		desc bool
		res  int
	}
	cases := []compareCase{
		// nil simple
		{l: nil, r: nil, res: 0},
		// boolean simple
		{l: boolean(true), r: boolean(true), res: 0},
		{l: boolean(true), r: boolean(false), res: 1},
		{l: boolean(false), r: boolean(true), res: -1},
		{l: boolean(true), r: boolean(false), desc: true, res: -1},
		{l: boolean(false), r: boolean(true), desc: true, res: 1},
		// numeric simple
		{l: number(3.14), r: number(3.14), res: 0},
		{l: number(5.14), r: number(3.14), res: 1},
		{l: number(3.14), r: number(5.14), res: -1},
		{l: number(5.14), r: number(3.14), desc: true, res: -1},
		{l: number(3.14), r: number(5.14), desc: true, res: 1},
		// text simple
		{l: text("cat"), r: text("cat"), res: 0},
		{l: text("dog"), r: text("cat"), res: 1},
		{l: text("cat"), r: text("dog"), res: -1},
		{l: text("dog"), r: text("cat"), desc: true, res: -1},
		{l: text("cat"), r: text("dog"), desc: true, res: 1},
		// datetime simple
		{l: dtf1, r: dtf1, res: 0},
		{l: dtf2, r: dtf1, res: 1},
		{l: dtf1, r: dtf2, res: -1},
		{l: dtf2, r: dtf1, desc: true, res: -1},
		{l: dtf1, r: dtf2, desc: true, res: 1},
		// mixed types, nil left
		{l: nil, r: boolean(true), res: -1},
		{l: nil, r: boolean(true), desc: true, res: 1},
		{l: nil, r: number(3.14), res: -1},
		{l: nil, r: number(3.14), desc: true, res: 1},
		{l: nil, r: text("cat"), res: -1},
		{l: nil, r: text("cat"), desc: true, res: 1},
		{l: nil, r: dtf1, res: -1},
		{l: nil, r: dtf1, desc: true, res: 1},
		// mixed types, boolean left
		{l: boolean(true), r: nil, res: 1},
		{l: boolean(true), r: nil, desc: true, res: -1},
		{l: boolean(true), r: number(3.14), res: -1},
		{l: boolean(true), r: number(3.14), desc: true, res: 1},
		{l: boolean(true), r: text("cat"), res: -1},
		{l: boolean(true), r: text("cat"), desc: true, res: 1},
		{l: boolean(true), r: dtf1, res: -1},
		{l: boolean(true), r: dtf1, desc: true, res: 1},
		// mixed types, number left
		{l: number(3.14), r: nil, res: 1},
		{l: number(3.14), r: nil, desc: true, res: -1},
		{l: number(3.14), r: boolean(true), res: 1},
		{l: number(3.14), r: boolean(true), desc: true, res: -1},
		{l: number(3.14), r: text("cat"), res: -1},
		{l: number(3.14), r: text("cat"), desc: true, res: 1},
		{l: number(3.14), r: dtf1, res: -1},
		{l: number(3.14), r: dtf1, desc: true, res: 1},
		// mixed types, text left
		{l: text("cat"), r: nil, res: 1},
		{l: text("cat"), r: nil, desc: true, res: -1},
		{l: text("cat"), r: boolean(true), res: 1},
		{l: text("cat"), r: boolean(true), desc: true, res: -1},
		{l: text("cat"), r: number(3.14), res: 1},
		{l: text("cat"), r: number(3.14), desc: true, res: -1},
		{l: text("cat"), r: dtf1, res: -1},
		{l: text("cat"), r: dtf1, desc: true, res: 1},
		// mixed types, datetimes left
		{l: dtf1, r: nil, res: 1},
		{l: dtf1, r: nil, desc: true, res: -1},
		{l: dtf1, r: boolean(true), res: 1},
		{l: dtf1, r: boolean(true), desc: true, res: -1},
		{l: dtf1, r: number(3.14), res: 1},
		{l: dtf1, r: number(3.14), desc: true, res: -1},
		{l: dtf1, r: text("cat"), res: 1},
		{l: dtf1, r: text("cat"), desc: true, res: -1},
	}

	for idx, tc := range cases {
		if got := CompareFieldValues(tc.l, tc.r, tc.desc); got != tc.res {
			t.Errorf("expected %d, got %d for case %d", tc.res, got, idx)
		}
	}
}

View File

@ -11,6 +11,7 @@ package document
import (
"fmt"
"strings"
"github.com/blevesearch/bleve/analysis"
)
@ -77,6 +78,13 @@ func (t *TextField) NumPlainTextBytes() uint64 {
return t.numPlainTextBytes
}
// Compare orders two text fields by lexicographic comparison of their
// values, returning the result of strings.Compare. When descending is
// set the operands are swapped, which reverses the order.
func (t *TextField) Compare(other *TextField, descending bool) int {
	left, right := string(t.value), string(other.value)
	if descending {
		left, right = right, left
	}
	return strings.Compare(left, right)
}
// NewTextField creates a TextField with the given name, array positions
// and value, using DefaultTextIndexingOptions as its indexing options.
func NewTextField(name string, arrayPositions []uint64, value []byte) *TextField {
	return NewTextFieldWithIndexingOptions(name, arrayPositions, value, DefaultTextIndexingOptions)
}

View File

@ -384,7 +384,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
return nil, ErrorIndexClosed
}
collector := collectors.NewTopScorerSkipCollector(req.Size, req.From)
collector := collectors.NewHeapCollector(req.Size, req.From, req.Sort)
// open a reader for this search
indexReader, err := i.i.Reader()

View File

@ -715,6 +715,57 @@ func TestIndexMetadataRaceBug198(t *testing.T) {
close(done)
}
// TestSortMatchSearch indexes 200 small documents, runs a match query
// sorted by the stored "Day" field and then by "Name", and verifies that
// the returned hits respect both sort keys.
func TestSortMatchSearch(t *testing.T) {
	defer func() {
		err := os.RemoveAll("testidx")
		if err != nil {
			t.Fatal(err)
		}
	}()

	index, err := New("testidx", NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}
	// deferred after the RemoveAll above, so the index is closed before
	// its directory is removed even when the test fails early
	defer func() {
		err := index.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	names := []string{"Noam", "Uri", "David", "Yosef", "Eitan", "Itay", "Ariel", "Daniel", "Omer", "Yogev", "Yehonatan", "Moshe", "Mohammed", "Yusuf", "Omar"}
	days := []string{"Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"}
	numbers := []string{"One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine", "Ten", "Eleven", "Twelve"}
	for i := 0; i < 200; i++ {
		doc := make(map[string]interface{})
		doc["Name"] = names[i%len(names)]
		doc["Day"] = days[i%len(days)]
		doc["Number"] = numbers[i%len(numbers)]
		err = index.Index(fmt.Sprintf("%d", i), doc)
		if err != nil {
			t.Fatal(err)
		}
	}

	req := NewSearchRequest(NewMatchQuery("One"))
	req.SortBy(search.SortOrder{
		&search.SortStoredField{Field: "Day"},
		&search.SortStoredField{Field: "Name"},
	})
	req.Fields = []string{"*"}
	sr, err := index.Search(req)
	if err != nil {
		t.Fatal(err)
	}
	// without hits the ordering checks below would pass vacuously
	if len(sr.Hits) == 0 {
		t.Fatal("expected at least one hit")
	}

	// stringField fetches a returned field and fails the test, rather
	// than panicking, when it is missing or not a string
	stringField := func(hit *search.DocumentMatch, name string) string {
		val, ok := hit.Fields[name].(string)
		if !ok {
			t.Fatalf("hit '%s': expected string value for field '%s', got %v", hit.ID, name, hit.Fields[name])
		}
		return val
	}

	prevDay, prevName := "", ""
	for _, hit := range sr.Hits {
		day := stringField(hit, "Day")
		name := stringField(hit, "Name")
		if prevDay > day {
			t.Errorf("Hits must be sorted by 'Day'. Found '%s' before '%s'", prevDay, day)
		} else if prevDay == day && prevName > name {
			// the secondary key only applies among hits sharing a day
			t.Errorf("Hits with equal 'Day' must be sorted by 'Name'. Found '%s' before '%s'", prevName, name)
		}
		prevDay, prevName = day, name
	}
}
func TestIndexCountMatchSearch(t *testing.T) {
defer func() {
err := os.RemoveAll("testidx")

View File

@ -191,6 +191,7 @@ func (h *HighlightRequest) AddField(field string) {
// Facets describe the set of facets to be computed.
// Explain triggers inclusion of additional search
// result score explanations.
// Sort describes the desired order for the results to be returned.
//
// A special field named "*" can be used to return all fields.
type SearchRequest struct {
@ -201,6 +202,7 @@ type SearchRequest struct {
Fields []string `json:"fields"`
Facets FacetsRequest `json:"facets"`
Explain bool `json:"explain"`
Sort search.SortOrder `json:"sort"`
}
func (sr *SearchRequest) Validate() error {
@ -220,6 +222,11 @@ func (r *SearchRequest) AddFacet(facetName string, f *FacetRequest) {
r.Facets[facetName] = f
}
// SortBy changes the request to use the requested sort order,
// replacing whatever order was previously set on the request.
func (r *SearchRequest) SortBy(order search.SortOrder) {
	r.Sort = order
}
// UnmarshalJSON deserializes a JSON representation of
// a SearchRequest
func (r *SearchRequest) UnmarshalJSON(input []byte) error {
@ -274,12 +281,14 @@ func NewSearchRequest(q Query) *SearchRequest {
// NewSearchRequestOptions builds a SearchRequest for the Query using
// the supplied size, from and explain settings.
// The request's Sort is initialised to order by score, descending.
func NewSearchRequestOptions(q Query, size, from int, explain bool) *SearchRequest {
	req := new(SearchRequest)
	req.Query = q
	req.Size = size
	req.From = from
	req.Explain = explain
	req.Sort = search.SortOrder{&search.SortScore{Descending: true}}
	return req
}

View File

@ -0,0 +1,241 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
//
package collectors
import (
"container/heap"
"time"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"golang.org/x/net/context"
)
// HeapCollector collects search hits into a heap ordered by a
// search.SortOrder, retaining at most size+skip hits while collecting.
type HeapCollector struct {
	// size and skip bound the heap: at most size+skip hits are kept, and
	// finalizeResults leaves the skip best-sorting of them out of the results
	size int
	skip int
	// total counts every hit passed to collectSingle
	total uint64
	// maxScore is the highest Score seen on any collected hit
	maxScore float64
	// took is the duration measured by Collect
	took time.Duration
	// sort is the order used for all comparisons between hits
	sort search.SortOrder
	// results backs the heap while collecting and holds the final
	// result slice once finalizeResults has run
	results search.DocumentMatchCollection
	// facetsBuilder, when set, is updated with every hit
	facetsBuilder *search.FacetsBuilder
	// lowestMatchOutsideResults is the lowest-sorting hit among those
	// already removed from the heap; see collectSingle
	lowestMatchOutsideResults *search.DocumentMatch
}
// COLLECT_CHECK_DONE_EVERY controls how often Collect polls the context
// for cancellation: the check runs whenever the running hit total is a
// multiple of this value.
var COLLECT_CHECK_DONE_EVERY = uint64(1024)
// NewHeapCollector returns a HeapCollector configured with the given
// size, skip and sort order, with its (empty) heap initialised.
func NewHeapCollector(size int, skip int, sort search.SortOrder) *HeapCollector {
	hc := new(HeapCollector)
	hc.size, hc.skip, hc.sort = size, skip, sort
	heap.Init(hc)
	return hc
}
// Collect drains the searcher, feeding every hit to the facets builder
// (when one is set) and to collectSingle, then finalizes the results.
//
// The context is checked before the first hit is fetched and again
// whenever the running total is a multiple of COLLECT_CHECK_DONE_EVERY;
// on cancellation ctx.Err() is returned immediately, without finalizing.
// Any error from the searcher, the facets builder, collectSingle or
// finalizeResults is returned to the caller.
func (hc *HeapCollector) Collect(ctx context.Context, searcher search.Searcher, reader index.IndexReader) error {
	began := time.Now()

	// pool sized for: the size+skip hits we keep references to, one
	// extra for lowestMatchOutsideResults, plus the amount required by
	// the searcher tree
	poolSize := hc.size + hc.skip + 1 + searcher.DocumentMatchPoolSize()
	searchContext := &search.SearchContext{
		DocumentMatchPool: search.NewDocumentMatchPool(poolSize),
	}

	var (
		hit *search.DocumentMatch
		err error
	)
	select {
	case <-ctx.Done():
		return ctx.Err()
	default:
		hit, err = searcher.Next(searchContext)
	}

	for err == nil && hit != nil {
		if hc.total%COLLECT_CHECK_DONE_EVERY == 0 {
			select {
			case <-ctx.Done():
				return ctx.Err()
			default:
			}
		}
		if hc.facetsBuilder != nil {
			if err = hc.facetsBuilder.Update(hit); err != nil {
				break
			}
		}
		if err = hc.collectSingle(searchContext, reader, hit); err != nil {
			break
		}
		hit, err = searcher.Next(searchContext)
	}

	// the duration is recorded before any error is reported
	hc.took = time.Since(began)
	if err != nil {
		return err
	}

	// turn the heap into the actual result list
	return hc.finalizeResults(reader)
}
// collectSingle accounts for one hit and offers it to the heap.
//
// It updates the hit counter and max score, loads the document ID and
// stored document when the sort order asks for them, and then either
// returns the hit to the pool (when it cannot make the result set) or
// pushes it on the heap, evicting the lowest-sorting entry once the
// heap grows beyond size+skip. Errors from the index reader are returned.
func (hc *HeapCollector) collectSingle(ctx *search.SearchContext, reader index.IndexReader, d *search.DocumentMatch) error {
	// increment total hits
	hc.total++
	// the running total doubles as the hit's position in natural index order
	d.HitNumber = hc.total
	// update max score
	if d.Score > hc.maxScore {
		hc.maxScore = d.Score
	}
	var err error
	// see if we need to load ID (at this early stage, for example to sort on it)
	if hc.sort.RequiresDocID() {
		d.ID, err = reader.FinalizeDocID(d.IndexInternalID)
		if err != nil {
			return err
		}
	}
	// see if we need to load the stored fields
	if len(hc.sort.RequiredStoredFields()) > 0 {
		if d.ID == "" {
			// look up the id since we need it for lookup
			d.ID, err = reader.FinalizeDocID(d.IndexInternalID)
			if err != nil {
				return err
			}
		}
		// NOTE(review): the whole document is loaded, not only the fields
		// the sort asked for; whether Document can be nil without an error
		// depends on the reader — confirm
		d.Document, err = reader.Document(d.ID)
		if err != nil {
			return err
		}
	}
	// optimization, we track lowest sorting hit already removed from heap
	// with this one comparison, we can avoid all heap operations if
	// this hit would have been added and then immediately removed
	if hc.lowestMatchOutsideResults != nil {
		cmp := hc.sort.Compare(d, hc.lowestMatchOutsideResults)
		if cmp >= 0 {
			// this hit can't possibly be in the result set, so avoid heap ops
			ctx.DocumentMatchPool.Put(d)
			return nil
		}
	}
	heap.Push(hc, d)
	if hc.Len() > hc.size+hc.skip {
		// Less is inverted, so Pop yields the hit that sorts last
		removed := heap.Pop(hc).(*search.DocumentMatch)
		if hc.lowestMatchOutsideResults == nil {
			hc.lowestMatchOutsideResults = removed
		} else {
			cmp := hc.sort.Compare(removed, hc.lowestMatchOutsideResults)
			if cmp < 0 {
				// the evicted hit becomes the new boundary; the previous
				// boundary is no longer referenced and goes back to the pool
				tmp := hc.lowestMatchOutsideResults
				hc.lowestMatchOutsideResults = removed
				ctx.DocumentMatchPool.Put(tmp)
			}
		}
	}
	return nil
}
// SetFacetsBuilder sets the facets builder that Collect updates with
// every hit.
func (hc *HeapCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
	hc.facetsBuilder = facetsBuilder
}
// finalizeResults starts with the heap containing the final top size+skip
// hits. It moves the hits to be returned into a slice ordered best-first,
// throws away the skip best-sorting hits, and does the final doc id
// lookup for any returned hit that does not have its ID yet.
//
// When the heap holds no more hits than skip, the results are set to an
// empty collection. Previously fewer hits than skip produced a negative
// slice length and a run-time panic.
//
// An error from the index reader's doc id lookup is returned as is.
func (hc *HeapCollector) finalizeResults(r index.IndexReader) error {
	size := hc.Len() - hc.skip
	if size <= 0 {
		// everything collected falls inside the skipped range
		hc.results = search.DocumentMatchCollection{}
		return nil
	}

	rv := make(search.DocumentMatchCollection, size)
	// Pop yields the lowest-sorting hit first, so fill from the back;
	// whatever is left on the heap afterwards is the skipped prefix
	for i := size - 1; i >= 0 && hc.Len() > 0; i-- {
		doc := heap.Pop(hc).(*search.DocumentMatch)
		rv[i] = doc
		if doc.ID == "" {
			// look up the id since it was not needed (and so not loaded) earlier
			var err error
			doc.ID, err = r.FinalizeDocID(doc.IndexInternalID)
			if err != nil {
				return err
			}
		}
	}

	// no longer a heap
	hc.results = rv
	return nil
}
// Results returns the collector's result slice; after a successful
// Collect this is the finalized list of hits.
func (hc *HeapCollector) Results() search.DocumentMatchCollection {
	return hc.results
}
// Total returns the number of hits seen by the collector.
func (hc *HeapCollector) Total() uint64 {
	return hc.total
}
// MaxScore returns the highest score among the hits seen by the collector.
func (hc *HeapCollector) MaxScore() float64 {
	return hc.maxScore
}
// Took returns the duration recorded by Collect.
func (hc *HeapCollector) Took() time.Duration {
	return hc.took
}
// FacetResults returns the results of the facets builder, or an empty
// search.FacetResults when no facets builder has been set.
func (hc *HeapCollector) FacetResults() search.FacetResults {
	if hc.facetsBuilder == nil {
		return search.FacetResults{}
	}
	return hc.facetsBuilder.Results()
}
// heap interface implementation

// Len returns the number of hits currently held in results.
func (hc *HeapCollector) Len() int {
	return len(hc.results)
}
// Less reports whether the hit at index i sorts after the hit at index j
// according to the collector's sort order. The comparison is inverted on
// purpose: it puts the lowest-sorting hit at the top of the heap, so
// that heap.Pop removes it first.
func (hc *HeapCollector) Less(i, j int) bool {
	return hc.sort.Compare(hc.results[i], hc.results[j]) > 0
}
// Swap exchanges the hits at indexes i and j.
func (hc *HeapCollector) Swap(i, j int) {
	hc.results[i], hc.results[j] = hc.results[j], hc.results[i]
}
// Push appends x, which must be a *search.DocumentMatch, to results.
// It is meant to be called through heap.Push, not directly.
func (hc *HeapCollector) Push(x interface{}) {
	hc.results = append(hc.results, x.(*search.DocumentMatch))
}
// Pop removes the last element of results and returns it.
// It is meant to be called through heap.Pop, not directly.
func (hc *HeapCollector) Pop() interface{} {
	last := len(hc.results) - 1
	rv := hc.results[last]
	hc.results = hc.results[:last]
	return rv
}

View File

@ -84,7 +84,7 @@ func TestTop10Scores(t *testing.T) {
},
}
collector := NewTopScorerCollector(10)
collector := NewHeapCollector(10, 0, search.SortOrder{&search.SortScore{Descending: true}})
err := collector.Collect(context.Background(), searcher, &stubReader{})
if err != nil {
t.Fatal(err)
@ -192,7 +192,7 @@ func TestTop10ScoresSkip10(t *testing.T) {
},
}
collector := NewTopScorerSkipCollector(10, 10)
collector := NewHeapCollector(10, 10, search.SortOrder{&search.SortScore{Descending: true}})
err := collector.Collect(context.Background(), searcher, &stubReader{})
if err != nil {
t.Fatal(err)
@ -219,7 +219,7 @@ func TestTop10ScoresSkip10(t *testing.T) {
}
if results[0].Score != 9.5 {
t.Errorf("expected highest score to be 9.5ß, got %f", results[0].Score)
t.Errorf("expected highest score to be 9.5, got %f", results[0].Score)
}
}
@ -289,7 +289,7 @@ func TestPaginationSameScores(t *testing.T) {
}
// first get first 5 hits
collector := NewTopScorerSkipCollector(5, 0)
collector := NewHeapCollector(5, 0, search.SortOrder{&search.SortScore{Descending: true}})
err := collector.Collect(context.Background(), searcher, &stubReader{})
if err != nil {
t.Fatal(err)
@ -375,7 +375,7 @@ func TestPaginationSameScores(t *testing.T) {
}
// now get next 5 hits
collector = NewTopScorerSkipCollector(5, 5)
collector = NewHeapCollector(5, 5, search.SortOrder{&search.SortScore{Descending: true}})
err = collector.Collect(context.Background(), searcher, &stubReader{})
if err != nil {
t.Fatal(err)
@ -398,21 +398,20 @@ func TestPaginationSameScores(t *testing.T) {
t.Errorf("doc ID %s is in top 5 and next 5 result sets", hit.ID)
}
}
}
// BenchmarkTop10of100000Scores runs benchHelper with collectors that keep
// the top 10 hits.
// NOTE(review): the name says 100000 but 10000 is passed to benchHelper —
// confirm how benchHelper scales its first argument.
func BenchmarkTop10of100000Scores(b *testing.B) {
	benchHelper(10000, NewTopScorerCollector(10), b)
	benchHelper(10000, NewHeapCollector(10, 0, search.SortOrder{&search.SortScore{Descending: true}}), b)
}
// BenchmarkTop100of100000Scores runs benchHelper with collectors that keep
// the top 100 hits.
// NOTE(review): the name says 100000 but 10000 is passed to benchHelper —
// confirm how benchHelper scales its first argument.
func BenchmarkTop100of100000Scores(b *testing.B) {
	benchHelper(10000, NewTopScorerCollector(100), b)
	benchHelper(10000, NewHeapCollector(100, 0, search.SortOrder{&search.SortScore{Descending: true}}), b)
}
// BenchmarkTop10of1000000Scores runs benchHelper with collectors that keep
// the top 10 hits.
// NOTE(review): the name says 1000000 but 100000 is passed to benchHelper —
// confirm how benchHelper scales its first argument.
func BenchmarkTop10of1000000Scores(b *testing.B) {
	benchHelper(100000, NewTopScorerCollector(10), b)
	benchHelper(100000, NewHeapCollector(10, 0, search.SortOrder{&search.SortScore{Descending: true}}), b)
}
// BenchmarkTop100of1000000Scores runs benchHelper with collectors that keep
// the top 100 hits.
// NOTE(review): the name says 1000000 but 100000 is passed to benchHelper —
// confirm how benchHelper scales its first argument.
func BenchmarkTop100of1000000Scores(b *testing.B) {
	benchHelper(100000, NewTopScorerCollector(100), b)
	benchHelper(100000, NewHeapCollector(100, 0, search.SortOrder{&search.SortScore{Descending: true}}), b)
}

View File

@ -1,187 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package collectors
import (
"container/list"
"time"
"golang.org/x/net/context"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
// TopScoreCollector collects the highest-scoring hits of a search into
// a list kept ordered by ascending score.
type TopScoreCollector struct {
	// k is the number of hits to return and skip the number of top hits
	// to leave out; the list holds at most k+skip entries
	k int
	skip int
	// results holds the retained hits, lowest score at the front
	results *list.List
	// took is the duration measured by Collect
	took time.Duration
	// maxScore is the highest score seen on any hit
	maxScore float64
	// minScore is the score of the most recently evicted hit; hits that
	// do not score above it are discarded
	minScore float64
	// total counts every hit passed to collectSingle
	total uint64
	// facetsBuilder, when set, is updated with every hit
	facetsBuilder *search.FacetsBuilder
	// actualResults is the finalized result slice produced by Collect
	actualResults search.DocumentMatchCollection
}
// NewTopScorerCollector returns a TopScoreCollector that keeps the top k
// hits and skips none.
func NewTopScorerCollector(k int) *TopScoreCollector {
	return NewTopScorerSkipCollector(k, 0)
}
// NewTopScorerSkipCollector returns a TopScoreCollector that keeps the
// top k hits after leaving out the first skip, starting from an empty
// result list.
func NewTopScorerSkipCollector(k, skip int) *TopScoreCollector {
	collector := new(TopScoreCollector)
	collector.k = k
	collector.skip = skip
	collector.results = list.New()
	return collector
}
// Total returns the number of hits seen by the collector.
func (tksc *TopScoreCollector) Total() uint64 {
	return tksc.total
}
// MaxScore returns the highest score among the hits seen by the collector.
func (tksc *TopScoreCollector) MaxScore() float64 {
	return tksc.maxScore
}
// Took returns the duration recorded by Collect.
func (tksc *TopScoreCollector) Took() time.Duration {
	return tksc.took
}
// COLLECT_CHECK_DONE_EVERY controls how often Collect polls the context
// for cancellation: the check runs whenever the running hit total is a
// multiple of this value.
var COLLECT_CHECK_DONE_EVERY = uint64(1024)
// Collect drains the searcher, feeding every hit to the facets builder
// (when one is set) and to collectSingle, then finalizes the results and
// records how long the search took.
//
// The context is checked before the first hit is fetched and again
// whenever the running total is a multiple of COLLECT_CHECK_DONE_EVERY;
// on cancellation ctx.Err() is returned immediately.
//
// An error from the searcher or the facets builder stops collection and
// is returned. Previously that error was overwritten by the result of
// finalizeResults and therefore silently dropped.
func (tksc *TopScoreCollector) Collect(ctx context.Context, searcher search.Searcher, reader index.IndexReader) error {
	startTime := time.Now()
	var err error
	var next *search.DocumentMatch

	// search context with enough pre-allocated document matches
	searchContext := &search.SearchContext{
		DocumentMatchPool: search.NewDocumentMatchPool(tksc.k + tksc.skip + searcher.DocumentMatchPoolSize()),
	}

	select {
	case <-ctx.Done():
		return ctx.Err()
	default:
		next, err = searcher.Next(searchContext)
	}
	for err == nil && next != nil {
		if tksc.total%COLLECT_CHECK_DONE_EVERY == 0 {
			select {
			case <-ctx.Done():
				return ctx.Err()
			default:
			}
		}
		if tksc.facetsBuilder != nil {
			err = tksc.facetsBuilder.Update(next)
			if err != nil {
				break
			}
		}
		tksc.collectSingle(searchContext, next)
		next, err = searcher.Next(searchContext)
	}
	// report the error that ended the loop before err is reused below
	if err != nil {
		return err
	}

	// finalize actual results
	tksc.actualResults, err = tksc.finalizeResults(reader)
	if err != nil {
		return err
	}

	// compute search duration
	tksc.took = time.Since(startTime)
	return nil
}
// collectSingle accounts for one hit and inserts it into the result
// list, which is kept ordered by ascending score. When the list grows
// beyond k+skip entries its head (the lowest-scoring hit) is removed and
// returned to the pool, and its score becomes the new minScore.
func (tksc *TopScoreCollector) collectSingle(ctx *search.SearchContext, d *search.DocumentMatch) {
	// increment total hits
	tksc.total++
	// update max score
	if d.Score > tksc.maxScore {
		tksc.maxScore = d.Score
	}
	// NOTE(review): minScore starts at its zero value, so a hit scoring
	// <= 0 is discarded even while the list is not full — confirm scores
	// are always positive
	if d.Score <= tksc.minScore {
		ctx.DocumentMatchPool.Put(d)
		return
	}
	// walk from the lowest score up and insert before the first entry
	// that scores at least as high as this hit
	for e := tksc.results.Front(); e != nil; e = e.Next() {
		curr := e.Value.(*search.DocumentMatch)
		if d.Score <= curr.Score {
			tksc.results.InsertBefore(d, e)
			// if we just made the list too long
			if tksc.results.Len() > (tksc.k + tksc.skip) {
				// remove the head
				removed := tksc.results.Remove(tksc.results.Front()).(*search.DocumentMatch)
				tksc.minScore = removed.Score
				ctx.DocumentMatchPool.Put(removed)
			}
			return
		}
	}
	// if we got to the end, we still have to add it
	tksc.results.PushBack(d)
	if tksc.results.Len() > (tksc.k + tksc.skip) {
		// remove the head
		removed := tksc.results.Remove(tksc.results.Front()).(*search.DocumentMatch)
		tksc.minScore = removed.Score
		ctx.DocumentMatchPool.Put(removed)
	}
}
// Results returns the finalized result slice produced by Collect.
func (tksc *TopScoreCollector) Results() search.DocumentMatchCollection {
	return tksc.actualResults
}
// finalizeResults converts the result list into a slice ordered from
// highest to lowest score, leaving out the first skip hits and resolving
// the external ID of every returned hit. It returns an empty collection
// when the list holds no more than skip hits, and (nil, err) when an ID
// lookup fails.
func (tksc *TopScoreCollector) finalizeResults(r index.IndexReader) (search.DocumentMatchCollection, error) {
	remaining := tksc.results.Len() - tksc.skip
	if remaining <= 0 {
		return search.DocumentMatchCollection{}, nil
	}

	rv := make(search.DocumentMatchCollection, remaining)
	pos, toSkip := 0, tksc.skip
	// the list is ordered by ascending score, so walk it backwards
	for e := tksc.results.Back(); e != nil; e = e.Prev() {
		if toSkip > 0 {
			toSkip--
			continue
		}
		hit := e.Value.(*search.DocumentMatch)
		var err error
		hit.ID, err = r.FinalizeDocID(hit.IndexInternalID)
		if err != nil {
			return nil, err
		}
		rv[pos] = hit
		pos++
	}
	return rv, nil
}
// SetFacetsBuilder sets the facets builder that Collect updates with
// every hit.
func (tksc *TopScoreCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
	tksc.facetsBuilder = facetsBuilder
}
// FacetResults returns the results of the facets builder, or an empty
// search.FacetResults when no facets builder has been set.
func (tksc *TopScoreCollector) FacetResults() search.FacetResults {
	if tksc.facetsBuilder == nil {
		return search.FacetResults{}
	}
	return tksc.facetsBuilder.Results()
}

View File

@ -9,7 +9,12 @@
package search
import "github.com/blevesearch/bleve/index"
import (
"fmt"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
)
type Location struct {
Pos float64 `json:"pos"`
@ -65,6 +70,12 @@ type DocumentMatch struct {
// SearchRequest.Fields. Text fields are returned as strings, numeric
// fields as float64s and date fields as time.RFC3339 formatted strings.
Fields map[string]interface{} `json:"fields,omitempty"`
// if we load the document for this hit, remember it so we dont load again
Document *document.Document `json:"-"`
// used to maintain natural index order
HitNumber uint64 `json:"-"`
}
func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) {
@ -99,6 +110,10 @@ func (dm *DocumentMatch) Reset() *DocumentMatch {
return dm
}
// String renders the match as "[<internal id>-<score>]", with the
// internal ID converted to a string and the score formatted with %f.
func (dm *DocumentMatch) String() string {
	id := string(dm.IndexInternalID)
	return fmt.Sprintf("[%s-%f]", id, dm.Score)
}
// DocumentMatchCollection is a slice of document matches.
type DocumentMatchCollection []*DocumentMatch

// Len returns the number of matches in the collection.
func (c DocumentMatchCollection) Len() int { return len(c) }
@ -121,3 +136,59 @@ type Searcher interface {
// SearchContext carries per-search state handed to searchers and
// collectors.
type SearchContext struct {
	// DocumentMatchPool is the pool of DocumentMatch instances for this
	// search; presumably searchers draw from it and collectors return
	// discarded hits to it — confirm against the pool implementation
	DocumentMatchPool *DocumentMatchPool
}
// SearchSort describes a single sort criterion applied to search hits.
type SearchSort interface {
	// Compare reports the relative order of two hits; SortOrder.Compare
	// treats 0 as a tie and any other value as decisive.
	Compare(a, b *DocumentMatch) int
	// RequiresDocID reports whether the hit's ID must be loaded before
	// Compare is called.
	RequiresDocID() bool
	// RequiresScoring reports whether the criterion depends on the
	// hit's score.
	RequiresScoring() bool
	// RequiresStoredFields returns the names of the stored fields the
	// criterion needs (nil when it needs none).
	// NOTE(review): the aggregate on SortOrder is spelled
	// RequiredStoredFields, and RequiresScoring is aggregated by
	// RequiresScore — the names differ.
	RequiresStoredFields() []string
}
// SortOrder is an ordered list of sort criteria; earlier entries take
// precedence over later ones.
type SortOrder []SearchSort

// Compare orders two hits by consulting each criterion in turn and
// returning the first non-zero result. When every criterion reports a
// tie, the hits are ordered by HitNumber (the index's natural order),
// lower numbers first.
func (so SortOrder) Compare(i, j *DocumentMatch) int {
	// compare the documents on all search sorts until a difference is found
	for _, criterion := range so {
		if c := criterion.Compare(i, j); c != 0 {
			return c
		}
	}

	// still tied: impose order based on index natural sort order
	switch {
	case i.HitNumber < j.HitNumber:
		return -1
	case i.HitNumber > j.HitNumber:
		return 1
	default:
		return 0
	}
}
// RequiresScore reports whether at least one criterion in the sort
// order requires scoring. Every criterion is consulted.
func (so SortOrder) RequiresScore() bool {
	needed := false
	for idx := range so {
		// method call first, so it is never short-circuited away
		needed = so[idx].RequiresScoring() || needed
	}
	return needed
}
// RequiresDocID reports whether at least one criterion in the sort
// order requires the document ID to be loaded. Every criterion is
// consulted.
func (so SortOrder) RequiresDocID() bool {
	needed := false
	for idx := range so {
		// method call first, so it is never short-circuited away
		needed = so[idx].RequiresDocID() || needed
	}
	return needed
}
// RequiredStoredFields returns the stored field names required by all
// criteria, concatenated in sort order. Duplicates are not removed, and
// the result is nil when no criterion requires a stored field.
func (so SortOrder) RequiredStoredFields() []string {
	var fields []string
	for idx := range so {
		fields = append(fields, so[idx].RequiresStoredFields()...)
	}
	return fields
}

78
search/sort.go Normal file
View File

@ -0,0 +1,78 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import "strings"
// SortStoredField will sort results by the value of a stored field
type SortStoredField struct {
	// Field is the name of the stored field to sort on
	Field string
	// Descending reverses the order when true
	Descending bool
}
// Compare orders DocumentMatch instances by the value of the configured
// stored field, as found in the Document attached to each hit.
func (s *SortStoredField) Compare(i, j *DocumentMatch) int {
	left, right := i.Document, j.Document
	return left.CompareFieldsNamed(right, s.Field, s.Descending)
}
// RequiresDocID says this SearchSort does not require the DocID be loaded
func (s *SortStoredField) RequiresDocID() bool { return false }
// RequiresScoring says this SearchSort does not require scoring
func (s *SortStoredField) RequiresScoring() bool { return false }
// RequiresStoredFields says this SearchSort requires the specified stored
// field; it returns a one-element slice holding s.Field
func (s *SortStoredField) RequiresStoredFields() []string { return []string{s.Field} }
// SortDocID will sort results by the document identifier
type SortDocID struct {
	// Descending reverses the order when true
	Descending bool
}
// Compare orders DocumentMatch instances by their ID, returning the
// result of strings.Compare. When Descending is set the operands are
// swapped, which reverses the order.
func (s *SortDocID) Compare(i, j *DocumentMatch) int {
	left, right := i.ID, j.ID
	if s.Descending {
		left, right = right, left
	}
	return strings.Compare(left, right)
}
// RequiresDocID says this SearchSort does require the DocID be loaded
func (s *SortDocID) RequiresDocID() bool { return true }
// RequiresScoring says this SearchSort does not require scoring
func (s *SortDocID) RequiresScoring() bool { return false }
// RequiresStoredFields says this SearchSort does not require any stored fields
func (s *SortDocID) RequiresStoredFields() []string { return nil }
// SortScore will sort results by the document match score
type SortScore struct {
	// Descending puts higher scores first when true
	Descending bool
}
// Compare orders DocumentMatch instances by computed scores. It returns
// 0 for equal scores, -1 when i sorts before j and 1 otherwise; with
// Descending set, higher scores sort first.
func (s *SortScore) Compare(i, j *DocumentMatch) int {
	switch {
	case i.Score == j.Score:
		return 0
	case !s.Descending && i.Score < j.Score:
		return -1
	case s.Descending && j.Score < i.Score:
		return -1
	default:
		return 1
	}
}
// RequiresDocID says this SearchSort does not require the DocID be loaded
func (s *SortScore) RequiresDocID() bool { return false }
// RequiresScoring says this SearchSort does require scoring
func (s *SortScore) RequiresScoring() bool { return true }
// RequiresStoredFields says this SearchSort does not require any stored fields
func (s *SortScore) RequiresStoredFields() []string { return nil }