Adding sort to SearchRequest.
This commit is contained in:
parent
5164e70f6e
commit
154d1b904b
|
@ -1,12 +1,23 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
//
|
||||
package collectors
|
||||
|
||||
import (
|
||||
"container/heap"
|
||||
"math"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"golang.org/x/net/context"
|
||||
"time"
|
||||
)
|
||||
|
||||
type collectedDoc struct {
|
||||
|
@ -25,6 +36,8 @@ type HeapCollector struct {
|
|||
reader index.IndexReader
|
||||
}
|
||||
|
||||
// COLLECT_CHECK_DONE_EVERY is a hit-count interval.
// NOTE(review): presumably the number of hits processed between polls of
// ctx.Done() during collection — its use is not visible here; confirm.
var COLLECT_CHECK_DONE_EVERY = uint64(1024)
|
||||
|
||||
func NewHeapCollector(size int, skip int, reader index.IndexReader, sort search.SortOrder) *HeapCollector {
|
||||
hc := &HeapCollector{size: size, skip: skip, reader: reader, sort: sort}
|
||||
heap.Init(hc)
|
||||
|
@ -79,12 +92,11 @@ func (hc *HeapCollector) collectSingle(dmIn *search.DocumentMatch) error {
|
|||
return err
|
||||
}
|
||||
}
|
||||
if hc.Len() >= hc.size {
|
||||
hc.Pop()
|
||||
}
|
||||
heap.Push(hc, single)
|
||||
if hc.Len() > hc.size+hc.skip {
|
||||
heap.Pop(hc)
|
||||
}
|
||||
return nil
|
||||
|
||||
}
|
||||
|
||||
func (hc *HeapCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
|
||||
|
@ -92,10 +104,16 @@ func (hc *HeapCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
|
|||
}
|
||||
|
||||
func (hc *HeapCollector) Results() search.DocumentMatchCollection {
|
||||
rv := make(search.DocumentMatchCollection, hc.Len())
|
||||
for i := 0; hc.Len() > 0; i++ {
|
||||
doc := heap.Pop(hc).(*collectedDoc)
|
||||
rv[i] = &doc.match
|
||||
count := hc.Len()
|
||||
size := count - hc.skip
|
||||
rv := make(search.DocumentMatchCollection, size)
|
||||
for count > 0 {
|
||||
count--
|
||||
if count >= hc.skip {
|
||||
size--
|
||||
doc := heap.Pop(hc).(*collectedDoc)
|
||||
rv[size] = &doc.match
|
||||
}
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
@ -105,7 +123,15 @@ func (hc *HeapCollector) Total() uint64 {
|
|||
}
|
||||
|
||||
func (hc *HeapCollector) MaxScore() float64 {
|
||||
return 0
|
||||
var max float64
|
||||
for _, res := range hc.results {
|
||||
max = math.Max(max, res.match.Score)
|
||||
}
|
||||
return max
|
||||
}
|
||||
|
||||
// Took returns the duration stored in hc.took.
// NOTE(review): presumably recorded by Collect — the assignment is not
// visible here; confirm.
func (hc *HeapCollector) Took() time.Duration {
|
||||
return hc.took
|
||||
}
|
||||
|
||||
func (hc *HeapCollector) FacetResults() search.FacetResults {
|
||||
|
@ -206,7 +232,13 @@ func (hc *HeapCollector) Less(i, j int) bool {
|
|||
}
|
||||
}
|
||||
}
|
||||
return hc.results[i].match.Score > hc.results[j].match.Score
|
||||
scori := hc.results[i].match.Score
|
||||
scorj := hc.results[j].match.Score
|
||||
// make sure the list is ordered if everything else is the same...
|
||||
if scori==scorj{
|
||||
return hc.results[i].match.ID < hc.results[j].match.ID
|
||||
}
|
||||
return scori < scorj
|
||||
}
|
||||
|
||||
func (hc *HeapCollector) Swap(i, j int) {
|
||||
|
|
|
@ -83,7 +83,8 @@ func TestTop10Scores(t *testing.T) {
|
|||
},
|
||||
}
|
||||
|
||||
collector := NewTopScorerCollector(10)
|
||||
collector := NewHeapCollector(10, 0, nil, nil)
|
||||
//collector:=NewTopScorerCollector(10)
|
||||
err := collector.Collect(context.Background(), searcher)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
|
@ -191,7 +192,7 @@ func TestTop10ScoresSkip10(t *testing.T) {
|
|||
},
|
||||
}
|
||||
|
||||
collector := NewTopScorerSkipCollector(10, 10)
|
||||
collector := NewHeapCollector(10, 10, nil, nil)
|
||||
err := collector.Collect(context.Background(), searcher)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
|
@ -288,7 +289,7 @@ func TestPaginationSameScores(t *testing.T) {
|
|||
}
|
||||
|
||||
// first get first 5 hits
|
||||
collector := NewTopScorerSkipCollector(5, 0)
|
||||
collector := NewHeapCollector(5, 0, nil, nil)
|
||||
err := collector.Collect(context.Background(), searcher)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
|
@ -374,7 +375,7 @@ func TestPaginationSameScores(t *testing.T) {
|
|||
}
|
||||
|
||||
// now get next 5 hits
|
||||
collector = NewTopScorerSkipCollector(5, 5)
|
||||
collector = NewHeapCollector(5, 5, nil, nil)
|
||||
err = collector.Collect(context.Background(), searcher)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
|
@ -397,21 +398,20 @@ func TestPaginationSameScores(t *testing.T) {
|
|||
t.Errorf("doc ID %s is in top 5 and next 5 result sets", hit.ID)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func BenchmarkTop10of100000Scores(b *testing.B) {
|
||||
benchHelper(10000, NewTopScorerCollector(10), b)
|
||||
benchHelper(10000, NewHeapCollector(10, 0, nil, nil), b)
|
||||
}
|
||||
|
||||
func BenchmarkTop100of100000Scores(b *testing.B) {
|
||||
benchHelper(10000, NewTopScorerCollector(100), b)
|
||||
benchHelper(10000, NewHeapCollector(100, 0, nil, nil), b)
|
||||
}
|
||||
|
||||
func BenchmarkTop10of1000000Scores(b *testing.B) {
|
||||
benchHelper(100000, NewTopScorerCollector(10), b)
|
||||
benchHelper(100000, NewHeapCollector(10, 0, nil, nil), b)
|
||||
}
|
||||
|
||||
func BenchmarkTop100of1000000Scores(b *testing.B) {
|
||||
benchHelper(100000, NewTopScorerCollector(100), b)
|
||||
benchHelper(100000, NewHeapCollector(100, 0, nil, nil), b)
|
||||
}
|
|
@ -1,165 +0,0 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package collectors
|
||||
|
||||
import (
|
||||
"container/list"
|
||||
"time"
|
||||
|
||||
"golang.org/x/net/context"
|
||||
|
||||
"github.com/blevesearch/bleve/search"
|
||||
)
|
||||
|
||||
type TopScoreCollector struct {
|
||||
k int
|
||||
skip int
|
||||
results *list.List
|
||||
took time.Duration
|
||||
maxScore float64
|
||||
minScore float64
|
||||
total uint64
|
||||
facetsBuilder *search.FacetsBuilder
|
||||
}
|
||||
|
||||
func NewTopScorerCollector(k int) *TopScoreCollector {
|
||||
return &TopScoreCollector{
|
||||
k: k,
|
||||
skip: 0,
|
||||
results: list.New(),
|
||||
}
|
||||
}
|
||||
|
||||
func NewTopScorerSkipCollector(k, skip int) *TopScoreCollector {
|
||||
return &TopScoreCollector{
|
||||
k: k,
|
||||
skip: skip,
|
||||
results: list.New(),
|
||||
}
|
||||
}
|
||||
|
||||
// Total returns the number of hits seen so far; collectSingle increments the
// counter once per hit, whether or not the hit is kept.
func (tksc *TopScoreCollector) Total() uint64 {
|
||||
return tksc.total
|
||||
}
|
||||
|
||||
// MaxScore returns the highest score passed to collectSingle so far. The
// field is only raised when a hit scores above its current value, so it stays
// at its zero value until a hit scores above 0.
func (tksc *TopScoreCollector) MaxScore() float64 {
|
||||
return tksc.maxScore
|
||||
}
|
||||
|
||||
// Took returns the duration recorded by Collect. Collect stores it after its
// loop ends; the early returns taken when the context is done do not set it.
func (tksc *TopScoreCollector) Took() time.Duration {
|
||||
return tksc.took
|
||||
}
|
||||
|
||||
// COLLECT_CHECK_DONE_EVERY is the hit-count interval at which Collect polls
// ctx.Done(): the check runs whenever the running total is a multiple of it.
var COLLECT_CHECK_DONE_EVERY = uint64(1024)
|
||||
|
||||
func (tksc *TopScoreCollector) Collect(ctx context.Context, searcher search.Searcher) error {
|
||||
startTime := time.Now()
|
||||
var err error
|
||||
var pre search.DocumentMatch // A single pre-alloc'ed, reused instance.
|
||||
var next *search.DocumentMatch
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
next, err = searcher.Next(&pre)
|
||||
}
|
||||
for err == nil && next != nil {
|
||||
if tksc.total%COLLECT_CHECK_DONE_EVERY == 0 {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
}
|
||||
tksc.collectSingle(next)
|
||||
if tksc.facetsBuilder != nil {
|
||||
err = tksc.facetsBuilder.Update(next)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
next, err = searcher.Next(pre.Reset())
|
||||
}
|
||||
// compute search duration
|
||||
tksc.took = time.Since(startTime)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (tksc *TopScoreCollector) collectSingle(dmIn *search.DocumentMatch) {
|
||||
// increment total hits
|
||||
tksc.total++
|
||||
|
||||
// update max score
|
||||
if dmIn.Score > tksc.maxScore {
|
||||
tksc.maxScore = dmIn.Score
|
||||
}
|
||||
|
||||
if dmIn.Score <= tksc.minScore {
|
||||
return
|
||||
}
|
||||
|
||||
// Because the dmIn will be the single, pre-allocated, reused
|
||||
// instance, we need to copy the dmIn into a new, standalone
|
||||
// instance before inserting into our candidate results list.
|
||||
dm := &search.DocumentMatch{}
|
||||
*dm = *dmIn
|
||||
|
||||
for e := tksc.results.Front(); e != nil; e = e.Next() {
|
||||
curr := e.Value.(*search.DocumentMatch)
|
||||
if dm.Score <= curr.Score {
|
||||
|
||||
tksc.results.InsertBefore(dm, e)
|
||||
// if we just made the list too long
|
||||
if tksc.results.Len() > (tksc.k + tksc.skip) {
|
||||
// remove the head
|
||||
tksc.minScore = tksc.results.Remove(tksc.results.Front()).(*search.DocumentMatch).Score
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
// if we got to the end, we still have to add it
|
||||
tksc.results.PushBack(dm)
|
||||
if tksc.results.Len() > (tksc.k + tksc.skip) {
|
||||
// remove the head
|
||||
tksc.minScore = tksc.results.Remove(tksc.results.Front()).(*search.DocumentMatch).Score
|
||||
}
|
||||
}
|
||||
|
||||
func (tksc *TopScoreCollector) Results() search.DocumentMatchCollection {
|
||||
if tksc.results.Len()-tksc.skip > 0 {
|
||||
rv := make(search.DocumentMatchCollection, tksc.results.Len()-tksc.skip)
|
||||
i := 0
|
||||
skipped := 0
|
||||
for e := tksc.results.Back(); e != nil; e = e.Prev() {
|
||||
if skipped < tksc.skip {
|
||||
skipped++
|
||||
continue
|
||||
}
|
||||
rv[i] = e.Value.(*search.DocumentMatch)
|
||||
i++
|
||||
}
|
||||
return rv
|
||||
}
|
||||
return search.DocumentMatchCollection{}
|
||||
}
|
||||
|
||||
// SetFacetsBuilder stores facetsBuilder on the collector; Collect updates it
// with every hit and FacetResults reads from it when it is non-nil.
func (tksc *TopScoreCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
|
||||
tksc.facetsBuilder = facetsBuilder
|
||||
}
|
||||
|
||||
func (tksc *TopScoreCollector) FacetResults() search.FacetResults {
|
||||
if tksc.facetsBuilder != nil {
|
||||
return tksc.facetsBuilder.Results()
|
||||
}
|
||||
return search.FacetResults{}
|
||||
}
|
Loading…
Reference in New Issue