0
0
Fork 0

Adding sort to SearchRequest.

This commit is contained in:
Danny Tylman 2016-08-10 11:13:38 +03:00
parent 5164e70f6e
commit 154d1b904b
3 changed files with 52 additions and 185 deletions

View File

@ -1,12 +1,23 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
//
package collectors
import (
"container/heap"
"math"
"time"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"golang.org/x/net/context"
"time"
)
type collectedDoc struct {
@ -25,6 +36,8 @@ type HeapCollector struct {
reader index.IndexReader
}
var COLLECT_CHECK_DONE_EVERY = uint64(1024)
func NewHeapCollector(size int, skip int, reader index.IndexReader, sort search.SortOrder) *HeapCollector {
hc := &HeapCollector{size: size, skip: skip, reader: reader, sort: sort}
heap.Init(hc)
@ -79,12 +92,11 @@ func (hc *HeapCollector) collectSingle(dmIn *search.DocumentMatch) error {
return err
}
}
if hc.Len() >= hc.size {
hc.Pop()
}
heap.Push(hc, single)
if hc.Len() > hc.size+hc.skip {
heap.Pop(hc)
}
return nil
}
func (hc *HeapCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
@ -92,10 +104,16 @@ func (hc *HeapCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
}
func (hc *HeapCollector) Results() search.DocumentMatchCollection {
rv := make(search.DocumentMatchCollection, hc.Len())
for i := 0; hc.Len() > 0; i++ {
doc := heap.Pop(hc).(*collectedDoc)
rv[i] = &doc.match
count := hc.Len()
size := count - hc.skip
rv := make(search.DocumentMatchCollection, size)
for count > 0 {
count--
if count >= hc.skip {
size--
doc := heap.Pop(hc).(*collectedDoc)
rv[size] = &doc.match
}
}
return rv
}
@ -105,7 +123,15 @@ func (hc *HeapCollector) Total() uint64 {
}
func (hc *HeapCollector) MaxScore() float64 {
return 0
var max float64
for _, res := range hc.results {
max = math.Max(max, res.match.Score)
}
return max
}
func (hc *HeapCollector) Took() time.Duration {
return hc.took
}
func (hc *HeapCollector) FacetResults() search.FacetResults {
@ -206,7 +232,13 @@ func (hc *HeapCollector) Less(i, j int) bool {
}
}
}
return hc.results[i].match.Score > hc.results[j].match.Score
scori := hc.results[i].match.Score
scorj := hc.results[j].match.Score
// make sure the list is ordered if everything else is the same...
if scori==scorj{
return hc.results[i].match.ID < hc.results[j].match.ID
}
return scori < scorj
}
func (hc *HeapCollector) Swap(i, j int) {

View File

@ -83,7 +83,8 @@ func TestTop10Scores(t *testing.T) {
},
}
collector := NewTopScorerCollector(10)
collector := NewHeapCollector(10, 0, nil, nil)
//collector:=NewTopScorerCollector(10)
err := collector.Collect(context.Background(), searcher)
if err != nil {
t.Fatal(err)
@ -191,7 +192,7 @@ func TestTop10ScoresSkip10(t *testing.T) {
},
}
collector := NewTopScorerSkipCollector(10, 10)
collector := NewHeapCollector(10, 10, nil, nil)
err := collector.Collect(context.Background(), searcher)
if err != nil {
t.Fatal(err)
@ -288,7 +289,7 @@ func TestPaginationSameScores(t *testing.T) {
}
// first get first 5 hits
collector := NewTopScorerSkipCollector(5, 0)
collector := NewHeapCollector(5, 0, nil, nil)
err := collector.Collect(context.Background(), searcher)
if err != nil {
t.Fatal(err)
@ -374,7 +375,7 @@ func TestPaginationSameScores(t *testing.T) {
}
// now get next 5 hits
collector = NewTopScorerSkipCollector(5, 5)
collector = NewHeapCollector(5, 5, nil, nil)
err = collector.Collect(context.Background(), searcher)
if err != nil {
t.Fatal(err)
@ -397,21 +398,20 @@ func TestPaginationSameScores(t *testing.T) {
t.Errorf("doc ID %s is in top 5 and next 5 result sets", hit.ID)
}
}
}
func BenchmarkTop10of100000Scores(b *testing.B) {
benchHelper(10000, NewTopScorerCollector(10), b)
benchHelper(10000, NewHeapCollector(10, 0, nil, nil), b)
}
func BenchmarkTop100of100000Scores(b *testing.B) {
benchHelper(10000, NewTopScorerCollector(100), b)
benchHelper(10000, NewHeapCollector(100, 0, nil, nil), b)
}
func BenchmarkTop10of1000000Scores(b *testing.B) {
benchHelper(100000, NewTopScorerCollector(10), b)
benchHelper(100000, NewHeapCollector(10, 0, nil, nil), b)
}
func BenchmarkTop100of1000000Scores(b *testing.B) {
benchHelper(100000, NewTopScorerCollector(100), b)
benchHelper(100000, NewHeapCollector(100, 0, nil, nil), b)
}

View File

@ -1,165 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package collectors
import (
"container/list"
"time"
"golang.org/x/net/context"
"github.com/blevesearch/bleve/search"
)
// TopScoreCollector accumulates the highest-scoring document matches seen
// during a search, keeping at most k+skip of them in a linked list.
type TopScoreCollector struct {
// k is the maximum number of hits kept in addition to the skipped ones;
// the retained list is capped at k+skip entries.
k int
// skip is the number of best-scoring retained hits that Results leaves out.
skip int
// results holds the retained hits ordered by ascending score, so the
// front element is the lowest-scoring one and the first to be evicted.
results *list.List
// took is the elapsed time measured by Collect.
took time.Duration
// maxScore is the largest score observed across all collected hits.
maxScore float64
// minScore is the score of the hit most recently evicted from results.
minScore float64
// total counts every hit passed to the collector, retained or not.
total uint64
// facetsBuilder, when non-nil, is updated with every hit during Collect.
facetsBuilder *search.FacetsBuilder
}
// NewTopScorerCollector returns a collector that keeps the k best-scoring
// hits and skips none of them.
func NewTopScorerCollector(k int) *TopScoreCollector {
	// Same construction as the skip-aware constructor with a skip of zero.
	return NewTopScorerSkipCollector(k, 0)
}
// NewTopScorerSkipCollector returns a collector that retains up to k+skip
// hits and whose Results omit the skip best-scoring ones.
func NewTopScorerSkipCollector(k, skip int) *TopScoreCollector {
	collector := new(TopScoreCollector)
	collector.k = k
	collector.skip = skip
	collector.results = list.New()
	return collector
}
// Total reports how many hits have been passed to the collector so far,
// including hits that were not retained.
func (tksc *TopScoreCollector) Total() uint64 {
	return tksc.total
}
// MaxScore reports the highest score observed among all collected hits.
func (tksc *TopScoreCollector) MaxScore() float64 {
	return tksc.maxScore
}
// Took reports the duration recorded by the most recent Collect call that
// ran to the end of its loop.
func (tksc *TopScoreCollector) Took() time.Duration {
	return tksc.took
}
// COLLECT_CHECK_DONE_EVERY sets how often Collect polls ctx.Done() while
// iterating: the check runs whenever the running hit total is a multiple of
// this value.
var COLLECT_CHECK_DONE_EVERY = uint64(1024)
// Collect drains searcher, passing every hit to collectSingle and, when a
// facets builder is set, to that builder as well.
//
// The context is checked before the first hit is fetched and again whenever
// the running total is a multiple of COLLECT_CHECK_DONE_EVERY; if it is done,
// ctx.Err() is returned immediately and the elapsed time is not recorded.
// Otherwise the elapsed time is stored and the first error from the searcher
// or the facets builder (or nil) is returned.
func (tksc *TopScoreCollector) Collect(ctx context.Context, searcher search.Searcher) error {
	begin := time.Now()

	// Bail out before touching the searcher if the caller already gave up.
	select {
	case <-ctx.Done():
		return ctx.Err()
	default:
	}

	// One pre-allocated match is reused for every call to Next.
	var scratch search.DocumentMatch
	hit, err := searcher.Next(&scratch)

	for err == nil && hit != nil {
		if tksc.total%COLLECT_CHECK_DONE_EVERY == 0 {
			select {
			case <-ctx.Done():
				return ctx.Err()
			default:
			}
		}

		tksc.collectSingle(hit)

		if tksc.facetsBuilder != nil {
			if err = tksc.facetsBuilder.Update(hit); err != nil {
				break
			}
		}

		hit, err = searcher.Next(scratch.Reset())
	}

	// compute search duration
	tksc.took = time.Since(begin)
	return err
}
// collectSingle records one hit: it bumps the total, tracks the maximum
// score, and inserts a copy of the hit into the score-ordered results list
// when it ranks among the best k+skip hits seen so far.
//
// The rejection cutoff is applied only once the list is full, and is taken
// from the lowest retained hit. Previously the cutoff was minScore, whose
// zero value caused every hit with a score <= 0 to be discarded even while
// the list still had room, so such hits were counted but never returned.
func (tksc *TopScoreCollector) collectSingle(dmIn *search.DocumentMatch) {
	// increment total hits
	tksc.total++

	// update max score
	if dmIn.Score > tksc.maxScore {
		tksc.maxScore = dmIn.Score
	}

	limit := tksc.k + tksc.skip
	if limit <= 0 {
		// No hit can ever be retained; skip the copy and the list churn.
		return
	}

	// When the list is full, a hit that does not beat the lowest retained
	// score would be inserted at the front and evicted straight away.
	if tksc.results.Len() >= limit {
		lowest := tksc.results.Front().Value.(*search.DocumentMatch)
		if dmIn.Score <= lowest.Score {
			return
		}
	}

	// Because the dmIn will be the single, pre-allocated, reused
	// instance, we need to copy the dmIn into a new, standalone
	// instance before inserting into our candidate results list.
	dm := &search.DocumentMatch{}
	*dm = *dmIn

	// Walk from the lowest score upwards and insert before the first entry
	// whose score is not smaller, keeping the list in ascending order.
	inserted := false
	for e := tksc.results.Front(); e != nil; e = e.Next() {
		if dm.Score <= e.Value.(*search.DocumentMatch).Score {
			tksc.results.InsertBefore(dm, e)
			inserted = true
			break
		}
	}
	if !inserted {
		// Higher than everything retained so far: goes to the back.
		tksc.results.PushBack(dm)
	}

	// if we just made the list too long, remove the head (lowest score)
	if tksc.results.Len() > limit {
		tksc.minScore = tksc.results.Remove(tksc.results.Front()).(*search.DocumentMatch).Score
	}
}
// Results returns the retained hits from highest to lowest score, leaving
// out the first skip of them. An empty collection is returned when no hits
// remain after skipping.
func (tksc *TopScoreCollector) Results() search.DocumentMatchCollection {
	remaining := tksc.results.Len() - tksc.skip
	if remaining <= 0 {
		return search.DocumentMatchCollection{}
	}

	hits := make(search.DocumentMatchCollection, remaining)
	toSkip, next := tksc.skip, 0
	// The list is in ascending score order, so walk it from the back.
	for e := tksc.results.Back(); e != nil; e = e.Prev() {
		if toSkip > 0 {
			toSkip--
			continue
		}
		hits[next] = e.Value.(*search.DocumentMatch)
		next++
	}
	return hits
}
// SetFacetsBuilder installs the facets builder that Collect updates with
// every hit; passing nil disables facet updates.
func (tksc *TopScoreCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
	tksc.facetsBuilder = facetsBuilder
}
// FacetResults returns the results of the installed facets builder, or an
// empty FacetResults when no builder has been set.
func (tksc *TopScoreCollector) FacetResults() search.FacetResults {
	if tksc.facetsBuilder == nil {
		return search.FacetResults{}
	}
	return tksc.facetsBuilder.Results()
}