0
0
bleve/search/collectors/collector_heap.go
2016-08-10 16:16:58 +03:00

261 lines
6.1 KiB
Go

// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
//
package collectors
import (
"container/heap"
"math"
"time"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
"golang.org/x/net/context"
)
type collectedDoc struct {
match search.DocumentMatch
doc *document.Document
}
type HeapCollector struct {
size int
skip int
total uint64
took time.Duration
sort search.SortOrder
results []*collectedDoc
facetsBuilder *search.FacetsBuilder
reader index.IndexReader
}
var COLLECT_CHECK_DONE_EVERY = uint64(1024)
func NewHeapCollector(size int, skip int, reader index.IndexReader, sort search.SortOrder) *HeapCollector {
hc := &HeapCollector{size: size, skip: skip, reader: reader, sort: sort}
heap.Init(hc)
return hc
}
func (hc *HeapCollector) Collect(ctx context.Context, searcher search.Searcher) error {
startTime := time.Now()
var err error
var pre search.DocumentMatch // A single pre-alloc'ed, reused instance.
var next *search.DocumentMatch
select {
case <-ctx.Done():
return ctx.Err()
default:
next, err = searcher.Next(&pre)
}
for err == nil && next != nil {
if hc.total%COLLECT_CHECK_DONE_EVERY == 0 {
select {
case <-ctx.Done():
return ctx.Err()
default:
}
}
err = hc.collectSingle(next)
if err != nil {
break
}
if hc.facetsBuilder != nil {
err = hc.facetsBuilder.Update(next)
if err != nil {
break
}
}
next, err = searcher.Next(pre.Reset())
}
// compute search duration
hc.took = time.Since(startTime)
if err != nil {
return err
}
return nil
}
func (hc *HeapCollector) collectSingle(dmIn *search.DocumentMatch) error {
// increment total hits
hc.total++
single := new(collectedDoc)
single.match = *dmIn
var err error
if len(hc.sort) > 0 {
single.doc, err = hc.reader.Document(dmIn.ID)
if err != nil {
return err
}
}
heap.Push(hc, single)
if hc.Len() > hc.size+hc.skip {
heap.Pop(hc)
}
return nil
}
func (hc *HeapCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
hc.facetsBuilder = facetsBuilder
}
func (hc *HeapCollector) Results() search.DocumentMatchCollection {
count := hc.Len()
size := count - hc.skip
rv := make(search.DocumentMatchCollection, size)
for count > 0 {
count--
if count >= hc.skip {
size--
doc := heap.Pop(hc).(*collectedDoc)
rv[size] = &doc.match
}
}
return rv
}
func (hc *HeapCollector) Total() uint64 {
return hc.total
}
func (hc *HeapCollector) MaxScore() float64 {
var max float64
for _, res := range hc.results {
max = math.Max(max, res.match.Score)
}
return max
}
func (hc *HeapCollector) Took() time.Duration {
return hc.took
}
func (hc *HeapCollector) FacetResults() search.FacetResults {
if hc.facetsBuilder != nil {
return hc.facetsBuilder.Results()
}
return search.FacetResults{}
}
func (hc *HeapCollector) Len() int {
return len(hc.results)
}
func field(doc *document.Document, field string) document.Field {
if doc == nil {
return nil
}
for _, f := range doc.Fields {
if f.Name() == field {
return f
}
}
return nil
}
func textFieldCompare(i, j *document.TextField, ascends bool) (bool, bool) {
ivalue := string(i.Value())
jvalue := string(j.Value())
if ivalue == jvalue {
return true, false
}
if ascends {
return false, ivalue < jvalue
}
return false, ivalue > jvalue
}
func numericFieldCompare(i, j *document.NumericField, ascends bool) (bool, bool) {
ivalue, _ := i.Number()
jvalue, _ := i.Number()
if ivalue == jvalue {
return true, false
}
if ascends {
return false, ivalue < jvalue
}
return false, ivalue > jvalue
}
func dateTimeFieldCompare(i, j *document.DateTimeField, ascends bool) (bool, bool) {
ivalue, _ := i.DateTime()
jvalue, _ := i.DateTime()
if ivalue.Equal(jvalue) {
return true, false
}
if ascends {
return false, ivalue.Before(jvalue)
}
return false, ivalue.After(jvalue)
}
func boolFieldCompare(i, j *document.BooleanField, ascends bool) (bool, bool) {
ivalue, _ := i.Boolean()
jvalue, _ := i.Boolean()
if ivalue == jvalue {
return true, false
}
return false, ascends && jvalue
}
//returns: equals, less
func fieldCompare(i, j document.Field, ascends bool) (bool, bool) {
switch ft := i.(type) {
case *document.TextField:
return textFieldCompare(ft, j.(*document.TextField), ascends)
case *document.NumericField:
return numericFieldCompare(ft, j.(*document.NumericField), ascends)
case *document.DateTimeField:
return dateTimeFieldCompare(ft, j.(*document.DateTimeField), ascends)
case *document.BooleanField:
return boolFieldCompare(ft, j.(*document.BooleanField), ascends)
}
return false, false
}
func (hc *HeapCollector) Less(i, j int) bool {
if len(hc.sort) > 0 {
doci := hc.results[i].doc
docj := hc.results[j].doc
if doci != nil && docj != nil {
for _, so := range hc.sort {
fieldi := field(doci, so.Field)
fieldj := field(docj, so.Field)
equals, lt := fieldCompare(fieldi, fieldj, so.Ascends)
if !equals {
return lt
}
}
}
}
scori := hc.results[i].match.Score
scorj := hc.results[j].match.Score
// make sure the list is ordered if everything else is the same...
if scori == scorj {
return hc.results[i].match.ID > hc.results[j].match.ID
}
return scori < scorj
}
func (hc *HeapCollector) Swap(i, j int) {
hc.results[i], hc.results[j] = hc.results[j], hc.results[i]
}
func (hc *HeapCollector) Push(x interface{}) {
hc.results = append(hc.results, x.(*collectedDoc))
}
func (hc *HeapCollector) Pop() interface{} {
n := len(hc.results)
doc := hc.results[n-1]
hc.results = hc.results[0 : n-1]
return doc
}