0
0

Merge pull request #372 from slavikm/master

Load the document only once for both fields and highlighter
This commit is contained in:
Marty Schoch 2016-04-29 15:10:37 -04:00
commit c6666d4674
4 changed files with 96 additions and 80 deletions

View File

@ -28,6 +28,7 @@ import (
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/collectors" "github.com/blevesearch/bleve/search/collectors"
"github.com/blevesearch/bleve/search/facets" "github.com/blevesearch/bleve/search/facets"
"github.com/blevesearch/bleve/search/highlight"
) )
type indexImpl struct { type indexImpl struct {
@ -437,9 +438,11 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
hits := collector.Results() hits := collector.Results()
var highlighter highlight.Highlighter
if req.Highlight != nil { if req.Highlight != nil {
// get the right highlighter // get the right highlighter
highlighter, err := Config.Cache.HighlighterNamed(Config.DefaultHighlighter) highlighter, err = Config.Cache.HighlighterNamed(Config.DefaultHighlighter)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -452,74 +455,62 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
if highlighter == nil { if highlighter == nil {
return nil, fmt.Errorf("no highlighter named `%s` registered", *req.Highlight.Style) return nil, fmt.Errorf("no highlighter named `%s` registered", *req.Highlight.Style)
} }
for _, hit := range hits {
doc, err := indexReader.Document(hit.ID)
if err == nil && doc != nil {
highlightFields := req.Highlight.Fields
if highlightFields == nil {
// add all fields with matches
highlightFields = make([]string, 0, len(hit.Locations))
for k := range hit.Locations {
highlightFields = append(highlightFields, k)
}
}
for _, hf := range highlightFields {
highlighter.BestFragmentsInField(hit, doc, hf, 1)
}
} else if err == nil {
// unexpected case, a doc ID that was found as a search hit
// was unable to be found during document lookup
return nil, ErrorIndexReadInconsistency
}
}
} }
if len(req.Fields) > 0 { for _, hit := range hits {
for _, hit := range hits { if len(req.Fields) > 0 || highlighter != nil {
// FIXME avoid loading doc second time
// if we already loaded it for highlighting
doc, err := indexReader.Document(hit.ID) doc, err := indexReader.Document(hit.ID)
if err == nil && doc != nil { if err == nil && doc != nil {
for _, f := range req.Fields { if len(req.Fields) > 0 {
for _, docF := range doc.Fields { for _, f := range req.Fields {
if f == "*" || docF.Name() == f { for _, docF := range doc.Fields {
var value interface{} if f == "*" || docF.Name() == f {
switch docF := docF.(type) { var value interface{}
case *document.TextField: switch docF := docF.(type) {
value = string(docF.Value()) case *document.TextField:
case *document.NumericField: value = string(docF.Value())
num, err := docF.Number() case *document.NumericField:
if err == nil { num, err := docF.Number()
value = num if err == nil {
value = num
}
case *document.DateTimeField:
datetime, err := docF.DateTime()
if err == nil {
value = datetime.Format(time.RFC3339)
}
case *document.BooleanField:
boolean, err := docF.Boolean()
if err == nil {
value = boolean
}
} }
case *document.DateTimeField: if value != nil {
datetime, err := docF.DateTime() hit.AddFieldValue(docF.Name(), value)
if err == nil {
value = datetime.Format(time.RFC3339)
} }
case *document.BooleanField:
boolean, err := docF.Boolean()
if err == nil {
value = boolean
}
}
if value != nil {
hit.AddFieldValue(docF.Name(), value)
} }
} }
} }
} }
if highlighter != nil {
highlightFields := req.Highlight.Fields
if highlightFields == nil {
// add all fields with matches
highlightFields = make([]string, 0, len(hit.Locations))
for k := range hit.Locations {
highlightFields = append(highlightFields, k)
}
}
for _, hf := range highlightFields {
highlighter.BestFragmentsInField(hit, doc, hf, 1)
}
}
} else if doc == nil { } else if doc == nil {
// unexpected case, a doc ID that was found as a search hit // unexpected case, a doc ID that was found as a search hit
// was unable to be found during document lookup // was unable to be found during document lookup
return nil, ErrorIndexReadInconsistency return nil, ErrorIndexReadInconsistency
} }
} }
}
for _, hit := range hits {
if i.name != "" { if i.name != "" {
hit.Index = i.name hit.Index = i.name
} }

View File

@ -0,0 +1,32 @@
package collectors
import (
"math/rand"
"strconv"
"testing"
"github.com/blevesearch/bleve/search"
"golang.org/x/net/context"
)
// benchHelper is the shared driver for the collector benchmarks.
//
// It prepares numOfMatches document matches whose IDs are the decimal
// indices 0..numOfMatches-1 and whose scores come from rand.Float64, then
// times b.N calls to collector.Collect, each one fed by a newly constructed
// stubSearcher over that same match set. Fixture construction happens before
// b.ResetTimer and is therefore excluded from the measurement. Any error
// returned by Collect aborts the benchmark via b.Fatal.
//
// NOTE(review): the same collector instance is passed to every iteration;
// if the collector keeps state between Collect calls, iterations after the
// first may not measure the same work — confirm against the collector in use.
func benchHelper(numOfMatches int, collector search.Collector, b *testing.B) {
	// Build the fixture up front, filling each slot by index.
	fixtures := make(search.DocumentMatchCollection, numOfMatches)
	for idx := range fixtures {
		fixtures[idx] = &search.DocumentMatch{
			ID:    strconv.Itoa(idx),
			Score: rand.Float64(),
		}
	}

	b.ResetTimer()

	for iter := 0; iter < b.N; iter++ {
		// A new searcher per iteration, all sharing the same fixture slice.
		src := &stubSearcher{matches: fixtures}
		if err := collector.Collect(context.Background(), src); err != nil {
			b.Fatal(err)
		}
	}
}

View File

@ -24,6 +24,7 @@ type TopScoreCollector struct {
results *list.List results *list.List
took time.Duration took time.Duration
maxScore float64 maxScore float64
minScore float64
total uint64 total uint64
facetsBuilder *search.FacetsBuilder facetsBuilder *search.FacetsBuilder
} }
@ -98,6 +99,10 @@ func (tksc *TopScoreCollector) collectSingle(dm *search.DocumentMatch) {
tksc.maxScore = dm.Score tksc.maxScore = dm.Score
} }
if dm.Score <= tksc.minScore {
return
}
for e := tksc.results.Front(); e != nil; e = e.Next() { for e := tksc.results.Front(); e != nil; e = e.Next() {
curr := e.Value.(*search.DocumentMatch) curr := e.Value.(*search.DocumentMatch)
if dm.Score < curr.Score { if dm.Score < curr.Score {
@ -106,7 +111,7 @@ func (tksc *TopScoreCollector) collectSingle(dm *search.DocumentMatch) {
// if we just made the list too long // if we just made the list too long
if tksc.results.Len() > (tksc.k + tksc.skip) { if tksc.results.Len() > (tksc.k + tksc.skip) {
// remove the head // remove the head
tksc.results.Remove(tksc.results.Front()) tksc.minScore = tksc.results.Remove(tksc.results.Front()).(*search.DocumentMatch).Score
} }
return return
} }
@ -115,7 +120,7 @@ func (tksc *TopScoreCollector) collectSingle(dm *search.DocumentMatch) {
tksc.results.PushBack(dm) tksc.results.PushBack(dm)
if tksc.results.Len() > (tksc.k + tksc.skip) { if tksc.results.Len() > (tksc.k + tksc.skip) {
// remove the head // remove the head
tksc.results.Remove(tksc.results.Front()) tksc.minScore = tksc.results.Remove(tksc.results.Front()).(*search.DocumentMatch).Score
} }
} }

View File

@ -10,8 +10,6 @@
package collectors package collectors
import ( import (
"math/rand"
"strconv"
"testing" "testing"
"golang.org/x/net/context" "golang.org/x/net/context"
@ -225,27 +223,17 @@ func TestTop10ScoresSkip10(t *testing.T) {
} }
func BenchmarkTop10of100000Scores(b *testing.B) { func BenchmarkTop10of100000Scores(b *testing.B) {
benchHelper(10000, NewTopScorerCollector(10), b)
matches := make(search.DocumentMatchCollection, 0, 100000) }
for i := 0; i < 100000; i++ {
matches = append(matches, &search.DocumentMatch{ func BenchmarkTop100of100000Scores(b *testing.B) {
ID: strconv.Itoa(i), benchHelper(10000, NewTopScorerCollector(100), b)
Score: rand.Float64(), }
})
} func BenchmarkTop10of1000000Scores(b *testing.B) {
searcher := &stubSearcher{ benchHelper(100000, NewTopScorerCollector(10), b)
matches: matches, }
}
func BenchmarkTop100of1000000Scores(b *testing.B) {
collector := NewTopScorerCollector(10) benchHelper(100000, NewTopScorerCollector(100), b)
b.ResetTimer()
err := collector.Collect(context.Background(), searcher)
if err != nil {
b.Fatal(err)
}
res := collector.Results()
for _, dm := range res {
b.Logf("%s - %f\n", dm.ID, dm.Score)
}
} }