Merge pull request #372 from slavikm/master
Load the document only once for both fields and highlighter
This commit is contained in:
commit
c6666d4674
|
@ -28,6 +28,7 @@ import (
|
||||||
"github.com/blevesearch/bleve/search"
|
"github.com/blevesearch/bleve/search"
|
||||||
"github.com/blevesearch/bleve/search/collectors"
|
"github.com/blevesearch/bleve/search/collectors"
|
||||||
"github.com/blevesearch/bleve/search/facets"
|
"github.com/blevesearch/bleve/search/facets"
|
||||||
|
"github.com/blevesearch/bleve/search/highlight"
|
||||||
)
|
)
|
||||||
|
|
||||||
type indexImpl struct {
|
type indexImpl struct {
|
||||||
|
@ -437,9 +438,11 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
|
||||||
|
|
||||||
hits := collector.Results()
|
hits := collector.Results()
|
||||||
|
|
||||||
|
var highlighter highlight.Highlighter
|
||||||
|
|
||||||
if req.Highlight != nil {
|
if req.Highlight != nil {
|
||||||
// get the right highlighter
|
// get the right highlighter
|
||||||
highlighter, err := Config.Cache.HighlighterNamed(Config.DefaultHighlighter)
|
highlighter, err = Config.Cache.HighlighterNamed(Config.DefaultHighlighter)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -452,74 +455,62 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
|
||||||
if highlighter == nil {
|
if highlighter == nil {
|
||||||
return nil, fmt.Errorf("no highlighter named `%s` registered", *req.Highlight.Style)
|
return nil, fmt.Errorf("no highlighter named `%s` registered", *req.Highlight.Style)
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, hit := range hits {
|
|
||||||
doc, err := indexReader.Document(hit.ID)
|
|
||||||
if err == nil && doc != nil {
|
|
||||||
highlightFields := req.Highlight.Fields
|
|
||||||
if highlightFields == nil {
|
|
||||||
// add all fields with matches
|
|
||||||
highlightFields = make([]string, 0, len(hit.Locations))
|
|
||||||
for k := range hit.Locations {
|
|
||||||
highlightFields = append(highlightFields, k)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, hf := range highlightFields {
|
|
||||||
highlighter.BestFragmentsInField(hit, doc, hf, 1)
|
|
||||||
}
|
|
||||||
} else if err == nil {
|
|
||||||
// unexpected case, a doc ID that was found as a search hit
|
|
||||||
// was unable to be found during document lookup
|
|
||||||
return nil, ErrorIndexReadInconsistency
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(req.Fields) > 0 {
|
for _, hit := range hits {
|
||||||
for _, hit := range hits {
|
if len(req.Fields) > 0 || highlighter != nil {
|
||||||
// FIXME avoid loading doc second time
|
|
||||||
// if we already loaded it for highlighting
|
|
||||||
doc, err := indexReader.Document(hit.ID)
|
doc, err := indexReader.Document(hit.ID)
|
||||||
if err == nil && doc != nil {
|
if err == nil && doc != nil {
|
||||||
for _, f := range req.Fields {
|
if len(req.Fields) > 0 {
|
||||||
for _, docF := range doc.Fields {
|
for _, f := range req.Fields {
|
||||||
if f == "*" || docF.Name() == f {
|
for _, docF := range doc.Fields {
|
||||||
var value interface{}
|
if f == "*" || docF.Name() == f {
|
||||||
switch docF := docF.(type) {
|
var value interface{}
|
||||||
case *document.TextField:
|
switch docF := docF.(type) {
|
||||||
value = string(docF.Value())
|
case *document.TextField:
|
||||||
case *document.NumericField:
|
value = string(docF.Value())
|
||||||
num, err := docF.Number()
|
case *document.NumericField:
|
||||||
if err == nil {
|
num, err := docF.Number()
|
||||||
value = num
|
if err == nil {
|
||||||
|
value = num
|
||||||
|
}
|
||||||
|
case *document.DateTimeField:
|
||||||
|
datetime, err := docF.DateTime()
|
||||||
|
if err == nil {
|
||||||
|
value = datetime.Format(time.RFC3339)
|
||||||
|
}
|
||||||
|
case *document.BooleanField:
|
||||||
|
boolean, err := docF.Boolean()
|
||||||
|
if err == nil {
|
||||||
|
value = boolean
|
||||||
|
}
|
||||||
}
|
}
|
||||||
case *document.DateTimeField:
|
if value != nil {
|
||||||
datetime, err := docF.DateTime()
|
hit.AddFieldValue(docF.Name(), value)
|
||||||
if err == nil {
|
|
||||||
value = datetime.Format(time.RFC3339)
|
|
||||||
}
|
}
|
||||||
case *document.BooleanField:
|
|
||||||
boolean, err := docF.Boolean()
|
|
||||||
if err == nil {
|
|
||||||
value = boolean
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if value != nil {
|
|
||||||
hit.AddFieldValue(docF.Name(), value)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if highlighter != nil {
|
||||||
|
highlightFields := req.Highlight.Fields
|
||||||
|
if highlightFields == nil {
|
||||||
|
// add all fields with matches
|
||||||
|
highlightFields = make([]string, 0, len(hit.Locations))
|
||||||
|
for k := range hit.Locations {
|
||||||
|
highlightFields = append(highlightFields, k)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, hf := range highlightFields {
|
||||||
|
highlighter.BestFragmentsInField(hit, doc, hf, 1)
|
||||||
|
}
|
||||||
|
}
|
||||||
} else if doc == nil {
|
} else if doc == nil {
|
||||||
// unexpected case, a doc ID that was found as a search hit
|
// unexpected case, a doc ID that was found as a search hit
|
||||||
// was unable to be found during document lookup
|
// was unable to be found during document lookup
|
||||||
return nil, ErrorIndexReadInconsistency
|
return nil, ErrorIndexReadInconsistency
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
for _, hit := range hits {
|
|
||||||
if i.name != "" {
|
if i.name != "" {
|
||||||
hit.Index = i.name
|
hit.Index = i.name
|
||||||
}
|
}
|
||||||
|
|
32
search/collectors/bench_test.go
Normal file
32
search/collectors/bench_test.go
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
package collectors
|
||||||
|
|
||||||
|
import (
|
||||||
|
"math/rand"
|
||||||
|
"strconv"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/search"
|
||||||
|
"golang.org/x/net/context"
|
||||||
|
)
|
||||||
|
|
||||||
|
// benchHelper is the shared driver for the collector benchmarks: it builds
// numOfMatches document matches and then runs collector.Collect over them
// b.N times, failing the benchmark on the first error.
//
// numOfMatches is the number of matches generated; collector is the
// collector under test; b is the benchmark handle supplying b.N.
//
// NOTE(review): the same collector value is passed to Collect on every run,
// so any state it retains between Collect calls carries over into later
// iterations; confirm that is intended.
func benchHelper(numOfMatches int, collector search.Collector, b *testing.B) {
|
||||||
|
// Build the fixture once, before the timed section: IDs are the decimal
// index, scores come from rand.Float64().
matches := make(search.DocumentMatchCollection, 0, numOfMatches)
|
||||||
|
for i := 0; i < numOfMatches; i++ {
|
||||||
|
matches = append(matches, &search.DocumentMatch{
|
||||||
|
ID: strconv.Itoa(i),
|
||||||
|
Score: rand.Float64(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Exclude the fixture construction above from the measured time.
b.ResetTimer()
|
||||||
|
|
||||||
|
for run := 0; run < b.N; run++ {
|
||||||
|
// A new stubSearcher is created per run, wrapping the same matches slice.
searcher := &stubSearcher{
|
||||||
|
matches: matches,
|
||||||
|
}
|
||||||
|
err := collector.Collect(context.Background(), searcher)
|
||||||
|
if err != nil {
|
||||||
|
b.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -24,6 +24,7 @@ type TopScoreCollector struct {
|
||||||
results *list.List
|
results *list.List
|
||||||
took time.Duration
|
took time.Duration
|
||||||
maxScore float64
|
maxScore float64
|
||||||
|
minScore float64
|
||||||
total uint64
|
total uint64
|
||||||
facetsBuilder *search.FacetsBuilder
|
facetsBuilder *search.FacetsBuilder
|
||||||
}
|
}
|
||||||
|
@ -98,6 +99,10 @@ func (tksc *TopScoreCollector) collectSingle(dm *search.DocumentMatch) {
|
||||||
tksc.maxScore = dm.Score
|
tksc.maxScore = dm.Score
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if dm.Score <= tksc.minScore {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
for e := tksc.results.Front(); e != nil; e = e.Next() {
|
for e := tksc.results.Front(); e != nil; e = e.Next() {
|
||||||
curr := e.Value.(*search.DocumentMatch)
|
curr := e.Value.(*search.DocumentMatch)
|
||||||
if dm.Score < curr.Score {
|
if dm.Score < curr.Score {
|
||||||
|
@ -106,7 +111,7 @@ func (tksc *TopScoreCollector) collectSingle(dm *search.DocumentMatch) {
|
||||||
// if we just made the list too long
|
// if we just made the list too long
|
||||||
if tksc.results.Len() > (tksc.k + tksc.skip) {
|
if tksc.results.Len() > (tksc.k + tksc.skip) {
|
||||||
// remove the head
|
// remove the head
|
||||||
tksc.results.Remove(tksc.results.Front())
|
tksc.minScore = tksc.results.Remove(tksc.results.Front()).(*search.DocumentMatch).Score
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -115,7 +120,7 @@ func (tksc *TopScoreCollector) collectSingle(dm *search.DocumentMatch) {
|
||||||
tksc.results.PushBack(dm)
|
tksc.results.PushBack(dm)
|
||||||
if tksc.results.Len() > (tksc.k + tksc.skip) {
|
if tksc.results.Len() > (tksc.k + tksc.skip) {
|
||||||
// remove the head
|
// remove the head
|
||||||
tksc.results.Remove(tksc.results.Front())
|
tksc.minScore = tksc.results.Remove(tksc.results.Front()).(*search.DocumentMatch).Score
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -10,8 +10,6 @@
|
||||||
package collectors
|
package collectors
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"math/rand"
|
|
||||||
"strconv"
|
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"golang.org/x/net/context"
|
"golang.org/x/net/context"
|
||||||
|
@ -225,27 +223,17 @@ func TestTop10ScoresSkip10(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Side-by-side diff rows: the removed inline body of
// BenchmarkTop10of100000Scores is interleaved below with the four added
// benchmarks, each of which is a single call to benchHelper.
//
// NOTE(review): on the added side every benchmark name states ten times more
// scores than the count passed to benchHelper (of100000 passes 10000,
// of1000000 passes 100000), while the removed body generated 100000 matches.
// Confirm which value is intended.
func BenchmarkTop10of100000Scores(b *testing.B) {
|
// BenchmarkTop10of100000Scores (added side) calls benchHelper with 10000
// matches and NewTopScorerCollector(10).
func BenchmarkTop10of100000Scores(b *testing.B) {
|
||||||
|
benchHelper(10000, NewTopScorerCollector(10), b)
|
||||||
matches := make(search.DocumentMatchCollection, 0, 100000)
|
}
|
||||||
for i := 0; i < 100000; i++ {
|
|
||||||
matches = append(matches, &search.DocumentMatch{
|
// BenchmarkTop100of100000Scores calls benchHelper with 10000 matches and
// NewTopScorerCollector(100).
func BenchmarkTop100of100000Scores(b *testing.B) {
|
||||||
ID: strconv.Itoa(i),
|
benchHelper(10000, NewTopScorerCollector(100), b)
|
||||||
Score: rand.Float64(),
|
}
|
||||||
})
|
|
||||||
}
|
// BenchmarkTop10of1000000Scores calls benchHelper with 100000 matches and
// NewTopScorerCollector(10).
func BenchmarkTop10of1000000Scores(b *testing.B) {
|
||||||
searcher := &stubSearcher{
|
benchHelper(100000, NewTopScorerCollector(10), b)
|
||||||
matches: matches,
|
}
|
||||||
}
|
|
||||||
|
// BenchmarkTop100of1000000Scores calls benchHelper with 100000 matches and
// NewTopScorerCollector(100).
func BenchmarkTop100of1000000Scores(b *testing.B) {
|
||||||
collector := NewTopScorerCollector(10)
|
benchHelper(100000, NewTopScorerCollector(100), b)
|
||||||
b.ResetTimer()
|
|
||||||
|
|
||||||
err := collector.Collect(context.Background(), searcher)
|
|
||||||
if err != nil {
|
|
||||||
b.Fatal(err)
|
|
||||||
}
|
|
||||||
res := collector.Results()
|
|
||||||
for _, dm := range res {
|
|
||||||
b.Logf("%s - %f\n", dm.ID, dm.Score)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user