From 6d830a9f3e6b8fd0af56b3b1e1fdb329ecc5e997 Mon Sep 17 00:00:00 2001
From: slavikm
Date: Thu, 28 Apr 2016 11:12:33 -0700
Subject: [PATCH 1/2] Load the document only once for both fields and highlighter

---
 index_impl.go | 97 +++++++++++++++++++++++----------------------------
 1 file changed, 44 insertions(+), 53 deletions(-)

diff --git a/index_impl.go b/index_impl.go
index 279d4eb4..9c6dfc69 100644
--- a/index_impl.go
+++ b/index_impl.go
@@ -28,6 +28,7 @@ import (
 	"github.com/blevesearch/bleve/search"
 	"github.com/blevesearch/bleve/search/collectors"
 	"github.com/blevesearch/bleve/search/facets"
+	"github.com/blevesearch/bleve/search/highlight"
 )
 
 type indexImpl struct {
@@ -437,9 +438,11 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
 
 	hits := collector.Results()
 
+	var highlighter highlight.Highlighter
+
 	if req.Highlight != nil {
 		// get the right highlighter
-		highlighter, err := Config.Cache.HighlighterNamed(Config.DefaultHighlighter)
+		highlighter, err = Config.Cache.HighlighterNamed(Config.DefaultHighlighter)
 		if err != nil {
 			return nil, err
 		}
@@ -452,74 +455,62 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
 		if highlighter == nil {
 			return nil, fmt.Errorf("no highlighter named `%s` registered", *req.Highlight.Style)
 		}
-
-		for _, hit := range hits {
-			doc, err := indexReader.Document(hit.ID)
-			if err == nil && doc != nil {
-				highlightFields := req.Highlight.Fields
-				if highlightFields == nil {
-					// add all fields with matches
-					highlightFields = make([]string, 0, len(hit.Locations))
-					for k := range hit.Locations {
-						highlightFields = append(highlightFields, k)
-					}
-				}
-
-				for _, hf := range highlightFields {
-					highlighter.BestFragmentsInField(hit, doc, hf, 1)
-				}
-			} else if err == nil {
-				// unexpected case, a doc ID that was found as a search hit
-				// was unable to be found during document lookup
-				return nil, ErrorIndexReadInconsistency
-			}
-		}
 	}
 
-	if len(req.Fields) > 0 {
-		for _, hit := range hits {
-			// FIXME avoid loading doc second time
-			// if we already loaded it for highlighting
+	for _, hit := range hits {
+		if len(req.Fields) > 0 || highlighter != nil {
 			doc, err := indexReader.Document(hit.ID)
 			if err == nil && doc != nil {
-				for _, f := range req.Fields {
-					for _, docF := range doc.Fields {
-						if f == "*" || docF.Name() == f {
-							var value interface{}
-							switch docF := docF.(type) {
-							case *document.TextField:
-								value = string(docF.Value())
-							case *document.NumericField:
-								num, err := docF.Number()
-								if err == nil {
-									value = num
+				if len(req.Fields) > 0 {
+					for _, f := range req.Fields {
+						for _, docF := range doc.Fields {
+							if f == "*" || docF.Name() == f {
+								var value interface{}
+								switch docF := docF.(type) {
+								case *document.TextField:
+									value = string(docF.Value())
+								case *document.NumericField:
+									num, err := docF.Number()
+									if err == nil {
+										value = num
+									}
+								case *document.DateTimeField:
+									datetime, err := docF.DateTime()
+									if err == nil {
+										value = datetime.Format(time.RFC3339)
+									}
+								case *document.BooleanField:
+									boolean, err := docF.Boolean()
+									if err == nil {
+										value = boolean
+									}
 								}
-							case *document.DateTimeField:
-								datetime, err := docF.DateTime()
-								if err == nil {
-									value = datetime.Format(time.RFC3339)
+								if value != nil {
+									hit.AddFieldValue(docF.Name(), value)
 								}
-							case *document.BooleanField:
-								boolean, err := docF.Boolean()
-								if err == nil {
-									value = boolean
-								}
-							}
-							if value != nil {
-								hit.AddFieldValue(docF.Name(), value)
 							}
 						}
 					}
 				}
+				if highlighter != nil {
+					highlightFields := req.Highlight.Fields
+					if highlightFields == nil {
+						// add all fields with matches
+						highlightFields = make([]string, 0, len(hit.Locations))
+						for k := range hit.Locations {
+							highlightFields = append(highlightFields, k)
+						}
+					}
+					for _, hf := range highlightFields {
+						highlighter.BestFragmentsInField(hit, doc, hf, 1)
+					}
+				}
 			} else if doc == nil {
 				// unexpected case, a doc ID that was found as a search hit
 				// was unable to be found during document lookup
 				return nil, ErrorIndexReadInconsistency
 			}
 		}
-	}
-
-	for _, hit := range hits {
 		if i.name != "" {
 			hit.Index = i.name
 		}

From f2aba116c49ea51b27bc9afd3bf15305ef04883c Mon Sep 17 00:00:00 2001
From: slavikm
Date: Fri, 29 Apr 2016 09:46:47 -0700
Subject: [PATCH 2/2] Make top score collector about 7 times faster

---
 search/collectors/bench_test.go               | 35 +++++++++++++++++
 search/collectors/collector_top_score.go      | 11 +++++-
 search/collectors/collector_top_score_test.go | 38 +++++++------------
 3 files changed, 57 insertions(+), 27 deletions(-)
 create mode 100644 search/collectors/bench_test.go

diff --git a/search/collectors/bench_test.go b/search/collectors/bench_test.go
new file mode 100644
index 00000000..d8daeb9e
--- /dev/null
+++ b/search/collectors/bench_test.go
@@ -0,0 +1,35 @@
+package collectors
+
+import (
+	"math/rand"
+	"strconv"
+	"testing"
+
+	"github.com/blevesearch/bleve/search"
+	"golang.org/x/net/context"
+)
+
+// benchHelper feeds numOfMatches randomly scored matches through a collector
+// b.N times; newCollector is called once per run so that state left behind by
+// a previous run (result list, minScore) cannot short-circuit the next one.
+func benchHelper(numOfMatches int, newCollector func() search.Collector, b *testing.B) {
+	matches := make(search.DocumentMatchCollection, 0, numOfMatches)
+	for i := 0; i < numOfMatches; i++ {
+		matches = append(matches, &search.DocumentMatch{
+			ID:    strconv.Itoa(i),
+			Score: rand.Float64(),
+		})
+	}
+
+	b.ResetTimer()
+
+	for run := 0; run < b.N; run++ {
+		searcher := &stubSearcher{
+			matches: matches,
+		}
+		err := newCollector().Collect(context.Background(), searcher)
+		if err != nil {
+			b.Fatal(err)
+		}
+	}
+}
diff --git a/search/collectors/collector_top_score.go b/search/collectors/collector_top_score.go
index 2f00d133..2ca5a67b 100644
--- a/search/collectors/collector_top_score.go
+++ b/search/collectors/collector_top_score.go
@@ -24,6 +24,7 @@ type TopScoreCollector struct {
 	results       *list.List
 	took          time.Duration
 	maxScore      float64
+	minScore      float64
 	total         uint64
 	facetsBuilder *search.FacetsBuilder
 }
@@ -98,6 +99,12 @@ func (tksc *TopScoreCollector) collectSingle(dm *search.DocumentMatch) {
 		tksc.maxScore = dm.Score
 	}
 
+	// minScore is only valid once the list is full; until then every hit,
+	// even one scoring zero or less, still has to be collected
+	if tksc.results.Len() >= (tksc.k+tksc.skip) && dm.Score <= tksc.minScore {
+		return
+	}
+
 	for e := tksc.results.Front(); e != nil; e = e.Next() {
 		curr := e.Value.(*search.DocumentMatch)
 		if dm.Score < curr.Score {
@@ -106,7 +113,7 @@ func (tksc *TopScoreCollector) collectSingle(dm *search.DocumentMatch) {
 			// if we just made the list too long
 			if tksc.results.Len() > (tksc.k + tksc.skip) {
 				// remove the head
-				tksc.results.Remove(tksc.results.Front())
+				tksc.minScore = tksc.results.Remove(tksc.results.Front()).(*search.DocumentMatch).Score
 			}
 			return
 		}
@@ -115,7 +122,7 @@ func (tksc *TopScoreCollector) collectSingle(dm *search.DocumentMatch) {
 	tksc.results.PushBack(dm)
 	if tksc.results.Len() > (tksc.k + tksc.skip) {
 		// remove the head
-		tksc.results.Remove(tksc.results.Front())
+		tksc.minScore = tksc.results.Remove(tksc.results.Front()).(*search.DocumentMatch).Score
 	}
 }
 
diff --git a/search/collectors/collector_top_score_test.go b/search/collectors/collector_top_score_test.go
index d37140ae..8b41ff1b 100644
--- a/search/collectors/collector_top_score_test.go
+++ b/search/collectors/collector_top_score_test.go
@@ -10,8 +10,6 @@
 package collectors
 
 import (
-	"math/rand"
-	"strconv"
 	"testing"
 
 	"golang.org/x/net/context"
@@ -225,27 +223,17 @@ func TestTop10ScoresSkip10(t *testing.T) {
 }
 
 func BenchmarkTop10of100000Scores(b *testing.B) {
-
-	matches := make(search.DocumentMatchCollection, 0, 100000)
-	for i := 0; i < 100000; i++ {
-		matches = append(matches, &search.DocumentMatch{
-			ID:    strconv.Itoa(i),
-			Score: rand.Float64(),
-		})
-	}
-	searcher := &stubSearcher{
-		matches: matches,
-	}
-
-	collector := NewTopScorerCollector(10)
-	b.ResetTimer()
-
-	err := collector.Collect(context.Background(), searcher)
-	if err != nil {
-		b.Fatal(err)
-	}
-	res := collector.Results()
-	for _, dm := range res {
-		b.Logf("%s - %f\n", dm.ID, dm.Score)
-	}
+	benchHelper(100000, func() search.Collector { return NewTopScorerCollector(10) }, b)
+}
+
+func BenchmarkTop100of100000Scores(b *testing.B) {
+	benchHelper(100000, func() search.Collector { return NewTopScorerCollector(100) }, b)
+}
+
+func BenchmarkTop10of1000000Scores(b *testing.B) {
+	benchHelper(1000000, func() search.Collector { return NewTopScorerCollector(10) }, b)
+}
+
+func BenchmarkTop100of1000000Scores(b *testing.B) {
+	benchHelper(1000000, func() search.Collector { return NewTopScorerCollector(100) }, b)
 }