From 68af6aef62c216b45df2d63058346209851961ca Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 20 Jul 2016 11:25:05 -0700 Subject: [PATCH 01/14] optimize upside_down reader Next() when 0-length term field vectors From some bleve-query perf profiling, term field vectors appeared to be alloc'ed, which was unnecessary as term field vectors are disabled in the bleve-blast/bleve-query tests. --- index/upside_down/upside_down.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/index/upside_down/upside_down.go b/index/upside_down/upside_down.go index 5e9715ad..ba998f18 100644 --- a/index/upside_down/upside_down.go +++ b/index/upside_down/upside_down.go @@ -769,7 +769,13 @@ func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis. return rv, rows } +var emptyTermFieldVectors = []*index.TermFieldVector{} + func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) []*index.TermFieldVector { + if len(in) <= 0 { + return emptyTermFieldVectors + } + rv := make([]*index.TermFieldVector, len(in)) for i, tv := range in { From 2498ccc913d9653c76c87e9e313b96e8f8a640bf Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 20 Jul 2016 11:49:51 -0700 Subject: [PATCH 02/14] optimize upside_down reader Next() to reuse TermFrequencyRow Before this change, upside down's reader would alloc a new TermFrequencyRow on every Next(), which would be immediately transformed into an index.TermFieldDoc{}. This change reuses a pre-allocated TermFrequencyRow that's a field in the reader. 
--- index/upside_down/reader.go | 8 +++++++- index/upside_down/row.go | 24 ++++++++++++++++-------- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/index/upside_down/reader.go b/index/upside_down/reader.go index 07de4939..73e1f6c2 100644 --- a/index/upside_down/reader.go +++ b/index/upside_down/reader.go @@ -22,6 +22,7 @@ type UpsideDownCouchTermFieldReader struct { count uint64 term []byte field uint16 + tfrNext TermFrequencyRow } func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16) (*UpsideDownCouchTermFieldReader, error) { @@ -65,7 +66,12 @@ func (r *UpsideDownCouchTermFieldReader) Next() (*index.TermFieldDoc, error) { if r.iterator != nil { key, val, valid := r.iterator.Current() if valid { - tfr, err := NewTermFrequencyRowKV(key, val) + tfr := r.tfrNext + err := tfr.parseK(key) + if err != nil { + return nil, err + } + err = tfr.parseV(val) if err != nil { return nil, err } diff --git a/index/upside_down/row.go b/index/upside_down/row.go index 5685e521..1a44126b 100644 --- a/index/upside_down/row.go +++ b/index/upside_down/row.go @@ -483,26 +483,34 @@ func NewTermFrequencyRowWithTermVectors(term []byte, field uint16, docID []byte, } func NewTermFrequencyRowK(key []byte) (*TermFrequencyRow, error) { - rv := TermFrequencyRow{} + rv := &TermFrequencyRow{} + err := rv.parseK(key) + if err != nil { + return nil, err + } + return rv, nil +} + +func (tfr *TermFrequencyRow) parseK(key []byte) error { keyLen := len(key) if keyLen < 3 { - return nil, fmt.Errorf("invalid term frequency key, no valid field") + return fmt.Errorf("invalid term frequency key, no valid field") } - rv.field = binary.LittleEndian.Uint16(key[1:3]) + tfr.field = binary.LittleEndian.Uint16(key[1:3]) termEndPos := bytes.IndexByte(key[3:], ByteSeparator) if termEndPos < 0 { - return nil, fmt.Errorf("invalid term frequency key, no byte separator terminating term") + return fmt.Errorf("invalid term frequency key, no byte separator terminating 
term") } - rv.term = key[3 : 3+termEndPos] + tfr.term = key[3 : 3+termEndPos] docLen := len(key) - (3 + termEndPos + 1) if docLen < 1 { - return nil, fmt.Errorf("invalid term frequency key, empty docid") + return fmt.Errorf("invalid term frequency key, empty docid") } - rv.doc = key[3+termEndPos+1:] + tfr.doc = key[3+termEndPos+1:] - return &rv, nil + return nil } func (tfr *TermFrequencyRow) parseV(value []byte) error { From 988ca62182199aad7da369cf2804dea77dab2175 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 20 Jul 2016 16:29:20 -0700 Subject: [PATCH 03/14] optimize upside_down reader Next() with doc match reuse This optimization changes the search.Searcher.Next() interface API, adding an optional, pre-allocated *DocumentMatch parameter. When it's non-nil, the TermSearcher and TermQueryScorer will use that pre-allocated *DocumentMatch, instead of allocating a brand new DocumentMatch instance. --- search/collectors/collector_top_score.go | 16 ++++++++++------ search/collectors/search_test.go | 2 +- search/scorers/scorer_term.go | 12 +++++++----- search/scorers/scorer_term_test.go | 4 ++-- search/search.go | 7 ++++++- search/searchers/search_boolean.go | 14 +++++++------- search/searchers/search_boolean_test.go | 4 ++-- search/searchers/search_conjunction.go | 8 ++++---- search/searchers/search_conjunction_test.go | 4 ++-- search/searchers/search_disjunction.go | 8 ++++---- search/searchers/search_disjunction_test.go | 4 ++-- search/searchers/search_docid.go | 4 ++-- search/searchers/search_docid_test.go | 4 ++-- search/searchers/search_fuzzy.go | 4 ++-- search/searchers/search_fuzzy_test.go | 4 ++-- search/searchers/search_match_all.go | 2 +- search/searchers/search_match_all_test.go | 4 ++-- search/searchers/search_match_none.go | 2 +- search/searchers/search_match_none_test.go | 4 ++-- search/searchers/search_numeric_range.go | 4 ++-- search/searchers/search_phrase.go | 8 ++++---- search/searchers/search_phrase_test.go | 4 ++-- 
search/searchers/search_regexp.go | 4 ++-- search/searchers/search_regexp_test.go | 4 ++-- search/searchers/search_term.go | 6 +++--- search/searchers/search_term_prefix.go | 4 ++-- search/searchers/search_term_test.go | 4 ++-- 27 files changed, 80 insertions(+), 69 deletions(-) diff --git a/search/collectors/collector_top_score.go b/search/collectors/collector_top_score.go index b2996ad5..8868c4f6 100644 --- a/search/collectors/collector_top_score.go +++ b/search/collectors/collector_top_score.go @@ -60,12 +60,13 @@ func (tksc *TopScoreCollector) Took() time.Duration { func (tksc *TopScoreCollector) Collect(ctx context.Context, searcher search.Searcher) error { startTime := time.Now() var err error + var pre search.DocumentMatch // Pre-alloc'ed instance. var next *search.DocumentMatch select { case <-ctx.Done(): return ctx.Err() default: - next, err = searcher.Next() + next, err = searcher.Next(&pre) } for err == nil && next != nil { select { @@ -79,7 +80,7 @@ func (tksc *TopScoreCollector) Collect(ctx context.Context, searcher search.Sear break } } - next, err = searcher.Next() + next, err = searcher.Next(pre.Reset()) } } // compute search duration @@ -90,19 +91,22 @@ func (tksc *TopScoreCollector) Collect(ctx context.Context, searcher search.Sear return nil } -func (tksc *TopScoreCollector) collectSingle(dm *search.DocumentMatch) { +func (tksc *TopScoreCollector) collectSingle(dmIn *search.DocumentMatch) { // increment total hits tksc.total++ // update max score - if dm.Score > tksc.maxScore { - tksc.maxScore = dm.Score + if dmIn.Score > tksc.maxScore { + tksc.maxScore = dmIn.Score } - if dm.Score <= tksc.minScore { + if dmIn.Score <= tksc.minScore { return } + dm := &search.DocumentMatch{} + *dm = *dmIn + for e := tksc.results.Front(); e != nil; e = e.Next() { curr := e.Value.(*search.DocumentMatch) if dm.Score <= curr.Score { diff --git a/search/collectors/search_test.go b/search/collectors/search_test.go index dde33ea2..4c235444 100644 --- 
a/search/collectors/search_test.go +++ b/search/collectors/search_test.go @@ -18,7 +18,7 @@ type stubSearcher struct { matches search.DocumentMatchCollection } -func (ss *stubSearcher) Next() (*search.DocumentMatch, error) { +func (ss *stubSearcher) Next(preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { if ss.index < len(ss.matches) { rv := ss.matches[ss.index] ss.index++ diff --git a/search/scorers/scorer_term.go b/search/scorers/scorer_term.go index 0a0518d2..ce926221 100644 --- a/search/scorers/scorer_term.go +++ b/search/scorers/scorer_term.go @@ -83,7 +83,7 @@ func (s *TermQueryScorer) SetQueryNorm(qnorm float64) { } } -func (s *TermQueryScorer) Score(termMatch *index.TermFieldDoc) *search.DocumentMatch { +func (s *TermQueryScorer) Score(termMatch *index.TermFieldDoc, preAllocated *search.DocumentMatch) *search.DocumentMatch { var scoreExplanation *search.Explanation // need to compute score @@ -128,10 +128,12 @@ func (s *TermQueryScorer) Score(termMatch *index.TermFieldDoc) *search.DocumentM } } - rv := search.DocumentMatch{ - ID: termMatch.ID, - Score: score, + rv := preAllocated + if rv == nil { + rv = &search.DocumentMatch{} } + rv.ID = termMatch.ID + rv.Score = score if s.explain { rv.Expl = scoreExplanation } @@ -172,5 +174,5 @@ func (s *TermQueryScorer) Score(termMatch *index.TermFieldDoc) *search.DocumentM } - return &rv + return rv } diff --git a/search/scorers/scorer_term_test.go b/search/scorers/scorer_term_test.go index 48083f82..612b02d9 100644 --- a/search/scorers/scorer_term_test.go +++ b/search/scorers/scorer_term_test.go @@ -144,7 +144,7 @@ func TestTermScorer(t *testing.T) { } for _, test := range tests { - actual := scorer.Score(test.termMatch) + actual := scorer.Score(test.termMatch, nil) if !reflect.DeepEqual(actual, test.result) { t.Errorf("expected %#v got %#v for %#v", test.result, actual, test.termMatch) @@ -231,7 +231,7 @@ func TestTermScorerWithQueryNorm(t *testing.T) { } for _, test := range tests { - actual := 
scorer.Score(test.termMatch) + actual := scorer.Score(test.termMatch, nil) if !reflect.DeepEqual(actual, test.result) { t.Errorf("expected %#v got %#v for %#v", test.result, actual, test.termMatch) diff --git a/search/search.go b/search/search.go index cc4b175c..55bde5b1 100644 --- a/search/search.go +++ b/search/search.go @@ -85,6 +85,11 @@ func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) { dm.Fields[name] = valSlice } +func (dm *DocumentMatch) Reset() *DocumentMatch { + *dm = DocumentMatch{} + return dm +} + type DocumentMatchCollection []*DocumentMatch func (c DocumentMatchCollection) Len() int { return len(c) } @@ -92,7 +97,7 @@ func (c DocumentMatchCollection) Swap(i, j int) { c[i], c[j] = c[j], c[i] } func (c DocumentMatchCollection) Less(i, j int) bool { return c[i].Score > c[j].Score } type Searcher interface { - Next() (*DocumentMatch, error) + Next(preAllocated *DocumentMatch) (*DocumentMatch, error) Advance(ID string) (*DocumentMatch, error) Close() error Weight() float64 diff --git a/search/searchers/search_boolean.go b/search/searchers/search_boolean.go index bf44e027..00371b29 100644 --- a/search/searchers/search_boolean.go +++ b/search/searchers/search_boolean.go @@ -70,21 +70,21 @@ func (s *BooleanSearcher) initSearchers() error { var err error // get all searchers pointing at their first match if s.mustSearcher != nil { - s.currMust, err = s.mustSearcher.Next() + s.currMust, err = s.mustSearcher.Next(nil) if err != nil { return err } } if s.shouldSearcher != nil { - s.currShould, err = s.shouldSearcher.Next() + s.currShould, err = s.shouldSearcher.Next(nil) if err != nil { return err } } if s.mustNotSearcher != nil { - s.currMustNot, err = s.mustNotSearcher.Next() + s.currMustNot, err = s.mustNotSearcher.Next(nil) if err != nil { return err } @@ -106,12 +106,12 @@ func (s *BooleanSearcher) advanceNextMust() error { var err error if s.mustSearcher != nil { - s.currMust, err = s.mustSearcher.Next() + s.currMust, err = 
s.mustSearcher.Next(nil) if err != nil { return err } } else if s.mustSearcher == nil { - s.currShould, err = s.shouldSearcher.Next() + s.currShould, err = s.shouldSearcher.Next(nil) if err != nil { return err } @@ -148,7 +148,7 @@ func (s *BooleanSearcher) SetQueryNorm(qnorm float64) { } } -func (s *BooleanSearcher) Next() (*search.DocumentMatch, error) { +func (s *BooleanSearcher) Next(preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { if !s.initialized { err := s.initSearchers() @@ -292,7 +292,7 @@ func (s *BooleanSearcher) Advance(ID string) (*search.DocumentMatch, error) { s.currentID = "" } - return s.Next() + return s.Next(nil) } func (s *BooleanSearcher) Count() uint64 { diff --git a/search/searchers/search_boolean_test.go b/search/searchers/search_boolean_test.go index 43484d97..ac6dc7ea 100644 --- a/search/searchers/search_boolean_test.go +++ b/search/searchers/search_boolean_test.go @@ -342,7 +342,7 @@ func TestBooleanSearch(t *testing.T) { } }() - next, err := test.searcher.Next() + next, err := test.searcher.Next(nil) i := 0 for err == nil && next != nil { if i < len(test.results) { @@ -354,7 +354,7 @@ func TestBooleanSearch(t *testing.T) { t.Logf("scoring explanation: %s", next.Expl) } } - next, err = test.searcher.Next() + next, err = test.searcher.Next(nil) i++ } if err != nil { diff --git a/search/searchers/search_conjunction.go b/search/searchers/search_conjunction.go index 77c4b1ff..45685c42 100644 --- a/search/searchers/search_conjunction.go +++ b/search/searchers/search_conjunction.go @@ -67,7 +67,7 @@ func (s *ConjunctionSearcher) initSearchers() error { var err error // get all searchers pointing at their first match for i, termSearcher := range s.searchers { - s.currs[i], err = termSearcher.Next() + s.currs[i], err = termSearcher.Next(nil) if err != nil { return err } @@ -99,7 +99,7 @@ func (s *ConjunctionSearcher) SetQueryNorm(qnorm float64) { } } -func (s *ConjunctionSearcher) Next() (*search.DocumentMatch, error) { +func 
(s *ConjunctionSearcher) Next(preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { if !s.initialized { err := s.initSearchers() if err != nil { @@ -140,7 +140,7 @@ OUTER: rv = s.scorer.Score(s.currs) // prepare for next entry - s.currs[0], err = s.searchers[0].Next() + s.currs[0], err = s.searchers[0].Next(nil) if err != nil { return nil, err } @@ -170,7 +170,7 @@ func (s *ConjunctionSearcher) Advance(ID string) (*search.DocumentMatch, error) } } s.currentID = ID - return s.Next() + return s.Next(nil) } func (s *ConjunctionSearcher) Count() uint64 { diff --git a/search/searchers/search_conjunction_test.go b/search/searchers/search_conjunction_test.go index a72fdf89..1256059b 100644 --- a/search/searchers/search_conjunction_test.go +++ b/search/searchers/search_conjunction_test.go @@ -187,7 +187,7 @@ func TestConjunctionSearch(t *testing.T) { } }() - next, err := test.searcher.Next() + next, err := test.searcher.Next(nil) i := 0 for err == nil && next != nil { if i < len(test.results) { @@ -199,7 +199,7 @@ func TestConjunctionSearch(t *testing.T) { t.Logf("scoring explanation: %s", next.Expl) } } - next, err = test.searcher.Next() + next, err = test.searcher.Next(nil) i++ } if err != nil { diff --git a/search/searchers/search_disjunction.go b/search/searchers/search_disjunction.go index 1b6bd9cf..cf3b16d7 100644 --- a/search/searchers/search_disjunction.go +++ b/search/searchers/search_disjunction.go @@ -87,7 +87,7 @@ func (s *DisjunctionSearcher) initSearchers() error { var err error // get all searchers pointing at their first match for i, termSearcher := range s.searchers { - s.currs[i], err = termSearcher.Next() + s.currs[i], err = termSearcher.Next(nil) if err != nil { return err } @@ -122,7 +122,7 @@ func (s *DisjunctionSearcher) SetQueryNorm(qnorm float64) { } } -func (s *DisjunctionSearcher) Next() (*search.DocumentMatch, error) { +func (s *DisjunctionSearcher) Next(preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { if 
!s.initialized { err := s.initSearchers() if err != nil { @@ -153,7 +153,7 @@ func (s *DisjunctionSearcher) Next() (*search.DocumentMatch, error) { for i, curr := range s.currs { if curr != nil && curr.ID == s.currentID { searcher := s.searchers[i] - s.currs[i], err = searcher.Next() + s.currs[i], err = searcher.Next(nil) if err != nil { return nil, err } @@ -182,7 +182,7 @@ func (s *DisjunctionSearcher) Advance(ID string) (*search.DocumentMatch, error) s.currentID = s.nextSmallestID() - return s.Next() + return s.Next(nil) } func (s *DisjunctionSearcher) Count() uint64 { diff --git a/search/searchers/search_disjunction_test.go b/search/searchers/search_disjunction_test.go index 1d5eb094..019f61c7 100644 --- a/search/searchers/search_disjunction_test.go +++ b/search/searchers/search_disjunction_test.go @@ -108,7 +108,7 @@ func TestDisjunctionSearch(t *testing.T) { } }() - next, err := test.searcher.Next() + next, err := test.searcher.Next(nil) i := 0 for err == nil && next != nil { if i < len(test.results) { @@ -120,7 +120,7 @@ func TestDisjunctionSearch(t *testing.T) { t.Logf("scoring explanation: %s", next.Expl) } } - next, err = test.searcher.Next() + next, err = test.searcher.Next(nil) i++ } if err != nil { diff --git a/search/searchers/search_docid.go b/search/searchers/search_docid.go index 8d0f6cc6..a1dc113e 100644 --- a/search/searchers/search_docid.go +++ b/search/searchers/search_docid.go @@ -77,7 +77,7 @@ func (s *DocIDSearcher) SetQueryNorm(qnorm float64) { s.scorer.SetQueryNorm(qnorm) } -func (s *DocIDSearcher) Next() (*search.DocumentMatch, error) { +func (s *DocIDSearcher) Next(preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { if s.current >= len(s.ids) { return nil, nil } @@ -90,7 +90,7 @@ func (s *DocIDSearcher) Next() (*search.DocumentMatch, error) { func (s *DocIDSearcher) Advance(ID string) (*search.DocumentMatch, error) { s.current = sort.SearchStrings(s.ids, ID) - return s.Next() + return s.Next(nil) } func (s 
*DocIDSearcher) Close() error { diff --git a/search/searchers/search_docid_test.go b/search/searchers/search_docid_test.go index 67b878a5..38390114 100644 --- a/search/searchers/search_docid_test.go +++ b/search/searchers/search_docid_test.go @@ -68,7 +68,7 @@ func testDocIDSearcher(t *testing.T, indexed, searched, wanted []string) { // Check the sequence for i, id := range wanted { - m, err := searcher.Next() + m, err := searcher.Next(nil) if err != nil { t.Fatal(err) } @@ -76,7 +76,7 @@ func testDocIDSearcher(t *testing.T, indexed, searched, wanted []string) { t.Fatalf("expected %v at position %v, got %v", id, i, m.ID) } } - m, err := searcher.Next() + m, err := searcher.Next(nil) if err != nil { t.Fatal(err) } diff --git a/search/searchers/search_fuzzy.go b/search/searchers/search_fuzzy.go index 52c13e83..469d9187 100644 --- a/search/searchers/search_fuzzy.go +++ b/search/searchers/search_fuzzy.go @@ -107,8 +107,8 @@ func (s *FuzzySearcher) SetQueryNorm(qnorm float64) { s.searcher.SetQueryNorm(qnorm) } -func (s *FuzzySearcher) Next() (*search.DocumentMatch, error) { - return s.searcher.Next() +func (s *FuzzySearcher) Next(preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { + return s.searcher.Next(preAllocated) } diff --git a/search/searchers/search_fuzzy_test.go b/search/searchers/search_fuzzy_test.go index 89c5b1fd..610367de 100644 --- a/search/searchers/search_fuzzy_test.go +++ b/search/searchers/search_fuzzy_test.go @@ -105,7 +105,7 @@ func TestFuzzySearch(t *testing.T) { } }() - next, err := test.searcher.Next() + next, err := test.searcher.Next(nil) i := 0 for err == nil && next != nil { if i < len(test.results) { @@ -117,7 +117,7 @@ func TestFuzzySearch(t *testing.T) { t.Logf("scoring explanation: %s", next.Expl) } } - next, err = test.searcher.Next() + next, err = test.searcher.Next(nil) i++ } if err != nil { diff --git a/search/searchers/search_match_all.go b/search/searchers/search_match_all.go index 657bbaff..f55092c8 100644 --- 
a/search/searchers/search_match_all.go +++ b/search/searchers/search_match_all.go @@ -46,7 +46,7 @@ func (s *MatchAllSearcher) SetQueryNorm(qnorm float64) { s.scorer.SetQueryNorm(qnorm) } -func (s *MatchAllSearcher) Next() (*search.DocumentMatch, error) { +func (s *MatchAllSearcher) Next(preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { id, err := s.reader.Next() if err != nil { return nil, err diff --git a/search/searchers/search_match_all_test.go b/search/searchers/search_match_all_test.go index 915e4743..5bcd1b51 100644 --- a/search/searchers/search_match_all_test.go +++ b/search/searchers/search_match_all_test.go @@ -109,7 +109,7 @@ func TestMatchAllSearch(t *testing.T) { } }() - next, err := test.searcher.Next() + next, err := test.searcher.Next(nil) i := 0 for err == nil && next != nil { if i < len(test.results) { @@ -121,7 +121,7 @@ func TestMatchAllSearch(t *testing.T) { t.Logf("scoring explanation: %s", next.Expl) } } - next, err = test.searcher.Next() + next, err = test.searcher.Next(nil) i++ } if err != nil { diff --git a/search/searchers/search_match_none.go b/search/searchers/search_match_none.go index 87881f5c..b8621976 100644 --- a/search/searchers/search_match_none.go +++ b/search/searchers/search_match_none.go @@ -36,7 +36,7 @@ func (s *MatchNoneSearcher) SetQueryNorm(qnorm float64) { } -func (s *MatchNoneSearcher) Next() (*search.DocumentMatch, error) { +func (s *MatchNoneSearcher) Next(preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { return nil, nil } diff --git a/search/searchers/search_match_none_test.go b/search/searchers/search_match_none_test.go index 8a33c921..0d470358 100644 --- a/search/searchers/search_match_none_test.go +++ b/search/searchers/search_match_none_test.go @@ -51,7 +51,7 @@ func TestMatchNoneSearch(t *testing.T) { } }() - next, err := test.searcher.Next() + next, err := test.searcher.Next(nil) i := 0 for err == nil && next != nil { if i < len(test.results) { @@ -63,7 +63,7 @@ func 
TestMatchNoneSearch(t *testing.T) { t.Logf("scoring explanation: %s", next.Expl) } } - next, err = test.searcher.Next() + next, err = test.searcher.Next(nil) i++ } if err != nil { diff --git a/search/searchers/search_numeric_range.go b/search/searchers/search_numeric_range.go index 41d917ca..56e7a5a0 100644 --- a/search/searchers/search_numeric_range.go +++ b/search/searchers/search_numeric_range.go @@ -96,8 +96,8 @@ func (s *NumericRangeSearcher) SetQueryNorm(qnorm float64) { s.searcher.SetQueryNorm(qnorm) } -func (s *NumericRangeSearcher) Next() (*search.DocumentMatch, error) { - return s.searcher.Next() +func (s *NumericRangeSearcher) Next(preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { + return s.searcher.Next(preAllocated) } func (s *NumericRangeSearcher) Advance(ID string) (*search.DocumentMatch, error) { diff --git a/search/searchers/search_phrase.go b/search/searchers/search_phrase.go index fd318b48..c2f7b8f7 100644 --- a/search/searchers/search_phrase.go +++ b/search/searchers/search_phrase.go @@ -56,7 +56,7 @@ func (s *PhraseSearcher) initSearchers() error { var err error // get all searchers pointing at their first match if s.mustSearcher != nil { - s.currMust, err = s.mustSearcher.Next() + s.currMust, err = s.mustSearcher.Next(nil) if err != nil { return err } @@ -70,7 +70,7 @@ func (s *PhraseSearcher) advanceNextMust() error { var err error if s.mustSearcher != nil { - s.currMust, err = s.mustSearcher.Next() + s.currMust, err = s.mustSearcher.Next(nil) if err != nil { return err } @@ -90,7 +90,7 @@ func (s *PhraseSearcher) SetQueryNorm(qnorm float64) { s.mustSearcher.SetQueryNorm(qnorm) } -func (s *PhraseSearcher) Next() (*search.DocumentMatch, error) { +func (s *PhraseSearcher) Next(preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { if !s.initialized { err := s.initSearchers() if err != nil { @@ -172,7 +172,7 @@ func (s *PhraseSearcher) Advance(ID string) (*search.DocumentMatch, error) { if err != nil { return 
nil, err } - return s.Next() + return s.Next(nil) } func (s *PhraseSearcher) Count() uint64 { diff --git a/search/searchers/search_phrase_test.go b/search/searchers/search_phrase_test.go index fe05bdd5..a3c14b70 100644 --- a/search/searchers/search_phrase_test.go +++ b/search/searchers/search_phrase_test.go @@ -68,7 +68,7 @@ func TestPhraseSearch(t *testing.T) { } }() - next, err := test.searcher.Next() + next, err := test.searcher.Next(nil) i := 0 for err == nil && next != nil { if i < len(test.results) { @@ -80,7 +80,7 @@ func TestPhraseSearch(t *testing.T) { t.Logf("scoring explanation: %s", next.Expl) } } - next, err = test.searcher.Next() + next, err = test.searcher.Next(nil) i++ } if err != nil { diff --git a/search/searchers/search_regexp.go b/search/searchers/search_regexp.go index 00dff8bb..c10cd53e 100644 --- a/search/searchers/search_regexp.go +++ b/search/searchers/search_regexp.go @@ -106,8 +106,8 @@ func (s *RegexpSearcher) SetQueryNorm(qnorm float64) { s.searcher.SetQueryNorm(qnorm) } -func (s *RegexpSearcher) Next() (*search.DocumentMatch, error) { - return s.searcher.Next() +func (s *RegexpSearcher) Next(preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { + return s.searcher.Next(preAllocated) } diff --git a/search/searchers/search_regexp_test.go b/search/searchers/search_regexp_test.go index eca6c4ff..cb4d01ee 100644 --- a/search/searchers/search_regexp_test.go +++ b/search/searchers/search_regexp_test.go @@ -85,7 +85,7 @@ func TestRegexpSearch(t *testing.T) { } }() - next, err := test.searcher.Next() + next, err := test.searcher.Next(nil) i := 0 for err == nil && next != nil { if i < len(test.results) { @@ -97,7 +97,7 @@ func TestRegexpSearch(t *testing.T) { t.Logf("scoring explanation: %s", next.Expl) } } - next, err = test.searcher.Next() + next, err = test.searcher.Next(nil) i++ } if err != nil { diff --git a/search/searchers/search_term.go b/search/searchers/search_term.go index 2ea3a9d8..9a19e29f 100644 --- 
a/search/searchers/search_term.go +++ b/search/searchers/search_term.go @@ -52,7 +52,7 @@ func (s *TermSearcher) SetQueryNorm(qnorm float64) { s.scorer.SetQueryNorm(qnorm) } -func (s *TermSearcher) Next() (*search.DocumentMatch, error) { +func (s *TermSearcher) Next(preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { termMatch, err := s.reader.Next() if err != nil { return nil, err @@ -63,7 +63,7 @@ func (s *TermSearcher) Next() (*search.DocumentMatch, error) { } // score match - docMatch := s.scorer.Score(termMatch) + docMatch := s.scorer.Score(termMatch, preAllocated) // return doc match return docMatch, nil @@ -80,7 +80,7 @@ func (s *TermSearcher) Advance(ID string) (*search.DocumentMatch, error) { } // score match - docMatch := s.scorer.Score(termMatch) + docMatch := s.scorer.Score(termMatch, nil) // return doc match return docMatch, nil diff --git a/search/searchers/search_term_prefix.go b/search/searchers/search_term_prefix.go index 606bc5aa..42f6e18d 100644 --- a/search/searchers/search_term_prefix.go +++ b/search/searchers/search_term_prefix.go @@ -70,8 +70,8 @@ func (s *TermPrefixSearcher) SetQueryNorm(qnorm float64) { s.searcher.SetQueryNorm(qnorm) } -func (s *TermPrefixSearcher) Next() (*search.DocumentMatch, error) { - return s.searcher.Next() +func (s *TermPrefixSearcher) Next(preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { + return s.searcher.Next(preAllocated) } diff --git a/search/searchers/search_term_test.go b/search/searchers/search_term_test.go index cf6f04b6..1c309e47 100644 --- a/search/searchers/search_term_test.go +++ b/search/searchers/search_term_test.go @@ -163,7 +163,7 @@ func TestTermSearcher(t *testing.T) { t.Errorf("expected count of 9, got %d", searcher.Count()) } - docMatch, err := searcher.Next() + docMatch, err := searcher.Next(nil) if err != nil { t.Errorf("expected result, got %v", err) } @@ -188,7 +188,7 @@ func TestTermSearcher(t *testing.T) { } // try pushing next past end - docMatch, err 
= searcher.Next() + docMatch, err = searcher.Next(nil) if err != nil { t.Fatal(err) } From 39d3e2f028904670da94314160a99d0c1484f59d Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 20 Jul 2016 16:53:30 -0700 Subject: [PATCH 04/14] optimize upside_down reader Next() with TermFieldDoc reuse This optimization changes the index.TermFieldReader.Next() interface API, adding an optional, pre-allocated *TermFieldDoc parameter, which can help prevent garbage creation. --- index/index.go | 10 ++++++++-- index/upside_down/reader.go | 16 +++++++++------- index/upside_down/reader_test.go | 8 ++++---- index/upside_down/upside_down_test.go | 4 ++-- search/searchers/search_term.go | 3 ++- 5 files changed, 25 insertions(+), 16 deletions(-) diff --git a/index/index.go b/index/index.go index dcef48e9..e643ef10 100644 --- a/index/index.go +++ b/index/index.go @@ -111,8 +111,9 @@ type TermFieldDoc struct { // lexicographic order over their identifiers. type TermFieldReader interface { // Next returns the next document containing the term in this field, or nil - // when it reaches the end of the enumeration. - Next() (*TermFieldDoc, error) + // when it reaches the end of the enumeration. The preAlloced TermFieldDoc + // is optional, and when non-nil, will be used instead of allocating memory. + Next(preAlloced *TermFieldDoc) (*TermFieldDoc, error) // Advance resets the enumeration at specified document or its immediate // follower. 
@@ -199,3 +200,8 @@ func (b *Batch) Reset() { b.IndexOps = make(map[string]*document.Document) b.InternalOps = make(map[string][]byte) } + +func (tfd *TermFieldDoc) Reset() *TermFieldDoc { + *tfd = TermFieldDoc{} + return tfd +} diff --git a/index/upside_down/reader.go b/index/upside_down/reader.go index 73e1f6c2..ab7a18ed 100644 --- a/index/upside_down/reader.go +++ b/index/upside_down/reader.go @@ -62,7 +62,7 @@ func (r *UpsideDownCouchTermFieldReader) Count() uint64 { return r.count } -func (r *UpsideDownCouchTermFieldReader) Next() (*index.TermFieldDoc, error) { +func (r *UpsideDownCouchTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) { if r.iterator != nil { key, val, valid := r.iterator.Current() if valid { @@ -75,14 +75,16 @@ func (r *UpsideDownCouchTermFieldReader) Next() (*index.TermFieldDoc, error) { if err != nil { return nil, err } - rv := index.TermFieldDoc{ - ID: string(tfr.doc), - Freq: tfr.freq, - Norm: float64(tfr.norm), - Vectors: r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors), + rv := preAlloced + if rv == nil { + rv = &index.TermFieldDoc{} } + rv.ID = string(tfr.doc) + rv.Freq = tfr.freq + rv.Norm = float64(tfr.norm) + rv.Vectors = r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors) r.iterator.Next() - return &rv, nil + return rv, nil } } return nil, nil diff --git a/index/upside_down/reader_test.go b/index/upside_down/reader_test.go index 19c2bdfb..60b8156c 100644 --- a/index/upside_down/reader_test.go +++ b/index/upside_down/reader_test.go @@ -98,9 +98,9 @@ func TestIndexReader(t *testing.T) { var match *index.TermFieldDoc var actualCount uint64 - match, err = reader.Next() + match, err = reader.Next(nil) for err == nil && match != nil { - match, err = reader.Next() + match, err = reader.Next(nil) if err != nil { t.Errorf("unexpected error reading next") } @@ -127,7 +127,7 @@ func TestIndexReader(t *testing.T) { if err != nil { t.Errorf("unexpected error: %v", err) } - match, err 
= tfr.Next() + match, err = tfr.Next(nil) if err != nil { t.Errorf("unexpected error: %v", err) } @@ -176,7 +176,7 @@ func TestIndexReader(t *testing.T) { if count != 0 { t.Errorf("expected count 0 for reader of non-existant field") } - match, err = reader.Next() + match, err = reader.Next(nil) if err != nil { t.Errorf("unexpected error: %v", err) } diff --git a/index/upside_down/upside_down_test.go b/index/upside_down/upside_down_test.go index 15fbb3e5..85a62bc0 100644 --- a/index/upside_down/upside_down_test.go +++ b/index/upside_down/upside_down_test.go @@ -1124,12 +1124,12 @@ func TestIndexTermReaderCompositeFields(t *testing.T) { t.Error(err) } - tfd, err := termFieldReader.Next() + tfd, err := termFieldReader.Next(nil) for tfd != nil && err == nil { if tfd.ID != "1" { t.Errorf("expected to find document id 1") } - tfd, err = termFieldReader.Next() + tfd, err = termFieldReader.Next(nil) } if err != nil { t.Error(err) diff --git a/search/searchers/search_term.go b/search/searchers/search_term.go index 9a19e29f..c0d8bcc5 100644 --- a/search/searchers/search_term.go +++ b/search/searchers/search_term.go @@ -22,6 +22,7 @@ type TermSearcher struct { explain bool reader index.TermFieldReader scorer *scorers.TermQueryScorer + tfd index.TermFieldDoc } func NewTermSearcher(indexReader index.IndexReader, term string, field string, boost float64, explain bool) (*TermSearcher, error) { @@ -53,7 +54,7 @@ func (s *TermSearcher) SetQueryNorm(qnorm float64) { } func (s *TermSearcher) Next(preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { - termMatch, err := s.reader.Next() + termMatch, err := s.reader.Next(s.tfd.Reset()) if err != nil { return nil, err } From 6d7fa0b96419071f2b76445dde2cd1666e3be7b0 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Wed, 20 Jul 2016 22:51:59 -0700 Subject: [PATCH 05/14] optimize moss iterator checkDone() --- index/store/moss/iterator.go | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git 
a/index/store/moss/iterator.go b/index/store/moss/iterator.go index cf616fe4..c08af1d5 100644 --- a/index/store/moss/iterator.go +++ b/index/store/moss/iterator.go @@ -115,16 +115,11 @@ func (x *Iterator) Close() error { } func (x *Iterator) checkDone() { - x.done = true - x.k = nil - x.v = nil - k, v, err := x.iter.Current() - if err != nil { - return - } - - if x.prefix != nil && !bytes.HasPrefix(k, x.prefix) { + if err != nil || (x.prefix != nil && !bytes.HasPrefix(k, x.prefix)) { + x.done = true + x.k = nil + x.v = nil return } From b744148449b54b2a2e1b5d9600b7b0c938355d81 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 21 Jul 2016 09:48:41 -0700 Subject: [PATCH 06/14] optimization to actually reuse the TermFrequencyRow --- index/upside_down/reader.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index/upside_down/reader.go b/index/upside_down/reader.go index ab7a18ed..4b01f927 100644 --- a/index/upside_down/reader.go +++ b/index/upside_down/reader.go @@ -66,7 +66,7 @@ func (r *UpsideDownCouchTermFieldReader) Next(preAlloced *index.TermFieldDoc) (* if r.iterator != nil { key, val, valid := r.iterator.Current() if valid { - tfr := r.tfrNext + tfr := &r.tfrNext err := tfr.parseK(key) if err != nil { return nil, err From b564ebbfbe6e7796dc30c642220de5266d4a3c81 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 21 Jul 2016 10:02:47 -0700 Subject: [PATCH 07/14] optimization comments on DocumentMatch instance reuse --- search/collectors/collector_top_score.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/search/collectors/collector_top_score.go b/search/collectors/collector_top_score.go index 8868c4f6..740427ec 100644 --- a/search/collectors/collector_top_score.go +++ b/search/collectors/collector_top_score.go @@ -60,7 +60,7 @@ func (tksc *TopScoreCollector) Took() time.Duration { func (tksc *TopScoreCollector) Collect(ctx context.Context, searcher search.Searcher) error { startTime := time.Now() var err error - var 
pre search.DocumentMatch // Pre-alloc'ed instance. + var pre search.DocumentMatch // A single pre-alloc'ed, reused instance. var next *search.DocumentMatch select { case <-ctx.Done(): @@ -104,6 +104,9 @@ func (tksc *TopScoreCollector) collectSingle(dmIn *search.DocumentMatch) { return } + // Because the dmIn will be the single, pre-allocated, reused + // instance, we need to copy the dmIn into a new, standalone + // instance before inserting into our candidate results list. dm := &search.DocumentMatch{} *dm = *dmIn From cbb174b0748a0e3b3360a8ffd91b16f3800e99e5 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 21 Jul 2016 10:28:02 -0700 Subject: [PATCH 08/14] optimize moss iterator Next() done/k/v maintenance --- index/store/moss/iterator.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/index/store/moss/iterator.go b/index/store/moss/iterator.go index c08af1d5..671e9585 100644 --- a/index/store/moss/iterator.go +++ b/index/store/moss/iterator.go @@ -60,12 +60,11 @@ func (x *Iterator) Next() { return } - x.done = true - x.k = nil - x.v = nil - err := x.iter.Next() if err != nil { + x.done = true + x.k = nil + x.v = nil return } From b8c847878322572dbffe9b3b3cfce9bf13b896df Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 21 Jul 2016 10:22:06 -0700 Subject: [PATCH 09/14] optimize collector to check ctx.Done() only occasionally --- search/collectors/collector_top_score.go | 26 ++++++++++++++---------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/search/collectors/collector_top_score.go b/search/collectors/collector_top_score.go index 740427ec..90b0c75e 100644 --- a/search/collectors/collector_top_score.go +++ b/search/collectors/collector_top_score.go @@ -57,6 +57,8 @@ func (tksc *TopScoreCollector) Took() time.Duration { return tksc.took } +var COLLECT_CHECK_DONE_EVERY = uint64(1024) + func (tksc *TopScoreCollector) Collect(ctx context.Context, searcher search.Searcher) error { startTime := time.Now() var err error @@ 
-69,19 +71,21 @@ func (tksc *TopScoreCollector) Collect(ctx context.Context, searcher search.Sear next, err = searcher.Next(&pre) } for err == nil && next != nil { - select { - case <-ctx.Done(): - return ctx.Err() - default: - tksc.collectSingle(next) - if tksc.facetsBuilder != nil { - err = tksc.facetsBuilder.Update(next) - if err != nil { - break - } + if tksc.total%COLLECT_CHECK_DONE_EVERY == 0 { + select { + case <-ctx.Done(): + return ctx.Err() + default: } - next, err = searcher.Next(pre.Reset()) } + tksc.collectSingle(next) + if tksc.facetsBuilder != nil { + err = tksc.facetsBuilder.Update(next) + if err != nil { + break + } + } + next, err = searcher.Next(pre.Reset()) } // compute search duration tksc.took = time.Since(startTime) From 5271a0f62b43f84d2b617de2a9aabb81e94381a5 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 21 Jul 2016 11:45:54 -0700 Subject: [PATCH 10/14] optimize termFieldVectorsFromTermVectors when empty --- index/upside_down/upside_down.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/index/upside_down/upside_down.go b/index/upside_down/upside_down.go index ba998f18..7fec5724 100644 --- a/index/upside_down/upside_down.go +++ b/index/upside_down/upside_down.go @@ -769,11 +769,9 @@ func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis. return rv, rows } -var emptyTermFieldVectors = []*index.TermFieldVector{} - func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) []*index.TermFieldVector { if len(in) <= 0 { - return emptyTermFieldVectors + return nil } rv := make([]*index.TermFieldVector, len(in)) From 5094d2d0971f2e8b4536ee8f4d5bf16fa3f82d69 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 21 Jul 2016 18:31:07 -0700 Subject: [PATCH 11/14] optimize moss PrefixIterator Previously, the PrefixIterator() for moss was implemented by comparing the prefix bytes on every Next(). 
With this optimization, the next larger endKeyExclusive is computed at the iterator's initialization, which allows us to avoid all those prefix comparisons. --- index/store/moss/iterator.go | 20 +++++++++----------- index/store/moss/reader.go | 30 +++++++++++++++++------------- 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/index/store/moss/iterator.go b/index/store/moss/iterator.go index 671e9585..c2e0dc1d 100644 --- a/index/store/moss/iterator.go +++ b/index/store/moss/iterator.go @@ -18,15 +18,14 @@ import ( ) type Iterator struct { - store *Store - ss moss.Snapshot - iter moss.Iterator - prefix []byte - start []byte - end []byte - done bool - k []byte - v []byte + store *Store + ss moss.Snapshot + iter moss.Iterator + start []byte + end []byte + done bool + k []byte + v []byte } func (x *Iterator) Seek(seekToKey []byte) { @@ -105,7 +104,6 @@ func (x *Iterator) Close() error { x.iter = nil } - x.prefix = nil x.done = true x.k = nil x.v = nil @@ -115,7 +113,7 @@ func (x *Iterator) Close() error { func (x *Iterator) checkDone() { k, v, err := x.iter.Current() - if err != nil || (x.prefix != nil && !bytes.HasPrefix(k, x.prefix)) { + if err != nil { x.done = true x.k = nil x.v = nil diff --git a/index/store/moss/reader.go b/index/store/moss/reader.go index 277200b8..946dbbbf 100644 --- a/index/store/moss/reader.go +++ b/index/store/moss/reader.go @@ -12,6 +12,8 @@ package moss import ( + "math/big" + "github.com/couchbase/moss" "github.com/blevesearch/bleve/index/store" @@ -37,19 +39,22 @@ func (r *Reader) MultiGet(keys [][]byte) ([][]byte, error) { return store.MultiGet(r, keys) } +var bigOne = big.NewInt(1) + func (r *Reader) PrefixIterator(k []byte) store.KVIterator { - iter, err := r.ss.StartIterator(k, nil, moss.IteratorOptions{}) + kEnd := big.NewInt(0).Add(big.NewInt(0).SetBytes(k), bigOne).Bytes() + + iter, err := r.ss.StartIterator(k, kEnd, moss.IteratorOptions{}) if err != nil { return nil } rv := &Iterator{ - store: r.store, - ss: r.ss, - 
iter: iter, - prefix: k, - start: k, - end: nil, + store: r.store, + ss: r.ss, + iter: iter, + start: k, + end: kEnd, } rv.checkDone() @@ -64,12 +69,11 @@ func (r *Reader) RangeIterator(start, end []byte) store.KVIterator { } rv := &Iterator{ - store: r.store, - ss: r.ss, - iter: iter, - prefix: nil, - start: start, - end: end, + store: r.store, + ss: r.ss, + iter: iter, + start: start, + end: end, } rv.checkDone() From e33ae65cd268b80b29af68367e5970f5a0a64cd9 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Thu, 21 Jul 2016 19:41:10 -0700 Subject: [PATCH 12/14] optimize SqrtCache as just-an-array --- search/scorers/sqrt_cache.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/search/scorers/sqrt_cache.go b/search/scorers/sqrt_cache.go index f93d27cc..d444c25c 100644 --- a/search/scorers/sqrt_cache.go +++ b/search/scorers/sqrt_cache.go @@ -13,12 +13,12 @@ import ( "math" ) -var SqrtCache map[int]float64 +var SqrtCache []float64 const MaxSqrtCache = 64 func init() { - SqrtCache = make(map[int]float64, MaxSqrtCache) + SqrtCache = make([]float64, MaxSqrtCache) for i := 0; i < MaxSqrtCache; i++ { SqrtCache[i] = math.Sqrt(float64(i)) } From 3c82086805892fd9269cbc495578b5baad2dda87 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 22 Jul 2016 16:49:33 -0700 Subject: [PATCH 13/14] optimize upside_down reader & 64-bit struct alignments The UpsideDownCouchTermFieldReader.Next() only needs the doc ID from the key, so this change provides a specialized parseKDoc() method for that optimization. Additionally, fields in various structs are more 64-bit aligned, in an attempt to reduce the invocations of runtime.typedmemmove() and runtime.heapBitsBulkBarrier(), which the go compiler seems to automatically insert to transparently handle misaligned data. 
--- index/upside_down/reader.go | 20 ++++++++++++-------- index/upside_down/row.go | 27 ++++++++++++++++++++------- search/searchers/search_term.go | 2 +- 3 files changed, 33 insertions(+), 16 deletions(-) diff --git a/index/upside_down/reader.go b/index/upside_down/reader.go index 4b01f927..5b3c2959 100644 --- a/index/upside_down/reader.go +++ b/index/upside_down/reader.go @@ -17,12 +17,12 @@ import ( ) type UpsideDownCouchTermFieldReader struct { + count uint64 indexReader *IndexReader iterator store.KVIterator - count uint64 term []byte + tfrNext *TermFrequencyRow field uint16 - tfrNext TermFrequencyRow } func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16) (*UpsideDownCouchTermFieldReader, error) { @@ -34,9 +34,10 @@ func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, fi if val == nil { atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1)) return &UpsideDownCouchTermFieldReader{ - count: 0, - term: term, - field: field, + count: 0, + term: term, + tfrNext: &TermFrequencyRow{}, + field: field, }, nil } @@ -54,6 +55,7 @@ func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, fi iterator: it, count: dictionaryRow.count, term: term, + tfrNext: &TermFrequencyRow{}, field: field, }, nil } @@ -66,8 +68,8 @@ func (r *UpsideDownCouchTermFieldReader) Next(preAlloced *index.TermFieldDoc) (* if r.iterator != nil { key, val, valid := r.iterator.Current() if valid { - tfr := &r.tfrNext - err := tfr.parseK(key) + tfr := r.tfrNext + err := tfr.parseKDoc(key) if err != nil { return nil, err } @@ -82,7 +84,9 @@ func (r *UpsideDownCouchTermFieldReader) Next(preAlloced *index.TermFieldDoc) (* rv.ID = string(tfr.doc) rv.Freq = tfr.freq rv.Norm = float64(tfr.norm) - rv.Vectors = r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors) + if tfr.vectors != nil { + rv.Vectors = r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors) + } r.iterator.Next() return 
rv, nil } diff --git a/index/upside_down/row.go b/index/upside_down/row.go index 1a44126b..7327f5e3 100644 --- a/index/upside_down/row.go +++ b/index/upside_down/row.go @@ -350,11 +350,11 @@ func (tv *TermVector) String() string { type TermFrequencyRow struct { term []byte - field uint16 doc []byte freq uint64 - norm float32 vectors []*TermVector + norm float32 + field uint16 } func (tfr *TermFrequencyRow) Term() []byte { @@ -504,7 +504,7 @@ func (tfr *TermFrequencyRow) parseK(key []byte) error { } tfr.term = key[3 : 3+termEndPos] - docLen := len(key) - (3 + termEndPos + 1) + docLen := keyLen - (3 + termEndPos + 1) if docLen < 1 { return fmt.Errorf("invalid term frequency key, empty docid") } @@ -513,14 +513,27 @@ func (tfr *TermFrequencyRow) parseK(key []byte) error { return nil } +func (tfr *TermFrequencyRow) parseKDoc(key []byte) error { + termEndPos := bytes.IndexByte(key[3:], ByteSeparator) + if termEndPos < 0 { + return fmt.Errorf("invalid term frequency key, no byte separator terminating term") + } + + tfr.doc = key[3+termEndPos+1:] + if len(tfr.doc) <= 0 { + return fmt.Errorf("invalid term frequency key, empty docid") + } + + return nil +} + func (tfr *TermFrequencyRow) parseV(value []byte) error { - currOffset := 0 - bytesRead := 0 - tfr.freq, bytesRead = binary.Uvarint(value[currOffset:]) + var bytesRead int + tfr.freq, bytesRead = binary.Uvarint(value) if bytesRead <= 0 { return fmt.Errorf("invalid term frequency value, invalid frequency") } - currOffset += bytesRead + currOffset := bytesRead var norm uint64 norm, bytesRead = binary.Uvarint(value[currOffset:]) diff --git a/search/searchers/search_term.go b/search/searchers/search_term.go index c0d8bcc5..7baa649b 100644 --- a/search/searchers/search_term.go +++ b/search/searchers/search_term.go @@ -19,10 +19,10 @@ type TermSearcher struct { indexReader index.IndexReader term string field string - explain bool reader index.TermFieldReader scorer *scorers.TermQueryScorer tfd index.TermFieldDoc + explain 
bool } func NewTermSearcher(indexReader index.IndexReader, term string, field string, boost float64, explain bool) (*TermSearcher, error) { From 4822cff63acd6f4884e2e03256ae653996b896e9 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 29 Jul 2016 14:14:58 -0700 Subject: [PATCH 14/14] optimize Advance() with pre-allocated in-out param This perf-related change helps the code and API reach more similarity with the Next() methods, which now take a pre-allocate param. --- index/index.go | 2 +- index/upside_down/reader.go | 18 +++++++++++------- index/upside_down/reader_test.go | 6 +++--- search/collectors/search_test.go | 2 +- search/search.go | 2 +- search/searchers/search_boolean.go | 14 +++++++------- search/searchers/search_conjunction.go | 8 ++++---- search/searchers/search_disjunction.go | 6 +++--- search/searchers/search_disjunction_test.go | 2 +- search/searchers/search_docid.go | 4 ++-- search/searchers/search_docid_test.go | 4 ++-- search/searchers/search_fuzzy.go | 4 ++-- search/searchers/search_match_all.go | 2 +- search/searchers/search_match_none.go | 2 +- search/searchers/search_numeric_range.go | 4 ++-- search/searchers/search_phrase.go | 6 +++--- search/searchers/search_regexp.go | 4 ++-- search/searchers/search_term.go | 6 +++--- search/searchers/search_term_prefix.go | 4 ++-- search/searchers/search_term_test.go | 4 ++-- 20 files changed, 54 insertions(+), 50 deletions(-) diff --git a/index/index.go b/index/index.go index e643ef10..bf9f394a 100644 --- a/index/index.go +++ b/index/index.go @@ -117,7 +117,7 @@ type TermFieldReader interface { // Advance resets the enumeration at specified document or its immediate // follower. - Advance(ID string) (*TermFieldDoc, error) + Advance(ID string, preAlloced *TermFieldDoc) (*TermFieldDoc, error) // Count returns the number of documents contains the term in this field. 
Count() uint64 diff --git a/index/upside_down/reader.go b/index/upside_down/reader.go index 5b3c2959..53007ea0 100644 --- a/index/upside_down/reader.go +++ b/index/upside_down/reader.go @@ -94,7 +94,7 @@ func (r *UpsideDownCouchTermFieldReader) Next(preAlloced *index.TermFieldDoc) (* return nil, nil } -func (r *UpsideDownCouchTermFieldReader) Advance(docID string) (*index.TermFieldDoc, error) { +func (r *UpsideDownCouchTermFieldReader) Advance(docID string, preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) { if r.iterator != nil { tfr := NewTermFrequencyRow(r.term, r.field, []byte(docID), 0, 0) r.iterator.Seek(tfr.Key()) @@ -104,14 +104,18 @@ func (r *UpsideDownCouchTermFieldReader) Advance(docID string) (*index.TermField if err != nil { return nil, err } - rv := index.TermFieldDoc{ - ID: string(tfr.doc), - Freq: tfr.freq, - Norm: float64(tfr.norm), - Vectors: r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors), + rv := preAlloced + if rv == nil { + rv = &index.TermFieldDoc{} + } + rv.ID = string(tfr.doc) + rv.Freq = tfr.freq + rv.Norm = float64(tfr.norm) + if tfr.vectors != nil { + rv.Vectors = r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors) } r.iterator.Next() - return &rv, nil + return rv, nil } } return nil, nil diff --git a/index/upside_down/reader_test.go b/index/upside_down/reader_test.go index 60b8156c..124b1462 100644 --- a/index/upside_down/reader_test.go +++ b/index/upside_down/reader_test.go @@ -145,7 +145,7 @@ func TestIndexReader(t *testing.T) { t.Errorf("Error accessing term field reader: %v", err) } - match, err = reader.Advance("2") + match, err = reader.Advance("2", nil) if err != nil { t.Errorf("unexpected error: %v", err) } @@ -155,7 +155,7 @@ func TestIndexReader(t *testing.T) { if match.ID != "2" { t.Errorf("Expected ID '2', got '%s'", match.ID) } - match, err = reader.Advance("3") + match, err = reader.Advance("3", nil) if err != nil { t.Errorf("unexpected error: %v", err) } @@ -183,7 +183,7 @@ func 
TestIndexReader(t *testing.T) { if match != nil { t.Errorf("expected nil, got %v", match) } - match, err = reader.Advance("anywhere") + match, err = reader.Advance("anywhere", nil) if err != nil { t.Errorf("unexpected error: %v", err) } diff --git a/search/collectors/search_test.go b/search/collectors/search_test.go index 4c235444..629a1eca 100644 --- a/search/collectors/search_test.go +++ b/search/collectors/search_test.go @@ -27,7 +27,7 @@ func (ss *stubSearcher) Next(preAllocated *search.DocumentMatch) (*search.Docume return nil, nil } -func (ss *stubSearcher) Advance(ID string) (*search.DocumentMatch, error) { +func (ss *stubSearcher) Advance(ID string, preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { for ss.index < len(ss.matches) && ss.matches[ss.index].ID < ID { ss.index++ diff --git a/search/search.go b/search/search.go index 55bde5b1..4b3b988b 100644 --- a/search/search.go +++ b/search/search.go @@ -98,7 +98,7 @@ func (c DocumentMatchCollection) Less(i, j int) bool { return c[i].Score > c[j]. 
type Searcher interface { Next(preAllocated *DocumentMatch) (*DocumentMatch, error) - Advance(ID string) (*DocumentMatch, error) + Advance(ID string, preAllocated *DocumentMatch) (*DocumentMatch, error) Close() error Weight() float64 SetQueryNorm(float64) diff --git a/search/searchers/search_boolean.go b/search/searchers/search_boolean.go index 00371b29..0ae96483 100644 --- a/search/searchers/search_boolean.go +++ b/search/searchers/search_boolean.go @@ -163,7 +163,7 @@ func (s *BooleanSearcher) Next(preAllocated *search.DocumentMatch) (*search.Docu for s.currentID != "" { if s.currMustNot != nil && s.currMustNot.ID < s.currentID { // advance must not searcher to our candidate entry - s.currMustNot, err = s.mustNotSearcher.Advance(s.currentID) + s.currMustNot, err = s.mustNotSearcher.Advance(s.currentID, nil) if err != nil { return nil, err } @@ -186,7 +186,7 @@ func (s *BooleanSearcher) Next(preAllocated *search.DocumentMatch) (*search.Docu if s.currShould != nil && s.currShould.ID < s.currentID { // advance should searcher to our candidate entry - s.currShould, err = s.shouldSearcher.Advance(s.currentID) + s.currShould, err = s.shouldSearcher.Advance(s.currentID, nil) if err != nil { return nil, err } @@ -255,7 +255,7 @@ func (s *BooleanSearcher) Next(preAllocated *search.DocumentMatch) (*search.Docu return rv, nil } -func (s *BooleanSearcher) Advance(ID string) (*search.DocumentMatch, error) { +func (s *BooleanSearcher) Advance(ID string, preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { if !s.initialized { err := s.initSearchers() @@ -266,19 +266,19 @@ func (s *BooleanSearcher) Advance(ID string) (*search.DocumentMatch, error) { var err error if s.mustSearcher != nil { - s.currMust, err = s.mustSearcher.Advance(ID) + s.currMust, err = s.mustSearcher.Advance(ID, nil) if err != nil { return nil, err } } if s.shouldSearcher != nil { - s.currShould, err = s.shouldSearcher.Advance(ID) + s.currShould, err = s.shouldSearcher.Advance(ID, nil) if err 
!= nil { return nil, err } } if s.mustNotSearcher != nil { - s.currMustNot, err = s.mustNotSearcher.Advance(ID) + s.currMustNot, err = s.mustNotSearcher.Advance(ID, nil) if err != nil { return nil, err } @@ -292,7 +292,7 @@ func (s *BooleanSearcher) Advance(ID string) (*search.DocumentMatch, error) { s.currentID = "" } - return s.Next(nil) + return s.Next(preAllocated) } func (s *BooleanSearcher) Count() uint64 { diff --git a/search/searchers/search_conjunction.go b/search/searchers/search_conjunction.go index 45685c42..fe37b80b 100644 --- a/search/searchers/search_conjunction.go +++ b/search/searchers/search_conjunction.go @@ -117,7 +117,7 @@ OUTER: continue OUTER } // this reader doesn't have the currentID, try to advance - s.currs[i], err = termSearcher.Advance(s.currentID) + s.currs[i], err = termSearcher.Advance(s.currentID, nil) if err != nil { return nil, err } @@ -155,7 +155,7 @@ OUTER: return rv, nil } -func (s *ConjunctionSearcher) Advance(ID string) (*search.DocumentMatch, error) { +func (s *ConjunctionSearcher) Advance(ID string, preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { if !s.initialized { err := s.initSearchers() if err != nil { @@ -164,13 +164,13 @@ func (s *ConjunctionSearcher) Advance(ID string) (*search.DocumentMatch, error) } var err error for i, searcher := range s.searchers { - s.currs[i], err = searcher.Advance(ID) + s.currs[i], err = searcher.Advance(ID, nil) if err != nil { return nil, err } } s.currentID = ID - return s.Next(nil) + return s.Next(preAllocated) } func (s *ConjunctionSearcher) Count() uint64 { diff --git a/search/searchers/search_disjunction.go b/search/searchers/search_disjunction.go index cf3b16d7..d7cd9408 100644 --- a/search/searchers/search_disjunction.go +++ b/search/searchers/search_disjunction.go @@ -164,7 +164,7 @@ func (s *DisjunctionSearcher) Next(preAllocated *search.DocumentMatch) (*search. 
return rv, nil } -func (s *DisjunctionSearcher) Advance(ID string) (*search.DocumentMatch, error) { +func (s *DisjunctionSearcher) Advance(ID string, preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { if !s.initialized { err := s.initSearchers() if err != nil { @@ -174,7 +174,7 @@ func (s *DisjunctionSearcher) Advance(ID string) (*search.DocumentMatch, error) // get all searchers pointing at their first match var err error for i, termSearcher := range s.searchers { - s.currs[i], err = termSearcher.Advance(ID) + s.currs[i], err = termSearcher.Advance(ID, nil) if err != nil { return nil, err } @@ -182,7 +182,7 @@ func (s *DisjunctionSearcher) Advance(ID string) (*search.DocumentMatch, error) s.currentID = s.nextSmallestID() - return s.Next(nil) + return s.Next(preAllocated) } func (s *DisjunctionSearcher) Count() uint64 { diff --git a/search/searchers/search_disjunction_test.go b/search/searchers/search_disjunction_test.go index 019f61c7..19b0dd42 100644 --- a/search/searchers/search_disjunction_test.go +++ b/search/searchers/search_disjunction_test.go @@ -158,7 +158,7 @@ func TestDisjunctionAdvance(t *testing.T) { t.Fatal(err) } - match, err := martyOrDustinSearcher.Advance("3") + match, err := martyOrDustinSearcher.Advance("3", nil) if err != nil { t.Errorf("unexpected error: %v", err) } diff --git a/search/searchers/search_docid.go b/search/searchers/search_docid.go index a1dc113e..262583d0 100644 --- a/search/searchers/search_docid.go +++ b/search/searchers/search_docid.go @@ -88,9 +88,9 @@ func (s *DocIDSearcher) Next(preAllocated *search.DocumentMatch) (*search.Docume } -func (s *DocIDSearcher) Advance(ID string) (*search.DocumentMatch, error) { +func (s *DocIDSearcher) Advance(ID string, preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { s.current = sort.SearchStrings(s.ids, ID) - return s.Next(nil) + return s.Next(preAllocated) } func (s *DocIDSearcher) Close() error { diff --git a/search/searchers/search_docid_test.go 
b/search/searchers/search_docid_test.go index 38390114..ad89288b 100644 --- a/search/searchers/search_docid_test.go +++ b/search/searchers/search_docid_test.go @@ -91,7 +91,7 @@ func testDocIDSearcher(t *testing.T, indexed, searched, wanted []string) { } before := id[:1] for _, target := range []string{before, id} { - m, err := searcher.Advance(target) + m, err := searcher.Advance(target, nil) if err != nil { t.Fatal(err) } @@ -102,7 +102,7 @@ func testDocIDSearcher(t *testing.T, indexed, searched, wanted []string) { } // Seek after the end of the sequence after := "zzz" - m, err = searcher.Advance(after) + m, err = searcher.Advance(after, nil) if err != nil { t.Fatal(err) } diff --git a/search/searchers/search_fuzzy.go b/search/searchers/search_fuzzy.go index 469d9187..da328a7e 100644 --- a/search/searchers/search_fuzzy.go +++ b/search/searchers/search_fuzzy.go @@ -112,8 +112,8 @@ func (s *FuzzySearcher) Next(preAllocated *search.DocumentMatch) (*search.Docume } -func (s *FuzzySearcher) Advance(ID string) (*search.DocumentMatch, error) { - return s.searcher.Advance(ID) +func (s *FuzzySearcher) Advance(ID string, preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { + return s.searcher.Advance(ID, preAllocated) } func (s *FuzzySearcher) Close() error { diff --git a/search/searchers/search_match_all.go b/search/searchers/search_match_all.go index f55092c8..bd26ba13 100644 --- a/search/searchers/search_match_all.go +++ b/search/searchers/search_match_all.go @@ -63,7 +63,7 @@ func (s *MatchAllSearcher) Next(preAllocated *search.DocumentMatch) (*search.Doc } -func (s *MatchAllSearcher) Advance(ID string) (*search.DocumentMatch, error) { +func (s *MatchAllSearcher) Advance(ID string, preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { id, err := s.reader.Advance(ID) if err != nil { return nil, err diff --git a/search/searchers/search_match_none.go b/search/searchers/search_match_none.go index b8621976..0d4f5a9a 100644 --- 
a/search/searchers/search_match_none.go +++ b/search/searchers/search_match_none.go @@ -40,7 +40,7 @@ func (s *MatchNoneSearcher) Next(preAllocated *search.DocumentMatch) (*search.Do return nil, nil } -func (s *MatchNoneSearcher) Advance(ID string) (*search.DocumentMatch, error) { +func (s *MatchNoneSearcher) Advance(ID string, preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { return nil, nil } diff --git a/search/searchers/search_numeric_range.go b/search/searchers/search_numeric_range.go index 56e7a5a0..fdf3c4c0 100644 --- a/search/searchers/search_numeric_range.go +++ b/search/searchers/search_numeric_range.go @@ -100,8 +100,8 @@ func (s *NumericRangeSearcher) Next(preAllocated *search.DocumentMatch) (*search return s.searcher.Next(preAllocated) } -func (s *NumericRangeSearcher) Advance(ID string) (*search.DocumentMatch, error) { - return s.searcher.Advance(ID) +func (s *NumericRangeSearcher) Advance(ID string, preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { + return s.searcher.Advance(ID, preAllocated) } func (s *NumericRangeSearcher) Close() error { diff --git a/search/searchers/search_phrase.go b/search/searchers/search_phrase.go index c2f7b8f7..11e8c9c8 100644 --- a/search/searchers/search_phrase.go +++ b/search/searchers/search_phrase.go @@ -160,7 +160,7 @@ func (s *PhraseSearcher) Next(preAllocated *search.DocumentMatch) (*search.Docum return nil, nil } -func (s *PhraseSearcher) Advance(ID string) (*search.DocumentMatch, error) { +func (s *PhraseSearcher) Advance(ID string, preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { if !s.initialized { err := s.initSearchers() if err != nil { @@ -168,11 +168,11 @@ func (s *PhraseSearcher) Advance(ID string) (*search.DocumentMatch, error) { } } var err error - s.currMust, err = s.mustSearcher.Advance(ID) + s.currMust, err = s.mustSearcher.Advance(ID, nil) if err != nil { return nil, err } - return s.Next(nil) + return s.Next(preAllocated) } func (s 
*PhraseSearcher) Count() uint64 { diff --git a/search/searchers/search_regexp.go b/search/searchers/search_regexp.go index c10cd53e..d92822f1 100644 --- a/search/searchers/search_regexp.go +++ b/search/searchers/search_regexp.go @@ -111,8 +111,8 @@ func (s *RegexpSearcher) Next(preAllocated *search.DocumentMatch) (*search.Docum } -func (s *RegexpSearcher) Advance(ID string) (*search.DocumentMatch, error) { - return s.searcher.Advance(ID) +func (s *RegexpSearcher) Advance(ID string, preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { + return s.searcher.Advance(ID, preAllocated) } func (s *RegexpSearcher) Close() error { diff --git a/search/searchers/search_term.go b/search/searchers/search_term.go index 7baa649b..ff034112 100644 --- a/search/searchers/search_term.go +++ b/search/searchers/search_term.go @@ -70,8 +70,8 @@ func (s *TermSearcher) Next(preAllocated *search.DocumentMatch) (*search.Documen } -func (s *TermSearcher) Advance(ID string) (*search.DocumentMatch, error) { - termMatch, err := s.reader.Advance(ID) +func (s *TermSearcher) Advance(ID string, preAllocated *search.DocumentMatch) (*search.DocumentMatch, error) { + termMatch, err := s.reader.Advance(ID, s.tfd.Reset()) if err != nil { return nil, err } @@ -81,7 +81,7 @@ func (s *TermSearcher) Advance(ID string) (*search.DocumentMatch, error) { } // score match - docMatch := s.scorer.Score(termMatch, nil) + docMatch := s.scorer.Score(termMatch, preAllocated) // return doc match return docMatch, nil diff --git a/search/searchers/search_term_prefix.go b/search/searchers/search_term_prefix.go index 42f6e18d..35f34722 100644 --- a/search/searchers/search_term_prefix.go +++ b/search/searchers/search_term_prefix.go @@ -75,8 +75,8 @@ func (s *TermPrefixSearcher) Next(preAllocated *search.DocumentMatch) (*search.D } -func (s *TermPrefixSearcher) Advance(ID string) (*search.DocumentMatch, error) { - return s.searcher.Advance(ID) +func (s *TermPrefixSearcher) Advance(ID string, preAllocated 
*search.DocumentMatch) (*search.DocumentMatch, error) { + return s.searcher.Advance(ID, preAllocated) } func (s *TermPrefixSearcher) Close() error { diff --git a/search/searchers/search_term_test.go b/search/searchers/search_term_test.go index 1c309e47..ef3b927f 100644 --- a/search/searchers/search_term_test.go +++ b/search/searchers/search_term_test.go @@ -170,7 +170,7 @@ func TestTermSearcher(t *testing.T) { if docMatch.ID != "a" { t.Errorf("expected result ID to be 'a', got '%s", docMatch.ID) } - docMatch, err = searcher.Advance("c") + docMatch, err = searcher.Advance("c", nil) if err != nil { t.Errorf("expected result, got %v", err) } @@ -179,7 +179,7 @@ func TestTermSearcher(t *testing.T) { } // try advancing past end - docMatch, err = searcher.Advance("z") + docMatch, err = searcher.Advance("z", nil) if err != nil { t.Fatal(err) }