From c7a342bc7d69414ea651f3b4ea7a0ec1d1586b49 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 22 Dec 2017 10:28:26 -0800 Subject: [PATCH] scorch conjuncts match phrase test passes The conjunction searcher Advance() method now checks if its curr doc-matches suffice before advancing them. --- index/scorch/snapshot_index.go | 1 + index/scorch/snapshot_index_tfr.go | 15 ++--- search/searcher/search_conjunction.go | 3 + search/searcher/search_phrase.go | 6 ++ test/versus_test.go | 83 ++++++++++++++++++++++----- 5 files changed, 88 insertions(+), 20 deletions(-) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 6089a771..5f08a496 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -343,6 +343,7 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, rv := &IndexSnapshotTermFieldReader{ term: term, + field: field, snapshot: i, postings: make([]segment.PostingsList, len(i.segment)), iterators: make([]segment.PostingsIterator, len(i.segment)), diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go index 25cc0bd0..497b83dd 100644 --- a/index/scorch/snapshot_index_tfr.go +++ b/index/scorch/snapshot_index_tfr.go @@ -23,6 +23,7 @@ import ( type IndexSnapshotTermFieldReader struct { term []byte + field string snapshot *IndexSnapshot postings []segment.PostingsList iterators []segment.PostingsIterator @@ -84,15 +85,15 @@ func (i *IndexSnapshotTermFieldReader) postingToTermFieldDoc(next segment.Postin } func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) { - // first make sure we aren't already pointing at the right thing, (due to way searchers work) + // FIXME do something better + // for now, if we need to seek backwards, then restart from the beginning if i.currPosting != nil && bytes.Compare(i.currID, ID) >= 0 { - rv := preAlloced - if rv == nil { - rv = 
&index.TermFieldDoc{} + i2, err := i.snapshot.TermFieldReader(i.term, i.field, + i.includeFreq, i.includeNorm, i.includeTermVectors) + if err != nil { + return nil, err } - rv.ID = i.currID - i.postingToTermFieldDoc(i.currPosting, rv) - return rv, nil + *i = *(i2.(*IndexSnapshotTermFieldReader)) } // FIXME do something better next, err := i.Next(preAlloced) diff --git a/search/searcher/search_conjunction.go b/search/searcher/search_conjunction.go index d7a873ff..73fba19c 100644 --- a/search/searcher/search_conjunction.go +++ b/search/searcher/search_conjunction.go @@ -184,6 +184,9 @@ func (s *ConjunctionSearcher) Advance(ctx *search.SearchContext, ID index.IndexI } } for i := range s.searchers { + if s.currs[i] != nil && s.currs[i].IndexInternalID.Compare(ID) >= 0 { + continue + } err := s.advanceChild(ctx, i, ID) if err != nil { return nil, err diff --git a/search/searcher/search_phrase.go b/search/searcher/search_phrase.go index 6ff592ef..552dfabe 100644 --- a/search/searcher/search_phrase.go +++ b/search/searcher/search_phrase.go @@ -313,6 +313,12 @@ func (s *PhraseSearcher) Advance(ctx *search.SearchContext, ID index.IndexIntern return nil, err } } + if s.currMust != nil { + if s.currMust.IndexInternalID.Compare(ID) >= 0 { + return s.Next(ctx) + } + ctx.DocumentMatchPool.Put(s.currMust) + } var err error s.currMust, err = s.mustSearcher.Advance(ctx, ID) if err != nil { diff --git a/test/versus_test.go b/test/versus_test.go index de4123ca..70463a93 100644 --- a/test/versus_test.go +++ b/test/versus_test.go @@ -41,7 +41,7 @@ import ( // go test -v -run TestScorchVersusUpsideDownBolt ./test // VERBOSE=1 FOCUS=Trista go test -v -run TestScorchVersusUpsideDownBolt ./test // -func TestScorchVersusUpsideDownBolt(t *testing.T) { +func TestScorchVersusUpsideDownBoltAll(t *testing.T) { (&VersusTest{ t: t, NumDocs: 1000, @@ -49,7 +49,7 @@ func TestScorchVersusUpsideDownBolt(t *testing.T) { NumWords: 10, BatchSize: 10, NumAttemptsPerSearch: 100, - }).run(scorch.Name, 
boltdb.Name, upsidedown.Name, boltdb.Name, nil) + }).run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil, nil) } func TestScorchVersusUpsideDownBoltSmallMNSAM(t *testing.T) { @@ -61,13 +61,25 @@ func TestScorchVersusUpsideDownBoltSmallMNSAM(t *testing.T) { NumWords: 1, BatchSize: 1, NumAttemptsPerSearch: 1, - }).run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil) + }).run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil, nil) +} + +func TestScorchVersusUpsideDownBoltSmallCMP11(t *testing.T) { + (&VersusTest{ + t: t, + Focus: "conjuncts-match-phrase-1-1", + NumDocs: 30, + MaxWordsPerDoc: 8, + NumWords: 2, + BatchSize: 1, + NumAttemptsPerSearch: 1, + }).run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil, nil) } // ------------------------------------------------------- // Templates used to compare search results in the "versus" tests. -var searchTemplates = []string{ +var testVersusSearchTemplates = []string{ `{ "about": "expected to return zero hits", "query": { @@ -130,7 +142,7 @@ var searchTemplates = []string{ } }`, `{ - "about": "must-not-only -- FAILS!!!", + "about": "must-not-only", "query": { "must_not": {"disjuncts": [ {"field": "body", "term": "{{word}}"} @@ -172,6 +184,24 @@ var searchTemplates = []string{ ]} } }`, + `{ + "about": "conjuncts-match-phrase-1-1 inspired by testrunner RQG issue -- see: MB-27291", + "query": { + "conjuncts": [ + {"field": "body", "match": "{{bodyWord 0}}"}, + {"field": "body", "match_phrase": "{{bodyWord 1}} {{bodyWord 1}}"} + ] + } + }`, + `{ + "about": "conjuncts-match-phrase-1-2 inspired by testrunner RQG issue -- see: MB-27291 -- FAILS!!", + "query": { + "conjuncts": [ + {"field": "body", "match": "{{bodyWord 0}}"}, + {"field": "body", "match_phrase": "{{bodyWord 1}} {{bodyWord 2}}"} + ] + } + }`, } // ------------------------------------------------------- @@ -203,13 +233,25 @@ type VersusTest struct { // ------------------------------------------------------- -func 
testVersusSearches(vt *VersusTest, idxA, idxB bleve.Index) { +func testVersusSearches(vt *VersusTest, searchTemplates []string, idxA, idxB bleve.Index) { t := vt.t funcMap := template.FuncMap{ + // Returns a word. The word may or may not be in any + // document's body. "word": func() string { return vt.genWord(vt.CurAttempt % vt.NumWords) }, + // Picks a document and returns the i'th word in that + // document's body. You can use this in searches to + // definitely find at least one document. + "bodyWord": func(i int) string { + body := vt.Bodies[vt.CurAttempt%len(vt.Bodies)] + if len(body) <= 0 { + return "" + } + return body[i%len(body)] + }, } // Optionally allow call to focus on a particular search templates, @@ -275,16 +317,24 @@ func testVersusSearches(vt *VersusTest, idxA, idxB bleve.Index) { hitsA := hitsById(resA) hitsB := hitsById(resB) if !reflect.DeepEqual(hitsA, hitsB) { - t.Errorf("search: (%d) %s,\n res hits mismatch,\n len(hitsA): %d,\n len(hitsB): %d", + t.Errorf("=========\nsearch: (%d) %s,\n res hits mismatch,\n len(hitsA): %d,\n len(hitsB): %d", i, bufBytes, len(hitsA), len(hitsB)) t.Errorf("\n hitsA: %#v,\n hitsB: %#v", hitsA, hitsB) for id, hitA := range hitsA { hitB := hitsB[id] if !reflect.DeepEqual(hitA, hitB) { - t.Errorf("\n hitA: %#v,\n hitB: %#v", hitA, hitB) + t.Errorf("\n driving from hitsA\n hitA: %#v,\n hitB: %#v", hitA, hitB) idx, _ := strconv.Atoi(id) - t.Errorf("\n body: %s", strings.Join(vt.Bodies[idx], " ")) + t.Errorf("\n doc: %d, body: %s", idx, strings.Join(vt.Bodies[idx], " ")) + } + } + for id, hitB := range hitsB { + hitA := hitsA[id] + if !reflect.DeepEqual(hitA, hitB) { + t.Errorf("\n driving from hitsB\n hitA: %#v,\n hitB: %#v", hitA, hitB) + idx, _ := strconv.Atoi(id) + t.Errorf("\n doc: %d, body: %s", idx, strings.Join(vt.Bodies[idx], " ")) } } } @@ -295,7 +345,7 @@ func testVersusSearches(vt *VersusTest, idxA, idxB bleve.Index) { if !reflect.DeepEqual(resA, resB) { resAj, _ := json.Marshal(resA) resBj, _ := 
json.Marshal(resB) - t.Errorf("search: (%d) %s,\n res mismatch,\n resA: %s,\n resB: %s", + t.Errorf("search: (%d) %s,\n res mismatch,\n resA: %s,\n resB: %s", i, bufBytes, resAj, resBj) } @@ -329,11 +379,16 @@ func hitsById(res *bleve.SearchResult) map[string]*search.DocumentMatch { // ------------------------------------------------------- func (vt *VersusTest) run(indexTypeA, kvStoreA, indexTypeB, kvStoreB string, - cb func(versusTest *VersusTest, idxA, idxB bleve.Index)) { + cb func(versusTest *VersusTest, searchTemplates []string, idxA, idxB bleve.Index), + searchTemplates []string) { if cb == nil { cb = testVersusSearches } + if searchTemplates == nil { + searchTemplates = testVersusSearchTemplates + } + if vt.Verbose <= 0 { vt.Verbose, _ = strconv.Atoi(os.Getenv("VERBOSE")) } @@ -369,12 +424,14 @@ func (vt *VersusTest) run(indexTypeA, kvStoreA, indexTypeB, kvStoreB string, rand.Seed(0) - vt.Bodies = vt.genBodies() + if vt.Bodies == nil { + vt.Bodies = vt.genBodies() + } vt.insertBodies(idxA) vt.insertBodies(idxB) - cb(vt, idxA, idxB) + cb(vt, searchTemplates, idxA, idxB) } // -------------------------------------------------------