// Copyright (c) 2014 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package test import ( "bytes" "encoding/json" "fmt" "math" "math/rand" "os" "reflect" "strconv" "strings" "testing" "text/template" "github.com/blevesearch/bleve" "github.com/blevesearch/bleve/index/scorch" "github.com/blevesearch/bleve/index/store/boltdb" "github.com/blevesearch/bleve/index/upsidedown" "github.com/blevesearch/bleve/mapping" "github.com/blevesearch/bleve/search" ) // Tests scorch indexer versus upsidedown/bolt indexer against various // templated queries. Example usage from the bleve top-level directory... // // go test -v -run TestScorchVersusUpsideDownBolt ./test // VERBOSE=1 FOCUS=Trista go test -v -run TestScorchVersusUpsideDownBolt ./test // func TestScorchVersusUpsideDownBoltAll(t *testing.T) { (&VersusTest{ t: t, NumDocs: 1000, MaxWordsPerDoc: 20, NumWords: 10, BatchSize: 10, NumAttemptsPerSearch: 100, }).run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil, nil) } func TestScorchVersusUpsideDownBoltSmallMNSAM(t *testing.T) { (&VersusTest{ t: t, Focus: "must-not-same-as-must", NumDocs: 5, MaxWordsPerDoc: 2, NumWords: 1, BatchSize: 1, NumAttemptsPerSearch: 1, }).run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil, nil) } func TestScorchVersusUpsideDownBoltSmallCMP11(t *testing.T) { (&VersusTest{ t: t, Focus: "conjuncts-match-phrase-1-1", NumDocs: 30, MaxWordsPerDoc: 8, NumWords: 2, BatchSize: 1, NumAttemptsPerSearch: 1, }).run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil, nil) } // ------------------------------------------------------- // Templates used to compare search results in the "versus" tests. var testVersusSearchTemplates = []string{ `{ "about": "expected to return zero hits", "query": { "query": "title:notARealTitle" } }`, `{ "about": "try straight word()'s", "query": { "query": "body:{{word}}" } }`, `{ "about": "conjuncts on same term", "query": { "conjuncts": [ { "field": "body", "term": "{{word}}", "boost": 1.0 }, { "field": "body", "term": "{{word}}", "boost": 1.0 } ] } }`, `{ "about": "disjuncts on same term", "query": { "disjuncts": [ { "field": "body", "term": "{{word}}", "boost": 1.0 }, { "field": "body", "term": "{{word}}", "boost": 1.0 } ] } }`, `{ "about": "never-matching-title-conjuncts", "query": { "conjuncts": [ {"field": "body", "match": "{{word}}"}, {"field": "body", "match": "{{word}}"}, {"field": "title", "match": "notAnActualTitle"} ] } }`, `{ "about": "never-matching-title-disjuncts", "query": { "disjuncts": [ {"field": "body", "match": "{{word}}"}, {"field": "body", "match": "{{word}}"}, {"field": "title", "match": "notAnActualTitle"} ] } }`, `{ "about": "must-not-never-matches", "query": { "must_not": {"disjuncts": [ {"field": "title", "match": "notAnActualTitle"} ]}, "should": {"disjuncts": [ {"field": "body", "match": "{{word}}"} ]} } }`, `{ "about": "must-not-only", "query": { "must_not": {"disjuncts": [ {"field": "body", "term": "{{word}}"} ]} } }`, `{ "about": "must-not-same-as-must -- see: MB-27291", "query": { "must_not": {"disjuncts": [ {"field": "body", "match": "{{word}}"} ]}, "must": {"conjuncts": [ {"field": "body", "match": "{{word}}"} ]} } }`, `{ "about": "must-not-same-as-should", "query": { "must_not": {"disjuncts": [ {"field": "body", "match": "{{word}}"} ]}, "should": {"disjuncts": [ {"field": "body", "match": "{{word}}"} ]} } }`, `{ "about": "inspired by testrunner RQG issue -- see: MB-27291", "query": { "must_not": {"disjuncts": [ {"field": "title", "match": "Trista Allen"}, {"field": "body", "match": "{{word}}"} ]}, "should": {"disjuncts": [ {"field": "title", "match": "Kallie Safiya Amara"}, {"field": "body", "match": "{{word}}"} ]} } }`, `{ "about": "conjuncts-match-phrase-1-1 inspired by testrunner RQG issue -- see: MB-27291", "query": { "conjuncts": [ {"field": "body", "match": "{{bodyWord 0}}"}, {"field": "body", "match_phrase": "{{bodyWord 1}} {{bodyWord 1}}"} ] } }`, `{ "about": "conjuncts-match-phrase-1-2 inspired by testrunner RQG issue -- see: MB-27291 -- FAILS!!", "query": { "conjuncts": [ {"field": "body", "match": "{{bodyWord 0}}"}, {"field": "body", "match_phrase": "{{bodyWord 1}} {{bodyWord 2}}"} ] } }`, } // ------------------------------------------------------- type VersusTest struct { t *testing.T // Use environment variable VERBOSE= that's > 0 for more // verbose output. Verbose int // Allow user to focus on particular search templates, where // where the search template must contain the Focus string. Focus string NumDocs int // Number of docs to insert. MaxWordsPerDoc int // Max number words in each doc's Body field. NumWords int // Total number of words in the dictionary. BatchSize int // Batch size when inserting docs. NumAttemptsPerSearch int // For each search template, number of searches to try. // The Bodies is an array with length NumDocs, where each entry // is the words in a doc's Body field. Bodies [][]string CurAttempt int TotAttempts int } // ------------------------------------------------------- func testVersusSearches(vt *VersusTest, searchTemplates []string, idxA, idxB bleve.Index) { t := vt.t funcMap := template.FuncMap{ // Returns a word. The word may or may not be in any // document's body. "word": func() string { return vt.genWord(vt.CurAttempt % vt.NumWords) }, // Picks a document and returns the i'th word in that // document's body. You can use this in searches to // definitely find at least one document. "bodyWord": func(i int) string { body := vt.Bodies[vt.CurAttempt%len(vt.Bodies)] if len(body) <= 0 { return "" } return body[i%len(body)] }, } // Optionally allow call to focus on a particular search templates, // where the search template must contain the vt.Focus string. if vt.Focus == "" { vt.Focus = os.Getenv("FOCUS") } for i, searchTemplate := range searchTemplates { if vt.Focus != "" && !strings.Contains(searchTemplate, vt.Focus) { continue } tmpl, err := template.New("search").Funcs(funcMap).Parse(searchTemplate) if err != nil { t.Fatalf("could not parse search template: %s, err: %v", searchTemplate, err) } for j := 0; j < vt.NumAttemptsPerSearch; j++ { vt.CurAttempt = j var buf bytes.Buffer err = tmpl.Execute(&buf, vt) if err != nil { t.Fatalf("could not execute search template: %s, err: %v", searchTemplate, err) } bufBytes := buf.Bytes() if vt.Verbose > 0 { fmt.Printf(" %s\n", bufBytes) } var search bleve.SearchRequest err = json.Unmarshal(bufBytes, &search) if err != nil { t.Fatalf("could not unmarshal search: %s, err: %v", bufBytes, err) } search.Size = vt.NumDocs * 10 // Crank up limit to get all results. searchA := search searchB := search resA, errA := idxA.Search(&searchA) resB, errB := idxB.Search(&searchB) if errA != errB { t.Errorf("search: (%d) %s,\n err mismatch, errA: %v, errB: %v", i, bufBytes, errA, errB) } // Scores might have float64 vs float32 wobbles, so truncate precision. resA.MaxScore = math.Trunc(resA.MaxScore*1000.0) / 1000.0 resB.MaxScore = math.Trunc(resB.MaxScore*1000.0) / 1000.0 // Timings may be different between A & B, so force equality. resA.Took = resB.Took // Hits might have different ordering since some indexers // (like upsidedown) have a natural secondary sort on id // while others (like scorch) don't. So, we compare by // putting the hits from A & B into maps. hitsA := hitsById(resA) hitsB := hitsById(resB) if !reflect.DeepEqual(hitsA, hitsB) { t.Errorf("=========\nsearch: (%d) %s,\n res hits mismatch,\n len(hitsA): %d,\n len(hitsB): %d", i, bufBytes, len(hitsA), len(hitsB)) t.Errorf("\n hitsA: %#v,\n hitsB: %#v", hitsA, hitsB) for id, hitA := range hitsA { hitB := hitsB[id] if !reflect.DeepEqual(hitA, hitB) { t.Errorf("\n driving from hitsA\n hitA: %#v,\n hitB: %#v", hitA, hitB) idx, _ := strconv.Atoi(id) t.Errorf("\n doc: %d, body: %s", idx, strings.Join(vt.Bodies[idx], " ")) } } for id, hitB := range hitsB { hitA := hitsA[id] if !reflect.DeepEqual(hitA, hitB) { t.Errorf("\n driving from hitsB\n hitA: %#v,\n hitB: %#v", hitA, hitB) idx, _ := strconv.Atoi(id) t.Errorf("\n doc: %d, body: %s", idx, strings.Join(vt.Bodies[idx], " ")) } } } resA.Hits = nil resB.Hits = nil if !reflect.DeepEqual(resA, resB) { resAj, _ := json.Marshal(resA) resBj, _ := json.Marshal(resB) t.Errorf("search: (%d) %s,\n res mismatch,\n resA: %s,\n resB: %s", i, bufBytes, resAj, resBj) } if vt.Verbose > 0 { fmt.Printf(" Total: (%t) %d\n", resA.Total == resB.Total, resA.Total) } vt.TotAttempts++ } } } // Organizes the hits into a map keyed by id. func hitsById(res *bleve.SearchResult) map[string]*search.DocumentMatch { rv := make(map[string]*search.DocumentMatch, len(res.Hits)) for _, hit := range res.Hits { // Clear out or truncate precision of hit fields that might be // different across different indexer implementations. hit.Index = "" hit.Score = math.Trunc(hit.Score*1000.0) / 1000.0 hit.IndexInternalID = nil hit.HitNumber = 0 rv[hit.ID] = hit } return rv } // ------------------------------------------------------- func (vt *VersusTest) run(indexTypeA, kvStoreA, indexTypeB, kvStoreB string, cb func(versusTest *VersusTest, searchTemplates []string, idxA, idxB bleve.Index), searchTemplates []string) { if cb == nil { cb = testVersusSearches } if searchTemplates == nil { searchTemplates = testVersusSearchTemplates } if vt.Verbose <= 0 { vt.Verbose, _ = strconv.Atoi(os.Getenv("VERBOSE")) } dirA := "/tmp/bleve-versus-test-a" dirB := "/tmp/bleve-versus-test-b" defer func() { _ = os.RemoveAll(dirA) _ = os.RemoveAll(dirB) }() _ = os.RemoveAll(dirA) _ = os.RemoveAll(dirB) imA := vt.makeIndexMapping() imB := vt.makeIndexMapping() kvConfigA := map[string]interface{}{} kvConfigB := map[string]interface{}{} idxA, err := bleve.NewUsing(dirA, imA, indexTypeA, kvStoreA, kvConfigA) if err != nil || idxA == nil { vt.t.Fatalf("new using err: %v", err) } defer func() { _ = idxA.Close() }() idxB, err := bleve.NewUsing(dirB, imB, indexTypeB, kvStoreB, kvConfigB) if err != nil || idxB == nil { vt.t.Fatalf("new using err: %v", err) } defer func() { _ = idxB.Close() }() rand.Seed(0) if vt.Bodies == nil { vt.Bodies = vt.genBodies() } vt.insertBodies(idxA) vt.insertBodies(idxB) cb(vt, searchTemplates, idxA, idxB) } // ------------------------------------------------------- func (vt *VersusTest) makeIndexMapping() mapping.IndexMapping { standardFM := bleve.NewTextFieldMapping() standardFM.Store = false standardFM.IncludeInAll = false standardFM.IncludeTermVectors = true standardFM.Analyzer = "standard" dm := bleve.NewDocumentMapping() dm.AddFieldMappingsAt("title", standardFM) dm.AddFieldMappingsAt("body", standardFM) im := bleve.NewIndexMapping() im.DefaultMapping = dm im.DefaultAnalyzer = "standard" return im } func (vt *VersusTest) insertBodies(idx bleve.Index) { batch := idx.NewBatch() for i, bodyWords := range vt.Bodies { title := fmt.Sprintf("%d", i) body := strings.Join(bodyWords, " ") err := batch.Index(title, map[string]interface{}{"title": title, "body": body}) if err != nil { vt.t.Fatalf("batch.Index err: %v", err) } if i%vt.BatchSize == 0 { err = idx.Batch(batch) if err != nil { vt.t.Fatalf("batch err: %v", err) } batch.Reset() } } err := idx.Batch(batch) if err != nil { vt.t.Fatalf("last batch err: %v", err) } } func (vt *VersusTest) genBodies() (rv [][]string) { for i := 0; i < vt.NumDocs; i++ { rv = append(rv, vt.genBody()) } return rv } func (vt *VersusTest) genBody() (rv []string) { m := rand.Intn(vt.MaxWordsPerDoc) for j := 0; j < m; j++ { rv = append(rv, vt.genWord(rand.Intn(vt.NumWords))) } return rv } func (vt *VersusTest) genWord(i int) string { return fmt.Sprintf("%x", i) }