c7a342bc7d
The conjunction searcher Advance() method now checks if its curr doc-matches suffices before advancing them.
498 lines
14 KiB
Go
498 lines
14 KiB
Go
// Copyright (c) 2014 Couchbase, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package test
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/json"
|
|
"fmt"
|
|
"math"
|
|
"math/rand"
|
|
"os"
|
|
"reflect"
|
|
"strconv"
|
|
"strings"
|
|
"testing"
|
|
"text/template"
|
|
|
|
"github.com/blevesearch/bleve"
|
|
"github.com/blevesearch/bleve/index/scorch"
|
|
"github.com/blevesearch/bleve/index/store/boltdb"
|
|
"github.com/blevesearch/bleve/index/upsidedown"
|
|
"github.com/blevesearch/bleve/mapping"
|
|
"github.com/blevesearch/bleve/search"
|
|
)
|
|
|
|
// Tests scorch indexer versus upsidedown/bolt indexer against various
|
|
// templated queries. Example usage from the bleve top-level directory...
|
|
//
|
|
// go test -v -run TestScorchVersusUpsideDownBolt ./test
|
|
// VERBOSE=1 FOCUS=Trista go test -v -run TestScorchVersusUpsideDownBolt ./test
|
|
//
|
|
func TestScorchVersusUpsideDownBoltAll(t *testing.T) {
|
|
(&VersusTest{
|
|
t: t,
|
|
NumDocs: 1000,
|
|
MaxWordsPerDoc: 20,
|
|
NumWords: 10,
|
|
BatchSize: 10,
|
|
NumAttemptsPerSearch: 100,
|
|
}).run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil, nil)
|
|
}
|
|
|
|
func TestScorchVersusUpsideDownBoltSmallMNSAM(t *testing.T) {
|
|
(&VersusTest{
|
|
t: t,
|
|
Focus: "must-not-same-as-must",
|
|
NumDocs: 5,
|
|
MaxWordsPerDoc: 2,
|
|
NumWords: 1,
|
|
BatchSize: 1,
|
|
NumAttemptsPerSearch: 1,
|
|
}).run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil, nil)
|
|
}
|
|
|
|
func TestScorchVersusUpsideDownBoltSmallCMP11(t *testing.T) {
|
|
(&VersusTest{
|
|
t: t,
|
|
Focus: "conjuncts-match-phrase-1-1",
|
|
NumDocs: 30,
|
|
MaxWordsPerDoc: 8,
|
|
NumWords: 2,
|
|
BatchSize: 1,
|
|
NumAttemptsPerSearch: 1,
|
|
}).run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil, nil)
|
|
}
|
|
|
|
// -------------------------------------------------------
|
|
|
|
// testVersusSearchTemplates holds the search request templates used to
// compare search results in the "versus" tests.
//
// Each entry is a text/template source that must render to a JSON search
// request. The "about" value is a free-form description; it is also what a
// focus string is usually matched against, since templates are selected
// with a substring match on the whole template text. The template actions
// used here are {{word}} and {{bodyWord N}}.
var testVersusSearchTemplates = []string{
	`{
		"about": "expected to return zero hits",
		"query": {
			"query": "title:notARealTitle"
		}
	}`,
	`{
		"about": "try straight word()'s",
		"query": {
			"query": "body:{{word}}"
		}
	}`,
	`{
		"about": "conjuncts on same term",
		"query": {
			"conjuncts": [
				{ "field": "body", "term": "{{word}}", "boost": 1.0 },
				{ "field": "body", "term": "{{word}}", "boost": 1.0 }
			]
		}
	}`,
	`{
		"about": "disjuncts on same term",
		"query": {
			"disjuncts": [
				{ "field": "body", "term": "{{word}}", "boost": 1.0 },
				{ "field": "body", "term": "{{word}}", "boost": 1.0 }
			]
		}
	}`,
	`{
		"about": "never-matching-title-conjuncts",
		"query": {
			"conjuncts": [
				{"field": "body", "match": "{{word}}"},
				{"field": "body", "match": "{{word}}"},
				{"field": "title", "match": "notAnActualTitle"}
			]
		}
	}`,
	`{
		"about": "never-matching-title-disjuncts",
		"query": {
			"disjuncts": [
				{"field": "body", "match": "{{word}}"},
				{"field": "body", "match": "{{word}}"},
				{"field": "title", "match": "notAnActualTitle"}
			]
		}
	}`,
	`{
		"about": "must-not-never-matches",
		"query": {
			"must_not": {"disjuncts": [
				{"field": "title", "match": "notAnActualTitle"}
			]},
			"should": {"disjuncts": [
				{"field": "body", "match": "{{word}}"}
			]}
		}
	}`,
	`{
		"about": "must-not-only",
		"query": {
			"must_not": {"disjuncts": [
				{"field": "body", "term": "{{word}}"}
			]}
		}
	}`,
	`{
		"about": "must-not-same-as-must -- see: MB-27291",
		"query": {
			"must_not": {"disjuncts": [
				{"field": "body", "match": "{{word}}"}
			]},
			"must": {"conjuncts": [
				{"field": "body", "match": "{{word}}"}
			]}
		}
	}`,
	`{
		"about": "must-not-same-as-should",
		"query": {
			"must_not": {"disjuncts": [
				{"field": "body", "match": "{{word}}"}
			]},
			"should": {"disjuncts": [
				{"field": "body", "match": "{{word}}"}
			]}
		}
	}`,
	`{
		"about": "inspired by testrunner RQG issue -- see: MB-27291",
		"query": {
			"must_not": {"disjuncts": [
				{"field": "title", "match": "Trista Allen"},
				{"field": "body", "match": "{{word}}"}
			]},
			"should": {"disjuncts": [
				{"field": "title", "match": "Kallie Safiya Amara"},
				{"field": "body", "match": "{{word}}"}
			]}
		}
	}`,
	`{
		"about": "conjuncts-match-phrase-1-1 inspired by testrunner RQG issue -- see: MB-27291",
		"query": {
			"conjuncts": [
				{"field": "body", "match": "{{bodyWord 0}}"},
				{"field": "body", "match_phrase": "{{bodyWord 1}} {{bodyWord 1}}"}
			]
		}
	}`,
	`{
		"about": "conjuncts-match-phrase-1-2 inspired by testrunner RQG issue -- see: MB-27291 -- FAILS!!",
		"query": {
			"conjuncts": [
				{"field": "body", "match": "{{bodyWord 0}}"},
				{"field": "body", "match_phrase": "{{bodyWord 1}} {{bodyWord 2}}"}
			]
		}
	}`,
}
|
|
|
|
// -------------------------------------------------------
|
|
|
|
// VersusTest holds the configuration and the running state of one "versus"
// comparison between two index implementations.
type VersusTest struct {
	t *testing.T

	// Use environment variable VERBOSE=<integer> that's > 0 for more
	// verbose output.
	Verbose int

	// Allow user to focus on particular search templates, where the
	// search template must contain the Focus string.
	Focus string

	NumDocs              int // Number of docs to insert.
	MaxWordsPerDoc       int // Max number words in each doc's Body field.
	NumWords             int // Total number of words in the dictionary.
	BatchSize            int // Batch size when inserting docs.
	NumAttemptsPerSearch int // For each search template, number of searches to try.

	// The Bodies is an array with length NumDocs, where each entry
	// is the words in a doc's Body field.
	Bodies [][]string

	CurAttempt  int // Index of the current attempt for the search template being run.
	TotAttempts int // Running count of search attempts made.
}
|
|
|
|
// -------------------------------------------------------
|
|
|
|
func testVersusSearches(vt *VersusTest, searchTemplates []string, idxA, idxB bleve.Index) {
|
|
t := vt.t
|
|
|
|
funcMap := template.FuncMap{
|
|
// Returns a word. The word may or may not be in any
|
|
// document's body.
|
|
"word": func() string {
|
|
return vt.genWord(vt.CurAttempt % vt.NumWords)
|
|
},
|
|
// Picks a document and returns the i'th word in that
|
|
// document's body. You can use this in searches to
|
|
// definitely find at least one document.
|
|
"bodyWord": func(i int) string {
|
|
body := vt.Bodies[vt.CurAttempt%len(vt.Bodies)]
|
|
if len(body) <= 0 {
|
|
return ""
|
|
}
|
|
return body[i%len(body)]
|
|
},
|
|
}
|
|
|
|
// Optionally allow call to focus on a particular search templates,
|
|
// where the search template must contain the vt.Focus string.
|
|
if vt.Focus == "" {
|
|
vt.Focus = os.Getenv("FOCUS")
|
|
}
|
|
|
|
for i, searchTemplate := range searchTemplates {
|
|
if vt.Focus != "" && !strings.Contains(searchTemplate, vt.Focus) {
|
|
continue
|
|
}
|
|
|
|
tmpl, err := template.New("search").Funcs(funcMap).Parse(searchTemplate)
|
|
if err != nil {
|
|
t.Fatalf("could not parse search template: %s, err: %v", searchTemplate, err)
|
|
}
|
|
|
|
for j := 0; j < vt.NumAttemptsPerSearch; j++ {
|
|
vt.CurAttempt = j
|
|
|
|
var buf bytes.Buffer
|
|
err = tmpl.Execute(&buf, vt)
|
|
if err != nil {
|
|
t.Fatalf("could not execute search template: %s, err: %v", searchTemplate, err)
|
|
}
|
|
|
|
bufBytes := buf.Bytes()
|
|
|
|
if vt.Verbose > 0 {
|
|
fmt.Printf(" %s\n", bufBytes)
|
|
}
|
|
|
|
var search bleve.SearchRequest
|
|
err = json.Unmarshal(bufBytes, &search)
|
|
if err != nil {
|
|
t.Fatalf("could not unmarshal search: %s, err: %v", bufBytes, err)
|
|
}
|
|
|
|
search.Size = vt.NumDocs * 10 // Crank up limit to get all results.
|
|
|
|
searchA := search
|
|
searchB := search
|
|
|
|
resA, errA := idxA.Search(&searchA)
|
|
resB, errB := idxB.Search(&searchB)
|
|
if errA != errB {
|
|
t.Errorf("search: (%d) %s,\n err mismatch, errA: %v, errB: %v",
|
|
i, bufBytes, errA, errB)
|
|
}
|
|
|
|
// Scores might have float64 vs float32 wobbles, so truncate precision.
|
|
resA.MaxScore = math.Trunc(resA.MaxScore*1000.0) / 1000.0
|
|
resB.MaxScore = math.Trunc(resB.MaxScore*1000.0) / 1000.0
|
|
|
|
// Timings may be different between A & B, so force equality.
|
|
resA.Took = resB.Took
|
|
|
|
// Hits might have different ordering since some indexers
|
|
// (like upsidedown) have a natural secondary sort on id
|
|
// while others (like scorch) don't. So, we compare by
|
|
// putting the hits from A & B into maps.
|
|
hitsA := hitsById(resA)
|
|
hitsB := hitsById(resB)
|
|
if !reflect.DeepEqual(hitsA, hitsB) {
|
|
t.Errorf("=========\nsearch: (%d) %s,\n res hits mismatch,\n len(hitsA): %d,\n len(hitsB): %d",
|
|
i, bufBytes, len(hitsA), len(hitsB))
|
|
t.Errorf("\n hitsA: %#v,\n hitsB: %#v",
|
|
hitsA, hitsB)
|
|
for id, hitA := range hitsA {
|
|
hitB := hitsB[id]
|
|
if !reflect.DeepEqual(hitA, hitB) {
|
|
t.Errorf("\n driving from hitsA\n hitA: %#v,\n hitB: %#v", hitA, hitB)
|
|
idx, _ := strconv.Atoi(id)
|
|
t.Errorf("\n doc: %d, body: %s", idx, strings.Join(vt.Bodies[idx], " "))
|
|
}
|
|
}
|
|
for id, hitB := range hitsB {
|
|
hitA := hitsA[id]
|
|
if !reflect.DeepEqual(hitA, hitB) {
|
|
t.Errorf("\n driving from hitsB\n hitA: %#v,\n hitB: %#v", hitA, hitB)
|
|
idx, _ := strconv.Atoi(id)
|
|
t.Errorf("\n doc: %d, body: %s", idx, strings.Join(vt.Bodies[idx], " "))
|
|
}
|
|
}
|
|
}
|
|
|
|
resA.Hits = nil
|
|
resB.Hits = nil
|
|
|
|
if !reflect.DeepEqual(resA, resB) {
|
|
resAj, _ := json.Marshal(resA)
|
|
resBj, _ := json.Marshal(resB)
|
|
t.Errorf("search: (%d) %s,\n res mismatch,\n resA: %s,\n resB: %s",
|
|
i, bufBytes, resAj, resBj)
|
|
}
|
|
|
|
if vt.Verbose > 0 {
|
|
fmt.Printf(" Total: (%t) %d\n", resA.Total == resB.Total, resA.Total)
|
|
}
|
|
|
|
vt.TotAttempts++
|
|
}
|
|
}
|
|
}
|
|
|
|
// Organizes the hits into a map keyed by id.
|
|
func hitsById(res *bleve.SearchResult) map[string]*search.DocumentMatch {
|
|
rv := make(map[string]*search.DocumentMatch, len(res.Hits))
|
|
|
|
for _, hit := range res.Hits {
|
|
// Clear out or truncate precision of hit fields that might be
|
|
// different across different indexer implementations.
|
|
hit.Index = ""
|
|
hit.Score = math.Trunc(hit.Score*1000.0) / 1000.0
|
|
hit.IndexInternalID = nil
|
|
hit.HitNumber = 0
|
|
|
|
rv[hit.ID] = hit
|
|
}
|
|
|
|
return rv
|
|
}
|
|
|
|
// -------------------------------------------------------
|
|
|
|
func (vt *VersusTest) run(indexTypeA, kvStoreA, indexTypeB, kvStoreB string,
|
|
cb func(versusTest *VersusTest, searchTemplates []string, idxA, idxB bleve.Index),
|
|
searchTemplates []string) {
|
|
if cb == nil {
|
|
cb = testVersusSearches
|
|
}
|
|
|
|
if searchTemplates == nil {
|
|
searchTemplates = testVersusSearchTemplates
|
|
}
|
|
|
|
if vt.Verbose <= 0 {
|
|
vt.Verbose, _ = strconv.Atoi(os.Getenv("VERBOSE"))
|
|
}
|
|
|
|
dirA := "/tmp/bleve-versus-test-a"
|
|
dirB := "/tmp/bleve-versus-test-b"
|
|
|
|
defer func() {
|
|
_ = os.RemoveAll(dirA)
|
|
_ = os.RemoveAll(dirB)
|
|
}()
|
|
|
|
_ = os.RemoveAll(dirA)
|
|
_ = os.RemoveAll(dirB)
|
|
|
|
imA := vt.makeIndexMapping()
|
|
imB := vt.makeIndexMapping()
|
|
|
|
kvConfigA := map[string]interface{}{}
|
|
kvConfigB := map[string]interface{}{}
|
|
|
|
idxA, err := bleve.NewUsing(dirA, imA, indexTypeA, kvStoreA, kvConfigA)
|
|
if err != nil || idxA == nil {
|
|
vt.t.Fatalf("new using err: %v", err)
|
|
}
|
|
defer func() { _ = idxA.Close() }()
|
|
|
|
idxB, err := bleve.NewUsing(dirB, imB, indexTypeB, kvStoreB, kvConfigB)
|
|
if err != nil || idxB == nil {
|
|
vt.t.Fatalf("new using err: %v", err)
|
|
}
|
|
defer func() { _ = idxB.Close() }()
|
|
|
|
rand.Seed(0)
|
|
|
|
if vt.Bodies == nil {
|
|
vt.Bodies = vt.genBodies()
|
|
}
|
|
|
|
vt.insertBodies(idxA)
|
|
vt.insertBodies(idxB)
|
|
|
|
cb(vt, searchTemplates, idxA, idxB)
|
|
}
|
|
|
|
// -------------------------------------------------------
|
|
|
|
func (vt *VersusTest) makeIndexMapping() mapping.IndexMapping {
|
|
standardFM := bleve.NewTextFieldMapping()
|
|
standardFM.Store = false
|
|
standardFM.IncludeInAll = false
|
|
standardFM.IncludeTermVectors = true
|
|
standardFM.Analyzer = "standard"
|
|
|
|
dm := bleve.NewDocumentMapping()
|
|
dm.AddFieldMappingsAt("title", standardFM)
|
|
dm.AddFieldMappingsAt("body", standardFM)
|
|
|
|
im := bleve.NewIndexMapping()
|
|
im.DefaultMapping = dm
|
|
im.DefaultAnalyzer = "standard"
|
|
|
|
return im
|
|
}
|
|
|
|
func (vt *VersusTest) insertBodies(idx bleve.Index) {
|
|
batch := idx.NewBatch()
|
|
for i, bodyWords := range vt.Bodies {
|
|
title := fmt.Sprintf("%d", i)
|
|
body := strings.Join(bodyWords, " ")
|
|
err := batch.Index(title, map[string]interface{}{"title": title, "body": body})
|
|
if err != nil {
|
|
vt.t.Fatalf("batch.Index err: %v", err)
|
|
}
|
|
if i%vt.BatchSize == 0 {
|
|
err = idx.Batch(batch)
|
|
if err != nil {
|
|
vt.t.Fatalf("batch err: %v", err)
|
|
}
|
|
batch.Reset()
|
|
}
|
|
}
|
|
err := idx.Batch(batch)
|
|
if err != nil {
|
|
vt.t.Fatalf("last batch err: %v", err)
|
|
}
|
|
}
|
|
|
|
func (vt *VersusTest) genBodies() (rv [][]string) {
|
|
for i := 0; i < vt.NumDocs; i++ {
|
|
rv = append(rv, vt.genBody())
|
|
}
|
|
return rv
|
|
}
|
|
|
|
func (vt *VersusTest) genBody() (rv []string) {
|
|
m := rand.Intn(vt.MaxWordsPerDoc)
|
|
for j := 0; j < m; j++ {
|
|
rv = append(rv, vt.genWord(rand.Intn(vt.NumWords)))
|
|
}
|
|
return rv
|
|
}
|
|
|
|
func (vt *VersusTest) genWord(i int) string {
|
|
return fmt.Sprintf("%x", i)
|
|
}
|