Merge pull request #532 from mschoch/phrase2
move phrase search logic into phrase searcher
This commit is contained in:
commit
8f6c032d51
|
@ -81,7 +81,7 @@ func (q *MatchPhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping,
|
|||
tokens := analyzer.Analyze([]byte(q.MatchPhrase))
|
||||
if len(tokens) > 0 {
|
||||
phrase := tokenStreamToPhrase(tokens)
|
||||
phraseQuery := NewPhraseQuery(phrase, field)
|
||||
phraseQuery := NewMultiPhraseQuery(phrase, field)
|
||||
phraseQuery.SetBoost(q.BoostVal.Value())
|
||||
return phraseQuery.Searcher(i, m, options)
|
||||
}
|
||||
|
@ -89,7 +89,7 @@ func (q *MatchPhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping,
|
|||
return noneQuery.Searcher(i, m, options)
|
||||
}
|
||||
|
||||
func tokenStreamToPhrase(tokens analysis.TokenStream) []string {
|
||||
func tokenStreamToPhrase(tokens analysis.TokenStream) [][]string {
|
||||
firstPosition := int(^uint(0) >> 1)
|
||||
lastPosition := 0
|
||||
for _, token := range tokens {
|
||||
|
@ -102,13 +102,10 @@ func tokenStreamToPhrase(tokens analysis.TokenStream) []string {
|
|||
}
|
||||
phraseLen := lastPosition - firstPosition + 1
|
||||
if phraseLen > 0 {
|
||||
rv := make([]string, phraseLen)
|
||||
for i := 0; i < phraseLen; i++ {
|
||||
rv[i] = ""
|
||||
}
|
||||
rv := make([][]string, phraseLen)
|
||||
for _, token := range tokens {
|
||||
pos := token.Position - firstPosition
|
||||
rv[pos] = string(token.Term)
|
||||
rv[pos] = append(rv[pos], string(token.Term))
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
|
|
@ -0,0 +1,101 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package query
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
)
|
||||
|
||||
func TestTokenStreamToPhrase(t *testing.T) {
|
||||
|
||||
tests := []struct {
|
||||
tokens analysis.TokenStream
|
||||
result [][]string
|
||||
}{
|
||||
// empty token stream returns nil
|
||||
{
|
||||
tokens: analysis.TokenStream{},
|
||||
result: nil,
|
||||
},
|
||||
// typical token
|
||||
{
|
||||
tokens: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("one"),
|
||||
Position: 1,
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("two"),
|
||||
Position: 2,
|
||||
},
|
||||
},
|
||||
result: [][]string{[]string{"one"}, []string{"two"}},
|
||||
},
|
||||
// token stream containing a gap (usually from stop words)
|
||||
{
|
||||
tokens: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("wag"),
|
||||
Position: 1,
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("dog"),
|
||||
Position: 3,
|
||||
},
|
||||
},
|
||||
result: [][]string{[]string{"wag"}, nil, []string{"dog"}},
|
||||
},
|
||||
// token stream containing multiple tokens at the same position
|
||||
{
|
||||
tokens: analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: []byte("nia"),
|
||||
Position: 1,
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("onia"),
|
||||
Position: 1,
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("donia"),
|
||||
Position: 1,
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("imo"),
|
||||
Position: 2,
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("nimo"),
|
||||
Position: 2,
|
||||
},
|
||||
&analysis.Token{
|
||||
Term: []byte("ónimo"),
|
||||
Position: 2,
|
||||
},
|
||||
},
|
||||
result: [][]string{[]string{"nia", "onia", "donia"}, []string{"imo", "nimo", "ónimo"}},
|
||||
},
|
||||
}
|
||||
|
||||
for i, test := range tests {
|
||||
actual := tokenStreamToPhrase(test.tokens)
|
||||
if !reflect.DeepEqual(actual, test.result) {
|
||||
t.Fatalf("expected %#v got %#v for test %d", test.result, actual, i)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,80 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package query
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/mapping"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/search/searcher"
|
||||
)
|
||||
|
||||
type MultiPhraseQuery struct {
|
||||
Terms [][]string `json:"terms"`
|
||||
Field string `json:"field,omitempty"`
|
||||
BoostVal *Boost `json:"boost,omitempty"`
|
||||
}
|
||||
|
||||
// NewMultiPhraseQuery creates a new Query for finding
|
||||
// term phrases in the index.
|
||||
// It is like PhraseQuery, but each position in the
|
||||
// phrase may be satisfied by a list of terms
|
||||
// as opposed to just one.
|
||||
// At least one of the terms must exist in the correct
|
||||
// order, at the correct index offsets, in the
|
||||
// specified field. Queried field must have been indexed with
|
||||
// IncludeTermVectors set to true.
|
||||
func NewMultiPhraseQuery(terms [][]string, field string) *MultiPhraseQuery {
|
||||
return &MultiPhraseQuery{
|
||||
Terms: terms,
|
||||
Field: field,
|
||||
}
|
||||
}
|
||||
|
||||
func (q *MultiPhraseQuery) SetBoost(b float64) {
|
||||
boost := Boost(b)
|
||||
q.BoostVal = &boost
|
||||
}
|
||||
|
||||
func (q *MultiPhraseQuery) Boost() float64 {
|
||||
return q.BoostVal.Value()
|
||||
}
|
||||
|
||||
func (q *MultiPhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
|
||||
return searcher.NewMultiPhraseSearcher(i, q.Terms, q.Field, options)
|
||||
}
|
||||
|
||||
func (q *MultiPhraseQuery) Validate() error {
|
||||
if len(q.Terms) < 1 {
|
||||
return fmt.Errorf("phrase query must contain at least one term")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (q *MultiPhraseQuery) UnmarshalJSON(data []byte) error {
|
||||
type _mphraseQuery MultiPhraseQuery
|
||||
tmp := _mphraseQuery{}
|
||||
err := json.Unmarshal(data, &tmp)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
q.Terms = tmp.Terms
|
||||
q.Field = tmp.Field
|
||||
q.BoostVal = tmp.BoostVal
|
||||
return nil
|
||||
}
|
|
@ -25,10 +25,9 @@ import (
|
|||
)
|
||||
|
||||
type PhraseQuery struct {
|
||||
Terms []string `json:"terms"`
|
||||
Field string `json:"field,omitempty"`
|
||||
BoostVal *Boost `json:"boost,omitempty"`
|
||||
termQueries []Query
|
||||
Terms []string `json:"terms"`
|
||||
Field string `json:"field,omitempty"`
|
||||
BoostVal *Boost `json:"boost,omitempty"`
|
||||
}
|
||||
|
||||
// NewPhraseQuery creates a new Query for finding
|
||||
|
@ -38,18 +37,9 @@ type PhraseQuery struct {
|
|||
// specified field. Queried field must have been indexed with
|
||||
// IncludeTermVectors set to true.
|
||||
func NewPhraseQuery(terms []string, field string) *PhraseQuery {
|
||||
termQueries := make([]Query, 0)
|
||||
for _, term := range terms {
|
||||
if term != "" {
|
||||
tq := NewTermQuery(term)
|
||||
tq.SetField(field)
|
||||
termQueries = append(termQueries, tq)
|
||||
}
|
||||
}
|
||||
return &PhraseQuery{
|
||||
Terms: terms,
|
||||
Field: field,
|
||||
termQueries: termQueries,
|
||||
Terms: terms,
|
||||
Field: field,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -63,18 +53,11 @@ func (q *PhraseQuery) Boost() float64 {
|
|||
}
|
||||
|
||||
func (q *PhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
|
||||
options.IncludeTermVectors = true
|
||||
|
||||
conjunctionQuery := NewConjunctionQuery(q.termQueries)
|
||||
conjunctionSearcher, err := conjunctionQuery.Searcher(i, m, options)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return searcher.NewPhraseSearcher(i, conjunctionSearcher.(*searcher.ConjunctionSearcher), q.Terms)
|
||||
return searcher.NewPhraseSearcher(i, q.Terms, q.Field, options)
|
||||
}
|
||||
|
||||
func (q *PhraseQuery) Validate() error {
|
||||
if len(q.termQueries) < 1 {
|
||||
if len(q.Terms) < 1 {
|
||||
return fmt.Errorf("phrase query must contain at least one term")
|
||||
}
|
||||
return nil
|
||||
|
@ -90,9 +73,5 @@ func (q *PhraseQuery) UnmarshalJSON(data []byte) error {
|
|||
q.Terms = tmp.Terms
|
||||
q.Field = tmp.Field
|
||||
q.BoostVal = tmp.BoostVal
|
||||
q.termQueries = make([]Query, len(q.Terms))
|
||||
for i, term := range q.Terms {
|
||||
q.termQueries[i] = &TermQuery{Term: term, FieldVal: q.Field, BoostVal: q.BoostVal}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -123,7 +123,13 @@ func ParseQuery(input []byte) (Query, error) {
|
|||
var rv PhraseQuery
|
||||
err := json.Unmarshal(input, &rv)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
// now try multi-phrase
|
||||
var rv2 MultiPhraseQuery
|
||||
err = json.Unmarshal(input, &rv2)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &rv2, nil
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
@ -301,14 +307,6 @@ func expandQuery(m mapping.IndexMapping, query Query) (Query, error) {
|
|||
return nil, err
|
||||
}
|
||||
return &q, nil
|
||||
case *PhraseQuery:
|
||||
q := *query.(*PhraseQuery)
|
||||
children, err := expandSlice(q.termQueries)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
q.termQueries = children
|
||||
return &q, nil
|
||||
default:
|
||||
return query, nil
|
||||
}
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
package searcher
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
|
@ -27,11 +28,72 @@ type PhraseSearcher struct {
|
|||
queryNorm float64
|
||||
currMust *search.DocumentMatch
|
||||
slop int
|
||||
terms []string
|
||||
terms [][]string
|
||||
initialized bool
|
||||
}
|
||||
|
||||
func NewPhraseSearcher(indexReader index.IndexReader, mustSearcher *ConjunctionSearcher, terms []string) (*PhraseSearcher, error) {
|
||||
func NewPhraseSearcher(indexReader index.IndexReader, terms []string, field string, options search.SearcherOptions) (*PhraseSearcher, error) {
|
||||
// turn flat terms []string into [][]string
|
||||
mterms := make([][]string, len(terms))
|
||||
for i, term := range terms {
|
||||
mterms[i] = []string{term}
|
||||
}
|
||||
return NewMultiPhraseSearcher(indexReader, mterms, field, options)
|
||||
}
|
||||
|
||||
func NewMultiPhraseSearcher(indexReader index.IndexReader, terms [][]string, field string, options search.SearcherOptions) (*PhraseSearcher, error) {
|
||||
options.IncludeTermVectors = true
|
||||
var termPositionSearchers []search.Searcher
|
||||
for _, termPos := range terms {
|
||||
if len(termPos) == 1 && termPos[0] != "" {
|
||||
// single term
|
||||
ts, err := NewTermSearcher(indexReader, termPos[0], field, 1.0, options)
|
||||
if err != nil {
|
||||
// close any searchers already opened
|
||||
for _, ts := range termPositionSearchers {
|
||||
_ = ts.Close()
|
||||
}
|
||||
return nil, fmt.Errorf("phrase searcher error building term searcher: %v", err)
|
||||
}
|
||||
termPositionSearchers = append(termPositionSearchers, ts)
|
||||
} else if len(termPos) > 1 {
|
||||
// multiple terms
|
||||
var termSearchers []search.Searcher
|
||||
for _, term := range termPos {
|
||||
if term == "" {
|
||||
continue
|
||||
}
|
||||
ts, err := NewTermSearcher(indexReader, term, field, 1.0, options)
|
||||
if err != nil {
|
||||
// close any searchers already opened
|
||||
for _, ts := range termPositionSearchers {
|
||||
_ = ts.Close()
|
||||
}
|
||||
return nil, fmt.Errorf("phrase searcher error building term searcher: %v", err)
|
||||
}
|
||||
termSearchers = append(termSearchers, ts)
|
||||
}
|
||||
disjunction, err := NewDisjunctionSearcher(indexReader, termSearchers, 1, options)
|
||||
if err != nil {
|
||||
// close any searchers already opened
|
||||
for _, ts := range termPositionSearchers {
|
||||
_ = ts.Close()
|
||||
}
|
||||
return nil, fmt.Errorf("phrase searcher error building term position disjunction searcher: %v", err)
|
||||
}
|
||||
termPositionSearchers = append(termPositionSearchers, disjunction)
|
||||
}
|
||||
}
|
||||
|
||||
mustSearcher, err := NewConjunctionSearcher(indexReader, termPositionSearchers, options)
|
||||
if err != nil {
|
||||
// close any searchers already opened
|
||||
for _, ts := range termPositionSearchers {
|
||||
_ = ts.Close()
|
||||
}
|
||||
return nil, fmt.Errorf("phrase searcher error building conjunction searcher: %v", err)
|
||||
}
|
||||
|
||||
// build our searcher
|
||||
rv := PhraseSearcher{
|
||||
indexReader: indexReader,
|
||||
|
@ -185,7 +247,7 @@ func (p phrasePath) MergeInto(in search.TermLocationMap) {
|
|||
// this is the primary state being built during the traversal
|
||||
//
|
||||
// returns slice of paths, or nil if invocation did not find any successful paths
|
||||
func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms []string, tlm search.TermLocationMap, p phrasePath, remainingSlop int) []phrasePath {
|
||||
func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms [][]string, tlm search.TermLocationMap, p phrasePath, remainingSlop int) []phrasePath {
|
||||
|
||||
// no more terms
|
||||
if len(phraseTerms) < 1 {
|
||||
|
@ -196,7 +258,7 @@ func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms []str
|
|||
cdr := phraseTerms[1:]
|
||||
|
||||
// empty term is treated as match (continue)
|
||||
if car == "" {
|
||||
if len(car) == 0 || (len(car) == 1 && car[0] == "") {
|
||||
nextPos := prevPos + 1
|
||||
if prevPos == 0 {
|
||||
// if prevPos was 0, don't set it to 1 (as that's not a real abs pos)
|
||||
|
@ -205,26 +267,28 @@ func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms []str
|
|||
return findPhrasePaths(nextPos, ap, cdr, tlm, p, remainingSlop)
|
||||
}
|
||||
|
||||
// locations for this term
|
||||
locations := tlm[car]
|
||||
var rv []phrasePath
|
||||
for _, loc := range locations {
|
||||
if prevPos != 0 && !loc.ArrayPositions.Equals(ap) {
|
||||
// if the array positions are wrong, can't match, try next location
|
||||
continue
|
||||
}
|
||||
// locations for this term
|
||||
for _, carTerm := range car {
|
||||
locations := tlm[carTerm]
|
||||
for _, loc := range locations {
|
||||
if prevPos != 0 && !loc.ArrayPositions.Equals(ap) {
|
||||
// if the array positions are wrong, can't match, try next location
|
||||
continue
|
||||
}
|
||||
|
||||
// compute distance from previous phrase term
|
||||
dist := 0
|
||||
if prevPos != 0 {
|
||||
dist = editDistance(prevPos+1, loc.Pos)
|
||||
}
|
||||
// compute distance from previous phrase term
|
||||
dist := 0
|
||||
if prevPos != 0 {
|
||||
dist = editDistance(prevPos+1, loc.Pos)
|
||||
}
|
||||
|
||||
// if enough slop reamining, continue recursively
|
||||
if prevPos == 0 || (remainingSlop-dist) >= 0 {
|
||||
// this location works, add it to the path (but not for empty term)
|
||||
px := append(p, &phrasePart{term: car, loc: loc})
|
||||
rv = append(rv, findPhrasePaths(loc.Pos, loc.ArrayPositions, cdr, tlm, px, remainingSlop-dist)...)
|
||||
// if enough slop remaining, continue recursively
|
||||
if prevPos == 0 || (remainingSlop-dist) >= 0 {
|
||||
// this location works, add it to the path (but not for empty term)
|
||||
px := append(p, &phrasePart{term: carTerm, loc: loc})
|
||||
rv = append(rv, findPhrasePaths(loc.Pos, loc.ArrayPositions, cdr, tlm, px, remainingSlop-dist)...)
|
||||
}
|
||||
}
|
||||
}
|
||||
return rv
|
||||
|
|
|
@ -36,20 +36,7 @@ func TestPhraseSearch(t *testing.T) {
|
|||
}()
|
||||
|
||||
soptions := search.SearcherOptions{Explain: true, IncludeTermVectors: true}
|
||||
|
||||
angstTermSearcher, err := NewTermSearcher(twoDocIndexReader, "angst", "desc", 1.0, soptions)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
beerTermSearcher, err := NewTermSearcher(twoDocIndexReader, "beer", "desc", 1.0, soptions)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
mustSearcher, err := NewConjunctionSearcher(twoDocIndexReader, []search.Searcher{angstTermSearcher, beerTermSearcher}, soptions)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
phraseSearcher, err := NewPhraseSearcher(twoDocIndexReader, mustSearcher, []string{"angst", "beer"})
|
||||
phraseSearcher, err := NewPhraseSearcher(twoDocIndexReader, []string{"angst", "beer"}, "desc", soptions)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -122,15 +109,68 @@ func TestPhraseSearch(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestMultiPhraseSearch(t *testing.T) {
|
||||
|
||||
soptions := search.SearcherOptions{Explain: true, IncludeTermVectors: true}
|
||||
|
||||
tests := []struct {
|
||||
phrase [][]string
|
||||
docids [][]byte
|
||||
}{
|
||||
{
|
||||
phrase: [][]string{[]string{"angst", "what"}, []string{"beer"}},
|
||||
docids: [][]byte{[]byte("2")},
|
||||
},
|
||||
}
|
||||
|
||||
for i, test := range tests {
|
||||
|
||||
reader, err := twoDocIndex.Reader()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
searcher, err := NewMultiPhraseSearcher(reader, test.phrase, "desc", soptions)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
ctx := &search.SearchContext{
|
||||
DocumentMatchPool: search.NewDocumentMatchPool(searcher.DocumentMatchPoolSize(), 0),
|
||||
}
|
||||
next, err := searcher.Next(ctx)
|
||||
var actualIds [][]byte
|
||||
for err == nil && next != nil {
|
||||
actualIds = append(actualIds, next.IndexInternalID)
|
||||
ctx.DocumentMatchPool.Put(next)
|
||||
next, err = searcher.Next(ctx)
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("error iterating searcher: %v for test %d", err, i)
|
||||
}
|
||||
if !reflect.DeepEqual(test.docids, actualIds) {
|
||||
t.Fatalf("expected ids: %v, got %v", test.docids, actualIds)
|
||||
}
|
||||
|
||||
err = searcher.Close()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
err = reader.Close()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindPhrasePaths(t *testing.T) {
|
||||
tests := []struct {
|
||||
phrase []string
|
||||
phrase [][]string
|
||||
tlm search.TermLocationMap
|
||||
paths []phrasePath
|
||||
}{
|
||||
// simplest matching case
|
||||
{
|
||||
phrase: []string{"cat", "dog"},
|
||||
phrase: [][]string{[]string{"cat"}, []string{"dog"}},
|
||||
tlm: search.TermLocationMap{
|
||||
"cat": search.Locations{
|
||||
&search.Location{
|
||||
|
@ -152,7 +192,7 @@ func TestFindPhrasePaths(t *testing.T) {
|
|||
},
|
||||
// second term missing, no match
|
||||
{
|
||||
phrase: []string{"cat", "dog"},
|
||||
phrase: [][]string{[]string{"cat"}, []string{"dog"}},
|
||||
tlm: search.TermLocationMap{
|
||||
"cat": search.Locations{
|
||||
&search.Location{
|
||||
|
@ -164,7 +204,7 @@ func TestFindPhrasePaths(t *testing.T) {
|
|||
},
|
||||
// second term exists but in wrong position
|
||||
{
|
||||
phrase: []string{"cat", "dog"},
|
||||
phrase: [][]string{[]string{"cat"}, []string{"dog"}},
|
||||
tlm: search.TermLocationMap{
|
||||
"cat": search.Locations{
|
||||
&search.Location{
|
||||
|
@ -181,7 +221,7 @@ func TestFindPhrasePaths(t *testing.T) {
|
|||
},
|
||||
// matches multiple times
|
||||
{
|
||||
phrase: []string{"cat", "dog"},
|
||||
phrase: [][]string{[]string{"cat"}, []string{"dog"}},
|
||||
tlm: search.TermLocationMap{
|
||||
"cat": search.Locations{
|
||||
&search.Location{
|
||||
|
@ -213,7 +253,7 @@ func TestFindPhrasePaths(t *testing.T) {
|
|||
},
|
||||
// match over gaps
|
||||
{
|
||||
phrase: []string{"cat", "", "dog"},
|
||||
phrase: [][]string{[]string{"cat"}, []string{""}, []string{"dog"}},
|
||||
tlm: search.TermLocationMap{
|
||||
"cat": search.Locations{
|
||||
&search.Location{
|
||||
|
@ -235,7 +275,7 @@ func TestFindPhrasePaths(t *testing.T) {
|
|||
},
|
||||
// match with leading ""
|
||||
{
|
||||
phrase: []string{"", "cat", "dog"},
|
||||
phrase: [][]string{[]string{""}, []string{"cat"}, []string{"dog"}},
|
||||
tlm: search.TermLocationMap{
|
||||
"cat": search.Locations{
|
||||
&search.Location{
|
||||
|
@ -257,7 +297,7 @@ func TestFindPhrasePaths(t *testing.T) {
|
|||
},
|
||||
// match with trailing ""
|
||||
{
|
||||
phrase: []string{"cat", "dog", ""},
|
||||
phrase: [][]string{[]string{"cat"}, []string{"dog"}, []string{""}},
|
||||
tlm: search.TermLocationMap{
|
||||
"cat": search.Locations{
|
||||
&search.Location{
|
||||
|
@ -317,18 +357,18 @@ func TestFindPhrasePathsSloppy(t *testing.T) {
|
|||
}
|
||||
|
||||
tests := []struct {
|
||||
phrase []string
|
||||
phrase [][]string
|
||||
paths []phrasePath
|
||||
slop int
|
||||
}{
|
||||
// no match
|
||||
{
|
||||
phrase: []string{"one", "five"},
|
||||
phrase: [][]string{[]string{"one"}, []string{"five"}},
|
||||
slop: 2,
|
||||
},
|
||||
// should match
|
||||
{
|
||||
phrase: []string{"one", "five"},
|
||||
phrase: [][]string{[]string{"one"}, []string{"five"}},
|
||||
slop: 3,
|
||||
paths: []phrasePath{
|
||||
phrasePath{
|
||||
|
@ -339,7 +379,7 @@ func TestFindPhrasePathsSloppy(t *testing.T) {
|
|||
},
|
||||
// slop 0 finds exact match
|
||||
{
|
||||
phrase: []string{"four", "five"},
|
||||
phrase: [][]string{[]string{"four"}, []string{"five"}},
|
||||
slop: 0,
|
||||
paths: []phrasePath{
|
||||
phrasePath{
|
||||
|
@ -350,12 +390,12 @@ func TestFindPhrasePathsSloppy(t *testing.T) {
|
|||
},
|
||||
// slop 0 does not find exact match (reversed)
|
||||
{
|
||||
phrase: []string{"two", "one"},
|
||||
phrase: [][]string{[]string{"two"}, []string{"one"}},
|
||||
slop: 0,
|
||||
},
|
||||
// slop 1 finds exact match
|
||||
{
|
||||
phrase: []string{"one", "two"},
|
||||
phrase: [][]string{[]string{"one"}, []string{"two"}},
|
||||
slop: 1,
|
||||
paths: []phrasePath{
|
||||
phrasePath{
|
||||
|
@ -366,12 +406,12 @@ func TestFindPhrasePathsSloppy(t *testing.T) {
|
|||
},
|
||||
// slop 1 *still* does not find exact match (reversed) requires at least 2
|
||||
{
|
||||
phrase: []string{"two", "one"},
|
||||
phrase: [][]string{[]string{"two"}, []string{"one"}},
|
||||
slop: 1,
|
||||
},
|
||||
// slop 2 does finds exact match reversed
|
||||
{
|
||||
phrase: []string{"two", "one"},
|
||||
phrase: [][]string{[]string{"two"}, []string{"one"}},
|
||||
slop: 2,
|
||||
paths: []phrasePath{
|
||||
phrasePath{
|
||||
|
@ -382,12 +422,12 @@ func TestFindPhrasePathsSloppy(t *testing.T) {
|
|||
},
|
||||
// slop 2 not enough for this
|
||||
{
|
||||
phrase: []string{"three", "one"},
|
||||
phrase: [][]string{[]string{"three"}, []string{"one"}},
|
||||
slop: 2,
|
||||
},
|
||||
// slop should be cumulative
|
||||
{
|
||||
phrase: []string{"one", "three", "five"},
|
||||
phrase: [][]string{[]string{"one"}, []string{"three"}, []string{"five"}},
|
||||
slop: 2,
|
||||
paths: []phrasePath{
|
||||
phrasePath{
|
||||
|
@ -399,12 +439,12 @@ func TestFindPhrasePathsSloppy(t *testing.T) {
|
|||
},
|
||||
// should require 6
|
||||
{
|
||||
phrase: []string{"five", "three", "one"},
|
||||
phrase: [][]string{[]string{"five"}, []string{"three"}, []string{"one"}},
|
||||
slop: 5,
|
||||
},
|
||||
// so lets try 6
|
||||
{
|
||||
phrase: []string{"five", "three", "one"},
|
||||
phrase: [][]string{[]string{"five"}, []string{"three"}, []string{"one"}},
|
||||
slop: 6,
|
||||
paths: []phrasePath{
|
||||
phrasePath{
|
||||
|
@ -450,13 +490,13 @@ func TestFindPhrasePathsSloppyPalyndrome(t *testing.T) {
|
|||
}
|
||||
|
||||
tests := []struct {
|
||||
phrase []string
|
||||
phrase [][]string
|
||||
paths []phrasePath
|
||||
slop int
|
||||
}{
|
||||
// search non palyndrone, exact match
|
||||
{
|
||||
phrase: []string{"two", "three"},
|
||||
phrase: [][]string{[]string{"two"}, []string{"three"}},
|
||||
slop: 0,
|
||||
paths: []phrasePath{
|
||||
phrasePath{
|
||||
|
@ -467,7 +507,7 @@ func TestFindPhrasePathsSloppyPalyndrome(t *testing.T) {
|
|||
},
|
||||
// same with slop 2 (not required) (find it twice)
|
||||
{
|
||||
phrase: []string{"two", "three"},
|
||||
phrase: [][]string{[]string{"two"}, []string{"three"}},
|
||||
slop: 2,
|
||||
paths: []phrasePath{
|
||||
phrasePath{
|
||||
|
@ -482,7 +522,7 @@ func TestFindPhrasePathsSloppyPalyndrome(t *testing.T) {
|
|||
},
|
||||
// palyndrone reversed
|
||||
{
|
||||
phrase: []string{"three", "two"},
|
||||
phrase: [][]string{[]string{"three"}, []string{"two"}},
|
||||
slop: 2,
|
||||
paths: []phrasePath{
|
||||
phrasePath{
|
||||
|
@ -504,3 +544,99 @@ func TestFindPhrasePathsSloppyPalyndrome(t *testing.T) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindMultiPhrasePaths(t *testing.T) {
|
||||
|
||||
tlm := search.TermLocationMap{
|
||||
"cat": search.Locations{
|
||||
&search.Location{
|
||||
Pos: 1,
|
||||
},
|
||||
},
|
||||
"dog": search.Locations{
|
||||
&search.Location{
|
||||
Pos: 2,
|
||||
},
|
||||
},
|
||||
"frog": search.Locations{
|
||||
&search.Location{
|
||||
Pos: 3,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
phrase [][]string
|
||||
paths []phrasePath
|
||||
}{
|
||||
// simplest, one of two possible terms matches
|
||||
{
|
||||
phrase: [][]string{[]string{"cat", "rat"}, []string{"dog"}},
|
||||
paths: []phrasePath{
|
||||
phrasePath{
|
||||
&phrasePart{"cat", &search.Location{Pos: 1}},
|
||||
&phrasePart{"dog", &search.Location{Pos: 2}},
|
||||
},
|
||||
},
|
||||
},
|
||||
// two possible terms, neither work
|
||||
{
|
||||
phrase: [][]string{[]string{"cat", "rat"}, []string{"chicken"}},
|
||||
},
|
||||
// two possible terms, one works, but out of position with next
|
||||
{
|
||||
phrase: [][]string{[]string{"cat", "rat"}, []string{"frog"}},
|
||||
},
|
||||
// matches multiple times, with different pairing
|
||||
{
|
||||
phrase: [][]string{[]string{"cat", "dog"}, []string{"dog", "frog"}},
|
||||
paths: []phrasePath{
|
||||
phrasePath{
|
||||
&phrasePart{"cat", &search.Location{Pos: 1}},
|
||||
&phrasePart{"dog", &search.Location{Pos: 2}},
|
||||
},
|
||||
phrasePath{
|
||||
&phrasePart{"dog", &search.Location{Pos: 2}},
|
||||
&phrasePart{"frog", &search.Location{Pos: 3}},
|
||||
},
|
||||
},
|
||||
},
|
||||
// multi-match over a gap
|
||||
{
|
||||
phrase: [][]string{[]string{"cat", "rat"}, []string{""}, []string{"frog"}},
|
||||
paths: []phrasePath{
|
||||
phrasePath{
|
||||
&phrasePart{"cat", &search.Location{Pos: 1}},
|
||||
&phrasePart{"frog", &search.Location{Pos: 3}},
|
||||
},
|
||||
},
|
||||
},
|
||||
// multi-match over a gap (same as before, but with empty term list)
|
||||
{
|
||||
phrase: [][]string{[]string{"cat", "rat"}, []string{}, []string{"frog"}},
|
||||
paths: []phrasePath{
|
||||
phrasePath{
|
||||
&phrasePart{"cat", &search.Location{Pos: 1}},
|
||||
&phrasePart{"frog", &search.Location{Pos: 3}},
|
||||
},
|
||||
},
|
||||
},
|
||||
// multi-match over a gap (same once again, but nil term list)
|
||||
{
|
||||
phrase: [][]string{[]string{"cat", "rat"}, nil, []string{"frog"}},
|
||||
paths: []phrasePath{
|
||||
phrasePath{
|
||||
&phrasePart{"cat", &search.Location{Pos: 1}},
|
||||
&phrasePart{"frog", &search.Location{Pos: 3}},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for i, test := range tests {
|
||||
actualPaths := findPhrasePaths(0, nil, test.phrase, tlm, nil, 0)
|
||||
if !reflect.DeepEqual(actualPaths, test.paths) {
|
||||
t.Fatalf("expected: %v got %v for test %d", test.paths, actualPaths, i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -372,5 +372,24 @@
|
|||
"total_hits": 0,
|
||||
"hits": []
|
||||
}
|
||||
},
|
||||
{
|
||||
"comment": "multi-phrase terms",
|
||||
"search": {
|
||||
"from": 0,
|
||||
"size": 10,
|
||||
"query": {
|
||||
"field": "body",
|
||||
"terms": [["twenti","thirti"],["thousand"]]
|
||||
}
|
||||
},
|
||||
"result": {
|
||||
"total_hits": 1,
|
||||
"hits": [
|
||||
{
|
||||
"id": "a"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
]
|
||||
|
|
Loading…
Reference in New Issue