Add initial low-level support for multi-phrase
This adds basic multi-phrase support, a shim to keep the top-level working, and unit tests for the new multi-phrase cases.
This commit is contained in:
parent
4e38c49287
commit
9c8e1e82de
|
@ -169,7 +169,12 @@ func (s *PhraseSearcher) checkCurrMustMatch(ctx *search.SearchContext) *search.D
|
|||
// satisfied, and these locations are returned. otherwise 0 and either
|
||||
// a nil or empty TermLocationMap
|
||||
func (s *PhraseSearcher) checkCurrMustMatchField(ctx *search.SearchContext, tlm search.TermLocationMap) (int, search.TermLocationMap) {
|
||||
paths := findPhrasePaths(0, nil, s.terms, tlm, nil, 0)
|
||||
// temporarily turn flat terms []string into [][]string
|
||||
terms := make([][]string, len(s.terms))
|
||||
for i, term := range s.terms {
|
||||
terms[i] = []string{term}
|
||||
}
|
||||
paths := findPhrasePaths(0, nil, terms, tlm, nil, 0)
|
||||
rv := make(search.TermLocationMap, len(s.terms))
|
||||
for _, p := range paths {
|
||||
p.MergeInto(rv)
|
||||
|
@ -203,7 +208,7 @@ func (p phrasePath) MergeInto(in search.TermLocationMap) {
|
|||
// this is the primary state being built during the traversal
|
||||
//
|
||||
// returns slice of paths, or nil if invocation did not find any successful paths
|
||||
func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms []string, tlm search.TermLocationMap, p phrasePath, remainingSlop int) []phrasePath {
|
||||
func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms [][]string, tlm search.TermLocationMap, p phrasePath, remainingSlop int) []phrasePath {
|
||||
|
||||
// no more terms
|
||||
if len(phraseTerms) < 1 {
|
||||
|
@ -214,7 +219,7 @@ func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms []str
|
|||
cdr := phraseTerms[1:]
|
||||
|
||||
// empty term is treated as match (continue)
|
||||
if car == "" {
|
||||
if len(car) == 0 || len(car) == 1 && car[0] == "" {
|
||||
nextPos := prevPos + 1
|
||||
if prevPos == 0 {
|
||||
// if prevPos was 0, don't set it to 1 (as that's not a real abs pos)
|
||||
|
@ -223,26 +228,28 @@ func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms []str
|
|||
return findPhrasePaths(nextPos, ap, cdr, tlm, p, remainingSlop)
|
||||
}
|
||||
|
||||
// locations for this term
|
||||
locations := tlm[car]
|
||||
var rv []phrasePath
|
||||
for _, loc := range locations {
|
||||
if prevPos != 0 && !loc.ArrayPositions.Equals(ap) {
|
||||
// if the array positions are wrong, can't match, try next location
|
||||
continue
|
||||
}
|
||||
// locations for this term
|
||||
for _, carTerm := range car {
|
||||
locations := tlm[carTerm]
|
||||
for _, loc := range locations {
|
||||
if prevPos != 0 && !loc.ArrayPositions.Equals(ap) {
|
||||
// if the array positions are wrong, can't match, try next location
|
||||
continue
|
||||
}
|
||||
|
||||
// compute distance from previous phrase term
|
||||
dist := 0
|
||||
if prevPos != 0 {
|
||||
dist = editDistance(prevPos+1, loc.Pos)
|
||||
}
|
||||
// compute distance from previous phrase term
|
||||
dist := 0
|
||||
if prevPos != 0 {
|
||||
dist = editDistance(prevPos+1, loc.Pos)
|
||||
}
|
||||
|
||||
// if enough slop remaining, continue recursively
|
||||
if prevPos == 0 || (remainingSlop-dist) >= 0 {
|
||||
// this location works, add it to the path (but not for empty term)
|
||||
px := append(p, &phrasePart{term: car, loc: loc})
|
||||
rv = append(rv, findPhrasePaths(loc.Pos, loc.ArrayPositions, cdr, tlm, px, remainingSlop-dist)...)
|
||||
// if enough slop remaining, continue recursively
|
||||
if prevPos == 0 || (remainingSlop-dist) >= 0 {
|
||||
// this location works, add it to the path (but not for empty term)
|
||||
px := append(p, &phrasePart{term: carTerm, loc: loc})
|
||||
rv = append(rv, findPhrasePaths(loc.Pos, loc.ArrayPositions, cdr, tlm, px, remainingSlop-dist)...)
|
||||
}
|
||||
}
|
||||
}
|
||||
return rv
|
||||
|
|
|
@ -111,13 +111,13 @@ func TestPhraseSearch(t *testing.T) {
|
|||
|
||||
func TestFindPhrasePaths(t *testing.T) {
|
||||
tests := []struct {
|
||||
phrase []string
|
||||
phrase [][]string
|
||||
tlm search.TermLocationMap
|
||||
paths []phrasePath
|
||||
}{
|
||||
// simplest matching case
|
||||
{
|
||||
phrase: []string{"cat", "dog"},
|
||||
phrase: [][]string{[]string{"cat"}, []string{"dog"}},
|
||||
tlm: search.TermLocationMap{
|
||||
"cat": search.Locations{
|
||||
&search.Location{
|
||||
|
@ -139,7 +139,7 @@ func TestFindPhrasePaths(t *testing.T) {
|
|||
},
|
||||
// second term missing, no match
|
||||
{
|
||||
phrase: []string{"cat", "dog"},
|
||||
phrase: [][]string{[]string{"cat"}, []string{"dog"}},
|
||||
tlm: search.TermLocationMap{
|
||||
"cat": search.Locations{
|
||||
&search.Location{
|
||||
|
@ -151,7 +151,7 @@ func TestFindPhrasePaths(t *testing.T) {
|
|||
},
|
||||
// second term exists but in wrong position
|
||||
{
|
||||
phrase: []string{"cat", "dog"},
|
||||
phrase: [][]string{[]string{"cat"}, []string{"dog"}},
|
||||
tlm: search.TermLocationMap{
|
||||
"cat": search.Locations{
|
||||
&search.Location{
|
||||
|
@ -168,7 +168,7 @@ func TestFindPhrasePaths(t *testing.T) {
|
|||
},
|
||||
// matches multiple times
|
||||
{
|
||||
phrase: []string{"cat", "dog"},
|
||||
phrase: [][]string{[]string{"cat"}, []string{"dog"}},
|
||||
tlm: search.TermLocationMap{
|
||||
"cat": search.Locations{
|
||||
&search.Location{
|
||||
|
@ -200,7 +200,7 @@ func TestFindPhrasePaths(t *testing.T) {
|
|||
},
|
||||
// match over gaps
|
||||
{
|
||||
phrase: []string{"cat", "", "dog"},
|
||||
phrase: [][]string{[]string{"cat"}, []string{""}, []string{"dog"}},
|
||||
tlm: search.TermLocationMap{
|
||||
"cat": search.Locations{
|
||||
&search.Location{
|
||||
|
@ -222,7 +222,7 @@ func TestFindPhrasePaths(t *testing.T) {
|
|||
},
|
||||
// match with leading ""
|
||||
{
|
||||
phrase: []string{"", "cat", "dog"},
|
||||
phrase: [][]string{[]string{""}, []string{"cat"}, []string{"dog"}},
|
||||
tlm: search.TermLocationMap{
|
||||
"cat": search.Locations{
|
||||
&search.Location{
|
||||
|
@ -244,7 +244,7 @@ func TestFindPhrasePaths(t *testing.T) {
|
|||
},
|
||||
// match with trailing ""
|
||||
{
|
||||
phrase: []string{"cat", "dog", ""},
|
||||
phrase: [][]string{[]string{"cat"}, []string{"dog"}, []string{""}},
|
||||
tlm: search.TermLocationMap{
|
||||
"cat": search.Locations{
|
||||
&search.Location{
|
||||
|
@ -304,18 +304,18 @@ func TestFindPhrasePathsSloppy(t *testing.T) {
|
|||
}
|
||||
|
||||
tests := []struct {
|
||||
phrase []string
|
||||
phrase [][]string
|
||||
paths []phrasePath
|
||||
slop int
|
||||
}{
|
||||
// no match
|
||||
{
|
||||
phrase: []string{"one", "five"},
|
||||
phrase: [][]string{[]string{"one"}, []string{"five"}},
|
||||
slop: 2,
|
||||
},
|
||||
// should match
|
||||
{
|
||||
phrase: []string{"one", "five"},
|
||||
phrase: [][]string{[]string{"one"}, []string{"five"}},
|
||||
slop: 3,
|
||||
paths: []phrasePath{
|
||||
phrasePath{
|
||||
|
@ -326,7 +326,7 @@ func TestFindPhrasePathsSloppy(t *testing.T) {
|
|||
},
|
||||
// slop 0 finds exact match
|
||||
{
|
||||
phrase: []string{"four", "five"},
|
||||
phrase: [][]string{[]string{"four"}, []string{"five"}},
|
||||
slop: 0,
|
||||
paths: []phrasePath{
|
||||
phrasePath{
|
||||
|
@ -337,12 +337,12 @@ func TestFindPhrasePathsSloppy(t *testing.T) {
|
|||
},
|
||||
// slop 0 does not find exact match (reversed)
|
||||
{
|
||||
phrase: []string{"two", "one"},
|
||||
phrase: [][]string{[]string{"two"}, []string{"one"}},
|
||||
slop: 0,
|
||||
},
|
||||
// slop 1 finds exact match
|
||||
{
|
||||
phrase: []string{"one", "two"},
|
||||
phrase: [][]string{[]string{"one"}, []string{"two"}},
|
||||
slop: 1,
|
||||
paths: []phrasePath{
|
||||
phrasePath{
|
||||
|
@ -353,12 +353,12 @@ func TestFindPhrasePathsSloppy(t *testing.T) {
|
|||
},
|
||||
// slop 1 *still* does not find exact match (reversed) requires at least 2
|
||||
{
|
||||
phrase: []string{"two", "one"},
|
||||
phrase: [][]string{[]string{"two"}, []string{"one"}},
|
||||
slop: 1,
|
||||
},
|
||||
// slop 2 does find exact match reversed
|
||||
{
|
||||
phrase: []string{"two", "one"},
|
||||
phrase: [][]string{[]string{"two"}, []string{"one"}},
|
||||
slop: 2,
|
||||
paths: []phrasePath{
|
||||
phrasePath{
|
||||
|
@ -369,12 +369,12 @@ func TestFindPhrasePathsSloppy(t *testing.T) {
|
|||
},
|
||||
// slop 2 not enough for this
|
||||
{
|
||||
phrase: []string{"three", "one"},
|
||||
phrase: [][]string{[]string{"three"}, []string{"one"}},
|
||||
slop: 2,
|
||||
},
|
||||
// slop should be cumulative
|
||||
{
|
||||
phrase: []string{"one", "three", "five"},
|
||||
phrase: [][]string{[]string{"one"}, []string{"three"}, []string{"five"}},
|
||||
slop: 2,
|
||||
paths: []phrasePath{
|
||||
phrasePath{
|
||||
|
@ -386,12 +386,12 @@ func TestFindPhrasePathsSloppy(t *testing.T) {
|
|||
},
|
||||
// should require 6
|
||||
{
|
||||
phrase: []string{"five", "three", "one"},
|
||||
phrase: [][]string{[]string{"five"}, []string{"three"}, []string{"one"}},
|
||||
slop: 5,
|
||||
},
|
||||
// so lets try 6
|
||||
{
|
||||
phrase: []string{"five", "three", "one"},
|
||||
phrase: [][]string{[]string{"five"}, []string{"three"}, []string{"one"}},
|
||||
slop: 6,
|
||||
paths: []phrasePath{
|
||||
phrasePath{
|
||||
|
@ -437,13 +437,13 @@ func TestFindPhrasePathsSloppyPalyndrome(t *testing.T) {
|
|||
}
|
||||
|
||||
tests := []struct {
|
||||
phrase []string
|
||||
phrase [][]string
|
||||
paths []phrasePath
|
||||
slop int
|
||||
}{
|
||||
// search non palindrome, exact match
|
||||
{
|
||||
phrase: []string{"two", "three"},
|
||||
phrase: [][]string{[]string{"two"}, []string{"three"}},
|
||||
slop: 0,
|
||||
paths: []phrasePath{
|
||||
phrasePath{
|
||||
|
@ -454,7 +454,7 @@ func TestFindPhrasePathsSloppyPalyndrome(t *testing.T) {
|
|||
},
|
||||
// same with slop 2 (not required) (find it twice)
|
||||
{
|
||||
phrase: []string{"two", "three"},
|
||||
phrase: [][]string{[]string{"two"}, []string{"three"}},
|
||||
slop: 2,
|
||||
paths: []phrasePath{
|
||||
phrasePath{
|
||||
|
@ -469,7 +469,7 @@ func TestFindPhrasePathsSloppyPalyndrome(t *testing.T) {
|
|||
},
|
||||
// palindrome reversed
|
||||
{
|
||||
phrase: []string{"three", "two"},
|
||||
phrase: [][]string{[]string{"three"}, []string{"two"}},
|
||||
slop: 2,
|
||||
paths: []phrasePath{
|
||||
phrasePath{
|
||||
|
@ -491,3 +491,99 @@ func TestFindPhrasePathsSloppyPalyndrome(t *testing.T) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestFindMultiPhrasePaths exercises findPhrasePaths with multi-phrase input,
// i.e. a [][]string in which each phrase position lists alternative terms.
// Every case runs against the same TermLocationMap ("cat" at position 1,
// "dog" at position 2, "frog" at position 3) and the returned paths are
// compared to the expected ones with reflect.DeepEqual. The table also checks
// that a position holding only "", an empty slice, or nil is skipped as a gap.
func TestFindMultiPhrasePaths(t *testing.T) {
|
||||
|
||||
tlm := search.TermLocationMap{
|
||||
"cat": search.Locations{
|
||||
&search.Location{
|
||||
Pos: 1,
|
||||
},
|
||||
},
|
||||
"dog": search.Locations{
|
||||
&search.Location{
|
||||
Pos: 2,
|
||||
},
|
||||
},
|
||||
"frog": search.Locations{
|
||||
&search.Location{
|
||||
Pos: 3,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
phrase [][]string
|
||||
paths []phrasePath
|
||||
}{
|
||||
// simplest, one of two possible terms matches
|
||||
{
|
||||
phrase: [][]string{[]string{"cat", "rat"}, []string{"dog"}},
|
||||
paths: []phrasePath{
|
||||
phrasePath{
|
||||
&phrasePart{"cat", &search.Location{Pos: 1}},
|
||||
&phrasePart{"dog", &search.Location{Pos: 2}},
|
||||
},
|
||||
},
|
||||
},
|
||||
// two possible terms, neither work
|
||||
{
|
||||
phrase: [][]string{[]string{"cat", "rat"}, []string{"chicken"}},
|
||||
},
|
||||
// two possible terms, one works, but out of position with next
|
||||
{
|
||||
phrase: [][]string{[]string{"cat", "rat"}, []string{"frog"}},
|
||||
},
|
||||
// matches multiple times, with different pairing
|
||||
{
|
||||
phrase: [][]string{[]string{"cat", "dog"}, []string{"dog", "frog"}},
|
||||
paths: []phrasePath{
|
||||
phrasePath{
|
||||
&phrasePart{"cat", &search.Location{Pos: 1}},
|
||||
&phrasePart{"dog", &search.Location{Pos: 2}},
|
||||
},
|
||||
phrasePath{
|
||||
&phrasePart{"dog", &search.Location{Pos: 2}},
|
||||
&phrasePart{"frog", &search.Location{Pos: 3}},
|
||||
},
|
||||
},
|
||||
},
|
||||
// multi-match over a gap
|
||||
{
|
||||
phrase: [][]string{[]string{"cat", "rat"}, []string{""}, []string{"frog"}},
|
||||
paths: []phrasePath{
|
||||
phrasePath{
|
||||
&phrasePart{"cat", &search.Location{Pos: 1}},
|
||||
&phrasePart{"frog", &search.Location{Pos: 3}},
|
||||
},
|
||||
},
|
||||
},
|
||||
// multi-match over a gap (same as before, but with empty term list)
|
||||
{
|
||||
phrase: [][]string{[]string{"cat", "rat"}, []string{}, []string{"frog"}},
|
||||
paths: []phrasePath{
|
||||
phrasePath{
|
||||
&phrasePart{"cat", &search.Location{Pos: 1}},
|
||||
&phrasePart{"frog", &search.Location{Pos: 3}},
|
||||
},
|
||||
},
|
||||
},
|
||||
// multi-match over a gap (same once again, but nil term list)
|
||||
{
|
||||
phrase: [][]string{[]string{"cat", "rat"}, nil, []string{"frog"}},
|
||||
paths: []phrasePath{
|
||||
phrasePath{
|
||||
&phrasePart{"cat", &search.Location{Pos: 1}},
|
||||
&phrasePart{"frog", &search.Location{Pos: 3}},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// the final argument (remainingSlop) is 0, so no slop is available to any case
for i, test := range tests {
|
||||
actualPaths := findPhrasePaths(0, nil, test.phrase, tlm, nil, 0)
|
||||
if !reflect.DeepEqual(actualPaths, test.paths) {
|
||||
t.Fatalf("expected: %v got %v for test %d", test.paths, actualPaths, i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue