0
0
Fork 0

phrase searcher now supports multi-phrase

backwards compatibility maintained through previous constructor
very basic test added (not sufficient)
This commit is contained in:
Marty Schoch 2017-02-10 15:17:50 -05:00
parent 9c8e1e82de
commit 09d00829db
2 changed files with 89 additions and 13 deletions

View File

@ -28,24 +28,52 @@ type PhraseSearcher struct {
queryNorm float64
currMust *search.DocumentMatch
slop int
terms []string
terms [][]string
initialized bool
}
// NewPhraseSearcher builds a PhraseSearcher for a flat list of terms, one
// term per phrase position. It is a thin wrapper that delegates to
// NewMultiPhraseSearcher, passing indexReader, field and options through
// unchanged.
func NewPhraseSearcher(indexReader index.IndexReader, terms []string, field string, options search.SearcherOptions) (*PhraseSearcher, error) {
	// Wrap every term in its own single-element slice so the flat []string
	// takes the [][]string shape the multi-phrase constructor expects.
	termsByPosition := make([][]string, 0, len(terms))
	for _, t := range terms {
		termsByPosition = append(termsByPosition, []string{t})
	}
	return NewMultiPhraseSearcher(indexReader, termsByPosition, field, options)
}
func NewMultiPhraseSearcher(indexReader index.IndexReader, terms [][]string, field string, options search.SearcherOptions) (*PhraseSearcher, error) {
options.IncludeTermVectors = true
termSearchers := make([]search.Searcher, 0)
for _, term := range terms {
if term != "" {
ts, err := NewTermSearcher(indexReader, term, field, 1.0, options)
var termPositionSearchers []search.Searcher
for _, termPos := range terms {
if len(termPos) == 1 && termPos[0] != "" {
// single term
ts, err := NewTermSearcher(indexReader, termPos[0], field, 1.0, options)
if err != nil {
return nil, fmt.Errorf("phrase searcher error building term searcher: %v", err)
}
termSearchers = append(termSearchers, ts)
termPositionSearchers = append(termPositionSearchers, ts)
} else if len(termPos) > 1 {
// multiple terms
var termSearchers []search.Searcher
for _, term := range termPos {
if term == "" {
continue
}
ts, err := NewTermSearcher(indexReader, term, field, 1.0, options)
if err != nil {
return nil, fmt.Errorf("phrase searcher error building term searcher: %v", err)
}
termSearchers = append(termSearchers, ts)
}
disjunction, err := NewDisjunctionSearcher(indexReader, termSearchers, 1, options)
if err != nil {
return nil, fmt.Errorf("phrase searcher error building term position disjunction searcher: %v", err)
}
termPositionSearchers = append(termPositionSearchers, disjunction)
}
}
mustSearcher, err := NewConjunctionSearcher(indexReader, termSearchers, options)
mustSearcher, err := NewConjunctionSearcher(indexReader, termPositionSearchers, options)
if err != nil {
return nil, fmt.Errorf("phrase searcher error building conjunction searcher: %v", err)
}
@ -169,12 +197,7 @@ func (s *PhraseSearcher) checkCurrMustMatch(ctx *search.SearchContext) *search.D
// satisfied, and these locations are returned. otherwise 0 and either
// a nil or empty TermLocationMap
func (s *PhraseSearcher) checkCurrMustMatchField(ctx *search.SearchContext, tlm search.TermLocationMap) (int, search.TermLocationMap) {
// temporarily turn flat terms []sting into [][]string
terms := make([][]string, len(s.terms))
for i, term := range s.terms {
terms[i] = []string{term}
}
paths := findPhrasePaths(0, nil, terms, tlm, nil, 0)
paths := findPhrasePaths(0, nil, s.terms, tlm, nil, 0)
rv := make(search.TermLocationMap, len(s.terms))
for _, p := range paths {
p.MergeInto(rv)

View File

@ -109,6 +109,59 @@ func TestPhraseSearch(t *testing.T) {
}
}
// TestMultiPhraseSearch runs NewMultiPhraseSearcher over the "desc" field of
// twoDocIndex and checks that the internal IDs of the matched documents are
// exactly the expected ones, in order.
func TestMultiPhraseSearch(t *testing.T) {
	soptions := search.SearcherOptions{Explain: true, IncludeTermVectors: true}

	tests := []struct {
		phrase [][]string
		docids [][]byte
	}{
		{
			// position 0 may be "angst" or "what"; position 1 must be "beer"
			phrase: [][]string{{"angst", "what"}, {"beer"}},
			docids: [][]byte{[]byte("2")},
		},
	}

	for i, test := range tests {
		reader, err := twoDocIndex.Reader()
		if err != nil {
			// Without a reader nothing below can run; stop instead of
			// continuing with an unusable value.
			t.Fatal(err)
		}

		searcher, err := NewMultiPhraseSearcher(reader, test.phrase, "desc", soptions)
		if err != nil {
			// searcher is nil on error; using it below would panic.
			t.Fatal(err)
		}

		ctx := &search.SearchContext{
			DocumentMatchPool: search.NewDocumentMatchPool(searcher.DocumentMatchPoolSize(), 0),
		}

		// Drain the searcher, recording each hit's internal ID before
		// handing the match back to the pool.
		next, err := searcher.Next(ctx)
		var actualIds [][]byte
		for err == nil && next != nil {
			actualIds = append(actualIds, next.IndexInternalID)
			ctx.DocumentMatchPool.Put(next)
			next, err = searcher.Next(ctx)
		}

		// Report failures with Errorf rather than Fatalf so the searcher and
		// reader below are still closed when a case fails.
		if err != nil {
			t.Errorf("error iterating searcher: %v for test %d", err, i)
		} else if !reflect.DeepEqual(test.docids, actualIds) {
			t.Errorf("expected ids: %v, got %v for test %d", test.docids, actualIds, i)
		}

		err = searcher.Close()
		if err != nil {
			t.Error(err)
		}
		err = reader.Close()
		if err != nil {
			t.Error(err)
		}
	}
}
func TestFindPhrasePaths(t *testing.T) {
tests := []struct {
phrase [][]string