2014-07-03 20:54:50 +02:00
// Copyright (c) 2013 Couchbase, Inc.
2016-10-02 16:13:14 +02:00
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
2014-09-02 16:54:50 +02:00
2016-10-02 16:29:39 +02:00
package searcher
2014-07-03 20:54:50 +02:00
import (
2016-10-27 11:25:03 +02:00
"reflect"
2014-07-03 20:54:50 +02:00
"testing"
2014-09-01 17:15:38 +02:00
2016-08-01 20:26:50 +02:00
"github.com/blevesearch/bleve/index"
2014-09-01 17:15:38 +02:00
"github.com/blevesearch/bleve/search"
2014-07-03 20:54:50 +02:00
)
func TestPhraseSearch ( t * testing . T ) {
2014-10-31 14:40:23 +01:00
twoDocIndexReader , err := twoDocIndex . Reader ( )
if err != nil {
t . Error ( err )
}
2015-04-08 00:05:41 +02:00
defer func ( ) {
err := twoDocIndexReader . Close ( )
if err != nil {
t . Fatal ( err )
}
} ( )
2014-09-12 23:21:35 +02:00
2017-01-06 02:49:45 +01:00
soptions := search . SearcherOptions { Explain : true , IncludeTermVectors : true }
2017-02-10 18:02:30 +01:00
phraseSearcher , err := NewPhraseSearcher ( twoDocIndexReader , [ ] string { "angst" , "beer" } , "desc" , soptions )
2014-07-30 18:30:38 +02:00
if err != nil {
t . Fatal ( err )
}
2014-07-03 20:54:50 +02:00
tests := [ ] struct {
2016-10-27 11:25:03 +02:00
searcher search . Searcher
results [ ] * search . DocumentMatch
locations map [ string ] map [ string ] [ ] search . Location
fieldterms [ ] [ 2 ] string
2014-07-03 20:54:50 +02:00
} {
{
2014-07-30 18:30:38 +02:00
searcher : phraseSearcher ,
2016-08-01 20:58:02 +02:00
results : [ ] * search . DocumentMatch {
2016-04-03 03:54:33 +02:00
{
2016-08-01 20:58:02 +02:00
IndexInternalID : index . IndexInternalID ( "2" ) ,
Score : 1.0807601687084403 ,
2014-07-03 20:54:50 +02:00
} ,
} ,
2017-02-10 02:02:12 +01:00
locations : map [ string ] map [ string ] [ ] search . Location { "desc" : map [ string ] [ ] search . Location { "beer" : [ ] search . Location { search . Location { Pos : 2 , Start : 6 , End : 10 } } , "angst" : [ ] search . Location { search . Location { Pos : 1 , Start : 0 , End : 5 } } } } ,
2016-10-27 11:25:03 +02:00
fieldterms : [ ] [ 2 ] string { [ 2 ] string { "desc" , "beer" } , [ 2 ] string { "desc" , "angst" } } ,
2014-07-03 20:54:50 +02:00
} ,
}
for testIndex , test := range tests {
2015-04-08 00:05:41 +02:00
defer func ( ) {
err := test . searcher . Close ( )
if err != nil {
t . Fatal ( err )
}
} ( )
2014-07-03 20:54:50 +02:00
2016-08-09 04:21:47 +02:00
ctx := & search . SearchContext {
2016-08-25 01:02:22 +02:00
DocumentMatchPool : search . NewDocumentMatchPool ( test . searcher . DocumentMatchPoolSize ( ) , 0 ) ,
2016-08-09 04:21:47 +02:00
}
next , err := test . searcher . Next ( ctx )
2014-07-03 20:54:50 +02:00
i := 0
for err == nil && next != nil {
if i < len ( test . results ) {
2016-08-01 20:58:02 +02:00
if ! next . IndexInternalID . Equals ( test . results [ i ] . IndexInternalID ) {
2016-10-27 11:25:03 +02:00
t . Errorf ( "expected result %d to have id %s got %s for test %d\n" , i , test . results [ i ] . IndexInternalID , next . IndexInternalID , testIndex )
2014-07-03 20:54:50 +02:00
}
if next . Score != test . results [ i ] . Score {
2016-10-27 11:25:03 +02:00
t . Errorf ( "expected result %d to have score %v got %v for test %d\n" , i , test . results [ i ] . Score , next . Score , testIndex )
t . Logf ( "scoring explanation: %s\n" , next . Expl )
}
for _ , ft := range test . fieldterms {
locs := next . Locations [ ft [ 0 ] ] [ ft [ 1 ] ]
explocs := test . locations [ ft [ 0 ] ] [ ft [ 1 ] ]
if len ( explocs ) != len ( locs ) {
t . Fatalf ( "expected result %d to have %d Locations (%#v) but got %d (%#v) for test %d with field %q and term %q\n" , i , len ( explocs ) , explocs , len ( locs ) , locs , testIndex , ft [ 0 ] , ft [ 1 ] )
}
for ind , exploc := range explocs {
if ! reflect . DeepEqual ( * locs [ ind ] , exploc ) {
t . Errorf ( "expected result %d to have Location %v got %v for test %d\n" , i , exploc , locs [ ind ] , testIndex )
}
}
2014-07-03 20:54:50 +02:00
}
}
2016-10-27 11:25:03 +02:00
2016-08-09 04:21:47 +02:00
ctx . DocumentMatchPool . Put ( next )
next , err = test . searcher . Next ( ctx )
2014-07-03 20:54:50 +02:00
i ++
}
if err != nil {
t . Fatalf ( "error iterating searcher: %v for test %d" , err , testIndex )
}
if len ( test . results ) != i {
t . Errorf ( "expected %d results got %d for test %d" , len ( test . results ) , i , testIndex )
}
}
}
2017-02-03 22:05:21 +01:00
2017-02-10 21:17:50 +01:00
// TestMultiPhraseSearch runs multi-phrase searches (each phrase position may
// list several alternative terms) over the "desc" field of the shared
// twoDocIndex fixture and checks that the internal IDs of the returned
// documents match the expected IDs, in order.
func TestMultiPhraseSearch(t *testing.T) {
	soptions := search.SearcherOptions{Explain: true, IncludeTermVectors: true}

	tests := []struct {
		// phrase holds, per phrase position, the alternative terms accepted there.
		phrase [][]string
		// docids are the expected internal document IDs, in result order.
		docids [][]byte
	}{
		{
			phrase: [][]string{{"angst", "what"}, {"beer"}},
			docids: [][]byte{[]byte("2")},
		},
	}

	for i, test := range tests {
		// Each case runs in its own function literal so the deferred Close
		// calls fire at the end of the case, including when it aborts early
		// through t.Fatal/t.Fatalf.
		func() {
			reader, err := twoDocIndex.Reader()
			if err != nil {
				// Without a reader the case cannot proceed.
				t.Fatal(err)
			}
			defer func() {
				if err := reader.Close(); err != nil {
					t.Error(err)
				}
			}()

			searcher, err := NewMultiPhraseSearcher(reader, test.phrase, "desc", soptions)
			if err != nil {
				// Without a searcher the case cannot proceed.
				t.Fatal(err)
			}
			// Deferred after the reader's Close, so the searcher is closed first.
			defer func() {
				if err := searcher.Close(); err != nil {
					t.Error(err)
				}
			}()

			ctx := &search.SearchContext{
				DocumentMatchPool: search.NewDocumentMatchPool(searcher.DocumentMatchPoolSize(), 0),
			}
			next, err := searcher.Next(ctx)
			var actualIds [][]byte
			for err == nil && next != nil {
				actualIds = append(actualIds, next.IndexInternalID)
				// Return the match to the pool before asking for the next one.
				ctx.DocumentMatchPool.Put(next)
				next, err = searcher.Next(ctx)
			}
			if err != nil {
				t.Fatalf("error iterating searcher: %v for test %d", err, i)
			}
			if !reflect.DeepEqual(test.docids, actualIds) {
				t.Fatalf("expected ids: %v, got %v", test.docids, actualIds)
			}
		}()
	}
}
2017-02-03 22:05:21 +01:00
func TestFindPhrasePaths ( t * testing . T ) {
tests := [ ] struct {
2017-02-10 19:16:05 +01:00
phrase [ ] [ ] string
2017-02-03 22:05:21 +01:00
tlm search . TermLocationMap
paths [ ] phrasePath
} {
// simplest matching case
{
2017-02-10 19:16:05 +01:00
phrase : [ ] [ ] string { [ ] string { "cat" } , [ ] string { "dog" } } ,
2017-02-03 22:05:21 +01:00
tlm : search . TermLocationMap {
"cat" : search . Locations {
& search . Location {
Pos : 1 ,
} ,
} ,
"dog" : search . Locations {
& search . Location {
Pos : 2 ,
} ,
} ,
} ,
paths : [ ] phrasePath {
phrasePath {
& phrasePart { "cat" , & search . Location { Pos : 1 } } ,
& phrasePart { "dog" , & search . Location { Pos : 2 } } ,
} ,
} ,
} ,
// second term missing, no match
{
2017-02-10 19:16:05 +01:00
phrase : [ ] [ ] string { [ ] string { "cat" } , [ ] string { "dog" } } ,
2017-02-03 22:05:21 +01:00
tlm : search . TermLocationMap {
"cat" : search . Locations {
& search . Location {
Pos : 1 ,
} ,
} ,
} ,
paths : nil ,
} ,
// second term exists but in wrong position
{
2017-02-10 19:16:05 +01:00
phrase : [ ] [ ] string { [ ] string { "cat" } , [ ] string { "dog" } } ,
2017-02-03 22:05:21 +01:00
tlm : search . TermLocationMap {
"cat" : search . Locations {
& search . Location {
Pos : 1 ,
} ,
} ,
"dog" : search . Locations {
& search . Location {
Pos : 3 ,
} ,
} ,
} ,
paths : nil ,
} ,
// matches multiple times
{
2017-02-10 19:16:05 +01:00
phrase : [ ] [ ] string { [ ] string { "cat" } , [ ] string { "dog" } } ,
2017-02-03 22:05:21 +01:00
tlm : search . TermLocationMap {
"cat" : search . Locations {
& search . Location {
Pos : 1 ,
} ,
& search . Location {
Pos : 8 ,
} ,
} ,
"dog" : search . Locations {
& search . Location {
Pos : 2 ,
} ,
& search . Location {
Pos : 9 ,
} ,
} ,
} ,
paths : [ ] phrasePath {
phrasePath {
& phrasePart { "cat" , & search . Location { Pos : 1 } } ,
& phrasePart { "dog" , & search . Location { Pos : 2 } } ,
} ,
phrasePath {
& phrasePart { "cat" , & search . Location { Pos : 8 } } ,
& phrasePart { "dog" , & search . Location { Pos : 9 } } ,
} ,
} ,
} ,
// match over gaps
{
2017-02-10 19:16:05 +01:00
phrase : [ ] [ ] string { [ ] string { "cat" } , [ ] string { "" } , [ ] string { "dog" } } ,
2017-02-03 22:05:21 +01:00
tlm : search . TermLocationMap {
"cat" : search . Locations {
& search . Location {
Pos : 1 ,
} ,
} ,
"dog" : search . Locations {
& search . Location {
Pos : 3 ,
} ,
} ,
} ,
paths : [ ] phrasePath {
phrasePath {
& phrasePart { "cat" , & search . Location { Pos : 1 } } ,
& phrasePart { "dog" , & search . Location { Pos : 3 } } ,
} ,
} ,
} ,
2017-02-09 21:59:51 +01:00
// match with leading ""
{
2017-02-10 19:16:05 +01:00
phrase : [ ] [ ] string { [ ] string { "" } , [ ] string { "cat" } , [ ] string { "dog" } } ,
2017-02-09 21:59:51 +01:00
tlm : search . TermLocationMap {
"cat" : search . Locations {
& search . Location {
Pos : 2 ,
} ,
} ,
"dog" : search . Locations {
& search . Location {
Pos : 3 ,
} ,
} ,
} ,
paths : [ ] phrasePath {
phrasePath {
& phrasePart { "cat" , & search . Location { Pos : 2 } } ,
& phrasePart { "dog" , & search . Location { Pos : 3 } } ,
} ,
} ,
} ,
// match with trailing ""
{
2017-02-10 19:16:05 +01:00
phrase : [ ] [ ] string { [ ] string { "cat" } , [ ] string { "dog" } , [ ] string { "" } } ,
2017-02-09 21:59:51 +01:00
tlm : search . TermLocationMap {
"cat" : search . Locations {
& search . Location {
Pos : 2 ,
} ,
} ,
"dog" : search . Locations {
& search . Location {
Pos : 3 ,
} ,
} ,
} ,
paths : [ ] phrasePath {
phrasePath {
& phrasePart { "cat" , & search . Location { Pos : 2 } } ,
& phrasePart { "dog" , & search . Location { Pos : 3 } } ,
} ,
} ,
} ,
}
for i , test := range tests {
actualPaths := findPhrasePaths ( 0 , nil , test . phrase , test . tlm , nil , 0 )
if ! reflect . DeepEqual ( actualPaths , test . paths ) {
t . Fatalf ( "expected: %v got %v for test %d" , test . paths , actualPaths , i )
}
}
}
func TestFindPhrasePathsSloppy ( t * testing . T ) {
tlm := search . TermLocationMap {
"one" : search . Locations {
& search . Location {
Pos : 1 ,
} ,
} ,
"two" : search . Locations {
& search . Location {
Pos : 2 ,
} ,
} ,
"three" : search . Locations {
& search . Location {
Pos : 3 ,
} ,
} ,
"four" : search . Locations {
& search . Location {
Pos : 4 ,
} ,
} ,
"five" : search . Locations {
& search . Location {
Pos : 5 ,
} ,
} ,
}
tests := [ ] struct {
2017-02-10 19:16:05 +01:00
phrase [ ] [ ] string
2017-02-09 21:59:51 +01:00
paths [ ] phrasePath
slop int
} {
// no match
{
2017-02-10 19:16:05 +01:00
phrase : [ ] [ ] string { [ ] string { "one" } , [ ] string { "five" } } ,
2017-02-09 21:59:51 +01:00
slop : 2 ,
} ,
// should match
{
2017-02-10 19:16:05 +01:00
phrase : [ ] [ ] string { [ ] string { "one" } , [ ] string { "five" } } ,
2017-02-09 21:59:51 +01:00
slop : 3 ,
paths : [ ] phrasePath {
phrasePath {
& phrasePart { "one" , & search . Location { Pos : 1 } } ,
& phrasePart { "five" , & search . Location { Pos : 5 } } ,
} ,
} ,
} ,
// slop 0 finds exact match
{
2017-02-10 19:16:05 +01:00
phrase : [ ] [ ] string { [ ] string { "four" } , [ ] string { "five" } } ,
2017-02-09 21:59:51 +01:00
slop : 0 ,
paths : [ ] phrasePath {
phrasePath {
& phrasePart { "four" , & search . Location { Pos : 4 } } ,
& phrasePart { "five" , & search . Location { Pos : 5 } } ,
} ,
} ,
} ,
// slop 0 does not find exact match (reversed)
{
2017-02-10 19:16:05 +01:00
phrase : [ ] [ ] string { [ ] string { "two" } , [ ] string { "one" } } ,
2017-02-09 21:59:51 +01:00
slop : 0 ,
} ,
// slop 1 finds exact match
{
2017-02-10 19:16:05 +01:00
phrase : [ ] [ ] string { [ ] string { "one" } , [ ] string { "two" } } ,
2017-02-09 21:59:51 +01:00
slop : 1 ,
paths : [ ] phrasePath {
phrasePath {
& phrasePart { "one" , & search . Location { Pos : 1 } } ,
& phrasePart { "two" , & search . Location { Pos : 2 } } ,
} ,
} ,
} ,
// slop 1 *still* does not find exact match (reversed) requires at least 2
{
2017-02-10 19:16:05 +01:00
phrase : [ ] [ ] string { [ ] string { "two" } , [ ] string { "one" } } ,
2017-02-09 21:59:51 +01:00
slop : 1 ,
} ,
// slop 2 does finds exact match reversed
{
2017-02-10 19:16:05 +01:00
phrase : [ ] [ ] string { [ ] string { "two" } , [ ] string { "one" } } ,
2017-02-09 21:59:51 +01:00
slop : 2 ,
paths : [ ] phrasePath {
phrasePath {
& phrasePart { "two" , & search . Location { Pos : 2 } } ,
& phrasePart { "one" , & search . Location { Pos : 1 } } ,
} ,
} ,
} ,
// slop 2 not enough for this
{
2017-02-10 19:16:05 +01:00
phrase : [ ] [ ] string { [ ] string { "three" } , [ ] string { "one" } } ,
2017-02-09 21:59:51 +01:00
slop : 2 ,
} ,
// slop should be cumulative
{
2017-02-10 19:16:05 +01:00
phrase : [ ] [ ] string { [ ] string { "one" } , [ ] string { "three" } , [ ] string { "five" } } ,
2017-02-09 21:59:51 +01:00
slop : 2 ,
paths : [ ] phrasePath {
phrasePath {
& phrasePart { "one" , & search . Location { Pos : 1 } } ,
& phrasePart { "three" , & search . Location { Pos : 3 } } ,
& phrasePart { "five" , & search . Location { Pos : 5 } } ,
} ,
} ,
} ,
// should require 6
{
2017-02-10 19:16:05 +01:00
phrase : [ ] [ ] string { [ ] string { "five" } , [ ] string { "three" } , [ ] string { "one" } } ,
2017-02-09 21:59:51 +01:00
slop : 5 ,
} ,
// so lets try 6
{
2017-02-10 19:16:05 +01:00
phrase : [ ] [ ] string { [ ] string { "five" } , [ ] string { "three" } , [ ] string { "one" } } ,
2017-02-09 21:59:51 +01:00
slop : 6 ,
paths : [ ] phrasePath {
phrasePath {
& phrasePart { "five" , & search . Location { Pos : 5 } } ,
& phrasePart { "three" , & search . Location { Pos : 3 } } ,
& phrasePart { "one" , & search . Location { Pos : 1 } } ,
} ,
} ,
} ,
}
for i , test := range tests {
actualPaths := findPhrasePaths ( 0 , nil , test . phrase , tlm , nil , test . slop )
if ! reflect . DeepEqual ( actualPaths , test . paths ) {
t . Fatalf ( "expected: %v got %v for test %d" , test . paths , actualPaths , i )
}
}
}
func TestFindPhrasePathsSloppyPalyndrome ( t * testing . T ) {
tlm := search . TermLocationMap {
"one" : search . Locations {
& search . Location {
Pos : 1 ,
} ,
& search . Location {
Pos : 5 ,
} ,
} ,
"two" : search . Locations {
& search . Location {
Pos : 2 ,
} ,
& search . Location {
Pos : 4 ,
} ,
} ,
"three" : search . Locations {
& search . Location {
Pos : 3 ,
} ,
} ,
}
tests := [ ] struct {
2017-02-10 19:16:05 +01:00
phrase [ ] [ ] string
2017-02-09 21:59:51 +01:00
paths [ ] phrasePath
slop int
} {
// search non palyndrone, exact match
{
2017-02-10 19:16:05 +01:00
phrase : [ ] [ ] string { [ ] string { "two" } , [ ] string { "three" } } ,
2017-02-09 21:59:51 +01:00
slop : 0 ,
paths : [ ] phrasePath {
phrasePath {
& phrasePart { "two" , & search . Location { Pos : 2 } } ,
& phrasePart { "three" , & search . Location { Pos : 3 } } ,
} ,
} ,
} ,
// same with slop 2 (not required) (find it twice)
{
2017-02-10 19:16:05 +01:00
phrase : [ ] [ ] string { [ ] string { "two" } , [ ] string { "three" } } ,
2017-02-09 21:59:51 +01:00
slop : 2 ,
paths : [ ] phrasePath {
phrasePath {
& phrasePart { "two" , & search . Location { Pos : 2 } } ,
& phrasePart { "three" , & search . Location { Pos : 3 } } ,
} ,
phrasePath {
& phrasePart { "two" , & search . Location { Pos : 4 } } ,
& phrasePart { "three" , & search . Location { Pos : 3 } } ,
} ,
} ,
} ,
// palyndrone reversed
{
2017-02-10 19:16:05 +01:00
phrase : [ ] [ ] string { [ ] string { "three" } , [ ] string { "two" } } ,
2017-02-09 21:59:51 +01:00
slop : 2 ,
paths : [ ] phrasePath {
phrasePath {
& phrasePart { "three" , & search . Location { Pos : 3 } } ,
& phrasePart { "two" , & search . Location { Pos : 2 } } ,
} ,
phrasePath {
& phrasePart { "three" , & search . Location { Pos : 3 } } ,
& phrasePart { "two" , & search . Location { Pos : 4 } } ,
} ,
} ,
} ,
2017-02-03 22:05:21 +01:00
}
2017-02-09 21:59:51 +01:00
for i , test := range tests {
actualPaths := findPhrasePaths ( 0 , nil , test . phrase , tlm , nil , test . slop )
2017-02-03 22:05:21 +01:00
if ! reflect . DeepEqual ( actualPaths , test . paths ) {
2017-02-09 21:59:51 +01:00
t . Fatalf ( "expected: %v got %v for test %d" , test . paths , actualPaths , i )
2017-02-03 22:05:21 +01:00
}
}
}
2017-02-10 19:16:05 +01:00
// TestFindMultiPhrasePaths checks findPhrasePaths for multi-phrases, where a
// phrase position may list several alternative terms (or none at all, to
// express a gap). All cases share one document with "cat"@1, "dog"@2 and
// "frog"@3, and the last argument to findPhrasePaths is always 0.
func TestFindMultiPhrasePaths(t *testing.T) {
	// Shared term locations: "cat"@1, "dog"@2, "frog"@3.
	locations := search.TermLocationMap{
		"cat":  search.Locations{&search.Location{Pos: 1}},
		"dog":  search.Locations{&search.Location{Pos: 2}},
		"frog": search.Locations{&search.Location{Pos: 3}},
	}

	type testCase struct {
		phrase [][]string   // alternative terms per phrase position
		paths  []phrasePath // expected result (nil when nothing matches)
	}

	cases := []testCase{
		// simplest, one of two possible terms matches
		{
			phrase: [][]string{{"cat", "rat"}, {"dog"}},
			paths: []phrasePath{
				{
					&phrasePart{"cat", &search.Location{Pos: 1}},
					&phrasePart{"dog", &search.Location{Pos: 2}},
				},
			},
		},
		// two possible terms, neither work
		{
			phrase: [][]string{{"cat", "rat"}, {"chicken"}},
		},
		// two possible terms, one works, but out of position with next
		{
			phrase: [][]string{{"cat", "rat"}, {"frog"}},
		},
		// matches multiple times, with different pairing
		{
			phrase: [][]string{{"cat", "dog"}, {"dog", "frog"}},
			paths: []phrasePath{
				{
					&phrasePart{"cat", &search.Location{Pos: 1}},
					&phrasePart{"dog", &search.Location{Pos: 2}},
				},
				{
					&phrasePart{"dog", &search.Location{Pos: 2}},
					&phrasePart{"frog", &search.Location{Pos: 3}},
				},
			},
		},
		// multi-match over a gap (gap written as a single empty term)
		{
			phrase: [][]string{{"cat", "rat"}, {""}, {"frog"}},
			paths: []phrasePath{
				{
					&phrasePart{"cat", &search.Location{Pos: 1}},
					&phrasePart{"frog", &search.Location{Pos: 3}},
				},
			},
		},
		// multi-match over a gap (same as before, but with empty term list)
		{
			phrase: [][]string{{"cat", "rat"}, {}, {"frog"}},
			paths: []phrasePath{
				{
					&phrasePart{"cat", &search.Location{Pos: 1}},
					&phrasePart{"frog", &search.Location{Pos: 3}},
				},
			},
		},
		// multi-match over a gap (same once again, but nil term list)
		{
			phrase: [][]string{{"cat", "rat"}, nil, {"frog"}},
			paths: []phrasePath{
				{
					&phrasePart{"cat", &search.Location{Pos: 1}},
					&phrasePart{"frog", &search.Location{Pos: 3}},
				},
			},
		},
	}

	for idx, tc := range cases {
		got := findPhrasePaths(0, nil, tc.phrase, locations, nil, 0)
		if reflect.DeepEqual(got, tc.paths) {
			continue
		}
		// Stop at the first mismatch.
		t.Fatalf("expected: %v got %v for test %d", tc.paths, got, idx)
	}
}