parent
316970df13
commit
67beaca6d6
2
query.go
2
query.go
|
@ -107,7 +107,7 @@ func ParseQuery(input []byte) (Query, error) {
|
|||
if rv.Boost() == 0 {
|
||||
rv.SetBoost(1)
|
||||
}
|
||||
for _, tq := range rv.Terms {
|
||||
for _, tq := range rv.TermQueries {
|
||||
if tq.Boost() == 0 {
|
||||
tq.SetBoost(1)
|
||||
}
|
||||
|
|
|
@ -12,6 +12,7 @@ package bleve
|
|||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
)
|
||||
|
@ -56,7 +57,6 @@ func (q *matchPhraseQuery) SetField(f string) Query {
|
|||
}
|
||||
|
||||
func (q *matchPhraseQuery) Searcher(i index.IndexReader, m *IndexMapping, explain bool) (search.Searcher, error) {
|
||||
|
||||
field := q.FieldVal
|
||||
if q.FieldVal == "" {
|
||||
field = m.DefaultField
|
||||
|
@ -75,18 +75,40 @@ func (q *matchPhraseQuery) Searcher(i index.IndexReader, m *IndexMapping, explai
|
|||
|
||||
tokens := analyzer.Analyze([]byte(q.MatchPhrase))
|
||||
if len(tokens) > 0 {
|
||||
ts := make([]string, len(tokens))
|
||||
for i, token := range tokens {
|
||||
ts[i] = string(token.Term)
|
||||
}
|
||||
|
||||
phraseQuery := NewPhraseQuery(ts, field).SetBoost(q.BoostVal)
|
||||
phrase := tokenStreamToPhrase(tokens)
|
||||
phraseQuery := NewPhraseQuery(phrase, field).SetBoost(q.BoostVal)
|
||||
return phraseQuery.Searcher(i, m, explain)
|
||||
}
|
||||
noneQuery := NewMatchNoneQuery()
|
||||
return noneQuery.Searcher(i, m, explain)
|
||||
}
|
||||
|
||||
func tokenStreamToPhrase(tokens analysis.TokenStream) []string {
|
||||
firstPosition := int(^uint(0) >> 1)
|
||||
lastPosition := 0
|
||||
for _, token := range tokens {
|
||||
if token.Position < firstPosition {
|
||||
firstPosition = token.Position
|
||||
}
|
||||
if token.Position > lastPosition {
|
||||
lastPosition = token.Position
|
||||
}
|
||||
}
|
||||
phraseLen := lastPosition - firstPosition + 1
|
||||
if phraseLen > 0 {
|
||||
rv := make([]string, phraseLen)
|
||||
for i := 0; i < phraseLen; i++ {
|
||||
rv[i] = ""
|
||||
}
|
||||
for _, token := range tokens {
|
||||
pos := token.Position - firstPosition
|
||||
rv[pos] = string(token.Term)
|
||||
}
|
||||
return rv
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Validate always returns nil: no checks are performed on a match phrase
// query here.
func (q *matchPhraseQuery) Validate() error {
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -19,8 +19,9 @@ import (
|
|||
)
|
||||
|
||||
type phraseQuery struct {
|
||||
Terms []Query `json:"terms"`
|
||||
BoostVal float64 `json:"boost,omitempty"`
|
||||
TermQueries []Query `json:"terms"`
|
||||
BoostVal float64 `json:"boost,omitempty"`
|
||||
terms []string
|
||||
}
|
||||
|
||||
// NewPhraseQuery creates a new Query for finding
|
||||
|
@ -29,13 +30,16 @@ type phraseQuery struct {
|
|||
// order, at the correct index offsets, in the
|
||||
// specified field.
|
||||
func NewPhraseQuery(terms []string, field string) *phraseQuery {
|
||||
termQueries := make([]Query, len(terms))
|
||||
for i, term := range terms {
|
||||
termQueries[i] = NewTermQuery(term).SetField(field)
|
||||
termQueries := make([]Query, 0)
|
||||
for _, term := range terms {
|
||||
if term != "" {
|
||||
termQueries = append(termQueries, NewTermQuery(term).SetField(field))
|
||||
}
|
||||
}
|
||||
return &phraseQuery{
|
||||
Terms: termQueries,
|
||||
BoostVal: 1.0,
|
||||
TermQueries: termQueries,
|
||||
BoostVal: 1.0,
|
||||
terms: terms,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -50,21 +54,16 @@ func (q *phraseQuery) SetBoost(b float64) Query {
|
|||
|
||||
func (q *phraseQuery) Searcher(i index.IndexReader, m *IndexMapping, explain bool) (search.Searcher, error) {
|
||||
|
||||
terms := make([]string, len(q.Terms))
|
||||
for i, term := range q.Terms {
|
||||
terms[i] = term.(*termQuery).Term
|
||||
}
|
||||
|
||||
conjunctionQuery := NewConjunctionQuery(q.Terms)
|
||||
conjunctionQuery := NewConjunctionQuery(q.TermQueries)
|
||||
conjunctionSearcher, err := conjunctionQuery.Searcher(i, m, explain)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return searchers.NewPhraseSearcher(i, conjunctionSearcher.(*searchers.ConjunctionSearcher), terms)
|
||||
return searchers.NewPhraseSearcher(i, conjunctionSearcher.(*searchers.ConjunctionSearcher), q.terms)
|
||||
}
|
||||
|
||||
func (q *phraseQuery) Validate() error {
|
||||
if len(q.Terms) < 1 {
|
||||
if len(q.TermQueries) < 1 {
|
||||
return ErrorPhraseQueryNoTerms
|
||||
}
|
||||
return nil
|
||||
|
@ -79,17 +78,19 @@ func (q *phraseQuery) UnmarshalJSON(data []byte) error {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
q.Terms = make([]Query, len(tmp.Terms))
|
||||
q.TermQueries = make([]Query, len(tmp.Terms))
|
||||
q.terms = make([]string, 0)
|
||||
for i, term := range tmp.Terms {
|
||||
query, err := ParseQuery(term)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
q.Terms[i] = query
|
||||
_, isTermQuery := query.(*termQuery)
|
||||
q.TermQueries[i] = query
|
||||
tq, isTermQuery := query.(*termQuery)
|
||||
if !isTermQuery {
|
||||
return fmt.Errorf("phrase query can only contain term queries")
|
||||
}
|
||||
q.terms = append(q.terms, tq.Term)
|
||||
}
|
||||
q.BoostVal = tmp.BoostVal
|
||||
if q.BoostVal == 0 {
|
||||
|
|
|
@ -27,7 +27,6 @@ type PhraseSearcher struct {
|
|||
}
|
||||
|
||||
func NewPhraseSearcher(indexReader index.IndexReader, mustSearcher *ConjunctionSearcher, terms []string) (*PhraseSearcher, error) {
|
||||
|
||||
// build our searcher
|
||||
rv := PhraseSearcher{
|
||||
indexReader: indexReader,
|
||||
|
@ -112,7 +111,7 @@ func (s *PhraseSearcher) Next() (*search.DocumentMatch, error) {
|
|||
for _, location := range locations {
|
||||
crvtlm := make(search.TermLocationMap, 0)
|
||||
INNER:
|
||||
for i := 0; i < len(s.mustSearcher.searchers); i++ {
|
||||
for i := 0; i < len(s.terms); i++ {
|
||||
nextTerm := s.terms[i]
|
||||
if nextTerm != "" {
|
||||
// look through all this terms locations
|
||||
|
|
|
@ -11,22 +11,43 @@ package test
|
|||
|
||||
import (
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"regexp"
|
||||
"testing"
|
||||
|
||||
"github.com/blevesearch/bleve"
|
||||
)
|
||||
|
||||
var dataset = flag.String("dataset", "", "only test datasets matching this regex")
|
||||
var keepIndex = flag.Bool("keepIndex", false, "keep the index after testing")
|
||||
|
||||
func TestIntegration(t *testing.T) {
|
||||
|
||||
flag.Parse()
|
||||
|
||||
var err error
|
||||
var datasetRegexp *regexp.Regexp
|
||||
if *dataset != "" {
|
||||
datasetRegexp, err = regexp.Compile(*dataset)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
fis, err := ioutil.ReadDir("tests")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
for _, fi := range fis {
|
||||
if datasetRegexp != nil {
|
||||
if !datasetRegexp.MatchString(fi.Name()) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
if fi.IsDir() {
|
||||
t.Logf("Running test: %s", fi.Name())
|
||||
runTestDir(t, "tests"+string(filepath.Separator)+fi.Name())
|
||||
|
@ -49,7 +70,9 @@ func runTestDir(t *testing.T, dir string) {
|
|||
}
|
||||
|
||||
// open new index
|
||||
defer os.RemoveAll("test.bleve")
|
||||
if !*keepIndex {
|
||||
defer os.RemoveAll("test.bleve")
|
||||
}
|
||||
index, err := bleve.New("test.bleve", &mapping)
|
||||
if err != nil {
|
||||
t.Errorf("error creating new index: %v", err)
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
{
|
||||
"body": "Twenty Thousand Leagues Under The Sea"
|
||||
}
|
|
@ -0,0 +1,23 @@
|
|||
{
|
||||
"types": {
|
||||
"book": {
|
||||
"properties": {
|
||||
"body": {
|
||||
"fields": [
|
||||
{
|
||||
"include_term_vectors": true,
|
||||
"include_in_all": true,
|
||||
"index": true,
|
||||
"store": true,
|
||||
"analyzer": "en",
|
||||
"type": "text"
|
||||
}
|
||||
],
|
||||
"dynamic": true,
|
||||
"enabled": true
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"default_type": "book"
|
||||
}
|
|
@ -0,0 +1,326 @@
|
|||
[
|
||||
{
|
||||
"search": {
|
||||
"from": 0,
|
||||
"size": 10,
|
||||
"query": {
|
||||
"field": "body",
|
||||
"match_phrase": "Twenty"
|
||||
}
|
||||
},
|
||||
"result": {
|
||||
"total_hits": 1,
|
||||
"hits": [
|
||||
{
|
||||
"id": "a"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"search": {
|
||||
"from": 0,
|
||||
"size": 10,
|
||||
"query": {
|
||||
"field": "body",
|
||||
"match_phrase": "Twenty Thousand"
|
||||
}
|
||||
},
|
||||
"result": {
|
||||
"total_hits": 1,
|
||||
"hits": [
|
||||
{
|
||||
"id": "a"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"search": {
|
||||
"from": 0,
|
||||
"size": 10,
|
||||
"query": {
|
||||
"field": "body",
|
||||
"match_phrase": "Twenty Thousand Leagues"
|
||||
}
|
||||
},
|
||||
"result": {
|
||||
"total_hits": 1,
|
||||
"hits": [
|
||||
{
|
||||
"id": "a"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"search": {
|
||||
"from": 0,
|
||||
"size": 10,
|
||||
"query": {
|
||||
"field": "body",
|
||||
"match_phrase": "Twenty Thousand Leagues Under"
|
||||
}
|
||||
},
|
||||
"result": {
|
||||
"total_hits": 1,
|
||||
"hits": [
|
||||
{
|
||||
"id": "a"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"search": {
|
||||
"from": 0,
|
||||
"size": 10,
|
||||
"query": {
|
||||
"field": "body",
|
||||
"match_phrase": "Twenty Thousand Leagues Under the"
|
||||
}
|
||||
},
|
||||
"result": {
|
||||
"total_hits": 1,
|
||||
"hits": [
|
||||
{
|
||||
"id": "a"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"search": {
|
||||
"from": 0,
|
||||
"size": 10,
|
||||
"query": {
|
||||
"field": "body",
|
||||
"match_phrase": "Twenty Thousand Leagues Under the Sea"
|
||||
}
|
||||
},
|
||||
"result": {
|
||||
"total_hits": 1,
|
||||
"hits": [
|
||||
{
|
||||
"id": "a"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"search": {
|
||||
"from": 0,
|
||||
"size": 10,
|
||||
"query": {
|
||||
"field": "body",
|
||||
"match_phrase": "Thousand"
|
||||
}
|
||||
},
|
||||
"result": {
|
||||
"total_hits": 1,
|
||||
"hits": [
|
||||
{
|
||||
"id": "a"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"search": {
|
||||
"from": 0,
|
||||
"size": 10,
|
||||
"query": {
|
||||
"field": "body",
|
||||
"match_phrase": "Thousand Leagues"
|
||||
}
|
||||
},
|
||||
"result": {
|
||||
"total_hits": 1,
|
||||
"hits": [
|
||||
{
|
||||
"id": "a"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"search": {
|
||||
"from": 0,
|
||||
"size": 10,
|
||||
"query": {
|
||||
"field": "body",
|
||||
"match_phrase": "Thousand Leagues Under"
|
||||
}
|
||||
},
|
||||
"result": {
|
||||
"total_hits": 1,
|
||||
"hits": [
|
||||
{
|
||||
"id": "a"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"search": {
|
||||
"from": 0,
|
||||
"size": 10,
|
||||
"query": {
|
||||
"field": "body",
|
||||
"match_phrase": "Thousand Leagues Under the"
|
||||
}
|
||||
},
|
||||
"result": {
|
||||
"total_hits": 1,
|
||||
"hits": [
|
||||
{
|
||||
"id": "a"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"search": {
|
||||
"from": 0,
|
||||
"size": 10,
|
||||
"query": {
|
||||
"field": "body",
|
||||
"match_phrase": "Thousand Leagues Under the Sea"
|
||||
}
|
||||
},
|
||||
"result": {
|
||||
"total_hits": 1,
|
||||
"hits": [
|
||||
{
|
||||
"id": "a"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"search": {
|
||||
"from": 0,
|
||||
"size": 10,
|
||||
"query": {
|
||||
"field": "body",
|
||||
"match_phrase": "Leagues"
|
||||
}
|
||||
},
|
||||
"result": {
|
||||
"total_hits": 1,
|
||||
"hits": [
|
||||
{
|
||||
"id": "a"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"search": {
|
||||
"from": 0,
|
||||
"size": 10,
|
||||
"query": {
|
||||
"field": "body",
|
||||
"match_phrase": "Leagues Under"
|
||||
}
|
||||
},
|
||||
"result": {
|
||||
"total_hits": 1,
|
||||
"hits": [
|
||||
{
|
||||
"id": "a"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"search": {
|
||||
"from": 0,
|
||||
"size": 10,
|
||||
"query": {
|
||||
"field": "body",
|
||||
"match_phrase": "Leagues Under the"
|
||||
}
|
||||
},
|
||||
"result": {
|
||||
"total_hits": 1,
|
||||
"hits": [
|
||||
{
|
||||
"id": "a"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"search": {
|
||||
"from": 0,
|
||||
"size": 10,
|
||||
"query": {
|
||||
"field": "body",
|
||||
"match_phrase": "Leagues Under the Sea"
|
||||
}
|
||||
},
|
||||
"result": {
|
||||
"total_hits": 1,
|
||||
"hits": [
|
||||
{
|
||||
"id": "a"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"search": {
|
||||
"from": 0,
|
||||
"size": 10,
|
||||
"query": {
|
||||
"field": "body",
|
||||
"match_phrase": "Under the Sea"
|
||||
}
|
||||
},
|
||||
"result": {
|
||||
"total_hits": 1,
|
||||
"hits": [
|
||||
{
|
||||
"id": "a"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"search": {
|
||||
"from": 0,
|
||||
"size": 10,
|
||||
"query": {
|
||||
"field": "body",
|
||||
"match_phrase": "the Sea"
|
||||
}
|
||||
},
|
||||
"result": {
|
||||
"total_hits": 1,
|
||||
"hits": [
|
||||
{
|
||||
"id": "a"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"search": {
|
||||
"from": 0,
|
||||
"size": 10,
|
||||
"query": {
|
||||
"field": "body",
|
||||
"match_phrase": "Sea"
|
||||
}
|
||||
},
|
||||
"result": {
|
||||
"total_hits": 1,
|
||||
"hits": [
|
||||
{
|
||||
"id": "a"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
Loading…
Reference in New Issue