0
0
bleve/search/query/query_string_parser_test.go
Marty Schoch a265218f76 heavier refactor of Query interface to simplify
Boostable, Fieldable, Validatable broken out into separate
interfaces.  This allows them to be discoverable when
needed, but ignorable otherwise.  The top-level bleve package
only ever cares about Validatable and even that is optional.

Also, this change goes further to make the structure names
more reasonable, for cases where you're directly interacting
with the structures.
2016-09-29 14:54:16 -04:00

754 lines
15 KiB
Go

// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package query
import (
"reflect"
"strings"
"testing"
"github.com/blevesearch/bleve/mapping"
)
// TestQuerySyntaxParserValid feeds a table of well-formed query strings to
// parseQuerySyntax and checks, with reflect.DeepEqual, that each one parses
// into the expected Query tree. Every case is evaluated even when an earlier
// one fails, so a single run reports all mismatches.
func TestQuerySyntaxParserValid(t *testing.T) {
	// shared operands for the range-query expectations; the constructors take pointers
	fivePointOh := 5.0
	theTruth := true
	theFalsehood := false
	theDate := "2006-01-02T15:04:05Z07:00"
	tests := []struct {
		input  string
		result Query
		// mapping is populated for every case but is not read by the loop below
		mapping mapping.IndexMapping
	}{
		{
			input:   "test",
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					NewMatchQuery("test"),
				},
				nil),
		},
		{
			input:   `"test phrase 1"`,
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					NewMatchPhraseQuery("test phrase 1"),
				},
				nil),
		},
		{
			input:   "field:test",
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					func() Query {
						q := NewMatchQuery("test")
						q.SetField("field")
						return q
					}(),
				},
				nil),
		},
		// - is allowed inside a term, just not the start
		{
			input:   "field:t-est",
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					func() Query {
						q := NewMatchQuery("t-est")
						q.SetField("field")
						return q
					}(),
				},
				nil),
		},
		// + is allowed inside a term, just not the start
		{
			input:   "field:t+est",
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					func() Query {
						q := NewMatchQuery("t+est")
						q.SetField("field")
						return q
					}(),
				},
				nil),
		},
		// > is allowed inside a term, just not the start
		{
			input:   "field:t>est",
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					func() Query {
						q := NewMatchQuery("t>est")
						q.SetField("field")
						return q
					}(),
				},
				nil),
		},
		// < is allowed inside a term, just not the start
		{
			input:   "field:t<est",
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					func() Query {
						q := NewMatchQuery("t<est")
						q.SetField("field")
						return q
					}(),
				},
				nil),
		},
		// = is allowed inside a term, just not the start
		{
			input:   "field:t=est",
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					func() Query {
						q := NewMatchQuery("t=est")
						q.SetField("field")
						return q
					}(),
				},
				nil),
		},
		{
			input:   "+field1:test1",
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				[]Query{
					func() Query {
						q := NewMatchQuery("test1")
						q.SetField("field1")
						return q
					}(),
				},
				nil,
				nil),
		},
		{
			input:   "-field2:test2",
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				nil,
				[]Query{
					func() Query {
						q := NewMatchQuery("test2")
						q.SetField("field2")
						return q
					}(),
				}),
		},
		{
			input:   `field3:"test phrase 2"`,
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					func() Query {
						q := NewMatchPhraseQuery("test phrase 2")
						q.SetField("field3")
						return q
					}(),
				},
				nil),
		},
		{
			input:   `+field4:"test phrase 1"`,
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				[]Query{
					func() Query {
						q := NewMatchPhraseQuery("test phrase 1")
						q.SetField("field4")
						return q
					}(),
				},
				nil,
				nil),
		},
		{
			input:   `-field5:"test phrase 2"`,
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				nil,
				[]Query{
					func() Query {
						q := NewMatchPhraseQuery("test phrase 2")
						q.SetField("field5")
						return q
					}(),
				}),
		},
		{
			input:   `+field6:test3 -field7:test4 field8:test5`,
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				[]Query{
					func() Query {
						q := NewMatchQuery("test3")
						q.SetField("field6")
						return q
					}(),
				},
				[]Query{
					func() Query {
						q := NewMatchQuery("test5")
						q.SetField("field8")
						return q
					}(),
				},
				[]Query{
					func() Query {
						q := NewMatchQuery("test4")
						q.SetField("field7")
						return q
					}(),
				}),
		},
		{
			input:   "test^3",
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					func() Query {
						q := NewMatchQuery("test")
						q.SetBoost(3.0)
						return q
					}(),
				},
				nil),
		},
		{
			input:   "test^3 other^6",
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					func() Query {
						q := NewMatchQuery("test")
						q.SetBoost(3.0)
						return q
					}(),
					func() Query {
						q := NewMatchQuery("other")
						q.SetBoost(6.0)
						return q
					}(),
				},
				nil),
		},
		{
			input:   "33",
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					NewMatchQuery("33"),
				},
				nil),
		},
		{
			input:   "field:33",
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					func() Query {
						q := NewMatchQuery("33")
						q.SetField("field")
						return q
					}(),
				},
				nil),
		},
		{
			input:   "cat-dog",
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					NewMatchQuery("cat-dog"),
				},
				nil),
		},
		{
			input:   "watex~",
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					func() Query {
						q := NewMatchQuery("watex")
						q.SetFuzziness(1)
						return q
					}(),
				},
				nil),
		},
		{
			input:   "watex~2",
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					func() Query {
						q := NewMatchQuery("watex")
						q.SetFuzziness(2)
						return q
					}(),
				},
				nil),
		},
		{
			input:   "watex~ 2",
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					func() Query {
						q := NewMatchQuery("watex")
						q.SetFuzziness(1)
						return q
					}(),
					NewMatchQuery("2"),
				},
				nil),
		},
		{
			input:   "field:watex~",
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					func() Query {
						q := NewMatchQuery("watex")
						q.SetFuzziness(1)
						q.SetField("field")
						return q
					}(),
				},
				nil),
		},
		{
			input:   "field:watex~2",
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					func() Query {
						q := NewMatchQuery("watex")
						q.SetFuzziness(2)
						q.SetField("field")
						return q
					}(),
				},
				nil),
		},
		{
			input:   `field:555c3bb06f7a127cda000005`,
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					func() Query {
						q := NewMatchQuery("555c3bb06f7a127cda000005")
						q.SetField("field")
						return q
					}(),
				},
				nil),
		},
		{
			input:   `field:>5`,
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					func() Query {
						q := NewNumericRangeInclusiveQuery(&fivePointOh, nil, &theFalsehood, nil)
						q.SetField("field")
						return q
					}(),
				},
				nil),
		},
		{
			input:   `field:>=5`,
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					func() Query {
						q := NewNumericRangeInclusiveQuery(&fivePointOh, nil, &theTruth, nil)
						q.SetField("field")
						return q
					}(),
				},
				nil),
		},
		{
			input:   `field:<5`,
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					func() Query {
						q := NewNumericRangeInclusiveQuery(nil, &fivePointOh, nil, &theFalsehood)
						q.SetField("field")
						return q
					}(),
				},
				nil),
		},
		{
			input:   `field:<=5`,
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					func() Query {
						q := NewNumericRangeInclusiveQuery(nil, &fivePointOh, nil, &theTruth)
						q.SetField("field")
						return q
					}(),
				},
				nil),
		},
		{
			input:   `field:>"2006-01-02T15:04:05Z07:00"`,
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					func() Query {
						q := NewDateRangeInclusiveQuery(&theDate, nil, &theFalsehood, nil)
						q.SetField("field")
						return q
					}(),
				},
				nil),
		},
		{
			input:   `field:>="2006-01-02T15:04:05Z07:00"`,
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					func() Query {
						q := NewDateRangeInclusiveQuery(&theDate, nil, &theTruth, nil)
						q.SetField("field")
						return q
					}(),
				},
				nil),
		},
		{
			input:   `field:<"2006-01-02T15:04:05Z07:00"`,
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					func() Query {
						q := NewDateRangeInclusiveQuery(nil, &theDate, nil, &theFalsehood)
						q.SetField("field")
						return q
					}(),
				},
				nil),
		},
		{
			input:   `field:<="2006-01-02T15:04:05Z07:00"`,
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					func() Query {
						q := NewDateRangeInclusiveQuery(nil, &theDate, nil, &theTruth)
						q.SetField("field")
						return q
					}(),
				},
				nil),
		},
		{
			input:   `/mar.*ty/`,
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					NewRegexpQuery("mar.*ty"),
				},
				nil),
		},
		{
			input:   `name:/mar.*ty/`,
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					func() Query {
						q := NewRegexpQuery("mar.*ty")
						q.SetField("name")
						return q
					}(),
				},
				nil),
		},
		{
			input:   `mart*`,
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					NewWildcardQuery("mart*"),
				},
				nil),
		},
		{
			input:   `name:mart*`,
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					func() Query {
						q := NewWildcardQuery("mart*")
						q.SetField("name")
						return q
					}(),
				},
				nil),
		},
		// tests for escaping
		// escape : as field delimiter
		{
			input:   `name\:marty`,
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					NewMatchQuery("name:marty"),
				},
				nil),
		},
		// first colon delimiter, second escaped
		{
			input:   `name:marty\:couchbase`,
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					func() Query {
						q := NewMatchQuery("marty:couchbase")
						q.SetField("name")
						return q
					}(),
				},
				nil),
		},
		// escape space, single argument to match query
		{
			input:   `marty\ couchbase`,
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					NewMatchQuery("marty couchbase"),
				},
				nil),
		},
		// escape leading plus, not a must clause
		{
			input:   `\+marty`,
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					NewMatchQuery("+marty"),
				},
				nil),
		},
		// escape leading minus, not a must not clause
		{
			input:   `\-marty`,
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					NewMatchQuery("-marty"),
				},
				nil),
		},
		// escape quote inside of phrase
		{
			input:   `"what does \"quote\" mean"`,
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					NewMatchPhraseQuery(`what does "quote" mean`),
				},
				nil),
		},
		// escaping an unsupported character retains backslash
		{
			input:   `can\ i\ escap\e`,
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					NewMatchQuery(`can i escap\e`),
				},
				nil),
		},
		// leading spaces
		{
			input:   ` what`,
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					NewMatchQuery(`what`),
				},
				nil),
		},
		// no boost value defaults to 1
		{
			input:   `term^`,
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					func() Query {
						q := NewMatchQuery(`term`)
						q.SetBoost(1.0)
						return q
					}(),
				},
				nil),
		},
		// weird lexer cases, something that starts like a number
		// but contains escape and ends up as string
		{
			input:   `3.0\:`,
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					NewMatchQuery(`3.0:`),
				},
				nil),
		},
		{
			input:   `3.0\a`,
			mapping: mapping.NewIndexMapping(),
			result: NewBooleanQuery(
				nil,
				[]Query{
					NewMatchQuery(`3.0\a`),
				},
				nil),
		},
	}

	// turn on lexer debugging
	// debugLexer = true
	// debugParser = true
	// logger = log.New(os.Stderr, "bleve ", log.LstdFlags)

	for _, test := range tests {
		q, err := parseQuerySyntax(test.input)
		if err != nil {
			// name the offending input and keep going, so one bad case
			// does not hide the results of the remaining ones
			t.Errorf("unexpected error parsing %q: %v", test.input, err)
			continue
		}
		if reflect.DeepEqual(q, test.result) {
			continue
		}
		t.Errorf("Expected %#v, got %#v: for %s", test.result, q, test.input)

		// Extra detail: also print the first should-clause of each side.
		// Several cases have no should clause at all, so every step is
		// checked rather than asserted; an unchecked assertion here would
		// panic and mask the mismatch already reported above.
		wantBool, wantOK := test.result.(*BooleanQuery)
		gotBool, gotOK := q.(*BooleanQuery)
		if !wantOK || !gotOK || wantBool == nil || gotBool == nil {
			continue
		}
		wantShould, wantOK := wantBool.Should.(*DisjunctionQuery)
		gotShould, gotOK := gotBool.Should.(*DisjunctionQuery)
		if !wantOK || !gotOK || wantShould == nil || gotShould == nil {
			continue
		}
		if len(wantShould.Disjuncts) == 0 || len(gotShould.Disjuncts) == 0 {
			continue
		}
		t.Errorf("Expected %#v, got %#v: for %s", wantShould.Disjuncts[0], gotShould.Disjuncts[0], test.input)
	}
}
// TestQuerySyntaxParserInvalid verifies that parseQuerySyntax returns an
// error for each malformed query string listed below. All inputs are tried;
// any that parse without error are reported individually.
func TestQuerySyntaxParserInvalid(t *testing.T) {
	badInputs := []string{
		"^",
		"^5",
		"field:-text",
		"field:+text",
		"field:>text",
		"field:>=text",
		"field:<text",
		"field:<=text",
		"field:~text",
		"field:^text",
		"field::text",
		`"this is the time`,
		`cat^3\:`,
		`cat^3\0`,
		`cat~3\:`,
		`cat~3\0`,
	}

	// turn on lexer debugging
	// debugLexer = true
	// logger = log.New(os.Stderr, "bleve", log.LstdFlags)

	for _, in := range badInputs {
		if _, err := parseQuerySyntax(in); err == nil {
			t.Errorf("expected error, got nil for `%s`", in)
		}
	}
}
// BenchmarkLexer measures lexing of a fixed query string. Each iteration
// builds a fresh lexer over the input and pulls tokens until Lex returns a
// non-positive value, collecting every token type and symbol value.
func BenchmarkLexer(b *testing.B) {
	for i := 0; i < b.N; i++ {
		var (
			kinds  []int
			values []yySymType
			sym    yySymType
		)
		lexer := newQueryStringLex(strings.NewReader(`+field4:"test phrase 1"`))
		for tok := lexer.Lex(&sym); tok > 0; tok = lexer.Lex(&sym) {
			kinds = append(kinds, tok)
			values = append(values, sym)
			// clear the scratch symbol before the next Lex call
			sym.s = ""
			sym.n = 0
		}
	}
}