0
0

replaced nex lexer with custom lexer

this improvement was started to improve code coverage
but also improves performance and adds support for escaping

escaping:

The following quoted string enumerates the characters which
may be escaped.

"+-=&|><!(){}[]^\"~*?:\\/ "

Note that this list includes space.

In order to escape these characters, they are prefixed with the \
(backslash) character.  In all cases, using the escaped version
produces the character itself and is not interpreted by the
lexer.

Two simple examples:

my\ name

Will be interpreted as a single argument to a match query
with the value "my name".

"contains a\" character"

Will be interpreted as a single argument to a phrase query
with the value `contains a " character`.

Performance:

before$ go test -v -run=xxx -bench=BenchmarkLexer
BenchmarkLexer-4   	  100000	     13991 ns/op
PASS
ok  	github.com/blevesearch/bleve	1.570s

after$ go test -v -run=xxx -bench=BenchmarkLexer
BenchmarkLexer-4   	  500000	      3387 ns/op
PASS
ok  	github.com/blevesearch/bleve	1.740s
This commit is contained in:
Marty Schoch 2016-09-01 13:16:07 -04:00
parent 46f70bfa12
commit 5023993895
7 changed files with 531 additions and 1649 deletions

View File

@ -1,54 +0,0 @@
/\"((\\\")|(\\\\)|(\\\/)|(\\b)|(\\f)|(\\n)|(\\r)|(\\t)|(\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F])|[^\"])*\"/ {
lval.s = yylex.Text()[1:len(yylex.Text())-1]
logDebugTokens("PHRASE - %s", lval.s);
return tPHRASE
}
/\+/ { logDebugTokens("PLUS"); return tPLUS }
/-/ { logDebugTokens("MINUS"); return tMINUS }
/:/ { logDebugTokens("COLON"); return tCOLON }
/\(/ { logDebugTokens("LPAREN"); return tLPAREN }
/\)/ { logDebugTokens("RPAREN"); return tRPAREN }
/>/ { logDebugTokens("GREATER"); return tGREATER }
/</ { logDebugTokens("LESS"); return tLESS }
/=/ { logDebugTokens("EQUAL"); return tEQUAL }
/\^([0-9]|[1-9][0-9]*)(\.[0-9][0-9]*)?/
{
lval.s = yylex.Text()[1:]
logDebugTokens("BOOST");
return tBOOST }
/\^/ {
lval.s = "1"
logDebugTokens("BOOST");
return tBOOST
}
/~([0-9]|[1-9][0-9]*)/
{
lval.s = yylex.Text()[1:]
logDebugTokens("TILDENUMBER - %s", lval.s);
return tTILDE
}
/~/ {
lval.s = "1"
logDebugTokens("TILDE");
return tTILDE
}
/-?([0-9]|[1-9][0-9]*)(\.[0-9][0-9]*)?/
{
lval.s = yylex.Text()
logDebugTokens("NUMBER - %s", lval.s);
return tNUMBER
}
/[ \t\n]+/ { logDebugTokens("WHITESPACE (count=%d)", len(yylex.Text())) /* eat up whitespace */ }
/[^\t\n\f\r :^\+\*\?><=~-][^\t\n\f\r :^~]*/ {
lval.s = yylex.Text()
logDebugTokens("STRING - %s", lval.s);
return tSTRING
}
//
package bleve
func logDebugTokens(format string, v ...interface{}) {
if debugLexer {
logger.Printf(format, v...)
}
}

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,7 @@
%{
package bleve
import (
"fmt"
"strconv"
"strings"
)
@ -18,7 +19,7 @@ n int
f float64
q Query}
%token tSTRING tPHRASE tPLUS tMINUS tCOLON tBOOST tLPAREN tRPAREN tNUMBER tSTRING tGREATER tLESS
%token tSTRING tPHRASE tPLUS tMINUS tCOLON tBOOST tNUMBER tSTRING tGREATER tLESS
tEQUAL tTILDE
%type <s> tSTRING
@ -93,7 +94,10 @@ tSTRING {
|
tSTRING tTILDE {
str := $1
fuzziness, _ := strconv.ParseFloat($2, 64)
fuzziness, err := strconv.ParseFloat($2, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid fuzziness value: %v", err))
}
logDebugGrammar("FUZZY STRING - %s %f", str, fuzziness)
q := NewMatchQuery(str)
q.SetFuzziness(int(fuzziness))
@ -103,7 +107,10 @@ tSTRING tTILDE {
tSTRING tCOLON tSTRING tTILDE {
field := $1
str := $3
fuzziness, _ := strconv.ParseFloat($4, 64)
fuzziness, err := strconv.ParseFloat($4, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid fuzziness value: %v", err))
}
logDebugGrammar("FIELD - %s FUZZY STRING - %s %f", field, str, fuzziness)
q := NewMatchQuery(str)
q.SetFuzziness(int(fuzziness))
@ -239,7 +246,10 @@ searchSuffix:
}
|
tBOOST {
boost, _ := strconv.ParseFloat($1, 64)
boost, err := strconv.ParseFloat($1, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid boost value: %v", err))
}
$$ = boost
logDebugGrammar("BOOST %f", boost)
};

View File

@ -4,6 +4,7 @@ import __yyfmt__ "fmt"
//line query_string.y:2
import (
"fmt"
"strconv"
"strings"
)
@ -14,7 +15,7 @@ func logDebugGrammar(format string, v ...interface{}) {
}
}
//line query_string.y:15
//line query_string.y:16
type yySymType struct {
yys int
s string
@ -29,13 +30,11 @@ const tPLUS = 57348
const tMINUS = 57349
const tCOLON = 57350
const tBOOST = 57351
const tLPAREN = 57352
const tRPAREN = 57353
const tNUMBER = 57354
const tGREATER = 57355
const tLESS = 57356
const tEQUAL = 57357
const tTILDE = 57358
const tNUMBER = 57352
const tGREATER = 57353
const tLESS = 57354
const tEQUAL = 57355
const tTILDE = 57356
var yyToknames = [...]string{
"$end",
@ -47,8 +46,6 @@ var yyToknames = [...]string{
"tMINUS",
"tCOLON",
"tBOOST",
"tLPAREN",
"tRPAREN",
"tNUMBER",
"tGREATER",
"tLESS",
@ -77,25 +74,25 @@ const yyPrivate = 57344
var yyTokenNames []string
var yyStates []string
const yyLast = 32
const yyLast = 31
var yyAct = [...]int{
16, 18, 21, 13, 27, 24, 3, 1, 17, 19,
20, 25, 22, 15, 26, 23, 9, 11, 31, 29,
4, 14, 5, 6, 10, 30, 28, 2, 12, 8,
0, 7,
16, 18, 21, 13, 27, 24, 17, 19, 20, 25,
22, 15, 26, 23, 9, 11, 31, 14, 29, 3,
10, 30, 2, 28, 5, 6, 7, 1, 4, 12,
8,
}
var yyPact = [...]int{
16, -1000, -1000, 16, 12, -1000, -1000, -1000, -6, 5,
-1000, -1000, -1000, -1000, -1000, -4, -14, -1000, -1000, 0,
-1, -1000, -1000, 14, -1000, -1000, 13, -1000, -1000, -1000,
18, -1000, -1000, 18, 10, -1000, -1000, -1000, -6, 3,
-1000, -1000, -1000, -1000, -1000, -4, -12, -1000, -1000, 0,
-1, -1000, -1000, 13, -1000, -1000, 11, -1000, -1000, -1000,
-1000, -1000,
}
var yyPgo = [...]int{
0, 29, 28, 20, 7, 27, 6,
0, 30, 29, 28, 27, 22, 19,
}
var yyR1 = [...]int{
@ -112,9 +109,9 @@ var yyR2 = [...]int{
var yyChk = [...]int{
-1000, -4, -5, -6, -3, 6, 7, -5, -1, 4,
12, 5, -2, 9, 16, 8, 4, 12, 5, 13,
14, 16, 12, 15, 5, 12, 15, 5, 12, 5,
12, 5,
10, 5, -2, 9, 14, 8, 4, 10, 5, 11,
12, 14, 10, 13, 5, 10, 13, 5, 10, 5,
10, 5,
}
var yyDef = [...]int{
@ -130,7 +127,7 @@ var yyTok1 = [...]int{
var yyTok2 = [...]int{
2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16,
12, 13, 14,
}
var yyTok3 = [...]int{
0,
@ -475,25 +472,25 @@ yydefault:
case 1:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:36
//line query_string.y:37
{
logDebugGrammar("INPUT")
}
case 2:
yyDollar = yyS[yypt-2 : yypt+1]
//line query_string.y:41
//line query_string.y:42
{
logDebugGrammar("SEARCH PARTS")
}
case 3:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:45
//line query_string.y:46
{
logDebugGrammar("SEARCH PART")
}
case 4:
yyDollar = yyS[yypt-3 : yypt+1]
//line query_string.y:50
//line query_string.y:51
{
query := yyDollar[2].q
query.SetBoost(yyDollar[3].f)
@ -508,27 +505,27 @@ yydefault:
}
case 5:
yyDollar = yyS[yypt-0 : yypt+1]
//line query_string.y:65
//line query_string.y:66
{
yyVAL.n = queryShould
}
case 6:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:69
//line query_string.y:70
{
logDebugGrammar("PLUS")
yyVAL.n = queryMust
}
case 7:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:74
//line query_string.y:75
{
logDebugGrammar("MINUS")
yyVAL.n = queryMustNot
}
case 8:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:80
//line query_string.y:81
{
str := yyDollar[1].s
logDebugGrammar("STRING - %s", str)
@ -544,10 +541,13 @@ yydefault:
}
case 9:
yyDollar = yyS[yypt-2 : yypt+1]
//line query_string.y:94
//line query_string.y:95
{
str := yyDollar[1].s
fuzziness, _ := strconv.ParseFloat(yyDollar[2].s, 64)
fuzziness, err := strconv.ParseFloat(yyDollar[2].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid fuzziness value: %v", err))
}
logDebugGrammar("FUZZY STRING - %s %f", str, fuzziness)
q := NewMatchQuery(str)
q.SetFuzziness(int(fuzziness))
@ -555,11 +555,14 @@ yydefault:
}
case 10:
yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:103
//line query_string.y:107
{
field := yyDollar[1].s
str := yyDollar[3].s
fuzziness, _ := strconv.ParseFloat(yyDollar[4].s, 64)
fuzziness, err := strconv.ParseFloat(yyDollar[4].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid fuzziness value: %v", err))
}
logDebugGrammar("FIELD - %s FUZZY STRING - %s %f", field, str, fuzziness)
q := NewMatchQuery(str)
q.SetFuzziness(int(fuzziness))
@ -568,7 +571,7 @@ yydefault:
}
case 11:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:114
//line query_string.y:121
{
str := yyDollar[1].s
logDebugGrammar("STRING - %s", str)
@ -577,7 +580,7 @@ yydefault:
}
case 12:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:121
//line query_string.y:128
{
phrase := yyDollar[1].s
logDebugGrammar("PHRASE - %s", phrase)
@ -586,7 +589,7 @@ yydefault:
}
case 13:
yyDollar = yyS[yypt-3 : yypt+1]
//line query_string.y:128
//line query_string.y:135
{
field := yyDollar[1].s
str := yyDollar[3].s
@ -604,7 +607,7 @@ yydefault:
}
case 14:
yyDollar = yyS[yypt-3 : yypt+1]
//line query_string.y:144
//line query_string.y:151
{
field := yyDollar[1].s
str := yyDollar[3].s
@ -614,7 +617,7 @@ yydefault:
}
case 15:
yyDollar = yyS[yypt-3 : yypt+1]
//line query_string.y:152
//line query_string.y:159
{
field := yyDollar[1].s
phrase := yyDollar[3].s
@ -624,7 +627,7 @@ yydefault:
}
case 16:
yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:160
//line query_string.y:167
{
field := yyDollar[1].s
min, _ := strconv.ParseFloat(yyDollar[4].s, 64)
@ -635,7 +638,7 @@ yydefault:
}
case 17:
yyDollar = yyS[yypt-5 : yypt+1]
//line query_string.y:169
//line query_string.y:176
{
field := yyDollar[1].s
min, _ := strconv.ParseFloat(yyDollar[5].s, 64)
@ -646,7 +649,7 @@ yydefault:
}
case 18:
yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:178
//line query_string.y:185
{
field := yyDollar[1].s
max, _ := strconv.ParseFloat(yyDollar[4].s, 64)
@ -657,7 +660,7 @@ yydefault:
}
case 19:
yyDollar = yyS[yypt-5 : yypt+1]
//line query_string.y:187
//line query_string.y:194
{
field := yyDollar[1].s
max, _ := strconv.ParseFloat(yyDollar[5].s, 64)
@ -668,7 +671,7 @@ yydefault:
}
case 20:
yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:196
//line query_string.y:203
{
field := yyDollar[1].s
minInclusive := false
@ -680,7 +683,7 @@ yydefault:
}
case 21:
yyDollar = yyS[yypt-5 : yypt+1]
//line query_string.y:206
//line query_string.y:213
{
field := yyDollar[1].s
minInclusive := true
@ -692,7 +695,7 @@ yydefault:
}
case 22:
yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:216
//line query_string.y:223
{
field := yyDollar[1].s
maxInclusive := false
@ -704,7 +707,7 @@ yydefault:
}
case 23:
yyDollar = yyS[yypt-5 : yypt+1]
//line query_string.y:226
//line query_string.y:233
{
field := yyDollar[1].s
maxInclusive := true
@ -716,15 +719,18 @@ yydefault:
}
case 24:
yyDollar = yyS[yypt-0 : yypt+1]
//line query_string.y:237
//line query_string.y:244
{
yyVAL.f = 1.0
}
case 25:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:241
//line query_string.y:248
{
boost, _ := strconv.ParseFloat(yyDollar[1].s, 64)
boost, err := strconv.ParseFloat(yyDollar[1].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid boost value: %v", err))
}
yyVAL.f = boost
logDebugGrammar("BOOST %f", boost)
}

317
query_string_lex.go Normal file
View File

@ -0,0 +1,317 @@
// Copyright (c) 2016 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package bleve
import (
"bufio"
"io"
"strings"
"unicode"
)
// reservedChars enumerates the characters which may be escaped with a
// backslash in a query string (note that it includes space).
const reservedChars = "+-=&|><!(){}[]^\"~*?:\\/ "

// unescape resolves a backslash escape: if escaped is one of the
// reserved characters, the escape produces the character itself;
// otherwise the backslash is re-attached so the caller sees the
// original input unchanged.
func unescape(escaped string) string {
	for _, r := range escaped {
		if strings.ContainsRune(reservedChars, r) {
			// escapable character: yield it without the backslash
			return escaped
		}
	}
	// not an escapable character: keep the \ intact
	return "\\" + escaped
}
// queryStringLex is a hand-written lexer for the query string syntax.
// It implements the yyLexer interface consumed by the generated parser,
// driving a state-function machine (see lexState) over the input runes.
type queryStringLex struct {
	in            *bufio.Reader // buffered source of input runes
	buf           string        // text accumulated for the token in progress
	currState     lexState      // current state function of the machine
	currConsumed  bool          // whether the last state consumed the lookahead rune
	inEscape      bool          // a backslash escape is in progress
	nextToken     *yySymType    // completed token waiting to be returned by Lex
	nextTokenType int           // token type constant paired with nextToken
	seenDot       bool          // tracks a '.' while lexing a number (cleared by reset)
	nextRune      rune          // current lookahead rune (0 once at EOF)
	nextRuneSize  int           // byte width of nextRune from ReadRune (not read elsewhere in this file)
	atEOF         bool          // input has been exhausted
}
// reset clears all per-token state so the machine can begin lexing
// a fresh token from the start state.
func (l *queryStringLex) reset() {
	l.buf, l.inEscape, l.seenDot = "", false, false
}
// Error implements the yyLexer error hook by panicking with msg.
// NOTE(review): the parse driver appears to recover panics and record
// them as parse errors, so this aborts only the current parse —
// confirm every caller of Lex sits under that recover.
func (l *queryStringLex) Error(msg string) {
	panic(msg)
}
// Lex implements yyLexer. It runs the state machine until a complete
// token has been produced, copies it into lval, and returns its token
// type. A return of 0 signals end of input (or an unreadable input).
func (l *queryStringLex) Lex(lval *yySymType) int {
	var err error

	for l.nextToken == nil {
		if l.currConsumed {
			// the previous state consumed the lookahead; fetch another rune
			l.nextRune, l.nextRuneSize, err = l.in.ReadRune()
			if err != nil && err == io.EOF {
				// EOF is delivered to the states via atEOF so they can
				// flush any token still in progress
				l.nextRune = 0
				l.atEOF = true
			} else if err != nil {
				// non-EOF read error: report end of input
				return 0
			}
		}

		// run the current state; it decides whether the rune is consumed
		l.currState, l.currConsumed = l.currState(l, l.nextRune, l.atEOF)
		if l.currState == nil {
			// machine halted: no more tokens
			return 0
		}
	}

	// hand the pending token to the parser and clear it
	*lval = *l.nextToken
	rv := l.nextTokenType
	l.nextToken = nil
	l.nextTokenType = 0
	return rv
}
// newQueryStringLex returns a lexer reading from in, positioned at the
// start state with no pending lookahead rune.
func newQueryStringLex(in io.Reader) *queryStringLex {
	lex := &queryStringLex{}
	lex.in = bufio.NewReader(in)
	lex.currState = startState
	lex.currConsumed = true
	return lex
}
// lexState is one state of the lexer's machine: given the lexer, the
// lookahead rune, and whether input is exhausted, it returns the next
// state (nil to halt) and whether the lookahead rune was consumed.
type lexState func(l *queryStringLex, next rune, eof bool) (lexState, bool)
// startState begins every token. It dispatches on the first significant
// rune: '"' begins a phrase, operator characters become single-char
// tokens, '^'/'~' begin boost/fuzziness values, digits begin numbers,
// and any other non-space rune begins a plain string. Whitespace is
// silently eaten.
func startState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
	if eof {
		return nil, false
	}

	// handle inside escape case up front: an escaped character always
	// begins a plain string, never an operator, phrase, or number
	if l.inEscape {
		l.inEscape = false
		l.buf += unescape(string(next))
		return inStrState, true
	}

	switch next {
	case '"':
		return inPhraseState, true
	case '+', '-', ':', '>', '<', '=':
		l.buf += string(next)
		return singleCharOpState, true
	case '^':
		return inBoostState, true
	case '~':
		return inTildeState, true
	}

	switch {
	case next == '\\':
		// begin an escape sequence (l.inEscape is necessarily false
		// here — the escape branch above returns)
		l.inEscape = true
		return startState, true
	case unicode.IsDigit(next):
		l.buf += string(next)
		return inNumOrStrState, true
	case !unicode.IsSpace(next):
		l.buf += string(next)
		return inStrState, true
	}

	// whitespace: doesn't look like anything, just eat it and stay here
	l.reset()
	return startState, true
}
// inPhraseState accumulates the body of a double-quoted phrase. Only a
// non-escaped '"' terminates it; reaching eof first is an error.
func inPhraseState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
	// an unterminated phrase is a lex error
	if eof {
		l.Error("unterminated quote")
		return nil, false
	}

	switch {
	case !l.inEscape && next == '"':
		// closing quote: emit the phrase token
		l.nextTokenType = tPHRASE
		l.nextToken = &yySymType{s: l.buf}
		logDebugTokens("PHRASE - '%s'", l.nextToken.s)
		l.reset()
		return startState, true
	case !l.inEscape && next == '\\':
		// begin an escape sequence
		l.inEscape = true
	case l.inEscape:
		// finish the escape sequence
		l.inEscape = false
		l.buf += unescape(string(next))
	default:
		l.buf += string(next)
	}
	return inPhraseState, true
}
// singleCharOpState emits the single-character operator currently held
// in the buffer ('+', '-', ':', '>', '<' or '='). The lookahead rune is
// NOT consumed — this state was entered right after buffering the
// operator itself, so next belongs to the following token.
func singleCharOpState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
	var tok int
	var label string
	switch l.buf {
	case "+":
		tok, label = tPLUS, "PLUS"
	case "-":
		tok, label = tMINUS, "MINUS"
	case ":":
		tok, label = tCOLON, "COLON"
	case ">":
		tok, label = tGREATER, "GREATER"
	case "<":
		tok, label = tLESS, "LESS"
	case "=":
		tok, label = tEQUAL, "EQUAL"
	}
	l.nextToken = &yySymType{}
	l.nextTokenType = tok
	logDebugTokens(label)
	l.reset()
	return startState, false
}
// inBoostState accumulates the numeric boost value following '^'.
// A non-escaped space (or eof) ends the token; an absent value
// defaults to "1".
func inBoostState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
	switch {
	case eof || (!l.inEscape && next == ' '):
		// boost complete; an empty boost means 1
		if l.buf == "" {
			l.buf = "1"
		}
		l.nextTokenType = tBOOST
		l.nextToken = &yySymType{s: l.buf}
		logDebugTokens("BOOST - '%s'", l.nextToken.s)
		l.reset()
		return startState, true
	case !l.inEscape && next == '\\':
		// begin an escape sequence
		l.inEscape = true
	case l.inEscape:
		// finish the escape sequence
		l.inEscape = false
		l.buf += unescape(string(next))
	default:
		l.buf += string(next)
	}
	return inBoostState, true
}
// inTildeState accumulates the fuzziness value following '~'.
// A non-escaped space (or eof) ends the token; an absent value
// defaults to "1".
func inTildeState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
	switch {
	case eof || (!l.inEscape && next == ' '):
		// tilde value complete; an empty value means 1
		if l.buf == "" {
			l.buf = "1"
		}
		l.nextTokenType = tTILDE
		l.nextToken = &yySymType{s: l.buf}
		logDebugTokens("TILDE - '%s'", l.nextToken.s)
		l.reset()
		return startState, true
	case !l.inEscape && next == '\\':
		// begin an escape sequence
		l.inEscape = true
	case l.inEscape:
		// finish the escape sequence
		l.inEscape = false
		l.buf += unescape(string(next))
	default:
		l.buf += string(next)
	}
	return inTildeState, true
}
// inNumOrStrState accumulates a token which has looked like a number so
// far (digits, with at most one '.'). A non-escaped space (or eof)
// emits tNUMBER; any rune which breaks the number syntax demotes the
// token in progress to a plain string via inStrState.
func inNumOrStrState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
	// only a non-escaped space ends the number (or eof)
	if eof || (!l.inEscape && next == ' ') {
		// end number
		l.nextTokenType = tNUMBER
		l.nextToken = &yySymType{
			s: l.buf,
		}
		logDebugTokens("NUMBER - '%s'", l.nextToken.s)
		l.reset()
		return startState, true
	} else if !l.inEscape && next == '\\' {
		l.inEscape = true
		return inNumOrStrState, true
	} else if l.inEscape {
		// if in escape, end it
		l.inEscape = false
		l.buf += unescape(string(next))
		// go directly to string: no successfully or unsuccessfully
		// escaped character can appear in a valid number
		return inStrState, true
	}
	// see where to go
	if !l.seenDot && next == '.' {
		// first decimal point: record it so that a second '.'
		// demotes the token to a string (e.g. "3.0.0")
		l.seenDot = true
		// stay in this state
		l.buf += string(next)
		return inNumOrStrState, true
	} else if unicode.IsDigit(next) {
		l.buf += string(next)
		return inNumOrStrState, true
	}
	// doesn't look like a number, transition
	l.buf += string(next)
	return inStrState, true
}
// inStrState accumulates a plain string token. A non-escaped space,
// ':', '^' or '~' terminates it (as does eof); of those, only the
// space is consumed — the others begin tokens of their own and are
// left for startState to re-examine.
func inStrState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
	terminator := next == ' ' || next == ':' || next == '^' || next == '~'
	if eof || (!l.inEscape && terminator) {
		// emit the string token
		l.nextTokenType = tSTRING
		l.nextToken = &yySymType{s: l.buf}
		logDebugTokens("STRING - '%s'", l.nextToken.s)
		l.reset()
		// ':', '^' and '~' are not consumed; space is
		if !eof && next != ' ' {
			return startState, false
		}
		return startState, true
	}

	switch {
	case !l.inEscape && next == '\\':
		// begin an escape sequence
		l.inEscape = true
	case l.inEscape:
		// finish the escape sequence
		l.inEscape = false
		l.buf += unescape(string(next))
	default:
		l.buf += string(next)
	}
	return inStrState, true
}
// logDebugTokens writes a printf-style message to the package logger,
// but only when lexer debugging has been enabled via debugLexer.
func logDebugTokens(format string, v ...interface{}) {
	if debugLexer {
		logger.Printf(format, v...)
	}
}

View File

@ -7,11 +7,6 @@
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
//go:generate nex query_string.nex
//go:generate sed -i "" -e s/Lexer/lexer/g query_string.nn.go
//go:generate sed -i "" -e s/Newlexer/newLexer/g query_string.nn.go
//go:generate sed -i "" -e s/debuglexer/debugLexer/g query_string.nn.go
//go:generate go fmt query_string.nn.go
//go:generate go tool yacc -o query_string.y.go query_string.y
//go:generate sed -i "" -e 1d query_string.y.go
@ -26,21 +21,20 @@ var debugParser bool
var debugLexer bool
func parseQuerySyntax(query string) (rq Query, err error) {
lex := newLexerWrapper(newLexer(strings.NewReader(query)))
lex := newLexerWrapper(newQueryStringLex(strings.NewReader(query)))
doParse(lex)
if len(lex.errs) > 0 {
return nil, fmt.Errorf(strings.Join(lex.errs, "\n"))
} else {
return lex.query, nil
}
return lex.query, nil
}
func doParse(lex *lexerWrapper) {
defer func() {
r := recover()
if r != nil {
lex.Error("Errors while parsing.")
lex.errs = append(lex.errs, fmt.Sprintf("parse error: %v", r))
}
}()
@ -54,23 +48,22 @@ const (
)
type lexerWrapper struct {
nex yyLexer
lex yyLexer
errs []string
query *booleanQuery
}
func newLexerWrapper(nex yyLexer) *lexerWrapper {
func newLexerWrapper(lex yyLexer) *lexerWrapper {
return &lexerWrapper{
nex: nex,
errs: []string{},
lex: lex,
query: NewBooleanQuery(nil, nil, nil),
}
}
func (this *lexerWrapper) Lex(lval *yySymType) int {
return this.nex.Lex(lval)
func (l *lexerWrapper) Lex(lval *yySymType) int {
return l.lex.Lex(lval)
}
func (this *lexerWrapper) Error(s string) {
this.errs = append(this.errs, s)
func (l *lexerWrapper) Error(s string) {
l.errs = append(l.errs, s)
}

View File

@ -406,17 +406,142 @@ func TestQuerySyntaxParserValid(t *testing.T) {
},
nil),
},
// tests for escaping
// escape : as field delimiter
{
input: `name\:marty`,
mapping: NewIndexMapping(),
result: NewBooleanQuery(
nil,
[]Query{
NewMatchQuery("name:marty"),
},
nil),
},
// first colon delimiter, second escaped
{
input: `name:marty\:couchbase`,
mapping: NewIndexMapping(),
result: NewBooleanQuery(
nil,
[]Query{
NewMatchQuery("marty:couchbase").SetField("name"),
},
nil),
},
// escape space, single argument to match query
{
input: `marty\ couchbase`,
mapping: NewIndexMapping(),
result: NewBooleanQuery(
nil,
[]Query{
NewMatchQuery("marty couchbase"),
},
nil),
},
// escape leading plus, not a must clause
{
input: `\+marty`,
mapping: NewIndexMapping(),
result: NewBooleanQuery(
nil,
[]Query{
NewMatchQuery("+marty"),
},
nil),
},
// escape leading minus, not a must not clause
{
input: `\-marty`,
mapping: NewIndexMapping(),
result: NewBooleanQuery(
nil,
[]Query{
NewMatchQuery("-marty"),
},
nil),
},
// escape quote inside of phrase
{
input: `"what does \"quote\" mean"`,
mapping: NewIndexMapping(),
result: NewBooleanQuery(
nil,
[]Query{
NewMatchPhraseQuery(`what does "quote" mean`),
},
nil),
},
// escaping an unsupported character retains backslash
{
input: `can\ i\ escap\e`,
mapping: NewIndexMapping(),
result: NewBooleanQuery(
nil,
[]Query{
NewMatchQuery(`can i escap\e`),
},
nil),
},
// leading spaces
{
input: ` what`,
mapping: NewIndexMapping(),
result: NewBooleanQuery(
nil,
[]Query{
NewMatchQuery(`what`),
},
nil),
},
// no boost value defaults to 1
{
input: `term^`,
mapping: NewIndexMapping(),
result: NewBooleanQuery(
nil,
[]Query{
NewMatchQuery(`term`),
},
nil),
},
// weird lexer cases, something that starts like a number
// but contains escape and ends up as string
{
input: `3.0\:`,
mapping: NewIndexMapping(),
result: NewBooleanQuery(
nil,
[]Query{
NewMatchQuery(`3.0:`),
},
nil),
},
{
input: `3.0\a`,
mapping: NewIndexMapping(),
result: NewBooleanQuery(
nil,
[]Query{
NewMatchQuery(`3.0\a`),
},
nil),
},
}
// turn on lexer debugging
// debugLexer = true
// logger = log.New(os.Stderr, "bleve", log.LstdFlags)
// debugParser = true
// logger = log.New(os.Stderr, "bleve ", log.LstdFlags)
for _, test := range tests {
q, err := parseQuerySyntax(test.input)
if err != nil {
t.Error(err)
t.Fatal(err)
}
if !reflect.DeepEqual(q, test.result) {
t.Errorf("Expected %#v, got %#v: for %s", test.result, q, test.input)
@ -440,6 +565,11 @@ func TestQuerySyntaxParserInvalid(t *testing.T) {
{"field:~text"},
{"field:^text"},
{"field::text"},
{`"this is the time`},
{`cat^3\:`},
{`cat^3\0`},
{`cat~3\:`},
{`cat~3\0`},
}
// turn on lexer debugging
@ -460,7 +590,7 @@ func BenchmarkLexer(b *testing.B) {
var tokenTypes []int
var tokens []yySymType
r := strings.NewReader(`+field4:"test phrase 1"`)
l := newLexer(r)
l := newQueryStringLex(r)
var lval yySymType
rv := l.Lex(&lval)
for rv > 0 {