// Copyright (c) 2016 Couchbase, Inc. // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file // except in compliance with the License. You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // Unless required by applicable law or agreed to in writing, software distributed under the // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, // either express or implied. See the License for the specific language governing permissions // and limitations under the License. package query import ( "bufio" "io" "strings" "unicode" ) const reservedChars = "+-=&|>', '<', '=': l.buf += string(next) return singleCharOpState, true case '^': return inBoostState, true case '~': return inTildeState, true } switch { case !l.inEscape && next == '\\': l.inEscape = true return startState, true case unicode.IsDigit(next): l.buf += string(next) return inNumOrStrState, true case !unicode.IsSpace(next): l.buf += string(next) return inStrState, true } // doesnt look like anything, just eat it and stay here l.reset() return startState, true } func inPhraseState(l *queryStringLex, next rune, eof bool) (lexState, bool) { // unterminated phrase eats the phrase if eof { l.Error("unterminated quote") return nil, false } // only a non-escaped " ends the phrase if !l.inEscape && next == '"' { // end phrase l.nextTokenType = tPHRASE l.nextToken = &yySymType{ s: l.buf, } logDebugTokens("PHRASE - '%s'", l.nextToken.s) l.reset() return startState, true } else if !l.inEscape && next == '\\' { l.inEscape = true } else if l.inEscape { // if in escape, end it l.inEscape = false l.buf += unescape(string(next)) } else { l.buf += string(next) } return inPhraseState, true } func singleCharOpState(l *queryStringLex, next rune, eof bool) (lexState, bool) { l.nextToken = &yySymType{} switch l.buf { case "+": l.nextTokenType = tPLUS logDebugTokens("PLUS") case "-": l.nextTokenType = tMINUS logDebugTokens("MINUS") case ":": l.nextTokenType = tCOLON logDebugTokens("COLON") case ">": l.nextTokenType = tGREATER logDebugTokens("GREATER") case "<": l.nextTokenType = tLESS logDebugTokens("LESS") case "=": l.nextTokenType = tEQUAL logDebugTokens("EQUAL") } l.reset() return startState, false } func inBoostState(l *queryStringLex, next rune, eof bool) (lexState, bool) { // only a non-escaped space ends the boost (or eof) if eof || (!l.inEscape && next == ' ') { // end boost l.nextTokenType = tBOOST if l.buf == "" { l.buf = "1" } l.nextToken = &yySymType{ s: l.buf, } logDebugTokens("BOOST - '%s'", l.nextToken.s) l.reset() return startState, true } else if !l.inEscape && next == '\\' { l.inEscape = true } else if l.inEscape { // if in escape, end it l.inEscape = false l.buf += unescape(string(next)) } else { l.buf += string(next) } return inBoostState, true } func inTildeState(l *queryStringLex, next rune, eof bool) (lexState, bool) { // only a non-escaped space ends the tilde (or eof) if eof || (!l.inEscape && next == ' ') { // end tilde l.nextTokenType = tTILDE if l.buf == "" { l.buf = "1" } l.nextToken = &yySymType{ s: l.buf, } logDebugTokens("TILDE - '%s'", l.nextToken.s) l.reset() return startState, true } else if !l.inEscape && next == '\\' { l.inEscape = true } else if l.inEscape { // if in escape, end it l.inEscape = false l.buf += unescape(string(next)) } else { l.buf += string(next) } return inTildeState, true } func inNumOrStrState(l *queryStringLex, next rune, eof bool) (lexState, bool) { // only a non-escaped space ends the tilde (or eof) if eof || (!l.inEscape && next == ' ') { // end number l.nextTokenType = tNUMBER l.nextToken = &yySymType{ s: l.buf, } logDebugTokens("NUMBER - '%s'", l.nextToken.s) l.reset() return startState, true } else if !l.inEscape && next == '\\' { l.inEscape = true return inNumOrStrState, true } else if l.inEscape { // if in escape, end it l.inEscape = false l.buf += unescape(string(next)) // go directly to string, no successfully or unsuccessfully // escaped string results in a valid number return inStrState, true } // see where to go if !l.seenDot && next == '.' { // stay in this state l.buf += string(next) return inNumOrStrState, true } else if unicode.IsDigit(next) { l.buf += string(next) return inNumOrStrState, true } // doesn't look like an number, transition l.buf += string(next) return inStrState, true } func inStrState(l *queryStringLex, next rune, eof bool) (lexState, bool) { // end on non-escped space, colon, tilde, boost (or eof) if eof || (!l.inEscape && (next == ' ' || next == ':' || next == '^' || next == '~')) { // end string l.nextTokenType = tSTRING l.nextToken = &yySymType{ s: l.buf, } logDebugTokens("STRING - '%s'", l.nextToken.s) l.reset() consumed := true if !eof && (next == ':' || next == '^' || next == '~') { consumed = false } return startState, consumed } else if !l.inEscape && next == '\\' { l.inEscape = true } else if l.inEscape { // if in escape, end it l.inEscape = false l.buf += unescape(string(next)) } else { l.buf += string(next) } return inStrState, true } func logDebugTokens(format string, v ...interface{}) { if debugLexer { logger.Printf(format, v...) } }