diff options
Diffstat (limited to 'vendor/github.com/BurntSushi/toml/lex.go')
-rw-r--r-- | vendor/github.com/BurntSushi/toml/lex.go | 122 |
1 files changed, 85 insertions, 37 deletions
diff --git a/vendor/github.com/BurntSushi/toml/lex.go b/vendor/github.com/BurntSushi/toml/lex.go index d4d7087..a1016d9 100644 --- a/vendor/github.com/BurntSushi/toml/lex.go +++ b/vendor/github.com/BurntSushi/toml/lex.go @@ -17,6 +17,7 @@ const ( itemEOF itemText itemString + itemStringEsc itemRawString itemMultilineString itemRawMultilineString @@ -46,12 +47,14 @@ func (p Position) String() string { } type lexer struct { - input string - start int - pos int - line int - state stateFn - items chan item + input string + start int + pos int + line int + state stateFn + items chan item + tomlNext bool + esc bool // Allow for backing up up to 4 runes. This is necessary because TOML // contains 3-rune tokens (""" and '''). @@ -87,13 +90,14 @@ func (lx *lexer) nextItem() item { } } -func lex(input string) *lexer { +func lex(input string, tomlNext bool) *lexer { lx := &lexer{ - input: input, - state: lexTop, - items: make(chan item, 10), - stack: make([]stateFn, 0, 10), - line: 1, + input: input, + state: lexTop, + items: make(chan item, 10), + stack: make([]stateFn, 0, 10), + line: 1, + tomlNext: tomlNext, } return lx } @@ -162,7 +166,7 @@ func (lx *lexer) next() (r rune) { } r, w := utf8.DecodeRuneInString(lx.input[lx.pos:]) - if r == utf8.RuneError { + if r == utf8.RuneError && w == 1 { lx.error(errLexUTF8{lx.input[lx.pos]}) return utf8.RuneError } @@ -268,7 +272,7 @@ func (lx *lexer) errorPos(start, length int, err error) stateFn { } // errorf is like error, and creates a new error. -func (lx *lexer) errorf(format string, values ...interface{}) stateFn { +func (lx *lexer) errorf(format string, values ...any) stateFn { if lx.atEOF { pos := lx.getPos() pos.Line-- @@ -331,9 +335,7 @@ func lexTopEnd(lx *lexer) stateFn { lx.emit(itemEOF) return nil } - return lx.errorf( - "expected a top-level item to end with a newline, comment, or EOF, but got %q instead", - r) + return lx.errorf("expected a top-level item to end with a newline, comment, or EOF, but got %q instead", r) } // lexTable lexes the beginning of a table. Namely, it makes sure that @@ -408,7 +410,7 @@ func lexTableNameEnd(lx *lexer) stateFn { // Lexes only one part, e.g. only 'a' inside 'a.b'. func lexBareName(lx *lexer) stateFn { r := lx.next() - if isBareKeyChar(r) { + if isBareKeyChar(r, lx.tomlNext) { return lexBareName } lx.backup() @@ -618,6 +620,9 @@ func lexInlineTableValue(lx *lexer) stateFn { case isWhitespace(r): return lexSkip(lx, lexInlineTableValue) case isNL(r): + if lx.tomlNext { + return lexSkip(lx, lexInlineTableValue) + } return lx.errorPrevLine(errLexInlineTableNL{}) case r == '#': lx.push(lexInlineTableValue) @@ -640,6 +645,9 @@ func lexInlineTableValueEnd(lx *lexer) stateFn { case isWhitespace(r): return lexSkip(lx, lexInlineTableValueEnd) case isNL(r): + if lx.tomlNext { + return lexSkip(lx, lexInlineTableValueEnd) + } return lx.errorPrevLine(errLexInlineTableNL{}) case r == '#': lx.push(lexInlineTableValueEnd) @@ -648,6 +656,9 @@ func lexInlineTableValueEnd(lx *lexer) stateFn { lx.ignore() lx.skip(isWhitespace) if lx.peek() == '}' { + if lx.tomlNext { + return lexInlineTableValueEnd + } return lx.errorf("trailing comma not allowed in inline tables") } return lexInlineTableValue @@ -687,7 +698,12 @@ func lexString(lx *lexer) stateFn { return lexStringEscape case r == '"': lx.backup() - lx.emit(itemString) + if lx.esc { + lx.esc = false + lx.emit(itemStringEsc) + } else { + lx.emit(itemString) + } lx.next() lx.ignore() return lx.pop() @@ -737,6 +753,7 @@ func lexMultilineString(lx *lexer) stateFn { lx.backup() /// backup: don't include the """ in the item. lx.backup() lx.backup() + lx.esc = false lx.emit(itemMultilineString) lx.next() /// Read over ''' again and discard it. lx.next() @@ -770,8 +787,8 @@ func lexRawString(lx *lexer) stateFn { } } -// lexMultilineRawString consumes a raw string. Nothing can be escaped in such -// a string. It assumes that the beginning ''' has already been consumed and +// lexMultilineRawString consumes a raw string. Nothing can be escaped in such a +// string. It assumes that the beginning triple-' has already been consumed and // ignored. func lexMultilineRawString(lx *lexer) stateFn { r := lx.next() @@ -826,8 +843,14 @@ func lexMultilineStringEscape(lx *lexer) stateFn { } func lexStringEscape(lx *lexer) stateFn { + lx.esc = true r := lx.next() switch r { + case 'e': + if !lx.tomlNext { + return lx.error(errLexEscape{r}) + } + fallthrough case 'b': fallthrough case 't': @@ -846,6 +869,11 @@ func lexStringEscape(lx *lexer) stateFn { fallthrough case '\\': return lx.pop() + case 'x': + if !lx.tomlNext { + return lx.error(errLexEscape{r}) + } + return lexHexEscape case 'u': return lexShortUnicodeEscape case 'U': @@ -854,14 +882,23 @@ func lexStringEscape(lx *lexer) stateFn { return lx.error(errLexEscape{r}) } +func lexHexEscape(lx *lexer) stateFn { + var r rune + for i := 0; i < 2; i++ { + r = lx.next() + if !isHex(r) { + return lx.errorf(`expected two hexadecimal digits after '\x', but got %q instead`, lx.current()) + } + } + return lx.pop() +} + func lexShortUnicodeEscape(lx *lexer) stateFn { var r rune for i := 0; i < 4; i++ { r = lx.next() - if !isHexadecimal(r) { - return lx.errorf( - `expected four hexadecimal digits after '\u', but got %q instead`, - lx.current()) + if !isHex(r) { + return lx.errorf(`expected four hexadecimal digits after '\u', but got %q instead`, lx.current()) } } return lx.pop() @@ -871,10 +908,8 @@ func lexLongUnicodeEscape(lx *lexer) stateFn { var r rune for i := 0; i < 8; i++ { r = lx.next() - if !isHexadecimal(r) { - return lx.errorf( - `expected eight hexadecimal digits after '\U', but got %q instead`, - lx.current()) + if !isHex(r) { + return lx.errorf(`expected eight hexadecimal digits after '\U', but got %q instead`, lx.current()) } } return lx.pop() @@ -941,7 +976,7 @@ func lexDatetime(lx *lexer) stateFn { // lexHexInteger consumes a hexadecimal integer after seeing the '0x' prefix. func lexHexInteger(lx *lexer) stateFn { r := lx.next() - if isHexadecimal(r) { + if isHex(r) { return lexHexInteger } switch r { @@ -1075,7 +1110,7 @@ func lexBaseNumberOrDate(lx *lexer) stateFn { return lexOctalInteger case 'x': r = lx.peek() - if !isHexadecimal(r) { + if !isHex(r) { lx.errorf("not a hexidecimal number: '%s%c'", lx.current(), r) } return lexHexInteger @@ -1173,7 +1208,7 @@ func (itype itemType) String() string { return "EOF" case itemText: return "Text" - case itemString, itemRawString, itemMultilineString, itemRawMultilineString: + case itemString, itemStringEsc, itemRawString, itemMultilineString, itemRawMultilineString: return "String" case itemBool: return "Bool" @@ -1206,7 +1241,7 @@ func (itype itemType) String() string { } func (item item) String() string { - return fmt.Sprintf("(%s, %s)", item.typ.String(), item.val) + return fmt.Sprintf("(%s, %s)", item.typ, item.val) } func isWhitespace(r rune) bool { return r == '\t' || r == ' ' } @@ -1222,10 +1257,23 @@ func isControl(r rune) bool { // Control characters except \t, \r, \n func isDigit(r rune) bool { return r >= '0' && r <= '9' } func isBinary(r rune) bool { return r == '0' || r == '1' } func isOctal(r rune) bool { return r >= '0' && r <= '7' } -func isHexadecimal(r rune) bool { - return (r >= '0' && r <= '9') || (r >= 'a' && r <= 'f') || (r >= 'A' && r <= 'F') -} -func isBareKeyChar(r rune) bool { +func isHex(r rune) bool { return (r >= '0' && r <= '9') || (r|0x20 >= 'a' && r|0x20 <= 'f') } +func isBareKeyChar(r rune, tomlNext bool) bool { + if tomlNext { + return (r >= 'A' && r <= 'Z') || + (r >= 'a' && r <= 'z') || + (r >= '0' && r <= '9') || + r == '_' || r == '-' || + r == 0xb2 || r == 0xb3 || r == 0xb9 || (r >= 0xbc && r <= 0xbe) || + (r >= 0xc0 && r <= 0xd6) || (r >= 0xd8 && r <= 0xf6) || (r >= 0xf8 && r <= 0x037d) || + (r >= 0x037f && r <= 0x1fff) || + (r >= 0x200c && r <= 0x200d) || (r >= 0x203f && r <= 0x2040) || + (r >= 0x2070 && r <= 0x218f) || (r >= 0x2460 && r <= 0x24ff) || + (r >= 0x2c00 && r <= 0x2fef) || (r >= 0x3001 && r <= 0xd7ff) || + (r >= 0xf900 && r <= 0xfdcf) || (r >= 0xfdf0 && r <= 0xfffd) || + (r >= 0x10000 && r <= 0xeffff) + } + return (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || |