aboutsummaryrefslogtreecommitdiff
path: root/vendor/github.com/BurntSushi/toml/lex.go
diff options
context:
space:
mode:
authorGibheer <gibheer+git@zero-knowledge.org>2022-11-11 11:22:50 +0100
committerGibheer <gibheer+git@zero-knowledge.org>2022-11-11 11:22:50 +0100
commitfe6bd04947e26a962fab3cf7a354abd44333bda6 (patch)
treede4714364747c05d391ab665176413eb24938545 /vendor/github.com/BurntSushi/toml/lex.go
parentdc9dfb76ff9375e6368e9e05a40e6dc07b325a8d (diff)
update dependencies
Diffstat (limited to 'vendor/github.com/BurntSushi/toml/lex.go')
-rw-r--r--vendor/github.com/BurntSushi/toml/lex.go374
1 files changed, 191 insertions, 183 deletions
diff --git a/vendor/github.com/BurntSushi/toml/lex.go b/vendor/github.com/BurntSushi/toml/lex.go
index adc4eb5..d4d7087 100644
--- a/vendor/github.com/BurntSushi/toml/lex.go
+++ b/vendor/github.com/BurntSushi/toml/lex.go
@@ -37,28 +37,14 @@ const (
itemInlineTableEnd
)
-const (
- eof = 0
- comma = ','
- tableStart = '['
- tableEnd = ']'
- arrayTableStart = '['
- arrayTableEnd = ']'
- tableSep = '.'
- keySep = '='
- arrayStart = '['
- arrayEnd = ']'
- commentStart = '#'
- stringStart = '"'
- stringEnd = '"'
- rawStringStart = '\''
- rawStringEnd = '\''
- inlineTableStart = '{'
- inlineTableEnd = '}'
-)
+const eof = 0
type stateFn func(lx *lexer) stateFn
+func (p Position) String() string {
+ return fmt.Sprintf("at line %d; start %d; length %d", p.Line, p.Start, p.Len)
+}
+
type lexer struct {
input string
start int
@@ -67,26 +53,26 @@ type lexer struct {
state stateFn
items chan item
- // Allow for backing up up to four runes.
- // This is necessary because TOML contains 3-rune tokens (""" and ''').
+ // Allow for backing up up to 4 runes. This is necessary because TOML
+ // contains 3-rune tokens (""" and ''').
prevWidths [4]int
- nprev int // how many of prevWidths are in use
- // If we emit an eof, we can still back up, but it is not OK to call
- // next again.
- atEOF bool
+ nprev int // how many of prevWidths are in use
+ atEOF bool // If we emit an eof, we can still back up, but it is not OK to call next again.
// A stack of state functions used to maintain context.
- // The idea is to reuse parts of the state machine in various places.
- // For example, values can appear at the top level or within arbitrarily
- // nested arrays. The last state on the stack is used after a value has
- // been lexed. Similarly for comments.
+ //
+ // The idea is to reuse parts of the state machine in various places. For
+ // example, values can appear at the top level or within arbitrarily nested
+ // arrays. The last state on the stack is used after a value has been lexed.
+ // Similarly for comments.
stack []stateFn
}
type item struct {
- typ itemType
- val string
- line int
+ typ itemType
+ val string
+ err error
+ pos Position
}
func (lx *lexer) nextItem() item {
@@ -96,7 +82,7 @@ func (lx *lexer) nextItem() item {
return item
default:
lx.state = lx.state(lx)
- //fmt.Printf(" STATE %-24s current: %-10q stack: %s\n", lx.state, lx.current(), lx.stack)
+ //fmt.Printf(" STATE %-24s current: %-10s stack: %s\n", lx.state, lx.current(), lx.stack)
}
}
}
@@ -105,9 +91,9 @@ func lex(input string) *lexer {
lx := &lexer{
input: input,
state: lexTop,
- line: 1,
items: make(chan item, 10),
stack: make([]stateFn, 0, 10),
+ line: 1,
}
return lx
}
@@ -129,13 +115,30 @@ func (lx *lexer) current() string {
return lx.input[lx.start:lx.pos]
}
+func (lx lexer) getPos() Position {
+ p := Position{
+ Line: lx.line,
+ Start: lx.start,
+ Len: lx.pos - lx.start,
+ }
+ if p.Len <= 0 {
+ p.Len = 1
+ }
+ return p
+}
+
func (lx *lexer) emit(typ itemType) {
- lx.items <- item{typ, lx.current(), lx.line}
+ // Needed for multiline strings ending with an incomplete UTF-8 sequence.
+ if lx.start > lx.pos {
+ lx.error(errLexUTF8{lx.input[lx.pos]})
+ return
+ }
+ lx.items <- item{typ: typ, pos: lx.getPos(), val: lx.current()}
lx.start = lx.pos
}
func (lx *lexer) emitTrim(typ itemType) {
- lx.items <- item{typ, strings.TrimSpace(lx.current()), lx.line}
+ lx.items <- item{typ: typ, pos: lx.getPos(), val: strings.TrimSpace(lx.current())}
lx.start = lx.pos
}
@@ -160,7 +163,13 @@ func (lx *lexer) next() (r rune) {
r, w := utf8.DecodeRuneInString(lx.input[lx.pos:])
if r == utf8.RuneError {
- lx.errorf("invalid UTF-8 byte at position %d (line %d): 0x%02x", lx.pos, lx.line, lx.input[lx.pos])
+ lx.error(errLexUTF8{lx.input[lx.pos]})
+ return utf8.RuneError
+ }
+
+ // Note: don't use peek() here, as this calls next().
+ if isControl(r) || (r == '\r' && (len(lx.input)-1 == lx.pos || lx.input[lx.pos+1] != '\n')) {
+ lx.errorControlChar(r)
return utf8.RuneError
}
@@ -188,6 +197,7 @@ func (lx *lexer) backup() {
lx.prevWidths[1] = lx.prevWidths[2]
lx.prevWidths[2] = lx.prevWidths[3]
lx.nprev--
+
lx.pos -= w
if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' {
lx.line--
@@ -223,18 +233,58 @@ func (lx *lexer) skip(pred func(rune) bool) {
}
}
-// errorf stops all lexing by emitting an error and returning `nil`.
+// error stops all lexing by emitting an error and returning `nil`.
+//
// Note that any value that is a character is escaped if it's a special
// character (newlines, tabs, etc.).
+func (lx *lexer) error(err error) stateFn {
+ if lx.atEOF {
+ return lx.errorPrevLine(err)
+ }
+ lx.items <- item{typ: itemError, pos: lx.getPos(), err: err}
+ return nil
+}
+
+// errorfPrevline is like error(), but sets the position to the last column of
+// the previous line.
+//
+// This is so that unexpected EOF or NL errors don't show on a new blank line.
+func (lx *lexer) errorPrevLine(err error) stateFn {
+ pos := lx.getPos()
+ pos.Line--
+ pos.Len = 1
+ pos.Start = lx.pos - 1
+ lx.items <- item{typ: itemError, pos: pos, err: err}
+ return nil
+}
+
+// errorPos is like error(), but allows explicitly setting the position.
+func (lx *lexer) errorPos(start, length int, err error) stateFn {
+ pos := lx.getPos()
+ pos.Start = start
+ pos.Len = length
+ lx.items <- item{typ: itemError, pos: pos, err: err}
+ return nil
+}
+
+// errorf is like error, and creates a new error.
func (lx *lexer) errorf(format string, values ...interface{}) stateFn {
- lx.items <- item{
- itemError,
- fmt.Sprintf(format, values...),
- lx.line,
+ if lx.atEOF {
+ pos := lx.getPos()
+ pos.Line--
+ pos.Len = 1
+ pos.Start = lx.pos - 1
+ lx.items <- item{typ: itemError, pos: pos, err: fmt.Errorf(format, values...)}
+ return nil
}
+ lx.items <- item{typ: itemError, pos: lx.getPos(), err: fmt.Errorf(format, values...)}
return nil
}
+func (lx *lexer) errorControlChar(cc rune) stateFn {
+ return lx.errorPos(lx.pos-1, 1, errLexControl{cc})
+}
+
// lexTop consumes elements at the top level of TOML data.
func lexTop(lx *lexer) stateFn {
r := lx.next()
@@ -242,10 +292,10 @@ func lexTop(lx *lexer) stateFn {
return lexSkip(lx, lexTop)
}
switch r {
- case commentStart:
+ case '#':
lx.push(lexTop)
return lexCommentStart
- case tableStart:
+ case '[':
return lexTableStart
case eof:
if lx.pos > lx.start {
@@ -268,7 +318,7 @@ func lexTop(lx *lexer) stateFn {
func lexTopEnd(lx *lexer) stateFn {
r := lx.next()
switch {
- case r == commentStart:
+ case r == '#':
// a comment will read to a newline for us.
lx.push(lexTop)
return lexCommentStart
@@ -292,7 +342,7 @@ func lexTopEnd(lx *lexer) stateFn {
// It also handles the case that this is an item in an array of tables.
// e.g., '[[name]]'.
func lexTableStart(lx *lexer) stateFn {
- if lx.peek() == arrayTableStart {
+ if lx.peek() == '[' {
lx.next()
lx.emit(itemArrayTableStart)
lx.push(lexArrayTableEnd)
@@ -309,10 +359,8 @@ func lexTableEnd(lx *lexer) stateFn {
}
func lexArrayTableEnd(lx *lexer) stateFn {
- if r := lx.next(); r != arrayTableEnd {
- return lx.errorf(
- "expected end of table array name delimiter %q, but got %q instead",
- arrayTableEnd, r)
+ if r := lx.next(); r != ']' {
+ return lx.errorf("expected end of table array name delimiter ']', but got %q instead", r)
}
lx.emit(itemArrayTableEnd)
return lexTopEnd
@@ -321,11 +369,11 @@ func lexArrayTableEnd(lx *lexer) stateFn {
func lexTableNameStart(lx *lexer) stateFn {
lx.skip(isWhitespace)
switch r := lx.peek(); {
- case r == tableEnd || r == eof:
+ case r == ']' || r == eof:
return lx.errorf("unexpected end of table name (table names cannot be empty)")
- case r == tableSep:
+ case r == '.':
return lx.errorf("unexpected table separator (table names cannot be empty)")
- case r == stringStart || r == rawStringStart:
+ case r == '"' || r == '\'':
lx.ignore()
lx.push(lexTableNameEnd)
return lexQuotedName
@@ -342,10 +390,10 @@ func lexTableNameEnd(lx *lexer) stateFn {
switch r := lx.next(); {
case isWhitespace(r):
return lexTableNameEnd
- case r == tableSep:
+ case r == '.':
lx.ignore()
return lexTableNameStart
- case r == tableEnd:
+ case r == ']':
return lx.pop()
default:
return lx.errorf("expected '.' or ']' to end table name, but got %q instead", r)
@@ -379,10 +427,10 @@ func lexQuotedName(lx *lexer) stateFn {
switch {
case isWhitespace(r):
return lexSkip(lx, lexValue)
- case r == stringStart:
+ case r == '"':
lx.ignore() // ignore the '"'
return lexString
- case r == rawStringStart:
+ case r == '\'':
lx.ignore() // ignore the "'"
return lexRawString
case r == eof:
@@ -400,7 +448,7 @@ func lexKeyStart(lx *lexer) stateFn {
return lx.errorf("unexpected '=': key name appears blank")
case r == '.':
return lx.errorf("unexpected '.': keys cannot start with a '.'")
- case r == stringStart || r == rawStringStart:
+ case r == '"' || r == '\'':
lx.ignore()
fallthrough
default: // Bare key
@@ -416,7 +464,7 @@ func lexKeyNameStart(lx *lexer) stateFn {
return lx.errorf("unexpected '='")
case r == '.':
return lx.errorf("unexpected '.'")
- case r == stringStart || r == rawStringStart:
+ case r == '"' || r == '\'':
lx.ignore()
lx.push(lexKeyEnd)
return lexQuotedName
@@ -434,7 +482,7 @@ func lexKeyEnd(lx *lexer) stateFn {
case isWhitespace(r):
return lexSkip(lx, lexKeyEnd)
case r == eof:
- return lx.errorf("unexpected EOF; expected key separator %q", keySep)
+ return lx.errorf("unexpected EOF; expected key separator '='")
case r == '.':
lx.ignore()
return lexKeyNameStart
@@ -461,17 +509,17 @@ func lexValue(lx *lexer) stateFn {
return lexNumberOrDateStart
}
switch r {
- case arrayStart:
+ case '[':
lx.ignore()
lx.emit(itemArray)
return lexArrayValue
- case inlineTableStart:
+ case '{':
lx.ignore()
lx.emit(itemInlineTableStart)
return lexInlineTableValue
- case stringStart:
- if lx.accept(stringStart) {
- if lx.accept(stringStart) {
+ case '"':
+ if lx.accept('"') {
+ if lx.accept('"') {
lx.ignore() // Ignore """
return lexMultilineString
}
@@ -479,9 +527,9 @@ func lexValue(lx *lexer) stateFn {
}
lx.ignore() // ignore the '"'
return lexString
- case rawStringStart:
- if lx.accept(rawStringStart) {
- if lx.accept(rawStringStart) {
+ case '\'':
+ if lx.accept('\'') {
+ if lx.accept('\'') {
lx.ignore() // Ignore """
return lexMultilineRawString
}
@@ -520,14 +568,12 @@ func lexArrayValue(lx *lexer) stateFn {
switch {
case isWhitespace(r) || isNL(r):
return lexSkip(lx, lexArrayValue)
- case r == commentStart:
+ case r == '#':
lx.push(lexArrayValue)
return lexCommentStart
- case r == comma:
+ case r == ',':
return lx.errorf("unexpected comma")
- case r == arrayEnd:
- // NOTE(caleb): The spec isn't clear about whether you can have
- // a trailing comma or not, so we'll allow it.
+ case r == ']':
return lexArrayEnd
}
@@ -540,22 +586,20 @@ func lexArrayValue(lx *lexer) stateFn {
// the next value (or the end of the array): it ignores whitespace and newlines
// and expects either a ',' or a ']'.
func lexArrayValueEnd(lx *lexer) stateFn {
- r := lx.next()
- switch {
+ switch r := lx.next(); {
case isWhitespace(r) || isNL(r):
return lexSkip(lx, lexArrayValueEnd)
- case r == commentStart:
+ case r == '#':
lx.push(lexArrayValueEnd)
return lexCommentStart
- case r == comma:
+ case r == ',':
lx.ignore()
return lexArrayValue // move on to the next value
- case r == arrayEnd:
+ case r == ']':
return lexArrayEnd
+ default:
+ return lx.errorf("expected a comma (',') or array terminator (']'), but got %s", runeOrEOF(r))
}
- return lx.errorf(
- "expected a comma or array terminator %q, but got %s instead",
- arrayEnd, runeOrEOF(r))
}
// lexArrayEnd finishes the lexing of an array.
@@ -574,13 +618,13 @@ func lexInlineTableValue(lx *lexer) stateFn {
case isWhitespace(r):
return lexSkip(lx, lexInlineTableValue)
case isNL(r):
- return lx.errorf("newlines not allowed within inline tables")
- case r == commentStart:
+ return lx.errorPrevLine(errLexInlineTableNL{})
+ case r == '#':
lx.push(lexInlineTableValue)
return lexCommentStart
- case r == comma:
+ case r == ',':
return lx.errorf("unexpected comma")
- case r == inlineTableEnd:
+ case r == '}':
return lexInlineTableEnd
}
lx.backup()
@@ -596,23 +640,21 @@ func lexInlineTableValueEnd(lx *lexer) stateFn {
case isWhitespace(r):
return lexSkip(lx, lexInlineTableValueEnd)
case isNL(r):
- return lx.errorf("newlines not allowed within inline tables")
- case r == commentStart:
+ return lx.errorPrevLine(errLexInlineTableNL{})
+ case r == '#':
lx.push(lexInlineTableValueEnd)
return lexCommentStart
- case r == comma:
+ case r == ',':
lx.ignore()
lx.skip(isWhitespace)
if lx.peek() == '}' {
return lx.errorf("trailing comma not allowed in inline tables")
}
return lexInlineTableValue
- case r == inlineTableEnd:
+ case r == '}':
return lexInlineTableEnd
default:
- return lx.errorf(
- "expected a comma or an inline table terminator %q, but got %s instead",
- inlineTableEnd, runeOrEOF(r))
+ return lx.errorf("expected a comma or an inline table terminator '}', but got %s instead", runeOrEOF(r))
}
}
@@ -638,14 +680,12 @@ func lexString(lx *lexer) stateFn {
switch {
case r == eof:
return lx.errorf(`unexpected EOF; expected '"'`)
- case isControl(r) || r == '\r':
- return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
case isNL(r):
- return lx.errorf("strings cannot contain newlines")
+ return lx.errorPrevLine(errLexStringNL{})
case r == '\\':
lx.push(lexString)
return lexStringEscape
- case r == stringEnd:
+ case r == '"':
lx.backup()
lx.emit(itemString)
lx.next()
@@ -660,26 +700,33 @@ func lexString(lx *lexer) stateFn {
func lexMultilineString(lx *lexer) stateFn {
r := lx.next()
switch r {
+ default:
+ return lexMultilineString
case eof:
return lx.errorf(`unexpected EOF; expected '"""'`)
- case '\r':
- if lx.peek() != '\n' {
- return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
- }
- return lexMultilineString
case '\\':
return lexMultilineStringEscape
- case stringEnd:
+ case '"':
/// Found " → try to read two more "".
- if lx.accept(stringEnd) {
- if lx.accept(stringEnd) {
+ if lx.accept('"') {
+ if lx.accept('"') {
/// Peek ahead: the string can contain " and "", including at the
/// end: """str"""""
/// 6 or more at the end, however, is an error.
- if lx.peek() == stringEnd {
+ if lx.peek() == '"' {
/// Check if we already lexed 5 's; if so we have 6 now, and
/// that's just too many man!
- if strings.HasSuffix(lx.current(), `"""""`) {
+ ///
+ /// Second check is for the edge case:
+ ///
+ /// two quotes allowed.
+ /// vv
+ /// """lol \""""""
+ /// ^^ ^^^---- closing three
+ /// escaped
+ ///
+ /// But ugly, but it works
+ if strings.HasSuffix(lx.current(), `"""""`) && !strings.HasSuffix(lx.current(), `\"""""`) {
return lx.errorf(`unexpected '""""""'`)
}
lx.backup()
@@ -699,12 +746,8 @@ func lexMultilineString(lx *lexer) stateFn {
}
lx.backup()
}
+ return lexMultilineString
}
-
- if isControl(r) {
- return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
- }
- return lexMultilineString
}
// lexRawString consumes a raw string. Nothing can be escaped in such a string.
@@ -712,43 +755,39 @@ func lexMultilineString(lx *lexer) stateFn {
func lexRawString(lx *lexer) stateFn {
r := lx.next()
switch {
+ default:
+ return lexRawString
case r == eof:
return lx.errorf(`unexpected EOF; expected "'"`)
- case isControl(r) || r == '\r':
- return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
case isNL(r):
- return lx.errorf("strings cannot contain newlines")
- case r == rawStringEnd:
+ return lx.errorPrevLine(errLexStringNL{})
+ case r == '\'':
lx.backup()
lx.emit(itemRawString)
lx.next()
lx.ignore()
return lx.pop()
}
- return lexRawString
}
// lexMultilineRawString consumes a raw string. Nothing can be escaped in such
-// a string. It assumes that the beginning "'''" has already been consumed and
+// a string. It assumes that the beginning ''' has already been consumed and
// ignored.
func lexMultilineRawString(lx *lexer) stateFn {
r := lx.next()
switch r {
+ default:
+ return lexMultilineRawString
case eof:
return lx.errorf(`unexpected EOF; expected "'''"`)
- case '\r':
- if lx.peek() != '\n' {
- return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
- }
- return lexMultilineRawString
- case rawStringEnd:
+ case '\'':
/// Found ' → try to read two more ''.
- if lx.accept(rawStringEnd) {
- if lx.accept(rawStringEnd) {
+ if lx.accept('\'') {
+ if lx.accept('\'') {
/// Peek ahead: the string can contain ' and '', including at the
/// end: '''str'''''
/// 6 or more at the end, however, is an error.
- if lx.peek() == rawStringEnd {
+ if lx.peek() == '\'' {
/// Check if we already lexed 5 's; if so we have 6 now, and
/// that's just too many man!
if strings.HasSuffix(lx.current(), "'''''") {
@@ -771,19 +810,14 @@ func lexMultilineRawString(lx *lexer) stateFn {
}
lx.backup()
}
+ return lexMultilineRawString
}
-
- if isControl(r) {
- return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
- }
- return lexMultilineRawString
}
// lexMultilineStringEscape consumes an escaped character. It assumes that the
// preceding '\\' has already been consumed.
func lexMultilineStringEscape(lx *lexer) stateFn {
- // Handle the special case first:
- if isNL(lx.next()) {
+ if isNL(lx.next()) { /// \ escaping newline.
return lexMultilineString
}
lx.backup()
@@ -817,8 +851,7 @@ func lexStringEscape(lx *lexer) stateFn {
case 'U':
return lexLongUnicodeEscape
}
- return lx.errorf("invalid escape character %q; only the following escape characters are allowed: "+
- `\b, \t, \n, \f, \r, \", \\, \uXXXX, and \UXXXXXXXX`, r)
+ return lx.error(errLexEscape{r})
}
func lexShortUnicodeEscape(lx *lexer) stateFn {
@@ -1108,8 +1141,6 @@ func lexComment(lx *lexer) stateFn {
lx.backup()
lx.emit(itemText)
return lx.pop()
- case isControl(r):
- return lx.errorf("control characters are not allowed inside comments: '0x%02x'", r)
default:
return lexComment
}
@@ -1121,52 +1152,6 @@ func lexSkip(lx *lexer, nextState stateFn) stateFn {
return nextState
}
-// isWhitespace returns true if `r` is a whitespace character according
-// to the spec.
-func isWhitespace(r rune) bool {
- return r == '\t' || r == ' '
-}
-
-func isNL(r rune) bool {
- return r == '\n' || r == '\r'
-}
-
-// Control characters except \n, \t
-func isControl(r rune) bool {
- switch r {
- case '\t', '\r', '\n':
- return false
- default:
- return (r >= 0x00 && r <= 0x1f) || r == 0x7f
- }
-}
-
-func isDigit(r rune) bool {
- return r >= '0' && r <= '9'
-}
-
-func isHexadecimal(r rune) bool {
- return (r >= '0' && r <= '9') ||
- (r >= 'a' && r <= 'f') ||
- (r >= 'A' && r <= 'F')
-}
-
-func isOctal(r rune) bool {
- return r >= '0' && r <= '7'
-}
-
-func isBinary(r rune) bool {
- return r == '0' || r == '1'
-}
-
-func isBareKeyChar(r rune) bool {
- return (r >= 'A' && r <= 'Z') ||
- (r >= 'a' && r <= 'z') ||
- (r >= '0' && r <= '9') ||
- r == '_' ||
- r == '-'
-}
-
func (s stateFn) String() string {
name := runtime.FuncForPC(reflect.ValueOf(s).Pointer()).Name()
if i := strings.LastIndexByte(name, '.'); i > -1 {
@@ -1223,3 +1208,26 @@ func (itype itemType) String() string {
func (item item) String() string {
return fmt.Sprintf("(%s, %s)", item.typ.String(), item.val)
}
+
+func isWhitespace(r rune) bool { return r == '\t' || r == ' ' }
+func isNL(r rune) bool { return r == '\n' || r == '\r' }
+func isControl(r rune) bool { // Control characters except \t, \r, \n
+ switch r {
+ case '\t', '\r', '\n':
+ return false
+ default:
+ return (r >= 0x00 && r <= 0x1f) || r == 0x7f
+ }
+}
+func isDigit(r rune) bool { return r >= '0' && r <= '9' }
+func isBinary(r rune) bool { return r == '0' || r == '1' }
+func isOctal(r rune) bool { return r >= '0' && r <= '7' }
+func isHexadecimal(r rune) bool {
+ return (r >= '0' && r <= '9') || (r >= 'a' && r <= 'f') || (r >= 'A' && r <= 'F')
+}
+func isBareKeyChar(r rune) bool {
+ return (r >= 'A' && r <= 'Z') ||
+ (r >= 'a' && r <= 'z') ||
+ (r >= '0' && r <= '9') ||
+ r == '_' || r == '-'
+}