aboutsummaryrefslogtreecommitdiff
path: root/vendor/github.com/BurntSushi/toml/lex.go
diff options
context:
space:
mode:
authorGibheer <gibheer+git@zero-knowledge.org>2021-12-02 17:54:14 +0100
committerGibheer <gibheer+git@zero-knowledge.org>2021-12-02 17:54:14 +0100
commitfa05045d31c05c8928020f05f1d281901d983b2b (patch)
tree2ed3bac60302bfb14535a169f4b3e10d18fc6120 /vendor/github.com/BurntSushi/toml/lex.go
parent41d4805d584161ca16b8187194385e47c36422a6 (diff)
cmd/monfront - import monfront from separate repository
This is the import from the separate monfront repository. The history could not be imported, but this should suffice.
Diffstat (limited to 'vendor/github.com/BurntSushi/toml/lex.go')
-rw-r--r--vendor/github.com/BurntSushi/toml/lex.go1225
1 files changed, 1225 insertions, 0 deletions
diff --git a/vendor/github.com/BurntSushi/toml/lex.go b/vendor/github.com/BurntSushi/toml/lex.go
new file mode 100644
index 0000000..adc4eb5
--- /dev/null
+++ b/vendor/github.com/BurntSushi/toml/lex.go
@@ -0,0 +1,1225 @@
+package toml
+
+import (
+ "fmt"
+ "reflect"
+ "runtime"
+ "strings"
+ "unicode"
+ "unicode/utf8"
+)
+
+type itemType int
+
+const (
+ itemError itemType = iota
+ itemNIL // used in the parser to indicate no type
+ itemEOF
+ itemText
+ itemString
+ itemRawString
+ itemMultilineString
+ itemRawMultilineString
+ itemBool
+ itemInteger
+ itemFloat
+ itemDatetime
+ itemArray // the start of an array
+ itemArrayEnd
+ itemTableStart
+ itemTableEnd
+ itemArrayTableStart
+ itemArrayTableEnd
+ itemKeyStart
+ itemKeyEnd
+ itemCommentStart
+ itemInlineTableStart
+ itemInlineTableEnd
+)
+
+const (
+ eof = 0
+ comma = ','
+ tableStart = '['
+ tableEnd = ']'
+ arrayTableStart = '['
+ arrayTableEnd = ']'
+ tableSep = '.'
+ keySep = '='
+ arrayStart = '['
+ arrayEnd = ']'
+ commentStart = '#'
+ stringStart = '"'
+ stringEnd = '"'
+ rawStringStart = '\''
+ rawStringEnd = '\''
+ inlineTableStart = '{'
+ inlineTableEnd = '}'
+)
+
+type stateFn func(lx *lexer) stateFn
+
+type lexer struct {
+ input string
+ start int
+ pos int
+ line int
+ state stateFn
+ items chan item
+
+ // Allow for backing up up to four runes.
+ // This is necessary because TOML contains 3-rune tokens (""" and ''').
+ prevWidths [4]int
+ nprev int // how many of prevWidths are in use
+ // If we emit an eof, we can still back up, but it is not OK to call
+ // next again.
+ atEOF bool
+
+ // A stack of state functions used to maintain context.
+ // The idea is to reuse parts of the state machine in various places.
+ // For example, values can appear at the top level or within arbitrarily
+ // nested arrays. The last state on the stack is used after a value has
+ // been lexed. Similarly for comments.
+ stack []stateFn
+}
+
+type item struct {
+ typ itemType
+ val string
+ line int
+}
+
+func (lx *lexer) nextItem() item {
+ for {
+ select {
+ case item := <-lx.items:
+ return item
+ default:
+ lx.state = lx.state(lx)
+ //fmt.Printf(" STATE %-24s current: %-10q stack: %s\n", lx.state, lx.current(), lx.stack)
+ }
+ }
+}
+
+func lex(input string) *lexer {
+ lx := &lexer{
+ input: input,
+ state: lexTop,
+ line: 1,
+ items: make(chan item, 10),
+ stack: make([]stateFn, 0, 10),
+ }
+ return lx
+}
+
+func (lx *lexer) push(state stateFn) {
+ lx.stack = append(lx.stack, state)
+}
+
+func (lx *lexer) pop() stateFn {
+ if len(lx.stack) == 0 {
+ return lx.errorf("BUG in lexer: no states to pop")
+ }
+ last := lx.stack[len(lx.stack)-1]
+ lx.stack = lx.stack[0 : len(lx.stack)-1]
+ return last
+}
+
+func (lx *lexer) current() string {
+ return lx.input[lx.start:lx.pos]
+}
+
+func (lx *lexer) emit(typ itemType) {
+ lx.items <- item{typ, lx.current(), lx.line}
+ lx.start = lx.pos
+}
+
+func (lx *lexer) emitTrim(typ itemType) {
+ lx.items <- item{typ, strings.TrimSpace(lx.current()), lx.line}
+ lx.start = lx.pos
+}
+
+func (lx *lexer) next() (r rune) {
+ if lx.atEOF {
+ panic("BUG in lexer: next called after EOF")
+ }
+ if lx.pos >= len(lx.input) {
+ lx.atEOF = true
+ return eof
+ }
+
+ if lx.input[lx.pos] == '\n' {
+ lx.line++
+ }
+ lx.prevWidths[3] = lx.prevWidths[2]
+ lx.prevWidths[2] = lx.prevWidths[1]
+ lx.prevWidths[1] = lx.prevWidths[0]
+ if lx.nprev < 4 {
+ lx.nprev++
+ }
+
+ r, w := utf8.DecodeRuneInString(lx.input[lx.pos:])
+ if r == utf8.RuneError {
+ lx.errorf("invalid UTF-8 byte at position %d (line %d): 0x%02x", lx.pos, lx.line, lx.input[lx.pos])
+ return utf8.RuneError
+ }
+
+ lx.prevWidths[0] = w
+ lx.pos += w
+ return r
+}
+
+// ignore skips over the pending input before this point.
+func (lx *lexer) ignore() {
+ lx.start = lx.pos
+}
+
+// backup steps back one rune. Can be called 4 times between calls to next.
+func (lx *lexer) backup() {
+ if lx.atEOF {
+ lx.atEOF = false
+ return
+ }
+ if lx.nprev < 1 {
+ panic("BUG in lexer: backed up too far")
+ }
+ w := lx.prevWidths[0]
+ lx.prevWidths[0] = lx.prevWidths[1]
+ lx.prevWidths[1] = lx.prevWidths[2]
+ lx.prevWidths[2] = lx.prevWidths[3]
+ lx.nprev--
+ lx.pos -= w
+ if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' {
+ lx.line--
+ }
+}
+
+// accept consumes the next rune if it's equal to `valid`.
+func (lx *lexer) accept(valid rune) bool {
+ if lx.next() == valid {
+ return true
+ }
+ lx.backup()
+ return false
+}
+
+// peek returns but does not consume the next rune in the input.
+func (lx *lexer) peek() rune {
+ r := lx.next()
+ lx.backup()
+ return r
+}
+
+// skip ignores all input that matches the given predicate.
+func (lx *lexer) skip(pred func(rune) bool) {
+ for {
+ r := lx.next()
+ if pred(r) {
+ continue
+ }
+ lx.backup()
+ lx.ignore()
+ return
+ }
+}
+
+// errorf stops all lexing by emitting an error and returning `nil`.
+// Note that any value that is a character is escaped if it's a special
+// character (newlines, tabs, etc.).
+func (lx *lexer) errorf(format string, values ...interface{}) stateFn {
+ lx.items <- item{
+ itemError,
+ fmt.Sprintf(format, values...),
+ lx.line,
+ }
+ return nil
+}
+
+// lexTop consumes elements at the top level of TOML data.
+func lexTop(lx *lexer) stateFn {
+ r := lx.next()
+ if isWhitespace(r) || isNL(r) {
+ return lexSkip(lx, lexTop)
+ }
+ switch r {
+ case commentStart:
+ lx.push(lexTop)
+ return lexCommentStart
+ case tableStart:
+ return lexTableStart
+ case eof:
+ if lx.pos > lx.start {
+ return lx.errorf("unexpected EOF")
+ }
+ lx.emit(itemEOF)
+ return nil
+ }
+
+ // At this point, the only valid item can be a key, so we back up
+ // and let the key lexer do the rest.
+ lx.backup()
+ lx.push(lexTopEnd)
+ return lexKeyStart
+}
+
+// lexTopEnd is entered whenever a top-level item has been consumed. (A value
+// or a table.) It must see only whitespace, and will turn back to lexTop
+// upon a newline. If it sees EOF, it will quit the lexer successfully.
+func lexTopEnd(lx *lexer) stateFn {
+ r := lx.next()
+ switch {
+ case r == commentStart:
+ // a comment will read to a newline for us.
+ lx.push(lexTop)
+ return lexCommentStart
+ case isWhitespace(r):
+ return lexTopEnd
+ case isNL(r):
+ lx.ignore()
+ return lexTop
+ case r == eof:
+ lx.emit(itemEOF)
+ return nil
+ }
+ return lx.errorf(
+ "expected a top-level item to end with a newline, comment, or EOF, but got %q instead",
+ r)
+}
+
+// lexTable lexes the beginning of a table. Namely, it makes sure that
+// it starts with a character other than '.' and ']'.
+// It assumes that '[' has already been consumed.
+// It also handles the case that this is an item in an array of tables.
+// e.g., '[[name]]'.
+func lexTableStart(lx *lexer) stateFn {
+ if lx.peek() == arrayTableStart {
+ lx.next()
+ lx.emit(itemArrayTableStart)
+ lx.push(lexArrayTableEnd)
+ } else {
+ lx.emit(itemTableStart)
+ lx.push(lexTableEnd)
+ }
+ return lexTableNameStart
+}
+
+func lexTableEnd(lx *lexer) stateFn {
+ lx.emit(itemTableEnd)
+ return lexTopEnd
+}
+
+func lexArrayTableEnd(lx *lexer) stateFn {
+ if r := lx.next(); r != arrayTableEnd {
+ return lx.errorf(
+ "expected end of table array name delimiter %q, but got %q instead",
+ arrayTableEnd, r)
+ }
+ lx.emit(itemArrayTableEnd)
+ return lexTopEnd
+}
+
+func lexTableNameStart(lx *lexer) stateFn {
+ lx.skip(isWhitespace)
+ switch r := lx.peek(); {
+ case r == tableEnd || r == eof:
+ return lx.errorf("unexpected end of table name (table names cannot be empty)")
+ case r == tableSep:
+ return lx.errorf("unexpected table separator (table names cannot be empty)")
+ case r == stringStart || r == rawStringStart:
+ lx.ignore()
+ lx.push(lexTableNameEnd)
+ return lexQuotedName
+ default:
+ lx.push(lexTableNameEnd)
+ return lexBareName
+ }
+}
+
+// lexTableNameEnd reads the end of a piece of a table name, optionally
+// consuming whitespace.
+func lexTableNameEnd(lx *lexer) stateFn {
+ lx.skip(isWhitespace)
+ switch r := lx.next(); {
+ case isWhitespace(r):
+ return lexTableNameEnd
+ case r == tableSep:
+ lx.ignore()
+ return lexTableNameStart
+ case r == tableEnd:
+ return lx.pop()
+ default:
+ return lx.errorf("expected '.' or ']' to end table name, but got %q instead", r)
+ }
+}
+
+// lexBareName lexes one part of a key or table.
+//
+// It assumes that at least one valid character for the table has already been
+// read.
+//
+// Lexes only one part, e.g. only 'a' inside 'a.b'.
+func lexBareName(lx *lexer) stateFn {
+ r := lx.next()
+ if isBareKeyChar(r) {
+ return lexBareName
+ }
+ lx.backup()
+ lx.emit(itemText)
+ return lx.pop()
+}
+
+// lexBareName lexes one part of a key or table.
+//
+// It assumes that at least one valid character for the table has already been
+// read.
+//
+// Lexes only one part, e.g. only '"a"' inside '"a".b'.
+func lexQuotedName(lx *lexer) stateFn {
+ r := lx.next()
+ switch {
+ case isWhitespace(r):
+ return lexSkip(lx, lexValue)
+ case r == stringStart:
+ lx.ignore() // ignore the '"'
+ return lexString
+ case r == rawStringStart:
+ lx.ignore() // ignore the "'"
+ return lexRawString
+ case r == eof:
+ return lx.errorf("unexpected EOF; expected value")
+ default:
+ return lx.errorf("expected value but found %q instead", r)
+ }
+}
+
+// lexKeyStart consumes all key parts until a '='.
+func lexKeyStart(lx *lexer) stateFn {
+ lx.skip(isWhitespace)
+ switch r := lx.peek(); {
+ case r == '=' || r == eof:
+ return lx.errorf("unexpected '=': key name appears blank")
+ case r == '.':
+ return lx.errorf("unexpected '.': keys cannot start with a '.'")
+ case r == stringStart || r == rawStringStart:
+ lx.ignore()
+ fallthrough
+ default: // Bare key
+ lx.emit(itemKeyStart)
+ return lexKeyNameStart
+ }
+}
+
+func lexKeyNameStart(lx *lexer) stateFn {
+ lx.skip(isWhitespace)
+ switch r := lx.peek(); {
+ case r == '=' || r == eof:
+ return lx.errorf("unexpected '='")
+ case r == '.':
+ return lx.errorf("unexpected '.'")
+ case r == stringStart || r == rawStringStart:
+ lx.ignore()
+ lx.push(lexKeyEnd)
+ return lexQuotedName
+ default:
+ lx.push(lexKeyEnd)
+ return lexBareName
+ }
+}
+
+// lexKeyEnd consumes the end of a key and trims whitespace (up to the key
+// separator).
+func lexKeyEnd(lx *lexer) stateFn {
+ lx.skip(isWhitespace)
+ switch r := lx.next(); {
+ case isWhitespace(r):
+ return lexSkip(lx, lexKeyEnd)
+ case r == eof:
+ return lx.errorf("unexpected EOF; expected key separator %q", keySep)
+ case r == '.':
+ lx.ignore()
+ return lexKeyNameStart
+ case r == '=':
+ lx.emit(itemKeyEnd)
+ return lexSkip(lx, lexValue)
+ default:
+ return lx.errorf("expected '.' or '=', but got %q instead", r)
+ }
+}
+
+// lexValue starts the consumption of a value anywhere a value is expected.
+// lexValue will ignore whitespace.
+// After a value is lexed, the last state on the next is popped and returned.
+func lexValue(lx *lexer) stateFn {
+ // We allow whitespace to precede a value, but NOT newlines.
+ // In array syntax, the array states are responsible for ignoring newlines.
+ r := lx.next()
+ switch {
+ case isWhitespace(r):
+ return lexSkip(lx, lexValue)
+ case isDigit(r):
+ lx.backup() // avoid an extra state and use the same as above
+ return lexNumberOrDateStart
+ }
+ switch r {
+ case arrayStart:
+ lx.ignore()
+ lx.emit(itemArray)
+ return lexArrayValue
+ case inlineTableStart:
+ lx.ignore()
+ lx.emit(itemInlineTableStart)
+ return lexInlineTableValue
+ case stringStart:
+ if lx.accept(stringStart) {
+ if lx.accept(stringStart) {
+ lx.ignore() // Ignore """
+ return lexMultilineString
+ }
+ lx.backup()
+ }
+ lx.ignore() // ignore the '"'
+ return lexString
+ case rawStringStart:
+ if lx.accept(rawStringStart) {
+ if lx.accept(rawStringStart) {
+ lx.ignore() // Ignore """
+ return lexMultilineRawString
+ }
+ lx.backup()
+ }
+ lx.ignore() // ignore the "'"
+ return lexRawString
+ case '.': // special error case, be kind to users
+ return lx.errorf("floats must start with a digit, not '.'")
+ case 'i', 'n':
+ if (lx.accept('n') && lx.accept('f')) || (lx.accept('a') && lx.accept('n')) {
+ lx.emit(itemFloat)
+ return lx.pop()
+ }
+ case '-', '+':
+ return lexDecimalNumberStart
+ }
+ if unicode.IsLetter(r) {
+ // Be permissive here; lexBool will give a nice error if the
+ // user wrote something like
+ // x = foo
+ // (i.e. not 'true' or 'false' but is something else word-like.)
+ lx.backup()
+ return lexBool
+ }
+ if r == eof {
+ return lx.errorf("unexpected EOF; expected value")
+ }
+ return lx.errorf("expected value but found %q instead", r)
+}
+
+// lexArrayValue consumes one value in an array. It assumes that '[' or ','
+// have already been consumed. All whitespace and newlines are ignored.
+func lexArrayValue(lx *lexer) stateFn {
+ r := lx.next()
+ switch {
+ case isWhitespace(r) || isNL(r):
+ return lexSkip(lx, lexArrayValue)
+ case r == commentStart:
+ lx.push(lexArrayValue)
+ return lexCommentStart
+ case r == comma:
+ return lx.errorf("unexpected comma")
+ case r == arrayEnd:
+ // NOTE(caleb): The spec isn't clear about whether you can have
+ // a trailing comma or not, so we'll allow it.
+ return lexArrayEnd
+ }
+
+ lx.backup()
+ lx.push(lexArrayValueEnd)
+ return lexValue
+}
+
+// lexArrayValueEnd consumes everything between the end of an array value and
+// the next value (or the end of the array): it ignores whitespace and newlines
+// and expects either a ',' or a ']'.
+func lexArrayValueEnd(lx *lexer) stateFn {
+ r := lx.next()
+ switch {
+ case isWhitespace(r) || isNL(r):
+ return lexSkip(lx, lexArrayValueEnd)
+ case r == commentStart:
+ lx.push(lexArrayValueEnd)
+ return lexCommentStart
+ case r == comma:
+ lx.ignore()
+ return lexArrayValue // move on to the next value
+ case r == arrayEnd:
+ return lexArrayEnd
+ }
+ return lx.errorf(
+ "expected a comma or array terminator %q, but got %s instead",
+ arrayEnd, runeOrEOF(r))
+}
+
+// lexArrayEnd finishes the lexing of an array.
+// It assumes that a ']' has just been consumed.
+func lexArrayEnd(lx *lexer) stateFn {
+ lx.ignore()
+ lx.emit(itemArrayEnd)
+ return lx.pop()
+}
+
+// lexInlineTableValue consumes one key/value pair in an inline table.
+// It assumes that '{' or ',' have already been consumed. Whitespace is ignored.
+func lexInlineTableValue(lx *lexer) stateFn {
+ r := lx.next()
+ switch {
+ case isWhitespace(r):
+ return lexSkip(lx, lexInlineTableValue)
+ case isNL(r):
+ return lx.errorf("newlines not allowed within inline tables")
+ case r == commentStart:
+ lx.push(lexInlineTableValue)
+ return lexCommentStart
+ case r == comma:
+ return lx.errorf("unexpected comma")
+ case r == inlineTableEnd:
+ return lexInlineTableEnd
+ }
+ lx.backup()
+ lx.push(lexInlineTableValueEnd)
+ return lexKeyStart
+}
+
+// lexInlineTableValueEnd consumes everything between the end of an inline table
+// key/value pair and the next pair (or the end of the table):
+// it ignores whitespace and expects either a ',' or a '}'.
+func lexInlineTableValueEnd(lx *lexer) stateFn {
+ switch r := lx.next(); {
+ case isWhitespace(r):
+ return lexSkip(lx, lexInlineTableValueEnd)
+ case isNL(r):
+ return lx.errorf("newlines not allowed within inline tables")
+ case r == commentStart:
+ lx.push(lexInlineTableValueEnd)
+ return lexCommentStart
+ case r == comma:
+ lx.ignore()
+ lx.skip(isWhitespace)
+ if lx.peek() == '}' {
+ return lx.errorf("trailing comma not allowed in inline tables")
+ }
+ return lexInlineTableValue
+ case r == inlineTableEnd:
+ return lexInlineTableEnd
+ default:
+ return lx.errorf(
+ "expected a comma or an inline table terminator %q, but got %s instead",
+ inlineTableEnd, runeOrEOF(r))
+ }
+}
+
+func runeOrEOF(r rune) string {
+ if r == eof {
+ return "end of file"
+ }
+ return "'" + string(r) + "'"
+}
+
+// lexInlineTableEnd finishes the lexing of an inline table.
+// It assumes that a '}' has just been consumed.
+func lexInlineTableEnd(lx *lexer) stateFn {
+ lx.ignore()
+ lx.emit(itemInlineTableEnd)
+ return lx.pop()
+}
+
+// lexString consumes the inner contents of a string. It assumes that the
+// beginning '"' has already been consumed and ignored.
+func lexString(lx *lexer) stateFn {
+ r := lx.next()
+ switch {
+ case r == eof:
+ return lx.errorf(`unexpected EOF; expected '"'`)
+ case isControl(r) || r == '\r':
+ return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
+ case isNL(r):
+ return lx.errorf("strings cannot contain newlines")
+ case r == '\\':
+ lx.push(lexString)
+ return lexStringEscape
+ case r == stringEnd:
+ lx.backup()
+ lx.emit(itemString)
+ lx.next()
+ lx.ignore()
+ return lx.pop()
+ }
+ return lexString
+}
+
+// lexMultilineString consumes the inner contents of a string. It assumes that
+// the beginning '"""' has already been consumed and ignored.
+func lexMultilineString(lx *lexer) stateFn {
+ r := lx.next()
+ switch r {
+ case eof:
+ return lx.errorf(`unexpected EOF; expected '"""'`)
+ case '\r':
+ if lx.peek() != '\n' {
+ return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
+ }
+ return lexMultilineString
+ case '\\':
+ return lexMultilineStringEscape
+ case stringEnd:
+ /// Found " → try to read two more "".
+ if lx.accept(stringEnd) {
+ if lx.accept(stringEnd) {
+ /// Peek ahead: the string can contain " and "", including at the
+ /// end: """str"""""
+ /// 6 or more at the end, however, is an error.
+ if lx.peek() == stringEnd {
+ /// Check if we already lexed 5 's; if so we have 6 now, and
+ /// that's just too many man!
+ if strings.HasSuffix(lx.current(), `"""""`) {
+ return lx.errorf(`unexpected '""""""'`)
+ }
+ lx.backup()
+ lx.backup()
+ return lexMultilineString
+ }
+
+ lx.backup() /// backup: don't include the """ in the item.
+ lx.backup()
+ lx.backup()
+ lx.emit(itemMultilineString)
+ lx.next() /// Read over ''' again and discard it.
+ lx.next()
+ lx.next()
+ lx.ignore()
+ return lx.pop()
+ }
+ lx.backup()
+ }
+ }
+
+ if isControl(r) {
+ return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
+ }
+ return lexMultilineString
+}
+
+// lexRawString consumes a raw string. Nothing can be escaped in such a string.
+// It assumes that the beginning "'" has already been consumed and ignored.
+func lexRawString(lx *lexer) stateFn {
+ r := lx.next()
+ switch {
+ case r == eof:
+ return lx.errorf(`unexpected EOF; expected "'"`)
+ case isControl(r) || r == '\r':
+ return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
+ case isNL(r):
+ return lx.errorf("strings cannot contain newlines")
+ case r == rawStringEnd:
+ lx.backup()
+ lx.emit(itemRawString)
+ lx.next()
+ lx.ignore()
+ return lx.pop()
+ }
+ return lexRawString
+}
+
+// lexMultilineRawString consumes a raw string. Nothing can be escaped in such
+// a string. It assumes that the beginning "'''" has already been consumed and
+// ignored.
+func lexMultilineRawString(lx *lexer) stateFn {
+ r := lx.next()
+ switch r {
+ case eof:
+ return lx.errorf(`unexpected EOF; expected "'''"`)
+ case '\r':
+ if lx.peek() != '\n' {
+ return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
+ }
+ return lexMultilineRawString
+ case rawStringEnd:
+ /// Found ' → try to read two more ''.
+ if lx.accept(rawStringEnd) {
+ if lx.accept(rawStringEnd) {
+ /// Peek ahead: the string can contain ' and '', including at the
+ /// end: '''str'''''
+ /// 6 or more at the end, however, is an error.
+ if lx.peek() == rawStringEnd {
+ /// Check if we already lexed 5 's; if so we have 6 now, and
+ /// that's just too many man!
+ if strings.HasSuffix(lx.current(), "'''''") {
+ return lx.errorf(`unexpected "''''''"`)
+ }
+ lx.backup()
+ lx.backup()
+ return lexMultilineRawString
+ }
+
+ lx.backup() /// backup: don't include the ''' in the item.
+ lx.backup()
+ lx.backup()
+ lx.emit(itemRawMultilineString)
+ lx.next() /// Read over ''' again and discard it.
+ lx.next()
+ lx.next()
+ lx.ignore()
+ return lx.pop()
+ }
+ lx.backup()
+ }
+ }
+
+ if isControl(r) {
+ return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
+ }
+ return lexMultilineRawString
+}
+
+// lexMultilineStringEscape consumes an escaped character. It assumes that the
+// preceding '\\' has already been consumed.
+func lexMultilineStringEscape(lx *lexer) stateFn {
+ // Handle the special case first:
+ if isNL(lx.next()) {
+ return lexMultilineString
+ }
+ lx.backup()
+ lx.push(lexMultilineString)
+ return lexStringEscape(lx)
+}
+
+func lexStringEscape(lx *lexer) stateFn {
+ r := lx.next()
+ switch r {
+ case 'b':
+ fallthrough
+ case 't':
+ fallthrough
+ case 'n':
+ fallthrough
+ case 'f':
+ fallthrough
+ case 'r':
+ fallthrough
+ case '"':
+ fallthrough
+ case ' ', '\t':
+ // Inside """ .. """ strings you can use \ to escape newlines, and any
+ // amount of whitespace can be between the \ and \n.
+ fallthrough
+ case '\\':
+ return lx.pop()
+ case 'u':
+ return lexShortUnicodeEscape
+ case 'U':
+ return lexLongUnicodeEscape
+ }
+ return lx.errorf("invalid escape character %q; only the following escape characters are allowed: "+
+ `\b, \t, \n, \f, \r, \", \\, \uXXXX, and \UXXXXXXXX`, r)
+}
+
+func lexShortUnicodeEscape(lx *lexer) stateFn {
+ var r rune
+ for i := 0; i < 4; i++ {
+ r = lx.next()
+ if !isHexadecimal(r) {
+ return lx.errorf(
+ `expected four hexadecimal digits after '\u', but got %q instead`,
+ lx.current())
+ }
+ }
+ return lx.pop()
+}
+
+func lexLongUnicodeEscape(lx *lexer) stateFn {
+ var r rune
+ for i := 0; i < 8; i++ {
+ r = lx.next()
+ if !isHexadecimal(r) {
+ return lx.errorf(
+ `expected eight hexadecimal digits after '\U', but got %q instead`,
+ lx.current())
+ }
+ }
+ return lx.pop()
+}
+
+// lexNumberOrDateStart processes the first character of a value which begins
+// with a digit. It exists to catch values starting with '0', so that
+// lexBaseNumberOrDate can differentiate base prefixed integers from other
+// types.
+func lexNumberOrDateStart(lx *lexer) stateFn {
+ r := lx.next()
+ switch r {
+ case '0':
+ return lexBaseNumberOrDate
+ }
+
+ if !isDigit(r) {
+ // The only way to reach this state is if the value starts
+ // with a digit, so specifically treat anything else as an
+ // error.
+ return lx.errorf("expected a digit but got %q", r)
+ }
+
+ return lexNumberOrDate
+}
+
+// lexNumberOrDate consumes either an integer, float or datetime.
+func lexNumberOrDate(lx *lexer) stateFn {
+ r := lx.next()
+ if isDigit(r) {
+ return lexNumberOrDate
+ }
+ switch r {
+ case '-', ':':
+ return lexDatetime
+ case '_':
+ return lexDecimalNumber
+ case '.', 'e', 'E':
+ return lexFloat
+ }
+
+ lx.backup()
+ lx.emit(itemInteger)
+ return lx.pop()
+}
+
+// lexDatetime consumes a Datetime, to a first approximation.
+// The parser validates that it matches one of the accepted formats.
+func lexDatetime(lx *lexer) stateFn {
+ r := lx.next()
+ if isDigit(r) {
+ return lexDatetime
+ }
+ switch r {
+ case '-', ':', 'T', 't', ' ', '.', 'Z', 'z', '+':
+ return lexDatetime
+ }
+
+ lx.backup()
+ lx.emitTrim(itemDatetime)
+ return lx.pop()
+}
+
+// lexHexInteger consumes a hexadecimal integer after seeing the '0x' prefix.
+func lexHexInteger(lx *lexer) stateFn {
+ r := lx.next()
+ if isHexadecimal(r) {
+ return lexHexInteger
+ }
+ switch r {
+ case '_':
+ return lexHexInteger
+ }
+
+ lx.backup()
+ lx.emit(itemInteger)
+ return lx.pop()
+}
+
+// lexOctalInteger consumes an octal integer after seeing the '0o' prefix.
+func lexOctalInteger(lx *lexer) stateFn {
+ r := lx.next()
+ if isOctal(r) {
+ return lexOctalInteger
+ }
+ switch r {
+ case '_':
+ return lexOctalInteger
+ }
+
+ lx.backup()
+ lx.emit(itemInteger)
+ return lx.pop()
+}
+
+// lexBinaryInteger consumes a binary integer after seeing the '0b' prefix.
+func lexBinaryInteger(lx *lexer) stateFn {
+ r := lx.next()
+ if isBinary(r) {
+ return lexBinaryInteger
+ }
+ switch r {
+ case '_':
+ return lexBinaryInteger
+ }
+
+ lx.backup()
+ lx.emit(itemInteger)
+ return lx.pop()
+}
+
+// lexDecimalNumber consumes a decimal float or integer.
+func lexDecimalNumber(lx *lexer) stateFn {
+ r := lx.next()
+ if isDigit(r) {
+ return lexDecimalNumber
+ }
+ switch r {
+ case '.', 'e', 'E':
+ return lexFloat
+ case '_':
+ return lexDecimalNumber
+ }
+
+ lx.backup()
+ lx.emit(itemInteger)
+ return lx.pop()
+}
+
+// lexDecimalNumber consumes the first digit of a number beginning with a sign.
+// It assumes the sign has already been consumed. Values which start with a sign
+// are only allowed to be decimal integers or floats.
+//
+// The special "nan" and "inf" values are also recognized.
+func lexDecimalNumberStart(lx *lexer) stateFn {
+ r := lx.next()
+
+ // Special error cases to give users better error messages
+ switch r {
+ case 'i':
+ if !lx.accept('n') || !lx.accept('f') {
+ return lx.errorf("invalid float: '%s'", lx.current())
+ }
+ lx.emit(itemFloat)
+ return lx.pop()
+ case 'n':
+ if !lx.accept('a') || !lx.accept('n') {
+ return lx.errorf("invalid float: '%s'", lx.current())
+ }
+ lx.emit(itemFloat)
+ return lx.pop()
+ case '0':
+ p := lx.peek()
+ switch p {
+ case 'b', 'o', 'x':
+ return lx.errorf("cannot use sign with non-decimal numbers: '%s%c'", lx.current(), p)
+ }
+ case '.':
+ return lx.errorf("floats must start with a digit, not '.'")
+ }
+
+ if isDigit(r) {
+ return lexDecimalNumber
+ }
+
+ return lx.errorf("expected a digit but got %q", r)
+}
+
+// lexBaseNumberOrDate differentiates between the possible values which
+// start with '0'. It assumes that before reaching this state, the initial '0'
+// has been consumed.
+func lexBaseNumberOrDate(lx *lexer) stateFn {
+ r := lx.next()
+ // Note: All datetimes start with at least two digits, so we don't
+ // handle date characters (':', '-', etc.) here.
+ if isDigit(r) {
+ return lexNumberOrDate
+ }
+ switch r {
+ case '_':
+ // Can only be decimal, because there can't be an underscore
+ // between the '0' and the base designator, and dates can't
+ // contain underscores.
+ return lexDecimalNumber
+ case '.', 'e', 'E':
+ return lexFloat
+ case 'b':
+ r = lx.peek()
+ if !isBinary(r) {
+ lx.errorf("not a binary number: '%s%c'", lx.current(), r)
+ }
+ return lexBinaryInteger
+ case 'o':
+ r = lx.peek()
+ if !isOctal(r) {
+ lx.errorf("not an octal number: '%s%c'", lx.current(), r)
+ }
+ return lexOctalInteger
+ case 'x':
+ r = lx.peek()
+ if !isHexadecimal(r) {
+ lx.errorf("not a hexidecimal number: '%s%c'", lx.current(), r)
+ }
+ return lexHexInteger
+ }
+
+ lx.backup()
+ lx.emit(itemInteger)
+ return lx.pop()
+}
+
+// lexFloat consumes the elements of a float. It allows any sequence of
+// float-like characters, so floats emitted by the lexer are only a first
+// approximation and must be validated by the parser.
+func lexFloat(lx *lexer) stateFn {
+ r := lx.next()
+ if isDigit(r) {
+ return lexFloat
+ }
+ switch r {
+ case '_', '.', '-', '+', 'e', 'E':
+ return lexFloat
+ }
+
+ lx.backup()
+ lx.emit(itemFloat)
+ return lx.pop()
+}
+
+// lexBool consumes a bool string: 'true' or 'false.
+func lexBool(lx *lexer) stateFn {
+ var rs []rune
+ for {
+ r := lx.next()
+ if !unicode.IsLetter(r) {
+ lx.backup()
+ break
+ }
+ rs = append(rs, r)
+ }
+ s := string(rs)
+ switch s {
+ case "true", "false":
+ lx.emit(itemBool)
+ return lx.pop()
+ }
+ return lx.errorf("expected value but found %q instead", s)
+}
+
+// lexCommentStart begins the lexing of a comment. It will emit
+// itemCommentStart and consume no characters, passing control to lexComment.
+func lexCommentStart(lx *lexer) stateFn {
+ lx.ignore()
+ lx.emit(itemCommentStart)
+ return lexComment
+}
+
+// lexComment lexes an entire comment. It assumes that '#' has been consumed.
+// It will consume *up to* the first newline character, and pass control
+// back to the last state on the stack.
+func lexComment(lx *lexer) stateFn {
+ switch r := lx.next(); {
+ case isNL(r) || r == eof:
+ lx.backup()
+ lx.emit(itemText)
+ return lx.pop()
+ case isControl(r):
+ return lx.errorf("control characters are not allowed inside comments: '0x%02x'", r)
+ default:
+ return lexComment
+ }
+}
+
+// lexSkip ignores all slurped input and moves on to the next state.
+func lexSkip(lx *lexer, nextState stateFn) stateFn {
+ lx.ignore()
+ return nextState
+}
+
+// isWhitespace returns true if `r` is a whitespace character according
+// to the spec.
+func isWhitespace(r rune) bool {
+ return r == '\t' || r == ' '
+}
+
+func isNL(r rune) bool {
+ return r == '\n' || r == '\r'
+}
+
+// Control characters except \n, \t
+func isControl(r rune) bool {
+ switch r {
+ case '\t', '\r', '\n':
+ return false
+ default:
+ return (r >= 0x00 && r <= 0x1f) || r == 0x7f
+ }
+}
+
+func isDigit(r rune) bool {
+ return r >= '0' && r <= '9'
+}
+
+func isHexadecimal(r rune) bool {
+ return (r >= '0' && r <= '9') ||
+ (r >= 'a' && r <= 'f') ||
+ (r >= 'A' && r <= 'F')
+}
+
+func isOctal(r rune) bool {
+ return r >= '0' && r <= '7'
+}
+
+func isBinary(r rune) bool {
+ return r == '0' || r == '1'
+}
+
+func isBareKeyChar(r rune) bool {
+ return (r >= 'A' && r <= 'Z') ||
+ (r >= 'a' && r <= 'z') ||
+ (r >= '0' && r <= '9') ||
+ r == '_' ||
+ r == '-'
+}
+
+func (s stateFn) String() string {
+ name := runtime.FuncForPC(reflect.ValueOf(s).Pointer()).Name()
+ if i := strings.LastIndexByte(name, '.'); i > -1 {
+ name = name[i+1:]
+ }
+ if s == nil {
+ name = "<nil>"
+ }
+ return name + "()"
+}
+
+func (itype itemType) String() string {
+ switch itype {
+ case itemError:
+ return "Error"
+ case itemNIL:
+ return "NIL"
+ case itemEOF:
+ return "EOF"
+ case itemText:
+ return "Text"
+ case itemString, itemRawString, itemMultilineString, itemRawMultilineString:
+ return "String"
+ case itemBool:
+ return "Bool"
+ case itemInteger:
+ return "Integer"
+ case itemFloat:
+ return "Float"
+ case itemDatetime:
+ return "DateTime"
+ case itemTableStart:
+ return "TableStart"
+ case itemTableEnd:
+ return "TableEnd"
+ case itemKeyStart:
+ return "KeyStart"
+ case itemKeyEnd:
+ return "KeyEnd"
+ case itemArray:
+ return "Array"
+ case itemArrayEnd:
+ return "ArrayEnd"
+ case itemCommentStart:
+ return "CommentStart"
+ case itemInlineTableStart:
+ return "InlineTableStart"
+ case itemInlineTableEnd:
+ return "InlineTableEnd"
+ }
+ panic(fmt.Sprintf("BUG: Unknown type '%d'.", int(itype)))
+}
+
+func (item item) String() string {
+ return fmt.Sprintf("(%s, %s)", item.typ.String(), item.val)
+}