Gibheer
fa05045d31
This is the import from the separate monfront repository. The history could not be imported, but this should suffice.
1226 lines
29 KiB
Go
1226 lines
29 KiB
Go
package toml
|
|
|
|
import (
|
|
"fmt"
|
|
"reflect"
|
|
"runtime"
|
|
"strings"
|
|
"unicode"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
type itemType int
|
|
|
|
const (
|
|
itemError itemType = iota
|
|
itemNIL // used in the parser to indicate no type
|
|
itemEOF
|
|
itemText
|
|
itemString
|
|
itemRawString
|
|
itemMultilineString
|
|
itemRawMultilineString
|
|
itemBool
|
|
itemInteger
|
|
itemFloat
|
|
itemDatetime
|
|
itemArray // the start of an array
|
|
itemArrayEnd
|
|
itemTableStart
|
|
itemTableEnd
|
|
itemArrayTableStart
|
|
itemArrayTableEnd
|
|
itemKeyStart
|
|
itemKeyEnd
|
|
itemCommentStart
|
|
itemInlineTableStart
|
|
itemInlineTableEnd
|
|
)
|
|
|
|
const (
|
|
eof = 0
|
|
comma = ','
|
|
tableStart = '['
|
|
tableEnd = ']'
|
|
arrayTableStart = '['
|
|
arrayTableEnd = ']'
|
|
tableSep = '.'
|
|
keySep = '='
|
|
arrayStart = '['
|
|
arrayEnd = ']'
|
|
commentStart = '#'
|
|
stringStart = '"'
|
|
stringEnd = '"'
|
|
rawStringStart = '\''
|
|
rawStringEnd = '\''
|
|
inlineTableStart = '{'
|
|
inlineTableEnd = '}'
|
|
)
|
|
|
|
type stateFn func(lx *lexer) stateFn
|
|
|
|
type lexer struct {
|
|
input string
|
|
start int
|
|
pos int
|
|
line int
|
|
state stateFn
|
|
items chan item
|
|
|
|
// Allow for backing up up to four runes.
|
|
// This is necessary because TOML contains 3-rune tokens (""" and ''').
|
|
prevWidths [4]int
|
|
nprev int // how many of prevWidths are in use
|
|
// If we emit an eof, we can still back up, but it is not OK to call
|
|
// next again.
|
|
atEOF bool
|
|
|
|
// A stack of state functions used to maintain context.
|
|
// The idea is to reuse parts of the state machine in various places.
|
|
// For example, values can appear at the top level or within arbitrarily
|
|
// nested arrays. The last state on the stack is used after a value has
|
|
// been lexed. Similarly for comments.
|
|
stack []stateFn
|
|
}
|
|
|
|
type item struct {
|
|
typ itemType
|
|
val string
|
|
line int
|
|
}
|
|
|
|
func (lx *lexer) nextItem() item {
|
|
for {
|
|
select {
|
|
case item := <-lx.items:
|
|
return item
|
|
default:
|
|
lx.state = lx.state(lx)
|
|
//fmt.Printf(" STATE %-24s current: %-10q stack: %s\n", lx.state, lx.current(), lx.stack)
|
|
}
|
|
}
|
|
}
|
|
|
|
func lex(input string) *lexer {
|
|
lx := &lexer{
|
|
input: input,
|
|
state: lexTop,
|
|
line: 1,
|
|
items: make(chan item, 10),
|
|
stack: make([]stateFn, 0, 10),
|
|
}
|
|
return lx
|
|
}
|
|
|
|
func (lx *lexer) push(state stateFn) {
|
|
lx.stack = append(lx.stack, state)
|
|
}
|
|
|
|
func (lx *lexer) pop() stateFn {
|
|
if len(lx.stack) == 0 {
|
|
return lx.errorf("BUG in lexer: no states to pop")
|
|
}
|
|
last := lx.stack[len(lx.stack)-1]
|
|
lx.stack = lx.stack[0 : len(lx.stack)-1]
|
|
return last
|
|
}
|
|
|
|
func (lx *lexer) current() string {
|
|
return lx.input[lx.start:lx.pos]
|
|
}
|
|
|
|
func (lx *lexer) emit(typ itemType) {
|
|
lx.items <- item{typ, lx.current(), lx.line}
|
|
lx.start = lx.pos
|
|
}
|
|
|
|
func (lx *lexer) emitTrim(typ itemType) {
|
|
lx.items <- item{typ, strings.TrimSpace(lx.current()), lx.line}
|
|
lx.start = lx.pos
|
|
}
|
|
|
|
func (lx *lexer) next() (r rune) {
|
|
if lx.atEOF {
|
|
panic("BUG in lexer: next called after EOF")
|
|
}
|
|
if lx.pos >= len(lx.input) {
|
|
lx.atEOF = true
|
|
return eof
|
|
}
|
|
|
|
if lx.input[lx.pos] == '\n' {
|
|
lx.line++
|
|
}
|
|
lx.prevWidths[3] = lx.prevWidths[2]
|
|
lx.prevWidths[2] = lx.prevWidths[1]
|
|
lx.prevWidths[1] = lx.prevWidths[0]
|
|
if lx.nprev < 4 {
|
|
lx.nprev++
|
|
}
|
|
|
|
r, w := utf8.DecodeRuneInString(lx.input[lx.pos:])
|
|
if r == utf8.RuneError {
|
|
lx.errorf("invalid UTF-8 byte at position %d (line %d): 0x%02x", lx.pos, lx.line, lx.input[lx.pos])
|
|
return utf8.RuneError
|
|
}
|
|
|
|
lx.prevWidths[0] = w
|
|
lx.pos += w
|
|
return r
|
|
}
|
|
|
|
// ignore skips over the pending input before this point.
|
|
func (lx *lexer) ignore() {
|
|
lx.start = lx.pos
|
|
}
|
|
|
|
// backup steps back one rune. Can be called 4 times between calls to next.
|
|
func (lx *lexer) backup() {
|
|
if lx.atEOF {
|
|
lx.atEOF = false
|
|
return
|
|
}
|
|
if lx.nprev < 1 {
|
|
panic("BUG in lexer: backed up too far")
|
|
}
|
|
w := lx.prevWidths[0]
|
|
lx.prevWidths[0] = lx.prevWidths[1]
|
|
lx.prevWidths[1] = lx.prevWidths[2]
|
|
lx.prevWidths[2] = lx.prevWidths[3]
|
|
lx.nprev--
|
|
lx.pos -= w
|
|
if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' {
|
|
lx.line--
|
|
}
|
|
}
|
|
|
|
// accept consumes the next rune if it's equal to `valid`.
|
|
func (lx *lexer) accept(valid rune) bool {
|
|
if lx.next() == valid {
|
|
return true
|
|
}
|
|
lx.backup()
|
|
return false
|
|
}
|
|
|
|
// peek returns but does not consume the next rune in the input.
|
|
func (lx *lexer) peek() rune {
|
|
r := lx.next()
|
|
lx.backup()
|
|
return r
|
|
}
|
|
|
|
// skip ignores all input that matches the given predicate.
|
|
func (lx *lexer) skip(pred func(rune) bool) {
|
|
for {
|
|
r := lx.next()
|
|
if pred(r) {
|
|
continue
|
|
}
|
|
lx.backup()
|
|
lx.ignore()
|
|
return
|
|
}
|
|
}
|
|
|
|
// errorf stops all lexing by emitting an error and returning `nil`.
|
|
// Note that any value that is a character is escaped if it's a special
|
|
// character (newlines, tabs, etc.).
|
|
func (lx *lexer) errorf(format string, values ...interface{}) stateFn {
|
|
lx.items <- item{
|
|
itemError,
|
|
fmt.Sprintf(format, values...),
|
|
lx.line,
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// lexTop consumes elements at the top level of TOML data.
|
|
func lexTop(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
if isWhitespace(r) || isNL(r) {
|
|
return lexSkip(lx, lexTop)
|
|
}
|
|
switch r {
|
|
case commentStart:
|
|
lx.push(lexTop)
|
|
return lexCommentStart
|
|
case tableStart:
|
|
return lexTableStart
|
|
case eof:
|
|
if lx.pos > lx.start {
|
|
return lx.errorf("unexpected EOF")
|
|
}
|
|
lx.emit(itemEOF)
|
|
return nil
|
|
}
|
|
|
|
// At this point, the only valid item can be a key, so we back up
|
|
// and let the key lexer do the rest.
|
|
lx.backup()
|
|
lx.push(lexTopEnd)
|
|
return lexKeyStart
|
|
}
|
|
|
|
// lexTopEnd is entered whenever a top-level item has been consumed. (A value
|
|
// or a table.) It must see only whitespace, and will turn back to lexTop
|
|
// upon a newline. If it sees EOF, it will quit the lexer successfully.
|
|
func lexTopEnd(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
switch {
|
|
case r == commentStart:
|
|
// a comment will read to a newline for us.
|
|
lx.push(lexTop)
|
|
return lexCommentStart
|
|
case isWhitespace(r):
|
|
return lexTopEnd
|
|
case isNL(r):
|
|
lx.ignore()
|
|
return lexTop
|
|
case r == eof:
|
|
lx.emit(itemEOF)
|
|
return nil
|
|
}
|
|
return lx.errorf(
|
|
"expected a top-level item to end with a newline, comment, or EOF, but got %q instead",
|
|
r)
|
|
}
|
|
|
|
// lexTable lexes the beginning of a table. Namely, it makes sure that
|
|
// it starts with a character other than '.' and ']'.
|
|
// It assumes that '[' has already been consumed.
|
|
// It also handles the case that this is an item in an array of tables.
|
|
// e.g., '[[name]]'.
|
|
func lexTableStart(lx *lexer) stateFn {
|
|
if lx.peek() == arrayTableStart {
|
|
lx.next()
|
|
lx.emit(itemArrayTableStart)
|
|
lx.push(lexArrayTableEnd)
|
|
} else {
|
|
lx.emit(itemTableStart)
|
|
lx.push(lexTableEnd)
|
|
}
|
|
return lexTableNameStart
|
|
}
|
|
|
|
func lexTableEnd(lx *lexer) stateFn {
|
|
lx.emit(itemTableEnd)
|
|
return lexTopEnd
|
|
}
|
|
|
|
func lexArrayTableEnd(lx *lexer) stateFn {
|
|
if r := lx.next(); r != arrayTableEnd {
|
|
return lx.errorf(
|
|
"expected end of table array name delimiter %q, but got %q instead",
|
|
arrayTableEnd, r)
|
|
}
|
|
lx.emit(itemArrayTableEnd)
|
|
return lexTopEnd
|
|
}
|
|
|
|
func lexTableNameStart(lx *lexer) stateFn {
|
|
lx.skip(isWhitespace)
|
|
switch r := lx.peek(); {
|
|
case r == tableEnd || r == eof:
|
|
return lx.errorf("unexpected end of table name (table names cannot be empty)")
|
|
case r == tableSep:
|
|
return lx.errorf("unexpected table separator (table names cannot be empty)")
|
|
case r == stringStart || r == rawStringStart:
|
|
lx.ignore()
|
|
lx.push(lexTableNameEnd)
|
|
return lexQuotedName
|
|
default:
|
|
lx.push(lexTableNameEnd)
|
|
return lexBareName
|
|
}
|
|
}
|
|
|
|
// lexTableNameEnd reads the end of a piece of a table name, optionally
|
|
// consuming whitespace.
|
|
func lexTableNameEnd(lx *lexer) stateFn {
|
|
lx.skip(isWhitespace)
|
|
switch r := lx.next(); {
|
|
case isWhitespace(r):
|
|
return lexTableNameEnd
|
|
case r == tableSep:
|
|
lx.ignore()
|
|
return lexTableNameStart
|
|
case r == tableEnd:
|
|
return lx.pop()
|
|
default:
|
|
return lx.errorf("expected '.' or ']' to end table name, but got %q instead", r)
|
|
}
|
|
}
|
|
|
|
// lexBareName lexes one part of a key or table.
|
|
//
|
|
// It assumes that at least one valid character for the table has already been
|
|
// read.
|
|
//
|
|
// Lexes only one part, e.g. only 'a' inside 'a.b'.
|
|
func lexBareName(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
if isBareKeyChar(r) {
|
|
return lexBareName
|
|
}
|
|
lx.backup()
|
|
lx.emit(itemText)
|
|
return lx.pop()
|
|
}
|
|
|
|
// lexBareName lexes one part of a key or table.
|
|
//
|
|
// It assumes that at least one valid character for the table has already been
|
|
// read.
|
|
//
|
|
// Lexes only one part, e.g. only '"a"' inside '"a".b'.
|
|
func lexQuotedName(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
switch {
|
|
case isWhitespace(r):
|
|
return lexSkip(lx, lexValue)
|
|
case r == stringStart:
|
|
lx.ignore() // ignore the '"'
|
|
return lexString
|
|
case r == rawStringStart:
|
|
lx.ignore() // ignore the "'"
|
|
return lexRawString
|
|
case r == eof:
|
|
return lx.errorf("unexpected EOF; expected value")
|
|
default:
|
|
return lx.errorf("expected value but found %q instead", r)
|
|
}
|
|
}
|
|
|
|
// lexKeyStart consumes all key parts until a '='.
|
|
func lexKeyStart(lx *lexer) stateFn {
|
|
lx.skip(isWhitespace)
|
|
switch r := lx.peek(); {
|
|
case r == '=' || r == eof:
|
|
return lx.errorf("unexpected '=': key name appears blank")
|
|
case r == '.':
|
|
return lx.errorf("unexpected '.': keys cannot start with a '.'")
|
|
case r == stringStart || r == rawStringStart:
|
|
lx.ignore()
|
|
fallthrough
|
|
default: // Bare key
|
|
lx.emit(itemKeyStart)
|
|
return lexKeyNameStart
|
|
}
|
|
}
|
|
|
|
func lexKeyNameStart(lx *lexer) stateFn {
|
|
lx.skip(isWhitespace)
|
|
switch r := lx.peek(); {
|
|
case r == '=' || r == eof:
|
|
return lx.errorf("unexpected '='")
|
|
case r == '.':
|
|
return lx.errorf("unexpected '.'")
|
|
case r == stringStart || r == rawStringStart:
|
|
lx.ignore()
|
|
lx.push(lexKeyEnd)
|
|
return lexQuotedName
|
|
default:
|
|
lx.push(lexKeyEnd)
|
|
return lexBareName
|
|
}
|
|
}
|
|
|
|
// lexKeyEnd consumes the end of a key and trims whitespace (up to the key
|
|
// separator).
|
|
func lexKeyEnd(lx *lexer) stateFn {
|
|
lx.skip(isWhitespace)
|
|
switch r := lx.next(); {
|
|
case isWhitespace(r):
|
|
return lexSkip(lx, lexKeyEnd)
|
|
case r == eof:
|
|
return lx.errorf("unexpected EOF; expected key separator %q", keySep)
|
|
case r == '.':
|
|
lx.ignore()
|
|
return lexKeyNameStart
|
|
case r == '=':
|
|
lx.emit(itemKeyEnd)
|
|
return lexSkip(lx, lexValue)
|
|
default:
|
|
return lx.errorf("expected '.' or '=', but got %q instead", r)
|
|
}
|
|
}
|
|
|
|
// lexValue starts the consumption of a value anywhere a value is expected.
|
|
// lexValue will ignore whitespace.
|
|
// After a value is lexed, the last state on the next is popped and returned.
|
|
func lexValue(lx *lexer) stateFn {
|
|
// We allow whitespace to precede a value, but NOT newlines.
|
|
// In array syntax, the array states are responsible for ignoring newlines.
|
|
r := lx.next()
|
|
switch {
|
|
case isWhitespace(r):
|
|
return lexSkip(lx, lexValue)
|
|
case isDigit(r):
|
|
lx.backup() // avoid an extra state and use the same as above
|
|
return lexNumberOrDateStart
|
|
}
|
|
switch r {
|
|
case arrayStart:
|
|
lx.ignore()
|
|
lx.emit(itemArray)
|
|
return lexArrayValue
|
|
case inlineTableStart:
|
|
lx.ignore()
|
|
lx.emit(itemInlineTableStart)
|
|
return lexInlineTableValue
|
|
case stringStart:
|
|
if lx.accept(stringStart) {
|
|
if lx.accept(stringStart) {
|
|
lx.ignore() // Ignore """
|
|
return lexMultilineString
|
|
}
|
|
lx.backup()
|
|
}
|
|
lx.ignore() // ignore the '"'
|
|
return lexString
|
|
case rawStringStart:
|
|
if lx.accept(rawStringStart) {
|
|
if lx.accept(rawStringStart) {
|
|
lx.ignore() // Ignore """
|
|
return lexMultilineRawString
|
|
}
|
|
lx.backup()
|
|
}
|
|
lx.ignore() // ignore the "'"
|
|
return lexRawString
|
|
case '.': // special error case, be kind to users
|
|
return lx.errorf("floats must start with a digit, not '.'")
|
|
case 'i', 'n':
|
|
if (lx.accept('n') && lx.accept('f')) || (lx.accept('a') && lx.accept('n')) {
|
|
lx.emit(itemFloat)
|
|
return lx.pop()
|
|
}
|
|
case '-', '+':
|
|
return lexDecimalNumberStart
|
|
}
|
|
if unicode.IsLetter(r) {
|
|
// Be permissive here; lexBool will give a nice error if the
|
|
// user wrote something like
|
|
// x = foo
|
|
// (i.e. not 'true' or 'false' but is something else word-like.)
|
|
lx.backup()
|
|
return lexBool
|
|
}
|
|
if r == eof {
|
|
return lx.errorf("unexpected EOF; expected value")
|
|
}
|
|
return lx.errorf("expected value but found %q instead", r)
|
|
}
|
|
|
|
// lexArrayValue consumes one value in an array. It assumes that '[' or ','
|
|
// have already been consumed. All whitespace and newlines are ignored.
|
|
func lexArrayValue(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
switch {
|
|
case isWhitespace(r) || isNL(r):
|
|
return lexSkip(lx, lexArrayValue)
|
|
case r == commentStart:
|
|
lx.push(lexArrayValue)
|
|
return lexCommentStart
|
|
case r == comma:
|
|
return lx.errorf("unexpected comma")
|
|
case r == arrayEnd:
|
|
// NOTE(caleb): The spec isn't clear about whether you can have
|
|
// a trailing comma or not, so we'll allow it.
|
|
return lexArrayEnd
|
|
}
|
|
|
|
lx.backup()
|
|
lx.push(lexArrayValueEnd)
|
|
return lexValue
|
|
}
|
|
|
|
// lexArrayValueEnd consumes everything between the end of an array value and
|
|
// the next value (or the end of the array): it ignores whitespace and newlines
|
|
// and expects either a ',' or a ']'.
|
|
func lexArrayValueEnd(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
switch {
|
|
case isWhitespace(r) || isNL(r):
|
|
return lexSkip(lx, lexArrayValueEnd)
|
|
case r == commentStart:
|
|
lx.push(lexArrayValueEnd)
|
|
return lexCommentStart
|
|
case r == comma:
|
|
lx.ignore()
|
|
return lexArrayValue // move on to the next value
|
|
case r == arrayEnd:
|
|
return lexArrayEnd
|
|
}
|
|
return lx.errorf(
|
|
"expected a comma or array terminator %q, but got %s instead",
|
|
arrayEnd, runeOrEOF(r))
|
|
}
|
|
|
|
// lexArrayEnd finishes the lexing of an array.
|
|
// It assumes that a ']' has just been consumed.
|
|
func lexArrayEnd(lx *lexer) stateFn {
|
|
lx.ignore()
|
|
lx.emit(itemArrayEnd)
|
|
return lx.pop()
|
|
}
|
|
|
|
// lexInlineTableValue consumes one key/value pair in an inline table.
|
|
// It assumes that '{' or ',' have already been consumed. Whitespace is ignored.
|
|
func lexInlineTableValue(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
switch {
|
|
case isWhitespace(r):
|
|
return lexSkip(lx, lexInlineTableValue)
|
|
case isNL(r):
|
|
return lx.errorf("newlines not allowed within inline tables")
|
|
case r == commentStart:
|
|
lx.push(lexInlineTableValue)
|
|
return lexCommentStart
|
|
case r == comma:
|
|
return lx.errorf("unexpected comma")
|
|
case r == inlineTableEnd:
|
|
return lexInlineTableEnd
|
|
}
|
|
lx.backup()
|
|
lx.push(lexInlineTableValueEnd)
|
|
return lexKeyStart
|
|
}
|
|
|
|
// lexInlineTableValueEnd consumes everything between the end of an inline table
|
|
// key/value pair and the next pair (or the end of the table):
|
|
// it ignores whitespace and expects either a ',' or a '}'.
|
|
func lexInlineTableValueEnd(lx *lexer) stateFn {
|
|
switch r := lx.next(); {
|
|
case isWhitespace(r):
|
|
return lexSkip(lx, lexInlineTableValueEnd)
|
|
case isNL(r):
|
|
return lx.errorf("newlines not allowed within inline tables")
|
|
case r == commentStart:
|
|
lx.push(lexInlineTableValueEnd)
|
|
return lexCommentStart
|
|
case r == comma:
|
|
lx.ignore()
|
|
lx.skip(isWhitespace)
|
|
if lx.peek() == '}' {
|
|
return lx.errorf("trailing comma not allowed in inline tables")
|
|
}
|
|
return lexInlineTableValue
|
|
case r == inlineTableEnd:
|
|
return lexInlineTableEnd
|
|
default:
|
|
return lx.errorf(
|
|
"expected a comma or an inline table terminator %q, but got %s instead",
|
|
inlineTableEnd, runeOrEOF(r))
|
|
}
|
|
}
|
|
|
|
func runeOrEOF(r rune) string {
|
|
if r == eof {
|
|
return "end of file"
|
|
}
|
|
return "'" + string(r) + "'"
|
|
}
|
|
|
|
// lexInlineTableEnd finishes the lexing of an inline table.
|
|
// It assumes that a '}' has just been consumed.
|
|
func lexInlineTableEnd(lx *lexer) stateFn {
|
|
lx.ignore()
|
|
lx.emit(itemInlineTableEnd)
|
|
return lx.pop()
|
|
}
|
|
|
|
// lexString consumes the inner contents of a string. It assumes that the
|
|
// beginning '"' has already been consumed and ignored.
|
|
func lexString(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
switch {
|
|
case r == eof:
|
|
return lx.errorf(`unexpected EOF; expected '"'`)
|
|
case isControl(r) || r == '\r':
|
|
return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
|
|
case isNL(r):
|
|
return lx.errorf("strings cannot contain newlines")
|
|
case r == '\\':
|
|
lx.push(lexString)
|
|
return lexStringEscape
|
|
case r == stringEnd:
|
|
lx.backup()
|
|
lx.emit(itemString)
|
|
lx.next()
|
|
lx.ignore()
|
|
return lx.pop()
|
|
}
|
|
return lexString
|
|
}
|
|
|
|
// lexMultilineString consumes the inner contents of a string. It assumes that
|
|
// the beginning '"""' has already been consumed and ignored.
|
|
func lexMultilineString(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
switch r {
|
|
case eof:
|
|
return lx.errorf(`unexpected EOF; expected '"""'`)
|
|
case '\r':
|
|
if lx.peek() != '\n' {
|
|
return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
|
|
}
|
|
return lexMultilineString
|
|
case '\\':
|
|
return lexMultilineStringEscape
|
|
case stringEnd:
|
|
/// Found " → try to read two more "".
|
|
if lx.accept(stringEnd) {
|
|
if lx.accept(stringEnd) {
|
|
/// Peek ahead: the string can contain " and "", including at the
|
|
/// end: """str"""""
|
|
/// 6 or more at the end, however, is an error.
|
|
if lx.peek() == stringEnd {
|
|
/// Check if we already lexed 5 's; if so we have 6 now, and
|
|
/// that's just too many man!
|
|
if strings.HasSuffix(lx.current(), `"""""`) {
|
|
return lx.errorf(`unexpected '""""""'`)
|
|
}
|
|
lx.backup()
|
|
lx.backup()
|
|
return lexMultilineString
|
|
}
|
|
|
|
lx.backup() /// backup: don't include the """ in the item.
|
|
lx.backup()
|
|
lx.backup()
|
|
lx.emit(itemMultilineString)
|
|
lx.next() /// Read over ''' again and discard it.
|
|
lx.next()
|
|
lx.next()
|
|
lx.ignore()
|
|
return lx.pop()
|
|
}
|
|
lx.backup()
|
|
}
|
|
}
|
|
|
|
if isControl(r) {
|
|
return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
|
|
}
|
|
return lexMultilineString
|
|
}
|
|
|
|
// lexRawString consumes a raw string. Nothing can be escaped in such a string.
|
|
// It assumes that the beginning "'" has already been consumed and ignored.
|
|
func lexRawString(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
switch {
|
|
case r == eof:
|
|
return lx.errorf(`unexpected EOF; expected "'"`)
|
|
case isControl(r) || r == '\r':
|
|
return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
|
|
case isNL(r):
|
|
return lx.errorf("strings cannot contain newlines")
|
|
case r == rawStringEnd:
|
|
lx.backup()
|
|
lx.emit(itemRawString)
|
|
lx.next()
|
|
lx.ignore()
|
|
return lx.pop()
|
|
}
|
|
return lexRawString
|
|
}
|
|
|
|
// lexMultilineRawString consumes a raw string. Nothing can be escaped in such
|
|
// a string. It assumes that the beginning "'''" has already been consumed and
|
|
// ignored.
|
|
func lexMultilineRawString(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
switch r {
|
|
case eof:
|
|
return lx.errorf(`unexpected EOF; expected "'''"`)
|
|
case '\r':
|
|
if lx.peek() != '\n' {
|
|
return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
|
|
}
|
|
return lexMultilineRawString
|
|
case rawStringEnd:
|
|
/// Found ' → try to read two more ''.
|
|
if lx.accept(rawStringEnd) {
|
|
if lx.accept(rawStringEnd) {
|
|
/// Peek ahead: the string can contain ' and '', including at the
|
|
/// end: '''str'''''
|
|
/// 6 or more at the end, however, is an error.
|
|
if lx.peek() == rawStringEnd {
|
|
/// Check if we already lexed 5 's; if so we have 6 now, and
|
|
/// that's just too many man!
|
|
if strings.HasSuffix(lx.current(), "'''''") {
|
|
return lx.errorf(`unexpected "''''''"`)
|
|
}
|
|
lx.backup()
|
|
lx.backup()
|
|
return lexMultilineRawString
|
|
}
|
|
|
|
lx.backup() /// backup: don't include the ''' in the item.
|
|
lx.backup()
|
|
lx.backup()
|
|
lx.emit(itemRawMultilineString)
|
|
lx.next() /// Read over ''' again and discard it.
|
|
lx.next()
|
|
lx.next()
|
|
lx.ignore()
|
|
return lx.pop()
|
|
}
|
|
lx.backup()
|
|
}
|
|
}
|
|
|
|
if isControl(r) {
|
|
return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
|
|
}
|
|
return lexMultilineRawString
|
|
}
|
|
|
|
// lexMultilineStringEscape consumes an escaped character. It assumes that the
|
|
// preceding '\\' has already been consumed.
|
|
func lexMultilineStringEscape(lx *lexer) stateFn {
|
|
// Handle the special case first:
|
|
if isNL(lx.next()) {
|
|
return lexMultilineString
|
|
}
|
|
lx.backup()
|
|
lx.push(lexMultilineString)
|
|
return lexStringEscape(lx)
|
|
}
|
|
|
|
func lexStringEscape(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
switch r {
|
|
case 'b':
|
|
fallthrough
|
|
case 't':
|
|
fallthrough
|
|
case 'n':
|
|
fallthrough
|
|
case 'f':
|
|
fallthrough
|
|
case 'r':
|
|
fallthrough
|
|
case '"':
|
|
fallthrough
|
|
case ' ', '\t':
|
|
// Inside """ .. """ strings you can use \ to escape newlines, and any
|
|
// amount of whitespace can be between the \ and \n.
|
|
fallthrough
|
|
case '\\':
|
|
return lx.pop()
|
|
case 'u':
|
|
return lexShortUnicodeEscape
|
|
case 'U':
|
|
return lexLongUnicodeEscape
|
|
}
|
|
return lx.errorf("invalid escape character %q; only the following escape characters are allowed: "+
|
|
`\b, \t, \n, \f, \r, \", \\, \uXXXX, and \UXXXXXXXX`, r)
|
|
}
|
|
|
|
func lexShortUnicodeEscape(lx *lexer) stateFn {
|
|
var r rune
|
|
for i := 0; i < 4; i++ {
|
|
r = lx.next()
|
|
if !isHexadecimal(r) {
|
|
return lx.errorf(
|
|
`expected four hexadecimal digits after '\u', but got %q instead`,
|
|
lx.current())
|
|
}
|
|
}
|
|
return lx.pop()
|
|
}
|
|
|
|
func lexLongUnicodeEscape(lx *lexer) stateFn {
|
|
var r rune
|
|
for i := 0; i < 8; i++ {
|
|
r = lx.next()
|
|
if !isHexadecimal(r) {
|
|
return lx.errorf(
|
|
`expected eight hexadecimal digits after '\U', but got %q instead`,
|
|
lx.current())
|
|
}
|
|
}
|
|
return lx.pop()
|
|
}
|
|
|
|
// lexNumberOrDateStart processes the first character of a value which begins
|
|
// with a digit. It exists to catch values starting with '0', so that
|
|
// lexBaseNumberOrDate can differentiate base prefixed integers from other
|
|
// types.
|
|
func lexNumberOrDateStart(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
switch r {
|
|
case '0':
|
|
return lexBaseNumberOrDate
|
|
}
|
|
|
|
if !isDigit(r) {
|
|
// The only way to reach this state is if the value starts
|
|
// with a digit, so specifically treat anything else as an
|
|
// error.
|
|
return lx.errorf("expected a digit but got %q", r)
|
|
}
|
|
|
|
return lexNumberOrDate
|
|
}
|
|
|
|
// lexNumberOrDate consumes either an integer, float or datetime.
|
|
func lexNumberOrDate(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
if isDigit(r) {
|
|
return lexNumberOrDate
|
|
}
|
|
switch r {
|
|
case '-', ':':
|
|
return lexDatetime
|
|
case '_':
|
|
return lexDecimalNumber
|
|
case '.', 'e', 'E':
|
|
return lexFloat
|
|
}
|
|
|
|
lx.backup()
|
|
lx.emit(itemInteger)
|
|
return lx.pop()
|
|
}
|
|
|
|
// lexDatetime consumes a Datetime, to a first approximation.
|
|
// The parser validates that it matches one of the accepted formats.
|
|
func lexDatetime(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
if isDigit(r) {
|
|
return lexDatetime
|
|
}
|
|
switch r {
|
|
case '-', ':', 'T', 't', ' ', '.', 'Z', 'z', '+':
|
|
return lexDatetime
|
|
}
|
|
|
|
lx.backup()
|
|
lx.emitTrim(itemDatetime)
|
|
return lx.pop()
|
|
}
|
|
|
|
// lexHexInteger consumes a hexadecimal integer after seeing the '0x' prefix.
|
|
func lexHexInteger(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
if isHexadecimal(r) {
|
|
return lexHexInteger
|
|
}
|
|
switch r {
|
|
case '_':
|
|
return lexHexInteger
|
|
}
|
|
|
|
lx.backup()
|
|
lx.emit(itemInteger)
|
|
return lx.pop()
|
|
}
|
|
|
|
// lexOctalInteger consumes an octal integer after seeing the '0o' prefix.
|
|
func lexOctalInteger(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
if isOctal(r) {
|
|
return lexOctalInteger
|
|
}
|
|
switch r {
|
|
case '_':
|
|
return lexOctalInteger
|
|
}
|
|
|
|
lx.backup()
|
|
lx.emit(itemInteger)
|
|
return lx.pop()
|
|
}
|
|
|
|
// lexBinaryInteger consumes a binary integer after seeing the '0b' prefix.
|
|
func lexBinaryInteger(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
if isBinary(r) {
|
|
return lexBinaryInteger
|
|
}
|
|
switch r {
|
|
case '_':
|
|
return lexBinaryInteger
|
|
}
|
|
|
|
lx.backup()
|
|
lx.emit(itemInteger)
|
|
return lx.pop()
|
|
}
|
|
|
|
// lexDecimalNumber consumes a decimal float or integer.
|
|
func lexDecimalNumber(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
if isDigit(r) {
|
|
return lexDecimalNumber
|
|
}
|
|
switch r {
|
|
case '.', 'e', 'E':
|
|
return lexFloat
|
|
case '_':
|
|
return lexDecimalNumber
|
|
}
|
|
|
|
lx.backup()
|
|
lx.emit(itemInteger)
|
|
return lx.pop()
|
|
}
|
|
|
|
// lexDecimalNumber consumes the first digit of a number beginning with a sign.
|
|
// It assumes the sign has already been consumed. Values which start with a sign
|
|
// are only allowed to be decimal integers or floats.
|
|
//
|
|
// The special "nan" and "inf" values are also recognized.
|
|
func lexDecimalNumberStart(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
|
|
// Special error cases to give users better error messages
|
|
switch r {
|
|
case 'i':
|
|
if !lx.accept('n') || !lx.accept('f') {
|
|
return lx.errorf("invalid float: '%s'", lx.current())
|
|
}
|
|
lx.emit(itemFloat)
|
|
return lx.pop()
|
|
case 'n':
|
|
if !lx.accept('a') || !lx.accept('n') {
|
|
return lx.errorf("invalid float: '%s'", lx.current())
|
|
}
|
|
lx.emit(itemFloat)
|
|
return lx.pop()
|
|
case '0':
|
|
p := lx.peek()
|
|
switch p {
|
|
case 'b', 'o', 'x':
|
|
return lx.errorf("cannot use sign with non-decimal numbers: '%s%c'", lx.current(), p)
|
|
}
|
|
case '.':
|
|
return lx.errorf("floats must start with a digit, not '.'")
|
|
}
|
|
|
|
if isDigit(r) {
|
|
return lexDecimalNumber
|
|
}
|
|
|
|
return lx.errorf("expected a digit but got %q", r)
|
|
}
|
|
|
|
// lexBaseNumberOrDate differentiates between the possible values which
|
|
// start with '0'. It assumes that before reaching this state, the initial '0'
|
|
// has been consumed.
|
|
func lexBaseNumberOrDate(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
// Note: All datetimes start with at least two digits, so we don't
|
|
// handle date characters (':', '-', etc.) here.
|
|
if isDigit(r) {
|
|
return lexNumberOrDate
|
|
}
|
|
switch r {
|
|
case '_':
|
|
// Can only be decimal, because there can't be an underscore
|
|
// between the '0' and the base designator, and dates can't
|
|
// contain underscores.
|
|
return lexDecimalNumber
|
|
case '.', 'e', 'E':
|
|
return lexFloat
|
|
case 'b':
|
|
r = lx.peek()
|
|
if !isBinary(r) {
|
|
lx.errorf("not a binary number: '%s%c'", lx.current(), r)
|
|
}
|
|
return lexBinaryInteger
|
|
case 'o':
|
|
r = lx.peek()
|
|
if !isOctal(r) {
|
|
lx.errorf("not an octal number: '%s%c'", lx.current(), r)
|
|
}
|
|
return lexOctalInteger
|
|
case 'x':
|
|
r = lx.peek()
|
|
if !isHexadecimal(r) {
|
|
lx.errorf("not a hexidecimal number: '%s%c'", lx.current(), r)
|
|
}
|
|
return lexHexInteger
|
|
}
|
|
|
|
lx.backup()
|
|
lx.emit(itemInteger)
|
|
return lx.pop()
|
|
}
|
|
|
|
// lexFloat consumes the elements of a float. It allows any sequence of
|
|
// float-like characters, so floats emitted by the lexer are only a first
|
|
// approximation and must be validated by the parser.
|
|
func lexFloat(lx *lexer) stateFn {
|
|
r := lx.next()
|
|
if isDigit(r) {
|
|
return lexFloat
|
|
}
|
|
switch r {
|
|
case '_', '.', '-', '+', 'e', 'E':
|
|
return lexFloat
|
|
}
|
|
|
|
lx.backup()
|
|
lx.emit(itemFloat)
|
|
return lx.pop()
|
|
}
|
|
|
|
// lexBool consumes a bool string: 'true' or 'false.
|
|
func lexBool(lx *lexer) stateFn {
|
|
var rs []rune
|
|
for {
|
|
r := lx.next()
|
|
if !unicode.IsLetter(r) {
|
|
lx.backup()
|
|
break
|
|
}
|
|
rs = append(rs, r)
|
|
}
|
|
s := string(rs)
|
|
switch s {
|
|
case "true", "false":
|
|
lx.emit(itemBool)
|
|
return lx.pop()
|
|
}
|
|
return lx.errorf("expected value but found %q instead", s)
|
|
}
|
|
|
|
// lexCommentStart begins the lexing of a comment. It will emit
|
|
// itemCommentStart and consume no characters, passing control to lexComment.
|
|
func lexCommentStart(lx *lexer) stateFn {
|
|
lx.ignore()
|
|
lx.emit(itemCommentStart)
|
|
return lexComment
|
|
}
|
|
|
|
// lexComment lexes an entire comment. It assumes that '#' has been consumed.
|
|
// It will consume *up to* the first newline character, and pass control
|
|
// back to the last state on the stack.
|
|
func lexComment(lx *lexer) stateFn {
|
|
switch r := lx.next(); {
|
|
case isNL(r) || r == eof:
|
|
lx.backup()
|
|
lx.emit(itemText)
|
|
return lx.pop()
|
|
case isControl(r):
|
|
return lx.errorf("control characters are not allowed inside comments: '0x%02x'", r)
|
|
default:
|
|
return lexComment
|
|
}
|
|
}
|
|
|
|
// lexSkip ignores all slurped input and moves on to the next state.
|
|
func lexSkip(lx *lexer, nextState stateFn) stateFn {
|
|
lx.ignore()
|
|
return nextState
|
|
}
|
|
|
|
// isWhitespace returns true if `r` is a whitespace character according
|
|
// to the spec.
|
|
func isWhitespace(r rune) bool {
|
|
return r == '\t' || r == ' '
|
|
}
|
|
|
|
func isNL(r rune) bool {
|
|
return r == '\n' || r == '\r'
|
|
}
|
|
|
|
// Control characters except \n, \t
|
|
func isControl(r rune) bool {
|
|
switch r {
|
|
case '\t', '\r', '\n':
|
|
return false
|
|
default:
|
|
return (r >= 0x00 && r <= 0x1f) || r == 0x7f
|
|
}
|
|
}
|
|
|
|
func isDigit(r rune) bool {
|
|
return r >= '0' && r <= '9'
|
|
}
|
|
|
|
func isHexadecimal(r rune) bool {
|
|
return (r >= '0' && r <= '9') ||
|
|
(r >= 'a' && r <= 'f') ||
|
|
(r >= 'A' && r <= 'F')
|
|
}
|
|
|
|
func isOctal(r rune) bool {
|
|
return r >= '0' && r <= '7'
|
|
}
|
|
|
|
func isBinary(r rune) bool {
|
|
return r == '0' || r == '1'
|
|
}
|
|
|
|
func isBareKeyChar(r rune) bool {
|
|
return (r >= 'A' && r <= 'Z') ||
|
|
(r >= 'a' && r <= 'z') ||
|
|
(r >= '0' && r <= '9') ||
|
|
r == '_' ||
|
|
r == '-'
|
|
}
|
|
|
|
func (s stateFn) String() string {
|
|
name := runtime.FuncForPC(reflect.ValueOf(s).Pointer()).Name()
|
|
if i := strings.LastIndexByte(name, '.'); i > -1 {
|
|
name = name[i+1:]
|
|
}
|
|
if s == nil {
|
|
name = "<nil>"
|
|
}
|
|
return name + "()"
|
|
}
|
|
|
|
func (itype itemType) String() string {
|
|
switch itype {
|
|
case itemError:
|
|
return "Error"
|
|
case itemNIL:
|
|
return "NIL"
|
|
case itemEOF:
|
|
return "EOF"
|
|
case itemText:
|
|
return "Text"
|
|
case itemString, itemRawString, itemMultilineString, itemRawMultilineString:
|
|
return "String"
|
|
case itemBool:
|
|
return "Bool"
|
|
case itemInteger:
|
|
return "Integer"
|
|
case itemFloat:
|
|
return "Float"
|
|
case itemDatetime:
|
|
return "DateTime"
|
|
case itemTableStart:
|
|
return "TableStart"
|
|
case itemTableEnd:
|
|
return "TableEnd"
|
|
case itemKeyStart:
|
|
return "KeyStart"
|
|
case itemKeyEnd:
|
|
return "KeyEnd"
|
|
case itemArray:
|
|
return "Array"
|
|
case itemArrayEnd:
|
|
return "ArrayEnd"
|
|
case itemCommentStart:
|
|
return "CommentStart"
|
|
case itemInlineTableStart:
|
|
return "InlineTableStart"
|
|
case itemInlineTableEnd:
|
|
return "InlineTableEnd"
|
|
}
|
|
panic(fmt.Sprintf("BUG: Unknown type '%d'.", int(itype)))
|
|
}
|
|
|
|
func (item item) String() string {
|
|
return fmt.Sprintf("(%s, %s)", item.typ.String(), item.val)
|
|
}
|