From 8222fbea5776d14032da6bfd500070a94c3dcb1c Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 10 Oct 2014 20:45:57 -0700 Subject: [PATCH] improve lexer handling of special characters characters like + and - are special, but they should only be special at the beginning of strings; inside something that would otherwise be a string we should just let them be characters. closes #103 --- query_string.nex | 2 +- query_string.nn.go | 700 +++++++++++++++++++----------------- query_string_parser_test.go | 10 + 3 files changed, 372 insertions(+), 340 deletions(-) diff --git a/query_string.nex b/query_string.nex index 99ed2b8a..66f99dc4 100644 --- a/query_string.nex +++ b/query_string.nex @@ -20,7 +20,7 @@ return tNUMBER } /[ \t\n]+/ { logDebugTokens("WHITESPACE (count=%d)", len(yylex.Text())) /* eat up whitespace */ } -/[^\t\n\f\r :^\+\-><=]+/ { +/[^\t\n\f\r :^\+\-><=][^\t\n\f\r :^]*/ { lval.s = yylex.Text() logDebugTokens("STRING - %s", lval.s); return tSTRING diff --git a/query_string.nn.go b/query_string.nn.go index 66342c7f..0d1d2ebc 100644 --- a/query_string.nn.go +++ b/query_string.nn.go @@ -23,17 +23,17 @@ a = make([]family, 1) { var acc [18]bool var fun [18]func(rune) int -fun[1] = func(r rune) int { +fun[13] = func(r rune) int { switch(r) { + case 47: return 2 case 116: return 2 - case 114: return 2 case 102: return 2 case 98: return 2 - case 110: return 2 + case 34: return 3 + case 114: return 2 case 117: return 2 - case 92: return 3 - case 34: return 4 - case 47: return 2 + case 110: return 2 + case 92: return 4 default: switch { case 48 <= r && r <= 57: return 2 @@ -44,144 +44,17 @@ fun[1] = func(r rune) int { } panic("unreachable") } -fun[15] = func(r rune) int { +fun[7] = func(r rune) int { switch(r) { - case 34: return 4 - case 47: return 2 - case 116: return 2 - case 114: return 2 - case 102: return 16 - case 98: return 16 - case 110: return 2 case 117: return 2 - case 92: return 3 - default: - switch { - case 48 <= r && r <= 57: return 16 - case 65 <= r 
&& r <= 70: return 16 - case 97 <= r && r <= 102: return 16 - default: return 2 - } - } - panic("unreachable") -} -fun[17] = func(r rune) int { - switch(r) { - case 92: return 3 - case 34: return 4 + case 110: return 2 + case 92: return 4 case 47: return 2 case 116: return 2 - case 114: return 2 case 102: return 2 case 98: return 2 - case 110: return 2 - case 117: return 2 - default: - switch { - case 48 <= r && r <= 57: return 2 - case 65 <= r && r <= 70: return 2 - case 97 <= r && r <= 102: return 2 - default: return 2 - } - } - panic("unreachable") -} -fun[0] = func(r rune) int { - switch(r) { - case 34: return 1 - case 47: return -1 - case 116: return -1 - case 114: return -1 - case 102: return -1 - case 98: return -1 - case 110: return -1 - case 117: return -1 - case 92: return -1 - default: - switch { - case 48 <= r && r <= 57: return -1 - case 65 <= r && r <= 70: return -1 - case 97 <= r && r <= 102: return -1 - default: return -1 - } - } - panic("unreachable") -} -fun[2] = func(r rune) int { - switch(r) { - case 116: return 2 + case 34: return 3 case 114: return 2 - case 102: return 2 - case 98: return 2 - case 110: return 2 - case 117: return 2 - case 92: return 3 - case 34: return 4 - case 47: return 2 - default: - switch { - case 48 <= r && r <= 57: return 2 - case 65 <= r && r <= 70: return 2 - case 97 <= r && r <= 102: return 2 - default: return 2 - } - } - panic("unreachable") -} -acc[5] = true -fun[5] = func(r rune) int { - switch(r) { - case 116: return 2 - case 114: return 2 - case 102: return 2 - case 98: return 2 - case 110: return 2 - case 117: return 2 - case 92: return 3 - case 34: return 4 - case 47: return 2 - default: - switch { - case 48 <= r && r <= 57: return 2 - case 65 <= r && r <= 70: return 2 - case 97 <= r && r <= 102: return 2 - default: return 2 - } - } - panic("unreachable") -} -fun[12] = func(r rune) int { - switch(r) { - case 116: return 2 - case 114: return 2 - case 102: return 14 - case 98: return 14 - case 110: return 2 - 
case 117: return 2 - case 92: return 3 - case 34: return 4 - case 47: return 2 - default: - switch { - case 48 <= r && r <= 57: return 14 - case 65 <= r && r <= 70: return 14 - case 97 <= r && r <= 102: return 14 - default: return 2 - } - } - panic("unreachable") -} -fun[11] = func(r rune) int { - switch(r) { - case 34: return 4 - case 47: return 2 - case 116: return 2 - case 114: return 2 - case 102: return 2 - case 98: return 2 - case 110: return 2 - case 117: return 2 - case 92: return 3 default: switch { case 48 <= r && r <= 57: return 2 @@ -194,15 +67,15 @@ fun[11] = func(r rune) int { } fun[16] = func(r rune) int { switch(r) { + case 47: return 2 case 116: return 2 - case 114: return 2 case 102: return 17 case 98: return 17 - case 110: return 2 + case 34: return 3 + case 114: return 2 case 117: return 2 - case 92: return 3 - case 34: return 4 - case 47: return 2 + case 110: return 2 + case 92: return 4 default: switch { case 48 <= r && r <= 57: return 17 @@ -213,17 +86,102 @@ fun[16] = func(r rune) int { } panic("unreachable") } +acc[3] = true +fun[3] = func(r rune) int { + switch(r) { + case 110: return -1 + case 92: return -1 + case 47: return -1 + case 116: return -1 + case 102: return -1 + case 98: return -1 + case 34: return -1 + case 114: return -1 + case 117: return -1 + default: + switch { + case 48 <= r && r <= 57: return -1 + case 65 <= r && r <= 70: return -1 + case 97 <= r && r <= 102: return -1 + default: return -1 + } + } + panic("unreachable") +} +fun[4] = func(r rune) int { + switch(r) { + case 116: return 5 + case 102: return 6 + case 98: return 7 + case 34: return 8 + case 114: return 9 + case 117: return 10 + case 110: return 11 + case 92: return 12 + case 47: return 13 + default: + switch { + case 48 <= r && r <= 57: return 2 + case 65 <= r && r <= 70: return 2 + case 97 <= r && r <= 102: return 2 + default: return 2 + } + } + panic("unreachable") +} +fun[11] = func(r rune) int { + switch(r) { + case 110: return 2 + case 92: return 4 + 
case 47: return 2 + case 116: return 2 + case 102: return 2 + case 98: return 2 + case 34: return 3 + case 114: return 2 + case 117: return 2 + default: + switch { + case 48 <= r && r <= 57: return 2 + case 65 <= r && r <= 70: return 2 + case 97 <= r && r <= 102: return 2 + default: return 2 + } + } + panic("unreachable") +} +fun[12] = func(r rune) int { + switch(r) { + case 47: return 13 + case 116: return 5 + case 102: return 6 + case 98: return 7 + case 34: return 8 + case 114: return 9 + case 117: return 10 + case 110: return 11 + case 92: return 12 + default: + switch { + case 48 <= r && r <= 57: return 2 + case 65 <= r && r <= 70: return 2 + case 97 <= r && r <= 102: return 2 + default: return 2 + } + } + panic("unreachable") +} fun[9] = func(r rune) int { switch(r) { + case 47: return 2 case 116: return 2 - case 114: return 2 case 102: return 2 case 98: return 2 - case 110: return 2 + case 34: return 3 + case 114: return 2 case 117: return 2 - case 92: return 3 - case 34: return 4 - case 47: return 2 + case 110: return 2 + case 92: return 4 default: switch { case 48 <= r && r <= 57: return 2 @@ -234,17 +192,38 @@ fun[9] = func(r rune) int { } panic("unreachable") } -fun[8] = func(r rune) int { +fun[15] = func(r rune) int { switch(r) { + case 110: return 2 + case 92: return 4 + case 47: return 2 case 116: return 2 + case 102: return 16 + case 98: return 16 + case 34: return 3 case 114: return 2 + case 117: return 2 + default: + switch { + case 48 <= r && r <= 57: return 16 + case 65 <= r && r <= 70: return 16 + case 97 <= r && r <= 102: return 16 + default: return 2 + } + } + panic("unreachable") +} +fun[2] = func(r rune) int { + switch(r) { + case 47: return 2 + case 116: return 2 case 102: return 2 case 98: return 2 - case 110: return 2 + case 34: return 3 + case 114: return 2 case 117: return 2 - case 92: return 3 - case 34: return 4 - case 47: return 2 + case 110: return 2 + case 92: return 4 default: switch { case 48 <= r && r <= 57: return 2 @@ -255,17 
+234,17 @@ fun[8] = func(r rune) int { } panic("unreachable") } -fun[13] = func(r rune) int { +fun[17] = func(r rune) int { switch(r) { - case 116: return 7 - case 114: return 8 - case 102: return 9 - case 98: return 10 - case 110: return 11 - case 117: return 12 - case 92: return 13 - case 34: return 5 - case 47: return 6 + case 47: return 2 + case 116: return 2 + case 102: return 2 + case 98: return 2 + case 34: return 3 + case 114: return 2 + case 117: return 2 + case 110: return 2 + case 92: return 4 default: switch { case 48 <= r && r <= 57: return 2 @@ -279,14 +258,14 @@ fun[13] = func(r rune) int { fun[6] = func(r rune) int { switch(r) { case 110: return 2 - case 117: return 2 - case 92: return 3 - case 34: return 4 + case 92: return 4 case 47: return 2 case 116: return 2 - case 114: return 2 case 102: return 2 case 98: return 2 + case 34: return 3 + case 114: return 2 + case 117: return 2 default: switch { case 48 <= r && r <= 57: return 2 @@ -297,37 +276,17 @@ fun[6] = func(r rune) int { } panic("unreachable") } -fun[7] = func(r rune) int { +acc[8] = true +fun[8] = func(r rune) int { switch(r) { + case 116: return 2 + case 102: return 2 + case 98: return 2 + case 34: return 3 + case 114: return 2 + case 117: return 2 case 110: return 2 - case 117: return 2 - case 92: return 3 - case 34: return 4 - case 47: return 2 - case 116: return 2 - case 114: return 2 - case 102: return 2 - case 98: return 2 - default: - switch { - case 48 <= r && r <= 57: return 2 - case 65 <= r && r <= 70: return 2 - case 97 <= r && r <= 102: return 2 - default: return 2 - } - } - panic("unreachable") -} -fun[10] = func(r rune) int { - switch(r) { - case 116: return 2 - case 114: return 2 - case 102: return 2 - case 98: return 2 - case 110: return 2 - case 117: return 2 - case 92: return 3 - case 34: return 4 + case 92: return 4 case 47: return 2 default: switch { @@ -339,39 +298,17 @@ fun[10] = func(r rune) int { } panic("unreachable") } -acc[4] = true -fun[4] = func(r rune) int { 
- switch(r) { - case 92: return -1 - case 34: return -1 - case 47: return -1 - case 116: return -1 - case 114: return -1 - case 102: return -1 - case 98: return -1 - case 110: return -1 - case 117: return -1 - default: - switch { - case 48 <= r && r <= 57: return -1 - case 65 <= r && r <= 70: return -1 - case 97 <= r && r <= 102: return -1 - default: return -1 - } - } - panic("unreachable") -} fun[14] = func(r rune) int { switch(r) { - case 92: return 3 - case 34: return 4 case 47: return 2 case 116: return 2 - case 114: return 2 case 102: return 15 case 98: return 15 - case 110: return 2 + case 34: return 3 + case 114: return 2 case 117: return 2 + case 110: return 2 + case 92: return 4 default: switch { case 48 <= r && r <= 57: return 15 @@ -382,17 +319,80 @@ fun[14] = func(r rune) int { } panic("unreachable") } -fun[3] = func(r rune) int { +fun[10] = func(r rune) int { switch(r) { - case 34: return 5 - case 47: return 6 - case 116: return 7 - case 114: return 8 - case 102: return 9 - case 98: return 10 - case 110: return 11 - case 117: return 12 - case 92: return 13 + case 116: return 2 + case 102: return 14 + case 98: return 14 + case 34: return 3 + case 114: return 2 + case 117: return 2 + case 110: return 2 + case 92: return 4 + case 47: return 2 + default: + switch { + case 48 <= r && r <= 57: return 14 + case 65 <= r && r <= 70: return 14 + case 97 <= r && r <= 102: return 14 + default: return 2 + } + } + panic("unreachable") +} +fun[1] = func(r rune) int { + switch(r) { + case 102: return 2 + case 98: return 2 + case 34: return 3 + case 114: return 2 + case 117: return 2 + case 110: return 2 + case 92: return 4 + case 47: return 2 + case 116: return 2 + default: + switch { + case 48 <= r && r <= 57: return 2 + case 65 <= r && r <= 70: return 2 + case 97 <= r && r <= 102: return 2 + default: return 2 + } + } + panic("unreachable") +} +fun[0] = func(r rune) int { + switch(r) { + case 92: return -1 + case 47: return -1 + case 116: return -1 + case 102: return 
-1 + case 98: return -1 + case 34: return 1 + case 114: return -1 + case 117: return -1 + case 110: return -1 + default: + switch { + case 48 <= r && r <= 57: return -1 + case 65 <= r && r <= 70: return -1 + case 97 <= r && r <= 102: return -1 + default: return -1 + } + } + panic("unreachable") +} +fun[5] = func(r rune) int { + switch(r) { + case 116: return 2 + case 102: return 2 + case 98: return 2 + case 34: return 3 + case 114: return 2 + case 117: return 2 + case 110: return 2 + case 92: return 4 + case 47: return 2 default: switch { case 48 <= r && r <= 57: return 2 @@ -466,9 +466,10 @@ a0[2].id = 2 { var acc [2]bool var fun [2]func(rune) int -fun[0] = func(r rune) int { +acc[1] = true +fun[1] = func(r rune) int { switch(r) { - case 58: return 1 + case 58: return -1 default: switch { default: return -1 @@ -476,10 +477,9 @@ fun[0] = func(r rune) int { } panic("unreachable") } -acc[1] = true -fun[1] = func(r rune) int { +fun[0] = func(r rune) int { switch(r) { - case 58: return -1 + case 58: return 1 default: switch { default: return -1 @@ -494,10 +494,9 @@ a0[3].id = 3 { var acc [2]bool var fun [2]func(rune) int -acc[1] = true -fun[1] = func(r rune) int { +fun[0] = func(r rune) int { switch(r) { - case 94: return -1 + case 94: return 1 default: switch { default: return -1 @@ -505,9 +504,10 @@ fun[1] = func(r rune) int { } panic("unreachable") } -fun[0] = func(r rune) int { +acc[1] = true +fun[1] = func(r rune) int { switch(r) { - case 94: return 1 + case 94: return -1 default: switch { default: return -1 @@ -522,10 +522,9 @@ a0[4].id = 4 { var acc [2]bool var fun [2]func(rune) int -acc[1] = true -fun[1] = func(r rune) int { +fun[0] = func(r rune) int { switch(r) { - case 40: return -1 + case 40: return 1 default: switch { default: return -1 @@ -533,9 +532,10 @@ fun[1] = func(r rune) int { } panic("unreachable") } -fun[0] = func(r rune) int { +acc[1] = true +fun[1] = func(r rune) int { switch(r) { - case 40: return 1 + case 40: return -1 default: switch { 
default: return -1 @@ -550,9 +550,10 @@ a0[5].id = 5 { var acc [2]bool var fun [2]func(rune) int -fun[0] = func(r rune) int { +acc[1] = true +fun[1] = func(r rune) int { switch(r) { - case 41: return 1 + case 41: return -1 default: switch { default: return -1 @@ -560,10 +561,9 @@ fun[0] = func(r rune) int { } panic("unreachable") } -acc[1] = true -fun[1] = func(r rune) int { +fun[0] = func(r rune) int { switch(r) { - case 41: return -1 + case 41: return 1 default: switch { default: return -1 @@ -634,10 +634,9 @@ a0[8].id = 8 { var acc [2]bool var fun [2]func(rune) int -acc[1] = true -fun[1] = func(r rune) int { +fun[0] = func(r rune) int { switch(r) { - case 61: return -1 + case 61: return 1 default: switch { default: return -1 @@ -645,9 +644,10 @@ fun[1] = func(r rune) int { } panic("unreachable") } -fun[0] = func(r rune) int { +acc[1] = true +fun[1] = func(r rune) int { switch(r) { - case 61: return 1 + case 61: return -1 default: switch { default: return -1 @@ -662,60 +662,6 @@ a0[9].id = 9 { var acc [8]bool var fun [8]func(rune) int -acc[3] = true -fun[3] = func(r rune) int { - switch(r) { - case 46: return 4 - case 45: return -1 - default: - switch { - case 48 <= r && r <= 48: return 5 - case 49 <= r && r <= 57: return 5 - default: return -1 - } - } - panic("unreachable") -} -fun[4] = func(r rune) int { - switch(r) { - case 46: return -1 - case 45: return -1 - default: - switch { - case 48 <= r && r <= 48: return 6 - case 49 <= r && r <= 57: return 6 - default: return -1 - } - } - panic("unreachable") -} -acc[2] = true -fun[2] = func(r rune) int { - switch(r) { - case 46: return 4 - case 45: return -1 - default: - switch { - case 48 <= r && r <= 48: return -1 - case 49 <= r && r <= 57: return -1 - default: return -1 - } - } - panic("unreachable") -} -fun[0] = func(r rune) int { - switch(r) { - case 46: return -1 - case 45: return 1 - default: - switch { - case 48 <= r && r <= 48: return 2 - case 49 <= r && r <= 57: return 3 - default: return -1 - } - } - 
panic("unreachable") -} acc[7] = true fun[7] = func(r rune) int { switch(r) { @@ -730,10 +676,24 @@ fun[7] = func(r rune) int { } panic("unreachable") } -fun[1] = func(r rune) int { +acc[3] = true +fun[3] = func(r rune) int { switch(r) { - case 46: return -1 case 45: return -1 + case 46: return 4 + default: + switch { + case 48 <= r && r <= 48: return 5 + case 49 <= r && r <= 57: return 5 + default: return -1 + } + } + panic("unreachable") +} +fun[0] = func(r rune) int { + switch(r) { + case 45: return 1 + case 46: return -1 default: switch { case 48 <= r && r <= 48: return 2 @@ -743,6 +703,33 @@ fun[1] = func(r rune) int { } panic("unreachable") } +fun[4] = func(r rune) int { + switch(r) { + case 45: return -1 + case 46: return -1 + default: + switch { + case 48 <= r && r <= 48: return 6 + case 49 <= r && r <= 57: return 6 + default: return -1 + } + } + panic("unreachable") +} +acc[2] = true +fun[2] = func(r rune) int { + switch(r) { + case 45: return -1 + case 46: return 4 + default: + switch { + case 48 <= r && r <= 48: return -1 + case 49 <= r && r <= 57: return -1 + default: return -1 + } + } + panic("unreachable") +} acc[6] = true fun[6] = func(r rune) int { switch(r) { @@ -757,11 +744,24 @@ fun[6] = func(r rune) int { } panic("unreachable") } +fun[1] = func(r rune) int { + switch(r) { + case 46: return -1 + case 45: return -1 + default: + switch { + case 48 <= r && r <= 48: return 2 + case 49 <= r && r <= 57: return 3 + default: return -1 + } + } + panic("unreachable") +} acc[5] = true fun[5] = func(r rune) int { switch(r) { - case 46: return 4 case 45: return -1 + case 46: return 4 default: switch { case 48 <= r && r <= 48: return 5 @@ -778,11 +778,12 @@ a0[10].id = 10 { var acc [2]bool var fun [2]func(rune) int -fun[0] = func(r rune) int { +acc[1] = true +fun[1] = func(r rune) int { switch(r) { case 32: return 1 - case 10: return 1 case 9: return 1 + case 10: return 1 default: switch { default: return -1 @@ -790,11 +791,10 @@ fun[0] = func(r rune) int { } 
panic("unreachable") } -acc[1] = true -fun[1] = func(r rune) int { +fun[0] = func(r rune) int { switch(r) { - case 32: return 1 case 10: return 1 + case 32: return 1 case 9: return 1 default: switch { @@ -808,25 +808,26 @@ a0[11].f = fun[:] a0[11].id = 11 } { -var acc [2]bool -var fun [2]func(rune) int -fun[0] = func(r rune) int { +var acc [3]bool +var fun [3]func(rune) int +acc[2] = true +fun[2] = func(r rune) int { switch(r) { - case 94: return -1 + case 61: return 2 + case 45: return 2 case 32: return -1 + case 62: return 2 case 58: return -1 - case 45: return -1 - case 10: return -1 case 12: return -1 - case 9: return -1 - case 61: return -1 + case 94: return -1 + case 10: return -1 + case 43: return 2 case 13: return -1 - case 60: return -1 - case 62: return -1 - case 43: return -1 + case 60: return 2 + case 9: return -1 default: switch { - default: return 1 + default: return 2 } } panic("unreachable") @@ -834,18 +835,39 @@ fun[0] = func(r rune) int { acc[1] = true fun[1] = func(r rune) int { switch(r) { - case 43: return -1 - case 94: return -1 - case 32: return -1 - case 58: return -1 - case 45: return -1 - case 10: return -1 - case 12: return -1 + case 13: return -1 + case 60: return 2 case 9: return -1 - case 61: return -1 + case 61: return 2 + case 45: return 2 + case 32: return -1 + case 62: return 2 + case 58: return -1 + case 12: return -1 + case 94: return -1 + case 10: return -1 + case 43: return 2 + default: + switch { + default: return 2 + } + } + panic("unreachable") +} +fun[0] = func(r rune) int { + switch(r) { + case 43: return -1 case 13: return -1 case 60: return -1 + case 9: return -1 + case 61: return -1 + case 45: return -1 + case 32: return -1 case 62: return -1 + case 58: return -1 + case 12: return -1 + case 94: return -1 + case 10: return -1 default: switch { default: return 1 @@ -991,7 +1013,7 @@ func (yylex lexer) Lex(lval *yySymType) int { } case 11: //[ \t\n]+/ { logDebugTokens("WHITESPACE (count=%d)", len(yylex.Text())) /* eat up 
whitespace */ } - case 12: //[^\t\n\f\r :^\+\-><=]+/ + case 12: //[^\t\n\f\r :^\+\-><=][^\t\n\f\r :^]*/ { lval.s = yylex.Text() logDebugTokens("STRING - %s", lval.s); diff --git a/query_string_parser_test.go b/query_string_parser_test.go index d08c519b..1e33e000 100644 --- a/query_string_parser_test.go +++ b/query_string_parser_test.go @@ -155,6 +155,16 @@ func TestQuerySyntaxParserValid(t *testing.T) { }, nil), }, + { + input: "cat-dog", + mapping: NewIndexMapping(), + result: NewBooleanQuery( + nil, + []Query{ + NewMatchQuery("cat-dog"), + }, + nil), + }, } for _, test := range tests {