@@ -26,6 +26,7 @@ def tokenize(self, string):
2626
2727 new_lexer = ply .lex .lex (module = self , debug = self .debug , errorlog = logger )
2828 new_lexer .latest_newline = 0
29+ new_lexer .string_value = None
2930 new_lexer .input (string )
3031
3132 while True :
@@ -34,6 +35,9 @@ def tokenize(self, string):
3435 t .col = t .lexpos - new_lexer .latest_newline
3536 yield t
3637
38+ if new_lexer .string_value is not None :
39+ raise JsonPathLexerError ('Unexpected EOF in string literal or identifier' )
40+
3741 # ============== PLY Lexer specification ==================
3842 #
3943 # This probably should be private but:
@@ -66,17 +70,28 @@ def t_NUMBER(self, t):
6670 t .value = int (t .value )
6771 return t
6872
73+
# Single-quoted strings
#
# Nothing is skipped while inside the 'singlequote' state: every character
# between the quotes is significant.
t_singlequote_ignore = ''


# Opening single quote: begin accumulating a single-quoted literal.
#
# NOTE: the docstring below is the PLY rule pattern, not documentation;
# it must stay the first statement of the function and must not be edited
# for wording.
#
# Side effects on ``t.lexer``:
#   * ``string_start`` is set to the lexer's current ``lexpos``.
#   * ``string_value`` is reset to '' and grows as the literal is scanned;
#     a non-None value marks "currently inside a literal", which the
#     tokenize loop checks at end of input.
#   * the 'singlequote' state is pushed.
#
# Nothing is returned, so no token is emitted for the quote itself.
def t_singlequote(self, t):
    r"'"
    t.lexer.string_start = t.lexer.lexpos
    t.lexer.string_value = ''
    t.lexer.push_state('singlequote')
7581
# Literal run inside a single-quoted string: one or more characters that
# are neither the closing quote nor a backslash.
#
# NOTE: the docstring below is the PLY rule pattern, not documentation.
#
# The matched text is appended verbatim to ``t.lexer.string_value``.
# Nothing is returned, so no token is emitted for the partial content.
def t_singlequote_content(self, t):
    r"[^'\\]+"
    t.lexer.string_value += t.value
85+
# Backslash escape inside a single-quoted string.
#
# NOTE: the docstring below is the PLY rule pattern, not documentation.
#
# The backslash is dropped and the character after it is appended to
# ``t.lexer.string_value`` unchanged (so ``\'`` yields ``'`` and ``\\``
# yields ``\``). No escape is translated: ``\n`` yields the letter ``n``.
#
# NOTE(review): ``.`` does not match a newline unless the lexer is compiled
# with re.DOTALL, so a backslash directly followed by a newline is not
# consumed by this rule -- confirm that is the intended behaviour.
def t_singlequote_escape(self, t):
    r'\\.'
    t.lexer.string_value += t.value[1]
89+
# Closing single quote: finish the literal and emit it.
#
# NOTE: the docstring below is the PLY rule pattern, not documentation.
#
# The token's value becomes the accumulated ``t.lexer.string_value`` (quotes
# excluded, escapes already resolved) and its type is set to 'ID'.
# ``string_value`` is reset to None to mark "not inside a literal" before
# the 'singlequote' state is popped.
#
# Returns the token so that it is emitted by the lexer.
def t_singlequote_end(self, t):
    r"'"
    t.value = t.lexer.string_value
    t.type = 'ID'
    t.lexer.string_value = None
    t.lexer.pop_state()
    return t
8297
@@ -86,15 +101,25 @@ def t_singlequote_error(self, t):
86101
# Double-quoted strings
#
# Nothing is skipped while inside the 'doublequote' state: every character
# between the quotes is significant.
t_doublequote_ignore = ''


# Opening double quote: begin accumulating a double-quoted literal.
#
# NOTE: the docstring below is the PLY rule pattern, not documentation;
# it must stay the first statement of the function.
#
# Side effects on ``t.lexer``:
#   * ``string_start`` is set to the lexer's current ``lexpos``.
#   * ``string_value`` is reset to '' and grows as the literal is scanned;
#     a non-None value marks "currently inside a literal".
#   * the 'doublequote' state is pushed.
#
# Nothing is returned, so no token is emitted for the quote itself.
def t_doublequote(self, t):
    r'"'
    t.lexer.string_start = t.lexer.lexpos
    t.lexer.string_value = ''
    t.lexer.push_state('doublequote')
93109
# Literal run inside a double-quoted string: one or more characters that
# are neither the closing quote nor a backslash.
#
# NOTE: the docstring below is the PLY rule pattern, not documentation.
#
# The matched text is appended verbatim to ``t.lexer.string_value``.
# Nothing is returned, so no token is emitted for the partial content.
def t_doublequote_content(self, t):
    r'[^"\\]+'
    t.lexer.string_value += t.value
113+
# Backslash escape inside a double-quoted string.
#
# NOTE: the docstring below is the PLY rule pattern, not documentation.
#
# The backslash is dropped and the character after it is appended to
# ``t.lexer.string_value`` unchanged (so ``\"`` yields ``"`` and ``\\``
# yields ``\``). No escape is translated: ``\n`` yields the letter ``n``.
#
# NOTE(review): ``.`` does not match a newline unless the lexer is compiled
# with re.DOTALL, so a backslash directly followed by a newline is not
# consumed by this rule -- confirm that is the intended behaviour.
def t_doublequote_escape(self, t):
    r'\\.'
    t.lexer.string_value += t.value[1]
117+
# Closing double quote: finish the literal and emit it.
#
# NOTE: the docstring below is the PLY rule pattern, not documentation.
#
# The token's value becomes the accumulated ``t.lexer.string_value`` (quotes
# excluded, escapes already resolved) and its type is set to 'ID'.
# ``string_value`` is reset to None to mark "not inside a literal" before
# the 'doublequote' state is popped.
#
# Returns the token so that it is emitted by the lexer.
def t_doublequote_end(self, t):
    r'"'
    t.value = t.lexer.string_value
    t.type = 'ID'
    t.lexer.string_value = None
    t.lexer.pop_state()
    return t
100125
@@ -104,15 +129,25 @@ def t_doublequote_error(self, t):
104129
# Back-quoted "magic" operators
#
# Nothing is skipped while inside the 'backquote' state: every character
# between the back-quotes is significant.
t_backquote_ignore = ''


# Opening back-quote: begin accumulating a named-operator literal.
#
# NOTE: the docstring below is the PLY rule pattern, not documentation;
# it must stay the first statement of the function.
#
# Side effects on ``t.lexer``:
#   * ``string_start`` is set to the lexer's current ``lexpos``.
#   * ``string_value`` is reset to '' and grows as the literal is scanned;
#     a non-None value marks "currently inside a literal".
#   * the 'backquote' state is pushed.
#
# Nothing is returned, so no token is emitted for the back-quote itself.
def t_backquote(self, t):
    r'`'
    t.lexer.string_start = t.lexer.lexpos
    t.lexer.string_value = ''
    t.lexer.push_state('backquote')
111137
# Backslash escape inside a back-quoted operator name.
#
# NOTE: the docstring below is the PLY rule pattern, not documentation.
#
# The backslash is dropped and the character after it is appended to
# ``t.lexer.string_value`` unchanged (so ``\``` yields a literal back-quote
# and ``\\`` yields ``\``). No escape is translated.
#
# NOTE(review): ``.`` does not match a newline unless the lexer is compiled
# with re.DOTALL, so a backslash directly followed by a newline is not
# consumed by this rule -- confirm that is the intended behaviour.
def t_backquote_escape(self, t):
    r'\\.'
    t.lexer.string_value += t.value[1]
141+
# Literal run inside a back-quoted operator name: one or more characters
# that are neither the closing back-quote nor a backslash.
#
# NOTE: the docstring below is the PLY rule pattern, not documentation.
#
# The matched text is appended verbatim to ``t.lexer.string_value``.
# Nothing is returned, so no token is emitted for the partial content.
def t_backquote_content(self, t):
    r"[^`\\]+"
    t.lexer.string_value += t.value
145+
# Closing back-quote: finish the operator name and emit it.
#
# NOTE: the docstring below is the PLY rule pattern, not documentation.
#
# The token's value becomes the accumulated ``t.lexer.string_value``
# (back-quotes excluded, escapes already resolved) and its type is set to
# 'NAMED_OPERATOR'. ``string_value`` is reset to None to mark "not inside a
# literal" before the 'backquote' state is popped.
#
# Returns the token so that it is emitted by the lexer.
def t_backquote_end(self, t):
    r'`'
    t.value = t.lexer.string_value
    t.type = 'NAMED_OPERATOR'
    t.lexer.string_value = None
    t.lexer.pop_state()
    return t
118153
0 commit comments