Skip to content

Commit 57818bd

Browse files
committed
Dedicated bracket stack
1 parent 2dabb87 commit 57818bd

File tree

2 files changed

+56
-17
lines changed

2 files changed

+56
-17
lines changed

jsonpath_rfc9535/lex.py

Lines changed: 53 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -27,19 +27,30 @@
2727
class Lexer:
2828
"""JSONPath expression lexical scanner."""
2929

30-
__slots__ = ("filter_depth", "paren_stack", "tokens", "start", "pos", "query")
30+
__slots__ = (
31+
"filter_depth",
32+
"func_call_stack",
33+
"bracket_stack",
34+
"tokens",
35+
"start",
36+
"pos",
37+
"query",
38+
)
3139

3240
def __init__(self, query: str) -> None:
3341
self.filter_depth = 0
3442
"""Filter nesting level."""
3543

36-
self.paren_stack: List[int] = []
44+
self.func_call_stack: List[int] = []
3745
"""A running count of parentheses for each, possibly nested, function call.
3846
3947
If the stack is empty, we are not in a function call. Remember that
4048
function arguments can be arbitrarily nested in parentheses.
4149
"""
4250

51+
self.bracket_stack: list[tuple[str, int]] = []
52+
"""A stack of (opening bracket or parenthesis, index in query) pairs that have not yet been closed."""
53+
4354
self.tokens: List[Token] = []
4455
"""Tokens resulting from scanning a JSONPath expression."""
4556

@@ -133,7 +144,7 @@ def ignore_whitespace(self) -> bool:
133144

134145
def error(self, msg: str) -> None:
135146
"""Emit an error token."""
136-
# better error messages.
147+
# TODO: better error messages.
137148
self.tokens.append(
138149
Token(
139150
TokenType.ERROR,
@@ -179,6 +190,7 @@ def lex_segment(l: Lexer) -> Optional[StateFn]: # noqa: D103, PLR0911
179190

180191
if c == "[":
181192
l.emit(TokenType.LBRACKET)
193+
l.bracket_stack.append((c, l.pos - 1))
182194
return lex_inside_bracketed_segment
183195

184196
if l.filter_depth:
@@ -202,6 +214,7 @@ def lex_descendant_segment(l: Lexer) -> Optional[StateFn]: # noqa: D103
202214

203215
if c == "[":
204216
l.emit(TokenType.LBRACKET)
217+
l.bracket_stack.append((c, l.pos - 1))
205218
return lex_inside_bracketed_segment
206219

207220
l.backup()
@@ -244,11 +257,17 @@ def lex_inside_bracketed_segment(l: Lexer) -> Optional[StateFn]: # noqa: PLR091
244257
c = l.next()
245258

246259
if c == "]":
260+
if not l.bracket_stack or l.bracket_stack[-1][0] != "[":
261+
l.backup()
262+
l.error("unbalanced brackets")
263+
return None
264+
265+
l.bracket_stack.pop()
247266
l.emit(TokenType.RBRACKET)
248267
return lex_segment
249268

250269
if c == "":
251-
l.error("unclosed bracketed selection")
270+
l.error("unbalanced brackets")
252271
return None
253272

254273
if c == "*":
@@ -306,7 +325,7 @@ def lex_inside_filter(l: Lexer) -> Optional[StateFn]: # noqa: D103, PLR0915, PL
306325
l.emit(TokenType.COMMA)
307326
# If we have unbalanced parens, we are inside a function call and a
308327
# comma separates arguments. Otherwise a comma separates selectors.
309-
if l.paren_stack:
328+
if l.func_call_stack:
310329
continue
311330
l.filter_depth -= 1
312331
return lex_inside_bracketed_segment
@@ -319,19 +338,26 @@ def lex_inside_filter(l: Lexer) -> Optional[StateFn]: # noqa: D103, PLR0915, PL
319338

320339
if c == "(":
321340
l.emit(TokenType.LPAREN)
341+
l.bracket_stack.append((c, l.pos - 1))
322342
# Are we in a function call? If so, a function argument contains parens.
323-
if l.paren_stack:
324-
l.paren_stack[-1] += 1
343+
if l.func_call_stack:
344+
l.func_call_stack[-1] += 1
325345
continue
326346

327347
if c == ")":
348+
if not l.bracket_stack or l.bracket_stack[-1][0] != "(":
349+
l.backup()
350+
l.error("unbalanced parentheses")
351+
return None
352+
353+
l.bracket_stack.pop()
328354
l.emit(TokenType.RPAREN)
329355
# Are we closing a function call or a parenthesized expression?
330-
if l.paren_stack:
331-
if l.paren_stack[-1] == 1:
332-
l.paren_stack.pop()
356+
if l.func_call_stack:
357+
if l.func_call_stack[-1] == 1:
358+
l.func_call_stack.pop()
333359
else:
334-
l.paren_stack[-1] -= 1
360+
l.func_call_stack[-1] -= 1
335361
continue
336362

337363
if c == "$":
@@ -398,8 +424,9 @@ def lex_inside_filter(l: Lexer) -> Optional[StateFn]: # noqa: D103, PLR0915, PL
398424
l.emit(TokenType.INT)
399425
elif l.accept_match(RE_FUNCTION_NAME) and l.peek() == "(":
400426
# Keep track of parentheses for this function call.
401-
l.paren_stack.append(1)
427+
l.func_call_stack.append(1)
402428
l.emit(TokenType.FUNCTION)
429+
l.bracket_stack.append(("(", l.pos))
403430
l.next()
404431
l.ignore() # ignore LPAREN
405432
else:
@@ -482,8 +509,20 @@ def tokenize(query: str) -> List[Token]:
482509
lexer, tokens = lex(query)
483510
lexer.run()
484511

485-
if len(lexer.paren_stack) == 1:
486-
raise JSONPathSyntaxError("unbalanced parentheses", token=tokens[-1])
512+
# Check for remaining opening brackets that have not been closed.
513+
if lexer.bracket_stack:
514+
ch, index = lexer.bracket_stack[0]
515+
msg = f"unbalanced {'brackets' if ch == '[' else 'parentheses'}"
516+
raise JSONPathSyntaxError(
517+
msg,
518+
token=Token(
519+
TokenType.ERROR,
520+
lexer.query[index],
521+
index,
522+
lexer.query,
523+
msg,
524+
),
525+
)
487526

488527
if tokens and tokens[-1].type_ == TokenType.ERROR:
489528
raise JSONPathSyntaxError(tokens[-1].message, token=tokens[-1])

tests/test_errors.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,14 @@ def env() -> JSONPathEnvironment:
1818

1919
def test_unclosed_selection_list(env: JSONPathEnvironment) -> None:
2020
with pytest.raises(
21-
JSONPathSyntaxError, match=r"unclosed bracketed selection, line 1, column 5"
21+
JSONPathSyntaxError, match=r"unbalanced brackets, line 1, column 1"
2222
):
2323
env.compile("$[1,2")
2424

2525

2626
def test_unclosed_selection_list_inside_filter(env: JSONPathEnvironment) -> None:
2727
with pytest.raises(
28-
JSONPathSyntaxError, match=r"unclosed bracketed selection, line 1, column 10"
28+
JSONPathSyntaxError, match=r"unbalanced brackets, line 1, column 1"
2929
):
3030
env.compile("$[?@.a < 1")
3131

@@ -86,7 +86,7 @@ class MockEnv(JSONPathEnvironment):
8686

8787

8888
def test_nested_functions_unbalanced_parens(env: JSONPathEnvironment) -> None:
89-
with pytest.raises(JSONPathSyntaxError, match="unbalanced parentheses"):
89+
with pytest.raises(JSONPathSyntaxError, match="unbalanced brackets"):
9090
env.compile("$.values[?match(@.a, value($..['regex'])]")
9191

9292

0 commit comments

Comments
 (0)