Skip to content

Commit fa6201e

Browse files
committed
[Bugfix] Fix Llama3JsonToolParser regex to support deeply nested JSON
The previous regex pattern could only handle one level of JSON nesting, causing it to fail on tool calls with nested parameters like:

{"name": "tool", "parameters": {"location": {"city": "SF", "state": "CA"}}}

This fix replaces the pattern with a recursive regex using (?R) that supports arbitrary nesting depth while maintaining backward compatibility.

Added regression tests for 2-level and 3+ level nested JSON structures.

Signed-off-by: ym820 <yikai.mao@outlook.com>
1 parent 01d68f3 commit fa6201e

File tree

2 files changed

+44
-2
lines changed

2 files changed

+44
-2
lines changed

tests/entrypoints/openai/tool_parsers/test_llama3_json_tool_parser.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,3 +132,44 @@ def test_extract_tool_calls_multiple_json_with_surrounding_text(parser):
132132
assert result.tool_calls[0].function.name == "searchTool"
133133
assert result.tool_calls[1].function.name == "getOpenIncidentsTool"
134134
assert result.tool_calls[2].function.name == "searchTool"
135+
136+
137+
def test_extract_tool_calls_deeply_nested_json(parser):
138+
# Test with JSON nested two levels inside the tool call (brace depth 3)
139+
# This is a regression test for the regex pattern bug
140+
model_output = (
141+
'{"name": "get_current_conditions", '
142+
'"parameters": {"location": {"city": "San Francisco", "state": "CA"}, '
143+
'"unit": "Fahrenheit"}}'
144+
)
145+
result = parser.extract_tool_calls(model_output, None)
146+
147+
assert result.tools_called is True
148+
assert len(result.tool_calls) == 1
149+
assert result.tool_calls[0].function.name == "get_current_conditions"
150+
151+
# Verify the entire parameters object is captured
152+
import json
153+
args = json.loads(result.tool_calls[0].function.arguments)
154+
assert "location" in args
155+
assert args["location"]["city"] == "San Francisco"
156+
assert args["location"]["state"] == "CA"
157+
assert args["unit"] == "Fahrenheit"
158+
159+
160+
def test_extract_tool_calls_very_deeply_nested_json(parser):
161+
# Test with very deeply nested JSON (3+ levels)
162+
model_output = (
163+
'{"name": "complex_tool", '
164+
'"parameters": {"level1": {"level2": {"level3": {"value": "deep"}}}}}'
165+
)
166+
result = parser.extract_tool_calls(model_output, None)
167+
168+
assert result.tools_called is True
169+
assert len(result.tool_calls) == 1
170+
assert result.tool_calls[0].function.name == "complex_tool"
171+
172+
# Verify the entire nested structure is captured
173+
import json
174+
args = json.loads(result.tool_calls[0].function.arguments)
175+
assert args["level1"]["level2"]["level3"]["value"] == "deep"

vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,10 @@ def __init__(self, tokenizer: PreTrainedTokenizerBase):
5757
0
5858
]
5959
# Updated regex to match multiple JSONs separated by semicolons
60-
# This pattern is more robust and can handle nested JSON objects
60+
# This pattern uses recursion to handle arbitrarily nested JSON objects
61+
# (?R) recurses into the whole pattern (`regex` module only, not stdlib `re`)
6162
self.tool_call_regex = re.compile(
62-
r"{[^{}]*(?:{[^{}]*}[^{}]*)*}(?:\s*;\s*{[^{}]*(?:{[^{}]*}[^{}]*)*})*",
63+
r"\{(?:[^{}]|(?R))*\}(?:\s*;\s*\{(?:[^{}]|(?R))*\})*",
6364
re.DOTALL,
6465
)
6566

0 commit comments

Comments
 (0)