Commit c2e40fe

improve gguf-function-calling parser

1 parent d900341

1 file changed

llama_cpp/llama_chat_format.py

Lines changed: 41 additions & 20 deletions
@@ -4174,6 +4174,7 @@ def gguf_function_calling(
         "\nfunctions.<function_name>:"
         '\n{ "arg1": "value1", "arg2": "value2" }'
         "\n</function_calls>"
+        "\n\nYou can also combine both formats to provide explanatory text with function calls."
         "{% endif %}"
         "<|im_end|>\n"
         "{% endif %}"
@@ -4292,15 +4293,13 @@ def gguf_function_calling(
     prompt = template_renderer.render(
         messages=messages, tools=tools, tool_calls=True, add_generation_prompt=True
     )
-    # Decide initial grammar: message only, functions only, or message+functions
     initial_gbnf_tool_grammar = (
-        """root ::= message | function_calls | message_then_functions
-message ::= "message:"
-function_calls ::= "<function_calls>"
-message_then_functions ::= "message:" "<function_calls>"
-"""
+        (
+            'root ::= "<function_calls>" "\\n" functions | "message:"\n'
+            f"functions ::= {function_names}\n"
+        )
         if tool_choice == "auto"
-        else f'root ::= "<function_calls>" functions\nfunctions ::= {function_names}\n'
+        else f'root ::= "<function_calls>" "\\n" functions\nfunctions ::= {function_names}\n'
     )
     completion = cast(
         llama_types.CreateCompletionResponse,
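
For illustration, this is roughly what the two branches of initial_gbnf_tool_grammar render to. The tool names below and the construction of function_names (assumed to be a "|"-joined list of quoted functions.<name>: terminals) are not part of this diff, so treat the sketch as an assumption:

# Sketch only: function_names is assumed to be built elsewhere as a
# "|"-joined list of quoted 'functions.<name>:' terminals.
function_names = " | ".join(
    f'"functions.{name}:"' for name in ["get_weather", "get_time"]
)

# tool_choice == "auto": the model may open a <function_calls> block
# or start a plain "message:" response.
auto_grammar = (
    'root ::= "<function_calls>" "\\n" functions | "message:"\n'
    f"functions ::= {function_names}\n"
)

# Forced tool choice: the model must open a <function_calls> block.
forced_grammar = (
    f'root ::= "<function_calls>" "\\n" functions\nfunctions ::= {function_names}\n'
)

print(auto_grammar)
# root ::= "<function_calls>" "\n" functions | "message:"
# functions ::= "functions.get_weather:" | "functions.get_time:"
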
@@ -4319,21 +4318,43 @@ def gguf_function_calling(
         ),
     )
     text = completion["choices"][0]["text"]
-    tool_name = None if text.startswith("message") else text.split("\n")[-1][len("functions.") :]
-
-    # Case 2 step 2A: Respond with a message
-    if tool_name is None:
+
+    # Parse the response to extract message and/or function calls
+    message_content = None
+    tool_name = None
+
+    if text.startswith("message:"):
+        # Always grab message
+        if "<function_calls>" in text:
+            parts = text.split("<function_calls>", 1)
+            message_content = parts[0][len("message:"):].strip()
+            if len(parts) > 1 and "functions." in parts[1]:
+                tool_name = parts[1].split("functions.", 1)[1].split(":", 1)[0].strip()
+        else:
+            message_content = text[len("message:"):].strip()
+
+    elif text.startswith("<function_calls>"):
+        # Function calls block, but allow stray message content too
+        parts = text.split("functions.", 1)
+        if len(parts) > 1:
+            tool_name = parts[1].split(":", 1)[0].strip()
+        # Optionally capture anything before the function calls as message text
+        before = text[len("<function_calls>"):].strip()
+        if before and not before.startswith("functions."):
+            message_content = before
+
+    # Case 2 step 2A: Respond with message only
+    if tool_name is None and message_content is not None:
         prompt = template_renderer.render(
             messages=messages, tools=[], tool_calls=None, add_generation_prompt=True
         )
-        return _convert_completion_to_chat(
-            llama.create_completion(
-                prompt=prompt,
-                **completion_kwargs,  # type: ignore[arg-type]
-                logprobs=top_logprobs if logprobs else None,
-            ),
-            stream=stream,
+        completion_response = llama.create_completion(
+            prompt=prompt,
+            **completion_kwargs,  # type: ignore[arg-type]
+            logprobs=top_logprobs if logprobs else None,
         )
+        completion_response["choices"][0]["text"] = message_content
+        return _convert_completion_to_chat(completion_response, stream=stream)

     # Case 2 step 2B: One or more function calls
     follow_up_gbnf_tool_grammar = (
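
To see the new parsing branch in isolation, here is a standalone sketch that mirrors the logic added above, exercised on the three response shapes the grammar allows: message only, function calls only, and message followed by function calls. The sample model outputs and the get_weather tool are invented for illustration:

# Standalone sketch of the parsing logic added in this commit.
# The sample `text` values below are hypothetical model outputs.
def parse_response(text: str):
    message_content = None
    tool_name = None

    if text.startswith("message:"):
        if "<function_calls>" in text:
            parts = text.split("<function_calls>", 1)
            message_content = parts[0][len("message:"):].strip()
            if len(parts) > 1 and "functions." in parts[1]:
                tool_name = parts[1].split("functions.", 1)[1].split(":", 1)[0].strip()
        else:
            message_content = text[len("message:"):].strip()
    elif text.startswith("<function_calls>"):
        parts = text.split("functions.", 1)
        if len(parts) > 1:
            tool_name = parts[1].split(":", 1)[0].strip()
        before = text[len("<function_calls>"):].strip()
        if before and not before.startswith("functions."):
            message_content = before

    return message_content, tool_name


# Message only -> ('The weather looks fine today.', None)
print(parse_response("message: The weather looks fine today."))

# Function calls only -> (None, 'get_weather')
print(parse_response('<function_calls>\nfunctions.get_weather:\n{ "city": "Berlin" }'))

# Message followed by function calls -> ('Let me check.', 'get_weather')
print(parse_response('message: Let me check.\n<function_calls>\nfunctions.get_weather:\n{ "city": "Berlin" }'))
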
@@ -4343,7 +4364,7 @@ def gguf_function_calling(
     prompt += "<function_calls>\n"
     if stream:
         return _stream_tool_calls(
-            llama, prompt, tools, tool_name, completion_kwargs, follow_up_gbnf_tool_grammar
+            llama, prompt, tools, tool_name, completion_kwargs, follow_up_gbnf_tool_grammar, message_content
         )
     tool = next((tool for tool in tools if tool["function"]["name"] == tool_name), None)
     completions: List[llama_types.CreateCompletionResponse] = []
@@ -4411,7 +4432,7 @@ def gguf_function_calling(
                 ),
                 "message": {
                     "role": "assistant",
-                    "content": None,
+                    "content": message_content,  # Include message content if present
                     "tool_calls": [
                         {
                             "id": "call_" + f"_{i}_" + tool_name + "_" + completion["id"],

0 commit comments