@@ -4174,6 +4174,7 @@ def gguf_function_calling(
41744174 "\n functions.<function_name>:"
41754175 '\n { "arg1": "value1", "arg2": "value2" }'
41764176 "\n </function_calls>"
4177+ "\n \n You can also combine both formats to provide explanatory text with function calls."
41774178 "{% endif %}"
41784179 "<|im_end|>\n "
41794180 "{% endif %}"
@@ -4292,15 +4293,13 @@ def gguf_function_calling(
42924293 prompt = template_renderer .render (
42934294 messages = messages , tools = tools , tool_calls = True , add_generation_prompt = True
42944295 )
4295- # Decide initial grammar: message only, functions only, or message+functions
42964296 initial_gbnf_tool_grammar = (
4297- """root ::= message | function_calls | message_then_functions
4298- message ::= "message:"
4299- function_calls ::= "<function_calls>"
4300- message_then_functions ::= "message:" "<function_calls>"
4301- """
4297+ (
4298+ 'root ::= "<function_calls>" "\\ n" functions | "message:"\n '
4299+ f"functions ::= { function_names } \n "
4300+ )
43024301 if tool_choice == "auto"
4303- else f'root ::= "<function_calls>" functions\n functions ::= { function_names } \n '
4302+ else f'root ::= "<function_calls>" " \\ n" functions\n functions ::= { function_names } \n '
43044303 )
43054304 completion = cast (
43064305 llama_types .CreateCompletionResponse ,
@@ -4319,21 +4318,43 @@ def gguf_function_calling(
43194318 ),
43204319 )
43214320 text = completion ["choices" ][0 ]["text" ]
4322- tool_name = None if text .startswith ("message" ) else text .split ("\n " )[- 1 ][len ("functions." ) :]
4323-
4324- # Case 2 step 2A: Respond with a message
4325- if tool_name is None :
4321+
4322+ # Parse the response to extract message and/or function calls
4323+ message_content = None
4324+ tool_name = None
4325+
4326+ if text .startswith ("message:" ):
4327+ # Always grab message
4328+ if "<function_calls>" in text :
4329+ parts = text .split ("<function_calls>" , 1 )
4330+ message_content = parts [0 ][len ("message:" ):].strip ()
4331+ if len (parts ) > 1 and "functions." in parts [1 ]:
4332+ tool_name = parts [1 ].split ("functions." , 1 )[1 ].split (":" , 1 )[0 ].strip ()
4333+ else :
4334+ message_content = text [len ("message:" ):].strip ()
4335+
4336+ elif text .startswith ("<function_calls>" ):
4337+ # Function calls block, but allow stray message content too
4338+ parts = text .split ("functions." , 1 )
4339+ if len (parts ) > 1 :
4340+ tool_name = parts [1 ].split (":" , 1 )[0 ].strip ()
4341+ # Optionally capture anything before the function calls as message text
4342+ before = text [len ("<function_calls>" ):].strip ()
4343+ if before and not before .startswith ("functions." ):
4344+ message_content = before
4345+
4346+ # Case 2 step 2A: Respond with message only
4347+ if tool_name is None and message_content is not None :
43264348 prompt = template_renderer .render (
43274349 messages = messages , tools = [], tool_calls = None , add_generation_prompt = True
43284350 )
4329- return _convert_completion_to_chat (
4330- llama .create_completion (
4331- prompt = prompt ,
4332- ** completion_kwargs , # type: ignore[arg-type]
4333- logprobs = top_logprobs if logprobs else None ,
4334- ),
4335- stream = stream ,
4351+ completion_response = llama .create_completion (
4352+ prompt = prompt ,
4353+ ** completion_kwargs , # type: ignore[arg-type]
4354+ logprobs = top_logprobs if logprobs else None ,
43364355 )
4356+ completion_response ["choices" ][0 ]["text" ] = message_content
4357+ return _convert_completion_to_chat (completion_response , stream = stream )
43374358
43384359 # Case 2 step 2B: One or more function calls
43394360 follow_up_gbnf_tool_grammar = (
@@ -4343,7 +4364,7 @@ def gguf_function_calling(
43434364 prompt += "<function_calls>\n "
43444365 if stream :
43454366 return _stream_tool_calls (
4346- llama , prompt , tools , tool_name , completion_kwargs , follow_up_gbnf_tool_grammar
4367+ llama , prompt , tools , tool_name , completion_kwargs , follow_up_gbnf_tool_grammar , message_content
43474368 )
43484369 tool = next ((tool for tool in tools if tool ["function" ]["name" ] == tool_name ), None )
43494370 completions : List [llama_types .CreateCompletionResponse ] = []
@@ -4411,7 +4432,7 @@ def gguf_function_calling(
44114432 ),
44124433 "message" : {
44134434 "role" : "assistant" ,
4414- "content" : None ,
4435+ "content" : message_content , # Include message content if present
44154436 "tool_calls" : [
44164437 {
44174438 "id" : "call_" + f"_{ i } _" + tool_name + "_" + completion ["id" ],
0 commit comments