@@ -350,6 +350,7 @@ def _convert_text_completion_chunks_to_chat(
350350 "finish_reason" : chunk ["choices" ][0 ]["finish_reason" ],
351351 }
352352 ],
353+ "usage" : chunk .get ("usage" ) if "usage" in chunk else None ,
353354 }
354355
355356
@@ -434,7 +435,7 @@ def _stream_response_to_function_stream(
434435 created = chunk ["created" ]
435436 model = chunk ["model" ]
436437 tool_id = "call_" + "_0_" + tool_name + "_" + chunk ["id" ]
437- yield {
438+ response = {
438439 "id" : id_ ,
439440 "object" : "chat.completion.chunk" ,
440441 "created" : created ,
@@ -453,7 +454,11 @@ def _stream_response_to_function_stream(
453454 }
454455 ],
455456 }
456- yield {
457+ if "usage" in chunk :
458+ response ["usage" ] = chunk ["usage" ]
459+ yield response
460+
461+ response = {
457462 "id" : "chat" + chunk ["id" ],
458463 "object" : "chat.completion.chunk" ,
459464 "created" : chunk ["created" ],
@@ -487,10 +492,14 @@ def _stream_response_to_function_stream(
487492 }
488493 ],
489494 }
495+ if "usage" in chunk :
496+ response ["usage" ] = chunk ["usage" ]
497+ yield response
490498 first = False
491499 continue
500+
492501 assert tool_id is not None
493- yield {
502+ response = {
494503 "id" : "chat" + chunk ["id" ],
495504 "object" : "chat.completion.chunk" ,
496505 "created" : chunk ["created" ],
@@ -522,9 +531,12 @@ def _stream_response_to_function_stream(
522531 }
523532 ],
524533 }
534+ if "usage" in chunk :
535+ response ["usage" ] = chunk ["usage" ]
536+ yield response
525537
526538 if id_ is not None and created is not None and model is not None :
527- yield {
539+ response = {
528540 "id" : id_ ,
529541 "object" : "chat.completion.chunk" ,
530542 "created" : created ,
@@ -543,6 +555,9 @@ def _stream_response_to_function_stream(
543555 }
544556 ],
545557 }
558+ if "usage" in chunk :
559+ response ["usage" ] = chunk ["usage" ]
560+ yield response
546561
547562 return _stream_response_to_function_stream (chunks )
548563
@@ -2123,6 +2138,7 @@ def generate_streaming(tools, functions, function_call, prompt):
21232138 },
21242139 }
21252140 ],
2141+ usage = chunk ["usage" ] if "usage" in chunk else None ,
21262142 )
21272143 first = False
21282144 if tools is not None :
@@ -2163,6 +2179,7 @@ def generate_streaming(tools, functions, function_call, prompt):
21632179 },
21642180 }
21652181 ],
2182+ usage = chunk ["usage" ] if "usage" in chunk else None ,
21662183 )
21672184 # Yield tool_call/function_call stop message
21682185 yield llama_types .CreateChatCompletionStreamResponse (
@@ -2185,6 +2202,7 @@ def generate_streaming(tools, functions, function_call, prompt):
21852202 },
21862203 }
21872204 ],
2205+ usage = chunk ["usage" ] if "usage" in chunk else None ,
21882206 )
21892207 # If "auto" or no tool_choice/function_call
21902208 elif isinstance (function_call , str ) and function_call == "auto" :
@@ -2220,6 +2238,7 @@ def generate_streaming(tools, functions, function_call, prompt):
22202238 "finish_reason" : None ,
22212239 }
22222240 ],
2241+ usage = chunk ["usage" ] if "usage" in chunk else None ,
22232242 )
22242243 else :
22252244 prompt += f"{ function_name } \n <|content|>"
@@ -2265,6 +2284,7 @@ def generate_streaming(tools, functions, function_call, prompt):
22652284 },
22662285 }
22672286 ],
2287+ usage = chunk ["usage" ] if "usage" in chunk else None ,
22682288 )
22692289 # Generate content
22702290 stops = [RECIPIENT_TOKEN , STOP_TOKEN ]
@@ -2302,6 +2322,7 @@ def generate_streaming(tools, functions, function_call, prompt):
23022322 },
23032323 }
23042324 ],
2325+ usage = chunk ["usage" ] if "usage" in chunk else None ,
23052326 )
23062327 is_end = False
23072328 elif chunk ["choices" ][0 ]["text" ] == "\n " :
@@ -2331,6 +2352,7 @@ def generate_streaming(tools, functions, function_call, prompt):
23312352 },
23322353 }
23332354 ],
2355+ usage = chunk ["usage" ] if "usage" in chunk else None ,
23342356 )
23352357 # Check whether the model wants to generate another turn
23362358 if (
@@ -2363,6 +2385,7 @@ def generate_streaming(tools, functions, function_call, prompt):
23632385 "finish_reason" : "stop" ,
23642386 }
23652387 ],
2388+ usage = chunk ["usage" ] if "usage" in chunk else None ,
23662389 )
23672390 break
23682391 else :
@@ -2412,6 +2435,7 @@ def generate_streaming(tools, functions, function_call, prompt):
24122435 },
24132436 }
24142437 ],
2438+ usage = chunk ["usage" ] if "usage" in chunk else None ,
24152439 )
24162440 prompt += completion_text .strip ()
24172441 grammar = None
@@ -2451,6 +2475,7 @@ def generate_streaming(tools, functions, function_call, prompt):
24512475 },
24522476 }
24532477 ],
2478+ usage = chunk ["usage" ] if "usage" in chunk else None ,
24542479 )
24552480 break
24562481
0 commit comments