@@ -964,11 +964,13 @@ def _create_response_stream(
964964 finish_reason = None
965965 message = None
966966 usage = None
967+
967968 if "messageStart" in chunk :
968969 message = ChatResponseMessage (
969970 role = chunk ["messageStart" ]["role" ],
970971 content = "" ,
971972 )
973+
972974 if "contentBlockStart" in chunk :
973975 # tool call start
974976 delta = chunk ["contentBlockStart" ]["start" ]
@@ -988,25 +990,30 @@ def _create_response_stream(
988990 )
989991 ]
990992 )
993+
991994 if "contentBlockDelta" in chunk :
992995 delta = chunk ["contentBlockDelta" ]["delta" ]
993996 if "text" in delta :
994- # stream content
995- message = ChatResponseMessage (
996- content = delta ["text" ],
997- )
997+ # Regular text content - close thinking tag if open
998+ content = delta ["text" ]
999+ if self .think_emitted :
1000+ # Transition from reasoning to regular text
1001+ content = "</think>" + content
1002+ self .think_emitted = False
1003+ message = ChatResponseMessage (content = content )
9981004 elif "reasoningContent" in delta :
9991005 if "text" in delta ["reasoningContent" ]:
10001006 content = delta ["reasoningContent" ]["text" ]
10011007 if not self .think_emitted :
1002- # Port of "content_block_start" with "thinking"
1008+ # Start of reasoning content
10031009 content = "<think>" + content
10041010 self .think_emitted = True
10051011 message = ChatResponseMessage (content = content )
10061012 elif "signature" in delta ["reasoningContent" ]:
1007- # Port of "signature_delta"
1013+ # Port of "signature_delta" (for models that send it)
10081014 if self .think_emitted :
1009- message = ChatResponseMessage (content = "\n </think> \n \n " )
1015+ message = ChatResponseMessage (content = "</think>" )
1016+ self .think_emitted = False
10101017 else :
10111018 return None # Ignore signature if no <think> started
10121019 else :
@@ -1022,7 +1029,23 @@ def _create_response_stream(
10221029 )
10231030 ]
10241031 )
1032+
10251033 if "messageStop" in chunk :
1034+ # Safety check: Close any open thinking tags before message stops
1035+ if self .think_emitted :
1036+ self .think_emitted = False
1037+ return ChatStreamResponse (
1038+ id = message_id ,
1039+ model = model_id ,
1040+ choices = [
1041+ ChoiceDelta (
1042+ index = 0 ,
1043+ delta = ChatResponseMessage (content = "</think>" ),
1044+ logprobs = None ,
1045+ finish_reason = None ,
1046+ )
1047+ ],
1048+ )
10261049 message = ChatResponseMessage ()
10271050 finish_reason = chunk ["messageStop" ]["stopReason" ]
10281051
@@ -1063,6 +1086,7 @@ def _create_response_stream(
10631086 prompt_tokens_details = prompt_tokens_details ,
10641087 ),
10651088 )
1089+
10661090 if message :
10671091 return ChatStreamResponse (
10681092 id = message_id ,
0 commit comments