@@ -112,6 +112,7 @@ def __init__(
         available_tools: list[str],
     ):
         self._messages = messages
+        self.finish_reason: Optional[str] = None
         self.available_tools = available_tools
         self._tool_sessions: dict[str, Union[ClientSession, Tool]] = {}
         self.called_tools: set[str] = set()
@@ -135,7 +136,8 @@ def _update_num_reasoning_tokens(self):
         if self.parser.current_channel in {"analysis", "commentary"}:
             self.num_reasoning_tokens += 1

-    def append_output(self, output) -> None:
+    def append_output(self, output: Union[RequestOutput,
+                                          list[Message]]) -> None:
         if isinstance(output, RequestOutput):
             output_token_ids = output.outputs[0].token_ids
             self.parser = get_streamable_parser_for_assistant()
@@ -150,25 +152,27 @@ def append_output(self, output) -> None:
             # Move current turn to previous turn for next turn's calculations
             self.previous_turn = self.current_turn.copy()
             output_msgs = self.parser.messages
+            # The response's finish reason is set in the last message.
+            self.finish_reason = output.outputs[0].finish_reason
         else:
             # Tool output.
             output_msgs = output
         self._messages.extend(output_msgs)

     def _update_prefill_token_usage(self, output: RequestOutput) -> None:
         """Update token usage statistics for the prefill phase of generation.
-        
+
         The prefill phase processes the input prompt tokens. This method:
         1. Counts the prompt tokens for this turn
         2. Calculates tool output tokens for multi-turn conversations
         3. Updates cached token counts
         4. Tracks state for next turn calculations
-        
+
         Tool output tokens are calculated as:
-            current_prompt_tokens - last_turn_prompt_tokens - 
+            current_prompt_tokens - last_turn_prompt_tokens -
             last_turn_output_tokens
         This represents tokens added between turns (typically tool responses).
-        
+
         Args:
             output: The RequestOutput containing prompt token information
         """
@@ -214,18 +218,18 @@ def _update_prefill_token_usage(self, output: RequestOutput) -> None:

     def _update_decode_token_usage(self, output: RequestOutput) -> int:
         """Update token usage statistics for the decode phase of generation.
-        
+
         The decode phase processes the generated output tokens. This method:
         1. Counts output tokens from all completion outputs
         2. Updates the total output token count
         3. Tracks tokens generated in the current turn
-        
+
         In streaming mode, this is called for each token generated.
         In non-streaming mode, this is called once with all output tokens.
-        
+
         Args:
             output: The RequestOutput containing generated token information
-        
+
         Returns:
             int: Number of output tokens processed in this call
         """
@@ -385,7 +389,8 @@ def __init__(self, *args, **kwargs):
     def messages(self) -> list:
         return self.parser.messages

-    def append_output(self, output) -> None:
+    def append_output(self, output: Union[RequestOutput,
+                                          list[Message]]) -> None:
         if isinstance(output, RequestOutput):
             # append_output is called for each output token in streaming case,
             # so we only want to add the prompt tokens once for each message.
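For context, here is a hedged sketch of the dispatch that the new `Union[RequestOutput, list[Message]]` annotation describes, using toy stand-ins for the vLLM and Harmony types so it runs in isolation (this is not the real `HarmonyContext`):

```python
from typing import Optional, Union

class FakeCompletion:  # stand-in for vLLM's CompletionOutput
    def __init__(self, token_ids, finish_reason):
        self.token_ids = token_ids
        self.finish_reason = finish_reason

class FakeRequestOutput:  # stand-in for vLLM's RequestOutput
    def __init__(self, completion):
        self.outputs = [completion]

class Message:  # placeholder for openai_harmony.Message
    pass

class TinyContext:
    def __init__(self):
        self._messages: list = []
        self.finish_reason: Optional[str] = None

    def append_output(
            self, output: Union[FakeRequestOutput, list[Message]]) -> None:
        if isinstance(output, FakeRequestOutput):
            # Model turn: record the last completion's finish reason.
            self.finish_reason = output.outputs[0].finish_reason
        else:
            # Tool turn: append the tool messages as-is.
            self._messages.extend(output)

ctx = TinyContext()
ctx.append_output(FakeRequestOutput(FakeCompletion([1, 2, 3], "stop")))
assert ctx.finish_reason == "stop"
ctx.append_output([Message()])
assert len(ctx._messages) == 1
```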