[Model] Fixed stream generator for gpt-oss + spec-decoding (vllm-project#26027)

astralord · web-flow · commit 73a99cc2a53c · 2025-10-03T13:43:41.000Z
Signed-off-by: Aleksandr Samarin <astrlrd@nebius.com>
diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
@@ -691,11 +691,13 @@ async def chat_completion_stream_generator(
                     if self.use_harmony:
                         harmony_parser = harmony_parsers[i]
                         prev_recipient = harmony_parser.current_recipient
+                        delta_text = ""
                         for token_id in output.token_ids:
                             harmony_parser.process(token_id)
+                            delta_text += (harmony_parser.last_content_delta
+                                           or "")
                         cur_channel = harmony_parser.current_channel
                         cur_recipient = harmony_parser.current_recipient
-                        delta_text = harmony_parser.last_content_delta or ""
                     else:
                         delta_text = output.text