@@ -31,6 +31,9 @@ export type OpenAIRequest = {
   top_p?: number;
   n?: number;
   stream?: boolean;
+  stream_options: {
+    include_usage: boolean;
+  };
   stop?: string | string[];
   max_tokens?: number;
   presence_penalty?: number;
@@ -69,7 +72,7 @@ export type OpenAIResponseUsage = {
 
 export type OpenAIResponseChoice = {
   index: number;
-  message: {
+  message?: {
     role: "assistant" | "user" | "system" | "tool";
     content: string | null;
     function_call?: {
@@ -85,6 +88,9 @@ export type OpenAIResponseChoice = {
       };
     }[];
   };
+  delta?: {
+    content: string | null;
+  };
   finish_reason: "stop" | "length" | "tool_calls" | "content_filter" | null;
 };
 
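For context on the `message?`/`delta?` split above: non-streaming responses carry the whole completion in `message`, while streaming chunks carry incremental `delta` fragments. A minimal sketch of both shapes against the widened type (the literal values are illustrative, not taken from this commit):

```ts
// Non-streaming: the completion arrives whole in 'message'.
const completed: OpenAIResponseChoice = {
  index: 0,
  message: { role: "assistant", content: "Hello there!" },
  finish_reason: "stop",
};

// Streaming: each chunk carries a partial 'delta' and no 'message';
// 'finish_reason' stays null until the last content-bearing chunk.
const chunk: OpenAIResponseChoice = {
  index: 0,
  delta: { content: "Hello" },
  finish_reason: null,
};
```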
@@ -98,7 +104,10 @@ export type OpenAIResponse = {
   usage?: OpenAIResponseUsage;
 };
 
-export type OpenAICompatibleInput = Omit<OpenAIRequest, "stream" | "model">;
+export type OpenAICompatibleInput = Omit<
+  OpenAIRequest,
+  "stream" | "stream_options" | "model"
+>;
 
 export type OpenAIProviderInput = ILLMProviderInput<OpenAICompatibleInput>;
 export type OpenAIProviderOutput = ILLMProviderOutput<OpenAIResponse>;
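The widened `Omit` means callers can no longer set the streaming knobs themselves; the session owns them. Roughly, assuming `messages` is the usual required field from the part of `OpenAIRequest` not shown in the hunk:

```ts
// 'stream' and 'stream_options' are stripped from the public input type,
// so overriding them becomes a compile-time error:
const input: OpenAICompatibleInput = {
  messages: [{ role: "user", content: "Hi" }],
  // stream: true,                            // error: not in OpenAICompatibleInput
  // stream_options: { include_usage: true }, // error: not in OpenAICompatibleInput
};
```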
@@ -126,23 +135,25 @@ export class OpenAILLMSession implements ILLMProvider, ILLMProviderMeta {
     const parser = this.parse;
     const stream = async function* () {
       for await (const message of generator) {
+        // NOTE:(kallebysantos) While streaming, the final message will not include 'finish_reason';
+        // instead a '[DONE]' value is returned to close the stream.
+        if ("done" in message && message.done) {
+          return;
+        }
+
         if ("error" in message) {
           if (message.error instanceof Error) {
             throw message.error;
-          } else {
-            throw new Error(message.error as string);
           }
+
+          throw new Error(message.error as string);
         }
 
         yield parser(message);
-        const finishReason = message.choices[0].finish_reason;
 
-        if (finishReason) {
-          if (finishReason !== "stop") {
-            throw new Error("Expected a completed response.");
-          }
-
-          return;
+        const finish_reason = message.choices.at(0)?.finish_reason;
+        if (finish_reason && finish_reason !== "stop") {
+          throw new Error("Expected a completed response.");
         }
       }
 
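With the '[DONE]' sentinel handled up front, the generator's contract is simpler: iterate until exhaustion, and treat any thrown error (including a non-"stop" finish reason) as failure. A consumption sketch; the generator is internal to the session, so how it is exposed to callers is an assumption here:

```ts
// Hypothetical consumer of the async generator built above.
let text = "";
for await (const part of stream()) {
  // 'value' carries the delta content while streaming (see parse() below).
  text += part.value ?? "";
}
// Reaching here means the stream closed via '[DONE]' or a "stop" finish_reason;
// any other finish_reason was already surfaced as a thrown Error.
console.log(text);
```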
@@ -172,12 +183,14 @@ export class OpenAILLMSession implements ILLMProvider, ILLMProviderMeta {
     return this.parse(response);
   }
 
-  private parse(message: OpenAIResponse): OpenAIProviderOutput {
-    const { usage } = message;
+  private parse(response: OpenAIResponse): OpenAIProviderOutput {
+    const { usage } = response;
+    const choice = response.choices.at(0);
 
     return {
-      value: message.choices.at(0)?.message.content ?? undefined,
-      inner: message,
+      // NOTE:(kallebysantos) While streaming, the 'delta' field is used instead of 'message'.
+      value: choice?.message?.content ?? choice?.delta?.content ?? undefined,
+      inner: response,
       usage: {
         // NOTE:(kallebysantos) usage may be 'null' while streaming, but the final message will include it
         inputTokens: usage?.prompt_tokens ?? 0,
@@ -204,6 +217,9 @@ export class OpenAILLMSession implements ILLMProvider, ILLMProviderMeta {
           ...input,
           model: this.options.model,
           stream,
+          stream_options: {
+            include_usage: true,
+          },
         } satisfies OpenAIRequest,
       ),
       signal,
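Pinning `stream_options.include_usage: true` makes OpenAI-compatible servers emit one final chunk with a populated `usage` field, which is what the `usage?.… ?? 0` fallbacks in `parse()` rely on. A rough picture of the serialized body, with placeholder model and message values and `messages` assumed from the unshown part of the type:

```ts
// Approximate request body produced by the session for a streaming call:
const body: OpenAIRequest = {
  model: "gpt-4o-mini", // placeholder; actually comes from this.options.model
  messages: [{ role: "user", content: "Hi" }],
  stream: true,
  stream_options: {
    include_usage: true, // final chunk reports prompt/completion token usage
  },
};
```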