@@ -31,6 +31,9 @@ export type OpenAIRequest = {
   top_p?: number;
   n?: number;
   stream?: boolean;
+  stream_options: {
+    include_usage: boolean;
+  };
   stop?: string | string[];
   max_tokens?: number;
   presence_penalty?: number;
@@ -69,7 +72,7 @@ export type OpenAIResponseUsage = {
 
 export type OpenAIResponseChoice = {
   index: number;
-  message: {
+  message?: {
     role: "assistant" | "user" | "system" | "tool";
     content: string | null;
     function_call?: {
@@ -85,6 +88,9 @@ export type OpenAIResponseChoice = {
       };
     }[];
   };
+  delta?: {
+    content: string | null;
+  };
   finish_reason: "stop" | "length" | "tool_calls" | "content_filter" | null;
 };
 
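For context on the `message?`/`delta?` split above: non-streaming responses carry the whole completion in `message`, while streaming chunks carry incremental `delta` fragments. A minimal sketch of both shapes against the widened type (the literal values are illustrative, not taken from this commit):

```ts
// Non-streaming: the completion arrives whole in 'message'.
const completed: OpenAIResponseChoice = {
  index: 0,
  message: { role: "assistant", content: "Hello there!" },
  finish_reason: "stop",
};

// Streaming: each chunk carries a partial 'delta' and no 'message';
// 'finish_reason' stays null until the last content-bearing chunk.
const chunk: OpenAIResponseChoice = {
  index: 0,
  delta: { content: "Hello" },
  finish_reason: null,
};
```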
@@ -98,7 +104,10 @@ export type OpenAIResponse = {
   usage?: OpenAIResponseUsage;
 };
 
-export type OpenAICompatibleInput = Omit<OpenAIRequest, "stream" | "model">;
+export type OpenAICompatibleInput = Omit<
+  OpenAIRequest,
+  "stream" | "stream_options" | "model"
+>;
 
 export type OpenAIProviderInput = ILLMProviderInput<OpenAICompatibleInput>;
 export type OpenAIProviderOutput = ILLMProviderOutput<OpenAIResponse>;
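The widened `Omit` means callers can no longer set the streaming knobs themselves; the session owns them. Roughly, assuming `messages` is the usual required field from the part of `OpenAIRequest` not shown in the hunk:

```ts
// 'stream' and 'stream_options' are stripped from the public input type,
// so overriding them becomes a compile-time error:
const input: OpenAICompatibleInput = {
  messages: [{ role: "user", content: "Hi" }],
  // stream: true,                            // error: not in OpenAICompatibleInput
  // stream_options: { include_usage: true }, // error: not in OpenAICompatibleInput
};
```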
@@ -126,23 +135,25 @@ export class OpenAILLMSession implements ILLMProvider, ILLMProviderMeta {
     const parser = this.parse;
     const stream = async function* () {
       for await (const message of generator) {
+        // NOTE:(kallebysantos) While streaming, the final message will not include 'finish_reason';
+        // instead a '[DONE]' value is returned to close the stream.
+        if ("done" in message && message.done) {
+          return;
+        }
+
         if ("error" in message) {
           if (message.error instanceof Error) {
             throw message.error;
-          } else {
-            throw new Error(message.error as string);
           }
+
+          throw new Error(message.error as string);
         }
 
         yield parser(message);
-        const finishReason = message.choices[0].finish_reason;
 
-        if (finishReason) {
-          if (finishReason !== "stop") {
-            throw new Error("Expected a completed response.");
-          }
-
-          return;
+        const finish_reason = message.choices.at(0)?.finish_reason;
+        if (finish_reason && finish_reason !== "stop") {
+          throw new Error("Expected a completed response.");
         }
       }
 
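With the '[DONE]' sentinel handled up front, the generator's contract is simpler: iterate until exhaustion, and treat any thrown error (including a non-"stop" finish reason) as failure. A consumption sketch; the generator is internal to the session, so how it is exposed to callers is an assumption here:

```ts
// Hypothetical consumer of the async generator built above.
let text = "";
for await (const part of stream()) {
  // 'value' carries the delta content while streaming (see parse() below).
  text += part.value ?? "";
}
// Reaching here means the stream closed via '[DONE]' or a "stop" finish_reason;
// any other finish_reason was already surfaced as a thrown Error.
console.log(text);
```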
@@ -172,12 +183,14 @@ export class OpenAILLMSession implements ILLMProvider, ILLMProviderMeta {
     return this.parse(response);
   }
 
-  private parse(message: OpenAIResponse): OpenAIProviderOutput {
-    const { usage } = message;
+  private parse(response: OpenAIResponse): OpenAIProviderOutput {
+    const { usage } = response;
+    const choice = response.choices.at(0);
 
     return {
-      value: message.choices.at(0)?.message.content ?? undefined,
-      inner: message,
+      // NOTE:(kallebysantos) While streaming, the 'delta' field is used instead of 'message'.
+      value: choice?.message?.content ?? choice?.delta?.content ?? undefined,
+      inner: response,
       usage: {
         // NOTE:(kallebysantos) usage may be 'null' while streaming, but the final message will include it
         inputTokens: usage?.prompt_tokens ?? 0,
@@ -204,6 +217,9 @@ export class OpenAILLMSession implements ILLMProvider, ILLMProviderMeta {
           ...input,
           model: this.options.model,
           stream,
+          stream_options: {
+            include_usage: true,
+          },
         } satisfies OpenAIRequest,
       ),
       signal,
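Pinning `stream_options.include_usage: true` makes OpenAI-compatible servers emit one final chunk with a populated `usage` field, which is what the `usage?.… ?? 0` fallbacks in `parse()` rely on. A rough picture of the serialized body, with placeholder model and message values and `messages` assumed from the unshown part of the type:

```ts
// Approximate request body produced by the session for a streaming call:
const body: OpenAIRequest = {
  model: "gpt-4o-mini", // placeholder; actually comes from this.options.model
  messages: [{ role: "user", content: "Hi" }],
  stream: true,
  stream_options: {
    include_usage: true, // final chunk reports prompt/completion token usage
  },
};
```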