@@ -415,6 +415,73 @@ describe('InferenceGatewayClient', () => {
415415
416416 expect ( callbacks . onError ) . toHaveBeenCalledTimes ( 1 ) ;
417417 } ) ;
418+
it('should handle streaming chat completions with usage metrics', async () => {
  // The request opts in to usage reporting through stream_options.include_usage.
  const mockRequest = {
    model: 'gpt-4o',
    messages: [{ role: MessageRole.user, content: 'Hello' }],
    stream: true,
    stream_options: {
      include_usage: true,
    },
  };

  // The readable side of this TransformStream is handed to the client as the
  // response body; the test pushes SSE bytes in through the writable side.
  const mockStream = new TransformStream();
  const writer = mockStream.writable.getWriter();
  const encoder = new TextEncoder();

  mockFetch.mockResolvedValueOnce({
    ok: true,
    body: mockStream.readable,
  });

  const callbacks = {
    onOpen: jest.fn(),
    onChunk: jest.fn(),
    onContent: jest.fn(),
    onUsageMetrics: jest.fn(),
    onFinish: jest.fn(),
    onError: jest.fn(),
  };

  // Start the client before writing so that something is consuming the
  // readable side while the payload is written.
  const streamPromise = client.streamChatCompletion(mockRequest, callbacks);

  // Five JSON data events followed by the [DONE] sentinel. The last JSON
  // event has an empty `choices` array and carries the usage totals.
  const ssePayload = [
    'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n',
    'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n',
    'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n',
    'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}\n\n',
    'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[],"usage":{"prompt_tokens":10,"completion_tokens":8,"total_tokens":18}}\n\n',
    'data: [DONE]\n\n',
  ].join('');

  await writer.write(encoder.encode(ssePayload));
  await writer.close();
  await streamPromise;

  expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
  // One onChunk call per JSON data event; the [DONE] sentinel is not counted.
  expect(callbacks.onChunk).toHaveBeenCalledTimes(5);
  expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
  expect(callbacks.onContent).toHaveBeenCalledWith('!');
  expect(callbacks.onUsageMetrics).toHaveBeenCalledTimes(1);
  expect(callbacks.onUsageMetrics).toHaveBeenCalledWith({
    prompt_tokens: 10,
    completion_tokens: 8,
    total_tokens: 18,
  });
  expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
  // The happy path must not report an error; without this assertion the
  // onError mock above would be registered but never checked.
  expect(callbacks.onError).not.toHaveBeenCalled();
  expect(mockFetch).toHaveBeenCalledWith(
    'http://localhost:8080/v1/chat/completions',
    expect.objectContaining({
      method: 'POST',
      body: JSON.stringify({
        ...mockRequest,
        stream: true,
      }),
    })
  );
});
418485 } ) ;
419486
420487 describe ( 'proxy' , ( ) => {
0 commit comments