@@ -12,7 +12,7 @@ import type { GatewayOptions } from '..'
 import type { ModelAPI } from '../api'
 import type { BaseAPI } from '../api/base'
 import type { OtelSpan } from '../otel'
-import type { GenAIAttributes } from '../otel/attributes'
+import { attributesFromRequest, attributesFromResponse, type GenAIAttributes } from '../otel/attributes'
 import type { ApiKeyInfo, ProviderProxy } from '../types'
 import { runAfter } from '../utils'
 
@@ -29,10 +29,7 @@ export interface ProxySuccess {
 }
 
 export interface ProxyWhitelistedEndpoint {
-  requestBody: string
-  httpStatusCode: number
-  responseHeaders: Headers
-  responseBody: string
+  response: Response
 }
 
 export interface ProxyStreamingSuccess {
@@ -42,7 +39,7 @@ export interface ProxyStreamingSuccess {
   responseHeaders: Headers
   responseStream: ReadableStream
   otelAttributes?: GenAIAttributes
-  waitCompletion: Promise<void>
+  onStreamComplete: Promise<{ cost?: number } | { error: Error }>
   // In case we get to the end of the response, and we are unable to calculate the cost, we need to know if we can disable the key.
   disableKey?: boolean
 }
@@ -297,27 +294,16 @@ export class DefaultProviderProxy {
     const response = await this.fetch(url, { method, headers: requestHeaders, body: requestBodyText })
 
     if (this.isWhitelistedEndpoint()) {
-      // TODO(Marcelo): We can't read the body if it's a streaming response.
-      const responseBody = await response.text()
-      const { headers, status } = response
       this.otelSpan.end(
         `${this.request.method} ${this.restOfPath}`,
         {
-          'http.method': this.request.method,
-          'http.url': this.restOfPath,
-          'http.response.status_code': status,
+          ...attributesFromRequest(this.request),
+          ...attributesFromResponse(response),
           'http.request.body.text': requestBodyText,
-          'http.response.body.text': responseBody,
-          ...Object.fromEntries(
-            Array.from(requestHeaders.entries()).map(([name, value]) => [`http.request.header.${name}`, value]),
-          ),
-          ...Object.fromEntries(
-            Array.from(headers.entries()).map(([name, value]) => [`http.response.header.${name}`, value]),
-          ),
         },
         { level: 'info' },
       )
-      return { requestBody: requestBodyText, httpStatusCode: status, responseHeaders: headers, responseBody }
+      return { response }
     }
 
     // Each provider should be able to modify the response headers, e.g. remove openai org
@@ -402,44 +388,55 @@ export class DefaultProviderProxy {
 
     // Track completion but don't wait for it before returning
     this.runAfter('extract-stream', extractionPromise)
-    const waitCompletion = extractionPromise.catch(() => {}) // Swallow errors, already logged
+
+    const onStreamComplete = extractionPromise
+      .then((result) => {
+        // TODO(Marcelo): I think we actually need to emit 2 spans: one for HTTP, and another for the LLM.
+        this.otelSpan.end(
+          `chat ${modelAPI.extractedRequest?.requestModel ?? 'streaming'}`,
+          {
+            ...modelAPI.toGenAiOtelAttributes(),
+            ...attributesFromRequest(this.request),
+            ...attributesFromResponse(response),
+          },
+          { level: 'info' },
+        )
+
+        return result
+      })
+      .catch((error: Error) => ({ error })) // Surface rejections as an error result; already logged
 
     return {
       requestModel,
       requestBody: requestBodyText,
       successStatus: response.status,
       responseHeaders,
       responseStream,
-      waitCompletion,
+      onStreamComplete,
     }
   }
 
-  private async processChunks<T>(modelAPI: BaseAPI<unknown, unknown, T>, events: AsyncIterable<T>): Promise<void> {
+  private async processChunks<T>(
+    modelAPI: BaseAPI<unknown, unknown, T>,
+    events: AsyncIterable<T>,
+  ): Promise<{ cost?: number } | { error: Error }> {
     for await (const chunk of events) {
       modelAPI.processChunk(chunk)
     }
 
-    this.otelSpan.end(
-      `chat ${modelAPI.extractedRequest?.requestModel ?? 'streaming'}`,
-      // TODO(Marcelo): Missing the HTTP attributes - Should we pass them around or store in ModelAPI?
-      { ...modelAPI.toGenAiOtelAttributes() },
-      { level: 'info' },
-    )
-
     const provider = this.usageProvider()
     const usage = modelAPI.extractedResponse.usage
     const responseModel = modelAPI.extractedResponse.responseModel
 
     if (!provider || !usage || !responseModel) {
-      logfire.warning('Unable to calculate cost', { provider, usage, responseModel })
+      return { error: new Error(`Unable to calculate cost for model ${responseModel}`) }
+    }
+
+    const price = calcPrice(usage, responseModel, { provider })
+    if (price) {
+      return { cost: price.total_price }
     } else {
-      const price = calcPrice(usage, responseModel, { providerId: this.providerId() })
-      if (price) {
-        this.cost = price.total_price
-        logfire.info('cost {cost}', { cost: this.cost, usage, responseModel })
-      } else {
-        logfire.warning('Unable to calculate cost', { provider, usage, responseModel })
-      }
+      return { error: new Error(`Unable to calculate cost for model ${responseModel} and provider ${provider.name}`) }
     }
   }
 
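For context, a minimal sketch of how a caller might consume the new `onStreamComplete` promise after handing the stream to the client. This is hypothetical and not part of the diff: `recordSpend`, `disableApiKey`, `settleStreamingCost`, and the `'./proxy'` import path are assumed names for illustration only.

```ts
import type { ApiKeyInfo } from '../types'
import type { ProxyStreamingSuccess } from './proxy' // assumed module path

// Assumed helpers, declared only so the sketch type-checks.
declare function recordSpend(key: ApiKeyInfo, cost: number): Promise<void>
declare function disableApiKey(key: ApiKeyInfo): Promise<void>

// Once the response stream has been returned to the client, settle accounting
// when extraction finishes instead of blocking the response on it.
export async function settleStreamingCost(result: ProxyStreamingSuccess, key: ApiKeyInfo): Promise<void> {
  const outcome = await result.onStreamComplete
  if ('error' in outcome) {
    // Cost could not be calculated; only disable the key if the proxy said we may.
    if (result.disableKey) {
      await disableApiKey(key)
    }
  } else if (outcome.cost !== undefined) {
    await recordSpend(key, outcome.cost)
  }
}
```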