@@ -3,6 +3,8 @@ import { resolveProvider } from "../../lib/getInferenceProviderMapping.js";
 import { getProviderHelper } from "../../lib/getProviderHelper.js";
 import type { BaseArgs, Options } from "../../types.js";
 import { innerStreamingRequest } from "../../utils/request.js";
+import type { ConversationalTaskHelper, TaskProviderHelper } from "../../providers/providerHelper.js";
+import { AutoRouterConversationalTask } from "../../providers/providerHelper.js";
 
 /**
  * Use to continue text from a prompt. Same as `textGeneration` but returns generator that can be read one token at a time
@@ -11,8 +13,14 @@ export async function* chatCompletionStream(
 	args: BaseArgs & ChatCompletionInput,
 	options?: Options
 ): AsyncGenerator<ChatCompletionStreamOutput> {
-	const provider = await resolveProvider(args.provider, args.model, args.endpointUrl);
-	const providerHelper = getProviderHelper(provider, "conversational");
+	let providerHelper: ConversationalTaskHelper & TaskProviderHelper;
+	if (!args.provider || args.provider === "auto") {
+		// Special case: we have a dedicated auto-router for conversational models. No need to fetch provider mapping.
+		providerHelper = new AutoRouterConversationalTask();
+	} else {
+		const provider = await resolveProvider(args.provider, args.model, args.endpointUrl);
+		providerHelper = getProviderHelper(provider, "conversational");
+	}
 	yield* innerStreamingRequest<ChatCompletionStreamOutput>(args, providerHelper, {
 		...options,
 		task: "conversational",
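For context, a minimal usage sketch of the fast path this diff introduces, assuming the standalone `chatCompletionStream` export of `@huggingface/inference`; the access token and model name below are placeholders. When `provider` is omitted (or set to `"auto"`), the new branch constructs `AutoRouterConversationalTask` directly instead of awaiting `resolveProvider`, so no provider-mapping fetch happens before the request is sent:

```ts
// Minimal sketch, not the PR's test code. Assumptions: the standalone
// `chatCompletionStream` export; "hf_xxx" and the model name are placeholders.
import { chatCompletionStream } from "@huggingface/inference";

async function main(): Promise<void> {
	// No `provider` given (equivalent to "auto"): this takes the new
	// AutoRouterConversationalTask path and skips the provider-mapping fetch.
	for await (const chunk of chatCompletionStream({
		accessToken: "hf_xxx",
		model: "meta-llama/Llama-3.1-8B-Instruct",
		messages: [{ role: "user", content: "Hello!" }],
	})) {
		// Each chunk is a ChatCompletionStreamOutput; print tokens as they arrive.
		process.stdout.write(chunk.choices[0]?.delta?.content ?? "");
	}
}

main();
```

Passing an explicit provider (e.g. `provider: "together"`) still goes through the `resolveProvider` / `getProviderHelper` branch, so existing callers are unaffected.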