
Commit d266c59

chore: remove deprecated inference.chat_completion implementations (#3654)
# What does this PR do?

Remove the unused `chat_completion` implementations.

vllm features ported (a hedged sketch of both behaviors follows below):
- require `max_tokens` to be set, falling back to the config value
- set `tool_choice` to none if no tools are provided

## Test Plan

ci
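The two ported behaviors can be summarized in a minimal sketch. This is not the provider's actual code: `VLLMConfig`, `default_max_tokens`, and `build_request_params` are illustrative names assumed for this example.

```python
# Hedged sketch of the two behaviors above; names are illustrative, not llama_stack APIs.
from dataclasses import dataclass
from typing import Any


@dataclass
class VLLMConfig:
    # assumed config knob used when the caller does not set max_tokens
    default_max_tokens: int = 4096


def build_request_params(
    config: VLLMConfig,
    messages: list[dict[str, Any]],
    max_tokens: int | None = None,
    tools: list[dict[str, Any]] | None = None,
) -> dict[str, Any]:
    params: dict[str, Any] = {
        "messages": messages,
        # behavior 1: max_tokens must always be set; fall back to the config value
        "max_tokens": max_tokens if max_tokens is not None else config.default_max_tokens,
    }
    if tools:
        params["tools"] = tools
    else:
        # behavior 2: with no tools provided, explicitly disable tool calling
        params["tool_choice"] = "none"
    return params


print(build_request_params(VLLMConfig(), [{"role": "user", "content": "hello"}]))
```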
1 parent 4dfbe46 commit d266c59

File tree

18 files changed: +193 -1410 lines changed

llama_stack/apis/inference/inference.py

Lines changed: 0 additions & 39 deletions
@@ -1006,45 +1006,6 @@ class InferenceProvider(Protocol):
 
     model_store: ModelStore | None = None
 
-    async def chat_completion(
-        self,
-        model_id: str,
-        messages: list[Message],
-        sampling_params: SamplingParams | None = None,
-        tools: list[ToolDefinition] | None = None,
-        tool_choice: ToolChoice | None = ToolChoice.auto,
-        tool_prompt_format: ToolPromptFormat | None = None,
-        response_format: ResponseFormat | None = None,
-        stream: bool | None = False,
-        logprobs: LogProbConfig | None = None,
-        tool_config: ToolConfig | None = None,
-    ) -> ChatCompletionResponse | AsyncIterator[ChatCompletionResponseStreamChunk]:
-        """Generate a chat completion for the given messages using the specified model.
-
-        :param model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
-        :param messages: List of messages in the conversation.
-        :param sampling_params: Parameters to control the sampling strategy.
-        :param tools: (Optional) List of tool definitions available to the model.
-        :param tool_choice: (Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto.
-            .. deprecated::
-               Use tool_config instead.
-        :param tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model.
-            - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object.
-            - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a <function=function_name> tag.
-            - `ToolPromptFormat.python_list`: The tool calls are output as Python syntax -- a list of function calls.
-            .. deprecated::
-               Use tool_config instead.
-        :param response_format: (Optional) Grammar specification for guided (structured) decoding. There are two options:
-            - `ResponseFormat.json_schema`: The grammar is a JSON schema. Most providers support this format.
-            - `ResponseFormat.grammar`: The grammar is a BNF grammar. This format is more flexible, but not all providers support it.
-        :param stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False.
-        :param logprobs: (Optional) If specified, log probabilities for each token position will be returned.
-        :param tool_config: (Optional) Configuration for tool use.
-        :returns: If stream=False, returns a ChatCompletionResponse with the full completion.
-            If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk.
-        """
-        ...
-
     @webmethod(route="/inference/rerank", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def rerank(
         self,
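With the deprecated `chat_completion` protocol method gone, the chat path that remains in this API is the OpenAI-compatible one (the router below keeps `OpenAIChatCompletion`, `OpenAIMessageParam`, and related imports). A minimal sketch of calling it through the standard `openai` client follows; the base URL, API key, and model id are assumptions for illustration, not values taken from this PR.

```python
# Hedged sketch: drive the OpenAI-compatible chat completions path of a running
# Llama Stack server. base_url, api_key, and model below are placeholder assumptions.
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8321/v1",  # assumed Llama Stack OpenAI-compatible base URL
    api_key="none",  # placeholder; a local stack may not check this
)

response = client.chat.completions.create(
    model="llama3.2-3b-instruct",  # placeholder model id registered with the stack
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    max_tokens=128,  # set explicitly, since providers such as vllm now expect a value
)
print(response.choices[0].message.content)
```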

llama_stack/core/routers/inference.py

Lines changed: 0 additions & 88 deletions
@@ -27,7 +27,6 @@
     CompletionResponseStreamChunk,
     Inference,
     ListOpenAIChatCompletionResponse,
-    LogProbConfig,
     Message,
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,
@@ -42,12 +41,7 @@
     OpenAIMessageParam,
     OpenAIResponseFormatParam,
     Order,
-    ResponseFormat,
-    SamplingParams,
     StopReason,
-    ToolChoice,
-    ToolConfig,
-    ToolDefinition,
     ToolPromptFormat,
 )
 from llama_stack.apis.models import Model, ModelType
@@ -185,88 +179,6 @@ async def _get_model(self, model_id: str, expected_model_type: str) -> Model:
             raise ModelTypeError(model_id, model.model_type, expected_model_type)
         return model
 
-    async def chat_completion(
-        self,
-        model_id: str,
-        messages: list[Message],
-        sampling_params: SamplingParams | None = None,
-        response_format: ResponseFormat | None = None,
-        tools: list[ToolDefinition] | None = None,
-        tool_choice: ToolChoice | None = None,
-        tool_prompt_format: ToolPromptFormat | None = None,
-        stream: bool | None = False,
-        logprobs: LogProbConfig | None = None,
-        tool_config: ToolConfig | None = None,
-    ) -> ChatCompletionResponse | AsyncIterator[ChatCompletionResponseStreamChunk]:
-        logger.debug(
-            f"InferenceRouter.chat_completion: {model_id=}, {stream=}, {messages=}, {tools=}, {tool_config=}, {response_format=}",
-        )
-        if sampling_params is None:
-            sampling_params = SamplingParams()
-        model = await self._get_model(model_id, ModelType.llm)
-        if tool_config:
-            if tool_choice and tool_choice != tool_config.tool_choice:
-                raise ValueError("tool_choice and tool_config.tool_choice must match")
-            if tool_prompt_format and tool_prompt_format != tool_config.tool_prompt_format:
-                raise ValueError("tool_prompt_format and tool_config.tool_prompt_format must match")
-        else:
-            params = {}
-            if tool_choice:
-                params["tool_choice"] = tool_choice
-            if tool_prompt_format:
-                params["tool_prompt_format"] = tool_prompt_format
-            tool_config = ToolConfig(**params)
-
-        tools = tools or []
-        if tool_config.tool_choice == ToolChoice.none:
-            tools = []
-        elif tool_config.tool_choice == ToolChoice.auto:
-            pass
-        elif tool_config.tool_choice == ToolChoice.required:
-            pass
-        else:
-            # verify tool_choice is one of the tools
-            tool_names = [t.tool_name if isinstance(t.tool_name, str) else t.tool_name.value for t in tools]
-            if tool_config.tool_choice not in tool_names:
-                raise ValueError(f"Tool choice {tool_config.tool_choice} is not one of the tools: {tool_names}")
-
-        params = dict(
-            model_id=model_id,
-            messages=messages,
-            sampling_params=sampling_params,
-            tools=tools,
-            tool_choice=tool_choice,
-            tool_prompt_format=tool_prompt_format,
-            response_format=response_format,
-            stream=stream,
-            logprobs=logprobs,
-            tool_config=tool_config,
-        )
-        provider = await self.routing_table.get_provider_impl(model_id)
-        prompt_tokens = await self._count_tokens(messages, tool_config.tool_prompt_format)
-
-        if stream:
-            response_stream = await provider.chat_completion(**params)
-            return self.stream_tokens_and_compute_metrics(
-                response=response_stream,
-                prompt_tokens=prompt_tokens,
-                model=model,
-                tool_prompt_format=tool_config.tool_prompt_format,
-            )
-
-        response = await provider.chat_completion(**params)
-        metrics = await self.count_tokens_and_compute_metrics(
-            response=response,
-            prompt_tokens=prompt_tokens,
-            model=model,
-            tool_prompt_format=tool_config.tool_prompt_format,
-        )
-        # these metrics will show up in the client response.
-        response.metrics = (
-            metrics if not hasattr(response, "metrics") or response.metrics is None else response.metrics + metrics
-        )
-        return response
-
     async def openai_completion(
         self,
         model: str,
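For reference, the core rule that the removed router method enforced around tool choice — drop all tools when the choice is none, pass them through for auto or required, and otherwise require that a named choice actually appears among the provided tools — can be restated as a small standalone sketch. `resolve_tools` and the plain-string values are assumptions for this example, not llama_stack APIs.

```python
# Simplified restatement of the tool_choice validation the removed
# InferenceRouter.chat_completion performed; names here are illustrative.
def resolve_tools(tool_choice: str, tools: list[dict]) -> list[dict]:
    """Return the tool list to forward to a provider for a given tool_choice."""
    if tool_choice == "none":
        return []  # tool calling disabled: drop all tools
    if tool_choice in ("auto", "required"):
        return tools  # pass tools through unchanged
    # otherwise tool_choice names a specific tool and must be one of the tools
    tool_names = [t["tool_name"] for t in tools]
    if tool_choice not in tool_names:
        raise ValueError(f"Tool choice {tool_choice} is not one of the tools: {tool_names}")
    return tools


# a named choice that is not among the provided tools raises, as in the removed code
try:
    resolve_tools("get_weather", [{"tool_name": "web_search"}])
except ValueError as exc:
    print(exc)
```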

0 commit comments
