diff --git a/packages/types/src/providers/deepseek.ts b/packages/types/src/providers/deepseek.ts
index c5c297cdb94..36b153fc1ec 100644
--- a/packages/types/src/providers/deepseek.ts
+++ b/packages/types/src/providers/deepseek.ts
@@ -1,22 +1,36 @@
 import type { ModelInfo } from "../model.js"
 
 // https://platform.deepseek.com/docs/api
+// https://api-docs.deepseek.com/quick_start/pricing
 export type DeepSeekModelId = keyof typeof deepSeekModels
 
 export const deepSeekDefaultModelId: DeepSeekModelId = "deepseek-chat"
 
+// DeepSeek V3 model info (shared between deepseek-chat and aliases)
+// DeepSeek V3.2 supports thinking mode with tool calling via the "thinking" parameter
+// See: https://api-docs.deepseek.com/guides/thinking_mode
+const deepSeekV3Info: ModelInfo = {
+	maxTokens: 8192, // 8K max output
+	contextWindow: 128_000,
+	supportsImages: false,
+	supportsPromptCache: true,
+	supportsNativeTools: true,
+	supportsReasoningBinary: true, // Supports thinking mode via { thinking: { type: "enabled" } }
+	inputPrice: 0.56, // $0.56 per million tokens (cache miss) - Updated Sept 5, 2025
+	outputPrice: 1.68, // $1.68 per million tokens - Updated Sept 5, 2025
+	cacheWritesPrice: 0.56, // $0.56 per million tokens (cache miss) - Updated Sept 5, 2025
+	cacheReadsPrice: 0.07, // $0.07 per million tokens (cache hit) - Updated Sept 5, 2025
+	description: `DeepSeek-V3 achieves a significant breakthrough in inference speed over previous models. It tops the leaderboard among open-source models and rivals the most advanced closed-source models globally. Supports thinking mode with tool calling when enabled.`,
+}
+
 export const deepSeekModels = {
-	"deepseek-chat": {
-		maxTokens: 8192, // 8K max output
-		contextWindow: 128_000,
-		supportsImages: false,
-		supportsPromptCache: true,
-		supportsNativeTools: true,
-		inputPrice: 0.56, // $0.56 per million tokens (cache miss) - Updated Sept 5, 2025
-		outputPrice: 1.68, // $1.68 per million tokens - Updated Sept 5, 2025
-		cacheWritesPrice: 0.56, // $0.56 per million tokens (cache miss) - Updated Sept 5, 2025
-		cacheReadsPrice: 0.07, // $0.07 per million tokens (cache hit) - Updated Sept 5, 2025
-		description: `DeepSeek-V3 achieves a significant breakthrough in inference speed over previous models. It tops the leaderboard among open-source models and rivals the most advanced closed-source models globally.`,
+	"deepseek-chat": deepSeekV3Info,
+	// deepseek-3.2 is an alias for deepseek-chat (V3.2 is the current version)
+	// Note: The DeepSeek API only supports "deepseek-chat" and "deepseek-reasoner"
+	// See: https://api-docs.deepseek.com/quick_start/pricing
+	"deepseek-3.2": {
+		...deepSeekV3Info,
+		description: `DeepSeek V3.2 (alias for deepseek-chat). ${deepSeekV3Info.description}`,
 	},
 	"deepseek-reasoner": {
 		maxTokens: 65536, // 64K max output for reasoning mode
@@ -32,4 +46,11 @@ export const deepSeekModels = {
 	},
 } as const satisfies Record<string, ModelInfo>
 
+// Map of model aliases to their official API model names
+// The DeepSeek API only supports "deepseek-chat" and "deepseek-reasoner"
+// See: https://api-docs.deepseek.com/quick_start/pricing
+export const deepSeekModelAliases: Record<string, string> = {
+	"deepseek-3.2": "deepseek-chat",
+}
+
 export const DEEP_SEEK_DEFAULT_TEMPERATURE = 0.6
diff --git a/src/api/providers/__tests__/deepseek.spec.ts b/src/api/providers/__tests__/deepseek.spec.ts
index 50cabfa9226..b28f6b7ed96 100644
--- a/src/api/providers/__tests__/deepseek.spec.ts
+++ b/src/api/providers/__tests__/deepseek.spec.ts
@@ -147,6 +147,25 @@ describe("DeepSeekHandler", () => {
 			const _handler = new DeepSeekHandler(mockOptions)
 			expect(OpenAI).toHaveBeenCalledWith(expect.objectContaining({ apiKey: mockOptions.deepSeekApiKey }))
 		})
+
+		it("should map deepseek-3.2 alias to deepseek-chat for API calls", async () => {
+			vi.clearAllMocks()
+			const handlerWith32 = new DeepSeekHandler({
+				...mockOptions,
+				apiModelId: "deepseek-3.2",
+			})
+			const stream = handlerWith32.createMessage("test", [])
+			for await (const _chunk of stream) {
+				// consume stream
+			}
+			// Verify the API was called with deepseek-chat (not deepseek-3.2)
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					model: "deepseek-chat",
+				}),
+				expect.anything(),
+			)
+		})
 	})
 
 	describe("getModel", () => {
@@ -174,6 +193,19 @@ describe("DeepSeekHandler", () => {
 			expect(model.info.supportsPromptCache).toBe(true)
 		})
 
+		it("should return correct model info for deepseek-3.2 alias", () => {
+			const handlerWith32 = new DeepSeekHandler({
+				...mockOptions,
+				apiModelId: "deepseek-3.2",
+			})
+			const model = handlerWith32.getModel()
+			expect(model.id).toBe("deepseek-3.2") // Returns user's model ID
+			expect(model.info).toBeDefined()
+			expect(model.info.maxTokens).toBe(8192) // Same as deepseek-chat
+			expect(model.info.contextWindow).toBe(128_000)
+			expect(model.info.supportsNativeTools).toBe(true)
+		})
+
 		it("should return provided model ID with default model info if model does not exist", () => {
 			const handlerWithInvalidModel = new DeepSeekHandler({
 				...mockOptions,
@@ -317,4 +349,219 @@ describe("DeepSeekHandler", () => {
 			expect(result.cacheReadTokens).toBeUndefined()
 		})
 	})
+
+	describe("Thinking Mode Support", () => {
+		it("should add thinking parameter when enableReasoningEffort is true for V3 models", async () => {
+			vi.clearAllMocks()
+			const handlerWithThinking = new DeepSeekHandler({
+				...mockOptions,
+				apiModelId: "deepseek-chat",
+				enableReasoningEffort: true,
+			})
+			const stream = handlerWithThinking.createMessage("test", [])
+			for await (const _chunk of stream) {
+				// consume stream
+			}
+			// Verify the API was called with the thinking parameter
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					model: "deepseek-chat",
+					thinking: { type: "enabled" },
+				}),
+			)
+		})
+
+		it("should add thinking parameter when enableReasoningEffort is true for deepseek-3.2 alias", async () => {
+			vi.clearAllMocks()
+			const handlerWithThinking = new DeepSeekHandler({
+				...mockOptions,
+				apiModelId: "deepseek-3.2",
+				enableReasoningEffort: true,
+			})
+			const stream = handlerWithThinking.createMessage("test", [])
+			for await (const _chunk of stream) {
+				// consume stream
+			}
+			// Verify the API was called with the thinking parameter and mapped model ID
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					model: "deepseek-chat",
+					thinking: { type: "enabled" },
+				}),
+			)
+		})
+
+		it("should NOT add thinking parameter when enableReasoningEffort is false", async () => {
+			vi.clearAllMocks()
+			const handlerWithoutThinking = new DeepSeekHandler({
+				...mockOptions,
+				apiModelId: "deepseek-chat",
+				enableReasoningEffort: false,
+			})
+			const stream = handlerWithoutThinking.createMessage("test", [])
+			for await (const _chunk of stream) {
+				// consume stream
+			}
+			// Verify the API was called WITHOUT the thinking parameter
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.not.objectContaining({
+					thinking: expect.anything(),
+				}),
+				expect.anything(),
+			)
+		})
+
+		it("should NOT add thinking parameter for deepseek-reasoner model even with enableReasoningEffort", async () => {
+			vi.clearAllMocks()
+			const handlerReasoner = new DeepSeekHandler({
+				...mockOptions,
+				apiModelId: "deepseek-reasoner",
+				enableReasoningEffort: true,
+			})
+			const stream = handlerReasoner.createMessage("test", [])
+			for await (const _chunk of stream) {
+				// consume stream
+			}
+			// Verify the API was called WITHOUT the thinking parameter
+			// (deepseek-reasoner uses R1 format, not thinking mode)
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.not.objectContaining({
+					thinking: expect.anything(),
+				}),
+				expect.anything(),
+			)
+		})
+
+		it("should handle reasoning_content in response when thinking mode is enabled", async () => {
+			// Mock a response with reasoning_content
+			mockCreate.mockImplementationOnce(async () => ({
+				[Symbol.asyncIterator]: async function* () {
+					yield {
+						choices: [
+							{
+								delta: { reasoning_content: "Let me think about this..." },
+								index: 0,
+							},
+						],
+						usage: null,
+					}
+					yield {
+						choices: [
+							{
+								delta: { content: "Here is my answer." },
+								index: 0,
+							},
+						],
+						usage: null,
+					}
+					yield {
+						choices: [
+							{
+								delta: {},
+								index: 0,
+							},
+						],
+						usage: {
+							prompt_tokens: 10,
+							completion_tokens: 15,
+							total_tokens: 25,
+						},
+					}
+				},
+			}))
+
+			const handlerWithThinking = new DeepSeekHandler({
+				...mockOptions,
+				apiModelId: "deepseek-chat",
+				enableReasoningEffort: true,
+			})
+			const stream = handlerWithThinking.createMessage("test", [])
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Should have a reasoning chunk
+			const reasoningChunks = chunks.filter((c) => c.type === "reasoning")
+			expect(reasoningChunks.length).toBeGreaterThan(0)
+			expect(reasoningChunks[0].text).toBe("Let me think about this...")
+
+			// Should have a text chunk
+			const textChunks = chunks.filter((c) => c.type === "text")
+			expect(textChunks.length).toBeGreaterThan(0)
+			expect(textChunks[0].text).toBe("Here is my answer.")
+		})
+
+		it("should handle tool calls with thinking mode enabled", async () => {
+			// Mock a response with tool calls in thinking mode
+			mockCreate.mockImplementationOnce(async () => ({
+				[Symbol.asyncIterator]: async function* () {
+					yield {
+						choices: [
+							{
+								delta: { reasoning_content: "I need to call a tool..." },
+								index: 0,
+							},
+						],
+						usage: null,
+					}
+					yield {
+						choices: [
+							{
+								delta: {
+									tool_calls: [
+										{
+											index: 0,
+											id: "call_123",
+											function: {
+												name: "read_file",
+												arguments: '{"path": "/test.txt"}',
+											},
+										},
+									],
+								},
+								index: 0,
+							},
+						],
+						usage: null,
+					}
+					yield {
+						choices: [
+							{
+								delta: {},
+								index: 0,
+							},
+						],
+						usage: {
+							prompt_tokens: 10,
+							completion_tokens: 20,
+							total_tokens: 30,
+						},
+					}
+				},
+			}))
+
+			const handlerWithThinking = new DeepSeekHandler({
+				...mockOptions,
+				apiModelId: "deepseek-chat",
+				enableReasoningEffort: true,
+			})
+			// Note: tools are passed in Anthropic format and converted internally
+			const stream = handlerWithThinking.createMessage("test", [])
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Should have a reasoning chunk
+			const reasoningChunks = chunks.filter((c) => c.type === "reasoning")
+			expect(reasoningChunks.length).toBeGreaterThan(0)
+
+			// Should have a tool call chunk
+			const toolCallChunks = chunks.filter((c) => c.type === "tool_call_partial")
+			expect(toolCallChunks.length).toBeGreaterThan(0)
+			expect(toolCallChunks[0].name).toBe("read_file")
+			expect(toolCallChunks[0].id).toBe("call_123")
+		})
+	})
 })
diff --git a/src/api/providers/__tests__/openrouter.spec.ts b/src/api/providers/__tests__/openrouter.spec.ts
index 37b839f3030..f9f3ed47480 100644
--- a/src/api/providers/__tests__/openrouter.spec.ts
+++ b/src/api/providers/__tests__/openrouter.spec.ts
@@ -338,4 +338,215 @@ describe("OpenRouterHandler", () => {
 			await expect(handler.completePrompt("test prompt")).rejects.toThrow("Unexpected error")
 		})
 	})
+
+	describe("DeepSeek Model Handling", () => {
+		// Test DeepSeek V3.2 model (uses standard format)
+		it("should NOT use R1 format for DeepSeek V3.2 models", async () => {
+			const deepseek32Handler = new OpenRouterHandler({
+				...mockOptions,
+				openRouterModelId: "deepseek/deepseek-v3.2",
+			})
+
+			const mockStream = {
+				[Symbol.asyncIterator]: async function* () {
+					yield {
+						choices: [{ delta: { content: "test" }, finish_reason: null }],
+						usage: null,
+					}
+					yield {
+						choices: [{ delta: {}, finish_reason: "stop" }],
+						usage: { prompt_tokens: 10, completion_tokens: 5 },
+					}
+				},
+			}
+
+			const mockCreate = vitest.fn().mockResolvedValue(mockStream)
+			;(OpenAI as any).prototype.chat = {
+				completions: { create: mockCreate },
+			} as any
+
+			const generator = deepseek32Handler.createMessage("system prompt", [])
+			const chunks = []
+			for await (const chunk of generator) {
+				chunks.push(chunk)
+			}
+
+			// Verify that the messages were NOT converted to R1 format
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					messages: expect.arrayContaining([
+						expect.objectContaining({ role: "system", content: expect.anything() }),
+					]),
+				}),
+				undefined,
+			)
+		})
+
+		it("should still use R1 format for DeepSeek R1 models", async () => {
+			const deepseekR1Handler = new OpenRouterHandler({
+				...mockOptions,
+				openRouterModelId: "deepseek/deepseek-r1",
+			})
+
+			const mockStream = {
+				[Symbol.asyncIterator]: async function* () {
+					yield {
+						choices: [{ delta: { content: "test" }, finish_reason: null }],
+						usage: null,
+					}
+					yield {
+						choices: [{ delta: {}, finish_reason: "stop" }],
+						usage: { prompt_tokens: 10, completion_tokens: 5 },
+					}
+				},
+			}
+
+			const mockCreate = vitest.fn().mockResolvedValue(mockStream)
+			;(OpenAI as any).prototype.chat = {
+				completions: { create: mockCreate },
+			} as any
+
+			const generator = deepseekR1Handler.createMessage("system prompt", [])
+			const chunks = []
+			for await (const chunk of generator) {
+				chunks.push(chunk)
+			}
+
+			// Verify that the messages WERE converted to R1 format (user role instead of system)
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					messages: expect.arrayContaining([
+						expect.objectContaining({ role: "user", content: expect.anything() }),
+					]),
+				}),
+				undefined,
+			)
+		})
+
+		// Test DeepSeek Chat V3.1 model (uses standard format, NOT R1)
+		it("should NOT use R1 format for DeepSeek Chat V3.1 models", async () => {
+			const deepseekChatHandler = new OpenRouterHandler({
+				...mockOptions,
+				openRouterModelId: "deepseek/deepseek-chat-v3.1",
+			})
+
+			const mockStream = {
+				[Symbol.asyncIterator]: async function* () {
+					yield {
+						choices: [{ delta: { content: "test" }, finish_reason: null }],
+						usage: null,
+					}
+					yield {
+						choices: [{ delta: {}, finish_reason: "stop" }],
+						usage: { prompt_tokens: 10, completion_tokens: 5 },
+					}
+				},
+			}
+
+			const mockCreate = vitest.fn().mockResolvedValue(mockStream)
+			;(OpenAI as any).prototype.chat = {
+				completions: { create: mockCreate },
+			} as any
+
+			const generator = deepseekChatHandler.createMessage("system prompt", [])
+			const chunks = []
+			for await (const chunk of generator) {
+				chunks.push(chunk)
+			}
+
+			// Verify that the messages were NOT converted to R1 format (system role preserved)
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					messages: expect.arrayContaining([
+						expect.objectContaining({ role: "system", content: expect.anything() }),
+					]),
+				}),
+				undefined,
+			)
+		})
+
+		// Test DeepSeek R1-0528 variant (uses R1 format)
+		it("should use R1 format for DeepSeek R1-0528 models", async () => {
+			const deepseekR1Handler = new OpenRouterHandler({
+				...mockOptions,
+				openRouterModelId: "deepseek/deepseek-r1-0528",
+			})
+
+			const mockStream = {
+				[Symbol.asyncIterator]: async function* () {
+					yield {
+						choices: [{ delta: { content: "test" }, finish_reason: null }],
+						usage: null,
+					}
+					yield {
+						choices: [{ delta: {}, finish_reason: "stop" }],
+						usage: { prompt_tokens: 10, completion_tokens: 5 },
+					}
+				},
+			}
+
+			const mockCreate = vitest.fn().mockResolvedValue(mockStream)
+			;(OpenAI as any).prototype.chat = {
+				completions: { create: mockCreate },
+			} as any
+
+			const generator = deepseekR1Handler.createMessage("system prompt", [])
+			const chunks = []
+			for await (const chunk of generator) {
+				chunks.push(chunk)
+			}
+
+			// Verify that the messages WERE converted to R1 format (user role instead of system)
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					messages: expect.arrayContaining([
+						expect.objectContaining({ role: "user", content: expect.anything() }),
+					]),
+				}),
+				undefined,
+			)
+		})
+
+		// Test DeepSeek Prover V2 model (uses R1 format as it's a reasoning model)
+		it("should use R1 format for DeepSeek Prover V2 models", async () => {
+			const deepseekProverHandler = new OpenRouterHandler({
+				...mockOptions,
+				openRouterModelId: "deepseek/deepseek-prover-v2",
+			})
+
+			const mockStream = {
+				[Symbol.asyncIterator]: async function* () {
+					yield {
+						choices: [{ delta: { content: "test" }, finish_reason: null }],
+						usage: null,
+					}
+					yield {
+						choices: [{ delta: {}, finish_reason: "stop" }],
+						usage: { prompt_tokens: 10, completion_tokens: 5 },
+					}
+				},
+			}
+
+			const mockCreate = vitest.fn().mockResolvedValue(mockStream)
+			;(OpenAI as any).prototype.chat = {
+				completions: { create: mockCreate },
+			} as any
+
+			const generator = deepseekProverHandler.createMessage("system prompt", [])
+			const chunks = []
+			for await (const chunk of generator) {
+				chunks.push(chunk)
+			}
+
+			// Verify that the messages WERE converted to R1 format (user role instead of system)
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					messages: expect.arrayContaining([
+						expect.objectContaining({ role: "user", content: expect.anything() }),
+					]),
+				}),
+				undefined,
+			)
+		})
+	})
 })
diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts
index de119de6dba..731a9d6f1df 100644
--- a/src/api/providers/deepseek.ts
+++ b/src/api/providers/deepseek.ts
@@ -1,31 +1,195 @@
-import { deepSeekModels, deepSeekDefaultModelId } from "@roo-code/types"
+import { Anthropic } from "@anthropic-ai/sdk"
+import OpenAI from "openai"
+
+import {
+	deepSeekModels,
+	deepSeekDefaultModelId,
+	deepSeekModelAliases,
+	DEEP_SEEK_DEFAULT_TEMPERATURE,
+	type ModelInfo,
+} from "@roo-code/types"
 
 import type { ApiHandlerOptions } from "../../shared/api"
-import type { ApiStreamUsageChunk } from "../transform/stream"
+import type { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
 import { getModelParams } from "../transform/model-params"
+import { convertToOpenAiMessages } from "../transform/openai-format"
 
 import { OpenAiHandler } from "./openai"
+import type { ApiHandlerCreateMessageMetadata } from "../index"
+import { XmlMatcher } from "../../utils/xml-matcher"
+
+/**
+ * Maps a user-provided model ID to the official DeepSeek API model name.
+ * The DeepSeek API uses specific model names (deepseek-chat, deepseek-reasoner),
+ * but users may use alternative names like deepseek-v3, deepseek-3.2, etc.
+ */
+function getApiModelId(modelId: string): string {
+	return deepSeekModelAliases[modelId] ?? modelId
+}
+
+/**
+ * Checks if a model is a DeepSeek V3/Chat model (not a reasoner model).
+ * V3/Chat models support thinking mode with tool calling via the "thinking" parameter.
+ */
+function isDeepSeekV3Model(modelId: string): boolean {
+	// Map alias to actual model ID for checking
+	const actualModelId = getApiModelId(modelId)
+	// V3/Chat models use deepseek-chat, not deepseek-reasoner
+	return actualModelId === "deepseek-chat"
+}
 
 export class DeepSeekHandler extends OpenAiHandler {
 	constructor(options: ApiHandlerOptions) {
+		const userModelId = options.apiModelId ?? deepSeekDefaultModelId
+		// Map the user's model ID to the official API model name
+		const apiModelId = getApiModelId(userModelId)
+
 		super({
 			...options,
 			openAiApiKey: options.deepSeekApiKey ?? "not-provided",
-			openAiModelId: options.apiModelId ?? deepSeekDefaultModelId,
+			openAiModelId: apiModelId, // Use the mapped API model ID
 			openAiBaseUrl: options.deepSeekBaseUrl ?? "https://api.deepseek.com",
 			openAiStreamingEnabled: true,
 			includeMaxTokens: true,
 		})
+
+		// Store the original user model ID for getModel()
+		this.userModelId = userModelId
 	}
 
+	// Store the user's original model ID (before alias mapping)
+	private userModelId: string
+
 	override getModel() {
-		const id = this.options.apiModelId ?? deepSeekDefaultModelId
+		// Use the user's original model ID for info lookup (so they see the model they selected)
+		const id = this.userModelId
 		const info = deepSeekModels[id as keyof typeof deepSeekModels] || deepSeekModels[deepSeekDefaultModelId]
 		const params = getModelParams({ format: "openai", modelId: id, model: info, settings: this.options })
 		return { id, info, ...params }
 	}
 
+	/**
+	 * Override createMessage to add DeepSeek V3.2 thinking mode support.
+	 * When enableReasoningEffort is true and the model is a V3/Chat model,
+	 * we add the thinking parameter to enable thinking mode with tool calling.
+	 * See: https://api-docs.deepseek.com/guides/thinking_mode
+	 */
+	override async *createMessage(
+		systemPrompt: string,
+		messages: Anthropic.Messages.MessageParam[],
+		metadata?: ApiHandlerCreateMessageMetadata,
+	): ApiStream {
+		const { info: modelInfo } = this.getModel()
+		const modelId = this.options.openAiModelId ?? ""
+
+		// Check if thinking mode should be enabled for DeepSeek V3 models
+		// Cast to ModelInfo to access optional supportsReasoningBinary property
+		const shouldEnableThinking =
+			this.options.enableReasoningEffort &&
+			(modelInfo as ModelInfo).supportsReasoningBinary &&
+			isDeepSeekV3Model(this.userModelId)
+
+		// If thinking mode is not enabled, use the default OpenAI handler behavior
+		if (!shouldEnableThinking) {
+			yield* super.createMessage(systemPrompt, messages, metadata)
+			return
+		}
+
+		// For DeepSeek V3 with thinking mode enabled, we need to:
+		// 1. Add the thinking parameter to the request
+		// 2. Handle reasoning_content in the response
+		// 3. Preserve reasoning_content in conversation history (handled by openai-format.ts)
+
+		const temperature = this.options.modelTemperature ?? DEEP_SEEK_DEFAULT_TEMPERATURE
+
+		// Convert messages to OpenAI format, preserving reasoning_content
+		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
+			{ role: "system", content: systemPrompt },
+			...convertToOpenAiMessages(messages),
+		]
+
+		// Build the request with thinking mode enabled
+		const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming & {
+			thinking?: { type: string }
+		} = {
+			model: modelId,
+			temperature,
+			messages: openAiMessages,
+			stream: true as const,
+			stream_options: { include_usage: true },
+			// Enable thinking mode for DeepSeek V3.2
+			// See: https://api-docs.deepseek.com/guides/thinking_mode
+			thinking: { type: "enabled" },
+			...(metadata?.tools && { tools: this.convertToolsForOpenAI(metadata.tools) }),
+			...(metadata?.tool_choice && { tool_choice: metadata.tool_choice }),
+		}
+
+		// Add max_tokens if needed
+		if (this.options.includeMaxTokens && modelInfo.maxTokens) {
+			requestOptions.max_completion_tokens = this.options.modelMaxTokens || modelInfo.maxTokens
+		}
+
+		// Create the stream using the protected client from OpenAiHandler
+		// We need to access the client directly since we're overriding the method
+		const client = (this as any).client as OpenAI
+		const stream = await client.chat.completions.create(requestOptions)
+
+		const matcher = new XmlMatcher(
+			"think",
+			(chunk) =>
+				({
+					type: chunk.matched ? "reasoning" : "text",
+					text: chunk.data,
+				}) as const,
+		)
+
+		let lastUsage: any
+
+		for await (const chunk of stream) {
+			const delta = chunk.choices?.[0]?.delta ?? {}
+
+			if (delta.content) {
+				for (const processedChunk of matcher.update(delta.content)) {
+					yield processedChunk
+				}
+			}
+
+			// Handle reasoning_content from DeepSeek thinking mode
+			if ("reasoning_content" in delta && delta.reasoning_content) {
+				yield {
+					type: "reasoning",
+					text: (delta.reasoning_content as string | undefined) || "",
+				}
+			}
+
+			// Handle tool calls
+			if (delta.tool_calls) {
+				for (const toolCall of delta.tool_calls) {
+					yield {
+						type: "tool_call_partial",
+						index: toolCall.index,
+						id: toolCall.id,
+						name: toolCall.function?.name,
+						arguments: toolCall.function?.arguments,
+					}
+				}
+			}
+
+			if (chunk.usage) {
+				lastUsage = chunk.usage
+			}
+		}
+
+		for (const chunk of matcher.final()) {
+			yield chunk
+		}
+
+		if (lastUsage) {
+			yield this.processUsageMetrics(lastUsage)
+		}
+	}
+
 	// Override to handle DeepSeek's usage metrics, including caching.
 	protected override processUsageMetrics(usage: any): ApiStreamUsageChunk {
 		return {
diff --git a/src/api/providers/fetchers/openrouter.ts b/src/api/providers/fetchers/openrouter.ts
index 38d3c52fa94..66b82f0419f 100644
--- a/src/api/providers/fetchers/openrouter.ts
+++ b/src/api/providers/fetchers/openrouter.ts
@@ -266,5 +266,23 @@ export const parseOpenRouterModel = ({
 		modelInfo.maxTokens = 32768
 	}
 
+	// Configure DeepSeek V3/Chat models properly
+	// These models support standard tool calling but NOT R1 format
+	// OpenRouter uses deepseek/deepseek-v3, deepseek/deepseek-v3.2, deepseek/deepseek-chat-v3.1, etc.
+	if (
+		id.startsWith("deepseek/deepseek-v3") || // Matches deepseek/deepseek-v3, deepseek/deepseek-v3.2, deepseek/deepseek-v3.1-terminus, etc.
+		id.startsWith("deepseek/deepseek-chat") // Matches deepseek/deepseek-chat, deepseek/deepseek-chat-v3.1, deepseek/deepseek-chat-v3-0324, etc.
+	) {
+		// Ensure these models are marked as supporting native tools
+		// but NOT reasoning format (they're not R1 models)
+		if (modelInfo.supportsNativeTools === undefined) {
+			modelInfo.supportsNativeTools = true
+		}
+		// Ensure reasonable max tokens if not set
+		if (!modelInfo.maxTokens || modelInfo.maxTokens < 8192) {
+			modelInfo.maxTokens = 8192
+		}
+	}
+
 	return modelInfo
 }
diff --git a/src/api/providers/openrouter.ts b/src/api/providers/openrouter.ts
index e8e95ad58df..d6adbe8ac27 100644
--- a/src/api/providers/openrouter.ts
+++ b/src/api/providers/openrouter.ts
@@ -135,8 +135,13 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 			...convertToOpenAiMessages(messages),
 		]
 
-		// DeepSeek highly recommends using user instead of system role.
-		if (modelId.startsWith("deepseek/deepseek-r1") || modelId === "perplexity/sonar-reasoning") {
+		// DeepSeek R1 and Prover models require user instead of system role.
+		// Note: DeepSeek V3/Chat models (deepseek-v3, deepseek-v3.2, deepseek-chat-v3.1, etc.) do NOT use R1 format
+		if (
+			modelId.startsWith("deepseek/deepseek-r1") ||
+			modelId.startsWith("deepseek/deepseek-prover") ||
+			modelId === "perplexity/sonar-reasoning"
+		) {
 			openAiMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
 		}
 
@@ -388,7 +393,15 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 			info = this.endpoints[this.options.openRouterSpecificProvider]
 		}
 
-		const isDeepSeekR1 = id.startsWith("deepseek/deepseek-r1") || id === "perplexity/sonar-reasoning"
+		// Only DeepSeek R1 and Prover models use special temperature and topP settings
+		// DeepSeek V3/Chat models (v3, v3.2, chat-v3.1, etc.) use standard settings
+		const isDeepSeekR1 =
+			id.startsWith("deepseek/deepseek-r1") ||
+			id.startsWith("deepseek/deepseek-prover") ||
+			id === "perplexity/sonar-reasoning"
+		const isDeepSeekV3 =
+			id.startsWith("deepseek/deepseek-v3") || // Matches deepseek/deepseek-v3, deepseek/deepseek-v3.2, etc.
+			id.startsWith("deepseek/deepseek-chat") // Matches deepseek/deepseek-chat, deepseek/deepseek-chat-v3.1, etc.
 
 		const params = getModelParams({
 			format: "openrouter",
diff --git a/src/api/transform/openai-format.ts b/src/api/transform/openai-format.ts
index 6a88491b7e1..404375f2aa9 100644
--- a/src/api/transform/openai-format.ts
+++ b/src/api/transform/openai-format.ts
@@ -132,7 +132,10 @@ export function convertToOpenAiMessages(
 				},
 			}))
 
-			// Check if the message has reasoning_details (used by Gemini 3, etc.)
+			// Check if the message has reasoning_details or reasoning_content
+			// - reasoning_details: used by Gemini 3, OpenRouter, etc.
+			// - reasoning_content: used by DeepSeek V3.2 thinking mode
+			// See: https://api-docs.deepseek.com/guides/thinking_mode
 			const messageWithDetails = anthropicMessage as any
 			const baseMessage: OpenAI.Chat.ChatCompletionAssistantMessageParam = {
 				role: "assistant",
@@ -146,6 +149,13 @@ export function convertToOpenAiMessages(
 				;(baseMessage as any).reasoning_details = messageWithDetails.reasoning_details
 			}
 
+			// Preserve reasoning_content if present (used by DeepSeek V3.2 thinking mode)
+			// DeepSeek requires reasoning_content to be passed back in subsequent API calls
+			// when using thinking mode with tool calling
+			if (messageWithDetails.reasoning_content && typeof messageWithDetails.reasoning_content === "string") {
+				;(baseMessage as any).reasoning_content = messageWithDetails.reasoning_content
+			}
+
 			openAiMessages.push(baseMessage)
 		}
 	}
diff --git a/webview-ui/src/components/chat/__tests__/ChatView.spec.tsx b/webview-ui/src/components/chat/__tests__/ChatView.spec.tsx
index 7a6d0a0bc10..35126c9ba87 100644
--- a/webview-ui/src/components/chat/__tests__/ChatView.spec.tsx
+++ b/webview-ui/src/components/chat/__tests__/ChatView.spec.tsx
@@ -468,7 +468,12 @@ describe("ChatView - Focus Grabbing Tests", () => {
 			expect(getByTestId("chat-textarea")).toBeInTheDocument()
 		})
 
-		// Clear any initial calls after state has settled
+		// Wait for any debounced focus effects to complete (useDebounceEffect has 50ms delay)
+		await act(async () => {
+			await new Promise((resolve) => setTimeout(resolve, 100))
+		})
+
+		// Clear any initial calls after state has settled and debounce completed
 		mockFocus.mockClear()
 
 		// Add follow-up question
@@ -489,12 +494,13 @@ describe("ChatView - Focus Grabbing Tests", () => {
 			],
 		})
 
-		// Wait for state update to complete
-		await waitFor(() => {
-			expect(getByTestId("chat-textarea")).toBeInTheDocument()
+		// Wait for state update to complete and any debounced effects
+		await act(async () => {
+			await new Promise((resolve) => setTimeout(resolve, 100))
 		})
 
 		// Should not grab focus for follow-up questions
+		// The followup case sets enableButtons to true, which prevents focus grabbing
 		expect(mockFocus).not.toHaveBeenCalled()
 	})
 })