From a57c23842eed738c10382194f88f7384ba8f7c6a Mon Sep 17 00:00:00 2001 From: "Jeffrey.Cao" Date: Fri, 26 Sep 2025 13:08:57 +0800 Subject: [PATCH 01/20] feature gpt-5-codex responses api --- src/routes/messages/handler.ts | 138 +++- .../messages/responses-stream-translation.ts | 664 ++++++++++++++++++ src/routes/messages/responses-translation.ts | 638 +++++++++++++++++ src/routes/responses/handler.ts | 78 ++ src/routes/responses/route.ts | 15 + src/routes/responses/utils.ts | 71 ++ src/server.ts | 3 + src/services/copilot/create-responses.ts | 212 ++++++ src/services/copilot/get-models.ts | 4 + tests/responses-stream-translation.test.ts | 137 ++++ tests/translation.test.ts | 159 +++++ 11 files changed, 2115 insertions(+), 4 deletions(-) create mode 100644 src/routes/messages/responses-stream-translation.ts create mode 100644 src/routes/messages/responses-translation.ts create mode 100644 src/routes/responses/handler.ts create mode 100644 src/routes/responses/route.ts create mode 100644 src/routes/responses/utils.ts create mode 100644 src/services/copilot/create-responses.ts create mode 100644 tests/responses-stream-translation.test.ts create mode 100644 tests/translation.test.ts diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index 85dbf624..10b97c53 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -6,11 +6,24 @@ import { streamSSE } from "hono/streaming" import { awaitApproval } from "~/lib/approval" import { checkRateLimit } from "~/lib/rate-limit" import { state } from "~/lib/state" +import { + createResponsesStreamState, + translateResponsesStreamEvent, +} from "~/routes/messages/responses-stream-translation" +import { + translateAnthropicMessagesToResponsesPayload, + translateResponsesResultToAnthropic, +} from "~/routes/messages/responses-translation" +import { getResponsesRequestOptions } from "~/routes/responses/utils" import { createChatCompletions, type ChatCompletionChunk, type ChatCompletionResponse, } from "~/services/copilot/create-chat-completions" +import { + createResponses, + type ResponsesResult, +} from "~/services/copilot/create-responses" import { type AnthropicMessagesPayload, @@ -28,16 +41,31 @@ export async function handleCompletion(c: Context) { const anthropicPayload = await c.req.json() consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload)) + const useResponsesApi = shouldUseResponsesApi(anthropicPayload.model) + + if (state.manualApprove) { + await awaitApproval() + } + + if (useResponsesApi) { + return await handleWithResponsesApi(c, anthropicPayload) + } + + return await handleWithChatCompletions(c, anthropicPayload) +} + +const RESPONSES_ENDPOINT = "/responses" + +const handleWithChatCompletions = async ( + c: Context, + anthropicPayload: AnthropicMessagesPayload, +) => { const openAIPayload = translateToOpenAI(anthropicPayload) consola.debug( "Translated OpenAI request payload:", JSON.stringify(openAIPayload), ) - if (state.manualApprove) { - await awaitApproval() - } - const response = await createChatCompletions(openAIPayload) if (isNonStreaming(response)) { @@ -86,6 +114,108 @@ export async function handleCompletion(c: Context) { }) } +const handleWithResponsesApi = async ( + c: Context, + anthropicPayload: AnthropicMessagesPayload, +) => { + const responsesPayload = + translateAnthropicMessagesToResponsesPayload(anthropicPayload) + consola.debug( + "Translated Responses payload:", + JSON.stringify(responsesPayload), + ) + + const { vision, initiator } = 
getResponsesRequestOptions(responsesPayload) + const response = await createResponses(responsesPayload, { + vision, + initiator, + }) + + if (responsesPayload.stream && isAsyncIterable(response)) { + consola.debug("Streaming response from Copilot (Responses API)") + return streamSSE(c, async (stream) => { + const streamState = createResponsesStreamState() + + for await (const chunk of response) { + consola.debug("Responses raw stream event:", JSON.stringify(chunk)) + + const eventName = (chunk as { event?: string }).event + if (eventName === "ping") { + await stream.writeSSE({ event: "ping", data: "" }) + continue + } + + const data = (chunk as { data?: string }).data + if (!data) { + continue + } + + if (data === "[DONE]") { + break + } + + const parsed = safeJsonParse(data) + if (!parsed) { + continue + } + + const events = translateResponsesStreamEvent(parsed, streamState) + for (const event of events) { + consola.debug("Translated Anthropic event:", JSON.stringify(event)) + await stream.writeSSE({ + event: event.type, + data: JSON.stringify(event), + }) + } + } + + if (!streamState.messageCompleted) { + consola.warn( + "Responses stream ended without completion; sending fallback message_stop", + ) + const fallback = { type: "message_stop" as const } + await stream.writeSSE({ + event: fallback.type, + data: JSON.stringify(fallback), + }) + } + }) + } + + consola.debug( + "Non-streaming Responses result:", + JSON.stringify(response).slice(-400), + ) + const anthropicResponse = translateResponsesResultToAnthropic( + response as ResponsesResult, + ) + consola.debug( + "Translated Anthropic response:", + JSON.stringify(anthropicResponse), + ) + return c.json(anthropicResponse) +} + +const shouldUseResponsesApi = (modelId: string): boolean => { + const selectedModel = state.models?.data.find((model) => model.id === modelId) + return ( + selectedModel?.supported_endpoints?.includes(RESPONSES_ENDPOINT) ?? 
false + ) +} + const isNonStreaming = ( response: Awaited>, ): response is ChatCompletionResponse => Object.hasOwn(response, "choices") + +const isAsyncIterable = (value: unknown): value is AsyncIterable => + Boolean(value) + && typeof (value as AsyncIterable)[Symbol.asyncIterator] === "function" + +const safeJsonParse = (value: string): Record | undefined => { + try { + return JSON.parse(value) as Record + } catch (error) { + consola.warn("Failed to parse Responses stream chunk:", value, error) + return undefined + } +} diff --git a/src/routes/messages/responses-stream-translation.ts b/src/routes/messages/responses-stream-translation.ts new file mode 100644 index 00000000..06feab1a --- /dev/null +++ b/src/routes/messages/responses-stream-translation.ts @@ -0,0 +1,664 @@ +import { type ResponsesResult } from "~/services/copilot/create-responses" + +import { type AnthropicStreamEventData } from "./anthropic-types" +import { translateResponsesResultToAnthropic } from "./responses-translation" + +export interface ResponsesStreamState { + messageStartSent: boolean + messageCompleted: boolean + nextContentBlockIndex: number + blockIndexByKey: Map + openBlocks: Set + blockHasDelta: Set + currentResponseId?: string + currentModel?: string + initialInputTokens?: number + functionCallStateByOutputIndex: Map + functionCallOutputIndexByItemId: Map +} + +type FunctionCallStreamState = { + blockIndex: number + toolCallId: string + name: string +} + +export const createResponsesStreamState = (): ResponsesStreamState => ({ + messageStartSent: false, + messageCompleted: false, + nextContentBlockIndex: 0, + blockIndexByKey: new Map(), + openBlocks: new Set(), + blockHasDelta: new Set(), + functionCallStateByOutputIndex: new Map(), + functionCallOutputIndexByItemId: new Map(), +}) + +export const translateResponsesStreamEvent = ( + rawEvent: Record, + state: ResponsesStreamState, +): Array => { + const eventType = + typeof rawEvent.type === "string" ? 
rawEvent.type : undefined + if (!eventType) { + return [] + } + + switch (eventType) { + case "response.created": { + return handleResponseCreated(rawEvent, state) + } + + case "response.reasoning_summary_text.delta": + case "response.output_text.delta": { + return handleOutputTextDelta(rawEvent, state) + } + + case "response.reasoning_summary_part.done": + case "response.output_text.done": { + return handleOutputTextDone(rawEvent, state) + } + + case "response.output_item.added": { + return handleOutputItemAdded(rawEvent, state) + } + + case "response.function_call_arguments.delta": { + return handleFunctionCallArgumentsDelta(rawEvent, state) + } + + case "response.function_call_arguments.done": { + return handleFunctionCallArgumentsDone(rawEvent, state) + } + + case "response.completed": + case "response.incomplete": { + return handleResponseCompleted(rawEvent, state) + } + + case "response.failed": { + return handleResponseFailed(rawEvent, state) + } + + case "error": { + return handleErrorEvent(rawEvent, state) + } + + default: { + return [] + } + } +} + +// Helper handlers to keep translateResponsesStreamEvent concise +const handleResponseCreated = ( + rawEvent: Record, + state: ResponsesStreamState, +): Array => { + const response = toResponsesResult(rawEvent.response) + if (response) { + cacheResponseMetadata(state, response) + } + return ensureMessageStart(state, response) +} + +const handleOutputItemAdded = ( + rawEvent: Record, + state: ResponsesStreamState, +): Array => { + const response = toResponsesResult(rawEvent.response) + const events = ensureMessageStart(state, response) + + const functionCallDetails = extractFunctionCallDetails(rawEvent, state) + if (!functionCallDetails) { + return events + } + + const { outputIndex, toolCallId, name, initialArguments, itemId } = + functionCallDetails + + if (itemId) { + state.functionCallOutputIndexByItemId.set(itemId, outputIndex) + } + + const blockIndex = openFunctionCallBlock(state, { + outputIndex, + toolCallId, + name, + events, + }) + + if (initialArguments !== undefined && initialArguments.length > 0) { + events.push({ + type: "content_block_delta", + index: blockIndex, + delta: { + type: "input_json_delta", + partial_json: initialArguments, + }, + }) + state.blockHasDelta.add(blockIndex) + } + + return events +} + +const handleFunctionCallArgumentsDelta = ( + rawEvent: Record, + state: ResponsesStreamState, +): Array => { + const events = ensureMessageStart(state) + + const outputIndex = resolveFunctionCallOutputIndex(state, rawEvent) + if (outputIndex === undefined) { + return events + } + + const deltaText = typeof rawEvent.delta === "string" ? rawEvent.delta : "" + if (!deltaText) { + return events + } + + const blockIndex = openFunctionCallBlock(state, { + outputIndex, + events, + }) + + events.push({ + type: "content_block_delta", + index: blockIndex, + delta: { + type: "input_json_delta", + partial_json: deltaText, + }, + }) + state.blockHasDelta.add(blockIndex) + + return events +} + +const handleFunctionCallArgumentsDone = ( + rawEvent: Record, + state: ResponsesStreamState, +): Array => { + const events = ensureMessageStart(state) + + const outputIndex = resolveFunctionCallOutputIndex(state, rawEvent) + if (outputIndex === undefined) { + return events + } + + const blockIndex = openFunctionCallBlock(state, { + outputIndex, + events, + }) + + const finalArguments = + typeof rawEvent.arguments === "string" ? 
rawEvent.arguments : undefined + + if (!state.blockHasDelta.has(blockIndex) && finalArguments) { + events.push({ + type: "content_block_delta", + index: blockIndex, + delta: { + type: "input_json_delta", + partial_json: finalArguments, + }, + }) + state.blockHasDelta.add(blockIndex) + } + + closeBlockIfOpen(state, blockIndex, events) + + const existingState = state.functionCallStateByOutputIndex.get(outputIndex) + if (existingState) { + state.functionCallOutputIndexByItemId.delete(existingState.toolCallId) + } + state.functionCallStateByOutputIndex.delete(outputIndex) + + const itemId = toNonEmptyString(rawEvent.item_id) + if (itemId) { + state.functionCallOutputIndexByItemId.delete(itemId) + } + + return events +} + +const handleOutputTextDelta = ( + rawEvent: Record, + state: ResponsesStreamState, +): Array => { + const events = ensureMessageStart(state) + + const outputIndex = toNumber(rawEvent.output_index) + const contentIndex = toNumber(rawEvent.content_index) + const deltaText = typeof rawEvent.delta === "string" ? rawEvent.delta : "" + + if (!deltaText) { + return events + } + + const blockIndex = openTextBlockIfNeeded(state, { + outputIndex, + contentIndex, + events, + }) + + events.push({ + type: "content_block_delta", + index: blockIndex, + delta: { + type: "text_delta", + text: deltaText, + }, + }) + state.blockHasDelta.add(blockIndex) + + return events +} + +const handleOutputTextDone = ( + rawEvent: Record, + state: ResponsesStreamState, +): Array => { + const events = ensureMessageStart(state) + + const outputIndex = toNumber(rawEvent.output_index) + const contentIndex = toNumber(rawEvent.content_index) + const text = typeof rawEvent.text === "string" ? rawEvent.text : "" + + const blockIndex = openTextBlockIfNeeded(state, { + outputIndex, + contentIndex, + events, + }) + + if (text && !state.blockHasDelta.has(blockIndex)) { + events.push({ + type: "content_block_delta", + index: blockIndex, + delta: { + type: "text_delta", + text, + }, + }) + } + + closeBlockIfOpen(state, blockIndex, events) + + return events +} + +const handleResponseCompleted = ( + rawEvent: Record, + state: ResponsesStreamState, +): Array => { + const response = toResponsesResult(rawEvent.response) + const events = ensureMessageStart(state, response) + + closeAllOpenBlocks(state, events) + + if (response) { + const anthropic = translateResponsesResultToAnthropic(response) + events.push({ + type: "message_delta", + delta: { + stop_reason: anthropic.stop_reason, + stop_sequence: anthropic.stop_sequence, + }, + usage: anthropic.usage, + }) + } else { + events.push({ + type: "message_delta", + delta: { + stop_reason: null, + stop_sequence: null, + }, + }) + } + + events.push({ type: "message_stop" }) + state.messageCompleted = true + + return events +} + +const handleResponseFailed = ( + rawEvent: Record, + state: ResponsesStreamState, +): Array => { + const response = toResponsesResult(rawEvent.response) + const events = ensureMessageStart(state, response) + + closeAllOpenBlocks(state, events) + + const message = + typeof rawEvent.error === "string" ? + rawEvent.error + : "Response generation failed." + + events.push(buildErrorEvent(message)) + state.messageCompleted = true + + return events +} + +const handleErrorEvent = ( + rawEvent: Record, + state: ResponsesStreamState, +): Array => { + const message = + typeof rawEvent.message === "string" ? + rawEvent.message + : "An unexpected error occurred during streaming." 
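+  // Surfaced to clients as an Anthropic `error` stream event via
+  // buildErrorEvent below; e.g. a raw `{"type":"error","message":"boom"}`
+  // chunk becomes `{ type: "error", error: { type: "api_error", message:
+  // "boom" } }`, with the fallback string used when `message` is absent.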
+ + state.messageCompleted = true + return [buildErrorEvent(message)] +} + +const ensureMessageStart = ( + state: ResponsesStreamState, + response?: ResponsesResult, +): Array => { + if (state.messageStartSent) { + return [] + } + + if (response) { + cacheResponseMetadata(state, response) + } + + const id = response?.id ?? state.currentResponseId ?? "response" + const model = response?.model ?? state.currentModel ?? "" + + const inputTokens = + response?.usage?.input_tokens ?? state.initialInputTokens ?? 0 + + state.messageStartSent = true + + return [ + { + type: "message_start", + message: { + id, + type: "message", + role: "assistant", + content: [], + model, + stop_reason: null, + stop_sequence: null, + usage: { + input_tokens: inputTokens, + output_tokens: 0, + }, + }, + }, + ] +} + +const openTextBlockIfNeeded = ( + state: ResponsesStreamState, + params: { + outputIndex: number + contentIndex: number + events: Array + }, +): number => { + const { outputIndex, contentIndex, events } = params + const key = getBlockKey(outputIndex, contentIndex) + let blockIndex = state.blockIndexByKey.get(key) + + if (blockIndex === undefined) { + blockIndex = state.nextContentBlockIndex + state.nextContentBlockIndex += 1 + state.blockIndexByKey.set(key, blockIndex) + } + + if (!state.openBlocks.has(blockIndex)) { + events.push({ + type: "content_block_start", + index: blockIndex, + content_block: { + type: "text", + text: "", + }, + }) + state.openBlocks.add(blockIndex) + } + + return blockIndex +} + +const closeBlockIfOpen = ( + state: ResponsesStreamState, + blockIndex: number, + events: Array, +) => { + if (!state.openBlocks.has(blockIndex)) { + return + } + + events.push({ type: "content_block_stop", index: blockIndex }) + state.openBlocks.delete(blockIndex) + state.blockHasDelta.delete(blockIndex) +} + +const closeAllOpenBlocks = ( + state: ResponsesStreamState, + events: Array, +) => { + for (const blockIndex of state.openBlocks) { + closeBlockIfOpen(state, blockIndex, events) + } + + state.functionCallStateByOutputIndex.clear() + state.functionCallOutputIndexByItemId.clear() +} + +const cacheResponseMetadata = ( + state: ResponsesStreamState, + response: ResponsesResult, +) => { + state.currentResponseId = response.id + state.currentModel = response.model + state.initialInputTokens = response.usage?.input_tokens ?? 
0 +} + +const buildErrorEvent = (message: string): AnthropicStreamEventData => ({ + type: "error", + error: { + type: "api_error", + message, + }, +}) + +const getBlockKey = (outputIndex: number, contentIndex: number): string => + `${outputIndex}:${contentIndex}` + +const resolveFunctionCallOutputIndex = ( + state: ResponsesStreamState, + rawEvent: Record, +): number | undefined => { + if ( + typeof rawEvent.output_index === "number" + || (typeof rawEvent.output_index === "string" + && rawEvent.output_index.length > 0) + ) { + const parsed = toOptionalNumber(rawEvent.output_index) + if (parsed !== undefined) { + return parsed + } + } + + const itemId = toNonEmptyString(rawEvent.item_id) + if (itemId) { + const mapped = state.functionCallOutputIndexByItemId.get(itemId) + if (mapped !== undefined) { + return mapped + } + } + + return undefined +} + +const openFunctionCallBlock = ( + state: ResponsesStreamState, + params: { + outputIndex: number + toolCallId?: string + name?: string + events: Array + }, +): number => { + const { outputIndex, toolCallId, name, events } = params + + let functionCallState = state.functionCallStateByOutputIndex.get(outputIndex) + + if (!functionCallState) { + const blockIndex = state.nextContentBlockIndex + state.nextContentBlockIndex += 1 + + const resolvedToolCallId = toolCallId ?? `tool_call_${blockIndex}` + const resolvedName = name ?? "function" + + functionCallState = { + blockIndex, + toolCallId: resolvedToolCallId, + name: resolvedName, + } + + state.functionCallStateByOutputIndex.set(outputIndex, functionCallState) + state.functionCallOutputIndexByItemId.set(resolvedToolCallId, outputIndex) + } + + const { blockIndex } = functionCallState + + if (!state.openBlocks.has(blockIndex)) { + events.push({ + type: "content_block_start", + index: blockIndex, + content_block: { + type: "tool_use", + id: functionCallState.toolCallId, + name: functionCallState.name, + input: {}, + }, + }) + state.openBlocks.add(blockIndex) + } + + return blockIndex +} + +type FunctionCallDetails = { + outputIndex: number + toolCallId: string + name: string + initialArguments?: string + itemId?: string +} + +const extractFunctionCallDetails = ( + rawEvent: Record, + state: ResponsesStreamState, +): FunctionCallDetails | undefined => { + const item = isRecord(rawEvent.item) ? rawEvent.item : undefined + if (!item) { + return undefined + } + + const itemType = typeof item.type === "string" ? item.type : undefined + if (itemType !== "function_call") { + return undefined + } + + const outputIndex = resolveFunctionCallOutputIndex(state, rawEvent) + if (outputIndex === undefined) { + return undefined + } + + const callId = toNonEmptyString(item.call_id) + const itemId = toNonEmptyString(item.id) + const name = toNonEmptyString(item.name) ?? "function" + + const toolCallId = callId ?? itemId ?? `tool_call_${outputIndex}` + const initialArguments = + typeof item.arguments === "string" ? item.arguments : undefined + + return { + outputIndex, + toolCallId, + name, + initialArguments, + itemId, + } +} + +const toResponsesResult = (value: unknown): ResponsesResult | undefined => + isResponsesResult(value) ? 
value : undefined + +const toOptionalNumber = (value: unknown): number | undefined => { + if (typeof value === "number" && Number.isFinite(value)) { + return value + } + + if (typeof value === "string" && value.length > 0) { + const parsed = Number(value) + if (Number.isFinite(parsed)) { + return parsed + } + } + + return undefined +} + +const toNonEmptyString = (value: unknown): string | undefined => { + if (typeof value === "string" && value.length > 0) { + return value + } + + return undefined +} + +const toNumber = (value: unknown): number => { + if (typeof value === "number" && Number.isFinite(value)) { + return value + } + + if (typeof value === "string") { + const parsed = Number(value) + if (Number.isFinite(parsed)) { + return parsed + } + } + + return 0 +} + +const isResponsesResult = (value: unknown): value is ResponsesResult => { + if (!isRecord(value)) { + return false + } + + if (typeof value.id !== "string") { + return false + } + + if (typeof value.model !== "string") { + return false + } + + if (!Array.isArray(value.output)) { + return false + } + + if (typeof value.object !== "string") { + return false + } + + return true +} + +const isRecord = (value: unknown): value is Record => + typeof value === "object" && value !== null diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts new file mode 100644 index 00000000..057a7b96 --- /dev/null +++ b/src/routes/messages/responses-translation.ts @@ -0,0 +1,638 @@ +import consola from "consola" + +import { + type ResponsesPayload, + type ResponseInputContent, + type ResponseInputImage, + type ResponseInputItem, + type ResponseInputMessage, + type ResponseInputText, + type ResponsesResult, + type ResponseOutputContentBlock, + type ResponseOutputFunctionCall, + type ResponseOutputFunctionCallOutput, + type ResponseOutputItem, + type ResponseOutputReasoning, + type ResponseReasoningBlock, + type ResponseOutputRefusal, + type ResponseOutputText, + type ResponseFunctionToolCallItem, + type ResponseFunctionCallOutputItem, +} from "~/services/copilot/create-responses" + +import { + type AnthropicAssistantContentBlock, + type AnthropicAssistantMessage, + type AnthropicResponse, + type AnthropicImageBlock, + type AnthropicMessage, + type AnthropicMessagesPayload, + type AnthropicTextBlock, + type AnthropicTool, + type AnthropicToolResultBlock, + type AnthropicToolUseBlock, + type AnthropicUserContentBlock, + type AnthropicUserMessage, +} from "./anthropic-types" + +const MESSAGE_TYPE = "message" + +export const translateAnthropicMessagesToResponsesPayload = ( + payload: AnthropicMessagesPayload, +): ResponsesPayload => { + const input: Array = [] + + for (const message of payload.messages) { + input.push(...translateMessage(message)) + } + + const translatedTools = convertAnthropicTools(payload.tools) + const toolChoice = convertAnthropicToolChoice(payload.tool_choice) + + const { safetyIdentifier, promptCacheKey } = parseUserId( + payload.metadata?.user_id, + ) + + const responsesPayload: ResponsesPayload = { + model: payload.model, + input, + instructions: translateSystemPrompt(payload.system), + temperature: payload.temperature ?? null, + top_p: payload.top_p ?? null, + max_output_tokens: payload.max_tokens, + tools: translatedTools, + tool_choice: toolChoice, + metadata: payload.metadata ? { ...payload.metadata } : null, + safety_identifier: safetyIdentifier, + prompt_cache_key: promptCacheKey, + stream: payload.stream ?? 
null, + store: false, + parallel_tool_calls: true, + reasoning: { effort: "high", summary: "auto" }, + include: ["reasoning.encrypted_content"], + } + + return responsesPayload +} + +const translateMessage = ( + message: AnthropicMessage, +): Array => { + if (message.role === "user") { + return translateUserMessage(message) + } + + return translateAssistantMessage(message) +} + +const translateUserMessage = ( + message: AnthropicUserMessage, +): Array => { + if (typeof message.content === "string") { + return [createMessage("user", message.content)] + } + + if (!Array.isArray(message.content)) { + return [] + } + + const items: Array = [] + const pendingContent: Array = [] + + for (const block of message.content) { + if (block.type === "tool_result") { + flushPendingContent("user", pendingContent, items) + items.push(createFunctionCallOutput(block)) + continue + } + + const converted = translateUserContentBlock(block) + if (converted) { + pendingContent.push(converted) + } + } + + flushPendingContent("user", pendingContent, items) + + return items +} + +const translateAssistantMessage = ( + message: AnthropicAssistantMessage, +): Array => { + if (typeof message.content === "string") { + return [createMessage("assistant", message.content)] + } + + if (!Array.isArray(message.content)) { + return [] + } + + const items: Array = [] + const pendingContent: Array = [] + + for (const block of message.content) { + if (block.type === "tool_use") { + flushPendingContent("assistant", pendingContent, items) + items.push(createFunctionToolCall(block)) + continue + } + + const converted = translateAssistantContentBlock(block) + if (converted) { + pendingContent.push(converted) + } + } + + flushPendingContent("assistant", pendingContent, items) + + return items +} + +const translateUserContentBlock = ( + block: AnthropicUserContentBlock, +): ResponseInputContent | undefined => { + switch (block.type) { + case "text": { + return createTextContent(block.text) + } + case "image": { + return createImageContent(block) + } + case "tool_result": { + return undefined + } + default: { + return undefined + } + } +} + +const translateAssistantContentBlock = ( + block: AnthropicAssistantContentBlock, +): ResponseInputContent | undefined => { + switch (block.type) { + case "text": { + return createOutPutTextContent(block.text) + } + case "thinking": { + return createOutPutTextContent(block.thinking) + } + case "tool_use": { + return undefined + } + default: { + return undefined + } + } +} + +const flushPendingContent = ( + role: ResponseInputMessage["role"], + pendingContent: Array, + target: Array, +) => { + if (pendingContent.length === 0) { + return + } + + const messageContent = + pendingContent.length === 1 && isPlainText(pendingContent[0]) ? 
+ pendingContent[0].text + : [...pendingContent] + + target.push(createMessage(role, messageContent)) + pendingContent.length = 0 +} + +const createMessage = ( + role: ResponseInputMessage["role"], + content: string | Array, +): ResponseInputMessage => ({ + type: MESSAGE_TYPE, + role, + content, +}) + +const createTextContent = (text: string): ResponseInputText => ({ + type: "input_text", + text, +}) + +const createOutPutTextContent = (text: string): ResponseInputText => ({ + type: "output_text", + text, +}) + +const createImageContent = ( + block: AnthropicImageBlock, +): ResponseInputImage => ({ + type: "input_image", + image_url: `data:${block.source.media_type};base64,${block.source.data}`, +}) + +const createFunctionToolCall = ( + block: AnthropicToolUseBlock, +): ResponseFunctionToolCallItem => ({ + type: "function_call", + call_id: block.id, + name: block.name, + arguments: JSON.stringify(block.input), + status: "completed", +}) + +const createFunctionCallOutput = ( + block: AnthropicToolResultBlock, +): ResponseFunctionCallOutputItem => ({ + type: "function_call_output", + call_id: block.tool_use_id, + output: block.content, + status: block.is_error ? "incomplete" : "completed", +}) + +const translateSystemPrompt = ( + system: string | Array | undefined, +): string | null => { + if (!system) { + return null + } + + const toolUsePrompt = ` +## Tool use +- You have access to many tools. If a tool exists to perform a specific task, you MUST use that tool instead of running a terminal command to perform that task. +### Bash tool +When using the Bash tool, follow these rules: +- always run_in_background set to false, unless you are running a long-running command (e.g., a server or a watch command). +### BashOutput tool +When using the BashOutput tool, follow these rules: +- Only Bash Tool run_in_background set to true, Use BashOutput to read the output later +### TodoWrite tool +When using the TodoWrite tool, follow these rules: +- Skip using the TodoWrite tool for straightforward tasks (roughly the easiest 25%). +- Do not make single-step todo lists. +- When you made a todo, update it after having performed one of the sub-tasks that you shared on the todo list.` + + if (typeof system === "string") { + return system + toolUsePrompt + } + + const text = system + .map((block, index) => { + if (index === 0) { + return block.text + toolUsePrompt + } + return block.text + }) + .join(" ") + return text.length > 0 ? text : null +} + +const convertAnthropicTools = ( + tools: Array | undefined, +): Array> | null => { + if (!tools || tools.length === 0) { + return null + } + + return tools.map((tool) => ({ + type: "function", + name: tool.name, + parameters: tool.input_schema, + strict: false, + ...(tool.description ? { description: tool.description } : {}), + })) +} + +const convertAnthropicToolChoice = ( + choice: AnthropicMessagesPayload["tool_choice"], +): unknown => { + if (!choice) { + return undefined + } + + switch (choice.type) { + case "auto": { + return "auto" + } + case "any": { + return "required" + } + case "tool": { + return choice.name ? 
{ type: "function", name: choice.name } : undefined + } + case "none": { + return "none" + } + default: { + return undefined + } + } +} + +const isPlainText = ( + content: ResponseInputContent, +): content is ResponseInputText | { text: string } => { + if (typeof content !== "object") { + return false + } + + return ( + "text" in content + && typeof (content as ResponseInputText).text === "string" + && !("image_url" in content) + ) +} + +export const translateResponsesResultToAnthropic = ( + response: ResponsesResult, +): AnthropicResponse => { + const contentBlocks = mapOutputToAnthropicContent(response.output) + const usage = mapResponsesUsage(response) + let anthropicContent = fallbackContentBlocks(response.output_text) + if (contentBlocks.length > 0) { + anthropicContent = contentBlocks + } + + const stopReason = mapResponsesStopReason(response) + + return { + id: response.id, + type: "message", + role: "assistant", + content: anthropicContent, + model: response.model, + stop_reason: stopReason, + stop_sequence: null, + usage, + } +} + +const mapOutputToAnthropicContent = ( + output: Array, +): Array => { + const contentBlocks: Array = [] + + for (const item of output) { + switch (item.type) { + case "reasoning": { + const thinkingText = extractReasoningText(item) + if (thinkingText.length > 0) { + contentBlocks.push({ type: "thinking", thinking: thinkingText }) + } + break + } + case "function_call": { + const toolUseBlock = createToolUseContentBlock(item) + if (toolUseBlock) { + contentBlocks.push(toolUseBlock) + } + break + } + case "function_call_output": { + const outputBlock = createFunctionCallOutputBlock(item) + if (outputBlock) { + contentBlocks.push(outputBlock) + } + break + } + case "message": + case "output_text": { + const combinedText = combineMessageTextContent(item.content) + if (combinedText.length > 0) { + contentBlocks.push({ type: "text", text: combinedText }) + } + break + } + default: { + // Future compatibility for unrecognized output item types. 
+ const combinedText = combineMessageTextContent( + (item as { content?: Array }).content, + ) + if (combinedText.length > 0) { + contentBlocks.push({ type: "text", text: combinedText }) + } + } + } + } + + return contentBlocks +} + +const combineMessageTextContent = ( + content: Array | undefined, +): string => { + if (!Array.isArray(content)) { + return "" + } + + let aggregated = "" + + for (const block of content) { + if (isResponseOutputText(block)) { + aggregated += block.text + continue + } + + if (isResponseOutputRefusal(block)) { + aggregated += block.refusal + continue + } + + if (typeof (block as { text?: unknown }).text === "string") { + aggregated += (block as { text: string }).text + continue + } + + if (typeof (block as { reasoning?: unknown }).reasoning === "string") { + aggregated += (block as { reasoning: string }).reasoning + continue + } + } + + return aggregated +} + +const extractReasoningText = (item: ResponseOutputReasoning): string => { + const segments: Array = [] + + const collectFromBlocks = (blocks?: Array) => { + if (!Array.isArray(blocks)) { + return + } + + for (const block of blocks) { + if (typeof block.text === "string") { + segments.push(block.text) + continue + } + + if (typeof block.thinking === "string") { + segments.push(block.thinking) + continue + } + + const reasoningValue = (block as Record).reasoning + if (typeof reasoningValue === "string") { + segments.push(reasoningValue) + } + } + } + + collectFromBlocks(item.reasoning) + collectFromBlocks(item.summary) + + if (typeof item.thinking === "string") { + segments.push(item.thinking) + } + + const textValue = (item as Record).text + if (typeof textValue === "string") { + segments.push(textValue) + } + + return segments.join("").trim() +} + +const createToolUseContentBlock = ( + call: ResponseOutputFunctionCall, +): AnthropicToolUseBlock | null => { + const toolId = call.call_id ?? 
call.id + if (!call.name || !toolId) { + return null + } + + const input = parseFunctionCallArguments(call.arguments) + + return { + type: "tool_use", + id: toolId, + name: call.name, + input, + } +} + +const createFunctionCallOutputBlock = ( + output: ResponseOutputFunctionCallOutput, +): AnthropicAssistantContentBlock | null => { + if (typeof output.output !== "string" || output.output.length === 0) { + return null + } + + return { + type: "text", + text: output.output, + } +} + +const parseFunctionCallArguments = ( + rawArguments: string, +): Record => { + if (typeof rawArguments !== "string" || rawArguments.trim().length === 0) { + return {} + } + + try { + const parsed: unknown = JSON.parse(rawArguments) + + if (Array.isArray(parsed)) { + return { arguments: parsed } + } + + if (parsed && typeof parsed === "object") { + return parsed as Record + } + } catch (error) { + consola.warn("Failed to parse function call arguments", { + error, + rawArguments, + }) + } + + return { raw_arguments: rawArguments } +} + +const fallbackContentBlocks = ( + outputText: string, +): Array => { + if (!outputText) { + return [] + } + + return [ + { + type: "text", + text: outputText, + }, + ] +} + +const mapResponsesStopReason = ( + response: ResponsesResult, +): AnthropicResponse["stop_reason"] => { + const { status, incomplete_details: incompleteDetails } = response + + if (status === "completed") { + return "end_turn" + } + + if (status === "incomplete") { + if (incompleteDetails?.reason === "max_output_tokens") { + return "max_tokens" + } + if (incompleteDetails?.reason === "content_filter") { + return "end_turn" + } + if (incompleteDetails?.reason === "tool_use") { + return "tool_use" + } + } + + return null +} + +const mapResponsesUsage = ( + response: ResponsesResult, +): AnthropicResponse["usage"] => { + const promptTokens = response.usage?.input_tokens ?? 0 + const completionTokens = response.usage?.output_tokens ?? 0 + + return { + input_tokens: promptTokens, + output_tokens: completionTokens, + } +} + +const isRecord = (value: unknown): value is Record => + typeof value === "object" && value !== null + +const isResponseOutputText = ( + block: ResponseOutputContentBlock, +): block is ResponseOutputText => + isRecord(block) + && "type" in block + && (block as { type?: unknown }).type === "output_text" + +const isResponseOutputRefusal = ( + block: ResponseOutputContentBlock, +): block is ResponseOutputRefusal => + isRecord(block) + && "type" in block + && (block as { type?: unknown }).type === "refusal" + +const parseUserId = ( + userId: string | undefined, +): { safetyIdentifier: string | null; promptCacheKey: string | null } => { + if (!userId || typeof userId !== "string") { + return { safetyIdentifier: null, promptCacheKey: null } + } + + // Parse safety_identifier: content between "user_" and "_account" + const userMatch = userId.match(/user_([^_]+)_account/) + const safetyIdentifier = userMatch ? userMatch[1] : null + + // Parse prompt_cache_key: content after "_session_" + const sessionMatch = userId.match(/_session_(.+)$/) + const promptCacheKey = sessionMatch ? 
sessionMatch[1] : null + + return { safetyIdentifier, promptCacheKey } +} diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts new file mode 100644 index 00000000..ef7b38b9 --- /dev/null +++ b/src/routes/responses/handler.ts @@ -0,0 +1,78 @@ +import type { Context } from "hono" + +import consola from "consola" +import { streamSSE } from "hono/streaming" + +import { awaitApproval } from "~/lib/approval" +import { checkRateLimit } from "~/lib/rate-limit" +import { state } from "~/lib/state" +import { + createResponses, + type ResponsesPayload, + type ResponsesResult, +} from "~/services/copilot/create-responses" + +import { getResponsesRequestOptions } from "./utils" + +const RESPONSES_ENDPOINT = "/responses" + +export const handleResponses = async (c: Context) => { + await checkRateLimit(state) + + const payload = await c.req.json() + consola.debug("Responses request payload:", JSON.stringify(payload)) + + const selectedModel = state.models?.data.find( + (model) => model.id === payload.model, + ) + const supportsResponses = + selectedModel?.supported_endpoints?.includes(RESPONSES_ENDPOINT) ?? false + + if (!supportsResponses) { + return c.json( + { + error: { + message: + "This model does not support the responses endpoint. Please choose a different model.", + type: "invalid_request_error", + }, + }, + 400, + ) + } + + const { vision, initiator } = getResponsesRequestOptions(payload) + + if (state.manualApprove) { + await awaitApproval() + } + + const response = await createResponses(payload, { vision, initiator }) + + if (isStreamingRequested(payload) && isAsyncIterable(response)) { + consola.debug("Forwarding native Responses stream") + return streamSSE(c, async (stream) => { + for await (const chunk of response) { + consola.debug("Responses stream chunk:", JSON.stringify(chunk)) + await stream.writeSSE({ + id: (chunk as { id?: string }).id, + event: (chunk as { event?: string }).event, + data: (chunk as { data?: string }).data ?? "", + }) + } + }) + } + + consola.debug( + "Forwarding native Responses result:", + JSON.stringify(response).slice(-400), + ) + return c.json(response as ResponsesResult) +} + +const isAsyncIterable = (value: unknown): value is AsyncIterable => + Boolean(value) + && typeof (value as AsyncIterable)[Symbol.asyncIterator] === "function" + +const isStreamingRequested = (payload: ResponsesPayload): boolean => + Boolean(payload.stream) diff --git a/src/routes/responses/route.ts b/src/routes/responses/route.ts new file mode 100644 index 00000000..af242342 --- /dev/null +++ b/src/routes/responses/route.ts @@ -0,0 +1,15 @@ +import { Hono } from "hono" + +import { forwardError } from "~/lib/error" + +import { handleResponses } from "./handler" + +export const responsesRoutes = new Hono() + +responsesRoutes.post("/", async (c) => { + try { + return await handleResponses(c) + } catch (error) { + return await forwardError(c, error) + } +}) diff --git a/src/routes/responses/utils.ts b/src/routes/responses/utils.ts new file mode 100644 index 00000000..5dea1daa --- /dev/null +++ b/src/routes/responses/utils.ts @@ -0,0 +1,71 @@ +import type { + ResponseInputItem, + ResponsesPayload, +} from "~/services/copilot/create-responses" + +export const getResponsesRequestOptions = ( + payload: ResponsesPayload, +): { vision: boolean; initiator: "agent" | "user" } => { + const vision = hasVisionInput(payload) + const initiator = hasAgentInitiator(payload) ? 
"agent" : "user" + + return { vision, initiator } +} + +export const hasAgentInitiator = (payload: ResponsesPayload): boolean => + getPayloadItems(payload).some((item) => { + if (!("role" in item) || !item.role) { + return true + } + const role = typeof item.role === "string" ? item.role.toLowerCase() : "" + return role === "assistant" + }) + +export const hasVisionInput = (payload: ResponsesPayload): boolean => { + const values = getPayloadItems(payload) + return values.some((item) => containsVisionContent(item)) +} + +const getPayloadItems = ( + payload: ResponsesPayload, +): Array => { + const result: Array = [] + + const { input, instructions } = payload + + if (Array.isArray(input)) { + result.push(...input) + } + + if (Array.isArray(instructions)) { + result.push(...instructions) + } + + return result +} + +const containsVisionContent = (value: unknown): boolean => { + if (!value) return false + + if (Array.isArray(value)) { + return value.some((entry) => containsVisionContent(entry)) + } + + if (typeof value !== "object") { + return false + } + + const record = value as Record + const type = + typeof record.type === "string" ? record.type.toLowerCase() : undefined + + if (type === "input_image") { + return true + } + + if (Array.isArray(record.content)) { + return record.content.some((entry) => containsVisionContent(entry)) + } + + return false +} diff --git a/src/server.ts b/src/server.ts index 3cb2bb86..2d792c56 100644 --- a/src/server.ts +++ b/src/server.ts @@ -6,6 +6,7 @@ import { completionRoutes } from "./routes/chat-completions/route" import { embeddingRoutes } from "./routes/embeddings/route" import { messageRoutes } from "./routes/messages/route" import { modelRoutes } from "./routes/models/route" +import { responsesRoutes } from "./routes/responses/route" import { tokenRoute } from "./routes/token/route" import { usageRoute } from "./routes/usage/route" @@ -21,11 +22,13 @@ server.route("/models", modelRoutes) server.route("/embeddings", embeddingRoutes) server.route("/usage", usageRoute) server.route("/token", tokenRoute) +server.route("/responses", responsesRoutes) // Compatibility with tools that expect v1/ prefix server.route("/v1/chat/completions", completionRoutes) server.route("/v1/models", modelRoutes) server.route("/v1/embeddings", embeddingRoutes) +server.route("/v1/responses", responsesRoutes) // Anthropic compatible endpoints server.route("/v1/messages", messageRoutes) diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts new file mode 100644 index 00000000..9009abf6 --- /dev/null +++ b/src/services/copilot/create-responses.ts @@ -0,0 +1,212 @@ +import consola from "consola" +import { events } from "fetch-event-stream" + +import { copilotBaseUrl, copilotHeaders } from "~/lib/api-config" +import { HTTPError } from "~/lib/error" +import { state } from "~/lib/state" + +export interface ResponsesPayload { + model: string + input?: string | Array + instructions?: string | Array | null + temperature?: number | null + top_p?: number | null + max_output_tokens?: number | null + tools?: Array> | null + tool_choice?: unknown + metadata?: Record | null + stream?: boolean | null + response_format?: Record | null + safety_identifier?: string | null + prompt_cache_key?: string | null + parallel_tool_calls?: boolean | null + store?: boolean | null + reasoning?: Record | null + include?: Array + [key: string]: unknown +} + +export interface ResponseInputMessage { + type?: "message" + role: "user" | "assistant" | "system" | "developer" 
+ content?: string | Array + status?: string +} + +export interface ResponseFunctionToolCallItem { + type: "function_call" + call_id: string + name: string + arguments: string + status?: "in_progress" | "completed" | "incomplete" +} + +export interface ResponseFunctionCallOutputItem { + type: "function_call_output" + call_id: string + output: string + status?: "in_progress" | "completed" | "incomplete" +} + +export type ResponseInputItem = + | ResponseInputMessage + | ResponseFunctionToolCallItem + | ResponseFunctionCallOutputItem + | Record + +export type ResponseInputContent = + | ResponseInputText + | ResponseInputImage + | ResponseContentTextLike + | Record + +export interface ResponseInputText { + type?: "input_text" | "text" | "output_text" + text: string +} + +export interface ResponseInputImage { + type: "input_image" + image_url?: string | null + file_id?: string | null + detail?: "low" | "high" | "auto" +} + +export interface ResponseContentTextLike { + type?: "text" + text: string +} + +export interface ResponsesResult { + id: string + object: "response" + created_at: number + model: string + output: Array + output_text: string + status: string + usage?: ResponseUsage | null + error: Record | null + incomplete_details: Record | null + instructions: string | null + metadata: Record | null + parallel_tool_calls: boolean + temperature: number | null + tool_choice: unknown + tools: Array> + top_p: number | null +} + +export type ResponseOutputItem = + | ResponseOutputMessage + | ResponseOutputReasoning + | ResponseOutputFunctionCall + | ResponseOutputFunctionCallOutput + +export interface ResponseOutputMessage { + id: string + type: "message" | "output_text" + role: "assistant" + status: "completed" | "in_progress" | "incomplete" + content?: Array +} + +export interface ResponseOutputReasoning { + id: string + type: "reasoning" + reasoning?: Array + summary?: Array + thinking?: string + [key: string]: unknown +} + +export interface ResponseReasoningBlock { + type: string + text?: string + thinking?: string + [key: string]: unknown +} + +export interface ResponseOutputFunctionCall { + id: string + type: "function_call" + call_id?: string + name: string + arguments: string + status?: "in_progress" | "completed" | "incomplete" + [key: string]: unknown +} + +export interface ResponseOutputFunctionCallOutput { + id: string + type: "function_call_output" + call_id: string + output: string + status?: "in_progress" | "completed" | "incomplete" + [key: string]: unknown +} + +export type ResponseOutputContentBlock = + | ResponseOutputText + | ResponseOutputRefusal + | Record + +export interface ResponseOutputText { + type: "output_text" + text: string + annotations: Array +} + +export interface ResponseOutputRefusal { + type: "refusal" + refusal: string +} + +export interface ResponseUsage { + input_tokens: number + output_tokens?: number + total_tokens: number + input_tokens_details?: { + cached_tokens: number + } + output_tokens_details?: { + reasoning_tokens: number + } +} + +export type ResponsesStream = ReturnType +export type CreateResponsesReturn = ResponsesResult | ResponsesStream + +interface ResponsesRequestOptions { + vision: boolean + initiator: "agent" | "user" +} + +export const createResponses = async ( + payload: ResponsesPayload, + { vision, initiator }: ResponsesRequestOptions, +): Promise => { + if (!state.copilotToken) throw new Error("Copilot token not found") + + const headers: Record = { + ...copilotHeaders(state, vision), + "X-Initiator": initiator, + } + + const 
response = await fetch(`${copilotBaseUrl(state)}/responses`, { + method: "POST", + headers, + body: JSON.stringify(payload), + }) + + if (!response.ok) { + consola.error("Failed to create responses", response) + throw new HTTPError("Failed to create responses", response) + } + + if (payload.stream) { + return events(response) + } + + return (await response.json()) as ResponsesResult +} diff --git a/src/services/copilot/get-models.ts b/src/services/copilot/get-models.ts index 792adc48..d5618085 100644 --- a/src/services/copilot/get-models.ts +++ b/src/services/copilot/get-models.ts @@ -28,6 +28,9 @@ interface ModelSupports { tool_calls?: boolean parallel_tool_calls?: boolean dimensions?: boolean + streaming?: boolean + structured_outputs?: boolean + vision?: boolean } interface ModelCapabilities { @@ -52,4 +55,5 @@ interface Model { state: string terms: string } + supported_endpoints?: Array } diff --git a/tests/responses-stream-translation.test.ts b/tests/responses-stream-translation.test.ts new file mode 100644 index 00000000..9f149e1b --- /dev/null +++ b/tests/responses-stream-translation.test.ts @@ -0,0 +1,137 @@ +import { describe, expect, test } from "bun:test" + +import type { AnthropicStreamEventData } from "~/routes/messages/anthropic-types" + +import { + createResponsesStreamState, + translateResponsesStreamEvent, +} from "~/routes/messages/responses-stream-translation" + +const createFunctionCallAddedEvent = () => ({ + type: "response.output_item.added", + output_index: 1, + item: { + id: "item-1", + type: "function_call", + call_id: "call-1", + name: "TodoWrite", + arguments: "", + status: "in_progress", + }, +}) + +describe("translateResponsesStreamEvent tool calls", () => { + test("streams function call arguments across deltas", () => { + const state = createResponsesStreamState() + + const events = [ + translateResponsesStreamEvent(createFunctionCallAddedEvent(), state), + translateResponsesStreamEvent( + { + type: "response.function_call_arguments.delta", + output_index: 1, + delta: '{"todos":', + }, + state, + ), + translateResponsesStreamEvent( + { + type: "response.function_call_arguments.delta", + output_index: 1, + delta: "[]}", + }, + state, + ), + translateResponsesStreamEvent( + { + type: "response.function_call_arguments.done", + output_index: 1, + arguments: '{"todos":[]}', + }, + state, + ), + ].flat() + + const messageStart = events.find((event) => event.type === "message_start") + expect(messageStart).toBeDefined() + + const blockStart = events.find( + (event) => event.type === "content_block_start", + ) + expect(blockStart).toBeDefined() + if (blockStart?.type === "content_block_start") { + expect(blockStart.content_block).toEqual({ + type: "tool_use", + id: "call-1", + name: "TodoWrite", + input: {}, + }) + } + + const deltas = events.filter( + ( + event, + ): event is Extract< + AnthropicStreamEventData, + { type: "content_block_delta" } + > => event.type === "content_block_delta", + ) + expect(deltas).toHaveLength(2) + expect(deltas[0].delta).toEqual({ + type: "input_json_delta", + partial_json: '{"todos":', + }) + expect(deltas[1].delta).toEqual({ + type: "input_json_delta", + partial_json: "[]}", + }) + + const blockStop = events.find( + (event) => event.type === "content_block_stop", + ) + expect(blockStop).toBeDefined() + + expect(state.openBlocks.size).toBe(0) + expect(state.functionCallStateByOutputIndex.size).toBe(0) + }) + + test("emits full arguments when only done payload is present", () => { + const state = createResponsesStreamState() + + const 
events = [ + translateResponsesStreamEvent(createFunctionCallAddedEvent(), state), + translateResponsesStreamEvent( + { + type: "response.function_call_arguments.done", + output_index: 1, + arguments: + '{"todos":[{"content":"Review src/routes/responses/translation.ts"}]}', + }, + state, + ), + ].flat() + + const deltas = events.filter( + ( + event, + ): event is Extract< + AnthropicStreamEventData, + { type: "content_block_delta" } + > => event.type === "content_block_delta", + ) + expect(deltas).toHaveLength(1) + expect(deltas[0].delta).toEqual({ + type: "input_json_delta", + partial_json: + '{"todos":[{"content":"Review src/routes/responses/translation.ts"}]}', + }) + + const blockStop = events.find( + (event) => event.type === "content_block_stop", + ) + expect(blockStop).toBeDefined() + + expect(state.openBlocks.size).toBe(0) + expect(state.functionCallStateByOutputIndex.size).toBe(0) + }) +}) diff --git a/tests/translation.test.ts b/tests/translation.test.ts new file mode 100644 index 00000000..84856b93 --- /dev/null +++ b/tests/translation.test.ts @@ -0,0 +1,159 @@ +import { describe, expect, it } from "bun:test" + +import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types" +import type { + ResponseInputMessage, + ResponsesResult, +} from "~/services/copilot/create-responses" + +import { + translateAnthropicMessagesToResponsesPayload, + translateResponsesResultToAnthropic, +} from "~/routes/messages/responses-translation" + +const samplePayload = { + model: "claude-3-5-sonnet", + max_tokens: 1024, + messages: [ + { + role: "user", + content: [ + { + type: "text", + text: "\nThis is a reminder that your todo list is currently empty. DO NOT mention this to the user explicitly because they are already aware. If you are working on tasks that would benefit from a todo list please use the TodoWrite tool to create one. If not, please feel free to ignore. Again do not mention this message to the user.\n", + }, + { + type: "text", + text: "\nAs you answer the user's questions, you can use the following context:\n# important-instruction-reminders\nDo what has been asked; nothing more, nothing less.\nNEVER create files unless they're absolutely necessary for achieving your goal.\nALWAYS prefer editing an existing file to creating a new one.\nNEVER proactively create documentation files (*.md) or README files. Only create documentation files if explicitly requested by the User.\n\n \n IMPORTANT: this context may or may not be relevant to your tasks. You should not respond to this context unless it is highly relevant to your task.\n", + }, + { + type: "text", + text: "hi", + }, + { + type: "text", + text: "\nThe user opened the file c:\\Work2\\copilot-api\\src\\routes\\responses\\translation.ts in the IDE. 
This may or may not be related to the current task.\n", + }, + { + type: "text", + text: "hi", + cache_control: { + type: "ephemeral", + }, + }, + ], + }, + ], +} as unknown as AnthropicMessagesPayload + +describe("translateAnthropicMessagesToResponsesPayload", () => { + it("converts anthropic text blocks into response input messages", () => { + const result = translateAnthropicMessagesToResponsesPayload(samplePayload) + + console.log("result:", JSON.stringify(result, null, 2)) + expect(Array.isArray(result.input)).toBe(true) + const input = result.input as Array + expect(input).toHaveLength(1) + + const message = input[0] + expect(message.role).toBe("user") + expect(Array.isArray(message.content)).toBe(true) + + const content = message.content as Array<{ text: string }> + expect(content.map((item) => item.text)).toEqual([ + "\nThis is a reminder that your todo list is currently empty. DO NOT mention this to the user explicitly because they are already aware. If you are working on tasks that would benefit from a todo list please use the TodoWrite tool to create one. If not, please feel free to ignore. Again do not mention this message to the user.\n", + "\nAs you answer the user's questions, you can use the following context:\n# important-instruction-reminders\nDo what has been asked; nothing more, nothing less.\nNEVER create files unless they're absolutely necessary for achieving your goal.\nALWAYS prefer editing an existing file to creating a new one.\nNEVER proactively create documentation files (*.md) or README files. Only create documentation files if explicitly requested by the User.\n\n \n IMPORTANT: this context may or may not be relevant to your tasks. You should not respond to this context unless it is highly relevant to your task.\n", + "hi", + "\nThe user opened the file c:\\Work2\\copilot-api\\src\\routes\\responses\\translation.ts in the IDE. This may or may not be related to the current task.\n", + "hi", + ]) + }) +}) + +describe("translateResponsesResultToAnthropic", () => { + it("handles reasoning and function call items", () => { + const responsesResult: ResponsesResult = { + id: "resp_123", + object: "response", + created_at: 0, + model: "gpt-4.1", + output: [ + { + id: "reason_1", + type: "reasoning", + reasoning: [{ type: "text", text: "Thinking about the task." 
}], + }, + { + id: "call_1", + type: "function_call", + call_id: "call_1", + name: "TodoWrite", + arguments: + '{"todos":[{"content":"Read src/routes/responses/translation.ts","status":"in_progress"}]}', + status: "completed", + }, + { + id: "message_1", + type: "message", + role: "assistant", + status: "completed", + content: [ + { + type: "output_text", + text: "Added the task to your todo list.", + annotations: [], + }, + ], + }, + ], + output_text: "Added the task to your todo list.", + status: "incomplete", + usage: { + input_tokens: 120, + output_tokens: 36, + total_tokens: 156, + }, + error: null, + incomplete_details: { reason: "tool_use" }, + instructions: null, + metadata: null, + parallel_tool_calls: false, + temperature: null, + tool_choice: null, + tools: [], + top_p: null, + } + + const anthropicResponse = + translateResponsesResultToAnthropic(responsesResult) + + expect(anthropicResponse.stop_reason).toBe("tool_use") + expect(anthropicResponse.content).toHaveLength(3) + + const [thinkingBlock, toolUseBlock, textBlock] = anthropicResponse.content + + expect(thinkingBlock.type).toBe("thinking") + if (thinkingBlock.type === "thinking") { + expect(thinkingBlock.thinking).toContain("Thinking about the task") + } + + expect(toolUseBlock.type).toBe("tool_use") + if (toolUseBlock.type === "tool_use") { + expect(toolUseBlock.id).toBe("call_1") + expect(toolUseBlock.name).toBe("TodoWrite") + expect(toolUseBlock.input).toEqual({ + todos: [ + { + content: "Read src/routes/responses/translation.ts", + status: "in_progress", + }, + ], + }) + } + + expect(textBlock.type).toBe("text") + if (textBlock.type === "text") { + expect(textBlock.text).toBe("Added the task to your todo list.") + } + }) +}) From 87899a137a711e571852b41b35f90a91aa7013bd Mon Sep 17 00:00:00 2001 From: "Jeffrey.Cao" Date: Mon, 29 Sep 2025 15:43:04 +0800 Subject: [PATCH 02/20] feat: enhance output type for function call and add content conversion utility --- src/routes/messages/responses-translation.ts | 28 ++++++++++++++++++-- src/services/copilot/create-responses.ts | 10 ++----- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts index 057a7b96..5d0ff3b9 100644 --- a/src/routes/messages/responses-translation.ts +++ b/src/routes/messages/responses-translation.ts @@ -246,7 +246,7 @@ const createFunctionCallOutput = ( ): ResponseFunctionCallOutputItem => ({ type: "function_call_output", call_id: block.tool_use_id, - output: block.content, + output: convertToolResultContent(block.content), status: block.is_error ? "incomplete" : "completed", }) @@ -268,7 +268,7 @@ When using the BashOutput tool, follow these rules: - Only Bash Tool run_in_background set to true, Use BashOutput to read the output later ### TodoWrite tool When using the TodoWrite tool, follow these rules: -- Skip using the TodoWrite tool for straightforward tasks (roughly the easiest 25%). +- Skip using the TodoWrite tool for simple or straightforward tasks (roughly the easiest 25%). - Do not make single-step todo lists. 
- When you made a todo, update it after having performed one of the sub-tasks that you shared on the todo list.` @@ -636,3 +636,27 @@ const parseUserId = ( return { safetyIdentifier, promptCacheKey } } + +const convertToolResultContent = ( + content: string | Array | Array, +): string | Array => { + if (typeof content === "string") { + return content + } + + if (Array.isArray(content)) { + if (content.length > 0 && content[0].type === "text") { + return (content as Array).map((block) => + createTextContent(block.text), + ) + } + + if (content.length > 0 && content[0].type === "image") { + return (content as Array).map((block) => + createImageContent(block), + ) + } + } + + return "" +} diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts index 9009abf6..52a162ef 100644 --- a/src/services/copilot/create-responses.ts +++ b/src/services/copilot/create-responses.ts @@ -44,7 +44,7 @@ export interface ResponseFunctionToolCallItem { export interface ResponseFunctionCallOutputItem { type: "function_call_output" call_id: string - output: string + output: string | Array status?: "in_progress" | "completed" | "incomplete" } @@ -57,11 +57,10 @@ export type ResponseInputItem = export type ResponseInputContent = | ResponseInputText | ResponseInputImage - | ResponseContentTextLike | Record export interface ResponseInputText { - type?: "input_text" | "text" | "output_text" + type?: "input_text" | "output_text" text: string } @@ -72,11 +71,6 @@ export interface ResponseInputImage { detail?: "low" | "high" | "auto" } -export interface ResponseContentTextLike { - type?: "text" - text: string -} - export interface ResponsesResult { id: string object: "response" From 4fc0fa0e6d5b70623344081e6ca4eab9eb6fc128 Mon Sep 17 00:00:00 2001 From: "Jeffrey.Cao" Date: Mon, 29 Sep 2025 20:09:45 +0800 Subject: [PATCH 03/20] refactor: optimize content conversion logic in convertToolResultContent function --- src/routes/messages/responses-translation.ts | 26 ++++++++++++-------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts index 5d0ff3b9..71c877e1 100644 --- a/src/routes/messages/responses-translation.ts +++ b/src/routes/messages/responses-translation.ts @@ -645,17 +645,23 @@ const convertToolResultContent = ( } if (Array.isArray(content)) { - if (content.length > 0 && content[0].type === "text") { - return (content as Array).map((block) => - createTextContent(block.text), - ) - } - - if (content.length > 0 && content[0].type === "image") { - return (content as Array).map((block) => - createImageContent(block), - ) + const result: Array = [] + for (const block of content) { + switch (block.type) { + case "text": { + result.push(createTextContent(block.text)) + break + } + case "image": { + result.push(createImageContent(block)) + break + } + default: { + break + } + } } + return result } return "" From 2b9733bc0d0bc7af22ba834e7cc7d46f26cb10ae Mon Sep 17 00:00:00 2001 From: "Jeffrey.Cao" Date: Tue, 30 Sep 2025 09:31:52 +0800 Subject: [PATCH 04/20] refactor: remove unused function call output type and simplify response output message type --- src/routes/messages/responses-translation.ts | 24 +------------------- src/services/copilot/create-responses.ts | 12 +--------- 2 files changed, 2 insertions(+), 34 deletions(-) diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts index 71c877e1..41c26299 100644 --- 
a/src/routes/messages/responses-translation.ts +++ b/src/routes/messages/responses-translation.ts @@ -10,7 +10,6 @@ import { type ResponsesResult, type ResponseOutputContentBlock, type ResponseOutputFunctionCall, - type ResponseOutputFunctionCallOutput, type ResponseOutputItem, type ResponseOutputReasoning, type ResponseReasoningBlock, @@ -388,15 +387,7 @@ const mapOutputToAnthropicContent = ( } break } - case "function_call_output": { - const outputBlock = createFunctionCallOutputBlock(item) - if (outputBlock) { - contentBlocks.push(outputBlock) - } - break - } - case "message": - case "output_text": { + case "message": { const combinedText = combineMessageTextContent(item.content) if (combinedText.length > 0) { contentBlocks.push({ type: "text", text: combinedText }) @@ -511,19 +502,6 @@ const createToolUseContentBlock = ( } } -const createFunctionCallOutputBlock = ( - output: ResponseOutputFunctionCallOutput, -): AnthropicAssistantContentBlock | null => { - if (typeof output.output !== "string" || output.output.length === 0) { - return null - } - - return { - type: "text", - text: output.output, - } -} - const parseFunctionCallArguments = ( rawArguments: string, ): Record => { diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts index 52a162ef..b13349e4 100644 --- a/src/services/copilot/create-responses.ts +++ b/src/services/copilot/create-responses.ts @@ -95,11 +95,10 @@ export type ResponseOutputItem = | ResponseOutputMessage | ResponseOutputReasoning | ResponseOutputFunctionCall - | ResponseOutputFunctionCallOutput export interface ResponseOutputMessage { id: string - type: "message" | "output_text" + type: "message" role: "assistant" status: "completed" | "in_progress" | "incomplete" content?: Array @@ -131,15 +130,6 @@ export interface ResponseOutputFunctionCall { [key: string]: unknown } -export interface ResponseOutputFunctionCallOutput { - id: string - type: "function_call_output" - call_id: string - output: string - status?: "in_progress" | "completed" | "incomplete" - [key: string]: unknown -} - export type ResponseOutputContentBlock = | ResponseOutputText | ResponseOutputRefusal From 505f648a77af6036cd3b846b91fe1eb67c3168c1 Mon Sep 17 00:00:00 2001 From: "Jeffrey.Cao" Date: Tue, 30 Sep 2025 15:43:05 +0800 Subject: [PATCH 05/20] feat: add signature and reasoning handling to responses translation and state management --- src/routes/messages/anthropic-types.ts | 1 + .../messages/responses-stream-translation.ts | 159 +++++++++++++++++- src/routes/messages/responses-translation.ts | 56 +++--- src/routes/responses/utils.ts | 6 +- src/services/copilot/create-responses.ts | 22 ++- 5 files changed, 200 insertions(+), 44 deletions(-) diff --git a/src/routes/messages/anthropic-types.ts b/src/routes/messages/anthropic-types.ts index 881fffcc..f07485bf 100644 --- a/src/routes/messages/anthropic-types.ts +++ b/src/routes/messages/anthropic-types.ts @@ -56,6 +56,7 @@ export interface AnthropicToolUseBlock { export interface AnthropicThinkingBlock { type: "thinking" thinking: string + signature: string } export type AnthropicUserContentBlock = diff --git a/src/routes/messages/responses-stream-translation.ts b/src/routes/messages/responses-stream-translation.ts index 06feab1a..a3857e8a 100644 --- a/src/routes/messages/responses-stream-translation.ts +++ b/src/routes/messages/responses-stream-translation.ts @@ -15,6 +15,7 @@ export interface ResponsesStreamState { initialInputTokens?: number functionCallStateByOutputIndex: Map 
functionCallOutputIndexByItemId: Map + summryIndex: number } type FunctionCallStreamState = { @@ -32,6 +33,7 @@ export const createResponsesStreamState = (): ResponsesStreamState => ({ blockHasDelta: new Set(), functionCallStateByOutputIndex: new Map(), functionCallOutputIndexByItemId: new Map(), + summryIndex: 0, }) export const translateResponsesStreamEvent = ( @@ -49,12 +51,18 @@ export const translateResponsesStreamEvent = ( return handleResponseCreated(rawEvent, state) } - case "response.reasoning_summary_text.delta": + case "response.reasoning_summary_text.delta": { + return handleReasoningSummaryTextDelta(rawEvent, state) + } + case "response.output_text.delta": { return handleOutputTextDelta(rawEvent, state) } - case "response.reasoning_summary_part.done": + case "response.reasoning_summary_part.done": { + return handleReasoningSummaryPartDone(rawEvent, state) + } + case "response.output_text.done": { return handleOutputTextDone(rawEvent, state) } @@ -63,6 +71,10 @@ export const translateResponsesStreamEvent = ( return handleOutputItemAdded(rawEvent, state) } + case "response.output_item.done": { + return handleOutputItemDone(rawEvent, state) + } + case "response.function_call_arguments.delta": { return handleFunctionCallArgumentsDelta(rawEvent, state) } @@ -143,6 +155,51 @@ const handleOutputItemAdded = ( return events } +const handleOutputItemDone = ( + rawEvent: Record, + state: ResponsesStreamState, +): Array => { + const events = ensureMessageStart(state) + + const item = isRecord(rawEvent.item) ? rawEvent.item : undefined + if (!item) { + return events + } + + const itemType = typeof item.type === "string" ? item.type : undefined + if (itemType !== "reasoning") { + return events + } + + const outputIndex = toNumber(rawEvent.output_index) + const contentIndex = state.summryIndex + + const blockIndex = openThinkingBlockIfNeeded(state, { + outputIndex, + contentIndex, + events, + }) + + const signature = + typeof item.encrypted_content === "string" ? item.encrypted_content : "" + + if (signature) { + events.push({ + type: "content_block_delta", + index: blockIndex, + delta: { + type: "signature_delta", + signature, + }, + }) + state.blockHasDelta.add(blockIndex) + } + + closeBlockIfOpen(state, blockIndex, events) + + return events +} + const handleFunctionCallArgumentsDelta = ( rawEvent: Record, state: ResponsesStreamState, @@ -257,6 +314,71 @@ const handleOutputTextDelta = ( return events } +const handleReasoningSummaryTextDelta = ( + rawEvent: Record, + state: ResponsesStreamState, +): Array => { + const events = ensureMessageStart(state) + + const outputIndex = toNumber(rawEvent.output_index) + const contentIndex = toNumber(rawEvent.summary_index) + const deltaText = typeof rawEvent.delta === "string" ? rawEvent.delta : "" + + if (!deltaText) { + return events + } + + const blockIndex = openThinkingBlockIfNeeded(state, { + outputIndex, + contentIndex, + events, + }) + + events.push({ + type: "content_block_delta", + index: blockIndex, + delta: { + type: "thinking_delta", + thinking: deltaText, + }, + }) + state.blockHasDelta.add(blockIndex) + + return events +} + +const handleReasoningSummaryPartDone = ( + rawEvent: Record, + state: ResponsesStreamState, +): Array => { + const events = ensureMessageStart(state) + + const outputIndex = toNumber(rawEvent.output_index) + const contentIndex = toNumber(rawEvent.summary_index) + state.summryIndex = contentIndex + const part = isRecord(rawEvent.part) ? rawEvent.part : undefined + const text = part && typeof part.text === "string" ? 
part.text : "" + + const blockIndex = openThinkingBlockIfNeeded(state, { + outputIndex, + contentIndex, + events, + }) + + if (text && !state.blockHasDelta.has(blockIndex)) { + events.push({ + type: "content_block_delta", + index: blockIndex, + delta: { + type: "thinking_delta", + thinking: text, + }, + }) + } + + return events +} + const handleOutputTextDone = ( rawEvent: Record, state: ResponsesStreamState, @@ -430,6 +552,39 @@ const openTextBlockIfNeeded = ( return blockIndex } +const openThinkingBlockIfNeeded = ( + state: ResponsesStreamState, + params: { + outputIndex: number + contentIndex: number + events: Array + }, +): number => { + const { outputIndex, contentIndex, events } = params + const key = getBlockKey(outputIndex, contentIndex) + let blockIndex = state.blockIndexByKey.get(key) + + if (blockIndex === undefined) { + blockIndex = state.nextContentBlockIndex + state.nextContentBlockIndex += 1 + state.blockIndexByKey.set(key, blockIndex) + } + + if (!state.openBlocks.has(blockIndex)) { + events.push({ + type: "content_block_start", + index: blockIndex, + content_block: { + type: "thinking", + thinking: "", + }, + }) + state.openBlocks.add(blockIndex) + } + + return blockIndex +} + const closeBlockIfOpen = ( state: ResponsesStreamState, blockIndex: number, diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts index 41c26299..50ae3a60 100644 --- a/src/routes/messages/responses-translation.ts +++ b/src/routes/messages/responses-translation.ts @@ -6,6 +6,7 @@ import { type ResponseInputImage, type ResponseInputItem, type ResponseInputMessage, + type ResponseInputReasoning, type ResponseInputText, type ResponsesResult, type ResponseOutputContentBlock, @@ -27,6 +28,7 @@ import { type AnthropicMessage, type AnthropicMessagesPayload, type AnthropicTextBlock, + type AnthropicThinkingBlock, type AnthropicTool, type AnthropicToolResultBlock, type AnthropicToolUseBlock, @@ -137,6 +139,12 @@ const translateAssistantMessage = ( continue } + if (block.type === "thinking") { + flushPendingContent("assistant", pendingContent, items) + items.push(createReasoningContent(block)) + continue + } + const converted = translateAssistantContentBlock(block) if (converted) { pendingContent.push(converted) @@ -158,9 +166,6 @@ const translateUserContentBlock = ( case "image": { return createImageContent(block) } - case "tool_result": { - return undefined - } default: { return undefined } @@ -174,12 +179,6 @@ const translateAssistantContentBlock = ( case "text": { return createOutPutTextContent(block.text) } - case "thinking": { - return createOutPutTextContent(block.thinking) - } - case "tool_use": { - return undefined - } default: { return undefined } @@ -230,6 +229,19 @@ const createImageContent = ( image_url: `data:${block.source.media_type};base64,${block.source.data}`, }) +const createReasoningContent = ( + block: AnthropicThinkingBlock, +): ResponseInputReasoning => ({ + type: "reasoning", + summary: [ + { + type: "summary_text", + text: block.thinking, + }, + ], + encrypted_content: block.signature, +}) + const createFunctionToolCall = ( block: AnthropicToolUseBlock, ): ResponseFunctionToolCallItem => ({ @@ -376,7 +388,11 @@ const mapOutputToAnthropicContent = ( case "reasoning": { const thinkingText = extractReasoningText(item) if (thinkingText.length > 0) { - contentBlocks.push({ type: "thinking", thinking: thinkingText }) + contentBlocks.push({ + type: "thinking", + thinking: thinkingText, + signature: item.encrypted_content ?? 
"", + }) } break } @@ -456,31 +472,11 @@ const extractReasoningText = (item: ResponseOutputReasoning): string => { segments.push(block.text) continue } - - if (typeof block.thinking === "string") { - segments.push(block.thinking) - continue - } - - const reasoningValue = (block as Record).reasoning - if (typeof reasoningValue === "string") { - segments.push(reasoningValue) - } } } - collectFromBlocks(item.reasoning) collectFromBlocks(item.summary) - if (typeof item.thinking === "string") { - segments.push(item.thinking) - } - - const textValue = (item as Record).text - if (typeof textValue === "string") { - segments.push(textValue) - } - return segments.join("").trim() } diff --git a/src/routes/responses/utils.ts b/src/routes/responses/utils.ts index 5dea1daa..734319cd 100644 --- a/src/routes/responses/utils.ts +++ b/src/routes/responses/utils.ts @@ -31,16 +31,12 @@ const getPayloadItems = ( ): Array => { const result: Array = [] - const { input, instructions } = payload + const { input } = payload if (Array.isArray(input)) { result.push(...input) } - if (Array.isArray(instructions)) { - result.push(...instructions) - } - return result } diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts index b13349e4..8322cace 100644 --- a/src/services/copilot/create-responses.ts +++ b/src/services/copilot/create-responses.ts @@ -7,13 +7,13 @@ import { state } from "~/lib/state" export interface ResponsesPayload { model: string + instructions?: string | null input?: string | Array - instructions?: string | Array | null + tools?: Array> | null + tool_choice?: unknown temperature?: number | null top_p?: number | null max_output_tokens?: number | null - tools?: Array> | null - tool_choice?: unknown metadata?: Record | null stream?: boolean | null response_format?: Record | null @@ -48,10 +48,20 @@ export interface ResponseFunctionCallOutputItem { status?: "in_progress" | "completed" | "incomplete" } +export interface ResponseInputReasoning { + type: "reasoning" + summary: Array<{ + type: "summary_text" + text: string + }> + encrypted_content: string +} + export type ResponseInputItem = | ResponseInputMessage | ResponseFunctionToolCallItem | ResponseFunctionCallOutputItem + | ResponseInputReasoning | Record export type ResponseInputContent = @@ -107,17 +117,15 @@ export interface ResponseOutputMessage { export interface ResponseOutputReasoning { id: string type: "reasoning" - reasoning?: Array summary?: Array - thinking?: string + encrypted_content?: string + status: "completed" | "in_progress" | "incomplete" [key: string]: unknown } export interface ResponseReasoningBlock { type: string text?: string - thinking?: string - [key: string]: unknown } export interface ResponseOutputFunctionCall { From 9477b4541280246541f16c3416865d58d8170a1d Mon Sep 17 00:00:00 2001 From: "Jeffrey.Cao" Date: Tue, 30 Sep 2025 15:48:32 +0800 Subject: [PATCH 06/20] feat: add signature to thinking messages and enhance reasoning structure in translation tests --- tests/anthropic-request.test.ts | 2 ++ tests/translation.test.ts | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/anthropic-request.test.ts b/tests/anthropic-request.test.ts index a4a5b06b..c86bcac1 100644 --- a/tests/anthropic-request.test.ts +++ b/tests/anthropic-request.test.ts @@ -136,6 +136,7 @@ describe("Anthropic to OpenAI translation logic", () => { { type: "thinking", thinking: "Let me think about this simple math problem...", + signature: "abc123", }, { type: "text", text: "2+2 equals 4." 
}, ], @@ -168,6 +169,7 @@ describe("Anthropic to OpenAI translation logic", () => { type: "thinking", thinking: "I need to call the weather API to get current weather information.", + signature: "def456", }, { type: "text", text: "I'll check the weather for you." }, { diff --git a/tests/translation.test.ts b/tests/translation.test.ts index 84856b93..0c3ececb 100644 --- a/tests/translation.test.ts +++ b/tests/translation.test.ts @@ -81,7 +81,9 @@ describe("translateResponsesResultToAnthropic", () => { { id: "reason_1", type: "reasoning", - reasoning: [{ type: "text", text: "Thinking about the task." }], + summary: [{ type: "text", text: "Thinking about the task." }], + status: "completed", + encrypted_content: "encrypted_reasoning_content", }, { id: "call_1", From 44551f9aae3fd6cb99a27b349cce827df37021f5 Mon Sep 17 00:00:00 2001 From: "Jeffrey.Cao" Date: Tue, 30 Sep 2025 15:56:34 +0800 Subject: [PATCH 07/20] refactor: remove summaryIndex from ResponsesStreamState and related handlers --- .../messages/responses-stream-translation.ts | 33 ++++--------------- 1 file changed, 6 insertions(+), 27 deletions(-) diff --git a/src/routes/messages/responses-stream-translation.ts b/src/routes/messages/responses-stream-translation.ts index a3857e8a..3a4bdfd9 100644 --- a/src/routes/messages/responses-stream-translation.ts +++ b/src/routes/messages/responses-stream-translation.ts @@ -15,7 +15,6 @@ export interface ResponsesStreamState { initialInputTokens?: number functionCallStateByOutputIndex: Map functionCallOutputIndexByItemId: Map - summryIndex: number } type FunctionCallStreamState = { @@ -33,7 +32,6 @@ export const createResponsesStreamState = (): ResponsesStreamState => ({ blockHasDelta: new Set(), functionCallStateByOutputIndex: new Map(), functionCallOutputIndexByItemId: new Map(), - summryIndex: 0, }) export const translateResponsesStreamEvent = ( @@ -172,13 +170,8 @@ const handleOutputItemDone = ( } const outputIndex = toNumber(rawEvent.output_index) - const contentIndex = state.summryIndex - const blockIndex = openThinkingBlockIfNeeded(state, { - outputIndex, - contentIndex, - events, - }) + const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events) const signature = typeof item.encrypted_content === "string" ? item.encrypted_content : "" @@ -321,18 +314,13 @@ const handleReasoningSummaryTextDelta = ( const events = ensureMessageStart(state) const outputIndex = toNumber(rawEvent.output_index) - const contentIndex = toNumber(rawEvent.summary_index) const deltaText = typeof rawEvent.delta === "string" ? rawEvent.delta : "" if (!deltaText) { return events } - const blockIndex = openThinkingBlockIfNeeded(state, { - outputIndex, - contentIndex, - events, - }) + const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events) events.push({ type: "content_block_delta", @@ -354,16 +342,10 @@ const handleReasoningSummaryPartDone = ( const events = ensureMessageStart(state) const outputIndex = toNumber(rawEvent.output_index) - const contentIndex = toNumber(rawEvent.summary_index) - state.summryIndex = contentIndex const part = isRecord(rawEvent.part) ? rawEvent.part : undefined const text = part && typeof part.text === "string" ? 
part.text : "" - const blockIndex = openThinkingBlockIfNeeded(state, { - outputIndex, - contentIndex, - events, - }) + const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events) if (text && !state.blockHasDelta.has(blockIndex)) { events.push({ @@ -554,13 +536,10 @@ const openTextBlockIfNeeded = ( const openThinkingBlockIfNeeded = ( state: ResponsesStreamState, - params: { - outputIndex: number - contentIndex: number - events: Array - }, + outputIndex: number, + events: Array, ): number => { - const { outputIndex, contentIndex, events } = params + const contentIndex = 0 const key = getBlockKey(outputIndex, contentIndex) let blockIndex = state.blockIndexByKey.get(key) From 708ae3377f58ff1b7902d5983e308434ee00bb4f Mon Sep 17 00:00:00 2001 From: "Jeffrey.Cao" Date: Tue, 30 Sep 2025 23:21:27 +0800 Subject: [PATCH 08/20] feat: enhance streaming response handling with ping mechanism --- README.md | 11 ++++++----- src/routes/responses/handler.ts | 30 +++++++++++++++++++++++------- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index e5b39099..e0aa9d3e 100644 --- a/README.md +++ b/README.md @@ -184,11 +184,12 @@ The server exposes several endpoints to interact with the Copilot API. It provid These endpoints mimic the OpenAI API structure. -| Endpoint | Method | Description | -| --------------------------- | ------ | --------------------------------------------------------- | -| `POST /v1/chat/completions` | `POST` | Creates a model response for the given chat conversation. | -| `GET /v1/models` | `GET` | Lists the currently available models. | -| `POST /v1/embeddings` | `POST` | Creates an embedding vector representing the input text. | +| Endpoint | Method | Description | +| --------------------------- | ------ | ---------------------------------------------------------------- | +| `POST /v1/responses` | `POST` | Most advanced interface for generating model responses. | +| `POST /v1/chat/completions` | `POST` | Creates a model response for the given chat conversation. | +| `GET /v1/models` | `GET` | Lists the currently available models. | +| `POST /v1/embeddings` | `POST` | Creates an embedding vector representing the input text. | ### Anthropic Compatible Endpoints diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts index ef7b38b9..d06d02d6 100644 --- a/src/routes/responses/handler.ts +++ b/src/routes/responses/handler.ts @@ -52,13 +52,29 @@ export const handleResponses = async (c: Context) => { if (isStreamingRequested(payload) && isAsyncIterable(response)) { consola.debug("Forwarding native Responses stream") return streamSSE(c, async (stream) => { - for await (const chunk of response) { - consola.debug("Responses stream chunk:", JSON.stringify(chunk)) - await stream.writeSSE({ - id: (chunk as { id?: string }).id, - event: (chunk as { event?: string }).event, - data: (chunk as { data?: string }).data ?? "", - }) + const pingInterval = setInterval(async () => { + try { + await stream.writeSSE({ + event: "ping", + data: JSON.stringify({ timestamp: Date.now() }), + }) + } catch (error) { + consola.warn("Failed to send ping:", error) + clearInterval(pingInterval) + } + }, 3000) + + try { + for await (const chunk of response) { + consola.debug("Responses stream chunk:", JSON.stringify(chunk)) + await stream.writeSSE({ + id: (chunk as { id?: string }).id, + event: (chunk as { event?: string }).event, + data: (chunk as { data?: string }).data ?? 
"", + }) + } + } finally { + clearInterval(pingInterval) } }) } From 47fb3e46032ed8062fb50e9f20609733588bc95c Mon Sep 17 00:00:00 2001 From: "Jeffrey.Cao" Date: Wed, 1 Oct 2025 18:43:11 +0800 Subject: [PATCH 09/20] feat: responses translation add cache_read_input_tokens --- src/routes/messages/responses-stream-translation.ts | 11 ++++++++--- src/routes/messages/responses-translation.ts | 13 +++++++++---- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/routes/messages/responses-stream-translation.ts b/src/routes/messages/responses-stream-translation.ts index 3a4bdfd9..db09bf14 100644 --- a/src/routes/messages/responses-stream-translation.ts +++ b/src/routes/messages/responses-stream-translation.ts @@ -13,6 +13,7 @@ export interface ResponsesStreamState { currentResponseId?: string currentModel?: string initialInputTokens?: number + initialInputCachedTokens?: number functionCallStateByOutputIndex: Map functionCallOutputIndexByItemId: Map } @@ -476,11 +477,10 @@ const ensureMessageStart = ( const id = response?.id ?? state.currentResponseId ?? "response" const model = response?.model ?? state.currentModel ?? "" - const inputTokens = - response?.usage?.input_tokens ?? state.initialInputTokens ?? 0 - state.messageStartSent = true + const inputTokens = + (state.initialInputTokens ?? 0) - (state.initialInputCachedTokens ?? 0) return [ { type: "message_start", @@ -495,6 +495,9 @@ const ensureMessageStart = ( usage: { input_tokens: inputTokens, output_tokens: 0, + ...(state.initialInputCachedTokens !== undefined && { + cache_creation_input_tokens: state.initialInputCachedTokens, + }), }, }, }, @@ -597,6 +600,8 @@ const cacheResponseMetadata = ( state.currentResponseId = response.id state.currentModel = response.model state.initialInputTokens = response.usage?.input_tokens ?? 0 + state.initialInputCachedTokens = + response.usage?.input_tokens_details?.cached_tokens } const buildErrorEvent = (message: string): AnthropicStreamEventData => ({ diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts index 50ae3a60..00f48129 100644 --- a/src/routes/messages/responses-translation.ts +++ b/src/routes/messages/responses-translation.ts @@ -567,12 +567,17 @@ const mapResponsesStopReason = ( const mapResponsesUsage = ( response: ResponsesResult, ): AnthropicResponse["usage"] => { - const promptTokens = response.usage?.input_tokens ?? 0 - const completionTokens = response.usage?.output_tokens ?? 0 + const inputTokens = response.usage?.input_tokens ?? 0 + const outputTokens = response.usage?.output_tokens ?? 0 + const inputCachedTokens = response.usage?.input_tokens_details?.cached_tokens return { - input_tokens: promptTokens, - output_tokens: completionTokens, + input_tokens: inputTokens - (inputCachedTokens ?? 
0), + output_tokens: outputTokens, + ...(response.usage?.input_tokens_details?.cached_tokens !== undefined && { + cache_read_input_tokens: + response.usage.input_tokens_details.cached_tokens, + }), } } From 619d4828ad6dc54842f2b34fd7cd4d13a976fe3d Mon Sep 17 00:00:00 2001 From: "Jeffrey.Cao" Date: Tue, 7 Oct 2025 20:57:05 +0800 Subject: [PATCH 10/20] feat: improve event log and enhance reasoning content handling by adding signature check and update prompt --- src/routes/messages/handler.ts | 13 ++++++----- src/routes/messages/responses-translation.ts | 23 +++++++++++++++----- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index a3faf045..cf50d8f7 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -138,28 +138,29 @@ const handleWithResponsesApi = async ( const streamState = createResponsesStreamState() for await (const chunk of response) { - consola.debug("Responses raw stream event:", JSON.stringify(chunk)) - - const eventName = (chunk as { event?: string }).event + const eventName = chunk.event if (eventName === "ping") { await stream.writeSSE({ event: "ping", data: "" }) continue } - const data = (chunk as { data?: string }).data + const data = chunk.data if (!data) { continue } + consola.debug("Responses raw stream event:", data) + const events = translateResponsesStreamEvent( JSON.parse(data) as ResponseStreamEvent, streamState, ) for (const event of events) { - consola.debug("Translated Anthropic event:", JSON.stringify(event)) + const eventData = JSON.stringify(event) + consola.debug("Translated Anthropic event:", eventData) await stream.writeSSE({ event: event.type, - data: JSON.stringify(event), + data: eventData, }) } } diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts index ceb5e301..9084fc78 100644 --- a/src/routes/messages/responses-translation.ts +++ b/src/routes/messages/responses-translation.ts @@ -142,7 +142,11 @@ const translateAssistantMessage = ( continue } - if (block.type === "thinking") { + if ( + block.type === "thinking" + && block.signature + && block.signature.includes("@") + ) { flushPendingContent("assistant", pendingContent, items) items.push(createReasoningContent(block)) continue @@ -233,16 +237,21 @@ const createImageContent = ( const createReasoningContent = ( block: AnthropicThinkingBlock, ): ResponseInputReasoning => { - // allign with vscode-copilot-chat extractThinkingData, otherwise it will cause miss cache occasionally —— the usage input cached tokens to be 0 + // align with vscode-copilot-chat extractThinkingData, should add id, otherwise it will cause miss cache occasionally —— the usage input cached tokens to be 0 // https://github.com/microsoft/vscode-copilot-chat/blob/main/src/platform/endpoint/node/responsesApi.ts#L162 // when use in codex cli, reasoning id is empty, so it will cause miss cache occasionally const array = block.signature.split("@") const signature = array[0] - const id = array.length > 1 ? 
array[1] : undefined + const id = array[1] return { id, type: "reasoning", - summary: [], + summary: [ + { + type: "summary_text", + text: block.thinking, + }, + ], encrypted_content: signature, } } @@ -284,9 +293,11 @@ When using the BashOutput tool, follow these rules: - Only Bash Tool run_in_background set to true, Use BashOutput to read the output later ### TodoWrite tool When using the TodoWrite tool, follow these rules: -- Skip using the TodoWrite tool for simple or straightforward tasks (roughly the easiest 25%). +- Skip using the TodoWrite tool for tasks with three or fewer steps. - Do not make single-step todo lists. -- When you made a todo, update it after having performed one of the sub-tasks that you shared on the todo list.` +- When you made a todo, update it after having performed one of the sub-tasks that you shared on the todo list. +## Special user requests +- If the user makes a simple request (such as asking for the time) which you can fulfill by running a terminal command (such as ''date''), you should do so.` if (typeof system === "string") { return system + toolUsePrompt From 5c6e4c6fae09356d60652a70a03a8dad3c59ad64 Mon Sep 17 00:00:00 2001 From: "Jeffrey.Cao" Date: Wed, 8 Oct 2025 00:40:34 +0800 Subject: [PATCH 11/20] 1.fix claude code 2.0.28 warmup request consume premium request, forcing small model if no tools are used 2.add bun idleTimeout = 0 3.feat: Compatible with Claude code JSONL file usage error scenarios, delay closeBlockIfOpen and map responses api to anthropic support tool_use and fix spelling errors 4.feat: add configuration management with extra prompt handling and ensure config file creation --- src/lib/config.ts | 87 +++++++++++++++++++ src/lib/paths.ts | 3 + src/routes/messages/handler.ts | 6 ++ .../messages/responses-stream-translation.ts | 21 +++-- src/routes/messages/responses-translation.ts | 30 +++---- src/routes/responses/handler.ts | 30 ++----- src/start.ts | 3 + tests/responses-stream-translation.test.ts | 14 +-- 8 files changed, 130 insertions(+), 64 deletions(-) create mode 100644 src/lib/config.ts diff --git a/src/lib/config.ts b/src/lib/config.ts new file mode 100644 index 00000000..d39e4684 --- /dev/null +++ b/src/lib/config.ts @@ -0,0 +1,87 @@ +import consola from "consola" +import fs from "node:fs" + +import { PATHS } from "./paths" + +export interface AppConfig { + extraPrompts?: Record + smallModel?: string +} + +const defaultConfig: AppConfig = { + extraPrompts: { + "gpt-5-codex": ` +## Tool use +- You have access to many tools. If a tool exists to perform a specific task, you MUST use that tool instead of running a terminal command to perform that task. +### Bash tool +When using the Bash tool, follow these rules: +- always run_in_background set to false, unless you are running a long-running command (e.g., a server or a watch command). +### BashOutput tool +When using the BashOutput tool, follow these rules: +- Only Bash Tool run_in_background set to true, Use BashOutput to read the output later +### TodoWrite tool +When using the TodoWrite tool, follow these rules: +- Skip using the TodoWrite tool for tasks with three or fewer steps. +- Do not make single-step todo lists. +- When you made a todo, update it after having performed one of the sub-tasks that you shared on the todo list. +## Special user requests +- If the user makes a simple request (such as asking for the time) which you can fulfill by running a terminal command (such as 'date'), you should do so. 
+`, + }, + smallModel: "gpt-5-mini", +} + +let cachedConfig: AppConfig | null = null + +function ensureConfigFile(): void { + try { + fs.accessSync(PATHS.CONFIG_PATH, fs.constants.R_OK | fs.constants.W_OK) + } catch { + fs.writeFileSync( + PATHS.CONFIG_PATH, + `${JSON.stringify(defaultConfig, null, 2)}\n`, + "utf8", + ) + try { + fs.chmodSync(PATHS.CONFIG_PATH, 0o600) + } catch { + return + } + } +} + +function readConfigFromDisk(): AppConfig { + ensureConfigFile() + try { + const raw = fs.readFileSync(PATHS.CONFIG_PATH, "utf8") + if (!raw.trim()) { + fs.writeFileSync( + PATHS.CONFIG_PATH, + `${JSON.stringify(defaultConfig, null, 2)}\n`, + "utf8", + ) + return defaultConfig + } + return JSON.parse(raw) as AppConfig + } catch (error) { + consola.error("Failed to read config file, using default config", error) + return defaultConfig + } +} + +export function getConfig(): AppConfig { + if (!cachedConfig) { + cachedConfig = readConfigFromDisk() + } + return cachedConfig +} + +export function getExtraPromptForModel(model: string): string { + const config = getConfig() + return config.extraPrompts?.[model] ?? "" +} + +export function getSmallModel(): string { + const config = getConfig() + return config.smallModel ?? "gpt-5-mini" +} diff --git a/src/lib/paths.ts b/src/lib/paths.ts index 8d0a9f02..e85c21d8 100644 --- a/src/lib/paths.ts +++ b/src/lib/paths.ts @@ -5,15 +5,18 @@ import path from "node:path" const APP_DIR = path.join(os.homedir(), ".local", "share", "copilot-api") const GITHUB_TOKEN_PATH = path.join(APP_DIR, "github_token") +const CONFIG_PATH = path.join(APP_DIR, "config.json") export const PATHS = { APP_DIR, GITHUB_TOKEN_PATH, + CONFIG_PATH, } export async function ensurePaths(): Promise { await fs.mkdir(PATHS.APP_DIR, { recursive: true }) await ensureFile(PATHS.GITHUB_TOKEN_PATH) + await ensureFile(PATHS.CONFIG_PATH) } async function ensureFile(filePath: string): Promise { diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index cf50d8f7..f6dd033d 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -4,6 +4,7 @@ import consola from "consola" import { streamSSE } from "hono/streaming" import { awaitApproval } from "~/lib/approval" +import { getSmallModel } from "~/lib/config" import { checkRateLimit } from "~/lib/rate-limit" import { state } from "~/lib/state" import { @@ -42,6 +43,11 @@ export async function handleCompletion(c: Context) { const anthropicPayload = await c.req.json() consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload)) + // fix claude code 2.0.28 warmup request consume premium request, forcing small model if no tools are used + if (!anthropicPayload.tools || anthropicPayload.tools.length === 0) { + anthropicPayload.model = getSmallModel() + } + const useResponsesApi = shouldUseResponsesApi(anthropicPayload.model) if (state.manualApprove) { diff --git a/src/routes/messages/responses-stream-translation.ts b/src/routes/messages/responses-stream-translation.ts index 56fd536e..de4b0c3a 100644 --- a/src/routes/messages/responses-stream-translation.ts +++ b/src/routes/messages/responses-stream-translation.ts @@ -176,8 +176,6 @@ const handleOutputItemDone = ( state.blockHasDelta.add(blockIndex) } - closeBlockIfOpen(state, blockIndex, events) - return events } @@ -232,7 +230,6 @@ const handleFunctionCallArgumentsDone = ( state.blockHasDelta.add(blockIndex) } - closeBlockIfOpen(state, blockIndex, events) state.functionCallStateByOutputIndex.delete(outputIndex) return events } @@ -340,8 +337,6 @@ 
const handleOutputTextDone = ( }) } - closeBlockIfOpen(state, blockIndex, events) - return events } @@ -421,9 +416,7 @@ const messageStart = ( usage: { input_tokens: inputTokens, output_tokens: 0, - ...(inputCachedTokens !== undefined && { - cache_creation_input_tokens: inputCachedTokens, - }), + cache_read_input_tokens: inputCachedTokens ?? 0, }, }, }, @@ -449,6 +442,7 @@ const openTextBlockIfNeeded = ( } if (!state.openBlocks.has(blockIndex)) { + closeOpenBlocks(state, events) events.push({ type: "content_block_start", index: blockIndex, @@ -480,6 +474,7 @@ const openThinkingBlockIfNeeded = ( } if (!state.openBlocks.has(blockIndex)) { + closeOpenBlocks(state, events) events.push({ type: "content_block_start", index: blockIndex, @@ -508,13 +503,20 @@ const closeBlockIfOpen = ( state.blockHasDelta.delete(blockIndex) } -const closeAllOpenBlocks = ( +const closeOpenBlocks = ( state: ResponsesStreamState, events: Array, ) => { for (const blockIndex of state.openBlocks) { closeBlockIfOpen(state, blockIndex, events) } +} + +const closeAllOpenBlocks = ( + state: ResponsesStreamState, + events: Array, +) => { + closeOpenBlocks(state, events) state.functionCallStateByOutputIndex.clear() } @@ -562,6 +564,7 @@ const openFunctionCallBlock = ( const { blockIndex } = functionCallState if (!state.openBlocks.has(blockIndex)) { + closeOpenBlocks(state, events) events.push({ type: "content_block_start", index: blockIndex, diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts index 9084fc78..d1593198 100644 --- a/src/routes/messages/responses-translation.ts +++ b/src/routes/messages/responses-translation.ts @@ -1,5 +1,6 @@ import consola from "consola" +import { getExtraPromptForModel } from "~/lib/config" import { type ResponsesPayload, type ResponseInputContent, @@ -60,8 +61,8 @@ export const translateAnthropicMessagesToResponsesPayload = ( const responsesPayload: ResponsesPayload = { model: payload.model, input, - instructions: translateSystemPrompt(payload.system), - temperature: payload.temperature ?? null, + instructions: translateSystemPrompt(payload.system, payload.model), + temperature: 1, // reasoning high temperature fixed to 1 top_p: payload.top_p ?? null, max_output_tokens: payload.max_tokens, tools: translatedTools, @@ -277,36 +278,22 @@ const createFunctionCallOutput = ( const translateSystemPrompt = ( system: string | Array | undefined, + model: string, ): string | null => { if (!system) { return null } - const toolUsePrompt = ` -## Tool use -- You have access to many tools. If a tool exists to perform a specific task, you MUST use that tool instead of running a terminal command to perform that task. -### Bash tool -When using the Bash tool, follow these rules: -- always run_in_background set to false, unless you are running a long-running command (e.g., a server or a watch command). -### BashOutput tool -When using the BashOutput tool, follow these rules: -- Only Bash Tool run_in_background set to true, Use BashOutput to read the output later -### TodoWrite tool -When using the TodoWrite tool, follow these rules: -- Skip using the TodoWrite tool for tasks with three or fewer steps. -- Do not make single-step todo lists. -- When you made a todo, update it after having performed one of the sub-tasks that you shared on the todo list. 
-## Special user requests -- If the user makes a simple request (such as asking for the time) which you can fulfill by running a terminal command (such as ''date''), you should do so.` + const extraPrompt = getExtraPromptForModel(model) if (typeof system === "string") { - return system + toolUsePrompt + return system + extraPrompt } const text = system .map((block, index) => { if (index === 0) { - return block.text + toolUsePrompt + return block.text + extraPrompt } return block.text }) @@ -548,6 +535,9 @@ const mapResponsesStopReason = ( const { status, incomplete_details: incompleteDetails } = response if (status === "completed") { + if (response.output.some((item) => item.type === "function_call")) { + return "tool_use" + } return "end_turn" } diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts index d06d02d6..ef7b38b9 100644 --- a/src/routes/responses/handler.ts +++ b/src/routes/responses/handler.ts @@ -52,29 +52,13 @@ export const handleResponses = async (c: Context) => { if (isStreamingRequested(payload) && isAsyncIterable(response)) { consola.debug("Forwarding native Responses stream") return streamSSE(c, async (stream) => { - const pingInterval = setInterval(async () => { - try { - await stream.writeSSE({ - event: "ping", - data: JSON.stringify({ timestamp: Date.now() }), - }) - } catch (error) { - consola.warn("Failed to send ping:", error) - clearInterval(pingInterval) - } - }, 3000) - - try { - for await (const chunk of response) { - consola.debug("Responses stream chunk:", JSON.stringify(chunk)) - await stream.writeSSE({ - id: (chunk as { id?: string }).id, - event: (chunk as { event?: string }).event, - data: (chunk as { data?: string }).data ?? "", - }) - } - } finally { - clearInterval(pingInterval) + for await (const chunk of response) { + consola.debug("Responses stream chunk:", JSON.stringify(chunk)) + await stream.writeSSE({ + id: (chunk as { id?: string }).id, + event: (chunk as { event?: string }).event, + data: (chunk as { data?: string }).data ?? 
"", + }) } }) } diff --git a/src/start.ts b/src/start.ts index 46798ad8..b832bddd 100644 --- a/src/start.ts +++ b/src/start.ts @@ -111,6 +111,9 @@ export async function runServer(options: RunServerOptions): Promise { serve({ fetch: server.fetch as ServerHandler, port: options.port, + bun: { + idleTimeout: 0, + }, }) } diff --git a/tests/responses-stream-translation.test.ts b/tests/responses-stream-translation.test.ts index 039411cf..885ac911 100644 --- a/tests/responses-stream-translation.test.ts +++ b/tests/responses-stream-translation.test.ts @@ -96,12 +96,7 @@ describe("translateResponsesStreamEvent tool calls", () => { partial_json: "[]}", }) - const blockStop = events.find( - (event) => event.type === "content_block_stop", - ) - expect(blockStop).toBeDefined() - - expect(state.openBlocks.size).toBe(0) + expect(state.openBlocks.size).toBe(1) expect(state.functionCallStateByOutputIndex.size).toBe(0) }) @@ -139,12 +134,7 @@ describe("translateResponsesStreamEvent tool calls", () => { '{"todos":[{"content":"Review src/routes/responses/translation.ts"}]}', }) - const blockStop = events.find( - (event) => event.type === "content_block_stop", - ) - expect(blockStop).toBeDefined() - - expect(state.openBlocks.size).toBe(0) + expect(state.openBlocks.size).toBe(1) expect(state.functionCallStateByOutputIndex.size).toBe(0) }) }) From 32cb10a1ce2deffdc4a2aa5b500339aa03d2528b Mon Sep 17 00:00:00 2001 From: "Jeffrey.Cao" Date: Mon, 3 Nov 2025 11:13:50 +0800 Subject: [PATCH 12/20] fix: the cluade code small model where max_tokens is only 512, which is incompatible with gpt-5-mini --- src/routes/messages/responses-translation.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts index d1593198..5c9728cd 100644 --- a/src/routes/messages/responses-translation.ts +++ b/src/routes/messages/responses-translation.ts @@ -64,7 +64,7 @@ export const translateAnthropicMessagesToResponsesPayload = ( instructions: translateSystemPrompt(payload.system, payload.model), temperature: 1, // reasoning high temperature fixed to 1 top_p: payload.top_p ?? null, - max_output_tokens: payload.max_tokens, + max_output_tokens: Math.max(payload.max_tokens, 12800), tools: translatedTools, tool_choice: toolChoice, metadata: payload.metadata ? 
{ ...payload.metadata } : null, From 9051a213a2f3b9f862b88dd2992b1cfea492ee14 Mon Sep 17 00:00:00 2001 From: "Jeffrey.Cao" Date: Mon, 3 Nov 2025 12:41:16 +0800 Subject: [PATCH 13/20] feat: add model reasoning efforts configuration and integrate into message translation --- src/lib/config.ts | 13 ++++++++++++- src/routes/messages/responses-translation.ts | 10 ++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/src/lib/config.ts b/src/lib/config.ts index d39e4684..19642e99 100644 --- a/src/lib/config.ts +++ b/src/lib/config.ts @@ -6,6 +6,7 @@ import { PATHS } from "./paths" export interface AppConfig { extraPrompts?: Record smallModel?: string + modelReasoningEfforts?: Record } const defaultConfig: AppConfig = { @@ -29,6 +30,9 @@ When using the TodoWrite tool, follow these rules: `, }, smallModel: "gpt-5-mini", + modelReasoningEfforts: { + "gpt-5-mini": "low", + }, } let cachedConfig: AppConfig | null = null @@ -71,7 +75,7 @@ function readConfigFromDisk(): AppConfig { export function getConfig(): AppConfig { if (!cachedConfig) { - cachedConfig = readConfigFromDisk() + cachedConfig ??= readConfigFromDisk() } return cachedConfig } @@ -85,3 +89,10 @@ export function getSmallModel(): string { const config = getConfig() return config.smallModel ?? "gpt-5-mini" } + +export function getReasoningEffortForModel( + model: string, +): "minimal" | "low" | "medium" | "high" { + const config = getConfig() + return config.modelReasoningEfforts?.[model] ?? "high" +} diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts index 5c9728cd..c8e9460b 100644 --- a/src/routes/messages/responses-translation.ts +++ b/src/routes/messages/responses-translation.ts @@ -1,6 +1,9 @@ import consola from "consola" -import { getExtraPromptForModel } from "~/lib/config" +import { + getExtraPromptForModel, + getReasoningEffortForModel, +} from "~/lib/config" import { type ResponsesPayload, type ResponseInputContent, @@ -73,7 +76,10 @@ export const translateAnthropicMessagesToResponsesPayload = ( stream: payload.stream ?? 
null, store: false, parallel_tool_calls: true, - reasoning: { effort: "high", summary: "detailed" }, + reasoning: { + effort: getReasoningEffortForModel(payload.model), + summary: "detailed", + }, include: ["reasoning.encrypted_content"], } From eeeb820d1ac8c6df3f9d892bbd4137d481c69784 Mon Sep 17 00:00:00 2001 From: "Jeffrey.Cao" Date: Mon, 3 Nov 2025 12:56:30 +0800 Subject: [PATCH 14/20] fix: ensure application directory is created when config file is missing --- src/lib/config.ts | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/lib/config.ts b/src/lib/config.ts index 19642e99..a13b3fe3 100644 --- a/src/lib/config.ts +++ b/src/lib/config.ts @@ -41,6 +41,7 @@ function ensureConfigFile(): void { try { fs.accessSync(PATHS.CONFIG_PATH, fs.constants.R_OK | fs.constants.W_OK) } catch { + fs.mkdirSync(PATHS.APP_DIR, { recursive: true }) fs.writeFileSync( PATHS.CONFIG_PATH, `${JSON.stringify(defaultConfig, null, 2)}\n`, @@ -74,9 +75,7 @@ function readConfigFromDisk(): AppConfig { } export function getConfig(): AppConfig { - if (!cachedConfig) { - cachedConfig ??= readConfigFromDisk() - } + cachedConfig ??= readConfigFromDisk() return cachedConfig } From 3f69f131860abad11c30c8bd1d8094d5a69c7c92 Mon Sep 17 00:00:00 2001 From: "Jeffrey.Cao" Date: Wed, 29 Oct 2025 15:19:10 +0800 Subject: [PATCH 15/20] feat: consola file logger for handler.ts --- src/lib/logger.ts | 179 +++++++++++++++++++++++++ src/routes/chat-completions/handler.ts | 20 +-- src/routes/messages/handler.ts | 32 ++--- src/routes/responses/handler.ts | 12 +- 4 files changed, 214 insertions(+), 29 deletions(-) create mode 100644 src/lib/logger.ts diff --git a/src/lib/logger.ts b/src/lib/logger.ts new file mode 100644 index 00000000..466b4cab --- /dev/null +++ b/src/lib/logger.ts @@ -0,0 +1,179 @@ +import consola, { type ConsolaInstance } from "consola" +import fs from "node:fs" +import path from "node:path" +import util from "node:util" + +import { PATHS } from "./paths" + +const LOG_RETENTION_DAYS = 7 +const LOG_RETENTION_MS = LOG_RETENTION_DAYS * 24 * 60 * 60 * 1000 +const CLEANUP_INTERVAL_MS = 24 * 60 * 60 * 1000 +const LOG_DIR = path.join(PATHS.APP_DIR, "logs") +const FLUSH_INTERVAL_MS = 1000 +const MAX_BUFFER_SIZE = 100 + +const logStreams = new Map() +const logBuffers = new Map>() + +const ensureLogDirectory = () => { + if (!fs.existsSync(LOG_DIR)) { + fs.mkdirSync(LOG_DIR, { recursive: true }) + } +} + +const cleanupOldLogs = () => { + if (!fs.existsSync(LOG_DIR)) { + return + } + + const now = Date.now() + + for (const entry of fs.readdirSync(LOG_DIR)) { + const filePath = path.join(LOG_DIR, entry) + + let stats: fs.Stats + try { + stats = fs.statSync(filePath) + } catch { + continue + } + + if (!stats.isFile()) { + continue + } + + if (now - stats.mtimeMs > LOG_RETENTION_MS) { + try { + fs.rmSync(filePath) + } catch { + continue + } + } + } +} + +const formatArgs = (args: Array) => + args + .map((arg) => + typeof arg === "string" ? arg : ( + util.inspect(arg, { depth: null, colors: false }) + ), + ) + .join(" ") + +const sanitizeName = (name: string) => { + const normalized = name + .toLowerCase() + .replaceAll(/[^a-z0-9]+/g, "-") + .replaceAll(/^-+|-+$/g, "") + + return normalized === "" ? 
"handler" : normalized +} + +const getLogStream = (filePath: string): fs.WriteStream => { + let stream = logStreams.get(filePath) + if (!stream || stream.destroyed) { + stream = fs.createWriteStream(filePath, { flags: "a" }) + logStreams.set(filePath, stream) + + stream.on("error", (error: unknown) => { + console.warn("Log stream error", error) + logStreams.delete(filePath) + }) + } + return stream +} + +const flushBuffer = (filePath: string) => { + const buffer = logBuffers.get(filePath) + if (!buffer || buffer.length === 0) { + return + } + + const stream = getLogStream(filePath) + const content = buffer.join("\n") + "\n" + stream.write(content, (error) => { + if (error) { + console.warn("Failed to write handler log", error) + } + }) + + logBuffers.set(filePath, []) +} + +const flushAllBuffers = () => { + for (const filePath of logBuffers.keys()) { + flushBuffer(filePath) + } +} + +const appendLine = (filePath: string, line: string) => { + let buffer = logBuffers.get(filePath) + if (!buffer) { + buffer = [] + logBuffers.set(filePath, buffer) + } + + buffer.push(line) + + if (buffer.length >= MAX_BUFFER_SIZE) { + flushBuffer(filePath) + } +} + +setInterval(flushAllBuffers, FLUSH_INTERVAL_MS) + +const cleanup = () => { + flushAllBuffers() + for (const stream of logStreams.values()) { + stream.end() + } + logStreams.clear() + logBuffers.clear() +} + +process.on("exit", cleanup) +process.on("SIGINT", () => { + cleanup() + process.exit(0) +}) +process.on("SIGTERM", () => { + cleanup() + process.exit(0) +}) + +let lastCleanup = 0 + +export const createHandlerLogger = (name: string): ConsolaInstance => { + ensureLogDirectory() + + const sanitizedName = sanitizeName(name) + const instance = consola.withTag(name) + + instance.level = 5 + instance.setReporters([]) + + instance.addReporter({ + log(logObj) { + ensureLogDirectory() + + if (Date.now() - lastCleanup > CLEANUP_INTERVAL_MS) { + cleanupOldLogs() + lastCleanup = Date.now() + } + + const date = logObj.date + const dateKey = date.toLocaleDateString("sv-SE") + const timestamp = date.toLocaleString("sv-SE", { hour12: false }) + const filePath = path.join(LOG_DIR, `${sanitizedName}-${dateKey}.log`) + const message = formatArgs(logObj.args as Array) + const line = `[${timestamp}] [${logObj.type}] [${logObj.tag || name}]${ + message ? 
` ${message}` : "" + }` + + appendLine(filePath, line) + }, + }) + + return instance +} diff --git a/src/routes/chat-completions/handler.ts b/src/routes/chat-completions/handler.ts index 04a5ae9e..3a037a52 100644 --- a/src/routes/chat-completions/handler.ts +++ b/src/routes/chat-completions/handler.ts @@ -1,9 +1,9 @@ import type { Context } from "hono" -import consola from "consola" import { streamSSE, type SSEMessage } from "hono/streaming" import { awaitApproval } from "~/lib/approval" +import { createHandlerLogger } from "~/lib/logger" import { checkRateLimit } from "~/lib/rate-limit" import { state } from "~/lib/state" import { getTokenCount } from "~/lib/tokenizer" @@ -14,11 +14,13 @@ import { type ChatCompletionsPayload, } from "~/services/copilot/create-chat-completions" +const logger = createHandlerLogger("chat-completions-handler") + export async function handleCompletion(c: Context) { await checkRateLimit(state) let payload = await c.req.json() - consola.debug("Request payload:", JSON.stringify(payload).slice(-400)) + logger.debug("Request payload:", JSON.stringify(payload).slice(-400)) // Find the selected model const selectedModel = state.models?.data.find( @@ -29,12 +31,12 @@ export async function handleCompletion(c: Context) { try { if (selectedModel) { const tokenCount = await getTokenCount(payload, selectedModel) - consola.info("Current token count:", tokenCount) + logger.info("Current token count:", tokenCount) } else { - consola.warn("No model selected, skipping token count calculation") + logger.warn("No model selected, skipping token count calculation") } } catch (error) { - consola.warn("Failed to calculate token count:", error) + logger.warn("Failed to calculate token count:", error) } if (state.manualApprove) await awaitApproval() @@ -44,20 +46,20 @@ export async function handleCompletion(c: Context) { ...payload, max_tokens: selectedModel?.capabilities.limits.max_output_tokens, } - consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens)) + logger.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens)) } const response = await createChatCompletions(payload) if (isNonStreaming(response)) { - consola.debug("Non-streaming response:", JSON.stringify(response)) + logger.debug("Non-streaming response:", JSON.stringify(response)) return c.json(response) } - consola.debug("Streaming response") + logger.debug("Streaming response") return streamSSE(c, async (stream) => { for await (const chunk of response) { - consola.debug("Streaming chunk:", JSON.stringify(chunk)) + logger.debug("Streaming chunk:", JSON.stringify(chunk)) await stream.writeSSE(chunk as SSEMessage) } }) diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index f6dd033d..7fcccd83 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -1,10 +1,10 @@ import type { Context } from "hono" -import consola from "consola" import { streamSSE } from "hono/streaming" import { awaitApproval } from "~/lib/approval" import { getSmallModel } from "~/lib/config" +import { createHandlerLogger } from "~/lib/logger" import { checkRateLimit } from "~/lib/rate-limit" import { state } from "~/lib/state" import { @@ -37,11 +37,13 @@ import { } from "./non-stream-translation" import { translateChunkToAnthropicEvents } from "./stream-translation" +const logger = createHandlerLogger("messages-handler") + export async function handleCompletion(c: Context) { await checkRateLimit(state) const anthropicPayload = await c.req.json() - consola.debug("Anthropic request 
payload:", JSON.stringify(anthropicPayload)) + logger.debug("Anthropic request payload:", JSON.stringify(anthropicPayload)) // fix claude code 2.0.28 warmup request consume premium request, forcing small model if no tools are used if (!anthropicPayload.tools || anthropicPayload.tools.length === 0) { @@ -68,7 +70,7 @@ const handleWithChatCompletions = async ( anthropicPayload: AnthropicMessagesPayload, ) => { const openAIPayload = translateToOpenAI(anthropicPayload) - consola.debug( + logger.debug( "Translated OpenAI request payload:", JSON.stringify(openAIPayload), ) @@ -76,19 +78,19 @@ const handleWithChatCompletions = async ( const response = await createChatCompletions(openAIPayload) if (isNonStreaming(response)) { - consola.debug( + logger.debug( "Non-streaming response from Copilot:", JSON.stringify(response).slice(-400), ) const anthropicResponse = translateToAnthropic(response) - consola.debug( + logger.debug( "Translated Anthropic response:", JSON.stringify(anthropicResponse), ) return c.json(anthropicResponse) } - consola.debug("Streaming response from Copilot") + logger.debug("Streaming response from Copilot") return streamSSE(c, async (stream) => { const streamState: AnthropicStreamState = { messageStartSent: false, @@ -98,7 +100,7 @@ const handleWithChatCompletions = async ( } for await (const rawEvent of response) { - consola.debug("Copilot raw stream event:", JSON.stringify(rawEvent)) + logger.debug("Copilot raw stream event:", JSON.stringify(rawEvent)) if (rawEvent.data === "[DONE]") { break } @@ -111,7 +113,7 @@ const handleWithChatCompletions = async ( const events = translateChunkToAnthropicEvents(chunk, streamState) for (const event of events) { - consola.debug("Translated Anthropic event:", JSON.stringify(event)) + logger.debug("Translated Anthropic event:", JSON.stringify(event)) await stream.writeSSE({ event: event.type, data: JSON.stringify(event), @@ -127,7 +129,7 @@ const handleWithResponsesApi = async ( ) => { const responsesPayload = translateAnthropicMessagesToResponsesPayload(anthropicPayload) - consola.debug( + logger.debug( "Translated Responses payload:", JSON.stringify(responsesPayload), ) @@ -139,7 +141,7 @@ const handleWithResponsesApi = async ( }) if (responsesPayload.stream && isAsyncIterable(response)) { - consola.debug("Streaming response from Copilot (Responses API)") + logger.debug("Streaming response from Copilot (Responses API)") return streamSSE(c, async (stream) => { const streamState = createResponsesStreamState() @@ -155,7 +157,7 @@ const handleWithResponsesApi = async ( continue } - consola.debug("Responses raw stream event:", data) + logger.debug("Responses raw stream event:", data) const events = translateResponsesStreamEvent( JSON.parse(data) as ResponseStreamEvent, @@ -163,7 +165,7 @@ const handleWithResponsesApi = async ( ) for (const event of events) { const eventData = JSON.stringify(event) - consola.debug("Translated Anthropic event:", eventData) + logger.debug("Translated Anthropic event:", eventData) await stream.writeSSE({ event: event.type, data: eventData, @@ -172,7 +174,7 @@ const handleWithResponsesApi = async ( } if (!streamState.messageCompleted) { - consola.warn( + logger.warn( "Responses stream ended without completion; sending fallback message_stop", ) const fallback = { type: "message_stop" as const } @@ -184,14 +186,14 @@ const handleWithResponsesApi = async ( }) } - consola.debug( + logger.debug( "Non-streaming Responses result:", JSON.stringify(response).slice(-400), ) const anthropicResponse = 
translateResponsesResultToAnthropic(
    response as ResponsesResult,
  )
-  consola.debug(
+  logger.debug(
     "Translated Anthropic response:",
     JSON.stringify(anthropicResponse),
   )
diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts
index ef7b38b9..574d61fc 100644
--- a/src/routes/responses/handler.ts
+++ b/src/routes/responses/handler.ts
@@ -1,9 +1,9 @@
 import type { Context } from "hono"
 
-import consola from "consola"
 import { streamSSE } from "hono/streaming"
 
 import { awaitApproval } from "~/lib/approval"
+import { createHandlerLogger } from "~/lib/logger"
 import { checkRateLimit } from "~/lib/rate-limit"
 import { state } from "~/lib/state"
 import {
@@ -14,13 +14,15 @@ import {
 
 import { getResponsesRequestOptions } from "./utils"
 
+const logger = createHandlerLogger("responses-handler")
+
 const RESPONSES_ENDPOINT = "/responses"
 
 export const handleResponses = async (c: Context) => {
   await checkRateLimit(state)
 
   const payload = await c.req.json()
-  consola.debug("Responses request payload:", JSON.stringify(payload))
+  logger.debug("Responses request payload:", JSON.stringify(payload))
 
   const selectedModel = state.models?.data.find(
     (model) => model.id === payload.model,
@@ -50,10 +52,10 @@ export const handleResponses = async (c: Context) => {
   const response = await createResponses(payload, { vision, initiator })
 
   if (isStreamingRequested(payload) && isAsyncIterable(response)) {
-    consola.debug("Forwarding native Responses stream")
+    logger.debug("Forwarding native Responses stream")
     return streamSSE(c, async (stream) => {
       for await (const chunk of response) {
-        consola.debug("Responses stream chunk:", JSON.stringify(chunk))
+        logger.debug("Responses stream chunk:", JSON.stringify(chunk))
         await stream.writeSSE({
           id: (chunk as { id?: string }).id,
           event: (chunk as { event?: string }).event,
@@ -63,7 +65,7 @@ export const handleResponses = async (c: Context) => {
     })
   }
 
-  consola.debug(
+  logger.debug(
     "Forwarding native Responses result:",
     JSON.stringify(response).slice(-400),
   )

From 4c0d775e1dc6b8648c7ad5f21fb783fc3246facf Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao"
Date: Thu, 30 Oct 2025 10:42:32 +0800
Subject: [PATCH 16/20] fix: copilot function call returning infinite line breaks until max_tokens limit

---
 src/routes/messages/handler.ts                      | 16 +++-
 .../messages/responses-stream-translation.ts        | 89 ++++++++++++++++++-
 2 files changed, 97 insertions(+), 8 deletions(-)

diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index 7fcccd83..19ba0168 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -8,6 +8,7 @@ import { createHandlerLogger } from "~/lib/logger"
 import { checkRateLimit } from "~/lib/rate-limit"
 import { state } from "~/lib/state"
 import {
+  buildErrorEvent,
   createResponsesStreamState,
   translateResponsesStreamEvent,
 } from "~/routes/messages/responses-stream-translation"
@@ -171,16 +172,23 @@ const handleWithResponsesApi = async (
           data: eventData,
         })
       }
+
+      if (streamState.messageCompleted) {
+        logger.debug("Message completed, ending stream")
+        break
+      }
     }
 
     if (!streamState.messageCompleted) {
       logger.warn(
-        "Responses stream ended without completion; sending fallback message_stop",
+        "Responses stream ended without completion; sending erorr event",
+      )
+      const errorEvent = buildErrorEvent(
+        "Responses stream ended without completion",
       )
-      const fallback = { type: "message_stop" as const }
       await stream.writeSSE({
-        event: fallback.type,
-        data: JSON.stringify(fallback),
+        event: errorEvent.type,
+        data: 
JSON.stringify(errorEvent),
       })
     }
   })
diff --git a/src/routes/messages/responses-stream-translation.ts b/src/routes/messages/responses-stream-translation.ts
index de4b0c3a..524b2e17 100644
--- a/src/routes/messages/responses-stream-translation.ts
+++ b/src/routes/messages/responses-stream-translation.ts
@@ -1,5 +1,3 @@
-import consola from "consola"
-
 import {
   type ResponseCompletedEvent,
   type ResponseCreatedEvent,
@@ -21,6 +19,39 @@ import {
 import { type AnthropicStreamEventData } from "./anthropic-types"
 import { translateResponsesResultToAnthropic } from "./responses-translation"
 
+const MAX_CONSECUTIVE_FUNCTION_CALL_WHITESPACE = 20
+
+class FunctionCallArgumentsValidationError extends Error {
+  constructor(message: string) {
+    super(message)
+    this.name = "FunctionCallArgumentsValidationError"
+  }
+}
+
+const updateWhitespaceRunState = (
+  previousCount: number,
+  chunk: string,
+): {
+  nextCount: number
+  exceeded: boolean
+} => {
+  let count = previousCount
+
+  for (const char of chunk) {
+    if (char === " " || char === "\r" || char === "\n") {
+      count += 1
+      if (count > MAX_CONSECUTIVE_FUNCTION_CALL_WHITESPACE) {
+        return { nextCount: count, exceeded: true }
+      }
+      continue
+    }
+
+    count = 0
+  }
+
+  return { nextCount: count, exceeded: false }
+}
+
 export interface ResponsesStreamState {
   messageStartSent: boolean
   messageCompleted: boolean
@@ -35,6 +66,7 @@ type FunctionCallStreamState = {
   blockIndex: number
   toolCallId: string
   name: string
+  consecutiveWhitespaceCount: number
 }
 
 export const createResponsesStreamState = (): ResponsesStreamState => ({
@@ -102,7 +134,6 @@ export const translateResponsesStreamEvent = (
     }
 
     default: {
-      consola.debug("Unknown Responses stream event type:", eventType)
       return []
     }
   }
@@ -186,11 +217,45 @@ const handleFunctionCallArgumentsDelta = (
   const events = new Array<AnthropicStreamEventData>()
   const outputIndex = rawEvent.output_index
   const deltaText = rawEvent.delta
+
+  if (!deltaText) {
+    return events
+  }
+
   const blockIndex = openFunctionCallBlock(state, {
     outputIndex,
     events,
   })
 
+  const functionCallState =
+    state.functionCallStateByOutputIndex.get(outputIndex)
+  if (!functionCallState) {
+    return handleFunctionCallArgumentsValidationError(
+      new FunctionCallArgumentsValidationError(
+        "Received function call arguments delta without an open tool call block.",
+      ),
+      state,
+      events,
+    )
+  }
+
+  // fix: copilot function call returning infinite line breaks until max_tokens limit
+  // "arguments": "{\"path\":\"xxx\",\"pattern\":\"**/*.ts\",\"} }? Wait extra braces. Need correct. I should run? Wait overcame. Need proper JSON with pattern \"\n\n\n\n\n\n\n\n... 
+  const { nextCount, exceeded } = updateWhitespaceRunState(
+    functionCallState.consecutiveWhitespaceCount,
+    deltaText,
+  )
+  if (exceeded) {
+    return handleFunctionCallArgumentsValidationError(
+      new FunctionCallArgumentsValidationError(
+        "Received function call arguments delta containing more than 20 consecutive whitespace characters.",
+      ),
+      state,
+      events,
+    )
+  }
+  functionCallState.consecutiveWhitespaceCount = nextCount
+
   events.push({
     type: "content_block_delta",
     index: blockIndex,
@@ -394,6 +459,21 @@ const handleErrorEvent = (
   return [buildErrorEvent(message)]
 }
 
+const handleFunctionCallArgumentsValidationError = (
+  error: FunctionCallArgumentsValidationError,
+  state: ResponsesStreamState,
+  events: Array<AnthropicStreamEventData> = [],
+): Array<AnthropicStreamEventData> => {
+  const reason = error.message
+
+  closeAllOpenBlocks(state, events)
+  state.messageCompleted = true
+
+  events.push(buildErrorEvent(reason))
+
+  return events
+}
+
 const messageStart = (
   state: ResponsesStreamState,
   response: ResponsesResult,
@@ -521,7 +601,7 @@ const closeAllOpenBlocks = (
   state.functionCallStateByOutputIndex.clear()
 }
 
-const buildErrorEvent = (message: string): AnthropicStreamEventData => ({
+export const buildErrorEvent = (message: string): AnthropicStreamEventData => ({
   type: "error",
   error: {
     type: "api_error",
@@ -556,6 +636,7 @@ const openFunctionCallBlock = (
     blockIndex,
     toolCallId: resolvedToolCallId,
     name: resolvedName,
+    consecutiveWhitespaceCount: 0,
   }
 
   state.functionCallStateByOutputIndex.set(outputIndex, functionCallState)

From 1ec12db6805443f4e6d55cb3027ca19e4c39c1be Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao"
Date: Mon, 3 Nov 2025 13:25:41 +0800
Subject: [PATCH 17/20] feat: add verbose logging configuration to enhance log detail level

---
 src/lib/logger.ts | 5 ++++-
 src/lib/state.ts  | 1 +
 src/start.ts      | 1 +
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/lib/logger.ts b/src/lib/logger.ts
index 466b4cab..93a3b01f 100644
--- a/src/lib/logger.ts
+++ b/src/lib/logger.ts
@@ -4,6 +4,7 @@ import path from "node:path"
 import util from "node:util"
 
 import { PATHS } from "./paths"
+import { state } from "./state"
 
 const LOG_RETENTION_DAYS = 7
 const LOG_RETENTION_MS = LOG_RETENTION_DAYS * 24 * 60 * 60 * 1000
@@ -150,7 +151,9 @@ export const createHandlerLogger = (name: string): ConsolaInstance => {
   const sanitizedName = sanitizeName(name)
 
   const instance = consola.withTag(name)
-  instance.level = 5
+  if (state.verbose) {
+    instance.level = 5
+  }
 
   instance.setReporters([])
   instance.addReporter({
diff --git a/src/lib/state.ts b/src/lib/state.ts
index 5ba4dc1d..7c20f3c3 100644
--- a/src/lib/state.ts
+++ b/src/lib/state.ts
@@ -15,6 +15,7 @@ export interface State {
   // Rate limiting configuration
   rateLimitSeconds?: number
   lastRequestTimestamp?: number
+  verbose?: boolean
 }
 
 export const state: State = {
diff --git a/src/start.ts b/src/start.ts
index b832bddd..9cef69f2 100644
--- a/src/start.ts
+++ b/src/start.ts
@@ -27,6 +27,7 @@ interface RunServerOptions {
 
 export async function runServer(options: RunServerOptions): Promise<void> {
   if (options.verbose) {
+    state.verbose = true
     consola.level = 5
     consola.info("Verbose logging enabled")
   }

From 174e868e2a3803da450a13438e5f42a058fc6bd6 Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao"
Date: Mon, 3 Nov 2025 13:44:59 +0800
Subject: [PATCH 18/20] fix: update verbose property to be required in State interface and adjust runServer to set verbose level correctly

---
 src/lib/state.ts | 3 ++-
 src/start.ts     | 5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/lib/state.ts 
b/src/lib/state.ts
index 7c20f3c3..5d5bc2bb 100644
--- a/src/lib/state.ts
+++ b/src/lib/state.ts
@@ -15,7 +15,7 @@ export interface State {
   // Rate limiting configuration
   rateLimitSeconds?: number
   lastRequestTimestamp?: number
-  verbose?: boolean
+  verbose: boolean
 }
 
 export const state: State = {
@@ -23,4 +23,5 @@ export const state: State = {
   manualApprove: false,
   rateLimitWait: false,
   showToken: false,
+  verbose: false,
 }
diff --git a/src/start.ts b/src/start.ts
index 9cef69f2..bcd16e35 100644
--- a/src/start.ts
+++ b/src/start.ts
@@ -11,7 +11,6 @@ import { generateEnvScript } from "./lib/shell"
 import { state } from "./lib/state"
 import { setupCopilotToken, setupGitHubToken } from "./lib/token"
 import { cacheModels, cacheVSCodeVersion } from "./lib/utils"
-import { server } from "./server"
 
 interface RunServerOptions {
   port: number
@@ -26,8 +25,8 @@ interface RunServerOptions {
 }
 
 export async function runServer(options: RunServerOptions): Promise<void> {
+  state.verbose = options.verbose
   if (options.verbose) {
-    state.verbose = true
     consola.level = 5
     consola.info("Verbose logging enabled")
   }
@@ -109,6 +108,8 @@ export async function runServer(options: RunServerOptions): Promise<void> {
     `🌐 Usage Viewer: https://ericc-ch.github.io/copilot-api?endpoint=${serverUrl}/usage`,
   )
 
+  const { server } = await import("./server")
+
   serve({
     fetch: server.fetch as ServerHandler,
     port: options.port,

From 6f479267e70a8b6fcad1691b23f7e61a51107813 Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao"
Date: Thu, 6 Nov 2025 17:05:36 +0800
Subject: [PATCH 19/20] fix: correct typo in warning message and refine whitespace handling logic

---
 src/routes/messages/handler.ts                      | 2 +-
 src/routes/messages/responses-stream-translation.ts | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index 19ba0168..54f4a062 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -181,7 +181,7 @@ const handleWithResponsesApi = async (
 
     if (!streamState.messageCompleted) {
       logger.warn(
-        "Responses stream ended without completion; sending erorr event",
+        "Responses stream ended without completion; sending error event",
       )
       const errorEvent = buildErrorEvent(
         "Responses stream ended without completion",
diff --git a/src/routes/messages/responses-stream-translation.ts b/src/routes/messages/responses-stream-translation.ts
index 524b2e17..5a0338b9 100644
--- a/src/routes/messages/responses-stream-translation.ts
+++ b/src/routes/messages/responses-stream-translation.ts
@@ -38,7 +38,7 @@ const updateWhitespaceRunState = (
   let count = previousCount
 
   for (const char of chunk) {
-    if (char === " " || char === "\r" || char === "\n") {
+    if (char === "\r" || char === "\n") {
       count += 1
       if (count > MAX_CONSECUTIVE_FUNCTION_CALL_WHITESPACE) {
         return { nextCount: count, exceeded: true }
@@ -46,7 +46,9 @@ const updateWhitespaceRunState = (
       continue
     }
 
-    count = 0
+    if (char !== " ") {
+      count = 0
+    }
   }
 
   return { nextCount: count, exceeded: false }
 }

From 01d4adb07158de9bf722e8ea4f5080f978c830f6 Mon Sep 17 00:00:00 2001
From: caozhiyuan <568022847@qq.com>
Date: Mon, 10 Nov 2025 23:26:33 +0800
Subject: [PATCH 20/20] fix: update token counting logic for GPT, Claude, and Grok models, adjusting input token calculations and handling tool prompts

---
 src/lib/tokenizer.ts                        | 65 ++++++++++++++++-----
 src/routes/messages/count-tokens-handler.ts | 15 ++---
 2 files changed, 58 insertions(+), 22 deletions(-)

diff --git a/src/lib/tokenizer.ts b/src/lib/tokenizer.ts
index 
8c3eda73..e9b83ac5 100644
--- a/src/lib/tokenizer.ts
+++ b/src/lib/tokenizer.ts
@@ -37,7 +37,9 @@ const calculateToolCallsTokens = (
   let tokens = 0
   for (const toolCall of toolCalls) {
     tokens += constants.funcInit
-    tokens += encoder.encode(JSON.stringify(toolCall)).length
+    tokens += encoder.encode(toolCall.id).length
+    tokens += encoder.encode(toolCall.function.name).length
+    tokens += encoder.encode(toolCall.function.arguments).length
   }
   tokens += constants.funcEnd
   return tokens
@@ -158,6 +160,7 @@ const getModelConstants = (model: Model) => {
         enumInit: -3,
         enumItem: 3,
         funcEnd: 12,
+        isGpt: true,
       }
     : {
         funcInit: 7,
@@ -166,6 +169,7 @@
         enumInit: -3,
         enumItem: 3,
         funcEnd: 12,
+        isGpt: model.id.startsWith("gpt-"),
       }
 }
 
@@ -218,8 +222,12 @@ const calculateParameterTokens = (
   const line = `${paramName}:${paramType}:${paramDesc}`
   tokens += encoder.encode(line).length
 
+  if (param.type === "array" && param["items"]) {
+    tokens += calculateParametersTokens(param["items"], encoder, constants)
+  }
+
   // Handle additional properties (excluding standard ones)
-  const excludedKeys = new Set(["type", "description", "enum"])
+  const excludedKeys = new Set(["type", "description", "enum", "items"])
   for (const propertyName of Object.keys(param)) {
     if (!excludedKeys.has(propertyName)) {
       const propertyValue = param[propertyName]
@@ -234,6 +242,27 @@
   return tokens
 }
 
+/**
+ * Calculate tokens for properties object
+ */
+const calculatePropertiesTokens = (
+  properties: Record<string, unknown>,
+  encoder: Encoder,
+  constants: ReturnType<typeof getModelConstants>,
+): number => {
+  let tokens = 0
+  if (Object.keys(properties).length > 0) {
+    tokens += constants.propInit
+    for (const propKey of Object.keys(properties)) {
+      tokens += calculateParameterTokens(propKey, properties[propKey], {
+        encoder,
+        constants,
+      })
+    }
+  }
+  return tokens
+}
+
 /**
  * Calculate tokens for function parameters
  */
@@ -249,18 +278,17 @@
   const params = parameters as Record<string, unknown>
 
   let tokens = 0
+  const excludedKeys = new Set(["$schema", "additionalProperties"])
   for (const [key, value] of Object.entries(params)) {
+    if (excludedKeys.has(key)) {
+      continue
+    }
     if (key === "properties") {
-      const properties = value as Record<string, unknown>
-      if (Object.keys(properties).length > 0) {
-        tokens += constants.propInit
-        for (const propKey of Object.keys(properties)) {
-          tokens += calculateParameterTokens(propKey, properties[propKey], {
-            encoder,
-            constants,
-          })
-        }
-      }
+      tokens += calculatePropertiesTokens(
+        value as Record<string, unknown>,
+        encoder,
+        constants,
+      )
     } else {
       const paramText =
         typeof value === "string" ? 
value : JSON.stringify(value)
@@ -306,10 +334,16 @@ export const numTokensForTools = (
   constants: ReturnType<typeof getModelConstants>,
 ): number => {
   let funcTokenCount = 0
-  for (const tool of tools) {
-    funcTokenCount += calculateToolTokens(tool, encoder, constants)
+  if (constants.isGpt) {
+    for (const tool of tools) {
+      funcTokenCount += calculateToolTokens(tool, encoder, constants)
+    }
+    funcTokenCount += constants.funcEnd
+  } else {
+    for (const tool of tools) {
+      funcTokenCount += encoder.encode(JSON.stringify(tool)).length
+    }
   }
-  funcTokenCount += constants.funcEnd
   return funcTokenCount
 }
 
@@ -335,6 +369,7 @@ export const getTokenCount = async (
   )
 
   const constants = getModelConstants(model)
+  // GPT token counting: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
   let inputTokens = calculateTokens(inputMessages, encoder, constants)
   if (payload.tools && payload.tools.length > 0) {
     inputTokens += numTokensForTools(payload.tools, encoder, constants)
diff --git a/src/routes/messages/count-tokens-handler.ts b/src/routes/messages/count-tokens-handler.ts
index 2ec849cb..70bd6a53 100644
--- a/src/routes/messages/count-tokens-handler.ts
+++ b/src/routes/messages/count-tokens-handler.ts
@@ -33,18 +33,21 @@ export async function handleCountTokens(c: Context) {
     const tokenCount = await getTokenCount(openAIPayload, selectedModel)
 
     if (anthropicPayload.tools && anthropicPayload.tools.length > 0) {
-      let mcpToolExist = false
+      let addToolSystemPromptCount = false
       if (anthropicBeta?.startsWith("claude-code")) {
-        mcpToolExist = anthropicPayload.tools.some((tool) =>
-          tool.name.startsWith("mcp__"),
+        const toolsLength = anthropicPayload.tools.length
+        addToolSystemPromptCount = !anthropicPayload.tools.some(
+          (tool) =>
+            tool.name.startsWith("mcp__")
+            || (tool.name === "Skill" && toolsLength === 1),
         )
       }
-      if (!mcpToolExist) {
+      if (addToolSystemPromptCount) {
         if (anthropicPayload.model.startsWith("claude")) {
           // https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/overview#pricing
           tokenCount.input = tokenCount.input + 346
         } else if (anthropicPayload.model.startsWith("grok")) {
-          tokenCount.input = tokenCount.input + 480
+          tokenCount.input = tokenCount.input + 120
        }
      }
    }
@@ -52,8 +55,6 @@
     let finalTokenCount = tokenCount.input + tokenCount.output
     if (anthropicPayload.model.startsWith("claude")) {
       finalTokenCount = Math.round(finalTokenCount * 1.15)
-    } else if (anthropicPayload.model.startsWith("grok")) {
-      finalTokenCount = Math.round(finalTokenCount * 1.03)
     }
 
     consola.info("Token count:", finalTokenCount)
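
Note on the tool-token branch in PATCH 20: the sketch below is not part of the series; Encoder and the tool shape are simplified stand-ins for the real types in src/lib/tokenizer.ts, and the GPT path collapses calculateToolTokens into a single encode call for brevity. It isolates the new isGpt split in numTokensForTools: GPT-family models get a fixed per-function overhead plus one shared funcEnd terminator, while every other model is charged the serialized JSON of each tool definition.

// Sketch under the assumptions above; countToolTokens is a hypothetical name.
interface Encoder {
  encode: (text: string) => Array<number>
}
interface Tool {
  function: { name: string; description?: string; parameters?: unknown }
}

const countToolTokens = (
  tools: Array<Tool>,
  encoder: Encoder,
  constants: { funcInit: number; funcEnd: number; isGpt: boolean },
): number => {
  let tokens = 0
  if (constants.isGpt) {
    // GPT family: fixed overhead per function, one terminator for the list.
    for (const tool of tools) {
      tokens += constants.funcInit
      tokens += encoder.encode(JSON.stringify(tool.function)).length
    }
    tokens += constants.funcEnd
  } else {
    // Other models: the raw JSON of each tool definition is what gets counted.
    for (const tool of tools) {
      tokens += encoder.encode(JSON.stringify(tool)).length
    }
  }
  return tokens
}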
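
Note on the whitespace guard introduced in PATCH 16 and refined in PATCH 19: a standalone sketch of the final behavior (advanceRun and MAX_RUN are hypothetical names mirroring updateWhitespaceRunState and MAX_CONSECUTIVE_FUNCTION_CALL_WHITESPACE). Only "\r" and "\n" extend the run, a plain space neither extends nor resets it, and any other character resets the counter, so a runaway newline run that straddles several streamed argument deltas still trips the limit.

const MAX_RUN = 20

const advanceRun = (
  previousCount: number,
  chunk: string,
): { nextCount: number; exceeded: boolean } => {
  let count = previousCount
  for (const char of chunk) {
    if (char === "\r" || char === "\n") {
      count += 1
      if (count > MAX_RUN) {
        return { nextCount: count, exceeded: true }
      }
    } else if (char !== " ") {
      count = 0 // any visible character ends the run; spaces are neutral
    }
  }
  return { nextCount: count, exceeded: false }
}

// 7 + 7 + 7 newlines across three deltas reach 21 > 20 and trip the guard,
// even though a space sits in the middle, because spaces do not reset it.
let run = { nextCount: 0, exceeded: false }
for (const delta of ['{"pattern":"', "\n\n\n\n\n\n\n", " \n\n\n\n\n\n\n", "\n\n\n\n\n\n\n"]) {
  run = advanceRun(run.nextCount, delta)
  if (run.exceeded) break
}
console.log(run.exceeded) // true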