diff --git a/README.md b/README.md index f4d42e6..05e64fd 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,10 @@ A CLI tool to create the scaffolding for a new Kernel applications. This tool h - Sample App: A basic template that extracts page titles using Playwright - Browser Use: A template implementing the Browser Use SDK - Stagehand: A template implementing the Stagehand SDK + - Advanced Sample: Implements sample apps using advanced Kernel configs + - Computer Use: Implements a prompt loop using Anthropic Computer Use (Python only) + - Anthropic Computer Use: Implements a prompt loop using Anthropic Computer Use (Typescript only) + - CUA: Implements a Computer Use Agent (OpenAI CUA) sample - ⚡️ Automatic dependency setup - 🫶 Interactive CLI @@ -46,7 +50,8 @@ create-kernel-app [app-name] [options] - `browser-use`: Template with Browser Use SDK (Python only) - `stagehand`: Template with Stagehand SDK (Typescript only) - `advanced-sample`: Implements sample apps using advanced Kernel configs - - `computer-use`: Implements a prompt loop using Anthropic Computer Use + - `computer-use`: Implements a prompt loop using Anthropic Computer Use (Python only) + - `anthropic-computer-use`: Implements a prompt loop using Anthropic Computer Use (Typescript only) - `cua`: Implements a Computer Use Agent (OpenAI CUA) sample ### Examples @@ -61,9 +66,9 @@ Create a Typescript application with Stagehand template: npx @onkernel/create-kernel-app my-app --language typescript --template stagehand ``` -Create a Typescript application with Computer Use template: +Create a Typescript application with Anthropic Computer Use template: ```bash -npx @onkernel/create-kernel-app my-app --language typescript --template computer-use +npx @onkernel/create-kernel-app my-app --language typescript --template anthropic-computer-use ``` Create a Python application with a sample app: @@ -75,6 +80,10 @@ Create a Python application with Browser Use template: ```bash npx @onkernel/create-kernel-app 
my-app --language python --template browser-use ``` + +Create a Python application with Computer Use template: +```bash +npx @onkernel/create-kernel-app my-app --language python --template computer-use ``` ## Next Steps @@ -101,7 +110,7 @@ export KERNEL_API_KEY= kernel deploy index.ts # --env OPENAI_API_KEY=XXX if Stagehand; --env ANTHROPIC_API_KEY=XXX if Computer Use # Python -kernel deploy main.py # --env OPENAI_API_KEY=XXX if Browser Use +kernel deploy main.py # --env OPENAI_API_KEY=XXX if Browser Use or CUA; --env ANTHROPIC_API_KEY=XXX if Computer Use ``` If deploying an app that requires environment variables, make sure to [set them](https://docs.onkernel.com/launch/deploy#environment-variables) when you `deploy`. @@ -114,8 +123,8 @@ kernel invoke ts-basic get-page-title --payload '{"url": "https://www.google.com # Typescript + Stagehand kernel invoke ts-stagehand stagehand-task --payload '{"query": "Best wired earbuds"}' -# Typescript + Computer Use -kernel invoke ts-cu cu-task --payload '{"query": "Search for the top 3 restaurants in NYC according to Pete Wells"}' +# Typescript + Anthropic Computer Use +kernel invoke ts-anthropic-cu computer-use-task --payload '{"query": "Search for the top 3 restaurants in NYC according to Pete Wells"}' # Python + Sample App kernel invoke python-basic get-page-title --payload '{"url": "https://www.google.com"}' @@ -140,7 +149,8 @@ These are the sample apps currently available when you run `npx @onkernel/create | **browser-use** | Completes a specified task | Browser Use | `{ task }` | | **stagehand** | Returns the first result of a specified Google search | Stagehand | `{ query }` | | **advanced-sample** | Implements sample apps using advanced Kernel configs | n/a | -| **computer-use** | Implements a prompt loop | Anthropic Computer Use API | `{ query }` | +| **computer-use** | Implements a prompt loop | Anthropic Computer Use API (Python only) | `{ query }` | +| **anthropic-computer-use** | Implements a prompt loop | 
Anthropic Computer Use API (Typescript only) | `{ query }` | | **cua** | Implements the OpenAI Computer Using Agent (CUA) | OpenAI CUA | `{ task }` | ## Documentation diff --git a/index.ts b/index.ts index 1eaed85..3a44893 100644 --- a/index.ts +++ b/index.ts @@ -19,6 +19,7 @@ type TemplateKey = | "stagehand" | "advanced-sample" | "computer-use" + | "anthropic-computer-use" | "cua"; type LanguageInfo = { name: string; shorthand: string }; type TemplateInfo = { @@ -35,6 +36,7 @@ const TEMPLATE_BROWSER_USE = "browser-use"; const TEMPLATE_STAGEHAND = "stagehand"; const TEMPLATE_ADVANCED_SAMPLE = "advanced-sample"; const TEMPLATE_COMPUTER_USE = "computer-use"; +const TEMPLATE_ANTHROPIC_COMPUTER_USE = "anthropic-computer-use"; const TEMPLATE_CUA = "cua"; const LANGUAGE_SHORTHAND_TS = "ts"; const LANGUAGE_SHORTHAND_PY = "py"; @@ -73,7 +75,12 @@ const TEMPLATES: Record = { [TEMPLATE_COMPUTER_USE]: { name: "Computer Use", description: "Implements the Anthropic Computer Use SDK", - languages: [LANGUAGE_TYPESCRIPT, LANGUAGE_PYTHON], + languages: [LANGUAGE_PYTHON], + }, + [TEMPLATE_ANTHROPIC_COMPUTER_USE]: { + name: "Anthropic Computer Use", + description: "Implements the Anthropic Computer Use SDK with @onkernel/cu-playwright", + languages: [LANGUAGE_TYPESCRIPT], }, [TEMPLATE_CUA]: { name: "CUA Sample", @@ -93,8 +100,8 @@ const INVOKE_SAMPLES: Record< 'kernel invoke ts-stagehand stagehand-task --payload \'{"query": "Best wired earbuds"}\'', [TEMPLATE_ADVANCED_SAMPLE]: 'kernel invoke ts-advanced test-captcha-solver', - [TEMPLATE_COMPUTER_USE]: - 'kernel invoke ts-cu cu-task --payload \'{"query": "Return the first url of a search result for NYC restaurant reviews Pete Wells"}\'', + [TEMPLATE_ANTHROPIC_COMPUTER_USE]: + 'kernel invoke ts-anthropic-cu computer-use-task --payload \'{"query": "Search for the top 3 restaurants in NYC according to Pete Wells"}\'', [TEMPLATE_CUA]: 'kernel invoke ts-cua cua-task --payload \'{"query": "Go to https://news.ycombinator.com and get the top 
5 articles"}\'', }, @@ -123,8 +130,8 @@ const REGISTERED_APP_NAMES: Record< 'ts-stagehand', [TEMPLATE_ADVANCED_SAMPLE]: 'ts-advanced', - [TEMPLATE_COMPUTER_USE]: - 'ts-cu', + [TEMPLATE_ANTHROPIC_COMPUTER_USE]: + 'ts-anthropic-cu', [TEMPLATE_CUA]: 'ts-cua', }, diff --git a/templates/typescript/anthropic-computer-use/README.md b/templates/typescript/anthropic-computer-use/README.md new file mode 100644 index 0000000..af5deac --- /dev/null +++ b/templates/typescript/anthropic-computer-use/README.md @@ -0,0 +1,3 @@ +# Anthropic Computer Use Sample + +This sample app demonstrates how to use the `@onkernel/cu-playwright` package to perform a simple search query. \ No newline at end of file diff --git a/templates/typescript/anthropic-computer-use/_gitignore b/templates/typescript/anthropic-computer-use/_gitignore new file mode 100644 index 0000000..6394993 --- /dev/null +++ b/templates/typescript/anthropic-computer-use/_gitignore @@ -0,0 +1,17 @@ +# Node +node_modules +dist +.DS_Store +npm-debug.log* +yarn-debug.log* +yarn-error.log* +pnpm-debug.log* + +# Editor +.vscode +.idea +*.suo +*.ntvs* +*.njsproj +*.sln +*.sw? 
\ No newline at end of file diff --git a/templates/typescript/anthropic-computer-use/bun.lock b/templates/typescript/anthropic-computer-use/bun.lock new file mode 100644 index 0000000..2e69ea2 --- /dev/null +++ b/templates/typescript/anthropic-computer-use/bun.lock @@ -0,0 +1,38 @@ +{ + "lockfileVersion": 1, + "workspaces": { + "": { + "name": "ts-anthropic-cu", + "dependencies": { + "@onkernel/cu-playwright": "^0.1.0", + "@onkernel/sdk": "^0.6.0", + "playwright": "^1.52.0", + "zod": "^3.25.0", + }, + "peerDependencies": { + "typescript": "^5", + }, + }, + }, + "packages": { + "@anthropic-ai/sdk": ["@anthropic-ai/sdk@0.52.0", "", { "bin": { "anthropic-ai-sdk": "bin/cli" } }, "sha512-d4c+fg+xy9e46c8+YnrrgIQR45CZlAi7PwdzIfDXDM6ACxEZli1/fxhURsq30ZpMZy6LvSkr41jGq5aF5TD7rQ=="], + + "@onkernel/cu-playwright": ["@onkernel/cu-playwright@0.1.1", "", { "dependencies": { "@anthropic-ai/sdk": "0.52.0", "luxon": "3.6.0", "zod": "^3.25.0", "zod-to-json-schema": "^3.23.1" }, "peerDependencies": { "playwright": "^1.52.0", "typescript": "^5" } }, "sha512-BSjeU49FW0gDl7NbV/OtwzN8fFwUGAEr3nMKoRy5k875fZnx/CNlqkmB+meFs7JmT87EcVhfd419zjr3Qk+YAQ=="], + + "@onkernel/sdk": ["@onkernel/sdk@0.6.1", "", {}, "sha512-ygk39kbtahhzS4nHEMGCRKu0lfaWM2tExex7GDta6JCIqiFTLQtiDd9xDGd/uX1FyhUnOqkyYaiy6XxDreX9MQ=="], + + "fsevents": ["fsevents@2.3.2", "", { "os": "darwin" }, "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA=="], + + "luxon": ["luxon@3.6.0", "", {}, "sha512-WE7p0p7W1xji9qxkLYsvcIxZyfP48GuFrWIBQZIsbjCyf65dG1rv4n83HcOyEyhvzxJCrUoObCRNFgRNIQ5KNA=="], + + "playwright": ["playwright@1.53.1", "", { "dependencies": { "playwright-core": "1.53.1" }, "optionalDependencies": { "fsevents": "2.3.2" }, "bin": { "playwright": "cli.js" } }, "sha512-LJ13YLr/ocweuwxyGf1XNFWIU4M2zUSo149Qbp+A4cpwDjsxRPj7k6H25LBrEHiEwxvRbD8HdwvQmRMSvquhYw=="], + + "playwright-core": ["playwright-core@1.53.1", "", { "bin": { "playwright-core": "cli.js" } }, 
"sha512-Z46Oq7tLAyT0lGoFx4DOuB1IA9D1TPj0QkYxpPVUnGDqHHvDpCftu1J2hM2PiWsNMoZh8+LQaarAWcDfPBc6zg=="], + + "typescript": ["typescript@5.8.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ=="], + + "zod": ["zod@3.25.67", "", {}, "sha512-idA2YXwpCdqUSKRCACDE6ItZD9TZzy3OZMtpfLoh6oPR47lipysRrJfjzMqFxQ3uJuUPyUeWe1r9vLH33xO/Qw=="], + + "zod-to-json-schema": ["zod-to-json-schema@3.24.5", "", { "peerDependencies": { "zod": "^3.24.1" } }, "sha512-/AuWwMP+YqiPbsJx5D6TfgRTc4kTLjsh5SOcd4bLsfUg2RcEXrFMJl1DGgdHy2aCfsIA/cr/1JM0xcB2GZji8g=="], + } +} diff --git a/templates/typescript/anthropic-computer-use/index.ts b/templates/typescript/anthropic-computer-use/index.ts new file mode 100644 index 0000000..dc1ffcf --- /dev/null +++ b/templates/typescript/anthropic-computer-use/index.ts
@@ -0,0 +1,99 @@
+import { Kernel, type KernelContext } from '@onkernel/sdk';
+import { chromium } from 'playwright';
+import { z } from 'zod';
+import { ComputerUseAgent } from '@onkernel/cu-playwright';
+
+const kernel = new Kernel();
+
+const app = kernel.app('ts-anthropic-cu');
+
+// Structured shape requested from the agent for each Hacker News story.
+const HackerNewsStorySchema = z.object({
+  title: z.string(),
+  points: z.number(),
+  author: z.string(),
+  comments: z.number(),
+  url: z.string().optional(),
+});
+
+type HackerNewsStory = z.infer<typeof HackerNewsStorySchema>;
+
+interface GetStoriesInput {
+  /** How many stories to return; anything but a positive integer falls back to DEFAULT_STORY_COUNT. */
+  count?: number;
+}
+
+interface GetStoriesOutput {
+  stories: HackerNewsStory[];
+}
+
+const DEFAULT_STORY_COUNT = 5;
+
+// LLM API keys are set in the environment during `kernel deploy --env ANTHROPIC_API_KEY=XXX`
+// See https://docs.onkernel.com/launch/deploy#environment-variables
+const ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
+
+if (!ANTHROPIC_API_KEY) {
+  throw new Error('ANTHROPIC_API_KEY is not set');
+}
+
+/**
+ * Action: creates a Kernel browser, attaches Playwright to it over CDP, and asks the
+ * Computer Use agent for the top Hacker News stories.
+ *
+ * The action is registered as `computer-use-task` so that the invoke command printed
+ * by the CLI and shown in the README (`kernel invoke ts-anthropic-cu computer-use-task`)
+ * resolves to it.
+ *
+ * NOTE(review): those samples send `{ "query": ... }`, but only `count` is read here;
+ * a `query` field is ignored. Confirm which payload shape is intended.
+ */
+app.action(
+  'computer-use-task',
+  async (ctx: KernelContext, payload?: GetStoriesInput): Promise<GetStoriesOutput> => {
+    // Fall back to the default unless `count` is a positive integer; `||` alone let
+    // negative or fractional values through to the prompt and the `.max()` bound.
+    const requested = payload?.count;
+    const count =
+      typeof requested === 'number' && Number.isInteger(requested) && requested > 0
+        ? requested
+        : DEFAULT_STORY_COUNT;
+
+    const kernelBrowser = await kernel.browsers.create({
+      invocation_id: ctx.invocation_id,
+      stealth: true,
+    });
+
+    console.log("Kernel browser live view url: ", kernelBrowser.browser_live_view_url);
+
+    const browser = await chromium.connectOverCDP(kernelBrowser.cdp_ws_url);
+
+    try {
+      // Use the first context and page already present on the connected browser.
+      const context = browser.contexts()[0];
+      if (!context) {
+        throw new Error("No browser context found.");
+      }
+      const page = context.pages()[0];
+      if (!page) {
+        throw new Error("No page found in browser context.");
+      }
+
+      const agent = new ComputerUseAgent({
+        apiKey: ANTHROPIC_API_KEY,
+        page,
+      });
+
+      // The schema is passed so the result comes back as structured data
+      // (presumably validated against it by cu-playwright; confirm against its docs).
+      const stories = await agent.execute(
+        `Get the top ${count} Hacker News stories with their details`,
+        z.array(HackerNewsStorySchema).max(count)
+      );
+
+      return { stories };
+    } finally {
+      // Close the browser handle even when the agent throws.
+      await browser.close();
+    }
+  },
+);
\ No newline at end of file
diff --git a/templates/typescript/computer-use/package.json b/templates/typescript/anthropic-computer-use/package.json similarity index 68% rename from templates/typescript/computer-use/package.json rename to templates/typescript/anthropic-computer-use/package.json index fa8b15e..9811a1d 100644 --- a/templates/typescript/computer-use/package.json +++ b/templates/typescript/anthropic-computer-use/package.json @@ -1,5 +1,5 @@ { - "name": "ts-cu", + "name": "ts-anthropic-cu", "module": "index.ts", "type": "module", "private": true, @@ -8,8 +8,8 @@ }, "dependencies": { "@onkernel/sdk": "^0.6.0", + "@onkernel/cu-playwright": "^0.1.0", "playwright": "^1.52.0", - "@anthropic-ai/sdk": "0.52.0", - "luxon": "3.6.0" + "zod": "^3.25.0" } -} \ No newline at end of file +} \ No newline at end of file diff --git a/templates/typescript/computer-use/tsconfig.json b/templates/typescript/anthropic-computer-use/tsconfig.json similarity index 99% rename from templates/typescript/computer-use/tsconfig.json rename to templates/typescript/anthropic-computer-use/tsconfig.json index 39959d0..8431a0c 100644 --- a/templates/typescript/computer-use/tsconfig.json +++ b/templates/typescript/anthropic-computer-use/tsconfig.json @@ -27,5 +27,4 @@ }, "include": ["./**/*.ts", "./**/*.tsx"], "exclude": ["node_modules", "dist"] -} - \ No newline at end of file +} \
No newline at end of file diff --git a/templates/typescript/computer-use/README.md b/templates/typescript/computer-use/README.md deleted file mode 100644 index 7465e25..0000000 --- a/templates/typescript/computer-use/README.md +++ /dev/null @@ -1,7 +0,0 @@ -# Kernel Typscript Sample App - Computer Use - -This is a simple Kernel application that implements a prompt loop using Anthropic Computer Use. - -It generally follows the [Anthropic Reference Implementation](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo) but replaces `xodotool` and `gnome-screenshot` with Playwright. - -See the [docs](https://docs.onkernel.com/quickstart) for information. \ No newline at end of file diff --git a/templates/typescript/computer-use/_gitignore b/templates/typescript/computer-use/_gitignore deleted file mode 100644 index 9325515..0000000 --- a/templates/typescript/computer-use/_gitignore +++ /dev/null @@ -1,39 +0,0 @@ -# Dependencies -node_modules/ -package-lock.json - -# TypeScript -*.tsbuildinfo -dist/ -build/ - -# Environment -.env -.env.local -.env.*.local - -# IDE -.vscode/ -.idea/ -*.swp -*.swo - -# OS -.DS_Store -Thumbs.db - -# Logs -logs/ -*.log -npm-debug.log* -yarn-debug.log* -yarn-error.log* - -# Testing -coverage/ -.nyc_output/ - -# Misc -.cache/ -.temp/ -.tmp/ \ No newline at end of file diff --git a/templates/typescript/computer-use/index.ts b/templates/typescript/computer-use/index.ts deleted file mode 100644 index 8774dd1..0000000 --- a/templates/typescript/computer-use/index.ts +++ /dev/null @@ -1,83 +0,0 @@ -import { Kernel, type KernelContext } from '@onkernel/sdk'; -import { samplingLoop } from './loop'; -import { chromium } from 'playwright'; - -const kernel = new Kernel(); - -const app = kernel.app('ts-cu'); - -interface QueryInput { - query: string; -} - -interface QueryOutput { - result: string; -} - -// LLM API Keys are set in the environment during `kernel deploy -e ANTHROPIC_API_KEY=XXX` -// See 
https://docs.onkernel.com/launch/deploy#environment-variables -const ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY; - -if (!ANTHROPIC_API_KEY) { - throw new Error('ANTHROPIC_API_KEY is not set'); -} - -app.action( - 'cu-task', - async (ctx: KernelContext, payload?: QueryInput): Promise => { - if (!payload?.query) { - throw new Error('Query is required'); - } - - const kernelBrowser = await kernel.browsers.create({ - invocation_id: ctx.invocation_id, - stealth: true, - }); - - console.log("Kernel browser live view url: ", kernelBrowser.browser_live_view_url); - - const browser = await chromium.connectOverCDP(kernelBrowser.cdp_ws_url); - const context = await browser.contexts()[0]; - const page = await context?.pages()[0]; - if (!page) { - throw new Error('Error getting initial page'); - } - - try { - // Run the sampling loop - const finalMessages = await samplingLoop({ - model: 'claude-sonnet-4-20250514', - messages: [{ - role: 'user', - content: payload.query, - }], - apiKey: ANTHROPIC_API_KEY, - thinkingBudget: 1024, - playwrightPage: page, - }); - - // Extract the final result from the messages - if (finalMessages.length === 0) { - throw new Error('No messages were generated during the sampling loop'); - } - - const lastMessage = finalMessages[finalMessages.length - 1]; - if (!lastMessage) { - throw new Error('Failed to get the last message from the sampling loop'); - } - - const result = typeof lastMessage.content === 'string' - ? lastMessage.content - : lastMessage.content.map(block => - block.type === 'text' ? 
block.text : '' - ).join(''); - - return { result }; - } catch (error) { - console.error('Error in sampling loop:', error); - throw error; - } finally { - await browser.close(); - } - }, -); diff --git a/templates/typescript/computer-use/loop.ts b/templates/typescript/computer-use/loop.ts deleted file mode 100644 index 11ffbe9..0000000 --- a/templates/typescript/computer-use/loop.ts +++ /dev/null @@ -1,194 +0,0 @@ -import { Anthropic } from '@anthropic-ai/sdk'; -import { DateTime } from 'luxon'; -import type { Page } from 'playwright'; -import type { BetaMessageParam, BetaTextBlock } from './types/beta'; -import { ToolCollection, DEFAULT_TOOL_VERSION, TOOL_GROUPS_BY_VERSION, type ToolVersion } from './tools/collection'; -import { responseToParams, maybeFilterToNMostRecentImages, injectPromptCaching, PROMPT_CACHING_BETA_FLAG } from './utils/message-processing'; -import { makeApiToolResult } from './utils/tool-results'; -import { ComputerTool20241022, ComputerTool20250124 } from './tools/computer'; -import type { ActionParams } from './tools/types/computer'; -import { Action } from './tools/types/computer'; - -// System prompt optimized for the environment -const SYSTEM_PROMPT = ` -* You are utilising an Ubuntu virtual machine using ${process.arch} architecture with internet access. -* When you connect to the display, CHROMIUM IS ALREADY OPEN. The url bar is not visible but it is there. -* If you need to navigate to a new page, use ctrl+l to focus the url bar and then enter the url. -* You won't be able to see the url bar from the screenshot but ctrl-l still works. -* As the initial step click on the search bar. -* When viewing a page it can be helpful to zoom out so that you can see everything on the page. -* Either that, or make sure you scroll down to see everything before deciding something isn't available. -* When using your computer function calls, they take a while to run and send back to you. 
-* Where possible/feasible, try to chain multiple of these calls all into one function calls request. -* The current date is ${DateTime.now().toFormat('EEEE, MMMM d, yyyy')}. -* After each step, take a screenshot and carefully evaluate if you have achieved the right outcome. -* Explicitly show your thinking: "I have evaluated step X..." If not correct, try again. -* Only when you confirm a step was executed correctly should you move on to the next one. - - - -* When using Chromium, if a startup wizard appears, IGNORE IT. Do not even click "skip this step". -* Instead, click on the search bar on the center of the screen where it says "Search or enter address", and enter the appropriate search term or URL there. -`; - -// Add new type definitions -interface ThinkingConfig { - type: 'enabled'; - budget_tokens: number; -} - -interface ExtraBodyConfig { - thinking?: ThinkingConfig; -} - -interface ToolUseInput extends Record { - action: Action; -} - -export async function samplingLoop({ - model, - systemPromptSuffix, - messages, - apiKey, - onlyNMostRecentImages, - maxTokens = 4096, - toolVersion, - thinkingBudget, - tokenEfficientToolsBeta = false, - playwrightPage, -}: { - model: string; - systemPromptSuffix?: string; - messages: BetaMessageParam[]; - apiKey: string; - onlyNMostRecentImages?: number; - maxTokens?: number; - toolVersion?: ToolVersion; - thinkingBudget?: number; - tokenEfficientToolsBeta?: boolean; - playwrightPage: Page; -}): Promise { - const selectedVersion = toolVersion || DEFAULT_TOOL_VERSION; - const toolGroup = TOOL_GROUPS_BY_VERSION[selectedVersion]; - const toolCollection = new ToolCollection(...toolGroup.tools.map((Tool: typeof ComputerTool20241022 | typeof ComputerTool20250124) => new Tool(playwrightPage))); - - const system: BetaTextBlock = { - type: 'text', - text: `${SYSTEM_PROMPT}${systemPromptSuffix ? ' ' + systemPromptSuffix : ''}`, - }; - - while (true) { - const betas: string[] = toolGroup.beta_flag ? 
[toolGroup.beta_flag] : []; - - if (tokenEfficientToolsBeta) { - betas.push('token-efficient-tools-2025-02-19'); - } - - let imageTruncationThreshold = onlyNMostRecentImages || 0; - - const client = new Anthropic({ apiKey, maxRetries: 4 }); - const enablePromptCaching = true; - - if (enablePromptCaching) { - betas.push(PROMPT_CACHING_BETA_FLAG); - injectPromptCaching(messages); - onlyNMostRecentImages = 0; - (system as BetaTextBlock).cache_control = { type: 'ephemeral' }; - } - - if (onlyNMostRecentImages) { - maybeFilterToNMostRecentImages( - messages, - onlyNMostRecentImages, - imageTruncationThreshold - ); - } - - const extraBody: ExtraBodyConfig = {}; - if (thinkingBudget) { - extraBody.thinking = { type: 'enabled', budget_tokens: thinkingBudget }; - } - - const toolParams = toolCollection.toParams(); - - const response = await client.beta.messages.create({ - max_tokens: maxTokens, - messages, - model, - system: [system], - tools: toolParams, - betas, - ...extraBody, - }); - - const responseParams = responseToParams(response); - - const loggableContent = responseParams.map(block => { - if (block.type === 'tool_use') { - return { - type: 'tool_use', - name: block.name, - input: block.input - }; - } - return block; - }); - console.log('=== LLM RESPONSE ==='); - console.log('Stop reason:', response.stop_reason); - console.log(loggableContent); - console.log("===") - - messages.push({ - role: 'assistant', - content: responseParams, - }); - - if (response.stop_reason === 'end_turn') { - console.log('LLM has completed its task, ending loop'); - return messages; - } - - const toolResultContent = []; - let hasToolUse = false; - - for (const contentBlock of responseParams) { - if (contentBlock.type === 'tool_use' && contentBlock.name && contentBlock.input && typeof contentBlock.input === 'object') { - const input = contentBlock.input as ToolUseInput; - if ('action' in input && typeof input.action === 'string') { - hasToolUse = true; - const toolInput: ActionParams = { - 
action: input.action as Action, - ...Object.fromEntries( - Object.entries(input).filter(([key]) => key !== 'action') - ) - }; - - try { - const result = await toolCollection.run( - contentBlock.name, - toolInput - ); - - const toolResult = makeApiToolResult(result, contentBlock.id!); - toolResultContent.push(toolResult); - } catch (error) { - console.error(error); - throw error; - } - } - } - } - - if (toolResultContent.length === 0 && !hasToolUse && response.stop_reason !== 'tool_use') { - console.log('No tool use or results, and not waiting for tool use, ending loop'); - return messages; - } - - if (toolResultContent.length > 0) { - messages.push({ - role: 'user', - content: toolResultContent, - }); - } - } -} diff --git a/templates/typescript/computer-use/pnpm-lock.yaml b/templates/typescript/computer-use/pnpm-lock.yaml deleted file mode 100644 index 344fa3e..0000000 --- a/templates/typescript/computer-use/pnpm-lock.yaml +++ /dev/null @@ -1,79 +0,0 @@ -lockfileVersion: '9.0' - -settings: - autoInstallPeers: true - excludeLinksFromLockfile: false - -importers: - - .: - dependencies: - '@anthropic-ai/sdk': - specifier: 0.52.0 - version: 0.52.0 - '@onkernel/sdk': - specifier: '>=0.5.0' - version: 0.5.0 - luxon: - specifier: 3.6.0 - version: 3.6.0 - playwright: - specifier: ^1.52.0 - version: 1.52.0 - typescript: - specifier: ^5 - version: 5.8.3 - -packages: - - '@anthropic-ai/sdk@0.52.0': - resolution: {integrity: sha512-d4c+fg+xy9e46c8+YnrrgIQR45CZlAi7PwdzIfDXDM6ACxEZli1/fxhURsq30ZpMZy6LvSkr41jGq5aF5TD7rQ==} - hasBin: true - - '@onkernel/sdk@0.5.0': - resolution: {integrity: sha512-n7gwc7rU0GY/XcDnEV0piHPd76bHTSfuTjQW4qFKUWQji0UK9YUVKDFklqAWbyGlXPUezWCfxh79ELv2cFYOBA==} - - fsevents@2.3.2: - resolution: {integrity: sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==} - engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0} - os: [darwin] - - luxon@3.6.0: - resolution: {integrity: 
sha512-WE7p0p7W1xji9qxkLYsvcIxZyfP48GuFrWIBQZIsbjCyf65dG1rv4n83HcOyEyhvzxJCrUoObCRNFgRNIQ5KNA==} - engines: {node: '>=12'} - - playwright-core@1.52.0: - resolution: {integrity: sha512-l2osTgLXSMeuLZOML9qYODUQoPPnUsKsb5/P6LJ2e6uPKXUdPK5WYhN4z03G+YNbWmGDY4YENauNu4ZKczreHg==} - engines: {node: '>=18'} - hasBin: true - - playwright@1.52.0: - resolution: {integrity: sha512-JAwMNMBlxJ2oD1kce4KPtMkDeKGHQstdpFPcPH3maElAXon/QZeTvtsfXmTMRyO9TslfoYOXkSsvao2nE1ilTw==} - engines: {node: '>=18'} - hasBin: true - - typescript@5.8.3: - resolution: {integrity: sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==} - engines: {node: '>=14.17'} - hasBin: true - -snapshots: - - '@anthropic-ai/sdk@0.52.0': {} - - '@onkernel/sdk@0.5.0': {} - - fsevents@2.3.2: - optional: true - - luxon@3.6.0: {} - - playwright-core@1.52.0: {} - - playwright@1.52.0: - dependencies: - playwright-core: 1.52.0 - optionalDependencies: - fsevents: 2.3.2 - - typescript@5.8.3: {} diff --git a/templates/typescript/computer-use/tools/collection.ts b/templates/typescript/computer-use/tools/collection.ts deleted file mode 100644 index 45f3afe..0000000 --- a/templates/typescript/computer-use/tools/collection.ts +++ /dev/null @@ -1,61 +0,0 @@ -import { ComputerTool20241022, ComputerTool20250124 } from './computer'; -import { Action } from './types/computer'; -import type { ActionParams, ToolResult } from './types/computer'; - -export type ToolVersion = 'computer_use_20250124' | 'computer_use_20241022' | 'computer_use_20250429'; - -export const DEFAULT_TOOL_VERSION: ToolVersion = 'computer_use_20250429'; - -interface ToolGroup { - readonly version: ToolVersion; - readonly tools: (typeof ComputerTool20241022 | typeof ComputerTool20250124)[]; - readonly beta_flag: string; -} - -export const TOOL_GROUPS: ToolGroup[] = [ - { - version: 'computer_use_20241022', - tools: [ComputerTool20241022], - beta_flag: 'computer-use-2024-10-22', - }, - { - version: 'computer_use_20250124', - 
tools: [ComputerTool20250124], - beta_flag: 'computer-use-2025-01-24', - }, - // 20250429 version inherits from 20250124 - { - version: 'computer_use_20250429', - tools: [ComputerTool20250124], - beta_flag: 'computer-use-2025-01-24', - }, -]; - -export const TOOL_GROUPS_BY_VERSION: Record = Object.fromEntries( - TOOL_GROUPS.map(group => [group.version, group]) -) as Record; - -export class ToolCollection { - private tools: Map; - - constructor(...tools: (ComputerTool20241022 | ComputerTool20250124)[]) { - this.tools = new Map(tools.map(tool => [tool.name, tool])); - } - - toParams(): ActionParams[] { - return Array.from(this.tools.values()).map(tool => tool.toParams()); - } - - async run(name: string, toolInput: { action: Action } & Record): Promise { - const tool = this.tools.get(name); - if (!tool) { - throw new Error(`Tool ${name} not found`); - } - - if (!Object.values(Action).includes(toolInput.action)) { - throw new Error(`Invalid action ${toolInput.action} for tool ${name}`); - } - - return await tool.call(toolInput); - } -} \ No newline at end of file diff --git a/templates/typescript/computer-use/tools/computer.ts b/templates/typescript/computer-use/tools/computer.ts deleted file mode 100644 index df8e021..0000000 --- a/templates/typescript/computer-use/tools/computer.ts +++ /dev/null @@ -1,250 +0,0 @@ -import type { Page } from 'playwright'; -import { Action, ToolError } from './types/computer'; -import type { ActionParams, BaseAnthropicTool, ToolResult } from './types/computer'; -import { KeyboardUtils } from './utils/keyboard'; -import { ActionValidator } from './utils/validator'; - -const TYPING_DELAY_MS = 12; - -export class ComputerTool implements BaseAnthropicTool { - name: 'computer' = 'computer'; - protected page: Page; - protected _screenshotDelay = 2.0; - protected version: '20241022' | '20250124'; - - private readonly mouseActions = new Set([ - Action.LEFT_CLICK, - Action.RIGHT_CLICK, - Action.MIDDLE_CLICK, - Action.DOUBLE_CLICK, - 
Action.TRIPLE_CLICK, - Action.MOUSE_MOVE, - Action.LEFT_CLICK_DRAG, - Action.LEFT_MOUSE_DOWN, - Action.LEFT_MOUSE_UP, - ]); - - private readonly keyboardActions = new Set([ - Action.KEY, - Action.TYPE, - Action.HOLD_KEY, - ]); - - private readonly systemActions = new Set([ - Action.SCREENSHOT, - Action.CURSOR_POSITION, - Action.SCROLL, - Action.WAIT, - ]); - - constructor(page: Page, version: '20241022' | '20250124' = '20250124') { - this.page = page; - this.version = version; - } - - get apiType(): 'computer_20241022' | 'computer_20250124' { - return this.version === '20241022' ? 'computer_20241022' : 'computer_20250124'; - } - - toParams(): ActionParams { - const params = { - name: this.name, - type: this.apiType, - display_width_px: 1280, - display_height_px: 720, - display_number: null, - }; - return params; - } - - private getMouseButton(action: Action): 'left' | 'right' | 'middle' { - switch (action) { - case Action.LEFT_CLICK: - case Action.DOUBLE_CLICK: - case Action.TRIPLE_CLICK: - case Action.LEFT_CLICK_DRAG: - case Action.LEFT_MOUSE_DOWN: - case Action.LEFT_MOUSE_UP: - return 'left'; - case Action.RIGHT_CLICK: - return 'right'; - case Action.MIDDLE_CLICK: - return 'middle'; - default: - throw new ToolError(`Invalid mouse action: ${action}`); - } - } - - private async handleMouseAction(action: Action, coordinate: [number, number]): Promise { - const [x, y] = ActionValidator.validateAndGetCoordinates(coordinate); - await this.page.mouse.move(x, y); - await this.page.waitForTimeout(100); - - if (action === Action.LEFT_MOUSE_DOWN) { - await this.page.mouse.down(); - } else if (action === Action.LEFT_MOUSE_UP) { - await this.page.mouse.up(); - } else { - const button = this.getMouseButton(action); - if (action === Action.DOUBLE_CLICK) { - await this.page.mouse.dblclick(x, y, { button }); - } else if (action === Action.TRIPLE_CLICK) { - await this.page.mouse.click(x, y, { button, clickCount: 3 }); - } else { - await this.page.mouse.click(x, y, { button }); - } 
- } - - await this.page.waitForTimeout(500); - return await this.screenshot(); - } - - private async handleKeyboardAction(action: Action, text: string, duration?: number): Promise { - if (action === Action.HOLD_KEY) { - const key = KeyboardUtils.getPlaywrightKey(text); - await this.page.keyboard.down(key); - await new Promise(resolve => setTimeout(resolve, duration! * 1000)); - await this.page.keyboard.up(key); - } else if (action === Action.KEY) { - const keys = KeyboardUtils.parseKeyCombination(text); - for (const key of keys) { - await this.page.keyboard.down(key); - } - for (const key of keys.reverse()) { - await this.page.keyboard.up(key); - } - } else { - await this.page.keyboard.type(text, { delay: TYPING_DELAY_MS }); - } - - await this.page.waitForTimeout(500); - return await this.screenshot(); - } - - async screenshot(): Promise { - try { - console.log('Starting screenshot...'); - await new Promise(resolve => setTimeout(resolve, this._screenshotDelay * 1000)); - const screenshot = await this.page.screenshot({ type: 'png' }); - console.log('Screenshot taken, size:', screenshot.length, 'bytes'); - - return { - base64Image: screenshot.toString('base64'), - }; - } catch (error) { - throw new ToolError(`Failed to take screenshot: ${error}`); - } - } - - async call(params: ActionParams): Promise { - const { - action, - text, - coordinate, - scrollDirection: scrollDirectionParam, - scroll_amount, - scrollAmount, - duration, - ...kwargs - } = params; - - ActionValidator.validateActionParams(params, this.mouseActions, this.keyboardActions); - - if (action === Action.SCREENSHOT) { - return await this.screenshot(); - } - - if (action === Action.CURSOR_POSITION) { - const position = await this.page.evaluate(() => { - const selection = window.getSelection(); - const range = selection?.getRangeAt(0); - const rect = range?.getBoundingClientRect(); - return rect ? 
{ x: rect.x, y: rect.y } : null; - }); - - if (!position) { - throw new ToolError('Failed to get cursor position'); - } - - return { output: `X=${position.x},Y=${position.y}` }; - } - - if (action === Action.SCROLL) { - if (this.version !== '20250124') { - throw new ToolError(`${action} is only available in version 20250124`); - } - - const scrollDirection = scrollDirectionParam || kwargs.scroll_direction; - const scrollAmountValue = scrollAmount || scroll_amount; - - if (!scrollDirection || !['up', 'down', 'left', 'right'].includes(scrollDirection)) { - throw new ToolError(`Scroll direction "${scrollDirection}" must be 'up', 'down', 'left', or 'right'`); - } - if (typeof scrollAmountValue !== 'number' || scrollAmountValue < 0) { - throw new ToolError(`Scroll amount "${scrollAmountValue}" must be a non-negative number`); - } - - if (coordinate) { - const [x, y] = ActionValidator.validateAndGetCoordinates(coordinate); - await this.page.mouse.move(x, y); - await this.page.waitForTimeout(100); - } - - const pageDimensions = await this.page.evaluate(() => { - return { h: window.innerHeight, w: window.innerWidth }; - }); - const pagePartitions = 25; - const scrollFactor = (scrollAmountValue || 10) / pagePartitions; - - if (scrollDirection === 'down' || scrollDirection === 'up') { - const amount = pageDimensions.h * scrollFactor; - console.log(`Scrolling ${amount.toFixed(2)} pixels ${scrollDirection}`); - await this.page.mouse.wheel(0, scrollDirection === 'down' ? amount : -amount); - } else { - const amount = pageDimensions.w * scrollFactor; - console.log(`Scrolling ${amount.toFixed(2)} pixels ${scrollDirection}`); - await this.page.mouse.wheel(scrollDirection === 'right' ? 
amount : -amount, 0); - } - - await this.page.waitForTimeout(500); - return await this.screenshot(); - } - - if (action === Action.WAIT) { - if (this.version !== '20250124') { - throw new ToolError(`${action} is only available in version 20250124`); - } - await new Promise(resolve => setTimeout(resolve, duration! * 1000)); - return await this.screenshot(); - } - - if (this.mouseActions.has(action)) { - if (!coordinate) { - throw new ToolError(`coordinate is required for ${action}`); - } - return await this.handleMouseAction(action, coordinate); - } - - if (this.keyboardActions.has(action)) { - if (!text) { - throw new ToolError(`text is required for ${action}`); - } - return await this.handleKeyboardAction(action, text, duration); - } - - throw new ToolError(`Invalid action: ${action}`); - } -} - -// For backward compatibility -export class ComputerTool20241022 extends ComputerTool { - constructor(page: Page) { - super(page, '20241022'); - } -} - -export class ComputerTool20250124 extends ComputerTool { - constructor(page: Page) { - super(page, '20250124'); - } -} diff --git a/templates/typescript/computer-use/tools/types/computer.ts b/templates/typescript/computer-use/tools/types/computer.ts deleted file mode 100644 index f4b061f..0000000 --- a/templates/typescript/computer-use/tools/types/computer.ts +++ /dev/null @@ -1,64 +0,0 @@ -export enum Action { - // Mouse actions - MOUSE_MOVE = 'mouse_move', - LEFT_CLICK = 'left_click', - RIGHT_CLICK = 'right_click', - MIDDLE_CLICK = 'middle_click', - DOUBLE_CLICK = 'double_click', - TRIPLE_CLICK = 'triple_click', - LEFT_CLICK_DRAG = 'left_click_drag', - LEFT_MOUSE_DOWN = 'left_mouse_down', - LEFT_MOUSE_UP = 'left_mouse_up', - - // Keyboard actions - KEY = 'key', - TYPE = 'type', - HOLD_KEY = 'hold_key', - - // System actions - SCREENSHOT = 'screenshot', - CURSOR_POSITION = 'cursor_position', - SCROLL = 'scroll', - WAIT = 'wait', -} - -// For backward compatibility -export type Action_20241022 = Action; -export type 
Action_20250124 = Action; - -export type MouseButton = 'left' | 'right' | 'middle'; -export type ScrollDirection = 'up' | 'down' | 'left' | 'right'; -export type Coordinate = [number, number]; -export type Duration = number; - -export interface ActionParams { - action: Action; - text?: string; - coordinate?: Coordinate; - scrollDirection?: ScrollDirection; - scroll_amount?: number; - scrollAmount?: number; - duration?: Duration; - key?: string; - [key: string]: Action | string | Coordinate | ScrollDirection | number | Duration | undefined; -} - -export interface ToolResult { - output?: string; - error?: string; - base64Image?: string; - system?: string; -} - -export interface BaseAnthropicTool { - name: string; - apiType: string; - toParams(): ActionParams; -} - -export class ToolError extends Error { - constructor(message: string) { - super(message); - this.name = 'ToolError'; - } -} \ No newline at end of file diff --git a/templates/typescript/computer-use/tools/utils/keyboard.ts b/templates/typescript/computer-use/tools/utils/keyboard.ts deleted file mode 100644 index 244cddf..0000000 --- a/templates/typescript/computer-use/tools/utils/keyboard.ts +++ /dev/null @@ -1,88 +0,0 @@ -export class KeyboardUtils { - // Only map alternative names to standard Playwright modifier keys - private static readonly modifierKeyMap: Record = { - 'ctrl': 'Control', - 'alt': 'Alt', - 'cmd': 'Meta', - 'command': 'Meta', - 'win': 'Meta', - }; - - // Essential key mappings for Playwright compatibility - private static readonly keyMap: Record = { - 'return': 'Enter', - 'space': ' ', - 'left': 'ArrowLeft', - 'right': 'ArrowRight', - 'up': 'ArrowUp', - 'down': 'ArrowDown', - 'home': 'Home', - 'end': 'End', - 'pageup': 'PageUp', - 'page_up': 'PageUp', - 'pagedown': 'PageDown', - 'page_down': 'PageDown', - 'delete': 'Delete', - 'backspace': 'Backspace', - 'tab': 'Tab', - 'esc': 'Escape', - 'escape': 'Escape', - 'insert': 'Insert', - 'super_l': 'Meta', - 'f1': 'F1', - 'f2': 'F2', - 'f3': 
'F3', - 'f4': 'F4', - 'f5': 'F5', - 'f6': 'F6', - 'f7': 'F7', - 'f8': 'F8', - 'f9': 'F9', - 'f10': 'F10', - 'f11': 'F11', - 'f12': 'F12', - 'minus': '-', - 'equal': '=', - 'plus': '+', - }; - - static isModifierKey(key: string | undefined): boolean { - if (!key) return false; - const normalizedKey = this.modifierKeyMap[key.toLowerCase()] || key; - return ['Control', 'Alt', 'Shift', 'Meta'].includes(normalizedKey); - } - - static getPlaywrightKey(key: string | undefined): string { - if (!key) { - throw new Error('Key cannot be undefined'); - } - - const normalizedKey = key.toLowerCase(); - - // Handle special cases - if (normalizedKey in this.keyMap) { - return this.keyMap[normalizedKey] as string; - } - - // Normalize modifier keys - if (normalizedKey in this.modifierKeyMap) { - return this.modifierKeyMap[normalizedKey] as string; - } - - // Return the key as is - Playwright handles standard key names - return key; - } - - static parseKeyCombination(combo: string): string[] { - if (!combo) { - throw new Error('Key combination cannot be empty'); - } - return combo.toLowerCase().split('+').map(key => { - const trimmedKey = key.trim(); - if (!trimmedKey) { - throw new Error('Invalid key combination: empty key'); - } - return this.getPlaywrightKey(trimmedKey); - }); - } -} \ No newline at end of file diff --git a/templates/typescript/computer-use/tools/utils/validator.ts b/templates/typescript/computer-use/tools/utils/validator.ts deleted file mode 100644 index b8522c8..0000000 --- a/templates/typescript/computer-use/tools/utils/validator.ts +++ /dev/null @@ -1,67 +0,0 @@ -import { Action, ToolError } from '../types/computer'; -import type { ActionParams, Coordinate, Duration } from '../types/computer'; - -export class ActionValidator { - static validateText(text: string | undefined, required: boolean, action: string): void { - if (required && text === undefined) { - throw new ToolError(`text is required for ${action}`); - } - if (text !== undefined && typeof text !== 
'string') { - throw new ToolError(`${text} must be a string`); - } - } - - static validateCoordinate(coordinate: Coordinate | undefined, required: boolean, action: string): void { - if (required && !coordinate) { - throw new ToolError(`coordinate is required for ${action}`); - } - if (coordinate) { - this.validateAndGetCoordinates(coordinate); - } - } - - static validateDuration(duration: Duration | undefined): void { - if (duration === undefined || typeof duration !== 'number') { - throw new ToolError(`${duration} must be a number`); - } - if (duration < 0) { - throw new ToolError(`${duration} must be non-negative`); - } - if (duration > 100) { - throw new ToolError(`${duration} is too long`); - } - } - - static validateAndGetCoordinates(coordinate: Coordinate): Coordinate { - if (!Array.isArray(coordinate) || coordinate.length !== 2) { - throw new ToolError(`${coordinate} must be a tuple of length 2`); - } - if (!coordinate.every(i => typeof i === 'number' && i >= 0)) { - throw new ToolError(`${coordinate} must be a tuple of non-negative numbers`); - } - return coordinate; - } - - static validateActionParams(params: ActionParams, mouseActions: Set, keyboardActions: Set): void { - const { action, text, coordinate, duration } = params; - - // Validate text parameter - if (keyboardActions.has(action)) { - this.validateText(text, true, action); - } else { - this.validateText(text, false, action); - } - - // Validate coordinate parameter - if (mouseActions.has(action)) { - this.validateCoordinate(coordinate, true, action); - } else { - this.validateCoordinate(coordinate, false, action); - } - - // Validate duration parameter - if (action === Action.HOLD_KEY || action === Action.WAIT) { - this.validateDuration(duration); - } - } -} \ No newline at end of file diff --git a/templates/typescript/computer-use/types/beta.ts b/templates/typescript/computer-use/types/beta.ts deleted file mode 100644 index 35328d7..0000000 --- a/templates/typescript/computer-use/types/beta.ts 
+++ /dev/null @@ -1,58 +0,0 @@ -import type { BetaMessageParam as AnthropicMessageParam, BetaMessage as AnthropicMessage, BetaContentBlock as AnthropicContentBlock } from '@anthropic-ai/sdk/resources/beta/messages/messages'; -import type { ActionParams } from '../tools/types/computer'; - -// Re-export the SDK types -export type BetaMessageParam = AnthropicMessageParam; -export type BetaMessage = AnthropicMessage; -export type BetaContentBlock = AnthropicContentBlock; - -// Keep our local types for internal use -export interface BetaTextBlock { - type: 'text'; - text: string; - id?: string; - cache_control?: { type: 'ephemeral' }; -} - -export interface BetaImageBlock { - type: 'image'; - source: { - type: 'base64'; - media_type: 'image/png'; - data: string; - }; - id?: string; - cache_control?: { type: 'ephemeral' }; -} - -export interface BetaToolUseBlock { - type: 'tool_use'; - name: string; - input: ActionParams; - id?: string; - cache_control?: { type: 'ephemeral' }; -} - -export interface BetaThinkingBlock { - type: 'thinking'; - thinking: { - type: 'enabled'; - budget_tokens: number; - } | { - type: 'disabled'; - }; - signature?: string; - id?: string; - cache_control?: { type: 'ephemeral' }; -} - -export interface BetaToolResultBlock { - type: 'tool_result'; - content: (BetaTextBlock | BetaImageBlock)[] | string; - tool_use_id: string; - is_error: boolean; - id?: string; - cache_control?: { type: 'ephemeral' }; -} - -export type BetaLocalContentBlock = BetaTextBlock | BetaImageBlock | BetaToolUseBlock | BetaThinkingBlock | BetaToolResultBlock; \ No newline at end of file diff --git a/templates/typescript/computer-use/utils/message-processing.ts b/templates/typescript/computer-use/utils/message-processing.ts deleted file mode 100644 index acc5ac4..0000000 --- a/templates/typescript/computer-use/utils/message-processing.ts +++ /dev/null @@ -1,79 +0,0 @@ -import type { BetaMessage, BetaMessageParam, BetaToolResultBlock, BetaContentBlock, BetaLocalContentBlock } 
from '../types/beta'; - -export function responseToParams(response: BetaMessage): BetaContentBlock[] { - return response.content.map(block => { - if (block.type === 'text' && block.text) { - return { type: 'text', text: block.text }; - } - if (block.type === 'thinking') { - const { thinking, signature, ...rest } = block; - return { ...rest, thinking, ...(signature && { signature }) }; - } - return block as BetaContentBlock; - }); -} - -export function maybeFilterToNMostRecentImages( - messages: BetaMessageParam[], - imagesToKeep: number, - minRemovalThreshold: number -): void { - if (!imagesToKeep) return; - - const toolResultBlocks = messages - .flatMap(message => Array.isArray(message?.content) ? message.content : []) - .filter((item): item is BetaToolResultBlock => - typeof item === 'object' && item.type === 'tool_result' - ); - - const totalImages = toolResultBlocks.reduce((count, toolResult) => { - if (!Array.isArray(toolResult.content)) return count; - return count + toolResult.content.filter( - content => typeof content === 'object' && content.type === 'image' - ).length; - }, 0); - - let imagesToRemove = Math.floor((totalImages - imagesToKeep) / minRemovalThreshold) * minRemovalThreshold; - - for (const toolResult of toolResultBlocks) { - if (Array.isArray(toolResult.content)) { - toolResult.content = toolResult.content.filter(content => { - if (typeof content === 'object' && content.type === 'image') { - if (imagesToRemove > 0) { - imagesToRemove--; - return false; - } - } - return true; - }); - } - } -} - -const PROMPT_CACHING_BETA_FLAG = 'prompt-caching-2024-07-31'; - -export function injectPromptCaching(messages: BetaMessageParam[]): void { - let breakpointsRemaining = 3; - - for (let i = messages.length - 1; i >= 0; i--) { - const message = messages[i]; - if (!message) continue; - if (message.role === 'user' && Array.isArray(message.content)) { - if (breakpointsRemaining > 0) { - breakpointsRemaining--; - const lastContent = 
message.content[message.content.length - 1]; - if (lastContent) { - (lastContent as BetaLocalContentBlock).cache_control = { type: 'ephemeral' }; - } - } else { - const lastContent = message.content[message.content.length - 1]; - if (lastContent) { - delete (lastContent as BetaLocalContentBlock).cache_control; - } - break; - } - } - } -} - -export { PROMPT_CACHING_BETA_FLAG }; \ No newline at end of file diff --git a/templates/typescript/computer-use/utils/tool-results.ts b/templates/typescript/computer-use/utils/tool-results.ts deleted file mode 100644 index c18eab2..0000000 --- a/templates/typescript/computer-use/utils/tool-results.ts +++ /dev/null @@ -1,49 +0,0 @@ -import type { ToolResult } from '../tools/types/computer'; -import type { BetaToolResultBlock, BetaTextBlock, BetaImageBlock } from '../types/beta'; - -export function makeApiToolResult( - result: ToolResult, - toolUseId: string -): BetaToolResultBlock { - const toolResultContent: (BetaTextBlock | BetaImageBlock)[] = []; - let isError = false; - - if (result.error) { - isError = true; - toolResultContent.push({ - type: 'text', - text: maybePrependSystemToolResult(result, result.error), - }); - } else { - if (result.output) { - toolResultContent.push({ - type: 'text', - text: maybePrependSystemToolResult(result, result.output), - }); - } - if (result.base64Image) { - toolResultContent.push({ - type: 'image', - source: { - type: 'base64', - media_type: 'image/png', - data: result.base64Image, - }, - }); - } - } - - return { - type: 'tool_result', - content: toolResultContent, - tool_use_id: toolUseId, - is_error: isError, - }; -} - -export function maybePrependSystemToolResult(result: ToolResult, resultText: string): string { - if (result.system) { - return `${result.system}\n${resultText}`; - } - return resultText; -} \ No newline at end of file