Skip to content

Commit 314c283

Browse files
feat: Add Gemini CUA template (#59)
* feat(create-kernel-app): Add Gemini CUA template Co-authored-by: null <> * fix(gemini-cua): remove redundant comments in runStagehandTask function * Update Clean Up Process Updated browser clean up process + added more robust error messaging to the output. * Remove unused variable removed unused variable that was declared but not used. * Make browserOptions more re-usable * Update dependencies Update dependencies for Kernel + Zod to fix viewport sizing. --------- Co-authored-by: tembo[bot] <208362400+tembo-io[bot]@users.noreply.github.com> Co-authored-by: Daniel Prevoznik <danny@onkernel.com>
1 parent 80b8048 commit 314c283

File tree

6 files changed

+259
-2
lines changed

6 files changed

+259
-2
lines changed

index.ts

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ type TemplateKey =
2020
| "advanced-sample"
2121
| "computer-use"
2222
| "cua"
23-
| "magnitude";
23+
| "magnitude"
24+
| "gemini-cua";
2425
type LanguageInfo = { name: string; shorthand: string };
2526
type TemplateInfo = {
2627
name: string;
@@ -38,6 +39,7 @@ const TEMPLATE_ADVANCED_SAMPLE = "advanced-sample";
3839
const TEMPLATE_COMPUTER_USE = "computer-use";
3940
const TEMPLATE_CUA = "cua";
4041
const TEMPLATE_MAGNITUDE = "magnitude";
42+
const TEMPLATE_GEMINI_CUA = "gemini-cua";
4143
const LANGUAGE_SHORTHAND_TS = "ts";
4244
const LANGUAGE_SHORTHAND_PY = "py";
4345

@@ -86,6 +88,11 @@ const TEMPLATES: Record<TemplateKey, TemplateInfo> = {
8688
description: "Implements the Magnitude.run SDK",
8789
languages: [LANGUAGE_TYPESCRIPT],
8890
},
91+
[TEMPLATE_GEMINI_CUA]: {
92+
name: "Gemini Computer Use",
93+
description: "Implements Gemini 2.5 Computer Use Agent",
94+
languages: [LANGUAGE_TYPESCRIPT],
95+
},
8996
};
9097

9198
const INVOKE_SAMPLES: Record<
@@ -104,6 +111,8 @@ const INVOKE_SAMPLES: Record<
104111
'kernel invoke ts-cua cua-task --payload \'{"task": "Go to https://news.ycombinator.com and get the top 5 articles"}\'',
105112
[TEMPLATE_MAGNITUDE]:
106113
'kernel invoke ts-magnitude mag-url-extract --payload \'{"url": "https://en.wikipedia.org/wiki/Special:Random"}\'',
114+
[TEMPLATE_GEMINI_CUA]:
115+
'kernel invoke ts-gemini-cua gemini-cua-task',
107116
},
108117
[LANGUAGE_PYTHON]: {
109118
[TEMPLATE_SAMPLE_APP]:
@@ -130,6 +139,7 @@ const REGISTERED_APP_NAMES: Record<
130139
[TEMPLATE_COMPUTER_USE]: "ts-cu",
131140
[TEMPLATE_CUA]: "ts-cua",
132141
[TEMPLATE_MAGNITUDE]: "ts-magnitude",
142+
[TEMPLATE_GEMINI_CUA]: "ts-gemini-cua",
133143
},
134144
[LANGUAGE_PYTHON]: {
135145
[TEMPLATE_SAMPLE_APP]: "python-basic",
@@ -372,6 +382,8 @@ function printNextSteps(
372382
? "kernel deploy index.ts --env ANTHROPIC_API_KEY=XXX"
373383
: language === LANGUAGE_TYPESCRIPT && template === TEMPLATE_CUA
374384
? "kernel deploy index.ts --env OPENAI_API_KEY=XXX"
385+
: language === LANGUAGE_TYPESCRIPT && template === TEMPLATE_GEMINI_CUA
386+
? "kernel deploy index.ts --env GOOGLE_API_KEY=XXX --env OPENAI_API_KEY=XXX"
375387
: language === LANGUAGE_PYTHON &&
376388
(template === TEMPLATE_SAMPLE_APP ||
377389
template === TEMPLATE_ADVANCED_SAMPLE)
@@ -415,7 +427,7 @@ program
415427
)
416428
.option(
417429
"-t, --template <template>",
418-
`Template type (${TEMPLATE_SAMPLE_APP}, ${TEMPLATE_BROWSER_USE}, ${TEMPLATE_STAGEHAND}, ${TEMPLATE_ADVANCED_SAMPLE}, ${TEMPLATE_COMPUTER_USE}, ${TEMPLATE_CUA}, ${TEMPLATE_MAGNITUDE})`
430+
`Template type (${TEMPLATE_SAMPLE_APP}, ${TEMPLATE_BROWSER_USE}, ${TEMPLATE_STAGEHAND}, ${TEMPLATE_ADVANCED_SAMPLE}, ${TEMPLATE_COMPUTER_USE}, ${TEMPLATE_CUA}, ${TEMPLATE_MAGNITUDE}, ${TEMPLATE_GEMINI_CUA})`
419431
)
420432
.action(
421433
async (
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Kernel TypeScript SDK + Stagehand + Gemini Computer Use Agent
2+
3+
A Kernel application that demonstrates Computer Use Agent (CUA) capabilities using Google's Gemini 2.5 model with Stagehand for browser automation.
4+
5+
## What It Does
6+
7+
This app uses [Gemini 2.5's computer use model](https://blog.google/technology/google-deepmind/gemini-computer-use-model/) to autonomously navigate websites and complete tasks. The example task finds Kernel's company page on the Y Combinator website and writes a blog post about their product offering.
8+
9+
## Setup
10+
11+
1. **Add your API keys as environment variables:**
12+
- `KERNEL_API_KEY` - Get from [Kernel dashboard](https://dashboard.onkernel.com/sign-in)
13+
- `GOOGLE_API_KEY` - Get from [Google AI Studio](https://aistudio.google.com/apikey)
14+
- `OPENAI_API_KEY` - Get from [OpenAI platform](https://platform.openai.com/api-keys)
15+
16+
## Running Locally
17+
18+
Execute the script directly with tsx:
19+
20+
```bash
21+
npx tsx index.ts
22+
```
23+
24+
This runs the agent without a Kernel invocation context and provides the browser live view URL for debugging.
25+
26+
## Deploying to Kernel
27+
28+
1. **Deploy the application:**
29+
```bash
30+
kernel deploy index.ts --env GOOGLE_API_KEY=XXX --env OPENAI_API_KEY=XXX
31+
```
32+
33+
2. **Invoke the action:**
34+
```bash
35+
kernel invoke ts-gemini-cua gemini-cua-task
36+
```
37+
38+
The action creates a Kernel-managed browser and associates it with the invocation for tracking and monitoring.
39+
40+
## Documentation
41+
42+
- [Kernel Documentation](https://docs.onkernel.com/quickstart)
43+
- [Kernel Stagehand Guide](https://www.onkernel.com/docs/integrations/stagehand)
44+
- [Gemini 2.5 Computer Use](https://blog.google/technology/google-deepmind/gemini-computer-use-model/)
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Dependencies
2+
node_modules/
3+
package-lock.json
4+
5+
# TypeScript
6+
*.tsbuildinfo
7+
dist/
8+
build/
9+
10+
# Environment
11+
.env
12+
.env.local
13+
.env.*.local
14+
15+
# IDE
16+
.vscode/
17+
.idea/
18+
*.swp
19+
*.swo
20+
21+
# OS
22+
.DS_Store
23+
Thumbs.db
24+
25+
# Logs
26+
logs/
27+
*.log
28+
npm-debug.log*
29+
yarn-debug.log*
30+
yarn-error.log*
31+
32+
# Testing
33+
coverage/
34+
.nyc_output/
35+
36+
# Misc
37+
.cache/
38+
.temp/
39+
.tmp/
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
import { pathToFileURL } from "node:url";

import { Stagehand } from "@browserbasehq/stagehand";
import { Kernel, type KernelContext } from '@onkernel/sdk';
3+
4+
/** Result payload produced by the Gemini CUA task. */
interface SearchQueryOutput {
  success: boolean;
  result: string;
  error?: string;
}

// Kernel client (authenticated via KERNEL_API_KEY) and the app that actions
// in this file are registered on.
const kernel = new Kernel({ apiKey: process.env.KERNEL_API_KEY });
const app = kernel.app('ts-gemini-cua');

// LLM provider credentials:
// - GOOGLE_API_KEY: used by the Gemini 2.5 Computer Use agent
// - OPENAI_API_KEY: used by Stagehand's GPT-4o model
// Provide them as environment variables or with
// `kernel deploy <filename> --env-file .env`.
// See https://docs.onkernel.com/launch/deploy#environment-variables
const { GOOGLE_API_KEY, OPENAI_API_KEY } = process.env;

// Fail fast at module load when a required key is missing.
if (!OPENAI_API_KEY) throw new Error('OPENAI_API_KEY is not set');
if (!GOOGLE_API_KEY) throw new Error('GOOGLE_API_KEY is not set');
31+
32+
/**
 * Executes a Computer Use Agent (CUA) task using Gemini 2.5 and Stagehand.
 *
 * Creates a Kernel browser, connects Stagehand to it over CDP, navigates to
 * the YCombinator companies page and lets the Gemini agent carry out the
 * instruction. The browser and the Stagehand session are always cleaned up,
 * whether the task succeeds or fails.
 *
 * @param invocationId - Kernel invocation ID to associate the browser with.
 *   Omitted when the script is run locally.
 * @returns `{ success: true, result }` with the agent's final message, or
 *   `{ success: false, result: "", error }` when Stagehand setup or the task
 *   itself fails.
 * @throws If the Kernel browser cannot be created; nothing needs cleaning up
 *   at that point, so the error is propagated to the caller.
 */
async function runStagehandTask(invocationId?: string): Promise<SearchQueryOutput> {
  const browserOptions = {
    stealth: true,
    // viewport: {
    //   width: 1440,
    //   height: 900,
    //   refresh_rate: 25
    // },
    ...(invocationId && { invocation_id: invocationId })
  };

  const kernelBrowser = await kernel.browsers.create(browserOptions);

  console.log("Kernel browser live view url: ", kernelBrowser.browser_live_view_url);

  // Declared outside the try block so the finally block can close it, but
  // constructed and initialised inside so that a setup failure still reaches
  // the cleanup code and the Kernel browser is not leaked.
  let stagehand: Stagehand | undefined;

  try {
    stagehand = new Stagehand({
      env: "LOCAL",
      verbose: 1,
      domSettleTimeoutMs: 30_000,
      modelName: "gpt-4o",
      modelClientOptions: {
        apiKey: OPENAI_API_KEY
      },
      localBrowserLaunchOptions: {
        cdpUrl: kernelBrowser.cdp_ws_url
      }
    });
    await stagehand.init();

    /////////////////////////////////////
    // Your Stagehand implementation here
    /////////////////////////////////////
    const page = stagehand.page;

    const agent = stagehand.agent({
      provider: "google",
      model: "gemini-2.5-computer-use-preview-10-2025",
      instructions: `You are a helpful assistant that can use a web browser.
      You are currently on the following page: ${page.url()}.
      Do not ask follow up questions, the user will trust your judgement.`,
      options: {
        apiKey: GOOGLE_API_KEY,
      }
    });

    // Navigate to YCombinator's website
    await page.goto("https://www.ycombinator.com/companies");

    // Define the instructions for the CUA agent
    const instruction = "Find Kernel's company page on the YCombinator website and write a blog post about their product offering.";

    // Execute the instruction
    const result = await agent.execute({
      instruction,
      maxSteps: 20,
    });

    console.log("result: ", result);

    return { success: true, result: result.message };
  } catch (error) {
    console.error(error);
    const errorMessage = error instanceof Error ? error.message : String(error);
    return { success: false, result: "", error: errorMessage };
  } finally {
    console.log("Deleting browser and closing stagehand...");

    // Each cleanup step is isolated: a failure while closing Stagehand must
    // not prevent the Kernel browser from being deleted, and a cleanup error
    // must not replace the task result computed above.
    try {
      await stagehand?.close();
    } catch (closeError) {
      console.error("Failed to close stagehand:", closeError);
    }

    try {
      await kernel.browsers.deleteByID(kernelBrowser.session_id);
    } catch (deleteError) {
      console.error("Failed to delete Kernel browser:", deleteError);
    }
  }
}
105+
106+
// Kernel action registration: exposes the task for remote invocation.
// Trigger with: kernel invoke ts-gemini-cua gemini-cua-task
app.action<void, SearchQueryOutput>(
  'gemini-cua-task',
  async (ctx: KernelContext): Promise<SearchQueryOutput> => {
    // Forward the invocation ID so the browser is tied to this invocation.
    const output = await runStagehandTask(ctx.invocation_id);
    return output;
  },
);
114+
115+
// Run locally if executed directly (not imported as a module)
// Execute via: npx tsx index.ts
//
// `import.meta.url` is a percent-encoded file URL, so the entry script path is
// converted with `pathToFileURL` rather than concatenated onto "file://".
// Plain concatenation never matches on Windows (drive letters, backslashes)
// or for paths containing characters such as spaces.
const entryScript = process.argv[1];
if (entryScript !== undefined && import.meta.url === pathToFileURL(entryScript).href) {
  runStagehandTask().then(result => {
    console.log('Local execution result:', result);
    // Exit code mirrors the task outcome so shell callers can detect failure.
    process.exit(result.success ? 0 : 1);
  }).catch(error => {
    console.error('Local execution failed:', error);
    process.exit(1);
  });
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"name": "ts-gemini-cua",
3+
"module": "index.ts",
4+
"type": "module",
5+
"private": true,
6+
"peerDependencies": {
7+
"typescript": "^5"
8+
},
9+
"dependencies": {
10+
"@browserbasehq/stagehand": "^2.5.2",
11+
"@onkernel/sdk": "^0.15.0",
12+
"zod": "^3.25.67"
13+
}
14+
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"compilerOptions": {
3+
"lib": ["ESNext", "DOM"],
4+
"target": "ESNext",
5+
"module": "ESNext",
6+
"moduleDetection": "force",
7+
"jsx": "react-jsx",
8+
"allowJs": true,
9+
"moduleResolution": "bundler",
10+
"allowImportingTsExtensions": true,
11+
"verbatimModuleSyntax": true,
12+
"noEmit": true,
13+
"strict": true,
14+
"skipLibCheck": true,
15+
"noFallthroughCasesInSwitch": true,
16+
"noUncheckedIndexedAccess": true,
17+
"noUnusedLocals": false,
18+
"noUnusedParameters": false,
19+
"noPropertyAccessFromIndexSignature": false
20+
},
21+
"include": ["./**/*.ts", "./**/*.tsx"],
22+
"exclude": ["node_modules", "dist"]
23+
}

0 commit comments

Comments
 (0)