Skip to content

Commit 317925e

Browse files
raiden-staging and raidendotai authored
ts-cua : lint + openai sdk types ; python-cua version bumps (#40)
* ts-cua sample added * python/browser-use : fixed viewport & window resize * ts-cua updates+ * kernel sdk version in package * kernel sdk versions * * cleanup * added python-cua + updated readme & cli * ts-cua: lint & openai types update * ts-cua: lint & openai types update * * ts-cua: lint & openai types update ** * python-cua: requirements versions * python-cua: requirements versions * * cli : payload examples fixes --------- Co-authored-by: raidendotai <railway@raiden.ai>
1 parent 60a2f40 commit 317925e

File tree

16 files changed

+788
-847
lines changed

16 files changed

+788
-847
lines changed

index.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ const INVOKE_SAMPLES: Record<
9696
[TEMPLATE_COMPUTER_USE]:
9797
'kernel invoke ts-cu cu-task --payload \'{"query": "Return the first url of a search result for NYC restaurant reviews Pete Wells"}\'',
9898
[TEMPLATE_CUA]:
99-
'kernel invoke ts-cua cua-task --payload \'{"query": "Go to https://news.ycombinator.com and get the top 5 articles"}\'',
99+
'kernel invoke ts-cua cua-task --payload \'{"task": "Go to https://news.ycombinator.com and get the top 5 articles"}\'',
100100
},
101101
[LANGUAGE_PYTHON]: {
102102
[TEMPLATE_SAMPLE_APP]:
@@ -108,7 +108,7 @@ const INVOKE_SAMPLES: Record<
108108
[TEMPLATE_COMPUTER_USE]:
109109
'kernel invoke python-cu cu-task --payload \'{"query": "Return the first url of a search result for NYC restaurant reviews Pete Wells"}\'',
110110
[TEMPLATE_CUA]:
111-
'kernel invoke python-cua cua-task --payload \'{"query": "Go to https://news.ycombinator.com and get the top 5 articles"}\'',
111+
'kernel invoke python-cua cua-task --payload \'{"task": "Go to https://news.ycombinator.com and get the top 5 articles"}\'',
112112
},
113113
};
114114

templates/python/cua/pyproject.toml

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6,24 +6,25 @@ readme = "README.md"
66
requires-python = ">=3.11"
77
dependencies = [
88
"annotated-types==0.7.0",
9-
"anyio==4.8.0",
10-
"certifi==2025.1.31",
11-
"charset-normalizer==3.4.1",
9+
"anyio==4.9.0",
10+
"certifi==2025.6.15",
11+
"charset-normalizer==3.4.2",
1212
"distro==1.9.0",
13-
"greenlet==3.1.1",
14-
"h11==0.14.0",
15-
"httpcore==1.0.7",
13+
"greenlet==3.2.3",
14+
"h11==0.16.0",
15+
"httpcore==1.0.9",
1616
"httpx==0.28.1",
1717
"idna==3.10",
18-
"jiter==0.8.2",
19-
"pillow==11.1.0",
20-
"playwright==1.50.0",
21-
"pydantic==2.10.6",
22-
"pydantic_core==2.27.2",
23-
"pyee==12.1.1",
24-
"python-dotenv==1.0.1",
25-
"requests==2.32.3",
18+
"jiter==0.10.0",
19+
"pillow==11.2.1",
20+
"kernel>=0.6.0",
21+
"playwright==1.52.0",
22+
"pydantic==2.11.7",
23+
"pydantic_core==2.35.1",
24+
"pyee==13.0.0",
25+
"python-dotenv==1.1.0",
26+
"requests==2.32.4",
2627
"sniffio==1.3.1",
27-
"typing_extensions==4.12.2",
28-
"urllib3==2.3.0",
28+
"typing_extensions==4.14.0",
29+
"urllib3==2.5.0",
2930
]
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
OPENAI_API_KEY=YOUR_OPENAI_API_KEY
2+
# KERNEL_API_KEY=YOUR_KERNEL_KEY

templates/typescript/cua/.prettierrc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@
44
"singleQuote": true,
55
"printWidth": 100,
66
"tabWidth": 2
7-
}
7+
}

templates/typescript/cua/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@ This is a Kernel application that demonstrates using the Computer Using Agent (C
55
It generally follows the [OpenAI CUA Sample App Reference](https://github.com/openai/openai-cua-sample-app) and uses Playwright via Kernel for browser automation.
66
It also uses the latest OpenAI SDK format and includes local equivalents of the Kernel methods, so you can test locally before deploying to Kernel.
77

8-
See the [docs](https://docs.onkernel.com/quickstart) for information.
8+
See the [docs](https://docs.onkernel.com/quickstart) for information.

templates/typescript/cua/index.ts

Lines changed: 81 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,24 @@
1-
import "dotenv/config";
2-
import { Kernel, type KernelContext } from "@onkernel/sdk";
3-
import { Agent } from "./lib/agent";
4-
import computers from "./lib/computers";
1+
import 'dotenv/config';
2+
import { Kernel, type KernelContext } from '@onkernel/sdk';
3+
import { Agent } from './lib/agent';
4+
import computers from './lib/computers';
5+
import type { ResponseOutputMessage, ResponseItem } from 'openai/resources/responses/responses';
6+
7+
interface CuaInput {
8+
task: string;
9+
}
10+
interface CuaOutput {
11+
elapsed: number;
12+
answer: string | null;
13+
logs?: ResponseItem[];
14+
}
515

616
const kernel = new Kernel();
7-
const app = kernel.app("ts-cua");
17+
const app = kernel.app('ts-cua');
818

9-
// LLM API Keys are set in the environment during `kernel deploy <filename> -e ANTHROPIC_API_KEY=XXX`
10-
// See https://docs.onkernel.com/launch/deploy#environment-variables
11-
if (!process.env.OPENAI_API_KEY) throw new Error('OPENAI_API_KEY is not set');
19+
if (!process.env.OPENAI_API_KEY) {
20+
throw new Error('OPENAI_API_KEY is not set');
21+
}
1222

1323
/**
1424
* Example app that runs an agent using OpenAI CUA
@@ -24,88 +34,74 @@ if (!process.env.OPENAI_API_KEY) throw new Error('OPENAI_API_KEY is not set');
2434
* kernel logs ts-cua -f # Open in separate tab
2535
*/
2636

27-
interface CuaInput {
28-
task: string;
29-
}
30-
31-
interface CuaOutput {
32-
elapsed: number;
33-
response?: Array<object>;
34-
answer: object;
35-
}
36-
3737
app.action<CuaInput, CuaOutput>(
38-
"cua-task",
39-
async (ctx: KernelContext, payload?: CuaInput): Promise<CuaOutput> => {
40-
const startTime = Date.now();
41-
const kernelBrowser = await kernel.browsers.create({
42-
invocation_id: ctx.invocation_id,
43-
});
44-
console.log(
45-
"> Kernel browser live view url: ",
46-
kernelBrowser.browser_live_view_url,
47-
);
48-
49-
if (!payload?.task){
50-
throw new Error('task is required');
51-
}
52-
53-
try {
38+
'cua-task',
39+
async (ctx: KernelContext, payload?: CuaInput): Promise<CuaOutput> => {
40+
const start = Date.now();
41+
if (!payload?.task) throw new Error('task is required');
5442

55-
// kernel browser
56-
const { computer } = await computers.create({
57-
type: "kernel", // for local testing before deploying to Kernel, you can use type: "local"
58-
cdp_ws_url: kernelBrowser.cdp_ws_url,
59-
});
43+
try {
44+
const kb = await kernel.browsers.create({ invocation_id: ctx.invocation_id });
45+
console.log('> Kernel browser live view url:', kb.browser_live_view_url);
6046

61-
// setup agent
62-
const agent = new Agent({
63-
model: "computer-use-preview",
64-
computer,
65-
tools: [], // additional function_call tools to provide to the llm
66-
acknowledge_safety_check_callback: (message: string) => {
67-
console.log(`> safety check: ${message}`);
68-
return true; // Auto-acknowledge all safety checks for testing
69-
},
70-
});
47+
const { computer } = await computers.create({ type: 'kernel', cdp_ws_url: kb.cdp_ws_url });
48+
const agent = new Agent({
49+
model: 'computer-use-preview',
50+
computer,
51+
tools: [],
52+
acknowledge_safety_check_callback: (m: string): boolean => {
53+
console.log(`> safety check: ${m}`);
54+
return true;
55+
},
56+
});
7157

72-
// start agent run
73-
const response = await agent.runFullTurn({
74-
messages: [
75-
{
76-
role: "system",
77-
content: `- Current date and time: ${new Date().toISOString()} (${new Date().toLocaleDateString("en-US", { weekday: "long" })})`,
78-
},
79-
{
80-
type: "message",
81-
role: "user",
82-
content: [
83-
{
84-
type: "input_text",
85-
text: payload.task,
86-
// text: "go to https://news.ycombinator.com , open top article , describe the target website design (in yaml format)"
87-
},
88-
],
89-
},
90-
],
91-
print_steps: true, // log function_call and computer_call actions
92-
debug: true, // show agent debug logs (llm messages and responses)
93-
show_images: false, // if set to true, response messages stack will return base64 images (webp format) of screenshots, if false, replaced with "[omitted]""
94-
});
58+
// run agent and get response
59+
const logs = await agent.runFullTurn({
60+
messages: [
61+
{
62+
role: 'system',
63+
content: `- Current date and time: ${new Date().toISOString()} (${new Date().toLocaleDateString(
64+
'en-US',
65+
{ weekday: 'long' },
66+
)})`,
67+
},
68+
{
69+
type: 'message',
70+
role: 'user',
71+
content: [{ type: 'input_text', text: payload.task }],
72+
},
73+
],
74+
print_steps: true,
75+
debug: true,
76+
show_images: false,
77+
});
9578

96-
console.log("> agent run done");
79+
const elapsed = parseFloat(((Date.now() - start) / 1000).toFixed(2));
9780

98-
const endTime = Date.now();
99-
const timeElapsed = (endTime - startTime) / 1000; // Convert to seconds
81+
// filter only LLM messages
82+
const messages = logs.filter(
83+
(item): item is ResponseOutputMessage =>
84+
item.type === 'message' &&
85+
typeof (item as ResponseOutputMessage).role === 'string' &&
86+
Array.isArray((item as ResponseOutputMessage).content),
87+
);
88+
const assistant = messages.find((m) => m.role === 'assistant');
89+
const lastContentIndex = assistant?.content?.length ? assistant.content.length - 1 : -1;
90+
const lastContent = lastContentIndex >= 0 ? assistant?.content?.[lastContentIndex] : null;
91+
const answer = lastContent && 'text' in lastContent ? lastContent.text : null;
10092

101-
return {
102-
// response, // full messages stack trace
103-
elapsed: parseFloat(timeElapsed.toFixed(2)),
104-
answer: response?.slice(-1)?.[0]?.content?.[0]?.text ?? null,
105-
};
106-
} finally {
107-
// Note: KernelPlaywrightComputer handles browser cleanup internally
108-
// No need to manually close browser here
109-
}
110-
},
93+
return {
94+
// logs, // optionally, get the full agent run messages logs
95+
elapsed,
96+
answer,
97+
};
98+
} catch (error) {
99+
const elapsed = parseFloat(((Date.now() - start) / 1000).toFixed(2));
100+
console.error('Error in cua-task:', error);
101+
return {
102+
elapsed,
103+
answer: null,
104+
};
105+
}
106+
},
111107
);

0 commit comments

Comments
 (0)