|
import { pathToFileURL } from 'node:url';

import { Stagehand } from "@browserbasehq/stagehand";
import { Kernel, type KernelContext } from '@onkernel/sdk';
| 3 | + |
// Kernel SDK client, authenticated with the KERNEL_API_KEY environment variable.
const kernel = new Kernel({ apiKey: process.env.KERNEL_API_KEY });

// Kernel app under which this file's actions are registered.
const app = kernel.app('ts-gemini-cua');
| 9 | + |
/** Outcome of a single agent run, as returned by the task runner and the Kernel action. */
interface SearchQueryOutput {
  /** `true` when the agent run finished without throwing. */
  success: boolean;
  /** Final message reported by the agent; the empty string when the run failed. */
  result: string;
  /** Message of the caught error; only present when the run failed. */
  error?: string;
}
| 15 | + |
// LLM provider credentials, read once at module load:
// - GOOGLE_API_KEY: used by the Gemini 2.5 Computer Use Agent
// - OPENAI_API_KEY: used by Stagehand's GPT-4o model
// Provide them as environment variables or with `kernel deploy <filename> --env-file .env`
// See https://docs.onkernel.com/launch/deploy#environment-variables
const { GOOGLE_API_KEY, OPENAI_API_KEY } = process.env;

// Fail fast when a key is missing or empty; the OpenAI key is checked first.
if (OPENAI_API_KEY === undefined || OPENAI_API_KEY === '') {
  throw new Error('OPENAI_API_KEY is not set');
}

if (GOOGLE_API_KEY === undefined || GOOGLE_API_KEY === '') {
  throw new Error('GOOGLE_API_KEY is not set');
}
| 31 | + |
/**
 * Executes a Computer Use Agent (CUA) task using Gemini 2.5 and Stagehand.
 *
 * Creates a stealth Kernel browser, attaches Stagehand to it over CDP, opens the
 * YCombinator companies page and lets the Gemini agent work on a fixed instruction
 * for at most 20 steps. The Kernel browser is always deleted before returning,
 * including when Stagehand fails to start or to shut down.
 *
 * @param invocationId - Kernel invocation id to associate the browser with; omitted for local runs.
 * @returns `{ success: true, result }` carrying the agent's final message, or
 *   `{ success: false, result: "", error }` when any step after browser creation throws.
 * @throws Whatever `kernel.browsers.create` throws; at that point there is nothing to clean up.
 */
async function runStagehandTask(invocationId?: string): Promise<SearchQueryOutput> {
  const browserOptions = {
    stealth: true,
    // viewport: {
    //   width: 1440,
    //   height: 900,
    //   refresh_rate: 25
    // },
    ...(invocationId && { invocation_id: invocationId })
  };

  const kernelBrowser = await kernel.browsers.create(browserOptions);

  console.log("Kernel browser live view url: ", kernelBrowser.browser_live_view_url);

  // Declared outside the try so the finally block can tell whether there is anything to close.
  let stagehand: Stagehand | undefined;

  try {
    // Construction and init happen inside the try: the remote browser already exists,
    // so a failure here must still reach the cleanup below.
    stagehand = new Stagehand({
      env: "LOCAL",
      verbose: 1,
      domSettleTimeoutMs: 30_000,
      modelName: "gpt-4o",
      modelClientOptions: {
        apiKey: OPENAI_API_KEY
      },
      localBrowserLaunchOptions: {
        cdpUrl: kernelBrowser.cdp_ws_url
      }
    });
    await stagehand.init();

    /////////////////////////////////////
    // Your Stagehand implementation here
    /////////////////////////////////////
    const page = stagehand.page;

    // Navigate to YCombinator's website first, so that page.url() in the agent
    // instructions below reports the page the agent actually starts on.
    await page.goto("https://www.ycombinator.com/companies");

    const agent = stagehand.agent({
      provider: "google",
      model: "gemini-2.5-computer-use-preview-10-2025",
      instructions: `You are a helpful assistant that can use a web browser.
      You are currently on the following page: ${page.url()}.
      Do not ask follow up questions, the user will trust your judgement.`,
      options: {
        apiKey: GOOGLE_API_KEY,
      }
    });

    // Define the instructions for the CUA agent
    const instruction = "Find Kernel's company page on the YCombinator website and write a blog post about their product offering.";

    // Execute the instruction
    const result = await agent.execute({
      instruction,
      maxSteps: 20,
    });

    console.log("result: ", result);

    return { success: true, result: result.message };
  } catch (error) {
    console.error(error);
    const errorMessage = error instanceof Error ? error.message : String(error);
    return { success: false, result: "", error: errorMessage };
  } finally {
    console.log("Deleting browser and closing stagehand...");

    // Each cleanup step is guarded separately: a failing close must not skip the
    // browser deletion, and a cleanup error must not replace the result computed above.
    if (stagehand) {
      try {
        await stagehand.close();
      } catch (closeError) {
        console.error("Failed to close stagehand:", closeError);
      }
    }

    try {
      await kernel.browsers.deleteByID(kernelBrowser.session_id);
    } catch (deleteError) {
      console.error("Failed to delete Kernel browser:", deleteError);
    }
  }
}
| 105 | + |
// Expose the task as a Kernel action so it can be invoked remotely.
// Invoke with: kernel invoke ts-gemini-cua gemini-cua-task
// The invocation id from the context is forwarded to the task runner.
app.action<void, SearchQueryOutput>(
  'gemini-cua-task',
  async (ctx: KernelContext): Promise<SearchQueryOutput> => runStagehandTask(ctx.invocation_id),
);
| 114 | + |
// Run locally if executed directly (not imported as a module)
// Execute via: npx tsx index.ts
//
// import.meta.url is a percent-encoded file URL, so the script path is converted with
// pathToFileURL before comparing; a hand-built `file://${path}` string does not match for
// paths containing spaces or other encoded characters, nor for Windows paths.
// The process exits with 0 when the task reports success and 1 otherwise.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  runStagehandTask().then(result => {
    console.log('Local execution result:', result);
    process.exit(result.success ? 0 : 1);
  }).catch(error => {
    console.error('Local execution failed:', error);
    process.exit(1);
  });
}
0 commit comments