From d632e112f3ad93e3c901b349bfbf60df73246063 Mon Sep 17 00:00:00 2001 From: Mish <10400064+mishushakov@users.noreply.github.com> Date: Tue, 11 Mar 2025 21:32:19 +0100 Subject: [PATCH 1/6] added E2B Desktop --- .env.example | 2 + README.md | 2 + agent/agent.py | 2 +- cli.py | 3 ++ computers/__init__.py | 1 + computers/e2b.py | 92 +++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 1 + simple_cua_loop.py | 2 +- 8 files changed, 103 insertions(+), 2 deletions(-) create mode 100644 computers/e2b.py diff --git a/.env.example b/.env.example index 43032b5..cf537ef 100644 --- a/.env.example +++ b/.env.example @@ -6,4 +6,6 @@ OPENAI_ORG = "org-123" BROWSERBASE_API_KEY="00000000-0000-0000-0000-000000000000" BROWSERBASE_PROJECT_ID="bb_live_00000000-00000" +E2B_API_KEY="e2b_key" + SCRAPYBARA_API_KEY="scrapy-123" \ No newline at end of file diff --git a/README.md b/README.md index 8795b43..be84608 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,7 @@ Other included sample [computer environments](#computer-environments): - [Docker](https://docker.com/) (containerized desktop) - [Browserbase](https://www.browserbase.com/) (remote browser, requires account) +- [E2B](https://e2b.dev) (remote computer, requires account) - [Scrapybara](https://scrapybara.com) (remote browser or computer, requires account) - ...or implement your own `Computer`! @@ -91,6 +92,7 @@ This sample app provides a set of implemented `Computer` examples, but feel free | `LocalPlaywright` | local-playwright | `browser` | Local browser window | [Playwright SDK](https://playwright.dev/) | | `Docker` | docker | `linux` | Docker container environment | [Docker](https://docs.docker.com/engine/install/) running | | `Browserbase` | browserbase | `browser` | Remote browser environment | [Browserbase](https://www.browserbase.com/) API key in `.env` | +| `E2B` | e2b | `linux` | Remote desktop environment | [E2B](https://e2b.dev) API key in `.env` | | `ScrapybaraBrowser` | scrapybara-browser | `browser` | Remote browser environment | [Scrapybara](https://scrapybara.com/dashboard) API key in `.env` | | `ScrapybaraUbuntu` | scrapybara-ubuntu | `linux` | Remote Ubuntu desktop environment | [Scrapybara](https://scrapybara.com/dashboard) API key in `.env` | diff --git a/agent/agent.py b/agent/agent.py index 47eab67..2514c24 100644 --- a/agent/agent.py +++ b/agent/agent.py @@ -19,7 +19,7 @@ class Agent: def __init__( self, - model="computer-use-preview-2025-02-04", + model="computer-use-preview", computer: Computer = None, tools: list[dict] = [], acknowledge_safety_check_callback: Callable = lambda: False, diff --git a/cli.py b/cli.py index 15bc37b..e705079 100644 --- a/cli.py +++ b/cli.py @@ -2,6 +2,7 @@ from agent.agent import Agent from computers import ( BrowserbaseBrowser, + E2BDesktop, ScrapybaraBrowser, ScrapybaraUbuntu, LocalPlaywrightComputer, @@ -26,6 +27,7 @@ def main(): "local-playwright", "docker", "browserbase", + "e2b", "scrapybara-browser", "scrapybara-ubuntu", ], @@ -60,6 +62,7 @@ def main(): "local-playwright": LocalPlaywrightComputer, "docker": DockerComputer, "browserbase": BrowserbaseBrowser, + "e2b": E2BDesktop, "scrapybara-browser": ScrapybaraBrowser, "scrapybara-ubuntu": ScrapybaraUbuntu, } diff --git a/computers/__init__.py b/computers/__init__.py index 606332e..99e5e8f 100644 --- a/computers/__init__.py +++ b/computers/__init__.py @@ -1,5 +1,6 @@ from .computer import Computer from .browserbase import BrowserbaseBrowser +from .e2b import E2BDesktop from .local_playwright import LocalPlaywrightComputer from .docker import DockerComputer from .scrapybara import ScrapybaraBrowser, ScrapybaraUbuntu diff --git a/computers/e2b.py b/computers/e2b.py new file mode 100644 index 0000000..2fe508e --- /dev/null +++ b/computers/e2b.py @@ -0,0 +1,92 @@ +import base64 +from e2b_desktop import Sandbox + +cua_e2b_key_mapping = { + "ENTER": "Return", + "LEFT": "Left", + "RIGHT": "Right", + "UP": "Up", + "DOWN": "Down", + "ESC": "Escape", + "SPACE": "space", + "BACKSPACE": "BackSpace", + "TAB": "Tab", +} + +class E2BDesktop: + """ + E2B Desktop is a desktop environment in the cloud. + """ + + def __init__(self): + self.environment = "linux" # "windows", "mac", "linux", or "browser" + self.dimensions = (1024, 768) + + def __enter__(self): + print("Starting E2B Desktop Sandbox") + self.sandbox = Sandbox( + resolution=self.dimensions, + timeout=300, + ) + + print(f"Started E2B Desktop Sandbox with id '{self.sandbox.sandbox_id}'") + + print("Starting Desktop Stream") + self.sandbox.stream.start(require_auth=True) + + stream_url = self.sandbox.stream.get_url(auth_key=self.sandbox.stream.get_auth_key()) + print(f"Desktop Stream is running at {stream_url}") + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.sandbox.kill() + + def screenshot(self) -> str: + screenshot = self.sandbox.screenshot() + base64_image = base64.b64encode(screenshot).decode("utf-8") + return base64_image + + def click(self, x: int, y: int, button: str = "left") -> None: + match button: + case "left": + self.sandbox.move_mouse(x, y) + self.sandbox.left_click() + case "right": + self.sandbox.move_mouse(x, y) + self.sandbox.right_click() + case "middle": + self.sandbox.move_mouse(x, y) + self.sandbox.middle_click() + + def double_click(self, x: int, y: int) -> None: + self.sandbox.move_mouse(x, y) + self.sandbox.double_click() + + def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: + self.sandbox.move_mouse(x, y) + self.sandbox.scroll(scroll_x, scroll_y) + + def type(self, text: str) -> None: + self.sandbox.write(text) + + def wait(self, ms: int = 1000) -> None: + self.sandbox.wait(ms) + + def move(self, x: int, y: int) -> None: + self.sandbox.move_mouse(x, y) + + def keypress(self, keys: list[str]) -> None: + # convert keys to e2b desktop keys + mapped_keys = [cua_e2b_key_mapping.get(key, key) for key in keys] + self.sandbox.press(mapped_keys) + + def drag(self, path: list[dict[str, int]]) -> None: + if not path: + return + start_x = path[0]["x"] + start_y = path[0]["y"] + + end_x = path[-1]["x"] + end_y = path[-1]["y"] + + self.sandbox.drag((start_x, start_y), (end_x, end_y)) diff --git a/requirements.txt b/requirements.txt index 13769fb..1afe9b6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,3 +21,4 @@ scrapybara>=2.3.6 sniffio==1.3.1 typing_extensions==4.12.2 urllib3==2.3.0 +e2b_desktop==1.4.0 \ No newline at end of file diff --git a/simple_cua_loop.py b/simple_cua_loop.py index 1d66bfe..1bbb0fc 100644 --- a/simple_cua_loop.py +++ b/simple_cua_loop.py @@ -72,7 +72,7 @@ def main(): while True: # keep looping until we get a final response response = create_response( - model="computer-use-preview-2025-02-04", + model="computer-use-preview", input=items, tools=tools, truncation="auto", From f0fae4480f2d9868133f7e9a893c8d2b695ceace Mon Sep 17 00:00:00 2001 From: Mish <10400064+mishushakov@users.noreply.github.com> Date: Wed, 23 Apr 2025 17:14:58 +0200 Subject: [PATCH 2/6] added E2B Desktop --- .env.example | 2 + README.md | 2 + computers/contrib/__init__.py | 1 + computers/contrib/e2b.py | 75 +++++++++++++++++++++++++++++++++++ requirements.txt | 1 + 5 files changed, 81 insertions(+) create mode 100644 computers/contrib/e2b.py diff --git a/.env.example b/.env.example index 43032b5..cf537ef 100644 --- a/.env.example +++ b/.env.example @@ -6,4 +6,6 @@ OPENAI_ORG = "org-123" BROWSERBASE_API_KEY="00000000-0000-0000-0000-000000000000" BROWSERBASE_PROJECT_ID="bb_live_00000000-00000" +E2B_API_KEY="e2b_key" + SCRAPYBARA_API_KEY="scrapy-123" \ No newline at end of file diff --git a/README.md b/README.md index 5dd1ac4..5d9a9a5 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,7 @@ Other included sample [computer environments](#computer-environments): - [Docker](https://docker.com/) (containerized desktop) - [Browserbase](https://www.browserbase.com/) (remote browser, requires account) +- [E2B](https://e2b.dev) (remote computer, open-source, requires account) - [Scrapybara](https://scrapybara.com) (remote browser or computer, requires account) - ...or implement your own `Computer`! @@ -94,6 +95,7 @@ This sample app provides a set of implemented `Computer` examples, but feel free | `LocalPlaywright` | local-playwright | `browser` | Local browser window | [Playwright SDK](https://playwright.dev/) | | `Docker` | docker | `linux` | Docker container environment | [Docker](https://docs.docker.com/engine/install/) running | | `Browserbase` | browserbase | `browser` | Remote browser environment | [Browserbase](https://www.browserbase.com/) API key in `.env` | +| `E2B` | e2b | `linux` | Open-source desktop environment | [E2B](https://e2b.dev) API key in `.env` | | `ScrapybaraBrowser` | scrapybara-browser | `browser` | Remote browser environment | [Scrapybara](https://scrapybara.com/dashboard) API key in `.env` | | `ScrapybaraUbuntu` | scrapybara-ubuntu | `linux` | Remote Ubuntu desktop environment | [Scrapybara](https://scrapybara.com/dashboard) API key in `.env` | diff --git a/computers/contrib/__init__.py b/computers/contrib/__init__.py index e69de29..e7395fd 100644 --- a/computers/contrib/__init__.py +++ b/computers/contrib/__init__.py @@ -0,0 +1 @@ +from .e2b import E2BDesktop diff --git a/computers/contrib/e2b.py b/computers/contrib/e2b.py new file mode 100644 index 0000000..379d76e --- /dev/null +++ b/computers/contrib/e2b.py @@ -0,0 +1,75 @@ +import base64 +from e2b_desktop import Sandbox + +class E2BDesktop: + """ + E2B Desktop is an open-source desktop environment for AI Agents. + You can get started for free at https://e2b.dev or read our docs at https://docs.e2b.dev + """ + + def __init__(self): + self.environment = "linux" # "windows", "mac", "linux", or "browser" + self.dimensions = (1024, 768) + + def __enter__(self): + print("Starting E2B Desktop Sandbox") + self.sandbox = Sandbox( + resolution=self.dimensions, + timeout=300, + ) + + print(f"Started E2B Desktop Sandbox with id '{self.sandbox.sandbox_id}'") + + print("Starting Desktop Stream") + self.sandbox.stream.start(require_auth=True) + + stream_url = self.sandbox.stream.get_url(auth_key=self.sandbox.stream.get_auth_key()) + print(f"Desktop Stream is running at {stream_url}") + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.sandbox.kill() + + def screenshot(self) -> str: + screenshot = self.sandbox.screenshot() + base64_image = base64.b64encode(screenshot).decode("utf-8") + return base64_image + + def click(self, x: int, y: int, button: str = "left") -> None: + match button: + case "left": + self.sandbox.left_click(x, y) + case "right": + self.sandbox.right_click(x, y) + case "middle": + self.sandbox.middle_click(x, y) + + def double_click(self, x: int, y: int) -> None: + self.sandbox.double_click(x, y) + + def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: + self.sandbox.move_mouse(x, y) + self.sandbox.scroll(scroll_x, scroll_y) + + def type(self, text: str) -> None: + self.sandbox.write(text) + + def wait(self, ms: int = 1000) -> None: + self.sandbox.wait(ms) + + def move(self, x: int, y: int) -> None: + self.sandbox.move_mouse(x, y) + + def keypress(self, keys: list[str]) -> None: + self.sandbox.press(keys) + + def drag(self, path: list[dict[str, int]]) -> None: + if not path: + return + start_x = path[0]["x"] + start_y = path[0]["y"] + + end_x = path[-1]["x"] + end_y = path[-1]["y"] + + self.sandbox.drag((start_x, start_y), (end_x, end_y)) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 13769fb..6441d97 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,3 +21,4 @@ scrapybara>=2.3.6 sniffio==1.3.1 typing_extensions==4.12.2 urllib3==2.3.0 +e2b_desktop==1.5.2 \ No newline at end of file From a68ab183e6a76a1844c967d5e5a4e78460643688 Mon Sep 17 00:00:00 2001 From: Mish <10400064+mishushakov@users.noreply.github.com> Date: Mon, 19 May 2025 17:55:25 +0200 Subject: [PATCH 3/6] updated e2b --- computers/config.py | 11 +++++++++++ computers/contrib/e2b.py | 7 +++++-- requirements.txt | 2 +- 3 files changed, 17 insertions(+), 3 deletions(-) create mode 100644 computers/config.py diff --git a/computers/config.py b/computers/config.py new file mode 100644 index 0000000..9144947 --- /dev/null +++ b/computers/config.py @@ -0,0 +1,11 @@ +from .default import * +from .contrib import * + +computers_config = { + "local-playwright": LocalPlaywrightBrowser, + "docker": DockerComputer, + "browserbase": BrowserbaseBrowser, + "scrapybara-browser": ScrapybaraBrowser, + "scrapybara-ubuntu": ScrapybaraUbuntu, + "e2b": E2BDesktop, +} \ No newline at end of file diff --git a/computers/contrib/e2b.py b/computers/contrib/e2b.py index 379d76e..1ee38e3 100644 --- a/computers/contrib/e2b.py +++ b/computers/contrib/e2b.py @@ -31,7 +31,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): self.sandbox.kill() def screenshot(self) -> str: - screenshot = self.sandbox.screenshot() + screenshot = self.sandbox.screenshot(format="bytes") base64_image = base64.b64encode(screenshot).decode("utf-8") return base64_image @@ -49,7 +49,10 @@ def double_click(self, x: int, y: int) -> None: def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: self.sandbox.move_mouse(x, y) - self.sandbox.scroll(scroll_x, scroll_y) + if scroll_y < 0: + self.sandbox.scroll("up", abs(scroll_y)) + elif scroll_y > 0: + self.sandbox.scroll("down", scroll_y) def type(self, text: str) -> None: self.sandbox.write(text) diff --git a/requirements.txt b/requirements.txt index 90221b9..1d5c0e6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,4 +21,4 @@ scrapybara>=2.3.6 sniffio==1.3.1 typing_extensions==4.12.2 urllib3==2.3.0 -e2b_desktop==1.5.2 +e2b_desktop==1.7.1 From 5a2790493310d9452834a9ff76cc6b75be77ced2 Mon Sep 17 00:00:00 2001 From: Mish <10400064+mishushakov@users.noreply.github.com> Date: Mon, 19 May 2025 17:56:18 +0200 Subject: [PATCH 4/6] updated e2b --- computers/config.py | 11 +++++++++++ computers/contrib/e2b.py | 7 +++++-- requirements.txt | 2 +- 3 files changed, 17 insertions(+), 3 deletions(-) create mode 100644 computers/config.py diff --git a/computers/config.py b/computers/config.py new file mode 100644 index 0000000..9144947 --- /dev/null +++ b/computers/config.py @@ -0,0 +1,11 @@ +from .default import * +from .contrib import * + +computers_config = { + "local-playwright": LocalPlaywrightBrowser, + "docker": DockerComputer, + "browserbase": BrowserbaseBrowser, + "scrapybara-browser": ScrapybaraBrowser, + "scrapybara-ubuntu": ScrapybaraUbuntu, + "e2b": E2BDesktop, +} \ No newline at end of file diff --git a/computers/contrib/e2b.py b/computers/contrib/e2b.py index 379d76e..1ee38e3 100644 --- a/computers/contrib/e2b.py +++ b/computers/contrib/e2b.py @@ -31,7 +31,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): self.sandbox.kill() def screenshot(self) -> str: - screenshot = self.sandbox.screenshot() + screenshot = self.sandbox.screenshot(format="bytes") base64_image = base64.b64encode(screenshot).decode("utf-8") return base64_image @@ -49,7 +49,10 @@ def double_click(self, x: int, y: int) -> None: def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: self.sandbox.move_mouse(x, y) - self.sandbox.scroll(scroll_x, scroll_y) + if scroll_y < 0: + self.sandbox.scroll("up", abs(scroll_y)) + elif scroll_y > 0: + self.sandbox.scroll("down", scroll_y) def type(self, text: str) -> None: self.sandbox.write(text) diff --git a/requirements.txt b/requirements.txt index 90221b9..1d5c0e6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,4 +21,4 @@ scrapybara>=2.3.6 sniffio==1.3.1 typing_extensions==4.12.2 urllib3==2.3.0 -e2b_desktop==1.5.2 +e2b_desktop==1.7.1 From a77034a60c21453627ab2f15b34770aa357fab31 Mon Sep 17 00:00:00 2001 From: Mish <10400064+mishushakov@users.noreply.github.com> Date: Mon, 19 May 2025 17:57:40 +0200 Subject: [PATCH 5/6] cli --- cli.py | 36 ------------------------------------ 1 file changed, 36 deletions(-) diff --git a/cli.py b/cli.py index 4f22bab..a96595e 100644 --- a/cli.py +++ b/cli.py @@ -1,19 +1,8 @@ import argparse from agent.agent import Agent -<<<<<<< HEAD from computers.config import * from computers.default import * from computers import computers_config -======= -from computers import ( - BrowserbaseBrowser, - E2BDesktop, - ScrapybaraBrowser, - ScrapybaraUbuntu, - LocalPlaywrightComputer, - DockerComputer, -) ->>>>>>> d632e112f3ad93e3c901b349bfbf60df73246063 def acknowledge_safety_check_callback(message: str) -> bool: @@ -29,18 +18,7 @@ def main(): ) parser.add_argument( "--computer", -<<<<<<< HEAD choices=computers_config.keys(), -======= - choices=[ - "local-playwright", - "docker", - "browserbase", - "e2b", - "scrapybara-browser", - "scrapybara-ubuntu", - ], ->>>>>>> d632e112f3ad93e3c901b349bfbf60df73246063 help="Choose the computer environment to use.", default="local-playwright", ) @@ -67,21 +45,7 @@ def main(): default="https://bing.com", ) args = parser.parse_args() -<<<<<<< HEAD ComputerClass = computers_config[args.computer] -======= - - computer_mapping = { - "local-playwright": LocalPlaywrightComputer, - "docker": DockerComputer, - "browserbase": BrowserbaseBrowser, - "e2b": E2BDesktop, - "scrapybara-browser": ScrapybaraBrowser, - "scrapybara-ubuntu": ScrapybaraUbuntu, - } - - ComputerClass = computer_mapping[args.computer] ->>>>>>> d632e112f3ad93e3c901b349bfbf60df73246063 with ComputerClass() as computer: agent = Agent( From 2c037eb00a39f0241f73acddadcce28225160e72 Mon Sep 17 00:00:00 2001 From: Mish <10400064+mishushakov@users.noreply.github.com> Date: Sat, 24 May 2025 19:17:39 +0200 Subject: [PATCH 6/6] updated the e2b computer --- computers/contrib/e2b.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/computers/contrib/e2b.py b/computers/contrib/e2b.py index 1ee38e3..91346f9 100644 --- a/computers/contrib/e2b.py +++ b/computers/contrib/e2b.py @@ -1,5 +1,6 @@ import base64 from e2b_desktop import Sandbox +from typing import Literal class E2BDesktop: """ @@ -8,8 +9,9 @@ class E2BDesktop: """ def __init__(self): - self.environment = "linux" # "windows", "mac", "linux", or "browser" + self.environment: Literal["windows", "mac", "linux", "browser"] = "linux" # "windows", "mac", "linux", or "browser" self.dimensions = (1024, 768) + self.stream_url: str | None = None def __enter__(self): print("Starting E2B Desktop Sandbox") @@ -20,11 +22,8 @@ def __enter__(self): print(f"Started E2B Desktop Sandbox with id '{self.sandbox.sandbox_id}'") - print("Starting Desktop Stream") self.sandbox.stream.start(require_auth=True) - stream_url = self.sandbox.stream.get_url(auth_key=self.sandbox.stream.get_auth_key()) - print(f"Desktop Stream is running at {stream_url}") return self def __exit__(self, exc_type, exc_val, exc_tb): @@ -75,4 +74,13 @@ def drag(self, path: list[dict[str, int]]) -> None: end_x = path[-1]["x"] end_y = path[-1]["y"] - self.sandbox.drag((start_x, start_y), (end_x, end_y)) \ No newline at end of file + self.sandbox.drag((start_x, start_y), (end_x, end_y)) + + def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]: + return self.environment + + def get_dimensions(self) -> tuple[int, int]: + return self.dimensions + + def get_current_url(self) -> str: + return self.sandbox.stream.get_url(auth_key=self.sandbox.stream.get_auth_key()) \ No newline at end of file