diff --git a/.env.example b/.env.example
index 43032b5..cf537ef 100644
--- a/.env.example
+++ b/.env.example
@@ -6,4 +6,6 @@ OPENAI_ORG = "org-123"
 BROWSERBASE_API_KEY="00000000-0000-0000-0000-000000000000"
 BROWSERBASE_PROJECT_ID="bb_live_00000000-00000"
 
+E2B_API_KEY="e2b_key"
+
 SCRAPYBARA_API_KEY="scrapy-123"
\ No newline at end of file
diff --git a/README.md b/README.md
index 5dd1ac4..5d9a9a5 100644
--- a/README.md
+++ b/README.md
@@ -28,6 +28,7 @@ Other included sample [computer environments](#computer-environments):
 
 - [Docker](https://docker.com/) (containerized desktop)
 - [Browserbase](https://www.browserbase.com/) (remote browser, requires account)
+- [E2B](https://e2b.dev) (remote computer, open-source, requires account)
 - [Scrapybara](https://scrapybara.com) (remote browser or computer, requires account)
 - ...or implement your own `Computer`!
 
@@ -94,6 +95,7 @@ This sample app provides a set of implemented `Computer` examples, but feel free
 | `LocalPlaywright` | local-playwright | `browser` | Local browser window | [Playwright SDK](https://playwright.dev/) |
 | `Docker` | docker | `linux` | Docker container environment | [Docker](https://docs.docker.com/engine/install/) running |
 | `Browserbase` | browserbase | `browser` | Remote browser environment | [Browserbase](https://www.browserbase.com/) API key in `.env` |
+| `E2B` | e2b | `linux` | Open-source desktop environment | [E2B](https://e2b.dev) API key in `.env` |
 | `ScrapybaraBrowser` | scrapybara-browser | `browser` | Remote browser environment | [Scrapybara](https://scrapybara.com/dashboard) API key in `.env` |
 | `ScrapybaraUbuntu` | scrapybara-ubuntu | `linux` | Remote Ubuntu desktop environment | [Scrapybara](https://scrapybara.com/dashboard) API key in `.env` |
 
diff --git a/computers/config.py b/computers/config.py
index 699f1a8..9604298 100644
--- a/computers/config.py
+++ b/computers/config.py
@@ -7,4 +7,5 @@
     "browserbase": BrowserbaseBrowser,
     "scrapybara-browser": ScrapybaraBrowser,
     "scrapybara-ubuntu": ScrapybaraUbuntu,
+    "e2b": E2BDesktop,
 }
diff --git a/computers/contrib/__init__.py b/computers/contrib/__init__.py
index e69de29..e7395fd 100644
--- a/computers/contrib/__init__.py
+++ b/computers/contrib/__init__.py
@@ -0,0 +1 @@
+from .e2b import E2BDesktop
diff --git a/computers/contrib/e2b.py b/computers/contrib/e2b.py
new file mode 100644
index 0000000..91346f9
--- /dev/null
+++ b/computers/contrib/e2b.py
@@ -0,0 +1,99 @@
+import base64
+from e2b_desktop import Sandbox
+from typing import Literal
+
+class E2BDesktop:
+    """
+    E2B Desktop is an open-source desktop environment for AI Agents.
+    You can get started for free at https://e2b.dev or read our docs at https://docs.e2b.dev
+
+    Use as a context manager: the sandbox is started on enter and killed on exit.
+    """
+
+    def __init__(self):
+        self.environment: Literal["windows", "mac", "linux", "browser"] = "linux" # "windows", "mac", "linux", or "browser"
+        self.dimensions = (1024, 768)
+        self.stream_url: str | None = None
+
+    def __enter__(self):
+        print("Starting E2B Desktop Sandbox")
+        self.sandbox = Sandbox(
+            resolution=self.dimensions,
+            timeout=300,
+        )
+
+        print(f"Started E2B Desktop Sandbox with id '{self.sandbox.sandbox_id}'")
+
+        try:
+            self.sandbox.stream.start(require_auth=True)
+        except BaseException:
+            # __exit__ is not called when __enter__ raises, so kill the
+            # sandbox here instead of leaking it.
+            self.sandbox.kill()
+            raise
+
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.sandbox.kill()
+
+    def screenshot(self) -> str:
+        """Return the current screen as a base64-encoded string."""
+        screenshot = self.sandbox.screenshot(format="bytes")
+        base64_image = base64.b64encode(screenshot).decode("utf-8")
+        return base64_image
+
+    def click(self, x: int, y: int, button: str = "left") -> None:
+        """Click at (x, y). Buttons other than left/right/middle are ignored."""
+        match button:
+            case "left":
+                self.sandbox.left_click(x, y)
+            case "right":
+                self.sandbox.right_click(x, y)
+            case "middle":
+                self.sandbox.middle_click(x, y)
+
+    def double_click(self, x: int, y: int) -> None:
+        self.sandbox.double_click(x, y)
+
+    def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
+        """Scroll vertically at (x, y); scroll_x is currently not used."""
+        self.sandbox.move_mouse(x, y)
+        if scroll_y < 0:
+            self.sandbox.scroll("up", abs(scroll_y))
+        elif scroll_y > 0:
+            self.sandbox.scroll("down", scroll_y)
+
+    def type(self, text: str) -> None:
+        self.sandbox.write(text)
+
+    def wait(self, ms: int = 1000) -> None:
+        self.sandbox.wait(ms)
+
+    def move(self, x: int, y: int) -> None:
+        self.sandbox.move_mouse(x, y)
+
+    def keypress(self, keys: list[str]) -> None:
+        self.sandbox.press(keys)
+
+    def drag(self, path: list[dict[str, int]]) -> None:
+        """Drag from the first point of path to the last; intermediate points are skipped."""
+        if not path:
+            return
+        start_x = path[0]["x"]
+        start_y = path[0]["y"]
+
+        end_x = path[-1]["x"]
+        end_y = path[-1]["y"]
+
+        self.sandbox.drag((start_x, start_y), (end_x, end_y))
+
+    def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]:
+        return self.environment
+
+    def get_dimensions(self) -> tuple[int, int]:
+        return self.dimensions
+
+    def get_current_url(self) -> str:
+        """Return the desktop stream URL built with the stream's auth key."""
+        return self.sandbox.stream.get_url(auth_key=self.sandbox.stream.get_auth_key())
\ No newline at end of file
diff --git a/computers/e2b.py b/computers/e2b.py
new file mode 100644
index 0000000..2fe508e
--- /dev/null
+++ b/computers/e2b.py
@@ -0,0 +1,106 @@
+import base64
+from e2b_desktop import Sandbox
+
+cua_e2b_key_mapping = {
+    "ENTER": "Return",
+    "LEFT": "Left",
+    "RIGHT": "Right",
+    "UP": "Up",
+    "DOWN": "Down",
+    "ESC": "Escape",
+    "SPACE": "space",
+    "BACKSPACE": "BackSpace",
+    "TAB": "Tab",
+}
+
+class E2BDesktop:
+    """
+    E2B Desktop is a desktop environment in the cloud.
+    """
+
+    def __init__(self):
+        self.environment = "linux" # "windows", "mac", "linux", or "browser"
+        self.dimensions = (1024, 768)
+
+    def __enter__(self):
+        print("Starting E2B Desktop Sandbox")
+        self.sandbox = Sandbox(
+            resolution=self.dimensions,
+            timeout=300,
+        )
+
+        print(f"Started E2B Desktop Sandbox with id '{self.sandbox.sandbox_id}'")
+
+        print("Starting Desktop Stream")
+        try:
+            self.sandbox.stream.start(require_auth=True)
+            stream_url = self.sandbox.stream.get_url(auth_key=self.sandbox.stream.get_auth_key())
+        except BaseException:
+            # __exit__ is not called when __enter__ raises, so kill the
+            # sandbox here instead of leaking it.
+            self.sandbox.kill()
+            raise
+
+        print(f"Desktop Stream is running at {stream_url}")
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.sandbox.kill()
+
+    def screenshot(self) -> str:
+        """Return the current screen as a base64-encoded string."""
+        screenshot = self.sandbox.screenshot()
+        base64_image = base64.b64encode(screenshot).decode("utf-8")
+        return base64_image
+
+    def click(self, x: int, y: int, button: str = "left") -> None:
+        """Click at (x, y). Buttons other than left/right/middle are ignored."""
+        match button:
+            case "left":
+                self.sandbox.move_mouse(x, y)
+                self.sandbox.left_click()
+            case "right":
+                self.sandbox.move_mouse(x, y)
+                self.sandbox.right_click()
+            case "middle":
+                self.sandbox.move_mouse(x, y)
+                self.sandbox.middle_click()
+
+    def double_click(self, x: int, y: int) -> None:
+        self.sandbox.move_mouse(x, y)
+        self.sandbox.double_click()
+
+    def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
+        """Scroll vertically at (x, y); scroll_x is currently not used."""
+        self.sandbox.move_mouse(x, y)
+        # Sandbox.scroll takes a direction and an amount, not x/y deltas.
+        if scroll_y < 0:
+            self.sandbox.scroll("up", abs(scroll_y))
+        elif scroll_y > 0:
+            self.sandbox.scroll("down", scroll_y)
+
+    def type(self, text: str) -> None:
+        self.sandbox.write(text)
+
+    def wait(self, ms: int = 1000) -> None:
+        self.sandbox.wait(ms)
+
+    def move(self, x: int, y: int) -> None:
+        self.sandbox.move_mouse(x, y)
+
+    def keypress(self, keys: list[str]) -> None:
+        # convert keys to e2b desktop keys
+        mapped_keys = [cua_e2b_key_mapping.get(key, key) for key in keys]
+        self.sandbox.press(mapped_keys)
+
+    def drag(self, path: list[dict[str, int]]) -> None:
+        """Drag from the first point of path to the last; intermediate points are skipped."""
+        if not path:
+            return
+        start_x = path[0]["x"]
+        start_y = path[0]["y"]
+
+        end_x = path[-1]["x"]
+        end_y = path[-1]["y"]
+
+        self.sandbox.drag((start_x, start_y), (end_x, end_y))
diff --git a/requirements.txt b/requirements.txt
index 13769fb..1d5c0e6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -21,3 +21,4 @@ scrapybara>=2.3.6
 sniffio==1.3.1
 typing_extensions==4.12.2
 urllib3==2.3.0
+e2b_desktop==1.7.1