Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
[submodule "kernel-images"]
path = kernel-images
path = submodules/kernel-images
url = https://github.com/onkernel/kernel-images.git

[submodule "browser-operator-core"]
path = browser-operator-core
path = submodules/browser-operator-core
url = git@github.com:BrowserOperator/browser-operator-core.git
shallow = true

[submodule "webarena"]
path = submodules/webarena
url = https://github.com/web-arena-x/webarena.git
380 changes: 306 additions & 74 deletions CLAUDE.md

Large diffs are not rendered by default.

8 changes: 6 additions & 2 deletions Dockerfile.devtools
Original file line number Diff line number Diff line change
Expand Up @@ -68,16 +68,20 @@ FROM devtools-base AS devtools-local

# Copy local changes from browser-operator-core submodule FIRST
# This happens before checking out upstream, so we copy over the upstream code
COPY browser-operator-core/front_end /workspace/devtools/devtools-frontend/front_end
COPY submodules/browser-operator-core/front_end /workspace/devtools/devtools-frontend/front_end
COPY browser-agent-server /workspace/devtools/devtools-frontend/browser-agent-server

WORKDIR /workspace/devtools/devtools-frontend

# Force automated mode
RUN sed -i 's/AUTOMATED_MODE: false/AUTOMATED_MODE: true/' front_end/panels/ai_chat/core/BuildConfig.ts || true

# Force complete regeneration of build files by removing the entire out directory
# This ensures the build system picks up all changes in BUILD.gn files
RUN rm -rf out/Default

# Build Browser Operator version with local changes
# This build is much faster since we're only building the changed files
# This will regenerate all build files from scratch based on the copied BUILD.gn
RUN npm run build

# Create marker file
Expand Down
24 changes: 12 additions & 12 deletions Dockerfile.kernel-cloud
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,20 @@ ARG TARGETOS
ARG TARGETARCH
ENV CGO_ENABLED=0

COPY kernel-images/server/go.mod ./
COPY kernel-images/server/go.sum ./
COPY submodules/kernel-images/server/go.mod ./
COPY submodules/kernel-images/server/go.sum ./
RUN go mod download

COPY kernel-images/server/ .
COPY submodules/kernel-images/server/ .
RUN GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH:-amd64} \
go build -ldflags="-s -w" -o /out/kernel-images-api ./cmd/api

# webrtc client
FROM node:22-bullseye-slim AS client
WORKDIR /src
COPY kernel-images/images/chromium-headful/client/package*.json ./
COPY submodules/kernel-images/images/chromium-headful/client/package*.json ./
RUN npm install
COPY kernel-images/images/chromium-headful/client/ .
COPY submodules/kernel-images/images/chromium-headful/client/ .
RUN npm run build

# xorg dependencies
Expand All @@ -31,7 +31,7 @@ RUN set -eux; \
apt-get install -y \
git gcc pkgconf autoconf automake libtool make xorg-dev xutils-dev \
&& rm -rf /var/lib/apt/lists/*;
COPY kernel-images/images/chromium-headful/xorg-deps/ /xorg/
COPY submodules/kernel-images/images/chromium-headful/xorg-deps/ /xorg/
# build xf86-video-dummy v0.3.8 with RandR support
RUN set -eux; \
cd xf86-video-dummy/v0.3.8; \
Expand Down Expand Up @@ -169,19 +169,19 @@ ENV WIDTH=1024
ENV WITHDOCKER=true
ENV PORT=8080

COPY kernel-images/images/chromium-headful/xorg.conf /etc/neko/xorg.conf
COPY kernel-images/images/chromium-headful/neko.yaml /etc/neko/neko.yaml
COPY submodules/kernel-images/images/chromium-headful/xorg.conf /etc/neko/xorg.conf
COPY submodules/kernel-images/images/chromium-headful/neko.yaml /etc/neko/neko.yaml
COPY --from=neko /usr/bin/neko /usr/bin/neko
COPY --from=client /src/dist/ /var/www
COPY --from=xorg-deps /usr/local/lib/xorg/modules/drivers/dummy_drv.so /usr/lib/xorg/modules/drivers/dummy_drv.so
COPY --from=xorg-deps /usr/local/lib/xorg/modules/input/neko_drv.so /usr/lib/xorg/modules/input/neko_drv.so

COPY kernel-images/images/chromium-headful/image-chromium/ /
COPY kernel-images/images/chromium-headful/start-chromium.sh /images/chromium-headful/start-chromium.sh
COPY submodules/kernel-images/images/chromium-headful/image-chromium/ /
COPY submodules/kernel-images/images/chromium-headful/start-chromium.sh /images/chromium-headful/start-chromium.sh
RUN chmod +x /images/chromium-headful/start-chromium.sh
COPY kernel-images/images/chromium-headful/supervisord.conf /etc/supervisor/supervisord.conf
COPY submodules/kernel-images/images/chromium-headful/supervisord.conf /etc/supervisor/supervisord.conf
COPY supervisord-cloudrun.conf /etc/supervisor/supervisord-cloudrun.conf
COPY kernel-images/images/chromium-headful/supervisor/services/ /etc/supervisor/conf.d/services/
COPY submodules/kernel-images/images/chromium-headful/supervisor/services/ /etc/supervisor/conf.d/services/
# Copy Cloud Run-specific supervisor configs
COPY supervisor-cloudrun/ /etc/supervisor/conf.d/services-cloudrun/
# Copy Cloud Run-specific chromium start script
Expand Down
91 changes: 90 additions & 1 deletion Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -425,10 +425,99 @@ Edit `service.yaml` to modify Chrome behavior:
For production WebRTC, configure a TURN server:

```yaml
- name: NEKO_ICESERVERS
- name: NEKO_ICESERVERS
value: '[{"urls": ["turn:turn.example.com:3478"], "username": "user", "credential": "pass"}]'
```

### WebArena Configuration (Optional)

The platform supports running **WebArena benchmark evaluations** against self-hosted test websites. This is completely optional and only needed if you're running WebArena tasks.

#### What is WebArena?

WebArena is a research benchmark with 812 tasks across 7 self-hosted websites (e-commerce, forums, GitLab, Wikipedia, etc.). To run these evaluations, you need to route specific domains to a custom IP address.

#### Quick Setup

**1. Configure environment variables in `evals/.env`:**

```bash
cd evals
cp .env.example .env
vim .env
```

Add:
```bash
# WebArena Infrastructure Configuration
WEBARENA_HOST_IP=172.16.55.59 # IP where WebArena sites are hosted
WEBARENA_NETWORK=172.16.55.0/24 # Network CIDR for routing

# WebArena Site URLs (optional - customize if needed)
SHOPPING=http://onestopmarket.com
SHOPPING_ADMIN=http://onestopmarket.com/admin
REDDIT=http://reddit.com
GITLAB=http://gitlab.com
WIKIPEDIA=http://wikipedia.org
```

**2. Start container (configuration is auto-loaded):**

```bash
make compose-up # OR make run
```

**3. Verify WebArena routing is enabled:**

```bash
docker logs kernel-browser-extended | grep -i webarena
```

You should see:
```
🌐 [init] Configuring WebArena DNS mapping to 172.16.55.59...
🌐 [init] Adding route to 172.16.55.0/24 via 172.17.0.1...
```

**4. Run WebArena evaluations:**

```bash
cd evals
python3 run_webarena.py --task-id 1 --verbose
```

#### How It Works

When `WEBARENA_HOST_IP` is set:
- **DNS Mapping**: Chromium resolves the WebArena domains (gitlab.com, reddit.com, etc.) to the IP address you specify
- **Network Routing**: The container adds a route so that it can reach the WebArena network
- **Automatic**: Both steps are applied at container startup by `scripts/init-container.sh`

Without configuration (default):
- System works normally with standard DNS resolution
- WebArena routing is completely disabled
- No impact on regular browser automation

#### Deployment-Specific IPs

You can use different IP addresses for different environments:

```bash
# Local development
WEBARENA_HOST_IP=172.16.55.59
WEBARENA_NETWORK=172.16.55.0/24

# Cloud deployment
WEBARENA_HOST_IP=34.123.45.67
WEBARENA_NETWORK=34.123.45.0/24

# Disable WebArena (default)
WEBARENA_HOST_IP=
WEBARENA_NETWORK=
```

**See `CLAUDE.md` for detailed WebArena configuration documentation.**

## 📁 Project Structure

```
Expand Down
1 change: 0 additions & 1 deletion browser-operator-core
Submodule browser-operator-core deleted from cfd482
File renamed without changes.
File renamed without changes.
27 changes: 15 additions & 12 deletions Dockerfile.cloudrun → deployments/cloudrun/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -70,20 +70,20 @@ ARG TARGETOS
ARG TARGETARCH
ENV CGO_ENABLED=0

COPY kernel-images/server/go.mod ./
COPY kernel-images/server/go.sum ./
COPY submodules/kernel-images/server/go.mod ./
COPY submodules/kernel-images/server/go.sum ./
RUN go mod download

COPY kernel-images/server/ .
COPY submodules/kernel-images/server/ .
RUN GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH:-amd64} \
go build -ldflags="-s -w" -o /out/kernel-images-api ./cmd/api

# WebRTC client build
FROM node:22-bullseye-slim AS client
WORKDIR /src
COPY kernel-images/images/chromium-headful/client/package*.json ./
COPY submodules/kernel-images/images/chromium-headful/client/package*.json ./
RUN npm install
COPY kernel-images/images/chromium-headful/client/ .
COPY submodules/kernel-images/images/chromium-headful/client/ .
RUN npm run build

# Xorg dependencies
Expand All @@ -95,7 +95,7 @@ RUN set -eux; \
apt-get install -y \
git gcc pkgconf autoconf automake libtool make xorg-dev xutils-dev \
&& rm -rf /var/lib/apt/lists/*;
COPY kernel-images/images/chromium-headful/xorg-deps/ /xorg/
COPY submodules/kernel-images/images/chromium-headful/xorg-deps/ /xorg/
# build xf86-video-dummy v0.3.8 with RandR support
RUN set -eux; \
cd xf86-video-dummy/v0.3.8; \
Expand Down Expand Up @@ -248,19 +248,22 @@ ENV WITHDOCKER=true
ENV PORT=8080

# Copy configurations
COPY kernel-images/images/chromium-headful/xorg.conf /etc/neko/xorg.conf
COPY kernel-images/images/chromium-headful/neko.yaml /etc/neko/neko.yaml
COPY submodules/kernel-images/images/chromium-headful/xorg.conf /etc/neko/xorg.conf
COPY submodules/kernel-images/images/chromium-headful/neko.yaml /etc/neko/neko.yaml
COPY --from=neko /usr/bin/neko /usr/bin/neko
COPY --from=client /src/dist/ /var/www
COPY --from=xorg-deps /usr/local/lib/xorg/modules/drivers/dummy_drv.so /usr/lib/xorg/modules/drivers/dummy_drv.so
COPY --from=xorg-deps /usr/local/lib/xorg/modules/input/neko_drv.so /usr/lib/xorg/modules/input/neko_drv.so

COPY kernel-images/images/chromium-headful/image-chromium/ /
COPY kernel-images/images/chromium-headful/start-chromium.sh /images/chromium-headful/start-chromium.sh
COPY submodules/kernel-images/images/chromium-headful/image-chromium/ /

# Copy custom start-chromium.sh with patches
COPY deployments/cloudrun/scripts/start-chromium.sh /images/chromium-headful/start-chromium.sh
RUN chmod +x /images/chromium-headful/start-chromium.sh
COPY kernel-images/images/chromium-headful/supervisord.conf /etc/supervisor/supervisord.conf

COPY submodules/kernel-images/images/chromium-headful/supervisord.conf /etc/supervisor/supervisord.conf
# Cloud Run-specific supervisord config; the source path is relative to the
# build context and uses the same deployments/cloudrun/ directory as the
# start-chromium.sh copy above.
COPY deployments/cloudrun/supervisord-cloudrun.conf /etc/supervisor/supervisord-cloudrun.conf
COPY kernel-images/images/chromium-headful/supervisor/services/ /etc/supervisor/conf.d/services/
COPY submodules/kernel-images/images/chromium-headful/supervisor/services/ /etc/supervisor/conf.d/services/

# Copy the kernel-images API binary
COPY --from=server-builder /out/kernel-images-api /usr/local/bin/kernel-images-api
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,15 @@ if [ -d "/tmp" ]; then
rm -f /tmp/.X*-lock 2>/dev/null || true
fi

# Add a route to the WebArena network via the Docker host gateway.
# This allows the container to reach hosts on that network. The CIDR can be
# overridden with WEBARENA_NETWORK; when that variable is unset or empty the
# original 172.16.55.0/24 is used, so existing behavior is unchanged.
if command -v ip >/dev/null 2>&1; then
    WEBARENA_ROUTE_CIDR="${WEBARENA_NETWORK:-172.16.55.0/24}"
    # Take the gateway of the first default route only. With more than one
    # default route the previous grep-based lookup yielded several lines,
    # which then expanded into extra arguments for `ip route add`.
    GATEWAY=$(ip route show default | awk 'NR == 1 { print $3 }')
    if [ -n "$GATEWAY" ]; then
        echo "🌐 [init] Adding route to $WEBARENA_ROUTE_CIDR via $GATEWAY..."
        # Best effort: a pre-existing route (or missing privileges) must not
        # abort container initialization, so failures only produce a warning.
        ip route add "$WEBARENA_ROUTE_CIDR" via "$GATEWAY" 2>/dev/null || echo "⚠️ [init] Route already exists or failed to add"
    fi
fi

echo "✅ [init] Container initialization complete"
exit 0
51 changes: 51 additions & 0 deletions deployments/cloudrun/scripts/start-chromium.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/bin/bash

# Abort on any command failure, on use of an unset variable, and when any
# stage of a pipeline fails.
set -o pipefail -o errexit -o nounset

# This script is launched by supervisord to start Chromium in the foreground.
# PATCHED VERSION: CHROMIUM_FLAGS is parsed into a bash array so that quoted
# flag values survive as single arguments instead of being word-split.
#
# Inputs (all optional):
#   INTERNAL_PORT   - remote debugging port, defaults to 9223
#   CHROMIUM_FLAGS  - extra Chromium flags, shell-quoted
#   /chromium/flags - file whose contents are appended to CHROMIUM_FLAGS
#   RUN_AS_ROOT     - "true" runs Chromium as root; any other value runs it
#                     as the "kernel" user

echo "Starting Chromium launcher (patched version with proper flag quoting)"

# Resolve internal port for the remote debugging interface
INTERNAL_PORT="${INTERNAL_PORT:-9223}"

# Load additional Chromium flags from env and optional file.
# The file contents are appended after the env value, separated by a space.
CHROMIUM_FLAGS="${CHROMIUM_FLAGS:-}"
if [[ -f /chromium/flags ]]; then
CHROMIUM_FLAGS="$CHROMIUM_FLAGS $(cat /chromium/flags)"
fi
# The combined flag string is written to the log as-is.
echo "CHROMIUM_FLAGS: $CHROMIUM_FLAGS"

# Always use display :1 and point DBus to the system bus socket
export DISPLAY=":1"
export DBUS_SESSION_BUS_ADDRESS="unix:path=/run/dbus/system_bus_socket"

# Only the literal string "true" selects the root code path below.
RUN_AS_ROOT="${RUN_AS_ROOT:-false}"

# Base arguments that are always passed to Chromium; extra flags are appended
# to this array below.
CHROMIUM_ARGS=(
--remote-debugging-port="$INTERNAL_PORT"
--user-data-dir=/home/kernel/user-data
--password-store=basic
--no-first-run
)

# Parse CHROMIUM_FLAGS with eval so that shell quoting inside the string is
# honored when the flags are appended to the array.
# NOTE(review): eval re-parses the string as shell code, so command
# substitutions, variable references and globs inside CHROMIUM_FLAGS or
# /chromium/flags are executed/expanded by this script. Confirm that both
# sources are operator-controlled and never derived from untrusted input.
if [[ -n "$CHROMIUM_FLAGS" ]]; then
eval "CHROMIUM_ARGS+=($CHROMIUM_FLAGS)"
fi

# exec replaces this shell with the launched command in both branches, so
# nothing after the exec lines runs.
if [[ "$RUN_AS_ROOT" == "true" ]]; then
echo "Running chromium as root"
exec chromium "${CHROMIUM_ARGS[@]}"
else
echo "Running chromium as kernel user"
# DISPLAY and DBUS_SESSION_BUS_ADDRESS are passed again explicitly through
# env, together with HOME and the XDG directories of the kernel user.
# NOTE(review): presumably this is to guarantee the values under runuser
# regardless of how it handles the caller's environment — confirm.
exec runuser -u kernel -- env \
DISPLAY=":1" \
DBUS_SESSION_BUS_ADDRESS="unix:path=/run/dbus/system_bus_socket" \
XDG_CONFIG_HOME=/home/kernel/.config \
XDG_CACHE_HOME=/home/kernel/.cache \
HOME=/home/kernel \
chromium "${CHROMIUM_ARGS[@]}"
fi
File renamed without changes.
File renamed without changes.
File renamed without changes.
17 changes: 17 additions & 0 deletions deployments/local-webarena/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Twilio Network Traversal Service Credentials
# Get these from your Twilio Console:
# 1. Go to https://console.twilio.com/
# 2. Navigate to Account > API Keys & Tokens
# 3. Create a new API Key
# 4. Use the SID as TWILIO_ACCOUNT_SID
# 5. Use the Secret as TWILIO_AUTH_TOKEN
TWILIO_ACCOUNT_SID=SK...your_api_key_sid_here
TWILIO_AUTH_TOKEN=your_api_key_secret_here

# Google Cloud Configuration
# If not provided, will use current gcloud config
PROJECT_ID=your-gcp-project-id
# REGION=us-central1

# Optional: Service Configuration
# SERVICE_NAME=kernel-browser
Loading