add browser gym inference example

burtenshaw · burtenshaw · commit a57dd4dbbffa · 2025-11-06T14:35:16.000+01:00
diff --git a/examples/browsergym_example.py b/examples/browsergym_example.py
@@ -0,0 +1,267 @@
+"""BrowserGym MiniWoB example with Qwen deciding the next action.
+
+This is an inference example for the BrowserGym environment. It uses the OpenAI
+client and a vision-language model to decide the next action. The model is reached
+through the Hugging Face Inference Providers API, but any other provider that is
+compatible with the OpenAI API works as well.
+
+Prerequisites:
+- Clone the MiniWoB++ tasks repository.
+- Serve the HTML bundle with `python -m http.server 8888` inside the
+  `miniwob-plusplus/miniwob/html` directory.
+- Export the MiniWoB URL (must include the `/miniwob/` suffix):
+    `export MINIWOB_URL=http://host.docker.internal:8888/miniwob/`
+- Export your Hugging Face token for the router:
+    `export HF_TOKEN=your_token_here`
+
+Usage:
+    python examples/browsergym_example.py
+"""
+
+import os
+import re
+import base64
+import textwrap
+from io import BytesIO
+from typing import List, Optional, Dict
+
+from openai import OpenAI
+import numpy as np
+from PIL import Image
+
+from envs.browsergym_env import BrowserGymAction, BrowserGymEnv
+
+API_BASE_URL = "https://router.huggingface.co/v1"  # passed to the OpenAI client
+API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY")  # HF_TOKEN takes precedence
+MODEL_NAME = "Qwen/Qwen3-VL-30B-A3B-Instruct:novita"
+MAX_STEPS = 8  # step budget for the episode loop in main()
+MAX_DOM_CHARS = 3500  # NOTE(review): not referenced anywhere in this file
+TEMPERATURE = 0.2  # sampling temperature sent with each completion request
+MAX_TOKENS = 200  # completion length cap sent with each request
+FALLBACK_ACTION = "noop()"  # used when the model call fails or no action is parsed
+
+DEBUG = True  # NOTE(review): not referenced anywhere in this file
+ACTION_PREFIX_RE = re.compile(
+    r"^(action|next action)\s*[:\-]\s*",  # leading "Action:" / "Next action -" label
+    re.IGNORECASE,
+)
+ACTION_PATTERN = re.compile(r"[A-Za-z_]+\s*\(.*\)", re.DOTALL)  # greedy: to last ")"
+
+
+# Example calls use BrowserGym's BID-addressed actions (element ID comes first).
+SYSTEM_PROMPT = textwrap.dedent(
+    """
+    You control a web browser through BrowserGym.
+    Reply with exactly one action string.
+    The action must be a valid BrowserGym command such as:
+    - noop()
+    - click('<BID>')
+    - fill('<BID>', 'text to enter')
+    - press('<BID>', 'Enter')
+    - scroll(0, 200)
+    Use single quotes around string arguments.
+    For '<BID>', use a BrowserGym element ID (BID) listed in the user message.
+    If you are unsure, respond with noop().
+    Do not include explanations or additional text.
+    """
+).strip()
+
+
+def build_history_lines(history: List[str]) -> str:
+    """Join the four most recent history entries with newlines."""
+    # An empty history is reported as the literal text "None".
+    return "\n".join(history[-4:]) if history else "None"
+
+
+def extract_screenshot_uri(observation) -> Optional[str]:
+    """Return the screenshot as a base64 PNG data URI, or None when absent."""
+    pixels = observation.screenshot
+    if pixels is None:
+        return None
+    png = BytesIO()
+    # Cast to uint8 and encode to PNG entirely in memory.
+    Image.fromarray(np.array(pixels, dtype=np.uint8)).save(png, format="PNG")
+    encoded = base64.b64encode(png.getvalue()).decode("utf-8")
+    return f"data:image/png;base64,{encoded}"
+
+
+def extract_clickable_elements(observation) -> List[Dict[str, str]]:
+    """Collect the BrowserGym element IDs (BIDs) flagged as clickable.
+
+    Looks up ``metadata["browsergym_obs"]["extra_element_properties"]`` on the
+    observation, treating any missing or None level as empty. Returns one
+    ``{"bid": ..., "bbox": ...}`` dict per clickable element, sorted by BID as
+    a string; ``bbox`` is the comma-separated box values, or "?" if absent.
+    """
+    metadata = getattr(observation, "metadata", {}) or {}
+    obs_dict = metadata.get("browsergym_obs", {}) or {}
+    extra_props = obs_dict.get("extra_element_properties", {}) or {}
+
+    clickables: List[Dict[str, str]] = []
+    for bid, props in extra_props.items():
+        if not (props and props.get("clickable")):
+            continue
+        # str.join() accepts only strings, so convert each box value first.
+        bbox_str = ", ".join(str(value) for value in props.get("bbox") or [])
+        clickables.append({"bid": str(bid), "bbox": bbox_str or "?"})
+
+    # Sort by BID (plain string comparison) so the listing has a stable order.
+    return sorted(clickables, key=lambda item: item["bid"])
+
+
+def build_user_prompt(step: int, observation, history: List[str]) -> str:
+    """Compose the text part of the user message for one agent step.
+
+    Reads ``goal``, ``url`` and ``last_action_error`` from ``observation`` and
+    lists the step number, the recent ``history`` lines and the clickable BIDs,
+    each clickable on its own indented line below the section heading.
+    """
+    hint_lines = [
+        f"    - {item['bid']} (bbox: {item['bbox']})"
+        for item in extract_clickable_elements(observation)
+    ] or ["    (none detected)"]
+    # Assemble line by line instead of running textwrap.dedent() over an
+    # f-string: interpolated multi-line values are not indented like the
+    # template, so dedent finds a smaller common margin and leaves stray indent.
+    sections = [
+        f"Step: {step}",
+        f"Goal: {observation.goal or '(not provided)'}",
+        f"Current URL: {observation.url or '(unknown)'}",
+        "Previous steps:",
+        build_history_lines(history),
+        f"Last action error: {'Yes' if observation.last_action_error else 'No'}",
+        "",
+        "Available clickable element IDs:",
+        *hint_lines,
+        "",
+        "Reply with exactly one BrowserGym action string.",
+    ]
+    return "\n".join(sections)
+
+
+def parse_model_action(response_text: str) -> str:
+    """Extract a single BrowserGym action call from the model's reply.
+
+    Non-empty lines are tried in order (minus a leading "Action:" or
+    "Next action -" label), then the whole reply, so a call wrapped over
+    several lines is still found. Text after the call's closing parenthesis
+    is dropped and whitespace runs collapse to a single space.
+    Returns FALLBACK_ACTION when no call-like text is present.
+    """
+    text = response_text or ""
+    lines = [ACTION_PREFIX_RE.sub("", raw.strip()) for raw in text.splitlines()]
+    match = next(filter(None, map(ACTION_PATTERN.search, (*lines, text))), None)
+    if match is None:
+        return FALLBACK_ACTION
+    # ACTION_PATTERN is greedy and runs to the last ")" in the text, so cut at
+    # the parenthesis that closes the call; parentheses inside quotes are skipped.
+    action, depth, quote = match.group(0), 0, ""
+    for end, char in enumerate(action, start=1):
+        if quote:  # inside a string argument: only look for its closing quote
+            quote = "" if char == quote and action[end - 2] != "\\" else quote
+        elif char in "'\"":
+            quote = char
+        else:
+            depth += (char == "(") - (char == ")")
+            if char == ")" and depth == 0:
+                action = action[:end]
+                break
+    return re.sub(r"\s+", " ", action.strip())
+
+
+def main() -> None:
+    """Run one MiniWoB episode with the hosted model choosing every action.
+
+    Starts the BrowserGym environment, then for at most MAX_STEPS steps sends
+    the text prompt (plus a screenshot when one is available) to the chat
+    model, parses the reply into an action string and applies it. The loop
+    stops early once the environment reports ``done``, and the environment
+    is closed on every exit path.
+    """
+    llm = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
+
+    task_settings = {
+        "BROWSERGYM_BENCHMARK": "miniwob",
+        "BROWSERGYM_TASK_NAME": "click-test",
+    }
+    env = BrowserGymEnv.from_hub(
+        "browsergym-env:latest",
+        env_vars=task_settings,
+        ports={8000: 8000},
+    )
+
+    # The system message never changes, so build it once up front.
+    system_message = {
+        "role": "system",
+        "content": [{"type": "text", "text": SYSTEM_PROMPT}],
+    }
+    history: List[str] = []
+
+    try:
+        result = env.reset()
+        obs = result.observation
+        print(f"Episode goal: {obs.goal}")
+
+        for step in range(1, MAX_STEPS + 1):
+            if result.done:
+                print("Environment signalled done. Stopping early.")
+                break
+
+            # Text part first; the screenshot, when present, follows as an image.
+            prompt_text = build_user_prompt(step, obs, history)
+            parts = [{"type": "text", "text": prompt_text}]
+            image_uri = extract_screenshot_uri(obs)
+            if image_uri:
+                parts.append(
+                    {"type": "image_url", "image_url": {"url": image_uri}}
+                )
+            user_message = {"role": "user", "content": parts}
+
+            try:
+                reply = llm.chat.completions.create(
+                    model=MODEL_NAME,
+                    messages=[system_message, user_message],
+                    temperature=TEMPERATURE,
+                    max_tokens=MAX_TOKENS,
+                    stream=False,
+                )
+                raw_text = reply.choices[0].message.content or ""
+            # pylint: disable=broad-except
+            except Exception as exc:  # noqa: BLE001
+                # Best effort: a failed request falls back to FALLBACK_ACTION
+                # for this step instead of aborting the episode.
+                print(f"Model request failed ({exc}). Using fallback action.")
+                raw_text = FALLBACK_ACTION
+
+            action_str = parse_model_action(raw_text)
+            print(f"Step {step}: model suggested -> {action_str}")
+
+            result = env.step(BrowserGymAction(action_str=action_str))
+            obs = result.observation
+            errored = obs.last_action_error
+            reward = result.reward or 0.0
+
+            # Record a one-line summary; build_user_prompt shows the latest few.
+            suffix = " ERROR" if errored else ""
+            history.append(
+                f"Step {step}: {action_str} -> reward {reward:+.2f}{suffix}"
+            )
+            print(
+                f"  Reward: {reward:+.2f} | Done: {result.done} | "
+                f"Last action error: {errored}"
+            )
+
+            if result.done:
+                print("Episode complete.")
+                break
+
+        else:
+            # for/else: runs only when the loop finished without a break.
+            print(f"Reached max steps ({MAX_STEPS}).")
+
+    finally:
+        env.close()
+
+
+if __name__ == "__main__":
+    main()