Skip to content

Commit a57dd4d

Browse files
committed
add browser gym inference example
1 parent 32d5efb commit a57dd4d

File tree

1 file changed

+267
-0
lines changed

1 file changed

+267
-0
lines changed

examples/browsergym_example.py

Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
"""BrowserGym MiniWoB example with Qwen deciding the next action.
2+
3+
This is an inference example for the BrowserGym environment. It uses the OpenAI
4+
client and a vision language model to decide the next action. We use Hugging Face
5+
Inference Providers API to access the model, but you can use any other provider that
6+
is compatible with the OpenAI API.
7+
8+
Prerequisites:
9+
- Clone the MiniWoB++ tasks repository.
10+
- Serve the HTML bundle with `python -m http.server 8888` inside the
11+
`miniwob-plusplus/miniwob/html` directory.
12+
- Export the MiniWoB URL (must include the `/miniwob/` suffix):
13+
`export MINIWOB_URL=http://host.docker.internal:8888/miniwob/`
14+
- Export your Hugging Face token for the router:
15+
`export HF_TOKEN=your_token_here`
16+
17+
Usage:
18+
python examples/browsergym_example.py
19+
"""
20+
21+
import os
22+
import re
23+
import base64
24+
import textwrap
25+
from io import BytesIO
26+
from typing import List, Optional, Dict
27+
28+
from openai import OpenAI
29+
import numpy as np
30+
from PIL import Image
31+
32+
from envs.browsergym_env import BrowserGymAction, BrowserGymEnv
33+
34+
# --- Model endpoint ---------------------------------------------------------
# OpenAI-compatible base URL handed to the OpenAI client in main().
API_BASE_URL = "https://router.huggingface.co/v1"
# HF_TOKEN wins; API_KEY is consulted only when HF_TOKEN is unset or empty.
API_KEY = os.environ.get("HF_TOKEN") or os.environ.get("API_KEY")
MODEL_NAME = "Qwen/Qwen3-VL-30B-A3B-Instruct:novita"

# --- Episode / sampling settings --------------------------------------------
# Upper bound on the number of environment steps attempted per episode.
MAX_STEPS = 8
# NOTE(review): not referenced anywhere in this file.
MAX_DOM_CHARS = 3500
# Sampling parameters forwarded to the chat completion request.
TEMPERATURE = 0.2
MAX_TOKENS = 200
# Action used when the model request fails or no action can be parsed.
FALLBACK_ACTION = "noop()"

# NOTE(review): not referenced anywhere in this file.
DEBUG = True

# --- Reply parsing ----------------------------------------------------------
# Strips a leading "Action:" / "Next action -" style label from a reply line.
ACTION_PREFIX_RE = re.compile(r"^(action|next action)\s*[:\-]\s*", re.IGNORECASE)
# Matches `name(...)`; DOTALL lets the argument list span several lines.
ACTION_PATTERN = re.compile(r"[A-Za-z_]+\s*\(.*\)", re.DOTALL)

# System message sent with every request; dedented and stripped so the model
# receives it without the source-level indentation.
SYSTEM_PROMPT = textwrap.dedent(
    """
    You control a web browser through BrowserGym.
    Reply with exactly one action string.
    The action must be a valid BrowserGym command such as:
    - noop()
    - click('<BID>')
    - type('selector', 'text to enter')
    - fill('selector', 'text to enter')
    - send_keys('Enter')
    - scroll('down')
    Use single quotes around string arguments.
    When clicking, use the BrowserGym element IDs (BIDs) listed in the user message.
    If you are unsure, respond with noop().
    Do not include explanations or additional text.
    """
).strip()
68+
69+
70+
def build_history_lines(history: List[str], max_lines: int = 4) -> str:
    """Render the most recent history entries for inclusion in the prompt.

    Args:
        history: One summary line per step already taken, oldest first.
        max_lines: How many of the most recent entries to keep. Defaults to
            4, the window the prompt has always used.

    Returns:
        The kept entries joined by newlines, or the literal string ``"None"``
        when there is nothing to show.
    """
    # `history[-0:]` would return the *whole* list, so a non-positive window
    # has to be handled explicitly rather than left to the slice.
    if not history or max_lines <= 0:
        return "None"
    return "\n".join(history[-max_lines:])
74+
75+
76+
def extract_screenshot_uri(observation) -> Optional[str]:
    """Encode the observation's screenshot as a PNG ``data:`` URI.

    Args:
        observation: Object exposing a ``screenshot`` attribute holding pixel
            data convertible to a ``uint8`` NumPy array.

    Returns:
        A ``data:image/png;base64,...`` string, or ``None`` when the
        observation carries no screenshot (missing, ``None`` or empty).
    """
    screenshot = getattr(observation, "screenshot", None)
    if screenshot is None:
        return None

    pixels = np.asarray(screenshot, dtype=np.uint8)
    # An empty payload (e.g. `[]`) is not None but cannot be turned into an
    # image; treat it the same as "no screenshot" instead of crashing.
    if pixels.size == 0:
        return None

    buffer = BytesIO()
    Image.fromarray(pixels).save(buffer, format="PNG")
    encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
    return f"data:image/png;base64,{encoded}"
86+
87+
88+
def extract_clickable_elements(observation) -> List[Dict[str, str]]:
    """Collect BrowserGym element IDs that can be clicked.

    Reads ``observation.metadata["browsergym_obs"]["extra_element_properties"]``
    and keeps every entry whose ``clickable`` property is truthy. Any missing
    or ``None`` level of that structure is treated as empty.

    Args:
        observation: Object that may expose a ``metadata`` mapping.

    Returns:
        A list of ``{"bid": ..., "bbox": ...}`` dicts sorted by ``bid`` (as a
        string). ``bbox`` is the comma-separated bounding box, or ``"?"``
        when the element has none.
    """
    metadata = getattr(observation, "metadata", {}) or {}
    obs_dict = metadata.get("browsergym_obs", {}) or {}
    extra_props = obs_dict.get("extra_element_properties", {}) or {}

    clickables: List[Dict[str, str]] = []
    for bid, props in extra_props.items():
        # Skip elements with no property record as well as non-clickable ones.
        if not props or not props.get("clickable"):
            continue

        bbox = props.get("bbox") or []
        # Coordinates are usually numbers; str.join only accepts strings, so
        # each value is converted explicitly.
        bbox_str = ", ".join(str(value) for value in bbox) if bbox else "?"
        clickables.append({"bid": str(bid), "bbox": bbox_str})

    # Keep a stable ordering for readability
    clickables.sort(key=lambda item: item["bid"])
    return clickables
110+
111+
112+
def build_user_prompt(step: int, observation, history: List[str]) -> str:
    """Compose the per-step user message describing the current page state.

    Args:
        step: 1-based index of the step about to be taken.
        observation: Current observation; ``goal``, ``url`` and
            ``last_action_error`` are read from it, and it is passed to
            ``extract_clickable_elements``.
        history: Summary lines of the steps taken so far.

    Returns:
        The prompt text, with no leading or trailing whitespace.
    """
    goal = observation.goal or "(not provided)"
    url = observation.url or "(unknown)"
    error_note = "Yes" if observation.last_action_error else "No"

    clickables = extract_clickable_elements(observation)
    if clickables:
        actions_hint = "\n".join(
            f" - {item['bid']} (bbox: {item['bbox']})" for item in clickables
        )
    else:
        actions_hint = " (none detected)"

    # The prompt is assembled line by line rather than by dedenting an
    # indented f-string: interpolated multi-line values (history, element
    # list) start at column 0, which would leave textwrap.dedent with no
    # common margin to strip and the template's indentation in the prompt.
    prompt_lines = [
        f"Step: {step}",
        f"Goal: {goal}",
        f"Current URL: {url}",
        "Previous steps:",
        build_history_lines(history),
        f"Last action error: {error_note}",
        "",
        f"Available clickable element IDs: {actions_hint}",
        "",
        "Reply with exactly one BrowserGym action string.",
    ]
    return "\n".join(prompt_lines).strip()
140+
141+
142+
def _trim_to_balanced_call(candidate: str) -> str:
    """Cut *candidate* right after the parenthesis that closes its first call.

    Parentheses inside single- or double-quoted arguments are ignored. When
    the parentheses never balance, *candidate* is returned unchanged.
    """
    depth = 0
    quote = None
    escaped = False
    for index, char in enumerate(candidate):
        if quote is not None:
            # Inside a quoted argument: only look for the closing quote.
            if escaped:
                escaped = False
            elif char == "\\":
                escaped = True
            elif char == quote:
                quote = None
            continue
        if char in "'\"":
            quote = char
        elif char == "(":
            depth += 1
        elif char == ")":
            depth -= 1
            if depth == 0:
                return candidate[: index + 1]
    return candidate


def parse_model_action(response_text: str) -> str:
    """Extract a single BrowserGym action string from a model reply.

    Each line of the reply is tried first (after removing a leading
    "Action:"-style label); if none contains something shaped like
    ``name(...)``, the reply as a whole is searched so that a call spread
    over several lines is still found.

    Args:
        response_text: Raw text returned by the model; may be empty.

    Returns:
        The action with internal whitespace collapsed to single spaces, or
        ``FALLBACK_ACTION`` when no action-like text is present.
    """
    if not response_text:
        return FALLBACK_ACTION

    # Prefer the first line that looks like an action string, then fall back
    # to searching the whole response.
    candidates = [
        ACTION_PREFIX_RE.sub("", raw_line.strip())
        for raw_line in response_text.splitlines()
    ]
    candidates.append(response_text)

    for candidate in candidates:
        match = ACTION_PATTERN.search(candidate)
        if not match:
            continue
        # The pattern is greedy up to the last ")" in the text, so trailing
        # prose such as "click('12') (the button)" must be trimmed off.
        action = _trim_to_balanced_call(match.group(0).strip())
        # Collapse internal whitespace
        return re.sub(r"\s+", " ", action)

    return FALLBACK_ACTION
170+
171+
172+
def main() -> None:
    """Run one BrowserGym MiniWoB episode with the model picking each action.

    Creates the OpenAI-compatible client and the BrowserGym environment, then
    for at most ``MAX_STEPS`` steps sends the current prompt (plus a
    screenshot when one is available) to the model, parses the reply into an
    action string and applies it to the environment. Progress is printed to
    stdout. The environment is closed even when the episode raises.
    """
    llm_client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)

    env = BrowserGymEnv.from_hub(
        "browsergym-env:latest",
        env_vars={
            "BROWSERGYM_BENCHMARK": "miniwob",
            "BROWSERGYM_TASK_NAME": "click-test",
        },
        ports={8000: 8000},
    )

    history: List[str] = []

    try:
        result = env.reset()
        observation = result.observation
        print(f"Episode goal: {observation.goal}")

        for step in range(1, MAX_STEPS + 1):
            # The environment may already be finished before we act.
            if result.done:
                print("Environment signalled done. Stopping early.")
                break

            prompt_text = build_user_prompt(step, observation, history)
            user_content = [{"type": "text", "text": prompt_text}]
            screenshot_uri = extract_screenshot_uri(observation)
            if screenshot_uri:
                image_part = {
                    "type": "image_url",
                    "image_url": {"url": screenshot_uri},
                }
                user_content.append(image_part)

            system_message = {
                "role": "system",
                "content": [{"type": "text", "text": SYSTEM_PROMPT}],
            }
            user_message = {"role": "user", "content": user_content}
            messages = [system_message, user_message]

            # Any failure talking to the model is deliberately non-fatal:
            # the episode continues with the fallback action.
            try:
                completion = llm_client.chat.completions.create(
                    model=MODEL_NAME,
                    messages=messages,
                    temperature=TEMPERATURE,
                    max_tokens=MAX_TOKENS,
                    stream=False,
                )
                response_text = completion.choices[0].message.content or ""
            # pylint: disable=broad-except
            except Exception as exc:  # noqa: BLE001
                print(f"Model request failed ({exc}). Using fallback action.")
                response_text = FALLBACK_ACTION

            action_str = parse_model_action(response_text)
            print(f"Step {step}: model suggested -> {action_str}")

            result = env.step(BrowserGymAction(action_str=action_str))
            observation = result.observation

            # A missing reward is reported as 0.0.
            reward = result.reward or 0.0
            error_flag = " ERROR" if observation.last_action_error else ""
            history.append(
                f"Step {step}: {action_str} -> reward {reward:+.2f}{error_flag}"
            )
            print(
                f" Reward: {reward:+.2f} | Done: {result.done} | "
                f"Last action error: {observation.last_action_error}"
            )

            if result.done:
                print("Episode complete.")
                break

        else:
            # Runs only when the loop was never broken out of.
            print(f"Reached max steps ({MAX_STEPS}).")

    finally:
        env.close()


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)