Skip to content

Commit dcbc4af

Browse files
authored
Merge pull request #99 from burtenshaw/textarena-env
[ENVIRONMENT] textarena wrapper env
2 parents d41d977 + 941da1a commit dcbc4af

File tree

13 files changed

+955
-0
lines changed

13 files changed

+955
-0
lines changed

.github/workflows/docker-build.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ jobs:
7979
dockerfile: src/envs/atari_env/server/Dockerfile
8080
- name: git-env
8181
dockerfile: src/envs/git_env/server/Dockerfile
82+
- name: textarena-env
83+
dockerfile: src/envs/textarena_env/server/Dockerfile
8284

8385
steps:
8486
- name: Checkout code

examples/textarena_simple.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
#!/usr/bin/env python3
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
"""Quickstart example for the generic TextArena environment."""
9+
10+
from __future__ import annotations
11+
12+
import sys
13+
from pathlib import Path
14+
15+
# Add project src/ to import path
16+
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
17+
18+
from envs.textarena_env import TextArenaEnv, TextArenaAction
19+
20+
21+
def main() -> None:
    """Play a short GuessTheNumber-v0 episode against a Dockerised server.

    Starts a container from the ``textarena-env:latest`` image, resets the
    environment, and sends up to five bracketed guesses. Each guess is the
    midpoint of the remaining search interval, which is narrowed using the
    most recent "higher"/"lower" hint found in the returned messages.

    Any exception raised while playing is printed together with setup
    instructions; the environment is closed in all cases.
    """
    print("=" * 60)
    print("💬 TextArena Hello World - GuessTheNumber-v0")
    print("=" * 60)

    env = TextArenaEnv.from_docker_image(
        "textarena-env:latest",
        env_vars={
            "TEXTARENA_ENV_ID": "GuessTheNumber-v0",
            "TEXTARENA_NUM_PLAYERS": "1",
        },
        ports={8000: 8000},
    )

    try:
        print("\n📍 Resetting environment...")
        result = env.reset()
        print(f" Prompt:\n{result.observation.prompt}\n")

        # Search interval. 1-20 keeps the opening guess at 10 (and the second
        # at 15 or 5), exactly as the previous fixed heuristic did.
        # NOTE(review): assumes the game's range is 1-20 — confirm against the
        # server configuration if a different range is used.
        default_low, default_high = 1, 20
        low, high = default_low, default_high

        for step in range(5):
            guess_value = (low + high) // 2
            guess = f"[{guess_value}]"
            print(f"🎯 Step {step + 1}: sending guess {guess}")
            result = env.step(TextArenaAction(message=guess))

            for message in result.observation.messages:
                print(f" [{message.category}] {message.content}")

            if result.done:
                break

            # Use whichever hint appears LAST in the feedback so that older
            # hints (if the server returns accumulated history) do not win.
            feedback = " ".join(
                (msg.content or "") for msg in result.observation.messages
            ).lower()
            higher_at = feedback.rfind("higher")
            lower_at = feedback.rfind("lower")
            if higher_at > lower_at:
                low = guess_value + 1
            elif lower_at > higher_at:
                high = guess_value - 1
            # Neither hint present: interval unchanged, same midpoint is retried.

            if low > high:
                # Contradictory hints emptied the interval; start over.
                low, high = default_low, default_high

        print("\n✅ Episode finished!")
        print(f" Reward: {result.reward}")
        print(f" Done: {result.done}")

        state = env.state()
        print("\n📊 Server State Snapshot:")
        print(f" Episode ID: {state.episode_id}")
        print(f" Step count: {state.step_count}")
        print(f" Env ID: {state.env_id}")

    except Exception as exc:  # pragma: no cover - demonstration script
        print(f"\n❌ Error: {exc}")
        print("\nMake sure you have built the Docker image first:")
        print(" docker build -f src/envs/textarena_env/server/Dockerfile -t textarena-env:latest .")
        print("\nAlternatively run the server manually:")
        print(" python -m envs.textarena_env.server.app")

    finally:
        env.close()
        print("\n👋 Done!")
83+
84+
85+
if __name__ == "__main__":
86+
main()
87+
Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
#!/usr/bin/env python3
2+
"""Play TextArena Wordle with a hosted LLM via Hugging Face Inference Providers.
3+
4+
This script mirrors the structure of the Kuhn Poker inference sample but targets
5+
the Wordle environment. We deploy the generic TextArena server (wrapped in
6+
OpenEnv) inside a local Docker container and query a single hosted model using
7+
the OpenAI-compatible API provided by Hugging Face's router.
8+
9+
Prerequisites
10+
-------------
11+
1. Build the TextArena Docker image::
12+
13+
docker build -f src/envs/textarena_env/server/Dockerfile -t textarena-env:latest .
14+
15+
2. Set your Hugging Face token::
16+
17+
export HF_TOKEN=your_token_here
18+
19+
3. Run this script::
20+
21+
python examples/wordle_inference.py
22+
23+
By default we ask ``openai/gpt-oss-120b`` (via the ``novita`` provider) to play ``Wordle-v0``. Adjust the
24+
``MODEL`` constant if you'd like to experiment with another provider-compatible
25+
model.
26+
"""
27+
28+
from __future__ import annotations
29+
30+
import os
31+
import re
32+
from typing import Iterable, List
33+
34+
from openai import OpenAI
35+
36+
from envs.textarena_env import TextArenaAction, TextArenaEnv
37+
from envs.textarena_env.models import TextArenaMessage
38+
39+
# ---------------------------------------------------------------------------
40+
# Configuration
41+
# ---------------------------------------------------------------------------
42+
43+
# OpenAI-compatible endpoint exposed by the Hugging Face router.
API_BASE_URL = "https://router.huggingface.co/v1"
# Credential lookup: a non-empty API_KEY wins, otherwise HF_TOKEN is used.
API_KEY = os.environ.get("API_KEY") or os.environ.get("HF_TOKEN")

# Model identifier sent with every chat-completion request.
MODEL = "openai/gpt-oss-120b:novita"
# Upper bound on the number of model turns in one game.
MAX_TURNS = 8
# When true, prompts, model replies and feedback are printed while playing.
VERBOSE = True
49+
50+
# System message sent on every turn. The sentences are concatenated into one
# string; each continuation starts with a space so they do not run together.
SYSTEM_PROMPT = (
    "You are an expert Wordle solver."
    " Always respond with a single guess inside square brackets, e.g. [crane]."
    " Use lowercase letters, exactly one five-letter word per reply."
    " Reason about prior feedback before choosing the next guess."
    " Words must be 5 letters long and real English words."
    " Do not include any other text in your response."
    " Do not repeat the same guess twice."
)
59+
60+
61+
# ---------------------------------------------------------------------------
62+
# Helpers
63+
# ---------------------------------------------------------------------------
64+
65+
def format_history(messages: Iterable[TextArenaMessage]) -> str:
    """Render TextArena messages as plain text for the model.

    Each message becomes one ``[CATEGORY] content`` line; a missing or empty
    category is shown as ``MESSAGE``.

    Args:
        messages: Objects exposing ``category`` and ``content`` attributes.

    Returns:
        The lines joined with newlines, or an empty string when there are no
        messages.
    """
    return "\n".join(
        f"[{message.category or 'MESSAGE'}] {message.content}"
        for message in messages
    )
73+
74+
75+
def extract_guess(text: str) -> str:
    """Return a bracketed, lowercase five-letter guess parsed from *text*.

    Candidates are tried in this order:

    1. The first five-letter word enclosed in square brackets (whitespace
       just inside the brackets is tolerated).
    2. The last standalone five-letter word in the text.
    3. The first five letters left after removing every non-letter.
    4. The placeholder ``"[dunno]"`` when fewer than five letters exist.

    Args:
        text: Raw model output.

    Returns:
        A string of the form ``"[xxxxx]"`` in lowercase.
    """
    bracketed = re.search(r"\[\s*([A-Za-z]{5})\s*\]", text)
    if bracketed:
        return f"[{bracketed.group(1).lower()}]"

    # Prefer a real word over letters glued together across word boundaries
    # ("My answer: crane" must not become "[myans]"). The last candidate is
    # used on the assumption that free-form replies end with the answer.
    standalone = re.findall(r"(?<![A-Za-z])[A-Za-z]{5}(?![A-Za-z])", text)
    if standalone:
        return f"[{standalone[-1].lower()}]"

    # Last resort: strip everything but letters and take the first five.
    letters = re.sub(r"[^a-zA-Z]", "", text).lower()
    if len(letters) >= 5:
        return f"[{letters[:5]}]"
    return "[dunno]"
86+
87+
88+
def make_user_prompt(prompt_text: str, messages: Iterable[TextArenaMessage]) -> str:
    """Build the user message from the game prompt and the message history.

    The result has three sections separated by blank lines: the current
    prompt, the history rendered by ``format_history``, and a closing
    instruction asking for a bracketed guess.
    """
    sections = [
        f"Current prompt:\n{prompt_text}",
        f"Conversation so far:\n{format_history(messages)}",
        "Reply with your next guess enclosed in square brackets.",
    ]
    return "\n\n".join(sections)
97+
98+
99+
# ---------------------------------------------------------------------------
100+
# Gameplay
101+
# ---------------------------------------------------------------------------
102+
103+
def play_wordle(env: TextArenaEnv, client: OpenAI) -> None:
    """Play one Wordle episode, asking the hosted model for each guess.

    The environment is reset, then for at most ``MAX_TURNS`` turns (or until
    the environment reports ``done``) the current prompt and messages are
    sent to the model, its reply is parsed with ``extract_guess`` and the
    guess is submitted to the environment. The final reward and done flag are
    printed at the end.

    Args:
        env: TextArena client used to reset and step the game.
        client: OpenAI-compatible client used for chat completions.
    """
    result = env.reset()
    observation = result.observation

    if VERBOSE:
        print("📜 Initial Prompt:\n" + observation.prompt)

    for turn in range(1, MAX_TURNS + 1):
        if result.done:
            break

        user_prompt = make_user_prompt(observation.prompt, observation.messages)

        response = client.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt},
            ],
            max_tokens=2048,
            temperature=0.7,
        )

        # ``content`` is optional in a chat-completion message; treat a
        # missing value as an empty reply instead of failing on ``.strip()``.
        raw_output = (response.choices[0].message.content or "").strip()
        guess = extract_guess(raw_output)

        if VERBOSE:
            print(f"\n🎯 Turn {turn}: model replied with -> {raw_output}")
            print(f" Parsed guess: {guess}")

        result = env.step(TextArenaAction(message=guess))
        observation = result.observation

        if VERBOSE:
            print(" Feedback messages:")
            for message in observation.messages:
                print(f" [{message.category}] {message.content}")

    print("\n✅ Game finished")
    print(f" Reward: {result.reward}")
    print(f" Done: {result.done}")
144+
145+
146+
# ---------------------------------------------------------------------------
147+
# Entrypoint
148+
# ---------------------------------------------------------------------------
149+
150+
def main() -> None:
    """Entry point: check credentials, start the Wordle container and play.

    Exits with an explanatory message when no API key is configured. The
    environment is closed even if the game raises.
    """
    if not API_KEY:
        raise SystemExit("HF_TOKEN (or API_KEY) must be set to query the model.")

    llm_client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)

    game_settings = {
        "TEXTARENA_ENV_ID": "Wordle-v0",
        "TEXTARENA_NUM_PLAYERS": "1",
    }
    wordle_env = TextArenaEnv.from_docker_image(
        "textarena-env:latest",
        env_vars=game_settings,
        ports={8000: 8000},
    )

    try:
        play_wordle(wordle_env, llm_client)
    finally:
        wordle_env.close()
169+
170+
171+
if __name__ == "__main__":
172+
main()
173+
174+

src/envs/textarena_env/README.md

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# TextArena Environment
2+
3+
Generic wrapper for any [TextArena](https://www.textarena.ai/docs/overview) game inside OpenEnv. This module exposes the TextArena `Env` interface through the standard HTTP server/client APIs used by other OpenEnv environments, enabling quick experimentation with the full suite of word, reasoning, and multi-agent games.
4+
5+
## Features
6+
- Works with any registered TextArena game (e.g. `Wordle-v0`, `GuessTheNumber-v0`, `Chess-v0`, ...).
7+
- Transparent access to TextArena message streams, rewards, and state snapshots.
8+
- Docker image for easy deployment with Python 3.11 and preinstalled dependencies.
9+
- Example client demonstrating end-to-end interaction.
10+
11+
## Docker
12+
13+
Build the container from the project root:
14+
15+
```bash
16+
docker build -f src/envs/textarena_env/server/Dockerfile -t textarena-env:latest .
17+
```
18+
19+
Run it with your desired game (default is `Wordle-v0`). Environment configuration is handled via env vars:
20+
21+
```bash
22+
docker run -p 8000:8000 \
23+
-e TEXTARENA_ENV_ID=GuessTheNumber-v0 \
24+
-e TEXTARENA_NUM_PLAYERS=1 \
25+
textarena-env:latest
26+
```
27+
28+
Additional environment arguments can be passed using the `TEXTARENA_KW_` prefix. For example, to enable `hardcore=True`:
29+
30+
```bash
31+
docker run -p 8000:8000 \
32+
-e TEXTARENA_ENV_ID=Wordle-v0 \
33+
-e TEXTARENA_KW_hardcore=true \
34+
textarena-env:latest
35+
```
36+
37+
## Python Example
38+
39+
The repository ships with a simple client script that starts the server from the `textarena-env:latest` Docker image and plays a few turns. Build the image first (see the Docker section), then run the script from the repo root:
40+
41+
```bash
42+
python examples/textarena_simple.py
43+
```
44+
45+
The script uses `TextArenaEnv.from_docker_image` to start a container from the prebuilt `textarena-env:latest` image, so build the image before running it. Review the source (`examples/textarena_simple.py`) for more details and to customize the gameplay loop.
46+

src/envs/textarena_env/__init__.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
"""TextArena environment integration for OpenEnv."""
8+
9+
from .client import TextArenaEnv
10+
from .models import (
11+
TextArenaAction,
12+
TextArenaMessage,
13+
TextArenaObservation,
14+
TextArenaState,
15+
)
16+
from .rewards import RewardProvider, build_reward_providers
17+
18+
__all__ = [
19+
"TextArenaEnv",
20+
"TextArenaAction",
21+
"TextArenaObservation",
22+
"TextArenaState",
23+
"TextArenaMessage",
24+
"RewardProvider",
25+
"build_reward_providers",
26+
]

0 commit comments

Comments
 (0)