implement basic textarena wrapper server

burtenshaw · burtenshaw · commit bcecb3b0d09b · 2025-10-25T12:35:02.000+02:00
diff --git a/src/envs/textarena_env/server/Dockerfile b/src/envs/textarena_env/server/Dockerfile
@@ -0,0 +1,32 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Shared OpenEnv base image (Python 3.11); override with --build-arg BASE_IMAGE=<image>
+ARG BASE_IMAGE=openenv-base:latest
+FROM ${BASE_IMAGE}
+
+# Install system libraries required by TextArena (cv2 needs libGL, glib); apt lists are removed afterwards
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    libgl1 \
+    libglib2.0-0 \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install TextArena and NLTK at pinned versions, without keeping pip's download cache
+RUN pip install --no-cache-dir \
+    textarena==0.6.1 \
+    nltk==3.9.2
+
+# Copy OpenEnv core and TextArena environment sources (paths are relative to the build context)
+COPY src/core/ /app/src/core/
+COPY src/envs/textarena_env/ /app/src/envs/textarena_env/
+
+# Optional health check; NOTE(review): assumes curl exists in the base image and the app serves /health - confirm
+HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
+    CMD curl -f http://localhost:8000/health || exit 1
+
+# Run the TextArena FastAPI server; NOTE(review): the module path presumably relies on /app/src being on PYTHONPATH - confirm
+CMD ["uvicorn", "envs.textarena_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"]
+
diff --git a/src/envs/textarena_env/server/__init__.py b/src/envs/textarena_env/server/__init__.py
@@ -0,0 +1,12 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Server components for the generic TextArena environment."""
+
+from .environment import TextArenaEnvironment
+
+__all__ = ["TextArenaEnvironment"]  # the only name exported by ``from ... import *``
+
diff --git a/src/envs/textarena_env/server/app.py b/src/envs/textarena_env/server/app.py
@@ -0,0 +1,53 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""FastAPI application entrypoint for the TextArena environment."""
+
+from __future__ import annotations
+
+import os
+
+from core.env_server.http_server import create_app
+
+from ..models import TextArenaAction, TextArenaObservation
+from .environment import TextArenaEnvironment
+
+
+def _parse_env_kwargs(prefix: str = "TEXTARENA_KW_") -> dict[str, str]:
+    """Map every ``<prefix>NAME`` process variable to a lower-cased ``name`` key."""
+
+    offset = len(prefix)
+    return {
+        name[offset:].lower(): raw
+        for name, raw in os.environ.items()
+        if name.startswith(prefix)
+    }
+
+
+env_id = os.getenv("TEXTARENA_ENV_ID", "Wordle-v0")
+num_players = int(os.getenv("TEXTARENA_NUM_PLAYERS", "1"))
+max_turns_env = os.getenv("TEXTARENA_MAX_TURNS")
+# Blank TEXTARENA_MAX_TURNS counts as unset; the NLTK flag is matched case-insensitively.
+max_turns = int(max_turns_env) if max_turns_env and max_turns_env.strip() else None
+download_nltk = os.getenv("TEXTARENA_DOWNLOAD_NLTK", "1").strip().lower() in {"1", "true"}
+extra_kwargs = _parse_env_kwargs()
+
+environment = TextArenaEnvironment(
+    env_id=env_id,
+    num_players=num_players,
+    max_turns=max_turns,
+    download_nltk=download_nltk,
+    env_kwargs=extra_kwargs,
+)
+
+app = create_app(environment, TextArenaAction, TextArenaObservation, env_name="textarena_env")
+
+
+if __name__ == "__main__":
+    import uvicorn
+    # Direct-execution entry point: serve the app on all interfaces, port 8000.
+    uvicorn.run(app, host="0.0.0.0", port=8000)
+
diff --git a/src/envs/textarena_env/server/environment.py b/src/envs/textarena_env/server/environment.py
@@ -0,0 +1,218 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Server implementation for the generic TextArena environment."""
+
+from __future__ import annotations
+
+import sys
+from typing import Any, Dict, Iterable, List, Optional
+from uuid import uuid4
+
+import nltk
+
+from core.env_server.interfaces import Environment
+
+from ..models import TextArenaAction, TextArenaMessage, TextArenaObservation, TextArenaState
+
+
+_TEXTARENA_MODULE: Any | None = None
+_TEXTARENA_IMPORT_ERROR: Exception | None = None
+
+
+def _import_textarena() -> Any:
+    """Return the ``textarena`` module, importing it on first use.
+
+    Success and failure are both cached in module globals; a cached failure
+    is re-raised. Interpreters older than 3.10 are rejected with RuntimeError.
+    """
+    global _TEXTARENA_MODULE, _TEXTARENA_IMPORT_ERROR
+    if _TEXTARENA_MODULE is not None:
+        return _TEXTARENA_MODULE
+    if _TEXTARENA_IMPORT_ERROR is not None:
+        raise _TEXTARENA_IMPORT_ERROR
+
+    version = sys.version_info
+    if (version.major, version.minor) < (3, 10):
+        _TEXTARENA_IMPORT_ERROR = RuntimeError(
+            "TextArena environments require Python 3.10 or newer; "
+            f"current interpreter is {version.major}.{version.minor}"
+        )
+        raise _TEXTARENA_IMPORT_ERROR
+    try:
+        import textarena as module  # type: ignore[import]
+    except Exception as failure:  # pragma: no cover - surfaced to caller
+        _TEXTARENA_IMPORT_ERROR = failure
+        raise
+    _TEXTARENA_MODULE = module
+    return module
+
+
+class TextArenaEnvironment(Environment):
+    """Wrap any TextArena game behind the OpenEnv ``Environment`` API."""
+
+    def __init__(
+        self,
+        env_id: str = "Wordle-v0",
+        *,
+        num_players: int = 1,
+        max_turns: Optional[int] = None,
+        download_nltk: bool = True,
+        env_kwargs: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        """Create the wrapped game via ``ta.make(env_id=env_id, **env_kwargs)``.
+
+        ``num_players`` is passed to the game on every reset and ``max_turns`` is
+        only recorded. ``download_nltk`` fetches two NLTK resources beforehand.
+        """
+        super().__init__()
+        ta = _import_textarena()
+        if download_nltk:
+            nltk.download("words", quiet=True)
+            nltk.download("averaged_perceptron_tagger_eng", quiet=True)
+
+        self.env_id = env_id
+        self.num_players = num_players
+        self.max_turns = max_turns
+        # Private copy so later caller-side mutation cannot leak in.
+        self._env_kwargs = dict(env_kwargs or {})
+        self._ta_env = ta.make(env_id=env_id, **self._env_kwargs)
+
+        self._state = TextArenaState(
+            env_id=env_id,
+            num_players=num_players,
+            max_turns=max_turns,
+        )
+
+    # ---- Environment interface ----------------------------------------
+    def reset(self) -> TextArenaObservation:
+        """Start a new episode and return its first observation."""
+        self._ta_env.reset(num_players=self.num_players)
+
+        self._state.episode_id = str(uuid4())
+        self._state.step_count = 0
+        self._state.turn = 0
+        self._state.last_reward = 0.0
+        self._state.last_info = {}
+        self._state.raw_state = self._snapshot_state()
+
+        observation = self._build_observation()
+        observation.reward = 0.0
+        observation.done = False
+        return observation
+
+    def step(self, action: TextArenaAction) -> TextArenaObservation:  # type: ignore[override]
+        """Apply ``action.message`` to the game and return the new observation.
+
+        Raises ``TypeError`` when ``action`` is not a ``TextArenaAction``.
+        """
+        if not isinstance(action, TextArenaAction):
+            raise TypeError(f"Expected TextArenaAction, received {type(action)!r}")
+
+        done, info = self._ta_env.step(action.message)
+        self._state.step_count += 1
+        self._state.turn = getattr(self._ta_env.state, "turn", self._state.turn + 1)
+        self._state.last_info = info or {}
+
+        observation = self._build_observation()
+        observation.done = done
+        observation.reward = self._state.last_reward = self._extract_reward()
+        self._state.raw_state = self._snapshot_state()
+        return observation
+
+    @property
+    def state(self) -> TextArenaState:
+        """Bookkeeping state maintained by ``reset`` and ``step``."""
+        return self._state
+
+    # ---- Helpers --------------------------------------------------------
+    def _build_observation(self) -> TextArenaObservation:
+        """Convert the game's current observation into a ``TextArenaObservation``."""
+        player_id, messages = self._ta_env.get_observation()
+        game_state = self._ta_env.state
+
+        ta_messages = self._convert_messages(messages)
+        prompt_lines = [msg.content for msg in ta_messages if msg.category == "PROMPT"]
+        if not prompt_lines:
+            # Fallback to most recent message history for prompt
+            prompt_lines = [msg.content for msg in ta_messages]
+
+        # ``step_info`` may be absent or None; copy it so the game's dict is not aliased.
+        info: Dict[str, Any] = dict(getattr(game_state, "step_info", None) or {})
+        raw_messages = [
+            {"sender_id": msg.sender_id, "content": msg.content, "category": msg.category}
+            for msg in ta_messages
+        ]
+        return TextArenaObservation(
+            prompt="\n".join(prompt_lines).strip(),
+            messages=ta_messages,
+            current_player_id=player_id,
+            legal_players=self._legal_players(),
+            info=info,
+            metadata={
+                "env_id": self.env_id,
+                "turn": getattr(game_state, "turn", 0),
+                "raw_messages": raw_messages,
+            },
+        )
+
+    def _legal_players(self) -> List[int]:
+        """Return the sorted non-negative integer keys of the game's ``role_mapping``."""
+        role_mapping = getattr(self._ta_env.state, "role_mapping", {}) or {}
+        return sorted(pid for pid in role_mapping if isinstance(pid, int) and pid >= 0)
+
+    def _convert_messages(self, messages: Iterable[Any]) -> List[TextArenaMessage]:
+        """Normalise raw game messages into ``TextArenaMessage`` objects.
+
+        3- and 2-tuples are unpacked as ``(sender, content[, category])``; any other
+        entry becomes a sender ``-1`` ``"MESSAGE"``. A bare ``str`` counts as one entry.
+        """
+        if isinstance(messages, str):
+            # Iterating a str would emit one message per character.
+            messages = [messages]
+        converted: List[TextArenaMessage] = []
+        for entry in messages or []:
+            if isinstance(entry, tuple) and len(entry) == 3:
+                sender, content, category = entry
+            elif isinstance(entry, tuple) and len(entry) == 2:
+                sender, content = entry
+                category = "MESSAGE"
+            else:
+                sender, content, category = -1, str(entry), "MESSAGE"
+            converted.append(
+                TextArenaMessage(
+                    sender_id=int(sender) if isinstance(sender, (int, float)) else -1,
+                    content=str(content),
+                    category=getattr(category, "name", str(category)),
+                )
+            )
+        return converted
+
+    def _extract_reward(self) -> float:
+        """Return the current player's reward, else player 0's, else ``0.0``."""
+        game_state = self._ta_env.state
+        rewards = getattr(game_state, "rewards", None)
+        if isinstance(rewards, dict):
+            player_id = getattr(game_state, "current_player_id", 0)
+            for candidate in (player_id, 0):
+                if candidate in rewards:
+                    return float(rewards[candidate])
+        return 0.0
+
+    def _snapshot_state(self) -> Dict[str, Any]:
+        """Collect selected attributes of the game state into a plain dict."""
+        state = self._ta_env.state
+        return {
+            "turn": getattr(state, "turn", 0),
+            "game_state": getattr(state, "game_state", {}),
+            "logs": list(getattr(state, "logs", None) or []),
+            "rewards": getattr(state, "rewards", None),
+            "done": getattr(state, "done", False),
+            "role_mapping": getattr(state, "role_mapping", {}),
+            "game_info": getattr(state, "game_info", {}),
+            "step_info": getattr(state, "step_info", {}),
+        }
+