From 5dc124dab1be008293811c1076aa7fe6f292da01 Mon Sep 17 00:00:00 2001 From: Ali Alami-idrissi Date: Sun, 2 Nov 2025 14:24:07 +0100 Subject: [PATCH 1/9] added gym env --- src/core/pyproject.toml | 8 +- src/envs/gym_env/__init__.py | 12 + src/envs/gym_env/client.py | 102 +++++ src/envs/gym_env/models.py | 50 +++ src/envs/gym_env/server/__init__.py | 0 src/envs/gym_env/server/app.py | 46 +++ .../gym_env/server/gymnasium_environment.py | 380 ++++++++++++++++++ 7 files changed, 597 insertions(+), 1 deletion(-) create mode 100644 src/envs/gym_env/__init__.py create mode 100644 src/envs/gym_env/client.py create mode 100644 src/envs/gym_env/models.py create mode 100644 src/envs/gym_env/server/__init__.py create mode 100644 src/envs/gym_env/server/app.py create mode 100644 src/envs/gym_env/server/gymnasium_environment.py diff --git a/src/core/pyproject.toml b/src/core/pyproject.toml index 32602f58..47e8701f 100644 --- a/src/core/pyproject.toml +++ b/src/core/pyproject.toml @@ -43,4 +43,10 @@ packages = [ "openenv_core.env_server", "openenv_core.tools" ] -package-dir = {"openenv_core" = "."} +[tool.black] +line-length = 80 + +[tool.ruff] +line-length = 80 +select = ["E", "F", "W"] +ignore = ["E501"] # Ignore long lines, if desired diff --git a/src/envs/gym_env/__init__.py b/src/envs/gym_env/__init__.py new file mode 100644 index 00000000..ff747cd6 --- /dev/null +++ b/src/envs/gym_env/__init__.py @@ -0,0 +1,12 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +"""Generic Gymnasium environment integration for OpenEnv.""" + +from .client import GymEnvironment +from .models import GymAction, GymObservation, GymState + +__all__ = ["GymEnvironment", "GymAction", "GymObservation", "GymState"] diff --git a/src/envs/gym_env/client.py b/src/envs/gym_env/client.py new file mode 100644 index 00000000..d8d1a669 --- /dev/null +++ b/src/envs/gym_env/client.py @@ -0,0 +1,102 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +"""HTTP client for generic Gymnasium environments served over HTTP.""" + +from __future__ import annotations + +from typing import Any, Dict, TYPE_CHECKING + +from core.client_types import StepResult + +from core.http_env_client import HTTPEnvClient + +from .models import GymAction, GymObservation, GymState + +if TYPE_CHECKING: + from core.containers.runtime import ContainerProvider + + +class GymEnvironment(HTTPEnvClient[GymAction, GymObservation]): + """Client for interacting with Gymnasium environments over HTTP. + + Example: + >>> client = GymEnvironment(base_url="http://localhost:8000") + >>> result = client.reset() + >>> print(result.observation.state) + >>> result = client.step(GymAction(action=1)) + >>> print(result.reward, result.done) + + Example with Docker: + >>> client = GymEnvironment.from_docker_image("generic-gym-env:latest") + >>> _ = client.reset() + >>> _ = client.step(GymAction(action=0)) + """ + + def _step_payload(self, action: GymAction) -> Dict[str, Any]: + """ + Convert GymAction to JSON payload for step request. + + Args: + action: GymAction instance. + + Returns: + Dictionary representation suitable for JSON encoding. 
+ """ + payload: Dict[str, Any] = {"action": action.action, "return_frame": action.return_frame} + if action.metadata: + payload["metadata"] = action.metadata + return payload + + def _parse_result(self, payload: Dict[str, Any]) -> StepResult[GymObservation]: + """ + Parse server response into StepResult[GymObservation]. + + Args: + payload: JSON response from server. + + Returns: + StepResult with GymObservation. + """ + obs_data = payload.get("observation", {}) + + observation = GymObservation( + state=obs_data.get("state"), + legal_actions=obs_data.get("legal_actions"), + episode_length=obs_data.get("episode_length", 0), + total_reward=obs_data.get("total_reward", 0.0), + done=bool(payload.get("done", False)), + reward=payload.get("reward"), + metadata=obs_data.get("metadata", {}), + frame= obs_data.get("frame", None) + ) + + return StepResult( + observation=observation, + reward=payload.get("reward"), + done=bool(payload.get("done", False)), + ) + + def _parse_state(self, payload: Dict[str, Any]) -> GymState: + """ + Parse server response into GymState object. + + Args: + payload: JSON response from /state endpoint. + + Returns: + GymState object with environment state information. + """ + return GymState( + env_id=payload.get("env_id", "Unknown"), + episode_id=payload.get("episode_id"), + step_count=payload.get("step_count", 0), + render_mode=payload.get("render_mode"), + max_steps=payload.get("max_steps"), + seed=payload.get("seed"), + episode_length=payload.get("episode_length", 0), + total_reward=payload.get("total_reward", 0.0), + ) diff --git a/src/envs/gym_env/models.py b/src/envs/gym_env/models.py new file mode 100644 index 00000000..652eee31 --- /dev/null +++ b/src/envs/gym_env/models.py @@ -0,0 +1,50 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +""" +Data models for Gymnasium-based environments. + +This module defines generic Action, Observation, and State representations +used by the Gym environment integration. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, List, Optional + +from core.env_server import Action, Observation, State + + +@dataclass +class GymAction(Action): + """Generic action wrapper for Gymnasium environments.""" + + action: Any + return_frame: bool = False + + +@dataclass +class GymObservation(Observation): + """Observation returned by a Gymnasium environment.""" + + state: Any + legal_actions: Optional[Any] = None + episode_length: int = 0 + total_reward: float = 0.0 + frame: Optional[List] = None + + +@dataclass +class GymState(State): + """Server-side state snapshot for Gymnasium environments.""" + + env_id: str = "Unknown" + render_mode: Optional[str] = None + max_steps: Optional[int] = None + seed: Optional[int] = None + episode_length: int = 0 + total_reward: float = 0.0 diff --git a/src/envs/gym_env/server/__init__.py b/src/envs/gym_env/server/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/envs/gym_env/server/app.py b/src/envs/gym_env/server/app.py new file mode 100644 index 00000000..231f051d --- /dev/null +++ b/src/envs/gym_env/server/app.py @@ -0,0 +1,46 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +"""FastAPI application that exposes a generic Gymnasium environment.""" + +import os + +from core.env_server import create_app + +from ..models import GymAction, GymObservation +from .gymnasium_environment import GymnasiumEnvironment + +# Environment configuration via environment variables +env_id = os.getenv("GYM_ENVIRONMENT_ID", "MountainCarContinuous-v0") +render_mode = os.getenv("GYM_RENDER_MODE") or None + +max_steps_str = os.getenv("GYM_MAX_STEPS") +max_steps = int(max_steps_str) if max_steps_str else 1000 + +seed_str = os.getenv("GYM_SEED") +seed = int(seed_str) if seed_str else None + +# Create the environment instance +env = GymnasiumEnvironment( + env_id=env_id, + render_mode=render_mode, + max_steps=max_steps, + seed=seed, +) + +# Create the FastAPI app with web interface and README integration +app = create_app( + env, + GymAction, + GymObservation, + env_name=env_id.lower().replace("-", "_"), +) + + +if __name__ == "__main__": + import uvicorn + + uvicorn.run(app, host="0.0.0.0", port=8010) diff --git a/src/envs/gym_env/server/gymnasium_environment.py b/src/envs/gym_env/server/gymnasium_environment.py new file mode 100644 index 00000000..f6e471eb --- /dev/null +++ b/src/envs/gym_env/server/gymnasium_environment.py @@ -0,0 +1,380 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +"""Generic Gymnasium environment server implementation.""" + +from __future__ import annotations + +import logging +import math +import uuid +from typing import Any, Dict, Optional +import numpy as np + +try: + import gymnasium as gym +except ImportError: + raise ValueError("Please install gymnasium with: pip install gymnasium") +from gymnasium import spaces +import numpy.typing as npt +from core.env_server import Environment + +from ..models import GymAction, GymObservation, GymState + +logger = logging.getLogger(__name__) +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", +) + + +class GymnasiumEnvironment(Environment): + """ + Generic Gymnasium environment wrapper for OpenEnv. + + Any Gymnasium environment can be served by providing its environment id. + The wrapper handles common concerns such as seed management, type conversion, + and JSON-friendly serialization of observations. + """ + + def __init__( + self, + env_id: str, + render_mode: Optional[str] = None, + max_steps: Optional[int] = None, + seed: Optional[int] = None, + ): + super().__init__() + + self.env_id = env_id + self.render_mode = render_mode + self.max_steps = max_steps if max_steps and max_steps > 0 else None + self._initial_seed = seed + self._next_seed = seed + + logger.info( + "Creating Gymnasium environment '%s' (render_mode=%s, max_steps=%s, seed=%s)", + env_id, + render_mode, + self.max_steps, + seed, + ) + + self.env = gym.make(env_id, render_mode=render_mode) + + if self.max_steps is not None: + self.env = gym.wrappers.TimeLimit( + self.env, max_episode_steps=self.max_steps + ) + + self._action_space_metadata = self._describe_space(self.env.action_space) + self._observation_space_metadata = self._describe_space( + self.env.observation_space + ) + self._legal_actions = self._summarize_action_space(self.env.action_space) + + self._state = GymState( + env_id=env_id, + render_mode=render_mode, + max_steps=self.max_steps, + 
seed=seed, + ) + + logger.info("GymnasiumEnvironment for '%s' initialized", env_id) + + def reset(self) -> GymObservation: + """Reset the environment and return the initial observation.""" + seed = self._consume_seed() + obs, info = self.env.reset(seed=seed) + + self._state.episode_id = str(uuid.uuid4()) + self._state.step_count = 0 + self._state.episode_length = 0 + self._state.total_reward = 0.0 + self._state.seed = seed + + observation = self._make_observation( + obs=obs, + reward=None, + done=False, + info=info, + terminated=False, + truncated=False, + raw_reward=0.0, + ) + + logger.info( + "Environment '%s' reset (episode_id=%s, seed=%s)", + self.env_id, + self._state.episode_id, + seed, + ) + + return observation + + def step(self, action: GymAction) -> GymObservation: + """Execute an action and return the resulting observation.""" + gym_action = self._convert_action(action) + obs, reward, terminated, truncated, info = self.env.step(gym_action) + if action.return_frame: + frame = self.env.render() + else: + frame = None + + self._state.step_count += 1 + self._state.episode_length += 1 + + reward_value, raw_reward = self._normalize_reward(reward) + if reward_value is not None: + self._state.total_reward += reward_value + + done = bool(terminated or truncated) + + observation = self._make_observation( + obs=obs, + reward=reward_value, + done=done, + info=info, + terminated=terminated, + truncated=truncated, + raw_reward=raw_reward, + frame=frame, + ) + + logger.debug( + "Step %s -> reward=%s terminated=%s truncated=%s", + self._state.step_count, + reward, + terminated, + truncated, + ) + + return observation + + @property + def state(self) -> GymState: + """Return the current environment state.""" + return self._state + + def close(self) -> None: + """Close the underlying Gymnasium environment.""" + logger.info("Closing GymnasiumEnvironment for '%s'", self.env_id) + if hasattr(self.env, "close"): + self.env.close() + logger.info("GymnasiumEnvironment closed") + 
+ # ------------------------------------------------------------------ + # Internal helpers + # ------------------------------------------------------------------ + + def _consume_seed(self) -> Optional[int]: + if self._next_seed is None: + return None + seed = self._next_seed + self._next_seed += 1 + return seed + + def _convert_action(self, action: GymAction) -> Any: + if not isinstance(action, GymAction): + raise ValueError(f"Expected GymAction, received {type(action)}") + + raw_action = action.action + space = self.env.action_space + + if space.contains(raw_action): + return raw_action + + converted = self._convert_action_for_space(space, raw_action) + + if not space.contains(converted): + raise ValueError( + f"Action {raw_action!r} could not be converted for space {space}" + ) + + return converted + + def _convert_action_for_space(self, space: spaces.Space, value: Any) -> Any: + if isinstance(space, spaces.Discrete): + return int(value) + + if isinstance(space, spaces.MultiDiscrete): + return np.asarray(value, dtype=space.dtype) + + if isinstance(space, spaces.MultiBinary): + return np.asarray(value, dtype=space.dtype) + + if isinstance(space, spaces.Box): + return np.asarray(value, dtype=space.dtype) + + if isinstance(space, spaces.Tuple): + if not isinstance(value, (list, tuple)): + raise TypeError( + f"Tuple action space expects list/tuple, received {type(value)}" + ) + if len(value) != len(space.spaces): + raise ValueError( + f"Tuple action with length {len(value)} does not match " + f"expected length {len(space.spaces)}" + ) + return tuple( + self._convert_action_for_space(subspace, subvalue) + for subspace, subvalue in zip(space.spaces, value) + ) + + if isinstance(space, spaces.Dict): + if not isinstance(value, dict): + raise TypeError( + f"Dict action space expects dict, received {type(value)}" + ) + return { + key: self._convert_action_for_space(space.spaces[key], value[key]) + for key in space.spaces + } + + if isinstance(space, spaces.Text): + 
return str(value) + + return value + + def _normalize_reward(self, reward: Any) -> tuple[Optional[float], Any]: + if isinstance(reward, (int, float)): + value = float(reward) + return value, value + + if isinstance(reward, (np.integer, np.floating)): + value = float(reward.item()) + return value, value + + return None, self._to_serializable(reward) + + def _make_observation( + self, + obs: Any, + reward: Optional[float], + done: bool, + info: Dict[str, Any], + terminated: bool, + truncated: bool, + raw_reward: Any, + frame: Optional[npt.ArrayLike] = None, + ) -> GymObservation: + metadata = { + "env_id": self.env_id, + "render_mode": self.render_mode, + "max_steps": self.max_steps, + "seed": self._state.seed, + "info": self._to_serializable(info), + "raw_reward": raw_reward, + "terminated": terminated, + "truncated": truncated, + "action_space": self._action_space_metadata, + "observation_space": self._observation_space_metadata, + } + + # Remove keys with None values for cleaner payloads + metadata = {key: value for key, value in metadata.items() if value is not None} + + return GymObservation( + state=self._to_serializable(obs), + legal_actions=self._legal_actions, + episode_length=self._state.episode_length, + total_reward=self._state.total_reward, + done=done, + reward=reward, + metadata=metadata, + frame=self._to_serializable(frame), + ) + + def _describe_space(self, space: spaces.Space) -> Dict[str, Any]: + description: Dict[str, Any] = {"type": type(space).__name__} + + if hasattr(space, "shape"): + description["shape"] = self._to_serializable(getattr(space, "shape")) + + dtype = getattr(space, "dtype", None) + if dtype is not None: + description["dtype"] = str(dtype) + + if isinstance(space, spaces.Discrete): + description["n"] = int(space.n) + + elif isinstance(space, spaces.MultiDiscrete): + description["nvec"] = self._to_serializable(space.nvec) + + elif isinstance(space, spaces.MultiBinary): + description["n"] = self._to_serializable(space.n) + + elif 
isinstance(space, spaces.Box): + description["low"] = self._to_serializable(space.low) + description["high"] = self._to_serializable(space.high) + + elif isinstance(space, spaces.Tuple): + description["spaces"] = [ + self._describe_space(subspace) for subspace in space.spaces + ] + + elif isinstance(space, spaces.Dict): + description["spaces"] = { + key: self._describe_space(subspace) + for key, subspace in space.spaces.items() + } + + elif isinstance(space, spaces.Text): + description["min_length"] = space.min_length + description["max_length"] = space.max_length + + return description + + def _summarize_action_space(self, space: spaces.Space) -> Any: + if isinstance(space, spaces.Discrete): + return list(range(int(space.n))) + + if isinstance(space, spaces.MultiDiscrete): + return [list(range(int(n))) for n in self._to_serializable(space.nvec)] + + if isinstance(space, spaces.MultiBinary): + return [0, 1] + + if isinstance(space, spaces.Box): + return { + "low": self._to_serializable(space.low), + "high": self._to_serializable(space.high), + } + + if isinstance(space, spaces.Tuple): + return [self._summarize_action_space(subspace) for subspace in space.spaces] + + if isinstance(space, spaces.Dict): + return { + key: self._summarize_action_space(subspace) + for key, subspace in space.spaces.items() + } + + if isinstance(space, spaces.Text): + return {"charset": "unicode"} + + return None + + def _to_serializable(self, value: Any) -> Any: + if isinstance(value, np.ndarray): + return [self._to_serializable(v) for v in value.tolist()] + + if isinstance(value, (np.floating, np.integer)): + return self._to_serializable(value.item()) + + if isinstance(value, np.bool_): + return bool(value) + + if isinstance(value, (list, tuple, set)): + return [self._to_serializable(v) for v in value] + + if isinstance(value, dict): + return {str(k): self._to_serializable(v) for k, v in value.items()} + + if isinstance(value, (int, bool, float)) or value is None: + return value + + 
return str(value) From 8624168a8424ade99157aac1548faaee213218ef Mon Sep 17 00:00:00 2001 From: Ali Alami-idrissi Date: Mon, 3 Nov 2025 05:46:08 +0400 Subject: [PATCH 2/9] added dockerfile and requirements.txt --- src/envs/gym_env/client.py | 3 +- src/envs/gym_env/models.py | 4 +- src/envs/gym_env/server/Dockerfile | 46 ++++ src/envs/gym_env/server/app.py | 2 +- .../gym_env/server/gymnasium_environment.py | 12 +- src/envs/gym_env/server/requirements.txt | 3 + tests/envs/test_gym_environment.py | 211 ++++++++++++++++++ 7 files changed, 265 insertions(+), 16 deletions(-) create mode 100644 src/envs/gym_env/server/Dockerfile create mode 100644 src/envs/gym_env/server/requirements.txt create mode 100644 tests/envs/test_gym_environment.py diff --git a/src/envs/gym_env/client.py b/src/envs/gym_env/client.py index d8d1a669..e49bc3dd 100644 --- a/src/envs/gym_env/client.py +++ b/src/envs/gym_env/client.py @@ -46,7 +46,7 @@ def _step_payload(self, action: GymAction) -> Dict[str, Any]: Returns: Dictionary representation suitable for JSON encoding. 
""" - payload: Dict[str, Any] = {"action": action.action, "return_frame": action.return_frame} + payload: Dict[str, Any] = {"action": action.action} if action.metadata: payload["metadata"] = action.metadata return payload @@ -71,7 +71,6 @@ def _parse_result(self, payload: Dict[str, Any]) -> StepResult[GymObservation]: done=bool(payload.get("done", False)), reward=payload.get("reward"), metadata=obs_data.get("metadata", {}), - frame= obs_data.get("frame", None) ) return StepResult( diff --git a/src/envs/gym_env/models.py b/src/envs/gym_env/models.py index 652eee31..ff670a95 100644 --- a/src/envs/gym_env/models.py +++ b/src/envs/gym_env/models.py @@ -14,7 +14,7 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Any, List, Optional +from typing import Any, Optional from core.env_server import Action, Observation, State @@ -24,7 +24,6 @@ class GymAction(Action): """Generic action wrapper for Gymnasium environments.""" action: Any - return_frame: bool = False @dataclass @@ -35,7 +34,6 @@ class GymObservation(Observation): legal_actions: Optional[Any] = None episode_length: int = 0 total_reward: float = 0.0 - frame: Optional[List] = None @dataclass diff --git a/src/envs/gym_env/server/Dockerfile b/src/envs/gym_env/server/Dockerfile new file mode 100644 index 00000000..6e0330c2 --- /dev/null +++ b/src/envs/gym_env/server/Dockerfile @@ -0,0 +1,46 @@ +# Dockerfile for Atari Environment +# This image provides Atari 2600 games via the Arcade Learning Environment (ALE) + +# Configurable base image - defaults to local build, can be overridden for CI/CD +# Base image provides: fastapi, uvicorn, requests, curl, PYTHONPATH=/app/src +# +# Local build: docker build -t envtorch-base:latest -f src/core/containers/images/Dockerfile . +# docker build -f src/envs/atari_env/server/Dockerfile -t atari-env:latest . 
+# +# CI/CD build: docker build --build-arg BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest \ +# -f src/envs/atari_env/server/Dockerfile -t atari-env:latest . +ARG BASE_IMAGE=openenv-base:latest +FROM ${BASE_IMAGE} + +# Install dependencies +COPY src/envs/gym_env/server/requirements.txt /tmp/requirements.txt +RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt + +# Copy OpenEnv core (base image already set WORKDIR=/app) +COPY src/core/ /app/src/core/ + +# Copy Atari environment code +COPY src/envs/gym_env/ /app/src/envs/gym_env/ + +# Copy README for web interface documentation +COPY src/envs/gym_env/README.md /app/README.md + +ARG GYM_ENVIRONMENT_ID="MountainCarContinuous-v0" +ARG GYM_RENDER_MODE="rgb_array" + + +# --- Runtime environment with defaults --- +# These ENV lines set defaults but still allow runtime overrides +ENV GYM_ENVIRONMENT_ID=${GYM_ENVIRONMENT_ID} +ENV NODE_ENV=${BUILD_ENV} + + +# Expose port +EXPOSE 8000 + +# Health check +HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ + CMD curl -f http://localhost:8000/health || exit 1 + +# Run the FastAPI server +CMD ["uvicorn", "envs.atari_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/src/envs/gym_env/server/app.py b/src/envs/gym_env/server/app.py index 231f051d..8aa3d6ad 100644 --- a/src/envs/gym_env/server/app.py +++ b/src/envs/gym_env/server/app.py @@ -15,7 +15,7 @@ # Environment configuration via environment variables env_id = os.getenv("GYM_ENVIRONMENT_ID", "MountainCarContinuous-v0") -render_mode = os.getenv("GYM_RENDER_MODE") or None +render_mode = os.getenv("GYM_RENDER_MODE", "rgb_array") max_steps_str = os.getenv("GYM_MAX_STEPS") max_steps = int(max_steps_str) if max_steps_str else 1000 diff --git a/src/envs/gym_env/server/gymnasium_environment.py b/src/envs/gym_env/server/gymnasium_environment.py index f6e471eb..3fbff57c 100644 --- a/src/envs/gym_env/server/gymnasium_environment.py +++ 
b/src/envs/gym_env/server/gymnasium_environment.py @@ -9,17 +9,16 @@ from __future__ import annotations import logging -import math import uuid from typing import Any, Dict, Optional import numpy as np try: import gymnasium as gym + from gymnasium import spaces except ImportError: raise ValueError("Please install gymnasium with: pip install gymnasium") -from gymnasium import spaces -import numpy.typing as npt + from core.env_server import Environment from ..models import GymAction, GymObservation, GymState @@ -119,10 +118,6 @@ def step(self, action: GymAction) -> GymObservation: """Execute an action and return the resulting observation.""" gym_action = self._convert_action(action) obs, reward, terminated, truncated, info = self.env.step(gym_action) - if action.return_frame: - frame = self.env.render() - else: - frame = None self._state.step_count += 1 self._state.episode_length += 1 @@ -141,7 +136,6 @@ def step(self, action: GymAction) -> GymObservation: terminated=terminated, truncated=truncated, raw_reward=raw_reward, - frame=frame, ) logger.debug( @@ -259,7 +253,6 @@ def _make_observation( terminated: bool, truncated: bool, raw_reward: Any, - frame: Optional[npt.ArrayLike] = None, ) -> GymObservation: metadata = { "env_id": self.env_id, @@ -285,7 +278,6 @@ def _make_observation( done=done, reward=reward, metadata=metadata, - frame=self._to_serializable(frame), ) def _describe_space(self, space: spaces.Space) -> Dict[str, Any]: diff --git a/src/envs/gym_env/server/requirements.txt b/src/envs/gym_env/server/requirements.txt new file mode 100644 index 00000000..65e28925 --- /dev/null +++ b/src/envs/gym_env/server/requirements.txt @@ -0,0 +1,3 @@ +gymnasium>=0.29.0 +ale-py>=0.8.0 +numpy>=1.24.0 diff --git a/tests/envs/test_gym_environment.py b/tests/envs/test_gym_environment.py new file mode 100644 index 00000000..33c8d62e --- /dev/null +++ b/tests/envs/test_gym_environment.py @@ -0,0 +1,211 @@ +"""Tests for the generic Gymnasium environment integration.""" + 
+import sys +from pathlib import Path + +import pytest + + +try: + pass +except ModuleNotFoundError: + pytest.skip("gymnasium not installed", allow_module_level=True) + +from envs.gym_environment.client import GymAction, GymEnvironment +from envs.gym_environment.server.gymnasium_environment import GymnasiumEnvironment + + +ENV_ID = "BipedalWalker-v3" + + +@pytest.fixture(name="env") +def fixture_env(): + env = GymnasiumEnvironment(env_id=ENV_ID, seed=123, render_mode="rgb_array") + yield env + env.close() + + +def test_bipedalwalker_reset_and_step(env: GymnasiumEnvironment): + """Reset and step the BipedalWalker environment (continuous actions). + + The BipedalWalker environment uses a continuous Box action space, so + the test checks that there are no discrete `legal_actions` and that the + reported action_space metadata describes a Box (with numeric low/high lists). + """ + obs = env.reset() + state = env.state + + assert state.env_id == ENV_ID + assert state.step_count == 0 + # Continuous environments typically don't expose discrete legal_actions + # (set to None or empty). Accept either case. + assert obs.legal_actions == { + "low": [-1.0, -1.0, -1.0, -1.0], + "high": [1.0, 1.0, 1.0, 1.0], + } + assert isinstance(obs.state, list) + + # Provide a sample continuous action. The client/server should convert + # python lists into the correct numeric action shape for Gym. + # Use a small vector; the environment will validate internally. 
+ sample_action = [0.0, 0.0, 0.0, 0.0] + next_obs = env.step(GymAction(action=sample_action)) + assert env.state.step_count == 1 + assert isinstance(next_obs.state, list) + assert next_obs.reward is not None + assert "action_space" in next_obs.metadata + # Expect a Box action space for BipedalWalker + assert next_obs.metadata["action_space"]["type"] in ("Box", "box") + low = next_obs.metadata["action_space"].get("low") + high = next_obs.metadata["action_space"].get("high") + assert isinstance(low, list) and isinstance(high, list) + assert len(low) == len(high) + + +def test_continuous_action_conversion_and_metadata(): + env = GymnasiumEnvironment(env_id="MountainCarContinuous-v0", seed=42) + # Capture initial observation from reset (some envs return different shapes on reset) + _ = env.reset() + + obs = env.step(GymAction(action=[0.5])) + # State should be serializable to a list + assert isinstance(obs.state, list) + assert not isinstance(obs.state, tuple) + + # Action space metadata should describe a Box for continuous envs + assert "action_space" in obs.metadata + action_space = obs.metadata["action_space"] + assert action_space["type"] in ("Box", "box") + low = action_space["low"] + high = action_space["high"] + assert isinstance(low, list) and isinstance(high, list) + assert len(low) == len(high) == 1 + + env.close() + + +def test_client_parsers_handle_payloads(): + client = GymEnvironment(base_url="http://localhost:9000") + state = [ + 0.0027464781887829304, + 6.556225798703963e-06, + -0.0008549225749447942, + -0.016000041738152504, + 0.09236064553260803, + 0.0019846635404974222, + 0.8599309325218201, + -0.00017501995898783207, + 1.0, + 0.03271123394370079, + 0.001984562259167433, + 0.8535996675491333, + -0.00135040411259979, + 1.0, + 0.4408135712146759, + 0.4458196759223938, + 0.461422324180603, + 0.4895496964454651, + 0.5341022610664368, + 0.6024604439735413, + 0.7091481685638428, + 0.8859308958053589, + 1.0, + 1.0, + ] + payload = { + "observation": { + 
"state": state, + "legal_actions": { + "low": [-1.0, -1.0, -1.0, -1.0], + "high": [1.0, 1.0, 1.0, 1.0], + }, + "episode_length": 0, + "total_reward": 0.0, + "metadata": { + "env_id": "BipedalWalker-v3", + "render_mode": "rgb_array", + "seed": 124, + "info": {}, + "raw_reward": 0.0, + "terminated": False, + "truncated": False, + "action_space": { + "type": "Box", + "shape": [4], + "dtype": "float32", + "low": [-1.0, -1.0, -1.0, -1.0], + "high": [1.0, 1.0, 1.0, 1.0], + }, + "observation_space": { + "type": "Box", + "shape": [24], + "dtype": "float32", + "low": [ + -3.1415927410125732, + -5.0, + -5.0, + -5.0, + -3.1415927410125732, + -5.0, + -3.1415927410125732, + -5.0, + -0.0, + -3.1415927410125732, + -5.0, + -3.1415927410125732, + -5.0, + -0.0, + -1.0, + -1.0, + -1.0, + -1.0, + -1.0, + -1.0, + -1.0, + -1.0, + -1.0, + -1.0, + ], + "high": [ + 3.1415927410125732, + 5.0, + 5.0, + 5.0, + 3.1415927410125732, + 5.0, + 3.1415927410125732, + 5.0, + 5.0, + 3.1415927410125732, + 5.0, + 3.1415927410125732, + 5.0, + 5.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + ], + }, + }, + }, + "reward": 0.0, + "done": False, + } + + result = client._parse_result(payload) + assert result.observation.state == state + assert result.observation.legal_actions == { + "low": [-1.0, -1.0, -1.0, -1.0], + "high": [1.0, 1.0, 1.0, 1.0], + } + assert result.reward == 0.0 + assert result.done is False + + client.close() From af006077829702e0a52844fdbe991682716bfa3e Mon Sep 17 00:00:00 2001 From: Ali Alami-idrissi Date: Mon, 3 Nov 2025 06:01:04 +0400 Subject: [PATCH 3/9] fixed tests --- .../gym_env/server/gymnasium_environment.py | 3 +- tests/envs/test_gym_environment.py | 121 +++++++++++++++++- 2 files changed, 120 insertions(+), 4 deletions(-) diff --git a/src/envs/gym_env/server/gymnasium_environment.py b/src/envs/gym_env/server/gymnasium_environment.py index 3fbff57c..71051830 100644 --- a/src/envs/gym_env/server/gymnasium_environment.py +++ 
b/src/envs/gym_env/server/gymnasium_environment.py @@ -45,6 +45,7 @@ def __init__( render_mode: Optional[str] = None, max_steps: Optional[int] = None, seed: Optional[int] = None, + **gym_kwargs, ): super().__init__() @@ -62,7 +63,7 @@ def __init__( seed, ) - self.env = gym.make(env_id, render_mode=render_mode) + self.env = gym.make(env_id, render_mode=render_mode, **gym_kwargs) if self.max_steps is not None: self.env = gym.wrappers.TimeLimit( diff --git a/tests/envs/test_gym_environment.py b/tests/envs/test_gym_environment.py index 33c8d62e..a812fc4e 100644 --- a/tests/envs/test_gym_environment.py +++ b/tests/envs/test_gym_environment.py @@ -7,12 +7,16 @@ try: - pass + # Ensure gymnasium is available; skip the whole module if it's missing. + import importlib + + if importlib.util.find_spec("gymnasium") is None: + raise ModuleNotFoundError except ModuleNotFoundError: pytest.skip("gymnasium not installed", allow_module_level=True) -from envs.gym_environment.client import GymAction, GymEnvironment -from envs.gym_environment.server.gymnasium_environment import GymnasiumEnvironment +from envs.gym_env.client import GymAction, GymEnvironment +from envs.gym_env.server.gymnasium_environment import GymnasiumEnvironment ENV_ID = "BipedalWalker-v3" @@ -84,6 +88,55 @@ def test_continuous_action_conversion_and_metadata(): env.close() +def test_lunarlander_environments(): + """Test both discrete and continuous versions of LunarLander. + + This test verifies that: + 1. Both discrete and continuous versions can be initialized + 2. Action spaces are correctly reported + 3. Observations and rewards are properly structured + 4. 
State transitions work as expected + """ + # Test LunarLander-v3 (discrete actions) + env_discrete = GymnasiumEnvironment(env_id="LunarLander-v3", seed=42) + obs_discrete = env_discrete.reset() + + # Verify discrete action space + assert obs_discrete.legal_actions == [0, 1, 2, 3] # Four discrete actions + assert isinstance(obs_discrete.state, list) + assert len(obs_discrete.state) == 8 # LunarLander has 8 state components + + # Test a discrete action + next_obs = env_discrete.step(GymAction(action=1)) # Fire left orientation engine + assert env_discrete.state.step_count == 1 + assert isinstance(next_obs.state, list) + assert next_obs.reward is not None + assert next_obs.metadata["action_space"]["type"] == "Discrete" + env_discrete.close() + + # Test LunarLander-v3 with continuous actions + env_continuous = GymnasiumEnvironment( + env_id="LunarLander-v3", seed=42, continuous=True + ) + obs_continuous = env_continuous.reset() + + # Verify continuous action space + assert obs_continuous.legal_actions == { + "low": [-1.0, -1.0], # Main engine, left-right engines + "high": [1.0, 1.0], + } + assert isinstance(obs_continuous.state, list) + + # Test a continuous action + next_obs = env_continuous.step(GymAction(action=[0.5, 0.0])) + assert env_continuous.state.step_count == 1 + assert isinstance(next_obs.state, list) + assert next_obs.reward is not None + assert next_obs.metadata["action_space"]["type"] in ("Box", "box") + assert len(next_obs.metadata["action_space"]["low"]) == 2 + env_continuous.close() + + def test_client_parsers_handle_payloads(): client = GymEnvironment(base_url="http://localhost:9000") state = [ @@ -209,3 +262,65 @@ def test_client_parsers_handle_payloads(): assert result.done is False client.close() + + +def test_cartpole_discrete_action_space_and_step(): + env = GymnasiumEnvironment(env_id="CartPole-v1", seed=7) + obs = env.reset() + + # Discrete action space should expose 'n' in metadata and legal_actions as a list + assert env.state.env_id == "CartPole-v1" +
assert "action_space" in obs.metadata + action_meta = obs.metadata["action_space"] + assert action_meta["type"] in ("Discrete", "discrete") + assert "n" in action_meta and isinstance(action_meta["n"], int) + + # legal_actions should be a list of integers 0..n-1 + assert isinstance(obs.legal_actions, list) + assert obs.legal_actions == list(range(action_meta["n"])) + + # Perform a step with a valid discrete action + next_obs = env.step(GymAction(action=0)) + assert isinstance(next_obs.state, list) or next_obs.state is not None + assert next_obs.reward is not None + env.close() + + +def test_taxi_discrete_action_space(): + # Taxi is a classic discrete-action environment (n typically 6) + env = GymnasiumEnvironment(env_id="Taxi-v3", seed=10) + obs = env.reset() + + assert env.state.env_id == "Taxi-v3" + assert "action_space" in obs.metadata + action_meta = obs.metadata["action_space"] + assert action_meta["type"] in ("Discrete", "discrete") + assert action_meta.get("n", None) is not None + assert isinstance(obs.legal_actions, list) + + # Try a valid action (0) and ensure step returns a serializable state + next_obs = env.step(GymAction(action=0)) + assert next_obs.reward is not None + assert next_obs.done in (True, False) + env.close() + + +def test_pendulum_continuous_action_box(): + # Pendulum has a continuous Box action space of shape (1,) + env = GymnasiumEnvironment(env_id="Pendulum-v1", seed=42) + obs = env.reset() + + assert env.state.env_id == "Pendulum-v1" + assert "action_space" in obs.metadata + action_meta = obs.metadata["action_space"] + assert action_meta["type"] in ("Box", "box") + # Expect shape to be present and of length 1 + shape = action_meta.get("shape") + assert isinstance(shape, list) or isinstance(shape, tuple) + assert len(shape) >= 1 + + # Provide a valid continuous action (single-element list) + next_obs = env.step(GymAction(action=[0.0])) + assert next_obs.reward is not None + assert isinstance(next_obs.state, list) or next_obs.state is not 
None + env.close() From 53d6500a065be04098c44f85b89264f0efc96c48 Mon Sep 17 00:00:00 2001 From: Ali Alami-idrissi Date: Mon, 3 Nov 2025 06:05:09 +0400 Subject: [PATCH 4/9] YAML file for further deployment parameters --- src/envs/gym_env/server/additional_env_parameters.yaml | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/envs/gym_env/server/additional_env_parameters.yaml diff --git a/src/envs/gym_env/server/additional_env_parameters.yaml b/src/envs/gym_env/server/additional_env_parameters.yaml new file mode 100644 index 00000000..e69de29b From 3a4bf81ee508bb4ddc3e345a987d8cc57dde3cca Mon Sep 17 00:00:00 2001 From: Ali Alami-idrissi Date: Mon, 3 Nov 2025 06:12:47 +0400 Subject: [PATCH 5/9] added additional params parsing --- src/envs/gym_env/server/Dockerfile | 3 ++- src/envs/gym_env/server/app.py | 9 +++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/envs/gym_env/server/Dockerfile b/src/envs/gym_env/server/Dockerfile index 6e0330c2..93a4893d 100644 --- a/src/envs/gym_env/server/Dockerfile +++ b/src/envs/gym_env/server/Dockerfile @@ -27,13 +27,14 @@ COPY src/envs/gym_env/README.md /app/README.md ARG GYM_ENVIRONMENT_ID="MountainCarContinuous-v0" ARG GYM_RENDER_MODE="rgb_array" +ENV ADDITIONAL_PARAMETERS_YAML_FILE="" # --- Runtime environment with defaults --- # These ENV lines set defaults but still allow runtime overrides ENV GYM_ENVIRONMENT_ID=${GYM_ENVIRONMENT_ID} ENV NODE_ENV=${BUILD_ENV} - +ENV ADDITIONAL_PARAMETERS=${ADDITIONAL_PARAMETERS_YAML_FILE} # Expose port EXPOSE 8000 diff --git a/src/envs/gym_env/server/app.py b/src/envs/gym_env/server/app.py index 8aa3d6ad..8d714a48 100644 --- a/src/envs/gym_env/server/app.py +++ b/src/envs/gym_env/server/app.py @@ -12,6 +12,7 @@ from ..models import GymAction, GymObservation from .gymnasium_environment import GymnasiumEnvironment +import yaml # Environment configuration via environment variables env_id = os.getenv("GYM_ENVIRONMENT_ID", "MountainCarContinuous-v0") @@ 
-22,6 +23,13 @@ seed_str = os.getenv("GYM_SEED") seed = int(seed_str) if seed_str else None +yaml_param_file_path = os.getenv("ADDITIONAL_PARAMETERS_YAML_FILE") +additional_params = {} + +# Load additional parameters from YAML if file path is provided +if yaml_param_file_path and os.path.exists(yaml_param_file_path): + with open(yaml_param_file_path, "r") as f: + additional_params = yaml.safe_load(f) # Create the environment instance env = GymnasiumEnvironment( @@ -29,6 +37,7 @@ render_mode=render_mode, max_steps=max_steps, seed=seed, + **additional_params, ) # Create the FastAPI app with web interface and README integration From ff1d0fa5e3006c25e4938f95ed8b48ffd8577417 Mon Sep 17 00:00:00 2001 From: Ali Alami-idrissi Date: Sat, 8 Nov 2025 22:46:36 +0400 Subject: [PATCH 6/9] Fixed dockerfile and added readme --- src/envs/gym_env/README.md | 100 +++++++++++++++++++++++ src/envs/gym_env/server/Dockerfile | 12 ++- src/envs/gym_env/server/requirements.txt | 2 +- 3 files changed, 109 insertions(+), 5 deletions(-) create mode 100644 src/envs/gym_env/README.md diff --git a/src/envs/gym_env/README.md b/src/envs/gym_env/README.md new file mode 100644 index 00000000..7a9c2de7 --- /dev/null +++ b/src/envs/gym_env/README.md @@ -0,0 +1,100 @@ +--- +title: Gym Environment Server +emoji: 🎮 +colorFrom: '#0E84B5' +colorTo: '#34D399' +sdk: docker +pinned: false +app_port: 8000 +base_path: /web +tags: + - openenv +--- + +# Gym Environment + +Integration of OpenAI Gym/Gymnasium environments with the OpenEnv framework. Gymnasium provides a wide variety of environments for reinforcement learning research and development. 
+ +## Supported Environments + +Gymnasium includes numerous environments across different categories: + +### Classic Control +- **CartPole** - Balance a pole on a moving cart +- **Pendulum** - Swing up and balance an inverted pendulum +- **Acrobot** - Swing up a two-link robotic arm +- **MountainCar** - Drive up a mountain with limited power +- **MountainCarContinuous** - Continuous version of MountainCar + +### Box2D +- **LunarLander** - Land a spacecraft safely +- **BipedalWalker** - Train a 2D biped to walk +- **CarRacing** - Race a car around a track + +And many more! For a complete list, see [Gymnasium documentation](https://gymnasium.farama.org/environments/classic_control/). + +## Architecture + +``` +┌────────────────────────────────────┐ +│ RL Training Code (Client) │ +│ GymEnvironment.step(action) │ +└──────────────┬─────────────────────┘ + │ HTTP +┌──────────────▼─────────────────────┐ +│ FastAPI Server (Docker) │ +│ GymnasiumEnvironment │ +│ ├─ Wraps Gymnasium Env │ +│ ├─ Handles observations │ +│ └─ Action execution │ +└────────────────────────────────────┘ +``` + +## Installation & Usage + +### Option 1: Local Development (without Docker) + +**Requirements:** +- Python 3.11+ +- gymnasium installed: `pip install gymnasium` + +```python +# Connect to local server +from envs.gym_env import GymEnvironment, GymAction + +# Start local server manually +# python -m envs.gym_env.server.app + +env = GymEnvironment(base_url="http://localhost:8000") + +# Reset environment +result = env.reset() +print(f"Observation : {result.observation.state}") +print(f"Action space: {result.observation.legal_actions}") + +# Take actions +for _ in range(100): + action = 1 # Example action + result = env.step(GymAction(action=[action])) + print(f"Reward: {result.reward}, Done: {result.done}") + if result.done: + break + +# Cleanup +env.close() + +``` + +### Option 2: Docker (Recommended) + +**Build Gym image:** + +```bash +cd OpenEnv + +# Build the image +docker build \ + -f
src/envs/gym_env/server/Dockerfile \ + -t gym-env:latest \ + . +``` \ No newline at end of file diff --git a/src/envs/gym_env/server/Dockerfile b/src/envs/gym_env/server/Dockerfile index 93a4893d..083a4001 100644 --- a/src/envs/gym_env/server/Dockerfile +++ b/src/envs/gym_env/server/Dockerfile @@ -14,12 +14,17 @@ FROM ${BASE_IMAGE} # Install dependencies COPY src/envs/gym_env/server/requirements.txt /tmp/requirements.txt -RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt +RUN apt-get update && apt-get install -y \ + swig \ + build-essential \ + python3-dev + +RUN pip install -r /tmp/requirements.txt && rm /tmp/requirements.txt # Copy OpenEnv core (base image already set WORKDIR=/app) COPY src/core/ /app/src/core/ -# Copy Atari environment code +# Copy Gym environment code COPY src/envs/gym_env/ /app/src/envs/gym_env/ # Copy README for web interface documentation @@ -33,7 +38,6 @@ ENV ADDITIONAL_PARAMETERS_YAML_FILE="" # --- Runtime environment with defaults --- # These ENV lines set defaults but still allow runtime overrides ENV GYM_ENVIRONMENT_ID=${GYM_ENVIRONMENT_ID} -ENV NODE_ENV=${BUILD_ENV} ENV ADDITIONAL_PARAMETERS=${ADDITIONAL_PARAMETERS_YAML_FILE} # Expose port @@ -44,4 +48,4 @@ HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ CMD curl -f http://localhost:8000/health || exit 1 # Run the FastAPI server -CMD ["uvicorn", "envs.atari_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"] +CMD ["uvicorn", "envs.gym_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/src/envs/gym_env/server/requirements.txt b/src/envs/gym_env/server/requirements.txt index 65e28925..da9fb2b3 100644 --- a/src/envs/gym_env/server/requirements.txt +++ b/src/envs/gym_env/server/requirements.txt @@ -1,3 +1,3 @@ -gymnasium>=0.29.0 +gymnasium[all]>=0.29.0 ale-py>=0.8.0 numpy>=1.24.0 From fe3c0cd6deafb3769531193e6bc1792c7a79ef18 Mon Sep 17 00:00:00 2001 From: Ali Alami-idrissi Date: Sat, 8 Nov 2025 22:55:57 
+0400 Subject: [PATCH 7/9] bugfix --- src/envs/gym_env/server/gymnasium_environment.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/envs/gym_env/server/gymnasium_environment.py b/src/envs/gym_env/server/gymnasium_environment.py index 71051830..e5729f3f 100644 --- a/src/envs/gym_env/server/gymnasium_environment.py +++ b/src/envs/gym_env/server/gymnasium_environment.py @@ -179,9 +179,6 @@ def _convert_action(self, action: GymAction) -> Any: raw_action = action.action space = self.env.action_space - if space.contains(raw_action): - return raw_action - converted = self._convert_action_for_space(space, raw_action) if not space.contains(converted): From acc24d700d1ed204f264fdad87c7fad695aa76a7 Mon Sep 17 00:00:00 2001 From: Ali Alami-idrissi Date: Sat, 8 Nov 2025 23:06:42 +0400 Subject: [PATCH 8/9] Small doc modification --- src/envs/gym_env/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/src/envs/gym_env/README.md b/src/envs/gym_env/README.md index 7a9c2de7..1bf25603 100644 --- a/src/envs/gym_env/README.md +++ b/src/envs/gym_env/README.md @@ -24,7 +24,6 @@ Gymnasium includes numerous environments across different categories: - **Pendulum** - Swing up and balance an inverted pendulum - **Acrobot** - Swing up a two-link robotic arm - **MountainCar** - Drive up a mountain with limited power -- **MountainCarContinuous** - Continuous version of MountainCar ### Box2D - **LunarLander** - Land a spacecraft safely From 538274a3e025d65b4d6a89b2b5d63d1ef780c702 Mon Sep 17 00:00:00 2001 From: Ali Alami-idrissi Date: Sat, 8 Nov 2025 23:10:56 +0400 Subject: [PATCH 9/9] revert pyproject.toml --- src/core/pyproject.toml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/core/pyproject.toml b/src/core/pyproject.toml index 47e8701f..32602f58 100644 --- a/src/core/pyproject.toml +++ b/src/core/pyproject.toml @@ -43,10 +43,4 @@ packages = [ "openenv_core.env_server", "openenv_core.tools" ] -[tool.black] -line-length = 80 - -[tool.ruff] 
-line-length = 80 -select = ["E", "F", "W"] -ignore = ["E501"] # Ignore long lines, if desired +package-dir = {"openenv_core" = "."}