#!/usr/bin/env python3
"""
Example: Random Agent Playing NetHack via OpenEnv

This script demonstrates how to use the NLE environment through OpenEnv's
HTTP interface. It runs a random agent for a few episodes.

Prerequisites:
  1. Build the Docker image:
     cd src/envs/nle_env/server
     docker build -t nle-env:latest .

  2. Run this script:
     python examples/nle_random_agent.py
"""

import random
import sys
import time
from pathlib import Path

# Add src to path if running directly
src_path = Path(__file__).parent.parent / "src"
sys.path.insert(0, str(src_path))

from envs.nle_env import NLEEnv, NLEAction

# NLE's discrete action space: valid action ids are 0..NUM_ACTIONS - 1.
NUM_ACTIONS = 113

# blstats indices — assumed from NLE documentation; TODO confirm against
# nle.nethack's blstats layout for the installed NLE version.
_HP, _MAX_HP, _DEPTH, _GOLD, _XP_LEVEL = 10, 11, 12, 13, 18


def print_stats(observation):
    """Print human-readable stats from observation (no-op without blstats)."""
    if observation.blstats is None:
        return

    blstats = observation.blstats
    print(f"  HP: {blstats[_HP]}/{blstats[_MAX_HP]}")
    print(f"  XP Level: {blstats[_XP_LEVEL]}")
    print(f"  Gold: {blstats[_GOLD]}")
    print(f"  Dungeon Level: {blstats[_DEPTH]}")


def main():
    """Run a random agent for a few episodes against a Dockerized NLE server."""
    print("=" * 70)
    print("NLE Random Agent Example")
    print("=" * 70)

    # Start environment (automatically launches Docker container)
    print("\n[1/3] Starting NLE environment...")
    print("(This may take a moment if container needs to start)")

    env = NLEEnv.from_docker_image(
        "nle-env:latest",
        # Optional: customize container
        # env_vars={"NLE_MAX_STEPS": "1000"}
    )

    print("✓ Environment connected!")

    num_episodes = 3
    max_steps_per_episode = 100

    print(f"\n[2/3] Running {num_episodes} episodes...")

    # try/finally guarantees env.close() runs even if an episode raises;
    # otherwise the Docker container would be left running.
    try:
        for episode in range(num_episodes):
            print(f"\n--- Episode {episode + 1}/{num_episodes} ---")

            # Reset environment
            result = env.reset()
            print("Environment reset")
            print_stats(result.observation)

            episode_reward = 0.0
            steps = 0

            # Play episode
            for step in range(max_steps_per_episode):
                # Uniform random action over the full action space
                action = NLEAction(action_id=random.randint(0, NUM_ACTIONS - 1))

                # Take step
                result = env.step(action)

                episode_reward += result.reward or 0
                steps += 1

                # Print occasional updates
                if step % 20 == 0:
                    print(f"  Step {step}: reward={episode_reward:.1f}")

                # Check if done
                if result.done:
                    state = env.state()
                    print(f"\nEpisode ended after {steps} steps!")
                    print(f"  Total reward: {episode_reward:.1f}")
                    print(f"  End status: {state.end_status}")
                    print("  Final stats:")
                    print_stats(result.observation)
                    break
            else:
                # for/else: reached only when the loop finished without break
                print(f"\nReached max steps ({max_steps_per_episode})")
                print(f"  Total reward: {episode_reward:.1f}")

            time.sleep(0.5)  # Brief pause between episodes
    finally:
        # Cleanup — always release the container
        print("\n[3/3] Cleaning up...")
        env.close()
        print("✓ Environment closed")

    print("\n" + "=" * 70)
    print("Example complete!")
    print("=" * 70)


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\n\nInterrupted by user")
    except Exception as e:
        print(f"\n\nError: {e}")
        import traceback

        traceback.print_exc()
## Overview

NetHack is one of the oldest and most challenging roguelike games, featuring:
- **Procedurally generated dungeons** - Every episode is unique
- **Complex action space** - 113+ discrete actions (movement, combat, magic, inventory management)
- **Rich observation space** - 14+ observation types including dungeon map, stats, inventory, messages
- **Challenging gameplay** - One of the hardest RL benchmarks available
- **Deterministic (with seeding)** - Reproducible episodes for evaluation

This environment wraps the [NetHack Learning Environment (NLE)](https://github.com/facebookresearch/nle) project, which provides a Gym interface to NetHack.

## Quick Start

### Using Docker (Recommended)

```python
from envs.nle_env import NLEEnv, NLEAction

# Automatically start container and connect
env = NLEEnv.from_docker_image("nle-env:latest")

# Reset to start a new game
result = env.reset()
print(f"Episode started: {result.observation.message}")

# Take actions in the game
for step in range(100):
    # Action IDs: 0-112 (movement, commands, etc.)
    action = NLEAction(action_id=0)  # Move north
    result = env.step(action)

    print(f"Step {step}: Reward={result.reward}, Done={result.done}")

    if result.done:
        print("Episode ended!")
        break

env.close()
```

### Building the Docker Image

```bash
# Build from the repository root (not from the server directory)
cd /path/to/OpenEnv
docker build -f src/envs/nle_env/server/Dockerfile -t nle-env:latest .
```

**Note:** Building NLE from source can take 5-10 minutes as it compiles NetHack C code.
+ +### Running the Server Locally + +```bash +# Install NLE (requires cmake, build-essential) +pip install nle gym + +# Run the server +python -m envs.nle_env.server.app + +# Server will be available at http://localhost:8000 +``` + +## Action Space + +NLE uses a discrete action space with 113 actions: + +| Action ID Range | Category | Examples | +|----------------|----------|----------| +| 0-7 | Cardinal movement | North, South, East, West | +| 8-15 | Diagonal movement | NE, SE, SW, NW | +| 16-20 | Stair navigation | Up, Down | +| 21-112 | Commands | Eat, Search, Apply, Quaff, Read, etc. | + +Common actions: +```python +# Movement +NLEAction(action_id=0) # Move north (k) +NLEAction(action_id=1) # Move east (l) +NLEAction(action_id=2) # Move south (j) +NLEAction(action_id=3) # Move west (h) + +# Interactions +NLEAction(action_id=37) # Eat (e) +NLEAction(action_id=50) # Search (s) +NLEAction(action_id=104) # Inventory (i) +NLEAction(action_id=86) # Wait (.) +``` + +For a complete action mapping, see [NLE Actions Documentation](https://github.com/facebookresearch/nle/blob/main/nle/nethack/actions.py). + +## Observation Space + +NLE provides rich observations about the game state. With OpenEnv's beefy compute assumption, all observations are included by default: + +### Core Observations +- **glyphs** `(21, 79)`: Symbolic dungeon map representation +- **blstats** `(26,)`: Bottom-line stats (HP, MaxHP, XP, Gold, etc.) +- **message** `(256,)`: Latest game message as byte array + +### Visual Observations +- **chars** `(21, 79)`: ASCII character display +- **colors** `(21, 79)`: Color codes for display +- **specials** `(21, 79)`: Special attributes (bold, inverse, etc.) 
+ +### Inventory Observations +- **inv_glyphs** `(55,)`: Inventory item glyphs +- **inv_strs** `(55, 80)`: Inventory item descriptions +- **inv_letters** `(55,)`: Inventory item letters (a-z, A-Z) +- **inv_oclasses** `(55,)`: Inventory object classes + +### Terminal Observations (for rendering) +- **tty_chars** `(24, 80)`: Full terminal character display +- **tty_colors** `(24, 80)`: Full terminal colors +- **tty_cursor** `(2,)`: Terminal cursor position [row, col] + +### Extended Observations +- **screen_descriptions** `(21, 79, 80)`: Text descriptions of dungeon cells +- **program_state** `(6,)`: Internal program state +- **internal** `(9,)`: Internal game state +- **misc** `(4,)`: Miscellaneous info + +All observations are serialized as nested lists (converted from numpy arrays) for JSON compatibility. + +## Reward Structure + +By default, NLE uses **score delta** as the reward: +``` +reward = current_score - previous_score +``` + +Score increases by: +- Defeating monsters +- Collecting gold +- Advancing to deeper dungeon levels +- Finding items +- Gaining experience points + +## Episode Termination + +Episodes end when: +1. **Death** - Character dies (most common) +2. **Ascension** - Player completes the game (very rare!) +3. **Aborted** - Max episode steps reached (default: 5000) +4. 
**Task Successful** - For task-specific environments + +Check the end status: +```python +result = env.step(action) +if result.done: + state = env.state() + print(f"End status: {state.end_status}") + # Possible values: RUNNING, DEATH, TASK_SUCCESSFUL, ABORTED +``` + +## Configuration + +Configure the environment via environment variables or Docker args: + +```bash +# Task variant (default: score) +export NLE_TASK=score + +# Character (role-race-gender-alignment) +export NLE_CHARACTER=mon-hum-neu-mal + +# Max episode steps (default: 5000) +export NLE_MAX_STEPS=10000 +``` + +### Character Options + +Format: `role-race-gender-alignment` + +**Roles:** Archaeologist (arc), Barbarian (bar), Caveman (cav), Healer (hea), Knight (kni), Monk (mon), Priest (pri), Ranger (ran), Rogue (rog), Samurai (sam), Tourist (tou), Valkyrie (val), Wizard (wiz) + +**Races:** Human (hum), Dwarf (dwa), Elf (elf), Gnome (gno), Orc (orc) + +**Genders:** Male (mal), Female (fem) + +**Alignments:** Lawful (law), Neutral (neu), Chaotic (cha) + +Example: `wiz-elf-fem-cha` = Female Elven Chaotic Wizard + +## Example: Random Agent + +```python +import random +from envs.nle_env import NLEEnv, NLEAction + +env = NLEEnv.from_docker_image("nle-env:latest") + +episodes = 10 +for episode in range(episodes): + result = env.reset() + total_reward = 0 + + while True: + # Random action + action = NLEAction(action_id=random.randint(0, 112)) + result = env.step(action) + + total_reward += result.reward or 0 + + if result.done: + state = env.state() + print(f"Episode {episode}: Reward={total_reward:.1f}, " + f"Steps={state.step_count}, Status={state.end_status}") + break + +env.close() +``` + +## Example: Rendering Game State + +```python +import numpy as np +from envs.nle_env import NLEEnv, NLEAction + +env = NLEEnv.from_docker_image("nle-env:latest") +result = env.reset() + +# Get terminal display +tty_chars = np.array(result.observation.tty_chars) +tty_colors = np.array(result.observation.tty_colors) + +# 
Print ASCII display +for row in tty_chars: + print(''.join(chr(c) for c in row)) + +# Get game message +message = bytes(result.observation.message) +print(f"Message: {message[:message.index(b'\\0')].decode('ascii')}") + +# Get stats +blstats = result.observation.blstats +print(f"HP: {blstats[10]}/{blstats[11]}, Gold: {blstats[13]}, " + f"XP Level: {blstats[18]}") + +env.close() +``` + +## Performance Considerations + +With **beefy compute** (64+ cores, 256GB+ RAM, 10Gbps network): +- Observation size: ~140KB per step (all observation types) +- Network overhead: Negligible (<1ms on fast network) +- Memory: ~200-500MB per container +- Throughput: 100+ parallel environments easily + +**Optimizations are NOT needed** - just run it simple with JSON serialization! + +## Task Variants (Future) + +Current implementation: **NetHackScore** (maximize game score) + +Planned task variants: +- **NetHackStaircase** - Reach the stairs down +- **NetHackOracle** - Find the Oracle +- **NetHackGold** - Collect gold +- **NetHackEat** - Maximize hunger satisfaction +- **NetHackScout** - Maximize exploration + +## Troubleshooting + +### Build Issues + +If Docker build fails with cmake errors: +```bash +# Ensure cmake is recent enough (3.15+) +cmake --version +``` + +### Container Won't Start + +Check logs: +```bash +docker logs +``` + +Common issues: +- NLE compilation failed → Check cmake, build-essential installed +- Import errors → Check PYTHONPATH set correctly +- Port already in use → Use different port mapping + +### Slow Performance + +If you experience slowness even with beefy compute: +1. Check network latency: `ping ` +2. Monitor CPU: NLE is CPU-intensive for dungeon generation +3. 
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""Public package interface for the NetHack Learning Environment (NLE) integration."""

from .client import NLEEnv
from .models import NLEAction, NLEObservation, NLEState

__all__ = ["NLEAction", "NLEObservation", "NLEState", "NLEEnv"]
+""" + +from typing import Dict + +from core.client_types import StepResult +from core.http_env_client import HTTPEnvClient + +from .models import NLEAction, NLEObservation, NLEState + + +class NLEEnv(HTTPEnvClient[NLEAction, NLEObservation]): + """ + HTTP client for the NetHack Learning Environment. + + This client connects to an NLEEnvironment HTTP server and provides + methods to interact with NetHack: reset(), step(), and state access. + + With beefy compute, we use simple JSON serialization. The server sends + all observation arrays as nested lists, which we keep as-is or convert + back to numpy arrays as needed. + + Example: + >>> # Connect to a running server + >>> client = NLEEnv(base_url="http://localhost:8000") + >>> result = client.reset() + >>> print(result.observation.blstats) # [HP, MaxHP, ...] + >>> + >>> # Take a step (move north) + >>> result = client.step(NLEAction(action_id=0)) + >>> print(result.reward) + >>> print(result.done) + + Example with Docker: + >>> # Automatically start container and connect + >>> client = NLEEnv.from_docker_image("nle-env:latest") + >>> result = client.reset() + >>> + >>> # Play NetHack! + >>> for _ in range(100): + ... action = NLEAction(action_id=random.randint(0, 112)) + ... result = client.step(action) + ... if result.done: + ... break + """ + + def _step_payload(self, action: NLEAction) -> Dict: + """ + Convert NLEAction to JSON payload for step request. + + Args: + action: NLEAction instance with action_id + + Returns: + Dictionary representation suitable for JSON encoding + """ + return { + "action_id": action.action_id, + } + + def _parse_result(self, payload: Dict) -> StepResult[NLEObservation]: + """ + Parse server response into StepResult[NLEObservation]. + + The server sends all arrays as nested lists. With beefy compute, + we just keep them as lists - no need to convert back to numpy + unless the user specifically needs it. 
+ + Args: + payload: JSON response from server + + Returns: + StepResult with NLEObservation + """ + obs_data = payload.get("observation", {}) + + # Extract standard fields + done = obs_data.get("done", False) + reward = obs_data.get("reward") + metadata = obs_data.get("metadata", {}) + + # Build observation with all the array fields + # Keep them as lists - simple and works great with beefy compute + observation = NLEObservation( + # Core observations + glyphs=obs_data.get("glyphs"), + blstats=obs_data.get("blstats"), + message=obs_data.get("message"), + # Visual observations + chars=obs_data.get("chars"), + colors=obs_data.get("colors"), + specials=obs_data.get("specials"), + # Inventory observations + inv_glyphs=obs_data.get("inv_glyphs"), + inv_strs=obs_data.get("inv_strs"), + inv_letters=obs_data.get("inv_letters"), + inv_oclasses=obs_data.get("inv_oclasses"), + # Terminal observations + tty_chars=obs_data.get("tty_chars"), + tty_colors=obs_data.get("tty_colors"), + tty_cursor=obs_data.get("tty_cursor"), + # Extended observations + screen_descriptions=obs_data.get("screen_descriptions"), + program_state=obs_data.get("program_state"), + internal=obs_data.get("internal"), + misc=obs_data.get("misc"), + # Standard fields + done=done, + reward=reward, + metadata=metadata, + ) + + return StepResult( + observation=observation, + reward=reward, + done=done, + ) + + def _parse_state(self, payload: Dict) -> NLEState: + """ + Parse server response into NLEState object. 
+ + Args: + payload: JSON response from /state endpoint + + Returns: + NLEState object with episode and game information + """ + return NLEState( + episode_id=payload.get("episode_id"), + step_count=payload.get("step_count", 0), + game_over=payload.get("game_over", False), + end_status=payload.get("end_status", "RUNNING"), + in_normal_game=payload.get("in_normal_game", False), + character=payload.get("character", "mon-hum-neu-mal"), + task_name=payload.get("task_name", "NetHackScore-v0"), + ) diff --git a/src/envs/nle_env/models.py b/src/envs/nle_env/models.py new file mode 100644 index 00000000..c362ae28 --- /dev/null +++ b/src/envs/nle_env/models.py @@ -0,0 +1,110 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +""" +Data models for the NetHack Learning Environment (NLE). + +The NLE environment wraps the NetHack 3.6.6 game as a reinforcement learning +environment, providing rich observations and a complex action space. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, Dict, List, Optional + +from core.env_server import Action, Observation, State + + +@dataclass +class NLEAction(Action): + """ + Action for the NetHack Learning Environment. + + Uses discrete action space where action_id maps to NetHack commands + (movement, interactions, etc.). The action space has ~113 actions. + + Examples: + - action_id=0: Move North (k) + - action_id=1: Move East (l) + - action_id=37: Eat (e) + - action_id=50: Search (s) + """ + + action_id: int # Index into nethack.USEFUL_ACTIONS (0-112) + + +@dataclass +class NLEObservation(Observation): + """ + Observation from the NetHack Learning Environment. + + Contains a subset of NLE's 14+ observation types. All numpy arrays are + serialized as nested lists for JSON compatibility. 
@dataclass
class NLEState(State):
    """
    Episode state for the NLE environment.

    Extends the base State with NetHack-specific flags beyond plain
    episode/step tracking.
    """

    # Whether the current episode has terminated.
    game_over: bool = False
    # Terminal status: RUNNING, DEATH, TASK_SUCCESSFUL, or ABORTED.
    end_status: str = "RUNNING"
    # Mirrors NLE's nethack.in_normal_game() flag.
    in_normal_game: bool = False
    # Character spec in role-race-gender-alignment form.
    character: str = "mon-hum-neu-mal"
    # Name of the NLE task variant backing this episode.
    task_name: str = "NetHackScore-v0"
because NLE uses C extension with global state +CMD ["uvicorn", "envs.nle_env.server.app:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "1"] diff --git a/src/envs/nle_env/server/__init__.py b/src/envs/nle_env/server/__init__.py new file mode 100644 index 00000000..f9087cf5 --- /dev/null +++ b/src/envs/nle_env/server/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +"""Server module for NLE environment.""" diff --git a/src/envs/nle_env/server/app.py b/src/envs/nle_env/server/app.py new file mode 100644 index 00000000..19fc69f4 --- /dev/null +++ b/src/envs/nle_env/server/app.py @@ -0,0 +1,54 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +""" +FastAPI application for the NetHack Learning Environment. + +This module creates an HTTP server that exposes the NLE environment +over HTTP endpoints, making it compatible with HTTPEnvClient. 
"""
FastAPI application for the NetHack Learning Environment.

Exposes the NLE environment over HTTP endpoints compatible with
HTTPEnvClient.

Usage:
    # Development (with auto-reload):
    uvicorn envs.nle_env.server.app:app --reload --host 0.0.0.0 --port 8000

    # Production:
    uvicorn envs.nle_env.server.app:app --host 0.0.0.0 --port 8000 --workers 1

    # Or run directly:
    python -m envs.nle_env.server.app

Note:
    NLE is single-threaded (uses C extension with global state), so workers=1
"""

import os

from core.env_server.http_server import create_app

from ..models import NLEAction, NLEObservation
from .nle_environment import NLEEnvironment

# Server configuration, overridable via environment variables.
TASK_NAME = os.getenv("NLE_TASK", "score")
CHARACTER = os.getenv("NLE_CHARACTER", "mon-hum-neu-mal")
MAX_STEPS = int(os.getenv("NLE_MAX_STEPS", "5000"))

# Single module-level environment instance: NLE wraps a C extension with
# global state, so the server must run with exactly one worker.
env = NLEEnvironment(
    task_name=TASK_NAME,
    character=CHARACTER,
    max_episode_steps=MAX_STEPS,
)

# App with web interface and README integration.
app = create_app(env, NLEAction, NLEObservation, env_name="nle_env")


if __name__ == "__main__":
    import uvicorn

    # NLE must run single-threaded (workers=1) due to C extension
    uvicorn.run(app, host="0.0.0.0", port=8000, workers=1)
+""" + +import time +from typing import Optional + +from core.env_server.interfaces import Environment, Transform + +from ..models import NLEAction, NLEObservation, NLEState + +# Import NLE - will be installed in Docker +try: + from nle.env import NLE +except ImportError: + NLE = None # type: ignore + + +class NLEEnvironment(Environment): + """ + OpenEnv wrapper for the NetHack Learning Environment. + + This environment wraps NLE's gym interface and provides OpenEnv-compatible + reset(), step(), and state access. + + With beefy compute, we use simple JSON serialization and include all + observation types by default. No optimization needed - compute handles it. + + Example: + >>> env = NLEEnvironment() + >>> obs = env.reset() + >>> print(obs.reward) # 0.0 + >>> + >>> obs = env.step(NLEAction(action_id=0)) # Move north + >>> print(obs.reward) # Score delta + >>> print(env.state.step_count) # 1 + """ + + def __init__( + self, + task_name: str = "score", + character: str = "mon-hum-neu-mal", + max_episode_steps: int = 5000, + observation_keys: tuple = ( + "glyphs", + "chars", + "colors", + "specials", + "blstats", + "message", + "inv_glyphs", + "inv_strs", + "inv_letters", + "inv_oclasses", + "tty_chars", + "tty_colors", + "tty_cursor", + ), + transform: Optional[Transform] = None, + ): + """ + Initialize the NLE environment. + + Args: + task_name: Task variant (score, staircase, oracle, gold, etc.) + character: Character definition (role-race-gender-alignment) + max_episode_steps: Maximum steps before episode is aborted + observation_keys: Which observations to include + transform: Optional observation transform + """ + super().__init__(transform=transform) + + if NLE is None: + raise ImportError( + "NLE is not installed. Install with: pip install nle\n" + "For Docker builds, this will be installed automatically." 
+ ) + + self._task_name = task_name + self._character = character + self._observation_keys = observation_keys + + # Create NLE gym environment + # With beefy compute: no ttyrec saving, all observations enabled + self.nle_env = NLE( + character=character, + observation_keys=observation_keys, + max_episode_steps=max_episode_steps, + save_ttyrec_every=0, # Disable by default (can enable via env var) + wizard=False, # Can enable via env var for debugging + spawn_monsters=True, + ) + + # Episode tracking + self._episode_id: Optional[str] = None + self._step_count = 0 + self._last_reward = 0.0 + self._last_done = False + self._end_status = "RUNNING" + self._in_normal_game = False + + def reset(self) -> NLEObservation: + """ + Reset the environment and return initial observation. + + Returns: + NLEObservation with initial game state + """ + # Reset NLE gym env + # Note: Gym 0.26+ returns (obs, info) tuple from reset() + reset_result = self.nle_env.reset() + + # Handle both old gym API (returns obs dict) and new API (returns tuple) + if isinstance(reset_result, tuple): + gym_obs, _ = reset_result # Unpack (observation, info) + else: + gym_obs = reset_result # Old API + + # Initialize episode tracking + self._episode_id = f"nle_{int(time.time() * 1000000)}" + self._step_count = 0 + self._last_reward = 0.0 + self._last_done = False + self._end_status = "RUNNING" + self._in_normal_game = self.nle_env.nethack.in_normal_game() + + # Convert gym observation to OpenEnv observation + obs = self._convert_observation(gym_obs, reward=0.0, done=False) + + return self._apply_transform(obs) + + def step(self, action: NLEAction) -> NLEObservation: # type: ignore[override] + """ + Execute action in NetHack and return observation. 
+ + Args: + action: NLEAction with action_id (0-112) + + Returns: + NLEObservation with game state after action + """ + # Execute action in NLE + # Note: Gym 0.26+ returns (obs, reward, terminated, truncated, info) + # Older gym returns (obs, reward, done, info) + step_result = self.nle_env.step(action.action_id) + + # Handle both old and new gym APIs + if len(step_result) == 5: + # New gym API (0.26+): (obs, reward, terminated, truncated, info) + gym_obs, reward, terminated, truncated, info = step_result + done = terminated or truncated + elif len(step_result) == 4: + # Old gym API: (obs, reward, done, info) + gym_obs, reward, done, info = step_result + else: + raise ValueError(f"Unexpected step result length: {len(step_result)}") + + # Update tracking + self._step_count += 1 + self._last_reward = float(reward) + self._last_done = bool(done) + self._end_status = str(info.get("end_status", "RUNNING")) + self._in_normal_game = self.nle_env.nethack.in_normal_game() + + # Convert observation + obs = self._convert_observation(gym_obs, reward=reward, done=done) + + # Add metadata from NLE + obs.metadata.update( + { + "end_status": self._end_status, + "is_ascended": info.get("is_ascended", False), + } + ) + + return self._apply_transform(obs) + + @property + def state(self) -> NLEState: + """ + Get current environment state. + + Returns: + NLEState with episode and game information + """ + return NLEState( + episode_id=self._episode_id, + step_count=self._step_count, + game_over=self._last_done, + end_status=self._end_status, + in_normal_game=self._in_normal_game, + character=self._character, + task_name=self._task_name, + ) + + def _convert_observation( + self, gym_obs: dict, reward: float, done: bool + ) -> NLEObservation: + """ + Convert NLE gym observation to NLEObservation. + + With beefy compute, we just convert numpy arrays to lists. + No compression, no optimization - simplicity first. 
+ + Args: + gym_obs: Dictionary from NLE gym env + reward: Reward for this step + done: Whether episode is done + + Returns: + NLEObservation with serialized arrays + """ + obs_dict = { + "reward": float(reward), + "done": bool(done), + "metadata": {}, + } + + # Convert each observation type from numpy array to nested list + # This is simple and works perfectly with JSON + beefy compute + for key in self._observation_keys: + if key in gym_obs: + array = gym_obs[key] + # Convert numpy array to nested list for JSON serialization + obs_dict[key] = array.tolist() + + return NLEObservation(**obs_dict) + + def close(self): + """Clean up NLE environment.""" + if hasattr(self, "nle_env"): + self.nle_env.close()