diff --git a/.github/workflows/deploy-hf-env.yml b/.github/workflows/deploy-hf-env.yml index d84833df..98eae1c9 100644 --- a/.github/workflows/deploy-hf-env.yml +++ b/.github/workflows/deploy-hf-env.yml @@ -15,6 +15,7 @@ on: - 'chat_env' - 'atari_env' - 'openspiel_env' + - 'wildfire_env' custom_environment: description: 'Custom environment to deploy (leave empty for none)' required: false @@ -63,7 +64,7 @@ jobs: if [ "${{ github.event.inputs.environment }}" = "all" ]; then echo "deploy_all=true" >> $GITHUB_OUTPUT echo "use_matrix=true" >> $GITHUB_OUTPUT - echo "environments=echo_env,coding_env,chat_env,atari_env,openspiel_env" >> $GITHUB_OUTPUT + echo "environments=echo_env,coding_env,chat_env,atari_env,openspiel_env,wildfire_env" >> $GITHUB_OUTPUT echo "Manual trigger - deploying all environments with matrix" else echo "deploy_all=false" >> $GITHUB_OUTPUT @@ -78,14 +79,14 @@ jobs: if git diff --name-only HEAD~1 HEAD | grep -E '^src/core/' > /dev/null; then echo "deploy_all=true" >> $GITHUB_OUTPUT echo "use_matrix=true" >> $GITHUB_OUTPUT - echo "environments=echo_env,coding_env,chat_env,atari_env,openspiel_env" >> $GITHUB_OUTPUT + echo "environments=echo_env,coding_env,chat_env,atari_env,openspiel_env,wildfire_env" >> $GITHUB_OUTPUT echo "Core files changed - deploying all environments with matrix" exit 0 fi # Check which specific environments changed changed_envs=() - for env in echo_env coding_env chat_env atari_env openspiel_env; do + for env in echo_env coding_env chat_env atari_env openspiel_env wildfire_env; do if git diff --name-only HEAD~1 HEAD | grep -E "^src/envs/$env/" > /dev/null; then changed_envs+=("$env") fi @@ -110,7 +111,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - environment: [echo_env, coding_env, chat_env, atari_env, openspiel_env] + environment: [echo_env, coding_env, chat_env, atari_env, openspiel_env, wildfire_env] permissions: contents: read diff --git a/examples/wildfire.py b/examples/wildfire.py new file mode 100644 index 
00000000..43f99389
--- /dev/null
+++ b/examples/wildfire.py
@@ -0,0 +1,190 @@
+#!/usr/bin/env python3
+"""
+Simple example demonstrating Wildfire Environment usage.
+
+This example shows how to:
+1. Connect to a Wildfire environment
+2. Reset the environment
+3. Take strategic actions (water, firebreak, wait)
+4. Monitor fire spread and containment
+5. Visualize the grid state
+
+Usage:
+    # First, start the server:
+    python -m envs.wildfire_env.server.app
+
+    # Then run this script:
+    python examples/wildfire.py
+"""
+
+import random
+import sys
+from pathlib import Path
+
+# Add src to path
+sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
+
+from envs.wildfire_env import WildfireEnv, WildfireAction
+from envs.wildfire_env.client import render_grid
+
+# Cell codes in the flat observation grid, as documented in the environment README.
+FUEL = 1
+BURNING = 2
+
+
+def _adjacent_fuel_cell(obs, x, y):
+    """Return a random in-bounds fuel cell neighbouring (x, y), or None if there is none."""
+    candidates = [
+        (x + dx, y + dy)
+        for dy in (-1, 0, 1)
+        for dx in (-1, 0, 1)
+        if (dx, dy) != (0, 0)
+        and 0 <= x + dx < obs.width
+        and 0 <= y + dy < obs.height
+        and obs.grid[(y + dy) * obs.width + (x + dx)] == FUEL
+    ]
+    return random.choice(candidates) if candidates else None
+
+
+def simple_agent_strategy(obs):
+    """
+    Simple firefighting strategy:
+    - Target burning cells with water if available
+    - Build firebreaks next to fires if water is depleted
+    - Otherwise wait
+    """
+    # Find burning cells (grid is flat, row-major: index = y * width + x)
+    burning_cells = [
+        (x, y)
+        for y in range(obs.height)
+        for x in range(obs.width)
+        if obs.grid[y * obs.width + x] == BURNING
+    ]
+
+    if not burning_cells:
+        return WildfireAction(action="wait")
+
+    # Pick a random burning cell to target
+    target_x, target_y = random.choice(burning_cells)
+
+    # Use water if available, otherwise use firebreak
+    if obs.remaining_water > 0:
+        return WildfireAction(action="water", x=target_x, y=target_y)
+    if obs.remaining_breaks > 0:
+        # Prefer an adjacent fuel cell so the break blocks spread; fall back to
+        # the burning cell itself when it has no fuel neighbours.
+        break_x, break_y = _adjacent_fuel_cell(obs, target_x, target_y) or (
+            target_x,
+            target_y,
+        )
+        return WildfireAction(action="break", x=break_x, y=break_y)
+    return WildfireAction(action="wait")
+
+
+def main():
+    """Run a wildfire containment episode."""
+    # Connect to the Wildfire environment server
+    print("Connecting to Wildfire environment...")
+    print("Note: Make sure the server is running with: python -m envs.wildfire_env.server.app")
+
+    # Connect to local server
+    env = WildfireEnv(base_url="http://localhost:8000")
+
+    try:
+        # Reset the environment
+        print("\nResetting environment...")
+        result = env.reset()
+        obs = result.observation
+
+        # Remember the starting budgets so the summary can report real usage
+        initial_water = obs.remaining_water
+        initial_breaks = obs.remaining_breaks
+
+        print("\nš² Wildfire Containment Mission Started!")
+        print(f"Grid size: {obs.width}x{obs.height}")
+        print(f"Initial fires: {obs.burning_count}")
+        print(f"Wind direction: {obs.wind_dir}")
+        print(f"Humidity: {obs.humidity:.2f}")
+        print(f"Water capacity: {obs.remaining_water}")
+        print(f"Firebreak materials: {obs.remaining_breaks}")
+
+        # Print initial grid
+        print("\nInitial state:")
+        print(render_grid(obs))
+        print("\nLegend: ā¬=ash š©=fuel š„=fire š«=firebreak š¦=water")
+
+        # Run episode
+        print("\n" + "="*60)
+        print("Starting containment operations...")
+        print("="*60)
+
+        episode_reward = 0
+        step_count = 0
+        max_steps = 50  # Limit steps for demo
+
+        # The loop condition already stops on result.done, so no explicit break
+        while not result.done and step_count < max_steps:
+            # Choose action using simple strategy
+            action = simple_agent_strategy(obs)
+
+            # Take action
+            result = env.step(action)
+            obs = result.observation
+            # reward may be None; normalise once so summing and printing agree
+            step_reward = result.reward or 0
+            episode_reward += step_reward
+            step_count += 1
+
+            # Print progress every 5 steps
+            if step_count % 5 == 0 or result.done:
+                print(f"\n--- Step {step_count} ---")
+                print(f"Action: {action.action}" +
+                      (f" at ({action.x}, {action.y})" if action.x is not None else ""))
+                print(f"Reward: {step_reward:.3f} | Total: {episode_reward:.2f}")
+                print(f"Fires: {obs.burning_count} | Burned: {obs.burned_count}")
+                print(f"Water left: {obs.remaining_water} | Breaks left: {obs.remaining_breaks}")
+                print(render_grid(obs))
+
+        # Episode summary
+        print("\n" + "="*60)
+        print("š EPISODE COMPLETE")
+        print("="*60)
+
+        if obs.burning_count == 0:
+            print("ā SUCCESS! All fires have been extinguished!")
+        else:
+            print(f"ā ļø Episode ended with {obs.burning_count} fires still burning")
+
+        print("\nFinal Statistics:")
+        print(f" Steps taken: {step_count}")
+        print(f" Total reward: {episode_reward:.2f}")
+        print(f" Cells burned: {obs.burned_count}")
+        print(f" Cells saved: {obs.width * obs.height - obs.burned_count}")
+        print(f" Water used: {initial_water - obs.remaining_water} of {initial_water}")
+        print(f" Firebreaks used: {initial_breaks - obs.remaining_breaks} of {initial_breaks}")
+
+        # Get environment state
+        # NOTE(review): the README accesses this as a property (`env.state`);
+        # confirm which form the client actually exposes.
+        state = env.state()
+        print("\nš Environment State:")
+        print(f" Episode ID: {state.episode_id}")
+        print(f" Total burned: {state.total_burned}")
+        print(f" Total extinguished: {state.total_extinguished}")
+        print(f" Final wind: {state.wind_dir}")
+        print(f" Final humidity: {state.humidity:.2f}")
+
+    except Exception as e:
+        print(f"\nā Error: {e}")
+        print("\nMake sure the Wildfire server is running:")
+        print(" python -m envs.wildfire_env.server.app")
+
+    finally:
+        # Cleanup
+        print("\nClosing environment...")
+        env.close()
+        print("Done!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/prepare_hf_deployment.sh b/scripts/prepare_hf_deployment.sh index 23fd4779..d39f3320 100755 --- a/scripts/prepare_hf_deployment.sh +++ b/scripts/prepare_hf_deployment.sh @@ -157,6 +157,7 @@ README_EOF "chat_env") ENV_CLASS="ChatEnv" ;; "atari_env") ENV_CLASS="AtariEnv" ;; "openspiel_env") ENV_CLASS="OpenSpielEnv" ;; + "wildfire_env") ENV_CLASS="WildfireEnv" ;; *) ENV_CLASS="Env" ;; esac diff --git a/src/envs/wildfire_env/README.md b/src/envs/wildfire_env/README.md new file mode 100644 index 00000000..3d96fcd5 --- /dev/null +++ b/src/envs/wildfire_env/README.md @@ -0,0 +1,1075 @@ +# š² Wildfire Environment + +Autonomous wildfire-control simulation for reinforcement-learning agents, built on the [OpenEnv](https://github.com/openenv) framework. 
+Agents must contain spreading fires using **water**, **firebreaks**, and **timing strategies** under changing **wind** and **humidity** conditions. + +[Docker](https://hub.docker.com/) +[Python](https://www.python.org/) +[FastAPI](https://fastapi.tiangolo.com/) +[License](LICENSE) + +--- + +## 📋 Table of Contents + +1. [Why Wildfire Simulation?](#-why-wildfire-simulation) +2. [Quick Start](#-quick-start) +3. [Environment Overview](#-environment-overview) +4. [Grid Format & Encoding](#-grid-format--encoding) +5. [Actions](#-actions) +6. [Observations](#-observations) +7. [Reward Structure](#-reward-structure) +8. [Fire Spread Mechanics](#-fire-spread-mechanics) +9. [Configuration](#-configuration) +10. [Installation & Usage](#-installation--usage) +11. [API Reference](#-api-reference) +12. [Examples](#-examples) +13. [Web Interface](#-web-interface) +14. [Troubleshooting](#-troubleshooting) +15. [References](#-references) + +--- + +## 🔥 Why Wildfire Simulation? + +Wildland fires are intensifying globally due to climate change — increasing the urgency for **AI-assisted decision-making**. +This environment explores how intelligent systems can **control** fire spread in real time, under limited resources. + +### Research Motivation +✅ Grounded in real wildfire science and inspired by: +- **Rothermel Surface Fire Spread Model** (USDA Forest Service) +- **MITRE Fireline's SimFire** — physics-informed RL fire simulator +- **SimHarness** — RL evaluation for disaster response + +### Application Goals +| Research Theme | Role in This Environment | +|---|---| +| Resource-Constrained Planning | Finite water + firebreak budgets | +| Fire Spread + Containment Strategy | Directional wind & moisture effects | +| Disaster Response RL | Safety-focused reward design | +| LLM Agents for Control Tasks | Text-based action decision making | + +This makes WildfireEnv a **fast, controllable**, and **open benchmark** for applied RL and LLM reasoning. 
+ +--- + +## š Quick Start + +### Using Docker (Recommended) + +```bash +# Build base image (first time only) +docker build -t openenv-base:latest -f src/core/containers/images/Dockerfile . + +# Build wildfire environment +docker build -t wildfire-env:latest -f src/envs/wildfire_env/server/Dockerfile . + +# Run container +docker run -p 8000:8000 -e ENABLE_WEB_INTERFACE=true wildfire-env:latest +``` + +**Note:** The web interface can be enabled with `ENABLE_WEB_INTERFACE=true`. Access it at `http://localhost:8000/web` when enabled. + +### Basic Python Client + +```python +from envs.wildfire_env import WildfireEnv, WildfireAction + +# Connect to running server +env = WildfireEnv(base_url="http://localhost:8000") + +# Reset environment +result = env.reset() +obs = result.observation +print(f"Grid: {obs.width}x{obs.height}, Fires: {obs.burning_count}, Water: {obs.remaining_water}") + +# Take action (water a burning cell) +result = env.step(WildfireAction(action="water", x=10, y=15)) +print(f"Reward: {result.reward:.2f}, Burning: {result.observation.burning_count}") + +# Create firebreak +result = env.step(WildfireAction(action="break", x=12, y=15)) + +# Wait (fire spreads) +result = env.step(WildfireAction(action="wait")) + +env.close() +``` + +--- + +## š„ Environment Overview + +This environment models **forest-fire dynamics** influenced by: +- **Wind direction** (8 directions + calm) - accelerates fire spread in wind direction +- **Humidity** (0.0-1.0) - suppresses ignition probability +- **Fuel type and spread rate** - vegetation burns and spreads to neighbors +- **Limited resources** (water units, break materials) - strategic resource management +- **Time pressure** (each step costs small reward penalty) + +The goal is to **minimize fire spread** and **total burned area** while using resources efficiently. 
+ +### Episode Termination + +An episode ends when: +- **All fires are extinguished** (`burning_count == 0`) - **Success!** +- **Maximum steps reached** (`step_count >= max_steps`) - Time limit exceeded + +--- + +## š§± Grid Format & Encoding + +### Grid Structure + +The grid is returned as a **flat 1D array** in the observation. To access cell at position `(x, y)`: + +```python +index = y * width + x +cell_value = observation.grid[index] +``` + +**Example:** For a 32Ć32 grid, cell at (10, 15): +```python +index = 15 * 32 + 10 # = 490 +cell_value = observation.grid[490] +``` + +### Cell Encoding + +| Code | Meaning | Color (Visualization) | Behavior | +|------|----------------|-----------------------|----------| +| `0` | Ash (burned) | Black ā« | Burned out, cannot reignite | +| `1` | Fuel | Green š© | Healthy vegetation, can ignite | +| `2` | Burning | Red š„ | Currently on fire, spreads to neighbors | +| `3` | Firebreak | Brown š« | Barrier, fire cannot cross | +| `4` | Water/Damp | Blue šµ | Dampened, immune to ignition temporarily | + +### Grid Visualization Example + +```python +import numpy as np + +obs = env.reset().observation +grid_2d = np.array(obs.grid).reshape(obs.height, obs.width) + +# Now grid_2d[y][x] gives the cell value at position (x, y) +print(grid_2d[15][10]) # Cell at x=10, y=15 +``` + +--- + +## š® Actions + +### Action Types + +#### 1. 
`water` - Apply Water +**Extinguishes burning cells and dampens fuel to prevent ignition.** + +```python +WildfireAction(action="water", x=10, y=15) +``` + +**Effects:** +- **Burning cell (2)**: Extinguishes ā becomes Water/Damp (4), gives **+0.25 reward** +- **Fuel cell (1)**: Dampens ā becomes Water/Damp (4), gives **-0.10 reward** (preventive, slight penalty) +- **Water/Damp cell (4)**: Redundant watering, gives **-0.05 reward** +- **Ash/Break (0, 3)**: Wasteful, gives **-0.05 reward** + +**Resource Cost:** 1 water unit per action +**Requires:** `remaining_water > 0` and valid coordinates + +**Best Use:** Extinguish active fires before they spread + +--- + +#### 2. `break` - Create Firebreak +**Builds a fire-resistant barrier that stops fire spread.** + +```python +WildfireAction(action="break", x=12, y=15) +``` + +**Effects:** +- **Fuel/Water cell (1, 4)**: Creates firebreak ā becomes Firebreak (3), gives **+0.15 reward** +- **Burning cell (2)**: Extinguishes ā becomes Firebreak (3), gives **-0.02 reward** (less effective than water) +- **Firebreak (3)**: Redundant, gives **-0.01 reward** +- **Ash (0)**: Wasteful, gives **-0.02 reward** + +**Resource Cost:** 1 firebreak material per action +**Requires:** `remaining_breaks > 0` and valid coordinates + +**Best Use:** Create barriers ahead of fire front to contain spread + +--- + +#### 3. 
`wait` - Do Nothing +**Let natural fire dynamics occur (fire spreads).** + +```python +WildfireAction(action="wait") +``` + +**Effects:** +- No resource cost +- No coordinate required +- Fire spreads naturally to neighboring cells +- Small time penalty (-0.01 reward per step) + +**Best Use:** When fire is contained, waiting for it to burn out + +--- + +### Invalid Actions + +Actions that fail (give **-0.05 reward**): +- Invalid coordinates (out of bounds) +- Using water when `remaining_water == 0` +- Using break when `remaining_breaks == 0` +- Missing required coordinates for water/break actions + +--- + +## šļø Observations + +### `WildfireObservation` + +Returned after every `reset()` or `step()`: + +```python +@dataclass +class WildfireObservation(Observation): + grid: List[int] # Flat array: [1,1,2,1,...] length = width Ć height + width: int # Grid width (default: 32) + height: int # Grid height (default: 32) + step: int # Current step number (0 at reset) + wind_dir: str # "N", "NE", "E", "SE", "S", "SW", "W", "NW", "CALM" + humidity: float # [0.0, 1.0] - higher = less fire spread + burning_count: int # Number of cells currently on fire + burned_count: int # Total number of ash cells (cumulative) + remaining_water: int # Water units left + remaining_breaks: int # Firebreak materials left + reward_hint: float # Shaping reward (for debugging) + done: bool # Episode ended? 
+ reward: float # Step reward +``` + +### Example Observation + +```python +result = env.reset() +obs = result.observation + +print(f"Step: {obs.step}") # 0 +print(f"Grid size: {obs.width}x{obs.height}") # 32x32 +print(f"Grid cells: {len(obs.grid)}") # 1024 +print(f"Active fires: {obs.burning_count}") # 2 +print(f"Wind: {obs.wind_dir}") # "NE" +print(f"Humidity: {obs.humidity:.2f}") # 0.24 +print(f"Water left: {obs.remaining_water}") # 8 +print(f"Breaks left: {obs.remaining_breaks}") # 50 +``` + +--- + +## 💰 Reward Structure + +### Step Rewards + +| Action | Condition | Reward | +|--------|-----------|--------| +| **Water burning cell** | Extinguishes fire | **+0.25** | +| **Water fuel cell** | Preventive dampening | **-0.10** | +| **Create firebreak** | From fuel/water | **+0.15** | +| **Fire spreads** | Each new burning cell | **-0.15 per cell** | +| **Fire shrinks** | Each extinguished cell | **+0.10 per cell** | +| **New burned area** | Each cell turns to ash | **-0.05 per cell** | +| **Time penalty** | Every step | **-0.01** | +| **Invalid action** | Out of bounds, no resources | **-0.05** | +| **Redundant action** | Watering already damp cell | **-0.05** | + +### Episode End Bonuses + +When the episode terminates (`done == True`): + +- **Fire contained** (`burning_count == 0`): + - **+0.5** base bonus + - **+0.5 × saved_ratio** bonus (proportion of cells not burned) + +- **Otherwise** (step limit reached with fires still burning): + - **+0.2 × (1.0 - burned_ratio)** fallback bonus + +**Example:** Perfect containment (no burned cells): +```text +Reward = +0.5 + 0.5 × 1.0 = +1.0 +``` + +### Reward Interpretation + +- **Positive rewards**: Good containment actions, extinguishing fires +- **Negative rewards**: Fire spread, resource waste, time penalty +- **Goal**: Maximize cumulative reward = minimize fire damage + +--- + +## 🌪️ Fire Spread Mechanics + +### Spread Model + +Fire spreads using an **8-directional neighbor model**: + +1. 
**Burning cells persist** for `burn_lifetime = 3` ticks before turning to ash +2. Each burning cell can ignite **neighboring fuel cells** (8 directions) +3. Spread probability depends on: + - **Base ignition probability**: `0.30` (30% chance) + - **Humidity factor**: `(1.0 - humidity)` - higher humidity = less spread + - **Wind multiplier**: + - **2.0×** in wind direction + - **0.5×** against wind + - **1.0×** perpendicular + - **Diagonal factor**: `0.6×` for diagonal neighbors (slower spread) + +4. **Water/Damp cells (4)** are **immune** to ignition while damp +5. **Firebreaks (3)** **cannot** be crossed by fire +6. **Ash cells (0)** cannot reignite + +### Wind Effects + +| Wind Direction | Effect on Fire Spread | +|----------------|----------------------| +| **In wind direction** | 2× ignition probability (faster spread) | +| **Against wind** | 0.5× ignition probability (slower spread) | +| **Perpendicular** | Normal (1×) ignition probability | +| **CALM** | No directional bias | + +### Water Dampening Duration + +Watered cells (4) remain damp for **6 ticks** before reverting to fuel (1). 
+ +### Example Fire Spread + +``` +Step 0: Step 1: Step 2: +š©š©š© š©š„š© š«š„š« +š©š„š© ā š„š„š„ ā š„š„š„ (Wind: E, spreading east) +š©š©š© š©š„š© š«š„š« +``` + +--- + +## āļø Configuration + +### Environment Variables + +Set these **before starting the server**: + +| Variable | Description | Default | Range | +|-----------|-------------|---------|-------| +| `WILDFIRE_WIDTH` | Grid width in cells | `32` | 8-128 | +| `WILDFIRE_HEIGHT` | Grid height in cells | `32` | 8-128 | +| `WILDFIRE_HUMIDITY` | Initial humidity level | `0.25` | 0.0-1.0 | +| `WILDFIRE_WIND` | Wind direction (fixed) | Random | `N`, `NE`, `E`, `SE`, `S`, `SW`, `W`, `NW`, `CALM` | +| `WILDFIRE_SEED` | Random seed | `3407` | Any integer | +| `WILDFIRE_MAX_STEPS` | Max steps per episode | `128` | 10-1000 | +| `WILDFIRE_WATER_CAPACITY` | Initial water units | `8` | 1-100 | +| `WILDFIRE_BREAK_CAPACITY` | Initial firebreak materials | `50` | 1-200 | + +### Python API Configuration + +```python +from envs.wildfire_env.server.wildfire_environment import WildfireEnvironment + +env = WildfireEnvironment( + width=64, + height=64, + humidity=0.3, + init_sources=3, # Number of initial fires + max_steps=200, + water_capacity=10, + break_capacity=75, + seed=42 +) +``` + +### Docker Configuration + +```bash +docker run -p 8000:8000 \ + -e WILDFIRE_WIDTH=64 \ + -e WILDFIRE_HEIGHT=64 \ + -e WILDFIRE_HUMIDITY=0.4 \ + -e WILDFIRE_WIND=N \ + -e WILDFIRE_WATER_CAPACITY=12 \ + wildfire-env:latest +``` + +### Custom Configuration + +```bash +# Build and run with custom configuration +docker build -t openenv-base:latest -f src/core/containers/images/Dockerfile . +docker build -t wildfire-env:latest -f src/envs/wildfire_env/server/Dockerfile . 
+docker run -p 8000:8000 \ + -e ENABLE_WEB_INTERFACE=true \ + -e WILDFIRE_WIDTH=64 \ + -e WILDFIRE_HEIGHT=64 \ + -e WILDFIRE_HUMIDITY=0.5 \ + wildfire-env:latest +``` + +--- + +## š Installation & Usage + +### Option 1: Docker (Recommended) + +**Manual setup:** +```bash +# Build base image (first time only) +docker build -t openenv-base:latest -f src/core/containers/images/Dockerfile . + +# Build wildfire environment +docker build -t wildfire-env:latest -f src/envs/wildfire_env/server/Dockerfile . + +# Run container +docker run -p 8000:8000 -e ENABLE_WEB_INTERFACE=true wildfire-env:latest +``` + +This approach: +- Builds the base image if needed +- Rebuilds the wildfire image +- Starts the container +- Shows logs in real-time + +**Alternative: Using build_docker.sh script:** +```bash +# Build base image (first time only) +docker build -t openenv-base:latest -f src/core/containers/images/Dockerfile . + +# Build wildfire environment using the script +cd src/envs/wildfire_env/server +./build_docker.sh + +# Run container +docker run -d -p 8000:8000 --name wildfire-env-container wildfire-env:latest + +# View logs +docker logs -f wildfire-env-container + +# Stop container +docker stop wildfire-env-container + +# Remove container +docker rm wildfire-env-container +``` + +### Option 2: Local Development (No Docker) + +**Requirements:** +```bash +pip install fastapi uvicorn numpy matplotlib requests +``` + +**Run server:** +```bash +# From OpenEnv root directory +python -m envs.wildfire_env.server.app +``` + +**Or with environment variables:** +```bash +WILDFIRE_WIDTH=64 WILDFIRE_HUMIDITY=0.3 python -m envs.wildfire_env.server.app +``` + +--- + +## š API Reference + +### Client Class + +```python +from envs.wildfire_env import WildfireEnv + +# Connect to existing server +env = WildfireEnv(base_url="http://localhost:8000") + +# Or create from Docker image +env = WildfireEnv.from_docker_image("wildfire-env:latest") +``` + +### Methods + +#### `reset() -> 
StepResult[WildfireObservation]` + +Resets the environment to initial state. + +```python +result = env.reset() +obs = result.observation +print(f"New episode: {obs.step == 0}") +``` + +#### `step(action: WildfireAction) -> StepResult[WildfireObservation]` + +Takes an action and returns new observation. + +```python +action = WildfireAction(action="water", x=10, y=15) +result = env.step(action) +print(f"Reward: {result.reward}, Done: {result.done}") +``` + +#### `state -> WildfireState` + +Access current environment state. + +```python +state = env.state +print(f"Episode ID: {state.episode_id}") +print(f"Total burned: {state.total_burned}") +print(f"Total extinguished: {state.total_extinguished}") +``` + +#### `close()` + +Closes the connection (for HTTP clients, this is a no-op but good practice). + +```python +env.close() +``` + +### Data Classes + +#### `WildfireAction` + +```python +@dataclass +class WildfireAction(Action): + action: str # "water" | "break" | "wait" + x: Optional[int] = None # Target X coordinate (required for water/break) + y: Optional[int] = None # Target Y coordinate (required for water/break) +``` + +**Examples:** +```python +WildfireAction(action="water", x=10, y=15) +WildfireAction(action="break", x=12, y=15) +WildfireAction(action="wait") # x, y not needed +``` + +#### `WildfireObservation` + +See [Observations](#-observations) section for full details. 
+ +#### `WildfireState` + +```python +@dataclass +class WildfireState(State): + episode_id: str + step_count: int + total_burned: int + total_extinguished: int + last_action: str + width: int + height: int + wind_dir: str + humidity: float + remaining_water: int + remaining_breaks: int + grid: List[int] + burn_timers: List[int] +``` + +--- + +## š Examples + +### Example 1: Simple Containment Strategy + +```python +from envs.wildfire_env import WildfireEnv, WildfireAction +import numpy as np + +env = WildfireEnv(base_url="http://localhost:8000") +result = env.reset() +obs = result.observation + +grid_2d = np.array(obs.grid).reshape(obs.height, obs.width) +total_reward = 0 + +while not result.done: + # Find burning cells + burning_indices = np.where(grid_2d == 2) + + if len(burning_indices[0]) > 0 and obs.remaining_water > 0: + # Water the first burning cell + y, x = burning_indices[0][0], burning_indices[1][0] + action = WildfireAction(action="water", x=int(x), y=int(y)) + else: + # Wait if no water or no fires + action = WildfireAction(action="wait") + + result = env.step(action) + obs = result.observation + total_reward += result.reward or 0 + + # Update grid + grid_2d = np.array(obs.grid).reshape(obs.height, obs.width) + + print(f"Step {obs.step}: Burning={obs.burning_count}, Reward={result.reward:.3f}") + +print(f"\nEpisode ended. 
Total reward: {total_reward:.2f}") +print(f"Final stats: Burned={obs.burned_count}, Extinguished={env.state.total_extinguished}") +env.close() +``` + +### Example 2: Firebreak Strategy + +```python +from envs.wildfire_env import WildfireEnv, WildfireAction +import numpy as np + +env = WildfireEnv(base_url="http://localhost:8000") +result = env.reset() +obs = result.observation + +def create_firebreak_barrier(obs, env): + """Create firebreak ahead of fire front based on wind direction.""" + grid_2d = np.array(obs.grid).reshape(obs.height, obs.width) + wind = obs.wind_dir + + # Find burning cells + burning_y, burning_x = np.where(grid_2d == 2) + + if len(burning_x) == 0 or obs.remaining_breaks == 0: + return WildfireAction(action="wait") + + # Calculate fire front position + if wind == "E": + target_x = int(np.max(burning_x)) + 2 # Ahead of easternmost fire + target_y = int(np.mean(burning_y)) + elif wind == "W": + target_x = int(np.min(burning_x)) - 2 + target_y = int(np.mean(burning_y)) + elif wind == "N": + target_x = int(np.mean(burning_x)) + target_y = int(np.min(burning_y)) - 2 + elif wind == "S": + target_x = int(np.mean(burning_x)) + target_y = int(np.max(burning_y)) + 2 + else: + # Fallback: water nearest burning cell + return WildfireAction(action="water", x=int(burning_x[0]), y=int(burning_y[0])) + + # Ensure within bounds + target_x = max(0, min(obs.width - 1, target_x)) + target_y = max(0, min(obs.height - 1, target_y)) + + return WildfireAction(action="break", x=target_x, y=target_y) + +total_reward = 0 +while not result.done: + action = create_firebreak_barrier(obs, env) + result = env.step(action) + obs = result.observation + total_reward += result.reward or 0 + + if obs.step % 10 == 0: + print(f"Step {obs.step}: Fires={obs.burning_count}, Water={obs.remaining_water}, Breaks={obs.remaining_breaks}") + +env.close() +``` + +### Example 3: Visualization with Matplotlib + +```python +import matplotlib.pyplot as plt +import numpy as np +import 
matplotlib.colors as mcolors +from envs.wildfire_env import WildfireEnv, WildfireAction + +env = WildfireEnv(base_url="http://localhost:8000") +result = env.reset() +obs = result.observation + +# Setup colormap +cmap = mcolors.ListedColormap([ + "black", # 0 = ash + "green", # 1 = fuel + "red", # 2 = burning + "saddlebrown", # 3 = firebreak + "blue" # 4 = water +]) +norm = mcolors.BoundaryNorm([0, 1, 2, 3, 4, 5], cmap.N) + +fig, ax = plt.subplots(figsize=(8, 8)) +plt.ion() + +for step in range(50): + if result.done: + break + + # Render grid + grid_2d = np.array(obs.grid).reshape(obs.height, obs.width) + ax.clear() + ax.imshow(grid_2d, cmap=cmap, norm=norm, interpolation='nearest') + ax.set_title( + f"Step {obs.step} | Fires: {obs.burning_count} | Burned: {obs.burned_count}\n" + f"Wind: {obs.wind_dir} | Humidity: {obs.humidity:.2f} | " + f"Water: {obs.remaining_water} | Breaks: {obs.remaining_breaks}" + ) + plt.pause(0.1) + + # Take action (simple: water first burning cell) + if obs.burning_count > 0 and obs.remaining_water > 0: + burning_indices = np.where(grid_2d == 2) + if len(burning_indices[0]) > 0: + y, x = burning_indices[0][0], burning_indices[1][0] + action = WildfireAction(action="water", x=int(x), y=int(y)) + else: + action = WildfireAction(action="wait") + else: + action = WildfireAction(action="wait") + + result = env.step(action) + obs = result.observation + +plt.ioff() +plt.show() +env.close() +``` + +### Example 4: Training Loop for RL + +```python +from envs.wildfire_env import WildfireEnv, WildfireAction +import random + +env = WildfireEnv(base_url="http://localhost:8000") + +num_episodes = 10 +episode_rewards = [] + +for episode in range(num_episodes): + result = env.reset() + obs = result.observation + episode_reward = 0 + episode_steps = 0 + + while not result.done: + # Random policy (replace with your RL agent) + if random.random() < 0.4 and obs.remaining_water > 0: + action = WildfireAction( + action="water", + x=random.randint(0, obs.width - 
1), + y=random.randint(0, obs.height - 1) + ) + elif random.random() < 0.3 and obs.remaining_breaks > 0: + action = WildfireAction( + action="break", + x=random.randint(0, obs.width - 1), + y=random.randint(0, obs.height - 1) + ) + else: + action = WildfireAction(action="wait") + + result = env.step(action) + obs = result.observation + episode_reward += result.reward or 0 + episode_steps += 1 + + episode_rewards.append(episode_reward) + state = env.state + print( + f"Episode {episode + 1}: " + f"Reward={episode_reward:.2f}, " + f"Steps={episode_steps}, " + f"Burned={state.total_burned}, " + f"Extinguished={state.total_extinguished}" + ) + +print(f"\nAverage reward: {sum(episode_rewards) / len(episode_rewards):.2f}") +env.close() +``` + +--- + +## š Web Interface + +The Wildfire Environment includes a **custom web interface** with visual grid display and wildfire-specific features. + +### Accessing the Web Interface + +#### Using Docker + +```bash +# Build base image (first time only) +docker build -t openenv-base:latest -f src/core/containers/images/Dockerfile . + +# Build wildfire environment +docker build -t wildfire-env:latest -f src/envs/wildfire_env/server/Dockerfile . 
+ +# Run container +docker run -p 8000:8000 -e ENABLE_WEB_INTERFACE=true wildfire-env:latest +``` + +Then open: `http://localhost:8000/web` + +#### Local Testing (No Docker) + +```bash +# Enable web interface with flag +ENABLE_WEB_INTERFACE=true PYTHONPATH=src uvicorn src.envs.wildfire_env.server.app:app --reload --host 0.0.0.0 --port 8000 +``` + +### Web Interface Features + +#### Left Pane: Action Interface +- **Wildfire-specific action form** + - Action dropdown: Water (Extinguish Fire), Break (Create Firebreak), Wait (Do Nothing) + - Coordinate inputs (X, Y) - auto-populated when clicking grid cells + - Coordinates show/hide based on action type +- **Environment stats display** + - Step count + - Water remaining + - Breaks remaining + - Burning cells count +- **Current state display** + - Status (Reset/Running) + - Episode ID + - Wind direction + - Humidity +- **Control buttons** + - Reset Environment + - Get State + +#### Right Pane: Visual Grid & Logs +- **Visual 2D Grid Display** š„ + - 16Ć16 grid rendered as color-coded cells + - **Color coding:** + - š© **Green** = Fuel (safe, value 1) + - š„ **Orange/Red** = Burning (fire, value 2) + - ⬠**Dark Gray** = Ash (burned, value 0) + - š« **Brown** = Firebreak (value 3) + - š¦ **Blue** = Watered/Damp (value 4) + - **Interactive:** Click cells to set coordinates for water/break actions + - **Auto-updates:** Grid refreshes automatically via WebSocket +- **Legend** + - Color-coded legend explaining all cell types +- **Action history** + - Log of all actions with timestamps + - Shows action, observation, reward, and done status + +#### Additional Features +- **WebSocket connection** - Real-time state updates without page refresh +- **Instructions panel** - Collapsible environment documentation +- **Grid status indicator** - Shows grid dimensions and cell count + +### Using the Web Interface + +1. **Start the server** (see above) +2. **Open browser** to: `http://localhost:8000/web` +3. 
**Click "Reset Environment"** to initialize and display the grid +4. **Interact with the grid:** + - Click on a cell to set coordinates for water/break actions + - Or manually enter X, Y coordinates +5. **Select action:** + - Choose `water`, `break`, or `wait` from the dropdown +6. **Click "Execute Action"** +7. **Watch the grid update in real-time:** + - Fire spreads automatically + - Cells change color based on state + - Stats update automatically +8. **Monitor resources** in the stats panel (water, breaks, burning count) + +--- + +## š§ Troubleshooting + +### Common Issues + +#### 1. Connection Errors + +**Problem:** `ConnectionRefusedError` or `Cannot connect to server` + +**Solutions:** +- Verify server is running: `curl http://localhost:8000/health` +- Check Docker container: `docker ps | grep wildfire` +- Ensure port 8000 is not in use: `lsof -i :8000` + +#### 2. Index Errors + +**Problem:** `IndexError: list index out of range` + +**Solution:** Ensure coordinates are within bounds: +```python +# Always check bounds before accessing +if 0 <= x < obs.width and 0 <= y < obs.height: + action = WildfireAction(action="water", x=x, y=y) +``` + +#### 3. Invalid Action Warnings + +**Problem:** Actions returning -0.05 reward repeatedly + +**Solutions:** +- Check `remaining_water` and `remaining_breaks` before using resources +- Verify coordinates are integers and within grid bounds +- Use `action="wait"` when resources are exhausted + +#### 4. Grid Format Confusion + +**Problem:** How to access grid cells? + +**Solution:** +```python +# Convert flat array to 2D +grid_2d = np.array(obs.grid).reshape(obs.height, obs.width) + +# Access cell at (x, y) +cell_value = grid_2d[y][x] + +# Or use flat index +index = y * obs.width + x +cell_value = obs.grid[index] +``` + +#### 5. 
Docker Build Failures + +**Problem:** `failed to solve: openenv-base:latest` + +**Solution:** +```bash +# Build base image first +docker build -t openenv-base:latest -f src/core/containers/images/Dockerfile . + +# Then build wildfire image +docker build -t wildfire-env:latest -f src/envs/wildfire_env/server/Dockerfile . +``` + +### Debugging Tips + +1. **Enable verbose logging:** + ```bash + docker logs -f wildfire-env-container + ``` + +2. **Check environment state:** + ```python + state = env.state + print(f"State: {state}") + ``` + +3. **Validate actions:** + ```python + obs = env.reset().observation + print(f"Bounds: 0 <= x < {obs.width}, 0 <= y < {obs.height}") + print(f"Resources: Water={obs.remaining_water}, Breaks={obs.remaining_breaks}") + ``` + +4. **Monitor grid changes:** + ```python + prev_grid = obs.grid.copy() + result = env.step(action) + new_grid = result.observation.grid + changes = [i for i, (a, b) in enumerate(zip(prev_grid, new_grid)) if a != b] + print(f"Changed cells: {len(changes)}") + ``` + +--- + +## 📊 Performance Considerations + +### Grid Size Impact + +- **Small grids (16×16)**: Fast, good for quick testing; this is what the bundled server app uses when `WILDFIRE_WIDTH`/`WILDFIRE_HEIGHT` are unset +- **Medium grids (32×32)**: Default of the `WildfireEnvironment` constructor, balanced performance +- **Large grids (64×64+)**: Slower, more realistic but requires more compute + +### Resource Limits + +- **Low water (4-8)**: Forces strategic decisions +- **High water (20+)**: More forgiving, easier to succeed +- **Low breaks (25)**: Emphasizes firebreak placement strategy +- **High breaks (100+)**: More freedom, less constraint + +### Episode Length + +- **Short episodes (50 steps)**: Fast iteration, good for debugging +- **Medium episodes (128 steps)**: Default, balanced +- **Long episodes (200+ steps)**: Better for complex strategies + +--- + +## 🧭 References + +### Papers & Research + +- **Rothermel Model**: [USDA Forest Service - Surface Fire Spread Model](https://www.fs.fed.us/rm/pubs_series/rmrs/gtr/rmrs_gtr371.pdf) +- **SimFire**: [MITRE Fireline 
Project](https://github.com/mitrefireline/simfire) +- **RL for Wildfires**: [arXiv:2311.15925](https://arxiv.org/abs/2311.15925) + +### OpenEnv Framework + +- **Main Repository**: [OpenEnv GitHub](https://github.com/openenv) +- **Documentation**: See `rfcs/` directory for design documents +- **Other Environments**: See `src/envs/` for more environment examples + +### Related Tools + +- **FastAPI**: [FastAPI Documentation](https://fastapi.tiangolo.com/) +- **Reinforcement Learning**: [Spinning Up in Deep RL](https://spinningup.openai.com/) +- **Docker**: [Docker Documentation](https://docs.docker.com/) + +--- + +## š License + +This environment is part of the OpenEnv project. See the main LICENSE file for details. + +--- + +## š¤ Contributing + +Contributions welcome! Please see `CONTRIBUTING.md` in the main OpenEnv repository. + +--- + +## š Citations + +```bibtex +@techreport{rothermel2022surface, + title = {The Rothermel Surface Fire Spread Model and Associated Developments}, + author = {Andrews, Patricia L. and Rothermel, Richard C.}, + year = {2022}, + institution = {USDA Forest Service}, + number = {RMRS-GTR-371}, + url = {https://www.fs.usda.gov/rm/pubs_series/rmrs/gtr/rmrs_gtr371.pdf} +} + +@article{tapley2023reinforcement, + title = {Reinforcement Learning for Wildfire Mitigation in Simulated Disaster Environments}, + author = {Tapley, A. and Dotter, M. and Doyle, M. 
def render_grid(obs: "WildfireObservation") -> str:
    """Render an observation as emoji rows followed by a one-line status footer.

    Args:
        obs: Observation exposing ``grid`` (flat, row-major list of cell
            codes), ``width``, ``height``, ``step``, ``wind_dir``,
            ``humidity``, ``burning_count`` and ``burned_count``.

    Returns:
        ``height`` lines of ``width`` glyphs each, then a footer line of the
        form ``step=... wind=... hum=... burning=... burned=...``, all joined
        with newlines. Cell codes missing from the legend render as ``"?"``.

    Raises:
        IndexError: If ``obs.grid`` holds fewer than ``width * height`` cells.
    """
    # Glyphs are spelled as escapes so the table cannot be damaged by a
    # file-encoding round trip. Colours follow the README legend.
    legend = {
        0: "\u2b1b",      # black square  - ash / burned out
        1: "\U0001f7e9",  # green square  - fuel
        2: "\U0001f7e5",  # red square    - burning
        3: "\U0001f7eb",  # brown square  - firebreak
        4: "\U0001f7e6",  # blue square   - watered / damp
    }
    w, h = obs.width, obs.height
    g = obs.grid
    # Index cell by cell (rather than slicing) so a short grid still raises.
    rows = [
        "".join(legend.get(g[y * w + x], "?") for x in range(w))
        for y in range(h)
    ]
    meta = f"step={obs.step} wind={obs.wind_dir} hum={obs.humidity:.2f} burning={obs.burning_count} burned={obs.burned_count}"
    return "\n".join(rows + [meta])
"N","NE","E","SE","S","SW","W","NW","CALM" + humidity: float # [0,1] + burning_count: int + burned_count: int # total ash (0) cells (cumulative) + reward_hint: float = 0.0 # optional shaping info + remaining_water: int = 0 + remaining_breaks: int = 0 + +@dataclass +class WildfireState(State): + episode_id: str = "" + step_count: int = 0 + total_burned: int = 0 + total_extinguished: int = 0 + last_action: str = "reset" + # For visibility / debugging (not required by core): + width: int = 0 + height: int = 0 + wind_dir: str = "CALM" + humidity: float = 0.25 + remaining_water: int = 20 # simple resource constraint + remaining_breaks: int = 50 + # internal full grid as flattened ints + grid: List[int] = field(default_factory=list) + # burn timers for each cell (track how long cells have been burning/damp) + burn_timers: List[int] = field(default_factory=list) diff --git a/src/envs/wildfire_env/server/Dockerfile b/src/envs/wildfire_env/server/Dockerfile new file mode 100644 index 00000000..060d6e91 --- /dev/null +++ b/src/envs/wildfire_env/server/Dockerfile @@ -0,0 +1,22 @@ +# Build ARG for CI/CD consistency +ARG BASE_IMAGE=openenv-base:latest +FROM ${BASE_IMAGE} + +# Work in app directory (already used by base image) +WORKDIR /app + +# Copy OpenEnv core +COPY src/core/ /app/src/core/ + +# Copy Wildfire environment +COPY src/envs/wildfire_env/ /app/src/envs/wildfire_env/ + +# Environment variables (override at runtime if needed) +ENV PYTHONPATH=/app/src + +# Health check +HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ + CMD curl -f http://localhost:8000/health || exit 1 + +# Run FastAPI server +CMD ["uvicorn", "envs.wildfire_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/src/envs/wildfire_env/server/__init__.py b/src/envs/wildfire_env/server/__init__.py new file mode 100644 index 00000000..4434f887 --- /dev/null +++ b/src/envs/wildfire_env/server/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) Arizona State University and 
# server/app.py
# FastAPI entry point for the wildfire environment: builds the base
# environment app and, when ENABLE_WEB_INTERFACE is truthy, adds the custom
# wildfire web UI routes and a WebSocket endpoint on top of it.
import os
from fastapi.responses import HTMLResponse
from fastapi import WebSocket, WebSocketDisconnect
from core.env_server import create_fastapi_app
from core.env_server.web_interface import load_environment_metadata, WebInterfaceManager
from core.env_server.types import Action, Observation
from ..models import WildfireAction, WildfireObservation
from .wildfire_environment import WildfireEnvironment
from .wildfire_web_interface import get_wildfire_web_interface_html
from dataclasses import asdict

# Grid size for the served environment; falls back to 16x16 when the
# variables are unset.
W = int(os.getenv("WILDFIRE_WIDTH", "16"))
H = int(os.getenv("WILDFIRE_HEIGHT", "16"))
# One module-level environment instance is shared by every route below.
env = WildfireEnvironment(width=W, height=H)

# Create base app without web interface
app = create_fastapi_app(env, WildfireAction, WildfireObservation)

# Check if web interface should be enabled
# This can be controlled via environment variable
# (accepted truthy spellings, case-insensitive: "true", "1", "yes")
enable_web = (
    os.getenv("ENABLE_WEB_INTERFACE", "false").lower() in ("true", "1", "yes")
)

if enable_web:
    # Load environment metadata
    metadata = load_environment_metadata(env, 'wildfire_env')

    # Create web interface manager (needed for /web/reset, /web/step, /ws endpoints)
    web_manager = WebInterfaceManager(env, WildfireAction, WildfireObservation, metadata)

    # Add our custom wildfire interface route
    @app.get("/web", response_class=HTMLResponse)
    async def wildfire_web_interface():
        """Custom wildfire-specific web interface."""
        return get_wildfire_web_interface_html(metadata)

    # Add web interface endpoints (these are needed for the interface to work)
    @app.get("/web/metadata")
    async def web_metadata():
        """Get environment metadata."""
        return asdict(metadata)

    @app.websocket("/ws")
    async def websocket_endpoint(websocket: WebSocket):
        """WebSocket endpoint for real-time updates."""
        await web_manager.connect_websocket(websocket)
        try:
            while True:
                # Keep connection alive
                # (the received text is discarded; this await only parks the
                # handler until the client sends something or disconnects)
                await websocket.receive_text()
        except WebSocketDisconnect:
            await web_manager.disconnect_websocket(websocket)

    @app.post("/web/reset")
    async def web_reset():
        """Reset endpoint for web interface."""
        return await web_manager.reset_environment()

    @app.post("/web/step")
    async def web_step(request: dict):
        """Step endpoint for web interface."""
        # The action fields are read from the "action" key of the request
        # body; a missing key is forwarded as an empty dict.
        action_data = request.get("action", {})
        return await web_manager.step_environment(action_data)

    @app.get("/web/state")
    async def web_state():
        """State endpoint for web interface."""
        return web_manager.get_state()
import os
import random
import uuid

from core.env_server import Environment
from ..models import WildfireAction, WildfireObservation, WildfireState

# Helpers
# Wind name -> (dx, dy) grid offset. "N" is (0, -1), i.e. toward row 0;
# "CALM" is (0, 0) and therefore never matches a neighbor offset.
DIRS_8 = {
    "N": (0, -1), "NE": (1, -1), "E": (1, 0), "SE": (1, 1),
    "S": (0, 1), "SW": (-1, 1), "W": (-1, 0), "NW": (-1, -1),
    "CALM": (0, 0),
}

def idx(x: int, y: int, w: int) -> int:
    """Return the flat row-major index of cell (x, y) in a grid of width w."""
    # Defensive type conversion to ensure all parameters are integers
    x, y, w = int(x), int(y), int(w)
    return y * w + x

def in_bounds(x: int, y: int, w: int, h: int) -> bool:
    """Return True when (x, y) lies inside a w-by-h grid."""
    # Defensive type conversion to ensure all parameters are integers
    # (int() truncates toward zero, so non-integer inputs are truncated
    # before the range test)
    x, y, w, h = int(x), int(y), int(w), int(h)
    return 0 <= x < w and 0 <= y < h


class WildfireEnvironment(Environment):
    """
    Weather-aware wildfire simulation.

    Grid encodings:
    0 = ash (burned out)
    1 = fuel / vegetation
    2 = burning
    3 = firebreak
    4 = watered / damp

    Each step:
    - agent acts (water/break/wait)
    - burning spreads to neighbors with wind + humidity effects
    - burning cells burn for multiple ticks, then become ash

    An episode ends when no cell is burning or when ``max_steps`` steps
    have been taken (see ``_is_done``).
    """

    def __init__(
        self,
        width: int = 32,
        height: int = 32,
        base_ignite_prob: float = 0.30,
        wind_bias: float = 0.20,  # kept for compatibility (not directly used in B model)
        diag_factor: float = 0.7,  # kept for compatibility (not directly used in B model)
        humidity: float = 0.25,
        init_sources: int = 2,
        seed: int = 3407,
        max_steps: int = 128,
        water_capacity: int = 8,  # kept small to encourage strategic water use
        break_capacity: int = 50,
    ):
        """Store the simulation configuration and create a blank state.

        Args:
            width: Grid width in cells; replaced by ``WILDFIRE_WIDTH`` if set.
            height: Grid height in cells; replaced by ``WILDFIRE_HEIGHT`` if set.
            base_ignite_prob: Base per-neighbor ignition probability per tick.
            wind_bias: Stored but not read by ``_spread_fire``.
            diag_factor: Stored but not read by ``_spread_fire``, which uses
                its own fixed diagonal multiplier.
            humidity: Baseline humidity; replaced by ``WILDFIRE_HUMIDITY`` if
                set. ``reset`` jitters it by up to +/-0.05 and clamps to [0, 1].
            init_sources: Number of ignition draws made by ``reset``.
            seed: Seed for the private ``random.Random`` instance.
            max_steps: Step count at which the episode is considered done.
            water_capacity: Water uses available after each ``reset``.
            break_capacity: Firebreak uses available after each ``reset``.

        The grid itself is only built by ``reset``; until then the state
        holds an empty grid.
        """
        super().__init__()

        # --- Env-var overrides (optional) ---
        # When set, these take precedence over the constructor arguments.
        width = int(os.environ.get("WILDFIRE_WIDTH", width))
        height = int(os.environ.get("WILDFIRE_HEIGHT", height))
        humidity = float(os.environ.get("WILDFIRE_HUMIDITY", humidity))
        forced_wind = os.environ.get("WILDFIRE_WIND", None)

        # Store config (ensure integers)
        self.w = int(width)
        self.h = int(height)
        self.base_ignite_prob = base_ignite_prob
        self.wind_bias = wind_bias
        self.diag_factor = diag_factor
        self.init_humidity = humidity
        self.init_sources = init_sources
        self.rng = random.Random(seed)
        self.max_steps = max_steps
        self.init_water = water_capacity
        self.init_breaks = break_capacity
        self.forced_wind = forced_wind

        # burn lifetime in ticks (balanced model)
        self.burn_lifetime = 3

        self._state = WildfireState()

    # --- Core API ---

    def reset(self) -> WildfireObservation:
        """Start a new episode and return its initial observation.

        Fills the grid with fuel, picks the wind (the forced value when it is
        a key of ``DIRS_8``, otherwise a random key), jitters humidity, and
        ignites ``init_sources`` randomly drawn cells. Draws are independent,
        so two of them may land on the same cell. The RNG is not re-seeded,
        so successive episodes differ.
        """
        # Ensure w and h are integers (defensive type conversion)
        w, h = int(self.w), int(self.h)

        # Start with all fuel
        grid = [1] * (w * h)

        # Wind (forced if provided)
        # A forced value that is not an exact DIRS_8 key is ignored.
        if self.forced_wind and self.forced_wind in DIRS_8:
            wind_dir = self.forced_wind
        else:
            wind_dir = self.rng.choice(list(DIRS_8.keys()))

        # Humidity small variation around init
        humidity = min(1.0, max(0.0, self.init_humidity + self.rng.uniform(-0.05, 0.05)))

        # Place initial fires
        for _ in range(self.init_sources):
            x = self.rng.randrange(w)
            y = self.rng.randrange(h)
            i = idx(x, y, w)
            # Safety check: ensure index is within grid bounds
            if 0 <= i < len(grid):
                grid[i] = 2

        self._state = WildfireState(
            episode_id=str(uuid.uuid4()),
            step_count=0,
            total_burned=0,
            total_extinguished=0,
            last_action="reset",
            width=w,
            height=h,
            wind_dir=wind_dir,
            humidity=humidity,
            remaining_water=self.init_water,
            remaining_breaks=self.init_breaks,
            grid=grid,
        )

        # per-cell burn timers (persist across steps)
        self._state.burn_timers = [0] * (w * h)

        obs = self._make_observation(reward_hint=0.0)
        return obs

    def step(self, action: WildfireAction) -> WildfireObservation:
        """Apply one agent action, advance the fire one tick, and score it.

        Returns an observation whose ``reward`` and ``done`` attributes are
        set, with ``reward_hint`` equal to the same reward. The reward is the
        sum of: the action's own reward (or -0.05 for an unknown action,
        missing coordinates or an exhausted resource), a spread/shrink term,
        a burned-area penalty, a constant -0.01 time penalty and, on the
        final step, end-of-episode bonuses.
        """
        st = self._state
        reward = 0.0

        # --- Agent action effects ---
        if (
            action.action == "water"
            and st.remaining_water > 0
            and action.x is not None
            and action.y is not None
        ):
            reward += self._apply_water(action.x, action.y)
        elif (
            action.action == "break"
            and st.remaining_breaks > 0
            and action.x is not None
            and action.y is not None
        ):
            reward += self._apply_break(action.x, action.y)
        elif action.action == "wait":
            pass
        else:
            reward -= 0.05  # invalid or exhausted resources

        # --- Natural fire dynamics ---
        # Baselines are taken after the agent's action, so cells the action
        # itself changed do not count toward the deltas below.
        prev_burning = self._burning_count()
        prev_burned = sum(1 for v in st.grid if v == 0)

        newly_burned = self._spread_fire()
        new_burning = self._burning_count()
        now_burned = sum(1 for v in st.grid if v == 0)

        st.total_burned += newly_burned
        st.step_count += 1
        st.last_action = action.action

        # --- Spread vs containment shaping ---
        spread_delta = new_burning - prev_burning
        burned_delta = now_burned - prev_burned

        # Strong penalty for spread
        if spread_delta > 0:
            reward -= 0.15 * spread_delta  # focus on containment
        elif spread_delta < 0:
            reward += 0.10 * abs(spread_delta)  # reward shrinkage

        # Mild penalty for newly burned cells (area loss)
        if burned_delta > 0:
            reward -= 0.05 * burned_delta

        # Small time penalty to prefer fast control
        reward -= 0.01

        done = self._is_done()

        # --- End of episode bonuses ---
        if done:
            saved_ratio = self._saved_cells() / (self.w * self.h)
            burned_ratio = now_burned / (self.w * self.h)
            burning_left = self._burning_count()

            # Big containment bonus
            if burning_left == 0:
                reward += 0.5 + 0.5 * saved_ratio

            # Fallback proportional reward
            # (added on every terminal step, with or without the bonus above)
            reward += 0.2 * (1.0 - burned_ratio)

        obs = self._make_observation(reward_hint=reward)
        obs.done = done
        obs.reward = reward
        return obs


    # --- Internal mechanics ---

    def _apply_water(self, x: int, y: int) -> float:
        """Drop water on (x, y) and return the action's reward contribution.

        Out-of-bounds or unindexable targets return -0.05 without spending
        water. Any other target spends one unit of water, whatever the cell
        held: burning -> damp (+0.25, counted as extinguished), fuel -> damp
        (-0.10), already damp / ash / firebreak -> unchanged (-0.05).
        """
        st = self._state
        # Ensure x and y are integers (defensive type conversion)
        x, y = int(x), int(y)
        if not in_bounds(x, y, self.w, self.h):
            return -0.05

        # Strong penalty if no water left
        # (step() already requires remaining_water > 0 before calling, so
        # this only triggers for direct callers)
        if st.remaining_water <= 0:
            return -0.5

        i = idx(x, y, self.w)
        # Safety check: ensure index is within grid bounds
        # (can fail when the grid is empty, i.e. before the first reset())
        if i < 0 or i >= len(st.grid):
            return -0.05

        reward = 0.0

        if st.grid[i] == 2:
            st.grid[i] = 4  # extinguish & dampen
            st.burn_timers[i] = 0
            st.total_extinguished += 1
            reward += 0.25
        elif st.grid[i] == 1:
            st.grid[i] = 4  # dampen fuel (mild penalty to avoid spamming)
            st.burn_timers[i] = 0
            reward -= 0.10
        elif st.grid[i] == 4:
            # redundant watering
            reward -= 0.05
        else:
            # watering ash/break gives slight penalty
            reward -= 0.05

        st.remaining_water -= 1
        return reward

    def _apply_break(self, x: int, y: int) -> float:
        """Build a firebreak at (x, y) and return the reward contribution.

        Out-of-bounds or unindexable targets return -0.05 without spending a
        break. Any other target spends one break: fuel or damp -> firebreak
        (+0.15), burning -> firebreak (-0.02), existing firebreak (-0.01),
        ash (-0.02). Unlike ``_apply_water`` there is no remaining-resource
        check here; ``step`` performs it before calling.
        """
        st = self._state
        # Ensure x and y are integers (defensive type conversion)
        x, y = int(x), int(y)
        if not in_bounds(x, y, self.w, self.h):
            return -0.05
        i = idx(x, y, self.w)
        # Safety check: ensure index is within grid bounds
        if i < 0 or i >= len(st.grid):
            return -0.05

        reward = 0.0

        if st.grid[i] in (1, 4):
            st.grid[i] = 3
            st.burn_timers[i] = 0
            reward += 0.15  # slightly more than before to make firebreaks attractive
        elif st.grid[i] == 2:
            st.grid[i] = 3
            st.burn_timers[i] = 0
            reward -= 0.02
        elif st.grid[i] == 3:
            reward -= 0.01
        else:
            reward -= 0.02

        st.remaining_breaks -= 1
        return reward

    def _spread_fire(self) -> int:
        """
        Balanced wildfire spread model:
        - burning cells persist for multiple ticks before turning to ash
        - 8-direction spread (diagonals weaker)
        - wind accelerates in wind direction, weakens upwind
        - humidity suppresses ignition probability
        - water (4) is IMMUNE to ignition while damp and reverts to fuel after several ticks

        Returns the number of burning cells that turned to ash this tick.
        Ignitions are decided against the pre-tick grid and applied in a
        second pass, so a cell ignited this tick does not spread until the
        next one.
        """
        st = self._state
        new_grid = st.grid[:]
        newly_burned = 0

        # Ensure w and h are integers (defensive type conversion)
        w, h = int(self.w), int(self.h)

        # 8-neighbor model
        neighbors = [(-1, 0), (1, 0), (0, -1), (0, 1),
                     (-1, -1), (1, -1), (-1, 1), (1, 1)]
        wx, wy = DIRS_8.get(st.wind_dir, (0, 0))

        base = self.base_ignite_prob
        humidity_factor = (1.0 - st.humidity)

        ignite_flags = [False] * (w * h)

        # First pass: evaluate ignitions, increment burn timers
        for y in range(h):
            for x in range(w):
                i = idx(x, y, w)
                # Safety check: ensure index is within grid bounds
                if i < 0 or i >= len(st.grid):
                    continue
                cell = st.grid[i]

                if cell == 2:  # burning
                    st.burn_timers[i] += 1

                    for dx, dy in neighbors:
                        nx, ny = x + dx, y + dy
                        if not in_bounds(nx, ny, w, h):
                            continue
                        ni = idx(nx, ny, w)
                        # Safety check: ensure neighbor index is within grid bounds
                        if ni < 0 or ni >= len(st.grid):
                            continue
                        target = st.grid[ni]

                        # Only fuel or water/damp can be candidates, but cells with code 4 (watered/damp) are immune to ignition
                        if target == 4:
                            # Watered/damp cells (code 4) do not ignite at all while in this state
                            continue
                        if target != 1:
                            continue

                        # Wind multiplier
                        if (dx, dy) == (wx, wy):
                            wind_mult = 2.0
                        elif (dx, dy) == (-wx, -wy):
                            wind_mult = 0.5
                        else:
                            wind_mult = 1.0

                        # Diagonals weaker
                        diag_mult = 0.6 if (dx != 0 and dy != 0) else 1.0

                        p = base * humidity_factor * wind_mult * diag_mult
                        p = max(0.0, min(1.0, p))
                        # One RNG draw per (burning cell, fuel neighbor) pair;
                        # the draw order is part of seeded reproducibility.
                        if self.rng.random() < p:
                            # Safety check: ensure ni is within ignite_flags bounds
                            if 0 <= ni < len(ignite_flags):
                                ignite_flags[ni] = True

        # Second pass: apply transitions
        for i, cell in enumerate(st.grid):
            # Safety check: ensure index is within bounds for all arrays
            if i < 0 or i >= len(new_grid) or i >= len(st.burn_timers):
                continue

            if cell == 2:
                # burns for burn_lifetime ticks before turning to ash
                if st.burn_timers[i] >= self.burn_lifetime:
                    new_grid[i] = 0  # ash
                    newly_burned += 1
                else:
                    new_grid[i] = 2  # keep burning
            elif i < len(ignite_flags) and ignite_flags[i] and new_grid[i] == 1:
                new_grid[i] = 2
                st.burn_timers[i] = 0
            elif cell == 4:
                # Water stays damp for several ticks before reverting to fuel
                st.burn_timers[i] += 1
                if st.burn_timers[i] >= 6:  # was 3; extend to make water useful
                    new_grid[i] = 1

        st.grid = new_grid
        return newly_burned

    def _burning_count(self) -> int:
        """Return the number of cells currently burning (code 2)."""
        return sum(1 for v in self._state.grid if v == 2)

    def _saved_cells(self) -> int:
        """Return the number of cells that are not ash."""
        # cells not turned to ash (includes fuel, burning, break, water)
        return sum(1 for v in self._state.grid if v in (1, 2, 3, 4))

    def _is_done(self) -> bool:
        """Return True when nothing is burning or the step budget is spent."""
        return self._burning_count() == 0 or self._state.step_count >= self.max_steps

    def _make_observation(self, reward_hint: float = 0.0) -> WildfireObservation:
        """Build an observation from the current state.

        The grid is copied, so later simulation steps do not mutate an
        observation that was already returned.
        """
        st = self._state
        burning = self._burning_count()
        burned = sum(1 for v in st.grid if v == 0)
        return WildfireObservation(
            grid=st.grid[:],
            width=self.w,
            height=self.h,
            step=st.step_count,
            wind_dir=st.wind_dir,
            humidity=st.humidity,
            burning_count=burning,
            remaining_water=st.remaining_water,  # water uses left
            remaining_breaks=st.remaining_breaks,  # firebreak uses left
            burned_count=burned,
            reward_hint=reward_hint,
        )

    # --- Required abstract property implementation ---
    @property
    def state(self) -> WildfireState:
        """Return the current environment state."""
        return self._state
+ + ++ Click on a cell to set coordinates for water/break actions +
+\2', html_content, flags=re.DOTALL)
+ html_content = re.sub(r'`([^`]+)`', r'\1', html_content)
+
+ # Convert bold and italic
+ html_content = re.sub(r'\*\*(.*?)\*\*', r'\1', html_content)
+ html_content = re.sub(r'\*(.*?)\*', r'\1', html_content)
+
+ # Convert lists
+ html_content = re.sub(r'^- (.*?)$', r'