diff --git a/.flake8 b/.flake8 new file mode 100644 index 00000000..dddb24d2 --- /dev/null +++ b/.flake8 @@ -0,0 +1,11 @@ +[flake8] +max-line-length = 79 +extend-ignore = E203,W503 +exclude = + .git, + __pycache__, + .venv, + venv, + build, + dist, + *.egg-info \ No newline at end of file diff --git a/.gitignore b/.gitignore index e6173067..04d64c5a 100644 --- a/.gitignore +++ b/.gitignore @@ -52,7 +52,6 @@ coverage.xml # Virtual environments .env .venv -env/ venv/ ENV/ env.bak/ @@ -66,21 +65,33 @@ dmypy.json # Pyre type checker .pyre/ -# IDE -.vscode/ +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be added to the global gitignore or merged into this project gitignore. For a PyCharm +# project, it is recommended to ignore the whole idea folder. .idea/ -*.swp -*.swo -*~ -# OS +# VS Code +.vscode/ + +# macOS .DS_Store -.DS_Store? -._* -.Spotlight-V100 -.Trashes -ehthumbs.db + +# Windows Thumbs.db +ehthumbs.db +Desktop.ini + +# Docker +.dockerignore -# Logs -*.log +# Exclude anything containing "claude" (case-insensitive) +*claude* +*Claude* +*CLAUDE* diff --git a/README.md b/README.md index 4ac09658..e42d118a 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,195 @@ -# envtorch -An environment library for RL and beyond \ No newline at end of file +# EnvTorch: Agentic Execution Environments + +A unified framework for CodeAct environments that supports both agent execution and RL training, built on Gym/Gymnasium APIs with PyTorch/HuggingFace integration patterns. + +## Overview + +EnvTorch provides a standard for agentic execution environments following the CodeAct paradigm, where actions are arbitrary Python code that can chain multiple tool calls. The framework bridges traditional RL environments with modern agent capabilities. 
+ +### Key Features + +- **CodeAct Execution**: Actions are Python code strings executed in persistent contexts +- **State Persistence**: Variables and functions persist across steps within episodes +- **Tool Integration**: MCP (Model Context Protocol) support for external capabilities +- **RL Compatibility**: Transform system for reward computation and training +- **Error Handling**: Exceptions become observations for agent learning +- **Clean APIs**: Minimal, opinionated design following KISS principles + +## Quick Start + +```python +from src import create_codeact_env, CodeAction + +# Create environment +env = create_codeact_env() +obs = env.reset() + +# Execute Python code +action = CodeAction(code=""" +x = 10 +y = 20 +result = x * y +print(f"Result: {result}") +result # Return value +""") + +obs = env.step(action) +print(f"Output: {obs.execution_result.stdout}") +print(f"Return: {obs.execution_result.return_value}") +``` + +## Core Components + +### Actions and Observations + +```python +# Actions contain arbitrary Python code +action = CodeAction(code="math.sqrt(16)") + +# Observations include execution results +obs = env.step(action) +print(obs.execution_result.return_value) # 4.0 +print(obs.execution_result.success) # True +print(obs.execution_result.stdout) # Any print output +``` + +### Tool Integration + +```python +from src import create_mcp_environment + +# Environment with MCP tools +env = create_mcp_environment() +obs = env.reset() + +# Tools available as Python objects +action = CodeAction(code=""" +content = "Hello, world!" 
+file_write("/tmp/hello.txt", content) +result = file_read("/tmp/hello.txt") +print(f"File contents: {result}") +""") + +obs = env.step(action) +``` + +### RL Training with Transforms + +```python +from src import create_math_env_transform + +# Environment that rewards correct math solutions +transform = create_math_env_transform(expected_answer=42) +env = create_codeact_env() +env.transform = transform + +# Agent gets rewarded for correct answers +action = CodeAction(code="21 * 2") # Correct answer +obs = env.step(action) +print(obs.reward) # 1.0 (success) + quality bonuses +``` + +## Architecture + +### Type System +- `Action` / `CodeAction`: Base and concrete action types +- `Observation` / `CodeObservation`: Base and concrete observation types +- `State` / `CodeState`: Environment state with execution context +- `ExecutionResult`: Detailed code execution results + +### Core Classes +- `Environment`: Base class following Gym API +- `CodeActEnvironment`: Main environment for code execution +- `Transform`: Base class for observation modification +- `ToolRegistry`: Manages available tools and functions + +### Transform Examples +- `CodeSafetyTransform`: Penalizes unsafe code patterns +- `MathProblemTransform`: Rewards correct numerical answers +- `CodeQualityTransform`: Evaluates code quality metrics +- `CompositeTransform`: Combines multiple transforms + +## File Structure + +``` +src/ +├── types.py # Core type definitions +├── interfaces.py # Abstract base classes +├── environment.py # Main CodeAct environment +├── transforms.py # Transform implementations +├── mcp.py # MCP integration +└── __init__.py # Clean exports +``` + +## Usage Patterns + +### Agent Exploration +```python +env = create_codeact_env() +obs = env.reset() + +# Multi-step problem solving +action1 = CodeAction(code="data = [1, 2, 3, 4, 5]") +obs = env.step(action1) + +action2 = CodeAction(code="mean = sum(data) / len(data); mean") +obs = env.step(action2) # Uses persistent data from step 1 +``` 
+ +### RL Training Loop +```python +# Create environment with reward function +transform = create_safe_env_transform() +env = create_codeact_env() +env.transform = transform + +for episode in range(100): + obs = env.reset() + action = generate_action() # From your policy + obs = env.step(action) + + reward = obs.reward # Computed by transforms + # Update policy based on reward +``` + +### Hybrid Agent + RL +```python +# Phase 1: Agent exploration +env = create_codeact_env() +# Agent explores different solution approaches + +# Phase 2: RL optimization +env.transform = optimization_transform +# Train to optimize based on exploration insights +``` + +## Design Principles + +- **KISS Approach**: Minimal, opinionated design +- **Single Way**: One clear way to accomplish tasks +- **Pythonic**: Follows PyTorch/HuggingFace patterns +- **No Inline Comments**: Code should be self-explanatory +- **Functional Composition**: Private functions explain complex logic + +## Testing + +Run the test suite: +```bash +python test_unified.py +``` + +Run examples: +```bash +python example.py +``` + +## Requirements + +See `requirements.txt` for dependencies. Core requirements: +- Python 3.10+ +- PyTorch 2.0+ +- HuggingFace datasets + +## License + +BSD 3-Clause License (see LICENSE file) \ No newline at end of file diff --git a/example.py b/example.py new file mode 100644 index 00000000..4e7a8474 --- /dev/null +++ b/example.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +""" +Simple example demonstrating EnvTorch environment usage. + +This shows the minimal steps to get started with code execution environments. 
+""" + +from src import CodeAction, CodeExecutionEnvironment, CodingEnv, Transform + + +def basic_code_execution_example(): + """Basic example using CodeExecutionEnvironment.""" + print("=== Basic Code Execution Example ===") + + # Create basic code execution environment + env = CodeExecutionEnvironment() + + print("Note: This example shows the interface but requires Docker to actually run") + print("Environment created successfully!") + + # Create an action to calculate compound interest + action = CodeAction( + code=""" +# Calculate compound interest +principal = 1000 +rate = 0.05 +time = 3 + +final_amount = principal * (1 + rate) ** time +interest_earned = final_amount - principal + +print(f"Principal: ${principal}") +print(f"Rate: {rate*100}%") +print(f"Time: {time} years") +print(f"Final amount: ${final_amount:.2f}") +print(f"Interest earned: ${interest_earned:.2f}") + +final_amount +""" + ) + + print(f"Created action with code length: {len(action.code)} characters") + print() + + +def coding_environment_example(): + """Example using CodingEnv with safety and quality transforms.""" + print("=== Coding Environment Example ===") + + # Create coding environment with built-in transforms + env = CodingEnv() + + print("CodingEnv created with safety and quality transforms!") + print("This environment includes:") + print("• Code safety checks") + print("• Code quality analysis") + print("• Composite transform system") + + # Example of safe code + safe_action = CodeAction( + code=""" +# Safe mathematical calculation +import math + +def calculate_fibonacci(n): + if n <= 1: + return n + return calculate_fibonacci(n-1) + calculate_fibonacci(n-2) + +# Calculate first 10 Fibonacci numbers +fib_sequence = [calculate_fibonacci(i) for i in range(10)] +print(f"First 10 Fibonacci numbers: {fib_sequence}") +fib_sequence +""" + ) + + print(f"Created safe action with code length: {len(safe_action.code)} characters") + print() + + +def transform_system_example(): + """Example 
showing how to create custom transforms.""" + print("=== Transform System Example ===") + + # Example custom transform + class RewardTransform(Transform): + """Transform that adds rewards based on code execution results.""" + + def __call__(self, observation): + # This is just an example - actual implementation would need + # a proper observation object with execution results + print("Custom transform would analyze execution results here") + print("and add rewards based on success criteria") + return observation + + transform = RewardTransform() + print("Created custom RewardTransform") + + print("Transform system allows:") + print("• Chaining multiple transforms") + print("• Adding rewards for RL training") + print("• Custom observation processing") + print("• Safety and quality checks") + print() + + +if __name__ == "__main__": + print("EnvTorch Environment Examples") + print("=" * 40) + print() + + basic_code_execution_example() + coding_environment_example() + transform_system_example() + + print("=" * 40) + print("Examples complete! 🎉") + print() + print("Key takeaways:") + print("• CodeAction(code='...') for arbitrary Python execution") + print("• CodeExecutionEnvironment provides base functionality") + print("• CodingEnv adds safety and quality transforms") + print("• Transform system enables customization and RL training") + print("• Docker integration provides sandboxed execution") + print("=" * 40) diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 00000000..54935272 --- /dev/null +++ b/src/__init__.py @@ -0,0 +1,45 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +"""EnvTorch: Standardized agentic execution environments.""" + +# Core interfaces and types +from .core.env import ( + Environment, Transform, Tool, ToolRegistry, + Action, CodeAction, Observation, CodeObservation, + State, CodeState, ExecutionResult, + CompositeTransform, NullTransform, + CodeExecutionEnvironment +) + +# Docker execution +from .core.docker import DockerExecutor + +# Environment implementations +from .envs import CodingEnv + +__version__ = "0.1.0" + +__all__ = [ + # Core interfaces + "Environment", "Transform", "Tool", "ToolRegistry", + + # Types + "Action", "CodeAction", "Observation", "CodeObservation", + "State", "CodeState", "ExecutionResult", + + # Base transforms + "CompositeTransform", "NullTransform", + + # Base environment implementation + "CodeExecutionEnvironment", + + # Execution engines + "DockerExecutor", + + # Concrete environment implementations + "CodingEnv", +] diff --git a/src/core/__init__.py b/src/core/__init__.py index 65d1442c..dda1ff4b 100644 --- a/src/core/__init__.py +++ b/src/core/__init__.py @@ -1 +1,13 @@ -# Core SDK functionality +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +"""Core components for agentic environments.""" + +# Re-export main components from submodules for convenience +from .env import * +from .docker import * + +# Note: MCP module doesn't export anything yet diff --git a/src/core/docker/__init__.py b/src/core/docker/__init__.py new file mode 100644 index 00000000..0864cb91 --- /dev/null +++ b/src/core/docker/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +"""Docker-based code execution.""" + +from .docker_executor import DockerExecutor + +__all__ = ["DockerExecutor"] \ No newline at end of file diff --git a/src/core/docker/docker_executor.py b/src/core/docker/docker_executor.py new file mode 100644 index 00000000..fe06460b --- /dev/null +++ b/src/core/docker/docker_executor.py @@ -0,0 +1,234 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import json +import select +import subprocess +import time +from typing import Any + +from ..env.types import ExecutionResult + + +class DockerExecutor: + """Simple Docker-based Python code executor with persistent session.""" + + def __init__(self, image: str = "python:3.11-slim", timeout_seconds: int = 30): + self.image = image + self.timeout_seconds = timeout_seconds + self.container_id: str | None = None + self._process: subprocess.Popen | None = None + + def start_session(self) -> None: + """Start new Docker container with persistent Python session.""" + if self._process: + self.stop_session() + + # Run interactive Python in container + cmd = [ + "docker", "run", "--rm", "-i", + "--memory=512m", + "--cpus=1.0", + "--network=host", # For MCP integration later + self.image, + "python", "-u", "-c", + "import sys; [exec(input()) for _ in iter(int, 1)]" + ] + + try: + self._process = subprocess.Popen( + cmd, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + bufsize=0 + ) + + # Get container ID for potential cleanup + # This is a bit hacky but works for the simple approach + time.sleep(0.1) # Give container time to start + + except Exception as e: + raise RuntimeError(f"Failed to start Docker container: {e}") from e + + def execute_code(self, code: str) -> ExecutionResult: + """Send code to running container and get results.""" + if not self._process: + raise RuntimeError("No active 
session. Call start_session() first.") + + start_time = time.time() + + try: + # Wrap user code to capture output and results + wrapped_code = self._wrap_code_for_execution(code) + + # Send as ONE line: the container loop exec()s a single input() line + self._process.stdin.write(f"exec({wrapped_code!r})\n") + self._process.stdin.flush() + + # Read result with timeout + stdout_lines = [] + stderr_lines = [] + + # Simple timeout mechanism - not perfect but works for MVP + end_time = start_time + self.timeout_seconds + result_captured = False + + while time.time() < end_time and not result_captured: + if self._process.poll() is not None: + # Process died + break + + # Wait up to 100 ms for output; pipe objects have no settimeout(), + # so select() is the readiness check (works on POSIX pipes only) + ready, _, _ = select.select([self._process.stdout], [], [], 0.1) + if not ready: + continue + line = self._process.stdout.readline() + if not line: + break + if line.startswith("__ENVTORCH_RESULT__"): + result_captured = True + result_json = line[len("__ENVTORCH_RESULT__"):].strip() + break + stdout_lines.append(line.rstrip()) + + if not result_captured: + return ExecutionResult.from_exception( + TimeoutError(f"Code execution timed out after {self.timeout_seconds}s"), + stdout="\n".join(stdout_lines), + stderr="\n".join(stderr_lines) + ) + + # Parse result + try: + result_data = json.loads(result_json) + execution_time_ms = (time.time() - start_time) * 1000 + + if result_data.get("success", True): + return ExecutionResult.from_success( + return_value=result_data.get("return_value"), + stdout=result_data.get("stdout", ""), + stderr=result_data.get("stderr", ""), + execution_time_ms=execution_time_ms + ) + else: + # Reconstruct exception from data + exc_type = result_data.get("exception_type", "Exception") + exc_message = result_data.get("exception_message", "") + + # Create a generic exception for now + exc = Exception(f"{exc_type}: {exc_message}") + result = ExecutionResult.from_exception( + exc, + stdout=result_data.get("stdout", ""), + stderr=result_data.get("stderr", "") 
+ ) + result.traceback_str = result_data.get("traceback", "") + result.execution_time_ms = execution_time_ms + return result + + except json.JSONDecodeError as e: + return ExecutionResult.from_exception( + RuntimeError(f"Failed to parse execution result: {e}"), + stdout="\n".join(stdout_lines) + ) + + except Exception as e: + return ExecutionResult.from_exception(e) + + def get_variable_dump(self) -> dict[str, Any]: + """Get all variables for render() - send globals() inspection command.""" + if not self._process: + raise RuntimeError("No active session. Call start_session() first.") + + dump_code = ''' +import json as _json +_dump = {} +for _name, _value in list(globals().items()): + if not _name.startswith('_'): + try: + # Try to get a readable representation + if hasattr(_value, '__dict__') and not callable(_value): + _dump[_name] = f"<{type(_value).__name__}: {str(_value)[:100]}>" + else: + _dump[_name] = repr(_value)[:200] # Limit length + except Exception: + _dump[_name] = f"<{type(_value).__name__} object>" +print("__ENVTORCH_DUMP__" + _json.dumps(_dump)) +''' + + # Execute the dump code (its own names are _-prefixed so they stay out of the dump) + exec_result = self.execute_code(dump_code) + if not exec_result.success: + return {"error": "Failed to dump variables", "details": exec_result.exception_message} + + # Extract dump from stdout + for line in exec_result.stdout.split('\n'): + if line.startswith("__ENVTORCH_DUMP__"): + try: + return json.loads(line[len("__ENVTORCH_DUMP__"):]) + except json.JSONDecodeError: + pass + + return {"error": "No variable dump found in output"} + + def stop_session(self) -> None: + """Kill the container process.""" + if self._process: + try: + self._process.terminate() + self._process.wait(timeout=5) + except subprocess.TimeoutExpired: + self._process.kill() + self._process.wait() + finally: + self._process = None + self.container_id = None + + def _wrap_code_for_execution(self, code: str) -> str: + """Wrap user code to capture results and exceptions.""" + return f''' +import sys +import json 
+import traceback +from io import StringIO + +# Capture stdout/stderr +old_stdout = sys.stdout +old_stderr = sys.stderr +stdout_capture = StringIO() +stderr_capture = StringIO() +sys.stdout = stdout_capture +sys.stderr = stderr_capture + +result = {{"success": True, "return_value": None, "stdout": "", "stderr": ""}} + +try: + # Run user code in its own persistent namespace so it cannot clobber the names above + _ns = globals().setdefault("_envtorch_ns", {{}}) + exec({code!r}, _ns) + result["return_value"] = _ns.pop("exec_result", None) +except Exception as e: + result["success"] = False + result["exception_type"] = e.__class__.__name__ + result["exception_message"] = str(e) + result["traceback"] = traceback.format_exc() +finally: + # Restore stdout/stderr and capture output + sys.stdout = old_stdout + sys.stderr = old_stderr + result["stdout"] = stdout_capture.getvalue() + result["stderr"] = stderr_capture.getvalue() + +# Send result back +print("__ENVTORCH_RESULT__" + json.dumps(result, default=str)) +''' + + def __del__(self): + """Cleanup on destruction.""" + self.stop_session() \ No newline at end of file diff --git a/src/core/env/__init__.py b/src/core/env/__init__.py new file mode 100644 index 00000000..b0bd5b9d --- /dev/null +++ b/src/core/env/__init__.py @@ -0,0 +1,30 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +"""Core environment interfaces and types.""" + +from .interfaces import Environment, Transform, Tool, ToolRegistry +from .types import ( + Action, CodeAction, Observation, CodeObservation, + State, CodeState, ExecutionResult +) +from .base_transforms import CompositeTransform, NullTransform +from .code_execution_environment import CodeExecutionEnvironment + +__all__ = [ + # Core interfaces + "Environment", "Transform", "Tool", "ToolRegistry", + + # Types + "Action", "CodeAction", "Observation", "CodeObservation", + "State", "CodeState", "ExecutionResult", + + # Base transforms + "CompositeTransform", "NullTransform", + + # Base environment implementation + "CodeExecutionEnvironment" +] \ No newline at end of file diff --git a/src/core/env/base_transforms.py b/src/core/env/base_transforms.py new file mode 100644 index 00000000..d8165e3d --- /dev/null +++ b/src/core/env/base_transforms.py @@ -0,0 +1,29 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +"""Base transform implementations for composing environment-specific transforms.""" + +from .interfaces import Transform +from .types import Observation + + +class CompositeTransform(Transform): + """Combines multiple transforms into a single transform.""" + + def __init__(self, transforms: list[Transform]): + self.transforms = transforms + + def __call__(self, observation: Observation) -> Observation: + for transform in self.transforms: + observation = transform(observation) + return observation + + +class NullTransform(Transform): + """Default transform that passes through unchanged.""" + + def __call__(self, observation: Observation) -> Observation: + return observation \ No newline at end of file diff --git a/src/core/env/code_execution_environment.py b/src/core/env/code_execution_environment.py new file mode 100644 index 00000000..89456120 --- /dev/null +++ b/src/core/env/code_execution_environment.py @@ -0,0 +1,167 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +import json +import uuid +from typing import Any, Dict, Literal + +from ..docker.docker_executor import DockerExecutor +from .interfaces import Environment, Transform +from .types import CodeAction, CodeObservation, CodeState, Action, Observation, State + + +class CodeExecutionEnvironment(Environment): + """Environment for executing Python code actions using Docker.""" + + def __init__( + self, + transform: Transform | None = None, + docker_image: str = "python:3.11-slim", + timeout_seconds: int = 30 + ): + super().__init__(transform) + self.docker_image = docker_image + self.timeout_seconds = timeout_seconds + self.executor = DockerExecutor(docker_image, timeout_seconds) + self._state = CodeState() + + def reset(self) -> Observation: + """Reset environment and start fresh Docker session.""" + # Stop any existing session + self.executor.stop_session() + + # Initialize fresh state + self._state = CodeState( + episode_id=str(uuid.uuid4()), + step_count=0 + ) + + # Start new Docker session + try: + self.executor.start_session() + except Exception as e: + # Fail hard as requested + raise RuntimeError(f"Failed to start Docker session: {e}") + + # Return initial observation + observation = CodeObservation( + execution_result=None, + available_tools=[] # TODO: populate from MCP registry + ) + + return self._apply_transform(observation) + + def step(self, action: Action) -> Observation: + """Execute code action and return observation.""" + if not isinstance(action, CodeAction): + raise ValueError(f"Expected CodeAction, got {type(action)}") + + # Execute the code + execution_result = self.executor.execute_code(action.code) + + # Update state + self._state.step_count += 1 + self._state.action_history.append(action) + self._state.result_history.append(execution_result) + + # Create observation + observation = CodeObservation( + execution_result=execution_result, + available_tools=[] # TODO: populate from MCP registry + ) + + return self._apply_transform(observation) + + 
def render(self, mode: Literal["human", "raw", "ansi"] = "human") -> Any: + """Render current environment state.""" + try: + variables = self.executor.get_variable_dump() + except Exception as e: + variables = {"error": f"Failed to get variables: {e}"} + + render_data = { + "episode_id": self._state.episode_id, + "step_count": self._state.step_count, + "variables": variables, + "last_result": self._state.result_history[-1] if self._state.result_history else None + } + + if mode == "raw": + return render_data + elif mode == "ansi": + return self._render_ansi(render_data) + else: # mode == "human" + return self._render_human(render_data) + + def close(self) -> None: + """Close environment and clean up Docker container.""" + self.executor.stop_session() + + @property + def state(self) -> State: + """Get current environment state.""" + return self._state + + def _render_human(self, data: Dict[str, Any]) -> str: + """Render in human-readable format.""" + lines = [] + lines.append(f"=== Code Environment (Episode: {data['episode_id'][:8]}...) 
===") + lines.append(f"Steps: {data['step_count']}") + + if data.get("last_result"): + result = data["last_result"] + lines.append(f"Last execution: {'✓ Success' if result.success else '✗ Failed'}") + if result.stdout: + lines.append(f"Output: {result.stdout[:100]}...") + if not result.success and result.exception_message: + lines.append(f"Error: {result.exception_message}") + + lines.append("\n--- Variables ---") + variables = data.get("variables", {}) + if "error" in variables: + lines.append(f"Error getting variables: {variables['error']}") + else: + for name, value in sorted(variables.items()): + lines.append(f"{name}: {value}") + + return "\n".join(lines) + + def _render_ansi(self, data: Dict[str, Any]) -> str: + """Render in ANSI terminal format with colors.""" + lines = [] + + # ANSI color codes + BLUE = "\033[34m" + GREEN = "\033[32m" + RED = "\033[31m" + YELLOW = "\033[33m" + RESET = "\033[0m" + BOLD = "\033[1m" + + lines.append(f"{BOLD}{BLUE}=== Code Environment ==={RESET}") + lines.append(f"Episode: {data['episode_id'][:8]}...") + lines.append(f"Steps: {YELLOW}{data['step_count']}{RESET}") + + if data.get("last_result"): + result = data["last_result"] + status_color = GREEN if result.success else RED + status_text = "Success" if result.success else "Failed" + lines.append(f"Last execution: {status_color}{status_text}{RESET}") + + if result.stdout: + lines.append(f"Output: {result.stdout[:100]}...") + if not result.success and result.exception_message: + lines.append(f"{RED}Error: {result.exception_message}{RESET}") + + lines.append(f"\n{BOLD}--- Variables ---{RESET}") + variables = data.get("variables", {}) + if "error" in variables: + lines.append(f"{RED}Error getting variables: {variables['error']}{RESET}") + else: + for name, value in sorted(variables.items()): + lines.append(f"{YELLOW}{name}{RESET}: {value}") + + return "\n".join(lines) \ No newline at end of file diff --git a/src/core/env/interfaces.py b/src/core/env/interfaces.py new file mode 
100644 index 00000000..0a618361 --- /dev/null +++ b/src/core/env/interfaces.py @@ -0,0 +1,96 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from abc import ABC, abstractmethod +from typing import Any + +from .types import Action, Observation, State + + +class Transform(ABC): + """Transform observations to add rewards, metrics, or other modifications. + + Transforms follow the TorchRL pattern where they take an observation + and return a (potentially modified) observation. This allows for + flexible reward computation and observation augmentation. + """ + + @abstractmethod + def __call__(self, observation: Observation) -> Observation: + """Transform an observation. + + Args: + observation: The input observation + + Returns: + The transformed observation + """ + pass + + +class Environment(ABC): + """Base class for all environments following Gym/Gymnasium API. 
+ + Args: + transform: Optional transform to apply to observations + """ + + def __init__(self, transform: Transform | None = None): + self.transform = transform + + @abstractmethod + def reset(self) -> Observation: + """Reset the environment and return initial observation.""" + pass + + @abstractmethod + def step(self, action: Action) -> Observation: + """Take a step in the environment.""" + pass + + @property + @abstractmethod + def state(self) -> State: + """Get the current environment state.""" + pass + + def _apply_transform(self, observation: Observation) -> Observation: + """Apply transform if one is provided.""" + if self.transform is not None: + return self.transform(observation) + return observation + + +class Tool(ABC): + """Base class for tools that can be used in code execution.""" + + @abstractmethod + def __call__(self, *args, **kwargs) -> Any: + """Execute the tool.""" + pass + + +class ToolRegistry: + """Registry for managing tools available to code execution.""" + + def __init__(self): + self._tools: dict[str, Any] = {} + + def register(self, name: str, tool: Any): + """Register a tool with a name.""" + self._tools[name] = tool + + def get(self, name: str) -> Any | None: + """Get a tool by name.""" + return self._tools.get(name) + + def get_all(self) -> dict[str, Any]: + """Get all registered tools.""" + return self._tools.copy() + + def get_names(self) -> list[str]: + """Get all tool names.""" + return list(self._tools.keys()) diff --git a/src/core/env/types.py b/src/core/env/types.py new file mode 100644 index 00000000..2bae38e4 --- /dev/null +++ b/src/core/env/types.py @@ -0,0 +1,110 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +import traceback +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional, Union + + +# Type aliases +Scalar = Union[int, float, bool] + + +@dataclass(kw_only=True) +class Action: + """Base class for all environment actions.""" + + metadata: Dict[str, Any] = field(default_factory=dict) + + +@dataclass(kw_only=True) +class CodeAction(Action): + """Action containing Python code to execute in a CodeAct environment.""" + + code: str + + def __post_init__(self): + if not self.code or not self.code.strip(): + raise ValueError("code is required and cannot be empty") + + +@dataclass +class ExecutionResult: + """Result of executing Python code. + + The first four fields keep their original positions; the outcome fields + after them are the ones from_exception() and from_success() populate. + """ + + stdout: str = "" + stderr: str = "" + exit_code: int = 0 + execution_time_ms: float = 0.0 + return_value: Any = None + success: bool = True + exception: Optional[Exception] = None + exception_type: Optional[str] = None + exception_message: Optional[str] = None + traceback_str: str = "" + + @classmethod + def from_exception( + cls, exc: Exception, stdout: str = "", stderr: str = "" + ) -> "ExecutionResult": + """Build a failed result describing exc.""" + return cls( + stdout=stdout, + stderr=stderr, + exception=exc, + exception_type=exc.__class__.__name__, + exception_message=str(exc), + # Format exc itself: format_exc() only sees the exception being handled + traceback_str="".join( + traceback.format_exception(type(exc), exc, exc.__traceback__) + ), + success=False, + ) + + @classmethod + def from_success( + cls, + return_value: Any = None, + stdout: str = "", + stderr: str = "", + execution_time_ms: float = 0.0, + ) -> "ExecutionResult": + """Build a successful result carrying the captured output.""" + return cls( + stdout=stdout, + stderr=stderr, + return_value=return_value, + execution_time_ms=execution_time_ms, + success=True, + ) + + +@dataclass(kw_only=True) +class Observation: + """Base class for all environment observations.""" + + done: bool = False + reward: Union[bool, int, float, None] = None + metadata: Dict[str, Any] = field(default_factory=dict) + + +@dataclass(kw_only=True) +class CodeObservation(Observation): + """Observation from CodeAct environment execution.""" + + execution_result: ExecutionResult = field(default_factory=ExecutionResult) + available_tools: List[str] = field(default_factory=list) + + +@dataclass +class State: + """Base class for environment state.""" + + episode_id: Optional[str] = 
None + step_count: int = 0 + metadata: Dict[str, Any] = field(default_factory=dict) + + +@dataclass +class CodeState(State): + """State for CodeAct environment with persistent execution context.""" + + execution_globals: Dict[str, Any] = field(default_factory=dict) + action_history: List[CodeAction] = field(default_factory=list) + result_history: List[ExecutionResult] = field(default_factory=list) + + def __post_init__(self): + if not self.execution_globals: + self.execution_globals = {"__builtins__": __builtins__} diff --git a/src/envs/__init__.py b/src/envs/__init__.py index f631431b..8a49f508 100644 --- a/src/envs/__init__.py +++ b/src/envs/__init__.py @@ -1 +1,11 @@ -# Environment implementations like CodingEnv here +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +"""Environment implementations.""" + +from .coding import CodingEnv + +__all__ = ["CodingEnv"] diff --git a/src/envs/coding/__init__.py b/src/envs/coding/__init__.py new file mode 100644 index 00000000..f977077e --- /dev/null +++ b/src/envs/coding/__init__.py @@ -0,0 +1,21 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
# ---------------------------------------------------------------------------
# src/envs/coding/__init__.py (package re-exports from the same patch, kept
# as a note because they share these physical lines):
#     """CodingEnv: Environment for learning to code with safety and quality transforms."""
#     from .coding_env import CodingEnv
#     from .transforms import (
#         CodeSafetyTransform,
#         CodeQualityTransform,
#         create_safe_coding_transform
#     )
#     __all__ = [
#         "CodingEnv",
#         "CodeSafetyTransform",
#         "CodeQualityTransform",
#         "create_safe_coding_transform"
#     ]
# ---------------------------------------------------------------------------

# src/envs/coding/coding_env.py
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""CodingEnv: A concrete environment implementation for learning to code."""

from ...core.env.code_execution_environment import CodeExecutionEnvironment
from ...core.env.interfaces import Transform
from ...core.env.types import Action, Observation
from .transforms import create_safe_coding_transform


class CodingEnv(CodeExecutionEnvironment):
    """Code-execution environment that scores code safety and quality.

    Builds on ``CodeExecutionEnvironment`` and, unless told otherwise,
    installs the transform returned by ``create_safe_coding_transform`` so
    observations carry a reward reflecting safety and quality checks.

    Args:
        transform: Transform to apply to observations. When ``None`` and
            ``use_safety_transforms`` is true, the default safe-coding
            transform is used instead.
        docker_image: Forwarded unchanged to the parent constructor.
        timeout_seconds: Forwarded unchanged to the parent constructor.
        use_safety_transforms: Whether to fall back to the default
            safe-coding transform when no transform is given.
    """

    def __init__(
        self,
        transform: Transform | None = None,
        docker_image: str = "python:3.11-slim",
        timeout_seconds: int = 30,
        use_safety_transforms: bool = True
    ):
        # An explicitly supplied transform always wins over the default.
        needs_default = transform is None and use_safety_transforms
        chosen = create_safe_coding_transform() if needs_default else transform

        super().__init__(
            transform=chosen,
            docker_image=docker_image,
            timeout_seconds=timeout_seconds,
        )

    def step(self, action: Action) -> Observation:
        """Run one step and expose the submitted code to the transforms.

        For actions that have a ``code`` attribute, the code is stored in
        ``observation.metadata['last_code']`` and the transform is applied
        to the observation afterwards. Other actions are passed straight
        through to the parent ``step``.

        NOTE(review): if the parent ``step`` already applies
        ``self.transform``, the transform runs twice for code actions --
        confirm against ``CodeExecutionEnvironment``.
        """
        # Decide before executing, matching the original evaluation order.
        carries_code = hasattr(action, 'code')
        observation = super().step(action)
        if not carries_code:
            return observation

        # Transforms read the code from metadata, so populate it first.
        observation.metadata['last_code'] = action.code
        return self._apply_transform(observation)


# ---------------------------------------------------------------------------
# Next patch section: src/envs/coding/transforms.py (new file), which starts
# with the same license header:
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
# ---------------------------------------------------------------------------
"""Transforms specific to coding environments."""

import re
import ast

from ...core.env.interfaces import Transform
from ...core.env.base_transforms import CompositeTransform
from ...core.env.types import CodeObservation, Observation


class CodeSafetyTransform(Transform):
    """Penalize code that matches any of a fixed list of risky patterns.

    Args:
        penalty: Reward assigned when a dangerous pattern is found.
    """

    def __init__(self, penalty: float = -1.0):
        self.penalty = penalty
        # Regular expressions searched for, in order, in the submitted code.
        self.dangerous_patterns = [
            r'import\s+os',
            r'import\s+subprocess',
            r'eval\(',
            r'exec\(',
            r'__import__',
            r'open\(',
        ]

    def __call__(self, observation: Observation) -> Observation:
        """Set the reward according to the first dangerous pattern found.

        Observations that are not ``CodeObservation`` instances, or that
        carry no ``'last_code'`` metadata entry, are returned untouched.
        On a match the reward is overwritten with ``penalty`` and the
        matching pattern is recorded under ``'safety_violation'``. Without
        a match, a missing reward is initialised to ``0.0``.
        """
        if not isinstance(observation, CodeObservation):
            return observation

        info = observation.metadata
        if 'last_code' not in info:
            return observation

        source = info['last_code']
        # First pattern that matches, or None when the code looks clean.
        hit = next(
            (p for p in self.dangerous_patterns if re.search(p, source)),
            None,
        )
        if hit is not None:
            observation.reward = self.penalty
            info['safety_violation'] = hit
        elif observation.reward is None:
            observation.reward = 0.0

        return observation


class CodeQualityTransform(Transform):
    """Adjust the reward using simple code-quality heuristics.

    Args:
        concise_bonus: Added when the stripped code is short enough.
        max_length_threshold: Maximum stripped length that still earns the
            conciseness bonus.
        syntax_penalty: Added (normally negative) when the code does not
            parse.
    """

    def __init__(self,
                 concise_bonus: float = 0.1,
                 max_length_threshold: int = 100,
                 syntax_penalty: float = -0.2):
        self.concise_bonus = concise_bonus
        self.max_length_threshold = max_length_threshold
        self.syntax_penalty = syntax_penalty

    def _score(self, code):
        """Return the quality adjustment for one piece of code."""
        score = 0.0

        # Short code earns the conciseness bonus.
        if len(code.strip()) <= self.max_length_threshold:
            score += self.concise_bonus

        # Code that fails to parse is penalised.
        try:
            ast.parse(code)
        except SyntaxError:
            score += self.syntax_penalty

        return score

    def __call__(self, observation: Observation) -> Observation:
        """Add the quality score to the observation's reward.

        Observations that are not ``CodeObservation`` instances are
        returned untouched. When no ``'last_code'`` metadata entry exists
        the score is ``0.0``; a missing reward is replaced by the score,
        otherwise the score is added to it.
        """
        if not isinstance(observation, CodeObservation):
            return observation

        info = observation.metadata
        score = self._score(info['last_code']) if 'last_code' in info else 0.0

        if observation.reward is None:
            observation.reward = score
            return observation

        observation.reward += score
        return observation


def create_safe_coding_transform() -> CompositeTransform:
    """Return a composite of the safety check followed by the quality check."""
    stages = [CodeSafetyTransform(), CodeQualityTransform()]
    return CompositeTransform(stages)