Readd env removed by gitignore

Darktex · Darktex · commit 54824d2cfd73 · 2025-10-06T14:40:59.000-07:00
diff --git a/.gitignore b/.gitignore
@@ -52,7 +52,6 @@ coverage.xml
 # Virtual environments
 .env
 .venv
-env/
 venv/
 ENV/
 env.bak/
diff --git a/src/core/env/__init__.py b/src/core/env/__init__.py
@@ -0,0 +1,30 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Core environment interfaces and types."""
+
+from .interfaces import Environment, Transform, Tool, ToolRegistry
+from .types import (
+    Action, CodeAction, Observation, CodeObservation,
+    State, CodeState, ExecutionResult
+)
+from .base_transforms import CompositeTransform, NullTransform
+from .code_execution_environment import CodeExecutionEnvironment
+
+# Names re-exported as the public API of this package.
+__all__ = [
+    # Core interfaces
+    "Environment",
+    "Transform",
+    "Tool",
+    "ToolRegistry",
+    # Types
+    "Action",
+    "CodeAction",
+    "Observation",
+    "CodeObservation",
+    "State",
+    "CodeState",
+    "ExecutionResult",
+    # Base transforms
+    "CompositeTransform",
+    "NullTransform",
+    # Base environment implementation
+    "CodeExecutionEnvironment",
+]
diff --git a/src/core/env/base_transforms.py b/src/core/env/base_transforms.py
@@ -0,0 +1,29 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Base transform implementations for composing environment-specific transforms."""
+
+from .interfaces import Transform
+from .types import Observation
+
+
+class CompositeTransform(Transform):
+    """Combines multiple transforms into a single transform.
+
+    Transforms are applied in order, each one receiving the output of the
+    previous one. With no transforms the observation is returned unchanged.
+
+    Args:
+        transforms: The transforms to chain, first-applied first. The
+            sequence is copied, so later changes to the caller's list do
+            not alter this composite.
+    """
+
+    def __init__(self, transforms: list[Transform]):
+        # Copy so the pipeline cannot change behind our back through the
+        # caller's list; this also lets any iterable of transforms be passed.
+        self.transforms = list(transforms)
+
+    def __call__(self, observation: Observation) -> Observation:
+        """Run ``observation`` through every transform and return the result."""
+        for transform in self.transforms:
+            observation = transform(observation)
+        return observation
+
+
+class NullTransform(Transform):
+    """Identity transform: hands every observation back untouched."""
+
+    def __call__(self, observation: Observation) -> Observation:
+        """Return ``observation`` itself, with no modification."""
+        return observation
diff --git a/src/core/env/code_execution_environment.py b/src/core/env/code_execution_environment.py
@@ -0,0 +1,167 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import json
+import uuid
+from typing import Any, Dict, Literal
+
+from ..docker.docker_executor import DockerExecutor
+from .interfaces import Environment, Transform
+from .types import CodeAction, CodeObservation, CodeState, Action, Observation, State
+
+
+class CodeExecutionEnvironment(Environment):
+    """Environment for executing Python code actions using Docker.
+
+    ``reset()`` starts a fresh executor session and episode state; ``step()``
+    runs one ``CodeAction`` through the executor and records the action and
+    its result in that state.
+
+    Args:
+        transform: Optional transform applied to every observation returned
+            by ``reset()`` and ``step()``.
+        docker_image: Image passed to ``DockerExecutor``.
+        timeout_seconds: Timeout passed to ``DockerExecutor``.
+    """
+
+    # Longest stdout excerpt shown by the text renderers.
+    _STDOUT_PREVIEW_CHARS = 100
+
+    def __init__(
+        self,
+        transform: Transform | None = None,
+        docker_image: str = "python:3.11-slim",
+        timeout_seconds: int = 30
+    ):
+        super().__init__(transform)
+        self.docker_image = docker_image
+        self.timeout_seconds = timeout_seconds
+        self.executor = DockerExecutor(docker_image, timeout_seconds)
+        # Placeholder state; reset() replaces it and assigns an episode id.
+        self._state = CodeState()
+
+    def reset(self) -> Observation:
+        """Reset environment and start fresh Docker session.
+
+        Returns:
+            The initial observation (no execution result), after the
+            transform has been applied.
+
+        Raises:
+            RuntimeError: If the executor fails to start a session; the
+                original exception is attached as ``__cause__``.
+        """
+        # Stop any existing session
+        self.executor.stop_session()
+
+        # Initialize fresh state
+        self._state = CodeState(
+            episode_id=str(uuid.uuid4()),
+            step_count=0
+        )
+
+        # Start new Docker session
+        try:
+            self.executor.start_session()
+        except Exception as e:
+            # Fail hard, but chain the cause so the executor's own traceback
+            # is reported as the direct cause of this error.
+            raise RuntimeError(f"Failed to start Docker session: {e}") from e
+
+        # Return initial observation
+        observation = CodeObservation(
+            execution_result=None,
+            available_tools=[]  # TODO: populate from MCP registry
+        )
+
+        return self._apply_transform(observation)
+
+    def step(self, action: Action) -> Observation:
+        """Execute code action and return observation.
+
+        Args:
+            action: The action to run; must be a ``CodeAction``.
+
+        Returns:
+            An observation carrying the execution result, after the
+            transform has been applied.
+
+        Raises:
+            ValueError: If ``action`` is not a ``CodeAction``.
+        """
+        if not isinstance(action, CodeAction):
+            raise ValueError(f"Expected CodeAction, got {type(action)}")
+
+        # Execute the code
+        execution_result = self.executor.execute_code(action.code)
+
+        # Update state
+        self._state.step_count += 1
+        self._state.action_history.append(action)
+        self._state.result_history.append(execution_result)
+
+        # Create observation
+        observation = CodeObservation(
+            execution_result=execution_result,
+            available_tools=[]  # TODO: populate from MCP registry
+        )
+
+        return self._apply_transform(observation)
+
+    def render(self, mode: Literal["human", "raw", "ansi"] = "human") -> Any:
+        """Render current environment state.
+
+        Args:
+            mode: ``"raw"`` returns the underlying dict, ``"ansi"`` a
+                colored string; any other value is rendered as ``"human"``
+                plain text.
+
+        Returns:
+            A dict for ``"raw"``, otherwise a multi-line string.
+        """
+        try:
+            variables = self.executor.get_variable_dump()
+        except Exception as e:
+            # Best effort: rendering should still work without the dump.
+            # NOTE(review): the failure is reported under the "error" key of
+            # the variables mapping, so a dump that legitimately contains a
+            # variable named "error" would be rendered as a failure too.
+            # Confirm whether get_variable_dump() can return such a key.
+            variables = {"error": f"Failed to get variables: {e}"}
+
+        render_data = {
+            "episode_id": self._state.episode_id,
+            "step_count": self._state.step_count,
+            "variables": variables,
+            "last_result": self._state.result_history[-1] if self._state.result_history else None
+        }
+
+        if mode == "raw":
+            return render_data
+        elif mode == "ansi":
+            return self._render_ansi(render_data)
+        else:  # mode == "human"
+            return self._render_human(render_data)
+
+    def close(self) -> None:
+        """Close environment and clean up Docker container."""
+        self.executor.stop_session()
+
+    @property
+    def state(self) -> State:
+        """Get current environment state."""
+        return self._state
+
+    @staticmethod
+    def _preview(text: str, limit: int = _STDOUT_PREVIEW_CHARS) -> str:
+        """Return ``text`` cut to ``limit`` characters, adding "..." only if cut."""
+        if len(text) <= limit:
+            return text
+        return f"{text[:limit]}..."
+
+    def _render_human(self, data: Dict[str, Any]) -> str:
+        """Render in human-readable format.
+
+        Args:
+            data: The dict built by ``render()``.
+
+        Returns:
+            The plain-text rendering, one item per line.
+        """
+        lines = []
+        # NOTE(review): slicing assumes episode_id is a string, which holds
+        # after reset(); the CodeState default is not visible here.
+        lines.append(f"=== Code Environment (Episode: {data['episode_id'][:8]}...) ===")
+        lines.append(f"Steps: {data['step_count']}")
+
+        if data.get("last_result"):
+            result = data["last_result"]
+            lines.append(f"Last execution: {'✓ Success' if result.success else '✗ Failed'}")
+            if result.stdout:
+                # Ellipsis appears only when the output was actually cut.
+                lines.append(f"Output: {self._preview(result.stdout)}")
+            if not result.success and result.exception_message:
+                lines.append(f"Error: {result.exception_message}")
+
+        lines.append("\n--- Variables ---")
+        variables = data.get("variables", {})
+        if "error" in variables:
+            lines.append(f"Error getting variables: {variables['error']}")
+        else:
+            for name, value in sorted(variables.items()):
+                lines.append(f"{name}: {value}")
+
+        return "\n".join(lines)
+
+    def _render_ansi(self, data: Dict[str, Any]) -> str:
+        """Render in ANSI terminal format with colors.
+
+        Args:
+            data: The dict built by ``render()``.
+
+        Returns:
+            The rendering as text containing ANSI escape sequences.
+        """
+        lines = []
+
+        # ANSI color codes
+        BLUE = "\033[34m"
+        GREEN = "\033[32m"
+        RED = "\033[31m"
+        YELLOW = "\033[33m"
+        RESET = "\033[0m"
+        BOLD = "\033[1m"
+
+        lines.append(f"{BOLD}{BLUE}=== Code Environment ==={RESET}")
+        # NOTE(review): slicing assumes episode_id is a string, which holds
+        # after reset(); the CodeState default is not visible here.
+        lines.append(f"Episode: {data['episode_id'][:8]}...")
+        lines.append(f"Steps: {YELLOW}{data['step_count']}{RESET}")
+
+        if data.get("last_result"):
+            result = data["last_result"]
+            status_color = GREEN if result.success else RED
+            status_text = "Success" if result.success else "Failed"
+            lines.append(f"Last execution: {status_color}{status_text}{RESET}")
+
+            if result.stdout:
+                # Ellipsis appears only when the output was actually cut.
+                lines.append(f"Output: {self._preview(result.stdout)}")
+            if not result.success and result.exception_message:
+                lines.append(f"{RED}Error: {result.exception_message}{RESET}")
+
+        lines.append(f"\n{BOLD}--- Variables ---{RESET}")
+        variables = data.get("variables", {})
+        if "error" in variables:
+            lines.append(f"{RED}Error getting variables: {variables['error']}{RESET}")
+        else:
+            for name, value in sorted(variables.items()):
+                lines.append(f"{YELLOW}{name}{RESET}: {value}")
+
+        return "\n".join(lines)
diff --git a/src/core/env/interfaces.py b/src/core/env/interfaces.py
@@ -0,0 +1,96 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from abc import ABC, abstractmethod
+from typing import Any
+
+from .types import Action, Observation, State
+
+
+class Transform(ABC):
+    """Abstract hook that maps an observation to an observation.
+
+    Modeled on the TorchRL transform pattern: a transform is called with an
+    observation and returns one (possibly modified), which makes it a
+    flexible place to attach rewards, metrics, or other augmentations.
+    """
+
+    @abstractmethod
+    def __call__(self, observation: Observation) -> Observation:
+        """Produce the transformed version of ``observation``.
+
+        Args:
+            observation: Observation to process.
+
+        Returns:
+            The resulting observation.
+        """
+
+
+class Environment(ABC):
+    """Abstract base for environments, following the Gym/Gymnasium API.
+
+    Subclasses implement ``reset``, ``step`` and the ``state`` property.
+
+    Args:
+        transform: Transform applied to observations by
+            ``_apply_transform``; ``None`` leaves observations as they are.
+    """
+
+    def __init__(self, transform: Transform | None = None):
+        self.transform = transform
+
+    @abstractmethod
+    def reset(self) -> Observation:
+        """Start over and return the first observation."""
+
+    @abstractmethod
+    def step(self, action: Action) -> Observation:
+        """Apply one action and return the resulting observation."""
+
+    @property
+    @abstractmethod
+    def state(self) -> State:
+        """Current state of the environment."""
+
+    def _apply_transform(self, observation: Observation) -> Observation:
+        """Pass ``observation`` through ``self.transform`` when one is set."""
+        hook = self.transform
+        if hook is None:
+            return observation
+        return hook(observation)
+
+
+class Tool(ABC):
+    """Abstract base for callables exposed as tools to code execution."""
+
+    @abstractmethod
+    def __call__(self, *args, **kwargs) -> Any:
+        """Run the tool with the given arguments and return its result."""
+
+
+class ToolRegistry:
+    """Name-to-tool mapping for the tools available to code execution."""
+
+    def __init__(self):
+        # Plain dict keyed by tool name; get_names() follows insertion order.
+        self._tools: dict[str, Any] = {}
+
+    def register(self, name: str, tool: Any):
+        """Store ``tool`` under ``name``, replacing any earlier entry."""
+        self._tools[name] = tool
+
+    def get(self, name: str) -> Any | None:
+        """Look up ``name``; return ``None`` when nothing is registered."""
+        try:
+            return self._tools[name]
+        except KeyError:
+            return None
+
+    def get_all(self) -> dict[str, Any]:
+        """Return a shallow copy of the whole name-to-tool mapping."""
+        return dict(self._tools)
+
+    def get_names(self) -> list[str]:
+        """Return the registered names as a new list."""
+        return list(self._tools)
diff --git a/src/core/env/types.py b/src/core/env/types.py