From 8e3c19a21711dc058359feea2cf2e8fec449b657 Mon Sep 17 00:00:00 2001
From: Kai Wu <kaiwu@meta.com>
Date: Thu, 30 Oct 2025 22:35:02 -0700
Subject: [PATCH 01/10] add PYTHON_ADDITIONAL_IMPORTS

---
 PODMAN_SOLUTION.md                            | 136 +++++++++
 src/core/containers/runtime/__init__.py       |   5 +-
 src/core/containers/runtime/providers.py      | 286 +++++++++++++++++-
 src/envs/coding_env/server/app.py             |  23 +-
 .../coding_env/server/python_codeact_env.py   |   6 +-
 test_custom_port.py                           |  27 ++
 test_docker_host_network.py                   |  27 ++
 test_log                                      |  10 +
 test_openenv.py                               |  41 +++
 9 files changed, 548 insertions(+), 13 deletions(-)
 create mode 100644 PODMAN_SOLUTION.md
 create mode 100644 test_custom_port.py
 create mode 100644 test_docker_host_network.py
 create mode 100644 test_log
 create mode 100644 test_openenv.py

diff --git a/PODMAN_SOLUTION.md b/PODMAN_SOLUTION.md
new file mode 100644
index 00000000..f7d5a448
--- /dev/null
+++ b/PODMAN_SOLUTION.md
@@ -0,0 +1,136 @@
+# Podman Networking Issue - Solution Summary
+
+## Problem
+When using `podman-docker` (podman emulating Docker), the container provider was failing with timeout errors:
+```
+TimeoutError: Container at http://localhost:63915 did not become ready within 100s
+```
+
+### Root Cause Analysis
+1. **IPv6 Connection Reset**: When connecting to `localhost`, curl was trying IPv6 (`::1`) and getting "Connection reset by peer"
+2. **IPv4 Connection Refused**: When connecting to `127.0.0.1`, curl was getting "Connection refused"
+3. **Podman Networking Issue**: Rootless podman has known networking issues with port forwarding using `pasta` or `slirp4netns`
+
+```bash
+# IPv6 attempt
+$ curl http://localhost:63915/health
+* Connected to localhost (::1) port 63915
+* Recv failure: Connection reset by peer
+
+# IPv4 attempt
+$ curl http://127.0.0.1:63915/health
+* Failed to connect to 127.0.0.1 port 63915: Connection refused
+```
+
+## Solution: PodmanProvider with Host Networking
+
+Created a dedicated `PodmanProvider` class that uses native podman commands with `--network=host` to bypass port forwarding issues.
+
+### Key Implementation Details
+
+**File**: `src/core/containers/runtime/providers.py`
+
+```python
+class PodmanProvider(ContainerProvider):
+    """
+    Container provider for Podman.
+
+    Uses host networking to avoid rootless podman port forwarding issues.
+    Container binds directly to port 8000 on the host.
+    """
+
+    def start_container(self, image: str, ...) -> str:
+        cmd = [
+            "podman", "run",
+            "-d",
+            "--name", self._container_name,
+            "--network", "host",  # Host networking bypasses port forwarding
+        ]
+        # Container binds directly to host port 8000
+        return "http://127.0.0.1:8000"
+```
+
+**File**: `src/core/containers/runtime/__init__.py`
+
+```python
+from .providers import ContainerProvider, KubernetesProvider, LocalDockerProvider, PodmanProvider
+
+__all__ = [
+    "ContainerProvider",
+    "LocalDockerProvider",
+    "PodmanProvider",  # Now exported
+    "KubernetesProvider",
+]
+```
+
+### Usage
+
+**File**: `test_openenv.py`
+
+```python
+from openenv_core.containers.runtime import PodmanProvider
+
+# Use PodmanProvider instead of LocalDockerProvider
+provider = PodmanProvider()
+base_url = provider.start_container("coding-env:latest")
+print(base_url)  # http://127.0.0.1:8000
+
+provider.wait_for_ready(base_url, timeout_s=100)
+coding_env = CodingEnv(base_url=base_url, provider=provider)
+```
+
+## Test Results
+
+```bash
+$ python test_openenv.py
+http://127.0.0.1:8000
+Reset complete: exit_code=0
+Code: print('Hello, World!')
+  → stdout: Hello, World!
+  → exit_code: 0
+Code: x = 5 + 3
+print(f'Result: {x}')
+  → stdout: Result: 8
+  → exit_code: 0
+Code: import math
+print(math.pi)
+  → stdout: 3.141592653589793
+  → exit_code: 0
+```
+
+✅ **All tests passed successfully!**
+
+## Trade-offs and Limitations
+
+### Host Networking Mode
+- **Pro**: Bypasses all rootless podman networking issues
+- **Pro**: Direct port access (no port forwarding overhead)
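+- **Con**: Container listens on port 8000 by default; there is no automatic free-port allocation (a different port must be requested explicitly via `port=`)
+- **Con**: Containers share the host's port space, so only one container can use a given port at a time on the same host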
+
+### Alternative Approaches Considered
+
+1. **Use explicit IPv4 binding** (`127.0.0.1:port:8000`) - ❌ Failed with pasta error
+2. **Use default port mapping** (`port:8000`) - ❌ Same networking issues
+3. **Run with root privileges** - ❌ Security concern
+4. **Switch to slirp4netns** - ⚠️ More complex, might still have issues
+
+## Recommendations
+
+1. **For local development**: Use `PodmanProvider` - it's simple and reliable
+2. **For CI/CD**: Consider using actual Docker or running podman with root
+3. **For production**: Use `KubernetesProvider` or cloud-based container services
+
+## When to Use Which Provider
+
+| Provider | Use Case | Networking Mode |
+|----------|----------|----------------|
+| `LocalDockerProvider` | Docker installed | Port forwarding |
+| `PodmanProvider` | Rootless podman | Host networking |
+| `KubernetesProvider` | K8s cluster | Service/Ingress |
+
+## Future Improvements
+
+1. Consider adding a `--rootful` option for `PodmanProvider` to enable port forwarding
+2. Add automatic free-port selection (an explicit `port=` already works by overriding the uvicorn command for known images)
+3. Create comprehensive documentation on container runtime selection
diff --git a/src/core/containers/runtime/__init__.py b/src/core/containers/runtime/__init__.py
index a72b5301..1cd25562 100644
--- a/src/core/containers/runtime/__init__.py
+++ b/src/core/containers/runtime/__init__.py
@@ -6,10 +6,11 @@
 
 """Container runtime providers."""
 
-from .providers import ContainerProvider, KubernetesProvider, LocalDockerProvider
+from .providers import ContainerProvider, KubernetesProvider, LocalDockerProvider, PodmanProvider
 
 __all__ = [
     "ContainerProvider",
     "LocalDockerProvider",
+    "PodmanProvider",
     "KubernetesProvider",
-]
\ No newline at end of file
+]
diff --git a/src/core/containers/runtime/providers.py b/src/core/containers/runtime/providers.py
index 637b3be5..d7dc0c8b 100644
--- a/src/core/containers/runtime/providers.py
+++ b/src/core/containers/runtime/providers.py
@@ -135,9 +135,10 @@ def start_container(
 
         Args:
             image: Docker image name
-            port: Port to expose (if None, finds available port)
+            port: Port to expose (if None, uses 8000)
             env_vars: Environment variables for the container
             **kwargs: Additional Docker run options
+                - command_override: List of command args to override container CMD
 
         Returns:
             Base URL to connect to the container
@@ -145,19 +146,20 @@ def start_container(
         import subprocess
         import time
 
-        # Find available port if not specified
+        # Use default port if not specified
         if port is None:
-            port = self._find_available_port()
+            port = 8000
 
         # Generate container name
         self._container_name = self._generate_container_name(image)
 
         # Build docker run command
+        # Use host networking for better performance and consistency with podman
         cmd = [
             "docker", "run",
             "-d",  # Detached
             "--name", self._container_name,
-            "-p", f"{port}:8000",  # Map port
+            "--network", "host",  # Use host network
         ]
 
         # Add environment variables
@@ -167,15 +169,33 @@ def start_container(
 
         # Add image
         cmd.append(image)
+          
+        # Add command override if provided (to change port)
+        if "command_override" in kwargs:
+            cmd.extend(kwargs["command_override"])
+        elif port != 8000:
+            # Infer app path from image name for common environments
+            app_module = self._infer_app_module(image)
+            if app_module:
+                cmd.extend([
+                    "uvicorn",
+                    app_module,
+                    "--host", "0.0.0.0",
+                    "--port", str(port)
+                ])
 
         # Run container
-        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
-        self._container_id = result.stdout.strip()
+        try:
+            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+            self._container_id = result.stdout.strip()
+        except subprocess.CalledProcessError as e:
+            error_msg = f"Failed to start container: {e}\nstdout: {e.stdout}\nstderr: {e.stderr}"
+            raise RuntimeError(error_msg) from e
 
         # Wait a moment for container to start
         time.sleep(1)
 
-        base_url = f"http://localhost:{port}"
+        base_url = f"http://127.0.0.1:{port}"
         return base_url
 
     def stop_container(self) -> None:
@@ -272,6 +292,258 @@ def _generate_container_name(self, image: str) -> str:
         timestamp = int(time.time() * 1000)
         return f"{clean_image}-{timestamp}"
 
+    def _infer_app_module(self, image: str) -> Optional[str]:
+        """
+        Infer the uvicorn app module path from the image name.
+
+        Args:
+            image: Container image name
+
+        Returns:
+            App module path like "envs.coding_env.server.app:app" or None
+        """
+        clean_image = image.split("/")[-1].split(":")[0]
+        
+        # Map common environment names to their app modules
+        env_module_map = {
+            "coding-env": "envs.coding_env.server.app:app",
+            "echo-env": "envs.echo_env.server.app:app",
+            "git-env": "envs.git_env.server.app:app",
+            "openspiel-env": "envs.openspiel_env.server.app:app",
+            "sumo-rl-env": "envs.sumo_rl_env.server.app:app",
+            "finrl-env": "envs.finrl_env.server.app:app",
+        }
+        
+        return env_module_map.get(clean_image)
+
+
+class PodmanProvider(ContainerProvider):
+    """
+    Container provider for Podman.
+
+    This provider runs containers using native Podman commands, which avoids
+    the networking issues that can occur with podman-docker emulation.
+
+    Example:
+        >>> provider = PodmanProvider()
+        >>> base_url = provider.start_container("echo-env:latest")
+        >>> # Container running on http://127.0.0.1:8000 (host networking, default port)
+        >>> provider.stop_container()
+    """
+
+    def __init__(self):
+        """Initialize the Podman provider."""
+        self._container_id: Optional[str] = None
+        self._container_name: Optional[str] = None
+
+        # Check if Podman is available
+        import subprocess
+
+        try:
+            subprocess.run(
+                ["podman", "version"],
+                check=True,
+                capture_output=True,
+                timeout=5,
+            )
+        except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired):
+            raise RuntimeError(
+                "Podman is not available. Please install Podman."
+            )
+
+    def start_container(
+        self,
+        image: str,
+        port: Optional[int] = None,
+        env_vars: Optional[Dict[str, str]] = None,
+        **kwargs: Any,
+    ) -> str:
+        """
+        Start a Podman container.
+
+        Args:
+            image: Container image name
+            port: Port to expose (if None, uses 8000)
+            env_vars: Environment variables for the container
+            **kwargs: Additional Podman run options
+                - command_override: List of command args to override container CMD
+
+        Returns:
+            Base URL to connect to the container
+        """
+        import subprocess
+        import time
+
+        # Use default port if not specified
+        if port is None:
+            port = 8000
+
+        # Generate container name
+        self._container_name = self._generate_container_name(image)
+
+        # Build podman run command
+        # Use host networking to avoid rootless podman port forwarding issues
+        cmd = [
+            "podman", "run",
+            "-d",  # Detached
+            "--name", self._container_name,
+            "--network", "host",  # Use host network to avoid port forwarding issues
+        ]
+
+        # Add environment variables
+        if env_vars:
+            for key, value in env_vars.items():
+                cmd.extend(["-e", f"{key}={value}"])
+
+        # Add image
+        cmd.append(image)
+        
+        # Add command override if provided (to change port)
+        if "command_override" in kwargs:
+            cmd.extend(kwargs["command_override"])
+        elif port != 8000:
+            # Infer app path from image name for common environments
+            app_module = self._infer_app_module(image)
+            if app_module:
+                cmd.extend([
+                    "uvicorn",
+                    app_module,
+                    "--host", "0.0.0.0",
+                    "--port", str(port)
+                ])
+
+        # Run container
+        try:
+            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+            self._container_id = result.stdout.strip()
+        except subprocess.CalledProcessError as e:
+            error_msg = f"Failed to start container: {e}\nstdout: {e.stdout}\nstderr: {e.stderr}"
+            raise RuntimeError(error_msg) from e
+
+        # Wait a moment for container to start
+        time.sleep(1)
+
+        base_url = f"http://127.0.0.1:{port}"
+        return base_url
+
+    def stop_container(self) -> None:
+        """
+        Stop and remove the Podman container.
+        """
+        if self._container_id is None:
+            return
+
+        import subprocess
+
+        try:
+            # Stop container
+            subprocess.run(
+                ["podman", "stop", self._container_id],
+                capture_output=True,
+                check=True,
+                timeout=10,
+            )
+
+            # Remove container
+            subprocess.run(
+                ["podman", "rm", self._container_id],
+                capture_output=True,
+                check=True,
+                timeout=10,
+            )
+        except subprocess.CalledProcessError:
+            # Container might already be stopped/removed
+            pass
+        finally:
+            self._container_id = None
+            self._container_name = None
+
+    def wait_for_ready(self, base_url: str, timeout_s: float = 30.0) -> None:
+        """
+        Wait for container to be ready by polling /health endpoint.
+
+        Args:
+            base_url: Base URL of the container
+            timeout_s: Maximum time to wait
+
+        Raises:
+            TimeoutError: If container doesn't become ready
+        """
+        import time
+        import requests
+
+        start_time = time.time()
+        health_url = f"{base_url}/health"
+
+        while time.time() - start_time < timeout_s:
+            try:
+                response = requests.get(health_url, timeout=2.0)
+                if response.status_code == 200:
+                    return
+            except requests.RequestException:
+                pass
+
+            time.sleep(0.5)
+
+        raise TimeoutError(
+            f"Container at {base_url} did not become ready within {timeout_s}s"
+        )
+
+    def _find_available_port(self) -> int:
+        """
+        Find an available port on localhost.
+
+        Returns:
+            An available port number
+        """
+        import socket
+
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+            s.bind(("", 0))
+            s.listen(1)
+            port = s.getsockname()[1]
+        return port
+
+    def _generate_container_name(self, image: str) -> str:
+        """
+        Generate a unique container name based on image name and timestamp.
+
+        Args:
+            image: Container image name
+
+        Returns:
+            A unique container name
+        """
+        import time
+
+        clean_image = image.split("/")[-1].split(":")[0]
+        timestamp = int(time.time() * 1000)
+        return f"{clean_image}-{timestamp}"
+
+    def _infer_app_module(self, image: str) -> Optional[str]:
+        """
+        Infer the uvicorn app module path from the image name.
+
+        Args:
+            image: Container image name
+
+        Returns:
+            App module path like "envs.coding_env.server.app:app" or None
+        """
+        clean_image = image.split("/")[-1].split(":")[0]
+        
+        # Map common environment names to their app modules
+        env_module_map = {
+            "coding-env": "envs.coding_env.server.app:app",
+            "echo-env": "envs.echo_env.server.app:app",
+            "git-env": "envs.git_env.server.app:app",
+            "openspiel-env": "envs.openspiel_env.server.app:app",
+            "sumo-rl-env": "envs.sumo_rl_env.server.app:app",
+            "finrl-env": "envs.finrl_env.server.app:app",
+        }
+        
+        return env_module_map.get(clean_image)
+
 
 class KubernetesProvider(ContainerProvider):
     """
diff --git a/src/envs/coding_env/server/app.py b/src/envs/coding_env/server/app.py
index 3a895474..4f9d6293 100644
--- a/src/envs/coding_env/server/app.py
+++ b/src/envs/coding_env/server/app.py
@@ -19,15 +19,34 @@
 
     # Or run directly:
     python -m envs.coding_env.server.app
+    
+    # With custom imports:
+    PYTHON_ADDITIONAL_IMPORTS="sys,os,functools,typing" python -m envs.coding_env.server.app
 """
 
+import os
+
 from core.env_server import create_app
 
 from ..models import CodeAction, CodeObservation
 from .python_codeact_env import PythonCodeActEnv
 
-# Create the environment instance
-env = PythonCodeActEnv()
+# Get additional imports from environment variable
+# Format: comma-separated list, e.g., "sys,os,functools,typing"
+additional_imports_str = os.environ.get("PYTHON_ADDITIONAL_IMPORTS", "")
+if additional_imports_str:
+    additional_imports = [imp.strip() for imp in additional_imports_str.split(",") if imp.strip()]
+else:
+    # Default imports that match the common_imports used in reward evaluation
+    additional_imports = [
+        "sys",
+        "os",
+        "functools",
+        "typing",
+    ]
+
+# Create the environment instance with authorized imports
+env = PythonCodeActEnv(additional_imports=additional_imports)
 
 # Create the app with web interface and README integration
 app = create_app(env, CodeAction, CodeObservation, env_name="coding_env")
diff --git a/src/envs/coding_env/server/python_codeact_env.py b/src/envs/coding_env/server/python_codeact_env.py
index 14daf2c9..cec3d838 100644
--- a/src/envs/coding_env/server/python_codeact_env.py
+++ b/src/envs/coding_env/server/python_codeact_env.py
@@ -45,10 +45,12 @@ class PythonCodeActEnv(Environment):
 
     def __init__(
         self,
+        additional_imports: list[str] | None = None,
     ):
         self.transform = create_safe_coding_transform()
-        self._executor = PyExecutor()
+        self._executor = PyExecutor(additional_imports=additional_imports)
         self._state = CodeState()
+        self._additional_imports = additional_imports
 
     def reset(self) -> Observation:
         """
@@ -63,7 +65,7 @@ def reset(self) -> Observation:
         self._state.last_exit_code = 0
 
         # Reset executor to clear any previously defined variables/functions
-        self._executor = PyExecutor()
+        self._executor = PyExecutor(additional_imports=self._additional_imports)
 
         # Reset transform to clear any accumulated state
         self.transform = create_safe_coding_transform()
diff --git a/test_custom_port.py b/test_custom_port.py
new file mode 100644
index 00000000..53d1f669
--- /dev/null
+++ b/test_custom_port.py
@@ -0,0 +1,27 @@
+from envs.coding_env import CodeAction, CodingEnv
+from openenv_core.containers.runtime import PodmanProvider
+
+provider = PodmanProvider()
+
+# Test with custom port 9000
+base_url = provider.start_container("coding-env:latest", port=9000)
+print(f"Container started at: {base_url}")
+
+# Wait for the server to be ready
+provider.wait_for_ready(base_url, timeout_s=30)
+print("✅ Container is ready!")
+
+# Create environment
+coding_env = CodingEnv(base_url=base_url, provider=provider)
+
+# Test with simple code
+result = coding_env.reset()
+print(f"Reset: exit_code={result.observation.exit_code}")
+
+result = coding_env.step(CodeAction(code="print('Testing custom port 9000!')"))
+print(f"Output: {result.observation.stdout.strip()}")
+
+# Cleanup
+coding_env.close()
+provider.stop_container()
+print("✅ Test complete!")
diff --git a/test_docker_host_network.py b/test_docker_host_network.py
new file mode 100644
index 00000000..c5bcd006
--- /dev/null
+++ b/test_docker_host_network.py
@@ -0,0 +1,27 @@
+from envs.coding_env import CodeAction, CodingEnv
+from openenv_core.containers.runtime import LocalDockerProvider
+
+provider = LocalDockerProvider()
+
+# Test with default port 8000
+base_url = provider.start_container("coding-env:latest")
+print(f"Container started at: {base_url}")
+
+# Wait for the server to be ready
+provider.wait_for_ready(base_url, timeout_s=30)
+print("✅ Container is ready!")
+
+# Create environment
+coding_env = CodingEnv(base_url=base_url, provider=provider)
+
+# Test with simple code
+result = coding_env.reset()
+print(f"Reset: exit_code={result.observation.exit_code}")
+
+result = coding_env.step(CodeAction(code="print('Docker with host network!')"))
+print(f"Output: {result.observation.stdout.strip()}")
+
+# Cleanup
+coding_env.close()
+provider.stop_container()
+print("✅ Docker host network test complete!")
diff --git a/test_log b/test_log
new file mode 100644
index 00000000..efceabda
--- /dev/null
+++ b/test_log
@@ -0,0 +1,10 @@
+Traceback (most recent call last):
+  File "/home/kaiwu/work/kaiwu/OpenEnv/test_openenv.py", line 7, in <module>
+    base_url = provider.start_container("coding-env:latest")
+               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/kaiwu/work/kaiwu/OpenEnv/src/core/containers/runtime/providers.py", line 356, in start_container
+    result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/kaiwu/.conda/envs/forge/lib/python3.12/subprocess.py", line 571, in run
+    raise CalledProcessError(retcode, process.args,
+subprocess.CalledProcessError: Command '['podman', 'run', '-d', '--name', 'coding-env-1761861983029', '-p', '127.0.0.1:38989:8000', 'coding-env:latest']' returned non-zero exit status 126.
diff --git a/test_openenv.py b/test_openenv.py
new file mode 100644
index 00000000..468452a7
--- /dev/null
+++ b/test_openenv.py
@@ -0,0 +1,41 @@
+from _thread import exit
+
+from envs.coding_env import CodeAction, CodingEnv
+from openenv_core.containers.runtime import LocalDockerProvider
+
+provider = LocalDockerProvider()
+base_url = provider.start_container("coding-env:latest")
+print(base_url)  # http://127.0.0.1:<port>
+
+# Wait for the server to be ready before creating the client
+provider.wait_for_ready(base_url, timeout_s=100)
+
+# Use the environment via base_url
+# provider.stop_container()
+try:
+    # Create environment from Docker image
+    # coding_env = CodingEnv.from_docker_image("coding-env:latest")
+    coding_env = CodingEnv(base_url=base_url, provider=provider)
+    # Reset
+    result = coding_env.reset()
+    print(f"Reset complete: exit_code={result.observation.exit_code}")
+
+    # Execute Python code
+    code_samples = [
+        "print('Hello, World!')",
+        "x = 5 + 3\nprint(f'Result: {x}')",
+        "import math\nprint(math.pi)",
+    ]
+
+    for code in code_samples:
+        result = coding_env.step(CodeAction(code=code))
+        print(f"Code: {code}")
+        print(f"  → stdout: {result.observation.stdout.strip()}")
+        print(f"  → exit_code: {result.observation.exit_code}")
+
+except Exception as e:
+    print(f"Error: {e}")
+
+# Always clean up
+coding_env.close()
+provider.stop_container()

From 8729fb3dda6a27ef0be1e904b685059dc2b87cc9 Mon Sep 17 00:00:00 2001
From: Kai Wu <kaiwu@meta.com>
Date: Thu, 30 Oct 2025 23:48:59 -0700
Subject: [PATCH 02/10] add timeout and port change

---
 src/core/env_server/http_server.py            | 17 +++-
 src/core/tools/local_python_executor.py       | 79 +++++++++++++++----
 src/envs/coding_env/server/app.py             | 19 ++++-
 .../coding_env/server/python_codeact_env.py   | 19 ++++-
 test_log                                      | 10 ---
 test_openenv.py                               | 18 ++++-
 6 files changed, 123 insertions(+), 39 deletions(-)
 delete mode 100644 test_log

diff --git a/src/core/env_server/http_server.py b/src/core/env_server/http_server.py
index d18873f0..7cc75253 100644
--- a/src/core/env_server/http_server.py
+++ b/src/core/env_server/http_server.py
@@ -15,7 +15,7 @@
 
 import os
 from dataclasses import asdict
-from typing import Any, Dict, Type
+from typing import Any, Dict, Optional, Type
 
 from .interfaces import Environment
 from .types import Action, Observation
@@ -85,13 +85,22 @@ async def reset(request: Dict[str, Any] = Body(default={})) -> Dict[str, Any]:
         async def step(request: Dict[str, Any]) -> Dict[str, Any]:
             """Step endpoint - executes action and returns observation."""
             action_data = request.get("action", {})
-            # TODO: Handle timeout_s, request_id, episode_id from request if provided
+            
+            # Extract timeout_s from request (sent by HTTPEnvClient)
+            timeout_s = request.get("timeout_s", None)
+            
+            # TODO: Handle request_id, episode_id from request if provided
 
             # Deserialize action
             action = self._deserialize_action(action_data)
 
-            # Execute step
-            observation = self.env.step(action)
+            # Execute step with timeout if environment supports it
+            try:
+                # Try to pass timeout_s to step() method
+                observation = self.env.step(action, timeout_s=timeout_s)
+            except TypeError:
+                # Environment doesn't support timeout parameter, call without it
+                observation = self.env.step(action)
 
             # Return serialized observation
             return self._serialize_observation(observation)
diff --git a/src/core/tools/local_python_executor.py b/src/core/tools/local_python_executor.py
index ba4477d5..e1f0145c 100644
--- a/src/core/tools/local_python_executor.py
+++ b/src/core/tools/local_python_executor.py
@@ -8,14 +8,23 @@
 Local Python Executor.
 
 This module provides functionality for executing Python code locally by wrapping
-the smolagents LocalPythonExecutor.
+the smolagents LocalPythonExecutor with timeout protection.
 """
 
+import multiprocessing
+import signal
+from typing import Optional
+
 from smolagents import LocalPythonExecutor
 
 from core.env_server.types import CodeExecResult
 
 
+def _timeout_handler(signum, frame):
+    """Signal handler for timeout."""
+    raise TimeoutError("Code execution timed out")
+
+
 class PyExecutor:
     """
     Wrapper around smolagents LocalPythonExecutor for executing Python code.
@@ -57,12 +66,14 @@ def __init__(self, additional_imports: list[str] | None = None):
         # Initialize tools to make BASE_PYTHON_TOOLS available (including print)
         self._executor.send_tools({})
 
-    def run(self, code: str) -> CodeExecResult:
+    def run(self, code: str, timeout_s: Optional[float] = None) -> CodeExecResult:
         """
-        Execute Python code and return the result.
+        Execute Python code and return the result with optional timeout protection.
 
         Args:
             code: Python code string to execute
+            timeout_s: Maximum execution time in seconds. If None, no timeout is enforced.
+                      If the code exceeds this time, it will be terminated with a timeout error.
 
         Returns:
             CodeExecResult containing stdout, stderr, and exit_code
@@ -77,22 +88,58 @@ def run(self, code: str) -> CodeExecResult:
             >>> result = executor.run("1 / 0")
             >>> print(result.exit_code)  # 1
             >>> print(result.stderr)  # Contains error message
+            >>>
+            >>> # Timeout protection
+            >>> result = executor.run("while True: pass", timeout_s=5.0)
+            >>> print(result.exit_code)  # 1
+            >>> print("timeout" in result.stderr.lower())  # True
         """
         try:
-            # Execute the code using LocalPythonExecutor
-            # LocalPythonExecutor returns a CodeOutput object with output, logs, is_final_answer
-            exec_result = self._executor(code)
-
-            # Extract the logs (which contain print outputs) as stdout
-            # The output field contains the return value of the code
-            stdout = exec_result.logs
-            stderr = ""
-            exit_code = 0  # Success
-
+            # Set up timeout using signal (Unix/Linux only)
+            old_handler = None
+            if timeout_s is not None and timeout_s > 0:
+                try:
+                    # Set alarm signal handler for timeout
+                    old_handler = signal.signal(signal.SIGALRM, _timeout_handler)
+                    signal.alarm(int(timeout_s))
+                except (ValueError, AttributeError):
+                    # SIGALRM/signal.alarm do not exist on Windows (AttributeError), and
+                    # signal.signal() raises ValueError outside the main thread: run without a timeout
+                    pass
+
+            try:
+                # Execute the code using LocalPythonExecutor
+                # LocalPythonExecutor returns a CodeOutput object with output, logs, is_final_answer
+                exec_result = self._executor(code)
+
+                # Extract the logs (which contain print outputs) as stdout
+                # The output field contains the return value of the code
+                stdout = exec_result.logs
+                stderr = ""
+                exit_code = 0  # Success
+
+                return CodeExecResult(
+                    stdout=stdout,
+                    stderr=stderr,
+                    exit_code=exit_code,
+                )
+            finally:
+                # Cancel the alarm and restore old handler
+                if timeout_s is not None and timeout_s > 0:
+                    try:
+                        signal.alarm(0)
+                        if old_handler is not None:
+                            signal.signal(signal.SIGALRM, old_handler)
+                    except (ValueError, AttributeError):
+                        pass
+
+        except TimeoutError as e:
+            # Code execution exceeded timeout
             return CodeExecResult(
-                stdout=stdout,
-                stderr=stderr,
-                exit_code=exit_code,
+                stdout="",
+                stderr=f"Code execution timed out after {timeout_s} seconds. "
+                       f"Possible infinite loop or extremely long computation.",
+                exit_code=1,  # Non-zero indicates error
             )
 
         except Exception as e:
diff --git a/src/envs/coding_env/server/app.py b/src/envs/coding_env/server/app.py
index 4f9d6293..a4cfe683 100644
--- a/src/envs/coding_env/server/app.py
+++ b/src/envs/coding_env/server/app.py
@@ -12,10 +12,10 @@
 
 Usage:
     # Development (with auto-reload):
-    uvicorn envs.coding_env.server.app:app --reload --host 0.0.0.0 --port 8000
+    uvicorn envs.coding_env.server.app:app --reload --host 0.0.0.0 --port 5432
 
     # Production:
-    uvicorn envs.coding_env.server.app:app --host 0.0.0.0 --port 8000 --workers 4
+    uvicorn envs.coding_env.server.app:app --host 0.0.0.0 --port 5432 --workers 4
 
     # Or run directly:
     python -m envs.coding_env.server.app
@@ -53,6 +53,19 @@
 
 
 if __name__ == "__main__":
+    import sys
     import uvicorn
 
-    uvicorn.run(app, host="0.0.0.0", port=8000)
+    # Get port from environment variable or command line argument
+    # Priority: command line arg > environment variable > default (5432)
+    port = int(os.environ.get("PORT", 5432))
+    
+    # Override with command line argument if provided
+    if len(sys.argv) > 1:
+        try:
+            port = int(sys.argv[1])
+        except ValueError:
+            print(f"Invalid port argument: {sys.argv[1]}, using port {port}")
+    
+    print(f"Starting server on port {port}")
+    uvicorn.run(app, host="0.0.0.0", port=port)
diff --git a/src/envs/coding_env/server/python_codeact_env.py b/src/envs/coding_env/server/python_codeact_env.py
index cec3d838..ae984aca 100644
--- a/src/envs/coding_env/server/python_codeact_env.py
+++ b/src/envs/coding_env/server/python_codeact_env.py
@@ -79,24 +79,37 @@ def reset(self) -> Observation:
 
         return self._apply_transform(observation)
 
-    def step(self, action: Action) -> Observation:
+    def step(self, action: Action, timeout_s: float | None = None) -> Observation:
         """
         Execute code action and return observation.
 
         Args:
             action: CodeAction containing the code to execute
+            timeout_s: Maximum execution time in seconds. If None, uses default timeout (60s).
+                      If code exceeds timeout, execution is terminated with a timeout error.
 
         Returns:
             CodeObservation with execution results (stdout, stderr, exit_code)
 
         Raises:
             ValueError: If action is not a CodeAction instance
+
+        Example:
+            >>> env = PythonCodeActEnv()
+            >>> env.reset()
+            >>> action = CodeAction(code="print('Hello')")
+            >>> obs = env.step(action, timeout_s=30.0)  # 30 second timeout
+            >>> print(obs.stdout)  # "Hello\n"
         """
         if not isinstance(action, CodeAction):
             raise ValueError(f"Expected CodeAction, got {type(action)}")
 
-        # Execute the code using PyExecutor
-        result = self._executor.run(action.code)
+        # Use default timeout if none provided (60 seconds is reasonable for most code)
+        if timeout_s is None:
+            timeout_s = 60.0
+
+        # Execute the code using PyExecutor with timeout protection
+        result = self._executor.run(action.code, timeout_s=timeout_s)
 
         # Update state
         self._state.step_count += 1
diff --git a/test_log b/test_log
deleted file mode 100644
index efceabda..00000000
--- a/test_log
+++ /dev/null
@@ -1,10 +0,0 @@
-Traceback (most recent call last):
-  File "/home/kaiwu/work/kaiwu/OpenEnv/test_openenv.py", line 7, in <module>
-    base_url = provider.start_container("coding-env:latest")
-               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/kaiwu/work/kaiwu/OpenEnv/src/core/containers/runtime/providers.py", line 356, in start_container
-    result = subprocess.run(cmd, capture_output=True, text=True, check=True)
-             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-  File "/home/kaiwu/.conda/envs/forge/lib/python3.12/subprocess.py", line 571, in run
-    raise CalledProcessError(retcode, process.args,
-subprocess.CalledProcessError: Command '['podman', 'run', '-d', '--name', 'coding-env-1761861983029', '-p', '127.0.0.1:38989:8000', 'coding-env:latest']' returned non-zero exit status 126.
diff --git a/test_openenv.py b/test_openenv.py
index 468452a7..a43df59f 100644
--- a/test_openenv.py
+++ b/test_openenv.py
@@ -1,11 +1,23 @@
-from _thread import exit
+import socket
 
 from envs.coding_env import CodeAction, CodingEnv
 from openenv_core.containers.runtime import LocalDockerProvider
 
+def find_available_port():
+    """Find an available port on the host."""
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        s.bind(("", 0))
+        s.listen(1)
+        port = s.getsockname()[1]
+    return port
+
+# Find an available port to avoid conflicts with existing services
+port = find_available_port()
+print(f"Using port: {port}")
+
 provider = LocalDockerProvider()
-base_url = provider.start_container("coding-env:latest")
-print(base_url)  # http://127.0.0.1:<port>
+base_url = provider.start_container("coding-env:latest", port=port)
+print(f"Container started at: {base_url}")
 
 # Wait for the server to be ready before creating the client
 provider.wait_for_ready(base_url, timeout_s=100)

From 91865d6b668f5826dce8203008320b7d1691c598 Mon Sep 17 00:00:00 2001
From: Kai Wu <kaiwu@meta.com>
Date: Fri, 31 Oct 2025 09:18:55 -0700
Subject: [PATCH 03/10] timeout added

---
 src/core/http_env_client.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/core/http_env_client.py b/src/core/http_env_client.py
index b304e088..4807ca96 100644
--- a/src/core/http_env_client.py
+++ b/src/core/http_env_client.py
@@ -46,6 +46,8 @@ def from_docker_image(
         cls: Type[EnvClientT],
         image: str,
         provider: Optional["ContainerProvider"] = None,
+        timeout_s: float = 120.0,
+        request_timeout_s: float = 15.0,
         **kwargs: Any,
     ) -> EnvClientT:
         """
@@ -62,6 +64,8 @@ def from_docker_image(
         Args:
             image: Docker image name to run (e.g., "echo-env:latest")
             provider: Container provider to use (defaults to LocalDockerProvider)
+            timeout_s: Maximum time to wait for container to become ready (default: 120 seconds)
+            request_timeout_s: Timeout for HTTP requests to the environment (default: 15 seconds)
             **kwargs: Additional arguments to pass to provider.start_container()
                      (e.g., env_vars, port)
 
@@ -75,9 +79,11 @@ def from_docker_image(
             >>> # Create environment from image
             >>> env = CodingEnv.from_docker_image("coding-env:latest")
             >>>
-            >>> # Create environment with custom env vars
+            >>> # Create environment with custom env vars and timeouts
             >>> env = CodingEnv.from_docker_image(
             ...     "coding-env:latest",
+            ...     timeout_s=180.0,
+            ...     request_timeout_s=120.0,
             ...     env_vars={"MY_VAR": "value"}
             ... )
             >>>
@@ -99,11 +105,11 @@ def from_docker_image(
         # 1. Start container with optional kwargs (e.g., env_vars, port)
         base_url = provider.start_container(image, **kwargs)
 
-        # 2. Wait for server to be ready
-        provider.wait_for_ready(base_url)
+        # 2. Wait for server to be ready with configured timeout
+        provider.wait_for_ready(base_url, timeout_s=timeout_s)
 
-        # 3. Create and return client instance with provider reference
-        return cls(base_url=base_url, provider=provider)
+        # 3. Create and return client instance with provider reference and request timeout
+        return cls(base_url=base_url, request_timeout_s=request_timeout_s, provider=provider)
 
     @abstractmethod
     def _step_payload(self, action: ActT) -> dict:

From abc8f509d8b40aad409a912f7d76565af98c812b Mon Sep 17 00:00:00 2001
From: Kai Wu <kaiwu@meta.com>
Date: Sat, 1 Nov 2025 13:46:44 -0700
Subject: [PATCH 04/10] handle timeout

---
 src/core/containers/runtime/providers.py | 31 ++++++++++++++++++++----
 1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/src/core/containers/runtime/providers.py b/src/core/containers/runtime/providers.py
index d7dc0c8b..de762037 100644
--- a/src/core/containers/runtime/providers.py
+++ b/src/core/containers/runtime/providers.py
@@ -243,23 +243,44 @@ def wait_for_ready(self, base_url: str, timeout_s: float = 30.0) -> None:
         """
         import time
         import requests
+        import subprocess
+        import logging
 
         start_time = time.time()
         health_url = f"{base_url}/health"
+        last_error = None
 
         while time.time() - start_time < timeout_s:
             try:
                 response = requests.get(health_url, timeout=2.0)
                 if response.status_code == 200:
                     return
-            except requests.RequestException:
-                pass
+            except requests.RequestException as e:
+                last_error = str(e)
 
             time.sleep(0.5)
 
-        raise TimeoutError(
-            f"Container at {base_url} did not become ready within {timeout_s}s"
-        )
+        # If we timeout, provide diagnostic information
+        error_msg = f"Container at {base_url} did not become ready within {timeout_s}s"
+        
+        if self._container_id:
+            try:
+                # Get container logs to help diagnose the issue
+                result = subprocess.run(
+                    ["docker", "logs", "--tail", "50", self._container_id],
+                    capture_output=True,
+                    text=True,
+                    timeout=5,
+                )
+                if result.stdout or result.stderr:
+                    error_msg += f"\n\nContainer logs (last 50 lines):\n{result.stdout}\n{result.stderr}"
+            except Exception:
+                pass
+
+        if last_error:
+            error_msg += f"\n\nLast connection error: {last_error}"
+
+        raise TimeoutError(error_msg)
 
     def _find_available_port(self) -> int:
         """

From d3291c1fd6e6bf784d6e6c0aa0ef6ab6f020318d Mon Sep 17 00:00:00 2001
From: Kai Wu <kaiwu@meta.com>
Date: Sat, 1 Nov 2025 13:54:39 -0700
Subject: [PATCH 05/10] clean up

---
 PODMAN_SOLUTION.md                       | 136 --------------
 src/core/containers/runtime/__init__.py  |   3 +-
 src/core/containers/runtime/providers.py | 227 -----------------------
 test_custom_port.py                      |  27 ---
 test_docker_host_network.py              |  27 ---
 test_openenv.py                          |  53 ------
 6 files changed, 1 insertion(+), 472 deletions(-)
 delete mode 100644 PODMAN_SOLUTION.md
 delete mode 100644 test_custom_port.py
 delete mode 100644 test_docker_host_network.py
 delete mode 100644 test_openenv.py

diff --git a/PODMAN_SOLUTION.md b/PODMAN_SOLUTION.md
deleted file mode 100644
index f7d5a448..00000000
--- a/PODMAN_SOLUTION.md
+++ /dev/null
@@ -1,136 +0,0 @@
-# Podman Networking Issue - Solution Summary
-
-## Problem
-When using `podman-docker` (podman emulating Docker), the container provider was failing with timeout errors:
-```
-TimeoutError: Container at http://localhost:63915 did not become ready within 100s
-```
-
-### Root Cause Analysis
-1. **IPv6 Connection Reset**: When connecting to `localhost`, curl was trying IPv6 (`::1`) and getting "Connection reset by peer"
-2. **IPv4 Connection Refused**: When connecting to `127.0.0.1`, curl was getting "Connection refused"
-3. **Podman Networking Issue**: Rootless podman has known networking issues with port forwarding using `pasta` or `slirp4netns`
-
-```bash
-# IPv6 attempt
-$ curl http://localhost:63915/health
-* Connected to localhost (::1) port 63915
-* Recv failure: Connection reset by peer
-
-# IPv4 attempt
-$ curl http://127.0.0.1:63915/health
-* Failed to connect to 127.0.0.1 port 63915: Connection refused
-```
-
-## Solution: PodmanProvider with Host Networking
-
-Created a dedicated `PodmanProvider` class that uses native podman commands with `--network=host` to bypass port forwarding issues.
-
-### Key Implementation Details
-
-**File**: `/home/kaiwu/work/kaiwu/OpenEnv/src/core/containers/runtime/providers.py`
-
-```python
-class PodmanProvider(ContainerProvider):
-    """
-    Container provider for Podman.
-
-    Uses host networking to avoid rootless podman port forwarding issues.
-    Container binds directly to port 8000 on the host.
-    """
-
-    def start_container(self, image: str, ...) -> str:
-        cmd = [
-            "podman", "run",
-            "-d",
-            "--name", self._container_name,
-            "--network", "host",  # Host networking bypasses port forwarding
-        ]
-        # Container binds directly to host port 8000
-        return "http://127.0.0.1:8000"
-```
-
-**File**: `/home/kaiwu/work/kaiwu/OpenEnv/src/core/containers/runtime/__init__.py`
-
-```python
-from .providers import ContainerProvider, KubernetesProvider, LocalDockerProvider, PodmanProvider
-
-__all__ = [
-    "ContainerProvider",
-    "LocalDockerProvider",
-    "PodmanProvider",  # Now exported
-    "KubernetesProvider",
-]
-```
-
-### Usage
-
-**File**: `/home/kaiwu/work/kaiwu/OpenEnv/test_openenv.py`
-
-```python
-from openenv_core.containers.runtime import PodmanProvider
-
-# Use PodmanProvider instead of LocalDockerProvider
-provider = PodmanProvider()
-base_url = provider.start_container("coding-env:latest")
-print(base_url)  # http://127.0.0.1:8000
-
-provider.wait_for_ready(base_url, timeout_s=100)
-coding_env = CodingEnv(base_url=base_url, provider=provider)
-```
-
-## Test Results
-
-```bash
-$ python test_openenv.py
-http://127.0.0.1:8000
-Reset complete: exit_code=0
-Code: print('Hello, World!')
-  → stdout: Hello, World!
-  → exit_code: 0
-Code: x = 5 + 3
-print(f'Result: {x}')
-  → stdout: Result: 8
-  → exit_code: 0
-Code: import math
-print(math.pi)
-  → stdout: 3.141592653589793
-  → exit_code: 0
-```
-
-✅ **All tests passed successfully!**
-
-## Trade-offs and Limitations
-
-### Host Networking Mode
-- **Pro**: Bypasses all rootless podman networking issues
-- **Pro**: Direct port access (no port forwarding overhead)
-- **Con**: Container always uses port 8000 (no dynamic port allocation)
-- **Con**: Can only run one container at a time on the same host
-
-### Alternative Approaches Considered
-
-1. **Use explicit IPv4 binding** (`127.0.0.1:port:8000`) - ❌ Failed with pasta error
-2. **Use default port mapping** (`port:8000`) - ❌ Same networking issues
-3. **Run with root privileges** - ❌ Security concern
-4. **Switch to slirp4netns** - ⚠️ More complex, might still have issues
-
-## Recommendations
-
-1. **For local development**: Use `PodmanProvider` - it's simple and reliable
-2. **For CI/CD**: Consider using actual Docker or running podman with root
-3. **For production**: Use `KubernetesProvider` or cloud-based container services
-
-## When to Use Which Provider
-
-| Provider | Use Case | Networking Mode |
-|----------|----------|----------------|
-| `LocalDockerProvider` | Docker installed | Port forwarding |
-| `PodmanProvider` | Rootless podman | Host networking |
-| `KubernetesProvider` | K8s cluster | Service/Ingress |
-
-## Future Improvements
-
-1. Consider adding a `--rootful` option for `PodmanProvider` to enable port forwarding
-2. Add dynamic port support by overriding uvicorn command with custom port
-3. Create comprehensive documentation on container runtime selection
diff --git a/src/core/containers/runtime/__init__.py b/src/core/containers/runtime/__init__.py
index 1cd25562..0be889ba 100644
--- a/src/core/containers/runtime/__init__.py
+++ b/src/core/containers/runtime/__init__.py
@@ -6,11 +6,10 @@
 
 """Container runtime providers."""
 
-from .providers import ContainerProvider, KubernetesProvider, LocalDockerProvider, PodmanProvider
+from .providers import ContainerProvider, KubernetesProvider, LocalDockerProvider
 
 __all__ = [
     "ContainerProvider",
     "LocalDockerProvider",
-    "PodmanProvider",
     "KubernetesProvider",
 ]
diff --git a/src/core/containers/runtime/providers.py b/src/core/containers/runtime/providers.py
index 5f6150bf..527a8767 100644
--- a/src/core/containers/runtime/providers.py
+++ b/src/core/containers/runtime/providers.py
@@ -338,233 +338,6 @@ def _infer_app_module(self, image: str) -> Optional[str]:
         return env_module_map.get(clean_image)
 
 
-class PodmanProvider(ContainerProvider):
-    """
-    Container provider for Podman.
-
-    This provider runs containers using native Podman commands, which avoids
-    the networking issues that can occur with podman-docker emulation.
-
-    Example:
-        >>> provider = PodmanProvider()
-        >>> base_url = provider.start_container("echo-env:latest")
-        >>> # Container running on http://localhost:<random-port>
-        >>> provider.stop_container()
-    """
-
-    def __init__(self):
-        """Initialize the Podman provider."""
-        self._container_id: Optional[str] = None
-        self._container_name: Optional[str] = None
-
-        # Check if Podman is available
-        import subprocess
-
-        try:
-            subprocess.run(
-                ["podman", "version"],
-                check=True,
-                capture_output=True,
-                timeout=5,
-            )
-        except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired):
-            raise RuntimeError(
-                "Podman is not available. Please install Podman."
-            )
-
-    def start_container(
-        self,
-        image: str,
-        port: Optional[int] = None,
-        env_vars: Optional[Dict[str, str]] = None,
-        **kwargs: Any,
-    ) -> str:
-        """
-        Start a Podman container.
-
-        Args:
-            image: Container image name
-            port: Port to expose (if None, uses 8000)
-            env_vars: Environment variables for the container
-            **kwargs: Additional Podman run options
-                - command_override: List of command args to override container CMD
-
-        Returns:
-            Base URL to connect to the container
-        """
-        import subprocess
-        import time
-
-        # Use default port if not specified
-        if port is None:
-            port = 8000
-
-        # Generate container name
-        self._container_name = self._generate_container_name(image)
-
-        # Build podman run command
-        # Use host networking to avoid rootless podman port forwarding issues
-        cmd = [
-            "podman", "run",
-            "-d",  # Detached
-            "--name", self._container_name,
-            "--network", "host",  # Use host network to avoid port forwarding issues
-        ]
-
-        # Add environment variables
-        if env_vars:
-            for key, value in env_vars.items():
-                cmd.extend(["-e", f"{key}={value}"])
-
-        # Add image
-        cmd.append(image)
-        
-        # Add command override if provided (to change port)
-        if "command_override" in kwargs:
-            cmd.extend(kwargs["command_override"])
-        elif port != 8000:
-            # Infer app path from image name for common environments
-            app_module = self._infer_app_module(image)
-            if app_module:
-                cmd.extend([
-                    "uvicorn",
-                    app_module,
-                    "--host", "0.0.0.0",
-                    "--port", str(port)
-                ])
-
-        # Run container
-        try:
-            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
-            self._container_id = result.stdout.strip()
-        except subprocess.CalledProcessError as e:
-            error_msg = f"Failed to start container: {e}\nstdout: {e.stdout}\nstderr: {e.stderr}"
-            raise RuntimeError(error_msg) from e
-
-        # Wait a moment for container to start
-        time.sleep(1)
-
-        base_url = f"http://127.0.0.1:{port}"
-        return base_url
-
-    def stop_container(self) -> None:
-        """
-        Stop and remove the Podman container.
-        """
-        if self._container_id is None:
-            return
-
-        import subprocess
-
-        try:
-            # Stop container
-            subprocess.run(
-                ["podman", "stop", self._container_id],
-                capture_output=True,
-                check=True,
-                timeout=10,
-            )
-
-            # Remove container
-            subprocess.run(
-                ["podman", "rm", self._container_id],
-                capture_output=True,
-                check=True,
-                timeout=10,
-            )
-        except subprocess.CalledProcessError:
-            # Container might already be stopped/removed
-            pass
-        finally:
-            self._container_id = None
-            self._container_name = None
-
-    def wait_for_ready(self, base_url: str, timeout_s: float = 30.0) -> None:
-        """
-        Wait for container to be ready by polling /health endpoint.
-
-        Args:
-            base_url: Base URL of the container
-            timeout_s: Maximum time to wait
-
-        Raises:
-            TimeoutError: If container doesn't become ready
-        """
-        import time
-        import requests
-
-        start_time = time.time()
-        health_url = f"{base_url}/health"
-
-        while time.time() - start_time < timeout_s:
-            try:
-                response = requests.get(health_url, timeout=2.0)
-                if response.status_code == 200:
-                    return
-            except requests.RequestException:
-                pass
-
-            time.sleep(0.5)
-
-        raise TimeoutError(
-            f"Container at {base_url} did not become ready within {timeout_s}s"
-        )
-
-    def _find_available_port(self) -> int:
-        """
-        Find an available port on localhost.
-
-        Returns:
-            An available port number
-        """
-        import socket
-
-        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
-            s.bind(("", 0))
-            s.listen(1)
-            port = s.getsockname()[1]
-        return port
-
-    def _generate_container_name(self, image: str) -> str:
-        """
-        Generate a unique container name based on image name and timestamp.
-
-        Args:
-            image: Container image name
-
-        Returns:
-            A unique container name
-        """
-        import time
-
-        clean_image = image.split("/")[-1].split(":")[0]
-        timestamp = int(time.time() * 1000)
-        return f"{clean_image}-{timestamp}"
-
-    def _infer_app_module(self, image: str) -> Optional[str]:
-        """
-        Infer the uvicorn app module path from the image name.
-
-        Args:
-            image: Container image name
-
-        Returns:
-            App module path like "envs.coding_env.server.app:app" or None
-        """
-        clean_image = image.split("/")[-1].split(":")[0]
-        
-        # Map common environment names to their app modules
-        env_module_map = {
-            "coding-env": "envs.coding_env.server.app:app",
-            "echo-env": "envs.echo_env.server.app:app",
-            "git-env": "envs.git_env.server.app:app",
-            "openspiel-env": "envs.openspiel_env.server.app:app",
-            "sumo-rl-env": "envs.sumo_rl_env.server.app:app",
-            "finrl-env": "envs.finrl_env.server.app:app",
-        }
-        
-        return env_module_map.get(clean_image)
-
 
 class KubernetesProvider(ContainerProvider):
     """
diff --git a/test_custom_port.py b/test_custom_port.py
deleted file mode 100644
index 53d1f669..00000000
--- a/test_custom_port.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from envs.coding_env import CodeAction, CodingEnv
-from openenv_core.containers.runtime import PodmanProvider
-
-provider = PodmanProvider()
-
-# Test with custom port 9000
-base_url = provider.start_container("coding-env:latest", port=9000)
-print(f"Container started at: {base_url}")
-
-# Wait for the server to be ready
-provider.wait_for_ready(base_url, timeout_s=30)
-print("✅ Container is ready!")
-
-# Create environment
-coding_env = CodingEnv(base_url=base_url, provider=provider)
-
-# Test with simple code
-result = coding_env.reset()
-print(f"Reset: exit_code={result.observation.exit_code}")
-
-result = coding_env.step(CodeAction(code="print('Testing custom port 9000!')"))
-print(f"Output: {result.observation.stdout.strip()}")
-
-# Cleanup
-coding_env.close()
-provider.stop_container()
-print("✅ Test complete!")
diff --git a/test_docker_host_network.py b/test_docker_host_network.py
deleted file mode 100644
index c5bcd006..00000000
--- a/test_docker_host_network.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from envs.coding_env import CodeAction, CodingEnv
-from openenv_core.containers.runtime import LocalDockerProvider
-
-provider = LocalDockerProvider()
-
-# Test with default port 8000
-base_url = provider.start_container("coding-env:latest")
-print(f"Container started at: {base_url}")
-
-# Wait for the server to be ready
-provider.wait_for_ready(base_url, timeout_s=30)
-print("✅ Container is ready!")
-
-# Create environment
-coding_env = CodingEnv(base_url=base_url, provider=provider)
-
-# Test with simple code
-result = coding_env.reset()
-print(f"Reset: exit_code={result.observation.exit_code}")
-
-result = coding_env.step(CodeAction(code="print('Docker with host network!')"))
-print(f"Output: {result.observation.stdout.strip()}")
-
-# Cleanup
-coding_env.close()
-provider.stop_container()
-print("✅ Docker host network test complete!")
diff --git a/test_openenv.py b/test_openenv.py
deleted file mode 100644
index a43df59f..00000000
--- a/test_openenv.py
+++ /dev/null
@@ -1,53 +0,0 @@
-import socket
-
-from envs.coding_env import CodeAction, CodingEnv
-from openenv_core.containers.runtime import LocalDockerProvider
-
-def find_available_port():
-    """Find an available port on the host."""
-    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
-        s.bind(("", 0))
-        s.listen(1)
-        port = s.getsockname()[1]
-    return port
-
-# Find an available port to avoid conflicts with existing services
-port = find_available_port()
-print(f"Using port: {port}")
-
-provider = LocalDockerProvider()
-base_url = provider.start_container("coding-env:latest", port=port)
-print(f"Container started at: {base_url}")
-
-# Wait for the server to be ready before creating the client
-provider.wait_for_ready(base_url, timeout_s=100)
-
-# Use the environment via base_url
-# provider.stop_container()
-try:
-    # Create environment from Docker image
-    # coding_env = CodingEnv.from_docker_image("coding-env:latest")
-    coding_env = CodingEnv(base_url=base_url, provider=provider)
-    # Reset
-    result = coding_env.reset()
-    print(f"Reset complete: exit_code={result.observation.exit_code}")
-
-    # Execute Python code
-    code_samples = [
-        "print('Hello, World!')",
-        "x = 5 + 3\nprint(f'Result: {x}')",
-        "import math\nprint(math.pi)",
-    ]
-
-    for code in code_samples:
-        result = coding_env.step(CodeAction(code=code))
-        print(f"Code: {code}")
-        print(f"  → stdout: {result.observation.stdout.strip()}")
-        print(f"  → exit_code: {result.observation.exit_code}")
-
-except Exception as e:
-    print(f"Error: {e}")
-
-# Always clean up
-coding_env.close()
-provider.stop_container()

From ba8e92e05ea573bde2813dd10067787a0e5ec0e1 Mon Sep 17 00:00:00 2001
From: Kai Wu <kaiwu@meta.com>
Date: Sat, 1 Nov 2025 16:18:54 -0700
Subject: [PATCH 06/10] add -e port

---
 src/core/containers/runtime/providers.py | 60 ++++++++++++++++--------
 1 file changed, 41 insertions(+), 19 deletions(-)

diff --git a/src/core/containers/runtime/providers.py b/src/core/containers/runtime/providers.py
index 527a8767..f6f4ccad 100644
--- a/src/core/containers/runtime/providers.py
+++ b/src/core/containers/runtime/providers.py
@@ -145,6 +145,9 @@ def start_container(
         """
         import subprocess
         import time
+        import logging
+
+        logger = logging.getLogger(__name__)
 
         # Use default port if not specified
         if port is None:
@@ -155,6 +158,7 @@ def start_container(
 
         # Build docker run command
         # Use host networking for better performance and consistency with podman
+        # NOTE: Do NOT use --rm initially - if container fails to start, we need logs
         cmd = [
             "docker", "run",
             "-d",  # Detached
@@ -167,27 +171,24 @@ def start_container(
             for key, value in env_vars.items():
                 cmd.extend(["-e", f"{key}={value}"])
 
+        # Pass custom port via environment variable instead of overriding command
+        # This allows the container to use its proper entrypoint/CMD
+        if port != 8000:
+            cmd.extend(["-e", f"PORT={port}"])
+
         # Add image
         cmd.append(image)
           
-        # Add command override if provided (to change port)
+        # Add command override if provided (explicit override by user)
         if "command_override" in kwargs:
             cmd.extend(kwargs["command_override"])
-        elif port != 8000:
-            # Infer app path from image name for common environments
-            app_module = self._infer_app_module(image)
-            if app_module:
-                cmd.extend([
-                    "uvicorn",
-                    app_module,
-                    "--host", "0.0.0.0",
-                    "--port", str(port)
-                ])
 
         # Run container
         try:
+            logger.debug(f"Starting container with command: {' '.join(cmd)}")
             result = subprocess.run(cmd, capture_output=True, text=True, check=True)
             self._container_id = result.stdout.strip()
+            logger.debug(f"Container started with ID: {self._container_id}")
         except subprocess.CalledProcessError as e:
             error_msg = f"Failed to start Docker container.\nCommand: {' '.join(cmd)}\nExit code: {e.returncode}\nStderr: {e.stderr}\nStdout: {e.stdout}"
             raise RuntimeError(error_msg) from e
@@ -262,20 +263,41 @@ def wait_for_ready(self, base_url: str, timeout_s: float = 30.0) -> None:
 
         # If we timeout, provide diagnostic information
         error_msg = f"Container at {base_url} did not become ready within {timeout_s}s"
-        
+          
         if self._container_id:
             try:
-                # Get container logs to help diagnose the issue
-                result = subprocess.run(
-                    ["docker", "logs", "--tail", "50", self._container_id],
+                # First check if container exists
+                inspect_result = subprocess.run(
+                    ["docker", "inspect", self._container_id],
                     capture_output=True,
                     text=True,
                     timeout=5,
                 )
-                if result.stdout or result.stderr:
-                    error_msg += f"\n\nContainer logs (last 50 lines):\n{result.stdout}\n{result.stderr}"
-            except Exception:
-                pass
+                  
+                if inspect_result.returncode != 0:
+                    # Container doesn't exist - likely exited and auto-removed due to --rm flag
+                    error_msg += f"\n\nContainer was auto-removed (likely exited immediately)."
+                    error_msg += f"\nThis typically means:"
+                    error_msg += f"\n  1. The container image has an error in its startup script"
+                    error_msg += f"\n  2. Required dependencies are missing in the container"
+                    error_msg += f"\n  3. Port {base_url.split(':')[-1]} might be in use by another process"
+                    error_msg += f"\n  4. Container command/entrypoint is misconfigured"
+                    error_msg += f"\nTry running the container manually to debug:"
+                    error_msg += f"\n  docker run -it --rm <IMAGE_NAME>"
+                else:
+                    # Container exists, try to get logs
+                    result = subprocess.run(
+                        ["docker", "logs", "--tail", "50", self._container_id],
+                        capture_output=True,
+                        text=True,
+                        timeout=5,
+                    )
+                    if result.stdout or result.stderr:
+                        error_msg += f"\n\nContainer logs (last 50 lines):\n{result.stdout}\n{result.stderr}"
+            except subprocess.TimeoutExpired:
+                error_msg += f"\n\nTimeout while trying to inspect container"
+            except Exception as e:
+                error_msg += f"\n\nFailed to get container diagnostics: {e}"
 
         if last_error:
             error_msg += f"\n\nLast connection error: {last_error}"

From d1ad7a78943b962937afba1b43e61364674abeb4 Mon Sep 17 00:00:00 2001
From: Kai Wu <kaiwu@meta.com>
Date: Sat, 1 Nov 2025 16:34:17 -0700
Subject: [PATCH 07/10] take PORT env in Dockerfile

---
 src/envs/coding_env/server/Dockerfile | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/envs/coding_env/server/Dockerfile b/src/envs/coding_env/server/Dockerfile
index 7cf90d5d..5717e21e 100644
--- a/src/envs/coding_env/server/Dockerfile
+++ b/src/envs/coding_env/server/Dockerfile
@@ -21,9 +21,12 @@ COPY src/envs/coding_env/ /app/src/envs/coding_env/
 # Copy README for web interface documentation
 COPY src/envs/coding_env/README.md /app/README.md
 
-# Health check
+# Set default port (can be overridden via environment variable)
+ENV PORT=8000
+
+# Health check (uses PORT env variable)
 HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
-    CMD curl -f http://localhost:8000/health || exit 1
+    CMD curl -f http://localhost:${PORT}/health || exit 1
 
-# Run the FastAPI server
-CMD ["uvicorn", "envs.coding_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"]
+# Run the FastAPI server: sh expands ${PORT} at start-up, then exec replaces the shell so uvicorn receives stop signals
+CMD ["sh", "-c", "exec uvicorn envs.coding_env.server.app:app --host 0.0.0.0 --port ${PORT}"]

From fa1e79b1c13de029ca997a0c795001c14640a7fb Mon Sep 17 00:00:00 2001
From: Kai Wu <kaiwu@meta.com>
Date: Sat, 1 Nov 2025 17:18:44 -0700
Subject: [PATCH 08/10] add numpy

---
 src/envs/coding_env/server/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/envs/coding_env/server/requirements.txt b/src/envs/coding_env/server/requirements.txt
index cf6769a0..cd90ed4a 100644
--- a/src/envs/coding_env/server/requirements.txt
+++ b/src/envs/coding_env/server/requirements.txt
@@ -1 +1,2 @@
 smolagents
+numpy

From be83fd7113416d0dad3a1e9e74a1625601b517f5 Mon Sep 17 00:00:00 2001
From: Kai Wu <kaiwu@meta.com>
Date: Sat, 1 Nov 2025 21:35:24 -0700
Subject: [PATCH 09/10] add memory_gb limit

---
 src/core/containers/runtime/providers.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/core/containers/runtime/providers.py b/src/core/containers/runtime/providers.py
index f6f4ccad..957bb690 100644
--- a/src/core/containers/runtime/providers.py
+++ b/src/core/containers/runtime/providers.py
@@ -139,6 +139,7 @@ def start_container(
             env_vars: Environment variables for the container
             **kwargs: Additional Docker run options
                 - command_override: List of command args to override container CMD
+                - memory_gb: Memory limit in GB (default: 4GB)
 
         Returns:
             Base URL to connect to the container
@@ -153,6 +154,9 @@ def start_container(
         if port is None:
             port = 8000
 
+        # Use default memory limit if not specified
+        memory_gb = kwargs.get("memory_gb", 4)
+
         # Generate container name
         self._container_name = self._generate_container_name(image)
 
@@ -164,6 +168,9 @@ def start_container(
             "-d",  # Detached
             "--name", self._container_name,
             "--network", "host",  # Use host network
+            "--memory", f"{memory_gb}g",  # Limit container memory
+            "--memory-swap", f"{memory_gb}g",  # Prevent swap usage (set equal to --memory)
+            "--oom-kill-disable=false",  # Allow OOM killer (exit gracefully)
         ]
 
         # Add environment variables

From 85ea808ef1de34bdced590a8c2c259009b572f82 Mon Sep 17 00:00:00 2001
From: Kai Wu <kaiwu@meta.com>
Date: Tue, 4 Nov 2025 13:46:00 -0800
Subject: [PATCH 10/10] change python executor

---
 src/core/tools/local_python_executor.py | 177 +++++++++++++++++-------
 1 file changed, 130 insertions(+), 47 deletions(-)

diff --git a/src/core/tools/local_python_executor.py b/src/core/tools/local_python_executor.py
index e1f0145c..83759c09 100644
--- a/src/core/tools/local_python_executor.py
+++ b/src/core/tools/local_python_executor.py
@@ -13,6 +13,7 @@
 
 import multiprocessing
 import signal
+import threading
 from typing import Optional
 
 from smolagents import LocalPythonExecutor
@@ -25,6 +26,32 @@ def _timeout_handler(signum, frame):
     raise TimeoutError("Code execution timed out")
 
 
+def _run_with_timeout(executor, code: str, timeout_s: float, result_container: list):
+    """Helper function to run code execution in a separate process with timeout.
+    
+    Args:
+        executor: The LocalPythonExecutor instance
+        code: Code to execute
+        timeout_s: Timeout in seconds
+        result_container: List to store the result (mutated in place)
+    """
+    try:
+        exec_result = executor(code)
+        result_container.append({
+            'success': True,
+            'stdout': exec_result.logs,
+            'stderr': '',
+            'exit_code': 0
+        })
+    except Exception as e:
+        result_container.append({
+            'success': False,
+            'stdout': '',
+            'stderr': str(e),
+            'exit_code': 1
+        })
+
+
 class PyExecutor:
     """
     Wrapper around smolagents LocalPythonExecutor for executing Python code.
@@ -94,59 +121,115 @@ def run(self, code: str, timeout_s: Optional[float] = None) -> CodeExecResult:
             >>> print(result.exit_code)  # 1
             >>> print("timeout" in result.stderr.lower())  # True
         """
+        # Use proper multiprocessing-based timeout for subprocess protection
+        if timeout_s is not None and timeout_s > 0:
+            return self._run_with_process_timeout(code, timeout_s)
+        
+        # No timeout - run directly
         try:
-            # Set up timeout using signal (Unix/Linux only)
-            old_handler = None
-            if timeout_s is not None and timeout_s > 0:
-                try:
-                    # Set alarm signal handler for timeout
-                    old_handler = signal.signal(signal.SIGALRM, _timeout_handler)
-                    signal.alarm(int(timeout_s))
-                except (ValueError, AttributeError):
-                    # signal.alarm is not available on Windows
-                    # Fall back to no timeout on Windows
-                    pass
-
-            try:
-                # Execute the code using LocalPythonExecutor
-                # LocalPythonExecutor returns a CodeOutput object with output, logs, is_final_answer
-                exec_result = self._executor(code)
-
-                # Extract the logs (which contain print outputs) as stdout
-                # The output field contains the return value of the code
-                stdout = exec_result.logs
-                stderr = ""
-                exit_code = 0  # Success
-
-                return CodeExecResult(
-                    stdout=stdout,
-                    stderr=stderr,
-                    exit_code=exit_code,
-                )
-            finally:
-                # Cancel the alarm and restore old handler
-                if timeout_s is not None and timeout_s > 0:
-                    try:
-                        signal.alarm(0)
-                        if old_handler is not None:
-                            signal.signal(signal.SIGALRM, old_handler)
-                    except (ValueError, AttributeError):
-                        pass
-
-        except TimeoutError as e:
-            # Code execution exceeded timeout
+            exec_result = self._executor(code)
+            return CodeExecResult(
+                stdout=exec_result.logs,
+                stderr="",
+                exit_code=0,
+            )
+        except Exception as e:
             return CodeExecResult(
                 stdout="",
-                stderr=f"Code execution timed out after {timeout_s} seconds. "
-                       f"Possible infinite loop or extremely long computation.",
-                exit_code=1,  # Non-zero indicates error
+                stderr=str(e),
+                exit_code=1,
             )
 
+    def _run_with_process_timeout(self, code: str, timeout_s: float) -> CodeExecResult:
+        """Execute code with proper subprocess timeout protection using multiprocessing.
+        
+        This method uses multiprocessing.Process to isolate code execution and
+        ensures the process is properly terminated if it exceeds the timeout.
+        
+        Args:
+            code: Python code to execute
+            timeout_s: Timeout in seconds
+            
+        Returns:
+            CodeExecResult with execution results or timeout error
+        """
+        # Use a Manager to share results between processes
+        manager = multiprocessing.Manager()
+        result_container = manager.list()
+        
+        # Create a process to run the code
+        process = multiprocessing.Process(
+            target=_run_with_timeout,
+            args=(self._executor, code, timeout_s, result_container)
+        )
+        
+        try:
+            # Start the process
+            process.start()
+            
+            # Wait for completion with timeout
+            process.join(timeout=timeout_s)
+            
+            # Check if process completed
+            if process.is_alive():
+                # CRITICAL: Process exceeded timeout - KILL IT!
+                print(f"WARNING: Code execution timed out after {timeout_s}s, terminating process {process.pid}")
+                process.terminate()  # Send SIGTERM
+                process.join(timeout=2)  # Wait up to 2s for graceful shutdown
+                
+                if process.is_alive():
+                    # Still alive - force kill
+                    print(f"WARNING: Process {process.pid} did not terminate, force killing")
+                    process.kill()  # Send SIGKILL
+                    process.join(timeout=1)
+                
+                return CodeExecResult(
+                    stdout="",
+                    stderr=f"Code execution timed out after {timeout_s} seconds. "
+                           f"Process was terminated. Possible infinite loop or extremely long computation.",
+                    exit_code=1,
+                )
+            
+            # Process completed - check results
+            if result_container:
+                result = result_container[0]
+                if result['success']:
+                    return CodeExecResult(
+                        stdout=result['stdout'],
+                        stderr=result['stderr'],
+                        exit_code=result['exit_code'],
+                    )
+                else:
+                    return CodeExecResult(
+                        stdout=result['stdout'],
+                        stderr=result['stderr'],
+                        exit_code=result['exit_code'],
+                    )
+            else:
+                # Process completed but no result - something went wrong
+                return CodeExecResult(
+                    stdout="",
+                    stderr="Code execution completed but produced no output",
+                    exit_code=1,
+                )
+                
         except Exception as e:
-            # LocalPythonExecutor raises InterpreterError for various issues
-            # (syntax errors, forbidden operations, runtime errors, etc.)
+            # Clean up process on exception
+            if process.is_alive():
+                process.terminate()
+                process.join(timeout=1)
+                if process.is_alive():
+                    process.kill()
+            
             return CodeExecResult(
                 stdout="",
-                stderr=str(e),
-                exit_code=1,  # Non-zero indicates error
+                stderr=f"Error during code execution: {str(e)}",
+                exit_code=1,
             )
+        finally:
+            # Ensure process is cleaned up
+            if process.is_alive():
+                process.terminate()
+                process.join(timeout=1)
+                if process.is_alive():
+                    process.kill()