Skip to content

Commit f12205f

Browse files
committed
Add boiler plate code for CodingEnv
1 parent f0e52bb commit f12205f

File tree

3 files changed

+103
-2
lines changed

3 files changed

+103
-2
lines changed

src/core/types.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,8 @@ class StepResult(Generic[ObsT]):
1515
observation: The environment's observation after the action.
1616
reward: Scalar reward for this step (optional).
1717
done: Whether the episode is finished.
18-
info: Additional metadata (e.g. debug info, latency, etc.).
1918
"""
2019

2120
observation: ObsT
2221
reward: Optional[float] = None
2322
done: bool = False
24-
info: Optional[dict[str, Any]] = None

src/envs/coding_env/env.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
"""
2+
envs/coding_env/env.py
3+
--------------------------------
4+
Concrete environment implementation using the core BaseEnv.
5+
POC implementation: step() currently returns a placeholder observation; real code execution (e.g. via a local subprocess or a container backend) can be plugged in later.
6+
"""
7+
8+
from __future__ import annotations
9+
10+
import subprocess
11+
from typing import Optional
12+
13+
from core.base import BaseEnv
14+
from core.types import StepResult
15+
16+
from .models import CodeAction, CodeObservation
17+
18+
19+
class CodingEnv(BaseEnv[CodeAction, CodeObservation]):
    """
    Minimal Coding Environment.

    POC behavior:
      - reset(): returns a fresh, empty observation (no persistent state).
      - step(action): validates the action type and returns a placeholder
        observation (empty stdout/stderr, exit_code 0). The submitted code
        is NOT executed yet.

    Future swap:
      Replace the placeholder in step() with a call to a real executor
      (local subprocess or Docker/gateway backend) without changing the
      public API.
    """

    def __init__(
        self,
        default_timeout_s: float = 10.0,
        python_executable: str = "python",
    ):
        """
        Args:
            default_timeout_s: Max seconds to allow code execution before timing out.
            python_executable: Interpreter to run (e.g., "python3", a venv path, etc.).

        Both values are only stored for now; step() does not consume them
        until a real executor is wired in.
        """
        self._default_timeout_s = float(default_timeout_s)
        self._python = python_executable

    # --- BaseEnv interface ---

    def reset(self) -> CodeObservation:
        """Return the initial, empty observation."""
        # No state to clear in this POC; return an initial observation.
        return CodeObservation(stdout="", stderr="", exit_code=0)

    def step(self, action: CodeAction) -> StepResult[CodeObservation]:
        """
        Process one action and return the resulting step.

        Args:
            action: The code execution request.

        Returns:
            A StepResult carrying the observation, a reward of 1.0 when the
            observation has exit_code 0 and empty stderr (0.0 otherwise),
            and done=False.

        Raises:
            TypeError: If ``action`` is not a CodeAction.
        """
        if not isinstance(action, CodeAction):
            raise TypeError(f"Expected CodeAction, got {type(action)!r}")

        # TODO: replace dummy response with the call to the code executor inside the container
        obs = CodeObservation(stderr="", stdout="", exit_code=0)

        # Simple reward heuristic: success and no stderr -> 1.0 else 0.0
        reward = 1.0 if (obs.exit_code == 0 and not obs.stderr) else 0.0

        # NOTE: StepResult has no `info` field, so no extra metadata
        # (timeout flag, interpreter) is attached to the result.
        return StepResult(
            observation=obs,
            reward=reward,
            done=False,  # Coding env is not episodic by default
        )

src/envs/coding_env/models.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
"""
2+
envs/coding_env/models.py
3+
--------------------------------
4+
Action/Observation types for the Coding environment.
5+
"""
6+
7+
from __future__ import annotations
8+
9+
from dataclasses import dataclass
10+
from typing import Any, Optional
11+
12+
13+
@dataclass
class CodeAction:
    """
    Represents a single code execution request.

    Attributes:
        code: The code to be executed.
    """

    code: str
    # Optional: future fields like 'lint': bool, 'timeout_s': float, etc.
21+
22+
23+
@dataclass
class CodeObservation:
    """
    Result of executing code in the environment.

    Attributes:
        stdout: Captured standard output (empty by default).
        stderr: Captured standard error (empty by default).
        exit_code: Process exit status (0 by default).
    """

    stdout: str = ""
    stderr: str = ""
    exit_code: int = 0

0 commit comments

Comments
 (0)