Skip to content

Commit 1af82ff

Browse files
authored
Merge pull request #165 from meta-pytorch/coding-example
[EXAMPLE] coding inference example
2 parents c633048 + ce31752 commit 1af82ff

File tree

1 file changed

+226
-0
lines changed

1 file changed

+226
-0
lines changed

examples/coding_env_inference.py

Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
#!/usr/bin/env python3
2+
"""Solve a coding task with a hosted LLM via Hugging Face Inference.
3+
4+
This script mirrors ``textarena_wordle_inference.py`` but targets the Coding
5+
environment. It launches the CodingEnv Docker image locally and asks an
6+
OpenAI-compatible model served through Hugging Face's router to iteratively
7+
produce Python code until the task is solved.
8+
9+
Prerequisites
10+
-------------
11+
1. Build the Coding environment Docker image::
12+
13+
docker build \
14+
-f src/envs/coding_env/server/Dockerfile \
15+
-t coding-env:latest .
16+
17+
2. Set your Hugging Face token, or any other API key that is compatible with the OpenAI API:
18+
19+
export HF_TOKEN=your_token_here
20+
export API_KEY=your_api_key_here
21+
22+
3. Run the script::
23+
24+
python examples/coding_env_inference.py
25+
26+
The script keeps sending execution feedback to the model until it prints
27+
``Result: 338350`` or reaches the configured step limit.
28+
"""
29+
30+
from __future__ import annotations
31+
32+
import os
33+
import re
34+
from typing import List, Tuple
35+
36+
from openai import OpenAI
37+
38+
from envs.coding_env import CodeAction, CodingEnv
39+
40+
41+
# ---------------------------------------------------------------------------
42+
# Configuration
43+
# ---------------------------------------------------------------------------
44+
45+
API_BASE_URL = "https://router.huggingface.co/v1"
46+
API_KEY = os.getenv("API_KEY") or os.getenv("HF_TOKEN")
47+
48+
MODEL = "openai/gpt-oss-120b:novita"
49+
MAX_STEPS = 5
50+
VERBOSE = True
51+
52+
CODING_TASK = (
53+
"Write Python code that prints the sum of squares of the integers from 1 "
54+
"to 100 inclusive. The final line must be exactly `Result: <value>` with "
55+
"the correct number substituted."
56+
)
57+
EXPECTED_SUBSTRING = "Result: 338350"
58+
59+
SYSTEM_PROMPT = (
60+
"You are an expert Python programmer. Respond with valid Python code that "
61+
"solves the user's task. Always wrap your final answer in a fenced code "
62+
"block starting with ```python. Provide a complete script that can be "
63+
"executed as-is, with no commentary outside the code block."
64+
)
65+
66+
67+
# ---------------------------------------------------------------------------
68+
# Helpers
69+
# ---------------------------------------------------------------------------
70+
71+
def extract_python_code(text: str) -> str:
    """Pull the first fenced code block out of *text*.

    Accepts fences with or without a ``python`` language tag (case-insensitive).
    When no fence is present, the whole message is treated as code and returned
    stripped of surrounding whitespace.
    """
    fence = re.compile(
        r"```(?:python)?\s*(.*?)```",
        re.IGNORECASE | re.DOTALL,
    )
    match = fence.search(text)
    if match is None:
        # No code fence at all — fall back to the raw reply.
        return text.strip()
    return match.group(1).strip()
82+
83+
84+
def format_feedback(
    step: int,
    stdout: str,
    stderr: str,
    exit_code: int,
) -> str:
    """Describe the previous execution so the model can revise its script.

    Whitespace-only streams are shown as ``<empty>`` so the model can tell
    "produced nothing" apart from real output.
    """
    shown_stdout = stdout if stdout.strip() else "<empty>"
    shown_stderr = stderr if stderr.strip() else "<empty>"
    segments = [
        f"Execution feedback for step {step}:",
        f"exit_code={exit_code}",
        f"stdout:\n{shown_stdout}",
        f"stderr:\n{shown_stderr}",
        "If the task is not solved, return an improved Python script.",
    ]
    return "\n".join(segments)
101+
102+
103+
def build_initial_prompt(task: str) -> str:
    """Compose the opening user message that states the coding task."""
    intro = (
        "You must write Python code to satisfy the following task. "
        "When executed, your script should behave exactly as described."
    )
    outro = "Reply with the full script in a single ```python code block."
    return f"{intro}\n\nTask:\n{task}\n\n{outro}"
112+
113+
114+
# ---------------------------------------------------------------------------
115+
# Gameplay
116+
# ---------------------------------------------------------------------------
117+
118+
def solve_coding_task(
    env: CodingEnv,
    client: OpenAI,
) -> Tuple[bool, List[str]]:
    """Iteratively ask the model for code until the task is solved.

    Args:
        env: Running Coding environment used to execute candidate scripts.
        client: OpenAI-compatible client pointed at the inference router.

    Returns:
        A ``(solved, transcripts)`` pair. ``solved`` is True once a script
        exits cleanly and prints ``EXPECTED_SUBSTRING``; ``transcripts`` is a
        human-readable record of every execution attempt.
    """
    history = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": build_initial_prompt(CODING_TASK)},
    ]

    # We only need the side effect of starting a fresh episode; the initial
    # observation carries no code-execution result yet.
    env.reset()

    transcripts: List[str] = []

    for step in range(1, MAX_STEPS + 1):
        response = client.chat.completions.create(
            model=MODEL,
            messages=history,
            max_tokens=2048,
            temperature=0.2,
        )

        # ``content`` may be None (e.g. refusals or tool-only replies);
        # treat that as empty text instead of crashing on ``.strip()``.
        assistant_message = (response.choices[0].message.content or "").strip()
        history.append({"role": "assistant", "content": assistant_message})

        code = extract_python_code(assistant_message)

        if VERBOSE:
            print(f"\n🛠️ Step {step}: executing model-produced code")
            print(code)

        result = env.step(CodeAction(code=code))
        obs = result.observation

        transcripts.append(
            (
                f"Step {step} | exit_code={obs.exit_code}\n"
                f"stdout:\n{obs.stdout}\n"
                f"stderr:\n{obs.stderr}\n"
            )
        )

        if VERBOSE:
            print(" ▶ exit_code:", obs.exit_code)
            if obs.stdout:
                print(" ▶ stdout:\n" + obs.stdout)
            if obs.stderr:
                print(" ▶ stderr:\n" + obs.stderr)

        solved = obs.exit_code == 0 and EXPECTED_SUBSTRING in obs.stdout
        if solved:
            return True, transcripts

        history.append(
            {
                "role": "user",
                "content": format_feedback(
                    step,
                    obs.stdout,
                    obs.stderr,
                    obs.exit_code,
                ),
            }
        )

        # Keep conversation history compact to avoid exceeding context
        # limits: retain the system prompt plus the most recent turns.
        if len(history) > 20:
            history = [history[0]] + history[-19:]

    return False, transcripts
189+
190+
191+
# ---------------------------------------------------------------------------
192+
# Entrypoint
193+
# ---------------------------------------------------------------------------
194+
195+
def main() -> None:
    """Wire up the inference client and environment, then run the solve loop."""
    if not API_KEY:
        raise SystemExit(
            "HF_TOKEN (or API_KEY) must be set to query the model."
        )

    client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)

    env = CodingEnv.from_docker_image(
        "coding-env:latest",
        ports={8000: 8000},
    )

    # Always tear the container down, even if the solve loop raises.
    try:
        success, transcripts = solve_coding_task(env, client)
    finally:
        env.close()

    if success:
        print("\n✅ Session complete")
    else:
        print("\n⚠️ Session finished without solving the task")

    print("--- Execution transcripts ---")
    for entry in transcripts:
        print(entry)
225+
226+

0 commit comments

Comments
 (0)