Skip to content

Commit e9d92c9

Browse files
committed
Catch a common hallucination
1 parent 59409c2 commit e9d92c9

File tree

4 files changed

+29
-8
lines changed

4 files changed

+29
-8
lines changed

docs/server/usage.mdx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,17 @@ To control the server's behavior, send the following commands:
4141

4242
1. Stop execution:
4343
```json
44+
{"role": "user", "type": "command", "start": true},
4445
{"role": "user", "type": "command", "content": "stop"}
46+
{"role": "user", "type": "command", "end": true}
4547
```
4648
This stops all execution and message processing.
4749

4850
2. Execute code block:
4951
```json
52+
{"role": "user", "type": "command", "start": true},
5053
{"role": "user", "type": "command", "content": "go"}
54+
{"role": "user", "type": "command", "end": true}
5155
```
5256
This executes a generated code block and allows the agent to proceed.
5357

interpreter/core/async_core.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -367,12 +367,8 @@ async def send_output():
367367
# TODO
368368
@router.post("/")
369369
async def post_input(payload: Dict[str, Any]):
370-
# This doesn't work, but something like this should exist
371-
query = payload.get("query")
372-
if not query:
373-
return {"error": "Query is required."}, 400
374370
try:
375-
async_interpreter.input.put(query)
371+
async_interpreter.input(payload)
376372
return {"status": "success"}
377373
except Exception as e:
378374
return {"error": str(e)}, 500

interpreter/core/respond.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import json
22
import os
33
import re
4+
import time
45
import traceback
56

67
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
@@ -181,8 +182,8 @@ def respond(interpreter):
181182

182183
if code.replace("\n", "").replace(" ", "").startswith("{language:"):
183184
try:
184-
code = code.replace("language: ", "'language': ").replace(
185-
"code: ", "'code': "
185+
code = code.replace("language: ", '"language": ').replace(
186+
"code: ", '"code": '
186187
)
187188
code_dict = json.loads(code)
188189
if set(code_dict.keys()) == {"language", "code"}:
@@ -197,9 +198,19 @@ def respond(interpreter):
197198
except:
198199
pass
199200

200-
if language == "text" or language == "markdown":
201+
if (
202+
language == "text"
203+
or language == "markdown"
204+
or language == "plaintext"
205+
):
201206
# It does this sometimes just to take notes. Let it, it's useful.
202207
# In the future we should probably not detect this behavior as code at all.
208+
real_content = interpreter.messages[-1]["content"]
209+
interpreter.messages[-1] = {
210+
"role": "assistant",
211+
"type": "message",
212+
"content": f"```\n{real_content}\n```",
213+
}
203214
continue
204215

205216
# Is this language enabled/supported?

tests/test_interpreter.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,16 @@ def test_hallucinations():
5151
assert chunk.get("content") == "22"
5252
break
5353

54+
code = """{language: "python", code: "print('hello')" }"""
55+
56+
interpreter.messages = [
57+
{"role": "assistant", "type": "code", "format": "python", "content": code}
58+
]
59+
for chunk in interpreter._respond_and_store():
60+
if chunk.get("format") == "output":
61+
assert chunk.get("content").strip() == "hello"
62+
break
63+
5464

5565
@pytest.mark.skip(reason="Requires uvicorn, which we don't require by default")
5666
def test_server():

0 commit comments

Comments
 (0)