Catch a common hallucination

KillianLucas · KillianLucas · commit e9d92c98513f · 2024-07-17T14:14:55.000-07:00
diff --git a/docs/server/usage.mdx b/docs/server/usage.mdx
@@ -41,13 +41,17 @@ To control the server's behavior, send the following commands:
 
 1. Stop execution:
    ```json
+   {"role": "user", "type": "command", "start": true}
    {"role": "user", "type": "command", "content": "stop"}
+   {"role": "user", "type": "command", "end": true}
    ```
    This stops all execution and message processing.
 
 2. Execute code block:
    ```json
+   {"role": "user", "type": "command", "start": true}
    {"role": "user", "type": "command", "content": "go"}
+   {"role": "user", "type": "command", "end": true}
    ```
    This executes a generated code block and allows the agent to proceed.
 
diff --git a/interpreter/core/async_core.py b/interpreter/core/async_core.py
@@ -367,12 +367,8 @@ async def send_output():
     # TODO
     @router.post("/")
     async def post_input(payload: Dict[str, Any]):
-        # This doesn't work, but something like this should exist
-        query = payload.get("query")
-        if not query:
-            return {"error": "Query is required."}, 400
         try:
-            async_interpreter.input.put(query)
+            async_interpreter.input(payload)
             return {"status": "success"}
         except Exception as e:
             return {"error": str(e)}, 500
diff --git a/interpreter/core/respond.py b/interpreter/core/respond.py
@@ -1,6 +1,7 @@
 import json
 import os
 import re
+import time
 import traceback
 
 os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
@@ -181,8 +182,8 @@ def respond(interpreter):
 
                 if code.replace("\n", "").replace(" ", "").startswith("{language:"):
                     try:
-                        code = code.replace("language: ", "'language': ").replace(
-                            "code: ", "'code': "
+                        code = code.replace("language: ", '"language": ').replace(
+                            "code: ", '"code": '
                         )
                         code_dict = json.loads(code)
                         if set(code_dict.keys()) == {"language", "code"}:
@@ -197,9 +198,19 @@ def respond(interpreter):
                     except:
                         pass
 
-                if language == "text" or language == "markdown":
+                if (
+                    language == "text"
+                    or language == "markdown"
+                    or language == "plaintext"
+                ):
                     # It does this sometimes just to take notes. Let it, it's useful.
                     # In the future we should probably not detect this behavior as code at all.
+                    real_content = interpreter.messages[-1]["content"]
+                    interpreter.messages[-1] = {
+                        "role": "assistant",
+                        "type": "message",
+                        "content": f"```\n{real_content}\n```",
+                    }
                     continue
 
                 # Is this language enabled/supported?
diff --git a/tests/test_interpreter.py b/tests/test_interpreter.py
@@ -51,6 +51,16 @@ def test_hallucinations():
             assert chunk.get("content") == "22"
             break
 
+    code = """{language: "python", code: "print('hello')" }"""
+
+    interpreter.messages = [
+        {"role": "assistant", "type": "code", "format": "python", "content": code}
+    ]
+    for chunk in interpreter._respond_and_store():
+        if chunk.get("format") == "output":
+            assert chunk.get("content").strip() == "hello"
+            break
+
 
 @pytest.mark.skip(reason="Requires uvicorn, which we don't require by default")
 def test_server():