Fix tool schema and add relevant tests; Add bash implementations (#4)

xingyaoww · openhands-agent · web-flow · commit 466708039e9d · 2025-08-24T23:01:57.000+08:00
Co-authored-by: openhands <openhands@all-hands.dev>
diff --git a/openhands/core/runtime/schema.py b/openhands/core/runtime/schema.py
@@ -24,6 +24,65 @@ def py_type(spec: dict[str, Any]) -> Any:
     return Any
 
 
+def _process_schema_node(node, defs, _resolving=frozenset()):
+    """Recursively simplify a JSON-schema node and inline local ``$ref`` targets.
+
+    Only a small subset of keywords survives in the output: ``type``,
+    ``description``, ``properties``/``required`` (for objects), ``items``
+    (for arrays) and ``enum``. ``anyOf`` is collapsed to its first non-null
+    alternative and ``allOf`` members are shallow-merged, so the result
+    contains no ``$ref``, ``anyOf`` or ``allOf`` keys.
+
+    https://www.reddit.com/r/mcp/comments/1kjo9gt/toolinputschema_conversion_from_pydanticmodel/
+    https://gist.github.com/leandromoreira/3de4819e4e4df9422d87f1d3e7465c16
+
+    Args:
+        node: The schema fragment (a dict) to process.
+        defs: Mapping of definition name to schema, i.e. the contents of the
+            root schema's ``$defs``.
+        _resolving: Internal. Names of the ``$defs`` entries currently being
+            expanded on this recursion path; used to stop on reference cycles.
+
+    Returns:
+        A newly built dict; ``node`` and ``defs`` are never mutated and no
+        list from the input is shared with the output.
+    """
+    # Handle $ref references
+    ref_path = node.get("$ref")
+    if isinstance(ref_path, str) and ref_path.startswith("#/$defs/"):
+        ref_name = ref_path.split("/")[-1]
+        if ref_name in defs:
+            if ref_name in _resolving:
+                # Self-referential model: inlining again would recurse forever,
+                # so emit an opaque object instead of expanding it.
+                resolved = {"type": "object"}
+            else:
+                resolved = _process_schema_node(
+                    defs[ref_name], defs, _resolving | {ref_name}
+                )
+            # A description placed next to the $ref documents this particular
+            # use of the definition, so it wins over the definition's own.
+            if "description" in node:
+                resolved["description"] = node["description"]
+            return resolved
+
+    # Start with a new schema object
+    result = {}
+
+    # Copy the basic properties
+    if "type" in node:
+        result["type"] = node["type"]
+
+    # Handle allOf (some pydantic versions wrap a lone $ref as allOf: [{$ref}]).
+    # Members are merged shallowly, later members overriding earlier ones.
+    for member in node.get("allOf", ()):
+        result.update(_process_schema_node(member, defs, _resolving))
+
+    # Handle anyOf (often used for optional fields with None)
+    if "anyOf" in node:
+        non_null_types = [t for t in node["anyOf"] if t.get("type") != "null"]
+        if non_null_types:
+            # Only the first non-null alternative is kept
+            result.update(_process_schema_node(non_null_types[0], defs, _resolving))
+
+    # Handle description (applied after anyOf/allOf so the outer text wins)
+    if "description" in node:
+        result["description"] = node["description"]
+
+    # Handle object properties recursively
+    if node.get("type") == "object" and "properties" in node:
+        result["type"] = "object"
+        result["properties"] = {
+            prop_name: _process_schema_node(prop_schema, defs, _resolving)
+            for prop_name, prop_schema in node["properties"].items()
+        }
+
+        # Add required fields if present (copied so the input list is not aliased)
+        if "required" in node:
+            result["required"] = list(node["required"])
+
+    # Handle arrays
+    if node.get("type") == "array" and "items" in node:
+        result["type"] = "array"
+        result["items"] = _process_schema_node(node["items"], defs, _resolving)
+
+    # Handle enum (copied so the input list is not aliased)
+    if "enum" in node:
+        result["enum"] = list(node["enum"])
+
+    return result
+
+
 class Schema(BaseModel):
     """Base schema for input action / output observation."""
 
@@ -32,13 +91,9 @@ class Schema(BaseModel):
     @classmethod
     def to_mcp_schema(cls) -> dict[str, Any]:
         """Convert to JSON schema format compatible with MCP."""
-        js = cls.model_json_schema()
-        req = [n for n, f in cls.model_fields.items() if f.is_required()]
-        return {
-            "type": "object",
-            "properties": js.get("properties", {}) or {},
-            "required": req or [],
-        }
+        # The raw pydantic schema may contain "anyOf" unions and "$ref"
+        # pointers into "$defs"; _process_schema_node removes them so the
+        # result is fully compatible with the MCP tool schema.
+        pydantic_schema = cls.model_json_schema()
+        shared_defs = pydantic_schema.get("$defs", {})
+        return _process_schema_node(pydantic_schema, shared_defs)
 
     @classmethod
     def from_mcp_schema(
diff --git a/openhands/core/runtime/tool.py b/openhands/core/runtime/tool.py
@@ -1,7 +1,16 @@
-from typing import Any, Callable
+import re
+from typing import Any, Callable, TypeVar, Generic
 from pydantic import BaseModel, Field
 from .schema import ActionBase, ObservationBase, Schema
 
+# Type parameters for the generic Tool class: ActionT is bounded by
+# ActionBase and ObservationT by ObservationBase.
+ActionT = TypeVar("ActionT", bound=ActionBase)
+ObservationT = TypeVar("ObservationT", bound=ObservationBase)
+
+
+def to_camel_case(s: str) -> str:
+    """Join the words of ``s`` into one identifier, capitalizing each word.
+
+    Words are separated by runs of underscores, hyphens or whitespace; empty
+    pieces (from leading/trailing separators) are ignored. Every word,
+    including the first, is passed through ``str.capitalize``, so the result
+    is UpperCamelCase and any upper-case letters after a word's first
+    character are lowered (``"myTool"`` -> ``"Mytool"``).
+    """
+    capitalized = []
+    for word in re.split(r"[_\-\s]+", s):
+        if not word:
+            continue
+        capitalized.append(word.capitalize())
+    return "".join(capitalized)
+
 
 class ToolAnnotations(BaseModel):
     """Annotations to provide hints about the tool's behavior.
@@ -30,7 +39,7 @@ class ToolAnnotations(BaseModel):
     )
 
 
-class Tool:
+class Tool(Generic[ActionT, ObservationT]):
     """Tool that wraps an executor function with input/output validation and schema.
 
     - Normalize input/output schemas (class or dict) into both model+schema.
@@ -48,7 +57,7 @@ def __init__(
         description: str | None = None,
         annotations: ToolAnnotations | None = None,
         _meta: dict[str, Any] | None = None,
-        execute_fn: Callable[[ActionBase], ObservationBase] | None = None,
+        execute_fn: Callable[[ActionT], ObservationT] | None = None,
     ):
         self.name = name
         self.description = description
@@ -71,7 +80,7 @@ def _set_input_schema(
         elif isinstance(input_schema, dict):
             self.input_schema = input_schema
             self.action_type = ActionBase.from_mcp_schema(
-                f"{self.name}Action", input_schema
+                f"{to_camel_case(self.name)}Action", input_schema
             )
         else:
             raise TypeError(
@@ -93,14 +102,18 @@ def _set_output_schema(
         elif isinstance(output_schema, dict):
             self.output_schema = output_schema
             self.observation_type = ObservationBase.from_mcp_schema(
-                f"{self.name}Observation", output_schema
+                f"{to_camel_case(self.name)}Observation", output_schema
             )
         else:
             raise TypeError(
                 "output_schema must be ObservationBase subclass, dict, or None"
             )
 
-    def call(self, action: ActionBase) -> ObservationBase:
+    def call(self, action: ActionT) -> ObservationBase:
+        """Validate input, execute, and coerce output.
+
+        We always return some ObservationBase subclass, but not always the generic ObservationT.
+        """
         if self.execute_fn is None:
             raise NotImplementedError(f"Tool '{self.name}' has no executor")
 
diff --git a/openhands/core/runtime/tools/execute_bash/__init__.py b/openhands/core/runtime/tools/execute_bash/__init__.py
@@ -0,0 +1,4 @@
+from .definition import execute_bash_tool, ExecuteBashAction, ExecuteBashObservation
+
+
+__all__ = ["execute_bash_tool", "ExecuteBashAction", "ExecuteBashObservation"]
diff --git a/openhands/core/runtime/tools/execute_bash/definition.py b/openhands/core/runtime/tools/execute_bash/definition.py
@@ -0,0 +1,84 @@
+"""Execute bash tool implementation."""
+
+from pydantic import Field
+
+from openhands.core.runtime.tool import Tool, ToolAnnotations
+from openhands.core.runtime.schema import ActionBase, ObservationBase
+from openhands.core.runtime.security import SECURITY_RISK_DESC, SECURITY_RISK_LITERAL
+
+
+class ExecuteBashAction(ActionBase):
+    """Schema for bash command execution."""
+
+    # NOTE: pydantic surfaces the class docstring and every Field description
+    # in the generated JSON schema, so all of this text is runtime data.
+
+    # The command text itself; also used for empty input and control keys.
+    command: str = Field(
+        description=(
+            "The bash command to execute. "
+            "Can be empty string to view additional logs when previous exit code is `-1`. "
+            "Can be `C-c` (Ctrl+C) to interrupt the currently running process. "
+            "Note: You can only execute one bash command at a time. "
+            "If you need to run multiple commands sequentially, you can use `&&` or `;` to chain them together."
+        )
+    )
+    # Whether `command` is input for the running process rather than a new command.
+    is_input: bool = Field(
+        default=False,
+        description=(
+            "If True, the command is an input to the running process. "
+            "If False, the command is a bash command to be executed in the terminal. "
+            "Default is False."
+        ),
+    )
+    # Optional hard timeout in seconds; None falls back to the soft timeout.
+    timeout: float | None = Field(
+        default=None,
+        description=(
+            "Optional. Sets a hard timeout in seconds for the command execution. "
+            "If not provided, the command will use the default soft timeout behavior."
+        ),
+    )
+    # Required: no default is given, so callers must always supply it.
+    security_risk: SECURITY_RISK_LITERAL = Field(description=SECURITY_RISK_DESC)
+
+
+class ExecuteBashObservation(ObservationBase):
+    """A ToolResult that can be rendered as a CLI output."""
+
+    # NOTE: pydantic surfaces the class docstring and every Field description
+    # in the generated JSON schema, so all of this text is runtime data.
+
+    # Captured stdout of the command.
+    output: str = Field(
+        default="",
+        description="The output from the command execution (stdout).",
+    )
+    # Process exit status; -1 is the "still running after soft timeout" marker.
+    exit_code: int = Field(
+        default=0,
+        description=(
+            "The exit code of the command. "
+            "-1 indicates the process hit the soft timeout and is not yet finished."
+        ),
+    )
+    # Captured stderr of the command.
+    error: str = Field(
+        default="",
+        description="Any error output from the command execution (stderr).",
+    )
+    # Flag set when execution timed out.
+    timeout: bool = Field(
+        default=False,
+        description="Whether the command execution timed out.",
+    )
+
+
+# Markdown usage guide passed as the `description` of `execute_bash_tool`
+# below. This is runtime text, so edit the wording with care.
+TOOL_DESCRIPTION = """Execute a bash command in the terminal within a persistent shell session.
+
+
+### Command Execution
+* One command at a time: You can only execute one bash command at a time. If you need to run multiple commands sequentially, use `&&` or `;` to chain them together.
+* Persistent session: Commands execute in a persistent shell session where environment variables, virtual environments, and working directory persist between commands.
+* Soft timeout: Commands have a soft timeout of 10 seconds, once that's reached, you have the option to continue or interrupt the command (see section below for details)
+* Shell options: Do NOT use `set -e`, `set -eu`, or `set -euo pipefail` in shell scripts or commands in this environment. The runtime may not support them and can cause unusable shell sessions. If you want to run multi-line bash commands, write the commands to a file and then run it, instead.
+
+### Long-running Commands
+* For commands that may run indefinitely, run them in the background and redirect output to a file, e.g. `python3 app.py > server.log 2>&1 &`.
+* For commands that may run for a long time (e.g. installation or testing commands), or commands that run for a fixed amount of time (e.g. sleep), you should set the "timeout" parameter of your function call to an appropriate value.
+* If a bash command returns exit code `-1`, this means the process hit the soft timeout and is not yet finished. By setting `is_input` to `true`, you can:
+  - Send empty `command` to retrieve additional logs
+  - Send text (set `command` to the text) to STDIN of the running process
+  - Send control commands like `C-c` (Ctrl+C), `C-d` (Ctrl+D), or `C-z` (Ctrl+Z) to interrupt the process
+  - If you do C-c, you can re-start the process with a longer "timeout" parameter to let it run to completion
+
+### Best Practices
+* Directory verification: Before creating new directories or files, first verify the parent directory exists and is the correct location.
+* Directory management: Try to maintain working directory by using absolute paths and avoiding excessive use of `cd`.
+
+### Output Handling
+* Output truncation: If the output exceeds a maximum length, it will be truncated before being returned.
+"""
+
+
+# Declarative definition of the bash tool: name, input/output schemas,
+# description and behavior hints. No `execute_fn` is passed here, and
+# Tool.call raises NotImplementedError when `self.execute_fn` is None, so an
+# executor presumably has to be attached elsewhere before use (TODO confirm).
+execute_bash_tool = Tool(
+    name="execute_bash",
+    input_schema=ExecuteBashAction,
+    output_schema=ExecuteBashObservation,
+    description=TOOL_DESCRIPTION,
+    annotations=ToolAnnotations(
+        title="execute_bash",
+        readOnlyHint=False,
+        destructiveHint=True,
+        idempotentHint=False,
+        openWorldHint=True,
+    ),
+)
diff --git a/tests/core/runtime/test_tool.py b/tests/core/runtime/test_tool.py

-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
 +from .definition import execute_bash_tool, ExecuteBashAction, ExecuteBashObservation
++
++
 +__all__ = ["execute_bash_tool", "ExecuteBashAction", "ExecuteBashObservation"]