Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 98 additions & 2 deletions openhands-sdk/openhands/sdk/tool/schema.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import json
import types
from abc import ABC, abstractmethod
from collections.abc import Sequence
from typing import Any, ClassVar, TypeVar
from typing import Annotated, Any, ClassVar, TypeVar, Union, get_args, get_origin

from pydantic import ConfigDict, Field, create_model
from pydantic import ConfigDict, Field, create_model, model_validator
from rich.text import Text

from openhands.sdk.llm import ImageContent, TextContent
Expand Down Expand Up @@ -100,6 +102,100 @@ class Schema(DiscriminatedUnionMixin):

model_config: ClassVar[ConfigDict] = ConfigDict(extra="forbid", frozen=True)

@model_validator(mode="before")
@classmethod
def _decode_json_strings(cls, data: Any) -> Any:
    """Pre-validator that decodes JSON strings given for list/dict fields.

    Runs before field validation. For every field whose annotation admits a
    ``list`` or ``dict`` (directly, inside ``Annotated``, or as a member of
    a union such as ``list[str] | None``), a string input is parsed with
    ``json.loads()`` and replaced by the result when that result is a list
    or dict. Anything else is left untouched so that normal Pydantic
    validation reports the error.

    This handles cases where certain LLMs (such as GLM 4.6) incorrectly
    encode array/object parameters as JSON strings when using native
    function calling, e.g. the decoded tool-call arguments

        {"command": "view", "path": "/tmp/test.txt", "view_range": "[1, 5]"}

    carry ``view_range`` as the string ``"[1, 5]"`` instead of the array
    ``[1, 5]``.

    The input mapping is never mutated: a shallow copy is made the first
    time a value has to be replaced.

    Args:
        data: The input data (usually a dict) before validation.

    Returns:
        ``data`` itself when nothing needed decoding (or when it is not a
        dict), otherwise a shallow copy with the decoded values.
    """
    if not isinstance(data, dict):
        return data

    # ``X | Y`` unions have their own origin type on Python 3.10+.
    pep604_union = getattr(types, "UnionType", None)

    def accepts_collection(annotation: Any) -> bool:
        """Return True if *annotation* admits a ``list`` or ``dict`` value."""
        pending = [annotation]
        while pending:
            current = pending.pop()
            origin = get_origin(current)
            if origin is Annotated:
                # Only the first Annotated argument is the actual type.
                pending.extend(get_args(current)[:1])
            elif origin is Union or (
                pep604_union is not None and origin is pep604_union
            ):
                # Inspect every member; members may themselves be Annotated.
                pending.extend(get_args(current))
            elif (origin if origin is not None else current) in (list, dict):
                return True
        return False

    decoded: dict[Any, Any] | None = None  # lazily created copy of ``data``

    # Use model_fields to properly handle aliases and inherited fields
    for field_name, field_info in cls.model_fields.items():
        # A field may be supplied under its alias, its (string) validation
        # alias, or its own name; dict.fromkeys de-duplicates in order.
        candidate_keys = dict.fromkeys(
            key
            for key in (
                field_info.alias,
                getattr(field_info, "validation_alias", None),
                field_name,
            )
            if isinstance(key, str)
        )
        string_keys = [
            key for key in candidate_keys if isinstance(data.get(key), str)
        ]
        if not string_keys or not accepts_collection(field_info.annotation):
            continue

        for key in string_keys:
            try:
                parsed_value = json.loads(data[key])
            except (ValueError, RecursionError):
                # Not valid JSON (json.JSONDecodeError is a ValueError) or
                # pathologically nested: keep the original string and let
                # Pydantic raise the validation error if needed.
                continue
            # json.loads() may also return str/int/float/bool/None; only a
            # list or dict is a meaningful replacement here.
            if isinstance(parsed_value, (list, dict)):
                if decoded is None:
                    decoded = dict(data)
                decoded[key] = parsed_value

    return data if decoded is None else decoded

@classmethod
def to_mcp_schema(cls) -> dict[str, Any]:
"""Convert to JSON schema format compatible with MCP."""
Expand Down
257 changes: 257 additions & 0 deletions tests/sdk/tool/test_schema_json_decoding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
"""Tests for JSON string decoding in Schema validator.

This module tests the _decode_json_strings validator that automatically
decodes JSON strings for list/dict fields. This handles cases where LLMs
(such as GLM 4.6) return array/object values as JSON strings instead of native
JSON arrays/objects.
"""

from typing import Annotated

import pytest
from pydantic import Field, ValidationError

from openhands.sdk.tool.schema import Action


# Fixture model used by most tests in this module: one list-typed field, one
# dict-typed field and one plain string field, all without defaults.
class JsonDecodingTestAction(Action):
    """Test action with list and dict fields."""

    # List-typed field; tests feed it both native lists and JSON strings.
    items: list[str] = Field(description="A list of items")
    # Dict-typed field; tests feed it both native dicts and JSON strings.
    config: dict[str, int] = Field(description="Configuration dictionary")
    # Plain string field used to check that ordinary strings stay strings.
    name: str = Field(description="A regular string field")


# Fixture model whose fields carry their ``Field`` metadata through
# ``Annotated[...]`` instead of a default-value assignment.
class JsonDecodingAnnotatedAction(Action):
    """Test action with Annotated types."""

    # List field declared via Annotated.
    items: Annotated[list[str], Field(description="A list of items")]
    # Dict field declared via Annotated.
    config: Annotated[dict[str, int], Field(description="Configuration dictionary")]


# Fixture model whose fields are populated through camelCase aliases
# ("myList" / "myDict") rather than their Python attribute names.
class JsonDecodingAliasAction(Action):
    """Test action with field aliases."""

    # Supplied in input data under the key "myList".
    my_list: list[int] = Field(alias="myList", description="A list with alias")
    # Supplied in input data under the key "myDict".
    my_dict: dict[str, str] = Field(alias="myDict", description="A dict with alias")


# Fixture model with ``X | None`` union annotations; both fields default to
# None so the model can be validated from an empty dict.
class JsonDecodingOptionalAction(Action):
    """Test action with optional list/dict fields."""

    # Optional list field (union of list[str] and None).
    items: list[str] | None = Field(default=None, description="Optional list")
    # Optional dict field (union of dict[str, int] and None).
    config: dict[str, int] | None = Field(default=None, description="Optional dict")


def test_decode_json_string_list():
    """A list encoded as a JSON string is validated as a native list."""
    payload = {
        "items": '["a", "b", "c"]',
        "config": '{"x": 1, "y": 2}',
        "name": "test",
    }

    parsed = JsonDecodingTestAction.model_validate(payload)

    # Collections were decoded; the plain string is passed through as-is.
    assert parsed.items == ["a", "b", "c"]
    assert parsed.config == {"x": 1, "y": 2}
    assert parsed.name == "test"


def test_decode_json_string_dict():
    """A dict encoded as a JSON string is validated as a native dict."""
    parsed = JsonDecodingTestAction.model_validate(
        {
            "items": '["item1", "item2"]',
            "config": '{"key1": 10, "key2": 20}',
            "name": "dict_test",
        }
    )

    assert parsed.config == {"key1": 10, "key2": 20}
    assert parsed.items == ["item1", "item2"]
    assert parsed.name == "dict_test"


def test_native_list_dict_passthrough():
    """Values that are already a list or dict are accepted untouched."""
    native_items = ["direct", "list"]
    native_config = {"direct": 42}
    payload = {
        "items": ["direct", "list"],
        "config": {"direct": 42},
        "name": "native_test",
    }

    parsed = JsonDecodingTestAction.model_validate(payload)

    assert parsed.items == native_items
    assert parsed.config == native_config
    assert parsed.name == "native_test"


def test_regular_string_not_decoded():
    """Test that regular string fields are not affected by JSON decoding.

    Two ``name`` values are exercised: an ordinary sentence, and a string that
    *looks like* a JSON array. The second one is what actually pins the
    behavior: if the validator decoded every string instead of only
    list/dict-typed fields, ``name`` would no longer validate as ``str``.
    """
    names = (
        "this is not json but a regular string",
        '["looks", "like", "json"]',
    )
    for name in names:
        data = {
            "items": "[]",
            "config": "{}",
            "name": name,
        }
        action = JsonDecodingTestAction.model_validate(data)

        assert action.items == []
        assert action.config == {}
        # Regular string field should NOT be decoded
        assert isinstance(action.name, str)
        assert action.name == name


def test_annotated_types():
    """Fields declared through ``Annotated[...]`` get JSON strings decoded."""
    payload = {
        "items": '["x", "y", "z"]',
        "config": '{"a": 1, "b": 2}',
    }

    parsed = JsonDecodingAnnotatedAction.model_validate(payload)

    assert parsed.items == ["x", "y", "z"]
    assert parsed.config == {"a": 1, "b": 2}


def test_field_aliases():
    """JSON strings supplied under a field's alias are decoded."""
    parsed = JsonDecodingAliasAction.model_validate(
        {
            "myList": "[1, 2, 3]",
            "myDict": '{"key": "value"}',
        }
    )

    # Values are read back through the Python attribute names.
    assert parsed.my_list == [1, 2, 3]
    assert parsed.my_dict == {"key": "value"}


def test_optional_fields_with_json_strings():
    """``X | None`` fields also accept their value as a JSON string."""
    payload = {
        "items": '["opt1", "opt2"]',
        "config": '{"opt": 99}',
    }

    parsed = JsonDecodingOptionalAction.model_validate(payload)

    assert parsed.items == ["opt1", "opt2"]
    assert parsed.config == {"opt": 99}


def test_optional_fields_with_none():
    """Omitted optional fields fall back to their ``None`` default."""
    parsed = JsonDecodingOptionalAction.model_validate({})

    for value in (parsed.items, parsed.config):
        assert value is None


def test_optional_fields_with_native_values():
    """Optional fields accept native list/dict values directly."""
    parsed = JsonDecodingOptionalAction.model_validate(
        {
            "items": ["native1", "native2"],
            "config": {"native": 100},
        }
    )

    assert parsed.items == ["native1", "native2"]
    assert parsed.config == {"native": 100}


def test_invalid_json_string_rejected():
    """A non-JSON string for a list field still fails validation."""
    payload = {
        "items": "not valid json",
        "config": "{}",
        "name": "test",
    }

    with pytest.raises(ValidationError) as caught:
        JsonDecodingTestAction.model_validate(payload)

    # The string could not be parsed into a list, so the error names "items".
    error_text = str(caught.value)
    assert "items" in error_text


def test_json_string_with_wrong_type_rejected():
    """Valid JSON of the wrong collection kind is rejected by validation."""
    # "items" is a list field, yet the JSON string decodes to a dict.
    payload = {
        "items": '{"not": "a list"}',
        "config": "{}",
        "name": "test",
    }

    with pytest.raises(ValidationError) as caught:
        JsonDecodingTestAction.model_validate(payload)

    error_text = str(caught.value)
    assert "items" in error_text


def test_nested_structures():
    """JSON strings holding nested lists/dicts decode into nested natives."""

    class NestedAction(Action):
        nested_list: list[list[int]] = Field(description="Nested list")
        nested_dict: dict[str, dict[str, str]] = Field(description="Nested dict")

    payload = {
        "nested_list": "[[1, 2], [3, 4]]",
        "nested_dict": '{"outer": {"inner": "value"}}',
    }

    parsed = NestedAction.model_validate(payload)

    assert parsed.nested_list == [[1, 2], [3, 4]]
    assert parsed.nested_dict == {"outer": {"inner": "value"}}


def test_empty_collections():
    """The JSON strings "[]" and "{}" decode to empty collections."""
    parsed = JsonDecodingTestAction.model_validate(
        {
            "items": "[]",
            "config": "{}",
            "name": "empty",
        }
    )

    assert parsed.items == []
    assert parsed.config == {}


def test_mixed_native_and_json_strings():
    """One payload may mix native collections with JSON-string ones."""
    payload = {
        # Already a real list.
        "items": ["native", "list"],
        # Dict delivered as a JSON string.
        "config": '{"from": 1, "json": 2}',
        "name": "mixed",
    }

    parsed = JsonDecodingTestAction.model_validate(payload)

    assert parsed.items == ["native", "list"]
    assert parsed.config == {"from": 1, "json": 2}
    assert parsed.name == "mixed"


def test_unicode_in_json_strings():
    """Non-ASCII text inside JSON strings is preserved after decoding."""
    payload = {
        "items": '["hello", "世界", "🌍"]',
        "config": '{"greeting": 1, "你好": 2}',
        "name": "unicode",
    }

    parsed = JsonDecodingTestAction.model_validate(payload)

    assert parsed.items == ["hello", "世界", "🌍"]
    assert parsed.config == {"greeting": 1, "你好": 2}


def test_whitespace_in_json_strings():
    """Padding and inner whitespace in a JSON string do not block decoding."""
    padded_list = ' [ "a" , "b" , "c" ] '
    padded_dict = ' { "x" : 1 , "y" : 2 } '

    parsed = JsonDecodingTestAction.model_validate(
        {
            "items": padded_list,
            "config": padded_dict,
            "name": "whitespace",
        }
    )

    assert parsed.items == ["a", "b", "c"]
    assert parsed.config == {"x": 1, "y": 2}
Loading