diff --git a/dspy/clients/base_lm.py b/dspy/clients/base_lm.py
index 8d3e6bb65f..a50442b1cc 100644
--- a/dspy/clients/base_lm.py
+++ b/dspy/clients/base_lm.py
@@ -225,24 +225,83 @@ def _process_response(self, response):
         Returns:
             List of processed outputs, which is always of size 1 because the Response API only supports one output.
         """
+
+        def _normalize_output_item(item):
+            """Convert response output item to dict format regardless of input type.
+
+            OpenAI's Responses API returns different formats based on tool usage:
+            - Without tools: Returns objects with attributes (.type, .content, etc.)
+            - With tools (e.g., web_search): Returns dicts with keys ('type', 'content', etc.)
+
+            This function normalizes both formats to dict for consistent processing.
+            """
+            if isinstance(item, dict):
+                # Already a dict, return as-is
+                return item
+
+            # Convert object to dict
+            normalized = {"type": item.type}
+
+            # Handle content
+            if hasattr(item, "content") and item.content:
+                normalized["content"] = []
+                for content_item in item.content:
+                    if isinstance(content_item, dict):
+                        normalized["content"].append(content_item)
+                    else:
+                        normalized["content"].append({"text": content_item.text})
+
+            # Handle function calls (store original for model_dump if needed)
+            if hasattr(item, "name"):
+                normalized["name"] = item.name
+            if hasattr(item, "arguments"):
+                normalized["arguments"] = item.arguments
+            if hasattr(item, "model_dump"):
+                # Store the original object for model_dump
+                normalized["_original"] = item
+
+            # Handle reasoning content
+            if hasattr(item, "summary") and item.summary:
+                normalized["summary"] = []
+                for summary_item in item.summary:
+                    if isinstance(summary_item, dict):
+                        normalized["summary"].append(summary_item)
+                    else:
+                        normalized["summary"].append({"text": summary_item.text})
+
+            return normalized
+
+        # Normalize all output items to dict format first
+        normalized_outputs = [_normalize_output_item(item) for item in response.output]
+
         text_outputs = []
         tool_calls = []
         reasoning_contents = []
 
-        for output_item in response.output:
-            output_item_type = output_item.type
+        for output_item in normalized_outputs:
+            output_item_type = output_item.get("type")
+
             if output_item_type == "message":
-                for content_item in output_item.content:
-                    text_outputs.append(content_item.text)
+                for content_item in output_item.get("content", []):
+                    text_outputs.append(content_item.get("text", ""))
+
             elif output_item_type == "function_call":
-                tool_calls.append(output_item.model_dump())
+                # Use original object for model_dump if available, otherwise use dict
+                if "_original" in output_item:
+                    tool_calls.append(output_item["_original"].model_dump())
+                else:
+                    tool_calls.append(output_item)
+
             elif output_item_type == "reasoning":
-                if getattr(output_item, "content", None) and len(output_item.content) > 0:
-                    for content_item in output_item.content:
-                        reasoning_contents.append(content_item.text)
-                elif getattr(output_item, "summary", None) and len(output_item.summary) > 0:
-                    for summary_item in output_item.summary:
-                        reasoning_contents.append(summary_item.text)
+                content = output_item.get("content", [])
+                summary = output_item.get("summary", [])
+
+                if content:
+                    for content_item in content:
+                        reasoning_contents.append(content_item.get("text", ""))
+                elif summary:
+                    for summary_item in summary:
+                        reasoning_contents.append(summary_item.get("text", ""))
 
         result = {}
         if len(text_outputs) > 0:
@@ -251,10 +310,10 @@ def _process_response(self, response):
         if len(tool_calls) > 0:
             result["tool_calls"] = tool_calls
         if len(reasoning_contents) > 0:
result["reasoning_content"] = "".join(reasoning_contents) + # All `response.output` items map to one answer, so we return a list of size 1. return [result] - def inspect_history(n: int = 1): """The global history shared across all LMs.""" return pretty_print_history(GLOBAL_HISTORY, n) diff --git a/dspy/clients/test_base_lm_response_formats.py b/dspy/clients/test_base_lm_response_formats.py new file mode 100644 index 0000000000..361f35a8ea --- /dev/null +++ b/dspy/clients/test_base_lm_response_formats.py @@ -0,0 +1,204 @@ +""" +Unit tests for _process_response method handling both dict and object formats. +Tests the fix for issue #8958 - web_search tools return dict format. +""" + +import pytest + +from dspy.clients.base_lm import BaseLM + + +class MockContent: + """Mock content object (object format)""" + def __init__(self, text): + self.text = text + + +class MockOutputItem: + """Mock output item (object format - without web_search)""" + def __init__(self, item_type, content=None, summary=None): + self.type = item_type + if content: + self.content = content + if summary: + self.summary = summary + + def model_dump(self): + return {"type": self.type, "name": "test_function", "arguments": "{}"} + + +class MockResponse: + """Mock response object""" + def __init__(self, output): + self.output = output + self.usage = type("obj", (object,), { + "completion_tokens": 10, + "prompt_tokens": 5, + "total_tokens": 15 + })() + self.model = "gpt-4" + + +class TestProcessResponseFormats: + """Test _process_response handles both dict and object formats""" + + @pytest.fixture + def base_lm(self): + """Create a BaseLM instance for testing""" + return BaseLM(model="test-model", model_type="responses") + + def test_object_format_message(self, base_lm): + """Test processing object format (normal responses without web_search)""" + # Create mock response with object format + mock_response = MockResponse( + output=[ + MockOutputItem("message", content=[MockContent("Hello world")]) + ] + ) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert result[0]["text"] == "Hello world" + + def test_dict_format_message(self, base_lm): + """Test processing dict format (responses with web_search tools)""" + # Create mock response with dict format (as returned by web_search) + mock_response = MockResponse( + output=[ + { + "type": "message", + "content": [{"text": "Hello from web search"}] + } + ] + ) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert result[0]["text"] == "Hello from web search" + + def test_dict_format_with_multiple_content(self, base_lm): + """Test dict format with multiple content items""" + mock_response = MockResponse( + output=[ + { + "type": "message", + "content": [ + {"text": "Part 1"}, + {"text": " Part 2"}, + {"text": " Part 3"} + ] + } + ] + ) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert result[0]["text"] == "Part 1 Part 2 Part 3" + + def test_object_format_function_call(self, base_lm): + """Test function call in object format""" + mock_item = MockOutputItem("function_call") + mock_response = MockResponse(output=[mock_item]) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert "tool_calls" in result[0] + assert len(result[0]["tool_calls"]) == 1 + + def test_dict_format_function_call(self, base_lm): + """Test function call in dict format""" + mock_response = MockResponse( + output=[ + { + "type": "function_call", + "name": "web_search", + 
"arguments": '{"query": "test"}' + } + ] + ) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert "tool_calls" in result[0] + assert result[0]["tool_calls"][0]["name"] == "web_search" + + def test_object_format_reasoning(self, base_lm): + """Test reasoning content in object format""" + mock_response = MockResponse( + output=[ + MockOutputItem("reasoning", content=[MockContent("Thinking step 1")]) + ] + ) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert "reasoning_content" in result[0] + assert result[0]["reasoning_content"] == "Thinking step 1" + + def test_dict_format_reasoning(self, base_lm): + """Test reasoning content in dict format""" + mock_response = MockResponse( + output=[ + { + "type": "reasoning", + "content": [{"text": "Reasoning step 1"}] + } + ] + ) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert "reasoning_content" in result[0] + assert result[0]["reasoning_content"] == "Reasoning step 1" + + def test_dict_format_reasoning_with_summary(self, base_lm): + """Test reasoning with summary (fallback when no content)""" + mock_response = MockResponse( + output=[ + { + "type": "reasoning", + "summary": [{"text": "Summary text"}] + } + ] + ) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert "reasoning_content" in result[0] + assert result[0]["reasoning_content"] == "Summary text" + + def test_mixed_format_backwards_compatibility(self, base_lm): + """Test that both formats can coexist (edge case)""" + # Mix of object and dict formats in same response + mock_response = MockResponse( + output=[ + MockOutputItem("message", content=[MockContent("Object format")]), + {"type": "message", "content": [{"text": " Dict format"}]} + ] + ) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert result[0]["text"] == "Object format Dict format" + + def test_empty_content(self, base_lm): + """Test handling of empty content""" + mock_response = MockResponse( + output=[ + {"type": "message", "content": []} + ] + ) + + result = base_lm._process_response(mock_response) + + assert len(result) == 1 + assert "text" not in result[0] # No text key when no content