Introduce dspy.Reasoning to capture native reasoning from reasoning models (#8986)

chenmoneygithub · arnavsinghvi11 · web-flow · commit 6ad51eb4071c · 2025-11-13T14:36:54.000-08:00
* support for native reasoning in CoT for reasoning models

* ruff and test

* Introduce dspy.Reasoning to handle ChainOfThought on reasoning models

* remove unintended file

* fix

* make reasoning string-like

* increment

* go

* polish the docstring

* automatically turn on reasoning for COT on reasoning model

* comments

* fix tests

* fix

* add dspy.Reasoning

* comments

* add comment for backward compatibility

---------

Co-authored-by: arnavsinghvi11 <arnav11.singhvi@gmail.com>
diff --git a/dspy/__init__.py b/dspy/__init__.py
@@ -6,7 +6,7 @@
 
 from dspy.evaluate import Evaluate  # isort: skip
 from dspy.clients import *  # isort: skip
-from dspy.adapters import Adapter, ChatAdapter, JSONAdapter, XMLAdapter, TwoStepAdapter, Image, Audio, File, History, Type, Tool, ToolCalls, Code  # isort: skip
+from dspy.adapters import Adapter, ChatAdapter, JSONAdapter, XMLAdapter, TwoStepAdapter, Image, Audio, File, History, Type, Tool, ToolCalls, Code, Reasoning  # isort: skip
 from dspy.utils.logging_utils import configure_dspy_loggers, disable_logging, enable_logging
 from dspy.utils.asyncify import asyncify
 from dspy.utils.syncify import syncify
diff --git a/dspy/adapters/__init__.py b/dspy/adapters/__init__.py
@@ -2,7 +2,7 @@
 from dspy.adapters.chat_adapter import ChatAdapter
 from dspy.adapters.json_adapter import JSONAdapter
 from dspy.adapters.two_step_adapter import TwoStepAdapter
-from dspy.adapters.types import Audio, Code, File, History, Image, Tool, ToolCalls, Type
+from dspy.adapters.types import Audio, Code, File, History, Image, Reasoning, Tool, ToolCalls, Type
 from dspy.adapters.xml_adapter import XMLAdapter
 
 __all__ = [
@@ -19,4 +19,5 @@
     "TwoStepAdapter",
     "Tool",
     "ToolCalls",
+    "Reasoning",
 ]
diff --git a/dspy/adapters/base.py b/dspy/adapters/base.py
@@ -6,6 +6,7 @@
 
 from dspy.adapters.types import History, Type
 from dspy.adapters.types.base_type import split_message_content_for_custom_types
+from dspy.adapters.types.reasoning import Reasoning
 from dspy.adapters.types.tool import Tool, ToolCalls
 from dspy.experimental import Citations
 from dspy.signatures.signature import Signature
@@ -16,7 +17,7 @@
 if TYPE_CHECKING:
     from dspy.clients.lm import LM
 
-_DEFAULT_NATIVE_RESPONSE_TYPES = [Citations]
+_DEFAULT_NATIVE_RESPONSE_TYPES = [Citations, Reasoning]
 
 
 class Adapter:
@@ -99,14 +100,14 @@ def _call_preprocess(
 
                 return signature_for_native_function_calling
 
-        # Handle custom types that use native response
+        # Handle custom types that use native LM features, e.g., reasoning, citations, etc.
         for name, field in signature.output_fields.items():
             if (
                 isinstance(field.annotation, type)
                 and issubclass(field.annotation, Type)
                 and field.annotation in self.native_response_types
             ):
-                signature = signature.delete(name)
+                signature = field.annotation.adapt_to_native_lm_feature(signature, name, lm, lm_kwargs)
 
         return signature
 
@@ -116,6 +117,7 @@ def _call_postprocess(
         original_signature: type[Signature],
         outputs: list[dict[str, Any] | str],
         lm: "LM",
+        lm_kwargs: dict[str, Any],
     ) -> list[dict[str, Any]]:
         values = []
 
@@ -152,14 +154,16 @@ def _call_postprocess(
                 ]
                 value[tool_call_output_field_name] = ToolCalls.from_dict_list(tool_calls)
 
-            # Parse custom types that does not rely on the adapter parsing
+            # Parse custom types that do not rely on the `Adapter.parse()` method
             for name, field in original_signature.output_fields.items():
                 if (
                     isinstance(field.annotation, type)
                     and issubclass(field.annotation, Type)
                     and field.annotation in self.native_response_types
                 ):
-                    value[name] = field.annotation.parse_lm_response(output)
+                    parsed_value = field.annotation.parse_lm_response(output)
+                    if parsed_value is not None:
+                        value[name] = parsed_value
 
             if output_logprobs:
                 value["logprobs"] = output_logprobs
@@ -196,7 +200,7 @@ def __call__(
         inputs = self.format(processed_signature, demos, inputs)
 
         outputs = lm(messages=inputs, **lm_kwargs)
-        return self._call_postprocess(processed_signature, signature, outputs, lm)
+        return self._call_postprocess(processed_signature, signature, outputs, lm, lm_kwargs)
 
     async def acall(
         self,
@@ -210,7 +214,7 @@ async def acall(
         inputs = self.format(processed_signature, demos, inputs)
 
         outputs = await lm.acall(messages=inputs, **lm_kwargs)
-        return self._call_postprocess(processed_signature, signature, outputs, lm)
+        return self._call_postprocess(processed_signature, signature, outputs, lm, lm_kwargs)
 
     def format(
         self,
diff --git a/dspy/adapters/types/__init__.py b/dspy/adapters/types/__init__.py
@@ -4,6 +4,7 @@
 from dspy.adapters.types.file import File
 from dspy.adapters.types.history import History
 from dspy.adapters.types.image import Image
+from dspy.adapters.types.reasoning import Reasoning
 from dspy.adapters.types.tool import Tool, ToolCalls
 
-__all__ = ["History", "Image", "Audio", "File", "Type", "Tool", "ToolCalls", "Code"]
+__all__ = ["History", "Image", "Audio", "File", "Type", "Tool", "ToolCalls", "Code", "Reasoning"]
diff --git a/dspy/adapters/types/base_type.py b/dspy/adapters/types/base_type.py
@@ -1,11 +1,15 @@
 import json
 import re
-from typing import Any, Optional, get_args, get_origin
+from typing import TYPE_CHECKING, Any, Optional, get_args, get_origin
 
 import json_repair
 import pydantic
 from litellm import ModelResponseStream
 
+if TYPE_CHECKING:
+    from dspy.clients.lm import LM
+    from dspy.signatures.signature import Signature
+
 CUSTOM_TYPE_START_IDENTIFIER = "<<CUSTOM-TYPE-START-IDENTIFIER>>"
 CUSTOM_TYPE_END_IDENTIFIER = "<<CUSTOM-TYPE-END-IDENTIFIER>>"
 
@@ -70,6 +74,31 @@ def serialize_model(self):
             )
         return formatted
 
+    @classmethod
+    def adapt_to_native_lm_feature(
+        cls,
+        signature: type["Signature"],
+        field_name: str,
+        lm: "LM",
+        lm_kwargs: dict[str, Any],
+    ) -> type["Signature"]:
+        """Adapt the custom type to the native LM feature if possible.
+
+        When the LM and configuration support the related native LM feature, e.g., native tool calling, native
+        reasoning, etc., we adapt the signature and `lm_kwargs` to enable the native LM feature.
+
+        Args:
+            signature: The DSPy signature for the LM call.
+            field_name: The name of the field in the signature to adapt to the native LM feature.
+            lm: The LM instance.
+            lm_kwargs: The keyword arguments for the LM call, subject to in-place updates if adaptation is required.
+
+        Returns:
+            The adapted signature. If the custom type is not natively supported by the LM, return the original
+            signature.
+        """
+        return signature
+
     @classmethod
     def is_streamable(cls) -> bool:
         """Whether the custom type is streamable."""
diff --git a/dspy/adapters/types/citation.py b/dspy/adapters/types/citation.py
@@ -167,6 +167,24 @@ def __getitem__(self, index):
         """Allow indexing into citations."""
         return self.citations[index]
 
+    @classmethod
+    def adapt_to_native_lm_feature(cls, signature, field_name, lm, lm_kwargs) -> type:
+        """Remove the citations field from the signature for Anthropic models.
+
+        Args:
+            signature: The DSPy signature for the LM call.
+            field_name: The name of the citations field in the signature.
+            lm: The LM instance; only its `model` name is inspected.
+            lm_kwargs: The keyword arguments for the LM call. Not used by this type.
+
+        Returns:
+            The signature class without `field_name` when the model name starts with "anthropic/", otherwise
+            the original signature class unchanged.
+        """
+        if lm.model.startswith("anthropic/"):
+            return signature.delete(field_name)
+        return signature
+
     @classmethod
     def is_streamable(cls) -> bool:
         """Whether the Citations type is streamable."""
diff --git a/dspy/adapters/types/reasoning.py b/dspy/adapters/types/reasoning.py
@@ -0,0 +1,155 @@
+from typing import TYPE_CHECKING, Any, Optional
+
+import litellm
+import pydantic
+
+from dspy.adapters.types.base_type import Type
+
+if TYPE_CHECKING:
+    from dspy.clients.lm import LM
+    from dspy.signatures.signature import Signature
+
+
+class Reasoning(Type):
+    """Reasoning type in DSPy.
+
+    This type is useful when you want the DSPy output to include the reasoning of the LM. We build this type so that
+    DSPy can support reasoning models and non-reasoning models with the same code.
+
+    This is a str-like type: you can convert a string directly to a Reasoning object, and from DSPy adapters'
+    perspective, `Reasoning` is treated as a string.
+    """
+
+    content: str
+
+    def format(self):
+        """Return the reasoning text as a plain string."""
+        return f"{self.content}"
+
+    @pydantic.model_validator(mode="before")
+    @classmethod
+    def validate_input(cls, data: Any):
+        """Normalize the raw input before pydantic validates the model.
+
+        Accepts an existing `Reasoning` instance, a plain string, or a dict with a string `content` entry.
+
+        Raises:
+            ValueError: If `data` is a dict without a string `content` entry, or is of any other type.
+        """
+        if isinstance(data, cls):
+            return data
+
+        if isinstance(data, str):
+            return {"content": data}
+
+        if isinstance(data, dict):
+            if "content" not in data:
+                raise ValueError("`content` field is required for `dspy.Reasoning`")
+            if not isinstance(data["content"], str):
+                raise ValueError(f"`content` field must be a string, but received type: {type(data['content'])}")
+            return {"content": data["content"]}
+
+        raise ValueError(f"Received invalid value for `dspy.Reasoning`: {data}")
+
+    @classmethod
+    def adapt_to_native_lm_feature(
+        cls,
+        signature: type["Signature"],
+        field_name: str,
+        lm: "LM",
+        lm_kwargs: dict[str, Any],
+    ) -> type["Signature"]:
+        """Enable native reasoning for the LM call when the model and settings allow it.
+
+        Args:
+            signature: The DSPy signature for the LM call.
+            field_name: The name of the `Reasoning` field in the signature.
+            lm: The LM instance.
+            lm_kwargs: The keyword arguments for the LM call. `reasoning_effort` is set in place when native
+                reasoning is enabled.
+
+        Returns:
+            The signature without `field_name` when native reasoning is enabled, otherwise the original signature.
+        """
+        if "reasoning_effort" in lm_kwargs:
+            # `lm_kwargs` overrides `lm.kwargs`.
+            reasoning_effort = lm_kwargs["reasoning_effort"]
+        elif "reasoning_effort" in lm.kwargs:
+            reasoning_effort = lm.kwargs["reasoning_effort"]
+        else:
+            # Turn on the native reasoning explicitly if Reasoning field is present in the signature and no explicit
+            # reasoning effort is set in `lm_kwargs` or `lm.kwargs`.
+            reasoning_effort = "low"
+
+        if reasoning_effort is None or not litellm.supports_reasoning(lm.model):
+            # If users explicitly set `reasoning_effort` to None or the LM doesn't support reasoning, we don't enable
+            # native reasoning.
+            return signature
+
+        if "gpt-5" in lm.model and lm.model_type == "chat":
+            # There is a caveat of Litellm as 1.79.0 that when using the chat completion API on GPT-5 family models,
+            # the reasoning content is not available in the response. As a workaround, we don't enable the native
+            # reasoning feature for GPT-5 family models when using the chat completion API.
+            # Litellm issue: https://github.com/BerriAI/litellm/issues/14748
+            return signature
+
+        lm_kwargs["reasoning_effort"] = reasoning_effort
+        # Delete the reasoning field from the signature to use the native reasoning feature.
+        return signature.delete(field_name)
+
+    @classmethod
+    def parse_lm_response(cls, response: str | dict[str, Any]) -> Optional["Reasoning"]:
+        """Parse the LM response into a Reasoning object.
+
+        Args:
+            response: One LM output, either plain text or a dict of output entries.
+
+        Returns:
+            A `Reasoning` built from the `reasoning_content` entry when `response` is a dict containing one,
+            otherwise None.
+        """
+        # Check the type first: `in` on a string is a substring test, so plain text that merely mentions
+        # "reasoning_content" would otherwise reach the subscript below and raise a TypeError.
+        # A plain-text response carries no separate reasoning entry, so there is nothing to parse from it.
+        if isinstance(response, dict) and "reasoning_content" in response:
+            return cls(content=response["reasoning_content"])
+        return None
+
+    @classmethod
+    def parse_stream_chunk(cls, chunk) -> str | None:
+        """
+        Parse a stream chunk into reasoning content if available.
+
+        Args:
+            chunk: A stream chunk from the LM.
+
+        Returns:
+            The reasoning content (str) if available, None otherwise.
+        """
+        try:
+            if choices := getattr(chunk, "choices", None):
+                return getattr(choices[0].delta, "reasoning_content", None)
+        except Exception:
+            # Best effort: a chunk with an unexpected shape simply yields no reasoning content.
+            return None
+        return None
+
+    @classmethod
+    def is_streamable(cls) -> bool:
+        """Whether the Reasoning type is streamable."""
+        return True
+
+    def __repr__(self) -> str:
+        return f"{self.content!r}"
+
+    def __str__(self) -> str:
+        return self.content
+
+    def __eq__(self, other: object) -> bool:
+        """Compare by content against another `Reasoning` or a plain string."""
+        if isinstance(other, Reasoning):
+            return self.content == other.content
+        if isinstance(other, str):
+            return self.content == other
+        # Let Python try the reflected comparison instead of silently returning None for other types.
+        return NotImplemented
diff --git a/dspy/adapters/utils.py b/dspy/adapters/utils.py
@@ -12,6 +12,7 @@
 from pydantic.fields import FieldInfo
 
 from dspy.adapters.types.base_type import Type as DspyType
+from dspy.adapters.types.reasoning import Reasoning
 from dspy.signatures.utils import get_dspy_field_type
 
 
@@ -84,7 +85,7 @@ def move_type_to_front(d):
 def translate_field_type(field_name, field_info):
     field_type = field_info.annotation
 
-    if get_dspy_field_type(field_info) == "input" or field_type is str:
+    if get_dspy_field_type(field_info) == "input" or field_type is str or field_type is Reasoning:
         desc = ""
     elif field_type is bool:
         desc = "must be True or False"
@@ -190,6 +191,10 @@ def get_annotation_name(annotation):
     origin = get_origin(annotation)
     args = get_args(annotation)
     if origin is None:
+        if annotation is Reasoning:
+            # Keep backward compatibility with the old behavior in `dspy.ChainOfThought`, where reasoning
+            # field type is treated as a string.
+            return "str"
         if hasattr(annotation, "__name__"):
             return annotation.__name__
         else:
diff --git a/dspy/clients/base_lm.py b/dspy/clients/base_lm.py
@@ -204,6 +204,10 @@ def _process_completion(self, response, merged_kwargs):
         for c in response.choices:
             output = {}
             output["text"] = c.message.content if hasattr(c, "message") else c["text"]
+
+            if hasattr(c, "message") and hasattr(c.message, "reasoning_content") and c.message.reasoning_content:
+                output["reasoning_content"] = c.message.reasoning_content
+
             if merged_kwargs.get("logprobs"):
                 output["logprobs"] = c.logprobs if hasattr(c, "logprobs") else c["logprobs"]
             if hasattr(c, "message") and getattr(c.message, "tool_calls", None):
@@ -219,7 +223,6 @@ def _process_completion(self, response, merged_kwargs):
         if all(len(output) == 1 for output in outputs):
             # Return a list if every output only has "text" key
             outputs = [output["text"] for output in outputs]
-
         return outputs
 
     def _extract_citations_from_response(self, choice):
diff --git a/dspy/clients/lm.py b/dspy/clients/lm.py
@@ -493,6 +493,10 @@ def _convert_chat_request_to_responses_request(request: dict[str, Any]):
                 for item in c:
                     content_blocks.append(_convert_content_item_to_responses_format(item))
         request["input"] = [{"role": msg.get("role", "user"), "content": content_blocks}]
+    # Convert `reasoning_effort` to reasoning format supported by the Responses API
+    if "reasoning_effort" in request:
+        effort = request.pop("reasoning_effort")
+        request["reasoning"] = {"effort": effort, "summary": "auto"}
 
     # Convert `response_format` to `text.format` for Responses API
     if "response_format" in request:
diff --git a/dspy/streaming/streaming_listener.py b/dspy/streaming/streaming_listener.py
@@ -134,13 +134,6 @@ def receive(self, chunk: ModelResponseStream):
             else:
                 return
 
-        try:
-            chunk_message = chunk.choices[0].delta.content
-            if chunk_message is None:
-                return
-        except Exception:
-            return
-
         # Handle custom streamable types
         if self._output_type and issubclass(self._output_type, Type) and self._output_type.is_streamable():
             if parsed_chunk := self._output_type.parse_stream_chunk(chunk):
@@ -151,6 +144,14 @@ def receive(self, chunk: ModelResponseStream):
                     is_last_chunk=self.stream_end,
                 )
 
+        # For non-custom streamable types, the streaming chunks come from the content field of the ModelResponseStream.
+        try:
+            chunk_message = chunk.choices[0].delta.content
+            if chunk_message is None:
+                return
+        except Exception:
+            return
+
         if chunk_message and start_identifier in chunk_message and not isinstance(settings.adapter, JSONAdapter):
             # If the cache is hit, the chunk_message could be the full response. When it happens we can
             # directly end the stream listening. In some models like gemini, each stream chunk can be multiple
diff --git a/tests/adapters/test_chat_adapter.py b/tests/adapters/test_chat_adapter.py
diff --git a/tests/adapters/test_citation.py b/tests/adapters/test_citation.py
diff --git a/tests/adapters/test_json_adapter.py b/tests/adapters/test_json_adapter.py
diff --git a/tests/clients/test_lm.py b/tests/clients/test_lm.py
diff --git a/tests/streaming/test_streaming.py b/tests/streaming/test_streaming.py