Commit cbb6930

Squashed commit of the following:
commit 056d54e
Author: Isaac Miller <17116851+isaacbmiller@users.noreply.github.com>
Date: Wed Oct 29 17:23:09 2025 +0100

    fix(MIPROv2): zero shot not taking .compile parameters into account before determining if the program was zero shot (stanfordnlp#8909)

    * fix(MIPROv2): zero shot not taking .compile parameters into account before determining if the program was zero shot
    * remove extra logs
    * Remove log
    * Fix merge conflict
    * Remove extra whitespace

commit da69f9d
Author: TomuHirata <tomu.hirata@gmail.com>
Date: Wed Oct 29 13:23:34 2025 +0900

    Update anthropic model name (stanfordnlp#8992)

    Signed-off-by: TomuHirata <tomu.hirata@gmail.com>

commit aaadf05
Author: Chen Qian <chen.qian@databricks.com>
Date: Tue Oct 28 12:21:55 2025 -0700

    lints (stanfordnlp#8987)

commit e842ba1
Author: eramis73 <130156545+eramis73@users.noreply.github.com>
Date: Tue Oct 28 02:40:34 2025 +0300

    [docs] Add Google-style docstrings for dspy/evaluate/metrics.py (stanfordnlp#8954)

    * docs(metrics): add Google-style docstrings for public metrics
    * docs(metrics): address review feedback (concise openings, mkdocs block examples); revert non-doc changes
    * fixes

    Co-authored-by: chenmoneygithub <chen.qian@databricks.com>

commit 6c43880
Author: TomuHirata <tomu.hirata@gmail.com>
Date: Tue Oct 28 07:21:06 2025 +0900

    Cache Ollama to speed up CI (stanfordnlp#8972)

    * Cache Ollama to speed up CI
    * fix permission

commit 462baef
Author: Copilot <198982749+Copilot@users.noreply.github.com>
Date: Mon Oct 27 11:57:27 2025 -0700

    Fix TypeError when tracking usage with Anthropic models returning Pydantic objects (stanfordnlp#8978)

    * Initial plan
    * Fix TypeError when merging Anthropic CacheCreation objects in usage tracker
    * Enhance _flatten_usage_entry to convert Pydantic models on first add
    * Fix potential TypeError when both usage entries are None
    * simplify
    * small fix
    * lint
    * robust version handling

    Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
    Co-authored-by: TomeHirata <33407409+TomeHirata@users.noreply.github.com>
    Co-authored-by: chenmoneygithub <chen.qian@databricks.com>

commit 9b467b5
Author: Noah Ziems <nziems2@nd.edu>
Date: Mon Oct 27 13:32:07 2025 -0400

    Add Disable Fallback Option in ChatAdapter (stanfordnlp#8984)

commit bf022c7
Author: Lakshya A Agrawal <lakshyaaagrawal@berkeley.edu>
Date: Sat Oct 25 23:37:42 2025 +0530

    Update gepa[dspy] dependency version to 0.0.18 (stanfordnlp#8969)

    * Update gepa[dspy] dependency version to 0.0.18
    * Update pyproject.toml
    * fix test

    Co-authored-by: TomuHirata <tomu.hirata@gmail.com>
1 parent: c78c655
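Two of the fixes above touch files whose diffs are not shown below. Hedged sketches of the techniques the commit messages describe follow; names and signatures in both are illustrative, not the actual dspy internals.

First, the MIPROv2 zero-shot fix (056d54e): decide zero-shot from the effective demo counts after `.compile(...)` overrides are applied, not from the constructor values alone. A minimal sketch:

```python
def is_zero_shot(
    init_max_bootstrapped_demos: int,
    init_max_labeled_demos: int,
    compile_max_bootstrapped_demos: int | None = None,
    compile_max_labeled_demos: int | None = None,
) -> bool:
    """Illustrative only: apply .compile(...) overrides before classifying the run."""
    bootstrapped = (
        compile_max_bootstrapped_demos
        if compile_max_bootstrapped_demos is not None
        else init_max_bootstrapped_demos
    )
    labeled = (
        compile_max_labeled_demos
        if compile_max_labeled_demos is not None
        else init_max_labeled_demos
    )
    return bootstrapped == 0 and labeled == 0


# The constructor asked for demos, but .compile(...) overrode both counts to 0,
# which a check based on constructor values alone would have misclassified:
assert is_zero_shot(4, 4, compile_max_bootstrapped_demos=0, compile_max_labeled_demos=0)
```

Second, the usage-tracker fix (462baef): Anthropic responses can report usage as Pydantic objects (e.g. `CacheCreation`), and merging two of those with `+` raises a TypeError. Per the commit, Pydantic models are converted to plain dicts on first add and `None` entries are tolerated; a minimal sketch, with `flatten_usage`/`merge_usage` standing in for the private helpers:

```python
from typing import Any

from pydantic import BaseModel


class CacheCreation(BaseModel):
    """Stand-in for the Pydantic usage object an Anthropic response may carry."""

    ephemeral_1h_input_tokens: int = 0
    ephemeral_5m_input_tokens: int = 0


def flatten_usage(entry: Any) -> Any:
    """Convert Pydantic models to plain dicts on first add, recursively."""
    if isinstance(entry, BaseModel):
        entry = entry.model_dump()
    if isinstance(entry, dict):
        return {k: flatten_usage(v) for k, v in entry.items()}
    return entry


def merge_usage(current: Any, new: Any) -> Any:
    """Merge two flattened usage entries, tolerating None on either side."""
    if current is None:
        return new
    if new is None:
        return current
    if isinstance(current, dict) and isinstance(new, dict):
        return {k: merge_usage(current.get(k), new.get(k)) for k in current.keys() | new.keys()}
    return current + new  # plain numbers by this point


total = merge_usage(
    flatten_usage({"cache_creation": CacheCreation(ephemeral_1h_input_tokens=5)}),
    flatten_usage({"cache_creation": CacheCreation(ephemeral_5m_input_tokens=3)}),
)
# total["cache_creation"] sums to {"ephemeral_1h_input_tokens": 5, "ephemeral_5m_input_tokens": 3}
```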

File tree: 18 files changed (+544, -113 lines)
.github/workflows/run_tests.yml

Lines changed: 23 additions & 10 deletions
@@ -91,11 +91,6 @@ jobs:
   llm_call_test:
     name: Run Tests with Real LM
     runs-on: ubuntu-latest
-    services:
-      ollama:
-        image: ollama/ollama:latest
-        ports:
-          - 11434:11434
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5
@@ -116,15 +111,33 @@ jobs:
         run: |
           uv sync --dev -p .venv --extra dev
           uv pip list
-      - name: Pull LLM
+      - name: Cache Ollama models
+        id: cache-ollama
+        uses: actions/cache@v4
+        with:
+          path: ollama-data
+          key: ollama-llama3.2-3b-${{ runner.os }}-v1
+      - name: Start Ollama service
         run: |
+          mkdir -p ollama-data
+          docker run -d --name ollama \
+            -p 11434:11434 \
+            -v ${{ github.workspace }}/ollama-data:/root/.ollama \
+            ollama/ollama:latest
           timeout 60 bash -c 'until curl -f http://localhost:11434/api/version; do sleep 2; done'
-          curl -X POST http://localhost:11434/api/pull \
-            -H "Content-Type: application/json" \
-            -d '{"name": "llama3.2:3b"}'
-          echo "LM_FOR_TEST=ollama/llama3.2:3b" >> $GITHUB_ENV
+      - name: Pull LLM
+        if: steps.cache-ollama.outputs.cache-hit != 'true'
+        run: docker exec ollama ollama pull llama3.2:3b
+      - name: Set LM environment variable
+        run: echo "LM_FOR_TEST=ollama/llama3.2:3b" >> $GITHUB_ENV
       - name: Run tests
         run: uv run -p .venv pytest -m llm_call --llm_call -vv --durations=5 tests/
+      - name: Fix permissions for cache
+        if: always()
+        run: sudo chown -R $USER:$USER ollama-data || true
+      - name: Stop Ollama service
+        if: always()
+        run: docker stop ollama && docker rm ollama
 
   build_package:
     name: Build Package
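
For local debugging of the job above, a minimal sketch of pointing dspy at the dockerized Ollama. The model name and port come from the workflow; passing `api_base` through `dspy.LM` is an assumption about the local setup, since the job itself relies on the default host:

```python
import dspy

# llama3.2:3b is served by the `ollama` container on the port mapped above.
lm = dspy.LM("ollama/llama3.2:3b", api_base="http://localhost:11434")
dspy.configure(lm=lm)

print(dspy.Predict("question -> answer")(question="What is 2 + 2?").answer)
```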

docs/docs/index.md

Lines changed: 1 addition & 1 deletion
@@ -40,7 +40,7 @@ Instead of wrangling prompts or training jobs, DSPy (Declarative Self-improving
 
 ```python linenums="1"
 import dspy
-lm = dspy.LM("anthropic/claude-3-opus-20240229", api_key="YOUR_ANTHROPIC_API_KEY")
+lm = dspy.LM("anthropic/claude-sonnet-4-5-20250929", api_key="YOUR_ANTHROPIC_API_KEY")
 dspy.configure(lm=lm)
 ```
 

docs/docs/learn/programming/language_models.md

Lines changed: 1 addition & 1 deletion
@@ -37,7 +37,7 @@ dspy.configure(lm=lm)
 
 ```python linenums="1"
 import dspy
-lm = dspy.LM('anthropic/claude-3-opus-20240229', api_key='YOUR_ANTHROPIC_API_KEY')
+lm = dspy.LM('anthropic/claude-sonnet-4-5-20250929', api_key='YOUR_ANTHROPIC_API_KEY')
 dspy.configure(lm=lm)
 ```
 

docs/docs/tutorials/cache/index.md

Lines changed: 1 addition & 1 deletion
@@ -60,7 +60,7 @@ import os
 
 os.environ["ANTHROPIC_API_KEY"] = "{your_anthropic_key}"
 lm = dspy.LM(
-    "anthropic/claude-3-5-sonnet-20240620",
+    "anthropic/claude-sonnet-4-5-20250929",
     cache_control_injection_points=[
         {
             "location": "message",

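The hunk above only swaps the model name, and the diff cuts off after `"location": "message",`. For orientation, a sketch of how such a configuration might look once complete; the `"role"` key and the closing of the list are assumptions, not shown in this diff:

```python
import os

import dspy

os.environ["ANTHROPIC_API_KEY"] = "{your_anthropic_key}"

lm = dspy.LM(
    "anthropic/claude-sonnet-4-5-20250929",
    # Marks matching messages for Anthropic prompt caching via litellm.
    # "role": "system" is an assumption; only "location": "message" is in the diff.
    cache_control_injection_points=[
        {
            "location": "message",
            "role": "system",
        }
    ],
)
dspy.configure(lm=lm)
```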
dspy/adapters/chat_adapter.py

Lines changed: 28 additions & 6 deletions
@@ -26,6 +26,20 @@ class FieldInfoWithName(NamedTuple):
 
 
 class ChatAdapter(Adapter):
+    def __init__(
+        self,
+        callbacks=None,
+        use_native_function_calling: bool = False,
+        native_response_types=None,
+        use_json_adapter_fallback: bool = True,
+    ):
+        super().__init__(
+            callbacks=callbacks,
+            use_native_function_calling=use_native_function_calling,
+            native_response_types=native_response_types,
+        )
+        self.use_json_adapter_fallback = use_json_adapter_fallback
+
     def __call__(
         self,
         lm: LM,
@@ -40,9 +54,13 @@ def __call__(
             # fallback to JSONAdapter
             from dspy.adapters.json_adapter import JSONAdapter
 
-            if isinstance(e, ContextWindowExceededError) or isinstance(self, JSONAdapter):
-                # On context window exceeded error or already using JSONAdapter, we don't want to retry with a different
-                # adapter.
+            if (
+                isinstance(e, ContextWindowExceededError)
+                or isinstance(self, JSONAdapter)
+                or not self.use_json_adapter_fallback
+            ):
+                # On context window exceeded error, already using JSONAdapter, or use_json_adapter_fallback is False
+                # we don't want to retry with a different adapter. Raise the original error instead of the fallback error.
                 raise e
             return JSONAdapter()(lm, lm_kwargs, signature, demos, inputs)
 
@@ -60,9 +78,13 @@ async def acall(
             # fallback to JSONAdapter
             from dspy.adapters.json_adapter import JSONAdapter
 
-            if isinstance(e, ContextWindowExceededError) or isinstance(self, JSONAdapter):
-                # On context window exceeded error or already using JSONAdapter, we don't want to retry with a different
-                # adapter.
+            if (
+                isinstance(e, ContextWindowExceededError)
+                or isinstance(self, JSONAdapter)
+                or not self.use_json_adapter_fallback
+            ):
+                # On context window exceeded error, already using JSONAdapter, or use_json_adapter_fallback is False
+                # we don't want to retry with a different adapter. Raise the original error instead of the fallback error.
                 raise e
             return await JSONAdapter().acall(lm, lm_kwargs, signature, demos, inputs)
 

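Opting out of the fallback is then a constructor argument away; a minimal usage sketch of the parameter added above, assuming the usual `dspy.configure(adapter=...)` wiring:

```python
import dspy

# With use_json_adapter_fallback=False, a parsing failure in ChatAdapter now
# surfaces the original error instead of silently retrying with JSONAdapter.
dspy.configure(adapter=dspy.ChatAdapter(use_json_adapter_fallback=False))
```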
dspy/adapters/types/base_type.py

Lines changed: 1 addition & 1 deletion
@@ -88,7 +88,6 @@ def parse_stream_chunk(cls, chunk: ModelResponseStream) -> Optional["Type"]:
         """
         return None
 
-
     @classmethod
     def parse_lm_response(cls, response: str | dict[str, Any]) -> Optional["Type"]:
         """Parse a LM response into the custom type.
@@ -101,6 +100,7 @@ def parse_lm_response(cls, response: str | dict[str, Any]) -> Optional["Type"]:
         """
         return None
 
+
 def split_message_content_for_custom_types(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
     """Split user message content into a list of content blocks.
 

dspy/adapters/types/citation.py

Lines changed: 4 additions & 7 deletions
@@ -54,6 +54,7 @@ class AnswerWithSources(Signature):
 
     class Citation(Type):
         """Individual citation with character location information."""
+
        type: str = "char_location"
        cited_text: str
        document_index: int
@@ -73,7 +74,7 @@ def format(self) -> dict[str, Any]:
                "cited_text": self.cited_text,
                "document_index": self.document_index,
                "start_char_index": self.start_char_index,
-                "end_char_index": self.end_char_index
+                "end_char_index": self.end_char_index,
            }
 
            if self.document_title:
@@ -134,9 +135,7 @@ def validate_input(cls, data: Any):
             return data
 
         # Handle case where data is a list of dicts with citation info
-        if isinstance(data, list) and all(
-            isinstance(item, dict) and "cited_text" in item for item in data
-        ):
+        if isinstance(data, list) and all(isinstance(item, dict) and "cited_text" in item for item in data):
             return {"citations": [cls.Citation(**item) for item in data]}
 
         # Handle case where data is a dict
@@ -147,8 +146,7 @@ def validate_input(cls, data: Any):
         if isinstance(citations_data, list):
             return {
                 "citations": [
-                    cls.Citation(**item) if isinstance(item, dict) else item
-                    for item in citations_data
+                    cls.Citation(**item) if isinstance(item, dict) else item for item in citations_data
                 ]
             }
         elif "cited_text" in data:
@@ -197,7 +195,6 @@ def parse_stream_chunk(cls, chunk) -> Optional["Citations"]:
             pass
         return None
 
-
     @classmethod
     def parse_lm_response(cls, response: str | dict[str, Any]) -> Optional["Citations"]:
         """Parse a LM response into Citations.

dspy/clients/lm.py

Lines changed: 2 additions & 4 deletions
@@ -88,7 +88,6 @@ def __init__(
         model_pattern = re.match(r"^(?:o[1345]|gpt-5)(?:-(?:mini|nano))?", model_family)
 
         if model_pattern:
-
             if (temperature and temperature != 1.0) or (max_tokens and max_tokens < 16000):
                 raise ValueError(
                     "OpenAI's reasoning models require passing temperature=1.0 or None and max_tokens >= 16000 or None to "
@@ -228,9 +227,7 @@ def thread_function_wrapper():
 
         return job
 
-    def reinforce(
-        self, train_kwargs
-    ) -> ReinforceJob:
+    def reinforce(self, train_kwargs) -> ReinforceJob:
         # TODO(GRPO Team): Should we return an initialized job here?
         from dspy import settings as settings
 
@@ -482,6 +479,7 @@ def _convert_chat_request_to_responses_request(request: dict[str, Any]):
 
     return request
 
+
 def _get_headers(headers: dict[str, Any] | None = None):
     headers = headers or {}
     return {

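The validation kept in the first hunk encodes a usable rule: OpenAI reasoning-model families matched by the pattern (o1/o3/o4/o5, gpt-5, and their -mini/-nano variants) must be constructed with temperature=1.0 or None and max_tokens >= 16000 or None. A minimal sketch of a compliant construction; the model name is illustrative:

```python
import dspy

# Compliant: reasoning models require temperature=1.0 and max_tokens >= 16000.
lm = dspy.LM("openai/gpt-5-mini", temperature=1.0, max_tokens=16000)

# Non-compliant settings raise the ValueError shown in the diff:
# dspy.LM("openai/gpt-5-mini", temperature=0.7, max_tokens=1000)
```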