Commit f971600

Better default and warnings around LM max_tokens (#8215)
* Better default and warnings around LM max_tokens
* max_tokens
* max_tokens
* fix test
1 parent c2b3007 commit f971600
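
In practice, this commit raises the default max_tokens for dspy.LM from 1000 to 4000 and logs a warning whenever a completion is cut off at that limit. Below is a minimal usage sketch of overriding the new default; the model identifier and the 8000-token value are illustrative placeholders, not values taken from this commit.

# Sketch: overriding the new 4000-token default when constructing dspy.LM.
# The model name "openai/gpt-4o-mini" and max_tokens=8000 are placeholders.
import dspy

lm = dspy.LM(
    "openai/gpt-4o-mini",
    temperature=0.0,
    max_tokens=8000,  # raise further if responses are still truncated
)
dspy.configure(lm=lm)

# After a truncation warning, the most recent LM calls can be reviewed with:
# dspy.inspect_history(n=1)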

File tree

2 files changed: +20 −1 lines changed


dspy/clients/lm.py

Lines changed: 19 additions & 1 deletion
@@ -31,7 +31,7 @@ def __init__(
         model: str,
         model_type: Literal["chat", "text"] = "chat",
         temperature: float = 0.0,
-        max_tokens: int = 1000,
+        max_tokens: int = 4000,
         cache: bool = True,
         cache_in_memory: bool = True,
         callbacks: Optional[List[BaseCallback]] = None,
@@ -131,6 +131,15 @@ def forward(self, prompt=None, messages=None, **kwargs):
             cache=litellm_cache_args,
         )

+        if any(c.finish_reason == "length" for c in results["choices"]):
+            logger.warning(
+                f"LM response was truncated due to exceeding max_tokens={self.kwargs['max_tokens']}. "
+                "You can inspect the latest LM interactions with `dspy.inspect_history()`. "
+                "To avoid truncation, consider passing a larger max_tokens when setting up dspy.LM. "
+                f"You may also consider increasing the temperature (currently {self.kwargs['temperature']}) "
+                " if the reason for truncation is repetition."
+            )
+
         if not getattr(results, "cache_hit", False) and dspy.settings.usage_tracker and hasattr(results, "usage"):
             settings.usage_tracker.add_usage(self.model, dict(results.usage))
         return results
@@ -152,6 +161,15 @@ async def aforward(self, prompt=None, messages=None, **kwargs):
             cache=litellm_cache_args,
         )

+        if any(c.finish_reason == "length" for c in results["choices"]):
+            logger.warning(
+                f"LM response was truncated due to exceeding max_tokens={self.kwargs['max_tokens']}. "
+                "You can inspect the latest LM interactions with `dspy.inspect_history()`. "
+                "To avoid truncation, consider passing a larger max_tokens when setting up dspy.LM. "
+                f"You may also consider increasing the temperature (currently {self.kwargs['temperature']}) "
+                " if the reason for truncation is repetition."
+            )
+
         if not getattr(results, "cache_hit", False) and dspy.settings.usage_tracker and hasattr(results, "usage"):
             settings.usage_tracker.add_usage(self.model, dict(results.usage))
         return results
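
For context, the condition that triggers the new warning can be exercised in isolation. The sketch below uses a stand-in response (a plain dict with SimpleNamespace choices) rather than a real LiteLLM ModelResponse; only the choices/finish_reason shape the check relies on is reproduced.

# Stand-in for the truncation check added in forward()/aforward() above.
from types import SimpleNamespace

def is_truncated(results) -> bool:
    # True if any completion choice stopped because it hit the max_tokens limit.
    return any(c.finish_reason == "length" for c in results["choices"])

truncated = {"choices": [SimpleNamespace(finish_reason="length")]}
complete = {"choices": [SimpleNamespace(finish_reason="stop")]}
print(is_truncated(truncated))  # True  -> the warning above would be logged
print(is_truncated(complete))   # False -> no warning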

tests/caching/test_litellm_cache.py

Lines changed: 1 addition & 0 deletions
@@ -88,6 +88,7 @@ def test_lm_calls_are_cached_across_interpreter_sessions(litellm_test_server, te
         model="openai/dspy-test-model",
         api_base=api_base,
         api_key="fakekey",
+        max_tokens=1000,
     )
     lm1("Example query")

0 commit comments