
Commit eb29c47
fix(dspy): exclude gpt-5-chat from reasoning model classification (#9033)
The regex pattern for identifying reasoning models was incorrectly matching gpt-5-chat, causing it to be subject to reasoning-model validation requirements and to attempts to pass reasoning parameters that Azure's API doesn't support for this model. Updated the regex to use a negative lookahead (?!-chat) to exclude gpt-5-chat while still allowing other gpt-5 variants like gpt-5-mini, gpt-5-nano, and gpt-5-pro to be recognized as reasoning models.

- Updated regex pattern in dspy/clients/lm.py
- Added test case for gpt-5-chat in test_reasoning_model_token_parameter
- Added dedicated test test_gpt_5_chat_not_reasoning_model

Fixes issue where gpt-5-chat (Azure AI Foundry) was incorrectly classified as a reasoning model, preventing normal usage with standard temperature and max_tokens values.
1 parent fc1170a · commit eb29c47

File tree: 2 files changed (+24 −2 lines)

dspy/clients/lm.py (7 additions, 1 deletion)

@@ -85,7 +85,13 @@ def __init__(
         model_family = model.split("/")[-1].lower() if "/" in model else model.lower()
 
         # Recognize OpenAI reasoning models (o1, o3, o4, gpt-5 family)
-        model_pattern = re.match(r"^(?:o[1345]|gpt-5)(?:-(?:mini|nano))?", model_family)
+        # Exclude non-reasoning variants like gpt-5-chat (available in Azure AI Foundry)
+        # Allow date suffixes like -2023-01-01 after the model name or mini/nano/pro
+        # For gpt-5, use a negative lookahead to exclude -chat and allow other suffixes
+        model_pattern = re.match(
+            r"^(?:o[1345](?:-(?:mini|nano|pro))?(?:-\d{4}-\d{2}-\d{2})?|gpt-5(?!-chat)(?:-.*)?)$",
+            model_family,
+        )
 
         if model_pattern:
             if (temperature and temperature != 1.0) or (max_tokens and max_tokens < 16000):
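As a quick sanity check, here is a minimal standalone sketch (not part of the commit) contrasting the old and new patterns. Both regexes are copied from the hunk above; the model names are example inputs only, and bare family names are used because lm.py strips any provider prefix first via model.split("/")[-1].lower().

import re

# Old pattern: unanchored, so re.match() accepts the "gpt-5" prefix of "gpt-5-chat".
OLD = r"^(?:o[1345]|gpt-5)(?:-(?:mini|nano))?"
# New pattern: fully anchored, with a negative lookahead rejecting the -chat variant.
NEW = r"^(?:o[1345](?:-(?:mini|nano|pro))?(?:-\d{4}-\d{2}-\d{2})?|gpt-5(?!-chat)(?:-.*)?)$"

for name in ["o1", "o3-mini", "o4-mini-2025-04-16", "gpt-5", "gpt-5-pro", "gpt-5-chat"]:
    print(f"{name}: old={bool(re.match(OLD, name))}, new={bool(re.match(NEW, name))}")
# gpt-5-chat prints old=True (the bug) and new=False; every other name matches under both.

One side effect worth noting: because the new pattern is anchored with $, o-series names carrying suffixes other than mini/nano/pro plus an optional date (for example o1-preview) no longer match, whereas the unanchored old pattern treated any name beginning with a matching o-series family as a reasoning model.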

tests/clients/test_lm.py (17 additions, 1 deletion)

@@ -298,6 +298,7 @@ def test_reasoning_model_token_parameter():
         ("openai/gpt-5", True),
         ("openai/gpt-5-mini", True),
         ("openai/gpt-5-nano", True),
+        ("azure/gpt-5-chat", False),  # gpt-5-chat is NOT a reasoning model
         ("openai/gpt-4", False),
         ("anthropic/claude-2", False),
     ]
@@ -318,7 +319,7 @@ def test_reasoning_model_token_parameter():
         assert lm.kwargs["max_tokens"] == 1000
 
 
-@pytest.mark.parametrize("model_name", ["openai/o1", "openai/gpt-5-nano"])
+@pytest.mark.parametrize("model_name", ["openai/o1", "openai/gpt-5-nano", "openai/gpt-5-mini"])
 def test_reasoning_model_requirements(model_name):
     # Should raise assertion error if temperature or max_tokens requirements not met
     with pytest.raises(
@@ -347,6 +348,21 @@ def test_reasoning_model_requirements(model_name):
         assert lm.kwargs["max_completion_tokens"] is None
 
 
+def test_gpt_5_chat_not_reasoning_model():
+    """Test that gpt-5-chat is NOT treated as a reasoning model."""
+    # Should NOT raise validation error - gpt-5-chat is not a reasoning model
+    lm = dspy.LM(
+        model="openai/gpt-5-chat",
+        temperature=0.7,  # Can be any value
+        max_tokens=1000,  # Can be any value
+    )
+    # Should use max_tokens, not max_completion_tokens
+    assert "max_completion_tokens" not in lm.kwargs
+    assert "max_tokens" in lm.kwargs
+    assert lm.kwargs["max_tokens"] == 1000
+    assert lm.kwargs["temperature"] == 0.7
+
+
 def test_dump_state():
     lm = dspy.LM(
         model="openai/gpt-4o-mini",
