Skip to content

Commit 404d7a9

Browse files
authored
[Performance][gpt-oss] Revert gpt-oss max cudagraph size to 1024 (vllm-project#28345)
Signed-off-by: Mohammad Miadh Angkad <MAngkad.BSDSBA2027@aim.edu>
1 parent 171133f commit 404d7a9

File tree

1 file changed

+4
-6
lines changed

1 file changed

+4
-6
lines changed

vllm/model_executor/models/config.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -258,21 +258,19 @@ def verify_and_update_config(vllm_config: "VllmConfig") -> None:
258258
if structured_outputs_config.reasoning_parser == "":
259259
structured_outputs_config.reasoning_parser = "openai_gptoss"
260260

261-
# Increase the max capture size from 512 to 992 for performance.
261+
# Increase the max capture size from 512 to 1024 for performance.
262262
# NOTE(woosuk): This will increase the number of CUDA graphs
263-
# from 67 to 81.
263+
# from 67 to 83.
264264
compilation_config = vllm_config.compilation_config
265265
# Only override when the user has not set either of
266266
# cudagraph_capture_sizes or max_cudagraph_capture_size.
267267
if (
268268
compilation_config.cudagraph_capture_sizes is None
269269
and compilation_config.max_cudagraph_capture_size is None
270270
):
271-
# FIXME(woosuk): When using full cuda graph with FA3, the max
272-
# supported size is 992.
273-
compilation_config.max_cudagraph_capture_size = 992
271+
compilation_config.max_cudagraph_capture_size = 1024
274272
logger.info(
275-
"Overriding max cuda graph capture size to %d for performance.", 992
273+
"Overriding max cuda graph capture size to %d for performance.", 1024
276274
)
277275

278276

0 commit comments

Comments (0)