File tree: 1 file changed, +4 −6 lines changed
vllm/model_executor/models: 1 file changed, +4 −6 lines changed
@@ -258,21 +258,19 @@ def verify_and_update_config(vllm_config: "VllmConfig") -> None:
258258 if structured_outputs_config .reasoning_parser == "" :
259259 structured_outputs_config .reasoning_parser = "openai_gptoss"
260260
261- # Increase the max capture size from 512 to 992 for performance.
261+ # Increase the max capture size from 512 to 1024 for performance.
262262 # NOTE(woosuk): This will increase the number of CUDA graphs
263- # from 67 to 81 .
263+ # from 67 to 83 .
264264 compilation_config = vllm_config .compilation_config
265265 # Only override when the user has not set either of
266266 # cudagraph_capture_sizes or max_cudagraph_capture_size.
267267 if (
268268 compilation_config .cudagraph_capture_sizes is None
269269 and compilation_config .max_cudagraph_capture_size is None
270270 ):
271- # FIXME(woosuk): When using full cuda graph with FA3, the max
272- # supported size is 992.
273- compilation_config .max_cudagraph_capture_size = 992
271+ compilation_config .max_cudagraph_capture_size = 1024
274272 logger .info (
275- "Overriding max cuda graph capture size to %d for performance." , 992
273+ "Overriding max cuda graph capture size to %d for performance." , 1024
276274 )
277275
278276
You can’t perform that action at this time.
0 commit comments