@@ -488,7 +488,6 @@ class EngineArgs:
488488
489489 ray_workers_use_nsight : bool = ParallelConfig .ray_workers_use_nsight
490490 num_gpu_blocks_override : int | None = CacheConfig .num_gpu_blocks_override
491- num_lookahead_slots : int = SchedulerConfig .num_lookahead_slots
492491 model_loader_extra_config : dict = get_field (LoadConfig , "model_loader_extra_config" )
493492 ignore_patterns : str | list [str ] = get_field (LoadConfig , "ignore_patterns" )
494493
@@ -1081,9 +1080,6 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
10811080 "--long-prefill-token-threshold" ,
10821081 ** scheduler_kwargs ["long_prefill_token_threshold" ],
10831082 )
1084- scheduler_group .add_argument (
1085- "--num-lookahead-slots" , ** scheduler_kwargs ["num_lookahead_slots" ]
1086- )
10871083 # multi-step scheduling has been removed; corresponding arguments
10881084 # are no longer supported.
10891085 scheduler_group .add_argument (
@@ -1653,18 +1649,11 @@ def create_engine_config(
16531649 target_parallel_config = parallel_config ,
16541650 )
16551651
1656- # make sure num_lookahead_slots is set appropriately depending on
1657- # whether speculative decoding is enabled
1658- num_lookahead_slots = self .num_lookahead_slots
1659- if speculative_config is not None :
1660- num_lookahead_slots = speculative_config .num_lookahead_slots
1661-
16621652 scheduler_config = SchedulerConfig (
16631653 runner_type = model_config .runner_type ,
16641654 max_num_batched_tokens = self .max_num_batched_tokens ,
16651655 max_num_seqs = self .max_num_seqs ,
16661656 max_model_len = model_config .max_model_len ,
1667- num_lookahead_slots = num_lookahead_slots ,
16681657 enable_chunked_prefill = self .enable_chunked_prefill ,
16691658 disable_chunked_mm_input = self .disable_chunked_mm_input ,
16701659 is_multimodal_model = model_config .is_multimodal_model ,
0 commit comments