@@ -142,12 +142,18 @@ def __init__(
142142 logger .info ("Disabling chunked prefill for model without KVCache" )
143143 vllm_config .scheduler_config .chunked_prefill_enabled = False
144144
145+ scheduler_block_size = (
146+ vllm_config .cache_config .block_size
147+ * vllm_config .parallel_config .decode_context_parallel_size
148+ )
149+
145150 self .scheduler : SchedulerInterface = Scheduler (
146151 vllm_config = vllm_config ,
147152 kv_cache_config = kv_cache_config ,
148153 structured_output_manager = self .structured_output_manager ,
149154 include_finished_set = vllm_config .parallel_config .data_parallel_size > 1 ,
150155 log_stats = self .log_stats ,
156+ block_size = scheduler_block_size ,
151157 )
152158 self .use_spec_decode = vllm_config .speculative_config is not None
153159 if self .scheduler .connector is not None : # type: ignore
@@ -177,14 +183,13 @@ def __init__(
177183 self .vllm_config .cache_config .enable_prefix_caching
178184 or self .scheduler .get_kv_connector () is not None
179185 ):
180- block_size = vllm_config .cache_config .block_size
181186 caching_hash_fn = get_hash_fn_by_name (
182187 vllm_config .cache_config .prefix_caching_hash_algo
183188 )
184189 init_none_hash (caching_hash_fn )
185190
186191 self .request_block_hasher = get_request_block_hasher (
187- block_size , caching_hash_fn
192+ scheduler_block_size , caching_hash_fn
188193 )
189194
190195 self .step_fn = (
0 commit comments