[bugfix][DCP] fix block_size of hash in DCP prefix caching (vllm-project#26296)

heheda12345 · web-flow · commit 606b00e80f32 · 2025-10-10T03:02:49.000-07:00
Signed-off-by: Chen Zhang <zhangch99@outlook.com>
diff --git a/tests/v1/core/test_scheduler.py b/tests/v1/core/test_scheduler.py
@@ -1411,6 +1411,7 @@ def create_scheduler_with_priority(
         kv_cache_config=kv_cache_config,
         log_stats=True,
         structured_output_manager=StructuredOutputManager(vllm_config),
+        block_size=block_size,
     )
 
 
diff --git a/tests/v1/core/utils.py b/tests/v1/core/utils.py
@@ -129,6 +129,7 @@ def create_scheduler(
     return scheduler_cls(
         vllm_config=vllm_config,
         kv_cache_config=kv_cache_config,
+        block_size=block_size,
         log_stats=True,
         structured_output_manager=StructuredOutputManager(vllm_config),
     )
diff --git a/tests/v1/kv_connector/unit/utils.py b/tests/v1/kv_connector/unit/utils.py
@@ -138,6 +138,7 @@ def create_scheduler(
         kv_cache_config=kv_cache_config,
         log_stats=True,
         structured_output_manager=StructuredOutputManager(vllm_config),
+        block_size=block_size,
     )
 
 
diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py
@@ -45,6 +45,7 @@ def __init__(
         vllm_config: VllmConfig,
         kv_cache_config: KVCacheConfig,
         structured_output_manager: StructuredOutputManager,
+        block_size: int,
         mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY,
         include_finished_set: bool = False,
         log_stats: bool = False,
@@ -101,15 +102,8 @@ def __init__(
         num_gpu_blocks = self.cache_config.num_gpu_blocks
         assert num_gpu_blocks is not None and num_gpu_blocks > 0
 
-        self.block_size = self.cache_config.block_size
-
+        self.block_size = block_size
         self.dcp_world_size = vllm_config.parallel_config.decode_context_parallel_size
-        # Note(hc): The scheduler’s block_size must be multiplied
-        # by dcp_world_size, since block hashes are computed on the
-        # original full token sequence at a granularity of
-        # original_block_size × dcp_world_size.
-        if self.dcp_world_size > 1:
-            self.block_size *= self.dcp_world_size
 
         # req_id -> Request
         self.requests: dict[str, Request] = {}
diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py
@@ -142,12 +142,18 @@ def __init__(
             logger.info("Disabling chunked prefill for model without KVCache")
             vllm_config.scheduler_config.chunked_prefill_enabled = False
 
+        scheduler_block_size = (
+            vllm_config.cache_config.block_size
+            * vllm_config.parallel_config.decode_context_parallel_size
+        )
+
         self.scheduler: SchedulerInterface = Scheduler(
             vllm_config=vllm_config,
             kv_cache_config=kv_cache_config,
             structured_output_manager=self.structured_output_manager,
             include_finished_set=vllm_config.parallel_config.data_parallel_size > 1,
             log_stats=self.log_stats,
+            block_size=scheduler_block_size,
         )
         self.use_spec_decode = vllm_config.speculative_config is not None
         if self.scheduler.connector is not None:  # type: ignore
@@ -177,14 +183,13 @@ def __init__(
             self.vllm_config.cache_config.enable_prefix_caching
             or self.scheduler.get_kv_connector() is not None
         ):
-            block_size = vllm_config.cache_config.block_size
             caching_hash_fn = get_hash_fn_by_name(
                 vllm_config.cache_config.prefix_caching_hash_algo
             )
             init_none_hash(caching_hash_fn)
 
             self.request_block_hasher = get_request_block_hasher(
-                block_size, caching_hash_fn
+                scheduler_block_size, caching_hash_fn
             )
 
         self.step_fn = (

Original file line number	Diff line number	Diff line change
`@@ -1411,6 +1411,7 @@ def create_scheduler_with_priority(`
`1411`	`1411`	`kv_cache_config=kv_cache_config,`
`1412`	`1412`	`log_stats=True,`
`1413`	`1413`	`structured_output_manager=StructuredOutputManager(vllm_config),`
	`1414`	`+ block_size=block_size,`
`1414`	`1415`	`)`
`1415`	`1416`
`1416`	`1417`
Original file line number	Diff line number	Diff line change
`@@ -129,6 +129,7 @@ def create_scheduler(`
`129`	`129`	`return scheduler_cls(`
`130`	`130`	`vllm_config=vllm_config,`
`131`	`131`	`kv_cache_config=kv_cache_config,`
	`132`	`+ block_size=block_size,`
`132`	`133`	`log_stats=True,`
`133`	`134`	`structured_output_manager=StructuredOutputManager(vllm_config),`
`134`	`135`	`)`
Original file line number	Diff line number	Diff line change
`@@ -138,6 +138,7 @@ def create_scheduler(`
`138`	`138`	`kv_cache_config=kv_cache_config,`
`139`	`139`	`log_stats=True,`
`140`	`140`	`structured_output_manager=StructuredOutputManager(vllm_config),`
	`141`	`+ block_size=block_size,`
`141`	`142`	`)`
`142`	`143`
`143`	`144`