Commit e2741f6

[Chore] Rename SchedulerConfig.chunked_prefill_enabled (#28735)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Parent: 6718755 · Commit: e2741f6

9 files changed: +21, -19 lines changed

tests/v1/core/test_scheduler.py

Lines changed: 0 additions & 1 deletion
@@ -2282,7 +2282,6 @@ def _validate_chunked_prefill_settings_for_encoder_decoder(
 ) -> None:
     """Validate chunked prefill settings in the scheduler config for
     encoder-decoder models."""
-    assert scheduler_config.chunked_prefill_enabled is expect_enabled
     assert scheduler_config.enable_chunked_prefill is expect_enabled
     if is_encoder_decoder:
         # Encoder-decoder models should automatically disable chunked multimodal

tests/v1/e2e/test_spec_decode.py

Lines changed: 4 additions & 6 deletions
@@ -272,7 +272,7 @@ def test_speculators_model_integration(
 
 
 @pytest.mark.parametrize(
-    ["model_setup", "mm_enabled", "chunked_prefill_enabled"],
+    ["model_setup", "mm_enabled", "enable_chunked_prefill"],
     [
         (("eagle3", "Qwen/Qwen3-8B", "AngelSlim/Qwen3-8B_eagle3", 1), False, False),
         pytest.param(
@@ -358,7 +358,7 @@ def test_eagle_correctness(
     sampling_config: SamplingParams,
     model_setup: tuple[str, str, str, int],
     mm_enabled: bool,
-    chunked_prefill_enabled: bool,
+    enable_chunked_prefill: bool,
     attn_backend: str,
 ):
     if attn_backend == "TREE_ATTN":
@@ -396,9 +396,7 @@ def test_eagle_correctness(
 
     method, model_name, spec_model_name, tp_size = model_setup
     max_model_len = 2048
-    max_num_batched_tokens = max_model_len
-    if chunked_prefill_enabled:
-        max_num_batched_tokens = 128
+    max_num_batched_tokens = 128 if enable_chunked_prefill else max_model_len
 
     ref_llm = LLM(
         model=model_name, max_model_len=max_model_len, tensor_parallel_size=tp_size
@@ -420,7 +418,7 @@ def test_eagle_correctness(
         },
         max_model_len=max_model_len,
         max_num_batched_tokens=max_num_batched_tokens,
-        enable_chunked_prefill=chunked_prefill_enabled,
+        enable_chunked_prefill=enable_chunked_prefill,
     )
     spec_outputs = spec_llm.chat(test_prompts, sampling_config)
     matches = 0

tests/v1/engine/test_engine_core.py

Lines changed: 1 addition & 1 deletion
@@ -571,7 +571,7 @@ def test_encoder_instance_zero_kv_cache(
     )
 
     # Check 5: Verify chunked prefill is disabled
-    assert not vllm_config.scheduler_config.chunked_prefill_enabled, (
+    assert not vllm_config.scheduler_config.enable_chunked_prefill, (
         "Encoder instance should disable chunked prefill (no KV cache)"
     )

vllm/config/scheduler.py

Lines changed: 8 additions & 3 deletions
@@ -8,7 +8,7 @@
 
 from pydantic import Field, field_validator, model_validator
 from pydantic.dataclasses import dataclass
-from typing_extensions import Self
+from typing_extensions import Self, deprecated
 
 from vllm.config.utils import config
 from vllm.logger import init_logger
@@ -233,6 +233,11 @@ def __post_init__(self, is_encoder_decoder: bool) -> None:
         )
 
     @property
+    @deprecated(
+        "`SchedulerConfig.chunked_prefill_enabled` has been renamed to "
+        "`SchedulerConfig.enable_chunked_prefill`. "
+        "The old name will be removed in v0.12."
+    )
     def chunked_prefill_enabled(self) -> bool:
         return self.enable_chunked_prefill
 
@@ -244,7 +249,7 @@ def chunked_prefill_enabled(self, value: bool):
     def _verify_args(self) -> Self:
         if (
             self.max_num_batched_tokens < self.max_model_len
-            and not self.chunked_prefill_enabled
+            and not self.enable_chunked_prefill
         ):
             raise ValueError(
                 f"max_num_batched_tokens ({self.max_num_batched_tokens}) is "
@@ -271,7 +276,7 @@ def _verify_args(self) -> Self:
             )
 
         if self.max_num_partial_prefills > 1:
-            if not self.chunked_prefill_enabled:
+            if not self.enable_chunked_prefill:
                 raise ValueError(
                     "Chunked prefill must be enabled to set "
                     "max_num_partial_prefills > 1."

vllm/config/vllm.py

Lines changed: 3 additions & 3 deletions
@@ -411,7 +411,7 @@ def __post_init__(self):
 
         if (
             self.model_config is not None
-            and self.scheduler_config.chunked_prefill_enabled
+            and self.scheduler_config.enable_chunked_prefill
             and self.model_config.dtype == torch.float32
             and current_platform.get_device_capability() == (7, 5)
         ):
@@ -584,7 +584,7 @@ def __post_init__(self):
         ):
             for reason in disable_chunked_prefill_reasons:
                 logger.info(reason)
-            self.scheduler_config.chunked_prefill_enabled = False
+            self.scheduler_config.enable_chunked_prefill = False
             self.scheduler_config.long_prefill_token_threshold = 0
 
         if self.cache_config is not None:
@@ -1026,7 +1026,7 @@ def __str__(self):
             f"seed={self.model_config.seed}, "
            f"served_model_name={self.model_config.served_model_name}, "
             f"enable_prefix_caching={self.cache_config.enable_prefix_caching}, "
-            f"chunked_prefill_enabled={self.scheduler_config.chunked_prefill_enabled}, "  # noqa
+            f"enable_chunked_prefill={self.scheduler_config.enable_chunked_prefill}, "  # noqa
             f"pooler_config={self.model_config.pooler_config!r}, "
             f"compilation_config={self.compilation_config!r}"
         )

vllm/platforms/cpu.py

Lines changed: 1 addition & 1 deletion
@@ -192,7 +192,7 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
 
         scheduler_config = vllm_config.scheduler_config
         if (
-            scheduler_config.chunked_prefill_enabled
+            scheduler_config.enable_chunked_prefill
             or cache_config.enable_prefix_caching
         ) and cache_config.cache_dtype != "auto":
             raise RuntimeError(

vllm/v1/core/sched/scheduler.py

Lines changed: 1 addition & 1 deletion
@@ -497,7 +497,7 @@ def schedule(self) -> SchedulerOutput:
                 # chunked prefill has to be enabled explicitly to allow
                 # pooling requests to be chunked
                 if (
-                    not self.scheduler_config.chunked_prefill_enabled
+                    not self.scheduler_config.enable_chunked_prefill
                     and num_new_tokens > token_budget
                 ):
                     self.waiting.pop_request()

vllm/v1/engine/core.py

Lines changed: 1 addition & 1 deletion
@@ -124,7 +124,7 @@ def __init__(
             # Encoder models without KV cache don't support
             # chunked prefill. But do SSM models?
             logger.info("Disabling chunked prefill for model without KVCache")
-            vllm_config.scheduler_config.chunked_prefill_enabled = False
+            vllm_config.scheduler_config.enable_chunked_prefill = False
 
         scheduler_block_size = (
             vllm_config.cache_config.block_size
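The hunk above now writes the canonical field directly. During the deprecation window, writes through the old name should still land on the new field via the retained setter (its def chunked_prefill_enabled(self, value: bool) signature is visible in the scheduler.py hunk header earlier), mirroring how pre-rename code assigned through it. A small sketch of that assumption, hypothetical and not from this commit:

# Hypothetical sketch (not from this commit): the old setter is assumed to
# forward to the renamed field, as pre-rename call sites relied on.
from vllm.config import SchedulerConfig

cfg = SchedulerConfig()
cfg.chunked_prefill_enabled = False          # old spelling, goes through the property setter
assert cfg.enable_chunked_prefill is False   # the renamed field reflects the write
cfg.enable_chunked_prefill = True            # new spelling, preferred going forward
assert cfg.chunked_prefill_enabled is True   # old read still mirrors the field (with a deprecation warning)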

vllm/v1/worker/gpu_model_runner.py

Lines changed: 2 additions & 2 deletions
@@ -2031,7 +2031,7 @@ def get_supported_pooling_tasks(self) -> list[PoolingTask]:
 
         supported_tasks = list(model.pooler.get_supported_tasks())
 
-        if self.scheduler_config.chunked_prefill_enabled:
+        if self.scheduler_config.enable_chunked_prefill:
             if "token_embed" in supported_tasks:
                 supported_tasks.remove("token_embed")
             if "token_classify" in supported_tasks:
@@ -3825,7 +3825,7 @@ def _dummy_pooler_run(
         supported_pooling_tasks = self.get_supported_pooling_tasks()
 
         if not supported_pooling_tasks:
-            if self.scheduler_config.chunked_prefill_enabled:
+            if self.scheduler_config.enable_chunked_prefill:
                 raise RuntimeError(
                     f"Model {self.model_config.model} does not support "
                     "any pooling tasks with chunked prefill enabled. "
