@@ -272,7 +272,7 @@ def test_speculators_model_integration(
272272
273273
274274@pytest .mark .parametrize (
275- ["model_setup" , "mm_enabled" , "chunked_prefill_enabled " ],
275+ ["model_setup" , "mm_enabled" , "enable_chunked_prefill " ],
276276 [
277277 (("eagle3" , "Qwen/Qwen3-8B" , "AngelSlim/Qwen3-8B_eagle3" , 1 ), False , False ),
278278 pytest .param (
@@ -358,7 +358,7 @@ def test_eagle_correctness(
358358 sampling_config : SamplingParams ,
359359 model_setup : tuple [str , str , str , int ],
360360 mm_enabled : bool ,
361- chunked_prefill_enabled : bool ,
361+ enable_chunked_prefill : bool ,
362362 attn_backend : str ,
363363):
364364 if attn_backend == "TREE_ATTN" :
@@ -396,9 +396,7 @@ def test_eagle_correctness(
396396
397397 method , model_name , spec_model_name , tp_size = model_setup
398398 max_model_len = 2048
399- max_num_batched_tokens = max_model_len
400- if chunked_prefill_enabled :
401- max_num_batched_tokens = 128
399+ max_num_batched_tokens = 128 if enable_chunked_prefill else max_model_len
402400
403401 ref_llm = LLM (
404402 model = model_name , max_model_len = max_model_len , tensor_parallel_size = tp_size
@@ -420,7 +418,7 @@ def test_eagle_correctness(
420418 },
421419 max_model_len = max_model_len ,
422420 max_num_batched_tokens = max_num_batched_tokens ,
423- enable_chunked_prefill = chunked_prefill_enabled ,
421+ enable_chunked_prefill = enable_chunked_prefill ,
424422 )
425423 spec_outputs = spec_llm .chat (test_prompts , sampling_config )
426424 matches = 0
0 commit comments