
Commit 638e419

[Misc] Make SchedulerConfig.max_model_len init-only (#28733)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Parent commit: 1ec978c
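Every hunk below removes the same post-init mutation of the scheduler config. With max_model_len now init-only, values like it must be provided when the config is constructed rather than assigned afterwards. A minimal sketch of the new pattern, assuming SchedulerConfig and VllmConfig accept these values as constructor keywords (the keyword style for VllmConfig is visible in the test_cutlass_moe.py hunk below):

from vllm.config import SchedulerConfig, VllmConfig

# Previously (removed in this commit): mutate fields after construction.
# vllm_config = VllmConfig()
# vllm_config.scheduler_config.max_num_seqs = 128
# vllm_config.scheduler_config.max_model_len = 8192

# Now: supply init-only values at construction time.
vllm_config = VllmConfig(
    scheduler_config=SchedulerConfig(max_num_seqs=128, max_model_len=8192)
)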

17 files changed (+22, -45 lines)


tests/kernels/moe/test_batched_moe.py

Lines changed: 0 additions & 2 deletions
@@ -40,8 +40,6 @@
 TOP_KS = [1, 2, 6]
 
 vllm_config = VllmConfig()
-vllm_config.scheduler_config.max_num_seqs = 128
-vllm_config.scheduler_config.max_model_len = 8192
 
 
 @dataclass
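The removed assignments are not replaced in these tests, presumably because the defaults suffice for the kernels under test. As a note on the mechanism: an init-only field is accepted by __init__ but not kept as an ordinary mutable field afterwards. A hypothetical illustration using dataclasses.InitVar (not necessarily how this commit implements it):

from dataclasses import InitVar, dataclass

@dataclass
class ExampleConfig:
    # Accepted as a constructor argument and handed to __post_init__,
    # but not stored as a regular dataclass field.
    max_model_len: InitVar[int] = 8192

    def __post_init__(self, max_model_len: int) -> None:
        # Derive internal state from the init-only value here.
        self.derived_max_len = max_model_len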

tests/kernels/moe/test_block_fp8.py

Lines changed: 0 additions & 2 deletions
@@ -33,8 +33,6 @@
 pytest.skip("FP8 Triton requires CUDA 9.0 or higher", allow_module_level=True)
 
 vllm_config = VllmConfig()
-vllm_config.scheduler_config.max_num_seqs = 128
-vllm_config.scheduler_config.max_model_len = 8192
 
 # Test configurations
 DTYPES = [torch.bfloat16]  # [torch.half, torch.bfloat16, torch.float32]

tests/kernels/moe/test_block_int8.py

Lines changed: 0 additions & 2 deletions
@@ -18,8 +18,6 @@
 pytest.skip("INT8 Triton requires CUDA 7.0 or higher", allow_module_level=True)
 
 vllm_config = VllmConfig()
-vllm_config.scheduler_config.max_num_seqs = 128
-vllm_config.scheduler_config.max_model_len = 8192
 
 DTYPES = [torch.bfloat16]
 
tests/kernels/moe/test_cutlass_moe.py

Lines changed: 0 additions & 2 deletions
@@ -42,8 +42,6 @@
 ]
 
 vllm_config = VllmConfig(parallel_config=ParallelConfig(pipeline_parallel_size=1))
-vllm_config.scheduler_config.max_num_seqs = 128
-vllm_config.scheduler_config.max_model_len = 8192
 
 
 @dataclasses.dataclass

tests/kernels/moe/test_flashinfer.py

Lines changed: 0 additions & 2 deletions
@@ -45,8 +45,6 @@
 ]
 
 vllm_config = VllmConfig(parallel_config=ParallelConfig(pipeline_parallel_size=1))
-vllm_config.scheduler_config.max_num_seqs = 128
-vllm_config.scheduler_config.max_model_len = 8192
 
 
 def quant_fp8_per_tensor_batches(a):

tests/kernels/moe/test_moe.py

Lines changed: 0 additions & 2 deletions
@@ -81,8 +81,6 @@
 ]
 
 vllm_config = VllmConfig()
-vllm_config.scheduler_config.max_num_seqs = 128
-vllm_config.scheduler_config.max_model_len = 8192
 
 
 def run_moe_test(

tests/kernels/moe/test_pplx_cutlass_moe.py

Lines changed: 0 additions & 2 deletions
@@ -192,8 +192,6 @@ def pplx_cutlass_moe(
 
 
 vllm_config = VllmConfig()
-vllm_config.scheduler_config.max_num_seqs = 128
-vllm_config.scheduler_config.max_model_len = 8192
 
 
 def _pplx_moe(

tests/kernels/moe/test_pplx_moe.py

Lines changed: 0 additions & 2 deletions
@@ -81,8 +81,6 @@
 DTYPES = [torch.float8_e4m3fn, torch.bfloat16]
 
 vllm_config = VllmConfig()
-vllm_config.scheduler_config.max_num_seqs = 128
-vllm_config.scheduler_config.max_model_len = 8192
 
 
 def torch_prepare(

tests/kernels/moe/test_triton_moe_ptpc_fp8.py

Lines changed: 0 additions & 2 deletions
@@ -18,8 +18,6 @@
 pytest.skip("FP8 Triton requires CUDA 9.0 or higher", allow_module_level=True)
 
 vllm_config = VllmConfig()
-vllm_config.scheduler_config.max_num_seqs = 128
-vllm_config.scheduler_config.max_model_len = 8192
 
 
 def native_w8a8_per_token_matmul(A, B, As, Bs, output_dtype=torch.float16):

tests/kernels/quantization/test_block_fp8.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,6 @@
2929
pytest.skip("FP8 Triton requires CUDA 9.0 or higher", allow_module_level=True)
3030

3131
vllm_config = VllmConfig()
32-
vllm_config.scheduler_config.max_num_seqs = 128
33-
vllm_config.scheduler_config.max_model_len = 8192
3432

3533
# Test configurations
3634
DTYPES = [torch.bfloat16] # [torch.half, torch.bfloat16, torch.float32]
