Skip to content

Commit 3a773e2

Browse files
committed
fix mla ut
Signed-off-by: wangli <wangli858794774@gmail.com>
1 parent 9d7da91 commit 3a773e2

File tree

2 files changed

+33
-28
lines changed

2 files changed

+33
-28
lines changed

tests/ut/torchair/test_torchair_mla.py

Lines changed: 28 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -180,18 +180,19 @@ def test_ascend_mla_metadata_default(self):
180180
class TestAscendMLATorchairMetadataBuilder(TestBase):
181181

182182
def test_ascend_mla_metadata_builder_default(self):
183-
mock_vllm_config = MagicMock()
184-
mock_vllm_config.model_config.max_model_len = 1024
185-
mock_vllm_config.model_config.get_head_size.return_value = 64
186-
mock_vllm_config.model_config.dtype = torch.float16
187-
mock_vllm_config.cache_config.block_size = 16
188-
mock_vllm_config.get_head_size = lambda: 8
189-
mock_vllm_config.scheduler_config.max_num_seqs = 4
190-
mock_vllm_config.scheduler_config.chunked_prefill_enabled = False
191-
mock_device = torch.device('cpu')
183+
mock_model_config = MagicMock()
184+
mock_model_config.max_model_len = 1024
185+
mock_model_config.get_head_size.return_value = 64
186+
mock_model_config.dtype = torch.float16
192187

188+
mock_vllm_config = MagicMock()
189+
mock_vllm_config.model_config = mock_model_config
190+
mock_vllm_config.cache_config = MagicMock(block_size=16)
191+
mock_vllm_config.scheduler_config = MagicMock(
192+
max_num_seqs=4, enable_chunked_prefill=False)
193193
mock_vllm_config.speculative_config = None
194194

195+
mock_device = torch.device('cpu')
195196
ascend_config = MagicMock()
196197
ascend_config.torchair_graph_config = MagicMock()
197198
ascend_config.torchair_graph_config.enabled = True
@@ -205,23 +206,25 @@ def test_ascend_mla_metadata_builder_default(self):
205206
mock_vllm_config.cache_config.block_size)
206207
self.assertEqual(
207208
builder.chunked_prefill_enabled,
208-
mock_vllm_config.scheduler_config.chunked_prefill_enabled)
209+
mock_vllm_config.scheduler_config.enable_chunked_prefill)
209210
self.assertEqual(builder.torchair_graph_enabled, True)
210211

211212
@patch("vllm_ascend.torchair.torchair_mla.get_ascend_config")
212213
def test_reorder_batch_with_torchair_graph(self, ascend_config):
213-
mock_vllm_config = MagicMock()
214-
mock_vllm_config.model_config.max_model_len = 1024
215-
mock_vllm_config.get_head_size = lambda: 8
216-
mock_vllm_config.cache_config.block_size = 16
217-
mock_vllm_config.scheduler_config.max_num_seqs = 4
218-
mock_vllm_config.scheduler_config.chunked_prefill_enabled = False
219-
mock_device = torch.device('cpu')
220-
ascend_config.torchair_graph_config = MagicMock()
221-
ascend_config.torchair_graph_config.enabled = True
214+
mock_model_config = MagicMock()
215+
mock_model_config.max_model_len = 1024
216+
mock_model_config.get_head_size.return_value = 64
217+
mock_model_config.dtype = torch.float16
222218

219+
mock_vllm_config = MagicMock()
220+
mock_vllm_config.model_config = mock_model_config
221+
mock_vllm_config.cache_config = MagicMock(block_size=16)
222+
mock_vllm_config.scheduler_config = MagicMock(
223+
max_num_seqs=4, enable_chunked_prefill=False)
223224
mock_vllm_config.speculative_config = None
224225

226+
mock_device = torch.device('cpu')
227+
225228
builder = AscendMLATorchairMetadataBuilder(None, None,
226229
mock_vllm_config,
227230
mock_device)
@@ -255,7 +258,7 @@ def test_reorder_batch_without_torchair_graph(self):
255258
mock_vllm_config.get_head_size = lambda: 8
256259
mock_vllm_config.cache_config.block_size = 16
257260
mock_vllm_config.scheduler_config.max_num_seqs = 4
258-
mock_vllm_config.scheduler_config.chunked_prefill_enabled = False
261+
mock_vllm_config.scheduler_config.enable_chunked_prefill = False
259262
mock_device = torch.device('cpu')
260263

261264
mock_vllm_config.speculative_config = None
@@ -293,7 +296,7 @@ def test_get_graph_runner_block_tables_normal(self, mock_ascend_config):
293296
mock_vllm_config = MagicMock()
294297
mock_vllm_config.model_config.max_model_len = 1024
295298
mock_vllm_config.cache_config.block_size = 16
296-
mock_vllm_config.scheduler_config.chunked_prefill_enabled = False
299+
mock_vllm_config.scheduler_config.enable_chunked_prefill = False
297300
mock_device = torch.device('cpu')
298301

299302
mock_vllm_config.speculative_config = None
@@ -316,7 +319,7 @@ def test_get_graph_runner_block_tables_truncated(self, mock_ascend_config):
316319
mock_vllm_config = MagicMock()
317320
mock_vllm_config.model_config.max_model_len = 64
318321
mock_vllm_config.cache_config.block_size = 16
319-
mock_vllm_config.scheduler_config.chunked_prefill_enabled = False
322+
mock_vllm_config.scheduler_config.enable_chunked_prefill = False
320323
mock_device = torch.device('cpu')
321324

322325
mock_vllm_config.speculative_config = None
@@ -342,7 +345,7 @@ def test_get_graph_runner_block_tables_from_numpy(self,
342345
mock_vllm_config.cache_config.block_size = 16
343346
mock_vllm_config.get_head_size = lambda: 28
344347
mock_vllm_config.dtype = torch.bfloat16
345-
mock_vllm_config.scheduler_config.chunked_prefill_enabled = False
348+
mock_vllm_config.scheduler_config.enable_chunked_prefill = False
346349
mock_device = torch.device('cpu')
347350

348351
mock_vllm_config.speculative_config = None
@@ -368,7 +371,7 @@ def test_build_dummy(self, mock_ascend_config):
368371
mock_vllm_config = MagicMock()
369372
mock_vllm_config.model_config.max_model_len = 1024
370373
mock_vllm_config.cache_config.block_size = 16
371-
mock_vllm_config.scheduler_config.chunked_prefill_enabled = False
374+
mock_vllm_config.scheduler_config.enable_chunked_prefill = False
372375
mock_vllm_config.get_head_size.return_value = 64
373376
mock_vllm_config.model_config.dtype = torch.float16
374377
mock_device = torch.device('cpu')
@@ -435,7 +438,7 @@ def test_build_decode(self, mock_ascend_config):
435438
mock_vllm_config = MagicMock()
436439
mock_vllm_config.model_config.max_model_len = 1024
437440
mock_vllm_config.cache_config.block_size = 16
438-
mock_vllm_config.scheduler_config.chunked_prefill_enabled = False
441+
mock_vllm_config.scheduler_config.enable_chunked_prefill = False
439442
mock_vllm_config.get_head_size.return_value = 64
440443
mock_vllm_config.model_config.dtype = torch.float16
441444
mock_device = torch.device('cpu')

vllm_ascend/utils.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -471,9 +471,11 @@ def update_aclgraph_sizes(vllm_config: VllmConfig) -> None:
471471

472472
# Calculate parallel configuration factor
473473
if not vllm_config.model_config:
474-
logger.warning("Got empty model config, This occurs in scenarios \
475-
where an empty config needs to be initialized, eg: unit tests, \
476-
where updates are skipped.")
474+
logger.warning(
475+
"Got empty model config. This typically occurs when an empty vllm_config is "
476+
"initialized (e.g., in unit tests), where config updates are intentionally skipped."
477+
)
478+
477479
return
478480
hf_config = vllm_config.model_config.hf_config
479481
if hasattr(hf_config, 'num_hidden_layers'):

0 commit comments

Comments (0)