
Commit 2fa3945

Authored by offline893, offline0806, and gemini-code-assist[bot]
[Bugfix]Fix eplb enable when using mtp float weights. (#4571)
### What this PR does / why we need it?

Fix EPLB enablement when using MTP float weights. This workaround will be removed once EPLB supports MTP and float weights.

### How was this patch tested?

Deepseek-V3 + MTP + EPLB on A3.

- vLLM version: v0.11.2
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.2

---------

Signed-off-by: offline0806 <3337230449@qq.com>
Signed-off-by: offline893 <158537145+offline893@users.noreply.github.com>
Co-authored-by: offline0806 <3337230449@qq.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
1 parent 71e9b37 commit 2fa3945

File tree

1 file changed: +8 additions, -4 deletions


vllm_ascend/ops/fused_moe/fused_moe.py

Lines changed: 8 additions & 4 deletions
@@ -184,6 +184,9 @@ def __init__(self, *args, **kwargs):
         # init moe.
         self.local_num_experts, self.expert_map, _ = determine_expert_map(
             self.ep_size, self.ep_rank, self.global_num_experts)
+        # TODO: Temporary flag to indicate if static EPLB is enabled. This is a
+        # workaround to bypass a quantization check that fails with float weights.
+        init_eplb_enable = False
         # static eplb initializing with expert_map_path
         if self.expert_map_path and os.path.exists(
                 self.expert_map_path) and os.access(self.expert_map_path,
@@ -200,6 +203,7 @@ def __init__(self, *args, **kwargs):
                         self.moe_instance_id, self.ep_rank))
                 self.log2phy = self.expert_load_balancer.get_rank_log2phy_map(
                     self.moe_instance_id, self.ep_rank).npu()
+                init_eplb_enable = True
             except Exception as e:
                 logger.warning(
                     f"Init expert map of mtp/eagle when using sample.{e}")
@@ -225,10 +229,10 @@ def __init__(self, *args, **kwargs):
         self.moe_load = torch.zeros(local_num_experts,
                                     dtype=torch.int64).npu()
 
-        eplb_enable = self.dynamic_eplb or (self.expert_map_path is not None)
-        if eplb_enable and (not hasattr(self.quant_method, "quant_method") or
-                            not isinstance(self.quant_method.quant_method,
-                                           AscendW8A8DynamicFusedMoEMethod)):
+        if init_eplb_enable and (
+                not hasattr(self.quant_method, "quant_method")
+                or not isinstance(self.quant_method.quant_method,
+                                  AscendW8A8DynamicFusedMoEMethod)):
             raise ValueError("Eplb supports only w8a8_dynamic quantization.")
 
         self.moe_config.num_experts = self.global_num_experts
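Below is a minimal sketch of the revised gating logic shown in the diff above. The helper `check_eplb_quantization` and the placeholder class are hypothetical stand-ins, not vllm_ascend APIs; the point is that the quantization check now fires only when static EPLB was actually initialized from an expert map file, so MTP layers loaded with float weights no longer trip it.

```python
# Illustrative sketch only: `check_eplb_quantization` and the placeholder
# class below are hypothetical, not part of vllm_ascend.


class AscendW8A8DynamicFusedMoEMethod:
    """Stand-in for the only quantization method EPLB currently supports."""


def check_eplb_quantization(init_eplb_enable: bool, quant_method) -> None:
    # Before this fix, the check keyed off `dynamic_eplb or expert_map_path`,
    # so MTP layers with float weights were rejected even though static EPLB
    # was never initialized for them. Now the check only fires when
    # `init_eplb_enable` was set during expert-map loading.
    if init_eplb_enable and (
            not hasattr(quant_method, "quant_method")
            or not isinstance(quant_method.quant_method,
                              AscendW8A8DynamicFusedMoEMethod)):
        raise ValueError("Eplb supports only w8a8_dynamic quantization.")
```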
