@@ -172,6 +172,7 @@ def __init__(self, *args, **kwargs):
         self.expert_map_path = ascend_config.expert_map_path
         self.global_redundant_expert_num = ascend_config.init_redundancy_expert
         self.global_num_experts = num_experts + self.global_redundant_expert_num
+        init_eplb_enable = False
         if self.custom_routing_function is None and self.e_score_correction_bias is not None:
             vllm_config = get_current_vllm_config()
             self.e_score_correction_bias.data = self.e_score_correction_bias.data.to(
@@ -191,6 +192,7 @@ def __init__(self, *args, **kwargs):
                         self.moe_instance_id, self.ep_rank))
                 self.log2phy = self.expert_load_balancer.get_rank_log2phy_map(
                     self.moe_instance_id, self.ep_rank).npu()
+                init_eplb_enable = True
             except Exception as e:
                 logger.warning(
                     f"Init expert map of mtp/eagle when using sample.{e}")
@@ -236,8 +238,7 @@ def __init__(self, *args, **kwargs):
             self.moe_load = torch.zeros(local_num_experts,
                                         dtype=torch.int64).npu()
 
-        eplb_enable = self.dynamic_eplb or (self.expert_map_path is not None)
-        if eplb_enable and (not hasattr(self.quant_method, "quant_method") or
+        if init_eplb_enable and (not hasattr(self.quant_method, "quant_method") or
                             not isinstance(self.quant_method.quant_method,
                                            AscendW8A8DynamicFusedMoEMethod)):
             raise ValueError("Eplb supports only w8a8_dynamic quantization.")
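
For reference, a minimal, self-contained sketch of the control flow this diff introduces: the new init_eplb_enable flag starts off False, is flipped to True only after the expert map is successfully built, and then gates the w8a8_dynamic-only quantization check. This is not the actual AscendFusedMoE code; load_expert_map and is_w8a8_dynamic below are hypothetical stand-ins for the ExpertLoadBalancer setup and the AscendW8A8DynamicFusedMoEMethod isinstance check.

def is_w8a8_dynamic(quant_method: str) -> bool:
    # Stand-in for: isinstance(self.quant_method.quant_method,
    #                          AscendW8A8DynamicFusedMoEMethod)
    return quant_method == "w8a8_dynamic"

def load_expert_map(path: str) -> None:
    # Stand-in for building the per-rank placement and log2phy maps.
    if path is None:
        raise FileNotFoundError("no expert map")

def init_eplb_check(expert_map_path, quant_method) -> bool:
    init_eplb_enable = False                     # new flag, default off
    if expert_map_path is not None:
        try:
            load_expert_map(expert_map_path)
            init_eplb_enable = True              # set only once the map is built
        except Exception as e:
            print(f"Init expert map of mtp/eagle when using sample.{e}")

    # The quantization restriction is now gated on the flag, so a missing or
    # failed expert map no longer raises here.
    if init_eplb_enable and not is_w8a8_dynamic(quant_method):
        raise ValueError("Eplb supports only w8a8_dynamic quantization.")
    return init_eplb_enable

# Usage: raises only when an expert map loads but quantization isn't w8a8_dynamic.
assert init_eplb_check(None, "w4a8") is False
assert init_eplb_check("/path/to/expert_map.json", "w8a8_dynamic") is True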