refactor: use batch_forward_type.is_decode() for prefill/decode phase detection in the Qwen3 MoE forward pass.

yingxudeng · yingxudeng · commit 35234ccc5309 · 2025-12-02T18:05:32.000+08:00
diff --git a/xllm/core/layers/npu/npu_qwen3_moe_decoder_layer_impl.cpp b/xllm/core/layers/npu/npu_qwen3_moe_decoder_layer_impl.cpp
@@ -896,9 +896,7 @@ torch::Tensor NpuQwen3MoeDecoderLayerImpl::forward(
     std::atomic<bool>* event_flag,
     int node_id) {
   atb::Status st;
-  bool is_prefill = input_params.decode_seq_range.second !=
-                    input_params.q_seq_lens.size(0) - 1;
-  if (is_prefill) {
+  if (!input_params.batch_forward_type.is_decode()) {
     build_node_variant_pack(prefill_node_,
                             x,
                             cos_pos,