Commit 98c088c
[None][fix] Update the attention layers counting for Qwen3-next.
Signed-off-by: nv-guomingz <137257613+nv-guomingz@users.noreply.github.com>
1 parent: 264d38e

File tree

1 file changed: +6 additions, -0 deletions

tensorrt_llm/_torch/model_config.py (6 additions, 0 deletions)

@@ -642,5 +642,11 @@ def get_layer_types(self) -> Optional[List[LayerTypeCpp]]:
     def get_num_attention_layers(self):
         if is_nemotron_hybrid(self.pretrained_config):
             return self.pretrained_config.hybrid_override_pattern.count("*")
+        elif self.pretrained_config.architectures[0] in [
+                "Qwen3NextForCausalLM"
+        ]:
+            # Qwen3NextForCausalLM has a hybrid attention pattern (1:3 full
+            # attention to linear attention), so we need to compute the number
+            # of full-attention layers.
+            return self.pretrained_config.num_hidden_layers // self.pretrained_config.full_attention_interval
         else:
             return self.pretrained_config.num_hidden_layers
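The arithmetic behind the new branch can be sketched in isolation. The snippet below is a minimal, self-contained illustration of the counting logic, using a stand-in config object with hypothetical values (48 layers, one full-attention layer every 4 layers); the real code reads these fields from `pretrained_config`:

```python
# Sketch of the layer-counting logic from the diff above, using a stand-in
# config object (assumed values; the real code uses pretrained_config).
from types import SimpleNamespace

def get_num_attention_layers(config):
    # Qwen3-Next interleaves one full-attention layer per
    # `full_attention_interval` layers (the rest use linear attention),
    # so integer division gives the full-attention layer count.
    if config.architectures[0] == "Qwen3NextForCausalLM":
        return config.num_hidden_layers // config.full_attention_interval
    # Non-hybrid models: every layer is a full-attention layer.
    return config.num_hidden_layers

# Hypothetical Qwen3-Next-style config: 48 layers, interval of 4.
qwen3_next = SimpleNamespace(
    architectures=["Qwen3NextForCausalLM"],
    num_hidden_layers=48,
    full_attention_interval=4,
)
# Hypothetical dense model for contrast.
dense = SimpleNamespace(architectures=["LlamaForCausalLM"], num_hidden_layers=32)

print(get_num_attention_layers(qwen3_next))  # 12 full-attention layers
print(get_num_attention_layers(dense))       # 32
```

With a 1:3 full-to-linear ratio, only a quarter of the 48 layers carry full attention, which is what the integer division by `full_attention_interval` captures.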
