
Commit a1b0b40

[None][fix] Update the attention layers counting for Qwen3-next.
Signed-off-by: nv-guomingz <137257613+nv-guomingz@users.noreply.github.com>
Parent: 0ce22ce

1 file changed: 7 additions, 0 deletions


tensorrt_llm/_torch/model_config.py

@@ -642,5 +642,12 @@ def get_layer_types(self) -> Optional[List[LayerTypeCpp]]:
     def get_num_attention_layers(self):
         if is_nemotron_hybrid(self.pretrained_config):
             return self.pretrained_config.hybrid_override_pattern.count("*")
+        elif hasattr(self.pretrained_config, "architectures"
+                     ) and self.pretrained_config.architectures[0] in [
+                         "Qwen3NextForCausalLM"
+                     ]:
+            # Qwen3NextForCausalLM has a hybrid attention pattern
+            # (1:3 full attention : linear attention), so we need to count
+            # only the full-attention layers.
+            return self.pretrained_config.num_hidden_layers // self.pretrained_config.full_attention_interval
         else:
             return self.pretrained_config.num_hidden_layers
