From 10518b8066d640e6ab4e201517a7c6b0181d35ff Mon Sep 17 00:00:00 2001 From: nv-guomingz <137257613+nv-guomingz@users.noreply.github.com> Date: Tue, 11 Nov 2025 11:47:16 +0000 Subject: [PATCH] [None][fix] Update the attention layers counting for Qwen3-next. Signed-off-by: nv-guomingz <137257613+nv-guomingz@users.noreply.github.com> --- tensorrt_llm/_torch/model_config.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tensorrt_llm/_torch/model_config.py b/tensorrt_llm/_torch/model_config.py index ca956dc53cf..b7e42fc09b0 100644 --- a/tensorrt_llm/_torch/model_config.py +++ b/tensorrt_llm/_torch/model_config.py @@ -605,5 +605,12 @@ def get_layer_types(self) -> Optional[List[LayerTypeCpp]]: def get_num_attention_layers(self): if is_nemotron_hybrid(self.pretrained_config): return self.pretrained_config.hybrid_override_pattern.count("*") + elif hasattr( + self.pretrained_config, "architectures" + ) and self.pretrained_config.architectures is not None and self.pretrained_config.architectures[ + 0] in ["Qwen3NextForCausalLM"]: + # Qwen3NextForCausalLM has a hybrid attention pattern (a 1:3 ratio of full attention to linear attention), + # so we need to compute the number of full attention layers + return self.pretrained_config.num_hidden_layers // self.pretrained_config.full_attention_interval else: return self.pretrained_config.num_hidden_layers