From 10518b8066d640e6ab4e201517a7c6b0181d35ff Mon Sep 17 00:00:00 2001 From: nv-guomingz <137257613+nv-guomingz@users.noreply.github.com> Date: Tue, 11 Nov 2025 11:47:16 +0000 Subject: [PATCH] [None][fix] Update the attention layers counting for Qwen3-next. Signed-off-by: nv-guomingz <137257613+nv-guomingz@users.noreply.github.com> --- tensorrt_llm/_torch/model_config.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tensorrt_llm/_torch/model_config.py b/tensorrt_llm/_torch/model_config.py index ca956dc53cf..b7e42fc09b0 100644 --- a/tensorrt_llm/_torch/model_config.py +++ b/tensorrt_llm/_torch/model_config.py @@ -605,5 +605,12 @@ def get_layer_types(self) -> Optional[List[LayerTypeCpp]]: def get_num_attention_layers(self): if is_nemotron_hybrid(self.pretrained_config): return self.pretrained_config.hybrid_override_pattern.count("*") + elif hasattr( + self.pretrained_config, "architectures" + ) and self.pretrained_config.architectures is not None and self.pretrained_config.architectures[ + 0] in ["Qwen3NextForCausalLM"]: + # Qwen3NextForCausalLM has a hybrid attention pattern (a 1:3 ratio of full attention to linear attention), + # so we need to compute the number of full attention layers + return self.pretrained_config.num_hidden_layers // self.pretrained_config.full_attention_interval else: return self.pretrained_config.num_hidden_layers