1 parent 264d38e commit 98c088c
tensorrt_llm/_torch/model_config.py
@@ -642,5 +642,11 @@ def get_layer_types(self) -> Optional[List[LayerTypeCpp]]:
     def get_num_attention_layers(self):
         if is_nemotron_hybrid(self.pretrained_config):
             return self.pretrained_config.hybrid_override_pattern.count("*")
+        elif self.pretrained_config.architectures[0] in [
+                "Qwen3NextForCausalLM"
+        ]:
+            # Qwen3NextForCausalLM uses a hybrid attention pattern (1:3 full attention to linear attention),
+            # so we only count the full-attention layers.
+            return self.pretrained_config.num_hidden_layers // self.pretrained_config.full_attention_interval
         else:
             return self.pretrained_config.num_hidden_layers
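
The idea behind the new branch: in a hybrid-attention model, only every `full_attention_interval`-th layer is a full-attention layer, so the count is an integer division of the total layer count. Below is a minimal, self-contained sketch of that calculation, separate from the TensorRT-LLM code path; the config values (48 layers, interval 4) are assumed for illustration only.

```python
from types import SimpleNamespace

# Hypothetical config for illustration; real values come from the model's pretrained_config.
cfg = SimpleNamespace(
    architectures=["Qwen3NextForCausalLM"],
    num_hidden_layers=48,        # assumed total number of decoder layers
    full_attention_interval=4,   # assumed: 1 full-attention layer per 4 layers (1:3 pattern)
)

def num_attention_layers(pretrained_config) -> int:
    """Count only the full-attention layers of a hybrid-attention model."""
    if pretrained_config.architectures[0] == "Qwen3NextForCausalLM":
        return (pretrained_config.num_hidden_layers
                // pretrained_config.full_attention_interval)
    # Non-hybrid models: every layer is a full-attention layer.
    return pretrained_config.num_hidden_layers

print(num_attention_layers(cfg))  # -> 12 full-attention layers out of 48 total
```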