|
136 | 136 | Qwen2Model, |
137 | 137 | Qwen2RMSNorm, |
138 | 138 | ) |
| 139 | +from transformers.models.qwen3_moe.modeling_qwen3_moe import ( |
| 140 | + Qwen3MoeAttention, |
| 141 | + Qwen3MoeDecoderLayer, |
| 142 | + Qwen3MoeForCausalLM, |
| 143 | + Qwen3MoeModel, |
| 144 | + Qwen3MoeRMSNorm, |
| 145 | + Qwen3MoeRotaryEmbedding, |
| 146 | + Qwen3MoeSparseMoeBlock, |
| 147 | +) |
139 | 148 | from transformers.models.starcoder2.modeling_starcoder2 import ( |
140 | 149 | Starcoder2Attention, |
141 | 150 | Starcoder2DecoderLayer, |
|
303 | 312 | QEffQwen2ForCausalLM, |
304 | 313 | QEffQwen2Model, |
305 | 314 | ) |
| 315 | +from QEfficient.transformers.models.qwen3_moe.modeling_qwen3_moe import ( |
| 316 | + QEffQwen3MoeAttention, |
| 317 | + QEffQwen3MoeDecoderLayer, |
| 318 | + QEffQwen3MoeForCausalLM, |
| 319 | + QEffQwen3MoeModel, |
| 320 | + QEffQwen3MoeRotaryEmbedding, |
| 321 | + QEffQwen3MoeSparseMoeBlock, |
| 322 | +) |
306 | 323 | from QEfficient.transformers.models.starcoder2.modeling_starcoder2 import ( |
307 | 324 | QEffStarcoder2Attention, |
308 | 325 | QEFFStarcoder2DecoderLayer, |
@@ -338,6 +355,7 @@ class CustomOpsTransform(ModuleMappingTransform): |
338 | 355 | MllamaTextRMSNorm: CustomRMSNormAIC, |
339 | 356 | GraniteRMSNorm: CustomRMSNormAIC, |
340 | 357 | GraniteMoeRMSNorm: CustomRMSNormAIC, |
| 358 | + Qwen3MoeRMSNorm: CustomRMSNormAIC, |
341 | 359 | Gemma3RMSNorm: QEffGemma3CustomRMSNormAIC, |
342 | 360 | } |
343 | 361 |
|
@@ -388,6 +406,13 @@ class KVCacheTransform(ModuleMappingTransform): |
388 | 406 | GemmaDecoderLayer: QEffGemmaDecoderLayer, |
389 | 407 | GemmaModel: QEffGemmaModel, |
390 | 408 | GemmaForCausalLM: QEffGemmaForCausalLM, |
| 409 | + # Qwen3Moe |
| 410 | + Qwen3MoeForCausalLM: QEffQwen3MoeForCausalLM, |
| 411 | + Qwen3MoeModel: QEffQwen3MoeModel, |
| 412 | + Qwen3MoeDecoderLayer: QEffQwen3MoeDecoderLayer, |
| 413 | + Qwen3MoeAttention: QEffQwen3MoeAttention, |
| 414 | + Qwen3MoeRotaryEmbedding: QEffQwen3MoeRotaryEmbedding, |
| 415 | + Qwen3MoeSparseMoeBlock: QEffQwen3MoeSparseMoeBlock, |
391 | 416 | # Gemma2 |
392 | 417 | Gemma2Attention: QEffGemma2Attention, |
393 | 418 | Gemma2DecoderLayer: QEffGemma2DecoderLayer, |
|
0 commit comments