Diff summary: 3 files changed, +563 −2 lines.
Changed path: backends/gaudi/server/text_generation_server/models
lines changed Original file line number Diff line number Diff line change 104104 from text_generation_server .models .custom_modeling .flash_qwen3_modeling import (
105105 Qwen3ForCausalLM ,
106106 )
107+ from text_generation_server .models .custom_modeling .flash_qwen3_moe_modeling import (
108+ Qwen3MoeForCausalLM ,
109+ )
107110 from text_generation_server .models .custom_modeling .flash_mistral_modeling import (
108111 FlashMistralForCausalLM ,
109112 )
@@ -292,7 +295,11 @@ class ModelType(enum.Enum):
292295 "name" : "Qwen 3" ,
293296 "url" : "https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f" ,
294297 }
295-
298+ QWEN3_MOE = {
299+ "type" : "qwen3_moe" ,
300+ "name" : "Qwen 3 Moe" ,
301+ "url" : "https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f" ,
302+ }
296303 GALACTICA = {
297304 "type" : "galactica" ,
298305 "name" : "Galactica" ,
@@ -808,6 +815,18 @@ def get_model(
808815 trust_remote_code = trust_remote_code ,
809816 lora_adapter_ids = lora_adapter_ids ,
810817 )
818+ elif model_type == QWEN3_MOE :
819+ return FlashCausalLM (
820+ model_id = model_id ,
821+ model_class = Qwen3MoeForCausalLM ,
822+ revision = revision ,
823+ quantize = quantize ,
824+ speculator = speculator ,
825+ dtype = dtype ,
826+ kv_cache_dtype = kv_cache_dtype ,
827+ trust_remote_code = trust_remote_code ,
828+ lora_adapter_ids = lora_adapter_ids ,
829+ )
811830 elif model_type == MLLAMA :
812831 return FlashMllamaCausalLM (
813832 model_id = model_id ,
0 commit comments.