Commit 2e6fd83

Merge pull request #2635 from AI-Hypercomputer:shuningjin-ckpt-fix
PiperOrigin-RevId: 830746026
2 parents 025a0f6 + bc16288 commit 2e6fd83

File tree

4 files changed (+24, −5 lines)


src/MaxText/utils/ckpt_scripts/convert_deepseek_family_ckpt.py

Lines changed: 1 addition & 1 deletion
@@ -41,7 +41,7 @@
 
 from MaxText import max_logging
 from MaxText.inference_utils import str2bool
-from MaxText import llama_or_mistral_ckpt
+from MaxText.utils.ckpt_scripts import llama_or_mistral_ckpt
 
 
 MODEL_PARAMS_DICT = {
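The same one-line change repeats across all four scripts in this commit: llama_or_mistral_ckpt is now imported from MaxText.utils.ckpt_scripts instead of from the MaxText package root. As a minimal sketch, assuming the module was physically relocated under src/MaxText/utils/ckpt_scripts/ (consistent with the file paths touched here, though the move itself is not shown in this diff), a call site that must work across both layouts could fall back to the old path:

# Hedged sketch, not part of this PR: prefer the new module location and
# fall back to the legacy one for checkouts that predate the move.
try:
    from MaxText.utils.ckpt_scripts import llama_or_mistral_ckpt  # new path
except ImportError:
    from MaxText import llama_or_mistral_ckpt  # legacy pre-move path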

src/MaxText/utils/ckpt_scripts/convert_deepseek_family_unscanned_ckpt.py

Lines changed: 1 addition & 1 deletion
@@ -36,7 +36,7 @@
 from tqdm import tqdm
 
 from MaxText.utils.ckpt_scripts import convert_deepseek_family_ckpt as ds_ckpt
-from MaxText import llama_or_mistral_ckpt
+from MaxText.utils.ckpt_scripts import llama_or_mistral_ckpt
 from MaxText import max_logging
 from MaxText.inference_utils import str2bool
 from safetensors import safe_open

src/MaxText/utils/ckpt_scripts/convert_qwen3_moe.py

Lines changed: 21 additions & 2 deletions
@@ -32,8 +32,9 @@
 from safetensors import safe_open
 from tqdm import tqdm
 
-from MaxText import llama_or_mistral_ckpt, max_logging
+from MaxText import max_logging
 from MaxText.inference_utils import str2bool
+from MaxText.utils.ckpt_scripts import llama_or_mistral_ckpt
 
 # Static model parameters dictionary
 MODEL_PARAMS_DICT = {
@@ -45,7 +46,25 @@
     "head_dim": 128,
     "num_experts": 128,
     "moe_intermediate_size": 1536,
-  }
+  },
+  "qwen3-30b-a3b": {
+    "num_hidden_layers": 48,
+    "num_attention_heads": 32,
+    "num_key_value_heads": 4,
+    "hidden_size": 2048,
+    "head_dim": 128,
+    "num_experts": 128,
+    "moe_intermediate_size": 768,
+  },
+  "qwen3-480b-a35b": {
+    "num_hidden_layers": 62,
+    "num_attention_heads": 96,
+    "num_key_value_heads": 8,
+    "hidden_size": 6144,
+    "head_dim": 128,
+    "num_experts": 160,
+    "moe_intermediate_size": 2560,
+  },
 }
 
 
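This hunk registers two new Qwen3 MoE variants in MODEL_PARAMS_DICT. As an illustrative sketch (the usage below is hypothetical, not taken from the conversion script), such a static entry gives a checkpoint converter what it needs to derive tensor shapes. Note that in these configs head_dim is fixed at 128 and decoupled from hidden_size, so attention projections must be sized from the head counts rather than from hidden_size alone:

# Hypothetical usage sketch; values mirror the "qwen3-30b-a3b" entry above.
params = {
    "num_hidden_layers": 48,
    "num_attention_heads": 32,
    "num_key_value_heads": 4,
    "hidden_size": 2048,
    "head_dim": 128,
    "num_experts": 128,
    "moe_intermediate_size": 768,
}

# Query projection spans all attention heads...
q_shape = (params["hidden_size"], params["num_attention_heads"] * params["head_dim"])
# ...while grouped-query attention shrinks K/V to the key/value head count.
kv_shape = (params["hidden_size"], params["num_key_value_heads"] * params["head_dim"])
print(q_shape, kv_shape)  # (2048, 4096) (2048, 512)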

src/MaxText/utils/ckpt_scripts/llama_mistral_mixtral_orbax_to_hf.py

Lines changed: 1 addition & 1 deletion
@@ -48,7 +48,7 @@
 from transformers import LlamaForCausalLM, MistralForCausalLM, AutoModelForCausalLM, AutoConfig
 
 from MaxText import checkpointing
-from MaxText import llama_or_mistral_ckpt
+from MaxText.utils.ckpt_scripts import llama_or_mistral_ckpt
 from MaxText import max_logging
 from MaxText import maxtext_utils
 from MaxText import pyconfig
