Skip to content

Commit 7ce57c8

Browse files
committed
added unit tests
1 parent 26e08a2 commit 7ce57c8

File tree

6 files changed

+869
-0
lines changed

6 files changed

+869
-0
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
{
2+
"architectures": [
3+
"MistralForCausalLM"
4+
],
5+
"attention_dropout": 0.0,
6+
"bos_token_id": 1,
7+
"eos_token_id": 2,
8+
"pad_token_id": 11,
9+
"head_dim": 128,
10+
"hidden_act": "silu",
11+
"hidden_size": 5120,
12+
"initializer_range": 0.02,
13+
"intermediate_size": 32768,
14+
"max_position_embeddings": 131072,
15+
"model_type": "mistral",
16+
"num_attention_heads": 32,
17+
"num_hidden_layers": 40,
18+
"num_key_value_heads": 8,
19+
"rms_norm_eps": 1e-05,
20+
"rope_theta": 1000000000.0,
21+
"sliding_window": null,
22+
"tie_word_embeddings": false,
23+
"torch_dtype": "bfloat16",
24+
"transformers_version": "4.53.1",
25+
"use_cache": true,
26+
"vocab_size": 131072
27+
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
{
2+
"architectures": [
3+
"DeepseekV3ForCausalLM"
4+
],
5+
"attention_bias": false,
6+
"attention_dropout": 0.0,
7+
"auto_map": {
8+
"AutoConfig": "configuration_deepseek.DeepseekV3Config",
9+
"AutoModel": "modeling_deepseek.DeepseekV3Model",
10+
"AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
11+
},
12+
"aux_loss_alpha": 0.001,
13+
"bos_token_id": 163584,
14+
"eos_token_id": 163585,
15+
"first_k_dense_replace": 1,
16+
"hidden_act": "silu",
17+
"hidden_size": 7168,
18+
"initializer_range": 0.02,
19+
"intermediate_size": 18432,
20+
"kv_lora_rank": 512,
21+
"max_position_embeddings": 131072,
22+
"model_type": "kimi_k2",
23+
"moe_intermediate_size": 2048,
24+
"moe_layer_freq": 1,
25+
"n_group": 1,
26+
"n_routed_experts": 384,
27+
"n_shared_experts": 1,
28+
"norm_topk_prob": true,
29+
"num_attention_heads": 64,
30+
"num_experts_per_tok": 8,
31+
"num_hidden_layers": 61,
32+
"num_key_value_heads": 64,
33+
"num_nextn_predict_layers": 0,
34+
"pretraining_tp": 1,
35+
"q_lora_rank": 1536,
36+
"qk_nope_head_dim": 128,
37+
"qk_rope_head_dim": 64,
38+
"quantization_config": {
39+
"activation_scheme": "dynamic",
40+
"fmt": "e4m3",
41+
"quant_method": "fp8",
42+
"weight_block_size": [
43+
128,
44+
128
45+
]
46+
},
47+
"rms_norm_eps": 1e-06,
48+
"rope_theta": 50000.0,
49+
"routed_scaling_factor": 2.827,
50+
"rope_scaling": {
51+
"beta_fast": 1.0,
52+
"beta_slow": 1.0,
53+
"factor": 32.0,
54+
"mscale": 1.0,
55+
"mscale_all_dim": 1.0,
56+
"original_max_position_embeddings": 4096,
57+
"type": "yarn"
58+
},
59+
"scoring_func": "sigmoid",
60+
"seq_aux": true,
61+
"tie_word_embeddings": false,
62+
"topk_group": 1,
63+
"topk_method": "noaux_tc",
64+
"torch_dtype": "bfloat16",
65+
"transformers_version": "4.48.3",
66+
"use_cache": true,
67+
"v_head_dim": 128,
68+
"vocab_size": 163840
69+
}

0 commit comments

Comments
 (0)