@@ -7,5 +7,6 @@
 freqs_cis : -1 # torch.complex64 (2048, 64)
 tok_embeddings.weight : 1 # torch.float32 (32000, 4096)
+tok_embeddings.weight_scaler : 0 # torch.bfloat16 (4096,)
 layers.*.attention.wo.weight : 1 # torch.int8 (4096, 4096)
 layers.*.attention.wo.weight_scaler : 0 # torch.bfloat16 (4096,)
 layers.*.attention.wq.weight : 0 # torch.int8 (4096, 4096)
@@ -15,9 +16,13 @@ layers.*.attention.wk.weight_scaler : 0 # torch.bfloat16 (4096,)
 layers.*.attention.wv.weight : 0 # torch.int8 (4096, 4096)
 layers.*.attention.wv.weight_scaler : 0 # torch.bfloat16 (4096,)
 layers.*.feed_forward.w1.weight : 0 # torch.float32 (11008, 4096)
+layers.*.feed_forward.w1.weight_scaler : 0 # torch.bfloat16 (4096,)
 layers.*.feed_forward.w2.weight : 1 # torch.float32 (4096, 11008)
+layers.*.feed_forward.w2.weight_scaler : 0 # torch.bfloat16 (11008,)
 layers.*.feed_forward.w3.weight : 0 # torch.float32 (11008, 4096)
+layers.*.feed_forward.w3.weight_scaler : 0 # torch.bfloat16 (4096,)
 layers.*.attention_norm.weight : -1 # torch.float32 (4096,)
 layers.*.ffn_norm.weight : -1 # torch.float32 (4096,)
 norm.weight : -1 # torch.float32 (4096,)
 output.weight : 0 # torch.float32 (32000, 4096)
+output.weight_scaler : 0 # torch.float32 (4096,)
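The added `weight_scaler` lines give each per-channel scale tensor its own sharding rule alongside the weight it rescales. A minimal sketch of how a YAML spec in this shape could be resolved to a per-parameter sharding axis, assuming a file named `llama.yaml` and hypothetical helper names (not the repo's actual loader):

```python
# Sketch only: map wildcard keys such as "layers.*.attention.wv.weight"
# to a sharding axis for a concrete parameter name.
import fnmatch
import yaml

def load_sharding_map(path="llama.yaml"):
    """Parse the YAML spec; each value is a dim index to shard, or -1/null for replicated."""
    with open(path) as f:
        return yaml.safe_load(f)

def sharding_axis_for(param_name, sharding_map):
    """Return the sharding axis for param_name, matching wildcard keys with fnmatch."""
    for pattern, axis in sharding_map.items():
        if fnmatch.fnmatch(param_name, pattern):
            return axis
    raise KeyError(f"no sharding rule matches {param_name!r}")

sharding = load_sharding_map()
# Per the config above, the int8 weight and its bfloat16 scaler both resolve to axis 0.
print(sharding_axis_for("layers.3.attention.wv.weight", sharding))         # -> 0
print(sharding_axis_for("layers.3.attention.wv.weight_scaler", sharding))  # -> 0
```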