@@ -279,11 +279,10 @@ class TensorNameMap:
             "transformer.decoder_layer.{bid}.rms_norm_2",  # Grok
             "encoder.layers.{bid}.post_attention_layernorm",  # chatglm
             "transformer.layers.{bid}.ffn_norm",  # openelm
-            "model.layers.{bid}.pre_ff_layernorm",  # jamba
+            "model.layers.{bid}.pre_ff_layernorm",  # jamba bamba
             "model.layers.{bid}.pre_moe_layernorm",  # mini-jamba
             "model.layers.{bid}.post_attention_layernorm",  # llama4
             "transformer_encoder.{bid}.ffn_norm",  # neobert
-            "model.layers.{bid}.pre_ff_layernorm",  # bamba
         ),

         # Post feed-forward norm
@@ -305,9 +304,8 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.gate",  # qwen2moe olmoe
             "transformer.decoder_layer.{bid}.router",  # Grok
             "transformer.blocks.{bid}.ffn.router.layer",  # dbrx
-            "model.layers.{bid}.feed_forward.router",  # jamba
             "model.layers.{bid}.block_sparse_moe.router.layer",  # granitemoe
-            "model.layers.{bid}.feed_forward.router",  # llama4
+            "model.layers.{bid}.feed_forward.router",  # llama4 jamba
             "encoder.layers.{bid}.mlp.router.layer",  # nomic-bert-moe
             "model.layers.{bid}.mlp.gate.wg",  # hunyuan
         ),
@@ -349,12 +347,10 @@ class TensorNameMap:
             "encoder.layer.{bid}.mlp.gated_layers",  # jina-bert-v2 (GEGLU)
             "encoder.layer.{bid}.mlp.up_gated_layer",  # jina-v2-code (GEGLU)
             "model.layers.{bid}.residual_mlp.w3",  # arctic
-            "model.layers.{bid}.feed_forward.up_proj",  # jamba
             "encoder.layers.{bid}.mlp.dense_h_to_4h",  # chatglm
             "transformer.h.{bid}.mlp.c_fc_1",  # exaone
-            "model.layers.{bid}.feed_forward.up_proj",  # llama4
+            "model.layers.{bid}.feed_forward.up_proj",  # llama4 jamba bamba
             "transformer_encoder.{bid}.ffn.w12",  # neobert
-            "model.layers.{bid}.feed_forward.up_proj",  # bamba
         ),

         MODEL_TENSOR.FFN_UP_EXP: (
@@ -392,10 +388,8 @@ class TensorNameMap:
             "encoder.layer.{bid}.mlp.gated_layers_w",  # jina-bert-v2 (split up/gate, no longer used)
             "transformer.h.{bid}.mlp.linear_1",  # refact
             "model.layers.{bid}.residual_mlp.w1",  # arctic
-            "model.layers.{bid}.feed_forward.gate_proj",  # jamba
             "transformer.h.{bid}.mlp.c_fc_0",  # exaone
-            "language_model.model.layers.{bid}.feed_forward.gate_proj",  # llama4
-            "model.layers.{bid}.feed_forward.gate_proj",  # bamba
+            "model.layers.{bid}.feed_forward.gate_proj",  # llama4 jamba bamba
         ),

         MODEL_TENSOR.FFN_GATE_EXP: (
@@ -439,12 +433,10 @@ class TensorNameMap:
             "transformer.layers.{bid}.ffn.proj_2",  # openelm
             "model.layers.{bid}.residual_mlp.w2",  # arctic
             "encoder.layer.{bid}.mlp.down_layer",  # jina-bert-v2
-            "model.layers.{bid}.feed_forward.down_proj",  # jamba
             "encoder.layers.{bid}.mlp.dense_4h_to_h",  # chatglm
             "model.layers.h.{bid}.mlp.c_proj",  # exaone
-            "model.layers.{bid}.feed_forward.down_proj",  # llama4
+            "model.layers.{bid}.feed_forward.down_proj",  # llama4 jamba bamba
             "transformer_encoder.{bid}.ffn.w3",  # neobert
-            "model.layers.{bid}.feed_forward.down_proj",  # bamba
         ),

         MODEL_TENSOR.FFN_DOWN_EXP: (
@@ -614,9 +606,8 @@ class TensorNameMap:
         ),

         MODEL_TENSOR.SSM_NORM: (
-            "model.layers.{bid}.mamba.norm",  # falcon-h1
+            "model.layers.{bid}.mamba.norm",  # falcon-h1 bamba
             "backbone.layers.{bid}.mixer.norm",  # mamba2
-            "model.layers.{bid}.mamba.norm",  # bamba
         ),

         MODEL_TENSOR.SSM_OUT: (
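For context on why the duplicate jamba/llama4/bamba lines can be collapsed into one entry with a merged comment: each entry is a `{bid}` template string, and identical templates map to the same GGUF tensor regardless of which architecture they are listed for, so one line per unique name is enough. Below is a minimal, self-contained sketch of that expansion; the names `FFN_UP_TEMPLATES` and `build_ffn_up_mapping` are hypothetical stand-ins for illustration, not the real `TensorNameMap` API.

```python
# Hypothetical sketch (not the actual gguf-py implementation): expand
# {bid}-templated names per block index, then resolve a checkpoint tensor
# name to a canonical GGUF tensor identifier plus its block index.
FFN_UP_TEMPLATES = (
    "model.layers.{bid}.mlp.up_proj",           # llama-hf style
    "model.layers.{bid}.feed_forward.up_proj",  # llama4 jamba bamba (one merged entry)
)

def build_ffn_up_mapping(n_blocks: int) -> dict[str, tuple[str, int]]:
    """Expand each {bid} template once per block index."""
    mapping: dict[str, tuple[str, int]] = {}
    for bid in range(n_blocks):
        for template in FFN_UP_TEMPLATES:
            mapping[template.format(bid=bid)] = ("ffn_up", bid)
    return mapping

mapping = build_ffn_up_mapping(n_blocks=32)
print(mapping["model.layers.3.feed_forward.up_proj"])  # ('ffn_up', 3)
```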