From b85ecaa6854c39c61f43defd1765a3d3b0c7a535 Mon Sep 17 00:00:00 2001
From: Joshua Rosenkranz
Date: Fri, 25 Apr 2025 17:42:59 +0000
Subject: [PATCH] updated with 20b and 70b expectation tests

Signed-off-by: Joshua Rosenkranz
---
 tests/models/test_model_expectations.py                     | 6 ++++--
 ...IUDecoderModels.Llama-3.1-70B-Instruct.test_model_output | 1 +
 ...oderModels.Llama-3.1-70B-Instruct.test_model_weight_keys | 1 +
 ...derModels.granite-20b-code-instruct-8k.test_model_output | 1 +
 ...dels.granite-20b-code-instruct-8k.test_model_weight_keys | 1 +
 5 files changed, 8 insertions(+), 2 deletions(-)
 create mode 100644 tests/resources/expectations/models.test_model_expectations.TestAIUDecoderModels.Llama-3.1-70B-Instruct.test_model_output
 create mode 100644 tests/resources/expectations/models.test_model_expectations.TestAIUDecoderModels.Llama-3.1-70B-Instruct.test_model_weight_keys
 create mode 100644 tests/resources/expectations/models.test_model_expectations.TestAIUDecoderModels.granite-20b-code-instruct-8k.test_model_output
 create mode 100644 tests/resources/expectations/models.test_model_expectations.TestAIUDecoderModels.granite-20b-code-instruct-8k.test_model_weight_keys

diff --git a/tests/models/test_model_expectations.py b/tests/models/test_model_expectations.py
index 3aeee0d7..2d1295bc 100644
--- a/tests/models/test_model_expectations.py
+++ b/tests/models/test_model_expectations.py
@@ -22,10 +22,12 @@ model_dir = os.environ.get("FMS_TESTING_MODEL_DIR", "/tmp/models")
 
 LLAMA_3p1_8B_INSTRUCT = "meta-llama/Llama-3.1-8B-Instruct"
 GRANITE_3p2_8B_INSTRUCT = "ibm-granite/granite-3.2-8b-instruct"
+GRANITE_20B_CODE_INSTRUCT_8K = "ibm-granite/granite-20b-code-instruct-8k"
+LLAMA_3p1_70B_INSTRUCT = "meta-llama/Llama-3.1-70B-Instruct"
 ROBERTA_SQUAD_v2 = "deepset/roberta-base-squad2"
 
 torch.manual_seed(42)
-micro_models = {LLAMA_3p1_8B_INSTRUCT, GRANITE_3p2_8B_INSTRUCT}
+micro_models = {LLAMA_3p1_8B_INSTRUCT, GRANITE_3p2_8B_INSTRUCT, GRANITE_20B_CODE_INSTRUCT_8K, LLAMA_3p1_70B_INSTRUCT}
 
 
 class AIUModelFixtureMixin(ModelFixtureMixin):
@@ -55,7 +57,7 @@ def model(self, uninitialized_model):
         return uninitialized_model
 
 
-decoder_models = [LLAMA_3p1_8B_INSTRUCT, GRANITE_3p2_8B_INSTRUCT]
+decoder_models = [LLAMA_3p1_8B_INSTRUCT, GRANITE_3p2_8B_INSTRUCT, GRANITE_20B_CODE_INSTRUCT_8K, LLAMA_3p1_70B_INSTRUCT]
 
 
 class TestAIUDecoderModels(
diff --git a/tests/resources/expectations/models.test_model_expectations.TestAIUDecoderModels.Llama-3.1-70B-Instruct.test_model_output b/tests/resources/expectations/models.test_model_expectations.TestAIUDecoderModels.Llama-3.1-70B-Instruct.test_model_output
new file mode 100644
index 00000000..080ab1f7
--- /dev/null
+++ b/tests/resources/expectations/models.test_model_expectations.TestAIUDecoderModels.Llama-3.1-70B-Instruct.test_model_output
@@ -0,0 +1 @@
+15.625,15.625,15.6875,15.71875,13.8125,4.75,5.59375,3.1875,5.5625,5.28125,2.0625,4.59375,0.0,9.75,2.96875,4.375,10.53125,9.15625,4.6875,5.03125,2.25,7.03125,10.34375,11.21875,9.875,12.5,2.5625,9.75,8.03125,8.375,5.78125,14.25,6.125,9.84375,7.34375,5.15625,3.1875,4.15625,9.875,9.09375,10.28125,10.78125,2.5625,8.46875,9.15625,18.25,5.71875,4.34375,8.25,22.65625,8.6875,5.125,5.15625,1.9375,3.1875,11.15625,9.0,19.53125,4.5625,6.375,20.40625,9.21875,17.0625,7.71875
\ No newline at end of file
diff --git a/tests/resources/expectations/models.test_model_expectations.TestAIUDecoderModels.Llama-3.1-70B-Instruct.test_model_weight_keys b/tests/resources/expectations/models.test_model_expectations.TestAIUDecoderModels.Llama-3.1-70B-Instruct.test_model_weight_keys
new file mode 100644
index 00000000..6329cb98
--- /dev/null
+++ b/tests/resources/expectations/models.test_model_expectations.TestAIUDecoderModels.Llama-3.1-70B-Instruct.test_model_weight_keys
@@ -0,0 +1 @@
+dec_norm.weight,layers.0.attn.dense.weight,layers.0.attn.in_proj.key.weight,layers.0.attn.in_proj.query.weight,layers.0.attn.in_proj.value.weight,layers.0.ff_ln.weight,layers.0.ff_sub_layer.w1.weight,layers.0.ff_sub_layer.w2.weight,layers.0.ff_sub_layer.wg.weight,layers.0.ln.weight,layers.1.attn.dense.weight,layers.1.attn.in_proj.key.weight,layers.1.attn.in_proj.query.weight,layers.1.attn.in_proj.value.weight,layers.1.ff_ln.weight,layers.1.ff_sub_layer.w1.weight,layers.1.ff_sub_layer.w2.weight,layers.1.ff_sub_layer.wg.weight,layers.1.ln.weight,layers.2.attn.dense.weight,layers.2.attn.in_proj.key.weight,layers.2.attn.in_proj.query.weight,layers.2.attn.in_proj.value.weight,layers.2.ff_ln.weight,layers.2.ff_sub_layer.w1.weight,layers.2.ff_sub_layer.w2.weight,layers.2.ff_sub_layer.wg.weight,layers.2.ln.weight,shared.emb.weight,shared.head.weight
\ No newline at end of file
diff --git a/tests/resources/expectations/models.test_model_expectations.TestAIUDecoderModels.granite-20b-code-instruct-8k.test_model_output b/tests/resources/expectations/models.test_model_expectations.TestAIUDecoderModels.granite-20b-code-instruct-8k.test_model_output
new file mode 100644
index 00000000..45ff035c
--- /dev/null
+++ b/tests/resources/expectations/models.test_model_expectations.TestAIUDecoderModels.granite-20b-code-instruct-8k.test_model_output
@@ -0,0 +1 @@
+0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
\ No newline at end of file
diff --git a/tests/resources/expectations/models.test_model_expectations.TestAIUDecoderModels.granite-20b-code-instruct-8k.test_model_weight_keys b/tests/resources/expectations/models.test_model_expectations.TestAIUDecoderModels.granite-20b-code-instruct-8k.test_model_weight_keys
new file mode 100644
index 00000000..040d687e
--- /dev/null
+++ b/tests/resources/expectations/models.test_model_expectations.TestAIUDecoderModels.granite-20b-code-instruct-8k.test_model_weight_keys
@@ -0,0 +1 @@
+base_model.dec_norm.bias,base_model.dec_norm.weight,base_model.embedding.weight,base_model.layers.0.attn.dense.bias,base_model.layers.0.attn.dense.weight,base_model.layers.0.attn.in_proj.key.bias,base_model.layers.0.attn.in_proj.key.weight,base_model.layers.0.attn.in_proj.query.bias,base_model.layers.0.attn.in_proj.query.weight,base_model.layers.0.attn.in_proj.value.bias,base_model.layers.0.attn.in_proj.value.weight,base_model.layers.0.ff_ln.bias,base_model.layers.0.ff_ln.weight,base_model.layers.0.ff_sub_layer.w1.bias,base_model.layers.0.ff_sub_layer.w1.weight,base_model.layers.0.ff_sub_layer.w2.bias,base_model.layers.0.ff_sub_layer.w2.weight,base_model.layers.0.ln.bias,base_model.layers.0.ln.weight,base_model.layers.1.attn.dense.bias,base_model.layers.1.attn.dense.weight,base_model.layers.1.attn.in_proj.key.bias,base_model.layers.1.attn.in_proj.key.weight,base_model.layers.1.attn.in_proj.query.bias,base_model.layers.1.attn.in_proj.query.weight,base_model.layers.1.attn.in_proj.value.bias,base_model.layers.1.attn.in_proj.value.weight,base_model.layers.1.ff_ln.bias,base_model.layers.1.ff_ln.weight,base_model.layers.1.ff_sub_layer.w1.bias,base_model.layers.1.ff_sub_layer.w1.weight,base_model.layers.1.ff_sub_layer.w2.bias,base_model.layers.1.ff_sub_layer.w2.weight,base_model.layers.1.ln.bias,base_model.layers.1.ln.weight,base_model.layers.2.attn.dense.bias,base_model.layers.2.attn.dense.weight,base_model.layers.2.attn.in_proj.key.bias,base_model.layers.2.attn.in_proj.key.weight,base_model.layers.2.attn.in_proj.query.bias,base_model.layers.2.attn.in_proj.query.weight,base_model.layers.2.attn.in_proj.value.bias,base_model.layers.2.attn.in_proj.value.weight,base_model.layers.2.ff_ln.bias,base_model.layers.2.ff_ln.weight,base_model.layers.2.ff_sub_layer.w1.bias,base_model.layers.2.ff_sub_layer.w1.weight,base_model.layers.2.ff_sub_layer.w2.bias,base_model.layers.2.ff_sub_layer.w2.weight,base_model.layers.2.ln.bias,base_model.layers.2.ln.weight,base_model.position_embedding.weight,head.weight
\ No newline at end of file
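
For reference, the new expectation resources are single-line, comma-separated files: each *.test_model_output holds the float values the test compares against, and each *.test_model_weight_keys holds the model's state-dict key names. Below is a minimal sketch of how such files could be parsed and checked; the helper names, tolerances, and comparison strategy are illustrative assumptions, not the project's actual ModelFixtureMixin test harness.

# Minimal sketch, assuming only the file layout visible in this patch:
# one line of comma-separated values per expectation file. Helper names
# and tolerances are illustrative, not the project's real test code.
from pathlib import Path

import torch


def load_expected_output(path: str) -> torch.Tensor:
    # *.test_model_output files store one line of comma-separated floats.
    values = Path(path).read_text().strip().split(",")
    return torch.tensor([float(v) for v in values], dtype=torch.float32)


def load_expected_weight_keys(path: str) -> list[str]:
    # *.test_model_weight_keys files store comma-separated state-dict keys
    # (they appear alphabetically sorted in this patch).
    return Path(path).read_text().strip().split(",")


def check_model_against_expectations(model: torch.nn.Module,
                                     actual_output: torch.Tensor,
                                     output_path: str,
                                     weight_keys_path: str) -> None:
    # Compare the model's state-dict keys against the stored key list.
    expected_keys = load_expected_weight_keys(weight_keys_path)
    assert sorted(model.state_dict().keys()) == expected_keys

    # Compare the flattened output against the stored floats; the tolerance
    # here is an assumption, the real tests may compare differently.
    expected = load_expected_output(output_path)
    torch.testing.assert_close(actual_output.flatten().to(torch.float32),
                               expected, rtol=1e-2, atol=1e-2)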