From 14e9863e3f278850c12419573377fc2dc015dfe1 Mon Sep 17 00:00:00 2001 From: Tarun Kumar Date: Tue, 16 Sep 2025 21:34:39 +0530 Subject: [PATCH 01/20] new Signed-off-by: Tarun Kumar --- openai/gpt-oss-120b/performance/server.yml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 openai/gpt-oss-120b/performance/server.yml diff --git a/openai/gpt-oss-120b/performance/server.yml b/openai/gpt-oss-120b/performance/server.yml new file mode 100644 index 0000000..806e169 --- /dev/null +++ b/openai/gpt-oss-120b/performance/server.yml @@ -0,0 +1,5 @@ +enable-chunked-prefill: true +max-model-len: 10000 +tensor-parallel-size: 2 +trust-remote-code: true +uvicorn-log-level: debug From 8a9486c76eefc8872bc0fc263e417f5c1538b7b6 Mon Sep 17 00:00:00 2001 From: Tarun Kumar Date: Wed, 17 Sep 2025 13:58:46 +0530 Subject: [PATCH 02/20] Add new Signed-off-by: Tarun Kumar --- openai/gpt-oss-120b/performance/server.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/openai/gpt-oss-120b/performance/server.yml b/openai/gpt-oss-120b/performance/server.yml index 806e169..45a0b3f 100644 --- a/openai/gpt-oss-120b/performance/server.yml +++ b/openai/gpt-oss-120b/performance/server.yml @@ -3,3 +3,5 @@ max-model-len: 10000 tensor-parallel-size: 2 trust-remote-code: true uvicorn-log-level: debug +tool-call-parser: openai +enable-auto-tool-choice: true From b1deeb71484be01c0095c0b3291c2ec42043a3bf Mon Sep 17 00:00:00 2001 From: Tarun Kumar Date: Wed, 17 Sep 2025 19:48:20 +0530 Subject: [PATCH 03/20] ad Signed-off-by: Tarun Kumar --- mistralai/Voxtral-Small-24B-2507/performance/server.yml | 8 ++++++++ mistralai/Voxtral-Small-24B-2507/storage.yml | 3 +++ 2 files changed, 11 insertions(+) create mode 100644 mistralai/Voxtral-Small-24B-2507/performance/server.yml create mode 100644 mistralai/Voxtral-Small-24B-2507/storage.yml diff --git a/mistralai/Voxtral-Small-24B-2507/performance/server.yml b/mistralai/Voxtral-Small-24B-2507/performance/server.yml new file mode 100644 index 
0000000..452fa19 --- /dev/null +++ b/mistralai/Voxtral-Small-24B-2507/performance/server.yml @@ -0,0 +1,8 @@ +enable-chunked-prefill: true +max-model-len: 9000 +tensor-parallel-size: 1 +trust-remote-code: true +uvicorn-log-level: debug +tokenizer_mode: mistral +config_format: mistral +load_format: mistral diff --git a/mistralai/Voxtral-Small-24B-2507/storage.yml b/mistralai/Voxtral-Small-24B-2507/storage.yml new file mode 100644 index 0000000..fc064a6 --- /dev/null +++ b/mistralai/Voxtral-Small-24B-2507/storage.yml @@ -0,0 +1,3 @@ +# https://huggingface.co/mistralai/Voxtral-Small-24B-2507 +model: hf +data: hf From 1c7fbefddb422b7b765b38fab9ac98d123930173 Mon Sep 17 00:00:00 2001 From: Tarun Kumar Date: Thu, 18 Sep 2025 21:57:54 +0530 Subject: [PATCH 04/20] new Signed-off-by: Tarun Kumar --- openai/gpt-oss-20b/performance/server.yml | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 openai/gpt-oss-20b/performance/server.yml diff --git a/openai/gpt-oss-20b/performance/server.yml b/openai/gpt-oss-20b/performance/server.yml new file mode 100644 index 0000000..1ac2b20 --- /dev/null +++ b/openai/gpt-oss-20b/performance/server.yml @@ -0,0 +1,7 @@ +enable-chunked-prefill: true +max-model-len: 10000 +tensor-parallel-size: 1 +trust-remote-code: true +uvicorn-log-level: debug +tool-call-parser: openai +enable-auto-tool-choice: true From 6939151b1fda677ebc52c5f75ede15a6f5f38f39 Mon Sep 17 00:00:00 2001 From: Tarun Kumar Date: Thu, 18 Sep 2025 23:36:02 +0530 Subject: [PATCH 05/20] ds Signed-off-by: Tarun Kumar --- openai/gpt-oss-20b/performance/server.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/openai/gpt-oss-20b/performance/server.yml b/openai/gpt-oss-20b/performance/server.yml index 1ac2b20..90ff212 100644 --- a/openai/gpt-oss-20b/performance/server.yml +++ b/openai/gpt-oss-20b/performance/server.yml @@ -3,5 +3,3 @@ max-model-len: 10000 tensor-parallel-size: 1 trust-remote-code: true uvicorn-log-level: debug -tool-call-parser: openai 
-enable-auto-tool-choice: true From 9c5ef0e8150cf248c7bb75b48ea51cefe90e1907 Mon Sep 17 00:00:00 2001 From: Tarun Kumar Date: Thu, 18 Sep 2025 23:40:25 +0530 Subject: [PATCH 06/20] ds Signed-off-by: Tarun Kumar --- openai/gpt-oss-20b/performance/server.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/openai/gpt-oss-20b/performance/server.yml b/openai/gpt-oss-20b/performance/server.yml index 90ff212..cf738a4 100644 --- a/openai/gpt-oss-20b/performance/server.yml +++ b/openai/gpt-oss-20b/performance/server.yml @@ -3,3 +3,6 @@ max-model-len: 10000 tensor-parallel-size: 1 trust-remote-code: true uvicorn-log-level: debug +tool-call-parser: openai +enable-auto-tool-choice: true + From 6e35a8672c3ac1a3852770ad90beaadb24488210 Mon Sep 17 00:00:00 2001 From: Tarun Kumar Date: Mon, 6 Oct 2025 12:05:53 +0530 Subject: [PATCH 07/20] INFERENG-2232: add new models for model enablement (#53) (#54) * New models for validation * update qwen3 metrics. add server for distil-whisper * update value * update whisper-large-v3 and Voxtral model acccuracy server settings * more Voxtral server settings * accuracy servers need to be in RedHatAI too * update Qwen2.5-VL-7B-Instruct-FP8-Dynamic values * try Kimi-K2 with quad * metric value for gpt-oss-20b from a run on k8s-a100-duo * remove empty file Co-authored-by: Derek Kozikowski <106621615+derekk-nm@users.noreply.github.com> --- .../Qwen2.5-VL-7B-Instruct/accuracy/tasks.yml | 28 +------------------ Qwen/Qwen3-8B-FP8/accuracy/tasks.yml | 5 ++++ Qwen/Qwen3-8B-FP8/storage.yml | 3 ++ .../accuracy/server.yml | 2 +- .../accuracy/tasks.yml | 5 ++++ .../storage.yml | 3 ++ .../accuracy/server.yml | 7 +++++ .../accuracy/server.yml | 2 ++ .../accuracy/tasks.yml | 5 ++++ .../DeepSeek-R1-Distill-Qwen-32B/storage.yml | 3 ++ .../distil-large-v3/accuracy/server.yml | 4 +++ .../Voxtral-Mini-3B-2507/accuracy/server.yml | 7 +++++ openai/gpt-oss-20b/accuracy/tasks.yml | 2 +- openai/whisper-large-v3/accuracy/server.yml | 3 ++ 
openai/whisper-large-v3/accuracy/tasks.yml | 5 ++++ 15 files changed, 55 insertions(+), 29 deletions(-) create mode 100644 Qwen/Qwen3-8B-FP8/accuracy/tasks.yml create mode 100644 Qwen/Qwen3-8B-FP8/storage.yml create mode 100644 RedHatAI/Qwen2.5-VL-7B-Instruct-FP8-Dynamic/accuracy/tasks.yml create mode 100644 RedHatAI/Qwen2.5-VL-7B-Instruct-FP8-Dynamic/storage.yml create mode 100644 RedHatAI/Voxtral-Mini-3B-2507-FP8-dynamic/accuracy/server.yml create mode 100644 RedHatAI/whisper-large-v3-turbo-FP8-dynamic/accuracy/server.yml create mode 100644 deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/accuracy/tasks.yml create mode 100644 deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/storage.yml create mode 100644 distil-whisper/distil-large-v3/accuracy/server.yml create mode 100644 mistralai/Voxtral-Mini-3B-2507/accuracy/server.yml create mode 100644 openai/whisper-large-v3/accuracy/server.yml create mode 100644 openai/whisper-large-v3/accuracy/tasks.yml diff --git a/Qwen/Qwen2.5-VL-7B-Instruct/accuracy/tasks.yml b/Qwen/Qwen2.5-VL-7B-Instruct/accuracy/tasks.yml index 231aeaf..50fc1bc 100644 --- a/Qwen/Qwen2.5-VL-7B-Instruct/accuracy/tasks.yml +++ b/Qwen/Qwen2.5-VL-7B-Instruct/accuracy/tasks.yml @@ -1,31 +1,5 @@ -# from gs://nm-vllm-certs/model-validation/lmeval/Qwen/Qwen2.5-7B/cuda/0.9.1.dev238+g922878ced/k8s-a100-duo/llm_eval_16218386266.json tasks: - - name: arc_challenge - metrics: - - name: acc_norm,none - value: 0.6373 - - name: gsm8k metrics: - name: exact_match,strict-match - value: 0.8074 - - - name: hellaswag - metrics: - - name: acc_norm,none - value: 0.8024 - - - name: mmlu - metrics: - - name: acc,none - value: 0.7424 - - - name: truthfulqa_mc2 - metrics: - - name: acc,none - value: 0.5633 - - - name: winogrande - metrics: - - name: acc,none - value: 0.7505 + value: 0.6944 diff --git a/Qwen/Qwen3-8B-FP8/accuracy/tasks.yml b/Qwen/Qwen3-8B-FP8/accuracy/tasks.yml new file mode 100644 index 0000000..c4a91b6 --- /dev/null +++ b/Qwen/Qwen3-8B-FP8/accuracy/tasks.yml @@ -0,0 +1,5 @@ 
+tasks: + - name: gsm8k + metrics: + - name: exact_match,strict-match + value: 0.8756 diff --git a/Qwen/Qwen3-8B-FP8/storage.yml b/Qwen/Qwen3-8B-FP8/storage.yml new file mode 100644 index 0000000..e31645c --- /dev/null +++ b/Qwen/Qwen3-8B-FP8/storage.yml @@ -0,0 +1,3 @@ +# storage configs for https://huggingface.co/Qwen/Qwen3-8B +model: hf +data: hf diff --git a/RedHatAI/Kimi-K2-Instruct-quantized.w4a16/accuracy/server.yml b/RedHatAI/Kimi-K2-Instruct-quantized.w4a16/accuracy/server.yml index 51d25d0..22eafe9 100644 --- a/RedHatAI/Kimi-K2-Instruct-quantized.w4a16/accuracy/server.yml +++ b/RedHatAI/Kimi-K2-Instruct-quantized.w4a16/accuracy/server.yml @@ -1,3 +1,3 @@ trust-remote-code: true -tensor-parallel-size: 2 +tensor-parallel-size: 4 max-model-len: 16384 diff --git a/RedHatAI/Qwen2.5-VL-7B-Instruct-FP8-Dynamic/accuracy/tasks.yml b/RedHatAI/Qwen2.5-VL-7B-Instruct-FP8-Dynamic/accuracy/tasks.yml new file mode 100644 index 0000000..50fc1bc --- /dev/null +++ b/RedHatAI/Qwen2.5-VL-7B-Instruct-FP8-Dynamic/accuracy/tasks.yml @@ -0,0 +1,5 @@ +tasks: + - name: gsm8k + metrics: + - name: exact_match,strict-match + value: 0.6944 diff --git a/RedHatAI/Qwen2.5-VL-7B-Instruct-FP8-Dynamic/storage.yml b/RedHatAI/Qwen2.5-VL-7B-Instruct-FP8-Dynamic/storage.yml new file mode 100644 index 0000000..51a9ad6 --- /dev/null +++ b/RedHatAI/Qwen2.5-VL-7B-Instruct-FP8-Dynamic/storage.yml @@ -0,0 +1,3 @@ +# storage configs for https://huggingface.co/RedHatAI/Qwen2.5-7B-Instruct-FP8-dynamic +model: hf +data: hf diff --git a/RedHatAI/Voxtral-Mini-3B-2507-FP8-dynamic/accuracy/server.yml b/RedHatAI/Voxtral-Mini-3B-2507-FP8-dynamic/accuracy/server.yml new file mode 100644 index 0000000..de66392 --- /dev/null +++ b/RedHatAI/Voxtral-Mini-3B-2507-FP8-dynamic/accuracy/server.yml @@ -0,0 +1,7 @@ +enable-chunked-prefill: true +max-model-len: 9000 +tensor-parallel-size: 1 +trust-remote-code: true +tokenizer_mode: mistral +config_format: mistral +load_format: mistral diff --git 
a/RedHatAI/whisper-large-v3-turbo-FP8-dynamic/accuracy/server.yml b/RedHatAI/whisper-large-v3-turbo-FP8-dynamic/accuracy/server.yml new file mode 100644 index 0000000..860ae1f --- /dev/null +++ b/RedHatAI/whisper-large-v3-turbo-FP8-dynamic/accuracy/server.yml @@ -0,0 +1,2 @@ +max-model-len: 448 +trust-remote-code: true diff --git a/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/accuracy/tasks.yml b/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/accuracy/tasks.yml new file mode 100644 index 0000000..14e1988 --- /dev/null +++ b/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/accuracy/tasks.yml @@ -0,0 +1,5 @@ +tasks: + - name: gsm8k + metrics: + - name: exact_match,strict-match + value: 0.8271 diff --git a/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/storage.yml b/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/storage.yml new file mode 100644 index 0000000..fb93360 --- /dev/null +++ b/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/storage.yml @@ -0,0 +1,3 @@ +# storage configs for https://huggingface.co/deepseek-ai/DeepSeek-R1-0528 +model: hf +data: hf diff --git a/distil-whisper/distil-large-v3/accuracy/server.yml b/distil-whisper/distil-large-v3/accuracy/server.yml new file mode 100644 index 0000000..4a4677e --- /dev/null +++ b/distil-whisper/distil-large-v3/accuracy/server.yml @@ -0,0 +1,4 @@ +max-model-len: 448 +tensor-parallel-size: 1 +trust-remote-code: true +uvicorn-log-level: debug diff --git a/mistralai/Voxtral-Mini-3B-2507/accuracy/server.yml b/mistralai/Voxtral-Mini-3B-2507/accuracy/server.yml new file mode 100644 index 0000000..1c30da7 --- /dev/null +++ b/mistralai/Voxtral-Mini-3B-2507/accuracy/server.yml @@ -0,0 +1,7 @@ +enable-chunked-prefill: true +max-model-len: 9000 +tensor-parallel-size: 1 +trust-remote-code: true +tokenizer_mode: mistral +config_format: mistral +load_format: mistral diff --git a/openai/gpt-oss-20b/accuracy/tasks.yml b/openai/gpt-oss-20b/accuracy/tasks.yml index 0b4633c..cf07e78 100644 --- a/openai/gpt-oss-20b/accuracy/tasks.yml +++ 
b/openai/gpt-oss-20b/accuracy/tasks.yml @@ -2,4 +2,4 @@ tasks: - name: gsm8k metrics: - name: exact_match,strict-match - value: 0 + value: 0.2494 diff --git a/openai/whisper-large-v3/accuracy/server.yml b/openai/whisper-large-v3/accuracy/server.yml new file mode 100644 index 0000000..c00b7fb --- /dev/null +++ b/openai/whisper-large-v3/accuracy/server.yml @@ -0,0 +1,3 @@ +max-model-len: 448 +tensor-parallel-size: 1 +trust-remote-code: true diff --git a/openai/whisper-large-v3/accuracy/tasks.yml b/openai/whisper-large-v3/accuracy/tasks.yml new file mode 100644 index 0000000..0b4633c --- /dev/null +++ b/openai/whisper-large-v3/accuracy/tasks.yml @@ -0,0 +1,5 @@ +tasks: + - name: gsm8k + metrics: + - name: exact_match,strict-match + value: 0 From ecae33190fb8a3a9099120b2854a456010e6f7e7 Mon Sep 17 00:00:00 2001 From: Tarun Kumar Date: Mon, 6 Oct 2025 13:07:35 +0530 Subject: [PATCH 08/20] add Signed-off-by: Tarun Kumar --- .../accuracy/model_card_tasks.yml | 31 ++++++++++ .../accuracy/tasks.yml | 56 +++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 microsoft/Phi-3-medium-4k-instruct/accuracy/model_card_tasks.yml create mode 100644 microsoft/Phi-3-medium-4k-instruct/accuracy/tasks.yml diff --git a/microsoft/Phi-3-medium-4k-instruct/accuracy/model_card_tasks.yml b/microsoft/Phi-3-medium-4k-instruct/accuracy/model_card_tasks.yml new file mode 100644 index 0000000..4e3bda9 --- /dev/null +++ b/microsoft/Phi-3-medium-4k-instruct/accuracy/model_card_tasks.yml @@ -0,0 +1,31 @@ +# from RedHatAI/phi-4-quantized.w8a8 +tasks: + - name: arc_challenge + metrics: + - name: acc_norm,none + value: 0.6442 + + - name: gsm8k + metrics: + - name: exact_match,strict-match + value: 0.85 + + - name: hellaswag + metrics: + - name: acc_norm,none + value: 0.8437 + + - name: mmlu + metrics: + - name: acc,none + value: 0.803 + + - name: truthfulqa_mc2 + metrics: + - name: acc,none + value: 0.5937 + + - name: winogrande + metrics: + - name: acc,none + value: 0.8058 diff --git 
a/microsoft/Phi-3-medium-4k-instruct/accuracy/tasks.yml b/microsoft/Phi-3-medium-4k-instruct/accuracy/tasks.yml new file mode 100644 index 0000000..78ebfd1 --- /dev/null +++ b/microsoft/Phi-3-medium-4k-instruct/accuracy/tasks.yml @@ -0,0 +1,56 @@ +# from gs://nm-vllm-certs/model-validation/lmeval/microsoft/phi-4/cuda/0.8.4.post1/k8s-a100-duo/llm_eval_14786302755.json +tasks: + - name: arc_challenge + metrics: + - name: acc_norm,none + value: 0.6825 + + - name: gsm8k + metrics: + - name: exact_match,strict-match + value: 0.82 + + - name: hellaswag + metrics: + - name: acc_norm,none + value: 0.8435 + + - name: mmlu + metrics: + - name: acc,none + value: 0.803 + + - name: truthfulqa_mc2 + metrics: + - name: acc,none + value: 0.5934 + + - name: winogrande + metrics: + - name: acc,none + value: 0.8011 + + - name: leaderboard_ifeval + metrics: + - name: inst_level_strict_acc,none + value: 0.0587 + + - name: leaderboard_gpqa_diamond + metrics: + - name: acc-norm,none + value: 0.3939 + + - name: leaderboard_gpqa_extended + metrics: + - name: acc-norm,none + value: 0.3882 + + - name: leaderboard_gpqa_main + metrics: + - name: acc-norm,none + value: 0.4129 + + - name: leaderboard_mmlu_pro + metrics: + - name: acc,none + value: 0.53 From 3e129df17d521a9b392667a7cef61d94248c0f3c Mon Sep 17 00:00:00 2001 From: Tarun Kumar Date: Mon, 6 Oct 2025 13:28:45 +0530 Subject: [PATCH 09/20] add Signed-off-by: Tarun Kumar --- microsoft/Phi-3-medium-4k-instruct/accuracy/model_card_tasks.yml | 1 - microsoft/Phi-3-medium-4k-instruct/accuracy/tasks.yml | 1 - 2 files changed, 2 deletions(-) diff --git a/microsoft/Phi-3-medium-4k-instruct/accuracy/model_card_tasks.yml b/microsoft/Phi-3-medium-4k-instruct/accuracy/model_card_tasks.yml index 4e3bda9..50fc02e 100644 --- a/microsoft/Phi-3-medium-4k-instruct/accuracy/model_card_tasks.yml +++ b/microsoft/Phi-3-medium-4k-instruct/accuracy/model_card_tasks.yml @@ -1,4 +1,3 @@ -# from RedHatAI/phi-4-quantized.w8a8 tasks: - name: arc_challenge metrics: 
diff --git a/microsoft/Phi-3-medium-4k-instruct/accuracy/tasks.yml b/microsoft/Phi-3-medium-4k-instruct/accuracy/tasks.yml index 78ebfd1..36dff96 100644 --- a/microsoft/Phi-3-medium-4k-instruct/accuracy/tasks.yml +++ b/microsoft/Phi-3-medium-4k-instruct/accuracy/tasks.yml @@ -1,4 +1,3 @@ -# from gs://nm-vllm-certs/model-validation/lmeval/microsoft/phi-4/cuda/0.8.4.post1/k8s-a100-duo/llm_eval_14786302755.json tasks: - name: arc_challenge metrics: From f2ac79484fd781325774c1f44f8afd4b85cc3d69 Mon Sep 17 00:00:00 2001 From: Tarun Kumar Date: Wed, 8 Oct 2025 20:44:34 +0530 Subject: [PATCH 10/20] Add Signed-off-by: Tarun Kumar --- .../accuracy/model_card_tasks.yml | 31 ++++++++++ .../granite-4.0-h-small/accuracy/tasks.yml | 60 ++++++++++++++++++ ibm-granite/granite-4.0-h-small/storage.yml | 3 + .../accuracy/model_card_tasks.yml | 30 +++++++++ .../granite-4.0-micro/accuracy/tasks.yml | 61 +++++++++++++++++++ ibm-granite/granite-4.0-micro/storage.yml | 3 + 6 files changed, 188 insertions(+) create mode 100644 ibm-granite/granite-4.0-h-small/accuracy/model_card_tasks.yml create mode 100644 ibm-granite/granite-4.0-h-small/accuracy/tasks.yml create mode 100644 ibm-granite/granite-4.0-h-small/storage.yml create mode 100644 ibm-granite/granite-4.0-micro/accuracy/model_card_tasks.yml create mode 100644 ibm-granite/granite-4.0-micro/accuracy/tasks.yml create mode 100644 ibm-granite/granite-4.0-micro/storage.yml diff --git a/ibm-granite/granite-4.0-h-small/accuracy/model_card_tasks.yml b/ibm-granite/granite-4.0-h-small/accuracy/model_card_tasks.yml new file mode 100644 index 0000000..0e01059 --- /dev/null +++ b/ibm-granite/granite-4.0-h-small/accuracy/model_card_tasks.yml @@ -0,0 +1,31 @@ +# from RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-quantized.w4a16 +tasks: + - name: arc_challenge + metrics: + - name: acc_norm,none + value: 0.7278 + + - name: gsm8k + metrics: + - name: exact_match,strict-match + value: 0.5668 + + - name: hellaswag + metrics: + - name: acc_norm,none + 
value: 0.8370 + + - name: mmlu + metrics: + - name: acc,none + value: 0.8067 + + - name: truthfulqa_mc2 + metrics: + - name: acc,none + value: 0.7062 + + - name: winogrande + metrics: + - name: acc,none + value: 0.8374 diff --git a/ibm-granite/granite-4.0-h-small/accuracy/tasks.yml b/ibm-granite/granite-4.0-h-small/accuracy/tasks.yml new file mode 100644 index 0000000..293aa1f --- /dev/null +++ b/ibm-granite/granite-4.0-h-small/accuracy/tasks.yml @@ -0,0 +1,60 @@ +tasks: + - name: arc_challenge + metrics: + - name: acc_norm,none + value: 0.715 + + - name: gsm8k + metrics: + - name: exact_match,strict-match + value: 0.85 + + - name: hellaswag + metrics: + - name: acc_norm,none + value: 0.8573 + + - name: mmlu + metrics: + - name: acc,none + value: 0.8109 + + - name: truthfulqa_mc2 + metrics: + - name: acc,none + value: 0.6409 + + - name: winogrande + metrics: + - name: acc,none + value: 0.8374 + + - name: leaderboard_ifeval + metrics: + - name: inst_level_strict_acc,none + value: 0.729 + + # - name: leaderboard_bbh + # metrics: + # - name: acc-norm,none + # value: 0.5319 + + # - name: leaderboard_math_hard + # metrics: + # - name: exact_match,none + # value: 0.1477 + + # - name: leaderboard_gpqa + # metrics: + # - name: acc-norm,none + # value: 0.3176 + + # - name: leaderboard_musr + # metrics: + # - name: acc-norm,none + # value: 0.4601 + + - name: leaderboard_mmlu_pro + metrics: + - name: acc,none + value: 0.5545 diff --git a/ibm-granite/granite-4.0-h-small/storage.yml b/ibm-granite/granite-4.0-h-small/storage.yml new file mode 100644 index 0000000..1f29c96 --- /dev/null +++ b/ibm-granite/granite-4.0-h-small/storage.yml @@ -0,0 +1,3 @@ +# storage configs for https://huggingface.co/ibm-granite/granite-3.1-8b-instruct +model: hf +data: hf diff --git a/ibm-granite/granite-4.0-micro/accuracy/model_card_tasks.yml b/ibm-granite/granite-4.0-micro/accuracy/model_card_tasks.yml new file mode 100644 index 0000000..76bbb3b --- /dev/null +++ 
b/ibm-granite/granite-4.0-micro/accuracy/model_card_tasks.yml @@ -0,0 +1,30 @@ +tasks: + - name: arc_challenge + metrics: + - name: acc_norm,none + value: 0.7278 + + - name: gsm8k + metrics: + - name: exact_match,strict-match + value: 0.79 + + - name: hellaswag + metrics: + - name: acc_norm,none + value: 0.8370 + + - name: mmlu + metrics: + - name: acc,none + value: 0.8067 + + - name: truthfulqa_mc2 + metrics: + - name: acc,none + value: 0.7062 + + - name: winogrande + metrics: + - name: acc,none + value: 0.8374 diff --git a/ibm-granite/granite-4.0-micro/accuracy/tasks.yml b/ibm-granite/granite-4.0-micro/accuracy/tasks.yml new file mode 100644 index 0000000..7ef8589 --- /dev/null +++ b/ibm-granite/granite-4.0-micro/accuracy/tasks.yml @@ -0,0 +1,61 @@ +# from gs://nm-vllm-certs/model-validation/lmeval/mistralai/Mistral-Small-3.1-24B-Instruct-2503/cuda/0.8.4.post1/ibm02-a100-octo/llm_eval_0.json +tasks: + - name: arc_challenge + metrics: + - name: acc_norm,none + value: 0.715 + + - name: gsm8k + metrics: + - name: exact_match,strict-match + value: 0.89 + + - name: hellaswag + metrics: + - name: acc_norm,none + value: 0.8573 + + - name: mmlu + metrics: + - name: acc,none + value: 0.8109 + + - name: truthfulqa_mc2 + metrics: + - name: acc,none + value: 0.6409 + + - name: winogrande + metrics: + - name: acc,none + value: 0.8374 + + - name: leaderboard_ifeval + metrics: + - name: inst_level_strict_acc,none + value: 0.729 + + # - name: leaderboard_bbh + # metrics: + # - name: acc-norm,none + # value: 0.5319 + + # - name: leaderboard_math_hard + # metrics: + # - name: exact_match,none + # value: 0.1477 + + # - name: leaderboard_gpqa + # metrics: + # - name: acc-norm,none + # value: 0.3176 + + # - name: leaderboard_musr + # metrics: + # - name: acc-norm,none + # value: 0.4601 + + - name: leaderboard_mmlu_pro + metrics: + - name: acc,none + value: 0.5545 diff --git a/ibm-granite/granite-4.0-micro/storage.yml b/ibm-granite/granite-4.0-micro/storage.yml new file mode 100644 
index 0000000..1f29c96 --- /dev/null +++ b/ibm-granite/granite-4.0-micro/storage.yml @@ -0,0 +1,3 @@ +# storage configs for https://huggingface.co/ibm-granite/granite-4.0-micro +model: hf +data: hf From d6d58db5371f48a2494a10783215de3e546fbb08 Mon Sep 17 00:00:00 2001 From: Tarun Kumar Date: Wed, 8 Oct 2025 21:08:34 +0530 Subject: [PATCH 11/20] new Signed-off-by: Tarun Kumar --- ibm-granite/granite-4.0-h-small/accuracy/model_card_tasks.yml | 3 +-- ibm-granite/granite-4.0-micro/accuracy/tasks.yml | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/ibm-granite/granite-4.0-h-small/accuracy/model_card_tasks.yml b/ibm-granite/granite-4.0-h-small/accuracy/model_card_tasks.yml index 0e01059..795309c 100644 --- a/ibm-granite/granite-4.0-h-small/accuracy/model_card_tasks.yml +++ b/ibm-granite/granite-4.0-h-small/accuracy/model_card_tasks.yml @@ -1,4 +1,3 @@ -# from RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-quantized.w4a16 tasks: - name: arc_challenge metrics: @@ -8,7 +7,7 @@ tasks: - name: gsm8k metrics: - name: exact_match,strict-match - value: 0.5668 + value: 0.85 - name: hellaswag metrics: diff --git a/ibm-granite/granite-4.0-micro/accuracy/tasks.yml b/ibm-granite/granite-4.0-micro/accuracy/tasks.yml index 7ef8589..3640c76 100644 --- a/ibm-granite/granite-4.0-micro/accuracy/tasks.yml +++ b/ibm-granite/granite-4.0-micro/accuracy/tasks.yml @@ -1,4 +1,3 @@ -# from gs://nm-vllm-certs/model-validation/lmeval/mistralai/Mistral-Small-3.1-24B-Instruct-2503/cuda/0.8.4.post1/ibm02-a100-octo/llm_eval_0.json tasks: - name: arc_challenge metrics: @@ -8,7 +7,7 @@ tasks: - name: gsm8k metrics: - name: exact_match,strict-match - value: 0.89 + value: 0.79 - name: hellaswag metrics: From 00e8c0c966010ba28595ce7b9f9b9cae3b13a818 Mon Sep 17 00:00:00 2001 From: Tarun Kumar Date: Thu, 9 Oct 2025 16:10:27 +0530 Subject: [PATCH 12/20] l40 Signed-off-by: Tarun Kumar --- ibm-granite/granite-4.0-h-small/performance/server.yml | 4 ++++ 1 file changed, 4 insertions(+) 
create mode 100644 ibm-granite/granite-4.0-h-small/performance/server.yml diff --git a/ibm-granite/granite-4.0-h-small/performance/server.yml b/ibm-granite/granite-4.0-h-small/performance/server.yml new file mode 100644 index 0000000..c06be8e --- /dev/null +++ b/ibm-granite/granite-4.0-h-small/performance/server.yml @@ -0,0 +1,4 @@ +max-model-len: 4025 +tensor-parallel-size: 2 +trust-remote-code: true +uvicorn-log-level: debug From 49bef20ce80d77d379c13af58a8b12965756323b Mon Sep 17 00:00:00 2001 From: Tarun Kumar Date: Thu, 16 Oct 2025 10:59:06 +0530 Subject: [PATCH 13/20] temp Signed-off-by: Tarun Kumar --- common/performance/server.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/common/performance/server.yml b/common/performance/server.yml index 122d583..ecc0159 100644 --- a/common/performance/server.yml +++ b/common/performance/server.yml @@ -4,3 +4,4 @@ tensor-parallel-size: 1 trust-remote-code: true uvicorn-log-level: debug no-enable-prefix-caching: true +kv-cache-dtype: fp8 From f8e23bb6e31fe1aceb0c8731717615026997a557 Mon Sep 17 00:00:00 2001 From: Tarun Kumar Date: Thu, 16 Oct 2025 11:04:42 +0530 Subject: [PATCH 14/20] temp Signed-off-by: Tarun Kumar --- common/performance/server.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/common/performance/server.yml b/common/performance/server.yml index ecc0159..122d583 100644 --- a/common/performance/server.yml +++ b/common/performance/server.yml @@ -4,4 +4,3 @@ tensor-parallel-size: 1 trust-remote-code: true uvicorn-log-level: debug no-enable-prefix-caching: true -kv-cache-dtype: fp8 From 9e44e7f1725fbbc56f0be683e50c2414db47ce9c Mon Sep 17 00:00:00 2001 From: Tarun Kumar Date: Mon, 3 Nov 2025 19:12:11 +0530 Subject: [PATCH 15/20] add Signed-off-by: Tarun Kumar --- google/gemma-3-12b-it/accuracy/tasks.yml | 30 ++++++++++++++++++++ google/gemma-3-12b-it/storage.yml | 3 ++ microsoft/Phi-4-reasoning/accuracy/tasks.yml | 30 ++++++++++++++++++++ microsoft/Phi-4-reasoning/storage.yml | 3 ++ 4 files changed, 66 
insertions(+) create mode 100644 google/gemma-3-12b-it/accuracy/tasks.yml create mode 100644 google/gemma-3-12b-it/storage.yml create mode 100644 microsoft/Phi-4-reasoning/accuracy/tasks.yml create mode 100644 microsoft/Phi-4-reasoning/storage.yml diff --git a/google/gemma-3-12b-it/accuracy/tasks.yml b/google/gemma-3-12b-it/accuracy/tasks.yml new file mode 100644 index 0000000..b284156 --- /dev/null +++ b/google/gemma-3-12b-it/accuracy/tasks.yml @@ -0,0 +1,30 @@ +tasks: + - name: arc_challenge + metrics: + - name: acc_norm,none + value: 0.6024 + + - name: gsm8k + metrics: + - name: exact_match,strict-match + value: 0.7665 + + - name: hellaswag + metrics: + - name: acc_norm,none + value: 0.7494 + + - name: mmlu + metrics: + - name: acc,none + value: 0.6414 + + - name: truthfulqa_mc2 + metrics: + - name: acc,none + value: 0.5487 + + - name: winogrande + metrics: + - name: acc,none + value: 0.6835 diff --git a/google/gemma-3-12b-it/storage.yml b/google/gemma-3-12b-it/storage.yml new file mode 100644 index 0000000..5c72893 --- /dev/null +++ b/google/gemma-3-12b-it/storage.yml @@ -0,0 +1,3 @@ +# https://huggingface.co/google/gemma-3-12b-it +model: hf +data: hf diff --git a/microsoft/Phi-4-reasoning/accuracy/tasks.yml b/microsoft/Phi-4-reasoning/accuracy/tasks.yml new file mode 100644 index 0000000..46de8cb --- /dev/null +++ b/microsoft/Phi-4-reasoning/accuracy/tasks.yml @@ -0,0 +1,30 @@ +tasks: + - name: arc_challenge + metrics: + - name: acc_norm,none + value: 0.6024 + + - name: gsm8k + metrics: + - name: exact_match,strict-match + value: 0.9257 + + - name: hellaswag + metrics: + - name: acc_norm,none + value: 0.7494 + + - name: mmlu + metrics: + - name: acc,none + value: 0.6414 + + - name: truthfulqa_mc2 + metrics: + - name: acc,none + value: 0.5487 + + - name: winogrande + metrics: + - name: acc,none + value: 0.6835 diff --git a/microsoft/Phi-4-reasoning/storage.yml b/microsoft/Phi-4-reasoning/storage.yml new file mode 100644 index 0000000..7dca6be --- /dev/null +++ 
b/microsoft/Phi-4-reasoning/storage.yml @@ -0,0 +1,3 @@ +# https://huggingface.co/microsoft/Phi-4-reasoning +model: hf +data: hf From db466e249e621ca4139431cd8ab4fb33a6670b6e Mon Sep 17 00:00:00 2001 From: Tarun Kumar Date: Thu, 6 Nov 2025 12:20:59 +0530 Subject: [PATCH 16/20] tets Signed-off-by: Tarun Kumar --- common/performance/server.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/common/performance/server.yml b/common/performance/server.yml index 122d583..176522a 100644 --- a/common/performance/server.yml +++ b/common/performance/server.yml @@ -1,6 +1,3 @@ -enable-chunked-prefill: true -max-model-len: 8192 tensor-parallel-size: 1 trust-remote-code: true -uvicorn-log-level: debug -no-enable-prefix-caching: true +uvicorn-log-level: debug \ No newline at end of file From 22dde99110f693d74a59a795832694a5adf0fff5 Mon Sep 17 00:00:00 2001 From: Tarun Kumar Date: Thu, 27 Nov 2025 16:13:30 +0530 Subject: [PATCH 17/20] update Signed-off-by: Tarun Kumar --- Qwen/Qwen3-Next-80B-A3B-Instruct/storage.yml | 2 +- openai/gpt-oss-120b/performance/server.yml | 3 --- openai/gpt-oss-20b/performance/server.yml | 4 ---- 3 files changed, 1 insertion(+), 8 deletions(-) diff --git a/Qwen/Qwen3-Next-80B-A3B-Instruct/storage.yml b/Qwen/Qwen3-Next-80B-A3B-Instruct/storage.yml index de0b347..5b30f65 100644 --- a/Qwen/Qwen3-Next-80B-A3B-Instruct/storage.yml +++ b/Qwen/Qwen3-Next-80B-A3B-Instruct/storage.yml @@ -1,3 +1,3 @@ -# storage configs for https://huggingface.co/Qwen/Qwen3-Next-80B-A3B-Instruct +# storage configs for https://huggingface.co/Qwen/Qwen3-Embedding-0.6B-GGUF model: hf data: hf diff --git a/openai/gpt-oss-120b/performance/server.yml b/openai/gpt-oss-120b/performance/server.yml index 45a0b3f..6652e2b 100644 --- a/openai/gpt-oss-120b/performance/server.yml +++ b/openai/gpt-oss-120b/performance/server.yml @@ -2,6 +2,3 @@ enable-chunked-prefill: true max-model-len: 10000 tensor-parallel-size: 2 trust-remote-code: true -uvicorn-log-level: debug 
-tool-call-parser: openai -enable-auto-tool-choice: true diff --git a/openai/gpt-oss-20b/performance/server.yml b/openai/gpt-oss-20b/performance/server.yml index cf738a4..7bbd353 100644 --- a/openai/gpt-oss-20b/performance/server.yml +++ b/openai/gpt-oss-20b/performance/server.yml @@ -2,7 +2,3 @@ enable-chunked-prefill: true max-model-len: 10000 tensor-parallel-size: 1 trust-remote-code: true -uvicorn-log-level: debug -tool-call-parser: openai -enable-auto-tool-choice: true - From 0f4f6a979d618d74ae75ec2aa4248636067e8722 Mon Sep 17 00:00:00 2001 From: Tarun Kumar Date: Thu, 27 Nov 2025 17:14:08 +0530 Subject: [PATCH 18/20] add Signed-off-by: Tarun Kumar --- .../DeepSeek-R1-Distill-Qwen-32B/performance/server.yml | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/performance/server.yml diff --git a/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/performance/server.yml b/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/performance/server.yml new file mode 100644 index 0000000..7bbd353 --- /dev/null +++ b/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/performance/server.yml @@ -0,0 +1,4 @@ +enable-chunked-prefill: true +max-model-len: 10000 +tensor-parallel-size: 1 +trust-remote-code: true From 4c6d3e216785f40771559aaf2ea6ad71f3494909 Mon Sep 17 00:00:00 2001 From: Tarun Kumar Date: Thu, 27 Nov 2025 17:18:11 +0530 Subject: [PATCH 19/20] add Signed-off-by: Tarun Kumar --- .../performance/server.yml | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 mistralai/Mistral-Small-3.1-24B-Instruct-2503/performance/server.yml diff --git a/mistralai/Mistral-Small-3.1-24B-Instruct-2503/performance/server.yml b/mistralai/Mistral-Small-3.1-24B-Instruct-2503/performance/server.yml new file mode 100644 index 0000000..452fa19 --- /dev/null +++ b/mistralai/Mistral-Small-3.1-24B-Instruct-2503/performance/server.yml @@ -0,0 +1,8 @@ +enable-chunked-prefill: true +max-model-len: 9000 +tensor-parallel-size: 1 +trust-remote-code: true 
+uvicorn-log-level: debug +tokenizer_mode: mistral +config_format: mistral +load_format: mistral From 626bc0f0c6114a911bf1fbf21ad9c8a5e14e2b50 Mon Sep 17 00:00:00 2001 From: Tarun Kumar Date: Wed, 3 Dec 2025 15:44:59 +0530 Subject: [PATCH 20/20] add Signed-off-by: Tarun Kumar --- .../accuracy/tasks.yml | 60 +++++++++++++++++++ .../granite-3.3-8b-instruct/storage.yml | 3 + 2 files changed, 63 insertions(+) create mode 100644 ibm-granite/granite-3.3-8b-instruct/accuracy/tasks.yml create mode 100644 ibm-granite/granite-3.3-8b-instruct/storage.yml diff --git a/ibm-granite/granite-3.3-8b-instruct/accuracy/tasks.yml b/ibm-granite/granite-3.3-8b-instruct/accuracy/tasks.yml new file mode 100644 index 0000000..e3b35aa --- /dev/null +++ b/ibm-granite/granite-3.3-8b-instruct/accuracy/tasks.yml @@ -0,0 +1,60 @@ +tasks: + - name: arc_challenge + metrics: + - name: acc_norm,none + value: 0.6663 + + - name: gsm8k + metrics: + - name: exact_match,strict-match + value: 0.6543 + + - name: hellaswag + metrics: + - name: acc_norm,none + value: 0.8412 + + - name: mmlu + metrics: + - name: acc,none + value: 0.656 + + - name: truthfulqa_mc2 + metrics: + - name: acc,none + value: 0.6048 + + - name: winogrande + metrics: + - name: acc,none + value: 0.7987 + + - name: leaderboard_ifeval + metrics: + - name: inst_level_strict_acc,none + value: 0.6666 + + # - name: leaderboard_bbh + # metrics: + # - name: acc-norm,none + # value: 0.5319 + + # - name: leaderboard_math_hard + # metrics: + # - name: exact_match,none + # value: 0.1477 + + # - name: leaderboard_gpqa + # metrics: + # - name: acc-norm,none + # value: 0.3176 + + # - name: leaderboard_musr + # metrics: + # - name: acc-norm,none + # value: 0.4601 + + - name: leaderboard_mmlu_pro + metrics: + - name: acc,none + value: 0.3517 diff --git a/ibm-granite/granite-3.3-8b-instruct/storage.yml b/ibm-granite/granite-3.3-8b-instruct/storage.yml new file mode 100644 index 0000000..1f29c96 --- /dev/null +++ 
b/ibm-granite/granite-3.3-8b-instruct/storage.yml @@ -0,0 +1,3 @@ +# storage configs for https://huggingface.co/ibm-granite/granite-3.3-8b-instruct +model: hf +data: hf