Skip to content

Commit ac07e57

Browse files
author
chibu
committed
test retry
1 parent bce2e96 commit ac07e57

File tree

1 file changed

+98
-0
lines changed

1 file changed

+98
-0
lines changed
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
from automation.tasks import ArenaHardJudgeTask
2+
3+
model_queue_list_dict = [
4+
{"Qwen/Qwen3-0.6B" :"oneshot-a100x1"},
5+
{"RedHatAI/Qwen3-0.6B-quantized.w4a16":"oneshot-a100x1"},
6+
7+
{"Qwen/Qwen3-1.7B":"oneshot-a100x1"},
8+
{"RedHatAI/Qwen3-1.7B-quantized.w4a16":"oneshot-a100x1"},
9+
10+
{"Qwen/Qwen3-4B":"oneshot-a100x1"},
11+
{"RedHatAI/Qwen3-4B-quantized.w4a16":"oneshot-a100x1"},
12+
13+
{"Qwen/Qwen3-8B":"oneshot-a100x1"},
14+
{"RedHatAI/Qwen3-8B-quantized.w4a16":"oneshot-a100x1"},
15+
16+
{"Qwen/Qwen3-14B":"oneshot-a100x1"},
17+
{"RedHatAI/Qwen3-14B-quantized.w4a16":"oneshot-a100x1"},
18+
19+
{"Qwen/Qwen3-32B":"oneshot-a100x2"},
20+
{"RedHatAI/Qwen3-32B-quantized.w4a16":"oneshot-a100x2"},
21+
22+
{"Qwen/Qwen3-30B-A3B":"oneshot-a100x2"},
23+
{"RedHatAI/Qwen3-30B-A3B-quantized.w4a16":"oneshot-a100x2"},
24+
25+
{"meta-llama/Llama-3.1-8B-Instruct":"oneshot-a100x1"},
26+
{"RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16":"oneshot-a100x1"},
27+
{"RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8":"oneshot-a100x1"},
28+
29+
{"meta-llama/Llama-3.2-1B-Instruct":"oneshot-a100x1"},
30+
{"RedHatAI/Llama-3.2-1B-Instruct-quantized.w8a8":"oneshot-a100x1"},
31+
32+
{"meta-llama/Llama-3.2-3B-Instruct":"oneshot-a100x1"},
33+
{"RedHatAI/Llama-3.2-3B-Instruct-quantized.w8a8":"oneshot-a100x1"},
34+
35+
{"meta-llama/Llama-3.3-70B-Instruct": "oneshot-a100x4"},
36+
{"RedHatAI/Llama-3.3-70B-Instruct-quantized.w4a16": "oneshot-a100x4"},
37+
{"RedHatAI/Llama-3.3-70B-Instruct-quantized.w8a8": "oneshot-a100x4"},
38+
39+
{"meta-llama/Llama-4-Scout-17B-16E": "oneshot-a100x8"},
40+
{"RedHatAI/Llama-4-Scout-17B-16E-Instruct-quantized.w4a16": "oneshot-a100x8"},
41+
42+
{"microsoft/phi-4" :"oneshot-a100x1"},
43+
{"RedHatAI/phi-4-quantized.w4a16" :"oneshot-a100x1"},
44+
{"RedHatAI/phi-4-quantized.w8a8" :"oneshot-a100x1"},
45+
46+
{"mistralai/Mistral-Small-3.1-24B-Instruct-2503" :"oneshot-a100x2"},
47+
{"RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-quantized.w8a8" :"oneshot-a100x2"},
48+
{"RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-quantized.w4a16" :"oneshot-a100x2"},
49+
]
50+
51+
model_queue_list_dict = [
52+
#{"Qwen/Qwen3-0.6B" :"oneshot-a100x1"},
53+
{"RedHatAI/Qwen3-0.6B-quantized.w4a16":"oneshot-a100x1"},
54+
{"meta-llama/Llama-3.2-1B-Instruct":"oneshot-a100x1"},
55+
{"RedHatAI/Llama-3.2-1B-Instruct-quantized.w8a8":"oneshot-a100x1"},
56+
{"microsoft/phi-4" :"oneshot-a100x1"},
57+
{"RedHatAI/phi-4-quantized.w4a16" :"oneshot-a100x1"},
58+
{"RedHatAI/phi-4-quantized.w8a8" :"oneshot-a100x1"},
59+
]
60+
61+
model_queue_list_dict = [
62+
{"Qwen/Qwen3-0.6B" :"oneshot-a100x1"},
63+
]
64+
65+
judgement_model_dict = {"model": "openai/gpt-oss-120b", "queue": "oneshot-a100x8" }
66+
67+
68+
def run_task(model_queue_dict):
    """Launch one remote ArenaHard judgement task for a single answer model.

    Args:
        model_queue_dict: single-entry mapping of Hugging Face answer-model
            name (e.g. ``"Qwen/Qwen3-0.6B"``) to an execution queue name.
            NOTE: the per-model queue value is currently unused — the task is
            submitted to the judgement model's queue instead.

    Side effects:
        Creates an ``ArenaHardJudgeTask`` and enqueues it remotely; nothing
        is returned.
    """
    # Read the single entry WITHOUT mutating the caller's dict.
    # (The original used popitem(), which emptied the dict passed in —
    # a destructive side effect on the module-level config list.)
    answer_model, _queue_unused = next(iter(model_queue_dict.items()))
    judgement_model = judgement_model_dict["model"]
    queue = judgement_model_dict["queue"]

    task = ArenaHardJudgeTask(
        project_name="jira_arenahard_gpt_judgement_v0.1_final",
        task_name=f"judge_{answer_model.lower()}_task",
        packages=["huggingface-hub==0.34.3", "triton==3.3.1", "vllm==0.10.1.1"],
        answer_project_name="jira_arenahard_generation_v0.1",
        answer_task_name=f"generate_task_{answer_model.lower()}",
        # Strip the HF org prefix: "Qwen/Qwen3-0.6B" -> "qwen3-0.6b".
        # maxsplit=1 keeps the full remainder if a name ever nests slashes.
        answer_model=answer_model.split("/", 1)[1].lower(),
        judgement_model=judgement_model,
        rate_type="throughput",
        backend="aiohttp_server",
        target="http://localhost:8000/v1",
        api_type="openai",
        bench_name="arena-hard-v0.1",
        branch="arena_upgrade",
        vllm_kwargs={"enable-chunked-prefill": True},
        max_tokens=64000,
    )

    # Submit on the judge's queue (an a100x8 box), not the answer model's.
    task.execute_remotely(queue)
96+
97+
# Fan out: one remote judgement task per configured answer model.
for entry in model_queue_list_dict:
    run_task(entry)

0 commit comments

Comments
 (0)