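Add reasoning_effort to arena-hard templates, add generate_api_config template, and switch the vllm requirement from the pinned git tag to the plain `vllm` package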

chibu · chibu · commit 1dbfc7035441 · 2025-11-06T14:27:37.000Z
diff --git a/src/automation/standards/templates/arena-hard-v0.1.yaml.j2 b/src/automation/standards/templates/arena-hard-v0.1.yaml.j2
@@ -1,4 +1,5 @@
 judge_model: {{ judge_model }}
+reasoning_effort: low
 
 bench_name: arena-hard-v0.1
 
diff --git a/src/automation/standards/templates/arena-hard-v2.0.yaml.j2 b/src/automation/standards/templates/arena-hard-v2.0.yaml.j2
@@ -1,4 +1,5 @@
 judge_model: {{ judge_model }}
+reasoning_effort: low
 temperature: 0.0
 max_tokens: {{ max_tokens }}
 
diff --git a/src/automation/standards/templates/generate_api_config.yaml.j2 b/src/automation/standards/templates/generate_api_config.yaml.j2
@@ -0,0 +1,11 @@
+{{ lower_case_model }}:
+    model: {{ model_name }}
+    endpoints:
+      - api_base: {{ api_base }}
+        api_key: {{ api_key }}
+    api_type: {{ api_type }}
+    temperature: 0.6
+    end_think_token: "</think>"
+    max_tokens: {{ max_tokens }}
+    parallel: 1
+
diff --git a/src/automation/tasks/arenahard_judgement.py b/src/automation/tasks/arenahard_judgement.py
@@ -10,8 +10,8 @@
 class ArenaHardJudgeTask(BaseTask):
 
     arenahard_packages = [
-        #"vllm",
-        "git+https://github.com/vllm-project/vllm.git@v0.11.1rc5",
+        "vllm",
+        #"git+https://github.com/vllm-project/vllm.git@v0.11.1rc5",
         ARENAHARD_PACKAGE,
         "hf_xet",
     ]