|
30 | 30 | ) |
31 | 31 |
|
32 | 32 |
|
@task
def musr(
    domain: Literal[
        "murder_mysteries", "object_placements", "team_allocation"
    ] = "murder_mysteries",
    prompt_technique: Literal["regular", "cot", "cot+"] = "regular",
    example_count: int = 0,
) -> Task:
    """Inspect task implementing the MuSR benchmark.

    Args:
        domain (Literal["murder_mysteries", "object_placements", "team_allocation"]):
            Which domain in the dataset to evaluate. Defaults to "murder_mysteries".
        prompt_technique (Literal["regular", "cot", "cot+"]): The prompt technique to
            use. "regular" includes only the narrative and the question. "cot" uses
            chain-of-thought prompting. "cot+" includes a hint. Defaults to "regular".
        example_count (int): Number of solved examples to include at the beginning of
            each prompt. Defaults to 0. Currently only supports 1 example.
    """
    # Resolve the question template first so an invalid prompt configuration
    # fails before the dataset download is attempted.
    question_template = get_domain_prompt(domain, prompt_technique, example_count)

    # Each MuSR domain is published as its own split of the HF dataset.
    musr_dataset = hf_dataset(
        path="TAUR-Lab/MuSR",
        split=domain,
        sample_fields=record_to_sample,
        shuffle=True,
        auto_id=True,
    )

    return Task(
        dataset=musr_dataset,
        solver=[
            system_message(SYSTEM_PROMPT),
            multiple_choice(template=question_template),
        ],
        scorer=choice(),
    )
33 | 70 | def get_domain_prompt( |
34 | 71 | domain: Literal["murder_mysteries", "object_placements", "team_allocation"], |
35 | 72 | prompt_technique: Literal["regular", "cot", "cot+"], |
@@ -80,40 +117,3 @@ def record_to_sample(record: Dict[str, Any]) -> Sample: |
80 | 117 | choices=ast.literal_eval(record["choices"]), |
81 | 118 | target=chr(ord("A") + int(record["answer_index"])), |
82 | 119 | ) |
83 | | - |
84 | | - |
85 | | -@task |
86 | | -def musr( |
87 | | - domain: Literal[ |
88 | | - "murder_mysteries", "object_placements", "team_allocation" |
89 | | - ] = "murder_mysteries", |
90 | | - prompt_technique: Literal["regular", "cot", "cot+"] = "regular", |
91 | | - example_count: int = 0, |
92 | | -) -> Task: |
93 | | - """Inspect task implementing the MuSR benchmark. |
94 | | -
|
95 | | - Args: |
96 | | - domain (Literal["murder_mysteries", "object_placements", "team_allocation"]): Which domain in the dataset to evaluate. |
97 | | - Defaults to "murder_mysteries". |
98 | | - prompt_technique (Literal["regular", "cot", "cot+"]): The prompt technique to use. "regular" includes only the narrative |
99 | | - and the question. "cot" uses chain-of-thought prompting. "cot+" includes a hint. Defaults to "regular". |
100 | | - example_count (int): Number of solved examples to include at the beginning of each prompt. Defaults to 0. Currently only supports 1 example. |
101 | | - """ |
102 | | - prompt = get_domain_prompt(domain, prompt_technique, example_count) |
103 | | - |
104 | | - dataset = hf_dataset( |
105 | | - path="TAUR-Lab/MuSR", |
106 | | - split=domain, |
107 | | - sample_fields=record_to_sample, |
108 | | - shuffle=True, |
109 | | - auto_id=True, |
110 | | - ) |
111 | | - |
112 | | - return Task( |
113 | | - dataset=dataset, |
114 | | - solver=[ |
115 | | - system_message(SYSTEM_PROMPT), |
116 | | - multiple_choice(template=prompt), |
117 | | - ], |
118 | | - scorer=choice(), |
119 | | - ) |
0 commit comments