1 parent 89b4c1d commit 5d9a4c7
bigcode_eval/tasks/mbppplus.py
@@ -10,6 +10,8 @@
 Homepage: https://github.com/evalplus/evalplus
 """

+import os
+
 from bigcode_eval.tasks.mbpp import MBPP
 from bigcode_eval.tasks.custom_metrics.code_eval import compute_code_eval

@@ -46,6 +48,9 @@ def get_prompt(self, doc):
     # is different from HumanEval(+) which further requires a `check` func
     def get_reference(self, doc):
         """Builds the reference solution for the doc (sample from the test dataset)."""
+        use_mbpp_tests = os.getenv("MBBPPLUS_USE_MBPP_TESTS", "0")
+        if use_mbpp_tests == "1":
+            return "\n".join(doc["test_list"])
         return "\n" + doc["test"]

     def get_dataset(self):
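
The added branch makes get_reference switch test sources via an environment variable: when MBBPPLUS_USE_MBPP_TESTS is set to "1", the reference is built from the original MBPP "test_list" asserts rather than the extended MBPP+ "test" field. Below is a minimal sketch of that behavior in isolation; the doc dict and its assert strings are hypothetical stand-ins for a real MBPP+ dataset row, not actual dataset content.

import os

# Hypothetical sample row; real rows come from the evalplus MBPP+ dataset.
doc = {
    "test": "assert add(1, 2) == 3\nassert add(-1, 1) == 0",  # MBPP+ extended tests
    "test_list": ["assert add(1, 2) == 3"],                    # original MBPP tests
}

def get_reference(doc):
    """Mirror of the patched method: choose the test source from the env var."""
    use_mbpp_tests = os.getenv("MBBPPLUS_USE_MBPP_TESTS", "0")
    if use_mbpp_tests == "1":
        return "\n".join(doc["test_list"])  # original MBPP asserts
    return "\n" + doc["test"]               # default: MBPP+ "test" field

os.environ["MBBPPLUS_USE_MBPP_TESTS"] = "1"
print(get_reference(doc))  # prints only the original MBPP assert

In practice the toggle is enabled by exporting MBBPPLUS_USE_MBPP_TESTS=1 in the environment before launching the evaluation, so no other harness code needs to change.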