@@ -144,6 +144,7 @@ def split_gen():
144144
145145def read_task_perf (task = "complete" ):
146146 model_results = dict ()
147+ result_files = []
147148 for model , info in model_info .items ():
148149 if task == "instruct" and (not info ["prompted" ] or info ["name" ] in ["Granite-Code-3B-Instruct" , "Granite-Code-8B-Instruct" ]):
149150 continue
@@ -164,13 +165,14 @@ def read_task_perf(task="complete"):
164165 except :
165166 continue
166167
168+ result_files .append (file )
167169 with open (file , "r" ) as f :
168170 data = json .load (f )
169171 for task_id , perfs in data ["eval" ].items ():
170172 status = 1 if perfs [0 ]["status" ] == "pass" else 0
171173 task_perf [task_id ] = status
172174 model_results [info ["name" ]] = task_perf
173- return model_results
175+ return model_results , result_files
174176
175177
176178def get_winner_df (data_dict , task , task_level = True , no_tie = True ):
@@ -313,8 +315,16 @@ def push_ds(ds, path, local=False):
313315
314316 model_info = update_model_info (model_info )
315317 results = get_results ()
316- complete_data = read_task_perf ("complete" )
317- instruct_data = read_task_perf ("instruct" )
318+ files = []
319+ complete_data , complete_files = read_task_perf ("complete" )
320+ instruct_data , instruct_files = read_task_perf ("instruct" )
321+ files .extend (complete_files )
322+ files .extend (instruct_files )
323+ shutil .rmtree ("eval_results" , ignore_errors = True )
324+ os .makedirs ("eval_results" , exist_ok = True )
325+ for file in files :
326+ shutil .copy (file , "eval_results" )
327+
318328 complete_solve_rate = get_solve_rate (complete_data , task = "complete" )
319329 instruct_solve_rate = get_solve_rate (instruct_data , task = "instruct" )
320330 solve_rate_ds = DatasetDict ({"complete" : complete_solve_rate , "instruct" : instruct_solve_rate })
0 commit comments