@@ -166,16 +166,17 @@ def wrapped_program(example_idx, example):
166166 num_threads ,
167167 display_progress ,
168168 )
169- if return_outputs : # Handle the return_outputs logic
170- results = [(example , prediction , score )
171- for _ , example , prediction , score in reordered_devset ]
172169
173170 if display :
174171 print (
175172 f"Average Metric: { ncorrect } / { ntotal } ({ round (100 * ncorrect / ntotal , 1 )} %)" )
176173
177174 predicted_devset = sorted (reordered_devset )
178175
176+ if return_outputs : # Handle the return_outputs logic
177+ results = [(example , prediction , score )
178+ for _ , example , prediction , score in predicted_devset ]
179+
179180 # data = [{**example, **prediction, 'correct': score} for example, prediction, score in zip(reordered_devset, preds, scores)]
180181 data = [
181182 merge_dicts (example , prediction ) | {"correct" : score } for _ , example , prediction , score in predicted_devset
@@ -222,9 +223,9 @@ def wrapped_program(example_idx, example):
222223 ipython_display (HTML (message ))
223224
224225 if return_all_scores and return_outputs :
225- return round (100 * ncorrect / ntotal , 2 ), results , [score for * _ , score in reordered_devset ]
226+ return round (100 * ncorrect / ntotal , 2 ), results , [score for * _ , score in predicted_devset ]
226227 elif return_all_scores :
227- return round (100 * ncorrect / ntotal , 2 ), [score for * _ , score in reordered_devset ]
228+ return round (100 * ncorrect / ntotal , 2 ), [score for * _ , score in predicted_devset ]
228229 elif return_outputs :
229230 return round (100 * ncorrect / ntotal , 2 ), results
230231
0 commit comments