Skip to content

Commit 6fda4d2

Browse files
authored
Fix multithreaded evaluation results
1 parent 3132a7f commit 6fda4d2

File tree

1 file changed

+6
-5
lines changed

1 file changed

+6
-5
lines changed

dspy/evaluate/evaluate.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -166,16 +166,17 @@ def wrapped_program(example_idx, example):
166166
num_threads,
167167
display_progress,
168168
)
169-
if return_outputs: # Handle the return_outputs logic
170-
results = [(example, prediction, score)
171-
for _, example, prediction, score in reordered_devset]
172169

173170
if display:
174171
print(
175172
f"Average Metric: {ncorrect} / {ntotal} ({round(100 * ncorrect / ntotal, 1)}%)")
176173

177174
predicted_devset = sorted(reordered_devset)
178175

176+
if return_outputs: # Handle the return_outputs logic
177+
results = [(example, prediction, score)
178+
for _, example, prediction, score in predicted_devset]
179+
179180
# data = [{**example, **prediction, 'correct': score} for example, prediction, score in zip(reordered_devset, preds, scores)]
180181
data = [
181182
merge_dicts(example, prediction) | {"correct": score} for _, example, prediction, score in predicted_devset
@@ -222,9 +223,9 @@ def wrapped_program(example_idx, example):
222223
ipython_display(HTML(message))
223224

224225
if return_all_scores and return_outputs:
225-
return round(100 * ncorrect / ntotal, 2), results, [score for *_, score in reordered_devset]
226+
return round(100 * ncorrect / ntotal, 2), results, [score for *_, score in predicted_devset]
226227
elif return_all_scores:
227-
return round(100 * ncorrect / ntotal, 2), [score for *_, score in reordered_devset]
228+
return round(100 * ncorrect / ntotal, 2), [score for *_, score in predicted_devset]
228229
elif return_outputs:
229230
return round(100 * ncorrect / ntotal, 2), results
230231

0 commit comments

Comments (0)