Skip to content

Commit e7f163d

Browse files
authored
Merge pull request #187 from stanfordnlp/evaluation_error_handling
added proper error handling within evaluate and bootstrap
2 parents 0d2f37b + 860ad91 commit e7f163d

File tree

2 files changed

+17
-2
lines changed

2 files changed

+17
-2
lines changed

dspy/evaluate/evaluate.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,8 @@ def __init__(self, *, devset, metric=None, num_threads=1, display_progress=False
2828
self.display_table = display_table
2929
self.display = display
3030
self.max_errors = max_errors
31+
self.error_count = 0
32+
self.error_lock = threading.Lock()
3133

3234
def _execute_single_thread(self, wrapped_program, devset, display_progress):
3335
ncorrect = 0
@@ -95,6 +97,11 @@ def wrapped_program(example_idx, example):
9597
score = metric(example, prediction) # FIXME: TODO: What's the right order? Maybe force name-based kwargs!
9698
return example_idx, example, prediction, score
9799
except Exception as e:
100+
with self.error_lock:
101+
self.error_count += 1
102+
current_error_count = self.error_count
103+
if current_error_count >= self.max_errors:
104+
raise e
98105
print(f"Error for example in dev set: \t\t {e}")
99106
return example_idx, example, dict(), 0.0
100107
finally:

dspy/teleprompt/bootstrap.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
import dsp
22
import tqdm
33
import random
4+
import threading
45

56
from dspy.primitives import Example
67

@@ -30,13 +31,16 @@
3031

3132

3233
class BootstrapFewShot(Teleprompter):
33-
def __init__(self, metric=None, teacher_settings={}, max_bootstrapped_demos=4, max_labeled_demos=16, max_rounds=1):
34+
def __init__(self, metric=None, teacher_settings={}, max_bootstrapped_demos=4, max_labeled_demos=16, max_rounds=1, max_errors=5):
3435
self.metric = metric
3536
self.teacher_settings = teacher_settings
3637

3738
self.max_bootstrapped_demos = max_bootstrapped_demos
3839
self.max_labeled_demos = max_labeled_demos
3940
self.max_rounds = max_rounds
41+
self.max_errors= max_errors
42+
self.error_count = 0
43+
self.error_lock = threading.Lock()
4044

4145
def compile(self, student, *, teacher=None, trainset, valset=None):
4246
self.trainset = trainset
@@ -135,7 +139,11 @@ def _bootstrap_one_example(self, example, round_idx=0):
135139
# print(success, example, prediction)
136140
except Exception as e:
137141
success = False
138-
# FIXME: remove the reliance on uuid here so the error is printed
142+
with self.error_lock:
143+
self.error_count += 1
144+
current_error_count = self.error_count
145+
if current_error_count >= self.max_errors:
146+
raise e
139147
print(f'Failed to run or to evaluate example {example} with {self.metric} due to {e}.')
140148

141149
if success:

0 commit comments

Comments
 (0)