@@ -189,10 +189,6 @@ def evaluate(
189189
190190 # run the evaluation
191191 print (f"Command run in sandbox { e2b_endpoint } " )
192- if not isinstance (pass_k , str ):
193- pass_k = "," .join (pass_k )
194- if not isinstance (selective_evaluate , str ):
195- selective_evaluate = "," .join (selective_evaluate )
196192 sandbox .commands .run ("bigcodebench.evaluate --execution 'local' "
197193 f"--split { split } --subset { subset } --samples { samples } "
198194 f"--pass_k { pass_k } --save_pass_rate { save_pass_rate } --calibrated { calibrated } "
@@ -209,9 +205,16 @@ def evaluate(
209205 else :
210206
211207 pass_at_k = dict ()
212-
213- passk = [int (k ) for k in pass_k .split ("," )]
208+ passk = list (pass_k )
214209
210+ if isinstance (selective_evaluate , str ):
211+ selected_ids = set (selective_evaluate .split ("," ))
212+ else :
213+ try :
214+ selected_ids = set (selective_evaluate )
215+ except :
216+ selected_ids = ""
217+
215218 if parallel < 1 :
216219 n_workers = max (1 , multiprocessing .cpu_count () // 2 )
217220 else :
@@ -224,11 +227,7 @@ def evaluate(
224227 problems = get_bigcodebench (subset = subset )
225228
226229 # Add selective evaluation logic
227- if selective_evaluate :
228- if isinstance (selective_evaluate , str ):
229- selected_ids = set (selective_evaluate .split ("," ))
230- else :
231- selected_ids = set (selective_evaluate )
230+ if selected_ids :
232231 problems = {k : v for k , v in problems .items () if k in selected_ids }
233232 if not problems :
234233 raise ValueError (f"None of the provided task IDs { selected_ids } were found in the dataset" )
0 commit comments