@@ -152,60 +152,57 @@ export async function generateCodeAndAssess(options: {
152152
153153 for ( const rootPromptDef of promptsToProcess ) {
154154 allTasks . push (
155- appConcurrencyQueue . add (
156- async ( ) => {
157- const evalID = await env . gateway . initializeEval ( ) ;
158- let results : AssessmentResult [ ] | undefined ;
159-
160- try {
161- results = await callWithTimeout (
162- `Evaluation of ${ rootPromptDef . name } ` ,
163- async abortSignal =>
164- startEvaluationTask (
165- evalID ,
166- env ,
167- env . gateway ,
168- ratingLlm ,
169- options . model ,
170- rootPromptDef ,
171- options . localMode ,
172- options . skipScreenshots ,
173- options . outputDirectory ,
174- options . ragEndpoint ,
175- abortSignal ,
176- options . skipAxeTesting ,
177- ! ! options . enableUserJourneyTesting ,
178- ! ! options . enableAutoCsp ,
179- workerConcurrencyQueue ,
180- progress ,
181- options . autoraterModel || DEFAULT_AUTORATER_MODEL_NAME ,
182- options . a11yRepairAttempts ?? 0 ,
183- ) ,
184- // 10min max per app evaluation. We just want to make sure it never gets stuck.
185- 10 ,
186- ) ;
187- return results ;
188- } catch ( e : unknown ) {
189- failedPrompts . push ( {
190- promptName : rootPromptDef . name ,
191- error : `${ e } ` ,
192- stack : e instanceof Error ? e . stack : undefined ,
193- } ) ;
194-
195- let details = `Error: ${ e } ` ;
196- if ( e instanceof Error && e . stack ) {
197- details += `\nStack: ${ e . stack } ` ;
198- }
199-
200- progress . log ( rootPromptDef , 'error' , 'Failed to evaluate code' , details ) ;
201- return [ ] satisfies AssessmentResult [ ] ;
202- } finally {
203- progress . evalFinished ( rootPromptDef , results || [ ] ) ;
204- await env . gateway . finalizeEval ( evalID ) ;
155+ appConcurrencyQueue . add ( async ( ) => {
156+ const evalID = await env . gateway . initializeEval ( ) ;
157+ let results : AssessmentResult [ ] | undefined ;
158+
159+ try {
160+ results = await callWithTimeout (
161+ `Evaluation of ${ rootPromptDef . name } ` ,
162+ async abortSignal =>
163+ startEvaluationTask (
164+ evalID ,
165+ env ,
166+ env . gateway ,
167+ ratingLlm ,
168+ options . model ,
169+ rootPromptDef ,
170+ options . localMode ,
171+ options . skipScreenshots ,
172+ options . outputDirectory ,
173+ options . ragEndpoint ,
174+ abortSignal ,
175+ options . skipAxeTesting ,
176+ ! ! options . enableUserJourneyTesting ,
177+ ! ! options . enableAutoCsp ,
178+ workerConcurrencyQueue ,
179+ progress ,
180+ options . autoraterModel || DEFAULT_AUTORATER_MODEL_NAME ,
181+ options . a11yRepairAttempts ?? 0 ,
182+ ) ,
183+ // 10min max per app evaluation. We just want to make sure it never gets stuck.
184+ 10 ,
185+ ) ;
186+ return results ;
187+ } catch ( e : unknown ) {
188+ failedPrompts . push ( {
189+ promptName : rootPromptDef . name ,
190+ error : `${ e } ` ,
191+ stack : e instanceof Error ? e . stack : undefined ,
192+ } ) ;
193+
194+ let details = `Error: ${ e } ` ;
195+ if ( e instanceof Error && e . stack ) {
196+ details += `\nStack: ${ e . stack } ` ;
205197 }
206- } ,
207- { throwOnTimeout : true } ,
208- ) ,
198+
199+ progress . log ( rootPromptDef , 'error' , 'Failed to evaluate code' , details ) ;
200+ return [ ] satisfies AssessmentResult [ ] ;
201+ } finally {
202+ progress . evalFinished ( rootPromptDef , results || [ ] ) ;
203+ await env . gateway . finalizeEval ( evalID ) ;
204+ }
205+ } ) ,
209206 ) ;
210207 }
211208
0 commit comments