refactor: follow-up to test repair feature

devversion · devversion · commit 5b01c0ce04ea · 2025-10-13T13:35:20.000+02:00
Small follow-up to the test repair feature. One open question is whether
we should still run tests when serving has failed. I'm leaning towards
yes: we can still try to run them and collect more data.
diff --git a/report-app/src/app/pages/report-viewer/report-viewer.html b/report-app/src/app/pages/report-viewer/report-viewer.html
@@ -80,10 +80,7 @@ <h3 class="chart-title">
               <span>Tests</span>
             </h3>
             <div class="summary-card-item">
-              <stacked-bar-chart
-                [data]="testsAsGraphData(overview.stats.tests)"
-                [compact]="true"
-              />
+              <stacked-bar-chart [data]="testsAsGraphData(overview.stats.tests)" [compact]="true" />
             </div>
           </div>
         }
@@ -258,7 +255,8 @@ <h2>Generated applications</h2>
         [label]="`Filter by failed checks (${selectedChecks().length} selected)`"
         [options]="allFailedChecks()"
         [(selected)]="selectedChecks"
-        class="check-filter"/>
+        class="check-filter"
+      />
     }
     <div class="apps-list">
       @for (result of filteredResults(); track result) {
@@ -291,7 +289,7 @@ <h2>Generated applications</h2>
                 }
 
                 @if (hasBuildFailureDuringTestRepair(result)) {
-                  <span class="status-badge error">Build failed after a11y repair</span>
+                  <span class="status-badge error">Build failed after a11y/test repair</span>
                 }
                 <!-- Test status badges -->
                 @if (finalAttempt.testResult) {
@@ -381,13 +379,15 @@ <h4>Test Results</h4>
                     @if (result.testResult.passed) {
                       <span class="status-text success">✔ Tests passed</span>
                       @if ((result.testRepairAttempts || 0) > 0) {
-                        <span class="status-text">&nbsp;after {{ result.testRepairAttempts }} repair attempt(s)</span>
+                        <span class="status-text"
+                          >&nbsp;after {{ result.testRepairAttempts }} repair attempt(s)</span
+                        >
                       }
                     } @else {
                       <span class="status-text error">✘ Tests failed</span>
                     }
                   </div>
-                  
+
                   @if (result.testResult.output && !result.testResult.passed) {
                     <details class="test-output-button">
                       <summary class="neutral-button">See Test Output</summary>
@@ -507,14 +507,16 @@ <h4>Generated Code</h4>
                         [audits]="category.audits"
                         [displayName]="category.displayName"
                         [score]="category.score"
-                        [description]="category.description"/>
+                        [description]="category.description"
+                      />
                     }
 
                     @if (lighthouse.uncategorized.length > 0) {
                       <lighthouse-category
                         [audits]="lighthouse.uncategorized"
                         displayName="Uncategorized"
-                        [score]="null"/>
+                        [score]="null"
+                      />
                     }
                   </expansion-panel>
                 }
diff --git a/runner/orchestration/build-serve-test-loop.ts b/runner/orchestration/build-serve-test-loop.ts
@@ -133,19 +133,16 @@ export async function attemptBuildAndTest(
       progress,
       userJourneyAgentTaskInput,
     );
-    const testResult = await runTest(
-      env,
-      evalID,
-      directory,
-      rootPromptDef,
-      abortSignal,
-      workerConcurrencyQueue,
-      progress,
-    );
-
-    if (testResult !== null) {
-      lastAttempt.testResult = testResult;
-    }
+    lastAttempt.testResult =
+      (await runTest(
+        env,
+        evalID,
+        directory,
+        rootPromptDef,
+        abortSignal,
+        workerConcurrencyQueue,
+        progress,
+      )) ?? undefined;
   }
 
   // Attempt to repair testing. This only runs when the last build
@@ -236,19 +233,16 @@ export async function attemptBuildAndTest(
       progress,
       userJourneyAgentTaskInput,
     );
-    const testResult = await runTest(
-      env,
-      evalID,
-      directory,
-      rootPromptDef,
-      abortSignal,
-      workerConcurrencyQueue,
-      progress,
-    );
-
-    if (testResult !== null) {
-      lastAttempt.testResult = testResult;
-    }
+    lastAttempt.testResult =
+      (await runTest(
+        env,
+        evalID,
+        directory,
+        rootPromptDef,
+        abortSignal,
+        workerConcurrencyQueue,
+        progress,
+      )) ?? undefined;
 
     if (hasAxeFailure && lastAttempt.serveTestingResult.axeViolations?.length === 0) {
       progress.log(rootPromptDef, 'success', `Successfully fixed all Axe accessibility violations`);
diff --git a/runner/orchestration/generate.ts b/runner/orchestration/generate.ts
@@ -47,7 +47,7 @@ import {getRunnerByName} from '../codegen/runner-creation.js';
 import {summarizeReportWithAI} from '../reporting/report-ai-summary.js';
 import {LocalExecutor} from './executors/local-executor.js';
 import {EvalID} from './executors/executor.js';
-import {attemptBuildAndTest} from './build-serve-loop.js';
+import {attemptBuildAndTest} from './build-serve-test-loop.js';
 
 /**
  * Orchestrates the entire assessment process for each prompt defined in the `prompts` array.