angular
diff --git a/‎docs/environment-reference.md‎
Lines changed: 5 additions & 0 deletions b/‎docs/environment-reference.md‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎report-app/src/app/pages/report-viewer/report-viewer.html‎
Lines changed: 63 additions & 1 deletion b/‎report-app/src/app/pages/report-viewer/report-viewer.html‎
Lines changed: 63 additions & 1 deletion
diff --git a/‎report-app/src/app/pages/report-viewer/report-viewer.ts‎
Lines changed: 28 additions & 2 deletions b/‎report-app/src/app/pages/report-viewer/report-viewer.ts‎
Lines changed: 28 additions & 2 deletions
diff --git a/‎runner/configuration/constants.ts‎
Lines changed: 7 additions & 1 deletion b/‎runner/configuration/constants.ts‎
Lines changed: 7 additions & 1 deletion
diff --git a/‎runner/configuration/environment-config.ts‎
Lines changed: 0 additions & 5 deletions b/‎runner/configuration/environment-config.ts‎
Lines changed: 0 additions & 5 deletions
diff --git a/‎runner/configuration/package-managers.ts‎
Lines changed: 4 additions & 0 deletions b/‎runner/configuration/package-managers.ts‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎runner/eval-cli.ts‎
Lines changed: 11 additions & 9 deletions b/‎runner/eval-cli.ts‎
Lines changed: 11 additions & 9 deletions
@@ -179,3 +179,8 @@ Defaults to `<package manager> run build`.
 
 Command used to start a local dev server as a part of the evaluation.
 Defaults to `<package manager> run start --port 0`.
+
+### `testCommand`
+
+Command used to run tests against the generated code. If this property is not provided, tests will not be run. The command should exit with code 0 on success and a non-zero exit code on failure. The output from the command (both `stdout` and `stderr`) is captured and used for repair attempts if the tests fail. The test command will time out after 4 minutes.
+
@@ -73,6 +73,20 @@ <h3 class="chart-title">
             <stacked-bar-chart [data]="buildsAsGraphData(overview.stats.builds)" [compact]="true" />
           </div>
         </div>
+        @if (overview.stats.tests) {
+          <div class="chart-container test-results-details">
+            <h3 class="chart-title">
+              <span class="material-symbols-outlined"> quiz </span>
+              <span>Tests</span>
+            </h3>
+            <div class="summary-card-item">
+              <stacked-bar-chart
+                [data]="testsAsGraphData(overview.stats.tests)"
+                [compact]="true"
+              />
+            </div>
+          </div>
+        }
         @if (overview.stats.runtime) {
           <div class="chart-container">
             <h3 class="chart-title">
@@ -276,9 +290,19 @@ <h2>Generated applications</h2>
                   <span class="status-badge error">Initial build failed</span>
                 }
 
-                @if (hasBuildFailureDuringA11yRepair(result)) {
-                  <span class="status-badge error">Build failed after a11y repair</span>
+                @if (hasBuildFailureDuringTestRepair(result)) {
+                  <span class="status-badge error">Build failed after test repair</span>
                 }
+                <!-- Test status badges -->
+                @if (finalAttempt.testResult) {
+                  @if (finalAttempt.testResult.passed) {
+                    @if ((result.testRepairAttempts || 0) > 0) {
+                      <span class="status-badge warning">Tests passed after repair</span>
+                    }
+                  } @else {
+                    <span class="status-badge error">Tests failed</span>
+                  }
+                }
               </div>
             </div>
           </expansion-panel-header>
@@ -350,12 +374,36 @@ <h5>
                 </div>
               </div>
 
+              @if (result.testResult) {
+                <div class="app-details-section">
+                  <h4>Test Results</h4>
+                  <div class="test-summary">
+                    @if (result.testResult.passed) {
+                      <span class="status-text success">✔ Tests passed</span>
+                      @if ((result.testRepairAttempts || 0) > 0) {
+                        <span class="status-text">&nbsp;after {{ result.testRepairAttempts }} repair attempt(s)</span>
+                      }
+                    } @else {
+                      <span class="status-text error">✘ Tests failed</span>
+                    }
+                  </div>
+                  
+                  @if (result.testResult.output && !result.testResult.passed) {
+                    <details class="test-output-button">
+                      <summary class="neutral-button">See Test Output</summary>
+                      <pre class="callout neutral code">{{ result.testResult.output }}</pre>
+                    </details>
+                  }
+                </div>
+              }
+
               <div class="app-details-section">
                 <h4>Additional info</h4>
                 @for (attempt of result.attemptDetails; track attempt) {
                   @let isBuilt = attempt.buildResult.status === 'success';
                   @let axeViolations = attempt.serveTestingResult?.axeViolations;
                   @let hasAxeViolations = axeViolations && axeViolations.length > 0;
+                  @let testsFailed = attempt.testResult?.passed === false;
 
                   <expansion-panel #expansionPanel>
                     <expansion-panel-header>
@@ -380,6 +428,15 @@ <h4>Additional info</h4>
                           >A11y</span
                         >
                       }
+
+                      @if (attempt.testResult) {
+                        <span
+                          class="status-badge"
+                          [class.error]="!attempt.testResult.passed"
+                          [class.success]="attempt.testResult.passed"
+                          >Tests</span
+                        >
+                      }
                     </expansion-panel-header>
 
                     @if (expansionPanel.opened()) {
@@ -416,6 +473,11 @@ <h4>A11y Violations</h4>
                         </pre>
                       }
 
+                      @if (testsFailed) {
+                        <h4>Failed Tests</h4>
+                        <pre class="callout neutral code">{{ attempt.testResult?.output }}</pre>
+                      }
+
                       <h4>Generated Code</h4>
 
                       @for (file of attempt.outputFiles; track file) {
 
@@ -21,6 +21,7 @@ import {
   LlmResponseFile,
   RunInfo,
   RunSummaryBuilds,
+  RunSummaryTests,
   RuntimeStats,
   ScoreBucket,
   SkippedIndividualAssessment,
@@ -265,6 +266,31 @@ export class ReportViewer {
     ];
   }
 
+  /**
+   * Converts a run's test summary into data for the stacked bar chart.
+   * Produces one segment per outcome, in this order: passed, passed after
+   * repair, failed, and no tests run.
+   */
+  protected testsAsGraphData(tests: RunSummaryTests): StackedBarChartData {
+    return [
+      {
+        label: 'Passed',
+        color: ScoreCssVariable.excellent,
+        value: tests.successfulInitialTests,
+      },
+      {
+        label: 'Passed after repair',
+        color: ScoreCssVariable.great,
+        value: tests.successfulTestsAfterRepair,
+      },
+      {
+        label: 'Failed',
+        color: ScoreCssVariable.poor,
+        value: tests.failedTests,
+      },
+      {
+        label: 'No tests run',
+        color: ScoreCssVariable.neutral,
+        value: tests.noTestsRun,
+      },
+    ];
+  }
+
   protected checksAsGraphData(buckets: ScoreBucket[]): StackedBarChartData {
     return buckets.map(b => ({
       label: b.nameWithLabels,
@@ -400,7 +426,7 @@ export class ReportViewer {
     return `wcs run --prompt=${result.promptDef.name} --env=<path to ${report.details.summary.environmentId} config>`;
   }
 
-  protected hasBuildFailureDuringA11yRepair(result: AssessmentResult): boolean {
-    return result.attemptDetails.some(attempt => attempt.buildFailedDuringA11yRepair);
+  /** Returns true if any attempt of `result` has `buildFailedDuringTestRepair` set. */
+  protected hasBuildFailureDuringTestRepair(result: AssessmentResult): boolean {
+    return result.attemptDetails.some(attempt => attempt.buildFailedDuringTestRepair);
   }
 }
@@ -25,7 +25,13 @@ export const LLM_OUTPUT_DIR = join(rootDir, 'llm-output');
  * providing the build output and the code that causes the problem.
  */
 // Note: When updating, also adjust the default description in `README.md`.
-export const DEFAULT_MAX_REPAIR_ATTEMPTS = 1;
+export const DEFAULT_MAX_BUILD_REPAIR_ATTEMPTS = 1;
+
+/**
+ * Number of times we'll ask the LLM to repair test failures,
+ * e.g. Axe violations or test command failures.
+ */
+export const DEFAULT_MAX_TEST_REPAIR_ATTEMPTS = 1;
 
 /** Name of the folder where we store all generated reports */
 export const REPORTS_ROOT_DIR = join(rootDir, 'reports');
 
@@ -73,11 +73,6 @@ export const environmentConfigSchema = z.object({
 export type EnvironmentConfig = z.infer<typeof environmentConfigSchema> &
   Partial<LocalExecutorConfig>;
 
-/** Package managers that are currently supported. */
-export function getPossiblePackageManagers() {
-  return ['npm', 'pnpm', 'yarn'] as const;
-}
-
 /** Asserts that the specified data is a valid environment config. */
 export function assertIsEnvironmentConfig(value: unknown): asserts value is EnvironmentConfig {
   const validationResult = environmentConfigSchema
 
@@ -0,0 +1,4 @@
+/** Returns the names of the package managers that are currently supported, as a readonly tuple. */
+export function getPossiblePackageManagers() {
+  return ['npm', 'pnpm', 'yarn'] as const;
+}
@@ -3,7 +3,8 @@ import chalk from 'chalk';
 import {
   BUILT_IN_ENVIRONMENTS,
   DEFAULT_AUTORATER_MODEL_NAME,
-  DEFAULT_MAX_REPAIR_ATTEMPTS,
+  DEFAULT_MAX_BUILD_REPAIR_ATTEMPTS,
+  DEFAULT_MAX_TEST_REPAIR_ATTEMPTS,
   DEFAULT_MODEL_NAME,
 } from './configuration/constants.js';
 import {generateCodeAndAssess} from './orchestration/generate.js';
@@ -37,9 +38,9 @@ interface Options {
   enableUserJourneyTesting?: boolean;
   enableAutoCsp?: boolean;
   autoraterModel?: string;
-  a11yRepairAttempts?: number;
   logging?: 'text-only' | 'dynamic';
   skipLighthouse?: boolean;
+  maxTestRepairAttempts?: number;
   maxBuildRepairAttempts?: number;
 }
 
@@ -151,21 +152,22 @@ function builder(argv: Argv): Argv<Options> {
         default: DEFAULT_AUTORATER_MODEL_NAME,
         description: 'Model to use when automatically rating generated code',
       })
-      .option('a11y-repair-attempts', {
-        type: 'number',
-        default: 0,
-        description: 'Number of repair attempts for discovered a11y violations',
-      })
       .option('skip-lighthouse', {
         type: 'boolean',
         default: false,
         description: 'Whether to skip collecting Lighthouse data',
       })
       .option('max-build-repair-attempts', {
         type: 'number',
-        default: DEFAULT_MAX_REPAIR_ATTEMPTS,
+        default: DEFAULT_MAX_BUILD_REPAIR_ATTEMPTS,
         description: 'Number of repair attempts when build errors are discovered',
       })
+      .option('max-test-repair-attempts', {
+        type: 'number',
+        default: DEFAULT_MAX_TEST_REPAIR_ATTEMPTS,
+        description:
+          'Number of repair attempts for discovered test failures (including a11y violations and ones from testCommand)',
+      })
       .strict()
       .version(false)
       .help()
@@ -209,9 +211,9 @@ async function handler(cliArgs: Arguments<Options>): Promise<void> {
       logging: cliArgs.logging,
       autoraterModel: cliArgs.autoraterModel,
       skipAiSummary: cliArgs.skipAiSummary,
-      a11yRepairAttempts: cliArgs.a11yRepairAttempts,
       skipLighthouse: cliArgs.skipLighthouse,
       maxBuildRepairAttempts: cliArgs.maxBuildRepairAttempts,
+      maxTestRepairAttempts: cliArgs.maxTestRepairAttempts,
     });
 
     logReportToConsole(runInfo);