refactor: revert at-link support but expose an API for post-processing prompts

devversion · devversion · commit bb73ea4a91b9 · 2025-10-23T20:38:43.000+02:00
* We find the at-link support to be non-ideal given requirements inside
  Google and differences between agents, so we revert it. We don't
  expect any usages, as it was only available for a very short time (less
  than 2 hours).

* We will expose a hook for advanced executors to post-process system
  prompts. We can evolve that API as further needs arise.
diff --git a/examples/environments/remote_env/fake-executor.ts b/examples/environments/remote_env/fake-executor.ts
@@ -7,6 +7,7 @@ import {
   LlmGenerateFilesRequest,
   LlmResponse,
   LlmResponseFile,
+  replaceAtReferencesInPrompt,
   RootPromptDefinition,
 } from '../../../runner';
 import {ProgressLogger} from '../../../runner/progress/progress-logger';
@@ -111,5 +112,13 @@ export class FakeRemoteExecutor implements Executor {
     };
   }
 
+  async postProcessSystemPrompt(prompt: string, environmentRootPath: string) {
+    return replaceAtReferencesInPrompt(
+      prompt,
+      `${environmentRootPath}/prompt.md`,
+      environmentRootPath,
+    );
+  }
+
   async destroy() {}
 }
diff --git a/examples/environments/remote_env/other-file.md b/examples/environments/remote_env/other-file.md
@@ -1,3 +1,3 @@
 Use XYZ.
 
-@./other-file-2.md
+@/other-file-2.md
diff --git a/examples/environments/remote_env/system-instructions.md b/examples/environments/remote_env/system-instructions.md
@@ -3,4 +3,4 @@ Follow instructions below CAREFULLY:
 - Code MUST be implemented in my super secret framework.
 - Put the component code inside `src/app/app.ts`
 
-@./other-file.md
+@../remote_env/other-file.md
diff --git a/runner/configuration/environment.ts b/runner/configuration/environment.ts
@@ -28,8 +28,6 @@ export class Environment {
   readonly fullStackFramework: FrameworkInfo;
   /** Information about the client-side framework used within the environment. */
   readonly clientSideFramework: FrameworkInfo;
-  /** Prompts that should be executed as a part of the evaluation. */
-  readonly executablePrompts: RootPromptDefinition[];
   /** Path from which to read the code rating prompt. */
   readonly codeRatingPromptPath: string | null;
   /** Whether the prompts should be removed from the final report. */
@@ -58,10 +56,6 @@ export class Environment {
             this.getFrameworkDisplayName(config.fullStackFramework) || config.clientSideFramework,
         }
       : {...this.clientSideFramework};
-    this.executablePrompts = this.resolveExecutablePrompts(
-      config.executablePrompts,
-      config.ratings,
-    );
     this.codeRatingPromptPath = config.codeRatingPrompt
       ? join(rootPath, config.codeRatingPrompt)
       : null;
@@ -70,22 +64,27 @@ export class Environment {
     this.executor = config.executor;
   }
 
-  systemPromptGeneration = lazy(() => {
-    return this.renderRelativePrompt(this.config.generationSystemPrompt).result;
+  /** Prompts that should be executed as a part of the evaluation. */
+  executablePrompts = lazy(async () => {
+    return this.resolveExecutablePrompts(this.config.executablePrompts, this.config.ratings);
+  });
+
+  systemPromptGeneration = lazy(async () => {
+    return (await this.renderSystemPrompt(this.config.generationSystemPrompt)).result;
   });
 
-  systemPromptRepair = lazy(() => {
+  systemPromptRepair = lazy(async () => {
     if (!this.config.repairSystemPrompt) {
       return 'Please fix the given errors and return the corrected code.';
     }
-    return this.renderRelativePrompt(this.config.repairSystemPrompt).result;
+    return (await this.renderSystemPrompt(this.config.repairSystemPrompt)).result;
   });
 
-  systemPromptEditing = lazy(() => {
+  systemPromptEditing = lazy(async () => {
     if (!this.config.editingSystemPrompt) {
       return this.systemPromptGeneration();
     }
-    return this.renderRelativePrompt(this.config.editingSystemPrompt).result;
+    return (await this.renderSystemPrompt(this.config.editingSystemPrompt)).result;
   });
 
   /**
@@ -100,8 +99,8 @@ export class Environment {
   ): Promise<string> {
     const systemPrompt =
       type === 'generation'
-        ? this.systemPromptGeneration()
-        : (this.systemPromptEditing() ?? this.systemPromptGeneration());
+        ? await this.systemPromptGeneration()
+        : await this.systemPromptEditing();
 
     if (!ragEndpoint) {
       return [systemPrompt, userPrompt].join('\n\n');
@@ -168,11 +167,11 @@ export class Environment {
    * @param prompts Prompts to be resolved.
    * @param envRatings Environment-level ratings.
    */
-  private resolveExecutablePrompts(
+  private async resolveExecutablePrompts(
     prompts: EnvironmentConfig['executablePrompts'],
     envRatings: Rating[],
-  ) {
-    const result: RootPromptDefinition[] = [];
+  ): Promise<RootPromptDefinition[]> {
+    const result: Promise<RootPromptDefinition>[] = [];
 
     for (const def of prompts) {
       if (def instanceof MultiStepPrompt) {
@@ -191,20 +190,21 @@ export class Environment {
           name = def.name;
         }
 
-        globSync(path, {cwd: this.rootPath}).forEach(relativePath => {
-          result.push(
-            this.getStepPromptDefinition(
-              name ?? basename(relativePath, extname(relativePath)),
-              relativePath,
-              ratings,
-              /* isEditing */ false,
-            ),
-          );
-        });
+        result.push(
+          ...globSync(path, {cwd: this.rootPath}).map(
+            async relativePath =>
+              await this.getStepPromptDefinition(
+                name ?? basename(relativePath, extname(relativePath)),
+                relativePath,
+                ratings,
+                /* isEditing */ false,
+              ),
+          ),
+        );
       }
     }
 
-    return result;
+    return Promise.all(result);
   }
 
   /**
@@ -216,13 +216,13 @@ export class Environment {
    * @param ratings Ratings to run against the definition.
    * @param isEditing Whether this is an editing or generation step.
    */
-  private getStepPromptDefinition(
+  private async getStepPromptDefinition(
     name: string,
     relativePath: string,
     ratings: Rating[],
     isEditing: boolean,
-  ): PromptDefinition {
-    const {result, contextFiles} = this.renderRelativePrompt(relativePath);
+  ): Promise<PromptDefinition> {
+    const {result, contextFiles} = await this.renderEnvironmentPrompt(relativePath);
 
     return {
       name: name,
@@ -240,10 +240,10 @@ export class Environment {
    * @param def Definition of the prompt.
    * @param envRatings Environment-level ratings.
    */
-  private getMultiStepPrompt(
+  private async getMultiStepPrompt(
     def: MultiStepPrompt,
     envRatings: Rating[],
-  ): MultiStepPromptDefinition {
+  ): Promise<MultiStepPromptDefinition> {
     const promptRoot = resolve(this.rootPath, def.directoryPath);
     const name = basename(promptRoot);
     const steps: PromptDefinition[] = [];
@@ -288,7 +288,7 @@ export class Environment {
       if (stepNum === 0) {
         throw new UserFacingError('Multi-step prompts start with `step-1`.');
       }
-      const step = this.getStepPromptDefinition(
+      const step = await this.getStepPromptDefinition(
         `${name}-step-${stepNum}`,
         join(def.directoryPath, current.name),
         ratings,
@@ -317,8 +317,20 @@ export class Environment {
   }
 
   /** Renders a prompt from a path relative to the environment config. */
-  private renderRelativePrompt(relativePath: string) {
+  private async renderEnvironmentPrompt(relativePath: string) {
     const path = resolve(this.rootPath, relativePath);
     return this.renderPrompt(readFileSync(path, 'utf8'), path);
   }
+
+  private async renderSystemPrompt(relativePath: string) {
+    const result = await this.renderEnvironmentPrompt(relativePath);
+
+    // Optional hooks for post processing environment system prompts. Useful for e.g.
+    // supporting `@` references from Gemini CLI or inside g3.
+    if (this.executor.postProcessSystemPrompt !== undefined) {
+      result.result = await this.executor.postProcessSystemPrompt(result.result, this.rootPath);
+    }
+
+    return result;
+  }
 }
diff --git a/runner/configuration/prompt-templating.ts b/runner/configuration/prompt-templating.ts
@@ -14,8 +14,7 @@ function initializeHandlebars() {
     }
 
     const fullPath = path.join(dirname(ctx.containingFile), ctx.file);
-    let content = readFileSync(fullPath, 'utf8');
-    content = processAtFileReferencesSync(content, fullPath);
+    const content = readFileSync(fullPath, 'utf8');
 
     // Recursively support `embed`.
     return Handlebars.compile(content, {strict: true})({
@@ -39,8 +38,6 @@ export function renderPromptTemplate<T extends {containingFile: string | null}>(
   content: string,
   ctx: T,
 ) {
-  content = ctx.containingFile ? processAtFileReferencesSync(content, ctx.containingFile) : content;
-
   const template = Handlebars.compile(content, {strict: true});
   const contextFiles: string[] = [];
   const result = template(ctx, {
@@ -82,35 +79,3 @@ export function renderPromptTemplate<T extends {containingFile: string | null}>(
     contextFiles,
   };
 }
-
-function processAtFileReferencesSync(content: string, containingFile: string): string {
-  let newContent = content;
-  let match;
-  // Match all `@./<file-path>` or `@/<file-path>` occurrences.
-  // If someone intends to write such text in their prompt, they could overcome this
-  // by indenting the string, or adding arbitrary characters before front.
-  const regex = /^@(\.?\/[^\s]+)/gm;
-  const containingFileDir = dirname(containingFile);
-
-  while ((match = regex.exec(newContent)) !== null) {
-    const filePath = match[1];
-    const fullPath = resolve(containingFileDir, filePath);
-    let replacement: string;
-    try {
-      replacement = readFileSync(fullPath, 'utf8');
-      // Note: If we start searching the match start, where the new embedded content begins,
-      // we can trivially. process nested embeds via the `@` syntax.
-    } catch (e) {
-      throw new Error(
-        `Unexpected error while embedding \`${match[0]}\` reference in ${containingFile}. ` +
-          `Error: ${e}`,
-      );
-    }
-
-    newContent =
-      newContent.substring(0, match.index) +
-      processAtFileReferencesSync(replacement, fullPath) +
-      newContent.substring(regex.lastIndex);
-  }
-  return newContent;
-}
diff --git a/runner/index.ts b/runner/index.ts
@@ -48,3 +48,4 @@ export {DynamicProgressLogger} from './progress/dynamic-progress-logger.js';
 export {NoopProgressLogger} from './progress/noop-progress-logger.js';
 export {TextProgressLogger} from './progress/text-progress-logger.js';
 export {type ServeTestingResult} from './workers/serve-testing/worker-types.js';
+export {replaceAtReferencesInPrompt} from './utils/prompt-at-references.js';
diff --git a/runner/orchestration/codegen.ts b/runner/orchestration/codegen.ts
@@ -98,7 +98,7 @@ export async function repairCodeWithAI(
   progress: ProgressLogger,
   repairType: 'build' | 'test',
 ): Promise<LlmResponse> {
-  const repairSystemInstructions = env.systemPromptRepair();
+  const repairSystemInstructions = await env.systemPromptRepair();
   const repairPrompt = [
     ...errors.map(({errorContext, errorMessage}) =>
       [errorContext, '```', errorMessage, '```'].join('\n'),
diff --git a/runner/orchestration/executors/executor.ts b/runner/orchestration/executors/executor.ts
@@ -104,6 +104,12 @@ export const executorSchema = z.object({
       }),
     ),
   ),
+  postProcessSystemPrompt: z
+    .function(
+      z.tuple([z.string().describe('Prompt'), z.string().describe('Environment root path')]),
+      z.promise(z.string()),
+    )
+    .optional(),
   destroy: z.function(z.tuple([]), z.promise(z.void())),
   getExecutorInfo: z.function(
     z.tuple([]),
diff --git a/runner/orchestration/generate.ts b/runner/orchestration/generate.ts
@@ -63,11 +63,10 @@ export async function generateCodeAndAssess(options: AssessmentConfig): Promise<
   await assertValidModelName(options.model, env.executor);
 
   try {
-    const promptsToProcess = getCandidateExecutablePrompts(
-      env,
-      options.localMode,
-      options.promptFilter,
+    const promptsToProcess = (
+      await getCandidateExecutablePrompts(env, options.localMode, options.promptFilter)
     ).slice(0, options.limit);
+
     const progress =
       options.logging === 'dynamic' ? new DynamicProgressLogger() : new TextProgressLogger();
     const appConcurrency =
@@ -196,8 +195,10 @@ export async function generateCodeAndAssess(options: AssessmentConfig): Promise<
       ),
       timestamp: timestamp.toISOString(),
       reportName: options.reportName,
-      systemPromptGeneration: env.classifyPrompts ? 'Classified 🕵️' : env.systemPromptGeneration(),
-      systemPromptRepair: env.classifyPrompts ? 'Classified 🕵️' : env.systemPromptRepair(),
+      systemPromptGeneration: env.classifyPrompts
+        ? 'Classified 🕵️'
+        : await env.systemPromptGeneration(),
+      systemPromptRepair: env.classifyPrompts ? 'Classified 🕵️' : await env.systemPromptRepair(),
       // Deduplicate labels before finalizing the report.
       labels: Array.from(new Set(options.labels)),
       mcp,
@@ -219,13 +220,13 @@ export async function generateCodeAndAssess(options: AssessmentConfig): Promise<
 }
 
 /** Gets prompts that are candidates to be executed. */
-function getCandidateExecutablePrompts(
+async function getCandidateExecutablePrompts(
   env: Environment,
   localMode: boolean,
   promptFilter: string | undefined,
-): RootPromptDefinition[] {
+): Promise<RootPromptDefinition[]> {
   const envDir = join(LLM_OUTPUT_DIR, env.id);
-  let result = env.executablePrompts;
+  let result = await env.executablePrompts();
 
   // In local mode filter the list of prompts down to
   // only the ones that we have local output for.
diff --git a/runner/run-cli.ts b/runner/run-cli.ts
@@ -146,13 +146,14 @@ async function resolveConfig(options: Options) {
     );
   }
 
-  const rootPromptDef = environment.executablePrompts.find(p => p.name === options.prompt);
+  const executablePrompts = await environment.executablePrompts();
+  const rootPromptDef = executablePrompts.find(p => p.name === options.prompt);
 
   if (!rootPromptDef) {
     throw new UserFacingError(
       `Environment "${environment.displayName}" does not have a prompt with a name of "${options.prompt}".\n` +
         `The following prompts are available:\n` +
-        environment.executablePrompts.map(p => ` - ${p.name}`).join('\n'),
+        executablePrompts.map(p => ` - ${p.name}`).join('\n'),
     );
   }
 
diff --git a/runner/utils/prompt-at-references.ts b/runner/utils/prompt-at-references.ts

Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,3 @@`
`1`	`1`	`Use XYZ.`
`2`	`2`
`3`		`-@./other-file-2.md`
	`3`	`+@/other-file-2.md`
Original file line number	Diff line number	Diff line change
`@@ -146,13 +146,14 @@ async function resolveConfig(options: Options) {`
`146`	`146`	`);`
`147`	`147`	`}`
`148`	`148`
`149`		`- const rootPromptDef = environment.executablePrompts.find(p => p.name === options.prompt);`
	`149`	`+ const executablePrompts = await environment.executablePrompts();`
	`150`	`+ const rootPromptDef = executablePrompts.find(p => p.name === options.prompt);`
`150`	`151`
`151`	`152`	`if (!rootPromptDef) {`
`152`	`153`	`throw new UserFacingError(`
`153`	`154`	`Environment "${environment.displayName}" does not have a prompt with a name of "${options.prompt}".\n` +
`154`	`155`	`The following prompts are available:\n` +
`155`		- environment.executablePrompts.map(p => ` - ${p.name}`).join('\n'),
	`156`	+ executablePrompts.map(p => ` - ${p.name}`).join('\n'),
`156`	`157`	`);`
`157`	`158`	`}`
`158`	`159`