Skip to content

Commit bb73ea4

Browse files
committed
refactor: revert at link support but expose an API for post-processing prompts
* We find the at-link support to be non-ideal given requirements inside Google, or differences between agents, so we revert it. We don't expect any usages, as it was only available for a very short time (less than 2 hours).
* We will expose a hook for advanced executors to post-process system prompts. We can evolve that API as we detect more need.
1 parent b881657 commit bb73ea4

File tree

11 files changed

+139
-85
lines changed

11 files changed

+139
-85
lines changed

examples/environments/remote_env/fake-executor.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import {
77
LlmGenerateFilesRequest,
88
LlmResponse,
99
LlmResponseFile,
10+
replaceAtReferencesInPrompt,
1011
RootPromptDefinition,
1112
} from '../../../runner';
1213
import {ProgressLogger} from '../../../runner/progress/progress-logger';
@@ -111,5 +112,13 @@ export class FakeRemoteExecutor implements Executor {
111112
};
112113
}
113114

115+
async postProcessSystemPrompt(prompt: string, environmentRootPath: string) {
116+
return replaceAtReferencesInPrompt(
117+
prompt,
118+
`${environmentRootPath}/prompt.md`,
119+
environmentRootPath,
120+
);
121+
}
122+
114123
async destroy() {}
115124
}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
Use XYZ.
22

3-
@./other-file-2.md
3+
@/other-file-2.md

examples/environments/remote_env/system-instructions.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@ Follow instructions below CAREFULLY:
33
- Code MUST be implemented in my super secret framework.
44
- Put the component code inside `src/app/app.ts`
55

6-
@./other-file.md
6+
@../remote_env/other-file.md

runner/configuration/environment.ts

Lines changed: 47 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,6 @@ export class Environment {
2828
readonly fullStackFramework: FrameworkInfo;
2929
/** Information about the client-side framework used within the environment. */
3030
readonly clientSideFramework: FrameworkInfo;
31-
/** Prompts that should be executed as a part of the evaluation. */
32-
readonly executablePrompts: RootPromptDefinition[];
3331
/** Path from which to read the code rating prompt. */
3432
readonly codeRatingPromptPath: string | null;
3533
/** Whether the prompts should be removed from the final report. */
@@ -58,10 +56,6 @@ export class Environment {
5856
this.getFrameworkDisplayName(config.fullStackFramework) || config.clientSideFramework,
5957
}
6058
: {...this.clientSideFramework};
61-
this.executablePrompts = this.resolveExecutablePrompts(
62-
config.executablePrompts,
63-
config.ratings,
64-
);
6559
this.codeRatingPromptPath = config.codeRatingPrompt
6660
? join(rootPath, config.codeRatingPrompt)
6761
: null;
@@ -70,22 +64,27 @@ export class Environment {
7064
this.executor = config.executor;
7165
}
7266

73-
systemPromptGeneration = lazy(() => {
74-
return this.renderRelativePrompt(this.config.generationSystemPrompt).result;
67+
/** Prompts that should be executed as a part of the evaluation. */
68+
executablePrompts = lazy(async () => {
69+
return this.resolveExecutablePrompts(this.config.executablePrompts, this.config.ratings);
70+
});
71+
72+
systemPromptGeneration = lazy(async () => {
73+
return (await this.renderSystemPrompt(this.config.generationSystemPrompt)).result;
7574
});
7675

77-
systemPromptRepair = lazy(() => {
76+
systemPromptRepair = lazy(async () => {
7877
if (!this.config.repairSystemPrompt) {
7978
return 'Please fix the given errors and return the corrected code.';
8079
}
81-
return this.renderRelativePrompt(this.config.repairSystemPrompt).result;
80+
return (await this.renderSystemPrompt(this.config.repairSystemPrompt)).result;
8281
});
8382

84-
systemPromptEditing = lazy(() => {
83+
systemPromptEditing = lazy(async () => {
8584
if (!this.config.editingSystemPrompt) {
8685
return this.systemPromptGeneration();
8786
}
88-
return this.renderRelativePrompt(this.config.editingSystemPrompt).result;
87+
return (await this.renderSystemPrompt(this.config.editingSystemPrompt)).result;
8988
});
9089

9190
/**
@@ -100,8 +99,8 @@ export class Environment {
10099
): Promise<string> {
101100
const systemPrompt =
102101
type === 'generation'
103-
? this.systemPromptGeneration()
104-
: (this.systemPromptEditing() ?? this.systemPromptGeneration());
102+
? await this.systemPromptGeneration()
103+
: await this.systemPromptEditing();
105104

106105
if (!ragEndpoint) {
107106
return [systemPrompt, userPrompt].join('\n\n');
@@ -168,11 +167,11 @@ export class Environment {
168167
* @param prompts Prompts to be resolved.
169168
* @param envRatings Environment-level ratings.
170169
*/
171-
private resolveExecutablePrompts(
170+
private async resolveExecutablePrompts(
172171
prompts: EnvironmentConfig['executablePrompts'],
173172
envRatings: Rating[],
174-
) {
175-
const result: RootPromptDefinition[] = [];
173+
): Promise<RootPromptDefinition[]> {
174+
const result: Promise<RootPromptDefinition>[] = [];
176175

177176
for (const def of prompts) {
178177
if (def instanceof MultiStepPrompt) {
@@ -191,20 +190,21 @@ export class Environment {
191190
name = def.name;
192191
}
193192

194-
globSync(path, {cwd: this.rootPath}).forEach(relativePath => {
195-
result.push(
196-
this.getStepPromptDefinition(
197-
name ?? basename(relativePath, extname(relativePath)),
198-
relativePath,
199-
ratings,
200-
/* isEditing */ false,
201-
),
202-
);
203-
});
193+
result.push(
194+
...globSync(path, {cwd: this.rootPath}).map(
195+
async relativePath =>
196+
await this.getStepPromptDefinition(
197+
name ?? basename(relativePath, extname(relativePath)),
198+
relativePath,
199+
ratings,
200+
/* isEditing */ false,
201+
),
202+
),
203+
);
204204
}
205205
}
206206

207-
return result;
207+
return Promise.all(result);
208208
}
209209

210210
/**
@@ -216,13 +216,13 @@ export class Environment {
216216
* @param ratings Ratings to run against the definition.
217217
* @param isEditing Whether this is an editing or generation step.
218218
*/
219-
private getStepPromptDefinition(
219+
private async getStepPromptDefinition(
220220
name: string,
221221
relativePath: string,
222222
ratings: Rating[],
223223
isEditing: boolean,
224-
): PromptDefinition {
225-
const {result, contextFiles} = this.renderRelativePrompt(relativePath);
224+
): Promise<PromptDefinition> {
225+
const {result, contextFiles} = await this.renderEnvironmentPrompt(relativePath);
226226

227227
return {
228228
name: name,
@@ -240,10 +240,10 @@ export class Environment {
240240
* @param def Definition of the prompt.
241241
* @param envRatings Environment-level ratings.
242242
*/
243-
private getMultiStepPrompt(
243+
private async getMultiStepPrompt(
244244
def: MultiStepPrompt,
245245
envRatings: Rating[],
246-
): MultiStepPromptDefinition {
246+
): Promise<MultiStepPromptDefinition> {
247247
const promptRoot = resolve(this.rootPath, def.directoryPath);
248248
const name = basename(promptRoot);
249249
const steps: PromptDefinition[] = [];
@@ -288,7 +288,7 @@ export class Environment {
288288
if (stepNum === 0) {
289289
throw new UserFacingError('Multi-step prompts start with `step-1`.');
290290
}
291-
const step = this.getStepPromptDefinition(
291+
const step = await this.getStepPromptDefinition(
292292
`${name}-step-${stepNum}`,
293293
join(def.directoryPath, current.name),
294294
ratings,
@@ -317,8 +317,20 @@ export class Environment {
317317
}
318318

319319
/** Renders a prompt from a path relative to the environment config. */
320-
private renderRelativePrompt(relativePath: string) {
320+
private async renderEnvironmentPrompt(relativePath: string) {
321321
const path = resolve(this.rootPath, relativePath);
322322
return this.renderPrompt(readFileSync(path, 'utf8'), path);
323323
}
324+
325+
private async renderSystemPrompt(relativePath: string) {
326+
const result = await this.renderEnvironmentPrompt(relativePath);
327+
328+
// Optional hooks for post processing environment system prompts. Useful for e.g.
329+
// supporting `@` references from Gemini CLI or inside g3.
330+
if (this.executor.postProcessSystemPrompt !== undefined) {
331+
result.result = await this.executor.postProcessSystemPrompt(result.result, this.rootPath);
332+
}
333+
334+
return result;
335+
}
324336
}

runner/configuration/prompt-templating.ts

Lines changed: 1 addition & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,7 @@ function initializeHandlebars() {
1414
}
1515

1616
const fullPath = path.join(dirname(ctx.containingFile), ctx.file);
17-
let content = readFileSync(fullPath, 'utf8');
18-
content = processAtFileReferencesSync(content, fullPath);
17+
const content = readFileSync(fullPath, 'utf8');
1918

2019
// Recursively support `embed`.
2120
return Handlebars.compile(content, {strict: true})({
@@ -39,8 +38,6 @@ export function renderPromptTemplate<T extends {containingFile: string | null}>(
3938
content: string,
4039
ctx: T,
4140
) {
42-
content = ctx.containingFile ? processAtFileReferencesSync(content, ctx.containingFile) : content;
43-
4441
const template = Handlebars.compile(content, {strict: true});
4542
const contextFiles: string[] = [];
4643
const result = template(ctx, {
@@ -82,35 +79,3 @@ export function renderPromptTemplate<T extends {containingFile: string | null}>(
8279
contextFiles,
8380
};
8481
}
85-
86-
function processAtFileReferencesSync(content: string, containingFile: string): string {
87-
let newContent = content;
88-
let match;
89-
// Match all `@./<file-path>` or `@/<file-path>` occurrences.
90-
// If someone intends to write such text in their prompt, they could overcome this
91-
// by indenting the string, or adding arbitrary characters before front.
92-
const regex = /^@(\.?\/[^\s]+)/gm;
93-
const containingFileDir = dirname(containingFile);
94-
95-
while ((match = regex.exec(newContent)) !== null) {
96-
const filePath = match[1];
97-
const fullPath = resolve(containingFileDir, filePath);
98-
let replacement: string;
99-
try {
100-
replacement = readFileSync(fullPath, 'utf8');
101-
// Note: If we start searching the match start, where the new embedded content begins,
102-
// we can trivially. process nested embeds via the `@` syntax.
103-
} catch (e) {
104-
throw new Error(
105-
`Unexpected error while embedding \`${match[0]}\` reference in ${containingFile}. ` +
106-
`Error: ${e}`,
107-
);
108-
}
109-
110-
newContent =
111-
newContent.substring(0, match.index) +
112-
processAtFileReferencesSync(replacement, fullPath) +
113-
newContent.substring(regex.lastIndex);
114-
}
115-
return newContent;
116-
}

runner/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,4 @@ export {DynamicProgressLogger} from './progress/dynamic-progress-logger.js';
4848
export {NoopProgressLogger} from './progress/noop-progress-logger.js';
4949
export {TextProgressLogger} from './progress/text-progress-logger.js';
5050
export {type ServeTestingResult} from './workers/serve-testing/worker-types.js';
51+
export {replaceAtReferencesInPrompt} from './utils/prompt-at-references.js';

runner/orchestration/codegen.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ export async function repairCodeWithAI(
9898
progress: ProgressLogger,
9999
repairType: 'build' | 'test',
100100
): Promise<LlmResponse> {
101-
const repairSystemInstructions = env.systemPromptRepair();
101+
const repairSystemInstructions = await env.systemPromptRepair();
102102
const repairPrompt = [
103103
...errors.map(({errorContext, errorMessage}) =>
104104
[errorContext, '```', errorMessage, '```'].join('\n'),

runner/orchestration/executors/executor.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,12 @@ export const executorSchema = z.object({
104104
}),
105105
),
106106
),
107+
postProcessSystemPrompt: z
108+
.function(
109+
z.tuple([z.string().describe('Prompt'), z.string().describe('Environment root path')]),
110+
z.promise(z.string()),
111+
)
112+
.optional(),
107113
destroy: z.function(z.tuple([]), z.promise(z.void())),
108114
getExecutorInfo: z.function(
109115
z.tuple([]),

runner/orchestration/generate.ts

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,10 @@ export async function generateCodeAndAssess(options: AssessmentConfig): Promise<
6363
await assertValidModelName(options.model, env.executor);
6464

6565
try {
66-
const promptsToProcess = getCandidateExecutablePrompts(
67-
env,
68-
options.localMode,
69-
options.promptFilter,
66+
const promptsToProcess = (
67+
await getCandidateExecutablePrompts(env, options.localMode, options.promptFilter)
7068
).slice(0, options.limit);
69+
7170
const progress =
7271
options.logging === 'dynamic' ? new DynamicProgressLogger() : new TextProgressLogger();
7372
const appConcurrency =
@@ -196,8 +195,10 @@ export async function generateCodeAndAssess(options: AssessmentConfig): Promise<
196195
),
197196
timestamp: timestamp.toISOString(),
198197
reportName: options.reportName,
199-
systemPromptGeneration: env.classifyPrompts ? 'Classified 🕵️' : env.systemPromptGeneration(),
200-
systemPromptRepair: env.classifyPrompts ? 'Classified 🕵️' : env.systemPromptRepair(),
198+
systemPromptGeneration: env.classifyPrompts
199+
? 'Classified 🕵️'
200+
: await env.systemPromptGeneration(),
201+
systemPromptRepair: env.classifyPrompts ? 'Classified 🕵️' : await env.systemPromptRepair(),
201202
// Deduplicate labels before finalizing the report.
202203
labels: Array.from(new Set(options.labels)),
203204
mcp,
@@ -219,13 +220,13 @@ export async function generateCodeAndAssess(options: AssessmentConfig): Promise<
219220
}
220221

221222
/** Gets prompts that are candidates to be executed. */
222-
function getCandidateExecutablePrompts(
223+
async function getCandidateExecutablePrompts(
223224
env: Environment,
224225
localMode: boolean,
225226
promptFilter: string | undefined,
226-
): RootPromptDefinition[] {
227+
): Promise<RootPromptDefinition[]> {
227228
const envDir = join(LLM_OUTPUT_DIR, env.id);
228-
let result = env.executablePrompts;
229+
let result = await env.executablePrompts();
229230

230231
// In local mode filter the list of prompts down to
231232
// only the ones that we have local output for.

runner/run-cli.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,13 +146,14 @@ async function resolveConfig(options: Options) {
146146
);
147147
}
148148

149-
const rootPromptDef = environment.executablePrompts.find(p => p.name === options.prompt);
149+
const executablePrompts = await environment.executablePrompts();
150+
const rootPromptDef = executablePrompts.find(p => p.name === options.prompt);
150151

151152
if (!rootPromptDef) {
152153
throw new UserFacingError(
153154
`Environment "${environment.displayName}" does not have a prompt with a name of "${options.prompt}".\n` +
154155
`The following prompts are available:\n` +
155-
environment.executablePrompts.map(p => ` - ${p.name}`).join('\n'),
156+
executablePrompts.map(p => ` - ${p.name}`).join('\n'),
156157
);
157158
}
158159

0 commit comments

Comments
 (0)