feat: Observe command can now suggest creating new tests

dividedmind · dividedmind · commit cae623f1dfcd · 2025-04-29T15:12:52.000+02:00
Other improvements:
- enhance UserOptions constructor to accept both Map and object formats;
- add clone method to InteractionHistory;
- implement closingTags function for unclosed XML tags;
- introduce replaceStream for async string replacement in streams;
- update ExplainCommand to handle optional classifierService.
diff --git a/packages/navie/src/commands/explain-command.ts b/packages/navie/src/commands/explain-command.ts
@@ -34,8 +34,11 @@ export default class ExplainCommand implements Command {
     private readonly options: ExplainOptions,
     public readonly interactionHistory: InteractionHistory,
     private readonly completionService: CompletionService,
-    private readonly classifierService: ClassificationService,
-    private readonly agentSelectionService: AgentSelectionService,
+    private readonly classifierService: ClassificationService | undefined,
+    private readonly agentSelectionService: Pick<
+      AgentSelectionService,
+      'selectAgent' | 'contextService'
+    >,
     private readonly codeSelectionService: CodeSelectionService,
     private readonly projectInfoService: ProjectInfoService,
     private readonly memoryService: MemoryService
@@ -52,7 +55,7 @@ export default class ExplainCommand implements Command {
     let contextLabelsFn: Promise<ContextV2.ContextLabel[]> | undefined;
 
     performance.mark('classifyStart');
-    if (classifyEnabled)
+    if (classifyEnabled && this.classifierService)
       contextLabelsFn = this.classifierService
         .classifyQuestion(baseQuestion, chatHistory)
         .catch((err) => {
diff --git a/packages/navie/src/commands/observe-command.ts b/packages/navie/src/commands/observe-command.ts
@@ -1,32 +1,63 @@
 import { z } from 'zod';
 
+import { AgentMode } from '../agent';
+import TestAgent from '../agents/test-agent';
 import type Command from '../command';
 import type { CommandRequest } from '../command';
-import CompletionService from '../services/completion-service';
-import LookupContextService from '../services/lookup-context-service';
-import VectorTermsService from '../services/vector-terms-service';
 import { ContextV2 } from '../context';
-import { ExplainOptions } from './explain-command';
-import Message from '../message';
 import InteractionHistory, {
   CompletionEvent,
   PromptInteractionEvent,
 } from '../interaction-history';
+import closingTags from '../lib/closing-tags';
 import { UserOptions } from '../lib/parse-options';
+import replaceStream from '../lib/replace-stream';
+import Message from '../message';
+import ApplyContextService from '../services/apply-context-service';
+import CodeSelectionService from '../services/code-selection-service';
+import CompletionService from '../services/completion-service';
+import ContextService from '../services/context-service';
+import FileChangeExtractorService from '../services/file-change-extractor-service';
+import LookupContextService from '../services/lookup-context-service';
+import { NaiveMemoryService } from '../services/memory-service';
 import ProjectInfoService from '../services/project-info-service';
+import VectorTermsService from '../services/vector-terms-service';
+
+import ExplainCommand, { ExplainOptions } from './explain-command';
 
 const RelevantTest = z.object({
   relevantTest: z
     .object({
-      name: z.string().optional().describe('The name of the test case, if known'),
-      path: z.string().describe('The file path of the test file'),
+      name: z
+        .string()
+        .optional()
+        .describe('The name of the test case, if known')
+        .nullable()
+        .transform((value) => (value === null ? undefined : value)),
+      path: z
+        .string()
+        .describe('The file path of the test file')
+        .nullable()
+        .transform((value) => (value === null ? undefined : value)),
       language: z
         .enum(['ruby', 'python', 'java', 'javascript', 'other'])
         .describe('The programming language of the test file'),
-      framework: z.string().optional().describe('The test framework used'),
+      framework: z
+        .string()
+        .optional()
+        .describe('The test framework used')
+        .nullable()
+        .transform((value) => (value === null ? undefined : value)),
     })
     .optional()
+    .nullable()
     .describe('A descriptor of the most relevant test to the requested behavior'),
+  suggestedTest: z
+    .string()
+    .optional()
+    .nullable()
+    .transform((value) => (value === null ? undefined : value))
+    .describe('A suggested test case to write, if no relevant test is found'),
   installCommands: z
     .array(
       z.object({
@@ -126,6 +157,9 @@ export default class ObserveCommand implements Command {
       {
         role: 'system',
         content: `Given the following code snippets, identify the single most relevant test to the user request.
+          If no test seems relevant, suggest a test case to write instead. Do not provide test case code, just describe it in detail.
+          The test case should be relevant to the user request, and ideally, it should be written in the same language as the code snippets provided.
+          Do suggest a name and path for the test case and take it into account when generating the test run instructions.
 
 ${projectLanguageDirective}
 
@@ -156,24 +190,83 @@ ${context.join('\n')}
     return result;
   }
 
+  /**
+   * Generates the code for a suggested test case and streams it to the caller.
+   *
+   * Builds a TestAgent and runs an ExplainCommand that is wired to always select that agent
+   * (no classifier service, classification disabled), asking for XML-formatted, code-only
+   * output. The tokens are passed through as they arrive, followed by whatever closing tags
+   * `closingTags` reports as missing from the response; the whole output is wrapped in newlines.
+   *
+   * @param suggestedTest description of the test case to generate
+   * @param userOptions options of the original request; they are carried over to the nested command
+   * @param history interaction history handed to the nested services and command
+   * @returns the generated test case, token by token
+   */
+  private async *suggestTestCase(
+    suggestedTest: string,
+    userOptions: UserOptions,
+    history: InteractionHistory
+  ): AsyncIterable<string> {
+    const applyContextService = new ApplyContextService(history);
+    const contextService = new ContextService(
+      history,
+      this.vectorTermsService,
+      this.lookupContextService,
+      applyContextService
+    );
+    const testAgent = new TestAgent(
+      history,
+      contextService,
+      new FileChangeExtractorService(history, this.completionService)
+    );
+    // An ExplainCommand without a classifier whose agent selection always yields the test agent
+    const explainCommand = new ExplainCommand(
+      this.options,
+      history,
+      this.completionService,
+      undefined,
+      {
+        selectAgent: () => ({
+          agentMode: AgentMode.Test,
+          agent: testAgent,
+          question: suggestedTest,
+        }),
+        contextService,
+      },
+      new CodeSelectionService(history),
+      this.projectInfoService,
+      NaiveMemoryService
+    );
+
+    const commandRequest: CommandRequest = {
+      question: suggestedTest + '\n\nOnly include code, no explanations.',
+      userOptions: new UserOptions({
+        // Object spread on the UserOptions instance would only copy its own fields, not the
+        // option entries; go through its iterator so the user's options are really carried over.
+        ...Object.fromEntries(userOptions),
+        format: 'xml',
+        classify: false,
+        tokenlimit: String(userOptions.numberValue('tokenlimit') || this.options.tokenLimit),
+      }),
+    };
+
+    // Keep a copy of everything streamed so the missing closing tags can be worked out at the end
+    const explainCommandResponse: string[] = [];
+    yield '\n';
+    for await (const token of explainCommand.execute(commandRequest)) {
+      yield token;
+      explainCommandResponse.push(token);
+    }
+
+    yield closingTags(explainCommandResponse.join('').trim());
+
+    yield '\n';
+  }
+
   async *execute({ question: userRequest, userOptions }: CommandRequest): AsyncIterable<string> {
     const vectorTerms = await this.vectorTermsService.suggestTerms(userRequest);
     const tokenLimit = userOptions.numberValue('tokenlimit') || this.options.tokenLimit;
     const testSnippets = await this.getTestSnippets(vectorTerms, tokenLimit);
     const result = await this.getMostRelevantTest(userRequest, userOptions, testSnippets);
-    const { relevantTest, installCommands, testCommands } = result || {};
-    if (!relevantTest) {
-      yield 'Sorry, I could not find any relevant tests to record.';
-      return;
-    }
+    const { relevantTest, installCommands, testCommands, suggestedTest } = result || {};
+
+    const history = this.interactionHistory.clone();
 
-    if (relevantTest.language === 'other') {
+    if (relevantTest?.language === 'other') {
       yield `I found a relevant test at \`${relevantTest.path}\`, but I'm unable to help you record it at this time. This language does not appear to be supported.`;
       return;
     }
 
     const helpDocs = await this.lookupContextService.lookupHelp(
-      ['record', 'agent', 'tests', relevantTest.framework].filter(Boolean) as string[],
+      ['record', 'agent', 'tests', relevantTest?.language, relevantTest?.framework].filter(
+        Boolean
+      ) as string[],
       tokenLimit
     );
 
@@ -191,11 +284,15 @@ ${userRequest}
       },
       {
         role: 'assistant',
-        content: `Based on the request, the most relevant test case is:
+        content:
+          (relevantTest?.path
+            ? `Based on the request, the most relevant test case is:
 ${relevantTest.name ? `**Name:** \`${relevantTest.name}\`` : ''}
 ${relevantTest.framework ? `**Framework:** \`${relevantTest.framework}\`` : ''}
 ${relevantTest.language ? `**Language:** \`${relevantTest.language}\`` : ''}
-**Path:** \`${relevantTest.path}\`
+**Path:** \`${relevantTest.path}\``
+            : `Based on the request, a ${relevantTest?.language} ${relevantTest?.framework} test case needs to be created first:\n${suggestedTest}\n\n`) +
+          `
 
 ${
   installCommands?.length
@@ -227,11 +324,11 @@ ${helpDocs
       },
       {
         role: 'user',
-        content: `Restate the information you've provided to me, in standalone format, as a step by step guide outlining the steps required to record the single test case that you've identified.
+        content: `Restate the information you've provided to me, in standalone format, as a step by step guide outlining the steps required to record the single test case that you've identified or suggested creating.
 If possible, include the terminal command needed to run the test. Only specify test patterns that are guaranteed to match based on previous context. For example, do not include file ranges not supported by the test runner.
 In your response, please include the following:
-- The name of the test case (if known)
-- The path to the test file
+- If an existing test was found, indicate the test case name and path
+- Otherwise, steps and suggested location to create it (don't generate code itself, instead use <generated-test-case /> placeholder — DO NOT surround it with code fences)
 - Any steps and terminal commands required to install the AppMap recording agent
 - Any steps and terminal commands required to run the specific test case
 
@@ -250,9 +347,9 @@ Do not include:
     );
     const completion = this.completionService.complete(messages, { temperature });
 
-    for await (const token of completion) {
-      yield token;
-    }
+    yield* replaceStream(completion, '<generated-test-case />', () =>
+      this.suggestTestCase(suggestedTest ?? '', userOptions, history)
+    );
   }
 }
 
diff --git a/packages/navie/src/interaction-history.ts b/packages/navie/src/interaction-history.ts
@@ -316,6 +316,21 @@ class InteractionHistory extends EventEmitter {
     this.events.push(event);
   }
 
+  /**
+   * Creates a new InteractionHistory with the same events and acceptPinnedFileContext setting,
+   * so that events can be added to the copy without affecting the original.
+   * @note this is a shallow copy: the event objects themselves are shared, not cloned, so
+   * mutating an event through the clone also mutates it in the original.
+   * NOTE(review): listeners registered on the original are presumably not carried over — confirm.
+   * @returns a shallow copy of the interaction history
+   */
+  clone(): InteractionHistory {
+    const clone = new InteractionHistory();
+    clone.events.push(...this.events);
+    clone.acceptPinnedFileContext = this.acceptPinnedFileContext;
+    return clone;
+  }
+
   stopAcceptingPinnedFileContext() {
     this.acceptPinnedFileContext = false;
   }
diff --git a/packages/navie/src/lib/closing-tags.ts b/packages/navie/src/lib/closing-tags.ts
@@ -0,0 +1,33 @@
+/**
+ * Scans the text for unclosed XML tags and returns the closing tags to be appended.
+ *
+ * Open tags are tracked on a stack. A closing tag only closes the innermost open tag, and only
+ * if the names match; other closing tags are ignored. Self-closing tags never open anything.
+ * The content of CDATA sections is skipped, since it is literal text rather than markup.
+ *
+ * @param text the (possibly truncated) XML text to scan
+ * @returns the missing closing tags, innermost first, preceded by `]]>` when the text ends
+ * inside a CDATA section; an empty string when nothing is left open
+ */
+export default function closingTags(text: string): string {
+  // Names of the tags that are currently open, innermost last
+  const openTags: string[] = [];
+  let insideCData = false;
+
+  // First alternative: a CDATA section, either terminated or running to the end of the text.
+  // Second alternative: an opening, closing or self-closing tag. After the first character a
+  // name may also contain digits, `-`, `_`, `.` and `:` (e.g. `h1`, `sub-item`, `xs:element`).
+  const tokenRegex =
+    /<!\[CDATA\[[\s\S]*?(?:\]\]>|$)|<(\/)?([a-zA-Z_:][\w:.-]*)([^>]*?)(\/)?>/g;
+
+  for (const match of text.matchAll(tokenRegex)) {
+    const [token, close, tagName, attributes, selfClosing] = match;
+
+    if (!tagName) {
+      // A CDATA section: nothing inside it counts as a tag. Only a section that runs to the
+      // end of the text can be unterminated, so the last assignment is the one that matters.
+      insideCData = !token.endsWith(']]>');
+      continue;
+    }
+
+    // Self-closing tags neither open nor close anything
+    if (selfClosing) continue;
+
+    if (close || attributes.trim().startsWith('/')) {
+      // A closing tag closes the innermost open tag, provided the names match
+      if (openTags[openTags.length - 1] === tagName) {
+        openTags.pop();
+      }
+    } else {
+      openTags.push(tagName);
+    }
+  }
+
+  // Close whatever is still open, innermost first
+  const closers = openTags
+    .reverse()
+    .map((tag) => `</${tag}>`)
+    .join('');
+  return insideCData ? `]]>${closers}` : closers;
+}
diff --git a/packages/navie/src/lib/parse-options.ts b/packages/navie/src/lib/parse-options.ts
@@ -1,7 +1,21 @@
 import { ContextV2 } from '../context';
 
 export class UserOptions {
-  constructor(private options: Map<string, string | boolean>) {}
+  private options: Map<string, string | boolean>;
+
+  /**
+   * @param options the option values, either as a ready-made Map, which is used as is (not
+   * copied, keys untouched), or as a plain object, whose keys are lower-cased on the way in
+   */
+  constructor(options: Map<string, string | boolean> | Record<string, string | boolean>) {
+    if (options instanceof Map) {
+      this.options = options;
+      return;
+    }
+
+    const lowerCased = new Map<string, string | boolean>();
+    for (const [key, value] of Object.entries(options)) {
+      lowerCased.set(key.toLowerCase(), value);
+    }
+    this.options = lowerCased;
+  }
+
+  /** Iterates over the options as `[key, value]` pairs, in the order of the underlying Map. */
+  [Symbol.iterator](): IterableIterator<[string, string | boolean]> {
+    // A Map's default iterator is its entries() iterator
+    return this.options[Symbol.iterator]();
+  }
 
   has(key: string): boolean {
     return this.options.has(key);
diff --git a/packages/navie/src/lib/replace-stream.ts b/packages/navie/src/lib/replace-stream.ts
@@ -0,0 +1,35 @@
+import splitOn from './split-on';
+
+/**
+ * Replaces every occurrence of a needle in a stream of strings with the output of a
+ * replacement function.
+ *
+ * Text around a match is passed through as it becomes available. A match with nothing after it
+ * in the data received so far is held back until more data arrives or the source ends
+ * (presumably because later data could still extend the match).
+ *
+ * The replacement function is called once per match, with the matched text, and may yield any
+ * number of strings; they are emitted in place of the match.
+ *
+ * @param source the source stream
+ * @param needle the string or regex to search for
+ * @param replacement called with the matched text; its output replaces the match
+ * @returns the source stream with every match replaced
+ */
+export default async function* replaceStream(
+  source: AsyncIterable<string>,
+  needle: string | RegExp,
+  replacement: (found: string) => AsyncIterable<string>
+): AsyncIterable<string> {
+  let buffer = '';
+  for await (const chunk of source) {
+    buffer += chunk;
+    while (buffer) {
+      const [before, found, after] = splitOn(buffer, needle);
+      yield before;
+      if (found) {
+        if (after) {
+          yield* replacement(found);
+        } else {
+          // Nothing follows the match yet: keep it buffered and wait for the next chunk
+          buffer = found;
+          break;
+        }
+      }
+      buffer = after;
+    }
+  }
+
+  // The source ended while a match was still held back; without this it would be silently
+  // dropped. NOTE(review): splitOn is not visible here, so only replace the leftover when it
+  // really is a whole match and pass it through untouched otherwise.
+  if (buffer) {
+    if (isWholeMatch(buffer, needle)) yield* replacement(buffer);
+    else yield buffer;
+  }
+}
+
+/** Tells whether the needle matches the text in its entirety. */
+function isWholeMatch(text: string, needle: string | RegExp): boolean {
+  if (typeof needle === 'string') return text === needle;
+  // Use a copy without the global/sticky flags so the caller's lastIndex is neither read nor moved
+  const match = new RegExp(needle.source, needle.flags.replace(/[gy]/g, '')).exec(text);
+  return match !== null && match.index === 0 && match[0] === text;
+}
diff --git a/packages/navie/test/commands/observe-command.spec.ts b/packages/navie/test/commands/observe-command.spec.ts
@@ -107,19 +107,35 @@ describe('ObserveCommand', () => {
     expect(lookupContextService.lookupHelp).toBeCalledTimes(1);
   });
 
-  it('exits early if no test is identified', async () => {
+  it('suggests a test if no test is identified', async () => {
     lookupContextService.lookupContext = jest.fn().mockResolvedValue([]);
-    completionService.json = jest.fn().mockReturnValueOnce(undefined);
-    lookupContextService.lookupHelp = jest.fn();
+    completionService.json = jest.fn().mockReturnValueOnce({
+      suggestedTest: 'Write a test for the observe command that handles missing tests',
+      relevantTest: { language: 'javascript' },
+    });
+    completionService.complete = jest
+      .fn()
+      .mockImplementationOnce(function* () {
+        yield '1. Create the following test:\n<generated-test-case />\n2. Run the test';
+      })
+      .mockImplementation(function* () {
+        yield 'Generated test case code';
+      });
+
     const result = await read(
       command.execute({
         question: 'what?',
         userOptions: new UserOptions(new Map()),
       })
     );
-    expect(result).toEqual('Sorry, I could not find any relevant tests to record.');
-    expect(lookupContextService.lookupHelp).not.toBeCalled();
-    expect(completionService.complete).not.toBeCalled();
+    expect(result).toMatchInlineSnapshot(`
+      "1. Create the following test:
+
+      Generated test case code
+
+      2. Run the test"
+    `);
+    expect(completionService.complete).toHaveBeenCalledTimes(4);
   });
 
   it('exits early if the language is not supported', async () => {
diff --git a/packages/navie/test/lib/closing-tags.spec.ts b/packages/navie/test/lib/closing-tags.spec.ts