Skip to content

Commit 3646df1

Browse files
committed
feat: Ask AI about AppMaps
1 parent c58e618 commit 3646df1

File tree

7 files changed

+505
-4
lines changed

7 files changed

+505
-4
lines changed

packages/cli/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@
114114
"minimatch": "^5.1.2",
115115
"moo": "^0.5.1",
116116
"open": "^8.2.1",
117+
"openai": "^4.19.0",
117118
"openapi-diff": "^0.23.6",
118119
"openapi-types": "^12.1.3",
119120
"ora": "^5.4.1",

packages/cli/src/cli.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ const CompareCommand = require('./cmds/compare/compare');
3636
const CompareReportCommand = require('./cmds/compare-report/compareReport');
3737
const InventoryCommand = require('./cmds/inventory/inventory');
3838
const InventoryReportCommand = require('./cmds/inventory-report/inventoryReport');
39+
const Ask = require('./cmds/ask/ask');
3940
import UploadCommand from './cmds/upload';
4041
import { default as sqlErrorLog } from './lib/sqlErrorLog';
4142

@@ -192,6 +193,7 @@ yargs(process.argv.slice(2))
192193
.command(CompareReportCommand)
193194
.command(InventoryCommand)
194195
.command(InventoryReportCommand)
196+
.command(Ask)
195197
.option('verbose', {
196198
alias: 'v',
197199
type: 'boolean',

packages/cli/src/cmds/ask/ask.ts

Lines changed: 349 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,349 @@
1+
import { warn } from 'console';
2+
import OpenAI from 'openai';
3+
import lunr from 'lunr';
4+
import { ChatCompletionMessageParam } from 'openai/resources';
5+
import { readFile } from 'fs/promises';
6+
import { AppMapFilter, CodeObject, Event, Metadata, buildAppMap } from '@appland/models';
7+
import { Action, Specification, buildDiagram, nodeName } from '@appland/sequence-diagram';
8+
9+
import { handleWorkingDirectory } from '../../lib/handleWorkingDirectory';
10+
import { locateAppMapDir } from '../../lib/locateAppMapDir';
11+
import { exists, verbose } from '../../utils';
12+
import FindAppMaps, { SearchResult as FindAppMapSearchResult } from '../../fulltext/FindAppMaps';
13+
import FindEvents, { SearchResult as FindEventSearchResult } from '../../fulltext/FindEvents';
14+
15+
// Command signature and one-line description exported for the CLI's yargs
// command registration; `question` is the single positional argument.
export const command = 'ask <question>';
16+
export const describe =
17+
'Ask a plain text question and get a filtered and configured AppMap as a response';
18+
19+
/**
 * Declares the positional argument and the options accepted by the `ask` command.
 *
 * @param args the argument parser handed to the command builder; it must provide
 *   `positional`, `option` and `strict`.
 * @returns whatever `args.strict()` returns, so unknown options are rejected.
 */
export const builder = (args) => {
  args.positional('question', {
    describe: 'plain text question about the code base',
  });
  args.option('max-diagram-matches', {
    describe: 'maximum number of diagram matches to return',
    type: 'number',
    default: 5,
  });
  args.option('max-code-object-matches', {
    describe: 'maximum number of code objects matches to return for each diagram',
    type: 'number',
    default: 5,
  });
  args.option('directory', {
    describe: 'program working directory',
    type: 'string',
    alias: 'd',
  });
  // The handler reads `argv.appmapDir`. Because the parser is strict, the option
  // has to be declared here or it could never be supplied on the command line.
  args.option('appmap-dir', {
    describe: 'directory to recursively inspect for AppMaps',
    type: 'string',
  });
  return args.strict();
};
40+
41+
/**
 * Creates the OpenAI client from the `OPENAI_API_KEY` environment variable.
 *
 * @returns a client configured with that key.
 * @throws Error when the variable is unset or empty.
 */
function buildOpenAI(): OpenAI {
  const { OPENAI_API_KEY: apiKey } = process.env;
  if (apiKey) return new OpenAI({ apiKey });

  throw new Error('OPENAI_API_KEY environment variable must be set');
}
48+
49+
/**
 * Plain-data shape of a code object: a named, typed, labelled node that nests
 * further nodes of the same shape under `children`.
 *
 * NOTE(review): not referenced anywhere else in the visible part of this file.
 */
type SerializedCodeObject = {
50+
name: string;
51+
type: string;
52+
labels: string[];
53+
children: SerializedCodeObject[];
54+
static?: boolean;
55+
sourceLocation?: string;
56+
};
57+
58+
/**
 * Attributes that getDiagramDetails renders as `key=value` pairs next to each
 * root event of a diagram summary. `eventId` is always set; `elapsed` and
 * `location` are only set when the event supplies a truthy value.
 */
type ActionInfo = {
59+
elapsed?: number;
60+
eventId: number;
61+
location?: string;
62+
};
63+
64+
/**
 * A single diagram reference, identified by `diagramId`.
 *
 * NOTE(review): not referenced anywhere else in the visible part of this file.
 */
type SearchDiagramResult = {
65+
diagramId: string;
66+
};
67+
68+
/**
 * Shape that getDiagramDetails casts its JSON argument string to.
 * Only `diagramIds` is read there; `search` is declared but never used.
 */
type DiagramDetailsParam = {
69+
search: string;
70+
diagramIds: string[];
71+
};
72+
73+
/**
 * Shape that lookupSourceCode casts its JSON argument string to. Each entry of
 * `locations` is split on ':' into a file path and an optional line number.
 */
type LookupSourceCodeParam = {
74+
locations: string[];
75+
};
76+
77+
/** Result of lookupSourceCode: maps each requested location string to the source text found for it. */
type LookupSourceCodeResult = Record<string, string>;
78+
79+
/**
 * Descriptive fields for an event; only `name` is mandatory.
 *
 * NOTE(review): not referenced anywhere else in the visible part of this file.
 */
type EventInfo = {
80+
name: string;
81+
fqid?: string;
82+
sourceLocation?: string;
83+
elapsed?: number;
84+
eventIds?: number[];
85+
};
86+
87+
/**
 * One entry of the getDiagramDetails result, produced per diagram id:
 * `summary` is the newline-joined list of root events, `metadata` is the
 * AppMap metadata after the `git`, `client` and `source_location` entries
 * have been deleted, and `keyEvents` holds the event search results for the
 * user's question.
 */
type DiagramDetailsResult = {
88+
summary: string;
89+
metadata: Metadata;
90+
keyEvents: FindEventSearchResult[];
91+
};
92+
93+
/**
 * Reports whether `str` contains a camel-case hump.
 *
 * @param str the identifier to inspect.
 * @returns `true` when `str` has at least three characters and, ignoring its
 *   first character, contains a lowercase ASCII letter immediately followed by
 *   an uppercase ASCII letter.
 */
const isCamelized = (str: string): boolean => {
  // Anything shorter than three characters is never considered camelized.
  if (str.length <= 2) return false;

  // The first character does not take part in the lower-then-upper check.
  return str.substring(1).search(/[a-z][A-Z]/) >= 0;
};
99+
100+
/**
 * Breaks a camelized identifier into space-separated words.
 *
 * @param str the identifier to split.
 * @returns `str` unchanged when `isCamelized(str)` is false; otherwise `str`
 *   with a space inserted before every uppercase ASCII letter that is not the
 *   first character.
 */
const splitCamelized = (str: string): string => {
  if (!isCamelized(str)) return str;

  // Offsets at which a word begins: the start of the string, then every
  // uppercase letter found after the first character.
  const wordStarts = [0];
  for (let idx = 1; idx < str.length; idx += 1) {
    const ch = str.charAt(idx);
    if ('A' <= ch && ch <= 'Z') wordStarts.push(idx);
  }

  // The last word has no following offset, so slice() runs to the end of the string.
  return wordStarts.map((from, n) => str.slice(from, wordStarts[n + 1])).join(' ');
};
117+
118+
/**
 * Function-definition patterns used by `lookupSourceCode`, keyed by file
 * extension (including the leading dot).
 *
 * NOTE(review): the `.py` pattern ends in a lazy group, so its match stops right
 * after the `def ...:` header line rather than covering the function body.
 */
const FUNCTION_PATTERNS = new Map<string, RegExp>([
  ['.rb', new RegExp(`def\\s+\\w+.*?\\n(.*?\\n)*?^end\\b`, 'gm')],
  [
    '.java',
    new RegExp(
      `(?:public|private|protected)?\\s+(?:static\\s+)?(?:final\\s+)?(?:synchronized\\s+)?(?:abstract\\s+)?(?:native\\s+)?(?:strictfp\\s+)?(?:transient\\s+)?(?:volatile\\s+)?(?:\\w+\\s+)*\\w+\\s+\\w+\\s*\\([^)]*\\)\\s*(?:throws\\s+\\w+(?:,\\s*\\w+)*)?\\s*\\{(?:[^{}]*\\{[^{}]*\\})*[^{}]*\\}`,
      'gm'
    ),
  ],
  ['.py', new RegExp(`def\\s+\\w+.*?:\\n(.*?\\n)*?`, 'gm')],
  [
    '.js',
    new RegExp(
      `(?:async\\s+)?function\\s+\\w+\\s*\\([^)]*\\)\\s*\\{(?:[^{}]*\\{[^{}]*\\})*[^{}]*\\}`,
      'gm'
    ),
  ],
]);

/**
 * Finds the function definition whose line span contains a given line.
 *
 * @param fileContent full text of the source file.
 * @param pattern pattern that matches one function definition per match.
 * @param lineno 1-based line number that must fall inside the definition.
 * @returns the matched definition with surrounding whitespace trimmed, or
 *   `undefined` when `lineno` is not a positive integer or no match spans it.
 */
function findFunctionAtLine(
  fileContent: string,
  pattern: RegExp,
  lineno: number
): string | undefined {
  if (!Number.isInteger(lineno) || lineno < 1) return undefined;

  // Scan with a private copy: a global RegExp remembers `lastIndex` between
  // exec() calls, which would make a shared pattern resume mid-way through the
  // next file it is applied to.
  const flags = pattern.flags.includes('g') ? pattern.flags : `${pattern.flags}g`;
  const matcher = new RegExp(pattern.source, flags);

  let scanned = 0; // offset up to which newlines have already been counted
  let line = 1; // 1-based line number of offset `scanned`
  let match: RegExpExecArray | null;
  while ((match = matcher.exec(fileContent)) !== null) {
    if (match[0].length === 0) {
      // Guard against an endless loop on an empty match.
      matcher.lastIndex += 1;
      continue;
    }

    // A pattern may begin with whitespace (the Java one does), which can pull
    // the match start onto an earlier line; measure from the first real character.
    const leading = match[0].length - match[0].replace(/^\s+/, '').length;
    const start = match.index + leading;
    const text = match[0].trim();

    for (; scanned < start; scanned += 1) {
      if (fileContent[scanned] === '\n') line += 1;
    }
    const startLine = line;
    const endLine = startLine + text.split('\n').length - 1;

    if (lineno >= startLine && lineno <= endLine) return text;
    // Matches are produced in file order, so later ones cannot contain the line.
    if (startLine > lineno) return undefined;
  }
  return undefined;
}

/**
 * Runs the `ask` command: sends the user's question to OpenAI together with a
 * set of callable functions that search the AppMaps, then prints the answer.
 *
 * The final answer and the ranked list of matching events go to stdout via
 * `console.log`; progress and function-call traces are emitted with `warn`.
 *
 * @param argv parsed command-line arguments; reads `verbose`, `directory`,
 *   `appmapDir`, `question`, `maxDiagramMatches` and `maxCodeObjectMatches`.
 * @throws Error when `OPENAI_API_KEY` is not set (raised by `buildOpenAI`).
 */
export const handler = async (argv: any) => {
  verbose(argv.verbose);
  handleWorkingDirectory(argv.directory);
  const { question, maxCodeObjectMatches, maxDiagramMatches } = argv;
  const appmapDir = await locateAppMapDir(argv.appmapDir);

  const findAppMaps = new FindAppMaps(appmapDir);
  await findAppMaps.initialize();

  /** Prints the plan that the model passed as a JSON `{ plan }` argument string. */
  function showPlan(paramStr: string) {
    let params: any;
    try {
      params = JSON.parse(paramStr) as { plan: string };
    } catch (e) {
      warn(`Failed to parse plan: ${paramStr}: ${e}`);
      return;
    }
    warn(`AI Plan: ${params.plan}`);
  }

  /** Searches the AppMap index for the diagrams that best match the question. */
  function fetchDiagrams(): FindAppMapSearchResult[] {
    warn(`Fetching diagrams`);
    return findAppMaps.search(question, { maxResults: maxDiagramMatches });
  }

  // Every event match found by getDiagramDetails, reported after the answer.
  const diagramDetailsResults = new Array<FindEventSearchResult>();

  /**
   * Loads each requested diagram and returns its root-event summary, trimmed
   * metadata, and the events that best match the question.
   */
  async function getDiagramDetails(paramStr: string): Promise<DiagramDetailsResult[]> {
    const params = JSON.parse(paramStr) as DiagramDetailsParam;
    const { diagramIds } = params;
    warn(`Getting details for diagram ${diagramIds}, retrieved by "${question}"`);
    const result = new Array<DiagramDetailsResult>();
    for (const diagramId of diagramIds) {
      warn(`Loading AppMap ${diagramId} and pruning to 1MB`);

      const index = new FindEvents(diagramId);
      index.maxSize = 1024 * 1024;
      await index.initialize();
      const searchResults = index.search(question, { maxResults: maxCodeObjectMatches });
      diagramDetailsResults.push(...searchResults);

      const diagramText = new Array<string>();
      for (const event of index.appmap.rootEvents()) {
        const actionInfo: ActionInfo = { eventId: event.id };
        if (event.elapsedTime) actionInfo.elapsed = event.elapsedTime;
        if (event.codeObject.location) actionInfo.location = event.codeObject.location;
        // Keys are sorted so the rendered attribute order is stable.
        const actionInfoStr = Object.keys(actionInfo)
          .sort()
          .map((key) => `${key}=${actionInfo[key as keyof ActionInfo]}`)
          .join(',');
        diagramText.push(
          `${event.codeObject.id}${actionInfoStr !== '' ? ` (${actionInfoStr})` : ''}`
        );
      }

      const metadata = index.appmap.metadata;
      delete metadata['git'];
      delete (metadata as any)['client'];
      // TODO: Do we want the AI to read the source code of the test case?
      delete metadata['source_location'];
      result.push({ metadata, summary: diagramText.join('\n'), keyEvents: searchResults });
    }

    return result;
  }

  /**
   * Returns source text for each requested `<path>[:<line number>]` location.
   * Without a line number the whole file is returned. With one, the function
   * definition that contains that line is returned, provided the file
   * extension has a known pattern. Locations that yield nothing are omitted.
   */
  async function lookupSourceCode(
    locationStr: string
  ): Promise<LookupSourceCodeResult | undefined> {
    const params = JSON.parse(locationStr) as LookupSourceCodeParam;

    const result: LookupSourceCodeResult = {};
    for (const location of params.locations) {
      const [path, lineno] = location.split(':');
      if (!(await exists(path))) continue;

      const fileContent = await readFile(path, 'utf-8');
      let functionContent: string | undefined;
      if (lineno) {
        const extension = path.substring(path.lastIndexOf('.'));
        const pattern = FUNCTION_PATTERNS.get(extension);
        if (pattern) {
          functionContent = findFunctionAtLine(fileContent, pattern, parseInt(lineno, 10));
        }
      } else {
        functionContent = fileContent;
      }
      if (functionContent) result[location] = functionContent;
    }
    return result;
  }

  const systemMessages: ChatCompletionMessageParam[] = [
    'You are an assistant that answers questions about the design and architecture of code.',
    'You answer these questions by accessing a knowledge base of sequence diagrams.',
    'Each sequence diagram conists of a series of events, such as function calls, HTTP server requests, SQL queries, etc.',
    'Before each function call, call "showPlan" function with a Markdown document that describes your strategy for answering the question.',
    `Begin by calling the "fetchDiagrams" function to obtain the diagrams that are most relevant to the user's question.`,
    'Next, use the "getDiagramDetails" function get details about the events that occur with in the matching diagrams.',
    'Enhance your answer by using "lookupSourceCode" function to get the source code for the most relevant functions.',
    'Finally, respond with a Markdown document that summarizes the diagrams and answers the question.',
    'Never emit phrases like "note that the actual behavior may vary between different applications"',
  ].map((msg) => ({
    content: msg,
    role: 'system',
  }));

  const userMessage: ChatCompletionMessageParam = {
    content: question,
    role: 'user',
  };

  const messages = [...systemMessages, userMessage];

  const openai = buildOpenAI();
  const runFunctions = openai.beta.chat.completions.runFunctions({
    model: 'gpt-4',
    messages,
    function_call: 'auto',
    functions: [
      {
        function: showPlan,
        description: 'Print the plan for answering the question',
        parameters: {
          type: 'object',
          properties: {
            plan: {
              type: 'string',
              description: 'The plan in Markdown format',
            },
          },
          required: ['plan'],
        },
      },
      {
        function: fetchDiagrams,
        description: `Obtain sequence diagrams that are relevant to the user's question. The response is a list of diagram ids.`,
        parameters: {
          type: 'object',
          properties: {},
        },
      },
      {
        function: getDiagramDetails,
        description: `Get details about diagrams, including their name, code language, frameworks, source location, exceptions raised.`,
        parameters: {
          type: 'object',
          properties: {
            diagramIds: {
              type: 'array',
              description: 'Array of diagram ids',
              items: {
                type: 'string',
              },
            },
          },
          // Only list properties that are actually declared above; the
          // function searches with the user's question, not a model-supplied term.
          required: ['diagramIds'],
        },
      },
      {
        function: lookupSourceCode,
        description: `Get the source code for a specific function.`,
        parameters: {
          type: 'object',
          properties: {
            locations: {
              type: 'array',
              description: `An array of source code locations in the format <path>[:<line number>]. Line number can be omitted if it's not known.`,
              items: {
                type: 'string',
              },
            },
          },
          required: ['locations'],
        },
      },
    ],
  });

  // Function calls are always traced; their results only in verbose mode.
  runFunctions.on('functionCall', (data) => {
    warn(JSON.stringify(data, null, 2));
  });
  runFunctions.on('finalFunctionCall', (data) => {
    warn(JSON.stringify(data, null, 2));
  });
  runFunctions.on('functionCallResult', (data) => {
    if (verbose()) warn(JSON.stringify(data));
  });
  runFunctions.on('finalFunctionCallResult', (data) => {
    if (verbose()) warn(JSON.stringify(data));
  });

  const response = await runFunctions.finalContent();
  if (!response) {
    warn(`No response from OpenAI`);
    return;
  }
  console.log(response);
  console.log('');
  console.log('The best matching sequence diagram events are:');
  console.log('');
  // Highest score first.
  diagramDetailsResults.sort((a, b) => b.score - a.score);
  for (const event of diagramDetailsResults) {
    console.log(`  ${event.fqid} (${event.score})`);
  }
};

packages/cli/src/functionStats.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ class FunctionStats {
4141
const trigram = (/** @type {Trigram} */ t) =>
4242
[t.callerId, t.codeObjectId, t.calleeId].join(' ->\n');
4343
return {
44+
appmaps: this.appMapNames,
4445
returnValues: this.returnValues,
4546
httpServerRequests: this.httpServerRequests,
4647
sqlQueries: this.sqlQueries,
@@ -58,6 +59,10 @@ class FunctionStats {
5859
return [...new Set(this.eventMatches.map((e) => e.appmap))].sort();
5960
}
6061

62+
/** Returns the same value as `appMapNames`, exposed under the shorter name `appmaps`. */
get appmaps() {
63+
return this.appMapNames;
64+
}
65+
6166
get returnValues() {
6267
return [...new Set(this.eventMatches.map((e) => e.event.returnValue).map(formatValue))].sort();
6368
}

0 commit comments

Comments
 (0)