|
| 1 | +import { warn } from 'console'; |
| 2 | +import OpenAI from 'openai'; |
| 3 | +import lunr from 'lunr'; |
| 4 | +import { ChatCompletionMessageParam } from 'openai/resources'; |
| 5 | +import { readFile } from 'fs/promises'; |
| 6 | +import { AppMapFilter, CodeObject, Event, Metadata, buildAppMap } from '@appland/models'; |
| 7 | +import { Action, Specification, buildDiagram, nodeName } from '@appland/sequence-diagram'; |
| 8 | + |
| 9 | +import { handleWorkingDirectory } from '../../lib/handleWorkingDirectory'; |
| 10 | +import { locateAppMapDir } from '../../lib/locateAppMapDir'; |
| 11 | +import { exists, verbose } from '../../utils'; |
| 12 | +import FindAppMaps, { SearchResult as FindAppMapSearchResult } from '../../fulltext/FindAppMaps'; |
| 13 | +import FindEvents, { SearchResult as FindEventSearchResult } from '../../fulltext/FindEvents'; |
| 14 | + |
/** Command signature: the verb `ask` followed by one required positional argument, `question`. */
export const command = 'ask <question>';

/** Human-readable summary of what the command does. */
const askDescription =
  'Ask a plain text question and get a filtered and configured AppMap as a response';
export const describe = askDescription;
| 18 | + |
/**
 * Declares the command-line interface of the `ask` command on the supplied
 * argument parser.
 *
 * Registers the `question` positional, then the `max-diagram-matches`,
 * `max-code-object-matches` and `directory` options (in that order), and
 * finally switches the parser to strict mode.
 *
 * @param args argument parser exposing `positional`, `option` and `strict`
 * @returns whatever `args.strict()` returns
 */
export const builder = (args) => {
  // Option specs are kept in a table; insertion order is the registration order.
  const optionSpecs = {
    'max-diagram-matches': {
      describe: 'maximum number of diagram matches to return',
      type: 'number',
      default: 5,
    },
    'max-code-object-matches': {
      describe: 'maximum number of code objects matches to return for each diagram',
      type: 'number',
      default: 5,
    },
    directory: {
      describe: 'program working directory',
      type: 'string',
      alias: 'd',
    },
  };

  args.positional('question', {
    describe: 'plain text question about the code base',
  });
  for (const [optionName, spec] of Object.entries(optionSpecs)) {
    args.option(optionName, spec);
  }
  return args.strict();
};
| 40 | + |
/**
 * Creates an OpenAI client configured from the environment.
 *
 * @returns a client constructed with the value of `OPENAI_API_KEY`
 * @throws Error when `OPENAI_API_KEY` is unset or empty
 */
function buildOpenAI(): OpenAI {
  const { OPENAI_API_KEY: apiKey } = process.env;
  if (apiKey) return new OpenAI({ apiKey });

  throw new Error('OPENAI_API_KEY environment variable must be set');
}
| 48 | + |
/**
 * Plain-data description of a code object and its nested children.
 * Not referenced by any other definition visible in this file.
 */
type SerializedCodeObject = {
  name: string;
  type: string;
  labels: Array<string>;
  children: Array<SerializedCodeObject>;
  static?: boolean;
  sourceLocation?: string;
};

/**
 * Annotations attached to one root event in a diagram summary. The handler
 * renders them as a sorted, comma-separated `key=value` list.
 */
type ActionInfo = {
  /** Id of the event being described; always present. */
  eventId: number;
  /** Elapsed time of the event; set only when the event reports a truthy value. */
  elapsed?: number;
  /** Location of the event's code object; set only when one is available. */
  location?: string;
};

/** A diagram reference. Not referenced by any other definition visible in this file. */
type SearchDiagramResult = {
  diagramId: string;
};

/**
 * Arguments of the `getDiagramDetails` function as decoded from the JSON
 * argument string.
 */
type DiagramDetailsParam = {
  /** Declared here, but the handler searches with the user's question instead of this value. */
  search: string;
  /** Ids of the diagrams to load. */
  diagramIds: Array<string>;
};

/** Arguments of the `lookupSourceCode` function as decoded from the JSON argument string. */
type LookupSourceCodeParam = {
  /** Source locations to look up, each written as a path with an optional `:<line number>` suffix. */
  locations: Array<string>;
};

/** Source text keyed by the location string that was requested. */
type LookupSourceCodeResult = Record<string, string>;

/**
 * Summary information about an event.
 * Not referenced by any other definition visible in this file.
 */
type EventInfo = {
  name: string;
  fqid?: string;
  sourceLocation?: string;
  elapsed?: number;
  eventIds?: Array<number>;
};

/** What `getDiagramDetails` reports for a single diagram. */
type DiagramDetailsResult = {
  /** One line per root event, joined with newlines. */
  summary: string;
  /** The diagram's metadata after the handler removes `git`, `client` and `source_location`. */
  metadata: Metadata;
  /** Event search results for the user's question within this diagram. */
  keyEvents: Array<FindEventSearchResult>;
};
| 92 | + |
/**
 * Reports whether `str` looks camel-cased.
 *
 * A string qualifies when it has at least three characters and, ignoring its
 * first character, contains a lowercase ASCII letter immediately followed by
 * an uppercase ASCII letter.
 */
const isCamelized = (str: string): boolean => {
  if (str.length < 3) return false;

  // The first character is dropped before testing, so a boundary formed by the
  // first two characters alone (e.g. "aBc") does not count.
  return /[a-z][A-Z]/.test(str.slice(1));
};

/** True when `ch` is a single uppercase ASCII letter. */
const isUpperAscii = (ch: string | undefined): boolean =>
  ch !== undefined && ch >= 'A' && ch <= 'Z';

/** True when `ch` is a single lowercase ASCII letter. */
const isLowerAscii = (ch: string | undefined): boolean =>
  ch !== undefined && ch >= 'a' && ch <= 'z';

/**
 * Splits a camel-cased identifier into space-separated words.
 *
 * Strings that are not camel-cased (per `isCamelized`) are returned unchanged.
 * A new word starts at an uppercase ASCII letter when either
 *  - the preceding character is not an uppercase letter (`fooBar` -> `foo Bar`), or
 *  - the following character is a lowercase letter, i.e. the capital ends a run
 *    of capitals and begins the next word (`HTTPResponse` -> `HTTP Response`).
 *
 * Runs of capitals are therefore kept together as one acronym instead of being
 * broken into single letters.
 *
 * @param str identifier to split
 * @returns the words of `str` joined by single spaces, or `str` itself
 */
const splitCamelized = (str: string): string => {
  if (!isCamelized(str)) return str;

  const words: string[] = [];
  let wordStart = 0;
  // Start at 1: the first character can never begin a second word.
  for (let i = 1; i < str.length; i++) {
    if (!isUpperAscii(str[i])) continue;

    const startsWord = !isUpperAscii(str[i - 1]) || isLowerAscii(str[i + 1]);
    if (startsWord) {
      words.push(str.slice(wordStart, i));
      wordStart = i;
    }
  }
  words.push(str.slice(wordStart));
  return words.join(' ');
};
| 117 | + |
/**
 * Heuristic function-matching patterns keyed by file extension (including the dot).
 *
 * All patterns carry the `g` flag, so they are stateful when executed;
 * `extractFunctionSource` clones a pattern before using it.
 */
const FUNCTION_PATTERNS: Record<string, RegExp> = {
  // A `def` line through the `end` that sits at the same indentation as the `def`.
  '.rb': /^([ \t]*)def\s+\w+.*\n(?:.*\n)*?\1end\b/gm,
  '.java': new RegExp(
    `(?:public|private|protected)?\\s+(?:static\\s+)?(?:final\\s+)?(?:synchronized\\s+)?(?:abstract\\s+)?(?:native\\s+)?(?:strictfp\\s+)?(?:transient\\s+)?(?:volatile\\s+)?(?:\\w+\\s+)*\\w+\\s+\\w+\\s*\\([^)]*\\)\\s*(?:throws\\s+\\w+(?:,\\s*\\w+)*)?\\s*\\{(?:[^{}]*\\{[^{}]*\\})*[^{}]*\\}`,
    'gm'
  ),
  // A `def` line plus every following line that is blank or indented deeper than the `def`.
  '.py': /^([ \t]*)(?:async[ \t]+)?def\s+\w+.*\n(?:(?:\1[ \t]+\S.*|[ \t\r]*)\n)*/gm,
  '.js': new RegExp(
    `(?:async\\s+)?function\\s+\\w+\\s*\\([^)]*\\)\\s*\\{(?:[^{}]*\\{[^{}]*\\})*[^{}]*\\}`,
    'gm'
  ),
};

/**
 * Splits a `<path>[:<line number>]` location into its parts.
 *
 * Only a trailing, all-digit segment is treated as a line number, so paths
 * that themselves contain a colon (e.g. a drive letter) are kept intact.
 *
 * @returns the path, plus `lineno` when a non-empty numeric suffix is present
 */
function parseLocation(location: string): { filePath: string; lineno?: number } {
  const match = /^(.*?)(?::(\d*))?$/.exec(location);
  if (!match) return { filePath: location };

  const [, filePath, digits] = match;
  return digits ? { filePath, lineno: parseInt(digits, 10) } : { filePath };
}

/**
 * Finds the function whose text spans a given line of a source file.
 *
 * @param fileContent full text of the source file
 * @param extension file extension including the dot; selects the pattern
 * @param lineno 1-based line number that must fall inside the function
 * @returns the matched function text (leading blank lines and trailing
 *   whitespace removed), or `undefined` when the extension has no pattern,
 *   `lineno` is not a positive integer, or no match spans that line
 */
function extractFunctionSource(
  fileContent: string,
  extension: string,
  lineno: number
): string | undefined {
  const pattern = FUNCTION_PATTERNS[extension];
  if (!pattern || !Number.isInteger(lineno) || lineno < 1) return undefined;

  // Clone so that `lastIndex` never carries over between files or calls.
  const regex = new RegExp(pattern.source, pattern.flags);
  // The line-oriented patterns expect every line, including the last, to end with a newline.
  const text = fileContent.endsWith('\n') ? fileContent : `${fileContent}\n`;

  let scanned = 0; // offset up to which newlines have been counted
  let line = 1; // 1-based line number at `scanned`
  let match: RegExpExecArray | null;
  while ((match = regex.exec(text)) !== null) {
    if (match[0] === '') {
      // Defensive: never loop forever on an empty match.
      regex.lastIndex++;
      continue;
    }

    // Some patterns begin with `\s+` and can swallow preceding blank lines.
    const withoutLeadingBlank = match[0].replace(/^\s*\n/, '');
    const start = match.index + (match[0].length - withoutLeadingBlank.length);
    const source = withoutLeadingBlank.replace(/\s+$/, '');

    for (; scanned < start; scanned++) {
      if (text.charCodeAt(scanned) === 10) line++;
    }
    const startLine = line;
    const endLine = startLine + source.split('\n').length - 1;

    if (lineno >= startLine && lineno <= endLine) return source;
    // Matches arrive in file order; once past the target line nothing later can span it.
    if (startLine > lineno) break;
  }
  return undefined;
}

/**
 * Entry point of the `ask` command.
 *
 * Sends the user's question to an OpenAI chat model together with four
 * callable functions (`showPlan`, `fetchDiagrams`, `getDiagramDetails`,
 * `lookupSourceCode`). It prints the model's final answer to stdout, followed
 * by the event search results collected while answering, sorted by descending
 * score. Progress and diagnostics go through `warn`.
 *
 * @param argv parsed command-line arguments; reads `verbose`, `directory`,
 *   `appmapDir`, `question`, `maxCodeObjectMatches` and `maxDiagramMatches`
 * @throws Error when `OPENAI_API_KEY` is not set (from `buildOpenAI`)
 */
export const handler = async (argv: any) => {
  verbose(argv.verbose);
  handleWorkingDirectory(argv.directory);
  const { question, maxCodeObjectMatches, maxDiagramMatches } = argv;
  const appmapDir = await locateAppMapDir(argv.appmapDir);

  const findAppMaps = new FindAppMaps(appmapDir);
  await findAppMaps.initialize();

  /** Prints the plan supplied by the model; malformed arguments are reported and ignored. */
  function showPlan(paramStr: string) {
    let params: any;
    try {
      params = JSON.parse(paramStr) as { plan: string };
    } catch (e) {
      warn(`Failed to parse plan: ${paramStr}: ${e}`);
      return;
    }
    warn(`AI Plan: ${params.plan}`);
  }

  /** Searches the AppMap index for the diagrams that best match the question. */
  function fetchDiagrams(): FindAppMapSearchResult[] {
    warn(`Fetching diagrams`);
    return findAppMaps.search(question, { maxResults: maxDiagramMatches });
  }

  // Every event search result produced while answering; printed at the end.
  const diagramDetailsResults = new Array<FindEventSearchResult>();

  /**
   * Loads each requested diagram and returns its root-event summary, trimmed
   * metadata and the events that best match the question.
   *
   * Malformed arguments are reported and produce an empty result, mirroring
   * `showPlan`, instead of aborting the whole conversation.
   */
  async function getDiagramDetails(paramStr: string): Promise<DiagramDetailsResult[]> {
    let diagramIds: unknown;
    try {
      ({ diagramIds } = JSON.parse(paramStr) as Partial<DiagramDetailsParam>);
    } catch (e) {
      warn(`Failed to parse getDiagramDetails arguments: ${paramStr}: ${e}`);
      return [];
    }
    if (!Array.isArray(diagramIds)) {
      warn(`getDiagramDetails expects "diagramIds" to be an array: ${paramStr}`);
      return [];
    }

    warn(`Getting details for diagram ${diagramIds}, retrieved by "${question}"`);
    const result = new Array<DiagramDetailsResult>();
    for (const diagramId of diagramIds) {
      warn(`Loading AppMap ${diagramId} and pruning to 1MB`);

      const index = new FindEvents(diagramId);
      index.maxSize = 1024 * 1024;
      await index.initialize();
      const searchResults = index.search(question, { maxResults: maxCodeObjectMatches });
      diagramDetailsResults.push(...searchResults);

      const diagramText = new Array<string>();
      for (const event of index.appmap.rootEvents()) {
        const actionInfo: ActionInfo = { eventId: event.id };
        if (event.elapsedTime) actionInfo.elapsed = event.elapsedTime;
        if (event.codeObject.location) actionInfo.location = event.codeObject.location;
        // `eventId` is always present, so the annotation list is never empty.
        const actionInfoStr = Object.entries(actionInfo)
          .sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0))
          .map(([key, value]) => `${key}=${value}`)
          .join(',');
        diagramText.push(`${event.codeObject.id} (${actionInfoStr})`);
      }

      const metadata = index.appmap.metadata;
      delete metadata['git'];
      delete (metadata as any)['client'];
      // TODO: Do we want the AI to read the source code of the test case?
      delete metadata['source_location'];
      result.push({ metadata, summary: diagramText.join('\n'), keyEvents: searchResults });
    }

    return result;
  }

  /**
   * Returns source code for each requested location, keyed by the location
   * string. With a line number, the result is the function spanning that line
   * (see `extractFunctionSource`); without one, it is the whole file.
   * Locations whose file does not exist, or for which no function is found,
   * are omitted.
   *
   * NOTE(review): the paths come from the model and are read as-is; consider
   * restricting them to the project directory.
   */
  async function lookupSourceCode(
    locationStr: string
  ): Promise<LookupSourceCodeResult | undefined> {
    let locations: unknown;
    try {
      ({ locations } = JSON.parse(locationStr) as Partial<LookupSourceCodeParam>);
    } catch (e) {
      warn(`Failed to parse lookupSourceCode arguments: ${locationStr}: ${e}`);
      return {};
    }
    if (!Array.isArray(locations)) {
      warn(`lookupSourceCode expects "locations" to be an array: ${locationStr}`);
      return {};
    }

    const result: LookupSourceCodeResult = {};
    for (const location of locations) {
      if (typeof location !== 'string') continue;

      const { filePath, lineno } = parseLocation(location);
      if (!(await exists(filePath))) continue;

      const fileContent = await readFile(filePath, 'utf-8');
      const functionContent =
        lineno === undefined
          ? fileContent
          : extractFunctionSource(
              fileContent,
              filePath.substring(filePath.lastIndexOf('.')),
              lineno
            );
      if (functionContent) result[location] = functionContent;
    }
    return result;
  }

  const systemMessages: ChatCompletionMessageParam[] = [
    'You are an assistant that answers questions about the design and architecture of code.',
    'You answer these questions by accessing a knowledge base of sequence diagrams.',
    'Each sequence diagram conists of a series of events, such as function calls, HTTP server requests, SQL queries, etc.',
    'Before each function call, call "showPlan" function with a Markdown document that describes your strategy for answering the question.',
    `Begin by calling the "fetchDiagrams" function to obtain the diagrams that are most relevant to the user's question.`,
    'Next, use the "getDiagramDetails" function get details about the events that occur with in the matching diagrams.',
    'Enhance your answer by using "lookupSourceCode" function to get the source code for the most relevant functions.',
    'Finally, respond with a Markdown document that summarizes the diagrams and answers the question.',
    'Never emit phrases like "note that the actual behavior may vary between different applications"',
  ].map((msg) => ({
    content: msg,
    role: 'system',
  }));

  const userMessage: ChatCompletionMessageParam = {
    content: question,
    role: 'user',
  };

  const messages = [...systemMessages, userMessage];

  const openai = buildOpenAI();
  const runFunctions = openai.beta.chat.completions.runFunctions({
    model: 'gpt-4',
    messages,
    function_call: 'auto',
    functions: [
      {
        function: showPlan,
        description: 'Print the plan for answering the question',
        parameters: {
          type: 'object',
          properties: {
            plan: {
              type: 'string',
              description: 'The plan in Markdown format',
            },
          },
          required: ['plan'],
        },
      },
      {
        function: fetchDiagrams,
        description: `Obtain sequence diagrams that are relevant to the user's question. The response is a list of diagram ids.`,
        parameters: {
          type: 'object',
          properties: {},
        },
      },
      {
        function: getDiagramDetails,
        description: `Get details about diagrams, including their name, code language, frameworks, source location, exceptions raised.`,
        parameters: {
          type: 'object',
          properties: {
            diagramIds: {
              type: 'array',
              description: 'Array of diagram ids',
              items: {
                type: 'string',
              },
            },
          },
          // Only declared properties are required; the search text is always the user's question.
          required: ['diagramIds'],
        },
      },
      {
        function: lookupSourceCode,
        description: `Get the source code for a specific function.`,
        parameters: {
          type: 'object',
          properties: {
            locations: {
              type: 'array',
              description: `An array of source code locations in the format <path>[:<line number>]. Line number can be omitted if it's not known.`,
              items: {
                type: 'string',
              },
            },
          },
          required: ['locations'],
        },
      },
    ],
  });

  // Function calls are always logged; their (potentially large) results only in verbose mode.
  runFunctions.on('functionCall', (data) => {
    warn(JSON.stringify(data, null, 2));
  });
  runFunctions.on('finalFunctionCall', (data) => {
    warn(JSON.stringify(data, null, 2));
  });
  runFunctions.on('functionCallResult', (data) => {
    if (verbose()) warn(JSON.stringify(data));
  });
  runFunctions.on('finalFunctionCallResult', (data) => {
    if (verbose()) warn(JSON.stringify(data));
  });

  const response = await runFunctions.finalContent();
  if (!response) {
    warn(`No response from OpenAI`);
    return;
  }
  console.log(response);
  console.log('');
  console.log('The best matching sequence diagram events are:');
  console.log('');
  diagramDetailsResults.sort((a, b) => b.score - a.score);
  for (const event of diagramDetailsResults) {
    console.log(` ${event.fqid} (${event.score})`);
  }
};
0 commit comments