@@ -3,14 +3,14 @@ import OpenAI from 'openai';
33import lunr from 'lunr' ;
44import { ChatCompletionMessageParam } from 'openai/resources' ;
55import { readFile } from 'fs/promises' ;
6- import { dirname , join } from 'path' ;
76import { AppMapFilter , CodeObject , Event , Metadata , buildAppMap } from '@appland/models' ;
8- import { Action , NodeType , Specification , buildDiagram , nodeName } from '@appland/sequence-diagram' ;
7+ import { Action , Specification , buildDiagram , nodeName } from '@appland/sequence-diagram' ;
98
109import { handleWorkingDirectory } from '../../lib/handleWorkingDirectory' ;
1110import { locateAppMapDir } from '../../lib/locateAppMapDir' ;
12- import { exists , processNamedFiles , verbose } from '../../utils' ;
13- import assert , { match } from 'assert' ;
11+ import { exists , verbose } from '../../utils' ;
12+ import FindAppMaps , { SearchResult as FindAppMapSearchResult } from '../../fulltext/FindAppMaps' ;
13+ import FindEvents , { SearchResult as FindEventSearchResult } from '../../fulltext/FindEvents' ;
1414
1515export const command = 'ask <question>' ;
1616export const describe =
@@ -20,6 +20,16 @@ export const builder = (args) => {
2020 args . positional ( 'question' , {
2121 describe : 'plain text question about the code base' ,
2222 } ) ;
23+ args . option ( 'max-diagram-matches' , {
24+ describe : 'maximum number of diagram matches to return' ,
25+ type : 'number' ,
26+ default : 5 ,
27+ } ) ;
28+ args . option ( 'max-code-object-matches' , {
29+ describe : 'maximum number of code objects matches to return for each diagram' ,
30+ type : 'number' ,
31+ default : 5 ,
32+ } ) ;
2333 args . option ( 'directory' , {
2434 describe : 'program working directory' ,
2535 type : 'string' ,
@@ -47,7 +57,7 @@ type SerializedCodeObject = {
4757
// Attributes rendered as sorted `key=value` pairs next to each root event in the
// diagram summary text built by getDiagramDetails.
4858type ActionInfo = {
// Set from event.elapsedTime only when that value is truthy.
4959 elapsed ?: number ;
50- eventIds ?: string ;
// This change replaces the optional comma-joined `eventIds` string with a single
// required numeric id taken from event.id.
60+ eventId : number ;
// Set from event.codeObject.location when the code object has one.
5161 location ?: string ;
5262} ;
5363
@@ -77,7 +87,7 @@ type EventInfo = {
// One element of the array returned by getDiagramDetails; one is pushed per requested diagram id.
7787type DiagramDetailsResult = {
// Newline-joined lines of text describing the diagram.
7888 summary : string ;
// AppMap metadata; getDiagramDetails deletes `git`, `client` and `source_location` before returning it.
7989 metadata : Metadata ;
80- keyEvents : EventInfo [ ] ;
// After this change, holds the FindEvents search results for the user's question
// rather than locally built EventInfo records.
90+ keyEvents : FindEventSearchResult [ ] ;
8191} ;
8292
8393const isCamelized = ( str : string ) : boolean => {
@@ -105,88 +115,14 @@ const splitCamelized = (str: string): string => {
105115 return result . join ( ' ' ) ;
106116} ;
107117
// Removed by this change: the handler now constructs `FindAppMaps` from
// '../../fulltext/FindAppMaps' where it previously constructed `Ask`.
// The class built an in-memory lunr index with one document per diagram and
// answered a search with at most five diagram ids.
108- class Ask {
109- idx : lunr . Index | undefined ;
110- public search : string | undefined ;
111-
112- constructor ( public appmapDir : string ) { }
113-
// Visits every 'metadata.json' under appmapDir, reads the sibling 'classMap.json',
// and adds one lunr document per diagram. The document id is the directory that
// contains metadata.json.
114- async initialize ( ) {
115- const { appmapDir } = this ;
116-
117- const documents = new Array < any > ( ) ;
118- await processNamedFiles ( appmapDir , 'metadata.json' , async ( metadataFile ) => {
119- const metadata = JSON . parse ( await readFile ( metadataFile , 'utf-8' ) ) as Metadata ;
120- const diagramId = dirname ( metadataFile ) ;
121- const classMap = JSON . parse (
122- await readFile ( join ( diagramId , 'classMap.json' ) , 'utf-8' )
123- ) as SerializedCodeObject [ ] ;
124- const queries = new Array < string > ( ) ;
125- const codeObjects = new Array < string > ( ) ;
126- const routes = new Array < string > ( ) ;
127- const externalRoutes = new Array < string > ( ) ;
128-
// Recursively sorts code object names into buckets by type. Names of any other
// type are passed through splitCamelized before being stored.
129- const collectFunction = ( co : SerializedCodeObject ) => {
130- if ( co . type === 'query' ) queries . push ( co . name ) ;
131- else if ( co . type === 'route' ) routes . push ( co . name ) ;
132- else if ( co . type === 'external-route' ) externalRoutes . push ( co . name ) ;
133- else codeObjects . push ( splitCamelized ( co . name ) ) ;
134-
135- co . children ?. forEach ( ( child ) => {
136- collectFunction ( child ) ;
137- } ) ;
138- } ;
139- classMap . forEach ( ( co ) => collectFunction ( co ) ) ;
140-
// Each bucket is flattened to a single space-separated string field.
141- documents . push ( {
142- id : diagramId ,
143- name : metadata . name ,
144- source_location : metadata . source_location ,
145- code_objects : codeObjects . join ( ' ' ) ,
146- queries : queries . join ( ' ' ) ,
147- routes : routes . join ( ' ' ) ,
148- external_routes : externalRoutes . join ( ' ' ) ,
149- } ) ;
150- } ) ;
151-
152- warn ( `Indexing ${ documents . length } diagrams` ) ;
153-
154- this . idx = lunr ( function ( ) {
155- this . ref ( 'id' ) ;
156- this . field ( 'name' ) ;
157- this . field ( 'source_location' ) ;
158- this . field ( 'code_objects' ) ;
159- this . field ( 'queries' ) ;
160- this . field ( 'routes' ) ;
161- this . field ( 'external_routes' ) ;
162-
// NOTE(review): unlike the per-diagram index later in this diff, the '-' in this
// character class is not escaped, so it may be read as a range — confirm intent.
163- this . tokenizer . separator = / [ \s / - _ : # . ] + / ;
164-
165- for ( const doc of documents ) this . add ( doc ) ;
166- } ) ;
167- }
168-
// Searches the index with `this.search` and returns the diagram ids of the
// matches, keeping only the first five. Asserts that both the index and the
// search string have been set.
169- async fetchDiagrams ( ) : Promise < SearchDiagramResult [ ] > {
170- const { search } = this ;
171- assert ( this . idx ) ;
172- assert ( search ) ;
173- let matches = this . idx . search ( search ) ;
174- warn ( `Got ${ matches . length } matches for search ${ search } ` ) ;
175- if ( matches . length > 5 ) {
176- warn ( `Limiting to the top 5 matches` ) ;
177- matches = matches . slice ( 0 , 5 ) ;
178- }
179- return matches . map ( ( match ) => ( { diagramId : match . ref } ) ) ;
180- }
181- }
182-
183118export const handler = async ( argv : any ) => {
184119 verbose ( argv . verbose ) ;
185120 handleWorkingDirectory ( argv . directory ) ;
121+ const { question, maxCodeObjectMatches, maxDiagramMatches } = argv ;
186122 const appmapDir = await locateAppMapDir ( argv . appmapDir ) ;
187123
188- const ask = new Ask ( appmapDir ) ;
189- await ask . initialize ( ) ;
124+ const findAppMaps = new FindAppMaps ( appmapDir ) ;
125+ await findAppMaps . initialize ( ) ;
190126
191127 function showPlan ( paramStr : string ) {
192128 let params : any ;
@@ -199,132 +135,50 @@ export const handler = async (argv: any) => {
199135 warn ( `AI Plan: ${ params . plan } ` ) ;
200136 }
201137
// Registered further down in this handler as the "fetchDiagrams" function entry.
// The removed async version took an unused `paramStr` and delegated to
// ask.fetchDiagrams(). The added version is synchronous, takes no arguments, and
// calls findAppMaps.search with the question, passing `maxDiagramMatches` as `maxResults`.
202- async function fetchDiagrams ( paramStr : string ) : Promise < SearchDiagramResult [ ] > {
203- warn ( `Fecching diagrams` ) ;
204- return await ask . fetchDiagrams ( ) ;
138+ function fetchDiagrams ( ) : FindAppMapSearchResult [ ] {
139+ warn ( `Fetching diagrams` ) ;
140+ return findAppMaps . search ( question , { maxResults : maxDiagramMatches } ) ;
205141 }
206142
143+ const diagramDetailsResults = new Array < FindEventSearchResult > ( ) ;
144+
207145 async function getDiagramDetails ( paramStr : string ) : Promise < DiagramDetailsResult [ ] > {
208146 const params = JSON . parse ( paramStr ) as DiagramDetailsParam ;
209147 const { diagramIds } = params ;
210- warn ( `Getting details for diagram ${ diagramIds } , retrieved by "${ ask . search } "` ) ;
148+ warn ( `Getting details for diagram ${ diagramIds } , retrieved by "${ question } "` ) ;
211149 const result = new Array < DiagramDetailsResult > ( ) ;
212150 for ( const diagramId of diagramIds ) {
213151 warn ( `Loading AppMap ${ diagramId } and pruning to 1MB` ) ;
214152
215- const appmapFile = [ diagramId , 'appmap.json' ] . join ( '.' ) ;
216- const prunedAppMap = buildAppMap ( )
217- . source ( await readFile ( appmapFile , 'utf-8' ) )
218- . prune ( 1 * 1000 * 1000 )
219- . build ( ) ;
220-
221- warn ( `Built AppMap with ${ prunedAppMap . events . length } events.` ) ;
222- warn ( `Applying default AppMap filters.` ) ;
223- const filter = new AppMapFilter ( ) ;
224- if ( prunedAppMap . metadata . language ?. name !== 'java' )
225- filter . declutter . hideExternalPaths . on = true ;
226- filter . declutter . limitRootEvents . on = true ;
227- const filteredAppMap = filter . filter ( prunedAppMap , [ ] ) ;
228- warn ( `Filtered AppMap has ${ filteredAppMap . events . length } events.` ) ;
229-
230- const codeObjectsByFqid = new Map < string , CodeObject > ( ) ;
231- const eventsById = filteredAppMap . events . reduce ( ( map , event ) => {
232- map . set ( event . id , event ) ;
233- return map ;
234- } , new Map < number , Event > ( ) ) ;
235- const specification = Specification . build ( filteredAppMap , { loops : true } ) ;
236-
237- warn ( `Indexing AppMap` ) ;
153+ const index = new FindEvents ( diagramId ) ;
154+ index . maxSize = 1024 * 1024 ;
155+ await index . initialize ( ) ;
156+ const searchResults = index . search ( question , { maxResults : maxCodeObjectMatches } ) ;
157+ diagramDetailsResults . push ( ...searchResults ) ;
238158
239159 const diagramText = new Array < string > ( ) ;
240- const idx = lunr ( function ( ) {
241- this . ref ( 'fqid' ) ;
242- this . field ( 'name' ) ;
243- this . tokenizer . separator = / [ \s / \- _ : # . ] + / ;
244-
245- const self = this ;
246- const indexEvent = ( event : Event , depth = 0 ) => {
247- // These will already be well-represented by the diagram summary.
248- if ( depth > 0 ) {
249- const co = event . codeObject ;
250- if ( ! codeObjectsByFqid . has ( co . fqid ) ) {
251- codeObjectsByFqid . set ( co . fqid , co ) ;
252- let boost = 1 ;
253- if ( co . location ) boost *= 2 ;
254- self . add (
255- {
256- fqid : co . fqid ,
257- name : splitCamelized ( co . id ) ,
258- } ,
259- {
260- boost,
261- }
262- ) ;
263- }
264- }
265- event . children . forEach ( ( child ) => indexEvent ( child , depth + 1 ) ) ;
266- } ;
267- filteredAppMap . rootEvents ( ) . forEach ( ( event ) => indexEvent ( event ) ) ;
268-
269- const diagram = buildDiagram ( appmapFile , filteredAppMap , specification ) ;
270- const MAX_DEPTH = 0 ;
271- const collectAction = ( action : Action , depth = 0 ) => {
272- if ( depth <= MAX_DEPTH ) {
273- const actionInfo : ActionInfo = { } ;
274- if ( action . eventIds . length > 0 ) {
275- actionInfo . eventIds = action . eventIds . join ( ',' ) ;
276- const co = eventsById . get ( action . eventIds [ 0 ] ) ?. codeObject ;
277- if ( co ) {
278- if ( co . location ) actionInfo . location = co . location ;
279- } else {
280- warn ( `No code object for event ${ action . eventIds [ 0 ] } ` ) ;
281- }
282- }
283- const actionInfoStr = Object . keys ( actionInfo )
284- . sort ( )
285- . map ( ( key ) => {
286- const value = actionInfo [ key ] ;
287- return `${ key } =${ value } ` ;
288- } )
289- . join ( ',' ) ;
290- const indent = ' ' . repeat ( depth ) ;
291- diagramText . push (
292- `${ indent } ${ nodeName ( action ) } ${ actionInfoStr !== '' ? ` (${ actionInfoStr } )` : '' } `
293- ) ;
294- }
295- if ( action . children ) {
296- action . children . forEach ( ( child ) => collectAction ( child , depth + 1 ) ) ;
297- }
298- } ;
299- diagram . rootActions . forEach ( ( action ) => collectAction ( action ) ) ;
300- } ) ;
301-
302- assert ( ask . search ) ;
303- let searchResult = idx . search ( ask . search ) ;
304- warn ( `Matched ${ searchResult . length } code objects in the diagram` ) ;
305- if ( searchResult . length > 5 ) {
306- warn ( `Limiting to the top 5 matches` ) ;
307- searchResult = searchResult . slice ( 0 , 5 ) ;
160+ for ( const event of index . appmap . rootEvents ( ) ) {
161+ const actionInfo : ActionInfo = { eventId : event . id } ;
162+ if ( event . elapsedTime ) actionInfo . elapsed = event . elapsedTime ;
163+ if ( event . codeObject . location ) actionInfo . location = event . codeObject . location ;
164+ const actionInfoStr = Object . keys ( actionInfo )
165+ . sort ( )
166+ . map ( ( key ) => {
167+ const value = actionInfo [ key ] ;
168+ return `${ key } =${ value } ` ;
169+ } )
170+ . join ( ',' ) ;
171+ diagramText . push (
172+ `${ event . codeObject . id } ${ actionInfoStr !== '' ? ` (${ actionInfoStr } )` : '' } `
173+ ) ;
308174 }
309- const keyEvents = searchResult . map ( ( match ) => {
310- const co = codeObjectsByFqid . get ( match . ref ) ;
311- assert ( co ) ;
312- const result : EventInfo = {
313- name : co . id ,
314- fqid : co . fqid ,
315- elapsed : co . allEvents . reduce ( ( sum , event ) => sum + ( event . elapsedTime || 0 ) , 0 ) ,
316- } ;
317- if ( co . location ) result . sourceLocation = co . location ;
318-
319- return result ;
320- } ) ;
321-
322- const metadata = prunedAppMap . metadata ;
175+
176+ const metadata = index . appmap . metadata ;
323177 delete metadata [ 'git' ] ;
324178 delete ( metadata as any ) [ 'client' ] ;
325179 // TODO: Do we want the AI to read the source code of the test case?
326180 delete metadata [ 'source_location' ] ;
327- result . push ( { metadata, summary : diagramText . join ( '\n' ) , keyEvents } ) ;
181+ result . push ( { metadata, summary : diagramText . join ( '\n' ) , keyEvents : searchResults } ) ;
328182 }
329183
330184 return result ;
@@ -379,20 +233,15 @@ export const handler = async (argv: any) => {
379233 return result ;
380234 }
381235
382- const question = argv . question ;
383- ask . search = question ;
384-
385236 const systemMessages : ChatCompletionMessageParam [ ] = [
386237 'You are an assistant that answers questions about the design and architecture of code.' ,
387238 'You answer these questions by accessing a knowledge base of sequence diagrams.' ,
388239 'Each sequence diagram conists of a series of events, such as function calls, HTTP server requests, SQL queries, etc.' ,
389240 'Before each function call, call "showPlan" function with a Markdown document that describes your strategy for answering the question.' ,
390- `After the first "showPlan", begin by calling the "fetchDiagrams" function to obtain the diagrams that are most relevant to the user's question.` ,
391- 'Next, call "showPlan", then call "getDiagramDetails" function get details about the events that occur with in the matching diagrams.' ,
392- `Evaluate which diagrams are most relevant to the user's problem.` ,
241+ `Begin by calling the "fetchDiagrams" function to obtain the diagrams that are most relevant to the user's question.` ,
242+ 'Next, use the "getDiagramDetails" function get details about the events that occur with in the matching diagrams.' ,
393243 'Enhance your answer by using "lookupSourceCode" function to get the source code for the most relevant functions.' ,
394244 'Finally, respond with a Markdown document that summarizes the diagrams and answers the question.' ,
395- 'Subsequent mentions of the function should use backticks but should not be links.' ,
396245 'Never emit phrases like "note that the actual behavior may vary between different applications"' ,
397246 ] . map ( ( msg ) => ( {
398247 content : msg ,
@@ -428,7 +277,7 @@ export const handler = async (argv: any) => {
428277 } ,
429278 {
430279 function : fetchDiagrams ,
431- description : `List sequence diagrams that match a keyword. Each response includes a diagram id, plus information about the events (function calls, HTTP server requests, SQL queries, etc) within that diagram that match the search term .` ,
280+ description : `Obtain sequence diagrams that are relevant to the user's question. The response is a list of diagram ids .` ,
432281 parameters : {
433282 type : 'object' ,
434283 properties : { } ,
@@ -472,16 +321,16 @@ export const handler = async (argv: any) => {
472321 } ) ;
473322
474323 runFunctions . on ( 'functionCall' , ( data ) => {
475- warn ( JSON . stringify ( data ) ) ;
324+ warn ( JSON . stringify ( data , null , 2 ) ) ;
476325 } ) ;
477326 runFunctions . on ( 'finalFunctionCall' , ( data ) => {
478- warn ( JSON . stringify ( data ) ) ;
327+ warn ( JSON . stringify ( data , null , 2 ) ) ;
479328 } ) ;
480329 runFunctions . on ( 'functionCallResult' , ( data ) => {
481- warn ( JSON . stringify ( data ) ) ;
330+ if ( verbose ( ) ) warn ( JSON . stringify ( data ) ) ;
482331 } ) ;
483332 runFunctions . on ( 'finalFunctionCallResult' , ( data ) => {
484- warn ( JSON . stringify ( data ) ) ;
333+ if ( verbose ( ) ) warn ( JSON . stringify ( data ) ) ;
485334 } ) ;
486335
487336 const response = await runFunctions . finalContent ( ) ;
@@ -490,4 +339,11 @@ export const handler = async (argv: any) => {
490339 return ;
491340 }
492341 console . log ( response ) ;
342+ console . log ( '' ) ;
343+ console . log ( 'The best matching sequence diagram events are:' ) ;
344+ console . log ( '' ) ;
345+ diagramDetailsResults . sort ( ( a , b ) => b . score - a . score ) ;
346+ for ( const event of diagramDetailsResults ) {
347+ console . log ( ` ${ event . fqid } (${ event . score } )` ) ;
348+ }
493349} ;
0 commit comments