Skip to content

Commit bf66b0d

Browse files
committed
feat: Refactor full-text search into its own package
1 parent 00a66d3 commit bf66b0d

File tree

4 files changed

+361
-205
lines changed

4 files changed

+361
-205
lines changed

packages/cli/src/cmds/ask/ask.ts

Lines changed: 61 additions & 205 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,14 @@ import OpenAI from 'openai';
33
import lunr from 'lunr';
44
import { ChatCompletionMessageParam } from 'openai/resources';
55
import { readFile } from 'fs/promises';
6-
import { dirname, join } from 'path';
76
import { AppMapFilter, CodeObject, Event, Metadata, buildAppMap } from '@appland/models';
8-
import { Action, NodeType, Specification, buildDiagram, nodeName } from '@appland/sequence-diagram';
7+
import { Action, Specification, buildDiagram, nodeName } from '@appland/sequence-diagram';
98

109
import { handleWorkingDirectory } from '../../lib/handleWorkingDirectory';
1110
import { locateAppMapDir } from '../../lib/locateAppMapDir';
12-
import { exists, processNamedFiles, verbose } from '../../utils';
13-
import assert, { match } from 'assert';
11+
import { exists, verbose } from '../../utils';
12+
import FindAppMaps, { SearchResult as FindAppMapSearchResult } from '../../fulltext/FindAppMaps';
13+
import FindEvents, { SearchResult as FindEventSearchResult } from '../../fulltext/FindEvents';
1414

1515
export const command = 'ask <question>';
1616
export const describe =
@@ -20,6 +20,16 @@ export const builder = (args) => {
2020
args.positional('question', {
2121
describe: 'plain text question about the code base',
2222
});
23+
args.option('max-diagram-matches', {
24+
describe: 'maximum number of diagram matches to return',
25+
type: 'number',
26+
default: 5,
27+
});
28+
args.option('max-code-object-matches', {
29+
describe: 'maximum number of code objects matches to return for each diagram',
30+
type: 'number',
31+
default: 5,
32+
});
2333
args.option('directory', {
2434
describe: 'program working directory',
2535
type: 'string',
@@ -47,7 +57,7 @@ type SerializedCodeObject = {
4757

4858
type ActionInfo = {
4959
elapsed?: number;
50-
eventIds?: string;
60+
eventId: number;
5161
location?: string;
5262
};
5363

@@ -77,7 +87,7 @@ type EventInfo = {
7787
type DiagramDetailsResult = {
7888
summary: string;
7989
metadata: Metadata;
80-
keyEvents: EventInfo[];
90+
keyEvents: FindEventSearchResult[];
8191
};
8292

8393
const isCamelized = (str: string): boolean => {
@@ -105,88 +115,14 @@ const splitCamelized = (str: string): string => {
105115
return result.join(' ');
106116
};
107117

108-
class Ask {
109-
idx: lunr.Index | undefined;
110-
public search: string | undefined;
111-
112-
constructor(public appmapDir: string) {}
113-
114-
async initialize() {
115-
const { appmapDir } = this;
116-
117-
const documents = new Array<any>();
118-
await processNamedFiles(appmapDir, 'metadata.json', async (metadataFile) => {
119-
const metadata = JSON.parse(await readFile(metadataFile, 'utf-8')) as Metadata;
120-
const diagramId = dirname(metadataFile);
121-
const classMap = JSON.parse(
122-
await readFile(join(diagramId, 'classMap.json'), 'utf-8')
123-
) as SerializedCodeObject[];
124-
const queries = new Array<string>();
125-
const codeObjects = new Array<string>();
126-
const routes = new Array<string>();
127-
const externalRoutes = new Array<string>();
128-
129-
const collectFunction = (co: SerializedCodeObject) => {
130-
if (co.type === 'query') queries.push(co.name);
131-
else if (co.type === 'route') routes.push(co.name);
132-
else if (co.type === 'external-route') externalRoutes.push(co.name);
133-
else codeObjects.push(splitCamelized(co.name));
134-
135-
co.children?.forEach((child) => {
136-
collectFunction(child);
137-
});
138-
};
139-
classMap.forEach((co) => collectFunction(co));
140-
141-
documents.push({
142-
id: diagramId,
143-
name: metadata.name,
144-
source_location: metadata.source_location,
145-
code_objects: codeObjects.join(' '),
146-
queries: queries.join(' '),
147-
routes: routes.join(' '),
148-
external_routes: externalRoutes.join(' '),
149-
});
150-
});
151-
152-
warn(`Indexing ${documents.length} diagrams`);
153-
154-
this.idx = lunr(function () {
155-
this.ref('id');
156-
this.field('name');
157-
this.field('source_location');
158-
this.field('code_objects');
159-
this.field('queries');
160-
this.field('routes');
161-
this.field('external_routes');
162-
163-
this.tokenizer.separator = /[\s/-_:#.]+/;
164-
165-
for (const doc of documents) this.add(doc);
166-
});
167-
}
168-
169-
async fetchDiagrams(): Promise<SearchDiagramResult[]> {
170-
const { search } = this;
171-
assert(this.idx);
172-
assert(search);
173-
let matches = this.idx.search(search);
174-
warn(`Got ${matches.length} matches for search ${search}`);
175-
if (matches.length > 5) {
176-
warn(`Limiting to the top 5 matches`);
177-
matches = matches.slice(0, 5);
178-
}
179-
return matches.map((match) => ({ diagramId: match.ref }));
180-
}
181-
}
182-
183118
export const handler = async (argv: any) => {
184119
verbose(argv.verbose);
185120
handleWorkingDirectory(argv.directory);
121+
const { question, maxCodeObjectMatches, maxDiagramMatches } = argv;
186122
const appmapDir = await locateAppMapDir(argv.appmapDir);
187123

188-
const ask = new Ask(appmapDir);
189-
await ask.initialize();
124+
const findAppMaps = new FindAppMaps(appmapDir);
125+
await findAppMaps.initialize();
190126

191127
function showPlan(paramStr: string) {
192128
let params: any;
@@ -199,132 +135,50 @@ export const handler = async (argv: any) => {
199135
warn(`AI Plan: ${params.plan}`);
200136
}
201137

202-
async function fetchDiagrams(paramStr: string): Promise<SearchDiagramResult[]> {
203-
warn(`Fecching diagrams`);
204-
return await ask.fetchDiagrams();
138+
function fetchDiagrams(): FindAppMapSearchResult[] {
139+
warn(`Fetching diagrams`);
140+
return findAppMaps.search(question, { maxResults: maxDiagramMatches });
205141
}
206142

143+
const diagramDetailsResults = new Array<FindEventSearchResult>();
144+
207145
async function getDiagramDetails(paramStr: string): Promise<DiagramDetailsResult[]> {
208146
const params = JSON.parse(paramStr) as DiagramDetailsParam;
209147
const { diagramIds } = params;
210-
warn(`Getting details for diagram ${diagramIds}, retrieved by "${ask.search}"`);
148+
warn(`Getting details for diagram ${diagramIds}, retrieved by "${question}"`);
211149
const result = new Array<DiagramDetailsResult>();
212150
for (const diagramId of diagramIds) {
213151
warn(`Loading AppMap ${diagramId} and pruning to 1MB`);
214152

215-
const appmapFile = [diagramId, 'appmap.json'].join('.');
216-
const prunedAppMap = buildAppMap()
217-
.source(await readFile(appmapFile, 'utf-8'))
218-
.prune(1 * 1000 * 1000)
219-
.build();
220-
221-
warn(`Built AppMap with ${prunedAppMap.events.length} events.`);
222-
warn(`Applying default AppMap filters.`);
223-
const filter = new AppMapFilter();
224-
if (prunedAppMap.metadata.language?.name !== 'java')
225-
filter.declutter.hideExternalPaths.on = true;
226-
filter.declutter.limitRootEvents.on = true;
227-
const filteredAppMap = filter.filter(prunedAppMap, []);
228-
warn(`Filtered AppMap has ${filteredAppMap.events.length} events.`);
229-
230-
const codeObjectsByFqid = new Map<string, CodeObject>();
231-
const eventsById = filteredAppMap.events.reduce((map, event) => {
232-
map.set(event.id, event);
233-
return map;
234-
}, new Map<number, Event>());
235-
const specification = Specification.build(filteredAppMap, { loops: true });
236-
237-
warn(`Indexing AppMap`);
153+
const index = new FindEvents(diagramId);
154+
index.maxSize = 1024 * 1024;
155+
await index.initialize();
156+
const searchResults = index.search(question, { maxResults: maxCodeObjectMatches });
157+
diagramDetailsResults.push(...searchResults);
238158

239159
const diagramText = new Array<string>();
240-
const idx = lunr(function () {
241-
this.ref('fqid');
242-
this.field('name');
243-
this.tokenizer.separator = /[\s/\-_:#.]+/;
244-
245-
const self = this;
246-
const indexEvent = (event: Event, depth = 0) => {
247-
// These will already be well-represented by the diagram summary.
248-
if (depth > 0) {
249-
const co = event.codeObject;
250-
if (!codeObjectsByFqid.has(co.fqid)) {
251-
codeObjectsByFqid.set(co.fqid, co);
252-
let boost = 1;
253-
if (co.location) boost *= 2;
254-
self.add(
255-
{
256-
fqid: co.fqid,
257-
name: splitCamelized(co.id),
258-
},
259-
{
260-
boost,
261-
}
262-
);
263-
}
264-
}
265-
event.children.forEach((child) => indexEvent(child, depth + 1));
266-
};
267-
filteredAppMap.rootEvents().forEach((event) => indexEvent(event));
268-
269-
const diagram = buildDiagram(appmapFile, filteredAppMap, specification);
270-
const MAX_DEPTH = 0;
271-
const collectAction = (action: Action, depth = 0) => {
272-
if (depth <= MAX_DEPTH) {
273-
const actionInfo: ActionInfo = {};
274-
if (action.eventIds.length > 0) {
275-
actionInfo.eventIds = action.eventIds.join(',');
276-
const co = eventsById.get(action.eventIds[0])?.codeObject;
277-
if (co) {
278-
if (co.location) actionInfo.location = co.location;
279-
} else {
280-
warn(`No code object for event ${action.eventIds[0]}`);
281-
}
282-
}
283-
const actionInfoStr = Object.keys(actionInfo)
284-
.sort()
285-
.map((key) => {
286-
const value = actionInfo[key];
287-
return `${key}=${value}`;
288-
})
289-
.join(',');
290-
const indent = ' '.repeat(depth);
291-
diagramText.push(
292-
`${indent}${nodeName(action)}${actionInfoStr !== '' ? ` (${actionInfoStr})` : ''}`
293-
);
294-
}
295-
if (action.children) {
296-
action.children.forEach((child) => collectAction(child, depth + 1));
297-
}
298-
};
299-
diagram.rootActions.forEach((action) => collectAction(action));
300-
});
301-
302-
assert(ask.search);
303-
let searchResult = idx.search(ask.search);
304-
warn(`Matched ${searchResult.length} code objects in the diagram`);
305-
if (searchResult.length > 5) {
306-
warn(`Limiting to the top 5 matches`);
307-
searchResult = searchResult.slice(0, 5);
160+
for (const event of index.appmap.rootEvents()) {
161+
const actionInfo: ActionInfo = { eventId: event.id };
162+
if (event.elapsedTime) actionInfo.elapsed = event.elapsedTime;
163+
if (event.codeObject.location) actionInfo.location = event.codeObject.location;
164+
const actionInfoStr = Object.keys(actionInfo)
165+
.sort()
166+
.map((key) => {
167+
const value = actionInfo[key];
168+
return `${key}=${value}`;
169+
})
170+
.join(',');
171+
diagramText.push(
172+
`${event.codeObject.id}${actionInfoStr !== '' ? ` (${actionInfoStr})` : ''}`
173+
);
308174
}
309-
const keyEvents = searchResult.map((match) => {
310-
const co = codeObjectsByFqid.get(match.ref);
311-
assert(co);
312-
const result: EventInfo = {
313-
name: co.id,
314-
fqid: co.fqid,
315-
elapsed: co.allEvents.reduce((sum, event) => sum + (event.elapsedTime || 0), 0),
316-
};
317-
if (co.location) result.sourceLocation = co.location;
318-
319-
return result;
320-
});
321-
322-
const metadata = prunedAppMap.metadata;
175+
176+
const metadata = index.appmap.metadata;
323177
delete metadata['git'];
324178
delete (metadata as any)['client'];
325179
// TODO: Do we want the AI to read the source code of the test case?
326180
delete metadata['source_location'];
327-
result.push({ metadata, summary: diagramText.join('\n'), keyEvents });
181+
result.push({ metadata, summary: diagramText.join('\n'), keyEvents: searchResults });
328182
}
329183

330184
return result;
@@ -379,20 +233,15 @@ export const handler = async (argv: any) => {
379233
return result;
380234
}
381235

382-
const question = argv.question;
383-
ask.search = question;
384-
385236
const systemMessages: ChatCompletionMessageParam[] = [
386237
'You are an assistant that answers questions about the design and architecture of code.',
387238
'You answer these questions by accessing a knowledge base of sequence diagrams.',
388239
'Each sequence diagram conists of a series of events, such as function calls, HTTP server requests, SQL queries, etc.',
389240
'Before each function call, call "showPlan" function with a Markdown document that describes your strategy for answering the question.',
390-
`After the first "showPlan", begin by calling the "fetchDiagrams" function to obtain the diagrams that are most relevant to the user's question.`,
391-
'Next, call "showPlan", then call "getDiagramDetails" function get details about the events that occur with in the matching diagrams.',
392-
`Evaluate which diagrams are most relevant to the user's problem.`,
241+
`Begin by calling the "fetchDiagrams" function to obtain the diagrams that are most relevant to the user's question.`,
242+
'Next, use the "getDiagramDetails" function get details about the events that occur with in the matching diagrams.',
393243
'Enhance your answer by using "lookupSourceCode" function to get the source code for the most relevant functions.',
394244
'Finally, respond with a Markdown document that summarizes the diagrams and answers the question.',
395-
'Subsequent mentions of the function should use backticks but should not be links.',
396245
'Never emit phrases like "note that the actual behavior may vary between different applications"',
397246
].map((msg) => ({
398247
content: msg,
@@ -428,7 +277,7 @@ export const handler = async (argv: any) => {
428277
},
429278
{
430279
function: fetchDiagrams,
431-
description: `List sequence diagrams that match a keyword. Each response includes a diagram id, plus information about the events (function calls, HTTP server requests, SQL queries, etc) within that diagram that match the search term.`,
280+
description: `Obtain sequence diagrams that are relevant to the user's question. The response is a list of diagram ids.`,
432281
parameters: {
433282
type: 'object',
434283
properties: {},
@@ -472,16 +321,16 @@ export const handler = async (argv: any) => {
472321
});
473322

474323
runFunctions.on('functionCall', (data) => {
475-
warn(JSON.stringify(data));
324+
warn(JSON.stringify(data, null, 2));
476325
});
477326
runFunctions.on('finalFunctionCall', (data) => {
478-
warn(JSON.stringify(data));
327+
warn(JSON.stringify(data, null, 2));
479328
});
480329
runFunctions.on('functionCallResult', (data) => {
481-
warn(JSON.stringify(data));
330+
if (verbose()) warn(JSON.stringify(data));
482331
});
483332
runFunctions.on('finalFunctionCallResult', (data) => {
484-
warn(JSON.stringify(data));
333+
if (verbose()) warn(JSON.stringify(data));
485334
});
486335

487336
const response = await runFunctions.finalContent();
@@ -490,4 +339,11 @@ export const handler = async (argv: any) => {
490339
return;
491340
}
492341
console.log(response);
342+
console.log('');
343+
console.log('The best matching sequence diagram events are:');
344+
console.log('');
345+
diagramDetailsResults.sort((a, b) => b.score - a.score);
346+
for (const event of diagramDetailsResults) {
347+
console.log(` ${event.fqid} (${event.score})`);
348+
}
493349
};

0 commit comments

Comments
 (0)