From 37187a7d778dc7060d8b0185b552a348cd462695 Mon Sep 17 00:00:00 2001 From: Kevin Gilpin Date: Wed, 6 Nov 2024 20:42:56 -0500 Subject: [PATCH 01/12] refactor: Export isNodeError --- packages/cli/src/fingerprint/fingerprintQueue.ts | 5 +---- packages/cli/src/utils.ts | 5 +++++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/packages/cli/src/fingerprint/fingerprintQueue.ts b/packages/cli/src/fingerprint/fingerprintQueue.ts index 76f28c9385..362f858d07 100644 --- a/packages/cli/src/fingerprint/fingerprintQueue.ts +++ b/packages/cli/src/fingerprint/fingerprintQueue.ts @@ -1,10 +1,7 @@ import { queue, QueueObject } from 'async'; import FileTooLargeError from './fileTooLargeError'; import Fingerprinter from './fingerprinter'; - -function isNodeError(error: unknown, code?: string): error is NodeJS.ErrnoException { - return error instanceof Error && (!code || (error as NodeJS.ErrnoException).code === code); -} +import { isNodeError } from '../utils'; export default class FingerprintQueue { public handler: Fingerprinter; diff --git a/packages/cli/src/utils.ts b/packages/cli/src/utils.ts index 6d51ec2e7f..61dbd1240b 100644 --- a/packages/cli/src/utils.ts +++ b/packages/cli/src/utils.ts @@ -175,6 +175,11 @@ export async function processNamedFiles( return matchCount; } +export function isNodeError(error: unknown, code?: string): error is NodeJS.ErrnoException { + return error instanceof Error && (!code || (error as NodeJS.ErrnoException).code === code); +} + + /** * Lists all matching files in a directory, and passes them to an optional function. 
*/ From 1bcaa89f367a5f7cbdb84b53a519ed52004da99b Mon Sep 17 00:00:00 2001 From: Kevin Gilpin Date: Mon, 18 Nov 2024 09:22:43 -0500 Subject: [PATCH 02/12] fix: Search for 'code' Remove the word code from stop words --- packages/search/src/query-keywords.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/search/src/query-keywords.ts b/packages/search/src/query-keywords.ts index e3835840a8..de7b11bc93 100644 --- a/packages/search/src/query-keywords.ts +++ b/packages/search/src/query-keywords.ts @@ -9,7 +9,6 @@ const STOP_WORDS = new Set([ 'at', 'be', 'by', - 'code', 'for', 'from', 'has', From aa7ec8bb4b0cb0af3f967a586f9989fc846af121 Mon Sep 17 00:00:00 2001 From: Kevin Gilpin Date: Tue, 12 Nov 2024 17:29:07 -0500 Subject: [PATCH 03/12] fix: Import and use CodeObjectType --- .../cli/src/fingerprint/canonicalize/packageDependencies.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/cli/src/fingerprint/canonicalize/packageDependencies.js b/packages/cli/src/fingerprint/canonicalize/packageDependencies.js index a9aacb1537..e1bcc26ba9 100644 --- a/packages/cli/src/fingerprint/canonicalize/packageDependencies.js +++ b/packages/cli/src/fingerprint/canonicalize/packageDependencies.js @@ -1,6 +1,6 @@ /* eslint-disable @typescript-eslint/unbound-method */ /* eslint-disable class-methods-use-this */ -const { CodeObject } = require('@appland/models'); +const { CodeObjectType } = require('@appland/models'); const Unique = require('./unique'); function packageOf(codeObject) { @@ -9,7 +9,7 @@ function packageOf(codeObject) { } const ancestors = [codeObject, ...codeObject.ancestors()]; - let packageObject = ancestors.find((a) => a.type === CodeObject.PACKAGE); + let packageObject = ancestors.find((a) => a.type === CodeObjectType.PACKAGE); if (!packageObject && ancestors.length >= 1) { packageObject = ancestors[ancestors.length - 1]; } From 4f7b4c264d86071eb4c06dbe6ebfb02dd3dedd5b Mon Sep 17 00:00:00 2001 From: Kevin Gilpin Date: Wed, 6 Nov 2024 
20:45:28 -0500 Subject: [PATCH 04/12] refactor: Use queryKeywords from @appland/search --- packages/cli/src/fulltext/AppMapIndex.ts | 4 +- packages/cli/src/fulltext/FindEvents.ts | 7 +- packages/cli/src/fulltext/queryKeywords.ts | 76 ------------------- .../cli/src/rpc/explain/collectContext.ts | 2 +- packages/search/src/index.ts | 1 + 5 files changed, 8 insertions(+), 82 deletions(-) delete mode 100644 packages/cli/src/fulltext/queryKeywords.ts diff --git a/packages/cli/src/fulltext/AppMapIndex.ts b/packages/cli/src/fulltext/AppMapIndex.ts index eec83ac9a3..6427037e87 100644 --- a/packages/cli/src/fulltext/AppMapIndex.ts +++ b/packages/cli/src/fulltext/AppMapIndex.ts @@ -1,6 +1,7 @@ import { dirname, join } from 'path'; -import { Metadata } from '@appland/models'; import { readFile } from 'fs/promises'; +import { Metadata } from '@appland/models'; +import { queryKeywords } from '@appland/search'; import { exists, processNamedFiles, verbose } from '../utils'; import { splitCamelized } from '../lib/splitCamelized'; @@ -9,7 +10,6 @@ import lunr from 'lunr'; import UpToDate from '../lib/UpToDate'; import loadAppMapConfig from '../lib/loadAppMapConfig'; import { packRef, refToAppMapDir, unpackRef } from './ref'; -import queryKeywords from './queryKeywords'; type SerializedCodeObject = { name: string; diff --git a/packages/cli/src/fulltext/FindEvents.ts b/packages/cli/src/fulltext/FindEvents.ts index f5ad3082f3..5ce3b4d967 100644 --- a/packages/cli/src/fulltext/FindEvents.ts +++ b/packages/cli/src/fulltext/FindEvents.ts @@ -1,11 +1,12 @@ import { AppMap, AppMapFilter, Event, buildAppMap } from '@appland/models'; import { log, warn } from 'console'; import { readFile } from 'fs/promises'; -import { verbose } from '../utils'; +import { queryKeywords } from '@appland/search'; import lunr from 'lunr'; -import { collectParameters } from './collectParameters'; import assert from 'assert'; -import queryKeywords from './queryKeywords'; + +import { verbose } from '../utils'; 
+import { collectParameters } from './collectParameters'; import { fileNameMatchesFilterPatterns } from './fileNameMatchesFilterPatterns'; type IndexItem = { diff --git a/packages/cli/src/fulltext/queryKeywords.ts b/packages/cli/src/fulltext/queryKeywords.ts deleted file mode 100644 index 7e414064c8..0000000000 --- a/packages/cli/src/fulltext/queryKeywords.ts +++ /dev/null @@ -1,76 +0,0 @@ -import { splitCamelized } from '../lib/splitCamelized'; - -const STOP_WORDS = new Set([ - 'a', - 'an', - 'and', - 'are', - 'as', - 'at', - 'be', - 'by', - 'code', - 'for', - 'from', - 'has', - 'he', - 'in', - 'is', - 'it', - 'its', - 'of', - 'on', - 'over', - 'that', - 'the', - 'to', - 'was', - 'were', - 'will', - 'with', - 'without', -]); - -/** - * Replace non-alphanumeric characters with spaces, then split the keyword on spaces. - * So in effect, words with non-alphanumeric characters become multiple words. - * Allow dash and underscore as delimeters. - */ -const sanitizeKeyword = (keyword: string): string[] => - keyword.replace(/[^\p{L}\p{N}\-_]/gu, ' ').split(' '); - -/** - * Extract keywords from a string or an array of strings. The extraction process includes the following steps: - * - * - Remove non-alphanumeric characters and split the keyword on spaces. - * - Split camelized words. - * - Remove stop words. - */ -export default function queryKeywords(words: undefined | string | string[]): string[] { - if (!words) return []; - - const wordsArray = Array.isArray(words) ? words : [words]; - if (wordsArray.length === 0) return []; - - return wordsArray - .map((word) => sanitizeKeyword(word || '')) - .flat() - .filter(Boolean) - .map((word): string[] => { - const camelized = splitCamelized(word) - .split(/[\s\-_]/) - .map((word) => word.toLowerCase()); - // Return each of the component words, and also return each pair of adjacent words as a single word. 
- const result = new Array(); - for (let i = 0; i < camelized.length; i++) { - result.push(camelized[i]); - if (i > 0) result.push([camelized[i - 1] + camelized[i]].join('')); - } - return result; - }) - .flat() - .map((str) => str.trim()) - .filter(Boolean) - .filter((str) => str.length >= 2) - .filter((str) => !STOP_WORDS.has(str)); -} diff --git a/packages/cli/src/rpc/explain/collectContext.ts b/packages/cli/src/rpc/explain/collectContext.ts index fb56a06860..1e3849dac3 100644 --- a/packages/cli/src/rpc/explain/collectContext.ts +++ b/packages/cli/src/rpc/explain/collectContext.ts @@ -1,11 +1,11 @@ import { ContextV2 } from '@appland/navie'; import { SearchRpc } from '@appland/rpc'; +import { queryKeywords } from '@appland/search'; import { SearchResult as EventSearchResult } from '../../fulltext/FindEvents'; import Location from './location'; import SearchContextCollector from './SearchContextCollector'; import LocationContextCollector from './LocationContextCollector'; -import queryKeywords from '../../fulltext/queryKeywords'; import { warn } from 'console'; export const buildExclusionPattern = (dirName: string): RegExp => { diff --git a/packages/search/src/index.ts b/packages/search/src/index.ts index 05fc5fe24b..8f74cd8836 100644 --- a/packages/search/src/index.ts +++ b/packages/search/src/index.ts @@ -7,3 +7,4 @@ export { default as FileIndex, FileSearchResult } from './file-index'; export { default as listProjectFiles } from './project-files'; export { isBinaryFile, isDataFile, isLargeFile } from './file-type'; export { fileTokens } from './tokenize'; +export { default as queryKeywords } from './query-keywords'; From 4ff0fbf1fb3b8eac227a26dc1c2ea88ec8c24f60 Mon Sep 17 00:00:00 2001 From: Kevin Gilpin Date: Wed, 6 Nov 2024 20:59:31 -0500 Subject: [PATCH 05/12] chore: appmap.yml: exclude common functions from search --- appmap.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/appmap.yml b/appmap.yml index 3f6954c9a2..028c87a4e3 100644 --- a/appmap.yml 
+++ b/appmap.yml @@ -13,6 +13,9 @@ packages: - .yarn - path: packages/search exclude: + - search/built/tokenize + - search/built/query-keywords + - search/built/split-camelized - node_modules - .yarn - path: packages/client From 2f08d9d3134093cd3e113f69d52b0136425f9315 Mon Sep 17 00:00:00 2001 From: Kevin Gilpin Date: Thu, 7 Nov 2024 14:52:51 -0500 Subject: [PATCH 06/12] chore: Document the client ai telemetry types --- packages/client/src/ai.ts | 47 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/packages/client/src/ai.ts b/packages/client/src/ai.ts index 5cc9ff5966..5ad4feb4e6 100644 --- a/packages/client/src/ai.ts +++ b/packages/client/src/ai.ts @@ -2,50 +2,86 @@ import { io } from 'socket.io-client'; import AIClient, { Callbacks } from './aiClient'; import { getConfiguration } from './loadConfiguration'; +/** + * When a conversation is created, the AppMap service reports the permissions assigned to the user, + * directly or through an organization to which they belong. + */ export type Permissions = { useNavieAIProxy: boolean; }; +/** + * An activity record for the user, indicating how many conversations were created over a given + * time period. + */ export type ConversationCount = { daysAgo: number; count: number; }; +/** + * The usage report for a given user, which is reported back to the user when a conversation is + * created. + */ export type Usage = { conversationCounts: ConversationCount[]; }; +/** + * Model parameters are reported to the AppMap service when a conversation is created. + */ export type ModelParameters = { baseUrl?: string; model?: string; aiKeyName?: string; }; +/** + * Project directory information is reported to the AppMap service when a conversation is created. + */ export type ProjectDirectory = { hasAppMapConfig: boolean; language?: string; }; +/** + * The parameters for a project, which are reported to the AppMap service when a conversation is + * created. 
+ */ export type ProjectParameters = { directoryCount: number; codeEditor?: string; directories: ProjectDirectory[]; }; +/** + * A specific product to which the user is subscribed. + */ export type SubscriptionItem = { productName: string; }; +/** + * A record of all subscriptions for a given user, along with the date on which they were first + * enrolled. + */ export type Subscription = { enrollmentDate: Date; subscriptions: SubscriptionItem[]; }; +/** + * These parameters are passed from Navie Client to the Navie Service when a new conversation is + * created. + */ export type CreateConversationThread = { modelParameters: ModelParameters; projectParameters: ProjectParameters; }; +/** + * This information is reported back to the Navie Client when a conversation is created. + */ export type ConversationThread = { id: string; permissions: Permissions; @@ -71,6 +107,17 @@ export type CreateUserMessage = { codeSelectionLength?: number; }; +/** + * When a user message is reported to the AppMap service, the agent name (aka command name) and + * automatically assigned classifications are reported. + * + * The agent name will be one of the published Navie commands, such as @explain, @diagram, @plan, + * @generate, @test, @search, @review, and @help. + * + * Classifications are question categories that are assigned by Navie to help the AI respond + * in the most appropriate way. Classifications are single words like overview, architecture, + * troubleshoot, feature, generate-diagram, generate-code, explain, and help. 
+ */ export type UpdateUserMessage = { agentName?: string; classification?: Classification[]; From 5dc1164a6c550fa08a758f429ac9069c4876e88e Mon Sep 17 00:00:00 2001 From: Kevin Gilpin Date: Fri, 22 Nov 2024 14:21:11 -0500 Subject: [PATCH 07/12] chore: Ignore .run-stats directories --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 14174ff1bd..5835820618 100644 --- a/.gitignore +++ b/.gitignore @@ -46,4 +46,5 @@ tmp/appmap packages/*/tmp/ *.appmap.json .navie +.run-stats From 4b6707a2439243a0983452cceb52d63353f0060a Mon Sep 17 00:00:00 2001 From: Kevin Gilpin Date: Wed, 6 Nov 2024 21:00:38 -0500 Subject: [PATCH 08/12] feat: Search for AppMap data using @appland/search --- packages/cli/src/cmds/search/search.ts | 44 ++- packages/cli/src/fulltext/AppMapIndex.ts | 321 ------------------ packages/cli/src/fulltext/appmap-index.ts | 179 ++++++++++ packages/cli/src/fulltext/appmap-match.ts | 178 ++++++++++ .../cli/src/rpc/explain/EventCollector.ts | 8 +- .../src/rpc/explain/SearchContextCollector.ts | 36 +- packages/cli/src/rpc/explain/explain.ts | 20 +- packages/cli/src/rpc/search/search.ts | 33 +- .../cli/tests/integration/rpc.search.spec.ts | 2 +- .../appmap-index.readAppMapContent.spec.ts | 90 +++++ ...ex.spec.ts => appmap-index.search.spec.ts} | 128 ++++--- .../unit/rpc/explain/ContextCollector.spec.ts | 1 - .../unit/rpc/explain/EventCollector.spec.ts | 2 +- .../unit/rpc/explain/collectContext.spec.ts | 124 ------- packages/search/src/build-file-index.ts | 18 +- 15 files changed, 638 insertions(+), 546 deletions(-) delete mode 100644 packages/cli/src/fulltext/AppMapIndex.ts create mode 100644 packages/cli/src/fulltext/appmap-index.ts create mode 100644 packages/cli/src/fulltext/appmap-match.ts create mode 100644 packages/cli/tests/unit/fulltext/appmap-index.readAppMapContent.spec.ts rename packages/cli/tests/unit/fulltext/{AppMapIndex.spec.ts => appmap-index.search.spec.ts} (53%) delete mode 100644 
packages/cli/tests/unit/rpc/explain/collectContext.spec.ts diff --git a/packages/cli/src/cmds/search/search.ts b/packages/cli/src/cmds/search/search.ts index 72687a71ce..cb706bf617 100644 --- a/packages/cli/src/cmds/search/search.ts +++ b/packages/cli/src/cmds/search/search.ts @@ -1,22 +1,22 @@ import yargs from 'yargs'; - +import sqlite3 from 'better-sqlite3'; import assert from 'assert'; import { readFileSync } from 'fs'; import { writeFile } from 'fs/promises'; import { AppMap, AppMapFilter, buildAppMap, deserializeFilter } from '@appland/models'; +import { FileIndex } from '@appland/search'; import { handleWorkingDirectory } from '../../lib/handleWorkingDirectory'; import { verbose } from '../../utils'; import searchSingleAppMap, { SearchOptions as SingleSearchOptions } from './searchSingleAppMap'; -import AppMapIndex, { - SearchResponse as DiagramsSearchResponse, - SearchOptions, -} from '../../fulltext/AppMapIndex'; +import { SearchResponse as DiagramsSearchResponse } from '../../fulltext/appmap-match'; import { SearchResult as EventSearchResult, SearchResponse as EventSearchResponse, } from '../../fulltext/FindEvents'; import { openInBrowser } from '../open/openers'; +import { buildAppMapIndex, search } from '../../fulltext/appmap-index'; +import buildIndex from '../../rpc/explain/buildIndex'; export const command = 'search '; export const describe = @@ -83,7 +83,21 @@ export const builder = (args: yargs.Argv) => { return args.strict(); }; -export const handler = async (argv: any) => { +type ArgumentTypes = { + directory: string; + query: string; + appmap: string; + contextDepth: number; + maxSize: string; + filter: string; + show: boolean; + maxResults: number; + findEvents: boolean; + format: 'json' | 'appmap'; + verbose: boolean; +}; + +export const handler = async (argv: ArgumentTypes) => { verbose(argv.verbose); const { directory, query, appmap, contextDepth, show, maxResults, findEvents, format } = argv; @@ -160,19 +174,29 @@ export const handler = 
async (argv: any) => { maxResults, }; const { maxSize, filter: filterStr } = argv; - if (maxSize) options.maxSize = maxSize; + if (maxSize) options.maxSize = parseInt(maxSize); if (filterStr) options.filter = deserializeFilter(filterStr); const response = await searchSingleAppMap(appmap, query, options); await presentResults(response); } else { - const options: SearchOptions = { + const options = { maxResults, }; - const response = await AppMapIndex.search([process.cwd()], query, options); + + const index = await buildIndex('appmaps', async (indexFile) => { + const db = new sqlite3(indexFile); + const fileIndex = new FileIndex(db); + await buildAppMapIndex(fileIndex, [process.cwd()]); + return fileIndex; + }); + + const response = await search(index.index, query.split(/\s+/).join(' OR '), maxResults); + index.close(); + if (findEvents) { const eventOptions: SingleSearchOptions = { maxResults }; const { maxSize, filter: filterStr } = argv; - if (maxSize) eventOptions.maxSize = maxSize; + if (maxSize) eventOptions.maxSize = parseInt(maxSize, 10); if (filterStr) eventOptions.filter = deserializeFilter(filterStr); const { results } = response; diff --git a/packages/cli/src/fulltext/AppMapIndex.ts b/packages/cli/src/fulltext/AppMapIndex.ts deleted file mode 100644 index 6427037e87..0000000000 --- a/packages/cli/src/fulltext/AppMapIndex.ts +++ /dev/null @@ -1,321 +0,0 @@ -import { dirname, join } from 'path'; -import { readFile } from 'fs/promises'; -import { Metadata } from '@appland/models'; -import { queryKeywords } from '@appland/search'; - -import { exists, processNamedFiles, verbose } from '../utils'; -import { splitCamelized } from '../lib/splitCamelized'; -import { log, warn } from 'console'; -import lunr from 'lunr'; -import UpToDate from '../lib/UpToDate'; -import loadAppMapConfig from '../lib/loadAppMapConfig'; -import { packRef, refToAppMapDir, unpackRef } from './ref'; - -type SerializedCodeObject = { - name: string; - type: string; - labels: string[]; - 
children: SerializedCodeObject[]; - static?: boolean; - sourceLocation?: string; -}; - -export type SearchOptions = { - maxResults?: number; -}; - -export type SearchResult = { - appmap: string; - directory: string; - score: number; -}; - -export type SearchStats = { - mean: number; - median: number; - stddev: number; - max: number; -}; - -export type SearchResponse = { - type: 'appmap'; - results: SearchResult[]; - stats: SearchStats; - numResults: number; -}; - -async function buildDocument(directory: string, metadataFile: string): Promise { - const metadata = JSON.parse(await readFile(metadataFile, 'utf-8')) as Metadata; - const indexDir = dirname(metadataFile); - const classMap = JSON.parse( - await readFile(join(indexDir, 'classMap.json'), 'utf-8') - ) as SerializedCodeObject[]; - const queries = new Array(); - const codeObjects = new Array(); - const routes = new Array(); - const externalRoutes = new Array(); - - const collectFunction = (co: SerializedCodeObject) => { - if (co.type === 'query') queries.push(co.name); - else if (co.type === 'route') routes.push(co.name); - else if (co.type === 'external-route') externalRoutes.push(co.name); - else codeObjects.push(co.name); - - co.children?.forEach((child) => { - collectFunction(child); - }); - }; - classMap.forEach((co) => collectFunction(co)); - - const parameters = new Array(); - if (await exists(join(indexDir, 'canonical.parameters.json'))) { - const canonicalParameters = JSON.parse( - await readFile(join(indexDir, 'canonical.parameters.json'), 'utf-8') - ) as string[]; - canonicalParameters.forEach((cp) => { - parameters.push(splitCamelized(cp)); - }); - } - - let appmapId = indexDir; - if (appmapId.startsWith(directory)) appmapId = appmapId.substring(directory.length + 1); - - const id = packRef(directory, appmapId); - return { - id, - name: queryKeywords(metadata.name), - source_location: queryKeywords(metadata.source_location), - code_objects: queryKeywords(codeObjects), - queries: 
queryKeywords(queries), - routes: queryKeywords(routes), - external_routes: queryKeywords(externalRoutes), - parameters: queryKeywords(parameters), - }; -} - -async function buildIndex(directories: string[]): Promise { - const documents = new Array(); - if (verbose()) log(`[AppMapIndex] Adding AppMaps to full-text index`); - const startTime = Date.now(); - - for (const directory of directories) { - const appmapConfig = await loadAppMapConfig(join(directory, 'appmap.yml')); - let appmapDir: string | undefined; - if (appmapConfig) { - appmapDir = appmapConfig.appmap_dir ?? 'tmp/appmap'; - } - if (!appmapDir) { - if (verbose()) - log( - `[AppMapIndex] Skipping directory ${directory} because it does not contain an AppMap configuration` - ); - continue; - } - await processNamedFiles( - join(directory, appmapDir), - 'metadata.json', - async (metadataFile: string) => { - documents.push(await buildDocument(directory, metadataFile)); - } - ); - } - - const idx = lunr(function () { - this.ref('id'); - this.field('name'); - this.field('source_location'); - this.field('code_objects'); - this.field('queries'); - this.field('routes'); - this.field('external_routes'); - this.field('parameters'); - - this.tokenizer.separator = /[\s/-_:#.]+/; - - for (const doc of documents) this.add(doc); - }); - - const endTime = Date.now(); - if (verbose()) - log( - `[AppMapIndex] Added ${documents.length} AppMaps to full-text index in ${ - endTime - startTime - }ms` - ); - return new AppMapIndex(directories, idx); -} - -enum ScoreStats { - StdDev = 'stddev', - Mean = 'mean', - Median = 'median', - Max = 'max', -} - -enum ScoreFactors { - OutOfDateFactor = ScoreStats.StdDev, - OutOfDateMultipler = 0.5, -} - -export async function removeNonExistentMatches(matches: lunr.Index.Result[]) { - const appmapExists = new Map(); - for (const match of matches) { - const appmapDir = refToAppMapDir(match.ref); - const appmapFileName = [appmapDir, '.appmap.json'].join(''); - const doesExist = await 
exists(appmapFileName); - if (!doesExist) { - if (verbose()) - warn( - `[AppMapIndex] AppMap ${appmapFileName} does not exist, but we got it as a search match.` - ); - } - appmapExists.set(match.ref, doesExist); - } - return matches.filter((match) => appmapExists.get(match.ref)); -} - -export function scoreMatches(matches: lunr.Index.Result[]): Map { - const scoreStats = new Map(); - if (!matches.length) return scoreStats; - - const numResults = matches.length; - const maxScore = matches.reduce((acc, match) => Math.max(acc, match.score), 0); - const medianScore = matches[Math.floor(numResults / 2)].score; - const meanScore = matches.reduce((acc, match) => acc + match.score, 0) / numResults; - const stddevScore = Math.sqrt( - matches.reduce((acc, match) => acc + Math.pow(match.score, 2), 0) / numResults - ); - - if (verbose()) { - log(`[AppMapIndex] Score stats:`); - log(` Max: ${maxScore}`); - log(` Median: ${medianScore}`); - log(` Mean: ${meanScore}`); - log(` StdDev: ${stddevScore}`); - log( - `Number which are least 1 stddev above the mean: ${ - matches.filter((match) => match.score > meanScore + stddevScore).length - }` - ); - log( - `Number which are at least 2 stddev above the mean: ${ - matches.filter((match) => match.score > meanScore + 2 * stddevScore).length - }` - ); - log( - `Number which are at least 3 stddev above the mean: ${ - matches.filter((match) => match.score > meanScore + 3 * stddevScore).length - }` - ); - } - - scoreStats.set(ScoreStats.Max, maxScore); - scoreStats.set(ScoreStats.Median, medianScore); - scoreStats.set(ScoreStats.Mean, meanScore); - scoreStats.set(ScoreStats.StdDev, stddevScore); - - return scoreStats; -} - -async function downscoreOutOfDateMatches( - scoreStats: Map, - matches: lunr.Index.Result[], - maxResults: number -): Promise { - const sortedMatches = new Array(); - let i = 0; - - const finishedIterating = () => i >= matches.length; - const matchBelowThreshold = () => { - if (sortedMatches.length < maxResults) return 
false; - - const lastSortedMatch = sortedMatches[sortedMatches.length - 1]; - const match = matches[i]; - return match.score < lastSortedMatch.score; - }; - const completed = () => finishedIterating() || matchBelowThreshold(); - - while (!completed()) { - const match = matches[i++]; - const downscore = scoreStats.get(ScoreStats.StdDev)! * ScoreFactors.OutOfDateMultipler; - const { directory, appmapId } = unpackRef(match.ref); - const upToDate = new UpToDate(); - upToDate.baseDir = directory; - const outOfDateDependencies = await upToDate.isOutOfDate(appmapId); - if (outOfDateDependencies) { - if (verbose()) { - log( - `[AppMapIndex] AppMap ${refToAppMapDir(match.ref)} is out of date due to ${[ - ...outOfDateDependencies, - ]}. Downscoring by ${downscore}.` - ); - } - match.score -= downscore; - } - - sortedMatches.push(match); - sortedMatches.sort((a, b) => b.score - a.score); - } - - return sortedMatches; -} - -export function reportMatches( - matches: lunr.Index.Result[], - scoreStats: Map, - numResults: number -): SearchResponse { - const searchResults = matches.map((match) => { - const { directory, appmapId } = unpackRef(match.ref); - return { - appmap: appmapId, - directory, - score: match.score, - }; - }); - return { - type: 'appmap', - results: searchResults, - stats: [...scoreStats.keys()].reduce((acc, key) => { - acc[key] = scoreStats.get(key)!; - return acc; - }, {}) as SearchStats, - numResults, - }; -} - -export default class AppMapIndex { - constructor(public directories: string[], private idx: lunr.Index) {} - - async search(search: string, options: SearchOptions): Promise { - let matches = this.idx.search(queryKeywords(search).join(' ')); - matches = await removeNonExistentMatches(matches); - const numResults = matches.length; - - if (verbose()) log(`[AppMapIndex] Got ${numResults} AppMap matches for search "${search}"`); - - const scoreStats = scoreMatches(matches); - - matches = await downscoreOutOfDateMatches( - scoreStats, - matches, - 
options.maxResults || matches.length - ); - - if (options.maxResults && numResults > options.maxResults) { - if (verbose()) log(`[FullText] Limiting to the top ${options.maxResults} matches`); - matches = matches.slice(0, options.maxResults); - } - - return reportMatches(matches, scoreStats, numResults); - } - - static async search( - appmapDirectories: string[], - search: string, - options: SearchOptions - ): Promise { - const index = await buildIndex(appmapDirectories); - return await index.search(search, options); - } -} diff --git a/packages/cli/src/fulltext/appmap-index.ts b/packages/cli/src/fulltext/appmap-index.ts new file mode 100644 index 0000000000..77d20ae20f --- /dev/null +++ b/packages/cli/src/fulltext/appmap-index.ts @@ -0,0 +1,179 @@ +import { isAbsolute, join, relative } from 'path'; +import { isNativeError } from 'util/types'; +import { log, warn } from 'console'; +import { readFile } from 'fs/promises'; +import { Metadata } from '@appland/models'; +import { buildFileIndex, FileIndex, fileTokens } from '@appland/search'; + +import { findFiles, isNodeError, verbose } from '../utils'; +import { + downscoreOutOfDateMatches, + Match, + removeNonExistentMatches, + reportMatches, + scoreMatches, + SearchResponse, +} from './appmap-match'; +import loadAppMapConfig from '../lib/loadAppMapConfig'; + +type ClassMapEntry = { + name: string; + type: string; + labels: string[]; + children: ClassMapEntry[]; + static?: boolean; + sourceLocation?: string; +}; + +/** + * List all appmap.json files in a directory. + */ +export async function listAppMaps(directory: string): Promise { + const appmapConfig = await loadAppMapConfig(join(directory, 'appmap.yml')); + let appmapDir: string | undefined; + if (appmapConfig) { + appmapDir = appmapConfig.appmap_dir ?? 
'tmp/appmap'; + } + if (!appmapDir) { + if (verbose()) + log( + `[AppMapIndex] Skipping directory ${directory} because it does not contain an AppMap configuration` + ); + return []; + } + + const appmapFiles = await findFiles(join(directory, appmapDir), '.appmap.json'); + if (verbose()) log(`[appmap-index] Found ${appmapFiles.length} AppMap files in ${directory}`); + const relativeToPath = (file: string) => (isAbsolute(file) ? relative(directory, file) : file); + return appmapFiles.map(relativeToPath); +} + +/** + * Read all content for an AppMap. For efficiency, utilizes the AppMap index files, rather + * than reading the entire AppMap file directly. + */ +export async function readAppMapContent(appmapFile: string): Promise { + const appmapName = appmapFile.replace(/\.appmap\.json$/, ''); + + async function readIndexFile(name: string): Promise { + const indexFile = join(appmapName, [name, '.json'].join('')); + let indexStr: string; + try { + indexStr = await readFile(indexFile, 'utf-8'); + } catch (e) { + if (isNativeError(e) && !isNodeError(e, 'ENOENT')) { + warn(`Error reading metadata file ${indexFile}: ${e.message}`); + } + return undefined; + } + + try { + return JSON.parse(indexStr) as T; + } catch (e) { + const errorMessage = isNativeError(e) ? e.message : String(e); + warn(`Error parsing metadata file ${indexFile}: ${errorMessage}`); + } + } + + const appmapWords = new Array(); + + const metadata = await readIndexFile('metadata'); + if (metadata) { + appmapWords.push(metadata.name); + if (metadata.labels) appmapWords.push(...metadata.labels); + if (metadata.exception) appmapWords.push(metadata.exception.message); + } + + const classMap = (await readIndexFile('classMap')) ?? 
[]; + + const queries = new Array(); + const codeObjects = new Array(); + const routes = new Array(); + const externalRoutes = new Array(); + const types = new Set(); + + const collectClassMapEntry = (cme: ClassMapEntry) => { + if (cme.type === 'query') { + queries.push(cme.name); + types.add('sql'); + types.add('query'); + types.add('database'); + } else if (cme.type === 'route') { + routes.push(cme.name); + types.add('route'); + types.add('request'); + types.add('server'); + types.add('http'); + } else if (cme.type === 'external-route') { + externalRoutes.push(cme.name); + types.add('route'); + types.add('request'); + types.add('client'); + types.add('http'); + } else codeObjects.push(cme.name); + + cme.children?.forEach((child) => { + collectClassMapEntry(child); + }); + }; + classMap.forEach((co) => collectClassMapEntry(co)); + appmapWords.push(...queries, ...codeObjects, ...routes, ...externalRoutes); + + const parameters = (await readIndexFile('canonical.parameters')) ?? []; + appmapWords.push(...parameters); + appmapWords.push(...types); + + return appmapWords.join(' '); +} + +export function trueFilter(): Promise { + return Promise.resolve(true); +} + +/** + * Build an index of all AppMaps in the specified directories. 
+ */ +export async function buildAppMapIndex(fileIndex: FileIndex, directories: string[]): Promise { + return buildFileIndex( + fileIndex, + directories, + listAppMaps, + trueFilter, + readAppMapContent, + fileTokens + ); +} + +export async function search( + index: FileIndex, + search: string, + maxResults: number +): Promise { + const searchMatches = index.search(search, maxResults); + let matches: Match[] = searchMatches.map((match) => { + let appmapId = match.filePath; + if (appmapId.endsWith('.appmap.json')) + appmapId = match.filePath.slice(0, -'.appmap.json'.length); + return { + appmapId, + directory: match.directory, + score: match.score, + }; + }); + + matches = await removeNonExistentMatches(matches); + const numResults = matches.length; + + if (verbose()) log(`[appmap-index] Got ${numResults} AppMap matches for search "${search}"`); + + const scoreStats = scoreMatches(matches); + + matches = await downscoreOutOfDateMatches(scoreStats, matches, maxResults || matches.length); + + if (maxResults && numResults > maxResults) { + if (verbose()) log(`[appmap-index] Limiting to the top ${maxResults} matches`); + matches = matches.slice(0, maxResults); + } + + return reportMatches(matches, scoreStats, numResults); +} diff --git a/packages/cli/src/fulltext/appmap-match.ts b/packages/cli/src/fulltext/appmap-match.ts new file mode 100644 index 0000000000..4dc104f47c --- /dev/null +++ b/packages/cli/src/fulltext/appmap-match.ts @@ -0,0 +1,178 @@ +import UpToDate from '../lib/UpToDate'; +import { exists } from '../utils'; + +import makeDebug from 'debug'; + +const debug = makeDebug('appmap:fulltext:appmap-match'); + +export type SearchResult = { + appmap: string; + directory: string; + score: number; +}; + +export type SearchStats = { + mean: number; + median: number; + stddev: number; + max: number; +}; + +export type SearchResponse = { + type: 'appmap'; + results: SearchResult[]; + stats: SearchStats; + numResults: number; +}; + +export type Match = { + appmapId: 
string; + directory: string; + score: number; +}; + +enum ScoreStats { + StdDev = 'stddev', + Mean = 'mean', + Median = 'median', + Max = 'max', +} + +enum ScoreFactors { + OutOfDateFactor = ScoreStats.StdDev, + OutOfDateMultipler = 0.5, +} + +export async function removeNonExistentMatches(matches: Match[]): Promise { + const result = new Array(); + for (const match of matches) { + const { appmapId } = match; + const appmapFileName = [appmapId, '.appmap.json'].join(''); + const doesExist = await exists(appmapFileName); + if (doesExist) { + result.push(match); + } else { + debug(`AppMap ${appmapFileName} does not exist, but we got it as a search match.`); + } + } + return result; +} + +export function scoreMatches(matches: Match[]): Map { + const scoreStats = new Map(); + if (!matches.length) return scoreStats; + + const numResults = matches.length; + const maxScore = matches.reduce((acc, match) => Math.max(acc, match.score), 0); + const medianScore = matches[Math.floor(numResults / 2)].score; + const meanScore = matches.reduce((acc, match) => acc + match.score, 0) / numResults; + const stddevScore = Math.sqrt( + matches.reduce((acc, match) => acc + Math.pow(match.score, 2), 0) / numResults + ); + + debug(`Score stats:`); + debug(` Max: ${maxScore}`); + debug(` Median: ${medianScore}`); + debug(` Mean: ${meanScore}`); + debug(` StdDev: ${stddevScore}`); + debug( + `Number which are least 1 stddev above the mean: ${ + matches.filter((match) => match.score > meanScore + stddevScore).length + }` + ); + debug( + `Number which are at least 2 stddev above the mean: ${ + matches.filter((match) => match.score > meanScore + 2 * stddevScore).length + }` + ); + debug( + `Number which are at least 3 stddev above the mean: ${ + matches.filter((match) => match.score > meanScore + 3 * stddevScore).length + }` + ); + + scoreStats.set(ScoreStats.Max, maxScore); + scoreStats.set(ScoreStats.Median, medianScore); + scoreStats.set(ScoreStats.Mean, meanScore); + 
scoreStats.set(ScoreStats.StdDev, stddevScore); + + return scoreStats; +} + +/** + * Adjusts the scores of AppMap search matches based on their out-of-dateness. + * + * This function iterates over a list of search match results, determining if the matched + * AppMaps are out-of-date. If so, it reduces their score by a calculated "downscore" + * value based on the standard deviation of scores. It only processes until the specified + * maximum number of results is determined. + * + * @param scoreStats - A map containing score statistics (e.g., standard deviation). + * @param matches - An array of search match objects containing details about AppMaps. + * @param maxResults - The maximum number of results that should be considered. + * @returns A promise that resolves to a sorted array of matches with adjusted scores. + */ +export async function downscoreOutOfDateMatches( + scoreStats: Map, + matches: Match[], + maxResults: number +): Promise { + const sortedMatches = new Array(); + let i = 0; + + const finishedIterating = () => i >= matches.length; + const matchBelowThreshold = () => { + if (sortedMatches.length < maxResults) return false; + + const lastSortedMatch = sortedMatches[sortedMatches.length - 1]; + const match = matches[i]; + return match.score < lastSortedMatch.score; + }; + const completed = () => finishedIterating() || matchBelowThreshold(); + + while (!completed()) { + const match = matches[i++]; + const downscore = scoreStats.get(ScoreStats.StdDev)! * ScoreFactors.OutOfDateMultipler; + const { directory, appmapId } = match; + const upToDate = new UpToDate(); + upToDate.baseDir = directory; + const outOfDateDependencies = await upToDate.isOutOfDate(appmapId); + if (outOfDateDependencies) { + debug( + `AppMap ${appmapId} is out of date due to ${[...outOfDateDependencies].join( + ', ' + )}. 
Downscoring by ${downscore}.` + ); + match.score -= downscore; + } + + sortedMatches.push(match); + sortedMatches.sort((a, b) => b.score - a.score); + } + + return sortedMatches; +} + +export function reportMatches( + matches: Match[], + scoreStats: Map, + numResults: number +): SearchResponse { + const searchResults = matches.map((match) => { + const { directory, appmapId } = match; + return { + appmap: appmapId, + directory, + score: match.score, + }; + }); + return { + type: 'appmap', + results: searchResults, + stats: [...scoreStats.keys()].reduce((acc, key) => { + acc[key] = scoreStats.get(key)!; + return acc; + }, {}) as SearchStats, + numResults, + }; +} diff --git a/packages/cli/src/rpc/explain/EventCollector.ts b/packages/cli/src/rpc/explain/EventCollector.ts index e0f9b4b889..2768e7660b 100644 --- a/packages/cli/src/rpc/explain/EventCollector.ts +++ b/packages/cli/src/rpc/explain/EventCollector.ts @@ -1,13 +1,11 @@ import { isAbsolute, join } from 'path'; import { ContextV2 } from '@appland/navie'; import { SearchRpc } from '@appland/rpc'; -import { - SearchResponse as AppMapSearchResponse, - SearchOptions as AppMapSearchOptions, -} from '../../fulltext/AppMapIndex'; +import { SearchResponse as AppMapSearchResponse } from '../../fulltext/appmap-match'; import FindEvents, { SearchResponse as EventSearchResponse, SearchOptions as EventsSearchOptions, + SearchOptions, } from '../../fulltext/FindEvents'; import buildContext from './buildContext'; import { textSearchResultToRpcSearchResult } from './collectContext'; @@ -71,7 +69,7 @@ export default class EventCollector { return index; } - async findEvents(appmap: string, options: AppMapSearchOptions): Promise { + async findEvents(appmap: string, options: SearchOptions): Promise { if (appmap.endsWith('.appmap.json')) appmap = appmap.slice(0, -'.appmap.json'.length); const index = await this.appmapIndex(appmap); diff --git a/packages/cli/src/rpc/explain/SearchContextCollector.ts 
b/packages/cli/src/rpc/explain/SearchContextCollector.ts index 75f0f32eb3..81afcf0c9f 100644 --- a/packages/cli/src/rpc/explain/SearchContextCollector.ts +++ b/packages/cli/src/rpc/explain/SearchContextCollector.ts @@ -3,18 +3,16 @@ import sqlite3 from 'better-sqlite3'; import { ContextV2, applyContext } from '@appland/navie'; import { SearchRpc } from '@appland/rpc'; -import { FileSearchResult } from '@appland/search'; +import { FileIndex, FileSearchResult } from '@appland/search'; -import AppMapIndex, { - SearchResponse as AppMapSearchResponse, - SearchOptions as AppMapSearchOptions, -} from '../../fulltext/AppMapIndex'; +import { SearchResponse as AppMapSearchResponse } from '../../fulltext/appmap-match'; import { DEFAULT_MAX_DIAGRAMS } from '../search/search'; import EventCollector from './EventCollector'; import indexFiles from './index-files'; import indexSnippets from './index-snippets'; import collectSnippets from './collect-snippets'; import buildIndex from './buildIndex'; +import { buildAppMapIndex, search } from '../../fulltext/appmap-index'; export default class SearchContextCollector { public excludePatterns: RegExp[] | undefined; @@ -59,15 +57,27 @@ export default class SearchContextCollector { numResults: this.appmaps.length, }; } else { - // Search across all AppMaps, creating a map from AppMap id to AppMapSearchResult - const searchOptions: AppMapSearchOptions = { - maxResults: DEFAULT_MAX_DIAGRAMS, - }; - appmapSearchResponse = await AppMapIndex.search( - this.appmapDirectories, - this.vectorTerms.join(' '), - searchOptions + const appmapIndex = await buildIndex('appmaps', async (indexFile) => { + const db = new sqlite3(indexFile); + const fileIndex = new FileIndex(db); + await buildAppMapIndex(fileIndex, this.appmapDirectories); + return fileIndex; + }); + const selectedAppMaps = await search( + appmapIndex.index, + this.vectorTerms.join(' OR '), + DEFAULT_MAX_DIAGRAMS ); + appmapIndex.close(); + + appmapSearchResponse = { + results: 
selectedAppMaps.results, + numResults: selectedAppMaps.results.length, + stats: selectedAppMaps.stats, + type: 'appmap', + }; + + log(`[search-context] Matched ${selectedAppMaps.results.length} AppMaps.`); } const fileIndex = await buildIndex('files', async (indexFile) => { diff --git a/packages/cli/src/rpc/explain/explain.ts b/packages/cli/src/rpc/explain/explain.ts index 9e9a5991cd..be60babab4 100644 --- a/packages/cli/src/rpc/explain/explain.ts +++ b/packages/cli/src/rpc/explain/explain.ts @@ -9,7 +9,7 @@ import { ContextV2, Help, ProjectInfo, UserContext } from '@appland/navie'; import { ExplainRpc } from '@appland/rpc'; import { warn } from 'console'; import EventEmitter from 'events'; -import { basename } from 'path'; +import { basename, join } from 'path'; import { LRUCache } from 'lru-cache'; import detectAIEnvVar from '../../cmds/index/aiEnvVar'; @@ -104,12 +104,7 @@ export class Explain extends EventEmitter { } } - await navie.ask( - this.status.threadId, - this.question, - this.codeSelection, - this.prompt - ); + await navie.ask(this.status.threadId, this.question, this.codeSelection, this.prompt); } async searchContext(data: ContextV2.ContextRequest): Promise { @@ -152,6 +147,17 @@ export class Explain extends EventEmitter { // pruned by the client AI anyway. // The meaning of tokenCount is "try and get at least this many tokens" const charLimit = tokenCount * 3; + + // const appmapDirectories = this.appmapDirectories.map((dir) => { + // const path = dir.directory; + // const appmapDir = dir.appmapConfig?.appmap_dir ?? 
'tmp/appmap'; + // if (path.endsWith(appmapDir)) { + // return path; + // } else { + // return join(path, appmapDir); + // } + // }); + const searchResult = await collectContext( this.appmapDirectories.map((dir) => dir.directory), this.projectDirectories, diff --git a/packages/cli/src/rpc/search/search.ts b/packages/cli/src/rpc/search/search.ts index 146dc49ff1..a646f64188 100644 --- a/packages/cli/src/rpc/search/search.ts +++ b/packages/cli/src/rpc/search/search.ts @@ -1,9 +1,15 @@ +import { isAbsolute, join } from 'path'; +import sqlite3 from 'better-sqlite3'; +import { FileIndex } from '@appland/search'; import { SearchRpc } from '@appland/rpc'; + import { RpcHandler } from '../rpc'; -import AppMapIndex, { SearchResponse } from '../../fulltext/AppMapIndex'; +import { SearchResponse } from '../../fulltext/appmap-match'; +import { search as searchAppMaps } from '../../fulltext/appmap-index'; import searchSingleAppMap from '../../cmds/search/searchSingleAppMap'; import configuration, { AppMapDirectory } from '../configuration'; -import { isAbsolute, join } from 'path'; +import buildIndex from '../explain/buildIndex'; +import { buildAppMapIndex } from '../../fulltext/appmap-index'; export const DEFAULT_MAX_DIAGRAMS = 10; export const DEFAULT_MAX_EVENTS_PER_DIAGRAM = 100; @@ -52,14 +58,23 @@ export async function handler( }; } else { // Search across all AppMaps, creating a map from AppMap id to AppMapSearchResult - const searchOptions = { - maxResults: options.maxDiagrams || options.maxResults || DEFAULT_MAX_DIAGRAMS, - }; - appmapSearchResponse = await AppMapIndex.search( - appmapDirectories.map((d) => d.directory), - query, - searchOptions + const maxResults = options.maxDiagrams || options.maxResults || DEFAULT_MAX_DIAGRAMS; + const index = await buildIndex('appmaps', async (indexFile) => { + const db = new sqlite3(indexFile); + const fileIndex = new FileIndex(db); + await buildAppMapIndex( + fileIndex, + appmapDirectories.map((d) => d.directory) + ); + return 
fileIndex; + }); + + appmapSearchResponse = await searchAppMaps( + index.index, + query.split(/\s+/).join(' OR '), + maxResults ); + index.close(); } // For each AppMap, search for events within the map that match the query. diff --git a/packages/cli/tests/integration/rpc.search.spec.ts b/packages/cli/tests/integration/rpc.search.spec.ts index 986b819d48..1c4a5b8517 100644 --- a/packages/cli/tests/integration/rpc.search.spec.ts +++ b/packages/cli/tests/integration/rpc.search.spec.ts @@ -1,6 +1,6 @@ import { SearchRpc } from '@appland/rpc'; import { join } from 'path'; -import { readFile, writeFile } from 'fs/promises'; +import { readFile } from 'fs/promises'; import { SingleDirectoryRPCTest as RPCTest } from './RPCTest'; import { verbose } from '../../src/utils'; diff --git a/packages/cli/tests/unit/fulltext/appmap-index.readAppMapContent.spec.ts b/packages/cli/tests/unit/fulltext/appmap-index.readAppMapContent.spec.ts new file mode 100644 index 0000000000..96a0ac4a22 --- /dev/null +++ b/packages/cli/tests/unit/fulltext/appmap-index.readAppMapContent.spec.ts @@ -0,0 +1,90 @@ +import { vol } from 'memfs'; +import { readAppMapContent } from '../../../src/fulltext/appmap-index'; +import { Metadata } from '@appland/models'; + +jest.mock('fs/promises', () => require('memfs').promises); + +describe('readAppMapContent', () => { + beforeEach(() => vol.reset()); + afterEach(() => vol.reset()); + + it('reads appmap content from index files', async () => { + const appmapName = '/appmaps/testAppMap'; + const metadata: Metadata = { + name: 'Test AppMap', + labels: ['test', 'appmap'], + exception: { class: 'Exception', message: 'Test exception' }, + client: { name: 'Test client', version: '1.0.0', url: 'http://test.com' }, + recorder: { name: 'Test recorder' }, + }; + const classMap = [ + { + name: 'package1', + type: 'package', + labels: [], + children: [ + { + name: 'class1', + type: 'class', + labels: [], + children: [ + { + name: 'function1', + type: 'function', + labels: 
[], + children: [], + }, + ], + }, + { name: 'class2', type: 'class', labels: [], children: [] }, + ], + }, + { name: 'query1', type: 'query', labels: [], children: [] }, + { name: 'route1', type: 'route', labels: [], children: [] }, + ]; + + vol.fromJSON({ + [`${appmapName}/metadata.json`]: JSON.stringify(metadata), + [`${appmapName}/classMap.json`]: JSON.stringify(classMap), + [`${appmapName}/canonical.parameters.json`]: JSON.stringify(['param1', 'param2']), + }); + + const content = await readAppMapContent(`${appmapName}.appmap.json`); + expect(content).toContain('Test AppMap'); + expect(content).toContain('test'); + expect(content).toContain('appmap'); + expect(content).toContain('Test exception'); + expect(content).toContain('query1'); + expect(content).toContain('route1'); + expect(content).toContain('function1'); + expect(content).toContain('param1'); + expect(content).toContain('param2'); + expect(content).toContain('route'); + expect(content).toContain('sql'); + expect(content).toContain('database'); + + expect(content.split(' ')).toEqual([ + 'Test', + 'AppMap', + 'test', + 'appmap', + 'Test', + 'exception', + 'query1', + 'package1', + 'class1', + 'function1', + 'class2', + 'route1', + 'param1', + 'param2', + 'sql', + 'query', + 'database', + 'route', + 'request', + 'server', + 'http', + ]); + }); +}); diff --git a/packages/cli/tests/unit/fulltext/AppMapIndex.spec.ts b/packages/cli/tests/unit/fulltext/appmap-index.search.spec.ts similarity index 53% rename from packages/cli/tests/unit/fulltext/AppMapIndex.spec.ts rename to packages/cli/tests/unit/fulltext/appmap-index.search.spec.ts index 4074f6fc74..622f1dc703 100644 --- a/packages/cli/tests/unit/fulltext/AppMapIndex.spec.ts +++ b/packages/cli/tests/unit/fulltext/appmap-index.search.spec.ts @@ -1,16 +1,16 @@ import * as utils from '../../../src/utils'; -import AppMapIndex from '../../../src/fulltext/AppMapIndex'; -import UpToDate from '../../../src/lib/UpToDate'; -import lunr from 'lunr'; +import 
UpToDate, { AppMapIndex } from '../../../src/lib/UpToDate'; import { PathLike } from 'fs'; -import { packRef } from '../../../src/fulltext/ref'; import { join } from 'path'; +import { FileIndex, FileSearchResult } from '@appland/search'; +import { search } from '../../../src/fulltext/appmap-index'; +import { SearchStats } from '../../../src/fulltext/appmap-match'; jest.mock('../../../src/utils'); jest.mock('../../../src/lib/UpToDate'); describe('AppMapIndex', () => { - let appMapIndex: AppMapIndex; + let mockAppmapIndex: FileIndex; afterEach(() => jest.resetAllMocks()); @@ -21,19 +21,40 @@ describe('AppMapIndex', () => { describe('when search results are found', () => { beforeEach(() => { - const search = jest.fn().mockReturnValue([ - { ref: packRef('the-dir', 'appmap5'), score: 5 }, - { ref: packRef('the-dir', 'appmap4'), score: 4 }, - { ref: packRef('the-dir', 'appmap3'), score: 3 }, - { ref: packRef('the-dir', 'appmap2'), score: 2 }, - { ref: packRef('the-dir', 'appmap1'), score: 1 }, - ]); + const searchResults: FileSearchResult[] = [ + { + directory: 'the-dir', + filePath: 'appmap5', + score: 5, + }, + { + directory: 'the-dir', + filePath: 'appmap4', + score: 4, + }, + { + directory: 'the-dir', + filePath: 'appmap3', + score: 3, + }, + { + directory: 'the-dir', + filePath: 'appmap2', + score: 2, + }, + { + directory: 'the-dir', + filePath: 'appmap1', + score: 1, + }, + ]; + const search = jest.fn().mockReturnValue(searchResults); const exists = jest.mocked(utils).exists; exists.mockResolvedValue(true); - const mockLunr: lunr.Index = { + + mockAppmapIndex = { search, - } as unknown as lunr.Index; - appMapIndex = new AppMapIndex(['project-dir'], mockLunr); + } as unknown as FileIndex; }); describe('and some are out of date', () => { @@ -47,7 +68,7 @@ describe('AppMapIndex', () => { }); it('downscores the out of date matches', async () => { - const searchResults = await appMapIndex.search('login', {}); + const searchResults = await search(mockAppmapIndex, 
'login', 5); expect(searchResults.numResults).toEqual(5); expect(searchResults.results.map((r) => r.appmap)).toEqual([ 'appmap5', @@ -61,7 +82,7 @@ describe('AppMapIndex', () => { }); it('only computes downscore until maxResults is reached', async () => { - const searchResults = await appMapIndex.search('login', { maxResults: 1 }); + const searchResults = await search(mockAppmapIndex, 'login', 1); expect(searchResults.numResults).toEqual(5); expect(searchResults.results.map((r) => r.appmap)).toEqual(['appmap5']); expect(searchResults.results.map((r) => r.score)).toEqual([5]); @@ -69,31 +90,17 @@ describe('AppMapIndex', () => { }); }); - describe('when search results are not found', () => { - it('returns an expected result', async () => { - const index = new AppMapIndex(['project-dir'], { - search: jest.fn().mockReturnValue([]), - } as any); - const searchResults = await index.search('', {}); - expect(searchResults).toStrictEqual({ - type: 'appmap', - results: [], - stats: {}, - numResults: 0, - }); - }); - }); - it(`reports statistics`, async () => { mockUpToDate(); - const searchResults = await appMapIndex.search('login', {}); + const searchResults = await search(mockAppmapIndex, 'login', 10); expect(searchResults.numResults).toEqual(5); expect(searchResults.results.map((r) => r.score)).toEqual([5, 4, 3, 2, 1]); - const stats: any = { ...searchResults.stats }; + const stats: SearchStats = { ...searchResults.stats }; const stddev = stats.stddev; - delete stats.stddev; + // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-explicit-any + delete (stats as any).stddev; expect(stats).toEqual({ max: 5, median: 3, @@ -103,25 +110,50 @@ describe('AppMapIndex', () => { }); }); - describe(`when a search result doesn't exist on disk`, () => { - beforeEach(() => mockUpToDate()); + describe('when search results are not found', () => { + it('returns an expected result', async () => { + mockAppmapIndex = { + search: 
jest.fn().mockReturnValue([]), + } as unknown as FileIndex; + const searchResults = await search(mockAppmapIndex, 'the search', 10); + expect(searchResults).toStrictEqual({ + type: 'appmap', + results: [], + stats: {}, + numResults: 0, + }); + }); + }); - it(`removes the search result from the reported matches`, async () => { - const existingFileNames = [join('the-dir', 'appmap1.appmap.json')]; - const search = jest.fn().mockReturnValue([ - { ref: packRef('the-dir', 'appmap1'), score: 1 }, - { ref: packRef('the-dir', 'appmap2'), score: 2 }, - ]); + describe(`when a search result doesn't exist on disk`, () => { + beforeEach(() => { + const existingFileNames = [join('appmap1.appmap.json')]; const exists = jest.mocked(utils).exists; exists.mockImplementation(async (appmapFileName: PathLike): Promise => { return Promise.resolve(existingFileNames.includes(appmapFileName.toString())); }); - const mockLunr: lunr.Index = { - search, - } as unknown as lunr.Index; - appMapIndex = new AppMapIndex(['project-dir'], mockLunr); - const searchResults = await appMapIndex.search('login', {}); + const searchResults: FileSearchResult[] = [ + { + directory: 'the-dir', + filePath: 'appmap1', + score: 1, + }, + { + directory: 'the-dir', + filePath: 'appmap2', + score: 2, + }, + ]; + mockAppmapIndex = { + search: jest.fn().mockReturnValue(searchResults), + } as unknown as FileIndex; + }); + + beforeEach(() => mockUpToDate()); + + it(`removes the search result from the reported matches`, async () => { + const searchResults = await search(mockAppmapIndex, 'login', 10); expect(searchResults.numResults).toEqual(1); expect(searchResults.results).toEqual([ { appmap: 'appmap1', directory: 'the-dir', score: 1 }, diff --git a/packages/cli/tests/unit/rpc/explain/ContextCollector.spec.ts b/packages/cli/tests/unit/rpc/explain/ContextCollector.spec.ts index 93056ebd0d..ee2a1dcaf6 100644 --- a/packages/cli/tests/unit/rpc/explain/ContextCollector.spec.ts +++ 
b/packages/cli/tests/unit/rpc/explain/ContextCollector.spec.ts @@ -5,7 +5,6 @@ import * as navie from '@appland/navie'; import Location from '../../../../src/rpc/explain/location'; jest.mock('@appland/navie'); -jest.mock('../../../../src/fulltext/AppMapIndex'); jest.mock('../../../../src/rpc/explain/SearchContextCollector'); jest.mock('../../../../src/rpc/explain/LocationContextCollector'); diff --git a/packages/cli/tests/unit/rpc/explain/EventCollector.spec.ts b/packages/cli/tests/unit/rpc/explain/EventCollector.spec.ts index d2765709c1..2f1a34e345 100644 --- a/packages/cli/tests/unit/rpc/explain/EventCollector.spec.ts +++ b/packages/cli/tests/unit/rpc/explain/EventCollector.spec.ts @@ -3,7 +3,7 @@ import { join } from 'path'; import { textSearchResultToRpcSearchResult } from '../../../../src/rpc/explain/collectContext'; import buildContext from '../../../../src/rpc/explain/buildContext'; -import { SearchResponse as AppMapSearchResponse } from '../../../../src/fulltext/AppMapIndex'; +import { SearchResponse as AppMapSearchResponse } from '../../../../src/fulltext/appmap-match'; import FindEvents, { SearchResponse as EventSearchResponse, } from '../../../../src/fulltext/FindEvents'; diff --git a/packages/cli/tests/unit/rpc/explain/collectContext.spec.ts b/packages/cli/tests/unit/rpc/explain/collectContext.spec.ts deleted file mode 100644 index 5120f4d8d2..0000000000 --- a/packages/cli/tests/unit/rpc/explain/collectContext.spec.ts +++ /dev/null @@ -1,124 +0,0 @@ -import { SearchRpc } from '@appland/rpc'; -import { ContextCollector } from '../../../../src/rpc/explain/collectContext'; -import AppMapIndex from '../../../../src/fulltext/AppMapIndex'; -import * as navie from '@appland/navie'; -import EventCollector from '../../../../src/rpc/explain/EventCollector'; - -jest.mock('../../../../src/fulltext/AppMapIndex'); -jest.mock('@appland/navie'); - -describe('collectContext', () => { - const vectorTerms = ['login', 'user']; - const charLimit = 5000; - let 
contextCollector: ContextCollector; - - beforeEach(() => { - jest.mocked(navie.applyContext).mockImplementation((context) => context); - }); - afterEach(() => jest.restoreAllMocks()); - - describe('appmaps', () => { - beforeEach(() => { - contextCollector = new ContextCollector(['a', 'b'], [], vectorTerms, charLimit); - }); - - it('returns context for specified appmaps', async () => { - const mockAppmaps = ['appmap1', 'appmap2']; - contextCollector.appmaps = mockAppmaps; - - const mockContext: navie.ContextV2.ContextResponse = [ - { - type: navie.ContextV2.ContextItemType.SequenceDiagram, - content: 'diagram1', - }, - { - type: navie.ContextV2.ContextItemType.SequenceDiagram, - content: 'diagram2', - }, - ]; - - AppMapIndex.search = jest.fn().mockRejectedValue(new Error('Unexpected call to search')); - - EventCollector.prototype.collectEvents = jest.fn().mockResolvedValue({ - results: [], - context: mockContext, - contextSize: 4545, - }); - - const collectedContext = await contextCollector.collectContext(); - - expect(collectedContext.searchResponse.numResults).toBe(mockAppmaps.length); - expect(collectedContext.context).toEqual(mockContext); - }); - - it('handles search across all appmaps', async () => { - const mockSearchResponse: SearchRpc.SearchResponse = { - numResults: 10, - results: [ - { - appmap: 'appmap1', - directory: 'a', - score: 1, - events: [{ fqid: 'function:1', score: 1, eventIds: [1, 2] }], - }, - { - appmap: 'appmap2', - directory: 'a', - score: 1, - events: [{ fqid: 'function:2', score: 1, eventIds: [3, 4] }], - }, - { - appmap: 'appmap3', - directory: 'b', - score: 1, - events: [{ fqid: 'function:3', score: 1, eventIds: [5, 6] }], - }, - ], - }; - - AppMapIndex.search = jest.fn().mockResolvedValue(mockSearchResponse); - - const mockContext: navie.ContextV2.ContextResponse = [ - { - type: navie.ContextV2.ContextItemType.SequenceDiagram, - content: 'diagram1', - }, - ]; - - EventCollector.prototype.collectEvents = jest.fn().mockResolvedValue({ - 
results: [], - context: mockContext, - contextSize: 3000, - }); - - const collectedContext = await contextCollector.collectContext(); - - expect(AppMapIndex.search).toHaveBeenCalledWith(['a', 'b'], vectorTerms.join(' '), { - maxResults: expect.any(Number), - }); - expect(collectedContext.searchResponse.numResults).toBe(10); - expect(collectedContext.context).toEqual(mockContext); - }); - }); - - describe('with empty vector terms', () => { - it('returns an empty context', async () => { - const emptyVectorTerms = ['', ' ']; - - const contextCollector = new ContextCollector( - ['example'], - ['src'], - emptyVectorTerms, - charLimit - ); - const result = await contextCollector.collectContext(); - expect(result).toStrictEqual({ - searchResponse: { - results: [], - numResults: 0, - }, - context: [], - }); - }); - }); -}); diff --git a/packages/search/src/build-file-index.ts b/packages/search/src/build-file-index.ts index b17f993160..935f7d606b 100644 --- a/packages/search/src/build-file-index.ts +++ b/packages/search/src/build-file-index.ts @@ -1,9 +1,10 @@ import makeDebug from 'debug'; -import { join } from 'path'; +import { isAbsolute, join } from 'path'; import FileIndex from './file-index'; import { ContentReader } from './ioutil'; import { warn } from 'console'; +import { isNativeError } from 'util/types'; export type ListFn = (path: string) => Promise; @@ -41,14 +42,19 @@ async function indexDirectory(context: Context, directory: string) { if (!dirContents) return; for (const dirContentItem of dirContents) { - const filePath = join(directory, dirContentItem); + let filePath: string; + if (isAbsolute(dirContentItem)) filePath = dirContentItem; + else filePath = join(directory, dirContentItem); + debug('Indexing: %s', filePath); if (await context.fileFilter(filePath)) { - indexFile(context, filePath).catch((e) => { - warn(`Error indexing file: ${filePath}`); - warn(e); - }); + try { + await indexFile(context, filePath); + } catch (e) { + const message = 
isNativeError(e) ? e.message : String(e); + warn(`Error indexing file ${filePath}: ${message}`); + } } } } From 8d1c7e7d8f3c86ccce6e2c52ae322adf9b6d9d7a Mon Sep 17 00:00:00 2001 From: Kevin Gilpin Date: Fri, 22 Nov 2024 14:45:47 -0500 Subject: [PATCH 09/12] fix: Pass absolute path when loading file content --- packages/search/src/build-snippet-index.ts | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/packages/search/src/build-snippet-index.ts b/packages/search/src/build-snippet-index.ts index e4dd2f22f7..4c8874301c 100644 --- a/packages/search/src/build-snippet-index.ts +++ b/packages/search/src/build-snippet-index.ts @@ -1,3 +1,4 @@ +import { isAbsolute, join } from 'path'; import { Tokenizer } from './build-file-index'; import { ContentReader } from './ioutil'; import SnippetIndex from './snippet-index'; @@ -16,14 +17,16 @@ type Context = { }; async function indexFile(context: Context, file: File) { - const fileContent = await context.contentReader(file.filePath); + const filePath = isAbsolute(file.filePath) ? 
file.filePath : join(file.directory, file.filePath); + + const fileContent = await context.contentReader(filePath); if (!fileContent) return; const extension = file.filePath.split('.').pop() || ''; const chunks = await context.splitter(fileContent, extension); chunks.forEach((chunk, index) => { - const snippetId = `${file.filePath}:${index}`; + const snippetId = `${filePath}:${index}`; const { content, startLine, endLine } = chunk; context.snippetIndex.indexSnippet( snippetId, From 6272ab53bbf63dd750c196f5718067c76628f763 Mon Sep 17 00:00:00 2001 From: Kevin Gilpin Date: Tue, 12 Nov 2024 17:30:37 -0500 Subject: [PATCH 10/12] feat (wip): Search for snippets --- .../cli/src/cmds/search/searchSingleAppMap.ts | 1 + packages/cli/src/fulltext/appmap-index.ts | 51 +++++---- .../cli/src/rpc/explain/EventCollector.ts | 4 +- .../src/rpc/explain/SearchContextCollector.ts | 108 ++++++++++++------ .../cli/src/rpc/explain/appmap-location.ts | 8 ++ .../src/rpc/explain/build-sequence-diagram.ts | 31 +++++ packages/cli/src/rpc/explain/buildContext.ts | 42 ++----- .../cli/src/rpc/explain/collect-snippets.ts | 22 ---- .../cli/src/rpc/explain/collectContext.ts | 2 +- packages/cli/src/rpc/explain/index-events.ts | 80 +++++++++++++ packages/cli/src/rpc/search/search.ts | 2 +- packages/navie/src/index.ts | 2 + packages/search/src/build-snippet-index.ts | 15 +-- packages/search/src/cli.ts | 13 ++- packages/search/src/index.ts | 2 +- packages/search/src/snippet-index.ts | 64 +++++------ packages/search/test/snippet-index.spec.ts | 41 ++++--- 17 files changed, 302 insertions(+), 186 deletions(-) create mode 100644 packages/cli/src/rpc/explain/appmap-location.ts create mode 100644 packages/cli/src/rpc/explain/build-sequence-diagram.ts delete mode 100644 packages/cli/src/rpc/explain/collect-snippets.ts create mode 100644 packages/cli/src/rpc/explain/index-events.ts diff --git a/packages/cli/src/cmds/search/searchSingleAppMap.ts b/packages/cli/src/cmds/search/searchSingleAppMap.ts index 
21358a5a25..bdf10832f6 100644 --- a/packages/cli/src/cmds/search/searchSingleAppMap.ts +++ b/packages/cli/src/cmds/search/searchSingleAppMap.ts @@ -12,6 +12,7 @@ export default async function searchSingleAppMap( query: string, options: SearchOptions = {} ): Promise { + // eslint-disable-next-line no-param-reassign if (appmap.endsWith('.appmap.json')) appmap = appmap.slice(0, -'.appmap.json'.length); const findEvents = new FindEvents(appmap); diff --git a/packages/cli/src/fulltext/appmap-index.ts b/packages/cli/src/fulltext/appmap-index.ts index 77d20ae20f..1e4d0318ff 100644 --- a/packages/cli/src/fulltext/appmap-index.ts +++ b/packages/cli/src/fulltext/appmap-index.ts @@ -16,7 +16,7 @@ import { } from './appmap-match'; import loadAppMapConfig from '../lib/loadAppMapConfig'; -type ClassMapEntry = { +export type ClassMapEntry = { name: string; type: string; labels: string[]; @@ -48,6 +48,29 @@ export async function listAppMaps(directory: string): Promise { return appmapFiles.map(relativeToPath); } +export async function readIndexFile( + appmapName: string, + indexName: string +): Promise { + const indexFile = join(appmapName, [indexName, '.json'].join('')); + let indexStr: string; + try { + indexStr = await readFile(indexFile, 'utf-8'); + } catch (e) { + if (isNativeError(e) && !isNodeError(e, 'ENOENT')) { + warn(`Error reading metadata file ${indexFile}: ${e.message}`); + } + return undefined; + } + + try { + return JSON.parse(indexStr) as T; + } catch (e) { + const errorMessage = isNativeError(e) ? e.message : String(e); + warn(`Error parsing metadata file ${indexFile}: ${errorMessage}`); + } +} + /** * Read all content for an AppMap. For efficiency, utilizes the AppMap index files, rather * than reading the entire AppMap file directly. 
@@ -55,36 +78,16 @@ export async function listAppMaps(directory: string): Promise { export async function readAppMapContent(appmapFile: string): Promise { const appmapName = appmapFile.replace(/\.appmap\.json$/, ''); - async function readIndexFile(name: string): Promise { - const indexFile = join(appmapName, [name, '.json'].join('')); - let indexStr: string; - try { - indexStr = await readFile(indexFile, 'utf-8'); - } catch (e) { - if (isNativeError(e) && !isNodeError(e, 'ENOENT')) { - warn(`Error reading metadata file ${indexFile}: ${e.message}`); - } - return undefined; - } - - try { - return JSON.parse(indexStr) as T; - } catch (e) { - const errorMessage = isNativeError(e) ? e.message : String(e); - warn(`Error parsing metadata file ${indexFile}: ${errorMessage}`); - } - } - const appmapWords = new Array(); - const metadata = await readIndexFile('metadata'); + const metadata = await readIndexFile(appmapName, 'metadata'); if (metadata) { appmapWords.push(metadata.name); if (metadata.labels) appmapWords.push(...metadata.labels); if (metadata.exception) appmapWords.push(metadata.exception.message); } - const classMap = (await readIndexFile('classMap')) ?? []; + const classMap = (await readIndexFile(appmapName, 'classMap')) ?? []; const queries = new Array(); const codeObjects = new Array(); @@ -119,7 +122,7 @@ export async function readAppMapContent(appmapFile: string): Promise { classMap.forEach((co) => collectClassMapEntry(co)); appmapWords.push(...queries, ...codeObjects, ...routes, ...externalRoutes); - const parameters = (await readIndexFile('canonical.parameters')) ?? []; + const parameters = (await readIndexFile(appmapName, 'canonical.parameters')) ?? 
[]; appmapWords.push(...parameters); appmapWords.push(...types); diff --git a/packages/cli/src/rpc/explain/EventCollector.ts b/packages/cli/src/rpc/explain/EventCollector.ts index 2768e7660b..b877f81b6e 100644 --- a/packages/cli/src/rpc/explain/EventCollector.ts +++ b/packages/cli/src/rpc/explain/EventCollector.ts @@ -59,7 +59,7 @@ export default class EventCollector { return { results, context, contextSize }; } - async appmapIndex(appmap: string): Promise { + protected async appmapIndex(appmap: string): Promise { let index = this.appmapIndexes.get(appmap); if (!index) { index = new FindEvents(appmap); @@ -69,7 +69,7 @@ export default class EventCollector { return index; } - async findEvents(appmap: string, options: SearchOptions): Promise { + protected async findEvents(appmap: string, options: SearchOptions): Promise { if (appmap.endsWith('.appmap.json')) appmap = appmap.slice(0, -'.appmap.json'.length); const index = await this.appmapIndex(appmap); diff --git a/packages/cli/src/rpc/explain/SearchContextCollector.ts b/packages/cli/src/rpc/explain/SearchContextCollector.ts index 81afcf0c9f..b0958a3c42 100644 --- a/packages/cli/src/rpc/explain/SearchContextCollector.ts +++ b/packages/cli/src/rpc/explain/SearchContextCollector.ts @@ -1,18 +1,23 @@ -import { log } from 'console'; +import { log, warn } from 'console'; import sqlite3 from 'better-sqlite3'; import { ContextV2, applyContext } from '@appland/navie'; import { SearchRpc } from '@appland/rpc'; -import { FileIndex, FileSearchResult } from '@appland/search'; +import { FileIndex, FileSearchResult, SnippetSearchResult } from '@appland/search'; import { SearchResponse as AppMapSearchResponse } from '../../fulltext/appmap-match'; import { DEFAULT_MAX_DIAGRAMS } from '../search/search'; -import EventCollector from './EventCollector'; import indexFiles from './index-files'; import indexSnippets from './index-snippets'; -import collectSnippets from './collect-snippets'; import buildIndex from './buildIndex'; import { 
buildAppMapIndex, search } from '../../fulltext/appmap-index'; +import indexEvents from './index-events'; + +type ContextCandidate = { + results: SearchRpc.SearchResult[]; + context: ContextV2.ContextResponse; + contextSize: number; +}; export default class SearchContextCollector { public excludePatterns: RegExp[] | undefined; @@ -98,41 +103,80 @@ export default class SearchContextCollector { const snippetIndex = await buildIndex('snippets', async (indexFile) => { const db = new sqlite3(indexFile); - return await indexSnippets(db, fileSearchResults); + const snippetIndex = await indexSnippets(db, fileSearchResults); + await indexEvents(snippetIndex, appmapSearchResponse.results); + return snippetIndex; }); - let contextCandidate: { - results: SearchRpc.SearchResult[]; - context: ContextV2.ContextResponse; - contextSize: number; - }; + let contextCandidate: ContextCandidate; try { - const eventsCollector = new EventCollector(this.vectorTerms.join(' '), appmapSearchResponse); - let charCount = 0; - let maxEventsPerDiagram = 5; + let maxSnippets = 50; log(`[search-context] Requested char limit: ${this.charLimit}`); for (;;) { - log(`[search-context] Collecting context with ${maxEventsPerDiagram} events per diagram.`); - - contextCandidate = await eventsCollector.collectEvents( - maxEventsPerDiagram, - this.excludePatterns, - this.includePatterns, - this.includeTypes + log(`[search-context] Collecting context with ${maxSnippets} events per diagram.`); + + // Collect all code objects from AppMaps and use them to build the sequence diagram + // const codeSnippets = new Array(); + // TODO: Apply this.includeTypes + + const snippetContextItem = ( + snippet: SnippetSearchResult + ): ContextV2.ContextItem | ContextV2.FileContextItem | undefined => { + const { snippetId, directory, score, content } = snippet; + + const { type: snippetIdType, id: snippetIdValue } = snippetId; + + let location: string | undefined; + if (snippetIdType === 'code-snippet') location = 
snippetIdValue; + + switch (snippetId.type) { + case 'query': + case 'route': + case 'external-route': + return { + type: ContextV2.ContextItemType.DataRequest, + content, + directory, + score, + }; + case 'code-snippet': + return { + type: ContextV2.ContextItemType.CodeSnippet, + content, + directory, + score, + location, + }; + default: + warn(`[search-context] Unknown snippet type: ${snippetId.type}`); + + // TODO: Collect all matching events, then build a sequence diagram + // case 'event': + // return await buildSequenceDiagram(snippet); + // default: + // codeSnippets.push(snippet); + } + }; + + const snippetSearchResults = snippetIndex.index.searchSnippets( + this.vectorTerms.join(' OR '), + maxSnippets ); + const context: ContextV2.ContextItem[] = []; + for (const result of snippetSearchResults) { + const contextItem = snippetContextItem(result); + if (contextItem) context.push(contextItem); + } - const codeSnippetCount = contextCandidate.context.filter( - (item) => item.type === ContextV2.ContextItemType.CodeSnippet - ).length; + // TODO: Build sequence diagrams - const charLimit = codeSnippetCount === 0 ? 
this.charLimit : this.charLimit / 4; - const sourceContext = collectSnippets( - snippetIndex.index, - this.vectorTerms.join(' OR '), - charLimit - ); - contextCandidate.context = contextCandidate.context.concat(sourceContext); + contextCandidate = { + // TODO: Fixme remove hard coded cast + results: appmapSearchResponse.results as SearchRpc.SearchResult[], + context, + contextSize: snippetSearchResults.reduce((acc, result) => acc + result.content.length, 0), + }; const appliedContext = applyContext(contextCandidate.context, this.charLimit); const appliedContextSize = appliedContext.reduce( @@ -147,8 +191,8 @@ export default class SearchContextCollector { break; } charCount = appliedContextSize; - maxEventsPerDiagram = Math.ceil(maxEventsPerDiagram * 1.5); - log(`[search-context] Increasing max events per diagram to ${maxEventsPerDiagram}.`); + maxSnippets = Math.ceil(maxSnippets * 1.5); + log(`[search-context] Increasing max events per diagram to ${maxSnippets}.`); } } finally { snippetIndex.close(); diff --git a/packages/cli/src/rpc/explain/appmap-location.ts b/packages/cli/src/rpc/explain/appmap-location.ts new file mode 100644 index 0000000000..b21f30c8bb --- /dev/null +++ b/packages/cli/src/rpc/explain/appmap-location.ts @@ -0,0 +1,8 @@ +import { SearchRpc } from "@appland/rpc"; + +export default function appmapLocation(appmap: string, event?: SearchRpc.EventMatch): string { + const appmapFile = [appmap, 'appmap.json'].join('.'); + const tokens = [appmapFile]; + if (event?.eventIds.length) tokens.push(String(event.eventIds[0])); + return tokens.join(':'); +} diff --git a/packages/cli/src/rpc/explain/build-sequence-diagram.ts b/packages/cli/src/rpc/explain/build-sequence-diagram.ts new file mode 100644 index 0000000000..95225e5cc9 --- /dev/null +++ b/packages/cli/src/rpc/explain/build-sequence-diagram.ts @@ -0,0 +1,31 @@ +import { AppMapFilter, serializeFilter } from '@appland/models'; +import { SearchRpc } from '@appland/rpc'; +import assert from 'assert'; + 
+import { handler as sequenceDiagramHandler } from '../appmap/sequenceDiagram'; +import { ContextV2 } from '@appland/navie'; +import appmapLocation from './appmap-location'; + +export default async function buildSequenceDiagram( + result: SearchRpc.SearchResult +): Promise { + const codeObjects = result.events.map((event) => event.fqid); + const appmapFilter = new AppMapFilter(); + appmapFilter.declutter.context.on = true; + appmapFilter.declutter.context.names = codeObjects; + const filterState = serializeFilter(appmapFilter); + + const plantUML = await sequenceDiagramHandler(result.appmap, { + filter: filterState, + format: 'plantuml', + formatOptions: { disableMarkup: true }, + }); + assert(typeof plantUML === 'string'); + return { + directory: result.directory, + location: appmapLocation(result.appmap), + type: ContextV2.ContextItemType.SequenceDiagram, + content: plantUML, + score: result.score, + }; +} diff --git a/packages/cli/src/rpc/explain/buildContext.ts b/packages/cli/src/rpc/explain/buildContext.ts index e8bcfe89c1..1a06dc4b4b 100644 --- a/packages/cli/src/rpc/explain/buildContext.ts +++ b/packages/cli/src/rpc/explain/buildContext.ts @@ -1,12 +1,11 @@ import { SearchRpc } from '@appland/rpc'; -import { AppMapFilter, serializeFilter } from '@appland/models'; -import assert from 'assert'; - -import { handler as sequenceDiagramHandler } from '../appmap/sequenceDiagram'; -import lookupSourceCode from './lookupSourceCode'; import { warn } from 'console'; import { ContextV2 } from '@appland/navie'; +import lookupSourceCode from './lookupSourceCode'; +import buildSequenceDiagram from './build-sequence-diagram'; +import appmapLocation from './appmap-location'; + /** * Processes search results to build sequence diagrams, code snippets, and code object sets. This is the format * expected by the Navie AI. 
@@ -33,39 +32,10 @@ export default async function buildContext( const codeSnippetLocations = new Set(); const dataRequestContent = new Set(); - const appmapLocation = (appmap: string, event?: SearchRpc.EventMatch) => { - const appmapFile = [appmap, 'appmap.json'].join('.'); - const tokens = [appmapFile]; - if (event?.eventIds.length) tokens.push(String(event.eventIds[0])); - return tokens.join(':'); - }; - - const buildSequenceDiagram = async (result: SearchRpc.SearchResult) => { - const codeObjects = result.events.map((event) => event.fqid); - const appmapFilter = new AppMapFilter(); - appmapFilter.declutter.context.on = true; - appmapFilter.declutter.context.names = codeObjects; - const filterState = serializeFilter(appmapFilter); - - const plantUML = await sequenceDiagramHandler(result.appmap, { - filter: filterState, - format: 'plantuml', - formatOptions: { disableMarkup: true }, - }); - assert(typeof plantUML === 'string'); - sequenceDiagrams.push({ - directory: result.directory, - location: appmapLocation(result.appmap), - type: ContextV2.ContextItemType.SequenceDiagram, - content: plantUML, - score: result.score, - }); - }; - const examinedLocations = new Set(); for (const result of searchResults) { try { - await buildSequenceDiagram(result); + sequenceDiagrams.push(await buildSequenceDiagram(result)); } catch (e) { warn(`Failed to build sequence diagram for ${result.appmap}`); warn(e); @@ -93,6 +63,8 @@ export default async function buildContext( codeSnippetLocations.add(event.location); + // TODO: Snippets from appmap events will no longer be needed, because the snippets come + // from the search results in the index (boosted by AppMap references). 
const snippets = await lookupSourceCode(result.directory, event.location); if (snippets) { codeSnippets.push({ diff --git a/packages/cli/src/rpc/explain/collect-snippets.ts b/packages/cli/src/rpc/explain/collect-snippets.ts deleted file mode 100644 index 502894e9c1..0000000000 --- a/packages/cli/src/rpc/explain/collect-snippets.ts +++ /dev/null @@ -1,22 +0,0 @@ -import { ContextV2 } from '@appland/navie'; -import { SnippetIndex, SnippetSearchResult } from '@appland/search'; -import { CHARS_PER_SNIPPET } from './collectContext'; - -export default function collectSnippets( - snippetIndex: SnippetIndex, - query: string, - charLimit: number -): ContextV2.ContextResponse { - const snippets = snippetIndex.searchSnippets(query, Math.round(charLimit / CHARS_PER_SNIPPET)); - - const buildLocation = (result: SnippetSearchResult) => { - return `${result.filePath}:${result.startLine}-${result.endLine}`; - }; - - return snippets.map((snippet) => ({ - directory: snippet.directory, - type: ContextV2.ContextItemType.CodeSnippet, - content: snippet.content, - location: buildLocation(snippet), - })); -} diff --git a/packages/cli/src/rpc/explain/collectContext.ts b/packages/cli/src/rpc/explain/collectContext.ts index 1e3849dac3..c09eed9588 100644 --- a/packages/cli/src/rpc/explain/collectContext.ts +++ b/packages/cli/src/rpc/explain/collectContext.ts @@ -109,7 +109,7 @@ export default async function collectContext( }> { const keywords = searchTerms.map((term) => queryKeywords(term)).flat(); - // recent?: boolean; + // recent?: boolean; // locations?: string[]; // itemTypes?: ContextItemType[]; // labels?: ContextLabel[]; diff --git a/packages/cli/src/rpc/explain/index-events.ts b/packages/cli/src/rpc/explain/index-events.ts new file mode 100644 index 0000000000..16c9042840 --- /dev/null +++ b/packages/cli/src/rpc/explain/index-events.ts @@ -0,0 +1,80 @@ +import { SnippetId, SnippetIndex } from '@appland/search'; +import { warn } from 'console'; +import crypto from 'crypto'; + +import 
{ SearchResult } from '../../fulltext/appmap-match'; +import { ClassMapEntry, readIndexFile } from '../../fulltext/appmap-index'; + +function hexDigest(input: string): string { + const hash = crypto.createHash('sha256'); + hash.update(input); + return hash.digest('hex'); +} + +async function indexAppMapEvents( + snippetIndex: SnippetIndex, + directory: string, + appmapFile: string +): Promise { + const appmapName = appmapFile.endsWith('.appmap.json') + ? appmapFile.slice(0, -'.appmap.json'.length) + : appmapFile; + const classMap = await readIndexFile(appmapName, 'classMap'); + if (!classMap) { + warn(`[index-events] No class map found for ${appmapName}`); + return; + } + + const indexCodeObject = (type: string, id: string, content: string, ...tags: string[]) => { + const words = [content, ...tags].join(' '); + + const snippetId: SnippetId = { + type, + id, + }; + + // TODO: Include event id in the snippet id + snippetIndex.indexSnippet(snippetId, directory, '', words, content); + }; + + const boostCodeObject = (location: string) => { + const snippetId: SnippetId = { + type: 'code-snippet', + id: location, + }; + snippetIndex.boostSnippet(snippetId, 2); + }; + + const indexClassMapEntry = (cme: ClassMapEntry) => { + let id: string | undefined; + let tags: string[] = []; + if (cme.type === 'query') { + id = hexDigest(cme.name); + tags = ['sql', 'query', 'database']; + } else if (cme.type === 'route') { + id = cme.name; + tags = ['route', 'request', 'server', 'http']; + } else if (cme.type === 'external-route') { + id = cme.name; + tags = ['route', 'request', 'client', 'http']; + } + + if (id) indexCodeObject(cme.type, id, cme.name, ...tags); + + if (cme.sourceLocation) boostCodeObject(cme.sourceLocation); + + cme.children?.forEach((child) => { + indexClassMapEntry(child); + }); + }; + classMap.forEach((co) => indexClassMapEntry(co)); +} + +export default async function indexEvents( + snippetIndex: SnippetIndex, + appmapSearchResults: SearchResult[] +): Promise { + 
for (const { directory, appmap } of appmapSearchResults) { + await indexAppMapEvents(snippetIndex, directory, appmap); + } +} diff --git a/packages/cli/src/rpc/search/search.ts b/packages/cli/src/rpc/search/search.ts index a646f64188..ba4e67644a 100644 --- a/packages/cli/src/rpc/search/search.ts +++ b/packages/cli/src/rpc/search/search.ts @@ -11,7 +11,7 @@ import configuration, { AppMapDirectory } from '../configuration'; import buildIndex from '../explain/buildIndex'; import { buildAppMapIndex } from '../../fulltext/appmap-index'; -export const DEFAULT_MAX_DIAGRAMS = 10; +export const DEFAULT_MAX_DIAGRAMS = 3; export const DEFAULT_MAX_EVENTS_PER_DIAGRAM = 100; export const DEFAULT_MAX_FILES = 10; diff --git a/packages/navie/src/index.ts b/packages/navie/src/index.ts index 8c10fc1a02..61ec67dbee 100644 --- a/packages/navie/src/index.ts +++ b/packages/navie/src/index.ts @@ -1,9 +1,11 @@ /* eslint-disable import/prefer-default-export */ export { default as applyContext } from './lib/apply-context'; +export { default as parseJSON } from './lib/parse-json'; export { default as extractFileChanges } from './lib/extract-file-changes'; export { default as Message } from './message'; export { default as InteractionState } from './interaction-state'; export { default as navie } from './navie'; +export { default as trimFences } from './lib/trim-fences'; export { AgentMode as Agents } from './agent'; export { ContextV1, ContextV2 } from './context'; export { UserContext } from './user-context'; diff --git a/packages/search/src/build-snippet-index.ts b/packages/search/src/build-snippet-index.ts index 4c8874301c..1887c8554e 100644 --- a/packages/search/src/build-snippet-index.ts +++ b/packages/search/src/build-snippet-index.ts @@ -1,7 +1,7 @@ import { isAbsolute, join } from 'path'; import { Tokenizer } from './build-file-index'; import { ContentReader } from './ioutil'; -import SnippetIndex from './snippet-index'; +import SnippetIndex, { SnippetId } from './snippet-index'; 
import { Splitter } from './splitter'; export type File = { @@ -25,15 +25,16 @@ async function indexFile(context: Context, file: File) { const extension = file.filePath.split('.').pop() || ''; const chunks = await context.splitter(fileContent, extension); - chunks.forEach((chunk, index) => { - const snippetId = `${filePath}:${index}`; - const { content, startLine, endLine } = chunk; + chunks.forEach((chunk) => { + const { content, startLine } = chunk; + const id = [filePath, startLine].filter(Boolean).join(':'); + const snippetId: SnippetId = { + type: 'code-snippet', + id, + }; context.snippetIndex.indexSnippet( snippetId, file.directory, - file.filePath, - startLine, - endLine, context.tokenizer(content, file.filePath).symbols.join(' '), context.tokenizer(content, file.filePath).words.join(' '), content diff --git a/packages/search/src/cli.ts b/packages/search/src/cli.ts index 515ba712ad..e5d933dd74 100644 --- a/packages/search/src/cli.ts +++ b/packages/search/src/cli.ts @@ -80,15 +80,15 @@ const cli = yargs(hideBin(process.argv)) return `.../${parts.slice(-3).join('/')}`; }; - const printResult = (filePath: string, score: number) => - console.log('%s %s', filePathAtMostThreeEntries(filePath), score.toPrecision(3)); + const printResult = (type: string, id: string, score: number) => + console.log('%s %s %s', type, filePathAtMostThreeEntries(id), score.toPrecision(3)); console.log('File search results'); console.log('-------------------'); const fileSearchResults = fileIndex.search(query as string); for (const result of fileSearchResults) { const { filePath, score } = result; - printResult(filePath, score); + printResult('file', filePath, score); } const splitter = langchainSplitter; @@ -104,8 +104,11 @@ const cli = yargs(hideBin(process.argv)) const snippetSearchResults = snippetIndex.searchSnippets(query as string); for (const result of snippetSearchResults) { - const { snippetId, filePath, startLine, endLine, score } = result; - printResult(snippetId, score); + 
const { snippetId, score } = result; + printResult(snippetId.type, snippetId.id, score); + + const [filePath, range] = snippetId.id.split(':'); + const [startLine, endLine] = range.split('-').map((n) => parseInt(n, 10)); if (isNullOrUndefined(startLine) || isNullOrUndefined(endLine)) continue; diff --git a/packages/search/src/index.ts b/packages/search/src/index.ts index 8f74cd8836..7fcab03761 100644 --- a/packages/search/src/index.ts +++ b/packages/search/src/index.ts @@ -2,7 +2,7 @@ export { ContentReader, readFileSafe } from './ioutil'; export { Splitter, langchainSplitter } from './splitter'; export { ListFn, FilterFn, Tokenizer, default as buildFileIndex } from './build-file-index'; export { File, default as buildSnippetIndex } from './build-snippet-index'; -export { default as SnippetIndex, SnippetSearchResult } from './snippet-index'; +export { default as SnippetIndex, SnippetSearchResult, SnippetId } from './snippet-index'; export { default as FileIndex, FileSearchResult } from './file-index'; export { default as listProjectFiles } from './project-files'; export { isBinaryFile, isDataFile, isLargeFile } from './file-type'; diff --git a/packages/search/src/snippet-index.ts b/packages/search/src/snippet-index.ts index d30672cce1..83e8ac50b3 100644 --- a/packages/search/src/snippet-index.ts +++ b/packages/search/src/snippet-index.ts @@ -1,11 +1,9 @@ +import assert from 'assert'; import sqlite3 from 'better-sqlite3'; const CREATE_SNIPPET_CONTENT_TABLE_SQL = `CREATE VIRTUAL TABLE snippet_content USING fts5( snippet_id UNINDEXED, directory UNINDEXED, - file_path, - start_line UNINDEXED, - end_line UNINDEXED, file_symbols, file_words, content UNINDEXED, @@ -18,8 +16,8 @@ const CREATE_SNIPPET_BOOST_TABLE_SQL = `CREATE TABLE snippet_boost ( )`; const INSERT_SNIPPET_SQL = `INSERT INTO snippet_content -(snippet_id, directory, file_path, start_line, end_line, file_symbols, file_words, content) -VALUES (?, ?, ?, ?, ?, ?, ?, ?)`; +(snippet_id, directory, file_symbols, 
file_words, content) +VALUES (?, ?, ?, ?, ?)`; const UPDATE_SNIPPET_BOOST_SQL = `INSERT OR REPLACE INTO snippet_boost (snippet_id, boost_factor) @@ -27,9 +25,6 @@ VALUES (?, ?)`; const SEARCH_SNIPPET_SQL = `SELECT snippet_content.directory, - snippet_content.file_path, - snippet_content.start_line, - snippet_content.end_line, snippet_content.snippet_id, snippet_content.content, (bm25(snippet_content, 1)*3.0 + bm25(snippet_content, 2)*2.0 + bm25(snippet_content, 3)*1.0) @@ -47,12 +42,29 @@ ORDER BY score DESC LIMIT ?`; +export type SnippetId = { + type: string; + id: string; +}; + +export function encodeSnippetId(snippetId: SnippetId): string { + return [snippetId.type, snippetId.id].join(':'); +} + +export function parseSnippetId(snippetId: string): SnippetId { + const parts = snippetId.split(':'); + const type = parts.shift(); + assert(type); + const id = parts.join(':'); + return { + type, + id, + }; +} + export type SnippetSearchResult = { - snippetId: string; + snippetId: SnippetId; directory: string; - filePath: string; - startLine: number | undefined; - endLine: number | undefined; score: number; content: string; }; @@ -60,9 +72,6 @@ export type SnippetSearchResult = { type SnippetSearchRow = { snippet_id: string; directory: string; - file_path: string; - start_line: number | undefined; - end_line: number | undefined; score: number; content: string; }; @@ -83,39 +92,24 @@ export default class SnippetIndex { } indexSnippet( - snippetId: string, + snippetId: SnippetId, directory: string, - filePath: string, - startLine: number | undefined, - endLine: number | undefined, symbols: string, words: string, content: string ): void { - this.#insertSnippet.run( - snippetId, - directory, - filePath, - startLine, - endLine, - symbols, - words, - content - ); + this.#insertSnippet.run(encodeSnippetId(snippetId), directory, symbols, words, content); } - boostSnippet(snippetId: string, boostFactor: number): void { - this.#updateSnippetBoost.run(snippetId, boostFactor); + 
boostSnippet(snippetId: SnippetId, boostFactor: number): void { + this.#updateSnippetBoost.run(encodeSnippetId(snippetId), boostFactor); } searchSnippets(query: string, limit = 10): SnippetSearchResult[] { const rows = this.#searchSnippet.all(query, limit) as SnippetSearchRow[]; return rows.map((row) => ({ directory: row.directory, - snippetId: row.snippet_id, - filePath: row.file_path, - startLine: row.start_line, - endLine: row.end_line, + snippetId: parseSnippetId(row.snippet_id), score: row.score, content: row.content, })); diff --git a/packages/search/test/snippet-index.spec.ts b/packages/search/test/snippet-index.spec.ts index bbf256722f..42a575e143 100644 --- a/packages/search/test/snippet-index.spec.ts +++ b/packages/search/test/snippet-index.spec.ts @@ -1,13 +1,18 @@ import { strict as assert } from 'assert'; import sqlite3 from 'better-sqlite3'; -import SnippetIndex from '../src/snippet-index'; +import SnippetIndex, { SnippetId } from '../src/snippet-index'; describe('SnippetIndex', () => { let db: sqlite3.Database; let index: SnippetIndex; const directory = 'src'; + const snippet1: SnippetId = { type: 'code-snippet', id: 'test.txt:1' }; + const snippet2: SnippetId = { type: 'code-snippet', id: 'test2.txt:11' }; + const snippet3: SnippetId = { type: 'code-snippet', id: 'test3.txt:21' }; + const snippet4: SnippetId = { type: 'code-snippet', id: 'test4.txt:31' }; + beforeEach(() => { db = new sqlite3(':memory:'); index = new SnippetIndex(db); @@ -19,39 +24,33 @@ describe('SnippetIndex', () => { it('should insert and search a snippet', () => { const content = 'symbol1 word1'; - index.indexSnippet('snippet1', directory, 'test.txt', 1, 10, 'symbol1', 'word1', content); + index.indexSnippet(snippet1, directory, 'symbol1', 'word1', content); const results = index.searchSnippets('symbol1'); assert.equal(results.length, 1); - assert.equal(results[0].snippetId, 'snippet1'); + assert.equal(JSON.stringify(results[0].snippetId), JSON.stringify(snippet1)); 
assert.equal(results[0].content, content); }); it('should update the boost factor of a snippet', () => { const content = 'symbol2 word2'; - index.indexSnippet('snippet2', directory, 'test2.txt', 11, 20, 'symbol2', 'word2', content); - index.boostSnippet('snippet2', 2.0); + index.indexSnippet(snippet2, directory, 'symbol2', 'word2', content); + index.boostSnippet(snippet2, 2.0); const results = index.searchSnippets('symbol2'); assert.equal(results.length, 1); - assert.equal(results[0].snippetId, 'snippet2'); + assert.equal(JSON.stringify(results[0].snippetId), JSON.stringify(snippet2)); }); it('should return results ordered by score', () => { index.indexSnippet( - 'snippet3', + snippet3, directory, - 'test3.txt', - 21, - 30, 'symbol1 symbol3', 'word1 word3', 'symbol1 word1 symbol3 word3' ); index.indexSnippet( - 'snippet4', + snippet4, directory, - 'test4.txt', - 31, - 40, 'symbol2 symbol3', 'word1 word4', 'symbol2 word1 symbol3 word4' @@ -59,17 +58,17 @@ describe('SnippetIndex', () => { let results = index.searchSnippets('word1 OR word4'); assert.equal(results.length, 2); - assert.equal(results[0].snippetId, 'snippet4'); - assert.equal(results[1].snippetId, 'snippet3'); + assert.equal(JSON.stringify(results[0].snippetId), JSON.stringify(snippet4)); + assert.equal(JSON.stringify(results[1].snippetId), JSON.stringify(snippet3)); const unboostedScore = results[1].score; - index.boostSnippet('snippet3', 2.0); + index.boostSnippet(snippet3, 2.0); results = index.searchSnippets('word1 OR word4'); assert.equal(results.length, 2); - assert.equal(results[0].snippetId, 'snippet3'); - assert.equal(results[1].snippetId, 'snippet4'); + assert.equal(JSON.stringify(results[0].snippetId), JSON.stringify(snippet3)); + assert.equal(JSON.stringify(results[1].snippetId), JSON.stringify(snippet4)); const boostedScore = results[0].score; const scoreMultiple = boostedScore / unboostedScore; @@ -78,7 +77,7 @@ describe('SnippetIndex', () => { results = index.searchSnippets('symbol3'); 
assert.equal(results.length, 2); - assert.equal(results[0].snippetId, 'snippet3'); - assert.equal(results[1].snippetId, 'snippet4'); + assert.equal(JSON.stringify(results[0].snippetId), JSON.stringify(snippet3)); + assert.equal(JSON.stringify(results[1].snippetId), JSON.stringify(snippet4)); }); }); From 3b97d13cd1d6e6acca8d36d4739e6ff7d47f4c32 Mon Sep 17 00:00:00 2001 From: Kevin Gilpin Date: Mon, 18 Nov 2024 10:45:43 -0500 Subject: [PATCH 11/12] refactor: Refactor context collectors --- packages/cli/src/cmds/search/search.ts | 4 +- packages/cli/src/fulltext/appmap-index.ts | 2 +- .../cli/src/rpc/explain/EventCollector.ts | 78 ------ .../rpc/explain/LocationContextCollector.ts | 93 ------- .../src/rpc/explain/SearchContextCollector.ts | 209 --------------- packages/cli/src/rpc/explain/buildContext.ts | 82 ------ .../cli/src/rpc/explain/collect-context.ts | 136 ++++++++++ .../rpc/explain/collect-location-context.ts | 92 +++++++ .../src/rpc/explain/collect-search-context.ts | 155 +++++++++++ .../cli/src/rpc/explain/collectContext.ts | 173 ------------- packages/cli/src/rpc/explain/explain.ts | 25 +- .../cli/src/rpc/explain/index-snippets.ts | 21 -- .../rpc/explain/index/appmap-file-index.ts | 31 +++ .../build-index-in-temp-dir.ts} | 4 +- .../rpc/explain/{ => index}/index-events.ts | 18 +- .../project-file-index.ts} | 34 ++- .../index/project-file-snippet-index.ts | 84 ++++++ .../cli/src/rpc/explain/lookupSourceCode.ts | 105 -------- packages/cli/src/rpc/explain/parseLocation.ts | 16 -- packages/cli/src/rpc/search/search.ts | 4 +- .../unit/fulltext/appmap-index.search.spec.ts | 2 +- .../unit/rpc/explain/ContextCollector.spec.ts | 98 ------- .../explain/LocationContextCollector.spec.ts | 90 ------- .../unit/rpc/explain/collect-context.spec.ts | 129 ++++++++++ .../explain/collect-location-context.spec.ts | 88 +++++++ .../explain/collect-search-context.spec.ts | 151 +++++++++++ .../rpc/explain/index/index-events.spec.ts | 93 +++++++ 
.../index/project-file-snippet-index.spec.ts | 242 ++++++++++++++++++ .../tests/unit/rpc/explain/pattern.spec.ts | 2 +- 29 files changed, 1264 insertions(+), 997 deletions(-) delete mode 100644 packages/cli/src/rpc/explain/EventCollector.ts delete mode 100644 packages/cli/src/rpc/explain/LocationContextCollector.ts delete mode 100644 packages/cli/src/rpc/explain/SearchContextCollector.ts delete mode 100644 packages/cli/src/rpc/explain/buildContext.ts create mode 100644 packages/cli/src/rpc/explain/collect-context.ts create mode 100644 packages/cli/src/rpc/explain/collect-location-context.ts create mode 100644 packages/cli/src/rpc/explain/collect-search-context.ts delete mode 100644 packages/cli/src/rpc/explain/collectContext.ts delete mode 100644 packages/cli/src/rpc/explain/index-snippets.ts create mode 100644 packages/cli/src/rpc/explain/index/appmap-file-index.ts rename packages/cli/src/rpc/explain/{buildIndex.ts => index/build-index-in-temp-dir.ts} (89%) rename packages/cli/src/rpc/explain/{ => index}/index-events.ts (77%) rename packages/cli/src/rpc/explain/{index-files.ts => index/project-file-index.ts} (52%) create mode 100644 packages/cli/src/rpc/explain/index/project-file-snippet-index.ts delete mode 100644 packages/cli/src/rpc/explain/lookupSourceCode.ts delete mode 100644 packages/cli/src/rpc/explain/parseLocation.ts delete mode 100644 packages/cli/tests/unit/rpc/explain/ContextCollector.spec.ts delete mode 100644 packages/cli/tests/unit/rpc/explain/LocationContextCollector.spec.ts create mode 100644 packages/cli/tests/unit/rpc/explain/collect-context.spec.ts create mode 100644 packages/cli/tests/unit/rpc/explain/collect-location-context.spec.ts create mode 100644 packages/cli/tests/unit/rpc/explain/collect-search-context.spec.ts create mode 100644 packages/cli/tests/unit/rpc/explain/index/index-events.spec.ts create mode 100644 packages/cli/tests/unit/rpc/explain/index/project-file-snippet-index.spec.ts diff --git 
a/packages/cli/src/cmds/search/search.ts b/packages/cli/src/cmds/search/search.ts index cb706bf617..b4f94b411e 100644 --- a/packages/cli/src/cmds/search/search.ts +++ b/packages/cli/src/cmds/search/search.ts @@ -16,7 +16,7 @@ import { } from '../../fulltext/FindEvents'; import { openInBrowser } from '../open/openers'; import { buildAppMapIndex, search } from '../../fulltext/appmap-index'; -import buildIndex from '../../rpc/explain/buildIndex'; +import buildIndexInTempDir from '../../rpc/explain/index/build-index-in-temp-dir'; export const command = 'search '; export const describe = @@ -183,7 +183,7 @@ export const handler = async (argv: ArgumentTypes) => { maxResults, }; - const index = await buildIndex('appmaps', async (indexFile) => { + const index = await buildIndexInTempDir('appmaps', async (indexFile) => { const db = new sqlite3(indexFile); const fileIndex = new FileIndex(db); await buildAppMapIndex(fileIndex, [process.cwd()]); diff --git a/packages/cli/src/fulltext/appmap-index.ts b/packages/cli/src/fulltext/appmap-index.ts index 1e4d0318ff..3050141965 100644 --- a/packages/cli/src/fulltext/appmap-index.ts +++ b/packages/cli/src/fulltext/appmap-index.ts @@ -19,7 +19,7 @@ import loadAppMapConfig from '../lib/loadAppMapConfig'; export type ClassMapEntry = { name: string; type: string; - labels: string[]; + labels?: string[]; children: ClassMapEntry[]; static?: boolean; sourceLocation?: string; diff --git a/packages/cli/src/rpc/explain/EventCollector.ts b/packages/cli/src/rpc/explain/EventCollector.ts deleted file mode 100644 index b877f81b6e..0000000000 --- a/packages/cli/src/rpc/explain/EventCollector.ts +++ /dev/null @@ -1,78 +0,0 @@ -import { isAbsolute, join } from 'path'; -import { ContextV2 } from '@appland/navie'; -import { SearchRpc } from '@appland/rpc'; -import { SearchResponse as AppMapSearchResponse } from '../../fulltext/appmap-match'; -import FindEvents, { - SearchResponse as EventSearchResponse, - SearchOptions as EventsSearchOptions, - 
SearchOptions, -} from '../../fulltext/FindEvents'; -import buildContext from './buildContext'; -import { textSearchResultToRpcSearchResult } from './collectContext'; - -export default class EventCollector { - appmapIndexes = new Map(); - - constructor(private query: string, private appmapSearchResponse: AppMapSearchResponse) {} - - async collectEvents( - maxEvents: number, - excludePatterns?: RegExp[], - includePatterns?: RegExp[], - includeTypes?: ContextV2.ContextItemType[] - ): Promise<{ - results: SearchRpc.SearchResult[]; - context: ContextV2.ContextResponse; - contextSize: number; - }> { - const results = new Array(); - - for (const result of this.appmapSearchResponse.results) { - let { appmap } = result; - if (!isAbsolute(appmap)) appmap = join(result.directory, appmap); - - const options: EventsSearchOptions = { - maxResults: maxEvents, - }; - if (includePatterns) options.includePatterns = includePatterns; - if (excludePatterns) options.excludePatterns = excludePatterns; - - const eventsSearchResponse = await this.findEvents(appmap, options); - results.push({ - appmap: appmap, - directory: result.directory, - events: eventsSearchResponse.results.map(textSearchResultToRpcSearchResult), - score: result.score, - }); - } - - const isIncludedType = (item: ContextV2.ContextItem) => { - if (includeTypes && !includeTypes.some((type) => type === item.type)) return false; - - return true; - }; - - const context = (await buildContext(results)).filter(isIncludedType); - - const contextSize = context.reduce((acc, item) => acc + item.content.length, 0); - - return { results, context, contextSize }; - } - - protected async appmapIndex(appmap: string): Promise { - let index = this.appmapIndexes.get(appmap); - if (!index) { - index = new FindEvents(appmap); - await index.initialize(); - this.appmapIndexes.set(appmap, index); - } - return index; - } - - protected async findEvents(appmap: string, options: SearchOptions): Promise { - if (appmap.endsWith('.appmap.json')) 
appmap = appmap.slice(0, -'.appmap.json'.length); - - const index = await this.appmapIndex(appmap); - return index.search(this.query, options); - } -} diff --git a/packages/cli/src/rpc/explain/LocationContextCollector.ts b/packages/cli/src/rpc/explain/LocationContextCollector.ts deleted file mode 100644 index 56b69b24eb..0000000000 --- a/packages/cli/src/rpc/explain/LocationContextCollector.ts +++ /dev/null @@ -1,93 +0,0 @@ -import { readFile } from 'fs/promises'; -import { warn } from 'console'; -import { isAbsolute, join } from 'path'; -import { ContextV2 } from '@appland/navie'; -import { SearchRpc } from '@appland/rpc'; -import Location from './location'; -import { exists, isFile, verbose } from '../../utils'; - -/** - * LocationContextCollector is responsible for collecting context information from specified locations - * within source directories. It reads the contents of files at these locations and extracts code snippets - * to build a context response. - * - * Primary effects: - * - Iterates over provided locations and determines if they are absolute or relative paths. - * - For each location, constructs the full path and checks if the file exists and is a valid file. - * - Reads the contents of the file and extracts a code snippet based on the location. - * - Builds a context response containing the extracted code snippets and their respective locations. - * - Returns the context response along with a search response. 
- */ -export default class LocationContextCollector { - constructor(private sourceDirectories: string[], private locations: Location[]) {} - - async collectContext(): Promise<{ - searchResponse: SearchRpc.SearchResponse; - context: ContextV2.ContextResponse; - }> { - const result: { searchResponse: SearchRpc.SearchResponse; context: ContextV2.ContextResponse } = - { searchResponse: { results: [], numResults: 0 }, context: [] }; - - const candidateLocations = new Array<{ location: Location; directory?: string }>(); - for (const location of this.locations) { - const { path } = location; - if (isAbsolute(path)) { - const directory = this.sourceDirectories.find((dir) => path.startsWith(dir)); - candidateLocations.push({ location, directory }); - } else { - for (const sourceDirectory of this.sourceDirectories) { - candidateLocations.push({ location, directory: sourceDirectory }); - } - } - } - - if (verbose()) - warn( - `[location-context] Candidate locations: ${candidateLocations - .map((loc) => loc.location.toString()) - .join(', ')}` - ); - - for (const { location, directory } of candidateLocations) { - let pathTokens: string[] = []; - - if (isAbsolute(location.path)) pathTokens = [location.path]; - else if (directory) pathTokens = [directory, location.path].filter(Boolean); - - const path = join(...pathTokens); - if (!(await exists(path))) { - if (verbose()) warn(`[location-context] Skipping non-existent location: ${path}`); - continue; - } - if (!(await isFile(path))) { - if (verbose()) warn(`[location-context] Skipping non-file location: ${path}`); - continue; - } - - let contents: string | undefined; - try { - contents = await readFile(path, 'utf8'); - } catch (e) { - warn(`[location-context] Failed to read file: ${path}`); - continue; - } - - if (verbose()) - warn( - `[location-context] Extracting snippet for location: ${location.toString()} (${ - contents.length - } bytes)` - ); - - const snippet = location.snippet(contents); - result.context.push({ - type: 
ContextV2.ContextItemType.CodeSnippet, - content: snippet, - location: location.toString(), - directory, - }); - } - - return result; - } -} diff --git a/packages/cli/src/rpc/explain/SearchContextCollector.ts b/packages/cli/src/rpc/explain/SearchContextCollector.ts deleted file mode 100644 index b0958a3c42..0000000000 --- a/packages/cli/src/rpc/explain/SearchContextCollector.ts +++ /dev/null @@ -1,209 +0,0 @@ -import { log, warn } from 'console'; -import sqlite3 from 'better-sqlite3'; - -import { ContextV2, applyContext } from '@appland/navie'; -import { SearchRpc } from '@appland/rpc'; -import { FileIndex, FileSearchResult, SnippetSearchResult } from '@appland/search'; - -import { SearchResponse as AppMapSearchResponse } from '../../fulltext/appmap-match'; -import { DEFAULT_MAX_DIAGRAMS } from '../search/search'; -import indexFiles from './index-files'; -import indexSnippets from './index-snippets'; -import buildIndex from './buildIndex'; -import { buildAppMapIndex, search } from '../../fulltext/appmap-index'; -import indexEvents from './index-events'; - -type ContextCandidate = { - results: SearchRpc.SearchResult[]; - context: ContextV2.ContextResponse; - contextSize: number; -}; - -export default class SearchContextCollector { - public excludePatterns: RegExp[] | undefined; - public includePatterns: RegExp[] | undefined; - public includeTypes: ContextV2.ContextItemType[] | undefined; - - constructor( - private appmapDirectories: string[], - private sourceDirectories: string[], - private appmaps: string[] | undefined, - private vectorTerms: string[], - private charLimit: number - ) {} - - async collectContext(): Promise<{ - searchResponse: SearchRpc.SearchResponse; - context: ContextV2.ContextResponse; - }> { - let appmapSearchResponse: AppMapSearchResponse; - if (this.appmaps) { - const results = this.appmaps - .map((appmap) => { - const directory = this.appmapDirectories.find((dir) => appmap.startsWith(dir)); - if (!directory) return undefined; - - return { - 
appmap, - directory, - score: 1, - }; - }) - .filter(Boolean) as SearchRpc.SearchResult[]; - appmapSearchResponse = { - type: 'appmap', - stats: { - max: 1, - mean: 1, - median: 1, - stddev: 0, - }, - results, - numResults: this.appmaps.length, - }; - } else { - const appmapIndex = await buildIndex('appmaps', async (indexFile) => { - const db = new sqlite3(indexFile); - const fileIndex = new FileIndex(db); - await buildAppMapIndex(fileIndex, this.appmapDirectories); - return fileIndex; - }); - const selectedAppMaps = await search( - appmapIndex.index, - this.vectorTerms.join(' OR '), - DEFAULT_MAX_DIAGRAMS - ); - appmapIndex.close(); - - appmapSearchResponse = { - results: selectedAppMaps.results, - numResults: selectedAppMaps.results.length, - stats: selectedAppMaps.stats, - type: 'appmap', - }; - - log(`[search-context] Matched ${selectedAppMaps.results.length} AppMaps.`); - } - - const fileIndex = await buildIndex('files', async (indexFile) => { - const db = new sqlite3(indexFile); - return await indexFiles( - db, - this.sourceDirectories, - this.includePatterns, - this.excludePatterns - ); - }); - let fileSearchResults: FileSearchResult[]; - try { - fileSearchResults = fileIndex.index.search(this.vectorTerms.join(' OR ')); - } finally { - fileIndex.close(); - } - - const snippetIndex = await buildIndex('snippets', async (indexFile) => { - const db = new sqlite3(indexFile); - const snippetIndex = await indexSnippets(db, fileSearchResults); - await indexEvents(snippetIndex, appmapSearchResponse.results); - return snippetIndex; - }); - - let contextCandidate: ContextCandidate; - try { - let charCount = 0; - let maxSnippets = 50; - log(`[search-context] Requested char limit: ${this.charLimit}`); - for (;;) { - log(`[search-context] Collecting context with ${maxSnippets} events per diagram.`); - - // Collect all code objects from AppMaps and use them to build the sequence diagram - // const codeSnippets = new Array(); - // TODO: Apply this.includeTypes - - const 
snippetContextItem = ( - snippet: SnippetSearchResult - ): ContextV2.ContextItem | ContextV2.FileContextItem | undefined => { - const { snippetId, directory, score, content } = snippet; - - const { type: snippetIdType, id: snippetIdValue } = snippetId; - - let location: string | undefined; - if (snippetIdType === 'code-snippet') location = snippetIdValue; - - switch (snippetId.type) { - case 'query': - case 'route': - case 'external-route': - return { - type: ContextV2.ContextItemType.DataRequest, - content, - directory, - score, - }; - case 'code-snippet': - return { - type: ContextV2.ContextItemType.CodeSnippet, - content, - directory, - score, - location, - }; - default: - warn(`[search-context] Unknown snippet type: ${snippetId.type}`); - - // TODO: Collect all matching events, then build a sequence diagram - // case 'event': - // return await buildSequenceDiagram(snippet); - // default: - // codeSnippets.push(snippet); - } - }; - - const snippetSearchResults = snippetIndex.index.searchSnippets( - this.vectorTerms.join(' OR '), - maxSnippets - ); - const context: ContextV2.ContextItem[] = []; - for (const result of snippetSearchResults) { - const contextItem = snippetContextItem(result); - if (contextItem) context.push(contextItem); - } - - // TODO: Build sequence diagrams - - contextCandidate = { - // TODO: Fixme remove hard coded cast - results: appmapSearchResponse.results as SearchRpc.SearchResult[], - context, - contextSize: snippetSearchResults.reduce((acc, result) => acc + result.content.length, 0), - }; - - const appliedContext = applyContext(contextCandidate.context, this.charLimit); - const appliedContextSize = appliedContext.reduce( - (acc, item) => acc + item.content.length, - 0 - ); - contextCandidate.context = appliedContext; - contextCandidate.contextSize = appliedContextSize; - log(`[search-context] Collected an estimated ${appliedContextSize} characters.`); - - if (appliedContextSize === charCount || appliedContextSize > this.charLimit) { - 
break; - } - charCount = appliedContextSize; - maxSnippets = Math.ceil(maxSnippets * 1.5); - log(`[search-context] Increasing max events per diagram to ${maxSnippets}.`); - } - } finally { - snippetIndex.close(); - } - - return { - searchResponse: { - results: contextCandidate.results, - numResults: appmapSearchResponse.numResults, - }, - context: contextCandidate.context, - }; - } -} diff --git a/packages/cli/src/rpc/explain/buildContext.ts b/packages/cli/src/rpc/explain/buildContext.ts deleted file mode 100644 index 1a06dc4b4b..0000000000 --- a/packages/cli/src/rpc/explain/buildContext.ts +++ /dev/null @@ -1,82 +0,0 @@ -import { SearchRpc } from '@appland/rpc'; -import { warn } from 'console'; -import { ContextV2 } from '@appland/navie'; - -import lookupSourceCode from './lookupSourceCode'; -import buildSequenceDiagram from './build-sequence-diagram'; -import appmapLocation from './appmap-location'; - -/** - * Processes search results to build sequence diagrams, code snippets, and code object sets. This is the format - * expected by the Navie AI. - * - * Given a list of search results, `buildContext` asynchronously: - * - * - Generates sequence diagrams for each result using event data and a filtered appmap, - * formatting the output as PlantUML and storing it in an array. The filtered sequence diagram - * includes only the code objects associated with the events in the search result, and their near neighbors. - * - * - Collects and de-duplicates code snippets tied to specific events' locations, storing them in a map with the location as the key. - * - * - Gathers a set of unique code objects identified by their fully qualified identifiers (fqid) from the events. - * These code objects are most commonly SQL queries and HTTP requests (client and server), since code snipptes are stored separately. - * The term "data requests" is being phased in to replace "codeObjects". 
- */ -export default async function buildContext( - searchResults: SearchRpc.SearchResult[] -): Promise { - const sequenceDiagrams = new Array(); - const codeSnippets = new Array(); - const dataRequests = new Array(); - - const codeSnippetLocations = new Set(); - const dataRequestContent = new Set(); - - const examinedLocations = new Set(); - for (const result of searchResults) { - try { - sequenceDiagrams.push(await buildSequenceDiagram(result)); - } catch (e) { - warn(`Failed to build sequence diagram for ${result.appmap}`); - warn(e); - } - for (const event of result.events) { - if (!event.location) { - if (!dataRequestContent.has(event.fqid)) { - dataRequestContent.add(event.fqid); - dataRequests.push({ - directory: result.directory, - location: appmapLocation(result.appmap, event), - type: ContextV2.ContextItemType.DataRequest, - content: event.fqid, - score: event.score, - }); - } - continue; - } - - if (examinedLocations.has(event.location)) continue; - - examinedLocations.add(event.location); - - if (codeSnippetLocations.has(event.location)) continue; - - codeSnippetLocations.add(event.location); - - // TODO: Snippets from appmap events will no longer be needed, because the snippets come - // from the search results in the index (boosted by AppMap references). 
- const snippets = await lookupSourceCode(result.directory, event.location); - if (snippets) { - codeSnippets.push({ - directory: result.directory, - type: ContextV2.ContextItemType.CodeSnippet, - location: event.location, - content: snippets.join('\n'), - score: event.score, - }); - } - } - } - - return [...sequenceDiagrams, ...codeSnippets, ...dataRequests]; -} diff --git a/packages/cli/src/rpc/explain/collect-context.ts b/packages/cli/src/rpc/explain/collect-context.ts new file mode 100644 index 0000000000..a322dab934 --- /dev/null +++ b/packages/cli/src/rpc/explain/collect-context.ts @@ -0,0 +1,136 @@ +import { ContextV2 } from '@appland/navie'; +import { SearchRpc } from '@appland/rpc'; +import { queryKeywords } from '@appland/search'; + +import { SearchResult as EventSearchResult } from '../../fulltext/FindEvents'; +import Location from './location'; +import { warn } from 'console'; +import collectLocationContext from './collect-location-context'; +import collectSearchContext from './collect-search-context'; + +export const buildExclusionPattern = (dirName: string): RegExp => { + const dirNamePattern = dirName.replace('.', '\\.'); + return new RegExp(`(^|[/\\\\])${dirNamePattern}([/\\\\]|$)`); +}; + +const EXCLUDE_DIRS = ['.appmap', '.navie', '.yarn', 'venv', '.venv', 'node_modules', 'vendor']; + +export function textSearchResultToRpcSearchResult( + eventResult: EventSearchResult +): SearchRpc.EventMatch { + const result: SearchRpc.EventMatch = { + fqid: eventResult.fqid, + score: eventResult.score, + eventIds: eventResult.eventIds, + }; + if (eventResult.location) result.location = eventResult.location; + if (eventResult.elapsed) result.elapsed = eventResult.elapsed; + return result; +} + +export const CHARS_PER_SNIPPET = 50; + +export type ContextRequest = { + appmaps?: string[]; + excludePatterns?: RegExp[]; + includePatterns?: RegExp[]; + includeTypes?: ContextV2.ContextItemType[]; + locations?: Location[]; +}; + +export function buildContextRequest( + 
appmapDirectories: string[], + sourceDirectories: string[], + appmaps: string[] | undefined, + searchTerms: string[], + charLimit: number, + filters: ContextV2.ContextFilters +): { vectorTerms: string[]; request: ContextRequest } { + const vectorTerms = searchTerms + .map((term) => queryKeywords(term)) + .flat() + .map((t) => t.trim()) + .filter(Boolean); + + const request: ContextRequest = {}; + + const contextParameters: Record = { + sourceDirectories: sourceDirectories.join(', '), + charLimit, + }; + if (appmapDirectories.length > 0) + contextParameters.appmapDirectories = appmapDirectories.join(', '); + if (vectorTerms.length > 0) contextParameters.keywords = vectorTerms.join(', '); + if (appmaps && appmaps.length > 0) contextParameters.appmaps = appmaps.join(', '); + if (filters.recent) contextParameters.recent = filters.recent; + if (filters.locations) contextParameters.locations = filters.locations.join(', '); + if (filters.itemTypes) contextParameters.itemTypes = filters.itemTypes.join(', '); + if (filters.labels && filters.labels.length > 0) + contextParameters.labels = filters.labels + .map((label) => `${label.name}(${label.weight})`) + .join(', '); + if (filters.exclude) contextParameters.exclude = filters.exclude.join(', '); + if (filters.include) contextParameters.include = filters.include.join(', '); + + const contextDebugString = Object.entries(contextParameters) + .map(([key, value]) => `${key}: ${value}`) + .join(', '); + warn(`Collecting context with parameters: ${contextDebugString}`); + + if (appmaps) request.appmaps = appmaps; + + const excludePatterns: RegExp[] = []; + if (filters?.exclude) + excludePatterns.push(...filters.exclude.map((pattern) => new RegExp(pattern))); + if (filters?.include) + request.includePatterns = filters.include.map((pattern) => new RegExp(pattern)); + if (filters?.itemTypes) request.includeTypes = filters.itemTypes.map((type) => type); + if (filters?.locations) { + request.locations = filters.locations + 
.map((location) => Location.parse(location)) + .filter(Boolean) as Location[]; + warn(`Parsed locations: ${request.locations.map((loc) => loc.toString()).join(', ')}`); + } + + const appendIfNotExists = (patterns: RegExp[], pattern: RegExp): RegExp[] => { + if (!patterns.find((p) => p.source === pattern.source)) patterns.push(pattern); + return patterns; + }; + + for (const dir of EXCLUDE_DIRS) appendIfNotExists(excludePatterns, buildExclusionPattern(dir)); + + request.excludePatterns = excludePatterns; + + return { vectorTerms, request }; +} + +export default async function collectContext( + appmapDirectories: string[], + sourceDirectories: string[], + charLimit: number, + vectorTerms: string[], + request: ContextRequest +): Promise<{ searchResponse: SearchRpc.SearchResponse; context: ContextV2.ContextResponse }> { + let searchResponse: SearchRpc.SearchResponse = { results: [], numResults: 0 }; + const context: ContextV2.ContextResponse = []; + + if (request.locations && request.locations.length > 0) { + const locationResult = await collectLocationContext(sourceDirectories, request.locations); + context.push(...locationResult); + } + + if (vectorTerms.length > 0 && charLimit > 0) { + const searchResult = await collectSearchContext( + appmapDirectories, + sourceDirectories, + vectorTerms, + charLimit, + request + ); + + searchResponse = searchResult.searchResponse; + context.push(...searchResult.context); + } + + return { searchResponse, context }; +} diff --git a/packages/cli/src/rpc/explain/collect-location-context.ts b/packages/cli/src/rpc/explain/collect-location-context.ts new file mode 100644 index 0000000000..31ee7616ab --- /dev/null +++ b/packages/cli/src/rpc/explain/collect-location-context.ts @@ -0,0 +1,92 @@ +import { readFile } from 'fs/promises'; +import { warn } from 'console'; +import { isAbsolute, join } from 'path'; +import { ContextV2 } from '@appland/navie'; +import Location from './location'; +import { exists, isFile, verbose } from 
'../../utils'; + +export type LocationContextRequest = { + sourceDirectories: string[]; + locations: Location[]; +}; + +/** + * Collect context information from specified locations + * within source directories. It reads the contents of files at these locations and extracts code snippets + * to build a context response. + * + * Primary effects: + * - Iterates over provided locations and determines if they are absolute or relative paths. + * - For each location, constructs the full path and checks if the file exists and is a valid file. + * - Reads the contents of the file and extracts a code snippet based on the location. + * - Builds a context response containing the extracted code snippets and their respective locations. + * - Returns the context response. + */ +export default async function collectLocationContext( + sourceDirectories: string[], + locations: Location[] +): Promise { + const result: ContextV2.ContextResponse = []; + + const candidateLocations = new Array<{ location: Location; directory?: string }>(); + for (const location of locations) { + const { path } = location; + if (isAbsolute(path)) { + const directory = sourceDirectories.find((dir) => path.startsWith(dir)); + candidateLocations.push({ location, directory }); + } else { + for (const sourceDirectory of sourceDirectories) { + candidateLocations.push({ location, directory: sourceDirectory }); + } + } + } + + if (verbose()) + warn( + `[location-context] Candidate locations: ${candidateLocations + .map((loc) => loc.location.toString()) + .join(', ')}` + ); + + for (const { location, directory } of candidateLocations) { + let pathTokens: string[] = []; + + if (isAbsolute(location.path)) pathTokens = [location.path]; + else if (directory) pathTokens = [directory, location.path].filter(Boolean); + + const path = join(...pathTokens); + if (!(await exists(path))) { + if (verbose()) warn(`[location-context] Skipping non-existent location: ${path}`); + continue; + } + if
(!(await isFile(path))) { + if (verbose()) warn(`[location-context] Skipping non-file location: ${path}`); + continue; + } + + let contents: string | undefined; + try { + contents = await readFile(path, 'utf8'); + } catch (e) { + warn(`[location-context] Failed to read file: ${path}`); + continue; + } + + if (verbose()) + warn( + `[location-context] Extracting snippet for location: ${location.toString()} (${ + contents.length + } bytes)` + ); + + const snippet = location.snippet(contents); + result.push({ + type: ContextV2.ContextItemType.CodeSnippet, + content: snippet, + location: location.toString(), + directory, + }); + } + + return result; +} diff --git a/packages/cli/src/rpc/explain/collect-search-context.ts b/packages/cli/src/rpc/explain/collect-search-context.ts new file mode 100644 index 0000000000..c0f2f75843 --- /dev/null +++ b/packages/cli/src/rpc/explain/collect-search-context.ts @@ -0,0 +1,155 @@ +import { log } from 'console'; + +import { ContextV2, applyContext } from '@appland/navie'; +import { SearchRpc } from '@appland/rpc'; + +import { DEFAULT_MAX_DIAGRAMS } from '../search/search'; +import { SearchResponse as AppMapSearchResponse } from '../../fulltext/appmap-match'; +import { searchAppMapFiles } from './index/appmap-file-index'; +import { searchProjectFiles } from './index/project-file-index'; +import { + buildProjectFileSnippetIndex, + snippetContextItem, +} from './index/project-file-snippet-index'; + +type ContextCandidate = { + results: SearchRpc.SearchResult[]; + context: ContextV2.ContextResponse; + contextSize: number; +}; + +export type SearchContextRequest = { + appmaps?: string[]; + excludePatterns?: RegExp[]; + includePatterns?: RegExp[]; + includeTypes?: ContextV2.ContextItemType[]; +}; + +export default async function collectSearchContext( + appmapDirectories: string[], + sourceDirectories: string[], + vectorTerms: string[], + charLimit: number, + request: SearchContextRequest = {} +): Promise<{ + searchResponse: 
SearchRpc.SearchResponse; + context: ContextV2.ContextResponse; +}> { + let appmapSearchResponse: AppMapSearchResponse; + if (request.appmaps) { + const results = request.appmaps + .map((appmap) => { + const directory = appmapDirectories.find((dir) => appmap.startsWith(dir)); + if (!directory) return undefined; + + return { + appmap, + directory, + score: 1, + }; + }) + .filter(Boolean) as SearchRpc.SearchResult[]; + appmapSearchResponse = { + type: 'appmap', + stats: { + max: 1, + mean: 1, + median: 1, + stddev: 0, + }, + results, + numResults: results.length, + }; + } else { + const selectedAppMaps = await searchAppMapFiles( + appmapDirectories, + vectorTerms, + DEFAULT_MAX_DIAGRAMS + ); + + appmapSearchResponse = { + results: selectedAppMaps.results, + numResults: selectedAppMaps.results.length, + stats: selectedAppMaps.stats, + type: 'appmap', + }; + + log(`[search-context] Matched ${selectedAppMaps.results.length} AppMaps.`); + } + + const fileSearchResults = await searchProjectFiles( + sourceDirectories, + request.includePatterns, + request.excludePatterns, + vectorTerms + ); + + const snippetIndex = await buildProjectFileSnippetIndex( + fileSearchResults, + appmapSearchResponse.results + ); + let contextCandidate: ContextCandidate; + try { + let charCount = 0; + let maxSnippets = 50; + log(`[search-context] Requested char limit: ${charLimit}`); + for (;;) { + log(`[search-context] Collecting context with ${maxSnippets} events per diagram.`); + + // Collect all events from AppMaps and use them to build the sequence diagram + // The unsolved part here is getting event ids from code snippets that are associated with + // AppMap events, because this association is not yet implemented. 
+ + // const codeSnippets = new Array(); + // TODO: Apply request.includeTypes + + const snippetSearchResults = snippetIndex.index.searchSnippets( + vectorTerms.join(' OR '), + maxSnippets + ); + const context: ContextV2.ContextItem[] = []; + for (const result of snippetSearchResults) { + const contextItem = snippetContextItem(result); + if (contextItem) context.push(contextItem); + } + + const appmapSearchResults: SearchRpc.SearchResult[] = appmapSearchResponse.results.map( + (result) => ({ + appmap: result.appmap, + directory: result.directory, + score: result.score, + events: [], + }) + ); + + contextCandidate = { + results: appmapSearchResults, + context, + contextSize: snippetSearchResults.reduce((acc, result) => acc + result.content.length, 0), + }; + + const appliedContext = applyContext(contextCandidate.context, charLimit); + const appliedContextSize = appliedContext.reduce((acc, item) => acc + item.content.length, 0); + contextCandidate.context = appliedContext; + contextCandidate.contextSize = appliedContextSize; + log(`[search-context] Collected an estimated ${appliedContextSize} characters.`); + + if (appliedContextSize === charCount || appliedContextSize > charLimit) { + break; + } + charCount = appliedContextSize; + maxSnippets = Math.ceil(maxSnippets * 1.5); + log(`[search-context] Increasing max events per diagram to ${maxSnippets}.`); + } + } finally { + snippetIndex.close(); + } + + return { + searchResponse: { + results: contextCandidate.results, + numResults: appmapSearchResponse.numResults, + }, + context: contextCandidate.context, + }; +} diff --git a/packages/cli/src/rpc/explain/collectContext.ts b/packages/cli/src/rpc/explain/collectContext.ts deleted file mode 100644 index c09eed9588..0000000000 --- a/packages/cli/src/rpc/explain/collectContext.ts +++ /dev/null @@ -1,173 +0,0 @@ -import { ContextV2 } from '@appland/navie'; -import { SearchRpc } from '@appland/rpc'; -import { queryKeywords } from '@appland/search'; - -import { SearchResult as
EventSearchResult } from '../../fulltext/FindEvents'; -import Location from './location'; -import SearchContextCollector from './SearchContextCollector'; -import LocationContextCollector from './LocationContextCollector'; -import { warn } from 'console'; - -export const buildExclusionPattern = (dirName: string): RegExp => { - const dirNamePattern = dirName.replace('.', '\\.'); - return new RegExp(`(^|[/\\\\])${dirNamePattern}([/\\\\]|$)`); -}; - -const EXCLUDE_DIRS = ['.appmap', '.navie', '.yarn', 'venv', '.venv', 'node_modules', 'vendor']; - -export function textSearchResultToRpcSearchResult( - eventResult: EventSearchResult -): SearchRpc.EventMatch { - const result: SearchRpc.EventMatch = { - fqid: eventResult.fqid, - score: eventResult.score, - eventIds: eventResult.eventIds, - }; - if (eventResult.location) result.location = eventResult.location; - if (eventResult.elapsed) result.elapsed = eventResult.elapsed; - return result; -} - -export const CHARS_PER_SNIPPET = 50; - -export class ContextCollector { - public appmaps: string[] | undefined; - public excludePatterns: RegExp[] | undefined; - public includePatterns: RegExp[] | undefined; - public includeTypes: ContextV2.ContextItemType[] | undefined; - public locations: Location[] | undefined; - - query: string; - vectorTerms: string[]; - - constructor( - private appmapDirectories: string[], - private sourceDirectories: string[], - vectorTerms: string[], - private charLimit: number - ) { - this.vectorTerms = vectorTerms.map((term) => term.trim()).filter(Boolean); - this.query = vectorTerms.join(' '); - } - - async collectContext(): Promise<{ - searchResponse: SearchRpc.SearchResponse; - context: ContextV2.ContextResponse; - }> { - const result: { searchResponse: SearchRpc.SearchResponse; context: ContextV2.ContextResponse } = - { searchResponse: { results: [], numResults: 0 }, context: [] }; - const mergeSearchResults = (searchResult: { - searchResponse: SearchRpc.SearchResponse; - context: 
ContextV2.ContextResponse; - }) => { - result.searchResponse.results = result.searchResponse.results.concat( - searchResult.searchResponse.results - ); - result.searchResponse.numResults += searchResult.searchResponse.numResults; - result.context = result.context.concat(searchResult.context); - }; - - if (this.locations && this.locations.length > 0) { - const locationContextCollector = new LocationContextCollector( - this.sourceDirectories, - this.locations - ); - const locationResult = await locationContextCollector.collectContext(); - mergeSearchResults(locationResult); - } - - if (this.vectorTerms.length > 0 && this.charLimit > 0) { - const searchContextCollector = new SearchContextCollector( - this.appmapDirectories, - this.sourceDirectories, - this.appmaps, - this.vectorTerms, - this.charLimit - ); - if (this.includePatterns) searchContextCollector.includePatterns = this.includePatterns; - if (this.excludePatterns) searchContextCollector.excludePatterns = this.excludePatterns; - if (this.includeTypes) searchContextCollector.includeTypes = this.includeTypes; - - const searchResult = await searchContextCollector.collectContext(); - mergeSearchResults(searchResult); - } - - return result; - } -} - -export default async function collectContext( - appmapDirectories: string[], - sourceDirectories: string[], - appmaps: string[] | undefined, - searchTerms: string[], - charLimit: number, - filters: ContextV2.ContextFilters -): Promise<{ - searchResponse: SearchRpc.SearchResponse; - context: ContextV2.ContextResponse; -}> { - const keywords = searchTerms.map((term) => queryKeywords(term)).flat(); - - // recent?: boolean; - // locations?: string[]; - // itemTypes?: ContextItemType[]; - // labels?: ContextLabel[]; - // exclude?: string[]; - // include?: string[]; - - const contextParameters: Record = { - sourceDirectories: sourceDirectories.join(', '), - charLimit, - }; - if (appmapDirectories.length > 0) - contextParameters.appmapDirectories = appmapDirectories.join(', 
'); - if (keywords.length > 0) contextParameters.keywords = keywords.join(', '); - if (appmaps && appmaps.length > 0) contextParameters.appmaps = appmaps.join(', '); - if (filters.recent) contextParameters.recent = filters.recent; - if (filters.locations) contextParameters.locations = filters.locations.join(', '); - if (filters.itemTypes) contextParameters.itemTypes = filters.itemTypes.join(', '); - if (filters.labels && filters.labels.length > 0) - contextParameters.labels = filters.labels - .map((label) => `${label.name}(${label.weight})`) - .join(', '); - if (filters.exclude) contextParameters.exclude = filters.exclude.join(', '); - if (filters.include) contextParameters.include = filters.include.join(', '); - - const contextDebugString = Object.entries(contextParameters) - .map(([key, value]) => `${key}: ${value}`) - .join(', '); - warn(`Collecting context with parameters: ${contextDebugString}`); - - const contextCollector = new ContextCollector( - appmapDirectories, - sourceDirectories, - keywords, - charLimit - ); - if (appmaps) contextCollector.appmaps = appmaps; - - const excludePatterns: RegExp[] = []; - if (filters?.exclude) - excludePatterns.push(...filters.exclude.map((pattern) => new RegExp(pattern))); - if (filters?.include) - contextCollector.includePatterns = filters.include.map((pattern) => new RegExp(pattern)); - if (filters?.itemTypes) contextCollector.includeTypes = filters.itemTypes.map((type) => type); - if (filters?.locations) { - contextCollector.locations = filters.locations - .map((location) => Location.parse(location)) - .filter(Boolean) as Location[]; - warn(`Parsed locations: ${contextCollector.locations.map((loc) => loc.toString()).join(', ')}`); - } - - const appendIfNotExists = (patterns: RegExp[], pattern: RegExp): RegExp[] => { - if (!patterns.find((p) => p.source === pattern.source)) patterns.push(pattern); - return patterns; - }; - - for (const dir of EXCLUDE_DIRS) appendIfNotExists(excludePatterns, buildExclusionPattern(dir)); 
- - contextCollector.excludePatterns = excludePatterns; - - return await contextCollector.collectContext(); -} diff --git a/packages/cli/src/rpc/explain/explain.ts b/packages/cli/src/rpc/explain/explain.ts index be60babab4..0c9f10b6c8 100644 --- a/packages/cli/src/rpc/explain/explain.ts +++ b/packages/cli/src/rpc/explain/explain.ts @@ -9,7 +9,7 @@ import { ContextV2, Help, ProjectInfo, UserContext } from '@appland/navie'; import { ExplainRpc } from '@appland/rpc'; import { warn } from 'console'; import EventEmitter from 'events'; -import { basename, join } from 'path'; +import { basename } from 'path'; import { LRUCache } from 'lru-cache'; import detectAIEnvVar from '../../cmds/index/aiEnvVar'; @@ -18,7 +18,7 @@ import collectProjectInfos from '../../cmds/navie/projectInfo'; import configuration, { AppMapDirectory } from '../configuration'; import { getLLMConfiguration } from '../llmConfiguration'; import { RpcError, RpcHandler } from '../rpc'; -import collectContext from './collectContext'; +import collectContext, { buildContextRequest } from './collect-context'; import { initializeHistory } from './navie/historyHelper'; import { ThreadAccessError } from './navie/ihistory'; import INavie, { INavieProvider } from './navie/inavie'; @@ -148,17 +148,7 @@ export class Explain extends EventEmitter { // The meaning of tokenCount is "try and get at least this many tokens" const charLimit = tokenCount * 3; - // const appmapDirectories = this.appmapDirectories.map((dir) => { - // const path = dir.directory; - // const appmapDir = dir.appmapConfig?.appmap_dir ?? 
'tmp/appmap'; - // if (path.endsWith(appmapDir)) { - // return path; - // } else { - // return join(path, appmapDir); - // } - // }); - - const searchResult = await collectContext( + const contextRequest = buildContextRequest( this.appmapDirectories.map((dir) => dir.directory), this.projectDirectories, this.appmaps, @@ -167,6 +157,15 @@ export class Explain extends EventEmitter { data ); + const searchResult = await collectContext( + this.appmapDirectories.map((dir) => dir.directory), + this.projectDirectories, + charLimit, + contextRequest.vectorTerms, + contextRequest.request + ); + + // TODO: Append this result rather than over-writing, to allow Navie to request context more than once. this.status.searchResponse = searchResult.searchResponse; this.status.contextResponse = searchResult.context; diff --git a/packages/cli/src/rpc/explain/index-snippets.ts b/packages/cli/src/rpc/explain/index-snippets.ts deleted file mode 100644 index ceb2d58f01..0000000000 --- a/packages/cli/src/rpc/explain/index-snippets.ts +++ /dev/null @@ -1,21 +0,0 @@ -import { - buildSnippetIndex, - FileSearchResult, - fileTokens, - langchainSplitter, - readFileSafe, - SnippetIndex, -} from '@appland/search'; -import sqlite3 from 'better-sqlite3'; - -export default async function indexSnippets( - db: sqlite3.Database, - fileSearchResults: FileSearchResult[] -): Promise { - const splitter = langchainSplitter; - - const snippetIndex = new SnippetIndex(db); - await buildSnippetIndex(snippetIndex, fileSearchResults, readFileSafe, splitter, fileTokens); - - return snippetIndex; -} diff --git a/packages/cli/src/rpc/explain/index/appmap-file-index.ts b/packages/cli/src/rpc/explain/index/appmap-file-index.ts new file mode 100644 index 0000000000..0e16c43203 --- /dev/null +++ b/packages/cli/src/rpc/explain/index/appmap-file-index.ts @@ -0,0 +1,31 @@ +import sqlite3 from 'better-sqlite3'; + +import { FileIndex } from '@appland/search'; + +import buildIndexInTempDir, { CloseableIndex } from 
'./build-index-in-temp-dir'; +import { buildAppMapIndex, search } from '../../../fulltext/appmap-index'; +import { SearchResponse } from '../../../fulltext/appmap-match'; + +export async function buildAppMapFileIndex( + appmapDirectories: string[] +): Promise> { + return await buildIndexInTempDir('appmaps', async (indexFile) => { + const db = new sqlite3(indexFile); + const fileIndex = new FileIndex(db); + await buildAppMapIndex(fileIndex, appmapDirectories); + return fileIndex; + }); +} + +export async function searchAppMapFiles( + appmapDirectories: string[], + vectorTerms: string[], + maxDiagrams: number +): Promise { + const index = await buildAppMapFileIndex(appmapDirectories); + try { + return await search(index.index, vectorTerms.join(' OR '), maxDiagrams); + } finally { + index.close(); + } +} diff --git a/packages/cli/src/rpc/explain/buildIndex.ts b/packages/cli/src/rpc/explain/index/build-index-in-temp-dir.ts similarity index 89% rename from packages/cli/src/rpc/explain/buildIndex.ts rename to packages/cli/src/rpc/explain/index/build-index-in-temp-dir.ts index 61e78d05f5..590283dbf7 100644 --- a/packages/cli/src/rpc/explain/buildIndex.ts +++ b/packages/cli/src/rpc/explain/index/build-index-in-temp-dir.ts @@ -10,12 +10,12 @@ export interface Closeable { close(): void; } -type CloseableIndex = { +export type CloseableIndex = { index: T; close: () => void; }; -export default async function buildIndex( +export default async function buildIndexInTempDir( indexName: string, builder: (indexFile: string) => Promise ): Promise> { diff --git a/packages/cli/src/rpc/explain/index-events.ts b/packages/cli/src/rpc/explain/index/index-events.ts similarity index 77% rename from packages/cli/src/rpc/explain/index-events.ts rename to packages/cli/src/rpc/explain/index/index-events.ts index 16c9042840..2c656381dd 100644 --- a/packages/cli/src/rpc/explain/index-events.ts +++ b/packages/cli/src/rpc/explain/index/index-events.ts @@ -1,9 +1,9 @@ -import { SnippetId, 
SnippetIndex } from '@appland/search'; +import { queryKeywords, SnippetId, SnippetIndex } from '@appland/search'; import { warn } from 'console'; import crypto from 'crypto'; -import { SearchResult } from '../../fulltext/appmap-match'; -import { ClassMapEntry, readIndexFile } from '../../fulltext/appmap-index'; +import { SearchResult } from '../../../fulltext/appmap-match'; +import { ClassMapEntry, readIndexFile } from '../../../fulltext/appmap-index'; function hexDigest(input: string): string { const hash = crypto.createHash('sha256'); @@ -26,15 +26,16 @@ async function indexAppMapEvents( } const indexCodeObject = (type: string, id: string, content: string, ...tags: string[]) => { - const words = [content, ...tags].join(' '); + const words = [content, ...tags]; + const wordList = queryKeywords(words); const snippetId: SnippetId = { type, id, }; - // TODO: Include event id in the snippet id - snippetIndex.indexSnippet(snippetId, directory, '', words, content); + // TODO: Include event id in the snippet id? + snippetIndex.indexSnippet(snippetId, directory, '', wordList.join(' '), content); }; const boostCodeObject = (location: string) => { @@ -61,7 +62,10 @@ async function indexAppMapEvents( if (id) indexCodeObject(cme.type, id, cme.name, ...tags); - if (cme.sourceLocation) boostCodeObject(cme.sourceLocation); + if (cme.sourceLocation) { + // TODO: Which event ids should this be associated with? 
+ boostCodeObject(cme.sourceLocation); + } cme.children?.forEach((child) => { indexClassMapEntry(child); diff --git a/packages/cli/src/rpc/explain/index-files.ts b/packages/cli/src/rpc/explain/index/project-file-index.ts similarity index 52% rename from packages/cli/src/rpc/explain/index-files.ts rename to packages/cli/src/rpc/explain/index/project-file-index.ts index d49d4b09c5..d3403b8f81 100644 --- a/packages/cli/src/rpc/explain/index-files.ts +++ b/packages/cli/src/rpc/explain/index/project-file-index.ts @@ -4,6 +4,7 @@ import makeDebug from 'debug'; import { buildFileIndex, FileIndex, + FileSearchResult, fileTokens, FilterFn, isBinaryFile, @@ -12,9 +13,11 @@ import { listProjectFiles, readFileSafe, } from '@appland/search'; -import { fileNameMatchesFilterPatterns } from '../../fulltext/fileNameMatchesFilterPatterns'; +import { fileNameMatchesFilterPatterns } from '../../../fulltext/fileNameMatchesFilterPatterns'; -const debug = makeDebug('appmap:rpc:explain:index-files'); +import buildIndexInTempDir, { CloseableIndex } from './build-index-in-temp-dir'; + +const debug = makeDebug('appmap:index:project-files'); function fileFilter( includePatterns: RegExp[] | undefined, @@ -40,7 +43,7 @@ function fileFilter( }; } -export default async function indexFiles( +async function indexFiles( db: sqlite3.Database, directories: string[], includePatterns: RegExp[] | undefined, @@ -53,3 +56,28 @@ export default async function indexFiles( return fileIndex; } + +export async function buildProjectFileIndex( + sourceDirectories: string[], + includePatterns: RegExp[] | undefined, + excludePatterns: RegExp[] | undefined +): Promise> { + return await buildIndexInTempDir('files', async (indexFile) => { + const db = new sqlite3(indexFile); + return await indexFiles(db, sourceDirectories, includePatterns, excludePatterns); + }); +} + +export async function searchProjectFiles( + sourceDirectories: string[], + includePatterns: RegExp[] | undefined, + excludePatterns: RegExp[] | 
undefined, + vectorTerms: string[] +): Promise { + const index = await buildProjectFileIndex(sourceDirectories, includePatterns, excludePatterns); + try { + return index.index.search(vectorTerms.join(' OR ')); + } finally { + index.close(); + } +} diff --git a/packages/cli/src/rpc/explain/index/project-file-snippet-index.ts b/packages/cli/src/rpc/explain/index/project-file-snippet-index.ts new file mode 100644 index 0000000000..9493e7d8cf --- /dev/null +++ b/packages/cli/src/rpc/explain/index/project-file-snippet-index.ts @@ -0,0 +1,84 @@ +import sqlite3 from 'better-sqlite3'; +import { warn } from 'console'; + +import { ContextV2 } from '@appland/navie'; +import { + buildSnippetIndex, + FileSearchResult, + fileTokens, + langchainSplitter, + readFileSafe, + SnippetIndex, + SnippetSearchResult, +} from '@appland/search'; + +import buildIndexInTempDir, { CloseableIndex } from './build-index-in-temp-dir'; +import indexEvents from './index-events'; +import { SearchResult } from '../../../fulltext/appmap-match'; + +export function snippetContextItem( + snippet: SnippetSearchResult +): ContextV2.ContextItem | ContextV2.FileContextItem | undefined { + const { snippetId, directory, score, content } = snippet; + + const { type: snippetIdType, id: snippetIdValue } = snippetId; + + let location: string | undefined; + if (snippetIdType === 'code-snippet') location = snippetIdValue; + + const eventIds: number[] = []; + + switch (snippetId.type) { + case 'query': + case 'route': + case 'external-route': + // TODO: Collect event ids from these. + return { + type: ContextV2.ContextItemType.DataRequest, + content, + directory, + score, + }; + case 'code-snippet': + // TODO: Collect event ids from these. 
+ return { + type: ContextV2.ContextItemType.CodeSnippet, + content, + directory, + score, + location, + }; + default: + warn(`[search-context] Unknown snippet type: ${snippetId.type}`); + + // TODO: Collect all matching events, then build a sequence diagram + // case 'event': + // return await buildSequenceDiagram(snippet); + // default: + // codeSnippets.push(snippet); + } +} + +export async function buildProjectFileSnippetIndex( + fileSearchResults: FileSearchResult[], + appmapSearchResults: SearchResult[] +): Promise> { + const indexSnippets = async ( + db: sqlite3.Database, + fileSearchResults: FileSearchResult[] + ): Promise => { + const splitter = langchainSplitter; + + const snippetIndex = new SnippetIndex(db); + await buildSnippetIndex(snippetIndex, fileSearchResults, readFileSafe, splitter, fileTokens); + + return snippetIndex; + }; + + return buildIndexInTempDir('snippets', async (indexFile) => { + const db = new sqlite3(indexFile); + const snippetIndex = await indexSnippets(db, fileSearchResults); + await indexEvents(snippetIndex, appmapSearchResults); + return snippetIndex; + }); +} diff --git a/packages/cli/src/rpc/explain/lookupSourceCode.ts b/packages/cli/src/rpc/explain/lookupSourceCode.ts deleted file mode 100644 index 16b600b80a..0000000000 --- a/packages/cli/src/rpc/explain/lookupSourceCode.ts +++ /dev/null @@ -1,105 +0,0 @@ -import chalk from 'chalk'; -import { warn } from 'console'; -import { readFile } from 'fs/promises'; -import { glob } from 'glob'; - -import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter'; -import { exists, verbose } from '../../utils'; -import { promisify } from 'util'; -import parseLocation from './parseLocation'; - -export const LANGUAGE_BY_FILE_EXTENSION: Record = - { - '.js': 'js', - '.ts': 'js', - '.jsx': 'js', - '.tsx': 'js', - '.java': 'java', - '.py': 'python', - '.rb': 'ruby', - '.php': 'php', - }; - -// TODO: Look up different types of files -const scannedExtensions = new Set(); -const 
FILE_NAMES = new Set(); - -// TODO: Return source code up to the next location in the class map. -// TODO: Reverse-strip comment that follow the function. -export default async function lookupSourceCode( - directory: string, - location: string -): Promise { - const parsedLocation = parseLocation(location); - if (!parsedLocation) return; - - const [requestedFileName, lineNo] = parsedLocation; - - if (verbose()) warn(chalk.gray(`Looking up source code for ${location}`)); - - const extension = requestedFileName.slice(requestedFileName.lastIndexOf('.')); - - if (!scannedExtensions.has(extension)) { - scannedExtensions.add(extension); - // dot: true is present to include the .tox directory for Python - const fileNames = await promisify(glob)(`${directory}/**/*${extension}`, { - dot: true, - ignore: [ - '**/node_modules/**', - '**/vendor/**', - 'tmp/**', - '**/build/**', - '**/dist/**', - '**/target/**', - '**/.git/**', - ], - }); - if (verbose()) - warn(chalk.gray(`Found ${fileNames.length} files with extension "${extension}"`)); - for (const fileName of fileNames) { - FILE_NAMES.add(fileName); - } - } - - const candidates = Array.from(FILE_NAMES).filter((candidate) => - candidate.endsWith(requestedFileName) - ); - if (candidates.length === 0) { - warn(chalk.gray(`File not found in the workspace: ${requestedFileName}`)); - return; - } - candidates.sort((a, b) => a.length - b.length); - - const fileName = candidates[0]; - if (!(await exists(fileName))) { - warn(chalk.gray(`File ${fileName} does not exist`)); - return; - } - - const fileContent = await readFile(fileName, 'utf-8'); - if (!lineNo) return [fileContent]; - - if (lineNo <= 0) return [fileContent]; - - const fileExtension = fileName.slice(fileName.lastIndexOf('.')); - const language = LANGUAGE_BY_FILE_EXTENSION[fileExtension]; - let splitter: RecursiveCharacterTextSplitter; - if (language) { - splitter = RecursiveCharacterTextSplitter.fromLanguage(language, { - chunkOverlap: 0, - chunkSize: 500, - }); - } else 
{ - splitter = new RecursiveCharacterTextSplitter({ - chunkOverlap: 0, - chunkSize: 250, - }); - } - - const chunks = await splitter.createDocuments([fileContent]); - const matches = chunks.filter( - (chunk) => chunk.metadata.loc.lines.from <= lineNo && chunk.metadata.loc.lines.to >= lineNo - ); - if (verbose()) warn(chalk.gray(`Obtained ${matches.length} source code chunks for ${location}`)); - return matches.map((match) => match.pageContent); -} diff --git a/packages/cli/src/rpc/explain/parseLocation.ts b/packages/cli/src/rpc/explain/parseLocation.ts deleted file mode 100644 index d237cdbf36..0000000000 --- a/packages/cli/src/rpc/explain/parseLocation.ts +++ /dev/null @@ -1,16 +0,0 @@ -import chalk from 'chalk'; -import { warn } from 'console'; - -export default function parseLocation(location: string): [string, number | undefined] | undefined { - if (!location.includes(':')) return [location, undefined]; - - const locationTest = /([^:]+):(\d+)$/.exec(location); - if (!locationTest) { - warn(chalk.gray(`Invalid location format: ${location}. Skipping file lookup.`)); - return; - } - - const [requestedFileName, lineNoStr] = locationTest.slice(1); - const lineNoReturned = lineNoStr ? 
parseInt(lineNoStr, 10) : undefined; - return [requestedFileName, lineNoReturned]; -} diff --git a/packages/cli/src/rpc/search/search.ts b/packages/cli/src/rpc/search/search.ts index ba4e67644a..95fd07dbbe 100644 --- a/packages/cli/src/rpc/search/search.ts +++ b/packages/cli/src/rpc/search/search.ts @@ -8,7 +8,7 @@ import { SearchResponse } from '../../fulltext/appmap-match'; import { search as searchAppMaps } from '../../fulltext/appmap-index'; import searchSingleAppMap from '../../cmds/search/searchSingleAppMap'; import configuration, { AppMapDirectory } from '../configuration'; -import buildIndex from '../explain/buildIndex'; +import buildIndexInTempDir from '../explain/index/build-index-in-temp-dir'; import { buildAppMapIndex } from '../../fulltext/appmap-index'; export const DEFAULT_MAX_DIAGRAMS = 3; @@ -59,7 +59,7 @@ export async function handler( } else { // Search across all AppMaps, creating a map from AppMap id to AppMapSearchResult const maxResults = options.maxDiagrams || options.maxResults || DEFAULT_MAX_DIAGRAMS; - const index = await buildIndex('appmaps', async (indexFile) => { + const index = await buildIndexInTempDir('appmaps', async (indexFile) => { const db = new sqlite3(indexFile); const fileIndex = new FileIndex(db); await buildAppMapIndex( diff --git a/packages/cli/tests/unit/fulltext/appmap-index.search.spec.ts b/packages/cli/tests/unit/fulltext/appmap-index.search.spec.ts index 622f1dc703..72218d0b30 100644 --- a/packages/cli/tests/unit/fulltext/appmap-index.search.spec.ts +++ b/packages/cli/tests/unit/fulltext/appmap-index.search.spec.ts @@ -1,5 +1,5 @@ import * as utils from '../../../src/utils'; -import UpToDate, { AppMapIndex } from '../../../src/lib/UpToDate'; +import UpToDate from '../../../src/lib/UpToDate'; import { PathLike } from 'fs'; import { join } from 'path'; import { FileIndex, FileSearchResult } from '@appland/search'; diff --git a/packages/cli/tests/unit/rpc/explain/ContextCollector.spec.ts 
b/packages/cli/tests/unit/rpc/explain/ContextCollector.spec.ts deleted file mode 100644 index ee2a1dcaf6..0000000000 --- a/packages/cli/tests/unit/rpc/explain/ContextCollector.spec.ts +++ /dev/null @@ -1,98 +0,0 @@ -import { ContextCollector } from '../../../../src/rpc/explain/collectContext'; -import * as SearchContextCollector from '../../../../src/rpc/explain/SearchContextCollector'; -import * as LocationContextCollector from '../../../../src/rpc/explain/LocationContextCollector'; -import * as navie from '@appland/navie'; -import Location from '../../../../src/rpc/explain/location'; - -jest.mock('@appland/navie'); -jest.mock('../../../../src/rpc/explain/SearchContextCollector'); -jest.mock('../../../../src/rpc/explain/LocationContextCollector'); - -describe('ContextCollector', () => { - const charLimit = 5000; - - beforeEach(() => { - jest.mocked(navie.applyContext).mockImplementation((context) => context); - }); - afterEach(() => jest.restoreAllMocks()); - - describe('vector term search', () => { - describe('with empty vector terms', () => { - it('returns an empty context', async () => { - const emptyVectorTerms = ['', ' ']; - - const contextCollector = new ContextCollector( - ['example'], - ['src'], - emptyVectorTerms, - charLimit - ); - const result = await contextCollector.collectContext(); - expect(result).toStrictEqual({ - searchResponse: { - results: [], - numResults: 0, - }, - context: [], - }); - - expect(SearchContextCollector.default).not.toHaveBeenCalled(); - expect(LocationContextCollector.default).not.toHaveBeenCalled(); - }); - }); - }); - - describe('with non-empty vector terms', () => { - it('invokes SearchContextCollector', async () => { - const vectorTerms = ['login', 'user']; - const contextCollector = new ContextCollector(['example'], ['src'], vectorTerms, charLimit); - - const searchConstructorSpy = jest.spyOn(SearchContextCollector, 'default'); - searchConstructorSpy.mockImplementation( - () => - ({ - collectContext: 
jest.fn().mockResolvedValue({ - searchResponse: { - results: [], - numResults: 0, - }, - context: [], - }), - } as unknown as SearchContextCollector.default) - ); - - await contextCollector.collectContext(); - expect(searchConstructorSpy).toHaveBeenCalledWith( - ['example'], - ['src'], - undefined, - vectorTerms, - charLimit - ); - }); - }); - describe('with locations specified', () => { - it('invokes LocationContextCollector', async () => { - const locations = ['file1.py']; - const contextCollector = new ContextCollector(['example'], ['src'], [], 0); - contextCollector.locations = locations.map((l) => Location.parse(l)) as Location[]; - - const locationConstructorSpy = jest.spyOn(LocationContextCollector, 'default'); - locationConstructorSpy.mockImplementation( - () => - ({ - collectContext: jest.fn().mockResolvedValue({ - searchResponse: { - results: [], - numResults: 0, - }, - context: [], - }), - } as unknown as LocationContextCollector.default) - ); - - await contextCollector.collectContext(); - expect(locationConstructorSpy).toHaveBeenCalledWith(['src'], contextCollector.locations); - }); - }); -}); diff --git a/packages/cli/tests/unit/rpc/explain/LocationContextCollector.spec.ts b/packages/cli/tests/unit/rpc/explain/LocationContextCollector.spec.ts deleted file mode 100644 index c818f4c1bf..0000000000 --- a/packages/cli/tests/unit/rpc/explain/LocationContextCollector.spec.ts +++ /dev/null @@ -1,90 +0,0 @@ -import * as fs from 'fs/promises'; -import * as utils from '../../../../src/utils'; - -import Location from '../../../../src/rpc/explain/location'; -import LocationContextCollector from '../../../../src/rpc/explain/LocationContextCollector'; - -jest.mock('fs/promises'); -// eslint-disable-next-line @typescript-eslint/no-unsafe-return -jest.mock('../../../../src/utils', () => ({ - ...jest.requireActual('../../../../src/utils'), - exists: jest.fn(), - isFile: jest.fn(), -})); - -describe('LocationContextCollector', () => { - const sourceDirectories = 
['/src', '/lib']; - const locations: Location[] = [ - { path: 'file1.js', snippet: (contents: string) => contents.slice(0, 10) }, - { path: '/src/file2.js', snippet: (contents: string) => contents.slice(0, 10) }, - { path: '/other/file3.js', snippet: (contents: string) => contents.slice(0, 10) }, - ]; - - let collector: LocationContextCollector; - - beforeEach(() => (collector = new LocationContextCollector(sourceDirectories, locations))); - beforeEach(() => jest.resetAllMocks()); - - it('initializes correctly', () => { - expect(collector).toBeDefined(); - }); - - it('handles empty locations', async () => { - collector = new LocationContextCollector(sourceDirectories, []); - const result = await collector.collectContext(); - expect(result.context).toEqual([]); - expect(result.searchResponse.numResults).toBe(0); - }); - - it('handles valid locations', async () => { - jest.spyOn(utils, 'exists').mockResolvedValue(true); - jest.spyOn(utils, 'isFile').mockResolvedValue(true); - jest.spyOn(fs, 'readFile').mockResolvedValue('file contents'); - - const result = await collector.collectContext(); - expect(result.context.length).toBe(4); - expect(result.context[0].content).toBe('file conte'); - expect(result.context[1].content).toBe('file conte'); - expect(result.context[2].content).toBe('file conte'); - expect(result.context[3].content).toBe('file conte'); - - expect(utils.exists).toHaveBeenCalledTimes(4); - expect(utils.exists).toHaveBeenCalledWith('/src/file1.js'); - expect(utils.exists).toHaveBeenCalledWith('/lib/file1.js'); - expect(utils.exists).toHaveBeenCalledWith('/src/file2.js'); - expect(utils.exists).toHaveBeenCalledWith('/other/file3.js'); - }); - - it('handles non-file locations', async () => { - jest.spyOn(utils, 'exists').mockResolvedValue(true); - jest.spyOn(utils, 'isFile').mockResolvedValue(false); - - const result = await collector.collectContext(); - expect(result.context).toEqual([]); - }); - - it('handles non-existent files', async () => { - 
jest.spyOn(utils, 'exists').mockResolvedValue(false); - - const result = await collector.collectContext(); - expect(result.context).toEqual([]); - }); - - it('handles file reading errors', async () => { - jest.spyOn(utils, 'exists').mockResolvedValue(true); - jest.spyOn(utils, 'isFile').mockResolvedValue(true); - jest.spyOn(fs, 'readFile').mockRejectedValue(new Error('Read error')); - - const result = await collector.collectContext(); - expect(result.context).toEqual([]); - }); - - it('extracts snippets correctly', async () => { - jest.spyOn(utils, 'exists').mockResolvedValue(true); - jest.spyOn(utils, 'isFile').mockResolvedValue(true); - jest.spyOn(fs, 'readFile').mockResolvedValue('file contents'); - - const result = await collector.collectContext(); - expect(result.context[0].content).toBe('file conte'); - }); -}); diff --git a/packages/cli/tests/unit/rpc/explain/collect-context.spec.ts b/packages/cli/tests/unit/rpc/explain/collect-context.spec.ts new file mode 100644 index 0000000000..79f3e3cfc0 --- /dev/null +++ b/packages/cli/tests/unit/rpc/explain/collect-context.spec.ts @@ -0,0 +1,129 @@ +import * as collectSearchContext from '../../../../src/rpc/explain/collect-search-context'; +import * as collectLocationContext from '../../../../src/rpc/explain/collect-location-context'; +import collectContext, { + buildContextRequest, + ContextRequest, +} from '../../../../src/rpc/explain/collect-context'; +import Location from '../../../../src/rpc/explain/location'; + +jest.mock('../../../../src/rpc/explain/collect-search-context'); +jest.mock('../../../../src/rpc/explain/collect-location-context'); +jest.mock('@appland/navie'); + +describe('collect-context', () => { + afterEach(() => jest.resetAllMocks()); + afterEach(() => jest.restoreAllMocks()); + + describe('buildContextRequest', () => { + it('builds a context request', () => { + const request = buildContextRequest( + ['appmap-dir'], + ['src'], + ['appmap-a', 'appmap-b'], + ['login', 'the', 'user'], + 5000, + {} + 
); + expect(request).toEqual({ + vectorTerms: ['login', 'user'], + request: { + appmaps: ['appmap-a', 'appmap-b'], + excludePatterns: [ + /(^|[/\\])\.appmap([/\\]|$)/, + /(^|[/\\])\.navie([/\\]|$)/, + /(^|[/\\])\.yarn([/\\]|$)/, + /(^|[/\\])venv([/\\]|$)/, + /(^|[/\\])\.venv([/\\]|$)/, + /(^|[/\\])node_modules([/\\]|$)/, + /(^|[/\\])vendor([/\\]|$)/, + ], + }, + }); + }); + }); + + describe('collectContext', () => { + const charLimit = 5000; + + describe('with empty vector terms', () => { + it('returns an empty context', async () => { + const emptyVectorTerms = []; + + const result = await collectContext( + ['appmap-dir'], + ['src'], + charLimit, + emptyVectorTerms, + {} + ); + expect(result).toStrictEqual({ + searchResponse: { + results: [], + numResults: 0, + }, + context: [], + }); + expect(collectLocationContext.default).not.toHaveBeenCalled(); + }); + }); + + describe('with vector terms', () => { + const appmapDirectories = ['dir1', 'dir2']; + const sourceDirectories = ['src1', 'src2']; + const vectorTerms = ['term1', 'term2']; + + it('should process vector terms and char limit correctly', async () => { + (collectSearchContext.default as jest.Mock).mockResolvedValue({ + searchResponse: { results: [], numResults: 2 }, + context: ['context1', 'context2'], + }); + + const request = { locations: [] }; + const result = await collectContext( + appmapDirectories, + sourceDirectories, + charLimit, + vectorTerms, + request + ); + + expect(collectSearchContext.default).toHaveBeenCalledWith( + appmapDirectories, + sourceDirectories, + vectorTerms, + charLimit, + request + ); + expect(collectLocationContext.default).not.toHaveBeenCalled(); + + expect(result.searchResponse.numResults).toBe(2); + expect(result.context).toEqual(['context1', 'context2']); + }); + }); + + describe('with locations specified', () => { + it('should process locations and char limit correctly', async () => { + (collectLocationContext.default as jest.Mock).mockResolvedValue(['context1', 
'context2']); + + const request: ContextRequest = { + locations: [Location.parse('location1')!, Location.parse('location2')!], + }; + const result = await collectContext( + ['dir1', 'dir2'], + ['src1', 'src2'], + charLimit, + [], + request + ); + + expect(collectSearchContext.default).not.toHaveBeenCalled(); + expect(collectLocationContext.default).toHaveBeenCalledWith( + ['src1', 'src2'], + request.locations + ); + expect(result.searchResponse.numResults).toBe(0); + expect(result.context).toEqual(['context1', 'context2']); + }); + }); + }); +}); diff --git a/packages/cli/tests/unit/rpc/explain/collect-location-context.spec.ts b/packages/cli/tests/unit/rpc/explain/collect-location-context.spec.ts new file mode 100644 index 0000000000..0466fc5f75 --- /dev/null +++ b/packages/cli/tests/unit/rpc/explain/collect-location-context.spec.ts @@ -0,0 +1,88 @@ +import * as fs from 'fs/promises'; +import * as utils from '../../../../src/utils'; + +import Location from '../../../../src/rpc/explain/location'; +import collectLocationContext from '../../../../src/rpc/explain/collect-location-context'; + +jest.mock('fs/promises'); +// eslint-disable-next-line @typescript-eslint/no-unsafe-return +jest.mock('../../../../src/utils', () => ({ + ...jest.requireActual('../../../../src/utils'), + exists: jest.fn(), + isFile: jest.fn(), +})); + +describe('collectLocationContext', () => { + const sourceDirectories = ['/src', '/lib']; + + beforeEach(() => jest.resetAllMocks()); + + describe('with empty locations', () => { + it('handles empty locations', async () => { + const result = await collectLocationContext(sourceDirectories, []); + expect(result).toEqual([]); + }); + }); + + describe('with valid locations', () => { + const locations: Location[] = [ + { path: 'file1.js', snippet: (contents: string) => contents.slice(0, 10) }, + { path: '/src/file2.js', snippet: (contents: string) => contents.slice(0, 10) }, + { path: '/other/file3.js', snippet: (contents: string) => contents.slice(0, 
10) }, + ]; + + const collect = async () => collectLocationContext(sourceDirectories, locations); + + it('handles valid locations', async () => { + jest.spyOn(utils, 'exists').mockResolvedValue(true); + jest.spyOn(utils, 'isFile').mockResolvedValue(true); + jest.spyOn(fs, 'readFile').mockResolvedValue('file contents'); + + const result = await collect(); + expect(result.length).toBe(4); + expect(result[0].content).toBe('file conte'); + expect(result[1].content).toBe('file conte'); + expect(result[2].content).toBe('file conte'); + expect(result[3].content).toBe('file conte'); + + expect(utils.exists).toHaveBeenCalledTimes(4); + expect(utils.exists).toHaveBeenCalledWith('/src/file1.js'); + expect(utils.exists).toHaveBeenCalledWith('/lib/file1.js'); + expect(utils.exists).toHaveBeenCalledWith('/src/file2.js'); + expect(utils.exists).toHaveBeenCalledWith('/other/file3.js'); + }); + + it('handles non-file locations', async () => { + jest.spyOn(utils, 'exists').mockResolvedValue(true); + jest.spyOn(utils, 'isFile').mockResolvedValue(false); + + const result = await collect(); + expect(result).toEqual([]); + }); + + it('handles non-existent files', async () => { + jest.spyOn(utils, 'exists').mockResolvedValue(false); + + const result = await collect(); + expect(result).toEqual([]); + }); + + it('handles file reading errors', async () => { + jest.spyOn(utils, 'exists').mockResolvedValue(true); + jest.spyOn(utils, 'isFile').mockResolvedValue(true); + jest.spyOn(fs, 'readFile').mockRejectedValue(new Error('Read error')); + + const result = await collect(); + expect(result).toEqual([]); + }); + + it('extracts snippets correctly', async () => { + jest.spyOn(utils, 'exists').mockResolvedValue(true); + jest.spyOn(utils, 'isFile').mockResolvedValue(true); + jest.spyOn(fs, 'readFile').mockResolvedValue('file contents'); + + const result = await collect(); + expect(result[0].content).toBe('file conte'); + }); + }); +}); diff --git 
a/packages/cli/tests/unit/rpc/explain/collect-search-context.spec.ts b/packages/cli/tests/unit/rpc/explain/collect-search-context.spec.ts new file mode 100644 index 0000000000..723082716a --- /dev/null +++ b/packages/cli/tests/unit/rpc/explain/collect-search-context.spec.ts @@ -0,0 +1,151 @@ +/* eslint-disable @typescript-eslint/no-unsafe-return */ +import collectSearchContext from '../../../../src/rpc/explain/collect-search-context'; +import * as AppMapFileIndex from '../../../../src/rpc/explain/index/appmap-file-index'; +import * as ProjectFileIndex from '../../../../src/rpc/explain/index/project-file-index'; +import * as ProjectFileSnippetIndex from '../../../../src/rpc/explain/index/project-file-snippet-index'; +import { SnippetSearchResult } from '@appland/search'; + +jest.mock('../../../../src/rpc/explain/index/appmap-file-index.ts', () => ({ + ...jest.requireActual('../../../../src/rpc/explain/index/appmap-file-index.ts'), + searchAppMapFiles: jest.fn(), +})); + +jest.mock('../../../../src/rpc/explain/index/project-file-index.ts', () => ({ + ...jest.requireActual('../../../../src/rpc/explain/index/project-file-index.ts'), + searchProjectFiles: jest.fn(), +})); + +jest.mock('../../../../src/rpc/explain/index/project-file-snippet-index.ts', () => ({ + ...jest.requireActual('../../../../src/rpc/explain/index/project-file-snippet-index.ts'), + buildProjectFileSnippetIndex: jest.fn().mockResolvedValue({ + index: { + searchSnippets: jest.fn().mockReturnValue([]), + }, + close: jest.fn(), + }), +})); + +describe('collectSearchContext', () => { + const appmapDirectories = ['dir1', 'dir2']; + const sourceDirectories = ['src1', 'src2']; + const vectorTerms = ['term1', 'term2']; + const charLimit = 1000; + + it('should emit appmaps provided in the request', async () => { + const request = { appmaps: ['dir1/appmap1', 'dir2/appmap2'] }; + const result = await collectSearchContext( + appmapDirectories, + sourceDirectories, + vectorTerms, + charLimit, + request + ); + + 
expect(result.searchResponse.numResults).toBe(request.appmaps.length); + expect(result.context).toEqual([]); + }); + + it('should search appmap files when appmaps are not provided', async () => { + (AppMapFileIndex.searchAppMapFiles as jest.Mock).mockResolvedValue({ + results: [{ appmap: 'appmap1', directory: 'dir1', score: 1 }], + stats: {}, + }); + + const result = await collectSearchContext( + appmapDirectories, + sourceDirectories, + vectorTerms, + charLimit + ); + + expect(AppMapFileIndex.searchAppMapFiles as jest.Mock).toHaveBeenCalledWith( + appmapDirectories, + vectorTerms, + expect.any(Number) + ); + expect(result.searchResponse.numResults).toBe(1); + }); + + it('should process and handle data returned from search functions', async () => { + (AppMapFileIndex.searchAppMapFiles as jest.Mock).mockResolvedValue({ + results: [{ appmap: 'appmap1', directory: 'dir1', score: 1 }], + stats: {}, + }); + (ProjectFileIndex.searchProjectFiles as jest.Mock).mockResolvedValue([ + { file: 'file1', content: 'content1' }, + ]); + + const result = await collectSearchContext( + appmapDirectories, + sourceDirectories, + vectorTerms, + charLimit + ); + + expect(result.searchResponse.numResults).toBe(1); + expect(result.context).toEqual([]); + }); + + it('should search project files and build snippet index', async () => { + (ProjectFileIndex.searchProjectFiles as jest.Mock).mockResolvedValue([ + { file: 'file1', content: 'content1' }, + ]); + + const result = await collectSearchContext( + appmapDirectories, + sourceDirectories, + vectorTerms, + charLimit + ); + + expect(ProjectFileIndex.searchProjectFiles as jest.Mock).toHaveBeenCalledWith( + sourceDirectories, + undefined, + undefined, + vectorTerms + ); + expect(ProjectFileSnippetIndex.buildProjectFileSnippetIndex as jest.Mock).toHaveBeenCalled(); + expect(result.context).toEqual([]); + }); + + it('should continue gathering context to meet the char limit', async () => { + const item1: SnippetSearchResult = { + snippetId: { 
type: 'code-snippet', id: 'path1.py' }, + directory: 'src', + score: 1, + content: 'short', + }; + const item2: SnippetSearchResult = { + snippetId: { type: 'code-snippet', id: 'path2.py' }, + directory: 'src', + score: 0.9, + content: 'longer content to try and meet the char limit', + }; + const mockSearchSnippets = jest + .fn() + .mockReturnValueOnce([item1]) + .mockReturnValue([item1, item2]); + + (ProjectFileSnippetIndex.buildProjectFileSnippetIndex as jest.Mock).mockResolvedValue({ + index: { + searchSnippets: mockSearchSnippets, + }, + close: jest.fn(), + }); + + const result = await collectSearchContext( + appmapDirectories, + sourceDirectories, + vectorTerms, + charLimit + ); + + expect(mockSearchSnippets).toHaveBeenCalledTimes(3); + expect(result.context).toEqual( + expect.arrayContaining([ + expect.objectContaining({ type: 'code-snippet', location: 'path1.py' }), + expect.objectContaining({ type: 'code-snippet', location: 'path2.py' }), + ]) + ); + }); +}); diff --git a/packages/cli/tests/unit/rpc/explain/index/index-events.spec.ts b/packages/cli/tests/unit/rpc/explain/index/index-events.spec.ts new file mode 100644 index 0000000000..3d9bc58cd0 --- /dev/null +++ b/packages/cli/tests/unit/rpc/explain/index/index-events.spec.ts @@ -0,0 +1,93 @@ +/* eslint-disable @typescript-eslint/no-unsafe-member-access */ +/* eslint-disable @typescript-eslint/no-explicit-any */ +/* eslint-disable @typescript-eslint/no-unsafe-return */ +import { SnippetIndex } from '@appland/search'; +import sqlite3 from 'better-sqlite3'; + +import indexEvents from '../../../../../src/rpc/explain/index/index-events'; +import { SearchResult } from '../../../../../src/fulltext/appmap-match'; +import * as AppMapIndex from '../../../../../src/fulltext/appmap-index'; + +jest.mock('../../../../../src/fulltext/appmap-index', () => ({ + ...jest.requireActual('../../../../../src/fulltext/appmap-index'), + readIndexFile: jest.fn(), +})); + +describe('index-events', () => { + 
describe('indexAppMapEvents', () => { + let db: sqlite3.Database; + let snippetIndex: SnippetIndex; + + beforeEach(() => (db = new sqlite3(':memory:'))); + beforeEach(() => (snippetIndex = new SnippetIndex(db))); + afterEach(() => db.close()); + + it('should index events', async () => { + const searchResults: SearchResult[] = [ + { + directory: 'tmp/appmap', + appmap: 'appmap1', + score: 1, + }, + ]; + + const classMap: AppMapIndex.ClassMapEntry[] = [ + { + type: 'package', + name: 'package1', + children: [ + { + type: 'class', + name: 'class1', + children: [ + { + type: 'function', + name: 'method1', + sourceLocation: 'path/to/file1:10', + children: [], + }, + ], + }, + ], + }, + { + type: 'query', + name: 'SELECT * FROM table1', + children: [], + }, + { + type: 'route', + name: '/api/endpoint', + children: [], + }, + { + type: 'external-route', + name: 'GET https://example.com/api/endpoint', + children: [], + }, + ]; + + (AppMapIndex.readIndexFile as jest.Mock).mockResolvedValue(classMap); + + await indexEvents(snippetIndex, searchResults); + + const rows = db.prepare('SELECT * FROM snippet_content ORDER BY snippet_id').all(); + expect(rows.map((r) => (r as any).snippet_id)).toEqual([ + 'external-route:GET https://example.com/api/endpoint', + 'query:c78f4ded2dcc9714feb709a35c86af4727eef18d0eb90fe89c6b13b66977b7b1', + 'route:/api/endpoint', + ]); + + expect(rows.map((r) => (r as any).file_words)).toEqual([ + 'get https example com api endpoint route request client http', + 'select table1 sql query database', + 'api endpoint route request server http', + ]); + + const boostRows = db.prepare('SELECT * FROM snippet_boost ORDER BY snippet_id').all(); + expect(boostRows.map((r) => (r as any).snippet_id)).toEqual([ + 'code-snippet:path/to/file1:10', + ]); + }); + }); +}); diff --git a/packages/cli/tests/unit/rpc/explain/index/project-file-snippet-index.spec.ts b/packages/cli/tests/unit/rpc/explain/index/project-file-snippet-index.spec.ts new file mode 100644 index 
0000000000..2c725291db --- /dev/null +++ b/packages/cli/tests/unit/rpc/explain/index/project-file-snippet-index.spec.ts @@ -0,0 +1,242 @@ +/* eslint-disable @typescript-eslint/no-unsafe-return */ +import { join } from 'path'; + +import * as search from '@appland/search'; + +import { + buildProjectFileSnippetIndex, + snippetContextItem, +} from '../../../../../src/rpc/explain/index/project-file-snippet-index'; +import * as AppMapIndex from '../../../../../src/fulltext/appmap-index'; +import { CloseableIndex } from '../../../../../src/rpc/explain/index/build-index-in-temp-dir'; +import { SearchResult } from '../../../../../src/fulltext/appmap-match'; + +jest.mock('@appland/search', () => ({ + ...jest.requireActual('@appland/search'), + readFileSafe: jest.fn(), +})); + +jest.mock('../../../../../src/fulltext/appmap-index', () => ({ + ...jest.requireActual('../../../../../src/fulltext/appmap-index'), + readIndexFile: jest.fn(), +})); + +describe('project-file-snippet-index', () => { + beforeEach(() => jest.restoreAllMocks()); + beforeEach(() => jest.resetAllMocks()); + + describe('snippetContextItem', () => { + describe('query', () => { + it('should return a snippet context item', () => { + const snippet = { + snippetId: { type: 'query', id: 'the-query' }, + directory: 'a', + score: 1, + content: 'content', + }; + const result = snippetContextItem(snippet); + expect(result).toEqual({ + type: 'data-request', + content: 'content', + directory: 'a', + score: 1, + }); + }); + }); + describe('route', () => { + it('should return a snippet context item', () => { + const snippet = { + snippetId: { type: 'route', id: 'the-route' }, + directory: 'a', + score: 1, + content: 'content', + }; + const result = snippetContextItem(snippet); + expect(result).toEqual({ + type: 'data-request', + content: 'content', + directory: 'a', + score: 1, + }); + }); + }); + describe('external-route', () => { + it('should return a snippet context item', () => { + const snippet = { + snippetId: { 
type: 'external-route', id: 'the-route' }, + directory: 'a', + score: 1, + content: 'content', + }; + const result = snippetContextItem(snippet); + expect(result).toEqual({ + type: 'data-request', + content: 'content', + directory: 'a', + score: 1, + }); + }); + }); + describe('code-snippet', () => { + it('should return a snippet context item', () => { + const snippet = { + snippetId: { type: 'code-snippet', id: 'path/to/item.py:1-3' }, + directory: 'a', + score: 1, + content: 'content', + }; + const result = snippetContextItem(snippet); + expect(result).toEqual({ + type: 'code-snippet', + content: 'content', + directory: 'a', + score: 1, + location: 'path/to/item.py:1-3', + }); + }); + }); + }); + + describe('buildProjectFileSnippetIndex', () => { + let index: CloseableIndex; + + afterEach(() => index?.close()); + + it('should build a snippet index', async () => { + (search.readFileSafe as jest.Mock).mockImplementation((path: string) => { + if (path === 'a/path/to/item.py') return Promise.resolve('def item():\n return 42\n'); + + if (path === 'b/path/to/another.py') + return Promise.resolve('def another():\n return 21\n'); + + throw new Error(`Unexpected path: ${path}`); + }); + + const fileSearchResults: search.FileSearchResult[] = [ + { + directory: 'a', + filePath: 'path/to/item.py', + score: 1.0, + }, + { + directory: 'b', + filePath: 'path/to/another.py', + score: 1.0, + }, + ]; + const appmapSearchResults = []; + + index = await buildProjectFileSnippetIndex(fileSearchResults, appmapSearchResults); + + expect(index).toBeDefined(); + expect(search.readFileSafe as jest.Mock).toHaveBeenCalledTimes(2); + expect(search.readFileSafe as jest.Mock).toHaveBeenCalledWith(join('a', 'path/to/item.py')); + expect(search.readFileSafe as jest.Mock).toHaveBeenCalledWith( + join('b', 'path/to/another.py') + ); + + const result = index.index.searchSnippets('item', 10); + expect(result).toHaveLength(1); + expect(result[0].content).toEqual('def item():\n return 42'); + }); + + 
describe('indexing AppMap data requests', () => { + it('indexes a query', async () => { + const classMap: AppMapIndex.ClassMapEntry[] = [ + { + type: 'query', + name: 'SELECT * FROM table1', + children: [], + }, + ]; + (AppMapIndex.readIndexFile as jest.Mock).mockResolvedValue(classMap); + + const fileSearchResults: search.FileSearchResult[] = []; + const appmapSearchResults: SearchResult[] = [ + { + appmap: 'path/to/appmap_1.appmap.json', + directory: 'dir1', + score: 1.0, + }, + ]; + + index = await buildProjectFileSnippetIndex(fileSearchResults, appmapSearchResults); + + expect(AppMapIndex.readIndexFile as jest.Mock).toHaveBeenCalledTimes(1); + expect(AppMapIndex.readIndexFile as jest.Mock).toHaveBeenCalledWith( + 'path/to/appmap_1', + 'classMap' + ); + + const result = index.index.searchSnippets('table1', 10); + expect(result).toHaveLength(1); + expect(result[0].content).toEqual('SELECT * FROM table1'); + }); + + it('boosts a code snippet', async () => { + const classMap: AppMapIndex.ClassMapEntry[] = [ + { + type: 'package', + name: 'package1', + children: [ + { + type: 'function', + name: 'func1', + sourceLocation: 'path/to/func1.py:1', + children: [], + }, + ], + }, + ]; + + (AppMapIndex.readIndexFile as jest.Mock).mockResolvedValue(classMap); + (search.readFileSafe as jest.Mock).mockImplementation((path: string) => { + if (path === 'path/to/func1.py') return Promise.resolve('def myfunc():\n return 42\n'); + + if (path === 'path/to/func2.py') return Promise.resolve('def myfunc():\n return 21\n'); + + throw new Error(`Unexpected path: ${path}`); + }); + + const fileSearchResults: search.FileSearchResult[] = [ + { + directory: 'path/to', + filePath: 'func1.py', + score: 1.0, + }, + { + directory: 'path/to', + filePath: 'func2.py', + score: 1.0, + }, + ]; + const appmapSearchResults: SearchResult[] = [ + { + appmap: 'path/to/appmap_1.appmap.json', + directory: 'dir1', + score: 1.0, + }, + ]; + + index = await buildProjectFileSnippetIndex(fileSearchResults, 
appmapSearchResults); + + expect(AppMapIndex.readIndexFile as jest.Mock).toHaveBeenCalledTimes(1); + expect(AppMapIndex.readIndexFile as jest.Mock).toHaveBeenCalledWith( + 'path/to/appmap_1', + 'classMap' + ); + + expect(search.readFileSafe as jest.Mock).toHaveBeenCalledTimes(2); + expect(search.readFileSafe as jest.Mock).toHaveBeenCalledWith(join('path/to/func1.py')); + expect(search.readFileSafe as jest.Mock).toHaveBeenCalledWith(join('path/to/func2.py')); + + const result = index.index.searchSnippets('myfunc', 10); + expect(result).toHaveLength(2); + expect(result[0].snippetId).toEqual({ type: 'code-snippet', id: 'path/to/func1.py:1' }); + expect(result[1].snippetId).toEqual({ type: 'code-snippet', id: 'path/to/func2.py:1' }); + // Row 0 should have approximately twice the score of row 1 + expect(result[1].score * 2).toBeCloseTo(result[0].score); + }); + }); + }); +}); diff --git a/packages/cli/tests/unit/rpc/explain/pattern.spec.ts b/packages/cli/tests/unit/rpc/explain/pattern.spec.ts index e725818969..53694ef18f 100644 --- a/packages/cli/tests/unit/rpc/explain/pattern.spec.ts +++ b/packages/cli/tests/unit/rpc/explain/pattern.spec.ts @@ -1,4 +1,4 @@ -import { buildExclusionPattern } from '../../../../src/rpc/explain/collectContext'; +import { buildExclusionPattern } from '../../../../src/rpc/explain/collect-context'; describe('Regex patterns', () => { const EXCLUDE_DOT_APPMAP_DIR = buildExclusionPattern('.appmap'); From ffaa8115844064f76c95ad3cd3d04e553b5eee39 Mon Sep 17 00:00:00 2001 From: Kevin Gilpin Date: Thu, 21 Nov 2024 11:40:38 -0500 Subject: [PATCH 12/12] refactor: Relocate code from fulltext to explain/index --- packages/cli/src/cmds/search/search.ts | 4 +- packages/cli/src/fulltext/FindEvents.ts | 2 +- packages/cli/src/fulltext/ref.ts | 14 --- .../src/rpc/explain/collect-search-context.ts | 2 +- packages/cli/src/rpc/explain/fileFilter.ts | 29 +++++ packages/cli/src/rpc/explain/index-files.ts | 55 ++++++++ .../rpc/explain/index/appmap-file-index.ts | 4 
+- .../explain/index}/appmap-index.ts | 4 +- .../explain/index}/appmap-match.ts | 4 +- .../explain/index/filter-patterns.ts} | 0 .../cli/src/rpc/explain/index/index-events.ts | 10 +- .../rpc/explain/index/project-file-index.ts | 2 +- .../index/project-file-snippet-index.ts | 3 +- .../textSearchResultToRpcSearchResult.ts | 15 +++ packages/cli/src/rpc/search/search.ts | 6 +- .../cli/tests/unit/readAppMapContent.spec.ts | 42 +++++++ .../unit/rpc/explain/EventCollector.spec.ts | 119 ------------------ .../appmap-index.readAppMapContent.spec.ts | 2 +- .../index}/appmap-index.search.spec.ts | 12 +- .../rpc/explain/index/appmap-index.spec.ts | 90 +++++++++++++ .../rpc/explain/index/index-events.spec.ts | 8 +- .../index/project-file-snippet-index.spec.ts | 8 +- 22 files changed, 270 insertions(+), 165 deletions(-) delete mode 100644 packages/cli/src/fulltext/ref.ts create mode 100644 packages/cli/src/rpc/explain/fileFilter.ts create mode 100644 packages/cli/src/rpc/explain/index-files.ts rename packages/cli/src/{fulltext => rpc/explain/index}/appmap-index.ts (97%) rename packages/cli/src/{fulltext => rpc/explain/index}/appmap-match.ts (98%) rename packages/cli/src/{fulltext/fileNameMatchesFilterPatterns.ts => rpc/explain/index/filter-patterns.ts} (100%) create mode 100644 packages/cli/src/rpc/explain/textSearchResultToRpcSearchResult.ts create mode 100644 packages/cli/tests/unit/readAppMapContent.spec.ts delete mode 100644 packages/cli/tests/unit/rpc/explain/EventCollector.spec.ts rename packages/cli/tests/unit/{fulltext => rpc/explain/index}/appmap-index.readAppMapContent.spec.ts (96%) rename packages/cli/tests/unit/{fulltext => rpc/explain/index}/appmap-index.search.spec.ts (93%) create mode 100644 packages/cli/tests/unit/rpc/explain/index/appmap-index.spec.ts diff --git a/packages/cli/src/cmds/search/search.ts b/packages/cli/src/cmds/search/search.ts index b4f94b411e..0cf5d1f634 100644 --- a/packages/cli/src/cmds/search/search.ts +++ 
b/packages/cli/src/cmds/search/search.ts @@ -9,13 +9,13 @@ import { FileIndex } from '@appland/search'; import { handleWorkingDirectory } from '../../lib/handleWorkingDirectory'; import { verbose } from '../../utils'; import searchSingleAppMap, { SearchOptions as SingleSearchOptions } from './searchSingleAppMap'; -import { SearchResponse as DiagramsSearchResponse } from '../../fulltext/appmap-match'; +import { SearchResponse as DiagramsSearchResponse } from '../../rpc/explain/index/appmap-match'; import { SearchResult as EventSearchResult, SearchResponse as EventSearchResponse, } from '../../fulltext/FindEvents'; import { openInBrowser } from '../open/openers'; -import { buildAppMapIndex, search } from '../../fulltext/appmap-index'; +import { buildAppMapIndex, search } from '../../rpc/explain/index/appmap-index'; import buildIndexInTempDir from '../../rpc/explain/index/build-index-in-temp-dir'; export const command = 'search '; diff --git a/packages/cli/src/fulltext/FindEvents.ts b/packages/cli/src/fulltext/FindEvents.ts index 5ce3b4d967..ca47d18829 100644 --- a/packages/cli/src/fulltext/FindEvents.ts +++ b/packages/cli/src/fulltext/FindEvents.ts @@ -7,7 +7,7 @@ import assert from 'assert'; import { verbose } from '../utils'; import { collectParameters } from './collectParameters'; -import { fileNameMatchesFilterPatterns } from './fileNameMatchesFilterPatterns'; +import { fileNameMatchesFilterPatterns } from '../rpc/explain/index/filter-patterns'; type IndexItem = { fqid: string; diff --git a/packages/cli/src/fulltext/ref.ts b/packages/cli/src/fulltext/ref.ts deleted file mode 100644 index 6e73384cb9..0000000000 --- a/packages/cli/src/fulltext/ref.ts +++ /dev/null @@ -1,14 +0,0 @@ -import { join } from 'path'; - -export function packRef(directory: string, appmapId: string): string { - return JSON.stringify({ directory, appmapId }); -} - -export function refToAppMapDir(ref: string): string { - const { directory, appmapId } = unpackRef(ref); - return join(directory, 
appmapId); -} - -export function unpackRef(ref: string): { directory: string; appmapId: string } { - return JSON.parse(ref); -} diff --git a/packages/cli/src/rpc/explain/collect-search-context.ts b/packages/cli/src/rpc/explain/collect-search-context.ts index c0f2f75843..60bc4015f2 100644 --- a/packages/cli/src/rpc/explain/collect-search-context.ts +++ b/packages/cli/src/rpc/explain/collect-search-context.ts @@ -4,7 +4,7 @@ import { ContextV2, applyContext } from '@appland/navie'; import { SearchRpc } from '@appland/rpc'; import { DEFAULT_MAX_DIAGRAMS } from '../search/search'; -import { SearchResponse as AppMapSearchResponse } from '../../fulltext/appmap-match'; +import { SearchResponse as AppMapSearchResponse } from './index/appmap-match'; import { searchAppMapFiles } from './index/appmap-file-index'; import { searchProjectFiles } from './index/project-file-index'; import { diff --git a/packages/cli/src/rpc/explain/fileFilter.ts b/packages/cli/src/rpc/explain/fileFilter.ts new file mode 100644 index 0000000000..c144bd5434 --- /dev/null +++ b/packages/cli/src/rpc/explain/fileFilter.ts @@ -0,0 +1,29 @@ +import { FilterFn, isBinaryFile, isDataFile, isLargeFile } from '@appland/search'; +import makeDebug from 'debug'; +import { fileNameMatchesFilterPatterns } from './index/filter-patterns'; + +const debug = makeDebug('appmap:rpc:explain:file-filter'); + +export default function fileFilter( + includePatterns: RegExp[] | undefined, + excludePatterns: RegExp[] | undefined +): FilterFn { + return async (path: string) => { + debug('Filtering: %s', path); + if (isBinaryFile(path)) { + debug('Skipping binary file: %s', path); + return false; + } + + const includeFile = fileNameMatchesFilterPatterns(path, includePatterns, excludePatterns); + if (!includeFile) return false; + + const isData = isDataFile(path); + if (isData && (await isLargeFile(path))) { + debug('Skipping large data file: %s', path); + return false; + } + + return true; + }; +} diff --git 
a/packages/cli/src/rpc/explain/index-files.ts b/packages/cli/src/rpc/explain/index-files.ts new file mode 100644 index 0000000000..c904faf021 --- /dev/null +++ b/packages/cli/src/rpc/explain/index-files.ts @@ -0,0 +1,55 @@ +import sqlite3 from 'better-sqlite3'; +import makeDebug from 'debug'; + +import { + buildFileIndex, + FileIndex, + fileTokens, + FilterFn, + isBinaryFile, + isDataFile, + isLargeFile, + listProjectFiles, + readFileSafe, +} from '@appland/search'; +import { fileNameMatchesFilterPatterns } from './index/filter-patterns'; + +const debug = makeDebug('appmap:rpc:explain:index-files'); + +function fileFilter( + includePatterns: RegExp[] | undefined, + excludePatterns: RegExp[] | undefined +): FilterFn { + return async (path: string) => { + debug('Filtering: %s', path); + if (isBinaryFile(path)) { + debug('Skipping binary file: %s', path); + return false; + } + + const includeFile = fileNameMatchesFilterPatterns(path, includePatterns, excludePatterns); + if (!includeFile) return false; + + const isData = isDataFile(path); + if (isData && (await isLargeFile(path))) { + debug('Skipping large data file: %s', path); + return false; + } + + return true; + }; +} + +export default async function indexFiles( + db: sqlite3.Database, + directories: string[], + includePatterns: RegExp[] | undefined, + excludePatterns: RegExp[] | undefined +): Promise { + const fileIndex = new FileIndex(db); + + const filter = fileFilter(includePatterns, excludePatterns); + await buildFileIndex(fileIndex, directories, listProjectFiles, filter, readFileSafe, fileTokens); + + return fileIndex; +} diff --git a/packages/cli/src/rpc/explain/index/appmap-file-index.ts b/packages/cli/src/rpc/explain/index/appmap-file-index.ts index 0e16c43203..e49dc74fa6 100644 --- a/packages/cli/src/rpc/explain/index/appmap-file-index.ts +++ b/packages/cli/src/rpc/explain/index/appmap-file-index.ts @@ -3,8 +3,8 @@ import sqlite3 from 'better-sqlite3'; import { FileIndex } from '@appland/search'; import 
buildIndexInTempDir, { CloseableIndex } from './build-index-in-temp-dir'; -import { buildAppMapIndex, search } from '../../../fulltext/appmap-index'; -import { SearchResponse } from '../../../fulltext/appmap-match'; +import { buildAppMapIndex, search } from './appmap-index'; +import { SearchResponse } from './appmap-match'; export async function buildAppMapFileIndex( appmapDirectories: string[] diff --git a/packages/cli/src/fulltext/appmap-index.ts b/packages/cli/src/rpc/explain/index/appmap-index.ts similarity index 97% rename from packages/cli/src/fulltext/appmap-index.ts rename to packages/cli/src/rpc/explain/index/appmap-index.ts index 3050141965..a29221bd75 100644 --- a/packages/cli/src/fulltext/appmap-index.ts +++ b/packages/cli/src/rpc/explain/index/appmap-index.ts @@ -5,7 +5,7 @@ import { readFile } from 'fs/promises'; import { Metadata } from '@appland/models'; import { buildFileIndex, FileIndex, fileTokens } from '@appland/search'; -import { findFiles, isNodeError, verbose } from '../utils'; +import { findFiles, isNodeError, verbose } from '../../../utils'; import { downscoreOutOfDateMatches, Match, @@ -14,7 +14,7 @@ import { scoreMatches, SearchResponse, } from './appmap-match'; -import loadAppMapConfig from '../lib/loadAppMapConfig'; +import loadAppMapConfig from '../../../lib/loadAppMapConfig'; export type ClassMapEntry = { name: string; diff --git a/packages/cli/src/fulltext/appmap-match.ts b/packages/cli/src/rpc/explain/index/appmap-match.ts similarity index 98% rename from packages/cli/src/fulltext/appmap-match.ts rename to packages/cli/src/rpc/explain/index/appmap-match.ts index 4dc104f47c..eb2d8d5094 100644 --- a/packages/cli/src/fulltext/appmap-match.ts +++ b/packages/cli/src/rpc/explain/index/appmap-match.ts @@ -1,5 +1,5 @@ -import UpToDate from '../lib/UpToDate'; -import { exists } from '../utils'; +import UpToDate from '../../../lib/UpToDate'; +import { exists } from '../../../utils'; import makeDebug from 'debug'; diff --git 
a/packages/cli/src/fulltext/fileNameMatchesFilterPatterns.ts b/packages/cli/src/rpc/explain/index/filter-patterns.ts similarity index 100% rename from packages/cli/src/fulltext/fileNameMatchesFilterPatterns.ts rename to packages/cli/src/rpc/explain/index/filter-patterns.ts diff --git a/packages/cli/src/rpc/explain/index/index-events.ts b/packages/cli/src/rpc/explain/index/index-events.ts index 2c656381dd..76ae35ecb9 100644 --- a/packages/cli/src/rpc/explain/index/index-events.ts +++ b/packages/cli/src/rpc/explain/index/index-events.ts @@ -2,8 +2,8 @@ import { queryKeywords, SnippetId, SnippetIndex } from '@appland/search'; import { warn } from 'console'; import crypto from 'crypto'; -import { SearchResult } from '../../../fulltext/appmap-match'; -import { ClassMapEntry, readIndexFile } from '../../../fulltext/appmap-index'; +import { SearchResult } from './appmap-match'; +import { ClassMapEntry, readIndexFile } from './appmap-index'; function hexDigest(input: string): string { const hash = crypto.createHash('sha256'); @@ -51,6 +51,12 @@ async function indexAppMapEvents( let tags: string[] = []; if (cme.type === 'query') { id = hexDigest(cme.name); + // TODO: We really want an event id for this code object. + // TODO: Include an index file that maps fqids to event ids? + // sequence.json does have the fqid -> event id mapping, but it's not + // in the index by default. + // TODO: Can we just link over to the appmap by fqid? + // Yes it can definitely be done. 
tags = ['sql', 'query', 'database']; } else if (cme.type === 'route') { id = cme.name; diff --git a/packages/cli/src/rpc/explain/index/project-file-index.ts b/packages/cli/src/rpc/explain/index/project-file-index.ts index d3403b8f81..b68d4589e1 100644 --- a/packages/cli/src/rpc/explain/index/project-file-index.ts +++ b/packages/cli/src/rpc/explain/index/project-file-index.ts @@ -13,7 +13,7 @@ import { listProjectFiles, readFileSafe, } from '@appland/search'; -import { fileNameMatchesFilterPatterns } from '../../../fulltext/fileNameMatchesFilterPatterns'; +import { fileNameMatchesFilterPatterns } from './filter-patterns'; import buildIndexInTempDir, { CloseableIndex } from './build-index-in-temp-dir'; diff --git a/packages/cli/src/rpc/explain/index/project-file-snippet-index.ts b/packages/cli/src/rpc/explain/index/project-file-snippet-index.ts index 9493e7d8cf..2a1bd8685f 100644 --- a/packages/cli/src/rpc/explain/index/project-file-snippet-index.ts +++ b/packages/cli/src/rpc/explain/index/project-file-snippet-index.ts @@ -14,7 +14,7 @@ import { import buildIndexInTempDir, { CloseableIndex } from './build-index-in-temp-dir'; import indexEvents from './index-events'; -import { SearchResult } from '../../../fulltext/appmap-match'; +import { SearchResult } from './appmap-match'; export function snippetContextItem( snippet: SnippetSearchResult @@ -38,6 +38,7 @@ export function snippetContextItem( content, directory, score, + location: appmapLocation(result.appmap, eventId), }; case 'code-snippet': // TODO: Collect event ids from these. 
diff --git a/packages/cli/src/rpc/explain/textSearchResultToRpcSearchResult.ts b/packages/cli/src/rpc/explain/textSearchResultToRpcSearchResult.ts new file mode 100644 index 0000000000..f3d2e96625 --- /dev/null +++ b/packages/cli/src/rpc/explain/textSearchResultToRpcSearchResult.ts @@ -0,0 +1,15 @@ +import { SearchRpc } from '@appland/rpc'; +import { SearchResult as EventSearchResult } from '../../fulltext/FindEvents'; + +export function textSearchResultToRpcSearchResult( + eventResult: EventSearchResult +): SearchRpc.EventMatch { + const result: SearchRpc.EventMatch = { + fqid: eventResult.fqid, + score: eventResult.score, + eventIds: eventResult.eventIds, + }; + if (eventResult.location) result.location = eventResult.location; + if (eventResult.elapsed) result.elapsed = eventResult.elapsed; + return result; +} diff --git a/packages/cli/src/rpc/search/search.ts b/packages/cli/src/rpc/search/search.ts index 95fd07dbbe..dc37063271 100644 --- a/packages/cli/src/rpc/search/search.ts +++ b/packages/cli/src/rpc/search/search.ts @@ -4,12 +4,12 @@ import { FileIndex } from '@appland/search'; import { SearchRpc } from '@appland/rpc'; import { RpcHandler } from '../rpc'; -import { SearchResponse } from '../../fulltext/appmap-match'; -import { search as searchAppMaps } from '../../fulltext/appmap-index'; +import { SearchResponse } from '../explain/index/appmap-match'; +import { search as searchAppMaps } from '../explain/index/appmap-index'; import searchSingleAppMap from '../../cmds/search/searchSingleAppMap'; import configuration, { AppMapDirectory } from '../configuration'; import buildIndexInTempDir from '../explain/index/build-index-in-temp-dir'; -import { buildAppMapIndex } from '../../fulltext/appmap-index'; +import { buildAppMapIndex } from '../explain/index/appmap-index'; export const DEFAULT_MAX_DIAGRAMS = 3; export const DEFAULT_MAX_EVENTS_PER_DIAGRAM = 100; diff --git a/packages/cli/tests/unit/readAppMapContent.spec.ts 
b/packages/cli/tests/unit/readAppMapContent.spec.ts new file mode 100644 index 0000000000..4a64da2508 --- /dev/null +++ b/packages/cli/tests/unit/readAppMapContent.spec.ts @@ -0,0 +1,42 @@ +import { vol } from 'memfs'; +import { readAppMapContent } from '../../src/rpc/explain/index/appmap-index'; +import { Metadata } from '@appland/models'; + +jest.mock('fs/promises', () => require('memfs').promises); + +describe('readAppMapContent', () => { + beforeEach(() => { + vol.reset(); + }); + + it('reads appmap content from index files', async () => { + const appmapName = '/appmaps/testAppMap'; + const metadata: Metadata = { + name: 'Test AppMap', + labels: ['test', 'appmap'], + exception: { class: 'Exception', message: 'Test exception' }, + client: { name: 'Test client', version: '1.0.0', url: 'http://test.com' }, + recorder: { name: 'Test recorder' }, + }; + const classMap = [ + { name: 'query1', type: 'query', labels: [], children: [] }, + { name: 'route1', type: 'route', labels: [], children: [] }, + ]; + + vol.fromJSON({ + [`${appmapName}/metadata.json`]: JSON.stringify(metadata), + [`${appmapName}/classMap.json`]: JSON.stringify(classMap), + [`${appmapName}/canonical.parameters.json`]: JSON.stringify(['param1', 'param2']), + }); + + const content = await readAppMapContent(`${appmapName}.appmap.json`); + expect(content).toContain('Test AppMap'); + expect(content).toContain('test'); + expect(content).toContain('appmap'); + expect(content).toContain('Test exception'); + expect(content).toContain('query1'); + expect(content).toContain('route1'); + expect(content).toContain('param1'); + expect(content).toContain('param2'); + }); +}); diff --git a/packages/cli/tests/unit/rpc/explain/EventCollector.spec.ts b/packages/cli/tests/unit/rpc/explain/EventCollector.spec.ts deleted file mode 100644 index 2f1a34e345..0000000000 --- a/packages/cli/tests/unit/rpc/explain/EventCollector.spec.ts +++ /dev/null @@ -1,119 +0,0 @@ -import { SearchRpc } from '@appland/rpc'; -import { join } 
from 'path'; - -import { textSearchResultToRpcSearchResult } from '../../../../src/rpc/explain/collectContext'; -import buildContext from '../../../../src/rpc/explain/buildContext'; -import { SearchResponse as AppMapSearchResponse } from '../../../../src/fulltext/appmap-match'; -import FindEvents, { - SearchResponse as EventSearchResponse, -} from '../../../../src/fulltext/FindEvents'; -import EventCollector from '../../../../src/rpc/explain/EventCollector'; - -jest.mock('../../../../src/fulltext/FindEvents'); -jest.mock('../../../../src/rpc/explain/buildContext'); - -describe('EventCollector', () => { - const mockFindEventsResponses: EventSearchResponse[] = [ - { - type: 'event', - numResults: 1, - results: [ - { - appmap: 'appMapId1', - fqid: 'testFqid1', - score: 1, - eventIds: [1, 2, 3], - }, - ], - }, - { - type: 'event', - numResults: 1, - results: [ - { - appmap: 'appMapId2', - fqid: 'testFqid2', - score: 1, - eventIds: [2, 3, 4], - }, - ], - }, - ]; - - const oneSearchResponse: AppMapSearchResponse = { - type: 'appmap', - numResults: 1, - stats: { max: 1, mean: 1, median: 1, stddev: 0 }, - results: [{ appmap: 'appMapId1', directory: 'a', score: 1 }], - }; - - const multiSearchResponse: AppMapSearchResponse = { - type: 'appmap', - numResults: 2, // Indicating two appmaps are present - stats: { max: 1, mean: 1, median: 1, stddev: 0 }, - results: [ - { appmap: 'appMapId1', directory: 'a', score: 1 }, - { appmap: 'appMapId2', directory: 'b', score: 1 }, - ], - }; - - beforeEach(() => { - jest.mocked(FindEvents).prototype.initialize.mockResolvedValue(); - let mockFindEventsResponsesCopy = [...mockFindEventsResponses]; - jest - .mocked(FindEvents) - .prototype.search.mockImplementation(() => mockFindEventsResponsesCopy.shift()!); - jest.mocked(buildContext).mockResolvedValue([]); - }); - afterEach(() => jest.resetAllMocks()); - - it('correctly initializes and indexes app maps', async () => { - const collector = new EventCollector('query', oneSearchResponse); - 
await collector.collectEvents(10); - - const appmap = join('a', 'appMapId1'); - expect(FindEvents).toHaveBeenCalledWith(appmap); - expect(FindEvents.prototype.initialize).toHaveBeenCalled(); - expect(collector.appmapIndexes.has(appmap)).toBe(true); - }); - - it('collects events based on provided maxEvents', async () => { - const maxEvents = 10; - const collector = new EventCollector('query', oneSearchResponse); - const collectedData = await collector.collectEvents(maxEvents); - - expect(FindEvents.prototype.search).toHaveBeenCalledWith('query', { maxResults: maxEvents }); - expect(buildContext).toHaveBeenCalled(); - expect(collectedData.results[0].events).toEqual( - mockFindEventsResponses[0].results.map(textSearchResultToRpcSearchResult) - ); - }); - - it('collects events from multiple appmaps', async () => { - const maxEvents = 10; - const collector = new EventCollector('query', multiSearchResponse); - const collectedData = await collector.collectEvents(maxEvents); - - // Assume the findEvents method provides merged results from multiple appmaps - const expectedResponse: SearchRpc.SearchResponse = { - numResults: 2, - results: [ - { - appmap: join('a', 'appMapId1'), - directory: 'a', - score: 1, - events: mockFindEventsResponses[0].results.map(textSearchResultToRpcSearchResult), - }, - { - appmap: join('b', 'appMapId2'), - directory: 'b', - score: 1, - events: mockFindEventsResponses[1].results.map(textSearchResultToRpcSearchResult), - }, - ], - }; - - expect(FindEvents.prototype.search).toHaveBeenCalledTimes(multiSearchResponse.numResults); - expect(collectedData.results).toEqual(expectedResponse.results); - }); -}); diff --git a/packages/cli/tests/unit/fulltext/appmap-index.readAppMapContent.spec.ts b/packages/cli/tests/unit/rpc/explain/index/appmap-index.readAppMapContent.spec.ts similarity index 96% rename from packages/cli/tests/unit/fulltext/appmap-index.readAppMapContent.spec.ts rename to 
packages/cli/tests/unit/rpc/explain/index/appmap-index.readAppMapContent.spec.ts index 96a0ac4a22..b51c47be4b 100644 --- a/packages/cli/tests/unit/fulltext/appmap-index.readAppMapContent.spec.ts +++ b/packages/cli/tests/unit/rpc/explain/index/appmap-index.readAppMapContent.spec.ts @@ -1,5 +1,5 @@ import { vol } from 'memfs'; -import { readAppMapContent } from '../../../src/fulltext/appmap-index'; +import { readAppMapContent } from '../../../../../src/rpc/explain/index/appmap-index'; import { Metadata } from '@appland/models'; jest.mock('fs/promises', () => require('memfs').promises); diff --git a/packages/cli/tests/unit/fulltext/appmap-index.search.spec.ts b/packages/cli/tests/unit/rpc/explain/index/appmap-index.search.spec.ts similarity index 93% rename from packages/cli/tests/unit/fulltext/appmap-index.search.spec.ts rename to packages/cli/tests/unit/rpc/explain/index/appmap-index.search.spec.ts index 72218d0b30..6f0b7e26e5 100644 --- a/packages/cli/tests/unit/fulltext/appmap-index.search.spec.ts +++ b/packages/cli/tests/unit/rpc/explain/index/appmap-index.search.spec.ts @@ -1,13 +1,13 @@ -import * as utils from '../../../src/utils'; -import UpToDate from '../../../src/lib/UpToDate'; +import * as utils from '../../../../../src/utils'; +import UpToDate from '../../../../../src/lib/UpToDate'; import { PathLike } from 'fs'; import { join } from 'path'; import { FileIndex, FileSearchResult } from '@appland/search'; -import { search } from '../../../src/fulltext/appmap-index'; -import { SearchStats } from '../../../src/fulltext/appmap-match'; +import { search } from '../../../../../src/rpc/explain/index/appmap-index'; +import { SearchStats } from '../../../../../src/rpc/explain/index/appmap-match'; -jest.mock('../../../src/utils'); -jest.mock('../../../src/lib/UpToDate'); +jest.mock('../../../../../src/utils'); +jest.mock('../../../../../src/lib/UpToDate'); describe('AppMapIndex', () => { let mockAppmapIndex: FileIndex; diff --git 
a/packages/cli/tests/unit/rpc/explain/index/appmap-index.spec.ts b/packages/cli/tests/unit/rpc/explain/index/appmap-index.spec.ts new file mode 100644 index 0000000000..b51c47be4b --- /dev/null +++ b/packages/cli/tests/unit/rpc/explain/index/appmap-index.spec.ts @@ -0,0 +1,90 @@ +import { vol } from 'memfs'; +import { readAppMapContent } from '../../../../../src/rpc/explain/index/appmap-index'; +import { Metadata } from '@appland/models'; + +jest.mock('fs/promises', () => require('memfs').promises); + +describe('readAppMapContent', () => { + beforeEach(() => vol.reset()); + afterEach(() => vol.reset()); + + it('reads appmap content from index files', async () => { + const appmapName = '/appmaps/testAppMap'; + const metadata: Metadata = { + name: 'Test AppMap', + labels: ['test', 'appmap'], + exception: { class: 'Exception', message: 'Test exception' }, + client: { name: 'Test client', version: '1.0.0', url: 'http://test.com' }, + recorder: { name: 'Test recorder' }, + }; + const classMap = [ + { + name: 'package1', + type: 'package', + labels: [], + children: [ + { + name: 'class1', + type: 'class', + labels: [], + children: [ + { + name: 'function1', + type: 'function', + labels: [], + children: [], + }, + ], + }, + { name: 'class2', type: 'class', labels: [], children: [] }, + ], + }, + { name: 'query1', type: 'query', labels: [], children: [] }, + { name: 'route1', type: 'route', labels: [], children: [] }, + ]; + + vol.fromJSON({ + [`${appmapName}/metadata.json`]: JSON.stringify(metadata), + [`${appmapName}/classMap.json`]: JSON.stringify(classMap), + [`${appmapName}/canonical.parameters.json`]: JSON.stringify(['param1', 'param2']), + }); + + const content = await readAppMapContent(`${appmapName}.appmap.json`); + expect(content).toContain('Test AppMap'); + expect(content).toContain('test'); + expect(content).toContain('appmap'); + expect(content).toContain('Test exception'); + expect(content).toContain('query1'); + expect(content).toContain('route1'); + 
expect(content).toContain('function1'); + expect(content).toContain('param1'); + expect(content).toContain('param2'); + expect(content).toContain('route'); + expect(content).toContain('sql'); + expect(content).toContain('database'); + + expect(content.split(' ')).toEqual([ + 'Test', + 'AppMap', + 'test', + 'appmap', + 'Test', + 'exception', + 'query1', + 'package1', + 'class1', + 'function1', + 'class2', + 'route1', + 'param1', + 'param2', + 'sql', + 'query', + 'database', + 'route', + 'request', + 'server', + 'http', + ]); + }); +}); diff --git a/packages/cli/tests/unit/rpc/explain/index/index-events.spec.ts b/packages/cli/tests/unit/rpc/explain/index/index-events.spec.ts index 3d9bc58cd0..b8cdf6bf5f 100644 --- a/packages/cli/tests/unit/rpc/explain/index/index-events.spec.ts +++ b/packages/cli/tests/unit/rpc/explain/index/index-events.spec.ts @@ -5,11 +5,11 @@ import { SnippetIndex } from '@appland/search'; import sqlite3 from 'better-sqlite3'; import indexEvents from '../../../../../src/rpc/explain/index/index-events'; -import { SearchResult } from '../../../../../src/fulltext/appmap-match'; -import * as AppMapIndex from '../../../../../src/fulltext/appmap-index'; +import { SearchResult } from '../../../../../src/rpc/explain/index/appmap-match'; +import * as AppMapIndex from '../../../../../src/rpc/explain/index/appmap-index'; -jest.mock('../../../../../src/fulltext/appmap-index', () => ({ - ...jest.requireActual('../../../../../src/fulltext/appmap-index'), +jest.mock('../../../../../src/rpc/explain/index/appmap-index', () => ({ + ...jest.requireActual('../../../../../src/rpc/explain/index/appmap-index'), readIndexFile: jest.fn(), })); diff --git a/packages/cli/tests/unit/rpc/explain/index/project-file-snippet-index.spec.ts b/packages/cli/tests/unit/rpc/explain/index/project-file-snippet-index.spec.ts index 2c725291db..d834af99eb 100644 --- a/packages/cli/tests/unit/rpc/explain/index/project-file-snippet-index.spec.ts +++ 
b/packages/cli/tests/unit/rpc/explain/index/project-file-snippet-index.spec.ts @@ -7,17 +7,17 @@ import { buildProjectFileSnippetIndex, snippetContextItem, } from '../../../../../src/rpc/explain/index/project-file-snippet-index'; -import * as AppMapIndex from '../../../../../src/fulltext/appmap-index'; +import * as AppMapIndex from '../../../../../src/rpc/explain/index/appmap-index'; import { CloseableIndex } from '../../../../../src/rpc/explain/index/build-index-in-temp-dir'; -import { SearchResult } from '../../../../../src/fulltext/appmap-match'; +import { SearchResult } from '../../../../../src/rpc/explain/index/appmap-match'; jest.mock('@appland/search', () => ({ ...jest.requireActual('@appland/search'), readFileSafe: jest.fn(), })); -jest.mock('../../../../../src/fulltext/appmap-index', () => ({ - ...jest.requireActual('../../../../../src/fulltext/appmap-index'), +jest.mock('../../../../../src/rpc/explain/index/appmap-index', () => ({ + ...jest.requireActual('../../../../../src/rpc/explain/index/appmap-index'), readIndexFile: jest.fn(), }));