Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 21 additions & 30 deletions src/extension/linkify/common/filePathLinkifier.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,6 @@ import { IContributedLinkifier, LinkifierContext } from './linkifyService';
// Create a single regex which runs different regexp parts in a big `|` expression.
const pathMatchRe = new RegExp(
[
// [path/to/file.md](path/to/file.md) or [`path/to/file.md`](path/to/file.md)
/\[(`?)(?<mdLinkText>[^`\]\)\n]+)\1\]\((?<mdLinkPath>[^`\s]+)\)/.source,

// Inline code paths
/(?<!\[)`(?<inlineCodePath>[^`\s]+)`(?!\])/.source,

Expand All @@ -35,8 +32,8 @@ const pathMatchRe = new RegExp(
* Linkifies file paths in responses. This includes:
*
* ```
* [file.md](file.md)
* `file.md`
* foo.ts
* ```
*/
export class FilePathLinkifier implements IContributedLinkifier {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we still need this class at all? Maybe just delete it and call new ModelFilePathLinkifier FilePathLinkifier instead

It seems like the new linkifier should handle everything that the current one does. If there are any gaps we can fix those instead of keeping the fallback around

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right now ModelFilePathLinkifier handles structured markdown links with anchors texttext, while FilePathLinkifier handles inline code and plain text paths.
They're registered with clear separation of concerns, where FilePathLinkifier acts as a fallback for informal references.

That said, I'm happy to explore merging them if you think that would be better! Let me know your preference and I can make it work either way.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My hope is that the model will handle creating proper links now. The inline code and plain text path logic was added because previous models were no good at generating links consistently

Worth testing removing the old linkifier to and seeing if the model still generates paths in plaintext / inline code. If it does, maybe we need to tweak the instructions a bit

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Still interested to see if we can avoid having two linkifier paths and just prefer using the model

Expand All @@ -58,28 +55,15 @@ export class FilePathLinkifier implements IContributedLinkifier {

const matched = match[0];

let pathText: string | undefined;

// For a md style link, require that the text and path are the same
// However we have to have extra logic since the path may be encoded: `[file name](file%20name)`
if (match.groups?.['mdLinkPath']) {
let mdLinkPath = match.groups?.['mdLinkPath'];
try {
mdLinkPath = decodeURIComponent(mdLinkPath);
} catch {
// noop
}

if (mdLinkPath !== match.groups?.['mdLinkText']) {
pathText = undefined;
} else {
pathText = mdLinkPath;
}
}
pathText ??= match.groups?.['inlineCodePath'] ?? match.groups?.['plainTextPath'] ?? '';
const pathText = match.groups?.['inlineCodePath'] ?? match.groups?.['plainTextPath'] ?? '';

parts.push(this.resolvePathText(pathText, context)
.then(uri => uri ? new LinkifyLocationAnchor(uri) : matched));
.then(uri => {
if (uri) {
return new LinkifyLocationAnchor(uri);
}
return matched;
}));

endLastMatch = match.index + matched.length;
}
Expand All @@ -93,6 +77,7 @@ export class FilePathLinkifier implements IContributedLinkifier {
}

private async resolvePathText(pathText: string, context: LinkifierContext): Promise<Uri | undefined> {
const includeDirectorySlash = pathText.endsWith('/');
const workspaceFolders = this.workspaceService.getWorkspaceFolders();

// Don't linkify very short paths such as '/' or special paths such as '../'
Expand All @@ -102,7 +87,7 @@ export class FilePathLinkifier implements IContributedLinkifier {

if (pathText.startsWith('/') || (isWindows && (pathText.startsWith('\\') || hasDriveLetter(pathText)))) {
try {
const uri = await this.statAndNormalizeUri(Uri.file(pathText.startsWith('/') ? path.posix.normalize(pathText) : path.normalize(pathText)));
const uri = await this.statAndNormalizeUri(Uri.file(pathText.startsWith('/') ? path.posix.normalize(pathText) : path.normalize(pathText)), includeDirectorySlash);
if (uri) {
if (path.posix.normalize(uri.path) === '/') {
return undefined;
Expand All @@ -121,7 +106,7 @@ export class FilePathLinkifier implements IContributedLinkifier {
try {
const uri = Uri.parse(pathText);
if (uri.scheme === Schemas.file || workspaceFolders.some(folder => folder.scheme === uri.scheme && folder.authority === uri.authority)) {
const statedUri = await this.statAndNormalizeUri(uri);
const statedUri = await this.statAndNormalizeUri(uri, includeDirectorySlash);
if (statedUri) {
return statedUri;
}
Expand All @@ -133,7 +118,7 @@ export class FilePathLinkifier implements IContributedLinkifier {
}

for (const workspaceFolder of workspaceFolders) {
const uri = await this.statAndNormalizeUri(Uri.joinPath(workspaceFolder, pathText));
const uri = await this.statAndNormalizeUri(Uri.joinPath(workspaceFolder, pathText), includeDirectorySlash);
if (uri) {
return uri;
}
Expand All @@ -154,12 +139,18 @@ export class FilePathLinkifier implements IContributedLinkifier {
return refUri;
}

private async statAndNormalizeUri(uri: Uri): Promise<Uri | undefined> {
private async statAndNormalizeUri(uri: Uri, includeDirectorySlash: boolean): Promise<Uri | undefined> {
try {
const stat = await this.fileSystem.stat(uri);
if (stat.type === FileType.Directory) {
// Ensure all dir paths have a trailing slash for icon rendering
return uri.path.endsWith('/') ? uri : uri.with({ path: `${uri.path}/` });
if (includeDirectorySlash) {
return uri.path.endsWith('/') ? uri : uri.with({ path: `${uri.path}/` });
}

if (uri.path.endsWith('/') && uri.path !== '/') {
return uri.with({ path: uri.path.slice(0, -1) });
}
return uri;
}

return uri;
Expand Down
3 changes: 3 additions & 0 deletions src/extension/linkify/common/linkifyService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import { PromptReference } from '../../prompt/common/conversation';
import { FilePathLinkifier } from './filePathLinkifier';
import { LinkifiedText } from './linkifiedText';
import { Linkifier } from './linkifier';
import { ModelFilePathLinkifier } from './modelFilePathLinkifier';

/**
* A stateful linkifier.
Expand Down Expand Up @@ -86,6 +87,8 @@ export class LinkifyService implements ILinkifyService {
@IWorkspaceService workspaceService: IWorkspaceService,
@IEnvService private readonly envService: IEnvService,
) {
// Model-generated links first (anchors), fallback legacy path linkifier afterwards
this.registerGlobalLinkifier({ create: () => new ModelFilePathLinkifier(fileSystem, workspaceService) });
this.registerGlobalLinkifier({ create: () => new FilePathLinkifier(fileSystem, workspaceService) });
}

Expand Down
260 changes: 260 additions & 0 deletions src/extension/linkify/common/modelFilePathLinkifier.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,260 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/

import { IFileSystemService } from '../../../platform/filesystem/common/fileSystemService';
import { FileType } from '../../../platform/filesystem/common/fileTypes';
import { getWorkspaceFileDisplayPath, IWorkspaceService } from '../../../platform/workspace/common/workspaceService';
import { CancellationToken } from '../../../util/vs/base/common/cancellation';
import { normalizePath as normalizeUriPath } from '../../../util/vs/base/common/resources';
import { Location, Position, Range, Uri } from '../../../vscodeTypes';
import { coalesceParts, LinkifiedPart, LinkifiedText, LinkifyLocationAnchor } from './linkifiedText';
import { IContributedLinkifier, LinkifierContext } from './linkifyService';

// Matches markdown links where the text is a path and optional #L anchor is present
// Example: [src/file.ts](src/file.ts#L10-12) or [src/file.ts](src/file.ts)
const modelLinkRe = /\[(?<text>[^\]\n]+)\]\((?<target>[^\s)]+)\)/gu;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make sure this can support the case where the path text is code too as this is fairly common and models have likely been trained on a lot of it:

[`file.ts`](file.ts)

Especially true if we get rid of the old linkifier

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, works. Added unit test to validate this.


export class ModelFilePathLinkifier implements IContributedLinkifier {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's break up the existing FilePathLinkifier into the real links functionality and the inline code functionality. Maybe just delete the real links stuff from FilePathLinkifier and keep this class. I'd like to avoid the duplication though and make it so we just have one place that handles the markdown file links

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated

constructor(
@IFileSystemService private readonly fileSystem: IFileSystemService,
@IWorkspaceService private readonly workspaceService: IWorkspaceService,
) { }

async linkify(text: string, context: LinkifierContext, token: CancellationToken): Promise<LinkifiedText | undefined> {
let lastIndex = 0;
const parts: Array<LinkifiedPart | Promise<LinkifiedPart>> = [];

for (const match of text.matchAll(modelLinkRe)) {
const original = match[0];
const prefix = text.slice(lastIndex, match.index);
if (prefix) {
parts.push(prefix);
}
lastIndex = match.index + original.length;

const parsed = this.parseModelLinkMatch(match);
if (!parsed) {
parts.push(original);
continue;
}

const workspaceFolders = this.workspaceService.getWorkspaceFolders();
if (!this.canLinkify(parsed, workspaceFolders)) {
parts.push(original);
continue;
}

// Push promise to resolve in parallel with other matches
parts.push(this.resolveTarget(parsed.targetPath, workspaceFolders, parsed.preserveDirectorySlash, token).then(resolved => {
if (!resolved) {
return original;
}

const basePath = getWorkspaceFileDisplayPath(this.workspaceService, resolved);
const anchorRange = this.parseAnchor(parsed.anchor);
if (parsed.anchor && !anchorRange) {
return original;
}

if (anchorRange) {
const { range, startLine, endLine } = anchorRange;
const displayPath = endLine && startLine !== endLine
? `${basePath}#L${startLine}-L${endLine}`
: `${basePath}#L${startLine}`;
return new LinkifyLocationAnchor(new Location(resolved, range), displayPath);
}

return new LinkifyLocationAnchor(resolved, basePath);
}));
}

const suffix = text.slice(lastIndex);
if (suffix) {
parts.push(suffix);
}

if (!parts.length) {
return undefined;
}

return { parts: coalesceParts(await Promise.all(parts)) };
}

private parseModelLinkMatch(match: RegExpMatchArray): { readonly text: string; readonly targetPath: string; readonly anchor: string | undefined; readonly preserveDirectorySlash: boolean } | undefined {
const rawText = match.groups?.['text'];
const rawTarget = match.groups?.['target'];
if (!rawText || !rawTarget) {
return undefined;
}

const hashIndex = rawTarget.indexOf('#');
const baseTarget = hashIndex === -1 ? rawTarget : rawTarget.slice(0, hashIndex);
const anchor = hashIndex === -1 ? undefined : rawTarget.slice(hashIndex + 1);

let decodedBase = baseTarget;
try {
decodedBase = decodeURIComponent(baseTarget);
} catch {
// noop
}

const preserveDirectorySlash = decodedBase.endsWith('/') && decodedBase.length > 1;
const normalizedTarget = this.normalizeSlashes(decodedBase);
const normalizedText = this.normalizeLinkText(rawText);
return { text: normalizedText, targetPath: normalizedTarget, anchor, preserveDirectorySlash };
}

private normalizeSlashes(value: string): string {
// Collapse one or more backslashes into a single forward slash so mixed separators normalize consistently.
return value.replace(/\\+/g, '/');
}

private normalizeLinkText(rawText: string): string {
let text = this.normalizeSlashes(rawText);
// Remove a leading or trailing backtick that sometimes wraps the visible link label.
text = text.replace(/^`|`$/g, '');

// Look for a trailing #L anchor segment so it can be stripped before we compare names.
const anchorMatch = /^(.+?)(#L\d+(?:-\d+)?)$/.exec(text);
return anchorMatch ? anchorMatch[1] : text;
}

private canLinkify(parsed: { readonly text: string; readonly targetPath: string; readonly anchor: string | undefined }, workspaceFolders: readonly Uri[]): boolean {
const { text, targetPath, anchor } = parsed;
const textMatchesBase = targetPath === text;
const textIsFilename = !text.includes('/') && targetPath.endsWith(`/${text}`);
const descriptiveWithAnchor = !!anchor; // Allow any descriptive text when anchor is present

return Boolean(workspaceFolders.length) && (textMatchesBase || textIsFilename || descriptiveWithAnchor);
}

private async resolveTarget(targetPath: string, workspaceFolders: readonly Uri[], preserveDirectorySlash: boolean, token: CancellationToken): Promise<Uri | undefined> {
if (!workspaceFolders.length) {
return undefined;
}

if (token.isCancellationRequested) {
return undefined;
}

if (this.isAbsolutePath(targetPath)) {
// Choose URI construction strategy based on workspace folder schemes.
// For local (file:) workspaces we keep using Uri.file; for remote schemes we attempt
// to project the absolute path into the remote scheme preserving the folder URI's authority.
const normalizedAbs = targetPath.replace(/\\/g, '/');

for (const folderUri of workspaceFolders) {
if (token.isCancellationRequested) {
return undefined;
}
if (folderUri.scheme === 'file') {
const absoluteFileUri = this.tryCreateFileUri(targetPath);
if (!absoluteFileUri) {
continue;
}
if (this.isEqualOrParentFs(absoluteFileUri, folderUri)) {
const stat = await this.tryStat(absoluteFileUri, preserveDirectorySlash, token);
if (stat) {
return stat;
}
}
continue;
}

// Remote / virtual workspace: attempt to map the absolute path into the same scheme.
// Only consider it if the folder path is a prefix of the absolute path to avoid
// generating unrelated URIs.
const folderPath = folderUri.path.replace(/\\/g, '/');
const prefix = folderPath.endsWith('/') ? folderPath : folderPath + '/';
if (normalizedAbs.startsWith(prefix)) {
const candidate = folderUri.with({ path: normalizedAbs });
const stat = await this.tryStat(candidate, preserveDirectorySlash, token);
if (stat) {
return stat;
}
}
}
return undefined;
}

const segments = targetPath.split('/').filter(Boolean);
for (const folderUri of workspaceFolders) {
const candidate = Uri.joinPath(folderUri, ...segments);
const stat = await this.tryStat(candidate, preserveDirectorySlash, token);
if (stat) {
return stat;
}
}

return undefined;
}

private tryCreateFileUri(path: string): Uri | undefined {
try {
return Uri.file(path);
} catch {
return undefined;
}
}


private isEqualOrParentFs(target: Uri, folder: Uri): boolean {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should have a helper for this: isEqualOrParent?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated

const targetPath = normalizeUriPath(target).path;
const folderPath = normalizeUriPath(folder).path;
return targetPath === folderPath || targetPath.startsWith(folderPath.endsWith('/') ? folderPath : `${folderPath}/`);
}

private parseAnchor(anchor: string | undefined): { readonly range: Range; readonly startLine: string; readonly endLine: string | undefined } | undefined {
// Parse supported anchor formats: L123, L123-456, L123-L456
if (!anchor) {
return undefined;
}
const match = /^L(\d+)(?:-L?(\d+))?$/.exec(anchor);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd skip the .test above since it duplicates the regex and just try parsing directly

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated

if (!match) {
return undefined;
}

const startLine = match[1];
const endLineRaw = match[2];
const normalizedEndLine = endLineRaw === startLine ? undefined : endLineRaw;
const start = parseInt(startLine, 10) - 1;
const end = parseInt(normalizedEndLine ?? startLine, 10) - 1;
if (Number.isNaN(start) || Number.isNaN(end) || start < 0 || end < start) {
return undefined;
}

return {
range: new Range(new Position(start, 0), new Position(end, 0)),
startLine,
endLine: normalizedEndLine,
};
}

private isAbsolutePath(path: string): boolean {
// Treat drive-letter prefixes (e.g. C:) or leading slashes as absolute paths.
return /^[a-z]:/i.test(path) || path.startsWith('/');
}

private async tryStat(uri: Uri, preserveDirectorySlash: boolean, token: CancellationToken): Promise<Uri | undefined> {
if (token.isCancellationRequested) {
return undefined;
}
try {
const stat = await this.fileSystem.stat(uri);
if (stat.type === FileType.Directory) {
if (preserveDirectorySlash) {
return uri.path.endsWith('/') ? uri : uri.with({ path: `${uri.path}/` });
}
if (uri.path.endsWith('/') && uri.path !== '/') {
return uri.with({ path: uri.path.slice(0, -1) });
}
return uri;
}
return uri;
} catch {
return undefined;
}
}
}
Loading
Loading