From 2ea0c383dab265a67e1f2047a63550647e4f500c Mon Sep 17 00:00:00 2001 From: Joseph Cheek Date: Wed, 15 Oct 2025 12:14:55 -0600 Subject: [PATCH] Fix diff generation showing entire file as changed (fixes #83) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The diff preview was showing the entire file as changed when only a few lines were actually modified. This was caused by a broken change detection algorithm that couldn't properly handle insertions and deletions. Replaced the naive line-matching algorithm with a proper LCS (Longest Common Subsequence) based diff algorithm using dynamic programming. This produces minimal, accurate diffs that match what git diff shows. Changes: - Added computeLCS(): Computes longest common subsequence between old/new lines - Added extractChanges(): Extracts actual change regions from LCS table - Updated generateDiff(): Uses LCS-based change detection instead of broken algorithm Impact: - Diff previews now show only actual changes (e.g., 4 lines instead of 260) - Drastically reduces context pollution in conversation history - Matches git diff output accuracy 🤖 Contributed by ZDS-AI (https://zds.group) --- src/tools/text-editor.ts | 152 ++++++++++++++++++++++++--------------- 1 file changed, 94 insertions(+), 58 deletions(-) diff --git a/src/tools/text-editor.ts b/src/tools/text-editor.ts index 409f4d8..ca6eb3d 100644 --- a/src/tools/text-editor.ts +++ b/src/tools/text-editor.ts @@ -468,84 +468,120 @@ export class TextEditorTool { const tokens = str.match(/\b(function|console\.log|return|if|else|for|while)\b/g) || []; return tokens; }; - + const searchTokens = extractTokens(search); const actualTokens = extractTokens(actual); - + if (searchTokens.length !== actualTokens.length) return false; - + for (let i = 0; i < searchTokens.length; i++) { if (searchTokens[i] !== actualTokens[i]) return false; } - + return true; } - private generateDiff( + /** + * Compute Longest Common Subsequence using 
dynamic programming + * Returns the (m+1) x (n+1) table of LCS lengths: dp[i][j] is the LCS length of the first i old lines and the first j new lines + */ + private computeLCS(oldLines: string[], newLines: string[]): number[][] { + const m = oldLines.length; + const n = newLines.length; + const dp: number[][] = Array(m + 1).fill(0).map(() => Array(n + 1).fill(0)); + + // Build LCS length table + for (let i = 1; i <= m; i++) { + for (let j = 1; j <= n; j++) { + if (oldLines[i - 1] === newLines[j - 1]) { + dp[i][j] = dp[i - 1][j - 1] + 1; + } else { + dp[i][j] = Math.max(dp[i - 1][j], dp[i][j - 1]); + } + } + } + + return dp; + } + + /** + * Extract changes from LCS table + * Returns array of change regions as 0-based, end-exclusive line ranges + */ + private extractChanges( oldLines: string[], newLines: string[], - filePath: string - ): string { - const CONTEXT_LINES = 3; - + lcs: number[][] + ): Array<{ oldStart: number; oldEnd: number; newStart: number; newEnd: number }> { const changes: Array<{ oldStart: number; oldEnd: number; newStart: number; newEnd: number; }> = []; - - let i = 0, j = 0; - - while (i < oldLines.length || j < newLines.length) { - while (i < oldLines.length && j < newLines.length && oldLines[i] === newLines[j]) { - i++; - j++; - } - - if (i < oldLines.length || j < newLines.length) { - const changeStart = { old: i, new: j }; - - let oldEnd = i; - let newEnd = j; - - while (oldEnd < oldLines.length || newEnd < newLines.length) { - let matchFound = false; - let matchLength = 0; - - for (let k = 0; k < Math.min(2, oldLines.length - oldEnd, newLines.length - newEnd); k++) { - if (oldEnd + k < oldLines.length && - newEnd + k < newLines.length && - oldLines[oldEnd + k] === newLines[newEnd + k]) { - matchLength++; - } else { - break; - } - } - - if (matchLength >= 2 || (oldEnd >= oldLines.length && newEnd >= newLines.length)) { - matchFound = true; - } - - if (matchFound) { - break; - } - - if (oldEnd < oldLines.length) oldEnd++; - if (newEnd < newLines.length) newEnd++; + + let i = oldLines.length; + let j = newLines.length; + let oldEnd = i; + let 
newEnd = j; + let inChange = false; + + while (i > 0 || j > 0) { + if (i > 0 && j > 0 && oldLines[i - 1] === newLines[j - 1]) { + // Lines match - if we were in a change, close it + if (inChange) { + changes.unshift({ + oldStart: i, + oldEnd: oldEnd, + newStart: j, + newEnd: newEnd + }); + inChange = false; } - - changes.push({ - oldStart: changeStart.old, - oldEnd: oldEnd, - newStart: changeStart.new, - newEnd: newEnd - }); - - i = oldEnd; - j = newEnd; + i--; + j--; + } else if (j > 0 && (i === 0 || lcs[i][j - 1] >= lcs[i - 1][j])) { + // Insertion in new file + if (!inChange) { + oldEnd = i; + newEnd = j; + inChange = true; + } + j--; + } else if (i > 0) { + // Deletion from old file + if (!inChange) { + oldEnd = i; + newEnd = j; + inChange = true; + } + i--; } } + + // Close any remaining change + if (inChange) { + changes.unshift({ + oldStart: 0, + oldEnd: oldEnd, + newStart: 0, + newEnd: newEnd + }); + } + + return changes; + } + + private generateDiff( + oldLines: string[], + newLines: string[], + filePath: string + ): string { + const CONTEXT_LINES = 3; + + // Use LCS-based diff algorithm to find actual changes + const lcs = this.computeLCS(oldLines, newLines); + const changes = this.extractChanges(oldLines, newLines, lcs); const hunks: Array<{ oldStart: number;