@@ -41,6 +41,23 @@ final class Differ
4141 'oldNewComparison ' => 0 ,
4242 ];
4343
44+ /**
45+ * Extra lines which are appended to both input line sequences so that
46+ * the diff result stays stable around the EOF.
47+ *
48+ * @var array
49+ */
50+ private const APPENDED_HELPERLINES = [
51+ "\u{fcf28}\u{fc232}" ,
52+ "\u{fcf28}\u{fc232}" ,
53+ "\u{fcf28}\u{fc232}" ,
54+ "\u{fcf28}\u{fc232}" ,
55+ "\u{fcf28}\u{fc232}" ,
56+ "\u{fcf28}\u{fc232}" ,
57+ "\u{fcf28}\u{fc232}" ,
58+ "\u{fcf28}\u{fc232}" ,
59+ ];
60+
4461 /**
4562 * @var array array of the options that have been applied for generating the diff
4663 */
@@ -66,6 +83,16 @@ final class Differ
6683 */
6784 private $ sequenceMatcher ;
6885
86+ /**
87+ * @var int
88+ */
89+ private $ oldSrcLength = 0 ;
90+
91+ /**
92+ * @var int
93+ */
94+ private $ newSrcLength = 0 ;
95+
6996 /**
7097 * @var int the end index for the old if the old has no EOL at EOF
7198 * -1 means the old has an EOL at EOF
@@ -271,9 +298,17 @@ public function getGroupedOpcodes(): array
271298 return $ this ->groupedOpcodes ;
272299 }
273300
274- return $ this ->groupedOpcodes = $ this ->sequenceMatcher
275- ->setSequences ($ this ->old , $ this ->new )
301+ $ old = $ this ->old ;
302+ $ new = $ this ->new ;
303+ $ this ->getGroupedOpcodesPre ($ old , $ new );
304+
305+ $ opcodes = $ this ->sequenceMatcher
306+ ->setSequences ($ old , $ new )
276307 ->getGroupedOpcodes ($ this ->options ['context ' ]);
308+
309+ $ this ->getGroupedOpcodesPost ($ opcodes );
310+
311+ return $ this ->groupedOpcodes = $ opcodes ;
277312 }
278313
279314 /**
@@ -289,12 +324,118 @@ public function getGroupedOpcodesGnu(): array
289324 return $ this ->groupedOpcodesGnu ;
290325 }
291326
292- return $ this ->groupedOpcodesGnu = $ this ->sequenceMatcher
293- ->setSequences (
294- $ this ->createGnuCompatibleLines ($ this ->old ),
295- $ this ->createGnuCompatibleLines ($ this ->new )
296- )
327+ $ old = $ this ->old ;
328+ $ new = $ this ->new ;
329+ $ this ->getGroupedOpcodesGnuPre ($ old , $ new );
330+
331+ $ opcodes = $ this ->sequenceMatcher
332+ ->setSequences ($ old , $ new )
297333 ->getGroupedOpcodes ($ this ->options ['context ' ]);
334+
335+ $ this ->getGroupedOpcodesGnuPost ($ opcodes );
336+
337+ return $ this ->groupedOpcodesGnu = $ opcodes ;
338+ }
339+
/**
 * Hook executed right before the opcodes are computed in getGroupedOpcodes().
 *
 * Stores the original line counts of both sides and then appends every
 * helper line from self::APPENDED_HELPERLINES to each side.
 * Both arrays are modified in place.
 *
 * @param string[] $old the old lines (modified by reference)
 * @param string[] $new the new lines (modified by reference)
 */
private function getGroupedOpcodesPre(array &$old, array &$new): void
{
    // record the real lengths before any helper line is appended
    $this->oldSrcLength = \count($old);
    $this->newSrcLength = \count($new);

    foreach (self::APPENDED_HELPERLINES as $helperLine) {
        $old[] = $helperLine;
        $new[] = $helperLine;
    }
}
354+
/**
 * Hook executed right after the opcodes are computed in getGroupedOpcodes().
 *
 * For every block, positions 1 and 2 are capped at the recorded old length
 * and positions 3 and 4 at the recorded new length. A block whose positions
 * 1/2 are equal and whose positions 3/4 are equal is removed, and a hunk
 * that ends up without any block is removed as well.
 *
 * @param int[][][] $opcodes the opcodes (modified by reference)
 */
private function getGroupedOpcodesPost(array &$opcodes): void
{
    // block position => the largest value allowed at that position
    $upperBounds = [
        1 => $this->oldSrcLength,
        2 => $this->oldSrcLength,
        3 => $this->newSrcLength,
        4 => $this->newSrcLength,
    ];

    foreach ($opcodes as $hunkIdx => &$hunk) {
        foreach ($hunk as $blockIdx => &$block) {
            // range overflow
            foreach ($upperBounds as $position => $upperBound) {
                if ($block[$position] > $upperBound) {
                    $block[$position] = $upperBound;
                }
            }

            // useless extra block?
            /** @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset */
            if ($block[1] === $block[2] && $block[3] === $block[4]) {
                unset($hunk[$blockIdx]);
            }
        }

        // nothing left in this hunk after the cleanup above
        if ([] === $hunk) {
            unset($opcodes[$hunkIdx]);
        }
    }
}
389+
/**
 * Triggered before getGroupedOpcodesGnu(). May modify the $old and $new.
 *
 * Both sides are first converted to GNU-compatible lines (see the closure
 * below) and then passed on to getGroupedOpcodesPre().
 *
 * @param string[] $old the old (modified by reference)
 * @param string[] $new the new (modified by reference)
 */
private function getGroupedOpcodesGnuPre(array &$old, array &$new): void
{
    /**
     * Make the lines to be prepared for GNU-style diff.
     *
     * This closure checks whether $lines has no EOL at EOF and appends a special
     * indicator to the last line. A trailing plain "" line is dropped instead.
     *
     * @param string[] $lines the lines created by simply explode("\n", $string)
     *
     * @return string[] the adjusted lines
     */
    $createGnuCompatibleLines = static function (array $lines): array {
        // explode("\n", "") === [""], so $lines is normally never empty here.
        // Guard anyway: for an empty array the by-reference access below would
        // create a bogus element at index -1 and mark it as "no EOL at EOF".
        if ($lines === []) {
            return $lines;
        }

        $lastLineIdx = \count($lines) - 1;
        $lastLine = &$lines[$lastLineIdx];

        if ($lastLine === '') {
            // remove the last plain "" line since we don't need it anymore
            // use array_slice() to also reset the array index
            $lines = \array_slice($lines, 0, -1);
        } else {
            // this means the original source has no EOL at EOF
            // we append a special indicator to that line so it no longer matches
            $lastLine .= self::LINE_NO_EOL;
        }

        return $lines;
    };

    $old = $createGnuCompatibleLines($old);
    $new = $createGnuCompatibleLines($new);

    $this->getGroupedOpcodesPre($old, $new);
}
430+
/**
 * Hook executed right after the opcodes are computed in getGroupedOpcodesGnu().
 *
 * It simply delegates to getGroupedOpcodesPost(), so the opcodes receive
 * the same in-place cleanup as in the non-GNU flow.
 *
 * @param int[][][] $opcodes the opcodes (modified by reference)
 */
private function getGroupedOpcodesGnuPost(array &$opcodes): void
{
    $this->getGroupedOpcodesPost($opcodes);
}
299440
300441 /**
@@ -334,31 +475,4 @@ private function resetCachedResults(): self
334475
335476 return $ this ;
336477 }
337-
338- /**
339- * Make the lines to be prepared for GNU-style diff.
340- *
341- * This method checks whether $lines has no EOL at EOF and append a special
342- * indicator to the last line.
343- *
344- * @param string[] $lines the lines created by simply explode("\n", $string)
345- */
346- private function createGnuCompatibleLines (array $ lines ): array
347- {
348- // note that the $lines should not be empty at this point
349- // they have at least one element "" in the array because explode("\n", "") === [""]
350- $ lastLineIdx = \count ($ lines ) - 1 ;
351- $ lastLine = &$ lines [$ lastLineIdx ];
352-
353- if ($ lastLine === '' ) {
354- // remove the last plain "" line since we don't need it anymore
355- unset($ lines [$ lastLineIdx ]);
356- } else {
357- // this means the original source has no EOL at EOF
358- // we append a special indicator to that line so it no longer matches
359- $ lastLine .= self ::LINE_NO_EOL ;
360- }
361-
362- return $ lines ;
363- }
364478}
0 commit comments