77use Jfcherng \Diff \Renderer \RendererConstant ;
88use Jfcherng \Diff \SequenceMatcher ;
99use Jfcherng \Diff \Utility \ReverseIterator ;
10+ use Jfcherng \Diff \Utility \Str ;
1011use Jfcherng \Utility \MbString ;
1112
1213final class Word extends AbstractLineRenderer
@@ -18,9 +19,14 @@ final class Word extends AbstractLineRenderer
1819 */
1920 public function render (MbString $ mbOld , MbString $ mbNew ): LineRendererInterface
2021 {
21- static $ splitRegex = '/([ ' . RendererConstant::PUNCTUATIONS_RANGE . ']++ )/uS ' ;
22+ static $ splitRegex = '/([ ' . RendererConstant::PUNCTUATIONS_RANGE . '])/uS ' ;
2223
23- $ pregFlag = \PREG_SPLIT_DELIM_CAPTURE | \PREG_SPLIT_NO_EMPTY ;
24+ // using PREG_SPLIT_NO_EMPTY makes "wordGlues" behave incorrectly in some rare cases
25+ // failure case:
26+ // old: "good-looking-"
27+ // new: "good--"
28+ // notice that after glueing, the 1st "-" in the new string should be inside the diff segment
29+ $ pregFlag = \PREG_SPLIT_DELIM_CAPTURE ;
2430 $ oldWords = $ mbOld ->toArraySplit ($ splitRegex , -1 , $ pregFlag );
2531 $ newWords = $ mbNew ->toArraySplit ($ splitRegex , -1 , $ pregFlag );
2632
@@ -39,9 +45,58 @@ public function render(MbString $mbOld, MbString $mbNew): LineRendererInterface
3945 }
4046 }
4147
48+ if (!empty ($ hunk ) && !empty ($ this ->rendererOptions ['wordGlues ' ])) {
49+ $ regexGlues = \array_map (
50+ function (string $ glue ): string {
51+ return \preg_quote ($ glue , '/ ' );
52+ },
53+ $ this ->rendererOptions ['wordGlues ' ]
54+ );
55+
56+ $ gluePattern = '/^(?: ' . \implode ('| ' , $ regexGlues ) . ')+$/uS ' ;
57+
58+ $ this ->glueWordsResult ($ oldWords , $ gluePattern );
59+ $ this ->glueWordsResult ($ newWords , $ gluePattern );
60+ }
61+
4262 $ mbOld ->set (\implode ('' , $ oldWords ));
4363 $ mbNew ->set (\implode ('' , $ newWords ));
4464
4565 return $ this ;
4666 }
67+
/**
 * Join neighbouring diff segments which are separated only by glue words.
 *
 * For example, the word list
 *     ["<diff_begin>good<diff_end>", "-", "<diff_begin>looking<diff_end>"]
 * becomes
 *     ["<diff_begin>good", "-", "looking<diff_end>"]
 * so that the glue ("-") sits inside one continuous diff segment.
 *
 * @param array  $words       the words, modified in place
 * @param string $gluePattern the regex which tells whether a string is purely glue
 */
protected function glueWordsResult(array &$words, string $gluePattern): void
{
    $openingClosure = RendererConstant::HTML_CLOSURES[0];
    $trailingClosure = RendererConstant::HTML_CLOSURES[1];
    $openingLength = \strlen($openingClosure);
    $trailingLength = \strlen($trailingClosure);

    /** @var int key of the latest word ending with the trailing closure, negative when there is none */
    $pendingIdx = -1;

    foreach (\array_keys($words) as $idx) {
        $current = $words[$idx];

        // empty strings neither extend nor break a glue chain
        if ($current === '') {
            continue;
        }

        // no candidate yet: only look for a word which closes a diff segment
        if ($pendingIdx < 0) {
            if (Str::endsWith($current, $trailingClosure)) {
                $pendingIdx = $idx;
            }

            continue;
        }

        // another diff segment begins here: drop the closure at the end of
        // the pending word and the closure at the start of this word, so
        // everything in between becomes part of a single segment
        if (Str::startsWith($current, $openingClosure)) {
            $words[$pendingIdx] = \substr($words[$pendingIdx], 0, -$trailingLength);
            $words[$idx] = \substr($current, $openingLength);
            $pendingIdx = $idx;

            continue;
        }

        // a word which is not purely glue breaks the chain
        if (!\preg_match($gluePattern, $current)) {
            $pendingIdx = -1;
        }
    }
}
47102}
0 commit comments