@@ -16,15 +16,32 @@ final class Word extends AbstractLineRenderer
1616 */
1717 public function render (MbString $ mbFrom , MbString $ mbTo ): LineRendererInterface
1818 {
19- static $ punctuations = (
20- ' $,.:;!? \'"()\[\]{}%@<=>_+\-*\/~ \\\\| ' .
19+ static $ punctuationsRange = (
20+ // Latin-1 Supplement ( https://unicode-table.com/en/blocks/latin-1-supplement/ )
21+ "\u{0080}- \u{00BB}" .
22+ // Spacing Modifier Letters ( https://unicode-table.com/en/blocks/spacing-modifier-letters/ )
23+ "\u{02B0}- \u{02FF}" .
24+ // Combining Diacritical Marks ( https://unicode-table.com/en/blocks/combining-diacritical-marks/ )
25+ "\u{0300}- \u{036F}" .
26+ // Small Form Variants ( https://unicode-table.com/en/blocks/small-form-variants/ )
27+ "\u{FE50}- \u{FE6F}" .
28+ // General Punctuation ( https://unicode-table.com/en/blocks/general-punctuation/ )
29+ "\u{2000}- \u{206F}" .
30+ // Supplemental Punctuation ( https://unicode-table.com/en/blocks/supplemental-punctuation/ )
31+ "\u{2E00}- \u{2E7F}" .
32+ // CJK Symbols and Punctuation ( https://unicode-table.com/en/blocks/cjk-symbols-and-punctuation/ )
33+ "\u{3000}- \u{303F}" .
34+ // Ideographic Symbols and Punctuation ( https://unicode-table.com/en/blocks/ideographic-symbols-and-punctuation/ )
35+ "\u{16FE0}- \u{16FFF}" .
36+ // ...
37+ " \t$,.:;!?' \"()\[\]{}%@<=>_+\-*\/~ \\\\| " .
2138 ' $,.:;!?’"()[]{}%@<=>_+-*/~\| ' .
22- '「」『』〈〉《》【】())( ‘’“” ' .
23- '.‧・・•· '
39+ '「」『』〈〉《》【】()() ‘’“” ' .
40+ '.‧・・•·¿ '
2441 );
2542
26- $ fromWords = $ mbFrom ->toArraySplit ("/([ {$ punctuations }])/uS " , -1 , \PREG_SPLIT_DELIM_CAPTURE );
27- $ toWords = $ mbTo ->toArraySplit ("/([ {$ punctuations }])/uS " , -1 , \PREG_SPLIT_DELIM_CAPTURE );
43+ $ fromWords = $ mbFrom ->toArraySplit ("/([ {$ punctuationsRange }])/uS " , -1 , \PREG_SPLIT_DELIM_CAPTURE );
44+ $ toWords = $ mbTo ->toArraySplit ("/([ {$ punctuationsRange }])/uS " , -1 , \PREG_SPLIT_DELIM_CAPTURE );
2845
2946 $ opcodes = $ this ->getChangedExtentSegments ($ fromWords , $ toWords );
3047
0 commit comments