@@ -352,9 +352,6 @@ public function replaceMatches(string $fromRegexp, $to): parent
352352 return $ str ;
353353 }
354354
355- /**
356- * {@inheritdoc}
357- */
358355 public function reverse (): parent
359356 {
360357 $ str = clone $ this ;
@@ -444,22 +441,21 @@ public function width(bool $ignoreAnsiDecoration = true): int
444441 $ s = str_replace (["\r\n" , "\r" ], "\n" , $ s );
445442 }
446443
444+ if (!$ ignoreAnsiDecoration ) {
445+ $ s = preg_replace ('/[\p{Cc}\x7F]++/u ' , '' , $ s );
446+ }
447+
447448 foreach (explode ("\n" , $ s ) as $ s ) {
448449 if ($ ignoreAnsiDecoration ) {
449- $ s = preg_replace ('/\x1B(?:
450+ $ s = preg_replace ('/(?: \x1B(?:
450451 \[ [\x30-\x3F]*+ [\x20-\x2F]*+ [0x40-\x7E]
451452 | [P\]X^_] .*? \x1B \\\\
452453 | [\x41-\x7E]
453- )/x ' , '' , $ s );
454+ )|[\p{Cc}\x7F]++)/xu ' , '' , $ s );
454455 }
455456
456- $ w = substr_count ($ s , "\xAD" ) - substr_count ($ s , "\x08" );
457- $ s = preg_replace ('/[\x00\x05\x07\p{Mn}\p{Me}\p{Cf}\x{1160}-\x{11FF}\x{200B}]+/u ' , '' , $ s );
458- $ s = preg_replace ('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u ' , '' , $ s , -1 , $ wide );
459-
460- if ($ width < $ w += mb_strlen ($ s , 'UTF-8 ' ) + ($ wide << 1 )) {
461- $ width = $ w ;
462- }
457+ // Non printable characters have been dropped, so wcswidth cannot logically return -1.
458+ $ width += $ this ->wcswidth ($ s );
463459 }
464460
465461 return $ width ;
@@ -503,4 +499,80 @@ private function pad(int $len, self $pad, int $type): parent
503499 throw new InvalidArgumentException ('Invalid padding type. ' );
504500 }
505501 }
502+
/**
 * Computes the number of columns needed to display a string.
 *
 * Based on https://github.com/jquast/wcwidth, a Python implementation of https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c.
 *
 * Each code point counts for 0 columns (NULL, a few zero-width/formatting characters and
 * everything matched by the "zero" table), 2 columns (everything matched by the "wide" table)
 * or 1 column (anything else).
 *
 * @param string $string The text to measure; it is split into code points using the "u" modifier
 *
 * @return int The accumulated width, or -1 as soon as a C0/C1 control character or DEL is met
 */
private function wcswidth(string $string): int
{
    // Lookup tables are kept in static locals and only loaded when a code point actually needs them.
    static $tableZero, $tableWide;

    // Tells whether $codePoint falls inside one of the [first, last] pairs of $ranges.
    // The binary search assumes the pairs are sorted in ascending order and do not overlap.
    $inRanges = static function (array $ranges, int $codePoint): bool {
        $upper = \count($ranges) - 1;

        // Quick rejection of code points outside the span covered by the whole table.
        if ($codePoint < $ranges[0][0] || $codePoint > $ranges[$upper][1]) {
            return false;
        }

        $lower = 0;
        while ($upper >= $lower) {
            // Integer midpoint, so that array offsets and bounds stay integers.
            $mid = intdiv($lower + $upper, 2);

            if ($codePoint > $ranges[$mid][1]) {
                $lower = $mid + 1;
            } elseif ($codePoint < $ranges[$mid][0]) {
                $upper = $mid - 1;
            } else {
                return true;
            }
        }

        return false;
    };

    $width = 0;

    foreach (preg_split('//u', $string, -1, PREG_SPLIT_NO_EMPTY) as $c) {
        $codePoint = mb_ord($c, 'UTF-8');

        // Characters that never occupy a column.
        if (0 === $codePoint // NULL
            || 0x034F === $codePoint // COMBINING GRAPHEME JOINER
            || (0x200B <= $codePoint && 0x200F >= $codePoint) // ZERO WIDTH SPACE to RIGHT-TO-LEFT MARK
            || 0x2028 === $codePoint // LINE SEPARATOR
            || 0x2029 === $codePoint // PARAGRAPH SEPARATOR
            || (0x202A <= $codePoint && 0x202E >= $codePoint) // LEFT-TO-RIGHT EMBEDDING to RIGHT-TO-LEFT OVERRIDE
            || (0x2060 <= $codePoint && 0x2063 >= $codePoint) // WORD JOINER to INVISIBLE SEPARATOR
        ) {
            continue;
        }

        // Non printable characters make the whole width undefined.
        if (32 > $codePoint // C0 control characters
            || (0x07F <= $codePoint && 0x0A0 > $codePoint) // C1 control characters and DEL
        ) {
            return -1;
        }

        if (null === $tableZero) {
            $tableZero = require __DIR__.'/Resources/data/wcswidth_table_zero.php';
        }

        // Code points listed in the "zero" table do not add to the width.
        if ($inRanges($tableZero, $codePoint)) {
            continue;
        }

        if (null === $tableWide) {
            $tableWide = require __DIR__.'/Resources/data/wcswidth_table_wide.php';
        }

        // Code points listed in the "wide" table take two columns, all others take one.
        $width += $inRanges($tableWide, $codePoint) ? 2 : 1;
    }

    return $width;
}
506578}
0 commit comments