@@ -3138,25 +3138,24 @@ if (isSomeString!Range ||
31383138 import std.uni : isWhite;
31393139 alias C = Unqual! (ElementEncodingType! (typeof (str)));
31403140
3141- static if (isSomeString! (typeof (str)))
3141+ static if (isSomeString! (typeof (str)) && C.sizeof >= 2 )
31423142 {
3143- static if (C.sizeof >= 2 )
3143+ // No whitespace takes multiple wchars to encode and due to
3144+ // the design of UTF-16 those wchars will not occur as part
3145+ // of the encoding of multi-wchar codepoints.
3146+ foreach_reverse (i, C c; str)
31443147 {
3145- // No whitespace takes multiple wchars to encode and due to
3146- // the design of UTF-16 those wchars will not occur as part
3147- // of the encoding of multi-wchar codepoints.
3148- foreach_reverse (i, C c; str)
3149- {
3150- if (! isWhite(c))
3151- return str[0 .. i + 1 ];
3152- }
3153- return str[0 .. 0 ];
3148+ if (! isWhite(c))
3149+ return str[0 .. i + 1 ];
31543150 }
3155- else
3151+ return str[0 .. 0 ];
3152+ }
3153+ else
3154+ {
3155+ // ASCII optimization for dynamic arrays.
3156+ static if (isDynamicArray! (typeof (str)))
31563157 {
31573158 static import std.ascii ;
3158- import std.utf : codeLength;
3159- // ASCII optimization.
31603159 foreach_reverse (i, C c; str)
31613160 {
31623161 if (c >= 0x80 )
@@ -3170,18 +3169,10 @@ if (isSomeString!Range ||
31703169 }
31713170 }
31723171 return str[0 .. 0 ];
3173-
3174- NonAsciiPath:
3175- foreach_reverse (i, dchar c; str)
3176- {
3177- if (! isWhite(c))
3178- return str[0 .. i + codeLength! C(c)];
3179- }
3180- return str[0 .. 0 ];
31813172 }
3182- }
3183- else
3184- {
3173+
3174+ NonAsciiPath:
3175+
31853176 size_t i = str.length;
31863177 while (i-- )
31873178 {
@@ -3196,9 +3187,7 @@ if (isSomeString!Range ||
31963187 }
31973188 else static if (C.sizeof == 1 )
31983189 {
3199- import std.utf : byDchar;
3200-
3201- char cx = str[i];
3190+ const cx = str[i];
32023191 if (cx <= 0x7F )
32033192 {
32043193 if (isWhite(cx))
@@ -3207,21 +3196,30 @@ if (isSomeString!Range ||
32073196 }
32083197 else
32093198 {
3210- size_t stride = 0 ;
3211-
3212- while (1 )
3199+ if (i == 0 || (0b1100_0000 & cx) != 0b1000_0000)
3200+ break ;
3201+ const uint d = 0b0011_1111 & cx;
3202+ const c2 = str[i - 1 ];
3203+ if ((c2 & 0b1110_0000) == 0b1100_0000) // 2 byte encoding.
32133204 {
3214- ++ stride;
3215- if (! i || (cx & 0xC0 ) == 0xC0 || stride == 4 )
3216- break ;
3217- cx = str[i - 1 ];
3218- if (! (cx & 0x80 ))
3219- break ;
3220- -- i;
3205+ if (isWhite(d + (uint (c2 & 0b0001_1111) << 6 )))
3206+ {
3207+ i-- ;
3208+ continue ;
3209+ }
3210+ break ;
32213211 }
3222-
3223- if (! str[i .. i + stride].byDchar.front.isWhite)
3224- return str[0 .. i + stride];
3212+ if (i == 1 || (c2 & 0b1100_0000) != 0b1000_0000)
3213+ break ;
3214+ const c3 = str[i - 2 ];
3215+ // In UTF-8 all whitespace is encoded in 3 bytes or fewer.
3216+ if ((c3 & 0b1111_0000) == 0b1110_0000 &&
3217+ isWhite(d + (uint (c2 & 0b0011_1111) << 6 ) + (uint (c3 & 0b0000_1111) << 12 )))
3218+ {
3219+ i -= 2 ;
3220+ continue ;
3221+ }
3222+ break ;
32253223 }
32263224 }
32273225 else
@@ -3233,7 +3231,7 @@ if (isSomeString!Range ||
32333231}
32343232
32353233// /
3236- @safe pure
3234+ nothrow @safe pure
32373235unittest
32383236{
32393237 import std.uni : lineSep, paraSep;
@@ -3255,7 +3253,7 @@ if (isConvertibleToString!Range)
32553253 return stripRight! (StringTypeOf! Range )(str);
32563254}
32573255
3258- @safe pure unittest
3256+ @nogc nothrow @ safe pure unittest
32593257{
32603258 assert (testAliasedString! stripRight(" hello " ));
32613259}
0 commit comments