1717//! Note: Because the term "leading byte" can sometimes be ambiguous (for
1818//! example, it could also refer to the first byte of a slice), we'll often use
1919//! the term "non-continuation byte" to refer to these bytes in the code.
20+ use core:: intrinsics:: unlikely;
2021
// Size of a `usize` in bytes. Used for the short-input threshold in
// `count_chars`, for sanity-checking the `align_to` split, and for the final
// shift in `sum_bytes_in_usize`.
const USIZE_SIZE: usize = core::mem::size_of::<usize>();
// Number of `usize` words handled per unrolled step of the inner loop (the
// array width passed to `as_chunks`). `CHUNK_SIZE` must be a multiple of this.
const UNROLL_INNER: usize = 4;
24+
25+ #[ inline]
2126pub ( super ) fn count_chars ( s : & str ) -> usize {
27+ if s. len ( ) < USIZE_SIZE * UNROLL_INNER {
28+ // Avoid entering the optimized implementation for strings where the
29+ // difference is not likely to matter, or where it might even be slower.
30+ // That said, a ton of thought was not spent on the particular threshold
31+ // here, beyond "this value seems to make sense".
32+ char_count_general_case ( s. as_bytes ( ) )
33+ } else {
34+ do_count_chars ( s)
35+ }
36+ }
37+
38+ fn do_count_chars ( s : & str ) -> usize {
2239 // For correctness, `CHUNK_SIZE` must be:
40+ //
2341 // - Less than or equal to 255, otherwise we'll overflow bytes in `counts`.
    // - A multiple of `UNROLL_INNER`, otherwise our `break` inside the
    //   `body.chunks(CHUNK_SIZE)` loop is incorrect.
2644 //
2745 // For performance, `CHUNK_SIZE` should be:
28- // - Relatively cheap to `% ` against.
46+ // - Relatively cheap to `/ ` against (so some simple sum of powers of two) .
2947 // - Large enough to avoid paying for the cost of the `sum_bytes_in_usize`
3048 // too often.
3149 const CHUNK_SIZE : usize = 192 ;
32- const UNROLL_INNER : usize = 4 ;
3350
34- // Check the properties of `CHUNK_SIZE` / `UNROLL_INNER` that are required
51+ // Check the properties of `CHUNK_SIZE` and `UNROLL_INNER` that are required
3552 // for correctness.
36- const _: [ ( ) ; 1 ] = [ ( ) ; ( CHUNK_SIZE < 256 && ( CHUNK_SIZE % UNROLL_INNER ) == 0 ) as usize ] ;
53+ const _: ( ) = assert ! ( CHUNK_SIZE < 256 ) ;
54+ const _: ( ) = assert ! ( CHUNK_SIZE % UNROLL_INNER == 0 ) ;
55+
3756 // SAFETY: transmuting `[u8]` to `[usize]` is safe except for size
3857 // differences which are handled by `align_to`.
3958 let ( head, body, tail) = unsafe { s. as_bytes ( ) . align_to :: < usize > ( ) } ;
4059
60+ // This should be quite rare, and basically exists to handle the degenerate
61+ // cases where align_to fails (as well as miri under symbolic alignment
62+ // mode).
63+ //
64+ // The `unlikely` helps discourage LLVM from inlining the body, which is
65+ // nice, as we would rather not mark the `char_count_general_case` function
66+ // as cold.
67+ if unlikely ( body. is_empty ( ) || head. len ( ) > USIZE_SIZE || tail. len ( ) > USIZE_SIZE ) {
68+ return char_count_general_case ( s. as_bytes ( ) ) ;
69+ }
70+
4171 let mut total = char_count_general_case ( head) + char_count_general_case ( tail) ;
4272 // Split `body` into `CHUNK_SIZE` chunks to reduce the frequency with which
4373 // we call `sum_bytes_in_usize`.
4474 for chunk in body. chunks ( CHUNK_SIZE ) {
4575 // We accumulate intermediate sums in `counts`, where each byte contains
4676 // a subset of the sum of this chunk, like a `[u8; size_of::<usize>()]`.
4777 let mut counts = 0 ;
48- let unrolled_chunks = chunk. array_chunks :: < UNROLL_INNER > ( ) ;
49- // If there's a remainder (know can only happen for the last item in
50- // `chunks`, because `CHUNK_SIZE % UNROLL == 0`), then we need to
51- // account for that (although we don't use it to later).
52- let remainder = unrolled_chunks. remainder ( ) ;
78+
79+ let ( unrolled_chunks, remainder) = chunk. as_chunks :: < UNROLL_INNER > ( ) ;
5380 for unrolled in unrolled_chunks {
5481 for & word in unrolled {
5582 // Because `CHUNK_SIZE` is < 256, this addition can't cause the
@@ -85,8 +112,8 @@ pub(super) fn count_chars(s: &str) -> usize {
85112// true)
#[inline]
fn contains_non_continuation_byte(w: usize) -> usize {
    // One set bit at the bottom of every byte lane. The `as usize` cast
    // intentionally truncates the pattern on targets narrower than 64 bits.
    const LSB: usize = 0x0101_0101_0101_0101u64 as usize;
    // A continuation byte looks like `0b10xx_xxxx`, so a byte is a
    // NON-continuation byte when bit 7 is clear or bit 6 is set.
    // `!w >> 7` moves "bit 7 is clear" into bit 0 of each lane and `w >> 6`
    // moves "bit 6 is set" into bit 0 of each lane; the mask then discards
    // everything the shifts dragged in from the rest of the word.
    ((!w >> 7) | (w >> 6)) & LSB
}
91118
92119// Morally equivalent to `values.to_ne_bytes().into_iter().sum::<usize>()`, but
@@ -97,20 +124,13 @@ fn sum_bytes_in_usize(values: usize) -> usize {
97124 const SKIP_BYTES : usize = 0x00ff_00ff_00ff_00ff_u64 as usize ;
98125
99126 let pair_sum: usize = ( values & SKIP_BYTES ) + ( ( values >> 8 ) & SKIP_BYTES ) ;
100- pair_sum. wrapping_mul ( LSB_SHORTS ) >> ( ( core :: mem :: size_of :: < usize > ( ) - 2 ) * 8 )
127+ pair_sum. wrapping_mul ( LSB_SHORTS ) >> ( ( USIZE_SIZE - 2 ) * 8 )
101128}
102129
103130// This is the most direct implementation of the concept of "count the number of
104131// bytes in the string which are not continuation bytes", and is used for the
105132// head and tail of the input string (the first and last item in the tuple
106133// returned by `slice::align_to`).
107134fn char_count_general_case ( s : & [ u8 ] ) -> usize {
108- const CONT_MASK_U8 : u8 = 0b0011_1111 ;
109- const TAG_CONT_U8 : u8 = 0b1000_0000 ;
110- let mut leads = 0 ;
111- for & byte in s {
112- let is_lead = ( byte & !CONT_MASK_U8 ) != TAG_CONT_U8 ;
113- leads += is_lead as usize ;
114- }
115- leads
135+ s. iter ( ) . filter ( |& & byte| !super :: validations:: utf8_is_cont_byte ( byte) ) . count ( )
116136}
0 commit comments