@@ -86,52 +86,118 @@ impl<S: Borrow<str>> SliceConcatExt<str> for [S] {
8686 type Output = String ;
8787
// Concatenates every str in the slice into one `String`.
// Diff view: lines marked `-` are the previous hand-written implementation
// (empty-slice early return, pre-sized `String`, `push_str` loop); the single
// `+` line replaces all of it by delegating to `join` with an empty separator.
8888 fn concat ( & self ) -> String {
89- if self . is_empty ( ) {
90- return String :: new ( ) ;
91- }
92-
93- // `len` calculation may overflow but push_str will check boundaries
94- let len = self . iter ( ) . map ( |s| s. borrow ( ) . len ( ) ) . sum ( ) ;
95- let mut result = String :: with_capacity ( len) ;
96-
97- for s in self {
98- result. push_str ( s. borrow ( ) )
99- }
100-
101- result
// new body: an empty separator makes `join` behave as plain concatenation
89+ self . join ( "" )
10290 }
10391
// Joins the strs in the slice into one `String`, placing `sep` between
// adjacent elements.
// Diff view: the `-` lines are the start of the removed implementation; the
// `+` lines hand the work to `join_generic_copy`, operating on raw bytes.
10492 fn join ( & self , sep : & str ) -> String {
105- if self . is_empty ( ) {
106- return String :: new ( ) ;
// SAFETY (to be upheld by `join_generic_copy`): `from_utf8_unchecked` is only
// sound if the returned bytes consist solely of whole strs from `self` and
// whole copies of `sep`, each of which is valid UTF-8 on its own.
93+ unsafe {
94+ String :: from_utf8_unchecked ( join_generic_copy ( self , sep . as_bytes ( ) ) )
10795 }
96+ }
10897
109- // concat is faster
110- if sep . is_empty ( ) {
111- return self . concat ( ) ;
112- }
// Forwards to `join` with the same separator; it adds no behavior of its own.
98+ fn connect ( & self , sep : & str ) -> String {
99+ self . join ( sep )
100+ }
101+ }
113102
114- // this is wrong without the guarantee that `self` is non-empty
115- // `len` calculation may overflow but push_str but will check boundaries
116- let len = sep. len ( ) * ( self . len ( ) - 1 ) +
117- self . iter ( ) . map ( |s| s. borrow ( ) . len ( ) ) . sum :: < usize > ( ) ;
118- let mut result = String :: with_capacity ( len) ;
119- let mut first = true ;
// Copies `$bytes` to the front of the mutable slice bound to `$target`, then rebinds
// `$target` to the part of that slice which has not been written yet.
//
// `split_at_mut` is bounds-checked: a `$bytes` that is longer than the remaining target
// panics instead of writing past the end of the buffer.
macro_rules! copy_slice_and_advance {
    ($target:ident, $bytes:expr) => {
        let bytes = $bytes;
        let len = bytes.len();
        let (head, tail) = { $target }.split_at_mut(len);
        head.copy_from_slice(bytes);
        $target = tail;
    };
}

// Writes `separator, item` for every item yielded by `$iter` into the mutable slice
// `$target` and evaluates to the unwritten tail of `$target`.
//
// Each length listed after the `;` gets its own copy of the loop so that the separator
// length is a known constant inside it; length 0 (plain concat) and "any other length"
// are always handled.
//
// Every copy is bounds-checked, so the macro panics rather than overrunning `$target`.
// The call site must have `Borrow` in scope because the expansion calls
// `s.borrow().as_ref()`.
//
// (The misspelled name is kept on purpose so that existing invocations keep working.)
macro_rules! spezialize_for_lengths {
    ($separator:expr, $target:expr, $iter:expr; $($num:expr),*) => {{
        let mut target = $target;
        let iter = $iter;
        let sep_bytes = $separator;
        match sep_bytes.len() {
            $(
                // loops with hardcoded sizes run much faster
                // specialize the cases with small separator lengths
                $num => {
                    for s in iter {
                        copy_slice_and_advance!(target, sep_bytes);
                        let s_bytes = s.borrow().as_ref();
                        copy_slice_and_advance!(target, s_bytes);
                    }
                },
            )*
            0 => {
                // concat, same principle without the separator
                for s in iter {
                    let s_bytes = s.borrow().as_ref();
                    copy_slice_and_advance!(target, s_bytes);
                }
            },
            _ => {
                // arbitrary non-zero size fallback
                for s in iter {
                    copy_slice_and_advance!(target, sep_bytes);
                    let s_bytes = s.borrow().as_ref();
                    copy_slice_and_advance!(target, s_bytes);
                }
            }
        }
        target
    }};
}

// Optimized join implementation that works for both Vec<T> (T: Copy) and String's inner vec
// Currently (2018-05-13) there is a bug with type inference and specialization (see issue #36262)
// For this reason SliceConcatExt<T> is not specialized for T: Copy and SliceConcatExt<str> is the
// only user of this function. It is left in place for the time when that is fixed.
//
// the bounds for String-join are S: Borrow<str> and for Vec-join Borrow<[T]>
// [T] and str both impl AsRef<[T]> for some T
// => s.borrow().as_ref() and we always have slices
//
// Returns the elements of every item of `slice`, in order, with a copy of `sep` between
// adjacent items. An empty `slice` yields an empty Vec.
//
// Panics if the total length overflows `usize`, or if an item's `Borrow` impl hands out a
// longer slice while copying than it did while the length was computed.
//
// `borrow()` is called twice per item (once to size the buffer, once to copy) and nothing
// forces both calls to agree. Every copy is therefore bounds-checked, and the returned Vec
// is cut down to what was really written, so a misbehaving `Borrow` impl can neither
// overflow the buffer nor expose uninitialized elements.
fn join_generic_copy<B, T, S>(slice: &[S], sep: &[T]) -> Vec<T>
where
    T: Copy,
    B: AsRef<[T]> + ?Sized,
    S: Borrow<B>,
{
    let mut iter = slice.iter();

    // the first item is the only one without a separator in front of it
    let first = match iter.next() {
        Some(first) => first,
        None => return vec![],
    };

    // `slice.len() - 1` cannot underflow: `slice` has at least the `first` item
    // if the `len` calculation overflows, we'll panic
    // we would have run out of memory anyway and the rest of the function requires
    // the entire buffer pre-allocated
    let reserved_len = sep
        .len()
        .checked_mul(slice.len() - 1)
        .and_then(|n| {
            slice.iter().map(|s| s.borrow().as_ref().len()).try_fold(n, usize::checked_add)
        })
        .expect("attempt to join into collection with len > usize::MAX");

    let mut result: Vec<T> = Vec::with_capacity(reserved_len);

    // SAFETY: `reserved_len <= result.capacity()` because of `with_capacity` above.
    // The exposed elements are not initialized yet: they are only ever written (never
    // read) below, `T: Copy` rules out drop glue should a copy panic, and the length is
    // reduced to the written prefix before `result` leaves this function.
    // NOTE(review): a `MaybeUninit<T>`-typed target would express this more precisely
    // once that type is available to this crate.
    unsafe {
        result.set_len(reserved_len);
    }

    let unwritten = {
        let mut target = &mut result[..];

        let first_items = first.borrow().as_ref();
        copy_slice_and_advance!(target, first_items);

        // copy separator and items with one bounds check per copy
        // generate loops with hardcoded offsets for small separators
        let remain = spezialize_for_lengths!(sep, target, iter; 1, 2, 3, 4);
        remain.len()
    };

    // SAFETY: `unwritten` is the length of a sub-slice of the `reserved_len` long buffer,
    // so the subtraction cannot underflow and the new length only shrinks. Everything in
    // front of the unwritten tail was initialized by the copies above.
    unsafe {
        result.set_len(reserved_len - unwritten);
    }
    result
}
136202
137203#[ stable( feature = "rust1" , since = "1.0.0" ) ]
0 commit comments