@@ -22,8 +22,7 @@ use char;
2222use char:: Char ;
2323use clone:: { Clone , DeepClone } ;
2424use container:: { Container , Mutable } ;
25- use num:: Times ;
26- use iter:: { Iterator , FromIterator , Extendable } ;
25+ use iter:: { Iterator , FromIterator , Extendable , range} ;
2726use iter:: { Filter , AdditiveIterator , Map } ;
2827use iter:: { Invert , DoubleEndedIterator , ExactSize } ;
2928use libc;
@@ -33,7 +32,6 @@ use ptr;
3332use ptr:: RawPtr ;
3433use to_str:: ToStr ;
3534use uint;
36- use unstable:: raw:: { Repr , Slice } ;
3735use vec;
3836use vec:: { OwnedVector , OwnedCopyableVector , ImmutableVector , MutableVector } ;
3937use default:: Default ;
@@ -185,23 +183,15 @@ impl<'self, S: Str> StrVector for &'self [S] {
185183 fn concat ( & self ) -> ~str {
186184 if self . is_empty ( ) { return ~""; }
187185
186+ // `len` calculation may overflow but push_str but will check boundaries
188187 let len = self . iter ( ) . map ( |s| s. as_slice ( ) . len ( ) ) . sum ( ) ;
189188
190- let mut s = with_capacity ( len) ;
189+ let mut result = with_capacity ( len) ;
191190
192- unsafe {
193- do s. as_mut_buf |buf, _| {
194- let mut buf = buf;
195- for ss in self . iter ( ) {
196- do ss. as_slice ( ) . as_imm_buf |ssbuf, sslen| {
197- ptr:: copy_memory ( buf, ssbuf, sslen) ;
198- buf = buf. offset ( sslen as int ) ;
199- }
200- }
201- }
202- raw:: set_len ( & mut s, len) ;
191+ for s in self . iter ( ) {
192+ result. push_str ( s. as_slice ( ) )
203193 }
204- s
194+ result
205195 }
206196
207197 /// Concatenate a vector of strings, placing a given separator between each.
@@ -212,34 +202,21 @@ impl<'self, S: Str> StrVector for &'self [S] {
212202 if sep. is_empty ( ) { return self . concat ( ) ; }
213203
214204 // this is wrong without the guarantee that `self` is non-empty
205+ // `len` calculation may overflow but push_str but will check boundaries
215206 let len = sep. len ( ) * ( self . len ( ) - 1 )
216207 + self . iter ( ) . map ( |s| s. as_slice ( ) . len ( ) ) . sum ( ) ;
217- let mut s = ~"" ;
208+ let mut result = with_capacity ( len ) ;
218209 let mut first = true ;
219210
220- s. reserve ( len) ;
221-
222- unsafe {
223- do s. as_mut_buf |buf, _| {
224- do sep. as_imm_buf |sepbuf, seplen| {
225- let mut buf = buf;
226- for ss in self . iter ( ) {
227- do ss. as_slice ( ) . as_imm_buf |ssbuf, sslen| {
228- if first {
229- first = false ;
230- } else {
231- ptr:: copy_memory ( buf, sepbuf, seplen) ;
232- buf = buf. offset ( seplen as int ) ;
233- }
234- ptr:: copy_memory ( buf, ssbuf, sslen) ;
235- buf = buf. offset ( sslen as int ) ;
236- }
237- }
238- }
211+ for s in self . iter ( ) {
212+ if first {
213+ first = false ;
214+ } else {
215+ result. push_str ( sep) ;
239216 }
240- raw :: set_len ( & mut s , len ) ;
217+ result . push_str ( s . as_slice ( ) ) ;
241218 }
242- s
219+ result
243220 }
244221}
245222
@@ -961,7 +938,6 @@ static TAG_CONT_U8: u8 = 128u8;
961938
962939/// Unsafe operations
963940pub mod raw {
964- use option:: Some ;
965941 use cast;
966942 use libc;
967943 use ptr;
@@ -1064,21 +1040,22 @@ pub mod raw {
10641040 }
10651041 }
10661042
1067- /// Appends a byte to a string. (Not UTF-8 safe).
1043+ /// Appends a byte to a string.
1044+ /// The caller must preserve the valid UTF-8 property.
10681045 #[ inline]
10691046 pub unsafe fn push_byte ( s : & mut ~str , b : u8 ) {
1070- let v: & mut ~[ u8 ] = cast:: transmute ( s) ;
1071- v. push ( b) ;
1047+ as_owned_vec ( s) . push ( b)
10721048 }
10731049
1074- /// Appends a vector of bytes to a string. (Not UTF-8 safe).
1075- unsafe fn push_bytes ( s : & mut ~ str , bytes : & [ u8 ] ) {
1076- let new_len = s . len ( ) + bytes . len ( ) ;
1077- s . reserve_at_least ( new_len ) ;
1078- for byte in bytes. iter ( ) { push_byte ( & mut * s , * byte ) ; }
1050+ /// Appends a vector of bytes to a string.
1051+ /// The caller must preserve the valid UTF-8 property.
1052+ # [ inline ]
1053+ pub unsafe fn push_bytes ( s : & mut ~ str , bytes : & [ u8 ] ) {
1054+ vec :: bytes:: push_bytes ( as_owned_vec ( s ) , bytes ) ;
10791055 }
10801056
1081- /// Removes the last byte from a string and returns it. (Not UTF-8 safe).
1057+ /// Removes the last byte from a string and returns it.
1058+ /// The caller must preserve the valid UTF-8 property.
10821059 pub unsafe fn pop_byte ( s : & mut ~str ) -> u8 {
10831060 let len = s. len ( ) ;
10841061 assert ! ( ( len > 0 u) ) ;
@@ -1087,7 +1064,8 @@ pub mod raw {
10871064 return b;
10881065 }
10891066
1090- /// Removes the first byte from a string and returns it. (Not UTF-8 safe).
1067+ /// Removes the first byte from a string and returns it.
1068+ /// The caller must preserve the valid UTF-8 property.
10911069 pub unsafe fn shift_byte ( s : & mut ~str ) -> u8 {
10921070 let len = s. len ( ) ;
10931071 assert ! ( ( len > 0 u) ) ;
@@ -1096,15 +1074,21 @@ pub mod raw {
10961074 return b;
10971075 }
10981076
1077+ /// Access the str in its vector representation.
1078+ /// The caller must preserve the valid UTF-8 property when modifying.
1079+ #[ inline]
1080+ pub unsafe fn as_owned_vec < ' a > ( s : & ' a mut ~str ) -> & ' a mut ~[ u8 ] {
1081+ cast:: transmute ( s)
1082+ }
1083+
10991084 /// Sets the length of a string
11001085 ///
11011086 /// This will explicitly set the size of the string, without actually
11021087 /// modifying its buffers, so it is up to the caller to ensure that
11031088 /// the string is actually the specified size.
11041089 #[ inline]
11051090 pub unsafe fn set_len ( s : & mut ~str , new_len : uint ) {
1106- let v: & mut ~[ u8 ] = cast:: transmute ( s) ;
1107- vec:: raw:: set_len ( v, new_len)
1091+ vec:: raw:: set_len ( as_owned_vec ( s) , new_len)
11081092 }
11091093
11101094 /// Sets the length of a string
@@ -2061,22 +2045,11 @@ impl<'self> StrSlice<'self> for &'self str {
20612045
20622046 /// Given a string, make a new string with repeated copies of it.
20632047 fn repeat(&self, nn: uint) -> ~str {
2064- do self.as_imm_buf |buf, len| {
2065- let mut ret = with_capacity(nn * len);
2066-
2067- unsafe {
2068- do ret.as_mut_buf |rbuf, _len| {
2069- let mut rbuf = rbuf;
2070-
2071- do nn.times {
2072- ptr::copy_memory(rbuf, buf, len);
2073- rbuf = rbuf.offset(len as int);
2074- }
2075- }
2076- raw::set_len(&mut ret, nn * len);
2077- }
2078- ret
2048+ let mut ret = with_capacity(nn * self.len());
2049+ for _ in range(0, nn) {
2050+ ret.push_str(*self);
20792051 }
2052+ ret
20802053 }
20812054
20822055 /// Retrieves the first character from a string slice and returns
@@ -2199,54 +2172,35 @@ impl OwnedStr for ~str {
21992172 /// Appends a string slice to the back of a string, without overallocating
22002173 #[inline]
22012174 fn push_str_no_overallocate(&mut self, rhs: &str) {
2202- unsafe {
2203- let llen = self.len();
2204- let rlen = rhs.len();
2205- self.reserve(llen + rlen);
2206- do self.as_imm_buf |lbuf, _llen| {
2207- do rhs.as_imm_buf |rbuf, _rlen| {
2208- let dst = ptr::offset(lbuf, llen as int);
2209- let dst = cast::transmute_mut_unsafe(dst);
2210- ptr::copy_memory(dst, rbuf, rlen);
2211- }
2212- }
2213- raw::set_len(self, llen + rlen);
2214- }
2175+ let new_cap = self.len() + rhs.len();
2176+ self.reserve(new_cap);
2177+ self.push_str(rhs);
22152178 }
22162179
22172180 /// Appends a string slice to the back of a string
22182181 #[inline]
22192182 fn push_str(&mut self, rhs: &str) {
22202183 unsafe {
2221- let llen = self.len();
2222- let rlen = rhs.len();
2223- self.reserve_at_least(llen + rlen);
2224- do self.as_imm_buf |lbuf, _llen| {
2225- do rhs.as_imm_buf |rbuf, _rlen| {
2226- let dst = ptr::offset(lbuf, llen as int);
2227- let dst = cast::transmute_mut_unsafe(dst);
2228- ptr::copy_memory(dst, rbuf, rlen);
2229- }
2230- }
2231- raw::set_len(self, llen + rlen);
2184+ raw::push_bytes(self, rhs.as_bytes());
22322185 }
22332186 }
22342187
22352188 /// Appends a character to the back of a string
22362189 #[inline]
22372190 fn push_char(&mut self, c: char) {
22382191 let cur_len = self.len();
2239- self.reserve_at_least(cur_len + 4); // may use up to 4 bytes
2240-
2241- // Attempt to not use an intermediate buffer by just pushing bytes
2242- // directly onto this string.
2192+ // may use up to 4 bytes.
22432193 unsafe {
2244- let v = self.repr();
2245- let len = c.encode_utf8(cast::transmute(Slice {
2246- data: ((&(*v).data) as *u8).offset(cur_len as int),
2247- len: 4,
2248- }));
2249- raw::set_len(self, cur_len + len);
2194+ raw::as_owned_vec(self).reserve_additional(4);
2195+
2196+ // Attempt to not use an intermediate buffer by just pushing bytes
2197+ // directly onto this string.
2198+ let used = do self.as_mut_buf |buf, _| {
2199+ do vec::raw::mut_buf_as_slice(buf.offset(cur_len as int), 4) |slc| {
2200+ c.encode_utf8(slc)
2201+ }
2202+ };
2203+ raw::set_len(self, cur_len + used);
22502204 }
22512205 }
22522206
@@ -2306,8 +2260,7 @@ impl OwnedStr for ~str {
23062260 #[inline]
23072261 fn reserve(&mut self, n: uint) {
23082262 unsafe {
2309- let v: &mut ~[u8] = cast::transmute(self);
2310- (*v).reserve(n);
2263+ raw::as_owned_vec(self).reserve(n)
23112264 }
23122265 }
23132266
@@ -2329,7 +2282,7 @@ impl OwnedStr for ~str {
23292282 /// * n - The number of bytes to reserve space for
23302283 #[inline]
23312284 fn reserve_at_least(&mut self, n: uint) {
2332- self.reserve(uint::next_power_of_two (n))
2285+ self.reserve(uint::next_power_of_two_opt(n).unwrap_or (n))
23332286 }
23342287
23352288 /// Returns the number of single-byte characters the string can hold without
@@ -2359,8 +2312,9 @@ impl OwnedStr for ~str {
23592312
23602313 #[inline]
23612314 fn as_mut_buf<T>(&mut self, f: &fn(*mut u8, uint) -> T) -> T {
2362- let v: &mut ~[u8] = unsafe { cast::transmute(self) };
2363- v.as_mut_buf(f)
2315+ unsafe {
2316+ raw::as_owned_vec(self).as_mut_buf(f)
2317+ }
23642318 }
23652319}
23662320
@@ -3912,4 +3866,23 @@ mod bench {
39123866 with_capacity(100);
39133867 }
39143868 }
3869+
3870+ #[bench]
3871+ fn bench_push_str(bh: &mut BenchHarness) {
3872+ let s = " ศไทย中华Việt Nam ; Mary had a little lamb, Little lamb";
3873+ do bh.iter {
3874+ let mut r = ~" ";
3875+ r.push_str(s);
3876+ }
3877+ }
3878+
3879+ #[bench]
3880+ fn bench_connect(bh: &mut BenchHarness) {
3881+ let s = " ศไทย中华Việt Nam ; Mary had a little lamb, Little lamb";
3882+ let sep = " →";
3883+ let v = [ s, s, s, s, s, s, s, s, s, s] ;
3884+ do bh. iter {
3885+ assert_eq!( v. connect( sep) . len( ) , s. len( ) * 10 + sep. len( ) * 9 ) ;
3886+ }
3887+ }
39153888}
0 commit comments