@@ -759,30 +759,6 @@ pub fn is_utf16(v: &[u16]) -> bool {
759759 return true ;
760760}
761761
762- /// Encodes the string slice `s` as UTF-16, returning an owned vector of `u16` code units
763- pub fn to_utf16( s : & str ) -> ~[ u16 ] {
764- let mut u = ~[ ] ;
765- for s. iter( ) . advance |ch| {
766- // Work on the value as a u32: arithmetic with u32 literals is easier on the eyes than chars.
767- let mut ch = ch as u32;
768-
769- if ( ch & 0xFFFF_u32 ) == ch {
770- // Values that fit in 16 bits (the BMP) are pushed as a single code unit;
771- // the assert below rejects the surrogate range 0xD800..0xDFFF.
772- assert!( ch <= 0xD7FF_u32 || ch >= 0xE000_u32 ) ;
773- u. push( ch as u16)
774- } else {
775- // Supplementary planes (0x1_0000..0x10_FFFF) become a pair: w1 = 0xD800 | high bits, w2 = 0xDC00 | low 10 bits.
776- assert ! ( ch >= 0x1_0000_u32 && ch <= 0x10_FFFF_u32 ) ;
777- ch -= 0x1_0000_u32 ;
778- let w1 = 0xD800_u16 | ( ( ch >> 10 ) as u16 ) ;
779- let w2 = 0xDC00_u16 | ( ( ch as u16) & 0x3FF_u16 ) ;
780- u. push_all( [ w1, w2] )
781- }
782- }
783- u
784- }
785-
786762/// Iterates over the utf-16 characters in the specified slice, yielding each
787763/// decoded unicode character to the function provided.
788764///
@@ -1188,6 +1164,7 @@ pub trait StrSlice<'self> {
11881164 fn replace(&self, from: &str, to: &str) -> ~str;
11891165 fn to_owned(&self) -> ~str;
11901166 fn to_managed(&self) -> @str;
1167+ fn to_utf16(&self) -> ~[u16];
11911168 fn is_char_boundary(&self, index: uint) -> bool;
11921169 fn char_range_at(&self, start: uint) -> CharRange;
11931170 fn char_at(&self, i: uint) -> char;
@@ -1602,6 +1579,30 @@ impl<'self> StrSlice<'self> for &'self str {
16021579 unsafe { ::cast::transmute(v) }
16031580 }
16041581
1582+ /// Encodes this string slice as UTF-16, returning an owned vector of `u16` code units.
1583+ fn to_utf16(&self) -> ~[u16] {
1584+ let mut u = ~[];
1585+ for self.iter().advance |ch| {
1586+ // Work on the value as a u32: arithmetic with u32 literals is easier on the eyes than chars.
1587+ let mut ch = ch as u32;
1588+
1589+ if (ch & 0xFFFF_u32) == ch {
1590+ // Values that fit in 16 bits (the BMP) are pushed as a single code unit;
1591+ // the assert below rejects the surrogate range 0xD800..0xDFFF.
1592+ assert!(ch <= 0xD7FF_u32 || ch >= 0xE000_u32);
1593+ u.push(ch as u16)
1594+ } else {
1595+ // Supplementary planes (0x1_0000..0x10_FFFF) become a pair: w1 = 0xD800 | high bits, w2 = 0xDC00 | low 10 bits.
1596+ assert!(ch >= 0x1_0000_u32 && ch <= 0x10_FFFF_u32);
1597+ ch -= 0x1_0000_u32;
1598+ let w1 = 0xD800_u16 | ((ch >> 10) as u16);
1599+ let w2 = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
1600+ u.push_all([w1, w2])
1601+ }
1602+ }
1603+ u
1604+ }
1605+
16051606 /**
16061607 * Returns false if the index points into the middle of a multi-byte
16071608 * character sequence.
@@ -3116,10 +3117,10 @@ mod tests {
31163117
31173118 for pairs. each |p| {
31183119 let ( s, u) = copy * p;
3119- assert!( to_utf16( s ) == u) ;
3120+ assert!( s . to_utf16( ) == u) ;
31203121 assert!( from_utf16( u) == s) ;
3121- assert!( from_utf16( to_utf16( s ) ) == s) ;
3122- assert!( to_utf16 ( from_utf16( u) ) == u) ;
3122+ assert!( from_utf16( s . to_utf16( ) ) == s) ;
3123+ assert!( from_utf16( u) . to_utf16 ( ) == u) ;
31233124 }
31243125 }
31253126
0 commit comments