@@ -128,7 +128,7 @@ pub fn from_ucs4(
128128 suppress_error : bool ,
129129 variant : UTF16Variant ,
130130) -> ( u32 , Vec < u8 > ) {
131- let mut utf16: Vec < u8 > = Vec :: with_capacity ( input . len ( ) * 4 ) ; // Pre-allocate assuming worst case
131+ let mut utf16: Vec < u16 > = Vec :: new ( ) ;
132132 let variant = match variant {
133133 UTF16Variant :: UTF16LE => UTF16Variant :: UTF16LE ,
134134 UTF16Variant :: UTF16BE => UTF16Variant :: UTF16BE ,
@@ -142,49 +142,48 @@ pub fn from_ucs4(
142142 } ;
143143
144144 for & code_point in input {
145- if code_point <= 0xFFFF {
146- if ( 0xD800 ..=0xDFFF ) . contains ( & code_point) {
147- if !suppress_error {
148- eprintln ! ( "Error: Isolated surrogate code point U+{:04X}" , code_point) ;
149- }
150- if omit_invalid {
151- continue ;
152- } else {
153- return ( 1 , utf16) ;
154- }
145+ if code_point <= 0xD7FF || ( 0xE000 ..=0xFFFF ) . contains ( & code_point) {
146+ utf16. push ( u16:: try_from ( code_point) . unwrap ( ) ) ;
147+ } else if ( 0xD800 ..=0xDFFF ) . contains ( & code_point) {
148+ if !suppress_error {
149+ eprintln ! ( "Error: Isolated surrogate code point U+{:04X}" , code_point) ;
155150 }
156-
157- match variant {
158- UTF16Variant :: UTF16LE => LittleEndian :: write_u16 ( & mut utf16, code_point as u16 ) ,
159- UTF16Variant :: UTF16BE => BigEndian :: write_u16 ( & mut utf16, code_point as u16 ) ,
160- _ => unreachable ! ( ) ,
151+ if !omit_invalid {
152+ return ( 1 , to_bytes ( & utf16, variant) ) ;
161153 }
162154 } else if code_point <= 0x10FFFF {
163155 let code_point = code_point - 0x10000 ;
164- let high_surrogate = ( code_point >> 10 ) as u16 + 0xD800 ;
165- let low_surrogate = ( code_point & 0x3FF ) as u16 + 0xDC00 ;
166- match variant {
167- UTF16Variant :: UTF16LE => {
168- LittleEndian :: write_u16 ( & mut utf16, high_surrogate) ;
169- LittleEndian :: write_u16 ( & mut utf16, low_surrogate) ;
170- }
171- UTF16Variant :: UTF16BE => {
172- BigEndian :: write_u16 ( & mut utf16, high_surrogate) ;
173- BigEndian :: write_u16 ( & mut utf16, low_surrogate) ;
174- }
175- _ => unreachable ! ( ) ,
176- }
156+ let high_surrogate = ( ( code_point >> 10 ) as u16 ) + 0xD800 ;
157+ let low_surrogate = ( ( code_point & 0x3FF ) as u16 ) + 0xDC00 ;
158+ utf16. push ( high_surrogate) ;
159+ utf16. push ( low_surrogate) ;
177160 } else {
178161 if !suppress_error {
179162 eprintln ! ( "Error: Invalid Unicode code point U+{:X}" , code_point) ;
180163 }
181- if omit_invalid {
182- continue ;
183- } else {
184- return ( 1 , utf16) ;
164+ if !omit_invalid {
165+ return ( 1 , to_bytes ( & utf16, variant) ) ;
185166 }
186167 }
187168 }
188169
189- ( 0 , utf16)
170+ ( 0 , to_bytes ( & utf16, variant) )
171+ }
172+
173+ fn to_bytes ( utf16 : & [ u16 ] , variant : UTF16Variant ) -> Vec < u8 > {
174+ let mut bytes = Vec :: with_capacity ( utf16. len ( ) * 2 ) ;
175+ match variant {
176+ UTF16Variant :: UTF16LE => {
177+ for & code_unit in utf16 {
178+ bytes. extend_from_slice ( & code_unit. to_le_bytes ( ) ) ;
179+ }
180+ }
181+ UTF16Variant :: UTF16BE => {
182+ for & code_unit in utf16 {
183+ bytes. extend_from_slice ( & code_unit. to_be_bytes ( ) ) ;
184+ }
185+ }
186+ _ => unreachable ! ( ) ,
187+ }
188+ bytes
190189}
0 commit comments