11#![ unstable( issue = "0" , feature = "windows_stdio" ) ]
22
33use cell:: Cell ;
4+ use char:: decode_utf16;
45use cmp;
56use io;
67use ptr;
@@ -64,22 +65,27 @@ fn write(handle_id: c::DWORD, data: &[u8]) -> io::Result<usize> {
6465 //
6566 // If the data is not valid UTF-8 we write out as many bytes as are valid.
6667 // Only when there are no valid bytes (which will happen on the next call), return an error.
67- let len = cmp:: min ( data. len ( ) , MAX_BUFFER_SIZE ) ;
68+ let len = cmp:: min ( data. len ( ) , MAX_BUFFER_SIZE / 2 ) ;
6869 let utf8 = match str:: from_utf8 ( & data[ ..len] ) {
6970 Ok ( s) => s,
7071 Err ( ref e) if e. valid_up_to ( ) == 0 => {
7172 return Err ( io:: Error :: new ( io:: ErrorKind :: InvalidData ,
72- "Windows stdio in console mode does not support non-UTF-8 byte sequences; \
73- see https://github.com/rust-lang/rust/issues/23344") )
73+ "Windows stdio in console mode does not support writing non-UTF-8 byte sequences" ) )
7474 } ,
7575 Err ( e) => str:: from_utf8 ( & data[ ..e. valid_up_to ( ) ] ) . unwrap ( ) ,
7676 } ;
77- let utf16 = utf8. encode_utf16 ( ) . collect :: < Vec < u16 > > ( ) ;
77+ let mut utf16 = [ 0u16 ; MAX_BUFFER_SIZE / 2 ] ;
78+ let mut len_utf16 = 0 ;
79+ for ( chr, dest) in utf8. encode_utf16 ( ) . zip ( utf16. iter_mut ( ) ) {
80+ * dest = chr;
81+ len_utf16 += 1 ;
82+ }
83+ let utf16 = & utf16[ ..len_utf16] ;
7884
7985 let mut written = write_u16s ( handle, & utf16) ?;
8086
8187 // Figure out how many bytes of the UTF-8 input were written out as UTF-16.
82- if written > = utf16. len ( ) {
88+ if written = = utf16. len ( ) {
8389 Ok ( utf8. len ( ) )
8490 } else {
8591 // Make sure we didn't end up writing only half of a surrogate pair (even though the chance
@@ -90,7 +96,7 @@ fn write(handle_id: c::DWORD, data: &[u8]) -> io::Result<usize> {
9096 let first_char_remaining = utf16[ written] ;
9197 if first_char_remaining >= 0xDCEE && first_char_remaining <= 0xDFFF { // low surrogate
9298 // We just hope this works, and give up otherwise
93- let _ = write_u16s ( handle, & utf16[ written..written] ) ;
99+ let _ = write_u16s ( handle, & utf16[ written..written+ 1 ] ) ;
94100 written += 1 ;
95101 }
96102 // Calculate the number of bytes of `utf8` that were actually written.
@@ -103,6 +109,7 @@ fn write(handle_id: c::DWORD, data: &[u8]) -> io::Result<usize> {
103109 _ => 3 ,
104110 } ;
105111 }
112+ debug_assert ! ( String :: from_utf16( & utf16[ ..written] ) . unwrap( ) == utf8[ ..count] ) ;
106113 Ok ( count)
107114 }
108115}
@@ -137,7 +144,7 @@ impl Stdin {
137144 return Ok ( 0 ) ;
138145 } else if buf. len ( ) < 4 {
139146 return Err ( io:: Error :: new ( io:: ErrorKind :: InvalidInput ,
140- "Windows stdin in console mode does not support a buffer too small to; \
147+ "Windows stdin in console mode does not support a buffer too small to \
141148 guarantee holding one arbitrary UTF-8 character (4 bytes)") )
142149 }
143150
@@ -147,27 +154,14 @@ impl Stdin {
147154 // lost.
148155 let amount = cmp:: min ( buf. len ( ) / 3 , utf16_buf. len ( ) ) ;
149156 let read = self . read_u16s_fixup_surrogates ( handle, & mut utf16_buf, amount) ?;
150- let utf16 = & utf16_buf[ ..read] ;
151157
152- // FIXME: it would be nice if we could directly decode into the buffer instead of doing an
153- // allocation.
154- let data = match String :: from_utf16 ( & utf16) {
155- Ok ( utf8) => utf8. into_bytes ( ) ,
156- Err ( ..) => {
157- // We can't really do any better than forget all data and return an error.
158- return Err ( io:: Error :: new ( io:: ErrorKind :: InvalidData ,
159- "Windows stdin in console mode does not support non-UTF-16 input; \
160- encountered unpaired surrogate") )
161- } ,
162- } ;
163- buf. copy_from_slice ( & data) ;
164- Ok ( data. len ( ) )
158+ utf16_to_utf8 ( & utf16_buf[ ..read] , buf)
165159 }
166160
167161 // We assume that if the last `u16` is an unpaired surrogate they got sliced apart by our
168162 // buffer size, and keep it around for the next read hoping to put them together.
169163 // This is a best effort, and may not work if we are not the only reader on Stdin.
170- pub fn read_u16s_fixup_surrogates ( & self , handle : c:: HANDLE , buf : & mut [ u16 ] , mut amount : usize )
164+ fn read_u16s_fixup_surrogates ( & self , handle : c:: HANDLE , buf : & mut [ u16 ] , mut amount : usize )
171165 -> io:: Result < usize >
172166 {
173167 // Insert possibly remaining unpaired surrogate from last read.
@@ -223,6 +217,26 @@ fn read_u16s(handle: c::HANDLE, buf: &mut [u16]) -> io::Result<usize> {
223217 Ok ( amount as usize )
224218}
225219
/// Transcodes the UTF-16 code units in `utf16` into UTF-8 bytes stored at the
/// front of `utf8`.
///
/// Returns the number of bytes stored in `utf8`.
///
/// # Errors
///
/// Returns an `InvalidData` error as soon as an unpaired surrogate is met.
/// Bytes produced for the characters preceding it have already been stored in
/// `utf8` at that point, but no count is reported for them.
///
/// # Panics
///
/// Panics if `utf8` is too short to hold the encoded form of every character;
/// sizing the destination is the caller's responsibility.
#[allow(unused)]
fn utf16_to_utf8(utf16: &[u16], utf8: &mut [u8]) -> io::Result<usize> {
    // Offset of the first free byte in `utf8`.
    let mut filled = 0;
    for unit in decode_utf16(utf16.iter().cloned()) {
        let decoded = match unit {
            Ok(c) => c,
            // Nothing sensible can be salvaged from a lone surrogate, so the
            // data decoded so far is dropped and an error is reported.
            Err(_) => {
                return Err(io::Error::new(io::ErrorKind::InvalidData,
                    "Windows stdin in console mode does not support non-UTF-16 input; \
                    encountered unpaired surrogate"));
            }
        };
        // `encode_utf8` hands back exactly the sub-slice it filled, so its
        // length is the number of bytes this character occupies.
        filled += decoded.encode_utf8(&mut utf8[filled..]).len();
    }
    Ok(filled)
}
239+
226240impl Stdout {
227241 pub fn new ( ) -> io:: Result < Stdout > {
228242 Ok ( Stdout )
0 commit comments