@@ -14,8 +14,18 @@ use crate::sys::handle::Handle;
/// Reader state for the process's standard input on Windows.
pub struct Stdin {
    // NOTE(review): presumably a pending UTF-16 surrogate code unit carried over between
    // reads; the code that uses this field is outside this view, so confirm before relying on it.
    surrogate: u16,
}
17- pub struct Stdout ;
18- pub struct Stderr ;
17+ pub struct Stdout {
18+ incomplete_utf8 : IncompleteUtf8 ,
19+ }
20+
21+ pub struct Stderr {
22+ incomplete_utf8 : IncompleteUtf8 ,
23+ }
24+
/// Buffer for a UTF-8 sequence that has been split across several `write` calls.
struct IncompleteUtf8 {
    /// Storage for the bytes received so far; only the first `len` entries are meaningful.
    bytes: [u8; 4],
    /// Number of bytes currently buffered. `write` treats 0 as "nothing pending" and panics
    /// if it ever finds a value above 3 on entry.
    len: u8,
}
1929
2030// Apparently Windows doesn't handle large reads on stdin or writes to stdout/stderr well (see
2131// #13304 for details).
@@ -50,7 +60,27 @@ fn is_console(handle: c::HANDLE) -> bool {
5060 unsafe { c:: GetConsoleMode ( handle, & mut mode) != 0 }
5161}
5262
53- fn write ( handle_id : c:: DWORD , data : & [ u8 ] ) -> io:: Result < usize > {
/// Returns the total length in bytes of the UTF-8 sequence introduced by the lead byte `b`,
/// or 0 when `b` cannot start a sequence (continuation bytes `0x80..=0xBF`, the overlong
/// leads `0xC0`/`0xC1`, and everything from `0xF5` upwards).
///
/// Simple reimplementation of `std::str::utf8_char_width()`, which is feature-gated.
fn utf8_char_width(b: u8) -> usize {
    // The ranges are contiguous, so walking the upper bounds in ascending order classifies
    // every possible byte value exactly once.
    if b < 0x80 {
        1
    } else if b < 0xC2 {
        0
    } else if b < 0xE0 {
        2
    } else if b < 0xF0 {
        3
    } else if b < 0xF5 {
        4
    } else {
        0
    }
}
74+
75+ fn write (
76+ handle_id : c:: DWORD ,
77+ data : & [ u8 ] ,
78+ incomplete_utf8 : & mut IncompleteUtf8 ,
79+ ) -> io:: Result < usize > {
80+ if data. is_empty ( ) {
81+ return Ok ( 0 ) ;
82+ }
83+
5484 let handle = get_handle ( handle_id) ?;
5585 if !is_console ( handle) {
5686 let handle = Handle :: new ( handle) ;
@@ -59,22 +89,74 @@ fn write(handle_id: c::DWORD, data: &[u8]) -> io::Result<usize> {
5989 return ret;
6090 }
6191
62- // As the console is meant for presenting text, we assume bytes of `data` come from a string
63- // and are encoded as UTF-8, which needs to be encoded as UTF-16.
92+ match incomplete_utf8. len {
93+ 0 => { }
94+ 1 ..=3 => {
95+ if data[ 0 ] >> 6 != 0b10 {
96+ incomplete_utf8. len = 0 ;
97+ // not a continuation byte - reject
98+ return Err ( io:: Error :: new (
99+ io:: ErrorKind :: InvalidData ,
100+ "Windows stdio in console mode does not support writing non-UTF-8 byte sequences" ,
101+ ) ) ;
102+ }
103+ incomplete_utf8. bytes [ incomplete_utf8. len as usize ] = data[ 0 ] ;
104+ incomplete_utf8. len += 1 ;
105+ let char_width = utf8_char_width ( incomplete_utf8. bytes [ 0 ] ) ;
106+ if ( incomplete_utf8. len as usize ) < char_width {
107+ // more bytes needed
108+ return Ok ( 1 ) ;
109+ }
110+ let s = str:: from_utf8 ( & incomplete_utf8. bytes [ 0 ..incomplete_utf8. len as usize ] ) ;
111+ incomplete_utf8. len = 0 ;
112+ match s {
113+ Ok ( s) => {
114+ assert_eq ! ( char_width, s. len( ) ) ;
115+ let written = write_valid_utf8 ( handle, s) ?;
116+ assert_eq ! ( written, s. len( ) ) ; // guaranteed by write0() for single codepoint writes
117+ return Ok ( 1 ) ;
118+ }
119+ Err ( _) => {
120+ return Err ( io:: Error :: new (
121+ io:: ErrorKind :: InvalidData ,
122+ "Windows stdio in console mode does not support writing non-UTF-8 byte sequences" ,
123+ ) ) ;
124+ }
125+ }
126+ }
127+ _ => {
128+ panic ! ( "Unexpected number of incomplete UTF-8 chars." ) ;
129+ }
130+ }
131+
132+ // As the console is meant for presenting text, we assume bytes of `data` are encoded as UTF-8,
133+ // which needs to be encoded as UTF-16.
64134 //
65135 // If the data is not valid UTF-8 we write out as many bytes as are valid.
66- // Only when there are no valid bytes (which will happen on the next call), return an error.
136+ // If the first byte is invalid, it is either the first byte of a multi-byte sequence for which
137+ // the provided byte slice is too short, or the first byte of an invalid multi-byte sequence.
67138 let len = cmp:: min ( data. len ( ) , MAX_BUFFER_SIZE / 2 ) ;
68139 let utf8 = match str:: from_utf8 ( & data[ ..len] ) {
69140 Ok ( s) => s,
70141 Err ( ref e) if e. valid_up_to ( ) == 0 => {
71- return Err ( io:: Error :: new_const (
72- io:: ErrorKind :: InvalidData ,
73- & "Windows stdio in console mode does not support writing non-UTF-8 byte sequences" ,
74- ) ) ;
142+ if data. len ( ) < utf8_char_width ( data[ 0 ] ) {
143+ incomplete_utf8. bytes [ 0 ] = data[ 0 ] ;
144+ incomplete_utf8. len = 1 ;
145+ return Ok ( 1 ) ;
146+ } else {
147+ return Err ( io:: Error :: new_const (
148+ io:: ErrorKind :: InvalidData ,
149+ & "Windows stdio in console mode does not support writing non-UTF-8 byte sequences" ,
150+ ) ) ;
151+ }
75152 }
76153 Err ( e) => str:: from_utf8 ( & data[ ..e. valid_up_to ( ) ] ) . unwrap ( ) ,
77154 } ;
155+
156+ write_valid_utf8 ( handle, utf8)
157+ }
158+
159+ fn write_valid_utf8 ( handle : c:: HANDLE , utf8 : & str ) -> io:: Result < usize > {
78160 let mut utf16 = [ 0u16 ; MAX_BUFFER_SIZE / 2 ] ;
79161 let mut len_utf16 = 0 ;
80162 for ( chr, dest) in utf8. encode_utf16 ( ) . zip ( utf16. iter_mut ( ) ) {
@@ -254,15 +336,21 @@ fn utf16_to_utf8(utf16: &[u16], utf8: &mut [u8]) -> io::Result<usize> {
254336 Ok ( written)
255337}
256338
339+ impl IncompleteUtf8 {
340+ pub const fn new ( ) -> IncompleteUtf8 {
341+ IncompleteUtf8 { bytes : [ 0 ; 4 ] , len : 0 }
342+ }
343+ }
344+
257345impl Stdout {
258346 pub const fn new ( ) -> Stdout {
259- Stdout
347+ Stdout { incomplete_utf8 : IncompleteUtf8 :: new ( ) }
260348 }
261349}
262350
263351impl io:: Write for Stdout {
264352 fn write ( & mut self , buf : & [ u8 ] ) -> io:: Result < usize > {
265- write ( c:: STD_OUTPUT_HANDLE , buf)
353+ write ( c:: STD_ERROR_HANDLE , buf, & mut self . incomplete_utf8 )
266354 }
267355
268356 fn flush ( & mut self ) -> io:: Result < ( ) > {
@@ -272,13 +360,13 @@ impl io::Write for Stdout {
272360
273361impl Stderr {
274362 pub const fn new ( ) -> Stderr {
275- Stderr
363+ Stderr { incomplete_utf8 : IncompleteUtf8 :: new ( ) }
276364 }
277365}
278366
279367impl io:: Write for Stderr {
280368 fn write ( & mut self , buf : & [ u8 ] ) -> io:: Result < usize > {
281- write ( c:: STD_ERROR_HANDLE , buf)
369+ write ( c:: STD_ERROR_HANDLE , buf, & mut self . incomplete_utf8 )
282370 }
283371
284372 fn flush ( & mut self ) -> io:: Result < ( ) > {
0 commit comments