@@ -15,7 +15,9 @@ use core::str::utf8_char_width;
1515// the value over time (such as if a process calls `SetStdHandle` while it's running). See #40490.
1616pub struct Stdin {
1717 surrogate : u16 ,
18+ incomplete_utf8 : IncompleteUtf8 ,
1819}
20+
1921pub struct Stdout {
2022 incomplete_utf8 : IncompleteUtf8 ,
2123}
@@ -29,6 +31,25 @@ struct IncompleteUtf8 {
2931 len : u8 ,
3032}
3133
34+ impl IncompleteUtf8 {
35+ // Implemented for use in Stdin::read.
36+ fn read ( & mut self , buf : & mut [ u8 ] ) -> usize {
37+ // Write to buffer until the buffer is full or we run out of bytes.
38+ let to_write = cmp:: min ( buf. len ( ) , self . len as usize ) ;
39+ buf[ ..to_write] . copy_from_slice ( & self . bytes [ ..to_write] ) ;
40+
41+ // Rotate the remaining bytes if not enough remaining space in buffer.
42+ if usize:: from ( self . len ) > buf. len ( ) {
43+ self . bytes . copy_within ( to_write.., 0 ) ;
44+ self . len -= to_write as u8 ;
45+ } else {
46+ self . len = 0 ;
47+ }
48+
49+ to_write
50+ }
51+ }
52+
3253// Apparently Windows doesn't handle large reads on stdin or writes to stdout/stderr well (see
3354// #13304 for details).
3455//
@@ -205,7 +226,7 @@ fn write_u16s(handle: c::HANDLE, data: &[u16]) -> io::Result<usize> {
205226
206227impl Stdin {
207228 pub const fn new ( ) -> Stdin {
208- Stdin { surrogate : 0 }
229+ Stdin { surrogate : 0 , incomplete_utf8 : IncompleteUtf8 :: new ( ) }
209230 }
210231}
211232
@@ -221,24 +242,39 @@ impl io::Read for Stdin {
221242 }
222243 }
223244
224- if buf. len ( ) == 0 {
225- return Ok ( 0 ) ;
226- } else if buf. len ( ) < 4 {
227- return Err ( io:: Error :: new_const (
228- io:: ErrorKind :: InvalidInput ,
229- & "Windows stdin in console mode does not support a buffer too small to \
230- guarantee holding one arbitrary UTF-8 character (4 bytes)",
231- ) ) ;
245+ // If there are bytes in the incomplete utf-8, start with those.
246+ // (No-op if there is nothing in the buffer.)
247+ let mut bytes_copied = self . incomplete_utf8 . read ( buf) ;
248+
249+ if bytes_copied == buf. len ( ) {
250+ return Ok ( bytes_copied) ;
251+ } else if buf. len ( ) - bytes_copied < 4 {
252+ // Not enough space to get a UTF-8 byte. We will use the incomplete UTF8.
253+ let mut utf16_buf = [ 0u16 ; 1 ] ;
254+ // Read one u16 character.
255+ let read = read_u16s_fixup_surrogates ( handle, & mut utf16_buf, 1 , & mut self . surrogate ) ?;
256+ // Read bytes, using the (now-empty) self.incomplete_utf8 as extra space.
257+ let read_bytes = utf16_to_utf8 ( & utf16_buf[ ..read] , & mut self . incomplete_utf8 . bytes ) ?;
258+
259+ // Read in the bytes from incomplete_utf8 until the buffer is full.
260+ self . incomplete_utf8 . len = read_bytes as u8 ;
261+ // No-op if no bytes.
262+ bytes_copied += self . incomplete_utf8 . read ( & mut buf[ bytes_copied..] ) ;
263+ Ok ( bytes_copied)
264+ } else {
265+ let mut utf16_buf = [ 0u16 ; MAX_BUFFER_SIZE / 2 ] ;
266+ // In the worst case, a UTF-8 string can take 3 bytes for every `u16` of a UTF-16. So
267+ // we can read at most a third of `buf.len()` chars and uphold the guarantee no data gets
268+ // lost.
269+ let amount = cmp:: min ( buf. len ( ) / 3 , utf16_buf. len ( ) ) ;
270+ let read =
271+ read_u16s_fixup_surrogates ( handle, & mut utf16_buf, amount, & mut self . surrogate ) ?;
272+
273+ match utf16_to_utf8 ( & utf16_buf[ ..read] , buf) {
274+ Ok ( value) => return Ok ( bytes_copied + value) ,
275+ Err ( e) => return Err ( e) ,
276+ }
232277 }
233-
234- let mut utf16_buf = [ 0u16 ; MAX_BUFFER_SIZE / 2 ] ;
235- // In the worst case, a UTF-8 string can take 3 bytes for every `u16` of a UTF-16. So
236- // we can read at most a third of `buf.len()` chars and uphold the guarantee no data gets
237- // lost.
238- let amount = cmp:: min ( buf. len ( ) / 3 , utf16_buf. len ( ) ) ;
239- let read = read_u16s_fixup_surrogates ( handle, & mut utf16_buf, amount, & mut self . surrogate ) ?;
240-
241- utf16_to_utf8 ( & utf16_buf[ ..read] , buf)
242278 }
243279}
244280
0 commit comments