@@ -58,6 +58,101 @@ struct Args {
5858 files : Option < Vec < PathBuf > > ,
5959}
6060
/// Number of bytes held by the ring that backs [`CircularBuffer`].
const CIRCULAR_BUFFER_CAPACITY: usize = 10000;

/// Fixed-capacity ring buffer that pulls bytes from `reader` on demand.
///
/// `read_pos` is the index of the oldest unread byte, `write_pos` is the index
/// where the next incoming byte is stored, and `length` is the number of
/// unread bytes currently held. Both positions wrap modulo `capacity`.
struct CircularBuffer<R: Read> {
    reader: R,
    buffer: [u8; CIRCULAR_BUFFER_CAPACITY],
    capacity: usize,
    read_pos: usize,
    write_pos: usize,
    length: usize,
}

impl<R: Read> CircularBuffer<R> {
    /// Creates an empty buffer that will read from `reader`.
    fn new(reader: R) -> Self {
        CircularBuffer {
            reader,
            buffer: [0; CIRCULAR_BUFFER_CAPACITY],
            capacity: CIRCULAR_BUFFER_CAPACITY,
            read_pos: 0,
            write_pos: 0,
            length: 0,
        }
    }

    /// Returns how many more bytes fit before the buffer is full.
    fn available_space(&self) -> usize {
        self.capacity - self.length
    }

    /// Reads from the underlying reader until the buffer is full or the
    /// reader reports end of input.
    ///
    /// # Errors
    ///
    /// Returns any I/O error from the reader other than
    /// `ErrorKind::Interrupted`, which is retried.
    fn fill_buffer(&mut self) -> io::Result<()> {
        while self.length < self.capacity {
            // Read straight into the contiguous free region that starts at
            // `write_pos`; if the free space wraps past the end of the array,
            // the next loop pass picks up the remainder at index 0.
            let start = self.write_pos;
            let span = self.available_space().min(self.capacity - start);
            match self.reader.read(&mut self.buffer[start..start + span]) {
                Ok(0) => return Ok(()), // EOF reached
                Ok(n) => {
                    self.write_pos = (start + n) % self.capacity;
                    self.length += n;
                }
                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            }
        }
        Ok(())
    }

    /// Appends as much of `data` as fits and returns the number of bytes
    /// stored; bytes that do not fit are ignored.
    // `fill_buffer` reads directly into the ring, so nothing in this block
    // calls `write` any more; it is kept for any other code that pushes bytes
    // into the buffer by hand.
    #[allow(dead_code)]
    fn write(&mut self, data: &[u8]) -> usize {
        let count = data.len().min(self.available_space());
        // At most two contiguous copies: up to the end of the array, then the
        // wrapped remainder from index 0.
        let first = count.min(self.capacity - self.write_pos);
        self.buffer[self.write_pos..self.write_pos + first].copy_from_slice(&data[..first]);
        self.buffer[..count - first].copy_from_slice(&data[first..count]);
        self.write_pos = (self.write_pos + count) % self.capacity;
        self.length += count;
        count
    }

    /// Consumes the buffer and returns an iterator over its bytes.
    fn iter(self) -> CircularBufferIterator<R> {
        CircularBufferIterator { buffer: self }
    }
}

/// Byte iterator that owns a [`CircularBuffer`] and refills it when drained.
struct CircularBufferIterator<R: Read> {
    buffer: CircularBuffer<R>,
}

impl<R: Read> Iterator for CircularBufferIterator<R> {
    type Item = u8;

    /// Returns the next byte, refilling from the reader when the buffer is
    /// empty, or `None` once the reader is exhausted.
    ///
    /// `Iterator::next` cannot propagate an I/O error, so a read failure is
    /// printed to stderr and the process exits with status 1.
    fn next(&mut self) -> Option<Self::Item> {
        let ring = &mut self.buffer;
        if ring.length == 0 {
            if let Err(e) = ring.fill_buffer() {
                eprintln!("Error: {}", e);
                exit(1);
            }
            if ring.length == 0 {
                return None; // EOF reached
            }
        }

        let item = ring.buffer[ring.read_pos];
        ring.read_pos = (ring.read_pos + 1) % ring.capacity;
        ring.length -= 1;
        Some(item)
    }
}

impl<R: Read> IntoIterator for CircularBuffer<R> {
    type Item = u8;
    type IntoIter = CircularBufferIterator<R>;

    fn into_iter(self) -> Self::IntoIter {
        self.iter()
    }
}
155+
61156#[ derive( EnumString , EnumIter , Debug , PartialEq , Display ) ]
62157#[ strum( serialize_all = "SCREAMING-KEBAB-CASE" ) ]
63158#[ allow( non_camel_case_types) ]
@@ -267,122 +362,127 @@ fn parse_codeset(codeset: &str) -> Result<CodesetType, Box<dyn std::error::Error
267362fn encoding_conversion (
268363 from : & Encodings ,
269364 to : & Encodings ,
270- input : & [ u8 ] ,
365+ input : CircularBuffer < Box < dyn Read > > ,
271366 omit_invalid : bool ,
272367 supress_error : bool ,
273- ) -> ( Vec < u8 > , u32 ) {
274- let ( input_exit_code, input) = match from {
275- Encodings :: UTF_8 => utf_8:: to_ucs4 ( input, omit_invalid, supress_error) ,
368+ ) {
369+ let iter = input. into_iter ( ) ;
370+ let ucs4 = match from {
371+ Encodings :: UTF_8 => utf_8:: to_ucs4 ( iter, omit_invalid, supress_error) ,
276372 Encodings :: UTF_16 => {
277- utf_16:: to_ucs4 ( input , omit_invalid, supress_error, UTF16Variant :: UTF16 )
373+ utf_16:: to_ucs4 ( iter , omit_invalid, supress_error, UTF16Variant :: UTF16 )
278374 }
279375 Encodings :: UTF_16LE => {
280- utf_16:: to_ucs4 ( input , omit_invalid, supress_error, UTF16Variant :: UTF16LE )
376+ utf_16:: to_ucs4 ( iter , omit_invalid, supress_error, UTF16Variant :: UTF16LE )
281377 }
282378 Encodings :: UTF_16BE => {
283- utf_16:: to_ucs4 ( input , omit_invalid, supress_error, UTF16Variant :: UTF16BE )
379+ utf_16:: to_ucs4 ( iter , omit_invalid, supress_error, UTF16Variant :: UTF16BE )
284380 }
285381 Encodings :: UTF_32 => {
286- utf_32:: to_ucs4 ( input , omit_invalid, supress_error, UTF32Variant :: UTF32 )
382+ utf_32:: to_ucs4 ( iter , omit_invalid, supress_error, UTF32Variant :: UTF32 )
287383 }
288384 Encodings :: UTF_32LE => {
289- utf_32:: to_ucs4 ( input , omit_invalid, supress_error, UTF32Variant :: UTF32LE )
385+ utf_32:: to_ucs4 ( iter , omit_invalid, supress_error, UTF32Variant :: UTF32LE )
290386 }
291387 Encodings :: UTF_32BE => {
292- utf_32:: to_ucs4 ( input , omit_invalid, supress_error, UTF32Variant :: UTF32BE )
388+ utf_32:: to_ucs4 ( iter , omit_invalid, supress_error, UTF32Variant :: UTF32BE )
293389 }
294- Encodings :: ASCII => ascii:: to_ucs4 ( input , omit_invalid, supress_error) ,
390+ Encodings :: ASCII => ascii:: to_ucs4 ( iter , omit_invalid, supress_error) ,
295391 } ;
296392
297- let ( output_exit_code, output) = match to {
298- Encodings :: UTF_8 => utf_8:: from_ucs4 ( input. as_slice ( ) , omit_invalid, supress_error) ,
299- Encodings :: UTF_16 => utf_16:: from_ucs4 (
300- input. as_slice ( ) ,
301- omit_invalid,
302- supress_error,
303- UTF16Variant :: UTF16 ,
304- ) ,
305- Encodings :: UTF_16LE => utf_16:: from_ucs4 (
306- input. as_slice ( ) ,
307- omit_invalid,
308- supress_error,
309- UTF16Variant :: UTF16LE ,
310- ) ,
311- Encodings :: UTF_16BE => utf_16:: from_ucs4 (
312- input. as_slice ( ) ,
313- omit_invalid,
314- supress_error,
315- UTF16Variant :: UTF16BE ,
316- ) ,
317- Encodings :: UTF_32 => utf_32:: from_ucs4 (
318- input. as_slice ( ) ,
319- omit_invalid,
320- supress_error,
321- UTF32Variant :: UTF32 ,
322- ) ,
323- Encodings :: UTF_32LE => utf_32:: from_ucs4 (
324- input. as_slice ( ) ,
325- omit_invalid,
326- supress_error,
327- UTF32Variant :: UTF32LE ,
328- ) ,
329- Encodings :: UTF_32BE => utf_32:: from_ucs4 (
330- input. as_slice ( ) ,
331- omit_invalid,
332- supress_error,
333- UTF32Variant :: UTF32BE ,
334- ) ,
335- Encodings :: ASCII => ascii:: from_ucs4 ( input. as_slice ( ) , omit_invalid, supress_error) ,
393+ let expected = match to {
394+ Encodings :: UTF_8 => utf_8:: from_ucs4 ( ucs4, omit_invalid, supress_error) ,
395+ Encodings :: UTF_16 => {
396+ utf_16:: from_ucs4 ( ucs4, omit_invalid, supress_error, UTF16Variant :: UTF16 )
397+ }
398+ Encodings :: UTF_16BE => {
399+ utf_16:: from_ucs4 ( ucs4, omit_invalid, supress_error, UTF16Variant :: UTF16BE )
400+ }
401+ Encodings :: UTF_16LE => {
402+ utf_16:: from_ucs4 ( ucs4, omit_invalid, supress_error, UTF16Variant :: UTF16LE )
403+ }
404+ Encodings :: UTF_32 => {
405+ utf_32:: from_ucs4 ( ucs4, omit_invalid, supress_error, UTF32Variant :: UTF32 )
406+ }
407+ Encodings :: UTF_32LE => {
408+ utf_32:: from_ucs4 ( ucs4, omit_invalid, supress_error, UTF32Variant :: UTF32LE )
409+ }
410+ Encodings :: UTF_32BE => {
411+ utf_32:: from_ucs4 ( ucs4, omit_invalid, supress_error, UTF32Variant :: UTF32BE )
412+ }
413+ Encodings :: ASCII => ascii:: from_ucs4 ( ucs4, omit_invalid, supress_error) ,
336414 } ;
337415
338- let exit_code = input_exit_code. max ( output_exit_code) ;
339-
340- ( output, exit_code)
416+ expected. for_each ( |byte| {
417+ io:: stdout ( ) . write_all ( & [ byte] ) . unwrap ( ) ;
418+ io:: stdout ( ) . flush ( ) . unwrap ( ) ;
419+ } ) ;
341420}
421+
342422fn charmap_conversion (
343423 from : & Charmap ,
344424 to : & Charmap ,
345- input : & [ u8 ] ,
425+ input : CircularBuffer < Box < dyn Read > > ,
346426 omit_invalid : bool ,
347427 suppress_error : bool ,
348- ) -> ( Vec < u8 > , u32 ) {
349- let mut output = Vec :: new ( ) ;
350- let mut error_count = 0 ;
428+ ) {
429+ let mut buffer = Vec :: new ( ) ;
430+ let stdout = io:: stdout ( ) ;
431+ let mut stdout = stdout. lock ( ) ;
351432
352- let mut i = 0 ;
353- while i < input . len ( ) {
433+ for byte in input {
434+ buffer . push ( byte ) ;
354435 let mut found = false ;
355436 for ( _, entry) in & from. entries {
356- if input [ i.. ] . starts_with ( & entry. encoding ) {
437+ if buffer . starts_with ( & entry. encoding ) {
357438 if let Some ( to_entry) = to
358439 . entries
359440 . values ( )
360441 . find ( |e| e. symbolic_name == entry. symbolic_name )
361442 {
362- output. extend_from_slice ( & to_entry. encoding ) ;
363- i += entry. encoding . len ( ) ;
443+ if let Err ( e) = stdout. write_all ( & to_entry. encoding ) {
444+ eprintln ! ( "Error writing to stdout: {}" , e) ;
445+ }
446+ if let Err ( e) = stdout. flush ( ) {
447+ eprintln ! ( "Error flushing stdout: {}" , e) ;
448+ }
449+ buffer. clear ( ) ;
364450 found = true ;
365451 break ;
366452 }
367453 }
368454 }
369-
370- if !found {
455+ if !found && buffer. len ( ) >= from. header . mb_cur_max {
371456 if !suppress_error {
372- eprintln ! ( "Error: Invalid or unmapped character at position {}" , i ) ;
457+ eprintln ! ( "Error: Invalid or unmapped character" ) ;
373458 }
374- error_count += 1 ;
375459 if omit_invalid {
376- i += 1 ;
460+ buffer . clear ( ) ;
377461 } else {
378- output. push ( input[ i] ) ;
379- i += 1 ;
462+ if let Err ( e) = stdout. write_all ( & [ buffer[ 0 ] ] ) {
463+ eprintln ! ( "Error writing to stdout: {}" , e) ;
464+ }
465+ if let Err ( e) = stdout. flush ( ) {
466+ eprintln ! ( "Error flushing stdout: {}" , e) ;
467+ }
468+ buffer. remove ( 0 ) ;
380469 }
381470 }
382471 }
383472
384- let exit_code = if error_count > 0 { 1 } else { 0 } ;
385- ( output, exit_code)
473+ for & byte in & buffer {
474+ if !omit_invalid {
475+ if let Err ( e) = stdout. write_all ( & [ byte] ) {
476+ eprintln ! ( "Error writing to stdout: {}" , e) ;
477+ }
478+ if let Err ( e) = stdout. flush ( ) {
479+ eprintln ! ( "Error flushing stdout: {}" , e) ;
480+ }
481+ }
482+ if !suppress_error {
483+ eprintln ! ( "Error: Invalid or unmapped character at end of input" ) ;
484+ }
485+ }
386486}
387487
388488fn main ( ) -> Result < ( ) , Box < dyn std:: error:: Error > > {
@@ -428,35 +528,14 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
428528 None => vec ! [ Box :: new( io:: stdin( ) . lock( ) ) ] ,
429529 } ;
430530
431- for mut input in inputs {
432- let mut inp_buf = Vec :: new ( ) ;
433- input. read_to_end ( & mut inp_buf) ?;
434-
531+ for input in inputs {
532+ let buf = CircularBuffer :: new ( input) ;
435533 match ( & from_codeset, & to_codeset) {
436534 ( CodesetType :: Encoding ( from) , CodesetType :: Encoding ( to) ) => {
437- let ( output, exit_code) = encoding_conversion (
438- from,
439- to,
440- & inp_buf,
441- args. omit_invalid ,
442- args. suppress_messages ,
443- ) ;
444-
445- io:: stdout ( ) . write_all ( & output) ?;
446- exit ( exit_code as i32 ) ;
535+ encoding_conversion ( from, to, buf, args. omit_invalid , args. suppress_messages ) ;
447536 }
448-
449537 ( CodesetType :: Charmap ( from) , CodesetType :: Charmap ( to) ) => {
450- let ( output, exit_code) = charmap_conversion (
451- from,
452- to,
453- & inp_buf,
454- args. omit_invalid ,
455- args. suppress_messages ,
456- ) ;
457-
458- io:: stdout ( ) . write_all ( & output) ?;
459- exit ( exit_code as i32 ) ;
538+ charmap_conversion ( from, to, buf, args. omit_invalid , args. suppress_messages ) ;
460539 }
461540 _ => {
462541 eprintln ! (
0 commit comments