@@ -27,10 +27,6 @@ pub(crate) enum LineEndings {
2727impl LineEndings {
2828 /// Replaces `\r\n` with `\n` in-place in `src`.
2929 pub ( crate ) fn normalize ( src : String ) -> ( String , LineEndings ) {
30- if !src. as_bytes ( ) . contains ( & b'\r' ) {
31- return ( src, LineEndings :: Unix ) ;
32- }
33-
3430 // We replace `\r\n` with `\n` in-place, which doesn't break utf-8 encoding.
3531 // While we *can* call `as_mut_vec` and do surgery on the live string
3632 // directly, let's rather steal the contents of `src`. This makes the code
@@ -39,10 +35,19 @@ impl LineEndings {
3935 let mut buf = src. into_bytes ( ) ;
4036 let mut gap_len = 0 ;
4137 let mut tail = buf. as_mut_slice ( ) ;
38+ let mut crlf_seen = false ;
39+
40+ let find_crlf = |src : & [ u8 ] | src. windows ( 2 ) . position ( |it| it == b"\r \n " ) ;
41+
4242 loop {
4343 let idx = match find_crlf ( & tail[ gap_len..] ) {
44- None => tail. len ( ) ,
45- Some ( idx) => idx + gap_len,
44+ None if crlf_seen => tail. len ( ) ,
45+ // SAFETY: buf is unchanged and therefore still contains utf8 data
46+ None => return ( unsafe { String :: from_utf8_unchecked ( buf) } , LineEndings :: Unix ) ,
47+ Some ( idx) => {
48+ crlf_seen = true ;
49+ idx + gap_len
50+ }
4651 } ;
4752 tail. copy_within ( gap_len..idx, 0 ) ;
4853 tail = & mut tail[ idx - gap_len..] ;
@@ -54,15 +59,48 @@ impl LineEndings {
5459
5560 // Account for removed `\r`.
5661 // After `set_len`, `buf` is guaranteed to contain utf-8 again.
57- let new_len = buf. len ( ) - gap_len;
5862 let src = unsafe {
63+ let new_len = buf. len ( ) - gap_len;
5964 buf. set_len ( new_len) ;
6065 String :: from_utf8_unchecked ( buf)
6166 } ;
62- return ( src, LineEndings :: Dos ) ;
67+ ( src, LineEndings :: Dos )
68+ }
69+ }
6370
64- fn find_crlf ( src : & [ u8 ] ) -> Option < usize > {
65- src. windows ( 2 ) . position ( |it| it == b"\r \n " )
66- }
// Unit tests for `LineEndings::normalize`.
//
// Each test feeds a small literal through `normalize` and checks both the
// returned text and the detected line-ending kind. The escape sequences in
// the fixtures must be adjacent (`\r\n`, with no space in between), otherwise
// the input contains no CRLF pair at all and the `Dos` cases cannot trigger.
#[cfg(test)]
mod tests {
    use super::*;

    /// Input that only uses `\n` comes back unchanged and is reported as `Unix`.
    #[test]
    fn unix() {
        let src = "a\nb\nc\n\n\n\n";
        let (res, endings) = LineEndings::normalize(src.into());
        assert_eq!(endings, LineEndings::Unix);
        assert_eq!(res, src);
    }

    /// Every `\r\n` is collapsed to `\n`, including leading, trailing and
    /// consecutive pairs; the result is reported as `Dos`.
    #[test]
    fn dos() {
        let src = "\r\na\r\n\r\nb\r\nc\r\n\r\n\r\n\r\n";
        let (res, endings) = LineEndings::normalize(src.into());
        assert_eq!(endings, LineEndings::Dos);
        assert_eq!(res, "\na\n\nb\nc\n\n\n\n");
    }

    /// A mix of `\r\n` and bare `\n`: the CRLF pairs are rewritten, the bare
    /// `\n` bytes are kept, and a single CRLF is enough to report `Dos`.
    #[test]
    fn mixed() {
        let src = "a\r\nb\r\nc\r\n\n\r\n\n";
        let (res, endings) = LineEndings::normalize(src.into());
        assert_eq!(endings, LineEndings::Dos);
        assert_eq!(res, "a\nb\nc\n\n\n\n");
    }

    /// Input without any line break at all is returned unchanged as `Unix`.
    #[test]
    fn none() {
        let src = "abc";
        let (res, endings) = LineEndings::normalize(src.into());
        assert_eq!(endings, LineEndings::Unix);
        assert_eq!(res, src);
    }
}
0 commit comments