@@ -1045,6 +1045,7 @@ impl SourceFile {
10451045 mut src : String ,
10461046 start_pos : BytePos ) -> Result < SourceFile , OffsetOverflowError > {
10471047 remove_bom ( & mut src) ;
1048+ normalize_newlines ( & mut src) ;
10481049
10491050 let src_hash = {
10501051 let mut hasher: StableHasher < u128 > = StableHasher :: new ( ) ;
@@ -1212,6 +1213,61 @@ fn remove_bom(src: &mut String) {
12121213 }
12131214}
12141215
1216+
/// Replaces every `\r\n` pair in `src` with a single `\n`, in place.
///
/// A lone `\r` (one that is not immediately followed by `\n`) is left
/// untouched. The function never fails and returns nothing; strings that
/// contain no `\r` at all are returned unchanged without any copying.
fn normalize_newlines(src: &mut String) {
    // Fast path: without any `\r` there is nothing to normalize.
    if !src.as_bytes().contains(&b'\r') {
        return;
    }

    // We replace `\r\n` with `\n` in-place, which doesn't break utf-8 encoding.
    // While we *can* call `as_mut_vec` and do surgery on the live string
    // directly, let's rather steal the contents of `src`. This makes the code
    // safe even if a panic occurs: `src` is then merely left empty.
    let mut buf = std::mem::replace(src, String::new()).into_bytes();

    // `gap_len` counts the `\r` bytes dropped so far. Invariant at the top of
    // each iteration: the first `gap_len` bytes of `tail` are stale (already
    // copied or dropped) and everything after them is still unprocessed.
    let mut gap_len = 0;
    let mut tail = buf.as_mut_slice();
    loop {
        // End of the next chunk to keep: either the `\r` of the next `\r\n`
        // pair, or the end of the buffer when no pair is left.
        let idx = match find_crlf(&tail[gap_len..]) {
            None => tail.len(),
            Some(idx) => idx + gap_len,
        };
        // Slide the chunk left over the gap, then advance past the bytes
        // that are now in their final position.
        tail.copy_within(gap_len..idx, 0);
        tail = &mut tail[idx - gap_len..];
        if tail.len() == gap_len {
            // Only stale bytes remain: the whole input has been processed.
            break;
        }
        // The `\r` that `find_crlf` stopped at joins the gap.
        gap_len += 1;
    }

    // Account for removed `\r`: drop the stale bytes left at the end.
    let new_len = buf.len() - gap_len;
    buf.truncate(new_len);

    // SAFETY: `buf` started out as the bytes of a valid `String`, and the only
    // bytes removed were ASCII `\r` (0x0D). An ASCII byte is never part of a
    // multi-byte utf-8 sequence, so the remaining bytes are still valid utf-8.
    *src = unsafe { String::from_utf8_unchecked(buf) };

    /// Returns the index of the `\r` of the first `\r\n` pair in `src`, if any.
    fn find_crlf(src: &[u8]) -> Option<usize> {
        src.windows(2).position(|pair| pair == b"\r\n")
    }
}
1270+
12151271// _____________________________________________________________________________
12161272// Pos, BytePos, CharPos
12171273//
0 commit comments