@@ -849,6 +849,15 @@ impl Sub<BytePos> for NonNarrowChar {
849849 }
850850}
851851
852+ /// Identifies an offset of a character that was normalized away from `SourceFile`.
853+ #[ derive( Copy , Clone , RustcEncodable , RustcDecodable , Eq , PartialEq , Debug ) ]
854+ pub struct NormalizedPos {
855+ /// The absolute offset of the character in the `SourceMap`.
856+ pub pos : BytePos ,
857+ /// The difference between original and normalized string at position.
858+ pub diff : u32 ,
859+ }
860+
852861/// The state of the lazy external source loading mechanism of a `SourceFile`.
853862#[ derive( PartialEq , Eq , Clone ) ]
854863pub enum ExternalSource {
@@ -912,6 +921,8 @@ pub struct SourceFile {
912921 pub multibyte_chars : Vec < MultiByteChar > ,
913922 /// Width of characters that are not narrow in the source code.
914923 pub non_narrow_chars : Vec < NonNarrowChar > ,
924+ /// Locations of characters removed during normalization.
925+ pub normalized_pos : Vec < NormalizedPos > ,
915926 /// A hash of the filename, used for speeding up hashing in incremental compilation.
916927 pub name_hash : u128 ,
917928}
@@ -978,6 +989,9 @@ impl Encodable for SourceFile {
978989 } ) ?;
979990 s. emit_struct_field ( "name_hash" , 8 , |s| {
980991 self . name_hash . encode ( s)
992+ } ) ?;
993+ s. emit_struct_field ( "normalized_pos" , 9 , |s| {
994+ self . normalized_pos . encode ( s)
981995 } )
982996 } )
983997 }
@@ -1028,6 +1042,8 @@ impl Decodable for SourceFile {
10281042 d. read_struct_field ( "non_narrow_chars" , 7 , |d| Decodable :: decode ( d) ) ?;
10291043 let name_hash: u128 =
10301044 d. read_struct_field ( "name_hash" , 8 , |d| Decodable :: decode ( d) ) ?;
1045+ let normalized_pos: Vec < NormalizedPos > =
1046+ d. read_struct_field ( "normalized_pos" , 9 , |d| Decodable :: decode ( d) ) ?;
10311047 Ok ( SourceFile {
10321048 name,
10331049 name_was_remapped,
@@ -1044,6 +1060,7 @@ impl Decodable for SourceFile {
10441060 lines,
10451061 multibyte_chars,
10461062 non_narrow_chars,
1063+ normalized_pos,
10471064 name_hash,
10481065 } )
10491066 } )
@@ -1062,8 +1079,7 @@ impl SourceFile {
10621079 unmapped_path : FileName ,
10631080 mut src : String ,
10641081 start_pos : BytePos ) -> Result < SourceFile , OffsetOverflowError > {
1065- remove_bom ( & mut src) ;
1066- normalize_newlines ( & mut src) ;
1082+ let normalized_pos = normalize_src ( & mut src) ;
10671083
10681084 let src_hash = {
10691085 let mut hasher: StableHasher = StableHasher :: new ( ) ;
@@ -1096,6 +1112,7 @@ impl SourceFile {
10961112 lines,
10971113 multibyte_chars,
10981114 non_narrow_chars,
1115+ normalized_pos,
10991116 name_hash,
11001117 } )
11011118 }
@@ -1224,18 +1241,27 @@ impl SourceFile {
12241241 }
12251242}
12261243
1244+ /// Normalizes the source code and records the normalizations.
1245+ fn normalize_src ( src : & mut String ) -> Vec < NormalizedPos > {
1246+ let mut normalized_pos = vec ! [ ] ;
1247+ remove_bom ( src, & mut normalized_pos) ;
1248+ normalize_newlines ( src, & mut normalized_pos) ;
1249+ normalized_pos
1250+ }
1251+
12271252/// Removes UTF-8 BOM, if any.
1228- fn remove_bom ( src : & mut String ) {
1253+ fn remove_bom ( src : & mut String , normalized_pos : & mut Vec < NormalizedPos > ) {
12291254 if src. starts_with ( "\u{feff} " ) {
12301255 src. drain ( ..3 ) ;
1256+ normalized_pos. push ( NormalizedPos { pos : BytePos ( 0 ) , diff : 3 } ) ;
12311257 }
12321258}
12331259
12341260
12351261/// Replaces `\r\n` with `\n` in-place in `src`.
12361262///
12371263/// Returns nothing; no error is reported for a lone `\r` in the string.
1238- fn normalize_newlines ( src : & mut String ) {
1264+ fn normalize_newlines ( src : & mut String , normalized_pos : & mut Vec < NormalizedPos > ) {
12391265 if !src. as_bytes ( ) . contains ( & b'\r' ) {
12401266 return ;
12411267 }
@@ -1248,6 +1274,8 @@ fn normalize_newlines(src: &mut String) {
12481274 let mut buf = std:: mem:: replace ( src, String :: new ( ) ) . into_bytes ( ) ;
12491275 let mut gap_len = 0 ;
12501276 let mut tail = buf. as_mut_slice ( ) ;
1277+ let mut cursor = 0 ;
1278+ let original_gap = normalized_pos. last ( ) . map_or ( 0 , |l| l. diff ) ;
12511279 loop {
12521280 let idx = match find_crlf ( & tail[ gap_len..] ) {
12531281 None => tail. len ( ) ,
@@ -1258,7 +1286,12 @@ fn normalize_newlines(src: &mut String) {
12581286 if tail. len ( ) == gap_len {
12591287 break ;
12601288 }
1289+ cursor += idx - gap_len;
12611290 gap_len += 1 ;
1291+ normalized_pos. push ( NormalizedPos {
1292+ pos : BytePos :: from_usize ( cursor + 1 ) ,
1293+ diff : original_gap + gap_len as u32 ,
1294+ } ) ;
12621295 }
12631296
12641297 // Account for removed `\r`.
0 commit comments