@@ -115,6 +115,10 @@ impl Sub for CharPos {
115115/// are *absolute* positions from the beginning of the codemap, not positions
116116/// relative to FileMaps. Methods on the CodeMap can be used to relate spans back
117117/// to the original source.
118+ /// You must be careful if the span crosses more than one file - you will not be
119+ /// able to use many of the functions on spans in codemap and you cannot assume
120+ /// that the length of the span = hi - lo; there may be space in the BytePos
121+ /// range between files.
118122#[ derive( Clone , Copy , Hash ) ]
119123pub struct Span {
120124 pub lo : BytePos ,
@@ -339,7 +343,7 @@ pub struct MultiByteChar {
339343 pub bytes : usize ,
340344}
341345
342- /// A single source in the CodeMap
346+ /// A single source in the CodeMap.
343347pub struct FileMap {
344348 /// The name of the file that the source came from, source that doesn't
345349 /// originate from files has names between angle brackets by convention,
@@ -508,6 +512,9 @@ impl FileMap {
508512 lines. get ( line_number) . map ( |& line| {
509513 let begin: BytePos = line - self . start_pos ;
510514 let begin = begin. to_usize ( ) ;
515+ // We can't use `lines.get(line_number+1)` because we might
516+ // be parsing when we call this function and thus the current
517+ // line is the last one we have line info for.
511518 let slice = & src[ begin..] ;
512519 match slice. find ( '\n' ) {
513520 Some ( e) => & slice[ ..e] ,
@@ -598,27 +605,27 @@ impl CodeMap {
598605 Ok ( self . new_filemap ( path. to_str ( ) . unwrap ( ) . to_string ( ) , src) )
599606 }
600607
608+ fn next_start_pos ( & self ) -> usize {
609+ let files = self . files . borrow ( ) ;
610+ match files. last ( ) {
611+ None => 0 ,
612+ // Add one so there is some space between files. This lets us distinguish
613+ // positions in the codemap, even in the presence of zero-length files.
614+ Some ( last) => last. end_pos . to_usize ( ) + 1 ,
615+ }
616+ }
617+
618+ /// Creates a new filemap without setting its line information. If you don't
619+ /// intend to set the line information yourself, you should use new_filemap_and_lines.
601620 pub fn new_filemap ( & self , filename : FileName , mut src : String ) -> Rc < FileMap > {
621+ let start_pos = self . next_start_pos ( ) ;
602622 let mut files = self . files . borrow_mut ( ) ;
603- let start_pos = match files. last ( ) {
604- None => 0 ,
605- Some ( last) => last. end_pos . to_usize ( ) ,
606- } ;
607623
608624 // Remove utf-8 BOM if any.
609625 if src. starts_with ( "\u{feff} " ) {
610626 src. drain ( ..3 ) ;
611627 }
612628
613- // Append '\n' in case it's not already there.
614- // This is a workaround to prevent CodeMap.lookup_filemap_idx from
615- // accidentally overflowing into the next filemap in case the last byte
616- // of span is also the last byte of filemap, which leads to incorrect
617- // results from CodeMap.span_to_*.
618- if !src. is_empty ( ) && !src. ends_with ( "\n " ) {
619- src. push ( '\n' ) ;
620- }
621-
622629 let end_pos = start_pos + src. len ( ) ;
623630
624631 let filemap = Rc :: new ( FileMap {
@@ -645,11 +652,8 @@ impl CodeMap {
645652 mut file_local_lines : Vec < BytePos > ,
646653 mut file_local_multibyte_chars : Vec < MultiByteChar > )
647654 -> Rc < FileMap > {
655+ let start_pos = self . next_start_pos ( ) ;
648656 let mut files = self . files . borrow_mut ( ) ;
649- let start_pos = match files. last ( ) {
650- None => 0 ,
651- Some ( last) => last. end_pos . to_usize ( ) ,
652- } ;
653657
654658 let end_pos = Pos :: from_usize ( start_pos + source_len) ;
655659 let start_pos = Pos :: from_usize ( start_pos) ;
@@ -686,39 +690,61 @@ impl CodeMap {
686690
687691 /// Lookup source information about a BytePos
688692 pub fn lookup_char_pos ( & self , pos : BytePos ) -> Loc {
689- let FileMapAndLine { fm : f, line : a} = self . lookup_line ( pos) ;
690- let line = a + 1 ; // Line numbers start at 1
691693 let chpos = self . bytepos_to_file_charpos ( pos) ;
692- let linebpos = ( * f. lines . borrow ( ) ) [ a] ;
693- let linechpos = self . bytepos_to_file_charpos ( linebpos) ;
694- debug ! ( "byte pos {:?} is on the line at byte pos {:?}" ,
695- pos, linebpos) ;
696- debug ! ( "char pos {:?} is on the line at char pos {:?}" ,
697- chpos, linechpos) ;
698- debug ! ( "byte is on line: {}" , line) ;
699- assert ! ( chpos >= linechpos) ;
700- Loc {
701- file : f,
702- line : line,
703- col : chpos - linechpos
694+ match self . lookup_line ( pos) {
695+ Ok ( FileMapAndLine { fm : f, line : a } ) => {
696+ let line = a + 1 ; // Line numbers start at 1
697+ let linebpos = ( * f. lines . borrow ( ) ) [ a] ;
698+ let linechpos = self . bytepos_to_file_charpos ( linebpos) ;
699+ debug ! ( "byte pos {:?} is on the line at byte pos {:?}" ,
700+ pos, linebpos) ;
701+ debug ! ( "char pos {:?} is on the line at char pos {:?}" ,
702+ chpos, linechpos) ;
703+ debug ! ( "byte is on line: {}" , line) ;
704+ assert ! ( chpos >= linechpos) ;
705+ Loc {
706+ file : f,
707+ line : line,
708+ col : chpos - linechpos,
709+ }
710+ }
711+ Err ( f) => {
712+ Loc {
713+ file : f,
714+ line : 0 ,
715+ col : chpos,
716+ }
717+ }
704718 }
705719 }
706720
707- fn lookup_line ( & self , pos : BytePos ) -> FileMapAndLine {
721+ // If the relevant filemap is empty, we don't return a line number.
722+ fn lookup_line ( & self , pos : BytePos ) -> Result < FileMapAndLine , Rc < FileMap > > {
708723 let idx = self . lookup_filemap_idx ( pos) ;
709724
710725 let files = self . files . borrow ( ) ;
711726 let f = ( * files) [ idx] . clone ( ) ;
727+
728+ let len = f. lines . borrow ( ) . len ( ) ;
729+ if len == 0 {
730+ return Err ( f) ;
731+ }
732+
712733 let mut a = 0 ;
713734 {
714735 let lines = f. lines . borrow ( ) ;
715736 let mut b = lines. len ( ) ;
716737 while b - a > 1 {
717738 let m = ( a + b) / 2 ;
718- if ( * lines) [ m] > pos { b = m; } else { a = m; }
739+ if ( * lines) [ m] > pos {
740+ b = m;
741+ } else {
742+ a = m;
743+ }
719744 }
745+ assert ! ( a <= lines. len( ) ) ;
720746 }
721- FileMapAndLine { fm : f, line : a}
747+ Ok ( FileMapAndLine { fm : f, line : a } )
722748 }
723749
724750 pub fn lookup_char_pos_adj ( & self , pos : BytePos ) -> LocWithOpt {
@@ -880,12 +906,15 @@ impl CodeMap {
880906 CharPos ( bpos. to_usize ( ) - map. start_pos . to_usize ( ) - total_extra_bytes)
881907 }
882908
909+ // Return the index of the filemap (in self.files) which contains pos.
883910 fn lookup_filemap_idx ( & self , pos : BytePos ) -> usize {
884911 let files = self . files . borrow ( ) ;
885912 let files = & * files;
886- let len = files. len ( ) ;
913+ let count = files. len ( ) ;
914+
915+ // Binary search for the filemap.
887916 let mut a = 0 ;
888- let mut b = len ;
917+ let mut b = count ;
889918 while b - a > 1 {
890919 let m = ( a + b) / 2 ;
891920 if files[ m] . start_pos > pos {
@@ -894,26 +923,8 @@ impl CodeMap {
894923 a = m;
895924 }
896925 }
897- // There can be filemaps with length 0. These have the same start_pos as
898- // the previous filemap, but are not the filemaps we want (because they
899- // are length 0, they cannot contain what we are looking for). So,
900- // rewind until we find a useful filemap.
901- loop {
902- let lines = files[ a] . lines . borrow ( ) ;
903- let lines = lines;
904- if !lines. is_empty ( ) {
905- break ;
906- }
907- if a == 0 {
908- panic ! ( "position {} does not resolve to a source location" ,
909- pos. to_usize( ) ) ;
910- }
911- a -= 1 ;
912- }
913- if a >= len {
914- panic ! ( "position {} does not resolve to a source location" ,
915- pos. to_usize( ) )
916- }
926+
927+ assert ! ( a < count, "position {} does not resolve to a source location" , pos. to_usize( ) ) ;
917928
918929 return a;
919930 }
@@ -1027,10 +1038,13 @@ mod tests {
10271038 let fm = cm. new_filemap ( "blork.rs" . to_string ( ) ,
10281039 "first line.\n second line" . to_string ( ) ) ;
10291040 fm. next_line ( BytePos ( 0 ) ) ;
1041+ // Test we can get lines with partial line info.
10301042 assert_eq ! ( fm. get_line( 0 ) , Some ( "first line." ) ) ;
1031- // TESTING BROKEN BEHAVIOR:
1043+ // TESTING BROKEN BEHAVIOR: line break declared before actual line break.
10321044 fm. next_line ( BytePos ( 10 ) ) ;
10331045 assert_eq ! ( fm. get_line( 1 ) , Some ( "." ) ) ;
1046+ fm. next_line ( BytePos ( 12 ) ) ;
1047+ assert_eq ! ( fm. get_line( 2 ) , Some ( "second line" ) ) ;
10341048 }
10351049
10361050 #[ test]
@@ -1056,9 +1070,9 @@ mod tests {
10561070
10571071 fm1. next_line ( BytePos ( 0 ) ) ;
10581072 fm1. next_line ( BytePos ( 12 ) ) ;
1059- fm2. next_line ( BytePos ( 24 ) ) ;
1060- fm3. next_line ( BytePos ( 24 ) ) ;
1061- fm3. next_line ( BytePos ( 34 ) ) ;
1073+ fm2. next_line ( fm2 . start_pos ) ;
1074+ fm3. next_line ( fm3 . start_pos ) ;
1075+ fm3. next_line ( fm3 . start_pos + BytePos ( 12 ) ) ;
10621076
10631077 cm
10641078 }
@@ -1068,11 +1082,15 @@ mod tests {
10681082 // Test lookup_byte_offset
10691083 let cm = init_code_map ( ) ;
10701084
1071- let fmabp1 = cm. lookup_byte_offset ( BytePos ( 22 ) ) ;
1085+ let fmabp1 = cm. lookup_byte_offset ( BytePos ( 23 ) ) ;
10721086 assert_eq ! ( fmabp1. fm. name, "blork.rs" ) ;
1073- assert_eq ! ( fmabp1. pos, BytePos ( 22 ) ) ;
1087+ assert_eq ! ( fmabp1. pos, BytePos ( 23 ) ) ;
1088+
1089+ let fmabp1 = cm. lookup_byte_offset ( BytePos ( 24 ) ) ;
1090+ assert_eq ! ( fmabp1. fm. name, "empty.rs" ) ;
1091+ assert_eq ! ( fmabp1. pos, BytePos ( 0 ) ) ;
10741092
1075- let fmabp2 = cm. lookup_byte_offset ( BytePos ( 24 ) ) ;
1093+ let fmabp2 = cm. lookup_byte_offset ( BytePos ( 25 ) ) ;
10761094 assert_eq ! ( fmabp2. fm. name, "blork2.rs" ) ;
10771095 assert_eq ! ( fmabp2. pos, BytePos ( 0 ) ) ;
10781096 }
@@ -1085,7 +1103,7 @@ mod tests {
10851103 let cp1 = cm. bytepos_to_file_charpos ( BytePos ( 22 ) ) ;
10861104 assert_eq ! ( cp1, CharPos ( 22 ) ) ;
10871105
1088- let cp2 = cm. bytepos_to_file_charpos ( BytePos ( 24 ) ) ;
1106+ let cp2 = cm. bytepos_to_file_charpos ( BytePos ( 25 ) ) ;
10891107 assert_eq ! ( cp2, CharPos ( 0 ) ) ;
10901108 }
10911109
@@ -1099,7 +1117,7 @@ mod tests {
10991117 assert_eq ! ( loc1. line, 2 ) ;
11001118 assert_eq ! ( loc1. col, CharPos ( 10 ) ) ;
11011119
1102- let loc2 = cm. lookup_char_pos ( BytePos ( 24 ) ) ;
1120+ let loc2 = cm. lookup_char_pos ( BytePos ( 25 ) ) ;
11031121 assert_eq ! ( loc2. file. name, "blork2.rs" ) ;
11041122 assert_eq ! ( loc2. line, 1 ) ;
11051123 assert_eq ! ( loc2. col, CharPos ( 0 ) ) ;
@@ -1115,18 +1133,18 @@ mod tests {
11151133 "first line€€.\n € second line" . to_string ( ) ) ;
11161134
11171135 fm1. next_line ( BytePos ( 0 ) ) ;
1118- fm1. next_line ( BytePos ( 22 ) ) ;
1119- fm2. next_line ( BytePos ( 40 ) ) ;
1120- fm2. next_line ( BytePos ( 58 ) ) ;
1136+ fm1. next_line ( BytePos ( 28 ) ) ;
1137+ fm2. next_line ( fm2 . start_pos ) ;
1138+ fm2. next_line ( fm2 . start_pos + BytePos ( 20 ) ) ;
11211139
11221140 fm1. record_multibyte_char ( BytePos ( 3 ) , 3 ) ;
11231141 fm1. record_multibyte_char ( BytePos ( 9 ) , 3 ) ;
11241142 fm1. record_multibyte_char ( BytePos ( 12 ) , 3 ) ;
11251143 fm1. record_multibyte_char ( BytePos ( 15 ) , 3 ) ;
11261144 fm1. record_multibyte_char ( BytePos ( 18 ) , 3 ) ;
1127- fm2. record_multibyte_char ( BytePos ( 50 ) , 3 ) ;
1128- fm2. record_multibyte_char ( BytePos ( 53 ) , 3 ) ;
1129- fm2. record_multibyte_char ( BytePos ( 58 ) , 3 ) ;
1145+ fm2. record_multibyte_char ( fm2 . start_pos + BytePos ( 10 ) , 3 ) ;
1146+ fm2. record_multibyte_char ( fm2 . start_pos + BytePos ( 13 ) , 3 ) ;
1147+ fm2. record_multibyte_char ( fm2 . start_pos + BytePos ( 18 ) , 3 ) ;
11301148
11311149 cm
11321150 }
0 commit comments