@@ -214,7 +214,8 @@ pub struct Tokenizer<'a> {
214214 /// ensure that computing the column will give the result in units
215215 /// of UTF-16 characters.
216216 current_line_start_position : usize ,
217- current_position : usize ,
217+ position_difference : u16 ,
218+ current_line_start_difference : u16 ,
218219 current_line_number : u32 ,
219220 var_or_env_functions : SeenStatus ,
220221 source_map_url : Option < & ' a str > ,
@@ -235,8 +236,9 @@ impl<'a> Tokenizer<'a> {
235236 input,
236237 position : 0 ,
237238 current_line_start_position : 0 ,
238- current_position : 0 ,
239+ current_line_start_difference : 0 ,
239240 current_line_number : 0 ,
241+ position_difference : 0 ,
240242 var_or_env_functions : SeenStatus :: DontCare ,
241243 source_map_url : None ,
242244 source_url : None ,
@@ -279,7 +281,12 @@ impl<'a> Tokenizer<'a> {
279281 pub fn current_source_location ( & self ) -> SourceLocation {
280282 SourceLocation {
281283 line : self . current_line_number ,
282- column : ( self . position - self . current_line_start_position + 1 ) as u32 ,
284+ column : (
285+ self . position -
286+ self . current_line_start_position -
287+ ( self . position_difference - self . current_line_start_difference ) as usize
288+ + 1
289+ ) as u32 ,
283290 }
284291 }
285292
@@ -298,7 +305,8 @@ impl<'a> Tokenizer<'a> {
298305 ParserState {
299306 position : self . position ,
300307 current_line_start_position : self . current_line_start_position ,
301- current_position : self . current_position ,
308+ current_line_start_difference : self . current_line_start_difference ,
309+ position_difference : self . position_difference ,
302310 current_line_number : self . current_line_number ,
303311 at_start_of : None ,
304312 }
@@ -308,7 +316,8 @@ impl<'a> Tokenizer<'a> {
308316 pub fn reset ( & mut self , state : & ParserState ) {
309317 self . position = state. position ;
310318 self . current_line_start_position = state. current_line_start_position ;
311- self . current_position = state. current_position ;
319+ self . current_line_start_difference = state. current_line_start_difference ;
320+ self . position_difference = state. position_difference ;
312321 self . current_line_number = state. current_line_number ;
313322 }
314323
@@ -374,7 +383,6 @@ impl<'a> Tokenizer<'a> {
374383 debug_assert ! ( b != b'\r' && b != b'\n' && b != b'\x0C' ) ;
375384 }
376385 }
377- self . current_position = self . current_position . wrapping_add ( n) ;
378386 self . position += n
379387 }
380388
@@ -396,8 +404,7 @@ impl<'a> Tokenizer<'a> {
396404 debug_assert ! ( self . next_byte_unchecked( ) & 0xF0 == 0xF0 ) ;
397405 // This takes two UTF-16 characters to represent, so we
398406 // actually have an undercount.
399- self . current_line_start_position = self . current_line_start_position . wrapping_sub ( 1 ) ;
400- self . current_position = self . current_position . wrapping_add ( 2 ) ;
407+ self . position_difference = self . position_difference . wrapping_sub ( 1 ) ;
401408 self . position += 1 ;
402409 }
403410
@@ -409,7 +416,7 @@ impl<'a> Tokenizer<'a> {
409416 // Continuation bytes contribute to column overcount. Note
410417 // that due to the special case for the 4-byte sequence intro,
411418 // we must use wrapping add here.
412- self . current_line_start_position = self . current_line_start_position . wrapping_add ( 1 ) ;
419+ self . position_difference = self . position_difference . wrapping_add ( 1 ) ;
413420 self . position += 1 ;
414421 }
415422
@@ -422,14 +429,11 @@ impl<'a> Tokenizer<'a> {
422429 if byte & 0xF0 == 0xF0 {
423430 // This takes two UTF-16 characters to represent, so we
424431 // actually have an undercount.
425- self . current_line_start_position = self . current_line_start_position . wrapping_sub ( 1 ) ;
426- self . current_position = self . current_position . wrapping_add ( 2 ) ;
432+ self . position_difference = self . position_difference . wrapping_sub ( 1 ) ;
427433 } else if byte & 0xC0 == 0x80 {
428434 // Note that due to the special case for the 4-byte
429435 // sequence intro, we must use wrapping add here.
430- self . current_line_start_position = self . current_line_start_position . wrapping_add ( 1 ) ;
431- } else {
432- self . current_position = self . current_position . wrapping_add ( 1 ) ;
436+ self . position_difference = self . position_difference . wrapping_add ( 1 ) ;
433437 }
434438 }
435439
@@ -448,12 +452,11 @@ impl<'a> Tokenizer<'a> {
448452 let byte = self . next_byte_unchecked ( ) ;
449453 debug_assert ! ( byte == b'\r' || byte == b'\n' || byte == b'\x0C' ) ;
450454 self . position += 1 ;
451- self . current_position = self . current_position . wrapping_add ( 1 ) ;
452455 if byte == b'\r' && self . next_byte ( ) == Some ( b'\n' ) {
453456 self . position += 1 ;
454- self . current_position = self . current_position . wrapping_add ( 1 ) ;
455457 }
456458 self . current_line_start_position = self . position ;
459+ self . current_line_start_difference = self . position_difference ;
457460 self . current_line_number += 1 ;
458461 }
459462
@@ -467,14 +470,13 @@ impl<'a> Tokenizer<'a> {
467470 fn consume_char ( & mut self ) -> char {
468471 let c = self . next_char ( ) ;
469472 let len_utf8 = c. len_utf8 ( ) ;
473+ let len_utf16 = c. len_utf16 ( ) ;
470474 self . position += len_utf8;
471475 // Note that due to the special case for the 4-byte sequence
472476 // intro, we must use wrapping add here.
473- let len_utf16 = c. len_utf16 ( ) ;
474- self . current_line_start_position = self
475- . current_line_start_position
476- . wrapping_add ( len_utf8 - len_utf16) ;
477- self . current_position = self . current_position . wrapping_add ( len_utf16) ;
477+ self . position_difference = self
478+ . position_difference
479+ . wrapping_add ( ( len_utf8 - len_utf16) as u16 ) ;
478480 c
479481 }
480482
@@ -1164,16 +1166,12 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>,
11641166 }
11651167 } ;
11661168 match_byte ! { b,
1167- b' ' | b'\t' => {
1168- tokenizer. current_position = tokenizer. current_position. wrapping_add( 1 ) ;
1169- } ,
1169+ b' ' | b'\t' => { } ,
11701170 b'\n' | b'\x0C' => {
11711171 newlines += 1 ;
11721172 last_newline = offset;
1173- tokenizer. current_position = tokenizer. current_position. wrapping_add( 1 ) ;
11741173 }
11751174 b'\r' => {
1176- tokenizer. current_position = tokenizer. current_position. wrapping_add( 1 ) ;
11771175 if from_start. as_bytes( ) . get( offset + 1 ) != Some ( & b'\n' ) {
11781176 newlines += 1 ;
11791177 last_newline = offset;
0 commit comments