@@ -361,9 +361,15 @@ impl<'a> Tokenizer<'a> {
         self.input[self.position..].chars().next().unwrap()
     }
 
-    fn seen_newline(&mut self, is_cr: bool) {
-        if is_cr && self.next_byte() == Some(/* LF */ b'\n') {
-            return
+    // Given that a newline has been seen, advance over the newline
+    // and update the state.
+    #[inline]
+    fn consume_newline(&mut self) {
+        let byte = self.next_byte_unchecked();
+        debug_assert!(byte == b'\r' || byte == b'\n' || byte == b'\x0C');
+        self.position += 1;
+        if byte == b'\r' && self.next_byte() == Some(b'\n') {
+            self.position += 1;
         }
         self.current_line_start_position = self.position;
         self.current_line_number += 1;
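
The behavioral core of the change: a CRLF pair is now consumed in a single call, so the line counter can only ever be bumped once per newline, and callers no longer need to report which newline byte they saw. A minimal standalone sketch of that rule (not part of the commit; `skip_newline` and its slice-plus-index cursor are stand-ins for the tokenizer's internals):

    // Advance past one newline, treating "\r\n" as a single newline.
    fn skip_newline(input: &[u8], mut pos: usize) -> usize {
        let byte = input[pos];
        debug_assert!(byte == b'\r' || byte == b'\n' || byte == b'\x0C');
        pos += 1;
        // A lone '\r' is a newline, but "\r\n" is the same newline,
        // so swallow the '\n' in the same call.
        if byte == b'\r' && input.get(pos) == Some(&b'\n') {
            pos += 1;
        }
        pos
    }

    fn main() {
        assert_eq!(skip_newline(b"\r\nx", 0), 2); // CRLF: two bytes, one newline
        assert_eq!(skip_newline(b"\nx", 0), 1);
        assert_eq!(skip_newline(b"\x0Cx", 0), 1); // form feed is a newline in CSS
    }
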
@@ -393,13 +399,8 @@ impl<'a> Tokenizer<'a> {
                 b' ' | b'\t' => {
                     self.advance(1)
                 },
-                b'\n' | b'\x0C' => {
-                    self.advance(1);
-                    self.seen_newline(false);
-                },
-                b'\r' => {
-                    self.advance(1);
-                    self.seen_newline(true);
+                b'\n' | b'\x0C' | b'\r' => {
+                    self.consume_newline();
                 },
                 b'/' => {
                     if self.starts_with(b"/*") {
@@ -421,13 +422,8 @@ impl<'a> Tokenizer<'a> {
                 b' ' | b'\t' => {
                     self.advance(1)
                 },
-                b'\n' | b'\x0C' => {
-                    self.advance(1);
-                    self.seen_newline(false);
-                },
-                b'\r' => {
-                    self.advance(1);
-                    self.seen_newline(true);
+                b'\n' | b'\x0C' | b'\r' => {
+                    self.consume_newline();
                 },
                 b'/' => {
                     if self.starts_with(b"/*") {
@@ -481,13 +477,10 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
     let b = tokenizer.next_byte_unchecked();
     let token = match_byte! { b,
         b' ' | b'\t' => {
-            consume_whitespace(tokenizer, false, false)
+            consume_whitespace(tokenizer, false)
         },
-        b'\n' | b'\x0C' => {
-            consume_whitespace(tokenizer, true, false)
-        },
-        b'\r' => {
-            consume_whitespace(tokenizer, true, true)
+        b'\n' | b'\x0C' | b'\r' => {
+            consume_whitespace(tokenizer, true)
         },
         b'"' => { consume_string(tokenizer, false) },
         b'#' => {
@@ -617,25 +610,21 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
 }
 
 
-fn consume_whitespace<'a>(tokenizer: &mut Tokenizer<'a>, newline: bool, is_cr: bool) -> Token<'a> {
+fn consume_whitespace<'a>(tokenizer: &mut Tokenizer<'a>, newline: bool) -> Token<'a> {
     let start_position = tokenizer.position();
-    tokenizer.advance(1);
     if newline {
-        tokenizer.seen_newline(is_cr)
+        tokenizer.consume_newline();
+    } else {
+        tokenizer.advance(1);
     }
     while !tokenizer.is_eof() {
         let b = tokenizer.next_byte_unchecked();
         match_byte! { b,
             b' ' | b'\t' => {
                 tokenizer.advance(1);
             }
-            b'\n' | b'\x0C' => {
-                tokenizer.advance(1);
-                tokenizer.seen_newline(false);
-            }
-            b'\r' => {
-                tokenizer.advance(1);
-                tokenizer.seen_newline(true);
+            b'\n' | b'\x0C' | b'\r' => {
+                tokenizer.consume_newline();
             }
             _ => {
                 break
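
With `is_cr` gone, callers only report whether the first byte is a newline at all; which flavor it is gets rediscovered by `consume_newline` via `next_byte_unchecked`. That is also why the unconditional `advance(1)` moves into the `else` branch: the helper must still find the newline byte under the cursor. A self-contained sketch of the resulting shape, with a toy cursor standing in for `Tokenizer` (hypothetical names, not the crate's API):

    struct Cursor<'a> { input: &'a [u8], position: usize, line: u32 }

    impl<'a> Cursor<'a> {
        fn is_eof(&self) -> bool { self.position >= self.input.len() }
        fn next_byte_unchecked(&self) -> u8 { self.input[self.position] }
        fn advance(&mut self, n: usize) { self.position += n; }
        fn consume_newline(&mut self) {
            let byte = self.next_byte_unchecked();
            self.position += 1;
            if byte == b'\r' && self.input.get(self.position) == Some(&b'\n') {
                self.position += 1;
            }
            self.line += 1;
        }
        fn consume_whitespace(&mut self, newline: bool) {
            if newline {
                self.consume_newline(); // no advance first: the helper reads the byte itself
            } else {
                self.advance(1);
            }
            while !self.is_eof() {
                match self.next_byte_unchecked() {
                    b' ' | b'\t' => self.advance(1),
                    b'\n' | b'\x0C' | b'\r' => self.consume_newline(),
                    _ => break,
                }
            }
        }
    }

    fn main() {
        let mut c = Cursor { input: b"\r\n \t\n\x0Cx", position: 0, line: 0 };
        c.consume_whitespace(true);
        assert_eq!(c.line, 3); // \r\n, \n, and \x0C: three newlines, not four
        assert_eq!(c.next_byte_unchecked(), b'x');
    }
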
@@ -675,13 +664,8 @@ fn consume_comment<'a>(tokenizer: &mut Tokenizer<'a>) -> &'a str {
                     return contents
                 }
             }
-            b'\n' | b'\x0C' => {
-                tokenizer.advance(1);
-                tokenizer.seen_newline(false);
-            }
-            b'\r' => {
-                tokenizer.advance(1);
-                tokenizer.seen_newline(true);
+            b'\n' | b'\x0C' | b'\r' => {
+                tokenizer.consume_newline();
             }
             _ => {
                 tokenizer.advance(1);
@@ -769,19 +753,8 @@ fn consume_quoted_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool)
                 if !tokenizer.is_eof() {
                     match tokenizer.next_byte_unchecked() {
                         // Escaped newline
-                        b'\n' | b'\x0C' => {
-                            tokenizer.advance(1);
-                            tokenizer.seen_newline(false);
-                        }
-                        b'\r' => {
-                            tokenizer.advance(1);
-                            if tokenizer.next_byte() == Some(b'\n') {
-                                tokenizer.advance(1);
-                            }
-                            // `is_cr = true` is useful to skip \r when the next iteration
-                            // of a loop will call `seen_newline` again for the following \n.
-                            // In this case we’re consuming both in this iteration, so passing `false`.
-                            tokenizer.seen_newline(false);
+                        b'\n' | b'\x0C' | b'\r' => {
+                            tokenizer.consume_newline();
                         }
                         // This pushes one well-formed code point
                         _ => consume_escape_and_write(tokenizer, &mut string_bytes)
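
The string case is where the old design was most awkward: it had to consume a full CRLF inline and then explain in a comment why it passed `is_cr = false`. With the pairing logic centralized, the escaped-newline arm becomes identical to every other newline site. For illustration, a small sketch of the CSS rule at work here (standalone code, not the crate's API; ASCII input only for brevity): a backslash-newline escape contributes nothing to the string's value, but must bump the line count exactly once, even for a two-byte `\r\n`:

    /// Strip backslash-newline escapes from a string body, counting lines.
    fn unescape_newlines(input: &str) -> (String, u32) {
        let bytes = input.as_bytes();
        let (mut out, mut lines, mut i) = (String::new(), 0u32, 0usize);
        while i < bytes.len() {
            if bytes[i] == b'\\' && matches!(bytes.get(i + 1).copied(), Some(b'\n' | b'\x0C' | b'\r')) {
                i += 1; // drop the backslash
                let nl = bytes[i];
                i += 1; // drop the newline byte...
                if nl == b'\r' && bytes.get(i) == Some(&b'\n') {
                    i += 1; // ...and the LF of a CRLF pair
                }
                lines += 1; // one logical newline either way
            } else {
                out.push(bytes[i] as char);
                i += 1;
            }
        }
        (out, lines)
    }

    fn main() {
        assert_eq!(unescape_newlines("a\\\r\nb"), ("ab".to_string(), 1));
    }
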
@@ -1178,18 +1151,17 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>,
                           string: CowRcStr<'a>)
                           -> Token<'a> {
         while !tokenizer.is_eof() {
-            match_byte! { tokenizer.consume_byte(),
+            match_byte! { tokenizer.next_byte_unchecked(),
                 b')' => {
+                    tokenizer.advance(1);
                     break
                 }
-                b' ' | b'\t' => { }
-                b'\n' | b'\x0C' => {
-                    tokenizer.seen_newline(false);
-                }
-                b'\r' => {
-                    tokenizer.seen_newline(true);
+                b' ' | b'\t' => { tokenizer.advance(1); }
+                b'\n' | b'\x0C' | b'\r' => {
+                    tokenizer.consume_newline();
                 }
                 _ => {
+                    tokenizer.advance(1);
                     return consume_bad_url(tokenizer, start_pos);
                 }
             }
@@ -1200,22 +1172,23 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>,
     fn consume_bad_url<'a>(tokenizer: &mut Tokenizer<'a>, start_pos: SourcePosition) -> Token<'a> {
         // Consume up to the closing )
         while !tokenizer.is_eof() {
-            match_byte! { tokenizer.consume_byte(),
+            match_byte! { tokenizer.next_byte_unchecked(),
                 b')' => {
+                    tokenizer.advance(1);
                     break
                 }
                 b'\\' => {
+                    tokenizer.advance(1);
                     if matches!(tokenizer.next_byte(), Some(b')') | Some(b'\\')) {
                         tokenizer.advance(1); // Skip an escaped ')' or '\'
                     }
                 }
-                b'\n' | b'\x0C' => {
-                    tokenizer.seen_newline(false);
+                b'\n' | b'\x0C' | b'\r' => {
+                    tokenizer.consume_newline();
                 }
-                b'\r' => {
-                    tokenizer.seen_newline(true);
+                _ => {
+                    tokenizer.advance(1);
                 }
-                _ => { },
             }
         }
         BadUrl(tokenizer.slice_from(start_pos).into())
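
Both URL loops stop using `consume_byte()` (which peeks and advances in one step) because `consume_newline` insists on finding the newline byte still under the cursor, as its `debug_assert!` documents. The rewrite peeks with `next_byte_unchecked()` and lets each arm advance for itself, since only the arm knows whether the token is one byte or two. A condensed sketch of the before/after control flow (stand-in cursor, hypothetical names):

    // Before: the cursor has already moved past `b` when the arm runs,
    // so a newline arm cannot hand the byte to a shared helper:
    //     let b = cursor.consume_byte(); // peek + advance fused
    //     match b { b'\r' => /* '\r' is already behind the cursor */ ... }

    // After: peek first, advance inside the arm that knows the width.
    fn step(input: &[u8], pos: &mut usize, line: &mut u32) {
        match input[*pos] {
            b'\n' | b'\x0C' | b'\r' => {
                // consume_newline: byte is still at *pos, may take 1 or 2 bytes
                let byte = input[*pos];
                *pos += 1;
                if byte == b'\r' && input.get(*pos) == Some(&b'\n') {
                    *pos += 1;
                }
                *line += 1;
            }
            _ => *pos += 1, // every other arm advances exactly one byte
        }
    }
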
@@ -1259,16 +1232,8 @@ fn consume_escape(tokenizer: &mut Tokenizer) -> char {
             b' ' | b'\t' => {
                 tokenizer.advance(1)
             }
-            b'\n' | b'\x0C' => {
-                tokenizer.advance(1);
-                tokenizer.seen_newline(false)
-            }
-            b'\r' => {
-                tokenizer.advance(1);
-                if !tokenizer.is_eof() && tokenizer.next_byte_unchecked() == b'\n' {
-                    tokenizer.advance(1);
-                }
-                tokenizer.seen_newline(false)
+            b'\n' | b'\x0C' | b'\r' => {
+                tokenizer.consume_newline();
             }
             _ => {}
         }