@@ -73,6 +73,12 @@ macro_rules! simple_enum_error {
7373 }
7474}
7575
/// Expands to the pattern `'\t' | '\n' | '\r'` (tab, line feed, carriage return).
///
/// Being a pattern rather than a predicate, it can be dropped straight into a
/// `match` arm or a `matches!` invocation, so every site that needs to recognise
/// these three characters shares one spelling of the set.
macro_rules! ascii_tab_or_new_line_pattern {
    () => { '\t' | '\n' | '\r' };
}
81+
// Marks `ParseError` as a standard error type; the impl is only compiled when
// the `std` feature is enabled and relies entirely on the trait's default methods.
7682#[ cfg( feature = "std" ) ]
7783impl std:: error:: Error for ParseError { }
7884
@@ -207,7 +213,7 @@ impl<'i> Input<'i> {
207213 if input. len ( ) < original_input. len ( ) {
208214 vfn ( SyntaxViolation :: C0SpaceIgnored )
209215 }
210- if input. chars ( ) . any ( |c| matches ! ( c , '\t' | '\n' | '\r' ) ) {
216+ if input. chars ( ) . any ( ascii_tab_or_new_line ) {
211217 vfn ( SyntaxViolation :: TabOrNewlineIgnored )
212218 }
213219 }
@@ -225,7 +231,7 @@ impl<'i> Input<'i> {
225231 if input. len ( ) < original_input. len ( ) {
226232 vfn ( SyntaxViolation :: C0SpaceIgnored )
227233 }
228- if input. chars ( ) . any ( |c| matches ! ( c , '\t' | '\n' | '\r' ) ) {
234+ if input. chars ( ) . any ( ascii_tab_or_new_line ) {
229235 vfn ( SyntaxViolation :: TabOrNewlineIgnored )
230236 }
231237 }
@@ -281,7 +287,7 @@ impl<'i> Input<'i> {
281287 let utf8 = self . chars . as_str ( ) ;
282288 match self . chars . next ( ) {
283289 Some ( c) => {
284- if !matches ! ( c , '\t' | '\n' | '\r' ) {
290+ if !ascii_tab_or_new_line ( c ) {
285291 return Some ( ( c, & utf8[ ..c. len_utf8 ( ) ] ) ) ;
286292 }
287293 }
@@ -321,9 +327,7 @@ impl<F: FnMut(char) -> bool> Pattern for F {
321327impl Iterator for Input < ' _ > {
322328 type Item = char ;
323329 fn next ( & mut self ) -> Option < char > {
324- self . chars
325- . by_ref ( )
326- . find ( |& c| !matches ! ( c, '\t' | '\n' | '\r' ) )
330+ self . chars . by_ref ( ) . find ( |& c| !ascii_tab_or_new_line ( c) )
327331 }
328332}
329333
@@ -995,7 +999,7 @@ impl<'a> Parser<'a> {
995999 ':' if !inside_square_brackets => break ,
9961000 '\\' if scheme_type. is_special ( ) => break ,
9971001 '/' | '?' | '#' => break ,
998- '\t' | '\n' | '\r' => {
1002+ ascii_tab_or_new_line_pattern ! ( ) => {
9991003 has_ignored_chars = true ;
10001004 }
10011005 '[' => {
@@ -1077,7 +1081,7 @@ impl<'a> Parser<'a> {
10771081 for c in input_str. chars ( ) {
10781082 match c {
10791083 '/' | '\\' | '?' | '#' => break ,
1080- '\t' | '\n' | '\r' => has_ignored_chars = true ,
1084+ ascii_tab_or_new_line_pattern ! ( ) => has_ignored_chars = true ,
10811085 _ => non_ignored_chars += 1 ,
10821086 }
10831087 bytes += c. len_utf8 ( ) ;
@@ -1473,37 +1477,81 @@ impl<'a> Parser<'a> {
14731477 & mut self ,
14741478 scheme_type : SchemeType ,
14751479 scheme_end : u32 ,
1476- mut input : Input < ' i > ,
1480+ input : Input < ' i > ,
14771481 ) -> Option < Input < ' i > > {
1478- let len = input. chars . as_str ( ) . len ( ) ;
1479- let mut query = String :: with_capacity ( len) ; // FIXME: use a streaming decoder instead
1480- let mut remaining = None ;
1481- while let Some ( c) = input. next ( ) {
1482- if c == '#' && self . context == Context :: UrlParser {
1483- remaining = Some ( input) ;
1484- break ;
1485- } else {
1486- self . check_url_code_point ( c, & input) ;
1487- query. push ( c) ;
1482+ struct QueryPartIter < ' i , ' p > {
1483+ is_url_parser : bool ,
1484+ input : Input < ' i > ,
1485+ violation_fn : Option < & ' p dyn Fn ( SyntaxViolation ) > ,
1486+ }
1487+
1488+ impl < ' i > Iterator for QueryPartIter < ' i , ' _ > {
1489+ type Item = ( & ' i str , bool ) ;
1490+
1491+ fn next ( & mut self ) -> Option < Self :: Item > {
1492+ let start = self . input . chars . as_str ( ) ;
1493+ // bypass self.input.next() in order to get string slices
1494+ // which are faster to operate on
1495+ while let Some ( c) = self . input . chars . next ( ) {
1496+ match c {
1497+ ascii_tab_or_new_line_pattern ! ( ) => {
1498+ return Some ( (
1499+ & start[ ..start. len ( ) - self . input . chars . as_str ( ) . len ( ) - 1 ] ,
1500+ false ,
1501+ ) ) ;
1502+ }
1503+ '#' if self . is_url_parser => {
1504+ return Some ( (
1505+ & start[ ..start. len ( ) - self . input . chars . as_str ( ) . len ( ) - 1 ] ,
1506+ true ,
1507+ ) ) ;
1508+ }
1509+ c => {
1510+ if let Some ( vfn) = & self . violation_fn {
1511+ check_url_code_point ( vfn, c, & self . input ) ;
1512+ }
1513+ }
1514+ }
1515+ }
1516+ if start. is_empty ( ) {
1517+ None
1518+ } else {
1519+ Some ( ( start, false ) )
1520+ }
14881521 }
14891522 }
14901523
1491- let encoding = match & self . serialization [ ..scheme_end as usize ] {
1492- "http" | "https" | "file" | "ftp" => self . query_encoding_override ,
1493- _ => None ,
1494- } ;
1495- let query_bytes = if let Some ( o) = encoding {
1496- o ( & query)
1497- } else {
1498- query. as_bytes ( ) . into ( )
1524+ let mut part_iter = QueryPartIter {
1525+ is_url_parser : self . context == Context :: UrlParser ,
1526+ input,
1527+ violation_fn : self . violation_fn ,
14991528 } ;
15001529 let set = if scheme_type. is_special ( ) {
15011530 SPECIAL_QUERY
15021531 } else {
15031532 QUERY
15041533 } ;
1505- self . serialization . extend ( percent_encode ( & query_bytes, set) ) ;
1506- remaining
1534+ let query_encoding_override = self . query_encoding_override . filter ( |_| {
1535+ matches ! (
1536+ & self . serialization[ ..scheme_end as usize ] ,
1537+ "http" | "https" | "file" | "ftp"
1538+ )
1539+ } ) ;
1540+
1541+ while let Some ( ( part, is_finished) ) = part_iter. next ( ) {
1542+ match query_encoding_override {
1543+ // slightly faster to be repetitive and not convert text to Cow
1544+ Some ( o) => self . serialization . extend ( percent_encode ( & o ( part) , set) ) ,
1545+ None => self
1546+ . serialization
1547+ . extend ( percent_encode ( part. as_bytes ( ) , set) ) ,
1548+ }
1549+ if is_finished {
1550+ return Some ( part_iter. input ) ;
1551+ }
1552+ }
1553+
1554+ None
15071555 }
15081556
15091557 fn fragment_only ( mut self , base_url : & Url , mut input : Input < ' _ > ) -> ParseResult < Url > {
@@ -1526,31 +1574,75 @@ impl<'a> Parser<'a> {
15261574 } )
15271575 }
15281576
1529- pub fn parse_fragment ( & mut self , mut input : Input < ' _ > ) {
1530- while let Some ( ( c, utf8_c) ) = input. next_utf8 ( ) {
1531- if c == '\0' {
1532- self . log_violation ( SyntaxViolation :: NullInFragment )
1533- } else {
1534- self . check_url_code_point ( c, & input) ;
1577+ pub fn parse_fragment ( & mut self , input : Input < ' _ > ) {
1578+ struct FragmentPartIter < ' i , ' p > {
1579+ input : Input < ' i > ,
1580+ violation_fn : Option < & ' p dyn Fn ( SyntaxViolation ) > ,
1581+ }
1582+
1583+ impl < ' i > Iterator for FragmentPartIter < ' i , ' _ > {
1584+ type Item = & ' i str ;
1585+
1586+ fn next ( & mut self ) -> Option < Self :: Item > {
1587+ let start = self . input . chars . as_str ( ) ;
1588+ // bypass self.input.next() in order to get string slices
1589+ // which are faster to operate on
1590+ while let Some ( c) = self . input . chars . next ( ) {
1591+ match c {
1592+ ascii_tab_or_new_line_pattern ! ( ) => {
1593+ return Some (
1594+ & start[ ..start. len ( ) - self . input . chars . as_str ( ) . len ( ) - 1 ] ,
1595+ ) ;
1596+ }
1597+ '\0' => {
1598+ if let Some ( vfn) = & self . violation_fn {
1599+ vfn ( SyntaxViolation :: NullInFragment ) ;
1600+ }
1601+ }
1602+ c => {
1603+ if let Some ( vfn) = & self . violation_fn {
1604+ check_url_code_point ( vfn, c, & self . input ) ;
1605+ }
1606+ }
1607+ }
1608+ }
1609+ if start. is_empty ( ) {
1610+ None
1611+ } else {
1612+ Some ( start)
1613+ }
15351614 }
1615+ }
1616+
1617+ let part_iter = FragmentPartIter {
1618+ input,
1619+ violation_fn : self . violation_fn ,
1620+ } ;
1621+
1622+ for part in part_iter {
15361623 self . serialization
1537- . extend ( utf8_percent_encode ( utf8_c , FRAGMENT ) ) ;
1624+ . extend ( utf8_percent_encode ( part , FRAGMENT ) ) ;
15381625 }
15391626 }
15401627
1628+ #[ inline]
15411629 fn check_url_code_point ( & self , c : char , input : & Input < ' _ > ) {
15421630 if let Some ( vfn) = self . violation_fn {
1543- if c == '%' {
1544- let mut input = input. clone ( ) ;
1545- if !matches ! ( ( input. next( ) , input. next( ) ) , ( Some ( a) , Some ( b) )
1631+ check_url_code_point ( vfn, c, input)
1632+ }
1633+ }
1634+ }
1635+
1636+ fn check_url_code_point ( vfn : & dyn Fn ( SyntaxViolation ) , c : char , input : & Input < ' _ > ) {
1637+ if c == '%' {
1638+ let mut input = input. clone ( ) ;
1639+ if !matches ! ( ( input. next( ) , input. next( ) ) , ( Some ( a) , Some ( b) )
15461640 if a. is_ascii_hexdigit( ) && b. is_ascii_hexdigit( ) )
1547- {
1548- vfn ( SyntaxViolation :: PercentDecode )
1549- }
1550- } else if !is_url_code_point ( c) {
1551- vfn ( SyntaxViolation :: NonUrlCodePoint )
1552- }
1641+ {
1642+ vfn ( SyntaxViolation :: PercentDecode )
15531643 }
1644+ } else if !is_url_code_point ( c) {
1645+ vfn ( SyntaxViolation :: NonUrlCodePoint )
15541646 }
15551647}
15561648
@@ -1589,7 +1681,7 @@ fn c0_control_or_space(ch: char) -> bool {
15891681/// https://infra.spec.whatwg.org/#ascii-tab-or-newline
15901682#[ inline]
15911683fn ascii_tab_or_new_line ( ch : char ) -> bool {
1592- matches ! ( ch, '\t' | '\r' | '\n' )
1684+ matches ! ( ch, ascii_tab_or_new_line_pattern! ( ) )
15931685}
15941686
15951687/// https://url.spec.whatwg.org/#ascii-alpha
0 commit comments