@@ -79,6 +79,7 @@ macro_rules! simple_enum_error {
7979
8080simple_enum_error ! {
8181 EmptyHost => "empty host" ,
82+ InvalidAuthority => "invalid authority" ,
8283 IdnaError => "invalid international domain name" ,
8384 InvalidPort => "invalid port number" ,
8485 InvalidIpv4Address => "invalid IPv4 address" ,
@@ -156,7 +157,7 @@ impl fmt::Display for SyntaxViolation {
156157 }
157158}
158159
159- #[ derive( Copy , Clone ) ]
160+ #[ derive( Copy , Clone , PartialEq ) ]
160161pub enum SchemeType {
161162 File ,
162163 SpecialNotFile ,
@@ -217,7 +218,7 @@ impl<'i> Input<'i> {
217218 pub fn with_log ( original_input : & ' i str , vfn : Option < & dyn Fn ( SyntaxViolation ) > ) -> Self {
218219 let input = original_input. trim_matches ( c0_control_or_space) ;
219220 if let Some ( vfn) = vfn {
220- if input. len ( ) < original_input. len ( ) {
221+ if input. len ( ) != original_input. len ( ) {
221222 vfn ( SyntaxViolation :: C0SpaceIgnored )
222223 }
223224 if input. chars ( ) . any ( |c| matches ! ( c, '\t' | '\n' | '\r' ) ) {
@@ -858,11 +859,13 @@ impl<'a> Parser<'a> {
858859 self . serialization . push ( '/' ) ;
859860 self . serialization . push ( '/' ) ;
860861 // authority state
862+ let before_authority = self . serialization . len ( ) ;
861863 let ( username_end, remaining) = self . parse_userinfo ( input, scheme_type) ?;
864+ let has_authority = before_authority != self . serialization . len ( ) ;
862865 // host state
863866 let host_start = to_u32 ( self . serialization . len ( ) ) ?;
864867 let ( host_end, host, port, remaining) =
865- self . parse_host_and_port ( remaining, scheme_end, scheme_type) ?;
868+ self . parse_host_and_port ( remaining, scheme_end, scheme_type, has_authority ) ?;
866869 // path state
867870 let path_start = to_u32 ( self . serialization . len ( ) ) ?;
868871 let remaining = self . parse_path_start ( scheme_type, & mut true , remaining) ;
@@ -906,7 +909,18 @@ impl<'a> Parser<'a> {
906909 }
907910 let ( mut userinfo_char_count, remaining) = match last_at {
908911 None => return Ok ( ( to_u32 ( self . serialization . len ( ) ) ?, input) ) ,
909- Some ( ( 0 , remaining) ) => return Ok ( ( to_u32 ( self . serialization . len ( ) ) ?, remaining) ) ,
912+ Some ( ( 0 , remaining) ) => {
913+ // Otherwise, if one of the following is true
914+ // c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#)
915+ // url is special and c is U+005C (\)
916+ // If @ flag is set and buffer is the empty string, validation error, return failure.
917+ if let ( Some ( c) , _) = remaining. split_first ( ) {
918+ if c == '/' || c == '?' || c == '#' || scheme_type. is_special ( ) && c == '\\' {
919+ return Err ( ParseError :: InvalidAuthority ) ;
920+ }
921+ }
922+ return Ok ( ( to_u32 ( self . serialization . len ( ) ) ?, remaining) ) ;
923+ }
910924 Some ( x) => x,
911925 } ;
912926
@@ -948,10 +962,26 @@ impl<'a> Parser<'a> {
948962 input : Input < ' i > ,
949963 scheme_end : u32 ,
950964 scheme_type : SchemeType ,
965+ has_authority : bool ,
951966 ) -> ParseResult < ( u32 , HostInternal , Option < u16 > , Input < ' i > ) > {
952967 let ( host, remaining) = Parser :: parse_host ( input, scheme_type) ?;
953968 write ! ( & mut self . serialization, "{}" , host) . unwrap ( ) ;
954969 let host_end = to_u32 ( self . serialization . len ( ) ) ?;
970+ if let Host :: Domain ( h) = & host {
971+ if h. is_empty ( ) {
972+ // Port with an empty host
973+ if remaining. starts_with ( ":" ) {
974+ return Err ( ParseError :: EmptyHost ) ;
975+ }
976+ if scheme_type. is_special ( ) {
977+ return Err ( ParseError :: EmptyHost ) ;
978+ }
979+ if !scheme_type. is_special ( ) && has_authority {
980+ return Err ( ParseError :: EmptyHost ) ;
981+ }
982+ }
983+ } ;
984+
955985 let ( port, remaining) = if let Some ( remaining) = remaining. split_prefix ( ':' ) {
956986 let scheme = || default_port ( & self . serialization [ ..scheme_end as usize ] ) ;
957987 Parser :: parse_port ( remaining, scheme, self . context ) ?
@@ -1018,10 +1048,41 @@ impl<'a> Parser<'a> {
10181048 Ok ( ( host, input) )
10191049 }
10201050
1021- pub ( crate ) fn parse_file_host < ' i > (
1051+ pub fn get_file_host < ' i > ( input : Input < ' i > ) -> ParseResult < ( Host < String > , Input ) > {
1052+ let ( _, host_str, remaining) = Parser :: file_host ( input) ?;
1053+ let host = match Host :: parse ( & host_str) ? {
1054+ Host :: Domain ( ref d) if d == "localhost" => Host :: Domain ( "" . to_string ( ) ) ,
1055+ host => host,
1056+ } ;
1057+ Ok ( ( host, remaining) )
1058+ }
1059+
1060+ fn parse_file_host < ' i > (
10221061 & mut self ,
10231062 input : Input < ' i > ,
10241063 ) -> ParseResult < ( bool , HostInternal , Input < ' i > ) > {
1064+ let has_host;
1065+ let ( _, host_str, remaining) = Parser :: file_host ( input) ?;
1066+ let host = if host_str. is_empty ( ) {
1067+ has_host = false ;
1068+ HostInternal :: None
1069+ } else {
1070+ match Host :: parse ( & host_str) ? {
1071+ Host :: Domain ( ref d) if d == "localhost" => {
1072+ has_host = false ;
1073+ HostInternal :: None
1074+ }
1075+ host => {
1076+ write ! ( & mut self . serialization, "{}" , host) . unwrap ( ) ;
1077+ has_host = true ;
1078+ host. into ( )
1079+ }
1080+ }
1081+ } ;
1082+ Ok ( ( has_host, host, remaining) )
1083+ }
1084+
1085+ pub fn file_host < ' i > ( input : Input < ' i > ) -> ParseResult < ( bool , String , Input < ' i > ) > {
10251086 // Undo the Input abstraction here to avoid allocating in the common case
10261087 // where the host part of the input does not contain any tab or newline
10271088 let input_str = input. chars . as_str ( ) ;
@@ -1050,20 +1111,9 @@ impl<'a> Parser<'a> {
10501111 }
10511112 }
10521113 if is_windows_drive_letter ( host_str) {
1053- return Ok ( ( false , HostInternal :: None , input) ) ;
1114+ return Ok ( ( false , "" . to_string ( ) , input) ) ;
10541115 }
1055- let host = if host_str. is_empty ( ) {
1056- HostInternal :: None
1057- } else {
1058- match Host :: parse ( host_str) ? {
1059- Host :: Domain ( ref d) if d == "localhost" => HostInternal :: None ,
1060- host => {
1061- write ! ( & mut self . serialization, "{}" , host) . unwrap ( ) ;
1062- host. into ( )
1063- }
1064- }
1065- } ;
1066- Ok ( ( true , host, remaining) )
1116+ Ok ( ( true , host_str. to_string ( ) , remaining) )
10671117 }
10681118
10691119 pub fn parse_port < P > (
@@ -1503,7 +1553,7 @@ fn c0_control_or_space(ch: char) -> bool {
15031553
/// Returns `true` when `ch` is an ASCII tab (`'\t'`), carriage return
/// (`'\r'`) or line feed (`'\n'`).
///
/// https://infra.spec.whatwg.org/#ascii-tab-or-newline
#[inline]
fn ascii_tab_or_new_line(ch: char) -> bool {
    matches!(ch, '\t' | '\r' | '\n')
}
15091559
0 commit comments