@@ -456,13 +456,15 @@ impl Url {
456456
457457 if self . slice ( self . scheme_end + 1 ..) . starts_with ( "//" ) {
458458 // URL with authority
459- match self . byte_at ( self . username_end ) {
460- b':' => {
461- assert ! ( self . host_start >= self . username_end + 2 ) ;
462- assert_eq ! ( self . byte_at( self . host_start - 1 ) , b'@' ) ;
459+ if self . username_end != self . serialization . len ( ) as u32 {
460+ match self . byte_at ( self . username_end ) {
461+ b':' => {
462+ assert ! ( self . host_start >= self . username_end + 2 ) ;
463+ assert_eq ! ( self . byte_at( self . host_start - 1 ) , b'@' ) ;
464+ }
465+ b'@' => assert ! ( self . host_start == self . username_end + 1 ) ,
466+ _ => assert_eq ! ( self . username_end, self . scheme_end + 3 ) ,
463467 }
464- b'@' => assert ! ( self . host_start == self . username_end + 1 ) ,
465- _ => assert_eq ! ( self . username_end, self . scheme_end + 3 ) ,
466468 }
467469 assert ! ( self . host_start >= self . username_end) ;
468470 assert ! ( self . host_end >= self . host_start) ;
@@ -490,7 +492,10 @@ impl Url {
490492 Some ( port_str. parse:: <u16 >( ) . expect( "Couldn't parse port?" ) )
491493 ) ;
492494 }
493- assert_eq ! ( self . byte_at( self . path_start) , b'/' ) ;
495+ assert ! (
496+ self . path_start as usize == self . serialization. len( )
497+ || matches!( self . byte_at( self . path_start) , b'/' | b'#' | b'?' )
498+ ) ;
494499 } else {
495500 // Anarchist URL (no authority)
496501 assert_eq ! ( self . username_end, self . scheme_end + 1 ) ;
@@ -501,11 +506,11 @@ impl Url {
501506 assert_eq ! ( self . path_start, self . scheme_end + 1 ) ;
502507 }
503508 if let Some ( start) = self . query_start {
504- assert ! ( start > self . path_start) ;
509+ assert ! ( start >= self . path_start) ;
505510 assert_eq ! ( self . byte_at( start) , b'?' ) ;
506511 }
507512 if let Some ( start) = self . fragment_start {
508- assert ! ( start > self . path_start) ;
513+ assert ! ( start >= self . path_start) ;
509514 assert_eq ! ( self . byte_at( start) , b'#' ) ;
510515 }
511516 if let ( Some ( query_start) , Some ( fragment_start) ) = ( self . query_start , self . fragment_start ) {
@@ -685,7 +690,7 @@ impl Url {
685690 /// ```
686691 #[ inline]
687692 pub fn cannot_be_a_base ( & self ) -> bool {
688- !self . slice ( self . path_start ..) . starts_with ( '/' )
693+ !self . slice ( self . scheme_end + 1 ..) . starts_with ( '/' )
689694 }
690695
691696 /// Return the username for this URL (typically the empty string)
@@ -745,7 +750,10 @@ impl Url {
745750 pub fn password ( & self ) -> Option < & str > {
746751 // This ':' is not the one marking a port number since a host can not be empty.
747752 // (Except for file: URLs, which do not have port numbers.)
748- if self . has_authority ( ) && self . byte_at ( self . username_end ) == b':' {
753+ if self . has_authority ( )
754+ && self . username_end != self . serialization . len ( ) as u32
755+ && self . byte_at ( self . username_end ) == b':'
756+ {
749757 debug_assert ! ( self . byte_at( self . host_start - 1 ) == b'@' ) ;
750758 Some ( self . slice ( self . username_end + 1 ..self . host_start - 1 ) )
751759 } else {
@@ -1226,7 +1234,7 @@ impl Url {
12261234 if let Some ( input) = fragment {
12271235 self . fragment_start = Some ( to_u32 ( self . serialization . len ( ) ) . unwrap ( ) ) ;
12281236 self . serialization . push ( '#' ) ;
1229- self . mutate ( |parser| parser. parse_fragment ( parser:: Input :: new ( input) ) )
1237+ self . mutate ( |parser| parser. parse_fragment ( parser:: Input :: no_trim ( input) ) )
12301238 } else {
12311239 self . fragment_start = None
12321240 }
@@ -1284,7 +1292,12 @@ impl Url {
12841292 let scheme_type = SchemeType :: from ( self . scheme ( ) ) ;
12851293 let scheme_end = self . scheme_end ;
12861294 self . mutate ( |parser| {
1287- parser. parse_query ( scheme_type, scheme_end, parser:: Input :: new ( input) )
1295+ let vfn = parser. violation_fn ;
1296+ parser. parse_query (
1297+ scheme_type,
1298+ scheme_end,
1299+ parser:: Input :: trim_tab_and_newlines ( input, vfn) ,
1300+ )
12881301 } ) ;
12891302 }
12901303
@@ -1625,14 +1638,34 @@ impl Url {
16251638 if host == "" && SchemeType :: from ( self . scheme ( ) ) . is_special ( ) {
16261639 return Err ( ParseError :: EmptyHost ) ;
16271640 }
1641+ let mut host_substr = host;
1642+ // Otherwise, if c is U+003A (:) and the [] flag is unset, then
1643+ if !host. starts_with ( '[' ) || !host. ends_with ( ']' ) {
1644+ match host. find ( ':' ) {
1645+ Some ( 0 ) => {
1646+ // If buffer is the empty string, validation error, return failure.
1647+ return Err ( ParseError :: InvalidDomainCharacter ) ;
1648+ }
1649+ // Let host be the result of host parsing buffer
1650+ Some ( colon_index) => {
1651+ host_substr = & host[ ..colon_index] ;
1652+ }
1653+ None => { }
1654+ }
1655+ }
16281656 if SchemeType :: from ( self . scheme ( ) ) . is_special ( ) {
1629- self . set_host_internal ( Host :: parse ( host ) ?, None )
1657+ self . set_host_internal ( Host :: parse ( host_substr ) ?, None ) ;
16301658 } else {
1631- self . set_host_internal ( Host :: parse_opaque ( host ) ?, None )
1659+ self . set_host_internal ( Host :: parse_opaque ( host_substr ) ?, None ) ;
16321660 }
16331661 } else if self . has_host ( ) {
1634- if SchemeType :: from ( self . scheme ( ) ) . is_special ( ) {
1662+ let scheme_type = SchemeType :: from ( self . scheme ( ) ) ;
1663+ if scheme_type. is_special ( ) {
16351664 return Err ( ParseError :: EmptyHost ) ;
1665+ } else {
1666+ if self . serialization . len ( ) == self . path_start as usize {
1667+ self . serialization . push ( '/' ) ;
1668+ }
16361669 }
16371670 debug_assert ! ( self . byte_at( self . scheme_end) == b':' ) ;
16381671 debug_assert ! ( self . byte_at( self . path_start) == b'/' ) ;
@@ -1935,14 +1968,28 @@ impl Url {
19351968 ///
19361969 /// # fn run() -> Result<(), ParseError> {
19371970 /// let mut url = Url::parse("https://example.net")?;
1938- /// let result = url.set_scheme("foo");
1939- /// assert_eq!(url.as_str(), "foo://example.net/");
1971+ /// let result = url.set_scheme("http");
1972+ /// assert_eq!(url.as_str(), "http://example.net/");
19401973 /// assert!(result.is_ok());
19411974 /// # Ok(())
19421975 /// # }
19431976 /// # run().unwrap();
19441977 /// ```
1978+ /// Change the URL’s scheme from `foo` to `bar`:
19451979 ///
1980+ /// ```
1981+ /// use url::Url;
1982+ /// # use url::ParseError;
1983+ ///
1984+ /// # fn run() -> Result<(), ParseError> {
1985+ /// let mut url = Url::parse("foo://example.net")?;
1986+ /// let result = url.set_scheme("bar");
1987+ /// assert_eq!(url.as_str(), "bar://example.net");
1988+ /// assert!(result.is_ok());
1989+ /// # Ok(())
1990+ /// # }
1991+ /// # run().unwrap();
1992+ /// ```
19461993 ///
19471994 /// Cannot change URL’s scheme from `https` to `foõ`:
19481995 ///
@@ -1975,14 +2022,55 @@ impl Url {
19752022 /// # }
19762023 /// # run().unwrap();
19772024 /// ```
2025+ /// Cannot change the URL’s scheme from `foo` to `https`:
2026+ ///
2027+ /// ```
2028+ /// use url::Url;
2029+ /// # use url::ParseError;
2030+ ///
2031+ /// # fn run() -> Result<(), ParseError> {
2032+ /// let mut url = Url::parse("foo://example.net")?;
2033+ /// let result = url.set_scheme("https");
2034+ /// assert_eq!(url.as_str(), "foo://example.net");
2035+ /// assert!(result.is_err());
2036+ /// # Ok(())
2037+ /// # }
2038+ /// # run().unwrap();
2039+ /// ```
2040+ /// Cannot change the URL’s scheme from `http` to `foo`:
2041+ ///
2042+ /// ```
2043+ /// use url::Url;
2044+ /// # use url::ParseError;
2045+ ///
2046+ /// # fn run() -> Result<(), ParseError> {
2047+ /// let mut url = Url::parse("http://example.net")?;
2048+ /// let result = url.set_scheme("foo");
2049+ /// assert_eq!(url.as_str(), "http://example.net/");
2050+ /// assert!(result.is_err());
2051+ /// # Ok(())
2052+ /// # }
2053+ /// # run().unwrap();
2054+ /// ```
19782055 pub fn set_scheme ( & mut self , scheme : & str ) -> Result < ( ) , ( ) > {
19792056 let mut parser = Parser :: for_setter ( String :: new ( ) ) ;
19802057 let remaining = parser. parse_scheme ( parser:: Input :: new ( scheme) ) ?;
1981- if !remaining. is_empty ( )
1982- || ( !self . has_host ( ) && SchemeType :: from ( & parser. serialization ) . is_special ( ) )
2058+ let new_scheme_type = SchemeType :: from ( & parser. serialization ) ;
2059+ let old_scheme_type = SchemeType :: from ( self . scheme ( ) ) ;
2060+ // If url’s scheme is not a special scheme and buffer is a special scheme, then return.
2061+ if ( new_scheme_type. is_special ( ) && !old_scheme_type. is_special ( ) ) ||
2062+ // If url’s scheme is a special scheme and buffer is not a special scheme, then return.
2063+ ( !new_scheme_type. is_special ( ) && old_scheme_type. is_special ( ) ) ||
2064+ // If url includes credentials or has a non-null port, and buffer is "file", then return.
2065+ // If url’s scheme is "file" and its host is an empty host or null, then return.
2066+ ( new_scheme_type. is_file ( ) && self . has_authority ( ) )
19832067 {
19842068 return Err ( ( ) ) ;
19852069 }
2070+
2071+ if !remaining. is_empty ( ) || ( !self . has_host ( ) && new_scheme_type. is_special ( ) ) {
2072+ return Err ( ( ) ) ;
2073+ }
19862074 let old_scheme_end = self . scheme_end ;
19872075 let new_scheme_end = to_u32 ( parser. serialization . len ( ) ) . unwrap ( ) ;
19882076 let adjust = |index : & mut u32 | {
@@ -2004,6 +2092,14 @@ impl Url {
20042092
20052093 parser. serialization . push_str ( self . slice ( old_scheme_end..) ) ;
20062094 self . serialization = parser. serialization ;
2095+
2096+ // Re-apply the current port so that it is removed
2097+ // if it is the scheme's default.
2098+ // The result is deliberately ignored: we don't mind this silently failing
2099+ // if there was no port in the first place.
2100+ let previous_port = self . port ( ) ;
2101+ let _ = self . set_port ( previous_port) ;
2102+
20072103 Ok ( ( ) )
20082104 }
20092105
@@ -2408,6 +2504,7 @@ fn path_to_file_url_segments_windows(
24082504 }
24092505 let mut components = path. components ( ) ;
24102506
2507+ let host_start = serialization. len ( ) + 1 ;
24112508 let host_end;
24122509 let host_internal;
24132510 match components. next ( ) {
@@ -2434,15 +2531,24 @@ fn path_to_file_url_segments_windows(
24342531 _ => return Err ( ( ) ) ,
24352532 }
24362533
2534+ let mut path_only_has_prefix = true ;
24372535 for component in components {
24382536 if component == Component :: RootDir {
24392537 continue ;
24402538 }
2539+ path_only_has_prefix = false ;
24412540 // FIXME: somehow work with non-unicode?
24422541 let component = component. as_os_str ( ) . to_str ( ) . ok_or ( ( ) ) ?;
24432542 serialization. push ( '/' ) ;
24442543 serialization. extend ( percent_encode ( component. as_bytes ( ) , PATH_SEGMENT ) ) ;
24452544 }
2545+ // A Windows drive letter must be followed by a slash.
2546+ if serialization. len ( ) > host_start
2547+ && parser:: is_windows_drive_letter ( & serialization[ host_start..] )
2548+ && path_only_has_prefix
2549+ {
2550+ serialization. push ( '/' ) ;
2551+ }
24462552 Ok ( ( host_end, host_internal) )
24472553}
24482554
@@ -2467,6 +2573,14 @@ fn file_url_segments_to_pathbuf(
24672573 bytes. push ( b'/' ) ;
24682574 bytes. extend ( percent_decode ( segment. as_bytes ( ) ) ) ;
24692575 }
2576+ // A Windows drive letter must be followed by a slash.
2577+ if bytes. len ( ) > 2 {
2578+ if matches ! ( bytes[ bytes. len( ) - 2 ] , b'a' ..=b'z' | b'A' ..=b'Z' )
2579+ && matches ! ( bytes[ bytes. len( ) - 1 ] , b':' | b'|' )
2580+ {
2581+ bytes. push ( b'/' ) ;
2582+ }
2583+ }
24702584 let os_str = OsStr :: from_bytes ( & bytes) ;
24712585 let path = PathBuf :: from ( os_str) ;
24722586 debug_assert ! (
0 commit comments