Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
223 changes: 132 additions & 91 deletions src/uri/authority.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,19 @@ use bytes::Bytes;
use super::{ErrorKind, InvalidUri, Port, URI_CHARS};
use crate::byte_str::ByteStr;

/// Reason a byte slice was rejected by `validate_authority_bytes`.
///
/// This is a fieldless, `Copy` enum. `Authority::parse` translates each
/// variant into an `ErrorKind` before handing the error to callers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum AuthorityError {
/// The input slice had length zero.
Empty,
/// A byte that `URI_CHARS` classifies as `0` was found and it was not `%`.
InvalidUriChar,
/// More than one `:` was still counted when the scan finished
/// (e.g. `localhost:8080:3030`).
InvalidAuthority,
/// A `:` was encountered after `MAX_COLONS` (8) had already been counted.
TooManyColons,
/// The scan finished with an opening `[` seen but no closing `]`
/// (the two bracket flags disagree).
MismatchedBrackets,
/// A `[` appeared after an uncleared `%` or after another `[`, or a `]`
/// appeared without a preceding `[` or after another `]`.
InvalidBracketUsage,
/// The last `@` was the final byte of the authority, so nothing follows it.
EmptyAfterAt,
/// A `%` was seen and never cleared by a later `@` or `]`.
InvalidPercent,
}

/// Represents the authority component of a URI.
#[derive(Clone)]
pub struct Authority {
Expand Down Expand Up @@ -45,9 +58,14 @@ impl Authority {
/// let authority = Authority::from_static("example.com");
/// assert_eq!(authority.host(), "example.com");
/// ```
pub fn from_static(src: &'static str) -> Self {
Authority::from_shared(Bytes::from_static(src.as_bytes()))
.expect("static str is not valid authority")
#[inline]
pub const fn from_static(src: &'static str) -> Self {
    // `validate_authority_bytes` stops at the first `/`, `?`, or `#` and
    // returns that offset, so `Ok` only proves that a *prefix* of `src` is
    // a valid authority.
    let end = match validate_authority_bytes(src.as_bytes()) {
        Ok(end) => end,
        Err(_) => panic!("static str is not valid authority"),
    };

    // Require the authority to span the whole string. Without this check an
    // input such as "example.com/path" would be stored verbatim, path
    // included, as if it were an authority.
    if end != src.len() {
        panic!("static str is not valid authority");
    }

    Authority {
        data: ByteStr::from_static(src),
    }
}

/// Attempt to convert a `Bytes` buffer to a `Authority`.
Expand All @@ -69,95 +87,19 @@ impl Authority {
// Postcondition: for all Ok() returns, s[..ret.unwrap()] is valid UTF-8 where
// ret is the return value.
pub(super) fn parse(s: &[u8]) -> Result<usize, InvalidUri> {
let mut colon_cnt = 0u32;
let mut start_bracket = false;
let mut end_bracket = false;
let mut has_percent = false;
let mut end = s.len();
let mut at_sign_pos = None;
const MAX_COLONS: u32 = 8; // e.g., [FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80

// Among other things, this loop checks that every byte in s up to the
// first '/', '?', or '#' is a valid URI character (or in some contexts,
// a '%'). This means that each such byte is a valid single-byte UTF-8
// code point.
for (i, &b) in s.iter().enumerate() {
match URI_CHARS[b as usize] {
b'/' | b'?' | b'#' => {
end = i;
break;
}
b':' => {
if colon_cnt >= MAX_COLONS {
return Err(ErrorKind::InvalidAuthority.into());
}
colon_cnt += 1;
}
b'[' => {
if has_percent || start_bracket {
// Something other than the userinfo has a `%`, so reject it.
return Err(ErrorKind::InvalidAuthority.into());
}
start_bracket = true;
}
b']' => {
if (!start_bracket) || end_bracket {
return Err(ErrorKind::InvalidAuthority.into());
}
end_bracket = true;

// Those were part of an IPv6 hostname, so forget them...
colon_cnt = 0;
has_percent = false;
}
b'@' => {
at_sign_pos = Some(i);

// Those weren't a port colon, but part of the
// userinfo, so it needs to be forgotten.
colon_cnt = 0;
has_percent = false;
}
0 if b == b'%' => {
// Per https://tools.ietf.org/html/rfc3986#section-3.2.1 and
// https://url.spec.whatwg.org/#authority-state
// the userinfo can have a percent-encoded username and password,
// so record that a `%` was found. If this turns out to be
// part of the userinfo, this flag will be cleared.
// Also per https://tools.ietf.org/html/rfc6874, percent-encoding can
// be used to indicate a zone identifier.
// If the flag hasn't been cleared at the end, that means this
// was part of the hostname (and not part of an IPv6 address), and
// will fail with an error.
has_percent = true;
}
0 => {
return Err(ErrorKind::InvalidUriChar.into());
}
_ => {}
validate_authority_bytes(s).map_err(|e| {
match e {
AuthorityError::Empty => ErrorKind::Empty,
AuthorityError::InvalidUriChar => ErrorKind::InvalidUriChar,
AuthorityError::InvalidAuthority
| AuthorityError::MismatchedBrackets
| AuthorityError::InvalidBracketUsage
| AuthorityError::EmptyAfterAt
| AuthorityError::InvalidPercent
| AuthorityError::TooManyColons => ErrorKind::InvalidAuthority,
}
}

if start_bracket ^ end_bracket {
return Err(ErrorKind::InvalidAuthority.into());
}

if colon_cnt > 1 {
// Things like 'localhost:8080:3030' are rejected.
return Err(ErrorKind::InvalidAuthority.into());
}

if end > 0 && at_sign_pos == Some(end - 1) {
// If there's nothing after an `@`, this is bonkers.
return Err(ErrorKind::InvalidAuthority.into());
}

if has_percent {
// Something after the userinfo has a `%`, so reject it.
return Err(ErrorKind::InvalidAuthority.into());
}

Ok(end)
.into()
})
}

// Parse bytes as an Authority, not allowing an empty string.
Expand Down Expand Up @@ -528,6 +470,105 @@ where
})
}

/// Validates the authority found at the start of `s`.
///
/// Scanning stops at the first byte that `URI_CHARS` classifies as `/`, `?`,
/// or `#`, or at the end of the slice. On success the returned value is the
/// offset at which scanning stopped, i.e. the length of the authority prefix.
///
/// Implemented with an index-based `while` loop so it is usable as a
/// `const fn`.
const fn validate_authority_bytes(s: &[u8]) -> Result<usize, AuthorityError> {
    // e.g., [FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80
    const MAX_COLONS: u32 = 8;

    let len = s.len();
    if len == 0 {
        return Err(AuthorityError::Empty);
    }

    let mut colons: u32 = 0;
    let mut opened = false;
    let mut closed = false;
    let mut pending_percent = false;
    // `len` doubles as the "no `@` seen" marker: a real position is always
    // strictly less than `len`.
    let mut last_at: usize = len;
    let mut stop = len;

    // Every byte before the first '/', '?', or '#' must be a valid URI
    // character (or, in some positions, a '%'). Each accepted byte is
    // therefore a valid single-byte UTF-8 code point.
    let mut pos = 0;
    while pos < len {
        let byte = s[pos];

        match URI_CHARS[byte as usize] {
            b'/' | b'?' | b'#' => {
                stop = pos;
                break;
            }
            0 if byte == b'%' => {
                // Per https://tools.ietf.org/html/rfc3986#section-3.2.1 and
                // https://url.spec.whatwg.org/#authority-state the userinfo
                // may contain percent-encoded bytes, and per
                // https://tools.ietf.org/html/rfc6874 a bracketed IPv6
                // literal may carry a percent-encoded zone identifier.
                // Remember the `%`; a later `@` or `]` clears the flag. If it
                // is still set after the scan, the `%` was in the plain
                // hostname and the input is rejected.
                pending_percent = true;
            }
            0 => return Err(AuthorityError::InvalidUriChar),
            b':' => {
                if colons >= MAX_COLONS {
                    return Err(AuthorityError::TooManyColons);
                }
                colons += 1;
            }
            b'[' => {
                // A `%` outside the userinfo, or a second `[`, is rejected.
                if pending_percent || opened {
                    return Err(AuthorityError::InvalidBracketUsage);
                }
                opened = true;
            }
            b']' => {
                if !opened || closed {
                    return Err(AuthorityError::InvalidBracketUsage);
                }
                closed = true;

                // Colons and `%` seen so far belonged to the IPv6 literal.
                colons = 0;
                pending_percent = false;
            }
            b'@' => {
                last_at = pos;

                // Colons and `%` seen so far belonged to the userinfo, not
                // to the host or port.
                colons = 0;
                pending_percent = false;
            }
            _ => {}
        }

        pos += 1;
    }

    // The order of these checks decides which error wins when several apply.
    if opened != closed {
        Err(AuthorityError::MismatchedBrackets)
    } else if colons > 1 {
        // Things like 'localhost:8080:3030' are rejected.
        Err(AuthorityError::InvalidAuthority)
    } else if last_at + 1 == stop {
        // Nothing follows the `@`.
        Err(AuthorityError::EmptyAfterAt)
    } else if pending_percent {
        // A `%` appeared after the userinfo and outside brackets.
        Err(AuthorityError::InvalidPercent)
    } else {
        Ok(stop)
    }
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
80 changes: 76 additions & 4 deletions src/uri/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@ use bytes::Bytes;
use super::{ErrorKind, InvalidUri};
use crate::byte_str::ByteStr;

/// Reason a byte slice was rejected by `validate_path_and_query_bytes`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PathAndQueryError {
/// A byte before the first `?` is outside the set accepted in the path.
InvalidPathChar,
/// A byte after the first `?` is outside the set accepted in the query.
InvalidQueryChar,
/// A `#` was found in either the path or the query.
FragmentNotAllowed,
}

/// Represents the path component of a URI
#[derive(Clone)]
pub struct PathAndQuery {
Expand Down Expand Up @@ -138,10 +146,14 @@ impl PathAndQuery {
/// assert_eq!(v.query(), Some("world"));
/// ```
#[inline]
pub fn from_static(src: &'static str) -> Self {
let src = Bytes::from_static(src.as_bytes());

PathAndQuery::from_shared(src).unwrap()
pub const fn from_static(src: &'static str) -> Self {
    // Validate first; the validator also reports where the query starts
    // (or `NONE` when there is no `?`).
    let query = match validate_path_and_query_bytes(src.as_bytes()) {
        Ok(offset) => offset,
        Err(_) => panic!("static str is not valid path"),
    };

    PathAndQuery {
        data: ByteStr::from_static(src),
        query,
    }
}

/// Attempt to convert a `Bytes` buffer to a `PathAndQuery`.
Expand Down Expand Up @@ -467,6 +479,66 @@ impl PartialOrd<PathAndQuery> for String {
}
}

/// Validates `bytes` as a path optionally followed by `?` and a query.
///
/// Returns the offset of the first `?` as a `u16`, or `NONE` when no `?` is
/// present. A `#` anywhere is rejected, as is any byte outside the accepted
/// set for the section it appears in.
///
/// Implemented with index-based `while` loops so it is usable as a
/// `const fn`.
///
/// NOTE(review): the `as u16` cast wraps silently for a `?` at an offset
/// above `u16::MAX`; presumably callers bound the input length. Confirm.
const fn validate_path_and_query_bytes(bytes: &[u8]) -> Result<u16, PathAndQueryError> {
    let len = bytes.len();
    let mut query_start: u16 = NONE;
    let mut pos: usize = 0;

    // Path section: runs until the first `?` or the end of the input.
    while pos < len {
        match bytes[pos] {
            b'?' => {
                query_start = pos as u16;
                pos += 1;
                break;
            }
            b'#' => return Err(PathAndQueryError::FragmentNotAllowed),
            // Bytes accepted as-is in the path, including `"`, `{`, `}` and
            // everything from 0x7F upwards.
            0x21
            | 0x24..=0x3B
            | 0x3D
            | 0x40..=0x5F
            | 0x61..=0x7A
            | 0x7C
            | 0x7E
            | b'"'
            | b'{'
            | b'}'
            | 0x7F..=0xFF => {}
            _ => return Err(PathAndQueryError::InvalidPathChar),
        }
        pos += 1;
    }

    // Query section: only scanned when a `?` was recorded above.
    if query_start != NONE {
        while pos < len {
            match bytes[pos] {
                b'#' => return Err(PathAndQueryError::FragmentNotAllowed),
                // Allowed: 0x21 / 0x24 - 0x3B / 0x3D / 0x3F - 0x7E, plus
                // everything from 0x7F upwards.
                0x21 | 0x24..=0x3B | 0x3D | 0x3F..=0x7E | 0x7F..=0xFF => {}
                _ => return Err(PathAndQueryError::InvalidQueryChar),
            }
            pos += 1;
        }
    }

    Ok(query_start)
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down