Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 51 additions & 25 deletions src/web/extractors/rustdoc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,13 @@ use std::borrow::Cow;
const INDEX_HTML: &str = "index.html";
const FOLDER_AND_INDEX_HTML: &str = "/index.html";

pub(crate) const ROOT_RUSTDOC_HTML_FILES: &[&str] = &[
"all.html",
"help.html",
"settings.html",
"scrape-examples-help.html",
];

#[derive(Clone, Debug, PartialEq)]
pub(crate) enum PageKind {
Rustdoc,
Expand Down Expand Up @@ -723,27 +730,38 @@ fn generate_rustdoc_path_for_url(
&& !path.is_empty()
&& path != INDEX_HTML
{
if let Some(target_name) = target_name
&& path == target_name
{
// Sometimes people add a path that is just the target name.
// This matches our `rustdoc_redirector_handler` route,
// without trailing slash (`/{name}/{version}/{target}`).
//
// Even though the rustdoc html handler would also be capable
// handling this input, we want to redirect to the trailing
// slash version (`/{name}/{version}/{target}/`),
// so they are separate.
format!("{}/", target_name)
} else {
// for none-elements paths we have to guarantee that we have a
// trailing slash, otherwise the rustdoc-url won't hit the html-handler and
// lead to redirect loops.
if path.contains('/') {
// just use the given inner to start, if:
// * it's not empty
// * it's not just "index.html"
// * we have a slash in the path.
path.to_string()
} else if ROOT_RUSTDOC_HTML_FILES.contains(&path) {
// special case: some files are at the root of the rustdoc output,
// without a trailing slash, and the routes are fine with that.
// e.g. `/help.html`, `/settings.html`, `/all.html`, ...
path.to_string()
} else if let Some(target_name) = target_name {
if target_name == path {
// when we have the target name as path, without a trailing slash,
// just add the slash.
format!("{}/", path)
} else {
// when someone just attaches some path to the URL, like
// `/{krate}/{version}/somefile.html`, we assume they meant
// `/{krate}/{version}/{target_name}/somefile.html`.
format!("{}/{}", target_name, path)
}
} else {
// fallback: just attach a slash and redirect.
format!("{}/", path)
}
} else if let Some(target_name) = target_name {
// after having no usable given path, we generate one with the
// target name, if we have one.
// target name, if we have one/.
format!("{}/", target_name)
} else {
// no usable given path:
Expand Down Expand Up @@ -1000,12 +1018,17 @@ mod tests {
)]
#[test_case(
None, Some("something"), false,
None, "something", "something";
None, "something", "krate/something";
"without trailing slash"
)]
#[test_case(
None, Some("settings.html"), false,
None, "settings.html", "settings.html";
"without trailing slash, but known root name"
)]
#[test_case(
None, Some("/something"), false,
None, "something", "something";
None, "something", "krate/something";
"leading slash is cut"
)]
#[test_case(
Expand All @@ -1027,7 +1050,7 @@ mod tests {
)]
#[test_case(
None, Some(&format!("{DEFAULT_TARGET}/one")), false,
Some(DEFAULT_TARGET), "one", "one";
Some(DEFAULT_TARGET), "one", "krate/one";
"target + one without trailing slash"
)]
#[test_case(
Expand Down Expand Up @@ -1073,7 +1096,7 @@ mod tests {
)]
#[test_case(
Some(UNKNOWN_TARGET), None, false,
None, UNKNOWN_TARGET, UNKNOWN_TARGET;
None, UNKNOWN_TARGET, &format!("krate/{UNKNOWN_TARGET}");
"unknown target without trailing slash"
)]
#[test_case(
Expand Down Expand Up @@ -1639,23 +1662,26 @@ mod tests {
assert_eq!(params.rustdoc_url(), "/krate/latest/krate/");
}

#[test_case(Some(PageKind::Rustdoc))]
#[test_case(None)]
fn test_only_target_name_without_slash(page_kind: Option<PageKind>) {
#[test_case("other_path.html", "/krate/latest/krate/other_path.html")]
#[test_case("other_path", "/krate/latest/krate/other_path"; "without .html")]
#[test_case("other_path.html", "/krate/latest/krate/other_path.html"; "with .html")]
#[test_case("settings.html", "/krate/latest/settings.html"; "static routes")]
#[test_case(KRATE, "/krate/latest/krate/"; "same as target name, without slash")]
fn test_redirect_some_odd_paths_we_saw(inner_path: &str, expected_url: &str) {
// test for https://github.com/rust-lang/docs.rs/issues/2989
let params = RustdocParams::new(KRATE)
.with_maybe_page_kind(page_kind)
.try_with_original_uri("/dummy/latest/dummy")
.with_page_kind(PageKind::Rustdoc)
.try_with_original_uri(format!("/{KRATE}/latest/{inner_path}"))
.unwrap()
.with_req_version(ReqVersion::Latest)
.with_maybe_doc_target(None::<String>)
.with_inner_path(KRATE)
.with_inner_path(inner_path)
.with_default_target(DEFAULT_TARGET)
.with_target_name(KRATE)
.with_doc_targets(TARGETS.iter().cloned());

dbg!(&params);

assert_eq!(params.rustdoc_url(), "/krate/latest/krate/");
assert_eq!(params.rustdoc_url(), expected_url);
}
}
22 changes: 20 additions & 2 deletions src/web/rustdoc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ use axum::{
http::StatusCode,
response::{IntoResponse, Response as AxumResponse},
};
use http::{HeaderValue, Uri, header};
use http::{HeaderValue, Uri, header, uri::Authority};
use serde::Deserialize;
use std::{
collections::HashMap,
Expand Down Expand Up @@ -219,6 +219,7 @@ pub(crate) async fn rustdoc_redirector_handler(
let params = params.with_page_kind(PageKind::Rustdoc);

fn redirect_to_doc(
original_uri: Option<&Uri>,
url: EscapedURI,
cache_policy: CachePolicy,
path_in_crate: Option<&str>,
Expand All @@ -229,6 +230,19 @@ pub(crate) async fn rustdoc_redirector_handler(
url
};

if let Some(original_uri) = original_uri
&& original_uri.path() == url.path()
&& (url.authority().is_none()
|| url.authority() == Some(&Authority::from_static("docs.rs")))
{
return Err(anyhow!(
"infinite redirect detected, \noriginal_uri = {}, redirect_url = {}",
original_uri,
url
)
.into());
}

trace!(%url, ?cache_policy, path_in_crate, "redirect to doc");
Ok(axum_cached_redirect(url, cache_policy)?)
}
Expand Down Expand Up @@ -266,6 +280,7 @@ pub(crate) async fn rustdoc_redirector_handler(
let target_uri =
EscapedURI::from_uri(description.href.clone()).append_raw_query(original_query);
return redirect_to_doc(
params.original_uri(),
target_uri,
CachePolicy::ForeverInCdnAndStaleInBrowser,
path_in_crate.as_deref(),
Expand Down Expand Up @@ -329,6 +344,7 @@ pub(crate) async fn rustdoc_redirector_handler(

if matched_release.rustdoc_status() {
Ok(redirect_to_doc(
params.original_uri(),
params.rustdoc_url().append_raw_query(original_query),
if matched_release.is_latest_url() {
CachePolicy::ForeverInCdn
Expand Down Expand Up @@ -3345,7 +3361,9 @@ mod test {
#[tokio::test(flavor = "multi_thread")]
#[test_case("/dummy/"; "only krate")]
#[test_case("/dummy/latest/"; "with version")]
#[test_case("/dummy/latest/dummy"; "full without trailing slash")]
#[test_case("/dummy/latest/dummy"; "target-name as path, without trailing slash")]
#[test_case("/dummy/latest/other_path"; "other path, without trailing slash")]
#[test_case("/dummy/latest/other_path.html"; "other html path, without trailing slash")]
#[test_case("/dummy/latest/dummy/"; "final target")]
async fn test_full_latest_url_without_trailing_slash(path: &str) -> Result<()> {
// test for https://github.com/rust-lang/docs.rs/issues/2989
Expand Down
Loading