From 224df8e957e9d8a4ae9df99733086877cd8b5198 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Sat, 1 Nov 2025 12:33:54 +0100 Subject: [PATCH 1/2] refactor & centralize rustdoc parameter handling & url generation --- ...8e1276f7f814f22b830c46d6b77edb209a033.json | 28 - ...986fd2ec3e76ff4df7c8204f2d4eaebba8f1.json} | 16 +- Cargo.toml | 2 +- src/db/add_package.rs | 68 +- src/db/mod.rs | 4 +- src/docbuilder/rustwide_builder.rs | 6 +- src/test/fakes.rs | 62 +- src/test/mod.rs | 2 +- src/utils/cargo_metadata.rs | 2 +- src/utils/mod.rs | 37 +- src/web/build_details.rs | 11 +- src/web/builds.rs | 45 +- src/web/crate_details.rs | 329 ++-- src/web/error.rs | 28 +- src/web/escaped_uri.rs | 460 +++++ src/web/extractors/context.rs | 53 + src/web/extractors/mod.rs | 6 + src/web/{extractors.rs => extractors/path.rs} | 52 +- src/web/extractors/rustdoc.rs | 1601 +++++++++++++++++ src/web/features.rs | 54 +- src/web/headers.rs | 101 +- src/web/mod.rs | 127 +- src/web/releases.rs | 187 +- src/web/routes.rs | 8 +- src/web/rustdoc.rs | 751 +++----- src/web/source.rs | 107 +- src/web/status.rs | 14 +- templates/core/home.html | 5 +- templates/crate/build_details.html | 3 +- templates/crate/builds.html | 3 +- templates/crate/details.html | 11 +- templates/crate/features.html | 14 +- templates/crate/source.html | 1 - templates/header/package_navigation.html | 10 +- templates/macros.html | 73 +- templates/releases/build_queue.html | 3 +- templates/releases/releases.html | 33 +- templates/rustdoc/platforms.html | 24 +- templates/rustdoc/releases.html | 2 +- templates/rustdoc/topbar.html | 25 +- 40 files changed, 3134 insertions(+), 1234 deletions(-) delete mode 100644 .sqlx/query-0011936b31678ee644dff3f5f8d8e1276f7f814f22b830c46d6b77edb209a033.json rename .sqlx/{query-118390f408685404fa25f1de88df56c6f943b5530760163ff8a667fac627626f.json => query-7072759134a3abceaa6d3105b1f7986fd2ec3e76ff4df7c8204f2d4eaebba8f1.json} (73%) create mode 100644 src/web/escaped_uri.rs create mode 100644 src/web/extractors/context.rs create mode 100644 src/web/extractors/mod.rs rename src/web/{extractors.rs => extractors/path.rs} (78%) create mode 100644 src/web/extractors/rustdoc.rs diff --git a/.sqlx/query-0011936b31678ee644dff3f5f8d8e1276f7f814f22b830c46d6b77edb209a033.json b/.sqlx/query-0011936b31678ee644dff3f5f8d8e1276f7f814f22b830c46d6b77edb209a033.json deleted file mode 100644 index cad5698c0..000000000 --- a/.sqlx/query-0011936b31678ee644dff3f5f8d8e1276f7f814f22b830c46d6b77edb209a033.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "SELECT\n releases.default_target,\n releases.doc_targets\n FROM releases\n WHERE releases.id = $1;", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "default_target", - "type_info": "Varchar" - }, - { - "ordinal": 1, - "name": "doc_targets", - "type_info": "Json" - } - ], - "parameters": { - "Left": [ - "Int4" - ] - }, - "nullable": [ - true, - true - ] - }, - "hash": "0011936b31678ee644dff3f5f8d8e1276f7f814f22b830c46d6b77edb209a033" -} diff --git a/.sqlx/query-118390f408685404fa25f1de88df56c6f943b5530760163ff8a667fac627626f.json b/.sqlx/query-7072759134a3abceaa6d3105b1f7986fd2ec3e76ff4df7c8204f2d4eaebba8f1.json similarity index 73% rename from .sqlx/query-118390f408685404fa25f1de88df56c6f943b5530760163ff8a667fac627626f.json rename to .sqlx/query-7072759134a3abceaa6d3105b1f7986fd2ec3e76ff4df7c8204f2d4eaebba8f1.json index 496380a0c..80cb41e33 100644 --- a/.sqlx/query-118390f408685404fa25f1de88df56c6f943b5530760163ff8a667fac627626f.json +++ 
b/.sqlx/query-7072759134a3abceaa6d3105b1f7986fd2ec3e76ff4df7c8204f2d4eaebba8f1.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "SELECT\n releases.id as \"id: ReleaseId\",\n releases.version,\n release_build_status.build_status as \"build_status!: BuildStatus\",\n releases.yanked,\n releases.is_library,\n releases.rustdoc_status,\n releases.release_time,\n releases.target_name\n FROM releases\n INNER JOIN release_build_status ON releases.id = release_build_status.rid\n WHERE\n releases.crate_id = $1", + "query": "SELECT\n releases.id as \"id: ReleaseId\",\n releases.version,\n release_build_status.build_status as \"build_status!: BuildStatus\",\n releases.yanked,\n releases.is_library,\n releases.rustdoc_status,\n releases.release_time,\n releases.target_name,\n releases.default_target,\n releases.doc_targets\n FROM releases\n INNER JOIN release_build_status ON releases.id = release_build_status.rid\n WHERE\n releases.crate_id = $1", "describe": { "columns": [ { @@ -53,6 +53,16 @@ "ordinal": 7, "name": "target_name", "type_info": "Varchar" + }, + { + "ordinal": 8, + "name": "default_target", + "type_info": "Varchar" + }, + { + "ordinal": 9, + "name": "doc_targets", + "type_info": "Json" } ], "parameters": { @@ -68,8 +78,10 @@ true, true, true, + true, + true, true ] }, - "hash": "118390f408685404fa25f1de88df56c6f943b5530760163ff8a667fac627626f" + "hash": "7072759134a3abceaa6d3105b1f7986fd2ec3e76ff4df7c8204f2d4eaebba8f1" } diff --git a/Cargo.toml b/Cargo.toml index 37ba11900..9982ae064 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -59,7 +59,7 @@ bzip2 = "0.6.0" getrandom = "0.3.1" itertools = { version = "0.14.0" } hex = "0.4.3" -derive_more = { version = "2.0.0", features = ["display"] } +derive_more = { version = "2.0.0", features = ["display", "deref"] } sysinfo = { version = "0.37.2", default-features = false, features = ["system"] } derive_builder = "0.20.2" diff --git a/src/db/add_package.rs b/src/db/add_package.rs index 8b5c0b4c7..42aa4bd24 100644 --- a/src/db/add_package.rs +++ b/src/db/add_package.rs @@ -4,13 +4,13 @@ use crate::{ error::Result, registry_api::{CrateData, CrateOwner, ReleaseData}, storage::CompressionAlgorithm, - utils::{MetadataPackage, rustc_version::parse_rustc_date}, + utils::{Dependency, MetadataPackage, rustc_version::parse_rustc_date}, web::crate_details::{latest_release, releases_for_crate}, }; use anyhow::{Context, anyhow}; -use derive_more::Display; +use derive_more::{Deref, Display}; use futures_util::stream::TryStreamExt; -use serde::Serialize; +use serde::{Deserialize, Serialize}; use serde_json::Value; use slug::slugify; use std::{ @@ -33,6 +33,44 @@ pub struct ReleaseId(pub i32); #[sqlx(transparent)] pub struct BuildId(pub i32); +type DepOut = (String, String, String, bool); +type DepIn = (String, String, Option, Option); + +/// A crate dependency in our internal representation for releases.dependencies json. 
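The `from`/`into` tuple conversion declared just below is what keeps the stored JSON backward compatible: each dependency still round-trips through the historical `[name, req, kind, optional]` array shape even though the in-memory type is now the richer `Dependency` struct. A minimal, self-contained sketch of the same serde pattern (the standalone `Dep`/`DepTuple` names are illustrative only, not part of the patch):

// Illustrative sketch: a struct that serializes to and from a JSON array
// via serde's container-level `from`/`into` attributes.
use serde::{Deserialize, Serialize};

type DepTuple = (String, String, String, bool);

#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(from = "DepTuple", into = "DepTuple")]
struct Dep {
    name: String,
    req: String,
    kind: String,
    optional: bool,
}

impl From<DepTuple> for Dep {
    fn from((name, req, kind, optional): DepTuple) -> Self {
        Dep { name, req, kind, optional }
    }
}

impl From<Dep> for DepTuple {
    fn from(d: Dep) -> Self {
        (d.name, d.req, d.kind, d.optional)
    }
}

fn main() -> Result<(), serde_json::Error> {
    let dep = Dep {
        name: "serde".into(),
        req: "^1.0".into(),
        kind: "normal".into(),
        optional: false,
    };
    // serializes to the legacy array form stored in releases.dependencies ...
    assert_eq!(serde_json::to_string(&dep)?, r#"["serde","^1.0","normal",false]"#);
    // ... and deserializes back from that same array form
    let parsed: Dep = serde_json::from_str(r#"["serde","^1.0","normal",false]"#)?;
    assert_eq!(parsed, dep);
    Ok(())
}

The sketch omits one detail visible in the patch: the real `DepIn` wraps the last two tuple elements in `Option` and defaults `optional` with `unwrap_or(false)`, so rows whose stored arrays carry nulls still deserialize.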
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Deref)] +#[serde(from = "DepIn", into = "DepOut")] +pub(crate) struct ReleaseDependency(Dependency); + +impl ReleaseDependency { + pub(crate) fn into_inner(self) -> Dependency { + self.0 + } +} + +impl From for ReleaseDependency { + fn from((name, req, kind, optional): DepIn) -> Self { + ReleaseDependency(Dependency { + name, + req, + kind, + optional: optional.unwrap_or(false), + rename: None, + }) + } +} + +impl From for DepOut { + fn from(rd: ReleaseDependency) -> Self { + let d = rd.0; + ( + d.name, + d.req.to_string(), + d.kind.unwrap_or_else(|| "normal".into()), + d.optional, + ) + } +} + /// Adds a package into database. /// /// Package must be built first. @@ -59,7 +97,7 @@ pub(crate) async fn finish_release( source_size: u64, ) -> Result<()> { debug!("updating release data"); - let dependencies = convert_dependencies(metadata_pkg); + let dependencies = convert_dependencies(metadata_pkg)?; let rustdoc = get_rustdoc(metadata_pkg, source_dir).unwrap_or(None); let readme = get_readme(metadata_pkg, source_dir).unwrap_or(None); let features = get_features(metadata_pkg); @@ -94,7 +132,7 @@ pub(crate) async fn finish_release( WHERE id = $1"#, release_id.0, registry_data.release_time, - serde_json::to_value(dependencies)?, + dependencies, metadata_pkg.package_name(), registry_data.yanked, has_docs, @@ -393,20 +431,14 @@ pub(crate) async fn initialize_build( Ok(build_id) } -/// Convert dependencies into Vec<(String, String, String, bool)> -fn convert_dependencies(pkg: &MetadataPackage) -> Vec<(String, String, String, bool)> { - pkg.dependencies +/// Convert dependencies into our own internal JSON representation +fn convert_dependencies(pkg: &MetadataPackage) -> Result { + let dependencies: Vec<_> = pkg + .dependencies .iter() - .map(|dependency| { - let name = dependency.name.clone(); - let version = dependency.req.clone(); - let kind = dependency - .kind - .clone() - .unwrap_or_else(|| "normal".to_string()); - (name, version, kind, dependency.optional) - }) - .collect() + .map(|dependency| ReleaseDependency(dependency.clone())) + .collect::>(); + Ok(serde_json::to_value(dependencies)?) 
} /// Reads features and converts them to Vec with default being first diff --git a/src/db/mod.rs b/src/db/mod.rs index 4c903952b..eb874347e 100644 --- a/src/db/mod.rs +++ b/src/db/mod.rs @@ -4,8 +4,8 @@ use sqlx::migrate::{Migrate, Migrator}; pub use self::add_package::update_latest_version_id; pub(crate) use self::add_package::{ - add_doc_coverage, finish_build, finish_release, initialize_build, initialize_crate, - initialize_release, update_build_with_error, + ReleaseDependency, add_doc_coverage, finish_build, finish_release, initialize_build, + initialize_crate, initialize_release, update_build_with_error, }; pub use self::{ add_package::{ diff --git a/src/docbuilder/rustwide_builder.rs b/src/docbuilder/rustwide_builder.rs index 5f0242e58..31b08209f 100644 --- a/src/docbuilder/rustwide_builder.rs +++ b/src/docbuilder/rustwide_builder.rs @@ -1450,11 +1450,10 @@ mod tests { &format!("{default_target}/{crate_path}/index.html"), )?); - let default_target_url = - format!("/{crate_}/{version}/{default_target}/{crate_path}/index.html"); + let default_target_url = format!("/{crate_}/{version}/{default_target}/{crate_path}/"); runtime.block_on(web.assert_redirect( &default_target_url, - &format!("/{crate_}/{version}/{crate_path}/index.html"), + &format!("/{crate_}/{version}/{crate_path}/"), ))?; // Non-dist toolchains only have a single target, and of course @@ -1498,7 +1497,6 @@ mod tests { json_files.sort(); dbg!(&json_files); assert!(json_files[0].starts_with(&format!("empty-library_1.0.0_{target}_"))); - assert!(json_files[0].ends_with(&format!(".json.{ext}"))); assert_eq!( json_files[1], diff --git a/src/test/fakes.rs b/src/test/fakes.rs index 109407dfd..4a84e9b86 100644 --- a/src/test/fakes.rs +++ b/src/test/fakes.rs @@ -12,7 +12,7 @@ use crate::storage::{ AsyncStorage, CompressionAlgorithm, RustdocJsonFormatVersion, compress, rustdoc_archive_path, rustdoc_json_path, source_archive_path, }; -use crate::utils::{Dependency, MetadataPackage, Target}; +use crate::utils::{Dependency, MetadataPackage, cargo_metadata::Target}; use anyhow::{Context, bail}; use base64::{Engine, engine::general_purpose::STANDARD as b64}; use chrono::{DateTime, Utc}; @@ -330,7 +330,7 @@ impl<'a> FakeRelease<'a> { } /// Returns the release_id - pub(crate) async fn create(self) -> Result { + pub(crate) async fn create(mut self) -> Result { use std::fs; use std::path::Path; @@ -514,37 +514,34 @@ impl<'a> FakeRelease<'a> { store_files_into(&self.source_files, crate_dir)?; let default_target = self.default_target.unwrap_or("x86_64-unknown-linux-gnu"); + if !self.doc_targets.iter().any(|t| t == default_target) { + self.doc_targets.insert(0, default_target.to_owned()); + } - { - let mut targets = self.doc_targets.clone(); - if !targets.contains(&default_target.to_owned()) { - targets.push(default_target.to_owned()); - } - for target in &targets { - let dummy_rustdoc_json_content = serde_json::to_vec(&serde_json::json!({ - "format_version": 42 - }))?; - - for alg in RUSTDOC_JSON_COMPRESSION_ALGORITHMS { - let compressed_json: Vec = compress(&*dummy_rustdoc_json_content, *alg)?; - - for format_version in [ - RustdocJsonFormatVersion::Version(42), - RustdocJsonFormatVersion::Latest, - ] { - storage - .store_one_uncompressed( - &rustdoc_json_path( - &package.name, - &package.version, - target, - format_version, - Some(*alg), - ), - compressed_json.clone(), - ) - .await?; - } + for target in &self.doc_targets { + let dummy_rustdoc_json_content = serde_json::to_vec(&serde_json::json!({ + "format_version": 42 + }))?; + + for alg 
in RUSTDOC_JSON_COMPRESSION_ALGORITHMS { + let compressed_json: Vec = compress(&*dummy_rustdoc_json_content, *alg)?; + + for format_version in [ + RustdocJsonFormatVersion::Version(42), + RustdocJsonFormatVersion::Latest, + ] { + storage + .store_one_uncompressed( + &rustdoc_json_path( + &package.name, + &package.version, + target, + format_version, + Some(*alg), + ), + compressed_json.clone(), + ) + .await?; } } } @@ -555,6 +552,7 @@ impl<'a> FakeRelease<'a> { let mut async_conn = db.async_conn().await; let crate_id = initialize_crate(&mut async_conn, &package.name).await?; let release_id = initialize_release(&mut async_conn, crate_id, &package.version).await?; + crate::db::finish_release( &mut async_conn, crate_id, diff --git a/src/test/mod.rs b/src/test/mod.rs index 7069c9eff..eabf7374d 100644 --- a/src/test/mod.rs +++ b/src/test/mod.rs @@ -72,7 +72,7 @@ impl AxumResponseTestExt for axum::response::Response { expected_directives.to_str().unwrap(), ); } else { - assert!(cache_control.is_none()); + assert!(cache_control.is_none(), "{:?}", cache_control); } } diff --git a/src/utils/cargo_metadata.rs b/src/utils/cargo_metadata.rs index 66cd8e35b..eea8e879f 100644 --- a/src/utils/cargo_metadata.rs +++ b/src/utils/cargo_metadata.rs @@ -125,7 +125,7 @@ impl Target { } } -#[derive(Debug, Deserialize, Serialize)] +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)] pub(crate) struct Dependency { pub(crate) name: String, pub(crate) req: String, diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 3d2634bdd..93b239ce7 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -1,20 +1,21 @@ //! Various utilities for docs.rs -pub(crate) use self::cargo_metadata::{CargoMetadata, Package as MetadataPackage}; -pub(crate) use self::copy::copy_dir_all; -pub use self::daemon::{start_daemon, watch_registry}; -pub(crate) use self::html::rewrite_rustdoc_html_stream; -pub use self::queue::{ - get_crate_pattern_and_priority, get_crate_priority, list_crate_priorities, - remove_crate_priority, set_crate_priority, +pub(crate) use self::{ + cargo_metadata::{CargoMetadata, Dependency, Package as MetadataPackage}, + copy::copy_dir_all, + html::rewrite_rustdoc_html_stream, + rustc_version::{get_correct_docsrs_style_file, parse_rustc_version}, +}; +pub use self::{ + daemon::{start_daemon, watch_registry}, + queue::{ + get_crate_pattern_and_priority, get_crate_priority, list_crate_priorities, + remove_crate_priority, set_crate_priority, + }, + queue_builder::queue_builder, }; -pub use self::queue_builder::queue_builder; -pub(crate) use self::rustc_version::{get_correct_docsrs_style_file, parse_rustc_version}; - -#[cfg(test)] -pub(crate) use self::cargo_metadata::{Dependency, Target}; -mod cargo_metadata; +pub(crate) mod cargo_metadata; pub mod consistency; mod copy; pub mod daemon; @@ -22,14 +23,12 @@ mod html; mod queue; pub(crate) mod queue_builder; pub(crate) mod rustc_version; -use anyhow::{Context as _, Result}; -use serde::Serialize; -use serde::de::DeserializeOwned; -use std::{fmt, panic}; -use tracing::{Span, error, warn}; pub(crate) mod sized_buffer; -use std::{future::Future, thread, time::Duration}; +use anyhow::{Context as _, Result}; +use serde::{Serialize, de::DeserializeOwned}; +use std::{fmt, future::Future, panic, thread, time::Duration}; +use tracing::{Span, error, warn}; pub(crate) fn report_error(err: &anyhow::Error) { // Debug-format for anyhow errors includes context & backtrace diff --git a/src/web/build_details.rs b/src/web/build_details.rs index 35c787156..fcef258bf 100644 --- 
a/src/web/build_details.rs +++ b/src/web/build_details.rs @@ -5,7 +5,7 @@ use crate::{ web::{ MetaData, error::{AxumNope, AxumResult}, - extractors::{DbConnection, Path}, + extractors::{DbConnection, Path, rustdoc::RustdocParams}, file::File, filters, page::templates::{RenderBrands, RenderRegular, RenderSolid}, @@ -33,12 +33,13 @@ pub(crate) struct BuildDetails { #[derive(Template)] #[template(path = "crate/build_details.html")] -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq)] struct BuildDetailsPage { metadata: MetaData, build_details: BuildDetails, all_log_filenames: Vec, current_filename: Option, + params: RustdocParams, } impl_axum_webpage! { BuildDetailsPage } @@ -141,8 +142,11 @@ pub(crate) async fn build_details_handler( (file_content, all_log_filenames, current_filename) }; + let metadata = MetaData::from_crate(&mut conn, ¶ms.name, ¶ms.version, None).await?; + let params = RustdocParams::from_metadata(&metadata); + Ok(BuildDetailsPage { - metadata: MetaData::from_crate(&mut conn, ¶ms.name, ¶ms.version, None).await?, + metadata, build_details: BuildDetails { id, rustc_version: row.rustc_version, @@ -154,6 +158,7 @@ pub(crate) async fn build_details_handler( }, all_log_filenames, current_filename, + params, } .into_response()) } diff --git a/src/web/builds.rs b/src/web/builds.rs index 0980d332a..4aa96bd54 100644 --- a/src/web/builds.rs +++ b/src/web/builds.rs @@ -1,8 +1,3 @@ -use super::{ - cache::CachePolicy, - error::{AxumNope, JsonAxumNope, JsonAxumResult}, - headers::CanonicalUrl, -}; use crate::{ AsyncBuildQueue, Config, db::{BuildId, types::BuildStatus}, @@ -10,9 +5,12 @@ use crate::{ impl_axum_webpage, web::{ MetaData, ReqVersion, - error::{AxumResult, EscapedURI}, - extractors::{DbConnection, Path}, - filters, match_version, + cache::CachePolicy, + error::{AxumNope, AxumResult, JsonAxumNope, JsonAxumResult}, + extractors::{DbConnection, Path, rustdoc::RustdocParams}, + filters, + headers::CanonicalUrl, + match_version, page::templates::{RenderBrands, RenderRegular, RenderSolid}, }, }; @@ -47,6 +45,7 @@ struct BuildsPage { builds: Vec, limits: Limits, canonical_url: CanonicalUrl, + params: RustdocParams, } impl_axum_webpage! { BuildsPage } @@ -58,26 +57,41 @@ impl BuildsPage { } pub(crate) async fn build_list_handler( - Path((name, req_version)): Path<(String, ReqVersion)>, + params: RustdocParams, mut conn: DbConnection, Extension(config): Extension>, ) -> AxumResult { - let version = match_version(&mut conn, &name, &req_version) + let version = match_version(&mut conn, params.name(), params.req_version()) .await? .assume_exact_name()? .into_canonical_req_version_or_else(|version| { AxumNope::Redirect( - EscapedURI::new(&format!("/crate/{name}/{version}/builds"), None), + params.clone().with_req_version(version).builds_url(), CachePolicy::ForeverInCdn, ) })? 
.into_version(); + let metadata = MetaData::from_crate( + &mut conn, + params.name(), + &version, + Some(params.req_version().clone()), + ) + .await?; + let params = params.apply_metadata(&metadata); + Ok(BuildsPage { - metadata: MetaData::from_crate(&mut conn, &name, &version, Some(req_version)).await?, - builds: get_builds(&mut conn, &name, &version).await?, - limits: Limits::for_crate(&config, &mut conn, &name).await?, - canonical_url: CanonicalUrl::from_path(format!("/crate/{name}/latest/builds")), + metadata, + builds: get_builds(&mut conn, params.name(), &version).await?, + limits: Limits::for_crate(&config, &mut conn, params.name()).await?, + canonical_url: CanonicalUrl::from_uri( + params + .clone() + .with_req_version(&ReqVersion::Latest) + .builds_url(), + ), + params, } .into_response()) } @@ -523,7 +537,6 @@ mod tests { .collect(); let values: Vec<_> = values.iter().map(|v| &**v).collect(); - dbg!(&values); assert!(values.contains(&"6.44 GB")); assert!(values.contains(&"2 hours")); assert!(values.contains(&"102.4 kB")); diff --git a/src/web/crate_details.rs b/src/web/crate_details.rs index d6e5692b9..253596817 100644 --- a/src/web/crate_details.rs +++ b/src/web/crate_details.rs @@ -1,19 +1,21 @@ -use super::{MetaData, match_version}; -use crate::db::{BuildId, ReleaseId}; -use crate::registry_api::OwnerKind; -use crate::utils::{get_correct_docsrs_style_file, report_error}; use crate::{ AsyncStorage, - db::{CrateId, types::BuildStatus}, + db::{BuildId, CrateId, ReleaseDependency, ReleaseId, types::BuildStatus}, impl_axum_webpage, + registry_api::OwnerKind, storage::PathNotFoundError, + utils::{Dependency, get_correct_docsrs_style_file, report_error}, web::{ - MatchedRelease, ReqVersion, + MatchedRelease, MetaData, ReqVersion, cache::CachePolicy, - error::{AxumNope, AxumResult, EscapedURI}, - extractors::{DbConnection, Path}, + error::{AxumNope, AxumResult}, + extractors::{ + DbConnection, + rustdoc::{PageKind, RustdocParams}, + }, + headers::CanonicalUrl, + match_version, page::templates::{RenderBrands, RenderRegular, RenderSolid, filters}, - rustdoc::RustdocHtmlParams, }, }; use anyhow::{Context, Result, anyhow}; @@ -26,7 +28,6 @@ use chrono::{DateTime, Utc}; use futures_util::stream::TryStreamExt; use log::warn; use semver::Version; -use serde::Deserialize; use serde_json::Value; use std::sync::Arc; @@ -37,13 +38,13 @@ pub(crate) struct CrateDetails { pub(crate) version: Version, pub(crate) description: Option, pub(crate) owners: Vec<(String, String, OwnerKind)>, - pub(crate) dependencies: Option, + pub(crate) dependencies: Vec, readme: Option, rustdoc: Option, // this is description_long in database release_time: Option>, build_status: BuildStatus, pub latest_build_id: Option, - last_successful_build: Option, + last_successful_build: Option, pub rustdoc_status: Option, pub archive_storage: bool, pub repository_url: Option, @@ -100,6 +101,8 @@ pub(crate) struct Release { pub is_library: Option, pub rustdoc_status: Option, pub target_name: Option, + pub default_target: Option, + pub doc_targets: Option>, pub release_time: Option>, } @@ -229,12 +232,20 @@ impl CrateDetails { let parsed_license = krate.license.as_deref().map(super::licenses::parse_license); + let dependencies = krate + .dependencies + .and_then(|value| serde_json::from_value::>(value).ok()) + .unwrap_or_default() + .into_iter() + .map(|rdep| rdep.into_inner()) + .collect(); + let mut crate_details = CrateDetails { name: krate.name, version: version.clone(), description: krate.description, owners: Vec::new(), - 
dependencies: krate.dependencies, + dependencies, readme: krate.readme, rustdoc: krate.description_long, release_time: krate.release_time, @@ -285,7 +296,7 @@ impl CrateDetails { .filter(|release| { release.build_status == BuildStatus::Success && release.yanked == Some(false) }) - .map(|release| release.version.to_string()) + .map(|release| release.version.clone()) .next(); } @@ -384,7 +395,9 @@ pub(crate) async fn releases_for_crate( releases.is_library, releases.rustdoc_status, releases.release_time, - releases.target_name + releases.target_name, + releases.default_target, + releases.doc_targets FROM releases INNER JOIN release_build_status ON releases.id = release_build_status.rid WHERE @@ -414,6 +427,8 @@ pub(crate) async fn releases_for_crate( is_library: row.is_library, rustdoc_status: row.rustdoc_status, target_name: row.target_name, + default_target: row.default_target, + doc_targets: row.doc_targets.map(MetaData::parse_doc_targets), release_time: row.release_time, })) }) @@ -424,9 +439,8 @@ pub(crate) async fn releases_for_crate( Ok(releases) } -#[derive(Template)] +#[derive(Debug, Clone, Template)] #[template(path = "crate/details.html")] -#[derive(Debug, Clone, PartialEq)] struct CrateDetailsPage { version: Version, name: String, @@ -440,16 +454,18 @@ struct CrateDetailsPage { documentation_url: Option, repository_url: Option, repository_metadata: Option, - dependencies: Option, + dependencies: Vec, releases: Vec, readme: Option, build_status: BuildStatus, rustdoc_status: Option, is_library: Option, - last_successful_build: Option, + last_successful_build: Option, rustdoc: Option, // this is description_long in database source_size: Option, documentation_size: Option, + canonical_url: CanonicalUrl, + params: RustdocParams, } impl CrateDetailsPage { @@ -464,37 +480,29 @@ impl_axum_webpage! { cpu_intensive_rendering = true, } -#[derive(Deserialize, Clone, Debug)] -pub(crate) struct CrateDetailHandlerParams { - name: String, - version: Option, -} - #[tracing::instrument(skip(conn, storage))] pub(crate) async fn crate_details_handler( - Path(params): Path, + params: RustdocParams, Extension(storage): Extension>, mut conn: DbConnection, ) -> AxumResult { - let req_version = params.version.ok_or_else(|| { - AxumNope::Redirect( - EscapedURI::new( - &format!("/crate/{}/{}", ¶ms.name, ReqVersion::Latest), - None, - ), + if params.original_path() != params.crate_details_url().path() { + return Err(AxumNope::Redirect( + params.crate_details_url(), CachePolicy::ForeverInCdn, - ) - })?; + )); + } - let matched_release = match_version(&mut conn, ¶ms.name, &req_version) + let matched_release = match_version(&mut conn, params.name(), params.req_version()) .await? .assume_exact_name()? .into_canonical_req_version_or_else(|version| { AxumNope::Redirect( - EscapedURI::new(&format!("/crate/{}/{}", ¶ms.name, version), None), + params.clone().with_req_version(version).crate_details_url(), CachePolicy::ForeverInCdn, ) })?; + let params = params.apply_matched_release(&matched_release); let mut details = CrateDetails::from_matched_release(&mut conn, matched_release).await?; @@ -529,6 +537,8 @@ pub(crate) async fn crate_details_handler( .. 
} = details; + let is_latest_version = params.req_version().is_latest(); + let mut res = CrateDetailsPage { version, name, @@ -552,15 +562,22 @@ pub(crate) async fn crate_details_handler( rustdoc, source_size, documentation_size, + canonical_url: CanonicalUrl::from_uri( + params + .clone() + .with_req_version(ReqVersion::Latest) + .crate_details_url(), + ), + params, } .into_response(); res.extensions_mut() - .insert::(if req_version.is_latest() { + .insert::(if is_latest_version { CachePolicy::ForeverInCdn } else { CachePolicy::ForeverInCdnAndStaleInBrowser }); - Ok(res.into_response()) + Ok(res) } #[derive(Template)] @@ -568,9 +585,7 @@ pub(crate) async fn crate_details_handler( #[derive(Debug, Clone, PartialEq)] struct ReleaseList { releases: Vec, - crate_name: String, - inner_path: String, - target: String, + params: RustdocParams, } impl_axum_webpage! { @@ -581,16 +596,15 @@ impl_axum_webpage! { #[tracing::instrument] pub(crate) async fn get_all_releases( - Path(params): Path, + params: RustdocParams, mut conn: DbConnection, ) -> AxumResult { - // NOTE: we're getting RustDocHtmlParams here, where both target and path are optional. - // Due to how this handler is used in the `releases_list` macro, we always get both values. - // both values (when used in the topbar). - - let matched_release = match_version(&mut conn, ¶ms.name, ¶ms.version) + let params = params.with_page_kind(PageKind::Rustdoc); + // NOTE: we're getting RustDocParams here, where both target and path are optional. + let matched_release = match_version(&mut conn, params.name(), params.req_version()) .await? .into_canonical_req_version_or_else(|_| AxumNope::VersionNotFound)?; + let params = params.apply_matched_release(&matched_release); if matched_release.build_status() != BuildStatus::Success { // This handler should only be used for successful builds, so then we have all rows in the @@ -600,51 +614,20 @@ pub(crate) async fn get_all_releases( return Err(AxumNope::CrateNotFound); } - // NOTE: we don't check if the target exists here. - // If the target doesn't exist, the target-redirect will think - // it's part of the `inner_path`, don't find the file in storage, - // and redirect to a search. - let target = if let Some(req_target) = params.target { - format!("{req_target}/") - } else { - String::new() - }; - - let inner_path = params.path.unwrap_or_default(); - let inner_path = inner_path.trim_end_matches('/'); - Ok(ReleaseList { releases: matched_release.all_releases, - target, - inner_path: inner_path.to_string(), - crate_name: params.name, + params, } .into_response()) } -#[derive(Debug, Clone, PartialEq)] -struct ShortMetadata { - name: String, - version: Version, - req_version: ReqVersion, - doc_targets: Vec, -} - -impl ShortMetadata { - // Used in templates. - pub(crate) fn doc_targets(&self) -> Option<&[String]> { - Some(&self.doc_targets) - } -} - #[derive(Template)] #[template(path = "rustdoc/platforms.html")] #[derive(Debug, Clone, PartialEq)] struct PlatformList { - metadata: ShortMetadata, - inner_path: String, use_direct_platform_links: bool, current_target: String, + params: RustdocParams, } impl_axum_webpage! { @@ -655,140 +638,77 @@ impl_axum_webpage! 
{ #[tracing::instrument] pub(crate) async fn get_all_platforms_inner( - Path(params): Path, + mut params: RustdocParams, mut conn: DbConnection, is_crate_root: bool, ) -> AxumResult { - let req_path: String = params.path.unwrap_or_default(); - let req_path: Vec<&str> = req_path.split('/').collect(); + if !is_crate_root { + params = params.with_page_kind(PageKind::Rustdoc); + } - let matched_release = match_version(&mut conn, ¶ms.name, ¶ms.version) + let matched_release = match_version(&mut conn, params.name(), params.req_version()) .await? .into_exactly_named_or_else(|corrected_name, req_version| { AxumNope::Redirect( - EscapedURI::new( - &format!( - "/platforms/{}/{}/{}", - corrected_name, - req_version, - req_path.join("/") - ), - None, - ), + params + .clone() + .with_name(corrected_name) + .with_req_version(req_version) + .platforms_partial_url(), CachePolicy::NoCaching, ) })? .into_canonical_req_version_or_else(|version| { AxumNope::Redirect( - EscapedURI::new( - &format!( - "/platforms/{}/{}/{}", - ¶ms.name, - version, - req_path.join("/") - ), - None, - ), + params + .clone() + .with_req_version(version) + .platforms_partial_url(), CachePolicy::ForeverInCdn, ) })?; + let params = params.apply_matched_release(&matched_release); - let krate = sqlx::query!( - "SELECT - releases.default_target, - releases.doc_targets - FROM releases - WHERE releases.id = $1;", - matched_release.id().0, - ) - .fetch_optional(&mut *conn) - .await? - .ok_or(AxumNope::CrateNotFound)?; - - if krate.doc_targets.is_none() - || krate.default_target.is_none() - || matched_release.target_name().is_none() - { + if !matched_release.build_status().is_success() { // when the build wasn't finished, we don't have any target platforms // we could read from. return Ok(PlatformList { - metadata: ShortMetadata { - name: params.name, - version: matched_release.version().clone(), - req_version: params.version.clone(), - doc_targets: Vec::new(), - }, - inner_path: "".into(), use_direct_platform_links: is_crate_root, current_target: "".into(), + params, } .into_response()); } - let doc_targets = MetaData::parse_doc_targets(krate.doc_targets.unwrap()); - - // The path within this crate version's rustdoc output - let inner; - let (target, inner_path) = { - let mut inner_path = req_path.clone(); - - let target = if inner_path.len() > 1 - && doc_targets - .iter() - .any(|s| Some(s) == params.target.as_ref()) - { - inner_path.remove(0); - params.target.as_ref().unwrap() - } else { - "" - }; - - inner = inner_path.join("/"); - (target, inner.trim_end_matches('/')) - }; - let inner_path = if inner_path.is_empty() { - format!("{}/index.html", matched_release.target_name().unwrap()) - } else { - format!("{}/{inner_path}", matched_release.target_name().unwrap()) - }; - let latest_release = latest_release(&matched_release.all_releases) .expect("we couldn't end up here without releases"); let current_target = if latest_release.build_status.is_success() { - if target.is_empty() { - krate.default_target.unwrap() - } else { - target.to_owned() - } + params + .doc_target_or_default() + .unwrap_or_default() + .to_owned() } else { String::new() }; Ok(PlatformList { - metadata: ShortMetadata { - name: params.name, - version: matched_release.version().clone(), - req_version: params.version.clone(), - doc_targets, - }, - inner_path, use_direct_platform_links: is_crate_root, current_target, + params, } .into_response()) } pub(crate) async fn get_all_platforms_root( - Path(mut params): Path, + params: RustdocParams, conn: DbConnection, ) -> 
AxumResult { - params.path = None; - get_all_platforms_inner(Path(params), conn, true).await + get_all_platforms_inner(params.with_inner_path(""), conn, true).await } pub(crate) async fn get_all_platforms( - params: Path, + params: RustdocParams, conn: DbConnection, ) -> AxumResult { get_all_platforms_inner(params, conn, false).await @@ -872,13 +792,13 @@ mod tests { db: &TestDatabase, package: &str, version: &str, - expected_last_successful_build: Option<&str>, + expected_last_successful_build: Option, ) -> Result<(), Error> { let mut conn = db.async_conn().await; let details = crate_details(&mut conn, package, version, None).await; anyhow::ensure!( - details.last_successful_build.as_deref() == expected_last_successful_build, + details.last_successful_build == expected_last_successful_build, "didn't expect {:?}", details.last_successful_build, ); @@ -971,9 +891,11 @@ mod tests { assert_last_successful_build_equals(db, "foo", "0.0.1", None).await?; assert_last_successful_build_equals(db, "foo", "0.0.2", None).await?; - assert_last_successful_build_equals(db, "foo", "0.0.3", Some("0.0.2")).await?; + assert_last_successful_build_equals(db, "foo", "0.0.3", Some("0.0.2".parse().unwrap())) + .await?; assert_last_successful_build_equals(db, "foo", "0.0.4", None).await?; - assert_last_successful_build_equals(db, "foo", "0.0.5", Some("0.0.2")).await?; + assert_last_successful_build_equals(db, "foo", "0.0.5", Some("0.0.2".parse().unwrap())) + .await?; Ok(()) }); } @@ -1045,7 +967,8 @@ mod tests { .await?; assert_last_successful_build_equals(db, "foo", "0.0.1", None).await?; - assert_last_successful_build_equals(db, "foo", "0.0.2", Some("0.0.4")).await?; + assert_last_successful_build_equals(db, "foo", "0.0.2", Some("0.0.4".parse().unwrap())) + .await?; assert_last_successful_build_equals(db, "foo", "0.0.3", None).await?; assert_last_successful_build_equals(db, "foo", "0.0.4", None).await?; Ok(()) @@ -1129,6 +1052,8 @@ mod tests { id: details.releases[0].id, target_name: Some("foo".to_owned()), release_time: None, + default_target: Some("x86_64-unknown-linux-gnu".into()), + doc_targets: Some(vec!["x86_64-unknown-linux-gnu".into()]), }, Release { version: semver::Version::parse("0.12.0")?, @@ -1139,6 +1064,8 @@ mod tests { id: details.releases[1].id, target_name: Some("foo".to_owned()), release_time: None, + default_target: Some("x86_64-unknown-linux-gnu".into()), + doc_targets: Some(vec!["x86_64-unknown-linux-gnu".into()]), }, Release { version: semver::Version::parse("0.3.0")?, @@ -1149,6 +1076,8 @@ mod tests { id: details.releases[2].id, target_name: Some("foo".to_owned()), release_time: None, + default_target: Some("x86_64-unknown-linux-gnu".into()), + doc_targets: Some(vec!["x86_64-unknown-linux-gnu".into()]), }, Release { version: semver::Version::parse("0.2.0")?, @@ -1159,6 +1088,8 @@ mod tests { id: details.releases[3].id, target_name: Some("foo".to_owned()), release_time: None, + default_target: Some("x86_64-unknown-linux-gnu".into()), + doc_targets: Some(vec!["x86_64-unknown-linux-gnu".into()]), }, Release { version: semver::Version::parse("0.2.0-alpha")?, @@ -1169,6 +1100,8 @@ mod tests { id: details.releases[4].id, target_name: Some("foo".to_owned()), release_time: None, + default_target: Some("x86_64-unknown-linux-gnu".into()), + doc_targets: Some(vec!["x86_64-unknown-linux-gnu".into()]), }, Release { version: semver::Version::parse("0.1.1")?, @@ -1179,6 +1112,8 @@ mod tests { id: details.releases[5].id, target_name: Some("foo".to_owned()), release_time: None, + default_target: 
Some("x86_64-unknown-linux-gnu".into()), + doc_targets: Some(vec!["x86_64-unknown-linux-gnu".into()]), }, Release { version: semver::Version::parse("0.1.0")?, @@ -1189,6 +1124,8 @@ mod tests { id: details.releases[6].id, target_name: Some("foo".to_owned()), release_time: None, + default_target: Some("x86_64-unknown-linux-gnu".into()), + doc_targets: Some(vec!["x86_64-unknown-linux-gnu".into()]), }, Release { version: semver::Version::parse("0.0.1")?, @@ -1199,6 +1136,8 @@ mod tests { id: details.releases[7].id, target_name: Some("foo".to_owned()), release_time: None, + default_target: Some("x86_64-unknown-linux-gnu".into()), + doc_targets: Some(vec!["x86_64-unknown-linux-gnu".into()]), }, ] ); @@ -1867,6 +1806,8 @@ mod tests { }) .collect(); + dbg!(&platform_links); + assert_eq!(platform_links.len(), 2); for (_, url, rel) in &platform_links { @@ -1889,7 +1830,7 @@ mod tests { url: &str, should_contain_redirect: bool, ) { - let response = env.web_app().await.get(url).await.unwrap(); + let response = env.web_app().await.get(dbg!(url)).await.unwrap(); let status = response.status(); assert!( status.is_success(), @@ -1899,7 +1840,11 @@ mod tests { response.redirect_target().unwrap_or_default(), ); let text = response.text().await.unwrap(); - let list1 = check_links(text.clone(), false, should_contain_redirect); + let list1 = dbg!(check_links( + text.clone(), + false, + dbg!(should_contain_redirect) + )); // Same test with AJAX endpoint. let platform_menu_url = kuchikiki::parse_html() @@ -1911,14 +1856,23 @@ mod tests { .get("data-url") .expect("data-url") .to_string(); - let response = env.web_app().await.get(&platform_menu_url).await.unwrap(); - assert!(response.status().is_success()); + let response = env + .web_app() + .await + .get(&dbg!(platform_menu_url)) + .await + .unwrap(); + assert!( + response.status().is_success(), + "{}", + response.text().await.unwrap() + ); response.assert_cache_control(CachePolicy::ForeverInCdn, env.config()); - let list2 = check_links( + let list2 = dbg!(check_links( response.text().await.unwrap(), true, should_contain_redirect, - ); + )); assert_eq!(list1, list2); } @@ -1943,12 +1897,7 @@ mod tests { run_check_links_redir(&env, "/crate/dummy/0.4.0", false).await; run_check_links_redir(&env, "/dummy/latest/dummy/", true).await; - run_check_links_redir( - &env, - "/dummy/0.4.0/x86_64-pc-windows-msvc/dummy/index.html", - true, - ) - .await; + run_check_links_redir(&env, "/dummy/0.4.0/x86_64-pc-windows-msvc/dummy/", true).await; run_check_links_redir( &env, "/dummy/0.4.0/x86_64-pc-windows-msvc/dummy/struct.A.html", @@ -1987,6 +1936,7 @@ mod tests { .rustdoc_file("dummy-ba/index.html") .rustdoc_file("x86_64-unknown-linux-gnu/dummy-ba/index.html") .add_target("x86_64-unknown-linux-gnu") + .default_target("aarch64-apple-darwin") .create() .await?; env.fake_release() @@ -1996,6 +1946,7 @@ mod tests { .rustdoc_file("dummy-ba/index.html") .rustdoc_file("x86_64-unknown-linux-gnu/dummy-ba/index.html") .add_target("x86_64-unknown-linux-gnu") + .default_target("aarch64-apple-darwin") .create() .await?; @@ -2014,8 +1965,8 @@ mod tests { &env, "/crate/dummy-ba/latest/menus/releases/dummy_ba/index.html", vec![ - "/crate/dummy-ba/0.5.0/target-redirect/dummy_ba/index.html".to_string(), - "/crate/dummy-ba/0.4.0/target-redirect/dummy_ba/index.html".to_string(), + "/crate/dummy-ba/0.5.0/target-redirect/dummy_ba/".to_string(), + "/crate/dummy-ba/0.4.0/target-redirect/dummy_ba/".to_string(), ], ) .await; @@ -2024,8 +1975,8 @@ mod tests { &env, 
"/crate/dummy-ba/latest/menus/releases/x86_64-unknown-linux-gnu/dummy_ba/index.html", vec![ - "/crate/dummy-ba/0.5.0/target-redirect/x86_64-unknown-linux-gnu/dummy_ba/index.html".to_string(), - "/crate/dummy-ba/0.4.0/target-redirect/x86_64-unknown-linux-gnu/dummy_ba/index.html".to_string(), + "/crate/dummy-ba/0.5.0/target-redirect/x86_64-unknown-linux-gnu/dummy_ba/".to_string(), + "/crate/dummy-ba/0.4.0/target-redirect/x86_64-unknown-linux-gnu/dummy_ba/".to_string(), ], ).await; diff --git a/src/web/error.rs b/src/web/error.rs index 7930367a6..70dec0844 100644 --- a/src/web/error.rs +++ b/src/web/error.rs @@ -1,9 +1,9 @@ use crate::{ db::PoolError, storage::PathNotFoundError, - web::{cache::CachePolicy, encode_url_path, releases::Search}, + web::{AxumErrorPage, cache::CachePolicy, escaped_uri::EscapedURI, releases::Search}, }; -use anyhow::anyhow; +use anyhow::{Result, anyhow}; use axum::{ Json, http::StatusCode, @@ -12,26 +12,6 @@ use axum::{ use std::borrow::Cow; use tracing::error; -use super::AxumErrorPage; - -#[derive(Debug)] -pub struct EscapedURI(String); - -impl EscapedURI { - pub fn new(path: &str, query: Option<&str>) -> Self { - let mut path = encode_url_path(path); - if let Some(query) = query { - path.push('?'); - path.push_str(query); - } - Self(path) - } - - pub fn as_str(&self) -> &str { - self.0.as_str() - } -} - #[derive(Debug, thiserror::Error)] pub enum AxumNope { #[error("Requested resource not found")] @@ -147,7 +127,7 @@ struct ErrorInfo { } fn redirect_with_policy(target: EscapedURI, cache_policy: CachePolicy) -> AxumResponse { - match super::axum_cached_redirect(target.0, cache_policy) { + match super::axum_cached_redirect(target, cache_policy) { Ok(response) => response.into_response(), Err(err) => AxumNope::InternalError(err).into_response(), } @@ -251,7 +231,7 @@ mod tests { #[test] fn test_redirect_error_encodes_url_path() { let response = AxumNope::Redirect( - EscapedURI::new("/something>", None), + EscapedURI::from_path("/something>"), CachePolicy::ForeverInCdnAndBrowser, ) .into_response(); diff --git a/src/web/escaped_uri.rs b/src/web/escaped_uri.rs new file mode 100644 index 000000000..6512ce58b --- /dev/null +++ b/src/web/escaped_uri.rs @@ -0,0 +1,460 @@ +use crate::web::encode_url_path; +use askama::filters::HtmlSafe; +use http::{Uri, uri::PathAndQuery}; +use std::{borrow::Borrow, fmt::Display, iter, str::FromStr}; +use url::form_urlencoded; + +/// internal wrapper around `http::Uri` with some convenience functions. +/// +/// Ensures that the path part is always properly percent-encoded, including some characters +/// that http::Uri would allow, but we still want to encode, like umlauts. +/// +/// Also we support fragments, with http::Uri doesn't support yet. 
+/// See https://github.com/hyperium/http/issues/775 +#[derive(Debug, Clone, PartialEq)] +pub struct EscapedURI { + uri: Uri, + fragment: Option, +} + +impl EscapedURI { + pub fn from_uri(uri: Uri) -> Self { + if uri.path_and_query().is_some() { + let encoded_path = encode_url_path(uri.path()); + if uri.path() == encoded_path { + Self { + uri, + fragment: None, + } + } else { + // path needs additional encoding + let mut parts = uri.into_parts(); + + parts.path_and_query = Some( + PathAndQuery::from_maybe_shared( + parts + .path_and_query + .take() + .map(|pq| { + format!( + "{}{}", + encoded_path, + pq.query().map(|q| format!("?{}", q)).unwrap_or_default(), + ) + }) + .unwrap_or_default(), + ) + .expect("can't fail since we encode the path ourselves"), + ); + + Self { + uri: Uri::from_parts(parts) + .expect("everything is coming from a previous Uri, or encoded here"), + fragment: None, + } + } + } else { + Self { + uri, + fragment: None, + } + } + } + + pub fn from_path(path: impl AsRef) -> Self { + Self { + uri: Uri::builder() + .path_and_query(encode_url_path(path.as_ref())) + .build() + .expect("this can never fail because we encode the path"), + fragment: None, + } + } + + pub fn from_path_and_raw_query( + path: impl AsRef, + raw_query: Option>, + ) -> Self { + Self::from_path(path).append_raw_query(raw_query) + } + + #[cfg(test)] + pub(crate) fn from_path_and_query(path: P, queries: I) -> Self + where + P: AsRef, + I: IntoIterator, + I::Item: Borrow<(K, V)>, + K: AsRef, + V: AsRef, + { + Self::from_path(path).append_query_pairs(queries) + } + + pub fn scheme(&self) -> Option<&http::uri::Scheme> { + self.uri.scheme() + } + + pub fn authority(&self) -> Option<&http::uri::Authority> { + self.uri.authority() + } + + pub fn path(&self) -> &str { + self.uri.path() + } + + pub fn query(&self) -> Option<&str> { + self.uri.query() + } + + pub fn fragment(&self) -> Option<&str> { + self.fragment.as_deref() + } + + /// extend the query part of the Uri with the given raw query string. 
+ /// + /// Will parse & re-encode the string, which is why the method is infallible (I think) + pub fn append_raw_query(self, raw_query: Option>) -> Self { + let raw_query = match raw_query { + Some(ref q) => q.as_ref(), + None => return self, + }; + + self.append_query_pairs(form_urlencoded::parse(raw_query.as_bytes())) + } + + pub fn append_query_pairs(self, new_query_args: I) -> Self + where + I: IntoIterator, + I::Item: Borrow<(K, V)>, + K: AsRef, + V: AsRef, + { + let mut new_query_args = new_query_args.into_iter().peekable(); + if new_query_args.peek().is_none() { + return self; + } + + let mut serializer = form_urlencoded::Serializer::new(String::new()); + + if let Some(existing_query_args) = self.uri.query() { + serializer.extend_pairs(form_urlencoded::parse(existing_query_args.as_bytes())); + } + + serializer.extend_pairs(new_query_args); + + let mut parts = self.uri.into_parts(); + + parts.path_and_query = Some( + PathAndQuery::from_maybe_shared(format!( + "{}?{}", + parts + .path_and_query + .map(|pg| pg.path().to_owned()) + .unwrap_or_default(), + serializer.finish(), + )) + .expect("can't fail since all the data is either coming from a previous Uri, or we encode it ourselves") + ); + + Self::from_uri( + Uri::from_parts(parts).expect( + "can't fail since data is either coming from an Uri, or encoded by ourselves.", + ), + ) + } + + /// extend query part + pub fn append_query_pair(self, key: impl AsRef, value: impl AsRef) -> Self { + self.append_query_pairs(iter::once((key, value))) + } + + pub fn into_inner(self) -> Uri { + self.uri + } + + pub(crate) fn with_fragment(mut self, fragment: impl AsRef) -> Self { + self.fragment = Some(encode_url_path(fragment.as_ref())); + self + } +} + +impl FromStr for EscapedURI { + type Err = http::uri::InvalidUri; + + fn from_str(s: &str) -> Result { + if let Some((base, fragment)) = s.split_once('#') { + Ok(Self::from_uri(base.parse()?).with_fragment(fragment)) + } else { + Ok(Self::from_uri(s.parse()?)) + } + } +} + +impl Display for EscapedURI { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.uri)?; + if let Some(ref fragment) = self.fragment { + write!(f, "#{}", fragment)?; + } + Ok(()) + } +} + +impl HtmlSafe for EscapedURI {} + +impl TryFrom for Uri { + type Error = anyhow::Error; + + fn try_from(value: EscapedURI) -> Result { + if let Some(fragment) = value.fragment { + Err(anyhow::anyhow!( + "can't convert EscapedURI with fragment '{}' into Uri", + fragment + )) + } else { + Ok(value.uri) + } + } +} + +impl From for EscapedURI { + fn from(value: Uri) -> Self { + Self::from_uri(value) + } +} + +impl PartialEq for &EscapedURI { + fn eq(&self, other: &String) -> bool { + *self == other + } +} + +impl PartialEq for EscapedURI { + fn eq(&self, other: &String) -> bool { + >::eq(self, other) + } +} + +impl PartialEq<&str> for EscapedURI { + fn eq(&self, other: &&str) -> bool { + >::eq(self, other) + } +} + +impl PartialEq for EscapedURI { + fn eq(&self, other: &str) -> bool { + if let Some((other_uri, other_fragment)) = other.split_once('#') { + self.uri == other_uri && self.fragment.as_deref() == Some(other_fragment) + } else { + self.uri == other && self.fragment.is_none() + } + } +} + +#[cfg(test)] +mod tests { + use super::EscapedURI; + use crate::web::{cache::CachePolicy, error::AxumNope}; + use axum::response::IntoResponse as _; + use http::Uri; + use test_case::test_case; + + fn test_serialization_roundtrip(input: &EscapedURI) { + let s = input.to_string(); + assert_eq!(input, s); // 
tests the ParialEq impl + assert_eq!(s.parse::().unwrap(), *input); + } + + #[test] + fn test_redirect_error_encodes_url_path() { + let response = AxumNope::Redirect( + EscapedURI::from_path("/something>"), + CachePolicy::ForeverInCdnAndBrowser, + ) + .into_response(); + + assert_eq!(response.status(), 302); + assert_eq!(response.headers().get("Location").unwrap(), "/something%3E"); + } + + #[test_case("/something" => "/something")] + #[test_case("/something>" => "/something%3E")] + fn test_escaped_uri_encodes_from_path(input: &str) -> String { + let escaped = EscapedURI::from_path(input); + test_serialization_roundtrip(&escaped); + escaped.path().to_owned() + } + + #[test_case("/something" => "/something"; "plain path")] + #[test_case("/somethingäöü" => "/something%C3%A4%C3%B6%C3%BC"; "path with umlauts")] + fn test_escaped_uri_encodes_path_from_uri(path: &str) -> String { + let uri: Uri = path.parse().unwrap(); + let escaped = EscapedURI::from_uri(uri); + test_serialization_roundtrip(&escaped); + escaped.path().to_string() + } + + #[test] + fn test_escaped_uri_from_uri_with_query_args() { + let uri: Uri = "/something?key=value&foo=bar".parse().unwrap(); + let escaped = EscapedURI::from_uri(uri); + test_serialization_roundtrip(&escaped); + assert_eq!(escaped.path(), "/something"); + assert_eq!(escaped.query(), Some("key=value&foo=bar")); + } + + #[test] + fn test_escaped_uri_from_uri_with_query_args_and_fragment() { + let input = "/something?key=value&foo=bar#frag"; + let escaped: EscapedURI = input.parse().unwrap(); + test_serialization_roundtrip(&escaped); + assert_eq!(escaped.path(), "/something"); + assert_eq!(escaped.query(), Some("key=value&foo=bar")); + assert_eq!(escaped.fragment(), Some("frag")); + assert_eq!(escaped.to_string(), input); + } + + #[test] + fn test_escaped_uri_from_uri_with_query_args_and_fragment_to_encode() { + let input = "/something?key=value&foo=bar#fräöag"; + let escaped: EscapedURI = input.parse().unwrap(); + test_serialization_roundtrip(&escaped); + assert_eq!(escaped.path(), "/something"); + assert_eq!(escaped.query(), Some("key=value&foo=bar")); + assert_eq!(escaped.fragment(), Some("fr%C3%A4%C3%B6ag")); + assert_eq!( + escaped.to_string(), + "/something?key=value&foo=bar#fr%C3%A4%C3%B6ag" + ); + } + + #[test_case("/something>")] + #[test_case("/something?key=().is_err()); + } + + #[test_case( + "/something", "key=value&foo=bar" + => ("/something".into(), "key=value&foo=bar".into()); + "plain convert" + )] + #[test_case( + "/something", "value=foo\rbar&key= ("/something".into(), "value=foo%0Dbar&key=%3Cvalue".into()); + "invalid query gets re-encoded without error" + )] + fn test_escaped_uri_from_raw_query(path: &str, query: &str) -> (String, String) { + let uri = EscapedURI::from_path_and_raw_query(path, Some(query)); + test_serialization_roundtrip(&uri); + + (uri.path().to_owned(), uri.query().unwrap().to_owned()) + } + + #[test] + fn test_escaped_uri_from_query() { + let uri = + EscapedURI::from_path_and_query("/something", &[("key", "value"), ("foo", "bar")]); + test_serialization_roundtrip(&uri); + + assert_eq!(uri.path(), "/something"); + assert_eq!(uri.query(), Some("key=value&foo=bar")); + } + + #[test] + fn test_escaped_uri_from_query_with_chars_to_encode() { + let uri = + EscapedURI::from_path_and_query("/something", &[("key", "value>"), ("foo", "\rbar")]); + test_serialization_roundtrip(&uri); + + assert_eq!(uri.path(), "/something"); + assert_eq!(uri.query(), Some("key=value%3E&foo=%0Dbar")); + } + + #[test] + fn 
test_escaped_uri_append_query_pairs_without_path() { + let uri = Uri::builder().build().unwrap(); + + let parts = uri.into_parts(); + // `append_query_pairs` has a special case when path_and_query is `None`, + // which I want to test here. + assert!(parts.path_and_query.is_none()); + + // also tests appending query pairs if there are no existing query args + let uri = EscapedURI::from_uri(Uri::from_parts(parts).unwrap()) + .append_query_pairs(&[("foo", "bar"), ("bar", "baz")]); + test_serialization_roundtrip(&uri); + + assert_eq!(uri.path(), "/"); + assert_eq!(uri.query(), Some("foo=bar&bar=baz")); + } + + #[test] + fn test_escaped_uri_append_query_pairs() { + let uri = EscapedURI::from_path_and_query("/something", &[("key", "value")]) + .append_query_pairs(&[("foo", "bar"), ("bar", "baz")]) + .append_query_pair("last", "one"); + test_serialization_roundtrip(&uri); + + assert_eq!(uri.path(), "/something"); + assert_eq!(uri.query(), Some("key=value&foo=bar&bar=baz&last=one")); + } + + #[test] + fn test_escaped_uri_append_fragment() { + let uri = EscapedURI::from_path("/something").with_fragment("some-fragment"); + test_serialization_roundtrip(&uri); + + assert_eq!(uri.path(), "/something"); + assert_eq!(uri.query(), None); + assert_eq!(uri.fragment(), Some("some-fragment")); + assert_eq!(uri.to_string(), "/something#some-fragment"); + } + + #[test] + fn test_escaped_uri_append_fragment_encode() { + let uri = EscapedURI::from_path("/something").with_fragment("some-äö-fragment"); + test_serialization_roundtrip(&uri); + + assert_eq!(uri.path(), "/something"); + assert_eq!(uri.query(), None); + assert_eq!(uri.fragment(), Some("some-%C3%A4%C3%B6-fragment")); + assert_eq!(uri.to_string(), "/something#some-%C3%A4%C3%B6-fragment"); + } + + #[test] + fn test_escaped_uri_replace_fragment() { + let uri = EscapedURI::from_path("/something") + .with_fragment("some-fragment") + .with_fragment("other-fragment"); + + test_serialization_roundtrip(&uri); + + assert_eq!(uri.path(), "/something"); + assert_eq!(uri.query(), None); + assert_eq!(uri.fragment(), Some("other-fragment")); + assert_eq!(uri.to_string(), "/something#other-fragment"); + } + + #[test] + fn test_comparision() { + let uri = EscapedURI::from_path("/something").with_fragment("other-fragment"); + + test_serialization_roundtrip(&uri); + + assert_eq!(uri.path(), "/something"); + assert_eq!(uri.query(), None); + assert_eq!(uri.fragment(), Some("other-fragment")); + assert_eq!(uri.to_string(), "/something#other-fragment"); + } + + #[test] + fn test_not_eq() { + let uri = EscapedURI::from_path("/something").with_fragment("other-fragment"); + assert_ne!(uri, "/something"); + } +} diff --git a/src/web/extractors/context.rs b/src/web/extractors/context.rs new file mode 100644 index 000000000..a0594c04b --- /dev/null +++ b/src/web/extractors/context.rs @@ -0,0 +1,53 @@ +//! a collection of custom extractors related to our app-context (context::Context) + +use crate::{ + db::{AsyncPoolClient, Pool}, + web::error::AxumNope, +}; +use anyhow::Context as _; +use axum::{ + RequestPartsExt, + extract::{Extension, FromRequestParts}, + http::request::Parts, +}; +use std::ops::{Deref, DerefMut}; + +/// Extractor for a async sqlx database connection. +/// Can be used in normal axum handlers, middleware, or other extractors. +/// +/// For now, we will retrieve a new connection each time the extractor is used. +/// +/// This could be optimized in the future by caching the connection as a request +/// extension, so one request only uses on connection. 
+#[derive(Debug)] +pub(crate) struct DbConnection(AsyncPoolClient); + +impl FromRequestParts for DbConnection +where + S: Send + Sync, +{ + type Rejection = AxumNope; + + async fn from_request_parts(parts: &mut Parts, _state: &S) -> Result { + let Extension(pool) = parts + .extract::>() + .await + .context("could not extract pool extension")?; + + Ok(Self(pool.get_async().await?)) + } +} + +impl Deref for DbConnection { + type Target = sqlx::PgConnection; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for DbConnection { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} diff --git a/src/web/extractors/mod.rs b/src/web/extractors/mod.rs new file mode 100644 index 000000000..09edabfb7 --- /dev/null +++ b/src/web/extractors/mod.rs @@ -0,0 +1,6 @@ +mod context; +mod path; +pub(crate) mod rustdoc; + +pub(crate) use context::DbConnection; +pub(crate) use path::{Path, PathFileExtension}; diff --git a/src/web/extractors.rs b/src/web/extractors/path.rs similarity index 78% rename from src/web/extractors.rs rename to src/web/extractors/path.rs index 639e84aa0..62e2aecb6 100644 --- a/src/web/extractors.rs +++ b/src/web/extractors/path.rs @@ -1,53 +1,11 @@ -use crate::db::{AsyncPoolClient, Pool}; -use anyhow::{Context as _, anyhow}; +//! custom axum extractors for path parameters +use crate::web::error::AxumNope; +use anyhow::anyhow; use axum::{ RequestPartsExt, - extract::{Extension, FromRequestParts, OptionalFromRequestParts}, + extract::{FromRequestParts, OptionalFromRequestParts}, http::request::Parts, }; -use std::ops::{Deref, DerefMut}; - -use super::error::AxumNope; - -/// Extractor for a async sqlx database connection. -/// Can be used in normal axum handlers, middleware, or other extractors. -/// -/// For now, we will retrieve a new connection each time the extractor is used. -/// -/// This could be optimized in the future by caching the connection as a request -/// extension, so one request only uses on connection. -#[derive(Debug)] -pub(crate) struct DbConnection(AsyncPoolClient); - -impl FromRequestParts for DbConnection -where - S: Send + Sync, -{ - type Rejection = AxumNope; - - async fn from_request_parts(parts: &mut Parts, _state: &S) -> Result { - let Extension(pool) = parts - .extract::>() - .await - .context("could not extract pool extension")?; - - Ok(Self(pool.get_async().await?)) - } -} - -impl Deref for DbConnection { - type Target = sqlx::PgConnection; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl DerefMut for DbConnection { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.0 - } -} /// custom axum `Path` extractor that uses our own AxumNope::BadRequest /// as error response instead of a plain text "bad request" @@ -139,8 +97,6 @@ where } } -// TODO: we will write tests for this when async db tests are working - #[cfg(test)] mod tests { use super::*; diff --git a/src/web/extractors/rustdoc.rs b/src/web/extractors/rustdoc.rs new file mode 100644 index 000000000..d4724bbcb --- /dev/null +++ b/src/web/extractors/rustdoc.rs @@ -0,0 +1,1601 @@ +//! 
special rustdoc extractors + +use crate::{ + db::BuildId, + web::{ + MatchedRelease, MetaData, ReqVersion, error::AxumNope, escaped_uri::EscapedURI, + extractors::Path, + }, +}; +use anyhow::Result; +use axum::{ + RequestPartsExt, + extract::{FromRequestParts, MatchedPath}, + http::{Uri, request::Parts}, +}; +use itertools::Itertools as _; +use serde::Deserialize; +use std::borrow::Cow; + +static INDEX_HTML: &str = "index.html"; +static FOLDER_AND_INDEX_HTML: &str = "/index.html"; + +#[derive(Clone, Debug, PartialEq)] +pub(crate) enum PageKind { + Rustdoc, + Source, +} + +/// Extractor for rustdoc parameters from a request. +/// +/// Among other things, centralizes +/// * how we parse & interpret rustdoc related URL elements +/// * how we generate rustdoc related URLs shown in interfaces. +/// * if there is one, where to find the related file in the rustdoc build output. +/// +/// All of these have more or less detail depending on how much metadata we have here. +/// Maintains some additional fields containing "fixed" things, whose quality +/// gets better the more metadata we provide. +#[derive(Clone, PartialEq)] +pub(crate) struct RustdocParams { + // optional behaviour marker + page_kind: Option, + + original_uri: Option, + name: String, + req_version: ReqVersion, + doc_target: Option, + inner_path: Option, + static_route_suffix: Option, + + doc_targets: Option>, + default_target: Option, + target_name: Option, + + merged_inner_path: Option, +} + +impl std::fmt::Debug for RustdocParams { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("RustdocParams") + .field("page_kind", &self.page_kind) + .field("original_uri", &self.original_uri) + .field("name", &self.name) + .field("req_version", &self.req_version) + .field("doc_target", &self.doc_target) + .field("inner_path", &self.inner_path) + .field("doc_targets", &self.doc_targets) + .field("default_target", &self.default_target) + .field("target_name", &self.target_name) + .field("static_route_suffix", &self.static_route_suffix) + .field("merged_inner_path", &self.merged_inner_path) + // also include some method outputs + .field("rustdoc_url()", &self.rustdoc_url()) + .field("crate_details_url()", &self.crate_details_url()) + .field("platforms_partial_url()", &self.platforms_partial_url()) + .field("releases_partial_url()", &self.releases_partial_url()) + .field("builds_url()", &self.builds_url()) + .field("build_status_url()", &self.build_status_url()) + .field( + "build_details_url(42, None)", + &self.build_details_url(BuildId(42), None), + ) + .field( + "build_details_url(42, Some(\"log.txt\"))", + &self.build_details_url(BuildId(42), Some("log.txt")), + ) + .field("features_url()", &self.features_url()) + .field("source_url()", &self.source_url()) + .field("target_redirect_url()", &self.target_redirect_url()) + .field("storage_path()", &self.storage_path()) + .field("generate_fallback_url()", &self.generate_fallback_url()) + .field("path_is_folder()", &self.path_is_folder()) + .field("file_extension()", &self.file_extension()) + .finish() + } +} + +/// the parameters that might come as url parameters via route. +/// All except the crate name are optional or have a default, +/// so this extractor can be used in many handlers with a variety of +/// specificity of the route.
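+///
+/// As a rough illustration (these shapes are taken from the routes exercised in the
+/// tests below), one and the same extractor can back routes like:
+///
+/// ```text
+/// /{name}
+/// /{name}/{version}
+/// /{name}/{version}/static.html            (static suffix, no path parameter)
+/// /{name}/{version}/{target}/{*path}
+/// ```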
+#[derive(Deserialize, Debug)] +struct UrlParams { + pub name: String, + #[serde(default)] + pub version: ReqVersion, + pub target: Option, + pub path: Option, +} + +impl FromRequestParts for RustdocParams +where + S: Send + Sync, +{ + type Rejection = AxumNope; + + /// extract rustdoc parameters from request parts. + /// + /// For now, we're using specifically named path parameters; most are optional: + /// * `{name}` (mandatory) => crate name + /// * `{version}` (optional) => request version + /// * `{target}` (optional) => doc target + /// * `{path}` (optional) => inner path + /// + /// We also extract & store the original URI, and use it to find a potential static + /// route suffix (e.g. the `/settings.html` in the `/{krate}/{version}/settings.html` route). + async fn from_request_parts(parts: &mut Parts, _state: &S) -> Result { + let Path(params) = parts + .extract::>() + .await + .map_err(|err| AxumNope::BadRequest(err.into()))?; + + let original_uri = parts.extract::().await.expect("infallible extractor"); + + let static_route_suffix = { + let uri_path = url_decode(original_uri.path()).map_err(AxumNope::BadRequest)?; + + let matched_path = parts + .extract::() + .await + .map_err(|err| AxumNope::BadRequest(err.into()))?; + let matched_route = url_decode(matched_path.as_str()).map_err(AxumNope::BadRequest)?; + + find_static_route_suffix(&matched_route, &uri_path) + }; + + Ok(RustdocParams::new(params.name) + .with_req_version(params.version) + .with_maybe_doc_target(params.target) + .with_maybe_inner_path(params.path) + .with_original_uri(original_uri) + .with_maybe_static_route_suffix(static_route_suffix)) + } +} + +/// Builder-style methods to create & update the parameters. +#[allow(dead_code)] +impl RustdocParams { + pub(crate) fn new(name: impl Into) -> Self { + Self { + name: name.into().trim().into(), + req_version: ReqVersion::default(), + original_uri: None, + doc_target: None, + inner_path: None, + page_kind: None, + static_route_suffix: None, + doc_targets: None, + default_target: None, + target_name: None, + merged_inner_path: None, + } + } + + fn try_update(self, f: F) -> Result + where + F: FnOnce(Self) -> Result, + { + let mut new = f(self)?; + new.parse(); + Ok(new) + } + + fn update(self, f: F) -> Self + where + F: FnOnce(Self) -> Self, + { + self.try_update(|mut params| { + params = f(params); + Ok(params) + }) + .expect("infallible") + } + + pub(crate) fn from_metadata(metadata: &MetaData) -> Self { + RustdocParams::new(&metadata.name).apply_metadata(metadata) + } + + pub(crate) fn apply_metadata(self, metadata: &MetaData) -> RustdocParams { + self.with_name(&metadata.name) + .with_req_version(&metadata.req_version) + // first set the doc-target list + .with_maybe_doc_targets(metadata.doc_targets.clone()) + // then the default target, so we can validate it.
+ .with_maybe_default_target(metadata.default_target.as_deref()) + .with_maybe_target_name(metadata.target_name.as_deref()) + } + + pub(crate) fn from_matched_release(matched_release: &MatchedRelease) -> Self { + RustdocParams::new(&matched_release.name).apply_matched_release(matched_release) + } + + pub(crate) fn apply_matched_release(self, matched_release: &MatchedRelease) -> RustdocParams { + let release = &matched_release.release; + self.with_name(&matched_release.name) + .with_req_version(&matched_release.req_version) + .with_maybe_doc_targets(release.doc_targets.as_deref()) + .with_maybe_default_target(release.default_target.as_deref()) + .with_maybe_target_name(release.target_name.as_deref()) + } + + pub(crate) fn name(&self) -> &str { + &self.name + } + pub(crate) fn with_name(self, name: impl Into) -> Self { + self.update(|mut params| { + params.name = name.into().trim().into(); + params + }) + } + + pub(crate) fn req_version(&self) -> &ReqVersion { + &self.req_version + } + pub(crate) fn with_req_version(self, version: impl Into) -> Self { + self.update(|mut params| { + params.req_version = version.into(); + params + }) + } + #[cfg(test)] + pub(crate) fn try_with_req_version(self, version: V) -> Result + where + V: TryInto, + V::Error: std::error::Error + Send + Sync + 'static, + { + use anyhow::Context as _; + self.try_update(|mut params| { + params.req_version = version.try_into().context("couldn't parse version")?; + Ok(params) + }) + } + + pub(crate) fn inner_path(&self) -> &str { + if self.page_kind == Some(PageKind::Rustdoc) + && let Some(merged_inner_path) = self.merged_inner_path.as_deref() + { + merged_inner_path + } else { + self.inner_path.as_deref().unwrap_or_default() + } + } + pub(crate) fn with_inner_path(self, inner_path: impl Into) -> Self { + self.with_maybe_inner_path(Some(inner_path)) + } + pub(crate) fn with_maybe_inner_path(self, inner_path: Option>) -> Self { + self.update(|mut params| { + params.inner_path = inner_path.map(|t| t.into().trim().to_owned()); + params + }) + } + + pub(crate) fn original_uri(&self) -> Option<&Uri> { + self.original_uri.as_ref() + } + pub(crate) fn with_original_uri(self, original_uri: impl Into) -> Self { + self.with_maybe_original_uri(Some(original_uri)) + } + pub(crate) fn with_maybe_original_uri(self, original_uri: Option>) -> Self { + self.update(|mut params| { + params.original_uri = original_uri.map(Into::into); + params + }) + } + #[cfg(test)] + pub(crate) fn try_with_original_uri(self, original_uri: V) -> Result + where + V: TryInto, + V::Error: std::error::Error + Send + Sync + 'static, + { + use anyhow::Context as _; + self.try_update(|mut params| { + params.original_uri = Some(original_uri.try_into().context("couldn't parse uri")?); + Ok(params) + }) + } + pub(crate) fn file_extension(&self) -> Option<&str> { + self.original_uri() + .as_ref() + .and_then(|uri| get_file_extension(uri.path())) + } + pub(crate) fn original_path(&self) -> &str { + self.original_uri() + .as_ref() + .map(|p| p.path()) + .unwrap_or_default() + } + pub(crate) fn path_is_folder(&self) -> bool { + path_is_folder(self.original_path()) + } + + pub(crate) fn page_kind(&self) -> Option<&PageKind> { + self.page_kind.as_ref() + } + pub(crate) fn with_page_kind(self, page_kind: impl Into) -> Self { + self.with_maybe_page_kind(Some(page_kind)) + } + pub(crate) fn with_maybe_page_kind(self, page_kind: Option>) -> Self { + self.update(|mut params| { + params.page_kind = page_kind.map(Into::into); + params + }) + } + + pub(crate) fn default_target(&self) 
-> Option<&str> { + self.default_target.as_deref() + } + pub(crate) fn with_default_target(self, default_target: impl Into) -> Self { + self.with_maybe_default_target(Some(default_target)) + } + pub(crate) fn with_maybe_default_target( + self, + default_target: Option>, + ) -> Self { + self.update(|mut params| { + params.default_target = default_target.map(Into::into); + params + }) + } + + pub(crate) fn target_name(&self) -> Option<&str> { + self.target_name.as_deref() + } + pub(crate) fn with_target_name(self, target_name: impl Into) -> Self { + self.with_maybe_target_name(Some(target_name)) + } + pub(crate) fn with_maybe_target_name(self, target_name: Option>) -> Self { + self.update(|mut params| { + params.target_name = target_name.map(Into::into); + params + }) + } + + #[cfg(test)] + pub(crate) fn with_static_route_suffix(self, static_route_suffix: impl Into) -> Self { + self.with_maybe_static_route_suffix(Some(static_route_suffix)) + } + pub(crate) fn with_maybe_static_route_suffix( + self, + static_route_suffix: Option>, + ) -> Self { + self.update(|mut params| { + params.static_route_suffix = static_route_suffix.map(Into::into); + params + }) + } + + pub(crate) fn doc_target(&self) -> Option<&str> { + self.doc_target.as_deref() + } + pub(crate) fn with_doc_target(self, doc_target: impl Into) -> Self { + self.with_maybe_doc_target(Some(doc_target)) + } + /// set the "doc target" parameter. + /// Might not be a target, depending on how it's generated. + pub(crate) fn with_maybe_doc_target(self, doc_target: Option>) -> Self { + self.update(|mut params| { + params.doc_target = doc_target.map(Into::into); + params + }) + } + + pub(crate) fn doc_targets(&self) -> Option<&[String]> { + self.doc_targets.as_deref() + } + pub(crate) fn with_doc_targets( + self, + doc_targets: impl IntoIterator>, + ) -> Self { + self.with_maybe_doc_targets(Some(doc_targets)) + } + pub(crate) fn with_maybe_doc_targets( + self, + doc_targets: Option>>, + ) -> Self { + self.update(|mut params| { + params.doc_targets = + doc_targets.map(|doc_targets| doc_targets.into_iter().map(Into::into).collect()); + params + }) + } + + pub(crate) fn doc_target_or_default(&self) -> Option<&str> { + self.doc_target().or(self.default_target.as_deref()) + } + + /// check if we have a target component in the path that matches the default + /// target. This affects the generated storage path, since default target docs are at the root, + /// and the other target docs are in subfolders named after the target. + pub(crate) fn target_is_default(&self) -> bool { + self.default_target + .as_deref() + .is_some_and(|t| self.doc_target() == Some(t)) + } +} + +/// parser methods +impl RustdocParams { + fn fix_target_and_path(&mut self) { + let Some(doc_targets) = &self.doc_targets else { + // no doc targets given, so we can't fix anything here. + return; + }; + + let is_valid_target = |t: &str| doc_targets.iter().any(|s| s == t); + + let inner_path = self + .inner_path + .as_deref() + .unwrap_or("") + .trim_start_matches('/') + .trim() + .to_string(); + + let (doc_target, inner_path) = if let Some(given_target) = self + .doc_target + .as_deref() + .map(str::trim) + .filter(|s| !s.is_empty()) + { + if is_valid_target(given_target) { + (Some(given_target.to_string()), inner_path) + } else { + // The given `doc_target` is not in the list of valid targets, + // so we assume it's part of the path.
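+                // For example, mirroring `test_case_1` below: for
+                // `/dummy/0.2.0/dummy/struct.Dummy.html` the `{target}` parameter
+                // captured `dummy`, which is not a known target, so it is folded
+                // back into the inner path as `dummy/struct.Dummy.html`.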
+ let path = if inner_path.is_empty() { + if self.original_path().ends_with('/') { + format!("{}/", given_target) + } else { + given_target.to_string() + } + } else { + format!("{}/{}", given_target, inner_path) + }; + (None, path) + } + } else { + // No `doc_target` was given, so we try to extract it from the first component of the path. + if let Some((potential_target, rest)) = inner_path.split_once('/') { + if is_valid_target(potential_target) { + (Some(potential_target.to_string()), rest.to_string()) + } else { + // The first path component is not a valid target. + (None, inner_path) + } + } else { + // The path has no slashes, so the whole path could be a target. + if is_valid_target(&inner_path) { + (Some(inner_path), String::new()) + } else { + (None, inner_path) + } + } + }; + + debug_assert!( + doc_target + .as_ref() + .is_none_or(|t| { !t.is_empty() && !t.contains('/') && t.contains('-') }), + "doc-target {:?} has to be non-empty, shouldn't contain slashes, but has dashes", + doc_target + ); + + debug_assert!(!inner_path.starts_with('/')); // we should trim leading slashes + + self.inner_path = Some(inner_path); + self.doc_target = doc_target; + } + + /// convert the raw rustdoc parameters from the request to a "parsed" version, using additional + /// information from release metadata. + /// + /// Will also validate & fix the given `doc_target` URL parameter. + fn parse(&mut self) { + self.fix_target_and_path(); + + self.merged_inner_path = None; + + // for rustdoc pages we are merging the inner path from the URL and any potential + // static suffix on the route. For other page kinds we do not want this. + if self.page_kind == Some(PageKind::Rustdoc) + && let Some(suffix) = self + .static_route_suffix + .as_deref() + .filter(|s| !s.is_empty()) + { + let mut result = self.inner_path().to_owned(); + if !result.is_empty() { + result.push('/'); + } + result.push_str(suffix); + self.merged_inner_path = Some(result); + } + } +} + +/// URL & path generation for the given params. 
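+///
+/// A rough illustration of the generated shapes (values match `test_parse_source`
+/// below: crate `dummy` at `0.4.0`, `PageKind::Source`, inner path `README.md`):
+///
+/// ```text
+/// rustdoc_url()         -> /dummy/0.4.0/dummy/
+/// crate_details_url()   -> /crate/dummy/0.4.0
+/// source_url()          -> /crate/dummy/0.4.0/source/README.md
+/// target_redirect_url() -> /crate/dummy/0.4.0/target-redirect/dummy/
+/// ```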
+impl RustdocParams { + pub(crate) fn rustdoc_url(&self) -> EscapedURI { + generate_rustdoc_url(&self.name, &self.req_version, &self.path_for_rustdoc_url()) + } + + pub(crate) fn crate_details_url(&self) -> EscapedURI { + EscapedURI::from_path(format!("/crate/{}/{}", self.name, self.req_version)) + } + + pub(crate) fn platforms_partial_url(&self) -> EscapedURI { + EscapedURI::from_path(format!( + "/crate/{}/{}/menus/platforms/{}", + self.name, + self.req_version, + self.path_for_rustdoc_url_for_partials() + )) + } + + pub(crate) fn releases_partial_url(&self) -> EscapedURI { + EscapedURI::from_path(format!( + "/crate/{}/{}/menus/releases/{}", + self.name, + self.req_version, + self.path_for_rustdoc_url_for_partials() + )) + } + + pub(crate) fn builds_url(&self) -> EscapedURI { + EscapedURI::from_path(format!("/crate/{}/{}/builds", self.name, self.req_version)) + } + + pub(crate) fn build_status_url(&self) -> EscapedURI { + EscapedURI::from_path(format!( + "/crate/{}/{}/status.json", + self.name, self.req_version + )) + } + + pub(crate) fn build_details_url(&self, id: BuildId, filename: Option<&str>) -> EscapedURI { + let mut path = format!("/crate/{}/{}/builds/{}", self.name, self.req_version, id); + + if let Some(filename) = filename { + path.push('/'); + path.push_str(filename); + } + + EscapedURI::from_path(path) + } + + pub(crate) fn features_url(&self) -> EscapedURI { + EscapedURI::from_path(format!( + "/crate/{}/{}/features", + self.name, self.req_version + )) + } + + pub(crate) fn source_url(&self) -> EscapedURI { + // if the params were created for a rustdoc page, + // the inner path is a source file path, so is not usable for + // source urls. + let inner_path = if self.page_kind == Some(PageKind::Source) { + self.inner_path() + } else { + "" + }; + EscapedURI::from_path(format!( + "/crate/{}/{}/source/{}", + &self.name, &self.req_version, &inner_path + )) + } + + pub(crate) fn target_redirect_url(&self) -> EscapedURI { + EscapedURI::from_path(format!( + "/crate/{}/{}/target-redirect/{}", + self.name, + self.req_version, + &self.path_for_rustdoc_url(), + )) + } + + /// generate a potential storage path where to find the file that is described by these params. + /// + /// This is the path _inside_ the rustdoc archive zip file we create in the build process. + pub(crate) fn storage_path(&self) -> String { + let mut storage_path = self.path_for_rustdoc_url(); + + if path_is_folder(&storage_path) { + storage_path.push_str(INDEX_HTML); + } + + storage_path + } + + fn path_for_rustdoc_url_for_partials(&self) -> String { + if self.page_kind() == Some(&PageKind::Rustdoc) { + generate_rustdoc_path_for_url(None, None, self.doc_target(), Some(self.inner_path())) + } else { + generate_rustdoc_path_for_url(None, None, self.doc_target(), None) + } + } + + fn path_for_rustdoc_url(&self) -> String { + if self.page_kind() == Some(&PageKind::Rustdoc) { + generate_rustdoc_path_for_url( + self.target_name.as_deref(), + self.default_target.as_deref(), + self.doc_target(), + Some(self.inner_path()), + ) + } else { + generate_rustdoc_path_for_url( + self.target_name.as_deref(), + self.default_target.as_deref(), + self.doc_target(), + None, + ) + } + } + + /// Generate a possible target path to redirect to, with the information we have. + /// + /// Built for the target-redirect view, when we don't find the + /// target in our storage. + /// + /// Input is our set or parameters, plus some details from the metadata. 
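+    ///
+    /// For example, mirroring `test_generate_fallback_search` below:
+    /// `dummy/struct.WindowsOnly.html` yields `WindowsOnly`,
+    /// `dummy/some_module/index.html` yields `some_module`, and
+    /// `src/folder1/folder2/logic.rs.html` yields `logic`.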
+ /// + /// This method is typically only used when we already know the target file doesn't exist, + /// and we just need to redirect to a search or something similar. + fn generate_fallback_search(&self) -> Option { + // we already split out the potentially leading target information in `Self::parse`. + // So we have an optional target, and then the path. + let components: Vec<_> = self + .inner_path() + .trim_start_matches('/') + .split('/') + .collect(); + + let is_source_view = components.first() == Some(&"src"); + + components + .last() + .and_then(|&last_component| { + if last_component.is_empty() || last_component == INDEX_HTML { + // this is a module, we extract the module name + // + // path might look like: + // `/[krate]/[version]/{target_name}/{module}/index.html` (last_component is index) + // or + // `/[krate]/[version]/{target_name}/{module}/` (last_component is empty) + // + // for the search we want to use the module name. + components.iter().rev().nth(1).cloned() + } else if !is_source_view { + // this is an item, typically the filename (last component) is something + // `trait.SomeAwesomeStruct.html`, where we want `SomeAwesomeStruct` for + // the search + last_component.split('.').nth(1) + } else { + // this is from the rustdoc source view. + // Example last component: + // `tuple_impl.rs.html` where we want just `tuple_impl` for the search. + last_component.strip_suffix(".rs.html") + } + }) + .map(ToString::to_string) + } + + pub(crate) fn generate_fallback_url(&self) -> EscapedURI { + let rustdoc_url = self.clone().with_inner_path("").rustdoc_url(); + + if let Some(search_item) = self.generate_fallback_search() { + rustdoc_url.append_query_pair("search", search_item) + } else { + rustdoc_url + } + } +} + +fn get_file_extension(path: &str) -> Option<&str> { + path.rsplit_once('.').and_then(|(_, ext)| { + if ext.contains('/') { + // to handle cases like `foo.html/bar` where I want `None` + None + } else { + Some(ext) + } + }) +} + +fn url_decode<'a>(input: &'a str) -> Result> { + Ok(percent_encoding::percent_decode(input.as_bytes()).decode_utf8()?) +} + +fn generate_rustdoc_url(name: &str, version: &ReqVersion, path: &str) -> EscapedURI { + EscapedURI::from_path(format!("/{}/{}/{}", name, version, path)) +} + +fn generate_rustdoc_path_for_url( + target_name: Option<&str>, + default_target: Option<&str>, + doc_target: Option<&str>, + inner_path: Option<&str>, +) -> String { + // first validate & fix the inner path to use. + let result = if let Some(path) = inner_path + && !path.is_empty() + && path != INDEX_HTML + { + // just use the given inner path to start, if: + // * it's not empty + // * it's not just "index.html" + path.to_string() + } else if let Some(target_name) = target_name { + // after having no usable given path, we generate one with the + // target name, if we have one. + format!("{}/", target_name) + } else { + // no usable given path: + // * empty + // * "index.html" + String::new() + }; + + // then prepend the inner path with the doc target, if it's not the default target. + let result = match (doc_target, default_target) { + // add a subfolder for any non-default target. + (Some(doc_target), Some(default_target)) if doc_target != default_target => { + format!("{}/{}", doc_target, result) + } + // when we don't know what the default target is, always add the target, + // and assume it's non-default.
+ (Some(doc_target), None) => { + format!("{}/{}", doc_target, result) + } + + // other cases: don't do anything, keep the last result: + // * no doc_target, has default target -> no target in url + // * no doc_target, no default target -> no target in url + _ => result, + }; + + // case handled above and replaced with an empty path + debug_assert_ne!(result, INDEX_HTML); + + // for folders we might have `/index.html` at the end. + // We want to normalize the requests for folders, so a trailing `/index.html` + // will be cut off. + if result.ends_with(FOLDER_AND_INDEX_HTML) { + result.trim_end_matches(INDEX_HTML).to_string() + } else { + result + } +} + +fn path_is_folder(path: impl AsRef) -> bool { + let path = path.as_ref(); + path.is_empty() || path.ends_with('/') +} + +/// we sometimes have routes with a static suffix. +/// +/// For example: `/{name}/{version}/help.html` +/// In this case, we won't get the `help.html` part in our `path` parameter, since there is +/// no `{*path}` in the route. +/// +/// We're working around that by re-attaching the static suffix. This function is to find the +/// shared suffix between the route and the actual path. +fn find_static_route_suffix<'a, 'b>(route: &'a str, path: &'b str) -> Option { + let mut suffix: Vec<&'a str> = Vec::new(); + + for (route_component, path_component) in route.rsplit('/').zip(path.rsplit('/')) { + if route_component.starts_with('{') && route_component.ends_with('}') { + // we've reached a dynamic component in the route, stop here + break; + } + + if route_component != path_component { + // components don't match, no static suffix. + // Everything has to match up to the last dynamic component. + return None; + } + + // components match, continue to the next component + suffix.push(route_component); + } + + if suffix.is_empty() { + None + } else if let &[suffix] = suffix.as_slice() + && suffix.is_empty() + { + // special case: if the suffix is just empty, return None + None + } else { + Some(suffix.iter().rev().join("/")) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::test::{AxumResponseTestExt, AxumRouterTestExt}; + use axum::{Router, routing::get}; + use semver::Version; + use test_case::test_case; + + static KRATE: &str = "krate"; + const VERSION: Version = Version::new(0, 1, 0); + static DEFAULT_TARGET: &str = "x86_64-unknown-linux-gnu"; + static OTHER_TARGET: &str = "x86_64-pc-windows-msvc"; + static UNKNOWN_TARGET: &str = "some-unknown-target"; + static TARGETS: &[&str] = &[DEFAULT_TARGET, OTHER_TARGET]; + + #[test_case( + "/{name}/{version}/help/some.html", + "/foo/1.2.3/help/some.html" + => Some("help/some.html".into()); + "suffix with path" + )] + #[test_case("/{name}/{version}/help.html", "/foo/1.2.3/help.html" => Some("help.html".into()); "simple suffix")] + #[test_case("help.html", "help.html" => Some("help.html".into()); "simple suffix without other components")] + #[test_case("/{name}/{version}/help/", "/foo/1.2.3/help/" => Some("help/".into()); "suffix is folder")] + #[test_case("{name}/{version}/help/", "foo/1.2.3/help/" => Some("help/".into()); "without leading slash")] + #[test_case("/{name}/{version}/{*path}", "/foo/1.2.3/help.html" => None; "no suffix in route")] + #[test_case("/{name}/{version}/help.html", "/foo/1.2.3/other.html" => None; "different suffix")] + #[test_case( + "/{name}/{version}/some/help.html", + "/foo/1.2.3/other/help.html" + => None; + "different suffix later" + )] + #[test_case("", "" => None; "empty strings")] + #[test_case("/", "" => None; "one slash, one empty")] + 
fn test_find_static_route_suffix(route: &str, path: &str) -> Option { + find_static_route_suffix(route, path) + } + + #[test_case( + "/{name}", + RustdocParams::new(KRATE) + .try_with_original_uri("/krate").unwrap(); + "just name" + )] + #[test_case( + "/{name}/", + RustdocParams::new(KRATE) + .try_with_original_uri("/krate/").unwrap(); + "just name with trailing slash" + )] + #[test_case( + "/{name}/{version}", + RustdocParams::new(KRATE) + .try_with_original_uri("/krate/latest").unwrap(); + "just name and version" + )] + #[test_case( + "/{name}/{version}/{*path}", + RustdocParams::new(KRATE) + .try_with_original_uri("/krate/latest/static.html").unwrap() + .with_inner_path("static.html"); + "name, version, path extract" + )] + #[test_case( + "/{name}/{version}/{path}/static.html", + RustdocParams::new(KRATE) + .try_with_original_uri("/krate/latest/path_add/static.html").unwrap() + .with_inner_path("path_add") + .with_static_route_suffix("static.html"); + "name, version, path extract, static suffix" + )] + #[test_case( + "/{name}/{version}/clapproc%20%60macro.html", + RustdocParams::new("clap") + .try_with_original_uri("/clap/latest/clapproc%20%60macro.html").unwrap() + .with_static_route_suffix("clapproc `macro.html"); + "name, version, static suffix with some urlencoding" + )] + #[test_case( + "/{name}/{version}/static.html", + RustdocParams::new(KRATE) + .try_with_original_uri("/krate/latest/static.html").unwrap() + .with_static_route_suffix("static.html"); + "name, version, static suffix" + )] + #[test_case( + "/{name}/{version}/{target}", + RustdocParams::new(KRATE) + .try_with_req_version("1.2.3").unwrap() + .try_with_original_uri(format!("/krate/1.2.3/{OTHER_TARGET}")).unwrap() + .with_doc_target(OTHER_TARGET); + "name, version, target" + )] + #[test_case( + "/{name}/{version}/{target}/folder/something.html", + RustdocParams::new(KRATE) + .try_with_req_version("1.2.3").unwrap() + .try_with_original_uri(format!("/krate/1.2.3/{OTHER_TARGET}/folder/something.html")).unwrap() + .with_doc_target(OTHER_TARGET) + .with_static_route_suffix("folder/something.html"); + "name, version, target, static suffix" + )] + #[test_case( + "/{name}/{version}/{target}/", + RustdocParams::new(KRATE) + .try_with_req_version("1.2.3").unwrap() + .try_with_original_uri(format!("/krate/1.2.3/{OTHER_TARGET}/")).unwrap() + .with_doc_target(OTHER_TARGET); + "name, version, target trailing slash" + )] + #[test_case( + "/{name}/{version}/{target}/{*path}", + RustdocParams::new(KRATE) + .try_with_req_version("1.2.3").unwrap() + .try_with_original_uri(format!("/krate/1.2.3/{OTHER_TARGET}/some/path/to/a/file.html")).unwrap() + .with_doc_target(OTHER_TARGET) + .with_inner_path("some/path/to/a/file.html"); + "name, version, target, path" + )] + #[test_case( + "/{name}/{version}/{target}/{path}/path/to/a/file.html", + RustdocParams::new(KRATE) + .try_with_req_version("1.2.3").unwrap() + .try_with_original_uri(format!("/krate/1.2.3/{OTHER_TARGET}/path_add/path/to/a/file.html")).unwrap() + .with_doc_target(OTHER_TARGET) + .with_inner_path("path_add") + .with_static_route_suffix("path/to/a/file.html"); + "name, version, target, path, static suffix" + )] + #[tokio::test] + async fn test_extract_rustdoc_params_from_request( + route: &str, + expected: RustdocParams, + ) -> anyhow::Result<()> { + let expected = expected.with_page_kind(PageKind::Rustdoc); + + let app = Router::new().route( + route, + get(|params: RustdocParams| async move { + format!("{:?}", params.with_page_kind(PageKind::Rustdoc)) + }), + ); + + let path = 
expected.original_uri.as_ref().unwrap().path().to_owned(); + + let res = app.get(&path).await?; + assert!(res.status().is_success()); + assert_eq!(res.text().await?, format!("{:?}", expected)); + + Ok(()) + } + + #[test_case( + None, None, false, + None, "", "krate/index.html"; + "super empty 1" + )] + #[test_case( + Some(""), Some(""), false, + None, "", "krate/index.html"; + "super empty 2" + )] + // test cases when no separate "target" component was present in the params + #[test_case( + None, Some("/"), true, + None, "", "krate/index.html"; + "just slash" + )] + #[test_case( + None, Some("something"), false, + None, "something", "something"; + "without trailing slash" + )] + #[test_case( + None, Some("/something"), false, + None, "something", "something"; + "leading slash is cut" + )] + #[test_case( + None, Some("something/"), true, + None, "something/", "something/index.html"; + "with trailing slash" + )] + // a target is given, but as first component of the path, for routes without separate + // "target" component + #[test_case( + None, Some(DEFAULT_TARGET), false, + Some(DEFAULT_TARGET), "", "krate/index.html"; + "just target without trailing slash" + )] + #[test_case( + None, Some(&format!("{DEFAULT_TARGET}/")), true, + Some(DEFAULT_TARGET), "", "krate/index.html"; + "just default target with trailing slash" + )] + #[test_case( + None, Some(&format!("{DEFAULT_TARGET}/one")), false, + Some(DEFAULT_TARGET), "one", "one"; + "target + one without trailing slash" + )] + #[test_case( + None, Some(&format!("{DEFAULT_TARGET}/one/")), true, + Some(DEFAULT_TARGET), "one/", "one/index.html"; + "target + one target with trailing slash" + )] + #[test_case( + None, Some(&format!("{UNKNOWN_TARGET}/one/")), true, + None, &format!("{UNKNOWN_TARGET}/one/"), &format!("{UNKNOWN_TARGET}/one/index.html"); + "unknown target stays in path" + )] + #[test_case( + None, Some(&format!("{DEFAULT_TARGET}/some/inner/path")), false, + Some(DEFAULT_TARGET), "some/inner/path", "some/inner/path"; + "all without trailing slash" + )] + #[test_case( + None, Some(&format!("{DEFAULT_TARGET}/some/inner/path/")), true, + Some(DEFAULT_TARGET), "some/inner/path/", "some/inner/path/index.html"; + "all with trailing slash" + )] + // here we have a separate target path parameter, we check it and use it accordingly + #[test_case( + Some(DEFAULT_TARGET), None, false, + Some(DEFAULT_TARGET), "", "krate/index.html"; + "actual target, that is default" + )] + #[test_case( + Some(DEFAULT_TARGET), Some("inner/path.html"), false, + Some(DEFAULT_TARGET), "inner/path.html", "inner/path.html"; + "actual target with path" + )] + #[test_case( + Some(DEFAULT_TARGET), Some("inner/path/"), true, + Some(DEFAULT_TARGET), "inner/path/", "inner/path/index.html"; + "actual target with path slash" + )] + #[test_case( + Some(UNKNOWN_TARGET), None, true, + None, &format!("{UNKNOWN_TARGET}/"), &format!("{UNKNOWN_TARGET}/index.html"); + "unknown target" + )] + #[test_case( + Some(UNKNOWN_TARGET), None, false, + None, UNKNOWN_TARGET, UNKNOWN_TARGET; + "unknown target without trailing slash" + )] + #[test_case( + Some(UNKNOWN_TARGET), Some("inner/path.html"), false, + None, &format!("{UNKNOWN_TARGET}/inner/path.html"), &format!("{UNKNOWN_TARGET}/inner/path.html"); + "unknown target with path" + )] + #[test_case( + Some(OTHER_TARGET), Some("inner/path.html"), false, + Some(OTHER_TARGET), "inner/path.html", &format!("{OTHER_TARGET}/inner/path.html"); + "other target with path" + )] + #[test_case( + Some(UNKNOWN_TARGET), Some("inner/path/"), true, + None, 
&format!("{UNKNOWN_TARGET}/inner/path/"), &format!("{UNKNOWN_TARGET}/inner/path/index.html"); + "unknown target with path slash" + )] + #[test_case( + Some(OTHER_TARGET), Some("inner/path/"), true, + Some(OTHER_TARGET), "inner/path/", &format!("{OTHER_TARGET}/inner/path/index.html"); + "other target with path slash" + )] + #[test_case( + Some(DEFAULT_TARGET), None, false, + Some(DEFAULT_TARGET), "", "krate/index.html"; + "pure default target, without trailing slash" + )] + fn test_parse( + target: Option<&str>, + path: Option<&str>, + had_trailing_slash: bool, + expected_target: Option<&str>, + expected_path: &str, + expected_storage_path: &str, + ) { + let mut dummy_path = match (target, path) { + (Some(target), Some(path)) => format!("{}/{}", target, path), + (Some(target), None) => target.to_string(), + (None, Some(path)) => path.to_string(), + (None, None) => String::new(), + }; + dummy_path.insert(0, '/'); + if had_trailing_slash && !dummy_path.is_empty() { + dummy_path.push('/'); + } + + let parsed = RustdocParams::new(KRATE) + .with_page_kind(PageKind::Rustdoc) + .with_req_version(ReqVersion::Latest) + .with_maybe_doc_target(target) + .with_maybe_inner_path(path) + .try_with_original_uri(&dummy_path) + .unwrap() + .with_default_target(DEFAULT_TARGET) + .with_target_name(KRATE) + .with_doc_targets(TARGETS.iter().cloned()); + + assert_eq!(parsed.name(), KRATE); + assert_eq!(parsed.req_version(), &ReqVersion::Latest); + assert_eq!(parsed.doc_target(), expected_target); + assert_eq!(parsed.inner_path(), expected_path); + assert_eq!(parsed.storage_path(), expected_storage_path); + assert_eq!( + parsed.path_is_folder(), + had_trailing_slash || dummy_path.ends_with('/') || dummy_path.is_empty() + ); + } + + #[test_case("dummy/struct.WindowsOnly.html", Some("WindowsOnly"))] + #[test_case("dummy/some_module/struct.SomeItem.html", Some("SomeItem"))] + #[test_case("dummy/some_module/index.html", Some("some_module"))] + #[test_case("dummy/some_module/", Some("some_module"))] + #[test_case("src/folder1/folder2/logic.rs.html", Some("logic"))] + #[test_case("src/non_source_file.rs", None)] + #[test_case("html", None; "plain file without extension")] + #[test_case("something.html", Some("html"); "plain file")] + #[test_case("", None)] + fn test_generate_fallback_search(path: &str, search: Option<&str>) { + let mut params = RustdocParams::new("dummy") + .try_with_req_version("0.4.0") + .unwrap() + // non-default target, target stays in the url + .with_doc_target(OTHER_TARGET) + .with_inner_path(path) + .with_default_target(DEFAULT_TARGET) + .with_target_name("dummy") + .with_doc_targets(TARGETS.iter().cloned()); + + assert_eq!(params.generate_fallback_search().as_deref(), search); + assert_eq!( + params.generate_fallback_url().to_string(), + format!( + "/dummy/0.4.0/x86_64-pc-windows-msvc/dummy/{}", + search.map(|s| format!("?search={}", s)).unwrap_or_default() + ) + ); + + // change to default target, check url again + params = params.with_doc_target(DEFAULT_TARGET); + + assert_eq!(params.generate_fallback_search().as_deref(), search); + assert_eq!( + params.generate_fallback_url().to_string(), + format!( + "/dummy/0.4.0/dummy/{}", + search.map(|s| format!("?search={}", s)).unwrap_or_default() + ) + ); + } + + #[test] + fn test_parse_source() { + let params = RustdocParams::new("dummy") + .try_with_req_version("0.4.0") + .unwrap() + .with_inner_path("README.md") + .with_page_kind(PageKind::Source) + .try_with_original_uri("/crate/dummy/0.4.0/source/README.md") + .unwrap() + 
.with_default_target(DEFAULT_TARGET) + .with_target_name("dummy") + .with_doc_targets(TARGETS.iter().cloned()); + + assert_eq!(params.rustdoc_url().to_string(), "/dummy/0.4.0/dummy/"); + assert_eq!( + params.source_url().to_string(), + "/crate/dummy/0.4.0/source/README.md" + ); + assert_eq!( + params.target_redirect_url().to_string(), + "/crate/dummy/0.4.0/target-redirect/dummy/" + ); + } + + #[test_case( + None, None, None, None => "" + )] + #[test_case( + Some("target_name"), None, None, None => "target_name/" + )] + #[test_case( + None, None, None, Some("path/index.html") => "path/"; + "cuts trailing /index.html" + )] + #[test_case( + Some("target_name"), None, + Some(DEFAULT_TARGET), Some("inner/path.html") + => "x86_64-unknown-linux-gnu/inner/path.html"; + "default target, but we don't know about it, keeps target" + )] + #[test_case( + Some("target_name"), None, + Some(DEFAULT_TARGET), None + => "x86_64-unknown-linux-gnu/target_name/"; + "default target, we don't know about it, without path" + )] + #[test_case( + Some("target_name"), Some(DEFAULT_TARGET), + Some(DEFAULT_TARGET), None + => "target_name/"; + "default-target, without path, target_name is used to generate the inner path" + )] + #[test_case( + Some("target_name"), Some(DEFAULT_TARGET), + Some(DEFAULT_TARGET), Some("inner/path.html") + => "inner/path.html"; + "default target, with path, target_name is ignored" + )] + #[test_case( + None, Some(DEFAULT_TARGET), + Some(DEFAULT_TARGET), Some("inner/path/index.html") + => "inner/path/"; + "default target, with path as folder with index.html" + )] + #[test_case( + None, Some(DEFAULT_TARGET), + Some(DEFAULT_TARGET), Some("inner/path/") + => "inner/path/"; + "default target, with path as folder" + )] + #[test_case( + Some("target_name"), Some(DEFAULT_TARGET), + Some(OTHER_TARGET), None + => "x86_64-pc-windows-msvc/target_name/"; + "non-default-target, without path, target_name is used to generate the inner path" + )] + #[test_case( + Some("target_name"), Some(DEFAULT_TARGET), + Some(OTHER_TARGET), Some("inner/path.html") + => "x86_64-pc-windows-msvc/inner/path.html"; + "non-default target, with path, target_name is ignored" + )] + fn test_generate_rustdoc_path_for_url( + target_name: Option<&str>, + default_target: Option<&str>, + doc_target: Option<&str>, + inner_path: Option<&str>, + ) -> String { + generate_rustdoc_path_for_url(target_name, default_target, doc_target, inner_path) + } + + #[test] + fn test_case_1() { + let params = RustdocParams::new("dummy") + .try_with_req_version("0.2.0") + .unwrap() + .with_doc_target("dummy") + .with_inner_path("struct.Dummy.html") + .with_page_kind(PageKind::Rustdoc) + .try_with_original_uri("/dummy/0.2.0/dummy/struct.Dummy.html") + .unwrap() + .with_default_target(DEFAULT_TARGET) + .with_target_name("dummy") + .with_doc_targets(TARGETS.iter().cloned()); + + dbg!(¶ms); + + assert!(params.doc_target().is_none()); + assert_eq!(params.inner_path(), "dummy/struct.Dummy.html"); + assert_eq!(params.storage_path(), "dummy/struct.Dummy.html"); + + let params = params.with_doc_target(DEFAULT_TARGET); + dbg!(¶ms); + assert_eq!(params.doc_target(), Some(DEFAULT_TARGET)); + assert_eq!(params.inner_path(), "dummy/struct.Dummy.html"); + assert_eq!(params.storage_path(), "dummy/struct.Dummy.html"); + + let params = params.with_doc_target(OTHER_TARGET); + assert_eq!(params.doc_target(), Some(OTHER_TARGET)); + assert_eq!( + params.storage_path(), + format!("{OTHER_TARGET}/dummy/struct.Dummy.html") + ); + assert_eq!( + params.storage_path(), + 
format!("{OTHER_TARGET}/dummy/struct.Dummy.html") + ); + } + + #[test_case( + "/", + None, None, + None, "" + ; "no target, no path" + )] + #[test_case( + &format!("/{DEFAULT_TARGET}"), + Some(DEFAULT_TARGET), None, + Some(DEFAULT_TARGET), ""; + "existing target, no path" + )] + #[test_case( + &format!("/{UNKNOWN_TARGET}"), + Some(UNKNOWN_TARGET), None, + None, UNKNOWN_TARGET; + "unknown target, no path" + )] + #[test_case( + &format!("/{UNKNOWN_TARGET}/"), + Some(UNKNOWN_TARGET), Some("something/file.html"), + None, &format!("{UNKNOWN_TARGET}/something/file.html"); + "unknown target, with path, trailling slash is kept" + )] + #[test_case( + &format!("/{UNKNOWN_TARGET}/"), + Some(UNKNOWN_TARGET), None, + None, &format!("{UNKNOWN_TARGET}/"); + "unknown target, no path, trailling slash is kept" + )] + fn test_with_fixed_target_and_path( + original_uri: &str, + target: Option<&str>, + path: Option<&str>, + expected_target: Option<&str>, + expected_path: &str, + ) { + let params = RustdocParams::new(KRATE) + .try_with_req_version("0.4.0") + .unwrap() + .try_with_original_uri(original_uri) + .unwrap() + .with_maybe_doc_target(target) + .with_maybe_inner_path(path) + .with_doc_targets(TARGETS.iter().cloned()); + + dbg!(¶ms); + + assert_eq!(params.doc_target(), expected_target); + assert_eq!(params.inner_path(), expected_path); + } + + #[test_case( + None, None, + None, None + => ""; + "empty" + )] + #[test_case( + None, None, + None, Some("folder/index.html") + => "folder/"; + "just folder index.html will be removed" + )] + #[test_case( + None, None, + None, Some(INDEX_HTML) + => ""; + "just root index.html will be removed" + )] + #[test_case( + None, Some(DEFAULT_TARGET), + Some(DEFAULT_TARGET), None + => ""; + "just default target" + )] + #[test_case( + None, Some(DEFAULT_TARGET), + Some(OTHER_TARGET), None + => format!("{OTHER_TARGET}/"); + "just other target" + )] + #[test_case( + Some(KRATE), Some(DEFAULT_TARGET), + Some(DEFAULT_TARGET), None + => format!("{KRATE}/"); + "full with default target, target name is used" + )] + #[test_case( + Some(KRATE), Some(DEFAULT_TARGET), + Some(OTHER_TARGET), None + => format!("{OTHER_TARGET}/{KRATE}/"); + "full with other target, target name is used" + )] + #[test_case( + Some(KRATE), Some(DEFAULT_TARGET), + Some(DEFAULT_TARGET), Some("inner/something.html") + => "inner/something.html"; + "full with default target, target name is ignored" + )] + #[test_case( + Some(KRATE), Some(DEFAULT_TARGET), + Some(OTHER_TARGET), Some("inner/something.html") + => format!("{OTHER_TARGET}/inner/something.html"); + "full with other target, target name is ignored" + )] + fn test_rustdoc_path_for_url( + target_name: Option<&str>, + default_target: Option<&str>, + doc_target: Option<&str>, + inner_path: Option<&str>, + ) -> String { + generate_rustdoc_path_for_url(target_name, default_target, doc_target, inner_path) + } + + #[test] + fn test_override_page_kind() { + let params = RustdocParams::new(KRATE) + .try_with_original_uri("/krate/latest/path_add/static.html") + .unwrap() + .with_inner_path("path_add") + .with_static_route_suffix("static.html") + .with_default_target(DEFAULT_TARGET) + .with_target_name(KRATE) + .with_doc_targets(TARGETS.iter().cloned()); + + // without page kind, rustdoc path doesn' thave a path, and static suffix ignored + assert_eq!(params.rustdoc_url(), "/krate/latest/krate/"); + assert_eq!(params.source_url(), "/crate/krate/latest/source/"); + assert_eq!( + params.target_redirect_url(), + "/crate/krate/latest/target-redirect/krate/" + ); + + let 
params = params.with_page_kind(PageKind::Rustdoc); + assert_eq!(params.rustdoc_url(), "/krate/latest/path_add/static.html"); + assert_eq!(params.source_url(), "/crate/krate/latest/source/"); + assert_eq!( + params.target_redirect_url(), + "/crate/krate/latest/target-redirect/path_add/static.html" + ); + + let params = params.with_page_kind(PageKind::Source); + assert_eq!(params.rustdoc_url(), "/krate/latest/krate/"); + // just path added, not static suffix + assert_eq!(params.source_url(), "/crate/krate/latest/source/path_add"); + assert_eq!( + params.target_redirect_url(), + "/crate/krate/latest/target-redirect/krate/" + ); + } + + #[test] + fn test_override_page_kind_with_target() { + let params = RustdocParams::new(KRATE) + .try_with_original_uri(format!("/krate/latest/{OTHER_TARGET}/path_add/static.html")) + .unwrap() + .with_inner_path("path_add") + .with_static_route_suffix("static.html") + .with_doc_target(OTHER_TARGET) + .with_default_target(DEFAULT_TARGET) + .with_target_name(KRATE) + .with_doc_targets(TARGETS.iter().cloned()); + + // without page kind, rustdoc path doesn' thave a path, and static suffix ignored + assert_eq!( + params.rustdoc_url(), + format!("/krate/latest/{OTHER_TARGET}/krate/") + ); + assert_eq!(params.source_url(), "/crate/krate/latest/source/"); + assert_eq!( + params.target_redirect_url(), + format!("/crate/krate/latest/target-redirect/{OTHER_TARGET}/krate/") + ); + + // same when the pagekind is "Source" + let params = params.with_page_kind(PageKind::Source); + assert_eq!( + params.rustdoc_url(), + format!("/krate/latest/{OTHER_TARGET}/krate/") + ); + assert_eq!(params.source_url(), "/crate/krate/latest/source/path_add"); + assert_eq!( + params.target_redirect_url(), + format!("/crate/krate/latest/target-redirect/{OTHER_TARGET}/krate/") + ); + + // with page-kind "Rustdoc", we get the full path with static suffix + let params = params.with_page_kind(PageKind::Rustdoc); + dbg!(¶ms); + assert_eq!( + params.rustdoc_url(), + format!("/krate/latest/{OTHER_TARGET}/path_add/static.html") + ); + assert_eq!(params.source_url(), format!("/crate/krate/latest/source/")); + assert_eq!( + params.target_redirect_url(), + format!("/crate/krate/latest/target-redirect/{OTHER_TARGET}/path_add/static.html") + ); + } + + #[test] + fn test_debug_output() { + let params = RustdocParams::new("dummy") + .try_with_req_version("0.2.0") + .unwrap() + .with_inner_path("struct.Dummy.html") + .with_doc_target("dummy") + .with_page_kind(PageKind::Rustdoc) + .try_with_original_uri("/dummy/0.2.0/dummy/struct.Dummy.html") + .unwrap() + .with_default_target(DEFAULT_TARGET) + .with_target_name("dummy") + .with_doc_targets(TARGETS.iter().cloned()); + + let debug_output = format!("{:?}", params); + + assert!(debug_output.contains("EscapedURI")); + assert!(debug_output.contains("rustdoc_url()")); + assert!(debug_output.contains("generate_fallback_url()")); + } + + #[test] + fn test_override_doc_target_when_old_doc_target_was_path() { + // params as if they would have come from a route like + // `/{name}/{version}/{target}/{*path}`, + // where in the `{target}` place we have part of the path. 
+ let params = RustdocParams::new(KRATE) + .with_req_version(ReqVersion::Exact(VERSION)) + .try_with_original_uri("/dummy/0.1.0/dummy/struct.Dummy.html") + .unwrap() + .with_doc_target("dummy") + .with_inner_path("struct.Dummy.html"); + + dbg!(¶ms); + + // initial params, doc-target is "dummy", not validated + assert_eq!(params.doc_target(), Some("dummy")); + assert_eq!(params.inner_path(), "struct.Dummy.html"); + + // after parsing, we recognize that the doc target is not a target, and attach + // it to the inner_path. + let params = params + .with_default_target(DEFAULT_TARGET) + .with_target_name(KRATE) + .with_doc_targets(TARGETS.iter().cloned()); + + dbg!(¶ms); + + assert_eq!(params.doc_target(), None); + assert_eq!(params.inner_path(), "dummy/struct.Dummy.html"); + + // now, in some cases, we now want to generate a variation of these params, + // with an actual non-default doc target. + // Then we expect the path to be intact still, and the target to be set, even + // though the folder-part of the path was initially generated from the doc_target field. + let params = params.with_doc_target(OTHER_TARGET); + dbg!(¶ms); + assert_eq!(params.doc_target(), Some(OTHER_TARGET)); + assert_eq!(params.inner_path(), "dummy/struct.Dummy.html"); + } + + #[test] + fn test_if_order_matters_1() { + let params = RustdocParams::new(KRATE) + .with_req_version(ReqVersion::Exact(VERSION)) + .try_with_original_uri("/dummy/0.1.0/dummy/struct.Dummy.html") + .unwrap() + .with_inner_path("dummy/struct.Dummy.html") + .with_default_target(DEFAULT_TARGET) + .with_target_name(KRATE) + .with_doc_targets(TARGETS.iter().cloned()); + + assert_eq!(params.doc_target(), None); + assert_eq!(params.inner_path(), "dummy/struct.Dummy.html"); + + let params = params.with_doc_target(OTHER_TARGET); + assert_eq!(params.doc_target(), Some(OTHER_TARGET)); + assert_eq!(params.inner_path(), "dummy/struct.Dummy.html"); + } + + #[test] + fn test_if_order_matters_2() { + let params = RustdocParams::new(KRATE) + .with_req_version(ReqVersion::Exact(VERSION)) + .try_with_original_uri(format!( + "/dummy/0.1.0/{OTHER_TARGET}/dummy/struct.Dummy.html" + )) + .unwrap() + .with_inner_path(format!("{OTHER_TARGET}/dummy/struct.Dummy.html")) + .with_default_target(DEFAULT_TARGET) + .with_target_name(KRATE) + .with_doc_targets(TARGETS.iter().cloned()); + + assert_eq!(params.doc_target(), Some(OTHER_TARGET)); + assert_eq!(params.inner_path(), "dummy/struct.Dummy.html"); + + let params = params.with_doc_target(DEFAULT_TARGET); + assert_eq!(params.doc_target(), Some(DEFAULT_TARGET)); + assert_eq!(params.inner_path(), "dummy/struct.Dummy.html"); + } +} diff --git a/src/web/features.rs b/src/web/features.rs index 836ff31ba..633d5d5b8 100644 --- a/src/web/features.rs +++ b/src/web/features.rs @@ -4,8 +4,11 @@ use crate::{ web::{ MetaData, ReqVersion, cache::CachePolicy, - error::{AxumNope, AxumResult, EscapedURI}, - extractors::{DbConnection, Path}, + error::{AxumNope, AxumResult}, + extractors::{ + DbConnection, + rustdoc::{PageKind, RustdocParams}, + }, filters, headers::CanonicalUrl, match_version, @@ -84,22 +87,22 @@ impl SubFeature { #[derive(Debug, Clone)] struct FeaturesPage { metadata: MetaData, - dependencies: HashMap, + dependencies: HashMap, sorted_features: Option>, default_features: HashSet, canonical_url: CanonicalUrl, is_latest_url: bool, + params: RustdocParams, } impl FeaturesPage { fn is_default_feature(&self, feature: &str) -> bool { self.default_features.contains(feature) } - fn dependency_version(&self, dependency: &str) -> &str { + 
fn dependency_version(&self, dependency: &str) -> &ReqVersion { self.dependencies .get(dependency) - .map(|s| s.as_str()) - .unwrap_or("latest") + .unwrap_or(&ReqVersion::Latest) } } @@ -137,22 +140,28 @@ impl FeaturesPage { } pub(crate) async fn build_features_handler( - Path((name, req_version)): Path<(String, ReqVersion)>, + params: RustdocParams, mut conn: DbConnection, ) -> AxumResult { - let version = match_version(&mut conn, &name, &req_version) + let matched_release = match_version(&mut conn, params.name(), params.req_version()) .await? .assume_exact_name()? .into_canonical_req_version_or_else(|version| { AxumNope::Redirect( - EscapedURI::new(&format!("/crate/{}/{}/features", &name, version), None), + params.clone().with_req_version(version).features_url(), CachePolicy::ForeverInCdn, ) - })? - .into_version(); - - let metadata = - MetaData::from_crate(&mut conn, &name, &version, Some(req_version.clone())).await?; + })?; + let params = params.apply_matched_release(&matched_release); + let version = matched_release.into_version(); + + let metadata = MetaData::from_crate( + &mut conn, + params.name(), + &version, + Some(params.req_version().clone()), + ) + .await?; let row = sqlx::query!( r#" @@ -162,7 +171,7 @@ pub(crate) async fn build_features_handler( FROM releases INNER JOIN crates ON crates.id = releases.crate_id WHERE crates.name = $1 AND releases.version = $2"#, - name, + params.name(), version.to_string(), ) .fetch_optional(&mut *conn) @@ -182,14 +191,20 @@ pub(crate) async fn build_features_handler( dependencies, sorted_features, default_features, - is_latest_url: req_version.is_latest(), - canonical_url: CanonicalUrl::from_path(format!("/crate/{}/latest/features", &name)), + is_latest_url: params.req_version().is_latest(), + canonical_url: CanonicalUrl::from_uri( + params + .clone() + .with_req_version(ReqVersion::Latest) + .features_url(), + ), + params, } .into_response()) } /// Turns the raw JSON `dependencies` into a [`HashMap`] of dependencies and their versions. 
-fn get_dependency_versions(raw_dependencies: Option) -> HashMap { +fn get_dependency_versions(raw_dependencies: Option) -> HashMap { let mut map = HashMap::new(); if let Some(deps) = raw_dependencies.as_ref().and_then(Value::as_array) { @@ -197,7 +212,8 @@ fn get_dependency_versions(raw_dependencies: Option) -> HashMap>(path: P) -> Self { - Self( - encode_url_path(path.as_ref()) - .try_into() - .expect("invalid URI path characters even after encoding them"), - ) - } + pub fn from_uri(uri: EscapedURI) -> Self { + if uri.scheme().is_some() && uri.authority().is_some() { + return Self(uri); + } + + let mut parts = uri.into_inner().into_parts(); + + if parts.scheme.is_none() { + parts.scheme = Some("https".try_into().unwrap()); + } - fn build_full_uri(&self) -> Uri { - Uri::builder() - .scheme("https") - .authority("docs.rs") - .path_and_query(self.0.clone()) - .build() - .expect("this unwrap can't fail because PathAndQuery is valid") + if parts.authority.is_none() { + parts.authority = Some("docs.rs".try_into().unwrap()); + } + + Self(EscapedURI::from_uri( + Uri::from_parts(parts).expect("parts were already in Uri, or are static"), + )) } } @@ -45,9 +50,7 @@ impl Header for CanonicalUrl { where E: Extend, { - let value: HeaderValue = format!(r#"<{}>; rel="canonical""#, self.build_full_uri()) - .parse() - .unwrap(); + let value: HeaderValue = format!(r#"<{}>; rel="canonical""#, self.0).parse().unwrap(); values.extend(std::iter::once(value)); } @@ -55,7 +58,7 @@ impl Header for CanonicalUrl { impl fmt::Display for CanonicalUrl { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.build_full_uri()) + write!(f, "{}", self.0) } } @@ -64,10 +67,32 @@ impl Serialize for CanonicalUrl { where S: serde::Serializer, { - serializer.serialize_str(&self.build_full_uri().to_string()) + serializer.serialize_str(&self.0.to_string()) + } +} + +impl From for CanonicalUrl { + fn from(value: Uri) -> Self { + Self(EscapedURI::from_uri(value)) + } +} + +impl From for CanonicalUrl { + fn from(value: EscapedURI) -> Self { + Self::from_uri(value) + } +} + +impl Deref for CanonicalUrl { + type Target = EscapedURI; + + fn deref(&self) -> &Self::Target { + &self.0 } } +impl HtmlSafe for CanonicalUrl {} + #[cfg(test)] mod tests { use super::*; @@ -75,9 +100,26 @@ mod tests { use axum::http::HeaderMap; use axum_extra::headers::HeaderMapExt; + #[test] + fn test_serialize_canonical_from_uri() { + let url = CanonicalUrl::from_uri(EscapedURI::from_uri( + Uri::builder() + .scheme("https") + .authority("some_server.org") + .path_and_query("/some/path.html") + .build() + .unwrap(), + )); + + assert_eq!( + serde_json::to_string(&url).unwrap(), + "\"https://some_server.org/some/path.html\"" + ); + } + #[test] fn test_serialize_canonical() { - let url = CanonicalUrl::from_path("/some/path/"); + let url = CanonicalUrl::from_uri("/some/path/".parse::().unwrap().into()); assert_eq!( serde_json::to_string(&url).unwrap(), @@ -88,7 +130,9 @@ mod tests { #[test] fn test_encode_canonical() { let mut map = HeaderMap::new(); - map.typed_insert(CanonicalUrl::from_path("/some/path/")); + map.typed_insert(CanonicalUrl::from_uri( + "/some/path/".parse::().unwrap().into(), + )); assert_eq!( map["link"], "; rel=\"canonical\"" @@ -97,8 +141,11 @@ mod tests { #[test] fn test_encode_canonical_with_encoding() { + // umlauts are allowed in http::Uri, but we still want to encode them. 
let mut map = HeaderMap::new(); - map.typed_insert(CanonicalUrl::from_path("/some/äöü/")); + map.typed_insert(CanonicalUrl::from_uri( + "/some/äöü/".parse::().unwrap().into(), + )); assert_eq!( map["link"], "; rel=\"canonical\"" diff --git a/src/web/mod.rs b/src/web/mod.rs index 615cd32d8..a591b3f19 100644 --- a/src/web/mod.rs +++ b/src/web/mod.rs @@ -1,14 +1,12 @@ //! Web interface of docs.rs pub mod page; -// mod tmp; - -use crate::db::CrateId; -use crate::db::ReleaseId; -use crate::db::types::BuildStatus; -use crate::utils::get_correct_docsrs_style_file; -use crate::utils::report_error; -use crate::web::page::templates::{RenderBrands, RenderSolid, filters}; + +use crate::{ + db::{CrateId, types::BuildStatus}, + utils::{get_correct_docsrs_style_file, report_error}, + web::page::templates::{RenderBrands, RenderSolid, filters}, +}; use anyhow::{Context as _, Result, anyhow, bail}; use askama::Template; use axum_extra::middleware::option_layer; @@ -21,6 +19,7 @@ pub(crate) mod cache; pub(crate) mod crate_details; mod csp; pub(crate) mod error; +mod escaped_uri; mod extractors; mod features; mod file; @@ -55,7 +54,7 @@ use semver::{Version, VersionReq}; use sentry::integrations::tower as sentry_tower; use serde_with::{DeserializeFromStr, SerializeDisplay}; use std::{ - borrow::{Borrow, Cow}, + borrow::Cow, fmt::{self, Display}, net::{IpAddr, Ipv4Addr, SocketAddr}, str::FromStr, @@ -63,7 +62,6 @@ use std::{ }; use tower::ServiceBuilder; use tower_http::{catch_panic::CatchPanicLayer, timeout::TimeoutLayer, trace::TraceLayer}; -use url::form_urlencoded; use self::crate_details::Release; @@ -119,6 +117,52 @@ impl FromStr for ReqVersion { } } +impl From<&ReqVersion> for ReqVersion { + fn from(value: &ReqVersion) -> Self { + value.clone() + } +} + +impl From for ReqVersion { + fn from(value: Version) -> Self { + ReqVersion::Exact(value) + } +} + +impl From<&Version> for ReqVersion { + fn from(value: &Version) -> Self { + value.clone().into() + } +} + +impl From for ReqVersion { + fn from(value: VersionReq) -> Self { + ReqVersion::Semver(value) + } +} + +impl From<&VersionReq> for ReqVersion { + fn from(value: &VersionReq) -> Self { + value.clone().into() + } +} + +impl TryFrom for ReqVersion { + type Error = semver::Error; + + fn try_from(value: String) -> Result { + value.parse() + } +} + +impl TryFrom<&str> for ReqVersion { + type Error = semver::Error; + + fn try_from(value: &str) -> Result { + value.parse() + } +} + #[derive(Debug)] pub(crate) struct MatchedRelease { /// crate name @@ -215,14 +259,6 @@ impl MatchedRelease { self.release.version } - fn version(&self) -> &Version { - &self.release.version - } - - fn id(&self) -> ReleaseId { - self.release.id - } - fn build_status(&self) -> BuildStatus { self.release.build_status } @@ -231,10 +267,6 @@ impl MatchedRelease { self.release.rustdoc_status.unwrap_or(false) } - fn target_name(&self) -> Option<&str> { - self.release.target_name.as_deref() - } - fn is_latest_url(&self) -> bool { matches!(self.req_version, ReqVersion::Latest) } @@ -616,29 +648,6 @@ where Ok(resp) } -/// Parse an URI into a http::Uri struct. -/// When `queries` are given these are added to the URL, -/// with empty `queries` the `?` will be omitted. 
-pub(crate) fn axum_parse_uri_with_params(uri: &str, queries: I) -> Result -where - I: IntoIterator, - I::Item: Borrow<(K, V)>, - K: AsRef, - V: AsRef, -{ - let mut queries = queries.into_iter().peekable(); - if queries.peek().is_some() { - let query_params: String = form_urlencoded::Serializer::new(String::new()) - .extend_pairs(queries) - .finish(); - format!("{uri}?{query_params}") - .parse::() - .context("error parsing URL") - } else { - uri.parse::().context("error parsing URL") - } -} - /// MetaData used in header #[derive(Debug, Clone, PartialEq, Eq)] #[cfg_attr(test, derive(serde::Serialize))] @@ -716,30 +725,10 @@ impl MetaData { } fn parse_doc_targets(targets: Value) -> Vec { - let mut targets: Vec<_> = targets - .as_array() - .map(|array| { - array - .iter() - .filter_map(|item| item.as_str().map(|s| s.to_owned())) - .collect() - }) - .unwrap_or_default(); + let mut targets: Vec = serde_json::from_value(targets).unwrap_or_default(); targets.sort_unstable(); targets } - - fn target_name_url(&self) -> String { - if let Some(ref target_name) = self.target_name { - format!("{target_name}/index.html") - } else { - String::new() - } - } - - pub(crate) fn doc_targets(&self) -> Option<&[String]> { - self.doc_targets.as_deref() - } } #[derive(Template)] @@ -756,6 +745,7 @@ pub(crate) struct AxumErrorPage { impl_axum_webpage! { AxumErrorPage, status = |err| err.status, + } #[cfg(test)] @@ -767,6 +757,7 @@ mod test { }; use crate::{db::ReleaseId, docbuilder::DocCoverage}; use kuchikiki::traits::TendrilSink; + use pretty_assertions::assert_eq; use serde_json::json; use test_case::test_case; @@ -844,7 +835,7 @@ mod test { for (idx, value) in ["60%", "6", "10", "2", "1"].iter().enumerate() { let mut menu_items = foo_crate.select(".pure-menu-item b").unwrap(); assert!( - menu_items.any(|e| dbg!(e.text_contents()).contains(value)), + menu_items.any(|e| e.text_contents().contains(value)), "({idx}, {value:?})" ); } @@ -1273,7 +1264,7 @@ mod test { target_name: Some("foo".to_string()), rustdoc_status: Some(true), default_target: Some("x86_64-unknown-linux-gnu".to_string()), - doc_targets: Some(vec![]), + doc_targets: Some(vec!["x86_64-unknown-linux-gnu".to_string()]), yanked: Some(false), rustdoc_css_file: Some("rustdoc.css".to_string()), }, diff --git a/src/web/releases.rs b/src/web/releases.rs index 92d7c4c2f..80bb78232 100644 --- a/src/web/releases.rs +++ b/src/web/releases.rs @@ -1,4 +1,4 @@ -//! Releases web handlers +//! 
Releases web handlers

 use crate::{
     AsyncBuildQueue, Config, InstanceMetrics, RegistryApi,
@@ -6,11 +6,12 @@ use crate::{
     cdn, impl_axum_webpage,
     utils::report_error,
     web::{
-        ReqVersion, axum_parse_uri_with_params, axum_redirect, encode_url_path,
+        ReqVersion, axum_redirect, encode_url_path,
         error::{AxumNope, AxumResult},
-        extractors::{DbConnection, Path},
+        extractors::{DbConnection, Path, rustdoc::RustdocParams},
         match_version,
         page::templates::{RenderBrands, RenderRegular, RenderSolid, filters},
+        rustdoc::OfficialCrateDescription,
     },
 };
 use anyhow::{Context as _, Result, anyhow};
@@ -23,12 +24,15 @@ use base64::{Engine, engine::general_purpose::STANDARD as b64};
 use chrono::{DateTime, Utc};
 use futures_util::stream::TryStreamExt;
 use itertools::Itertools;
+use semver::Version;
 use serde::{Deserialize, Serialize};
 use sqlx::Row;
-use std::collections::{BTreeMap, HashMap, HashSet};
-use std::str;
-use std::sync::Arc;
-use tracing::warn;
+use std::{
+    collections::{BTreeMap, HashMap, HashSet},
+    str,
+    sync::Arc,
+};
+use tracing::{trace, warn};
 use url::form_urlencoded;

 use super::cache::CachePolicy;
@@ -43,14 +47,21 @@ const RELEASES_IN_FEED: i64 = 150;
 #[derive(Debug, Clone, PartialEq, Eq, Serialize)]
 pub struct Release {
     pub(crate) name: String,
-    pub(crate) version: String,
+    pub(crate) version: semver::Version,
     pub(crate) description: Option<String>,
     pub(crate) target_name: Option<String>,
     pub(crate) rustdoc_status: bool,
     pub(crate) build_time: Option<DateTime<Utc>>,
     pub(crate) stars: i32,
     pub(crate) has_unyanked_releases: Option<bool>,
-    pub(crate) href: Option<&'static str>,
+}
+
+impl Release {
+    pub fn rustdoc_params(&self) -> RustdocParams {
+        RustdocParams::new(&self.name)
+            .with_req_version(self.version.clone())
+            .with_maybe_target_name(self.target_name.clone())
+    }
 }

 #[derive(Debug, Copy, Clone, PartialEq, Eq, Default)]
@@ -106,29 +117,34 @@ pub(crate) async fn get_releases(
         }
     );

-    Ok(sqlx::query(query.as_str())
+    sqlx::query(query.as_str())
         .bind(limit)
         .bind(offset)
         .bind(filter_failed)
         .fetch(conn)
-        .map_ok(|row| Release {
-            name: row.get(0),
-            version: row.get(1),
-            description: row.get(2),
-            target_name: row.get(3),
-            rustdoc_status: row.get::<Option<bool>, _>(4).unwrap_or(false),
-            build_time: row.get(5),
-            stars: row.get::<Option<i32>, _>(6).unwrap_or(0),
-            has_unyanked_releases: None,
-            href: None,
+        .err_into::<anyhow::Error>()
+        .and_then(|row| async move {
+            let version: semver::Version = row.get::<String, _>(1).parse()?;
+
+            Ok(Release {
+                name: row.get(0),
+                version,
+                description: row.get(2),
+                target_name: row.get(3),
+                rustdoc_status: row.get::<Option<bool>, _>(4).unwrap_or(false),
+                build_time: row.get(5),
+                stars: row.get::<Option<i32>, _>(6).unwrap_or(0),
+                has_unyanked_releases: None,
+            })
         })
         .try_collect()
-        .await?)
+        .await
 }

 #[derive(Debug, Clone, PartialEq, Eq)]
 pub(crate) enum ReleaseStatus {
     Available(Release),
+    External(&'static OfficialCrateDescription),
     /// Only contains the crate name.
     NotAvailable(String),
 }
@@ -139,20 +155,6 @@ struct SearchResult {
     pub next_page: Option<String>,
 }

-fn rust_lib_release(name: &str, description: &str, href: &'static str) -> ReleaseStatus {
-    ReleaseStatus::Available(Release {
-        name: name.to_string(),
-        version: String::new(),
-        description: Some(description.to_string()),
-        build_time: None,
-        target_name: None,
-        rustdoc_status: false,
-        stars: 0,
-        has_unyanked_releases: None,
-        href: Some(href),
-    })
-}
-
 /// Get the search results for a crate search query
 ///
 /// This delegates to the crates.io search API.
@@ -206,21 +208,22 @@ async fn get_search_results( &names[..], ) .fetch(&mut *conn) - .map_ok(|row| { - ( + .err_into::() + .and_then(|row| async move { + let version: semver::Version = row.version.parse()?; + Ok(( row.name.clone(), Release { name: row.name, - version: row.version, + version, description: row.description, build_time: row.last_build_time, target_name: row.target_name, rustdoc_status: row.rustdoc_status.unwrap_or(false), stars: row.stars.unwrap_or(0), has_unyanked_releases: row.has_unyanked_releases, - href: None, }, - ) + )) }) .try_collect() .await?; @@ -229,13 +232,8 @@ async fn get_search_results( // extend with the release/build information from docs.rs // Crates that are not on docs.rs yet will not be returned. let mut results = Vec::new(); - if let Some(super::rustdoc::OfficialCrateDescription { - name, - href, - description, - }) = super::rustdoc::DOC_RUST_LANG_ORG_REDIRECTS.get(query) - { - results.push(rust_lib_release(name, description, href)) + if let Some(desc) = super::rustdoc::DOC_RUST_LANG_ORG_REDIRECTS.get(query) { + results.push(ReleaseStatus::External(desc)); } let names: Vec = @@ -501,13 +499,17 @@ async fn redirect_to_random_crate( if let Some(row) = row { metrics.im_feeling_lucky_searches.inc(); - Ok(axum_redirect(format!( - "/{}/{}/{}/", - row.name, - row.version, - row.target_name - .expect("we only look at releases with docs, so target_name will exist") - ))?) + let params = RustdocParams::new(&row.name) + .with_req_version(ReqVersion::Exact( + row.version + .parse() + .context("could not parse version releases table")?, + )) + .with_maybe_target_name(row.target_name.as_deref()); + + trace!(?row, ?params, "redirecting to random crate result"); + + Ok(axum_redirect(params.rustdoc_url())?) } else { report_error(&anyhow!("found no result in random crate search")); Err(AxumNope::NoResults) @@ -524,19 +526,19 @@ pub(crate) async fn search_handler( Extension(config): Extension>, Extension(registry): Extension>, Extension(metrics): Extension>, - Query(mut params): Query>, + Query(mut query_params): Query>, ) -> AxumResult { - let mut query = params + let mut query = query_params .get("query") .map(|q| q.to_string()) .unwrap_or_else(|| "".to_string()); - let mut sort_by = params + let mut sort_by = query_params .get("sort") .map(|q| q.to_string()) .unwrap_or_else(|| "relevance".to_string()); // check if I am feeling lucky button pressed and redirect user to crate page // if there is a match. Also check for paths to items within crates. - if params.remove("i-am-feeling-lucky").is_some() || query.contains("::") { + if query_params.remove("i-am-feeling-lucky").is_some() || query.contains("::") { // redirect to a random crate if query is empty if query.is_empty() { return Ok(redirect_to_random_crate(config, metrics, &mut conn) @@ -561,32 +563,29 @@ pub(crate) async fn search_handler( .await .map(|matched_release| matched_release.into_exactly_named()) { - params.remove("query"); - queries.extend(params); - - let uri = if matchver.rustdoc_status() { - axum_parse_uri_with_params( - &format!( - "/{}/{}/{}/", - matchver.name, - matchver.version(), - matchver - .target_name() - .expect("target name will exist when rustdoc_status is true"), - ), - queries, - )? 
+ query_params.remove("query"); + queries.extend(query_params); + + let rustdoc_status = matchver.rustdoc_status(); + let params = RustdocParams::from_matched_release(&matchver); + + trace!( + krate, + ?params, + "redirecting I'm feeling lucky search to crate page" + ); + + let uri = if rustdoc_status { + params.rustdoc_url().append_query_pairs(queries) } else { - format!("/crate/{}/{}", matchver.name, matchver.version()) - .parse::() - .context("could not parse redirect URI")? + params.crate_details_url() }; return Ok(super::axum_redirect(uri)?.into_response()); } } - let search_result = if let Some(paginate) = params.get("paginate") { + let search_result = if let Some(paginate) = query_params.get("paginate") { let decoded = b64.decode(paginate.as_bytes()).map_err(|e| { warn!("error when decoding pagination base64 string \"{paginate}\": {e:?}"); AxumNope::NoResults @@ -720,7 +719,7 @@ struct BuildQueuePage { queue: Vec, rebuild_queue: Vec, active_cdn_deployments: Vec, - in_progress_builds: Vec<(String, String)>, + in_progress_builds: Vec<(String, Version)>, expand_rebuild_queue: bool, } @@ -749,7 +748,7 @@ pub(crate) async fn build_queue_handler( // reverse the list, so the oldest comes first active_cdn_deployments.reverse(); - let in_progress_builds: Vec<(String, String)> = sqlx::query!( + let in_progress_builds: Vec<(String, Version)> = sqlx::query!( r#"SELECT crates.name, releases.version @@ -763,7 +762,14 @@ pub(crate) async fn build_queue_handler( .fetch_all(&mut *conn) .await? .into_iter() - .map(|rec| (rec.name, rec.version)) + .map(|rec| { + ( + rec.name, + rec.version + .parse() + .expect("all versions in the db are valid"), + ) + }) .collect(); let mut rebuild_queue = Vec::new(); @@ -773,8 +779,11 @@ pub(crate) async fn build_queue_handler( .into_iter() .filter(|krate| { !in_progress_builds.iter().any(|(name, version)| { + // temporary, until I migrated the other version occurences to semver::Version + // We know that in the DB we only have semver + let krate_version: Version = krate.version.parse().unwrap(); // use `.any` instead of `.contains` to avoid cloning name& version for the match - *name == krate.name && *version == krate.version + *name == krate.name && *version == krate_version }) }) .collect_vec(); @@ -940,7 +949,7 @@ mod tests { web.assert_redirect( "/releases/search?query=some_random_crate&i-am-feeling-lucky=1", - "/crate/some_random_crate/1.0.0", + "/crate/some_random_crate/latest", ) .await?; Ok(()) @@ -964,7 +973,7 @@ mod tests { web.assert_redirect( "/releases/search?query=some_random_crate&i-am-feeling-lucky=1", - "/some_random_crate/1.0.0/some_random_crate/", + "/some_random_crate/latest/some_random_crate/", ) .await?; Ok(()) @@ -1019,12 +1028,12 @@ mod tests { web.assert_redirect( "/releases/search?query=some_random_crate::somepath", - "/some_random_crate/1.0.0/some_random_crate/?search=somepath", + "/some_random_crate/latest/some_random_crate/?search=somepath", ) .await?; web.assert_redirect( "/releases/search?query=some_random_crate::some::path", - "/some_random_crate/1.0.0/some_random_crate/?search=some%3A%3Apath", + "/some_random_crate/latest/some_random_crate/?search=some%3A%3Apath", ) .await?; Ok(()) @@ -1043,7 +1052,7 @@ mod tests { web.assert_redirect( "/releases/search?query=some_random_crate::somepath&go_to_first=true", - "/some_random_crate/1.0.0/some_random_crate/?go_to_first=true&search=somepath", + "/some_random_crate/latest/some_random_crate/?go_to_first=true&search=somepath", ) .await?; Ok(()) @@ -2087,7 +2096,11 @@ mod tests { 
web.get(&url).await? }; let status = resp.status(); - assert!(status.is_success(), "failed to GET {url}: {status}"); + assert!( + status.is_success(), + "failed to GET {url}: {status}, {:?}", + resp.headers().get("Location"), + ); } Ok(()) diff --git a/src/web/routes.rs b/src/web/routes.rs index 8e136a259..a69bbb126 100644 --- a/src/web/routes.rs +++ b/src/web/routes.rs @@ -1,4 +1,4 @@ -use super::{ +use crate::web::{ cache::CachePolicy, error::AxumNope, metrics::request_recorder, statics::build_static_router, }; use askama::Template; @@ -258,7 +258,7 @@ pub(super) fn build_axum_routes() -> AxumRouter { get_internal(super::source::source_browser_handler), ) .route( - "/crate/{name}/{version}/menus/platforms/{target}", + "/crate/{name}/{version}/menus/platforms/{target}/", get_internal(super::crate_details::get_all_platforms), ) .route( @@ -266,11 +266,11 @@ pub(super) fn build_axum_routes() -> AxumRouter { get_internal(super::crate_details::get_all_platforms), ) .route( - "/crate/{name}/{version}/menus/platforms", + "/crate/{name}/{version}/menus/platforms/", get_internal(super::crate_details::get_all_platforms_root), ) .route( - "/crate/{name}/{version}/menus/releases/{target}/{*path}", + "/crate/{name}/{version}/menus/releases/{*path}", get_internal(super::crate_details::get_all_releases), ) .route( diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index 3b38bfa68..528fc74f2 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -2,20 +2,22 @@ use crate::{ AsyncStorage, Config, InstanceMetrics, RUSTDOC_STATIC_STORAGE_PREFIX, - db::Pool, storage::{ CompressionAlgorithm, RustdocJsonFormatVersion, StreamingBlob, compression::compression_from_file_extension, rustdoc_archive_path, rustdoc_json_path, }, utils, web::{ - MetaData, ReqVersion, axum_cached_redirect, axum_parse_uri_with_params, + MetaData, ReqVersion, axum_cached_redirect, cache::CachePolicy, crate_details::CrateDetails, csp::Csp, - encode_url_path, - error::{AxumNope, AxumResult, EscapedURI}, - extractors::{DbConnection, Path}, + error::{AxumNope, AxumResult}, + escaped_uri::EscapedURI, + extractors::{ + DbConnection, Path, PathFileExtension, + rustdoc::{PageKind, RustdocParams}, + }, file::StreamingFile, match_version, page::{ @@ -28,24 +30,22 @@ use anyhow::{Context as _, anyhow}; use askama::Template; use axum::{ body::Body, - extract::{Extension, Query}, - http::{StatusCode, Uri}, + extract::{Extension, Query, RawQuery}, + http::StatusCode, response::{IntoResponse, Response as AxumResponse}, }; -use http::{HeaderValue, header}; -use semver::Version; +use http::{HeaderValue, Uri, header}; use serde::Deserialize; use std::{ - collections::{BTreeMap, HashMap}, + collections::HashMap, sync::{Arc, LazyLock}, }; -use tracing::{Instrument, debug, error, info_span, instrument, trace}; - -use super::extractors::PathFileExtension; +use tracing::{Instrument, error, info_span, instrument, trace}; +#[derive(Debug, Clone, PartialEq, Eq)] pub(crate) struct OfficialCrateDescription { pub(crate) name: &'static str, - pub(crate) href: &'static str, + pub(crate) href: Uri, pub(crate) description: &'static str, } @@ -56,7 +56,7 @@ pub(crate) static DOC_RUST_LANG_ORG_REDIRECTS: LazyLock, - target: Option, -} - /// try to serve a toolchain specific asset from the legacy location. /// /// Newer rustdoc builds use a specific subfolder on the bucket, @@ -206,44 +211,41 @@ async fn try_serve_legacy_toolchain_asset( /// or crate details page based on whether the given crate version was successfully built. 
#[instrument(skip(storage, conn))] pub(crate) async fn rustdoc_redirector_handler( - Path(params): Path, + params: RustdocParams, Extension(storage): Extension>, mut conn: DbConnection, - Query(query_pairs): Query>, - uri: Uri, + RawQuery(original_query): RawQuery, ) -> AxumResult { - #[instrument] + let params = params.with_page_kind(PageKind::Rustdoc); + fn redirect_to_doc( - query_pairs: &HashMap, - url_str: String, + url: EscapedURI, cache_policy: CachePolicy, path_in_crate: Option<&str>, - ) -> AxumResult { - let mut queries: BTreeMap = BTreeMap::new(); - if let Some(path) = path_in_crate { - queries.insert("search".into(), path.into()); - } - queries.extend(query_pairs.to_owned()); + ) -> AxumResult { + let url = if let Some(path) = path_in_crate { + url.append_query_pair("search", path) + } else { + url + }; + trace!("redirect to doc"); - Ok(axum_cached_redirect( - axum_parse_uri_with_params(&url_str, queries)?, - cache_policy, - )?) + Ok(axum_cached_redirect(url, cache_policy)?) } // global static assets for older builds are served from the root, which ends up // in this handler as `params.name`. - if let Some((_, extension)) = params.name.rsplit_once('.') + if let Some((_, extension)) = params.name().rsplit_once('.') && ["css", "js", "png", "svg", "woff", "woff2"] .binary_search(&extension) .is_ok() { - return try_serve_legacy_toolchain_asset(storage, params.name) + return try_serve_legacy_toolchain_asset(storage, params.name()) .instrument(info_span!("serve static asset")) .await; } - if let Some((_, extension)) = uri.path().rsplit_once('.') + if let Some(extension) = params.file_extension() && extension == "ico" { // redirect all ico requests @@ -252,41 +254,41 @@ pub(crate) async fn rustdoc_redirector_handler( return Ok(axum_cached_redirect( "/-/static/favicon.ico", CachePolicy::ForeverInCdnAndBrowser, - )? - .into_response()); + )?); } - let (crate_name, path_in_crate) = match params.name.split_once("::") { - Some((krate, path)) => (krate.to_string(), Some(path.to_string())), - None => (params.name.to_string(), None), + let (crate_name, path_in_crate) = match params.name().split_once("::") { + Some((krate, path)) => (krate.to_owned(), Some(path.to_owned())), + None => (params.name().to_owned(), None), }; - if let Some(description) = DOC_RUST_LANG_ORG_REDIRECTS.get(crate_name.as_str()) { - return Ok(redirect_to_doc( - &query_pairs, - description.href.to_string(), + if let Some(description) = DOC_RUST_LANG_ORG_REDIRECTS.get(&*crate_name) { + let target_uri = + EscapedURI::from_uri(description.href.clone()).append_raw_query(original_query); + return redirect_to_doc( + target_uri, CachePolicy::ForeverInCdnAndStaleInBrowser, path_in_crate.as_deref(), - )? - .into_response()); + ); } // it doesn't matter if the version that was given was exact or not, since we're redirecting // anyway - let matched_release = match_version( - &mut conn, - &crate_name, - ¶ms.version.clone().unwrap_or_default(), - ) - .await? - .into_exactly_named(); - trace!(?matched_release, "matched version"); + let matched_release = match_version(&mut conn, &crate_name, ¶ms.req_version().clone()) + .await? + .into_exactly_named() + .into_canonical_req_version(); + let params = params.apply_matched_release(&matched_release); + trace!( + ?matched_release, + ?params, + "parsed params with matched version" + ); let crate_name = matched_release.name.clone(); // we might get requests to crate-specific JS/CSS files here. 
- if let Some(ref target) = params.target - && (target.ends_with(".js") || target.ends_with(".css")) - { + if params.inner_path().ends_with(".js") || params.inner_path().ends_with(".css") { + let inner_path = params.inner_path(); // this URL is actually from a crate-internal path, serve it there instead return async { let krate = CrateDetails::from_matched_release(&mut conn, matched_release).await?; @@ -296,7 +298,7 @@ pub(crate) async fn rustdoc_redirector_handler( &crate_name, &krate.version.to_string(), krate.latest_build_id, - target, + inner_path, krate.archive_storage, ) .await @@ -306,15 +308,15 @@ pub(crate) async fn rustdoc_redirector_handler( if !matches!(err.downcast_ref(), Some(AxumNope::ResourceNotFound)) && !matches!(err.downcast_ref(), Some(crate::storage::PathNotFoundError)) { - debug!(?target, ?err, "got error serving file"); + error!(inner_path, ?err, "got error serving file"); } // FIXME: we sometimes still get requests for toolchain // specific static assets under the crate/version/ path. // This is fixed in rustdoc, but pending a rebuild for // docs that were affected by this bug. // https://github.com/rust-lang/docs.rs/issues/1979 - if target.starts_with("search-") || target.starts_with("settings-") { - try_serve_legacy_toolchain_asset(storage, target).await + if inner_path.starts_with("search-") || inner_path.starts_with("settings-") { + try_serve_legacy_toolchain_asset(storage, inner_path).await } else { Err(err.into()) } @@ -325,49 +327,20 @@ pub(crate) async fn rustdoc_redirector_handler( .await; } - let matched_release = matched_release.into_canonical_req_version(); - if matched_release.rustdoc_status() { - let target_name = matched_release - .target_name() - .expect("when rustdoc_status is true, target name exists"); - let mut target = params.target.as_deref(); - if target == Some("index.html") || target == Some(target_name) { - target = None; - } - - let url_str = if let Some(target) = target { - format!( - "/{crate_name}/{}/{target}/{}/", - matched_release.req_version, target_name - ) - } else { - format!( - "/{crate_name}/{}/{}/", - matched_release.req_version, target_name - ) - }; - - let cache = if matched_release.is_latest_url() { - CachePolicy::ForeverInCdn - } else { - CachePolicy::ForeverInCdnAndStaleInBrowser - }; - Ok(redirect_to_doc( - &query_pairs, - encode_url_path(&url_str), - cache, + params.rustdoc_url().append_raw_query(original_query), + if matched_release.is_latest_url() { + CachePolicy::ForeverInCdn + } else { + CachePolicy::ForeverInCdnAndStaleInBrowser + }, path_in_crate.as_deref(), )? .into_response()) } else { Ok(axum_cached_redirect( - EscapedURI::new( - &format!("/crate/{crate_name}/{}", matched_release.req_version), - uri.query(), - ) - .as_str(), + params.crate_details_url().append_raw_query(original_query), CachePolicy::ForeverInCdn, )? .into_response()) @@ -376,11 +349,9 @@ pub(crate) async fn rustdoc_redirector_handler( #[derive(Template)] #[template(path = "rustdoc/topbar.html")] -#[derive(Debug, Clone)] pub struct RustdocPage { - pub latest_path: String, - pub permalink_path: String, - pub inner_path: String, + pub latest_path: EscapedURI, + pub permalink_path: EscapedURI, // true if we are displaying the latest version of the crate, regardless // of whether the URL specifies a version number or the string "latest." 
pub is_latest_version: bool, @@ -390,6 +361,7 @@ pub struct RustdocPage { pub krate: CrateDetails, pub metadata: MetaData, pub current_target: String, + params: RustdocParams, } impl RustdocPage { @@ -426,21 +398,10 @@ impl RustdocPage { } pub(crate) fn use_direct_platform_links(&self) -> bool { - !self.latest_path.contains("/target-redirect/") + !&self.latest_path.path().contains("/target-redirect/") } } -#[derive(Clone, Deserialize, Debug)] -pub(crate) struct RustdocHtmlParams { - pub(crate) name: String, - pub(crate) version: ReqVersion, - // both target and path are only used for matching the route. - // The actual path is read from the request `Uri` because - // we have some static filenames directly in the routes. - pub(crate) target: Option, - pub(crate) path: Option, -} - /// Serves documentation generated by rustdoc. /// /// This includes all HTML files for an individual crate, as well as the `search-index.js`, which is @@ -448,51 +409,22 @@ pub(crate) struct RustdocHtmlParams { #[allow(clippy::too_many_arguments)] #[instrument(skip_all)] pub(crate) async fn rustdoc_html_server_handler( - Path(params): Path, + params: RustdocParams, Extension(metrics): Extension>, Extension(templates): Extension>, - Extension(pool): Extension, Extension(storage): Extension>, Extension(config): Extension>, Extension(csp): Extension>, - uri: Uri, + RawQuery(original_query): RawQuery, + mut conn: DbConnection, ) -> AxumResult { - // since we directly use the Uri-path and not the extracted params from the router, - // we have to percent-decode the string here. - let original_path = percent_encoding::percent_decode(uri.path().as_bytes()) - .decode_utf8() - .map_err(|err| AxumNope::BadRequest(err.into()))?; - - let mut req_path: Vec<&str> = original_path.split('/').collect(); - // Remove the empty start, the name and the version from the path - req_path.drain(..3).for_each(drop); + let params = params.with_page_kind(PageKind::Rustdoc); + trace!(?params, ?original_query, "original params"); // Pages generated by Rustdoc are not ready to be served with a CSP yet. csp.suppress(true); - // Convenience function to allow for easy redirection - #[instrument] - fn redirect( - name: &str, - vers: &Version, - path: &[&str], - cache_policy: CachePolicy, - uri: &Uri, - ) -> AxumResult { - trace!("redirect"); - Ok(axum_cached_redirect( - EscapedURI::new( - &format!("/{}/{}/{}", name, vers, path.join("/")), - uri.query(), - ) - .as_str(), - cache_policy, - )? - .into_response()) - } - trace!("match version"); - let mut conn = pool.get_async().await?; // Check the database for releases with the requested version while doing the following: // * If no matching releases are found, return a 404 with the underlying error @@ -500,71 +432,67 @@ pub(crate) async fn rustdoc_html_server_handler( // * If both the name and the version are an exact match, return the version of the crate. // * If there is an exact match, but the requested crate name was corrected (dashes vs. underscores), redirect to the corrected name. // * If there is a semver (but not exact) match, redirect to the exact version. - let matched_release = match_version(&mut conn, ¶ms.name, ¶ms.version) + let matched_release = match_version(&mut conn, params.name(), params.req_version()) .await? 
.into_exactly_named_or_else(|corrected_name, req_version| { AxumNope::Redirect( - EscapedURI::new( - &format!("/{}/{}/{}", corrected_name, req_version, req_path.join("/")), - uri.query(), - ), + params + .clone() + .with_name(corrected_name) + .with_req_version(req_version) + .rustdoc_url() + .append_raw_query(original_query.as_deref()), CachePolicy::NoCaching, ) })? .into_canonical_req_version_or_else(|version| { AxumNope::Redirect( - EscapedURI::new( - &format!("/{}/{}/{}", ¶ms.name, version, req_path.join("/")), - None, - ), + params.clone().with_req_version(version).rustdoc_url(), CachePolicy::ForeverInCdn, ) })?; + let params = params.apply_matched_release(&matched_release); if !matched_release.rustdoc_status() { - return Ok(axum_cached_redirect( - format!("/crate/{}/{}", params.name, params.version), - CachePolicy::ForeverInCdn, - )? - .into_response()); + return Ok( + axum_cached_redirect(params.crate_details_url(), CachePolicy::ForeverInCdn)? + .into_response(), + ); } let krate = CrateDetails::from_matched_release(&mut conn, matched_release).await?; - // if visiting the full path to the default target, remove the target from the path - // expects a req_path that looks like `[/:target]/.*` - if req_path.first().copied() - == Some( - krate - .metadata - .default_target - .as_ref() - .expect("when we have docs, this is always filled"), - ) - { - return redirect( - ¶ms.name, - &krate.version, - &req_path[1..], + trace!( + ?params, + doc_targets=?krate.metadata.doc_targets, + default_target=?krate.metadata.default_target, + + "parsed params" + ); + + if params.target_is_default() { + // if visiting the full path to the default target, remove the target from the path + // expects a req_path that looks like `[/:target]/.*` + return Ok(axum_cached_redirect( + params + .rustdoc_url() + .append_raw_query(original_query.as_deref()), CachePolicy::ForeverInCdn, - &uri, - ); + )?); } - // Create the path to access the file from - let mut storage_path = req_path.join("/"); - if storage_path.ends_with('/') { - req_path.pop(); // get rid of empty string - storage_path.push_str("index.html"); - req_path.push("index.html"); - } + let storage_path = params.storage_path(); - trace!(?storage_path, ?req_path, "try fetching from storage"); + trace!( + storage_path, + inner_path = params.inner_path(), + "try fetching from storage" + ); - // Attempt to load the file from the database + // Attempt to load the given file from storage. let blob = match storage .stream_rustdoc_file( - ¶ms.name, + params.name(), &krate.version.to_string(), krate.latest_build_id, &storage_path, @@ -577,50 +505,48 @@ pub(crate) async fn rustdoc_html_server_handler( if !matches!(err.downcast_ref(), Some(AxumNope::ResourceNotFound)) && !matches!(err.downcast_ref(), Some(crate::storage::PathNotFoundError)) { - debug!("got error serving {}: {}", storage_path, err); + error!("got error serving {}: {}", storage_path, err); } - { - // If it fails, we try again with /index.html at the end - let mut storage_path = storage_path.clone(); - storage_path.push_str("/index.html"); + if !params.path_is_folder() && params.file_extension().is_none() { + // for 404s we try again attaching `/index.html` if: + // * the path doesn't already ends with `/`, because then we already tried this path + // * the path doesn't contain a file extension. in this case, we won't ever find + // a file with another `/index.html` attached. 
- let mut req_path = req_path.clone(); - req_path.push("index.html"); + let mut new_path = params.inner_path().trim_end_matches('/').to_owned(); + new_path.push_str("/index.html"); + let params = params.clone().with_inner_path(new_path); if storage .rustdoc_file_exists( - ¶ms.name, + params.name(), &krate.version.to_string(), krate.latest_build_id, - &storage_path, + ¶ms.storage_path(), krate.archive_storage, ) .await? { - return redirect( - ¶ms.name, - &krate.version, - &req_path, + return Ok(axum_cached_redirect( + params + .rustdoc_url() + .append_raw_query(original_query.as_deref()), CachePolicy::ForeverInCdn, - &uri, - ); + )?); } } - if req_path.first().is_some_and(|p| p.contains('-')) { + if params.doc_target().is_some() { // This is a target, not a module; it may not have been built. // Redirect to the default target and show a search page instead of a hard 404. + // NOTE: I'm not sure about the use-case here. + // we are forwarding 404s to a target-redirect ( = likely a search), + // but only if the first element after the version is a target? return Ok(axum_cached_redirect( - encode_url_path(&format!( - "/crate/{}/{}/target-redirect/{}", - params.name, - params.version, - req_path.join("/") - )), + params.target_redirect_url(), CachePolicy::ForeverInCdn, - )? - .into_response()); + )?); } if storage_path @@ -632,9 +558,9 @@ pub(crate) async fn rustdoc_html_server_handler( ) { error!( - krate = params.name, + krate = params.name(), version = krate.version.to_string(), - original_path = original_path.as_ref(), + original_path = params.inner_path(), storage_path, "Couldn't find crate documentation root on storage. Something is wrong with the build." @@ -662,221 +588,93 @@ pub(crate) async fn rustdoc_html_server_handler( let is_latest_version = latest_version == krate.version; let is_prerelease = !(krate.version.pre.is_empty()); - // The path within this crate version's rustdoc output - let (target, inner_path) = { - let mut inner_path = req_path.clone(); - - let target = if inner_path.len() > 1 - && krate - .metadata - .doc_targets - .as_ref() - .expect("with rustdoc_status=true we always have doc_targets") - .iter() - .any(|s| s == inner_path[0]) - { - inner_path.remove(0) - } else { - "" - }; - - (target, inner_path.join("/")) - }; - // Find the path of the latest version for the `Go to latest` and `Permalink` links - let mut current_target = String::new(); - let target_redirect = if latest_release.build_status.is_success() { - current_target = if target.is_empty() { - krate - .metadata - .default_target - .as_ref() - .expect("with docs we always have a default_target") - .clone() - } else { - target.to_owned() - }; - format!("/target-redirect/{current_target}/{inner_path}") - } else { - "".to_string() - }; - - let query_string = if let Some(query) = uri.query() { - format!("?{query}") + let permalink_path = params + .clone() + .with_req_version(&latest_version) + .rustdoc_url() + .append_raw_query(original_query.as_deref()); + + let latest_path = if latest_release.build_status.is_success() { + params + .clone() + .with_req_version(&ReqVersion::Latest) + .target_redirect_url() } else { - "".to_string() - }; - - let permalink_path = format!( - "/{}/{}/{}{}", - params.name, latest_version, inner_path, query_string - ); + params + .clone() + .with_req_version(&ReqVersion::Latest) + .crate_details_url() + } + .append_raw_query(original_query.as_deref()); - let latest_path = format!( - "/crate/{}/latest{}{}", - params.name, target_redirect, query_string - ); + let current_target = 
params.doc_target_or_default().unwrap_or_default(); metrics .recently_accessed_releases - .record(krate.crate_id, krate.release_id, target); + .record(krate.crate_id, krate.release_id, current_target); // Build the page of documentation, let page = Arc::new(RustdocPage { latest_path, permalink_path, - inner_path, is_latest_version, - is_latest_url: params.version.is_latest(), + is_latest_url: params.req_version().is_latest(), is_prerelease, metadata: krate.metadata.clone(), + current_target: current_target.to_owned(), krate, - current_target, + params, }); page.into_response(templates, metrics, blob, config.max_parse_memory) .await } -/// Checks whether the given path exists. -/// The crate's `target_name` is used to confirm whether a platform triple is part of the path. -/// -/// Note that path is overloaded in this context to mean both the path of a URL -/// and the file path of a static file in the DB. -/// -/// `file_path` is assumed to have the following format: -/// `[/platform]/module/[kind.name.html|index.html]` -/// -/// Returns a path that can be appended to `/crate/version/` to create a complete URL. -fn path_for_version( - file_path: &[&str], - crate_details: &CrateDetails, -) -> (String, HashMap) { - // check if req_path[3] is the platform choice or the name of the crate - // Note we don't require the platform to have a trailing slash. - let platform = if crate_details - .metadata - .doc_targets - .as_ref() - .expect("this method is only used when we have docs, so this field contains data") - .iter() - .any(|s| s == file_path[0]) - && !file_path.is_empty() - { - file_path[0] - } else { - "" - }; - let is_source_view = if platform.is_empty() { - // /{name}/{version}/src/{crate}/index.html - file_path.first().copied() == Some("src") - } else { - // /{name}/{version}/{platform}/src/{crate}/index.html - file_path.get(1).copied() == Some("src") - }; - // this page doesn't exist in the latest version - let last_component = *file_path.last().unwrap(); - let search_item = if last_component == "index.html" { - // this is a module - file_path.get(file_path.len() - 2).copied() - // no trailing slash; no one should be redirected here but we handle it gracefully anyway - } else if last_component == platform { - // nothing to search for - None - } else if !is_source_view { - // this is an item - last_component.split('.').nth(1) - } else { - // if this is a Rust source file, try searching for the module; - // else, don't try searching at all, we don't know how to find it - last_component.strip_suffix(".rs.html") - }; - let target_name = &crate_details - .target_name - .as_ref() - .expect("this method is only used when we have docs, so this field contains data"); - let path = if platform.is_empty() { - format!("{target_name}/") - } else { - format!("{platform}/{target_name}/") - }; - - let query_params = search_item - .map(|i| HashMap::from_iter([("search".into(), i.into())])) - .unwrap_or_default(); - - (path, query_params) -} - #[instrument(skip_all)] pub(crate) async fn target_redirect_handler( - Path((name, req_version, req_path)): Path<(String, ReqVersion, String)>, + params: RustdocParams, mut conn: DbConnection, Extension(storage): Extension>, ) -> AxumResult { - let matched_release = match_version(&mut conn, &name, &req_version) + let params = params.with_page_kind(PageKind::Rustdoc); + + trace!(params=?params, "target redirect endpoint with params"); + + let matched_release = match_version(&mut conn, params.name(), params.req_version()) .await? 
.into_canonical_req_version_or_else(|_| AxumNope::VersionNotFound)?; + let params = params.apply_matched_release(&matched_release); let crate_details = CrateDetails::from_matched_release(&mut conn, matched_release).await?; + trace!(?params, "parsed params"); - // this handler should only be used when we have docs. - // So we can assume here that we always have a default_target. - // the only case where this would be empty is when the build failed before calling rustdoc. - let default_target = crate_details - .metadata - .default_target - .as_ref() - .ok_or_else(|| { - error!("target_redirect_handler was called with release with missing default_target"); - AxumNope::VersionNotFound - })?; - - // We're trying to find the storage location - // for the requested path in the target-redirect. - // *path always contains the target, - // here we are dropping it when it's the - // default target, - // and add `/index.html` if we request - // a folder. - let storage_location_for_path = { - let mut pieces: Vec<_> = req_path.split('/').map(str::to_owned).collect(); - - if pieces.first() == Some(default_target) { - pieces.remove(0); - } - - if let Some(last) = pieces.last_mut() - && last.is_empty() - { - *last = "index.html".to_string(); - } - - pieces.join("/") - }; - - let (redirect_path, query_args) = if storage + let storage_path = params.storage_path(); + trace!(storage_path, "checking if path exists in other version"); + let redirect_uri = if storage .rustdoc_file_exists( - &name, + params.name(), &crate_details.version.to_string(), crate_details.latest_build_id, - &storage_location_for_path, + &storage_path, crate_details.archive_storage, ) .await? { // Simple case: page exists in the other target & version, so just change these - (storage_location_for_path, HashMap::new()) + trace!(storage_path, "path exist, redirecting"); + params.rustdoc_url() } else { - let pieces: Vec<_> = storage_location_for_path.split('/').collect(); - path_for_version(&pieces, &crate_details) + trace!( + storage_path, + "path doesn't exist, generating redirect to search" + ); + params.generate_fallback_url() }; + trace!(?redirect_uri, "generate URL"); Ok(axum_cached_redirect( - axum_parse_uri_with_params( - &encode_url_path(&format!("/{name}/{req_version}/{redirect_path}")), - query_args, - )?, - if req_version.is_latest() { + redirect_uri, + if params.req_version().is_latest() { CachePolicy::ForeverInCdn } else { CachePolicy::ForeverInCdnAndStaleInBrowser @@ -1352,11 +1150,16 @@ mod test { .get("/dummy/latest/dummy/") .await? .error_for_status()?; + resp.assert_cache_control(CachePolicy::ForeverInCdn, env.config()); let body = resp.text().await?; - assert!(body.contains(" Result<(), anyhow::Error> { let mut links: BTreeMap<_, _> = links.iter().copied().collect(); - for (platform, link, rel) in get_platform_links(path, web).await? { + for (platform, link, rel) in dbg!(get_platform_links(path, web).await?) 
{ assert_eq!(rel, "nofollow"); web.assert_redirect(&link, links.remove(platform.as_str()).unwrap()) .await?; @@ -1897,14 +1691,14 @@ mod test { assert_platform_links( &web, "/dummy/0.1.0/dummy/", - &[("x86_64-unknown-linux-gnu", "/dummy/0.1.0/dummy/index.html")], + &[("x86_64-unknown-linux-gnu", "/dummy/0.1.0/dummy/")], ) .await?; assert_platform_links( &web, - "/dummy/0.1.0/dummy/index.html", - &[("x86_64-unknown-linux-gnu", "/dummy/0.1.0/dummy/index.html")], + "/dummy/0.1.0/dummy/", + &[("x86_64-unknown-linux-gnu", "/dummy/0.1.0/dummy/")], ) .await?; @@ -1921,14 +1715,14 @@ mod test { assert_platform_links( &web, "/dummy/latest/dummy/", - &[("x86_64-unknown-linux-gnu", "/dummy/latest/dummy/index.html")], + &[("x86_64-unknown-linux-gnu", "/dummy/latest/dummy/")], ) .await?; assert_platform_links( &web, "/dummy/latest/dummy/index.html", - &[("x86_64-unknown-linux-gnu", "/dummy/latest/dummy/index.html")], + &[("x86_64-unknown-linux-gnu", "/dummy/latest/dummy/")], ) .await?; @@ -1957,14 +1751,14 @@ mod test { assert_platform_links( &web, "/dummy/0.2.0/dummy/", - &[("x86_64-pc-windows-msvc", "/dummy/0.2.0/dummy/index.html")], + &[("x86_64-pc-windows-msvc", "/dummy/0.2.0/dummy/")], ) .await?; assert_platform_links( &web, "/dummy/0.2.0/dummy/index.html", - &[("x86_64-pc-windows-msvc", "/dummy/0.2.0/dummy/index.html")], + &[("x86_64-pc-windows-msvc", "/dummy/0.2.0/dummy/")], ) .await?; @@ -1981,14 +1775,14 @@ mod test { assert_platform_links( &web, "/dummy/latest/dummy/", - &[("x86_64-pc-windows-msvc", "/dummy/latest/dummy/index.html")], + &[("x86_64-pc-windows-msvc", "/dummy/latest/dummy/")], ) .await?; assert_platform_links( &web, "/dummy/latest/dummy/index.html", - &[("x86_64-pc-windows-msvc", "/dummy/latest/dummy/index.html")], + &[("x86_64-pc-windows-msvc", "/dummy/latest/dummy/")], ) .await?; @@ -2017,14 +1811,14 @@ mod test { assert_platform_links( &web, "/dummy/0.3.0/dummy/", - &[("x86_64-unknown-linux-gnu", "/dummy/0.3.0/dummy/index.html")], + &[("x86_64-unknown-linux-gnu", "/dummy/0.3.0/dummy/")], ) .await?; assert_platform_links( &web, "/dummy/0.3.0/dummy/index.html", - &[("x86_64-unknown-linux-gnu", "/dummy/0.3.0/dummy/index.html")], + &[("x86_64-unknown-linux-gnu", "/dummy/0.3.0/dummy/")], ) .await?; @@ -2041,14 +1835,14 @@ mod test { assert_platform_links( &web, "/dummy/latest/dummy/", - &[("x86_64-unknown-linux-gnu", "/dummy/latest/dummy/index.html")], + &[("x86_64-unknown-linux-gnu", "/dummy/latest/dummy/")], ) .await?; assert_platform_links( &web, "/dummy/latest/dummy/index.html", - &[("x86_64-unknown-linux-gnu", "/dummy/latest/dummy/index.html")], + &[("x86_64-unknown-linux-gnu", "/dummy/latest/dummy/")], ) .await?; @@ -2113,35 +1907,35 @@ mod test { &[ ( "x86_64-pc-windows-msvc", - "/dummy/0.4.0/x86_64-pc-windows-msvc/dummy/index.html", + "/dummy/0.4.0/x86_64-pc-windows-msvc/dummy/", ), - ("x86_64-unknown-linux-gnu", "/dummy/0.4.0/dummy/index.html"), + ("x86_64-unknown-linux-gnu", "/dummy/0.4.0/dummy/"), ], ) .await?; assert_platform_links( &web, - "/dummy/0.4.0/x86_64-pc-windows-msvc/dummy/index.html", + "/dummy/0.4.0/x86_64-pc-windows-msvc/dummy/", &[ ( "x86_64-pc-windows-msvc", - "/dummy/0.4.0/x86_64-pc-windows-msvc/dummy/index.html", + "/dummy/0.4.0/x86_64-pc-windows-msvc/dummy/", ), - ("x86_64-unknown-linux-gnu", "/dummy/0.4.0/dummy/index.html"), + ("x86_64-unknown-linux-gnu", "/dummy/0.4.0/dummy/"), ], ) .await?; assert_platform_links( &web, - "/dummy/0.4.0/dummy/index.html", + "/dummy/0.4.0/dummy/", &[ ( "x86_64-pc-windows-msvc", - 
"/dummy/0.4.0/x86_64-pc-windows-msvc/dummy/index.html", + "/dummy/0.4.0/x86_64-pc-windows-msvc/dummy/", ), - ("x86_64-unknown-linux-gnu", "/dummy/0.4.0/dummy/index.html"), + ("x86_64-unknown-linux-gnu", "/dummy/0.4.0/dummy/"), ], ) .await?; @@ -2550,10 +2344,7 @@ mod test { env.web_app() .await - .assert_redirect( - "/tokio/0.2.21/tokio/time", - "/tokio/0.2.21/tokio/time/index.html", - ) + .assert_redirect("/tokio/0.2.21/tokio/time", "/tokio/0.2.21/tokio/time/") .await?; Ok(()) @@ -2604,12 +2395,12 @@ mod test { .await?; assert_eq!( latest_version_redirect( - "/tungstenite/0.10.0/tungstenite/?search=String%20-%3E%20Message", + "/tungstenite/0.10.0/tungstenite/?search=String+-%3E+Message", &env.web_app().await, env.config() ) .await?, - "/crate/tungstenite/latest/target-redirect/x86_64-unknown-linux-gnu/tungstenite/index.html?search=String%20-%3E%20Message", + "/crate/tungstenite/latest/target-redirect/tungstenite/?search=String+-%3E+Message", ); Ok(()) }); @@ -2633,7 +2424,7 @@ mod test { .version("0.13.2") .create() .await?; - let target_redirect = "/crate/pyo3/latest/target-redirect/x86_64-unknown-linux-gnu/src/pyo3/objects/exc.rs.html"; + let target_redirect = "/crate/pyo3/latest/target-redirect/src/pyo3/objects/exc.rs.html"; let web = env.web_app().await; assert_eq!( latest_version_redirect( @@ -2670,6 +2461,8 @@ mod test { .version("0.3.0") .archive_storage(archive_storage) .rustdoc_file("hexponent/index.html") + .add_target("x86_64-unknown-linux-gnu") + .default_target("x86_64-pc-windows-msvc") .create() .await?; env.fake_release() @@ -2679,6 +2472,8 @@ mod test { .archive_storage(archive_storage) .rustdoc_file("hexponent/index.html") .rustdoc_file("hexponent/something.html") + .add_target("x86_64-unknown-linux-gnu") + .default_target("x86_64-pc-windows-msvc") .create() .await?; @@ -2693,8 +2488,10 @@ mod test { assert_eq!( parse_release_links_from_menu(&releases_response.text().await?), vec![ - "/crate/hexponent/0.3.1/target-redirect/x86_64-unknown-linux-gnu/hexponent/index.html".to_owned(), - "/crate/hexponent/0.3.0/target-redirect/x86_64-unknown-linux-gnu/hexponent/index.html".to_owned(), + "/crate/hexponent/0.3.1/target-redirect/x86_64-unknown-linux-gnu/hexponent/" + .to_owned(), + "/crate/hexponent/0.3.0/target-redirect/x86_64-unknown-linux-gnu/hexponent/" + .to_owned(), ] ); @@ -2865,24 +2662,26 @@ mod test { .version("0.1.0") .rustdoc_file("testing/index.html") .add_dependency( - Dependency::new("optional-dep".to_string(), "1.2.3".to_string()) + Dependency::new("optional-dep".to_string(), "1.2.3".parse().unwrap()) .set_optional(true), ) .create() .await?; - let dom = kuchikiki::parse_html().one( + let dom = kuchikiki::parse_html().one(dbg!( env.web_app() .await .get("/testing/0.1.0/testing/") .await? .text() - .await?, - ); + .await? 
+ )); assert!( - dom.select(r#"a[href="/optional-dep/1.2.3"] > i[class="dependencies normal"] + i"#) - .expect("should have optional dependency") - .any(|el| { el.text_contents().contains("optional") }) + dom.select( + r#"a[href="/optional-dep/^1.2.3/"] > i[class="dependencies normal"] + i"# + ) + .expect("should have optional dependency") + .any(|el| { el.text_contents().contains("optional") }) ); let dom = kuchikiki::parse_html().one( env.web_app() @@ -2894,7 +2693,7 @@ mod test { ); assert!( dom.select( - r#"a[href="/crate/optional-dep/1.2.3"] > i[class="dependencies normal"] + i"# + r#"a[href="/crate/optional-dep/^1.2.3"] > i[class="dependencies normal"] + i"# ) .expect("should have optional dependency") .any(|el| { el.text_contents().contains("optional") }) @@ -3184,9 +2983,12 @@ mod test { let web = env.web_app().await; + let response = web.get("/dummy/0.1.0/asset.js").await?; assert_eq!( - web.get("/dummy/0.1.0/asset.js").await?.status(), - StatusCode::NOT_FOUND + response.status(), + StatusCode::NOT_FOUND, + "{:?}", + response.headers().get("Location"), ); assert!(web.get("/asset.js").await?.status().is_success()); @@ -3206,6 +3008,7 @@ mod test { .await .name("clap") .version("2.24.0") + .add_platform("i686-pc-windows-gnu") .archive_storage(true) .create() .await?; @@ -3236,7 +3039,7 @@ mod test { web.assert_redirect_cached_unchecked( "/clap/latest/clapproc%20macro%20%60Parser%60%20not%20expanded:%20Cannot%20create%20expander%20for", - "/clap/latest/clapproc%20macro%20%60Parser%60%20not%20expanded:%20Cannot%20create%20expander%20for/clap/", + "/clap/latest/clapproc%20macro%20%60Parser%60%20not%20expanded:%20Cannot%20create%20expander%20for", CachePolicy::ForeverInCdn, env.config(), ).await?; diff --git a/src/web/source.rs b/src/web/source.rs index 58fcc074d..b07c0e8d4 100644 --- a/src/web/source.rs +++ b/src/web/source.rs @@ -1,16 +1,19 @@ -use super::{error::AxumResult, match_version}; use crate::{ AsyncStorage, - db::{BuildId, Pool}, + db::BuildId, impl_axum_webpage, storage::PathNotFoundError, web::{ MetaData, ReqVersion, cache::CachePolicy, - error::{AxumNope, EscapedURI}, - extractors::Path, + error::{AxumNope, AxumResult}, + extractors::{ + DbConnection, + rustdoc::{PageKind, RustdocParams}, + }, file::File as DbFile, headers::CanonicalUrl, + match_version, page::templates::{RenderBrands, RenderRegular, RenderSolid, filters}, }, }; @@ -20,7 +23,6 @@ use axum::{Extension, response::IntoResponse}; use axum_extra::headers::HeaderMapExt; use mime::Mime; use semver::Version; -use serde::Deserialize; use std::{cmp::Ordering, sync::Arc}; use tracing::instrument; @@ -75,7 +77,6 @@ impl FileList { conn: &mut sqlx::PgConnection, name: &str, version: &Version, - req_version: Option, folder: &str, ) -> Result> { let row = match sqlx::query!( @@ -163,6 +164,7 @@ struct SourcePage { canonical_url: CanonicalUrl, is_file_too_large: bool, is_latest_url: bool, + params: RustdocParams, } impl_axum_webpage! 
{ @@ -183,46 +185,33 @@ impl SourcePage { } } -#[derive(Deserialize, Clone, Debug)] -pub(crate) struct SourceBrowserHandlerParams { - name: String, - version: ReqVersion, - #[serde(default)] - path: String, -} - -#[instrument(skip(pool, storage))] +#[instrument(skip(conn, storage))] pub(crate) async fn source_browser_handler( - Path(params): Path, + params: RustdocParams, Extension(storage): Extension>, - Extension(pool): Extension, + mut conn: DbConnection, ) -> AxumResult { - let mut conn = pool.get_async().await?; - - let version = match_version(&mut conn, ¶ms.name, ¶ms.version) + let params = params.with_page_kind(PageKind::Source); + let matched_release = match_version(&mut conn, params.name(), params.req_version()) .await? .into_exactly_named_or_else(|corrected_name, req_version| { AxumNope::Redirect( - EscapedURI::new( - &format!( - "/crate/{corrected_name}/{req_version}/source/{}", - params.path - ), - None, - ), + params + .clone() + .with_name(corrected_name) + .with_req_version(req_version) + .source_url(), CachePolicy::NoCaching, ) })? .into_canonical_req_version_or_else(|version| { AxumNope::Redirect( - EscapedURI::new( - &format!("/crate/{}/{version}/source/{}", params.name, params.path), - None, - ), + params.clone().with_req_version(version).source_url(), CachePolicy::ForeverInCdn, ) - })? - .into_version(); + })?; + let params = params.apply_matched_release(&matched_release); + let version = matched_release.into_version(); let row = sqlx::query!( r#"SELECT @@ -241,21 +230,23 @@ pub(crate) async fn source_browser_handler( WHERE name = $1 AND version = $2"#, - params.name, + params.name(), version.to_string() ) .fetch_one(&mut *conn) .await?; + let inner_path = params.inner_path(); + // try to get actual file first // skip if request is a directory - let (blob, is_file_too_large) = if !params.path.ends_with('/') { + let (blob, is_file_too_large) = if !params.path_is_folder() { match storage .fetch_source_file( - ¶ms.name, + params.name(), &version.to_string(), row.latest_build_id, - ¶ms.path, + inner_path, row.archive_storage, ) .await @@ -280,10 +271,12 @@ pub(crate) async fn source_browser_handler( (None, false) }; - let canonical_url = CanonicalUrl::from_path(format!( - "/crate/{}/latest/source/{}", - params.name, params.path - )); + let canonical_url = CanonicalUrl::from_uri( + params + .clone() + .with_req_version(ReqVersion::Latest) + .source_url(), + ); let (file, file_content) = if let Some(blob) = blob { let is_text = blob.mime.type_() == mime::TEXT || blob.mime == mime::APPLICATION_JSON; @@ -312,37 +305,35 @@ pub(crate) async fn source_browser_handler( (None, None) }; - let current_folder = if let Some(last_slash_pos) = params.path.rfind('/') { - ¶ms.path[..last_slash_pos + 1] + let current_folder = if let Some(last_slash_pos) = inner_path.rfind('/') { + &inner_path[..last_slash_pos + 1] } else { "" }; + let show_parent_link = !current_folder.is_empty(); + + let file_list = FileList::from_path(&mut conn, params.name(), &version, current_folder) + .await? + .unwrap_or_default(); - let file_list = FileList::from_path( + let metadata = MetaData::from_crate( &mut conn, - ¶ms.name, + params.name(), &version, - Some(params.version.clone()), - current_folder, + Some(params.req_version().clone()), ) - .await? 
- .unwrap_or_default(); + .await?; Ok(SourcePage { file_list, - metadata: MetaData::from_crate( - &mut conn, - ¶ms.name, - &version, - Some(params.version.clone()), - ) - .await?, - show_parent_link: !current_folder.is_empty(), + metadata, + show_parent_link, file, file_content, canonical_url, is_file_too_large, - is_latest_url: params.version.is_latest(), + is_latest_url: params.req_version().is_latest(), + params, } .into_response()) } diff --git a/src/web/status.rs b/src/web/status.rs index dfc048ea6..ff9315864 100644 --- a/src/web/status.rs +++ b/src/web/status.rs @@ -1,8 +1,7 @@ -use super::{cache::CachePolicy, error::AxumNope}; use crate::web::{ - ReqVersion, - error::{AxumResult, EscapedURI}, - extractors::{DbConnection, Path}, + cache::CachePolicy, + error::{AxumNope, AxumResult}, + extractors::{DbConnection, rustdoc::RustdocParams}, match_version, }; use axum::{ @@ -10,7 +9,7 @@ use axum::{ }; pub(crate) async fn status_handler( - Path((name, req_version)): Path<(String, ReqVersion)>, + params: RustdocParams, mut conn: DbConnection, ) -> impl IntoResponse { ( @@ -19,7 +18,7 @@ pub(crate) async fn status_handler( // We use an async block to emulate a try block so that we can apply the above CORS header // and cache policy to both successful and failed responses async move { - let matched_release = match_version(&mut conn, &name, &req_version) + let matched_release = match_version(&mut conn, params.name(), params.req_version()) .await? .assume_exact_name()?; @@ -28,7 +27,7 @@ pub(crate) async fn status_handler( let version = matched_release .into_canonical_req_version_or_else(|version| { AxumNope::Redirect( - EscapedURI::new(&format!("/crate/{name}/{version}/status.json"), None), + params.clone().with_req_version(version).build_status_url(), CachePolicy::NoCaching, ) })? @@ -154,7 +153,6 @@ mod tests { .await?; response.assert_cache_control(CachePolicy::NoStoreMustRevalidate, env.config()); assert_eq!(response.headers()["access-control-allow-origin"], "*"); - dbg!(&response); assert_eq!(response.status(), StatusCode::OK); let value: serde_json::Value = serde_json::from_str(&response.text().await?)?; diff --git a/templates/core/home.html b/templates/core/home.html index 1e2d60cc3..3af8f54a9 100644 --- a/templates/core/home.html +++ b/templates/core/home.html @@ -42,11 +42,12 @@

{{ crate::icons::IconCubes.render_solid(false, false, "") }} D