From 6df5e9da461efd16054e73a836450bf9c2a86627 Mon Sep 17 00:00:00 2001 From: Yotam Ofek Date: Mon, 20 Oct 2025 21:35:43 +0300 Subject: [PATCH 1/3] [PERF] see if my fork of `stringdex` affects perf --- Cargo.lock | 3 +-- src/librustdoc/Cargo.toml | 2 +- src/librustdoc/html/render/search_index.rs | 4 ++-- src/tools/tidy/src/extdeps.rs | 2 ++ 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d2b96e6d10270..2457148839903 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5274,8 +5274,7 @@ dependencies = [ [[package]] name = "stringdex" version = "0.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18b3bd4f10d15ef859c40291769f0d85209de6b0f1c30713ff9cdf45ac43ea36" +source = "git+https://gitlab.com/yotamofek/stringdex?rev=5dc67b8e6ce4f2d1f22c176ba881521167b950b1#5dc67b8e6ce4f2d1f22c176ba881521167b950b1" dependencies = [ "stacker", ] diff --git a/src/librustdoc/Cargo.toml b/src/librustdoc/Cargo.toml index 63412e2b9373d..9f726bab06a1c 100644 --- a/src/librustdoc/Cargo.toml +++ b/src/librustdoc/Cargo.toml @@ -21,7 +21,7 @@ rustdoc-json-types = { path = "../rustdoc-json-types" } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" smallvec = "1.8.1" -stringdex = "=0.0.2" +stringdex = { git = "https://gitlab.com/yotamofek/stringdex", rev = "5dc67b8e6ce4f2d1f22c176ba881521167b950b1" } tempfile = "3" threadpool = "1.8.1" tracing = "0.1" diff --git a/src/librustdoc/html/render/search_index.rs b/src/librustdoc/html/render/search_index.rs index 9c072eed51aec..d0e1835a5e29a 100644 --- a/src/librustdoc/html/render/search_index.rs +++ b/src/librustdoc/html/render/search_index.rs @@ -1055,12 +1055,12 @@ impl Serialize for TypeData { let mut buf = Vec::new(); encode::write_postings_to_string(&self.inverted_function_inputs_index, &mut buf); let mut serialized_result = Vec::new(); - stringdex_internals::encode::write_base64_to_bytes(&buf, &mut serialized_result); + stringdex_internals::encode::write_base64_to_bytes(&buf, &mut serialized_result).unwrap(); seq.serialize_element(&str::from_utf8(&serialized_result).unwrap())?; buf.clear(); serialized_result.clear(); encode::write_postings_to_string(&self.inverted_function_output_index, &mut buf); - stringdex_internals::encode::write_base64_to_bytes(&buf, &mut serialized_result); + stringdex_internals::encode::write_base64_to_bytes(&buf, &mut serialized_result).unwrap(); seq.serialize_element(&str::from_utf8(&serialized_result).unwrap())?; if self.search_unbox { seq.serialize_element(&1)?; diff --git a/src/tools/tidy/src/extdeps.rs b/src/tools/tidy/src/extdeps.rs index 19c773d12f7fa..053a5222e2546 100644 --- a/src/tools/tidy/src/extdeps.rs +++ b/src/tools/tidy/src/extdeps.rs @@ -11,6 +11,8 @@ const ALLOWED_SOURCES: &[&str] = &[ r#""registry+https://github.com/rust-lang/crates.io-index""#, // This is `rust_team_data` used by `site` in src/tools/rustc-perf, r#""git+https://github.com/rust-lang/team#a5260e76d3aa894c64c56e6ddc8545b9a98043ec""#, + // TMP: + r#""git+https://gitlab.com/yotamofek/stringdex?rev=5dc67b8e6ce4f2d1f22c176ba881521167b950b1#5dc67b8e6ce4f2d1f22c176ba881521167b950b1""#, ]; /// Checks for external package sources. `root` is the path to the directory that contains the From 4b67e540c3a68929d878e3544ac30030690559c0 Mon Sep 17 00:00:00 2001 From: Yotam Ofek Date: Thu, 6 Nov 2025 12:53:46 +0200 Subject: [PATCH 2/3] Adapt for https://gitlab.com/notriddle/stringdex/-/merge_requests/11/diffs?commit_id=5f470cc5639fab652379d9f336e79dc9c495444f Also take in https://gitlab.com/notriddle/stringdex/-/merge_requests/12 --- Cargo.lock | 2 +- src/librustdoc/Cargo.toml | 2 +- src/librustdoc/html/render/search_index.rs | 98 ++++++++++++++-------- src/tools/tidy/src/extdeps.rs | 2 +- 4 files changed, 65 insertions(+), 39 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2457148839903..5f4e354d8a783 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5274,7 +5274,7 @@ dependencies = [ [[package]] name = "stringdex" version = "0.0.2" -source = "git+https://gitlab.com/yotamofek/stringdex?rev=5dc67b8e6ce4f2d1f22c176ba881521167b950b1#5dc67b8e6ce4f2d1f22c176ba881521167b950b1" +source = "git+https://gitlab.com/yotamofek/stringdex?rev=61a1cda4f942e6a6773e4b969aff7b26903a31f8#61a1cda4f942e6a6773e4b969aff7b26903a31f8" dependencies = [ "stacker", ] diff --git a/src/librustdoc/Cargo.toml b/src/librustdoc/Cargo.toml index 9f726bab06a1c..0697c4789220a 100644 --- a/src/librustdoc/Cargo.toml +++ b/src/librustdoc/Cargo.toml @@ -21,7 +21,7 @@ rustdoc-json-types = { path = "../rustdoc-json-types" } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" smallvec = "1.8.1" -stringdex = { git = "https://gitlab.com/yotamofek/stringdex", rev = "5dc67b8e6ce4f2d1f22c176ba881521167b950b1" } +stringdex = { git = "https://gitlab.com/yotamofek/stringdex", rev = "61a1cda4f942e6a6773e4b969aff7b26903a31f8" } tempfile = "3" threadpool = "1.8.1" tracing = "0.1" diff --git a/src/librustdoc/html/render/search_index.rs b/src/librustdoc/html/render/search_index.rs index d0e1835a5e29a..fb8d9263b06c9 100644 --- a/src/librustdoc/html/render/search_index.rs +++ b/src/librustdoc/html/render/search_index.rs @@ -3,7 +3,9 @@ mod serde; use std::collections::BTreeSet; use std::collections::hash_map::Entry; +use std::io; use std::path::Path; +use std::string::FromUtf8Error; use ::serde::de::{self, Deserializer, Error as _}; use ::serde::ser::{SerializeSeq, Serializer}; @@ -95,21 +97,28 @@ impl SerializedSearchIndex { ) -> Result<(), Error> { let root_path = doc_root.join(format!("search.index/root{resource_suffix}.js")); let column_path = doc_root.join(format!("search.index/{column_name}/")); + + struct Consumer<'col>(&'col mut Vec); + + impl<'col> stringdex_internals::Consumer for Consumer<'col> { + type Err = FromUtf8Error; + + fn consume(&mut self, _id: u32, cell: &[u8]) -> Result<(), Self::Err> { + self.0.push(String::from_utf8(cell.to_vec())?); + Ok(()) + } + } + stringdex_internals::read_data_from_disk_column( root_path, column_name.as_bytes(), column_path.clone(), - &mut |_id, item| { - column.push(String::from_utf8(item.to_vec())?); - Ok(()) - }, - ) - .map_err( - |error: stringdex_internals::ReadDataError>| Error { - file: column_path, - error: format!("failed to read column from disk: {error}"), - }, + &mut Consumer(column), ) + .map_err(|error| Error { + file: column_path, + error: format!("failed to read column from disk: {error}"), + }) } fn perform_read_serde( resource_suffix: &str, @@ -119,25 +128,35 @@ impl SerializedSearchIndex { ) -> Result<(), Error> { let root_path = doc_root.join(format!("search.index/root{resource_suffix}.js")); let column_path = doc_root.join(format!("search.index/{column_name}/")); + + struct Consumer<'col, T>(&'col mut Vec>); + + impl<'col, T> stringdex_internals::Consumer for Consumer<'col, T> + where + T: for<'de> Deserialize<'de> + 'static, + { + type Err = serde_json::Error; + + fn consume(&mut self, _id: u32, cell: &[u8]) -> Result<(), Self::Err> { + if cell.is_empty() { + self.0.push(None); + } else { + self.0.push(Some(serde_json::from_slice(cell)?)); + } + Ok(()) + } + } + stringdex_internals::read_data_from_disk_column( root_path, column_name.as_bytes(), column_path.clone(), - &mut |_id, item| { - if item.is_empty() { - column.push(None); - } else { - column.push(Some(serde_json::from_slice(item)?)); - } - Ok(()) - }, - ) - .map_err( - |error: stringdex_internals::ReadDataError>| Error { - file: column_path, - error: format!("failed to read column from disk: {error}"), - }, + &mut Consumer(column), ) + .map_err(|error| Error { + file: column_path, + error: format!("failed to read column from disk: {error}"), + }) } fn perform_read_postings( resource_suffix: &str, @@ -147,23 +166,30 @@ impl SerializedSearchIndex { ) -> Result<(), Error> { let root_path = doc_root.join(format!("search.index/root{resource_suffix}.js")); let column_path = doc_root.join(format!("search.index/{column_name}/")); + + struct Consumer<'col>(&'col mut Vec>>); + + impl<'col> stringdex_internals::Consumer for Consumer<'col> { + type Err = io::Error; + + fn consume(&mut self, _id: u32, cell: &[u8]) -> Result<(), Self::Err> { + let mut postings = Vec::new(); + encode::read_postings_from_string(&mut postings, cell); + self.0.push(postings); + Ok(()) + } + } + stringdex_internals::read_data_from_disk_column( root_path, column_name.as_bytes(), column_path.clone(), - &mut |_id, buf| { - let mut postings = Vec::new(); - encode::read_postings_from_string(&mut postings, buf); - column.push(postings); - Ok(()) - }, - ) - .map_err( - |error: stringdex_internals::ReadDataError>| Error { - file: column_path, - error: format!("failed to read column from disk: {error}"), - }, + &mut Consumer(column), ) + .map_err(|error| Error { + file: column_path, + error: format!("failed to read column from disk: {error}"), + }) } assert_eq!(names.len(), path_data.len()); diff --git a/src/tools/tidy/src/extdeps.rs b/src/tools/tidy/src/extdeps.rs index 053a5222e2546..35daacc3a7d1e 100644 --- a/src/tools/tidy/src/extdeps.rs +++ b/src/tools/tidy/src/extdeps.rs @@ -12,7 +12,7 @@ const ALLOWED_SOURCES: &[&str] = &[ // This is `rust_team_data` used by `site` in src/tools/rustc-perf, r#""git+https://github.com/rust-lang/team#a5260e76d3aa894c64c56e6ddc8545b9a98043ec""#, // TMP: - r#""git+https://gitlab.com/yotamofek/stringdex?rev=5dc67b8e6ce4f2d1f22c176ba881521167b950b1#5dc67b8e6ce4f2d1f22c176ba881521167b950b1""#, + r#""git+https://gitlab.com/yotamofek/stringdex?rev=61a1cda4f942e6a6773e4b969aff7b26903a31f8#61a1cda4f942e6a6773e4b969aff7b26903a31f8""#, ]; /// Checks for external package sources. `root` is the path to the directory that contains the From da1490d123a5ec22eff3b5645ee8c4b89e3e656d Mon Sep 17 00:00:00 2001 From: Yotam Ofek Date: Mon, 10 Nov 2025 12:04:36 +0200 Subject: [PATCH 3/3] Adapt for https://gitlab.com/notriddle/stringdex/-/merge_requests/11#note_2877222677 --- Cargo.lock | 2 +- src/librustdoc/Cargo.toml | 2 +- src/librustdoc/html/render/search_index.rs | 55 ++++++++-------------- src/tools/tidy/src/extdeps.rs | 2 +- 4 files changed, 22 insertions(+), 39 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5f4e354d8a783..87e37f50e4b91 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5274,7 +5274,7 @@ dependencies = [ [[package]] name = "stringdex" version = "0.0.2" -source = "git+https://gitlab.com/yotamofek/stringdex?rev=61a1cda4f942e6a6773e4b969aff7b26903a31f8#61a1cda4f942e6a6773e4b969aff7b26903a31f8" +source = "git+https://gitlab.com/yotamofek/stringdex?rev=a1812aefdb3dce2bb85d9ec52a6e53d8eefa3c4b#a1812aefdb3dce2bb85d9ec52a6e53d8eefa3c4b" dependencies = [ "stacker", ] diff --git a/src/librustdoc/Cargo.toml b/src/librustdoc/Cargo.toml index 0697c4789220a..a6bf39c524968 100644 --- a/src/librustdoc/Cargo.toml +++ b/src/librustdoc/Cargo.toml @@ -21,7 +21,7 @@ rustdoc-json-types = { path = "../rustdoc-json-types" } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" smallvec = "1.8.1" -stringdex = { git = "https://gitlab.com/yotamofek/stringdex", rev = "61a1cda4f942e6a6773e4b969aff7b26903a31f8" } +stringdex = { git = "https://gitlab.com/yotamofek/stringdex", rev = "a1812aefdb3dce2bb85d9ec52a6e53d8eefa3c4b" } tempfile = "3" threadpool = "1.8.1" tracing = "0.1" diff --git a/src/librustdoc/html/render/search_index.rs b/src/librustdoc/html/render/search_index.rs index fb8d9263b06c9..3514c517d9134 100644 --- a/src/librustdoc/html/render/search_index.rs +++ b/src/librustdoc/html/render/search_index.rs @@ -98,22 +98,16 @@ impl SerializedSearchIndex { let root_path = doc_root.join(format!("search.index/root{resource_suffix}.js")); let column_path = doc_root.join(format!("search.index/{column_name}/")); - struct Consumer<'col>(&'col mut Vec); - - impl<'col> stringdex_internals::Consumer for Consumer<'col> { - type Err = FromUtf8Error; - - fn consume(&mut self, _id: u32, cell: &[u8]) -> Result<(), Self::Err> { - self.0.push(String::from_utf8(cell.to_vec())?); - Ok(()) - } - } + let mut consume = |_, cell: &[u8]| { + column.push(String::from_utf8(cell.to_vec())?); + Ok::<_, FromUtf8Error>(()) + }; stringdex_internals::read_data_from_disk_column( root_path, column_name.as_bytes(), column_path.clone(), - &mut Consumer(column), + &mut consume, ) .map_err(|error| Error { file: column_path, @@ -129,29 +123,20 @@ impl SerializedSearchIndex { let root_path = doc_root.join(format!("search.index/root{resource_suffix}.js")); let column_path = doc_root.join(format!("search.index/{column_name}/")); - struct Consumer<'col, T>(&'col mut Vec>); - - impl<'col, T> stringdex_internals::Consumer for Consumer<'col, T> - where - T: for<'de> Deserialize<'de> + 'static, - { - type Err = serde_json::Error; - - fn consume(&mut self, _id: u32, cell: &[u8]) -> Result<(), Self::Err> { - if cell.is_empty() { - self.0.push(None); - } else { - self.0.push(Some(serde_json::from_slice(cell)?)); - } - Ok(()) + let mut consume = |_, cell: &[u8]| { + if cell.is_empty() { + column.push(None); + } else { + column.push(Some(serde_json::from_slice(cell)?)); } - } + Ok::<_, serde_json::Error>(()) + }; stringdex_internals::read_data_from_disk_column( root_path, column_name.as_bytes(), column_path.clone(), - &mut Consumer(column), + &mut consume, ) .map_err(|error| Error { file: column_path, @@ -167,15 +152,13 @@ impl SerializedSearchIndex { let root_path = doc_root.join(format!("search.index/root{resource_suffix}.js")); let column_path = doc_root.join(format!("search.index/{column_name}/")); - struct Consumer<'col>(&'col mut Vec>>); - - impl<'col> stringdex_internals::Consumer for Consumer<'col> { - type Err = io::Error; - - fn consume(&mut self, _id: u32, cell: &[u8]) -> Result<(), Self::Err> { + fn consumer( + column: &mut Vec>>, + ) -> impl FnMut(u32, &[u8]) -> io::Result<()> { + |_, cell| { let mut postings = Vec::new(); encode::read_postings_from_string(&mut postings, cell); - self.0.push(postings); + column.push(postings); Ok(()) } } @@ -184,7 +167,7 @@ impl SerializedSearchIndex { root_path, column_name.as_bytes(), column_path.clone(), - &mut Consumer(column), + &mut consumer(column), ) .map_err(|error| Error { file: column_path, diff --git a/src/tools/tidy/src/extdeps.rs b/src/tools/tidy/src/extdeps.rs index 35daacc3a7d1e..a932fe7122d9a 100644 --- a/src/tools/tidy/src/extdeps.rs +++ b/src/tools/tidy/src/extdeps.rs @@ -12,7 +12,7 @@ const ALLOWED_SOURCES: &[&str] = &[ // This is `rust_team_data` used by `site` in src/tools/rustc-perf, r#""git+https://github.com/rust-lang/team#a5260e76d3aa894c64c56e6ddc8545b9a98043ec""#, // TMP: - r#""git+https://gitlab.com/yotamofek/stringdex?rev=61a1cda4f942e6a6773e4b969aff7b26903a31f8#61a1cda4f942e6a6773e4b969aff7b26903a31f8""#, + r#""git+https://gitlab.com/yotamofek/stringdex?rev=a1812aefdb3dce2bb85d9ec52a6e53d8eefa3c4b#a1812aefdb3dce2bb85d9ec52a6e53d8eefa3c4b""#, ]; /// Checks for external package sources. `root` is the path to the directory that contains the