From a5346c609ec52353b0d931ca7ca0f84016f4d7b6 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Tue, 16 Sep 2025 04:13:27 +0200 Subject: [PATCH 1/6] Improve source code for `highlight.rs` --- src/librustdoc/html/highlight.rs | 704 ++++++++++++++++--------------- src/librustdoc/lib.rs | 1 + 2 files changed, 358 insertions(+), 347 deletions(-) diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs index f2055608aa9d2..a9cf100344322 100644 --- a/src/librustdoc/html/highlight.rs +++ b/src/librustdoc/html/highlight.rs @@ -5,7 +5,6 @@ //! //! Use the `render_with_highlighting` to highlight some rust code. -use std::borrow::Cow; use std::collections::VecDeque; use std::fmt::{self, Display, Write}; use std::{cmp, iter}; @@ -134,151 +133,341 @@ fn can_merge(class1: Option, class2: Option, text: &str) -> bool { } } +#[derive(Debug)] +struct Content { + text: String, + /// If `Some` and the `span` is different from the parent, then it might generate a link so we + /// need to keep this information. + class: Option, + needs_escape: bool, +} + +#[derive(Debug)] +struct Element { + /// If `class` is `None`, then it's just plain text with no HTML tag. + class: Option, + /// Content for the current element. + content: Vec, +} + +impl Element { + fn new(class: Option, text: String, needs_escape: bool) -> Self { + Self { class, content: vec![Content { text, class, needs_escape }] } + } + + fn can_merge(&self, other: &Self) -> bool { + other.content.iter().all(|c| can_merge(self.class, other.class, &c.text)) + } + + fn write_elem_to(&self, out: &mut W, href_context: &Option>, parent_class: Option) { + let mut prev = parent_class; + let mut closing_tag = None; + for part in &self.content { + let text: &dyn Display = if part.needs_escape { &EscapeBodyText(&part.text) } else { &part.text }; + if part.class.is_some() { + // We only try to generate links as the `` should have already be generated + // by the caller of `write_elem_to`. + if let Some(new_closing_tag) = string_without_closing_tag( + out, + text, + part.class, + href_context, + prev != part.class, + ) { + if new_closing_tag == "" { + out.write_str(new_closing_tag).unwrap(); + closing_tag = None; + } else { + closing_tag = Some(new_closing_tag); + } + } + prev = part.class; + } else { + write!(out, "{text}").unwrap(); + } + } + if let Some(closing_tag) = closing_tag { + out.write_str(closing_tag).unwrap(); + } + } +} + +#[derive(Debug)] +enum ElementOrStack { + Element(Element), + Stack(ElementStack), +} + +#[derive(Debug)] +struct ElementStack { + elements: Vec, + parent: Option>, + class: Option, +} + +impl ElementStack { + fn new() -> Self { + Self::new_with_class(None) + } + + fn new_with_class(class: Option) -> Self { + Self { elements: Vec::new(), parent: None, class } + } + + fn push_element(&mut self, mut elem: Element) { + if let Some(ElementOrStack::Element(last)) = self.elements.last_mut() + && last.can_merge(&elem) + { + for part in elem.content.drain(..) { + last.content.push(part); + } + } else { + self.elements.push(ElementOrStack::Element(elem)); + } + } + + fn empty_stack_and_set_new_heads(&mut self, class: Class, element: Element) { + self.elements.clear(); + if let Some(parent) = &mut self.parent { + parent.empty_stack_and_set_new_heads(class, element); + } else { + let mut new_stack = ElementStack::new_with_class(Some(class)); + new_stack.elements.push(ElementOrStack::Element(element)); + self.parent.replace(Box::new(new_stack)); + } + } + + fn enter_stack(&mut self, ElementStack { elements, parent, class }: ElementStack) { + assert!(parent.is_none(), "`enter_stack` used with a non empty parent"); + let parent_elements = std::mem::take(&mut self.elements); + let parent_parent = std::mem::take(&mut self.parent); + self.parent = Some(Box::new(ElementStack { + elements: parent_elements, + parent: parent_parent, + class: self.class, + })); + self.class = class; + self.elements = elements; + } + + fn enter_elem(&mut self, class: Class) { + let elements = std::mem::take(&mut self.elements); + let parent = std::mem::take(&mut self.parent); + self.parent = Some(Box::new(ElementStack { elements, parent, class: self.class })); + self.class = Some(class); + } + + fn exit_elem(&mut self) { + let Some(element) = std::mem::take(&mut self.parent) else { + panic!("exiting an element where there is no parent"); + }; + let ElementStack { elements, parent, class } = Box::into_inner(element); + + let old_elements = std::mem::take(&mut self.elements); + self.elements = elements; + self.elements.push(ElementOrStack::Stack(ElementStack { + elements: old_elements, + class: self.class, + parent: None, + })); + self.parent = parent; + self.class = class; + } + + fn write_content(&self, out: &mut W, href_context: &Option>) { + let mut elem = self; + + // We get the top most item. + while let Some(parent) = &elem.parent { + elem = parent; + } + // Now we can output the whole content. + elem.write_to(out, href_context, None); + } + + fn write_to( + &self, + out: &mut W, + href_context: &Option>, + parent_class: Option, + ) { + // If it only contains stack, it means it has no content of its own so no need to generate + // a tag. + let closing_tag = if let Some(Class::Expansion) = self.class { + out.write_str("").unwrap(); + "" + } else if let Some(class) = self.class + // `PreludeTy` can never include more than an ident so it should not generate + // a wrapping `span`. + && !matches!(class, Class::PreludeTy(_)) + { + // Macro is the only `ElementStack` that can generate a link to definition to its + // whole content, so to prevent having ``, + // we generate the `` directly here. + // + // For other elements, the links will be generated in `write_elem_to`. + let href_context = if matches!(class, Class::Macro(_)) { + href_context + } else { + &None + }; + string_without_closing_tag(out, "", Some(class), href_context, self.class != parent_class) + .expect( + "internal error: enter_span was called with Some(class) but did not \ + return a closing HTML tag", + ) + } else { + "" + }; + + for child_elem in self.elements.iter() { + let child_elem = match child_elem { + ElementOrStack::Element(elem) => elem, + ElementOrStack::Stack(stack) => { + stack.write_to(out, href_context, parent_class); + continue; + } + }; + if child_elem.content.is_empty() { + continue; + } + child_elem.write_elem_to(out, href_context, parent_class); + } + + out.write_str(closing_tag).unwrap(); + } +} + /// This type is used as a conveniency to prevent having to pass all its fields as arguments into /// the various functions (which became its methods). struct TokenHandler<'a, 'tcx, F: Write> { out: &'a mut F, - /// It contains the closing tag and the associated `Class`. - closing_tags: Vec<(&'static str, Class)>, - /// This is used because we don't automatically generate the closing tag on `ExitSpan` in - /// case an `EnterSpan` event with the same class follows. - pending_exit_span: Option, - /// `current_class` and `pending_elems` are used to group HTML elements with same `class` - /// attributes to reduce the DOM size. - current_class: Option, + element_stack: ElementStack, /// We need to keep the `Class` for each element because it could contain a `Span` which is /// used to generate links. - pending_elems: Vec<(Cow<'a, str>, Option)>, href_context: Option>, - write_line_number: fn(&mut F, u32, &'static str), + write_line_number: fn(u32) -> String, + line: u32, + max_lines: u32, } impl std::fmt::Debug for TokenHandler<'_, '_, F> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("TokenHandler") - .field("closing_tags", &self.closing_tags) - .field("pending_exit_span", &self.pending_exit_span) - .field("current_class", &self.current_class) - .field("pending_elems", &self.pending_elems) - .finish() + f.debug_struct("TokenHandler").field("element_stack", &self.element_stack).finish() } } impl TokenHandler<'_, '_, F> { - fn handle_exit_span(&mut self) { - // We can't get the last `closing_tags` element using `pop()` because `closing_tags` is - // being used in `write_pending_elems`. - let class = self.closing_tags.last().expect("ExitSpan without EnterSpan").1; - // We flush everything just in case... - self.write_pending_elems(Some(class)); - - exit_span(self.out, self.closing_tags.pop().expect("ExitSpan without EnterSpan").0); - self.pending_exit_span = None; + fn handle_backline(&mut self) -> Option { + self.line += 1; + if self.line < self.max_lines { + return Some((self.write_line_number)(self.line)); + } + None } - /// Write all the pending elements sharing a same (or at mergeable) `Class`. - /// - /// If there is a "parent" (if a `EnterSpan` event was encountered) and the parent can be merged - /// with the elements' class, then we simply write the elements since the `ExitSpan` event will - /// close the tag. - /// - /// Otherwise, if there is only one pending element, we let the `string` function handle both - /// opening and closing the tag, otherwise we do it into this function. - /// - /// It returns `true` if `current_class` must be set to `None` afterwards. - fn write_pending_elems(&mut self, current_class: Option) -> bool { - if self.pending_elems.is_empty() { - return false; - } - if let Some((_, parent_class)) = self.closing_tags.last() - && can_merge(current_class, Some(*parent_class), "") + fn push_element_without_backline_check( + &mut self, + class: Option, + text: String, + needs_escape: bool, + ) { + self.element_stack.push_element(Element::new(class, text, needs_escape)) + } + + fn push_element(&mut self, class: Option, mut text: String) { + let needs_escape = if text == "\n" + && let Some(backline) = self.handle_backline() { - for (text, class) in self.pending_elems.iter() { - string( - self.out, - EscapeBodyText(text), - *class, - &self.href_context, - false, - self.write_line_number, - ); - } + text.push_str(&backline); + false } else { - // We only want to "open" the tag ourselves if we have more than one pending and if the - // current parent tag is not the same as our pending content. - let close_tag = if self.pending_elems.len() > 1 - && let Some(current_class) = current_class - // `PreludeTy` can never include more than an ident so it should not generate - // a wrapping `span`. - && !matches!(current_class, Class::PreludeTy(_)) + true + }; + + self.push_element_without_backline_check(class, text, needs_escape); + } + + fn start_expansion(&mut self) { + // We display everything. + self.element_stack.write_content(self.out, &self.href_context); + + // We remove everything and recreate the stack with the expansion at its head. + self.element_stack.empty_stack_and_set_new_heads( + Class::Expansion, + Element { + class: None, + content: vec![Content { + text: format!( + "", + self.line, + ), + class: None, + needs_escape: false, + }], + }, + ); + } + + fn add_expanded_code(&mut self, expanded_code: &ExpandedCode) { + self.element_stack.push_element(Element::new( + None, + format!("{}", expanded_code.code), + false, + )); + self.element_stack.enter_stack(ElementStack::new_with_class(Some(Class::Original))); + } + + fn close_expansion(&mut self) { + let mut old_stack = Vec::new(); + + // We inline everything into the top-most element. + while self.element_stack.parent.is_some() { + self.element_stack.exit_elem(); + if let Some(ElementOrStack::Stack(stack)) = self.element_stack.elements.last() + && let Some(class) = stack.class + && class != Class::Original { - Some(enter_span(self.out, current_class, &self.href_context)) - } else { - None - }; - // To prevent opening a macro expansion span being closed right away because - // the currently open item is replaced by a new class. - let last_pending = - self.pending_elems.pop_if(|(_, class)| *class == Some(Class::Expansion)); - for (text, class) in self.pending_elems.iter() { - string( - self.out, - EscapeBodyText(text), - *class, - &self.href_context, - close_tag.is_none(), - self.write_line_number, - ); - } - if let Some(close_tag) = close_tag { - exit_span(self.out, close_tag); - } - if let Some((text, class)) = last_pending { - string( - self.out, - EscapeBodyText(&text), - class, - &self.href_context, - close_tag.is_none(), - self.write_line_number, - ); + old_stack.push(class); } } - self.pending_elems.clear(); - true - } - - #[inline] - fn write_line_number(&mut self, line: u32, extra: &'static str) { - (self.write_line_number)(self.out, line, extra); + // We display everything. + self.element_stack.write_content(self.out, &self.href_context); + + // We recreate the tree but without the expansion node. + self.element_stack.elements.clear(); + self.element_stack.class = None; + for class in old_stack.iter().rev() { + self.element_stack.enter_stack(ElementStack::new_with_class(Some(*class))); + } } } impl Drop for TokenHandler<'_, '_, F> { /// When leaving, we need to flush all pending data to not have missing content. fn drop(&mut self) { - if self.pending_exit_span.is_some() { - self.handle_exit_span(); - } else { - self.write_pending_elems(self.current_class); - } + self.element_stack.write_content(self.out, &self.href_context); } } -fn write_scraped_line_number(out: &mut impl Write, line: u32, extra: &'static str) { +fn scraped_line_number(line: u32) -> String { // https://developers.google.com/search/docs/crawling-indexing/robots-meta-tag#data-nosnippet-attr // Do not show "1 2 3 4 5 ..." in web search results. - write!(out, "{extra}{line}",).unwrap(); + format!("{line}") } -fn write_line_number(out: &mut impl Write, line: u32, extra: &'static str) { +fn line_number(line: u32) -> String { // https://developers.google.com/search/docs/crawling-indexing/robots-meta-tag#data-nosnippet-attr // Do not show "1 2 3 4 5 ..." in web search results. - write!(out, "{extra}{line}",).unwrap(); + format!("{line}") } -fn empty_line_number(out: &mut impl Write, _: u32, extra: &'static str) { - out.write_str(extra).unwrap(); +fn empty_line_number(_: u32) -> String { + String::new() } fn get_next_expansion( @@ -292,80 +481,24 @@ fn get_next_expansion( fn get_expansion<'a, W: Write>( token_handler: &mut TokenHandler<'_, '_, W>, expanded_codes: &'a [ExpandedCode], - line: u32, span: Span, ) -> Option<&'a ExpandedCode> { - if let Some(expanded_code) = get_next_expansion(expanded_codes, line, span) { - let (closing, reopening) = if let Some(current_class) = token_handler.current_class - && let class = current_class.as_html() - && !class.is_empty() - { - ("", format!("")) - } else { - ("", String::new()) - }; - let id = format!("expand-{line}"); - token_handler.pending_elems.push(( - Cow::Owned(format!( - "{closing}\ -\ - {reopening}", - )), - Some(Class::Expansion), - )); - Some(expanded_code) - } else { - None - } -} - -fn start_expansion(out: &mut Vec<(Cow<'_, str>, Option)>, expanded_code: &ExpandedCode) { - out.push(( - Cow::Owned(format!( - "{}", - expanded_code.code, - )), - Some(Class::Expansion), - )); + let expanded_code = get_next_expansion(expanded_codes, token_handler.line, span)?; + token_handler.start_expansion(); + Some(expanded_code) } fn end_expansion<'a, W: Write>( token_handler: &mut TokenHandler<'_, '_, W>, expanded_codes: &'a [ExpandedCode], - expansion_start_tags: &[(&'static str, Class)], - line: u32, span: Span, ) -> Option<&'a ExpandedCode> { - if let Some(expanded_code) = get_next_expansion(expanded_codes, line, span) { - // We close the current "original" content. - token_handler.pending_elems.push((Cow::Borrowed(""), Some(Class::Expansion))); - return Some(expanded_code); + token_handler.element_stack.exit_elem(); + let expansion = get_next_expansion(expanded_codes, token_handler.line, span); + if expansion.is_none() { + token_handler.close_expansion(); } - - let skip = iter::zip(token_handler.closing_tags.as_slice(), expansion_start_tags) - .position(|(tag, start_tag)| tag != start_tag) - .unwrap_or_else(|| cmp::min(token_handler.closing_tags.len(), expansion_start_tags.len())); - - let tags = iter::chain( - expansion_start_tags.iter().skip(skip), - token_handler.closing_tags.iter().skip(skip), - ); - - let mut elem = Cow::Borrowed(""); - - for (tag, _) in tags.clone() { - elem.to_mut().push_str(tag); - } - for (_, class) in tags { - write!(elem.to_mut(), "", class.as_html()).unwrap(); - } - - token_handler.pending_elems.push((elem, Some(Class::Expansion))); - None + expansion } #[derive(Clone, Copy)] @@ -417,29 +550,29 @@ pub(super) fn write_code( if src.contains('\r') { src.replace("\r\n", "\n").into() } else { Cow::Borrowed(src) }; let mut token_handler = TokenHandler { out, - closing_tags: Vec::new(), - pending_exit_span: None, - current_class: None, - pending_elems: Vec::with_capacity(20), href_context, write_line_number: match line_info { Some(line_info) => { if line_info.is_scraped_example { - write_scraped_line_number + scraped_line_number } else { - write_line_number + line_number } } None => empty_line_number, }, + line: 0, + max_lines: u32::MAX, + element_stack: ElementStack::new(), }; - let (mut line, max_lines) = if let Some(line_info) = line_info { - token_handler.write_line_number(line_info.start_line, ""); - (line_info.start_line, line_info.max_lines) - } else { - (0, u32::MAX) - }; + if let Some(line_info) = line_info { + token_handler.line = line_info.start_line - 1; + token_handler.max_lines = line_info.max_lines; + if let Some(text) = token_handler.handle_backline() { + token_handler.push_element_without_backline_check(None, text, false); + } + } let (expanded_codes, file_span) = match token_handler.href_context.as_ref().and_then(|c| { let expanded_codes = c.context.shared.expanded_codes.get(&c.file_span.lo())?; @@ -448,114 +581,44 @@ pub(super) fn write_code( Some((expanded_codes, file_span)) => (expanded_codes.as_slice(), file_span), None => (&[] as &[ExpandedCode], DUMMY_SP), }; - let mut current_expansion = get_expansion(&mut token_handler, expanded_codes, line, file_span); - token_handler.write_pending_elems(None); - let mut expansion_start_tags = Vec::new(); + let mut current_expansion = get_expansion(&mut token_handler, expanded_codes, file_span); Classifier::new( &src, token_handler.href_context.as_ref().map(|c| c.file_span).unwrap_or(DUMMY_SP), decoration_info, ) - .highlight(&mut |span, highlight| { - match highlight { - Highlight::Token { text, class } => { - // If we received a `ExitSpan` event and then have a non-compatible `Class`, we - // need to close the ``. - let need_current_class_update = if let Some(pending) = - token_handler.pending_exit_span - && !can_merge(Some(pending), class, text) - { - token_handler.handle_exit_span(); - true - // If the two `Class` are different, time to flush the current content and start - // a new one. - } else if !can_merge(token_handler.current_class, class, text) { - token_handler.write_pending_elems(token_handler.current_class); - true - } else { - token_handler.current_class.is_none() - }; + .highlight(&mut |span, highlight| match highlight { + Highlight::Token { text, class } => { + token_handler.push_element(class, text.to_string()); - if need_current_class_update { - token_handler.current_class = class.map(Class::dummy); + if text == "\n" { + if current_expansion.is_none() { + current_expansion = get_expansion(&mut token_handler, expanded_codes, span); } - if text == "\n" { - line += 1; - if line < max_lines { - token_handler - .pending_elems - .push((Cow::Borrowed(text), Some(Class::Backline(line)))); - } - if current_expansion.is_none() { - current_expansion = - get_expansion(&mut token_handler, expanded_codes, line, span); - expansion_start_tags = token_handler.closing_tags.clone(); - } - if let Some(ref current_expansion) = current_expansion - && current_expansion.span.lo() == span.hi() - { - start_expansion(&mut token_handler.pending_elems, current_expansion); - } - } else { - token_handler.pending_elems.push((Cow::Borrowed(text), class)); - - let mut need_end = false; - if let Some(ref current_expansion) = current_expansion { - if current_expansion.span.lo() == span.hi() { - start_expansion(&mut token_handler.pending_elems, current_expansion); - } else if current_expansion.end_line == line - && span.hi() >= current_expansion.span.hi() - { - need_end = true; - } - } - if need_end { - current_expansion = end_expansion( - &mut token_handler, - expanded_codes, - &expansion_start_tags, - line, - span, - ); - } + if let Some(ref current_expansion) = current_expansion + && current_expansion.span.lo() == span.hi() + { + token_handler.add_expanded_code(current_expansion); } - } - Highlight::EnterSpan { class } => { - let mut should_add = true; - if let Some(pending_exit_span) = token_handler.pending_exit_span { - if class.is_equal_to(pending_exit_span) { - should_add = false; - } else { - token_handler.handle_exit_span(); - } - } else { - // We flush everything just in case... - if token_handler.write_pending_elems(token_handler.current_class) { - token_handler.current_class = None; + } else { + let mut need_end = false; + if let Some(ref current_expansion) = current_expansion { + if current_expansion.span.lo() == span.hi() { + token_handler.add_expanded_code(current_expansion); + } else if current_expansion.end_line == token_handler.line + && span.hi() >= current_expansion.span.hi() + { + need_end = true; } } - if should_add { - let closing_tag = - enter_span(token_handler.out, class, &token_handler.href_context); - token_handler.closing_tags.push((closing_tag, class)); + if need_end { + current_expansion = end_expansion(&mut token_handler, expanded_codes, span); } - - token_handler.current_class = None; - token_handler.pending_exit_span = None; - } - Highlight::ExitSpan => { - token_handler.current_class = None; - token_handler.pending_exit_span = Some( - token_handler - .closing_tags - .last() - .as_ref() - .expect("ExitSpan without EnterSpan") - .1, - ); } - }; + } + Highlight::EnterSpan { class } => token_handler.element_stack.enter_elem(class), + Highlight::ExitSpan => token_handler.element_stack.exit_elem(), }); } @@ -585,9 +648,10 @@ enum Class { PreludeVal(Span), QuestionMark, Decoration(&'static str), - Backline(u32), /// Macro expansion. Expansion, + /// "original" code without macro expansion. + Original, } impl Class { @@ -605,17 +669,6 @@ impl Class { } } - /// If `self` contains a `Span`, it'll be replaced with `DUMMY_SP` to prevent creating links - /// on "empty content" (because of the attributes merge). - fn dummy(self) -> Self { - match self { - Self::Self_(_) => Self::Self_(DUMMY_SP), - Self::Macro(_) => Self::Macro(DUMMY_SP), - Self::Ident(_) => Self::Ident(DUMMY_SP), - s => s, - } - } - /// Returns the css class expected by rustdoc for each `Class`. fn as_html(self) -> &'static str { match self { @@ -636,8 +689,8 @@ impl Class { Class::PreludeVal(_) => "prelude-val", Class::QuestionMark => "question-mark", Class::Decoration(kind) => kind, - Class::Backline(_) => "", - Class::Expansion => "", + Class::Expansion => "expansion", + Class::Original => "original", } } @@ -662,12 +715,23 @@ impl Class { | Self::Lifetime | Self::QuestionMark | Self::Decoration(_) - | Self::Backline(_) + // | Self::Backline(_) + | Self::Original | Self::Expansion => None, } } } +impl fmt::Display for Class { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let html = self.as_html(); + if html.is_empty() { + return Ok(()); + } + write!(f, " class=\"{html}\"") + } +} + #[derive(Debug)] enum Highlight<'a> { Token { text: &'a str, class: Option }, @@ -1190,60 +1254,6 @@ impl<'src> Classifier<'src> { } } -/// Called when we start processing a span of text that should be highlighted. -/// The `Class` argument specifies how it should be highlighted. -fn enter_span( - out: &mut impl Write, - klass: Class, - href_context: &Option>, -) -> &'static str { - string_without_closing_tag(out, "", Some(klass), href_context, true).expect( - "internal error: enter_span was called with Some(klass) but did not return a \ - closing HTML tag", - ) -} - -/// Called at the end of a span of highlighted text. -fn exit_span(out: &mut impl Write, closing_tag: &str) { - out.write_str(closing_tag).unwrap(); -} - -/// Called for a span of text. If the text should be highlighted differently -/// from the surrounding text, then the `Class` argument will be a value other -/// than `None`. -/// -/// The following sequences of callbacks are equivalent: -/// ```plain -/// enter_span(Foo), string("text", None), exit_span() -/// string("text", Foo) -/// ``` -/// -/// The latter can be thought of as a shorthand for the former, which is more -/// flexible. -/// -/// Note that if `context` is not `None` and that the given `klass` contains a `Span`, the function -/// will then try to find this `span` in the `span_correspondence_map`. If found, it'll then -/// generate a link for this element (which corresponds to where its definition is located). -fn string( - out: &mut W, - text: EscapeBodyText<'_>, - klass: Option, - href_context: &Option>, - open_tag: bool, - write_line_number_callback: fn(&mut W, u32, &'static str), -) { - if let Some(Class::Backline(line)) = klass { - write_line_number_callback(out, line, "\n"); - } else if let Some(Class::Expansion) = klass { - // This has already been escaped so we get the text to write it directly. - out.write_str(text.0).unwrap(); - } else if let Some(closing_tag) = - string_without_closing_tag(out, text, klass, href_context, open_tag) - { - out.write_str(closing_tag).unwrap(); - } -} - fn generate_link_to_def( out: &mut impl Write, text_s: &str, diff --git a/src/librustdoc/lib.rs b/src/librustdoc/lib.rs index 5f72064f0a8ce..820b2392e07ce 100644 --- a/src/librustdoc/lib.rs +++ b/src/librustdoc/lib.rs @@ -6,6 +6,7 @@ #![feature(ascii_char)] #![feature(ascii_char_variants)] #![feature(assert_matches)] +#![feature(box_into_inner)] #![feature(box_patterns)] #![feature(debug_closure_helpers)] #![feature(file_buffered)] From c6e26f98810b306669d03f86156f88998d2b67c5 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Wed, 24 Sep 2025 18:31:38 +0200 Subject: [PATCH 2/6] Make compatible stack elements "glue" together to prevent creating more HTML tags than necessary --- src/librustdoc/html/highlight.rs | 93 ++++++++++++++++++++++++-------- 1 file changed, 70 insertions(+), 23 deletions(-) diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs index a9cf100344322..2fd5ef5608e8b 100644 --- a/src/librustdoc/html/highlight.rs +++ b/src/librustdoc/html/highlight.rs @@ -159,11 +159,17 @@ impl Element { other.content.iter().all(|c| can_merge(self.class, other.class, &c.text)) } - fn write_elem_to(&self, out: &mut W, href_context: &Option>, parent_class: Option) { + fn write_elem_to( + &self, + out: &mut W, + href_context: &Option>, + parent_class: Option, + ) { let mut prev = parent_class; let mut closing_tag = None; for part in &self.content { - let text: &dyn Display = if part.needs_escape { &EscapeBodyText(&part.text) } else { &part.text }; + let text: &dyn Display = + if part.needs_escape { &EscapeBodyText(&part.text) } else { &part.text }; if part.class.is_some() { // We only try to generate links as the `` should have already be generated // by the caller of `write_elem_to`. @@ -198,11 +204,18 @@ enum ElementOrStack { Stack(ElementStack), } +/// This represents the stack of HTML elements. For example a macro expansion +/// will contain other elements which might themselves contain other elements +/// (like macros). +/// +/// This allows to easily handle HTML tags instead of having a more complicated +/// state machine to keep track of which tags are open. #[derive(Debug)] struct ElementStack { elements: Vec, parent: Option>, class: Option, + pending_exit: bool, } impl ElementStack { @@ -211,10 +224,15 @@ impl ElementStack { } fn new_with_class(class: Option) -> Self { - Self { elements: Vec::new(), parent: None, class } + Self { elements: Vec::new(), parent: None, class, pending_exit: false } } fn push_element(&mut self, mut elem: Element) { + if self.pending_exit + && !can_merge(self.class, elem.class, elem.content.first().map_or("", |c| &c.text)) + { + self.exit_current_stack(); + } if let Some(ElementOrStack::Element(last)) = self.elements.last_mut() && last.can_merge(&elem) { @@ -237,7 +255,21 @@ impl ElementStack { } } - fn enter_stack(&mut self, ElementStack { elements, parent, class }: ElementStack) { + fn enter_stack( + &mut self, + ElementStack { elements, parent, class, pending_exit }: ElementStack, + ) { + if self.pending_exit { + if can_merge(self.class, class, "") { + self.pending_exit = false; + for elem in elements { + self.elements.push(elem); + } + // Compatible stacks, nothing to be done here! + return; + } + self.exit_current_stack(); + } assert!(parent.is_none(), "`enter_stack` used with a non empty parent"); let parent_elements = std::mem::take(&mut self.elements); let parent_parent = std::mem::take(&mut self.parent); @@ -245,23 +277,27 @@ impl ElementStack { elements: parent_elements, parent: parent_parent, class: self.class, + pending_exit: self.pending_exit, })); self.class = class; self.elements = elements; + self.pending_exit = pending_exit; } - fn enter_elem(&mut self, class: Class) { - let elements = std::mem::take(&mut self.elements); - let parent = std::mem::take(&mut self.parent); - self.parent = Some(Box::new(ElementStack { elements, parent, class: self.class })); - self.class = Some(class); + /// This sets the `pending_exit` field to `true`. Meaning that if we try to push another stack + /// which is not compatible with this one, it will exit the current one before adding the new + /// one. + fn exit_elem(&mut self) { + self.pending_exit = true; } - fn exit_elem(&mut self) { + /// Unlike `exit_elem`, this method directly exits the current stack. It is called when the + /// current stack is not compatible with a new one pushed or if an expansion was ended. + fn exit_current_stack(&mut self) { let Some(element) = std::mem::take(&mut self.parent) else { panic!("exiting an element where there is no parent"); }; - let ElementStack { elements, parent, class } = Box::into_inner(element); + let ElementStack { elements, parent, class, pending_exit } = Box::into_inner(element); let old_elements = std::mem::take(&mut self.elements); self.elements = elements; @@ -269,9 +305,11 @@ impl ElementStack { elements: old_elements, class: self.class, parent: None, + pending_exit: false, })); self.parent = parent; self.class = class; + self.pending_exit = pending_exit; } fn write_content(&self, out: &mut W, href_context: &Option>) { @@ -306,16 +344,18 @@ impl ElementStack { // we generate the `` directly here. // // For other elements, the links will be generated in `write_elem_to`. - let href_context = if matches!(class, Class::Macro(_)) { - href_context - } else { - &None - }; - string_without_closing_tag(out, "", Some(class), href_context, self.class != parent_class) - .expect( - "internal error: enter_span was called with Some(class) but did not \ + let href_context = if matches!(class, Class::Macro(_)) { href_context } else { &None }; + string_without_closing_tag( + out, + "", + Some(class), + href_context, + self.class != parent_class, + ) + .expect( + "internal error: enter_span was called with Some(class) but did not \ return a closing HTML tag", - ) + ) } else { "" }; @@ -427,7 +467,7 @@ impl TokenHandler<'_, '_, F> { // We inline everything into the top-most element. while self.element_stack.parent.is_some() { - self.element_stack.exit_elem(); + self.element_stack.exit_current_stack(); if let Some(ElementOrStack::Stack(stack)) = self.element_stack.elements.last() && let Some(class) = stack.class && class != Class::Original @@ -450,6 +490,11 @@ impl TokenHandler<'_, '_, F> { impl Drop for TokenHandler<'_, '_, F> { /// When leaving, we need to flush all pending data to not have missing content. fn drop(&mut self) { + // We need to clean the hierarchy before displaying it, otherwise the parents won't see + // the last child. + while self.element_stack.parent.is_some() { + self.element_stack.exit_current_stack(); + } self.element_stack.write_content(self.out, &self.href_context); } } @@ -493,7 +538,7 @@ fn end_expansion<'a, W: Write>( expanded_codes: &'a [ExpandedCode], span: Span, ) -> Option<&'a ExpandedCode> { - token_handler.element_stack.exit_elem(); + token_handler.element_stack.exit_current_stack(); let expansion = get_next_expansion(expanded_codes, token_handler.line, span); if expansion.is_none() { token_handler.close_expansion(); @@ -617,7 +662,9 @@ pub(super) fn write_code( } } } - Highlight::EnterSpan { class } => token_handler.element_stack.enter_elem(class), + Highlight::EnterSpan { class } => { + token_handler.element_stack.enter_stack(ElementStack::new_with_class(Some(class))) + } Highlight::ExitSpan => token_handler.element_stack.exit_elem(), }); } From b28eabc5e61af59bf7004fd5bf64f064c89411df Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Tue, 7 Oct 2025 14:33:39 +0200 Subject: [PATCH 3/6] Improve performance --- src/librustdoc/html/highlight.rs | 76 ++++++++++++++++---------------- 1 file changed, 39 insertions(+), 37 deletions(-) diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs index 2fd5ef5608e8b..03cbe379a42a6 100644 --- a/src/librustdoc/html/highlight.rs +++ b/src/librustdoc/html/highlight.rs @@ -5,10 +5,12 @@ //! //! Use the `render_with_highlighting` to highlight some rust code. +use std::borrow::Cow; use std::collections::VecDeque; use std::fmt::{self, Display, Write}; -use std::{cmp, iter}; +use std::iter; +use itertools::Either; use rustc_data_structures::fx::FxIndexMap; use rustc_lexer::{Cursor, FrontmatterAllowed, LiteralKind, TokenKind}; use rustc_span::BytePos; @@ -134,8 +136,8 @@ fn can_merge(class1: Option, class2: Option, text: &str) -> bool { } #[derive(Debug)] -struct Content { - text: String, +struct Content<'a> { + text: Cow<'a, str>, /// If `Some` and the `span` is different from the parent, then it might generate a link so we /// need to keep this information. class: Option, @@ -143,15 +145,15 @@ struct Content { } #[derive(Debug)] -struct Element { +struct Element<'a> { /// If `class` is `None`, then it's just plain text with no HTML tag. class: Option, /// Content for the current element. - content: Vec, + content: Vec>, } -impl Element { - fn new(class: Option, text: String, needs_escape: bool) -> Self { +impl<'a> Element<'a> { + fn new(class: Option, text: Cow<'a, str>, needs_escape: bool) -> Self { Self { class, content: vec![Content { text, class, needs_escape }] } } @@ -168,8 +170,11 @@ impl Element { let mut prev = parent_class; let mut closing_tag = None; for part in &self.content { - let text: &dyn Display = - if part.needs_escape { &EscapeBodyText(&part.text) } else { &part.text }; + let text = if part.needs_escape { + Either::Left(&EscapeBodyText(&part.text)) + } else { + Either::Right(&part.text) + }; if part.class.is_some() { // We only try to generate links as the `` should have already be generated // by the caller of `write_elem_to`. @@ -199,9 +204,9 @@ impl Element { } #[derive(Debug)] -enum ElementOrStack { - Element(Element), - Stack(ElementStack), +enum ElementOrStack<'a> { + Element(Element<'a>), + Stack(ElementStack<'a>), } /// This represents the stack of HTML elements. For example a macro expansion @@ -211,14 +216,14 @@ enum ElementOrStack { /// This allows to easily handle HTML tags instead of having a more complicated /// state machine to keep track of which tags are open. #[derive(Debug)] -struct ElementStack { - elements: Vec, - parent: Option>, +struct ElementStack<'a> { + elements: Vec>, + parent: Option>>, class: Option, pending_exit: bool, } -impl ElementStack { +impl<'a> ElementStack<'a> { fn new() -> Self { Self::new_with_class(None) } @@ -227,7 +232,7 @@ impl ElementStack { Self { elements: Vec::new(), parent: None, class, pending_exit: false } } - fn push_element(&mut self, mut elem: Element) { + fn push_element(&mut self, mut elem: Element<'a>) { if self.pending_exit && !can_merge(self.class, elem.class, elem.content.first().map_or("", |c| &c.text)) { @@ -236,15 +241,13 @@ impl ElementStack { if let Some(ElementOrStack::Element(last)) = self.elements.last_mut() && last.can_merge(&elem) { - for part in elem.content.drain(..) { - last.content.push(part); - } + last.content.append(&mut elem.content); } else { self.elements.push(ElementOrStack::Element(elem)); } } - fn empty_stack_and_set_new_heads(&mut self, class: Class, element: Element) { + fn empty_stack_and_set_new_heads(&mut self, class: Class, element: Element<'a>) { self.elements.clear(); if let Some(parent) = &mut self.parent { parent.empty_stack_and_set_new_heads(class, element); @@ -257,14 +260,12 @@ impl ElementStack { fn enter_stack( &mut self, - ElementStack { elements, parent, class, pending_exit }: ElementStack, + ElementStack { elements, parent, class, pending_exit }: ElementStack<'a>, ) { if self.pending_exit { if can_merge(self.class, class, "") { self.pending_exit = false; - for elem in elements { - self.elements.push(elem); - } + self.elements.extend(elements); // Compatible stacks, nothing to be done here! return; } @@ -382,7 +383,7 @@ impl ElementStack { /// the various functions (which became its methods). struct TokenHandler<'a, 'tcx, F: Write> { out: &'a mut F, - element_stack: ElementStack, + element_stack: ElementStack<'a>, /// We need to keep the `Class` for each element because it could contain a `Span` which is /// used to generate links. href_context: Option>, @@ -397,7 +398,7 @@ impl std::fmt::Debug for TokenHandler<'_, '_, F> { } } -impl TokenHandler<'_, '_, F> { +impl<'a, F: Write> TokenHandler<'a, '_, F> { fn handle_backline(&mut self) -> Option { self.line += 1; if self.line < self.max_lines { @@ -409,20 +410,21 @@ impl TokenHandler<'_, '_, F> { fn push_element_without_backline_check( &mut self, class: Option, - text: String, + text: Cow<'a, str>, needs_escape: bool, ) { self.element_stack.push_element(Element::new(class, text, needs_escape)) } - fn push_element(&mut self, class: Option, mut text: String) { - let needs_escape = if text == "\n" + fn push_element(&mut self, class: Option, text: Cow<'a, str>) { + let (needs_escape, text) = if text == "\n" && let Some(backline) = self.handle_backline() { + let mut text = text.into_owned(); text.push_str(&backline); - false + (false, Cow::Owned(text)) } else { - true + (true, text) }; self.push_element_without_backline_check(class, text, needs_escape); @@ -438,14 +440,14 @@ impl TokenHandler<'_, '_, F> { Element { class: None, content: vec![Content { - text: format!( + text: Cow::Owned(format!( "", self.line, - ), + )), class: None, needs_escape: false, }], @@ -456,7 +458,7 @@ impl TokenHandler<'_, '_, F> { fn add_expanded_code(&mut self, expanded_code: &ExpandedCode) { self.element_stack.push_element(Element::new( None, - format!("{}", expanded_code.code), + Cow::Owned(format!("{}", expanded_code.code)), false, )); self.element_stack.enter_stack(ElementStack::new_with_class(Some(Class::Original))); @@ -615,7 +617,7 @@ pub(super) fn write_code( token_handler.line = line_info.start_line - 1; token_handler.max_lines = line_info.max_lines; if let Some(text) = token_handler.handle_backline() { - token_handler.push_element_without_backline_check(None, text, false); + token_handler.push_element_without_backline_check(None, Cow::Owned(text), false); } } @@ -635,7 +637,7 @@ pub(super) fn write_code( ) .highlight(&mut |span, highlight| match highlight { Highlight::Token { text, class } => { - token_handler.push_element(class, text.to_string()); + token_handler.push_element(class, Cow::Borrowed(text)); if text == "\n" { if current_expansion.is_none() { From 6ed9a9dd8fa444471f51c5050349712b9065b09b Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Wed, 8 Oct 2025 13:30:19 +0200 Subject: [PATCH 4/6] Flush elements when there are too many --- src/librustdoc/html/highlight.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs index 03cbe379a42a6..d7d68bfceb902 100644 --- a/src/librustdoc/html/highlight.rs +++ b/src/librustdoc/html/highlight.rs @@ -407,13 +407,26 @@ impl<'a, F: Write> TokenHandler<'a, '_, F> { None } + fn maybe_write_content(&mut self) { + if self.element_stack.parent.is_none() + && self.element_stack.class.is_none() + && !self.element_stack.pending_exit + // Completely random number. + && self.element_stack.elements.len() > 30 + { + self.element_stack.write_content(self.out, &self.href_context); + self.element_stack.elements.clear(); + } + } + fn push_element_without_backline_check( &mut self, class: Option, text: Cow<'a, str>, needs_escape: bool, ) { - self.element_stack.push_element(Element::new(class, text, needs_escape)) + self.element_stack.push_element(Element::new(class, text, needs_escape)); + self.maybe_write_content(); } fn push_element(&mut self, class: Option, text: Cow<'a, str>) { @@ -486,6 +499,7 @@ impl<'a, F: Write> TokenHandler<'a, '_, F> { for class in old_stack.iter().rev() { self.element_stack.enter_stack(ElementStack::new_with_class(Some(*class))); } + self.maybe_write_content(); } } From 713cd50ea0b09691cf8496e91a24fa83c0cadcce Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Tue, 14 Oct 2025 23:01:27 +0200 Subject: [PATCH 5/6] Switch back to stream highlight processing --- src/librustdoc/html/highlight.rs | 531 ++++++++++++++----------------- 1 file changed, 236 insertions(+), 295 deletions(-) diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs index d7d68bfceb902..ecc67fa065fef 100644 --- a/src/librustdoc/html/highlight.rs +++ b/src/librustdoc/html/highlight.rs @@ -136,79 +136,36 @@ fn can_merge(class1: Option, class2: Option, text: &str) -> bool { } #[derive(Debug)] -struct Content<'a> { - text: Cow<'a, str>, - /// If `Some` and the `span` is different from the parent, then it might generate a link so we - /// need to keep this information. - class: Option, - needs_escape: bool, -} - -#[derive(Debug)] -struct Element<'a> { - /// If `class` is `None`, then it's just plain text with no HTML tag. - class: Option, - /// Content for the current element. - content: Vec>, +struct ClassInfo { + class: Class, + /// Set to true only when an item was written inside this tag. + open: bool, + /// Set to true when leaving the current item. The closing tag will be + /// written if: + /// + /// 1. `self.open` is true + /// 2. Only when the first non-mergeable item is pushed. + pending_exit: bool, + /// If `true`, it means it's ``, otherwise it's ``. + link_closing_tag: bool, } -impl<'a> Element<'a> { - fn new(class: Option, text: Cow<'a, str>, needs_escape: bool) -> Self { - Self { class, content: vec![Content { text, class, needs_escape }] } - } - - fn can_merge(&self, other: &Self) -> bool { - other.content.iter().all(|c| can_merge(self.class, other.class, &c.text)) +impl ClassInfo { + fn new(class: Class, pending_exit: bool) -> Self { + Self { class, open: pending_exit, pending_exit, link_closing_tag: false } } - fn write_elem_to( - &self, - out: &mut W, - href_context: &Option>, - parent_class: Option, - ) { - let mut prev = parent_class; - let mut closing_tag = None; - for part in &self.content { - let text = if part.needs_escape { - Either::Left(&EscapeBodyText(&part.text)) + fn close_tag(&self, out: &mut W) { + if self.open { + if self.link_closing_tag { + out.write_str("").unwrap(); } else { - Either::Right(&part.text) - }; - if part.class.is_some() { - // We only try to generate links as the `` should have already be generated - // by the caller of `write_elem_to`. - if let Some(new_closing_tag) = string_without_closing_tag( - out, - text, - part.class, - href_context, - prev != part.class, - ) { - if new_closing_tag == "" { - out.write_str(new_closing_tag).unwrap(); - closing_tag = None; - } else { - closing_tag = Some(new_closing_tag); - } - } - prev = part.class; - } else { - write!(out, "{text}").unwrap(); + out.write_str("").unwrap(); } } - if let Some(closing_tag) = closing_tag { - out.write_str(closing_tag).unwrap(); - } } } -#[derive(Debug)] -enum ElementOrStack<'a> { - Element(Element<'a>), - Stack(ElementStack<'a>), -} - /// This represents the stack of HTML elements. For example a macro expansion /// will contain other elements which might themselves contain other elements /// (like macros). @@ -216,166 +173,164 @@ enum ElementOrStack<'a> { /// This allows to easily handle HTML tags instead of having a more complicated /// state machine to keep track of which tags are open. #[derive(Debug)] -struct ElementStack<'a> { - elements: Vec>, - parent: Option>>, - class: Option, - pending_exit: bool, +struct ClassStack { + open_classes: Vec, } -impl<'a> ElementStack<'a> { +impl ClassStack { fn new() -> Self { - Self::new_with_class(None) - } - - fn new_with_class(class: Option) -> Self { - Self { elements: Vec::new(), parent: None, class, pending_exit: false } - } - - fn push_element(&mut self, mut elem: Element<'a>) { - if self.pending_exit - && !can_merge(self.class, elem.class, elem.content.first().map_or("", |c| &c.text)) - { - self.exit_current_stack(); - } - if let Some(ElementOrStack::Element(last)) = self.elements.last_mut() - && last.can_merge(&elem) - { - last.content.append(&mut elem.content); - } else { - self.elements.push(ElementOrStack::Element(elem)); - } - } - - fn empty_stack_and_set_new_heads(&mut self, class: Class, element: Element<'a>) { - self.elements.clear(); - if let Some(parent) = &mut self.parent { - parent.empty_stack_and_set_new_heads(class, element); - } else { - let mut new_stack = ElementStack::new_with_class(Some(class)); - new_stack.elements.push(ElementOrStack::Element(element)); - self.parent.replace(Box::new(new_stack)); - } + Self { open_classes: Vec::new() } } - fn enter_stack( + fn enter_elem( &mut self, - ElementStack { elements, parent, class, pending_exit }: ElementStack<'a>, + out: &mut W, + href_context: &Option>, + new_class: Class, + pending_exit: bool, ) { - if self.pending_exit { - if can_merge(self.class, class, "") { - self.pending_exit = false; - self.elements.extend(elements); - // Compatible stacks, nothing to be done here! + if let Some(current_class) = self.open_classes.last_mut() { + if can_merge(Some(current_class.class), Some(new_class), "") { + current_class.pending_exit = false; return; + } else if current_class.pending_exit { + current_class.close_tag(out); + self.open_classes.pop(); } - self.exit_current_stack(); } - assert!(parent.is_none(), "`enter_stack` used with a non empty parent"); - let parent_elements = std::mem::take(&mut self.elements); - let parent_parent = std::mem::take(&mut self.parent); - self.parent = Some(Box::new(ElementStack { - elements: parent_elements, - parent: parent_parent, - class: self.class, - pending_exit: self.pending_exit, - })); - self.class = class; - self.elements = elements; - self.pending_exit = pending_exit; + let mut class_info = ClassInfo::new(new_class, pending_exit); + if pending_exit { + class_info.open = true; + } else if matches!(new_class, Class::Decoration(_) | Class::Original) { + // We open it right away to ensure it always come at the expected location. + // FIXME: Should we instead add a new boolean field to `ClassInfo` to force a non-open + // tags to be added if another one comes before it's open? + write!(out, "", new_class.as_html()).unwrap(); + class_info.open = true; + } else if new_class.get_span().is_some() + && let Some(closing_tag) = + string_without_closing_tag(out, "", Some(class_info.class), href_context, false) + && !closing_tag.is_empty() + { + class_info.open = true; + class_info.link_closing_tag = closing_tag == ""; + } + + self.open_classes.push(class_info); } /// This sets the `pending_exit` field to `true`. Meaning that if we try to push another stack /// which is not compatible with this one, it will exit the current one before adding the new /// one. fn exit_elem(&mut self) { - self.pending_exit = true; + let current_class = + self.open_classes.last_mut().expect("`exit_elem` called on empty class stack"); + if !current_class.pending_exit { + current_class.pending_exit = true; + return; + } + // If the current class was already closed, it means we are actually closing its parent. + self.open_classes.pop(); + let current_class = + self.open_classes.last_mut().expect("`exit_elem` called on empty class stack parent"); + current_class.pending_exit = true; } - /// Unlike `exit_elem`, this method directly exits the current stack. It is called when the - /// current stack is not compatible with a new one pushed or if an expansion was ended. - fn exit_current_stack(&mut self) { - let Some(element) = std::mem::take(&mut self.parent) else { - panic!("exiting an element where there is no parent"); - }; - let ElementStack { elements, parent, class, pending_exit } = Box::into_inner(element); - - let old_elements = std::mem::take(&mut self.elements); - self.elements = elements; - self.elements.push(ElementOrStack::Stack(ElementStack { - elements: old_elements, - class: self.class, - parent: None, - pending_exit: false, - })); - self.parent = parent; - self.class = class; - self.pending_exit = pending_exit; + fn last_class(&self) -> Option { + self.open_classes.last().map(|c| c.class) } - fn write_content(&self, out: &mut W, href_context: &Option>) { - let mut elem = self; + fn last_class_is_open(&self) -> bool { + if let Some(last) = self.open_classes.last() { + last.open + } else { + // If there is no class, then it's already open. + true + } + } - // We get the top most item. - while let Some(parent) = &elem.parent { - elem = parent; + fn close_last_if_needed(&mut self, out: &mut W) { + if let Some(last) = self.open_classes.last() + && last.pending_exit + && last.open + { + last.close_tag(out); + self.open_classes.pop(); } - // Now we can output the whole content. - elem.write_to(out, href_context, None); } - fn write_to( - &self, + fn push( + &mut self, out: &mut W, href_context: &Option>, - parent_class: Option, + class: Option, + text: Cow<'_, str>, + needs_escape: bool, ) { - // If it only contains stack, it means it has no content of its own so no need to generate - // a tag. - let closing_tag = if let Some(Class::Expansion) = self.class { - out.write_str("").unwrap(); - "" - } else if let Some(class) = self.class - // `PreludeTy` can never include more than an ident so it should not generate - // a wrapping `span`. - && !matches!(class, Class::PreludeTy(_)) - { - // Macro is the only `ElementStack` that can generate a link to definition to its - // whole content, so to prevent having ``, - // we generate the `` directly here. - // - // For other elements, the links will be generated in `write_elem_to`. - let href_context = if matches!(class, Class::Macro(_)) { href_context } else { &None }; - string_without_closing_tag( - out, - "", - Some(class), - href_context, - self.class != parent_class, - ) - .expect( - "internal error: enter_span was called with Some(class) but did not \ - return a closing HTML tag", - ) - } else { - "" - }; + // If the new token cannot be merged with the currently open `Class`, we close the `Class` + // if possible. + if !can_merge(self.last_class(), class, &text) { + self.close_last_if_needed(out) + } - for child_elem in self.elements.iter() { - let child_elem = match child_elem { - ElementOrStack::Element(elem) => elem, - ElementOrStack::Stack(stack) => { - stack.write_to(out, href_context, parent_class); - continue; + let current_class = self.last_class(); + + // If we have a `Class` that hasn't been "open" yet (ie, we received only an `EnterSpan` + // event), we need to open the `Class` before going any further so the new token will be + // written inside it. + if class.is_none() && !self.last_class_is_open() { + if let Some(current_class_info) = self.open_classes.last_mut() { + let class_s = current_class_info.class.as_html(); + if !class_s.is_empty() { + write!(out, "").unwrap(); } - }; - if child_elem.content.is_empty() { - continue; + current_class_info.open = true; } - child_elem.write_elem_to(out, href_context, parent_class); } - out.write_str(closing_tag).unwrap(); + let current_class_is_open = self.open_classes.last().is_some_and(|c| c.open); + let can_merge = can_merge(class, current_class, &text); + let should_open_tag = !current_class_is_open || !can_merge; + + let text = + if needs_escape { Either::Left(&EscapeBodyText(&text)) } else { Either::Right(text) }; + + let closing_tag = + string_without_closing_tag(out, &text, class, href_context, should_open_tag); + if class.is_some() && should_open_tag && closing_tag.is_none() { + panic!( + "called `string_without_closing_tag` with a class but no closing tag was returned" + ); + } else if let Some(closing_tag) = closing_tag + && !closing_tag.is_empty() + { + // If this is a link, we need to close it right away and not open a new `Class`, + // otherwise extra content would go into the `` HTML tag. + if closing_tag == "" { + out.write_str(closing_tag).unwrap(); + // If the current `Class` is not compatible with this one, we create a new `Class`. + } else if let Some(class) = class + && !can_merge + { + self.enter_elem(out, href_context, class, true); + // Otherwise, we consider the actual `Class` to have been open. + } else if let Some(current_class_info) = self.open_classes.last_mut() { + current_class_info.open = true; + } + } + } + + fn empty_stack(&mut self, out: &mut W) -> Vec { + let mut classes = Vec::with_capacity(self.open_classes.len()); + + // We close all open tags and only keep the ones that were not already waiting to be closed. + while let Some(class_info) = self.open_classes.pop() { + class_info.close_tag(out); + if !class_info.pending_exit { + classes.push(class_info.class); + } + } + classes } } @@ -383,7 +338,7 @@ impl<'a> ElementStack<'a> { /// the various functions (which became its methods). struct TokenHandler<'a, 'tcx, F: Write> { out: &'a mut F, - element_stack: ElementStack<'a>, + class_stack: ClassStack, /// We need to keep the `Class` for each element because it could contain a `Span` which is /// used to generate links. href_context: Option>, @@ -394,7 +349,7 @@ struct TokenHandler<'a, 'tcx, F: Write> { impl std::fmt::Debug for TokenHandler<'_, '_, F> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("TokenHandler").field("element_stack", &self.element_stack).finish() + f.debug_struct("TokenHandler").field("class_stack", &self.class_stack).finish() } } @@ -407,111 +362,77 @@ impl<'a, F: Write> TokenHandler<'a, '_, F> { None } - fn maybe_write_content(&mut self) { - if self.element_stack.parent.is_none() - && self.element_stack.class.is_none() - && !self.element_stack.pending_exit - // Completely random number. - && self.element_stack.elements.len() > 30 - { - self.element_stack.write_content(self.out, &self.href_context); - self.element_stack.elements.clear(); - } - } - - fn push_element_without_backline_check( + fn push_token_without_backline_check( &mut self, class: Option, text: Cow<'a, str>, needs_escape: bool, ) { - self.element_stack.push_element(Element::new(class, text, needs_escape)); - self.maybe_write_content(); + self.class_stack.push(self.out, &self.href_context, class, text, needs_escape); } - fn push_element(&mut self, class: Option, text: Cow<'a, str>) { - let (needs_escape, text) = if text == "\n" + fn push_token(&mut self, class: Option, text: Cow<'a, str>) { + if text == "\n" && let Some(backline) = self.handle_backline() { - let mut text = text.into_owned(); - text.push_str(&backline); - (false, Cow::Owned(text)) + self.out.write_str(&text).unwrap(); + self.out.write_str(&backline).unwrap(); } else { - (true, text) - }; - - self.push_element_without_backline_check(class, text, needs_escape); + self.push_token_without_backline_check(class, text, true); + } } fn start_expansion(&mut self) { - // We display everything. - self.element_stack.write_content(self.out, &self.href_context); - - // We remove everything and recreate the stack with the expansion at its head. - self.element_stack.empty_stack_and_set_new_heads( - Class::Expansion, - Element { - class: None, - content: vec![Content { - text: Cow::Owned(format!( - "", - self.line, - )), - class: None, - needs_escape: false, - }], - }, + self.line, + )), + false, ); + + // We re-open all tags. + for class in classes.into_iter().rev() { + self.class_stack.enter_elem(self.out, &self.href_context, class, false); + } } fn add_expanded_code(&mut self, expanded_code: &ExpandedCode) { - self.element_stack.push_element(Element::new( + self.push_token_without_backline_check( None, Cow::Owned(format!("{}", expanded_code.code)), false, - )); - self.element_stack.enter_stack(ElementStack::new_with_class(Some(Class::Original))); + ); + self.class_stack.enter_elem(self.out, &self.href_context, Class::Original, false); } fn close_expansion(&mut self) { - let mut old_stack = Vec::new(); - - // We inline everything into the top-most element. - while self.element_stack.parent.is_some() { - self.element_stack.exit_current_stack(); - if let Some(ElementOrStack::Stack(stack)) = self.element_stack.elements.last() - && let Some(class) = stack.class - && class != Class::Original - { - old_stack.push(class); + // We close all open tags. + let classes = self.class_stack.empty_stack(self.out); + + // We re-open all tags without expansion-related ones. + for class in classes.into_iter().rev() { + if !matches!(class, Class::Expansion | Class::Original) { + self.class_stack.enter_elem(self.out, &self.href_context, class, false); } } - // We display everything. - self.element_stack.write_content(self.out, &self.href_context); - - // We recreate the tree but without the expansion node. - self.element_stack.elements.clear(); - self.element_stack.class = None; - for class in old_stack.iter().rev() { - self.element_stack.enter_stack(ElementStack::new_with_class(Some(*class))); - } - self.maybe_write_content(); } } impl Drop for TokenHandler<'_, '_, F> { /// When leaving, we need to flush all pending data to not have missing content. fn drop(&mut self) { - // We need to clean the hierarchy before displaying it, otherwise the parents won't see - // the last child. - while self.element_stack.parent.is_some() { - self.element_stack.exit_current_stack(); - } - self.element_stack.write_content(self.out, &self.href_context); + self.class_stack.empty_stack(self.out); } } @@ -554,7 +475,7 @@ fn end_expansion<'a, W: Write>( expanded_codes: &'a [ExpandedCode], span: Span, ) -> Option<&'a ExpandedCode> { - token_handler.element_stack.exit_current_stack(); + token_handler.class_stack.exit_elem(); let expansion = get_next_expansion(expanded_codes, token_handler.line, span); if expansion.is_none() { token_handler.close_expansion(); @@ -624,14 +545,14 @@ pub(super) fn write_code( }, line: 0, max_lines: u32::MAX, - element_stack: ElementStack::new(), + class_stack: ClassStack::new(), }; if let Some(line_info) = line_info { token_handler.line = line_info.start_line - 1; token_handler.max_lines = line_info.max_lines; if let Some(text) = token_handler.handle_backline() { - token_handler.push_element_without_backline_check(None, Cow::Owned(text), false); + token_handler.push_token_without_backline_check(None, Cow::Owned(text), false); } } @@ -651,7 +572,7 @@ pub(super) fn write_code( ) .highlight(&mut |span, highlight| match highlight { Highlight::Token { text, class } => { - token_handler.push_element(class, Cow::Borrowed(text)); + token_handler.push_token(class, Cow::Borrowed(text)); if text == "\n" { if current_expansion.is_none() { @@ -679,9 +600,16 @@ pub(super) fn write_code( } } Highlight::EnterSpan { class } => { - token_handler.element_stack.enter_stack(ElementStack::new_with_class(Some(class))) + token_handler.class_stack.enter_elem( + token_handler.out, + &token_handler.href_context, + class, + false, + ); + } + Highlight::ExitSpan => { + token_handler.class_stack.exit_elem(); } - Highlight::ExitSpan => token_handler.element_stack.exit_elem(), }); } @@ -872,12 +800,15 @@ impl<'a> PeekIter<'a> { None } } + + fn stop_peeking(&mut self) { + self.peek_pos = 0; + } } impl<'a> Iterator for PeekIter<'a> { type Item = (TokenKind, &'a str); fn next(&mut self) -> Option { - self.peek_pos = 0; if let Some(first) = self.stored.pop_front() { Some(first) } else { self.iter.next() } } } @@ -1257,31 +1188,35 @@ impl<'src> Classifier<'src> { LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number, }, TokenKind::GuardedStrPrefix => return no_highlight(sink), - TokenKind::Ident | TokenKind::RawIdent if lookahead == Some(TokenKind::Bang) => { + TokenKind::Ident | TokenKind::RawIdent + if self.peek_non_whitespace() == Some(TokenKind::Bang) => + { self.in_macro = true; let span = new_span(before, text, file_span); sink(DUMMY_SP, Highlight::EnterSpan { class: Class::Macro(span) }); sink(span, Highlight::Token { text, class: None }); return; } - TokenKind::Ident => match get_real_ident_class(text, false) { - None => match text { - "Option" | "Result" => Class::PreludeTy(new_span(before, text, file_span)), - "Some" | "None" | "Ok" | "Err" => { - Class::PreludeVal(new_span(before, text, file_span)) - } - // "union" is a weak keyword and is only considered as a keyword when declaring - // a union type. - "union" if self.check_if_is_union_keyword() => Class::KeyWord, - _ if self.in_macro_nonterminal => { - self.in_macro_nonterminal = false; - Class::MacroNonTerminal - } - "self" | "Self" => Class::Self_(new_span(before, text, file_span)), - _ => Class::Ident(new_span(before, text, file_span)), - }, - Some(c) => c, - }, + TokenKind::Ident => { + match get_real_ident_class(text, false) { + None => match text { + "Option" | "Result" => Class::PreludeTy(new_span(before, text, file_span)), + "Some" | "None" | "Ok" | "Err" => { + Class::PreludeVal(new_span(before, text, file_span)) + } + // "union" is a weak keyword and is only considered as a keyword when declaring + // a union type. + "union" if self.check_if_is_union_keyword() => Class::KeyWord, + _ if self.in_macro_nonterminal => { + self.in_macro_nonterminal = false; + Class::MacroNonTerminal + } + "self" | "Self" => Class::Self_(new_span(before, text, file_span)), + _ => Class::Ident(new_span(before, text, file_span)), + }, + Some(c) => c, + } + } TokenKind::RawIdent | TokenKind::UnknownPrefix | TokenKind::InvalidIdent => { Class::Ident(new_span(before, text, file_span)) } @@ -1306,14 +1241,20 @@ impl<'src> Classifier<'src> { self.tokens.peek().map(|(token_kind, _text)| *token_kind) } - fn check_if_is_union_keyword(&mut self) -> bool { - while let Some(kind) = self.tokens.peek_next().map(|(token_kind, _text)| token_kind) { - if *kind == TokenKind::Whitespace { - continue; + fn peek_non_whitespace(&mut self) -> Option { + while let Some((token_kind, _)) = self.tokens.peek_next() { + if *token_kind != TokenKind::Whitespace { + let token_kind = *token_kind; + self.tokens.stop_peeking(); + return Some(token_kind); } - return *kind == TokenKind::Ident; } - false + self.tokens.stop_peeking(); + None + } + + fn check_if_is_union_keyword(&mut self) -> bool { + self.peek_non_whitespace().is_some_and(|kind| kind == TokenKind::Ident) } } From ab1dcee5f2749a0d32d1ebaab2023aad5f61d1c4 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Thu, 23 Oct 2025 18:09:10 +0200 Subject: [PATCH 6/6] Merge `ClassInfo::open` and `ClassInfo::link_closing_tag` fields into `closing_tags` Improve documentation Improve code --- src/librustdoc/html/highlight.rs | 98 ++++++++++++++++---------------- 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs index ecc67fa065fef..c37736f137df9 100644 --- a/src/librustdoc/html/highlight.rs +++ b/src/librustdoc/html/highlight.rs @@ -138,32 +138,30 @@ fn can_merge(class1: Option, class2: Option, text: &str) -> bool { #[derive(Debug)] struct ClassInfo { class: Class, - /// Set to true only when an item was written inside this tag. - open: bool, - /// Set to true when leaving the current item. The closing tag will be - /// written if: + /// If `Some`, then it means the tag was opened and needs to be closed. + closing_tag: Option<&'static str>, + /// Set to `true` by `exit_elem` to signal that all the elements of this class have been pushed. /// - /// 1. `self.open` is true - /// 2. Only when the first non-mergeable item is pushed. + /// The class will be closed and removed from the stack when the next non-mergeable item is + /// pushed. When it is removed, the closing tag will be written if (and only if) + /// `self.closing_tag` is `Some`. pending_exit: bool, - /// If `true`, it means it's ``, otherwise it's ``. - link_closing_tag: bool, } impl ClassInfo { - fn new(class: Class, pending_exit: bool) -> Self { - Self { class, open: pending_exit, pending_exit, link_closing_tag: false } + fn new(class: Class, closing_tag: Option<&'static str>) -> Self { + Self { class, closing_tag, pending_exit: closing_tag.is_some() } } fn close_tag(&self, out: &mut W) { - if self.open { - if self.link_closing_tag { - out.write_str("").unwrap(); - } else { - out.write_str("").unwrap(); - } + if let Some(closing_tag) = self.closing_tag { + out.write_str(closing_tag).unwrap(); } } + + fn is_open(&self) -> bool { + self.closing_tag.is_some() + } } /// This represents the stack of HTML elements. For example a macro expansion @@ -187,7 +185,7 @@ impl ClassStack { out: &mut W, href_context: &Option>, new_class: Class, - pending_exit: bool, + closing_tag: Option<&'static str>, ) { if let Some(current_class) = self.open_classes.last_mut() { if can_merge(Some(current_class.class), Some(new_class), "") { @@ -198,22 +196,22 @@ impl ClassStack { self.open_classes.pop(); } } - let mut class_info = ClassInfo::new(new_class, pending_exit); - if pending_exit { - class_info.open = true; - } else if matches!(new_class, Class::Decoration(_) | Class::Original) { - // We open it right away to ensure it always come at the expected location. - // FIXME: Should we instead add a new boolean field to `ClassInfo` to force a non-open - // tags to be added if another one comes before it's open? - write!(out, "", new_class.as_html()).unwrap(); - class_info.open = true; - } else if new_class.get_span().is_some() - && let Some(closing_tag) = - string_without_closing_tag(out, "", Some(class_info.class), href_context, false) - && !closing_tag.is_empty() - { - class_info.open = true; - class_info.link_closing_tag = closing_tag == ""; + let mut class_info = ClassInfo::new(new_class, closing_tag); + if closing_tag.is_none() { + if matches!(new_class, Class::Decoration(_) | Class::Original) { + // Even if a whitespace characters follows, we need to open the class right away + // as these characters are part of the element. + // FIXME: Should we instead add a new boolean field to `ClassInfo` to force a + // non-open tag to be added if another one comes before it's open? + write!(out, "", new_class.as_html()).unwrap(); + class_info.closing_tag = Some(""); + } else if new_class.get_span().is_some() + && let Some(closing_tag) = + string_without_closing_tag(out, "", Some(class_info.class), href_context, false) + && !closing_tag.is_empty() + { + class_info.closing_tag = Some(closing_tag); + } } self.open_classes.push(class_info); @@ -242,7 +240,7 @@ impl ClassStack { fn last_class_is_open(&self) -> bool { if let Some(last) = self.open_classes.last() { - last.open + last.is_open() } else { // If there is no class, then it's already open. true @@ -250,12 +248,9 @@ impl ClassStack { } fn close_last_if_needed(&mut self, out: &mut W) { - if let Some(last) = self.open_classes.last() - && last.pending_exit - && last.open + if let Some(last) = self.open_classes.pop_if(|class| class.pending_exit && class.is_open()) { last.close_tag(out); - self.open_classes.pop(); } } @@ -284,11 +279,11 @@ impl ClassStack { if !class_s.is_empty() { write!(out, "").unwrap(); } - current_class_info.open = true; + current_class_info.closing_tag = Some(""); } } - let current_class_is_open = self.open_classes.last().is_some_and(|c| c.open); + let current_class_is_open = self.open_classes.last().is_some_and(|c| c.is_open()); let can_merge = can_merge(class, current_class, &text); let should_open_tag = !current_class_is_open || !can_merge; @@ -312,14 +307,20 @@ impl ClassStack { } else if let Some(class) = class && !can_merge { - self.enter_elem(out, href_context, class, true); + self.enter_elem(out, href_context, class, Some("")); // Otherwise, we consider the actual `Class` to have been open. } else if let Some(current_class_info) = self.open_classes.last_mut() { - current_class_info.open = true; + current_class_info.closing_tag = Some(""); } } } + /// This method closes all open tags and returns the list of `Class` which were not already + /// closed (ie `pending_exit` set to `true`). + /// + /// It is used when starting a macro expansion: we need to close all HTML tags and then to + /// reopen them inside the newly created expansion HTML tag. Same goes when we close the + /// expansion. fn empty_stack(&mut self, out: &mut W) -> Vec { let mut classes = Vec::with_capacity(self.open_classes.len()); @@ -387,7 +388,7 @@ impl<'a, F: Write> TokenHandler<'a, '_, F> { let classes = self.class_stack.empty_stack(self.out); // We start the expansion tag. - self.class_stack.enter_elem(self.out, &self.href_context, Class::Expansion, false); + self.class_stack.enter_elem(self.out, &self.href_context, Class::Expansion, None); self.push_token_without_backline_check( Some(Class::Expansion), Cow::Owned(format!( @@ -401,9 +402,9 @@ impl<'a, F: Write> TokenHandler<'a, '_, F> { false, ); - // We re-open all tags. + // We re-open all tags that didn't have `pending_exit` set to `true`. for class in classes.into_iter().rev() { - self.class_stack.enter_elem(self.out, &self.href_context, class, false); + self.class_stack.enter_elem(self.out, &self.href_context, class, None); } } @@ -413,7 +414,7 @@ impl<'a, F: Write> TokenHandler<'a, '_, F> { Cow::Owned(format!("{}", expanded_code.code)), false, ); - self.class_stack.enter_elem(self.out, &self.href_context, Class::Original, false); + self.class_stack.enter_elem(self.out, &self.href_context, Class::Original, None); } fn close_expansion(&mut self) { @@ -423,7 +424,7 @@ impl<'a, F: Write> TokenHandler<'a, '_, F> { // We re-open all tags without expansion-related ones. for class in classes.into_iter().rev() { if !matches!(class, Class::Expansion | Class::Original) { - self.class_stack.enter_elem(self.out, &self.href_context, class, false); + self.class_stack.enter_elem(self.out, &self.href_context, class, None); } } } @@ -604,7 +605,7 @@ pub(super) fn write_code( token_handler.out, &token_handler.href_context, class, - false, + None, ); } Highlight::ExitSpan => { @@ -706,7 +707,6 @@ impl Class { | Self::Lifetime | Self::QuestionMark | Self::Decoration(_) - // | Self::Backline(_) | Self::Original | Self::Expansion => None, }