@@ -102,16 +102,44 @@ impl ArtifactStats {
102102 }
103103}
104104
105- /// Tries to match hashes produces by rustc in mangled symbol names.
106- static RUSTC_HASH_REGEX : OnceLock < Regex > = OnceLock :: new ( ) ;
107-
108105/// Demangle the symbol and remove rustc mangling hashes.
106+ ///
107+ /// Normalizes the following things, in the following order:
108+ /// - Demangles the symbol.
109+ /// - Removes `.cold` and `.warm` from the end of the symbol, to merge cold and hot parts of a function
110+ /// into the same symbol.
111+ /// - Removes rustc hashes from the symbol, e.g. `foo::[abcdef]` -> `foo::[]` or
112+ /// `foo::abcd` -> `foo`.
113+ /// - Removes suffixes after a dot from the symbol, e.g. `anon.abcdef.123` -> `anon` or
114+ /// `foo.llvm.123` -> `foo`.
115+ ///
116+ /// These modifications should remove things added by LLVM in the LTO/PGO phase.
117+ /// See more information here: https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html#vendor-specific-suffix
109118fn normalize_symbol_name ( symbol : & str ) -> String {
110- let regex =
111- RUSTC_HASH_REGEX . get_or_init ( || Regex :: new ( r"(::)?\b[a-z0-9]{15,17}\b(\.\d+)?" ) . unwrap ( ) ) ;
119+ /// Tries to match hashes in brackets produced by rustc in mangled symbol names.
120+ static RUSTC_BRACKET_HASH_REGEX : OnceLock < Regex > = OnceLock :: new ( ) ;
121+ /// Tries to match hashes without brackets after :: produced by rustc in mangled symbol names.
122+ static RUSTC_HASH_REGEX : OnceLock < Regex > = OnceLock :: new ( ) ;
123+ /// Tries to match suffixes after a dot.
124+ static DOT_SUFFIX_REGEX : OnceLock < Regex > = OnceLock :: new ( ) ;
125+
126+ let bracket_hash_regex =
127+ RUSTC_BRACKET_HASH_REGEX . get_or_init ( || Regex :: new ( r"\[[a-z0-9]{13,17}\]" ) . unwrap ( ) ) ;
128+ let hash_regex = RUSTC_HASH_REGEX . get_or_init ( || Regex :: new ( r"::[a-z0-9]{15,17}$" ) . unwrap ( ) ) ;
129+ let dot_suffix_regex = DOT_SUFFIX_REGEX . get_or_init ( || Regex :: new ( r"\.[a-z0-9]+\b" ) . unwrap ( ) ) ;
130+
131+ let mut symbol = rustc_demangle:: demangle ( symbol) . to_string ( ) ;
112132
113- let symbol = rustc_demangle:: demangle ( symbol) . to_string ( ) ;
114- regex. replace_all ( & symbol, "" ) . to_string ( )
133+ if let Some ( stripped) = symbol. strip_suffix ( ".cold" ) {
134+ symbol = stripped. to_string ( ) ;
135+ }
136+ if let Some ( stripped) = symbol. strip_suffix ( ".warm" ) {
137+ symbol = stripped. to_string ( ) ;
138+ }
139+ let symbol = bracket_hash_regex. replace_all ( & symbol, "" ) ;
140+ let symbol = hash_regex. replace_all ( & symbol, "" ) ;
141+ let symbol = dot_suffix_regex. replace_all ( & symbol, "" ) ;
142+ symbol. to_string ( )
115143}
116144
117145/// Simple heuristic that tries to normalize section names.
@@ -229,3 +257,80 @@ pub fn compile_and_get_stats(
229257
230258 Ok ( archives)
231259}
260+
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts both stages of symbol normalization for a single mangled symbol.
    ///
    /// `demangled_expected` is what plain demangling must produce; it is here mainly so that
    /// each test case documents the intermediate form. `normalized_expected` is the final
    /// output expected from `normalize_symbol_name`.
    fn check(mangled: &str, demangled_expected: &str, normalized_expected: &str) {
        let demangled = rustc_demangle::demangle(mangled).to_string();
        assert_eq!(demangled, demangled_expected);

        let normalized = normalize_symbol_name(mangled);
        assert_eq!(normalized, normalized_expected);
    }

    #[test]
    fn normalize_remove_cold_annotation() {
        // A trailing `.cold` must be dropped.
        check(
            "_RNvNtNtNtCs1WKcaCLTok2_16rustc_query_impl10query_impl23specialization_graph_of14get_query_incr26___rust_end_short_backtrace.cold",
            "rustc_query_impl[16af0aa4f1d40934]::query_impl::specialization_graph_of::get_query_incr::__rust_end_short_backtrace.cold",
            "rustc_query_impl::query_impl::specialization_graph_of::get_query_incr::__rust_end_short_backtrace",
        );
    }

    #[test]
    fn normalize_remove_numeric_suffix() {
        // A trailing numeric suffix must be dropped.
        // Here the demangler already discards it, as the expected demangled form shows.
        check(
            "_RNvMs3_NtNtCs6gyBaxODSsO_12regex_syntax3ast5parseINtB5_7ParserIQNtB5_6ParserE19parse_with_commentsB9_.llvm.5849848722809994645",
            "<regex_syntax[48ff133cf18e629c]::ast::parse::ParserI<&mut regex_syntax[48ff133cf18e629c]::ast::parse::Parser>>::parse_with_comments",
            "<regex_syntax::ast::parse::ParserI<&mut regex_syntax::ast::parse::Parser>>::parse_with_comments",
        );
    }

    #[test]
    fn normalize_remove_numeric_suffix_with_cold() {
        // A numeric suffix followed by `.cold` must be dropped as a whole.
        check(
            "_RNvMs_NtNtCs60zRYs2wPJS_11rustc_parse6parser2tyNtB6_6Parser15parse_ty_common.llvm.13047176952295404880.cold",
            "<rustc_parse[45fe911b13bda40a]::parser::Parser>::parse_ty_common.llvm.13047176952295404880.cold",
            "<rustc_parse::parser::Parser>::parse_ty_common",
        );
    }

    #[test]
    fn normalize_hash_at_end() {
        // Hash-like dot suffixes at the end of the symbol must be dropped.
        check(
            "anon.58936091071a36b1b82cf536b463328b.3488",
            "anon.58936091071a36b1b82cf536b463328b.3488",
            "anon",
        );
    }

    #[test]
    fn normalize_short_hash() {
        // Bracketed hashes shorter than the usual 16 characters must be dropped too.
        check(
            "_RNvNtCsifRNxopDi_20rustc_builtin_macros6format16make_format_args",
            "rustc_builtin_macros[e293f6447c7da]::format::make_format_args",
            "rustc_builtin_macros::format::make_format_args",
        );
    }

    #[test]
    fn normalize_hash_without_brackets() {
        // A trailing hash that is not wrapped in brackets must be dropped.
        check(
            "_ZN10proc_macro5quote5quote28_$u7b$$u7b$closure$u7d$$u7d$17h90045007b0e69fc9E",
            "proc_macro::quote::quote::{{closure}}::h90045007b0e69fc9",
            "proc_macro::quote::quote::{{closure}}",
        );
    }
}
0 commit comments