@@ -102,16 +102,41 @@ impl ArtifactStats {
102102 }
103103}
104104
105- /// Tries to match hashes produces by rustc in mangled symbol names.
106- static RUSTC_HASH_REGEX : OnceLock < Regex > = OnceLock :: new ( ) ;
107-
108105/// Demangle the symbol and remove rustc mangling hashes.
106+ ///
107+ /// Normalizes the following things, in the following order:
108+ /// - Demangles the symbol.
109+ /// - Removes `.cold` and `.warm` from the end of the symbol, to merge cold and hot parts of a function
110+ /// into the same symbol.
111+ /// - Removes rustc hashes from the symbol, e.g. `foo::[abcdef]` -> `foo::[]` or
112+ /// `foo::abcd` -> `foo`.
113+ /// - Removes suffixes after a dot from the symbol, e.g. `anon.abcdef.123` -> `anon` or
114+ /// `foo.llvm.123` -> `foo`.
109115fn normalize_symbol_name ( symbol : & str ) -> String {
110- let regex =
111- RUSTC_HASH_REGEX . get_or_init ( || Regex :: new ( r"(::)?\b[a-z0-9]{15,17}\b(\.\d+)?" ) . unwrap ( ) ) ;
116+ /// Tries to match hashes in brackets produces by rustc in mangled symbol names.
117+ static RUSTC_BRACKET_HASH_REGEX : OnceLock < Regex > = OnceLock :: new ( ) ;
118+ /// Tries to match hashes without brackets after :: produces by rustc in mangled symbol names.
119+ static RUSTC_HASH_REGEX : OnceLock < Regex > = OnceLock :: new ( ) ;
120+ /// Tries to match suffixes after a dot.
121+ static DOT_SUFFIX_REGEX : OnceLock < Regex > = OnceLock :: new ( ) ;
122+
123+ let bracket_hash_regex =
124+ RUSTC_BRACKET_HASH_REGEX . get_or_init ( || Regex :: new ( r"\[[a-z0-9]{13,17}\]" ) . unwrap ( ) ) ;
125+ let hash_regex = RUSTC_HASH_REGEX . get_or_init ( || Regex :: new ( r"::[a-z0-9]{15,17}$" ) . unwrap ( ) ) ;
126+ let dot_suffix_regex = DOT_SUFFIX_REGEX . get_or_init ( || Regex :: new ( r"\.[a-z0-9]+\b" ) . unwrap ( ) ) ;
127+
128+ let mut symbol = rustc_demangle:: demangle ( symbol) . to_string ( ) ;
112129
113- let symbol = rustc_demangle:: demangle ( symbol) . to_string ( ) ;
114- regex. replace_all ( & symbol, "" ) . to_string ( )
130+ if let Some ( stripped) = symbol. strip_suffix ( ".cold" ) {
131+ symbol = stripped. to_string ( ) ;
132+ }
133+ if let Some ( stripped) = symbol. strip_suffix ( ".warm" ) {
134+ symbol = stripped. to_string ( ) ;
135+ }
136+ let symbol = bracket_hash_regex. replace_all ( & symbol, "" ) ;
137+ let symbol = hash_regex. replace_all ( & symbol, "" ) ;
138+ let symbol = dot_suffix_regex. replace_all ( & symbol, "" ) ;
139+ symbol. to_string ( )
115140}
116141
117142/// Simple heuristic that tries to normalize section names.
@@ -229,3 +254,80 @@ pub fn compile_and_get_stats(
229254
230255 Ok ( archives)
231256}
257+
#[cfg(test)]
mod tests {
    use super::*;
    use rustc_demangle::demangle;

    /// Asserts both stages of symbol normalization for a single mangled symbol.
    ///
    /// `mangled` is the raw symbol name, `demangled` is what plain demangling is expected to
    /// produce and `normalized` is the expected output of `normalize_symbol_name`.
    /// The demangled form is checked mostly to keep the individual test cases readable.
    fn check(mangled: &str, demangled: &str, normalized: &str) {
        assert_eq!(demangle(mangled).to_string(), demangled);
        assert_eq!(normalize_symbol_name(mangled), normalized);
    }

    #[test]
    fn normalize_remove_cold_annotation() {
        // A trailing `.cold` has to be dropped.
        let mangled = "_RNvNtNtNtCs1WKcaCLTok2_16rustc_query_impl10query_impl23specialization_graph_of14get_query_incr26___rust_end_short_backtrace.cold";
        let demangled = "rustc_query_impl[16af0aa4f1d40934]::query_impl::specialization_graph_of::get_query_incr::__rust_end_short_backtrace.cold";
        let normalized = "rustc_query_impl::query_impl::specialization_graph_of::get_query_incr::__rust_end_short_backtrace";
        check(mangled, demangled, normalized);
    }

    #[test]
    fn normalize_remove_numeric_suffix() {
        // A trailing numeric suffix has to be dropped.
        // Here the demangler already takes care of it on its own.
        let mangled = "_RNvMs3_NtNtCs6gyBaxODSsO_12regex_syntax3ast5parseINtB5_7ParserIQNtB5_6ParserE19parse_with_commentsB9_.llvm.5849848722809994645";
        let demangled = "<regex_syntax[48ff133cf18e629c]::ast::parse::ParserI<&mut regex_syntax[48ff133cf18e629c]::ast::parse::Parser>>::parse_with_comments";
        let normalized = "<regex_syntax::ast::parse::ParserI<&mut regex_syntax::ast::parse::Parser>>::parse_with_comments";
        check(mangled, demangled, normalized);
    }

    #[test]
    fn normalize_remove_numeric_suffix_with_cold() {
        // A numeric suffix followed by `.cold` has to be dropped as a whole.
        let mangled = "_RNvMs_NtNtCs60zRYs2wPJS_11rustc_parse6parser2tyNtB6_6Parser15parse_ty_common.llvm.13047176952295404880.cold";
        let demangled = "<rustc_parse[45fe911b13bda40a]::parser::Parser>::parse_ty_common.llvm.13047176952295404880.cold";
        let normalized = "<rustc_parse::parser::Parser>::parse_ty_common";
        check(mangled, demangled, normalized);
    }

    #[test]
    fn normalize_hash_at_end() {
        // Hashes that trail the symbol after a dot have to be dropped.
        let mangled = "anon.58936091071a36b1b82cf536b463328b.3488";
        let demangled = "anon.58936091071a36b1b82cf536b463328b.3488";
        let normalized = "anon";
        check(mangled, demangled, normalized);
    }

    #[test]
    fn normalize_short_hash() {
        // Bracketed hashes that are shorter than usual have to be dropped as well.
        let mangled = "_RNvNtCsifRNxopDi_20rustc_builtin_macros6format16make_format_args";
        let demangled = "rustc_builtin_macros[e293f6447c7da]::format::make_format_args";
        let normalized = "rustc_builtin_macros::format::make_format_args";
        check(mangled, demangled, normalized);
    }

    #[test]
    fn normalize_hash_without_brackets() {
        // Hashes without brackets have to be dropped.
        let mangled =
            "_ZN10proc_macro5quote5quote28_$u7b$$u7b$closure$u7d$$u7d$17h90045007b0e69fc9E";
        let demangled = "proc_macro::quote::quote::{{closure}}::h90045007b0e69fc9";
        let normalized = "proc_macro::quote::quote::{{closure}}";
        check(mangled, demangled, normalized);
    }
}
0 commit comments