@@ -102,16 +102,41 @@ impl ArtifactStats {
102102 }
103103}
104104
105- /// Tries to match hashes produces by rustc in mangled symbol names.
106- static RUSTC_HASH_REGEX : OnceLock < Regex > = OnceLock :: new ( ) ;
107-
108105/// Demangle the symbol and remove rustc mangling hashes.
106+ ///
107+ /// Normalizes the following things, in the following order:
108+ /// - Demangles the symbol.
109+ /// - Removes `.cold` and `.warm` from the end of the symbol, to merge cold and hot parts of a function
110+ /// into the same symbol.
111+ /// - Removes rustc hashes from the symbol, brackets included, e.g. `foo[abcdef]` -> `foo` or
112+ /// `foo::abcd` -> `foo` (hashes are shortened in these examples).
113+ /// - Removes suffixes after a dot from the symbol, e.g. `anon.abcdef.123` -> `anon` or
114+ /// `foo.llvm.123` -> `foo`.
109115fn normalize_symbol_name ( symbol : & str ) -> String {
110- let regex =
111- RUSTC_HASH_REGEX . get_or_init ( || Regex :: new ( r#"(::)?\b[a-z0-9]{15,17}\b(\.\d+)?"# ) . unwrap ( ) ) ;
116+ /// Tries to match hashes in brackets produced by rustc in mangled symbol names.
117+ static RUSTC_BRACKET_HASH_REGEX : OnceLock < Regex > = OnceLock :: new ( ) ;
118+ /// Tries to match hashes after :: produced by rustc in mangled symbol names.
119+ static RUSTC_HASH_REGEX : OnceLock < Regex > = OnceLock :: new ( ) ;
120+ /// Tries to match suffixes after a dot.
121+ static DOT_SUFFIX_REGEX : OnceLock < Regex > = OnceLock :: new ( ) ;
122+
123+ let bracket_hash_regex =
124+ RUSTC_BRACKET_HASH_REGEX . get_or_init ( || Regex :: new ( r#"\[[a-z0-9]{13,17}\]"# ) . unwrap ( ) ) ;
125+ let hash_regex = RUSTC_HASH_REGEX . get_or_init ( || Regex :: new ( r#"::[a-z0-9]{15,17}$"# ) . unwrap ( ) ) ;
126+ let dot_suffix_regex = DOT_SUFFIX_REGEX . get_or_init ( || Regex :: new ( r#"\.[a-z0-9]+\b"# ) . unwrap ( ) ) ;
127+
128+ let mut symbol = rustc_demangle:: demangle ( symbol) . to_string ( ) ;
112129
113- let symbol = rustc_demangle:: demangle ( symbol) . to_string ( ) ;
114- regex. replace_all ( & symbol, "" ) . to_string ( )
130+ if let Some ( stripped) = symbol. strip_suffix ( ".cold" ) {
131+ symbol = stripped. to_string ( ) ;
132+ }
133+ if let Some ( stripped) = symbol. strip_suffix ( ".warm" ) {
134+ symbol = stripped. to_string ( ) ;
135+ }
136+ let symbol = bracket_hash_regex. replace_all ( & symbol, "" ) ;
137+ let symbol = hash_regex. replace_all ( & symbol, "" ) ;
138+ let symbol = dot_suffix_regex. replace_all ( & symbol, "" ) ;
139+ symbol. to_string ( )
115140}
116141
117142/// Simple heuristic that tries to normalize section names.
@@ -229,3 +254,60 @@ pub fn compile_and_get_stats(
229254
230255 Ok ( archives)
231256}
257+
258+ #[ cfg( test) ]
259+ mod tests {
260+ use super :: * ;
261+
262+ #[ test]
263+ fn normalize_remove_cold_annotation ( ) {
264+ assert_eq ! (
265+ normalize_symbol_name( "_RNvNtNtNtCs1WKcaCLTok2_16rustc_query_impl10query_impl23specialization_graph_of14get_query_incr26___rust_end_short_backtrace.cold" ) ,
266+ "rustc_query_impl::query_impl::specialization_graph_of::get_query_incr::__rust_end_short_backtrace" . to_string( )
267+ ) ;
268+ }
269+
270+ #[ test]
271+ fn normalize_remove_numeric_suffix ( ) {
272+ assert_eq ! (
273+ normalize_symbol_name( "_RNvMs3_NtNtCs6gyBaxODSsO_12regex_syntax3ast5parseINtB5_7ParserIQNtB5_6ParserE19parse_with_commentsB9_.llvm.5849848722809994645" ) ,
274+ "<regex_syntax::ast::parse::ParserI<&mut regex_syntax::ast::parse::Parser>>::parse_with_comments" . to_string( )
275+ ) ;
276+ }
277+
278+ #[ test]
279+ fn normalize_remove_numeric_suffix_with_cold ( ) {
280+ assert_eq ! (
281+ normalize_symbol_name( "_RNvMs_NtNtCs60zRYs2wPJS_11rustc_parse6parser2tyNtB6_6Parser15parse_ty_common.llvm.13047176952295404880.cold" ) ,
282+ "<rustc_parse::parser::Parser>::parse_ty_common" . to_string( )
283+ ) ;
284+ }
285+
286+ #[ test]
287+ fn normalize_hash_at_end ( ) {
288+ assert_eq ! (
289+ normalize_symbol_name( "anon.58936091071a36b1b82cf536b463328b.3488" ) ,
290+ "anon" . to_string( )
291+ ) ;
292+ }
293+
294+ #[ test]
295+ fn normalize_short_hash ( ) {
296+ assert_eq ! (
297+ normalize_symbol_name(
298+ "_RNvNtCsifRNxopDi_20rustc_builtin_macros6format16make_format_args"
299+ ) ,
300+ "rustc_builtin_macros::format::make_format_args" . to_string( )
301+ ) ;
302+ }
303+
304+ #[ test]
305+ fn normalize_hash_without_brackets ( ) {
306+ assert_eq ! (
307+ normalize_symbol_name(
308+ "_ZN10proc_macro5quote5quote28_$u7b$$u7b$closure$u7d$$u7d$17h90045007b0e69fc9E"
309+ ) ,
310+ "proc_macro::quote::quote::{{closure}}" . to_string( )
311+ ) ;
312+ }
313+ }
0 commit comments