11use core:: convert:: TryFrom ;
2- use core:: { char, fmt, iter, mem} ;
2+ use core:: { char, fmt, iter, mem, str } ;
33
44#[ allow( unused_macros) ]
55macro_rules! write {
@@ -287,6 +287,84 @@ impl<'s> HexNibbles<'s> {
287287 }
288288 Some ( v)
289289 }
290+
291+ /// Decode a UTF-8 byte sequence (with each byte using a pair of nibbles)
292+ /// into individual `char`s, returning `None` for invalid UTF-8.
293+ fn try_parse_str_chars ( & self ) -> Option < impl Iterator < Item = char > + ' s > {
294+ if self . nibbles . len ( ) % 2 != 0 {
295+ return None ;
296+ }
297+
298+ // FIXME(eddyb) use `array_chunks` instead, when that becomes stable.
299+ let mut bytes = self
300+ . nibbles
301+ . as_bytes ( )
302+ . chunks_exact ( 2 )
303+ . map ( |slice| match slice {
304+ [ a, b] => [ a, b] ,
305+ _ => unreachable ! ( ) ,
306+ } )
307+ . map ( |[ & hi, & lo] | {
308+ let half = |nibble : u8 | ( nibble as char ) . to_digit ( 16 ) . unwrap ( ) as u8 ;
309+ ( half ( hi) << 4 ) | half ( lo)
310+ } ) ;
311+
312+ let chars = iter:: from_fn ( move || {
313+ // As long as there are any bytes left, there's at least one more
314+ // UTF-8-encoded `char` to decode (or the possibility of error).
315+ bytes. next ( ) . map ( |first_byte| -> Result < char , ( ) > {
316+ // FIXME(eddyb) this `enum` and `fn` should be somewhere in `core`.
317+ enum Utf8FirstByteError {
318+ ContinuationByte ,
319+ TooLong ,
320+ }
321+ fn utf8_len_from_first_byte ( byte : u8 ) -> Result < usize , Utf8FirstByteError > {
322+ match byte {
323+ 0x00 ..=0x7f => Ok ( 1 ) ,
324+ 0x80 ..=0xbf => Err ( Utf8FirstByteError :: ContinuationByte ) ,
325+ 0xc0 ..=0xdf => Ok ( 2 ) ,
326+ 0xe0 ..=0xef => Ok ( 3 ) ,
327+ 0xf0 ..=0xf7 => Ok ( 4 ) ,
328+ 0xf8 ..=0xff => Err ( Utf8FirstByteError :: TooLong ) ,
329+ }
330+ }
331+
332+ // Collect the appropriate amount of bytes (up to 4), according
333+ // to the UTF-8 length implied by the first byte.
334+ let utf8_len = utf8_len_from_first_byte ( first_byte) . map_err ( |_| ( ) ) ?;
335+ let utf8 = & mut [ first_byte, 0 , 0 , 0 ] [ ..utf8_len] ;
336+ for i in 1 ..utf8_len {
337+ utf8[ i] = bytes. next ( ) . ok_or ( ( ) ) ?;
338+ }
339+
340+ // Fully validate the UTF-8 sequence.
341+ let s = str:: from_utf8 ( utf8) . map_err ( |_| ( ) ) ?;
342+
343+ // Since we included exactly one UTF-8 sequence, and validation
344+ // succeeded, `str::chars` should return exactly one `char`.
345+ let mut chars = s. chars ( ) ;
346+ match ( chars. next ( ) , chars. next ( ) ) {
347+ ( Some ( c) , None ) => Ok ( c) ,
348+ _ => unreachable ! (
349+ "str::from_utf8({:?}) = {:?} was expected to have 1 char, \
350+ but {} chars were found",
351+ utf8,
352+ s,
353+ s. chars( ) . count( )
354+ ) ,
355+ }
356+ } )
357+ } ) ;
358+
359+ // HACK(eddyb) doing a separate validation iteration like this might be
360+ // wasteful, but it's easier to avoid starting to print a string literal
361+ // in the first place, than to abort it mid-string.
362+ if chars. clone ( ) . any ( |r| r. is_err ( ) ) {
363+ None
364+ } else {
365+ Some ( chars. map ( Result :: unwrap) )
366+ }
367+ }
290368}
291369
292370fn basic_type ( tag : u8 ) -> Option < & ' static str > {
@@ -815,7 +893,7 @@ impl<'a, 'b, 's> Printer<'a, 'b, 's> {
815893 let lt = parse ! ( self , integer_62) ;
816894 self . print_lifetime_from_index ( lt)
817895 } else if self . eat ( b'K' ) {
818- self . print_const ( )
896+ self . print_const ( false )
819897 } else {
820898 self . print_type ( )
821899 }
@@ -861,7 +939,7 @@ impl<'a, 'b, 's> Printer<'a, 'b, 's> {
861939 self . print_type ( ) ?;
862940 if tag == b'A' {
863941 self . print ( "; " ) ?;
864- self . print_const ( ) ?;
942+ self . print_const ( true ) ?;
865943 }
866944 self . print ( "]" ) ?;
867945 }
@@ -1001,11 +1079,28 @@ impl<'a, 'b, 's> Printer<'a, 'b, 's> {
10011079 Ok ( ( ) )
10021080 }
10031081
1004- fn print_const ( & mut self ) -> fmt:: Result {
1082+ fn print_const ( & mut self , in_value : bool ) -> fmt:: Result {
10051083 let tag = parse ! ( self , next) ;
10061084
10071085 parse ! ( self , push_depth) ;
10081086
1087+ // Only literals (and the names of `const` generic parameters, but they
1088+ // don't get mangled at all), can appear in generic argument position
1089+ // without any disambiguation, all other expressions require braces.
1090+ // To avoid duplicating the mapping between `tag` and what syntax gets
1091+ // used (especially any special-casing), every case that needs braces
1092+ // has to call `open_brace(self)?` (and the closing brace is automatic).
1093+ let mut opened_brace = false ;
1094+ let mut open_brace_if_outside_expr = |this : & mut Self | {
1095+ // If this expression is nested in another, braces aren't required.
1096+ if in_value {
1097+ return Ok ( ( ) ) ;
1098+ }
1099+
1100+ opened_brace = true ;
1101+ this. print ( "{" )
1102+ } ;
1103+
10091104 match tag {
10101105 b'p' => self . print ( "_" ) ?,
10111106
@@ -1033,13 +1128,82 @@ impl<'a, 'b, 's> Printer<'a, 'b, 's> {
10331128 None => invalid ! ( self ) ,
10341129 }
10351130 }
1131+ b'e' => {
1132+ // NOTE(eddyb) a string literal `"..."` has type `&str`, so
1133+ // to get back the type `str`, `*"..."` syntax is needed
1134+ // (even if that may not be valid in Rust itself).
1135+ open_brace_if_outside_expr ( self ) ?;
1136+ self . print ( "*" ) ?;
1137+
1138+ self . print_const_str_literal ( ) ?;
1139+ }
10361140
1141+ b'R' | b'Q' => {
1142+ // NOTE(eddyb) this prints `"..."` instead of `&*"..."`, which
1143+ // is what `Re..._` would imply (see comment for `str` above).
1144+ if tag == b'R' && self . eat ( b'e' ) {
1145+ self . print_const_str_literal ( ) ?;
1146+ } else {
1147+ open_brace_if_outside_expr ( self ) ?;
1148+ self . print ( "&" ) ?;
1149+ if tag != b'R' {
1150+ self . print ( "mut " ) ?;
1151+ }
1152+ self . print_const ( true ) ?;
1153+ }
1154+ }
1155+ b'A' => {
1156+ open_brace_if_outside_expr ( self ) ?;
1157+ self . print ( "[" ) ?;
1158+ self . print_sep_list ( |this| this. print_const ( true ) , ", " ) ?;
1159+ self . print ( "]" ) ?;
1160+ }
1161+ b'T' => {
1162+ open_brace_if_outside_expr ( self ) ?;
1163+ self . print ( "(" ) ?;
1164+ let count = self . print_sep_list ( |this| this. print_const ( true ) , ", " ) ?;
1165+ if count == 1 {
1166+ self . print ( "," ) ?;
1167+ }
1168+ self . print ( ")" ) ?;
1169+ }
1170+ b'V' => {
1171+ open_brace_if_outside_expr ( self ) ?;
1172+ self . print_path ( true ) ?;
1173+ match parse ! ( self , next) {
1174+ b'U' => { }
1175+ b'T' => {
1176+ self . print ( "(" ) ?;
1177+ self . print_sep_list ( |this| this. print_const ( true ) , ", " ) ?;
1178+ self . print ( ")" ) ?;
1179+ }
1180+ b'S' => {
1181+ self . print ( " { " ) ?;
1182+ self . print_sep_list (
1183+ |this| {
1184+ parse ! ( this, disambiguator) ;
1185+ let name = parse ! ( this, ident) ;
1186+ this. print ( name) ?;
1187+ this. print ( ": " ) ?;
1188+ this. print_const ( true )
1189+ } ,
1190+ ", " ,
1191+ ) ?;
1192+ self . print ( " }" ) ?;
1193+ }
1194+ _ => invalid ! ( self ) ,
1195+ }
1196+ }
10371197 b'B' => {
1038- self . print_backref ( Self :: print_const) ?;
1198+ self . print_backref ( |this| this . print_const ( in_value ) ) ?;
10391199 }
10401200 _ => invalid ! ( self ) ,
10411201 }
10421202
1203+ if opened_brace {
1204+ self . print ( "}" ) ?;
1205+ }
1206+
10431207 self . pop_depth ( ) ;
10441208 Ok ( ( ) )
10451209 }
@@ -1066,6 +1230,13 @@ impl<'a, 'b, 's> Printer<'a, 'b, 's> {
10661230
10671231 Ok ( ( ) )
10681232 }
1233+
1234+ fn print_const_str_literal ( & mut self ) -> fmt:: Result {
1235+ match parse ! ( self , hex_nibbles) . try_parse_str_chars ( ) {
1236+ Some ( chars) => self . print_quoted_escaped_chars ( '"' , chars) ,
1237+ None => invalid ! ( self ) ,
1238+ }
1239+ }
10691240}
10701241
10711242#[ cfg( test) ]
@@ -1164,6 +1335,92 @@ mod tests {
11641335 t_const ! ( "c2202_" , "'∂'" ) ;
11651336 }
11661337
/// `str`-typed constants (tag `e`): the printer emits `*` followed by the
/// quoted literal, wrapped in braces when at the outermost level.
#[test]
fn demangle_const_str() {
    t_const!("e616263_", "{*\"abc\"}");
    t_const!("e27_", r#"{*"'"}"#);
    t_const!("e090a_", "{*\"\\t\\n\"}");
    t_const!("ee28882c3bc_", "{*\"∂ü\"}");
    t_const!(
        "ee183a1e18390e183ade1839be18394e1839ae18390e183935fe18392e18394e1839b\
         e183a0e18398e18394e1839ae183985fe183a1e18390e18393e18398e1839ae18398_",
        "{*\"საჭმელად_გემრიელი_სადილი\"}"
    );
    t_const!(
        "ef09f908af09fa688f09fa686f09f90ae20c2a720f09f90b6f09f9192e298\
         95f09f94a520c2a720f09fa7a1f09f929bf09f929af09f9299f09f929c_",
        "{*\"🐊🦈🦆🐮 § 🐶👒☕🔥 § 🧡💛💚💙💜\"}"
    );
}
1355+
// NOTE(eddyb) this uses the same strings as `demangle_const_str` and should
// be kept in sync with it - while a macro could be used to generate both
// `str` and `&str` tests, from a single list of strings, this seems clearer.
/// `&str`-typed constants (`Re..._`): printed as a bare quoted literal,
/// with neither `&*` nor surrounding braces.
#[test]
fn demangle_const_ref_str() {
    t_const!("Re616263_", "\"abc\"");
    t_const!("Re27_", r#""'""#);
    t_const!("Re090a_", "\"\\t\\n\"");
    t_const!("Ree28882c3bc_", "\"∂ü\"");
    t_const!(
        "Ree183a1e18390e183ade1839be18394e1839ae18390e183935fe18392e18394e1839b\
         e183a0e18398e18394e1839ae183985fe183a1e18390e18393e18398e1839ae18398_",
        "\"საჭმელად_გემრიელი_სადილი\""
    );
    t_const!(
        "Ref09f908af09fa688f09fa686f09f90ae20c2a720f09f90b6f09f9192e298\
         95f09f94a520c2a720f09fa7a1f09f929bf09f929af09f9299f09f929c_",
        "\"🐊🦈🦆🐮 § 🐶👒☕🔥 § 🧡💛💚💙💜\""
    );
}
1376+
/// Reference constants: `R` prints `&`, `Q` prints `&mut `, and only the
/// outermost expression gets braces (nested ones are printed "in value").
#[test]
fn demangle_const_ref() {
    t_const!("Rp", "{&_}");
    t_const!("Rh7b_", "{&123}");
    t_const!("Rb0_", "{&false}");
    t_const!("Rc58_", "{&'X'}");
    t_const!("RRRh0_", "{&&&0}");
    // The innermost `Re_` is the `&str` special case with an empty literal.
    t_const!("RRRe_", "{&&\"\"}");
    t_const!("QAE", "{&mut []}");
}
1387+
/// Array constants (tag `A`): elements are separated by `", "` inside
/// `[...]`, with braces only around the outermost expression.
#[test]
fn demangle_const_array() {
    t_const!("AE", "{[]}");
    t_const!("Aj0_E", "{[0]}");
    t_const!("Ah1_h2_h3_E", "{[1, 2, 3]}");
    t_const!("ARe61_Re62_Re63_E", "{[\"a\", \"b\", \"c\"]}");
    t_const!("AAh1_h2_EAh3_h4_EE", "{[[1, 2], [3, 4]]}");
}
1396+
/// Tuple constants (tag `T`): a one-element tuple keeps its trailing comma
/// so that it is not confused with a parenthesized expression.
#[test]
fn demangle_const_tuple() {
    t_const!("TE", "{()}");
    t_const!("Tj0_E", "{(0,)}");
    t_const!("Th1_b0_E", "{(1, false)}");
    t_const!(
        "TRe616263_c78_RAh1_h2_h3_EE",
        "{(\"abc\", 'x', &[1, 2, 3])}"
    );
}
1407+
/// ADT constants (tag `V`): a path followed by a unit (`U`), tuple-like
/// (`T`) or struct-like (`S`) constructor.
#[test]
fn demangle_const_adt() {
    t_const!(
        "VNvINtNtC4core6option6OptionjE4NoneU",
        "{core::option::Option::<usize>::None}"
    );
    t_const!(
        "VNvINtNtC4core6option6OptionjE4SomeTj0_E",
        "{core::option::Option::<usize>::Some(0)}"
    );
    t_const!(
        "VNtC3foo3BarS1sRe616263_2chc78_5sliceRAh1_h2_h3_EE",
        "{foo::Bar { s: \"abc\", ch: 'x', slice: &[1, 2, 3] }}"
    );
}
1423+
11671424 #[ test]
11681425 fn demangle_exponential_explosion ( ) {
11691426 // NOTE(eddyb) because of the prefix added by `t_nohash_type!` is
0 commit comments