@@ -868,7 +868,7 @@ impl<'tcx> ExtraInfo<'tcx> {
868868
869869#[ derive( Eq , PartialEq , Clone , Debug ) ]
870870pub ( crate ) struct LangString {
871- original : String ,
871+ pub ( crate ) original : String ,
872872 pub ( crate ) should_panic : bool ,
873873 pub ( crate ) no_run : bool ,
874874 pub ( crate ) ignore : Ignore ,
@@ -893,11 +893,13 @@ pub(crate) enum Ignore {
893893/// ```eBNF
894894/// lang-string = *(token-list / delimited-attribute-list / comment)
895895///
896- /// bareword = CHAR *(CHAR)
896+ /// bareword = LEADINGCHAR *(CHAR)
897+ /// bareword-without-leading-char = CHAR *(CHAR)
897898/// quoted-string = QUOTE *(NONQUOTE) QUOTE
898899/// token = bareword / quoted-string
900+ /// token-without-leading-char = bareword-without-leading-char / quoted-string
899901/// sep = COMMA/WS *(COMMA/WS)
900- /// attribute = (DOT token)/(token EQUAL token)
902+ /// attribute = (DOT token)/(token EQUAL token-without-leading-char )
901903/// attribute-list = [sep] attribute *(sep attribute) [sep]
902904/// delimited-attribute-list = OPEN-CURLY-BRACKET attribute-list CLOSE-CURLY-BRACKET
903905/// token-list = [sep] token *(sep token) [sep]
@@ -907,8 +909,15 @@ pub(crate) enum Ignore {
907909/// CLOSE_PARENT = ")"
908910/// OPEN-CURLY-BRACKET = "{"
909911/// CLOSE-CURLY-BRACKET = "}"
910- /// CHAR = ALPHA / DIGIT / "_" / "-" / ":"
911- /// QUOTE = %x22
912+ /// LEADINGCHAR = ALPHA / DIGIT / "_" / "-" / ":"
913+ /// ; All ASCII punctuation except comma, quote, apostrophe, equals, backslash, grave (backquote), parentheses, square brackets and braces.
914+ /// ; Comma is used to separate language tokens, so it can't be used in one.
915+ /// ; Quote is used to allow otherwise-disallowed characters in language tokens.
916+ /// ; Equals is used to make key=value pairs in attribute blocks.
917+ /// ; Backslash and grave are special Markdown characters.
918+ /// ; Braces are used to start an attribute block.
919+ /// CHAR = ALPHA / DIGIT / "_" / "-" / ":" / "." / "!" / "#" / "$" / "%" / "&" / "*" / "+" / "/" /
920+ /// ";" / "<" / ">" / "?" / "@" / "^" / "|" / "~"
912921/// NONQUOTE = %x09 / %x20 / %x21 / %x23-7E ; TAB / SPACE / all printable characters except `"`
913922/// COMMA = ","
914923/// DOT = "."
@@ -932,9 +941,12 @@ pub(crate) enum LangStringToken<'a> {
932941 KeyValueAttribute ( & ' a str , & ' a str ) ,
933942}
934943
935- fn is_bareword_char ( c : char ) -> bool {
/// Returns `true` if `c` is allowed as the first character of a bareword.
///
/// Accepted characters are ASCII letters, ASCII digits, `_`, `-` and `:`.
fn is_leading_char(c: char) -> bool {
    c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | ':')
}
947+ fn is_bareword_char ( c : char ) -> bool {
948+ is_leading_char ( c) || ".!#$%&*+/;<>?@^|~" . contains ( c)
949+ }
/// Returns `true` if `c` is a separator character: a space, a comma or a tab.
fn is_separator(c: char) -> bool {
    matches!(c, ' ' | ',' | '\t')
}
@@ -1077,7 +1089,7 @@ impl<'a, 'tcx> TagIterator<'a, 'tcx> {
10771089 return self . next ( ) ;
10781090 } else if c == '.' {
10791091 return self . parse_class ( pos) ;
1080- } else if c == '"' || is_bareword_char ( c) {
1092+ } else if c == '"' || is_leading_char ( c) {
10811093 return self . parse_key_value ( c, pos) ;
10821094 } else {
10831095 self . emit_error ( format ! ( "unexpected character `{c}`" ) ) ;
@@ -1107,16 +1119,18 @@ impl<'a, 'tcx> TagIterator<'a, 'tcx> {
11071119 return None ;
11081120 }
11091121 let indices = self . parse_string ( pos) ?;
1110- if let Some ( ( _, c) ) = self . inner . peek ( ) . copied ( ) && c != '{' && !is_separator ( c) && c != '(' {
1122+ if let Some ( ( _, c) ) = self . inner . peek ( ) . copied ( ) &&
1123+ c != '{' &&
1124+ !is_separator ( c) &&
1125+ c != '('
1126+ {
11111127 self . emit_error ( format ! ( "expected ` `, `{{` or `,` after `\" `, found `{c}`" ) ) ;
11121128 return None ;
11131129 }
11141130 return Some ( LangStringToken :: LangToken ( & self . data [ indices. start ..indices. end ] ) ) ;
11151131 } else if c == '{' {
11161132 self . is_in_attribute_block = true ;
11171133 return self . next ( ) ;
1118- } else if is_bareword_char ( c) {
1119- continue ;
11201134 } else if is_separator ( c) {
11211135 if pos != start {
11221136 return Some ( LangStringToken :: LangToken ( & self . data [ start..pos] ) ) ;
@@ -1130,6 +1144,10 @@ impl<'a, 'tcx> TagIterator<'a, 'tcx> {
11301144 return Some ( LangStringToken :: LangToken ( & self . data [ start..pos] ) ) ;
11311145 }
11321146 return self . next ( ) ;
1147+ } else if pos == start && is_leading_char ( c) {
1148+ continue ;
1149+ } else if pos != start && is_bareword_char ( c) {
1150+ continue ;
11331151 } else {
11341152 self . emit_error ( format ! ( "unexpected character `{c}`" ) ) ;
11351153 return None ;
@@ -1158,6 +1176,29 @@ impl<'a, 'tcx> Iterator for TagIterator<'a, 'tcx> {
11581176 }
11591177}
11601178
1179+ fn tokens ( string : & str ) -> impl Iterator < Item = LangStringToken < ' _ > > {
1180+ // Pandoc, which Rust once used for generating documentation,
1181+ // expects lang strings to be surrounded by `{}` and for each token
1182+ // to be proceeded by a `.`. Since some of these lang strings are still
1183+ // loose in the wild, we strip a pair of surrounding `{}` from the lang
1184+ // string and a leading `.` from each token.
1185+
1186+ let string = string. trim ( ) ;
1187+
1188+ let first = string. chars ( ) . next ( ) ;
1189+ let last = string. chars ( ) . last ( ) ;
1190+
1191+ let string =
1192+ if first == Some ( '{' ) && last == Some ( '}' ) { & string[ 1 ..string. len ( ) - 1 ] } else { string } ;
1193+
1194+ string
1195+ . split ( |c| c == ',' || c == ' ' || c == '\t' )
1196+ . map ( str:: trim)
1197+ . map ( |token| token. strip_prefix ( '.' ) . unwrap_or ( token) )
1198+ . filter ( |token| !token. is_empty ( ) )
1199+ . map ( |token| LangStringToken :: LangToken ( token) )
1200+ }
1201+
11611202impl Default for LangString {
11621203 fn default ( ) -> Self {
11631204 Self {
@@ -1208,122 +1249,130 @@ impl LangString {
12081249
12091250 data. original = string. to_owned ( ) ;
12101251
1211- for token in TagIterator :: new ( string, extra) {
1212- match token {
1213- LangStringToken :: LangToken ( "should_panic" ) => {
1214- data. should_panic = true ;
1215- seen_rust_tags = !seen_other_tags;
1216- }
1217- LangStringToken :: LangToken ( "no_run" ) => {
1218- data. no_run = true ;
1219- seen_rust_tags = !seen_other_tags;
1220- }
1221- LangStringToken :: LangToken ( "ignore" ) => {
1222- data. ignore = Ignore :: All ;
1223- seen_rust_tags = !seen_other_tags;
1224- }
1225- LangStringToken :: LangToken ( x) if x. starts_with ( "ignore-" ) => {
1226- if enable_per_target_ignores {
1227- ignores. push ( x. trim_start_matches ( "ignore-" ) . to_owned ( ) ) ;
1252+ let mut call = |tokens : & mut dyn Iterator < Item = LangStringToken < ' _ > > | {
1253+ for token in tokens {
1254+ match token {
1255+ LangStringToken :: LangToken ( "should_panic" ) => {
1256+ data. should_panic = true ;
12281257 seen_rust_tags = !seen_other_tags;
12291258 }
1230- }
1231- LangStringToken :: LangToken ( "rust" ) => {
1232- data. rust = true ;
1233- seen_rust_tags = true ;
1234- }
1235- LangStringToken :: LangToken ( "custom" ) => {
1236- if custom_code_classes_in_docs {
1237- seen_custom_tag = true ;
1238- } else {
1239- seen_other_tags = true ;
1259+ LangStringToken :: LangToken ( "no_run" ) => {
1260+ data. no_run = true ;
1261+ seen_rust_tags = !seen_other_tags;
12401262 }
1241- }
1242- LangStringToken :: LangToken ( "test_harness" ) => {
1243- data. test_harness = true ;
1244- seen_rust_tags = !seen_other_tags || seen_rust_tags;
1245- }
1246- LangStringToken :: LangToken ( "compile_fail" ) => {
1247- data. compile_fail = true ;
1248- seen_rust_tags = !seen_other_tags || seen_rust_tags;
1249- data. no_run = true ;
1250- }
1251- LangStringToken :: LangToken ( x) if x. starts_with ( "edition" ) => {
1252- data. edition = x[ 7 ..] . parse :: < Edition > ( ) . ok ( ) ;
1253- }
1254- LangStringToken :: LangToken ( x)
1255- if allow_error_code_check && x. starts_with ( 'E' ) && x. len ( ) == 5 =>
1256- {
1257- if x[ 1 ..] . parse :: < u32 > ( ) . is_ok ( ) {
1258- data. error_codes . push ( x. to_owned ( ) ) ;
1263+ LangStringToken :: LangToken ( "ignore" ) => {
1264+ data. ignore = Ignore :: All ;
1265+ seen_rust_tags = !seen_other_tags;
1266+ }
1267+ LangStringToken :: LangToken ( x) if x. starts_with ( "ignore-" ) => {
1268+ if enable_per_target_ignores {
1269+ ignores. push ( x. trim_start_matches ( "ignore-" ) . to_owned ( ) ) ;
1270+ seen_rust_tags = !seen_other_tags;
1271+ }
1272+ }
1273+ LangStringToken :: LangToken ( "rust" ) => {
1274+ data. rust = true ;
1275+ seen_rust_tags = true ;
1276+ }
1277+ LangStringToken :: LangToken ( "custom" ) => {
1278+ if custom_code_classes_in_docs {
1279+ seen_custom_tag = true ;
1280+ } else {
1281+ seen_other_tags = true ;
1282+ }
1283+ }
1284+ LangStringToken :: LangToken ( "test_harness" ) => {
1285+ data. test_harness = true ;
12591286 seen_rust_tags = !seen_other_tags || seen_rust_tags;
1260- } else {
1261- seen_other_tags = true ;
12621287 }
1263- }
1264- LangStringToken :: LangToken ( x) if extra. is_some ( ) => {
1265- let s = x. to_lowercase ( ) ;
1266- if let Some ( ( flag, help) ) = if s == "compile-fail"
1267- || s == "compile_fail"
1268- || s == "compilefail"
1288+ LangStringToken :: LangToken ( "compile_fail" ) => {
1289+ data. compile_fail = true ;
1290+ seen_rust_tags = !seen_other_tags || seen_rust_tags;
1291+ data. no_run = true ;
1292+ }
1293+ LangStringToken :: LangToken ( x) if x. starts_with ( "edition" ) => {
1294+ data. edition = x[ 7 ..] . parse :: < Edition > ( ) . ok ( ) ;
1295+ }
1296+ LangStringToken :: LangToken ( x)
1297+ if allow_error_code_check && x. starts_with ( 'E' ) && x. len ( ) == 5 =>
12691298 {
1270- Some ( (
1271- "compile_fail" ,
1272- "the code block will either not be tested if not marked as a rust one \
1273- or won't fail if it compiles successfully",
1274- ) )
1275- } else if s == "should-panic" || s == "should_panic" || s == "shouldpanic" {
1276- Some ( (
1277- "should_panic" ,
1278- "the code block will either not be tested if not marked as a rust one \
1279- or won't fail if it doesn't panic when running",
1280- ) )
1281- } else if s == "no-run" || s == "no_run" || s == "norun" {
1282- Some ( (
1283- "no_run" ,
1284- "the code block will either not be tested if not marked as a rust one \
1285- or will be run (which you might not want)",
1286- ) )
1287- } else if s == "test-harness" || s == "test_harness" || s == "testharness" {
1288- Some ( (
1289- "test_harness" ,
1290- "the code block will either not be tested if not marked as a rust one \
1291- or the code will be wrapped inside a main function",
1292- ) )
1293- } else {
1294- None
1295- } {
1296- if let Some ( extra) = extra {
1297- extra. error_invalid_codeblock_attr_with_help (
1298- format ! ( "unknown attribute `{x}`. Did you mean `{flag}`?" ) ,
1299- help,
1300- ) ;
1299+ if x[ 1 ..] . parse :: < u32 > ( ) . is_ok ( ) {
1300+ data. error_codes . push ( x. to_owned ( ) ) ;
1301+ seen_rust_tags = !seen_other_tags || seen_rust_tags;
1302+ } else {
1303+ seen_other_tags = true ;
13011304 }
13021305 }
1303- seen_other_tags = true ;
1304- data. unknown . push ( x. to_owned ( ) ) ;
1305- }
1306- LangStringToken :: LangToken ( x) => {
1307- seen_other_tags = true ;
1308- data. unknown . push ( x. to_owned ( ) ) ;
1309- }
1310- LangStringToken :: KeyValueAttribute ( key, value) => {
1311- if custom_code_classes_in_docs {
1312- if key == "class" {
1313- data. added_classes . push ( value. to_owned ( ) ) ;
1314- } else if let Some ( extra) = extra {
1315- extra. error_invalid_codeblock_attr ( format ! (
1316- "unsupported attribute `{key}`"
1317- ) ) ;
1306+ LangStringToken :: LangToken ( x) if extra. is_some ( ) => {
1307+ let s = x. to_lowercase ( ) ;
1308+ if let Some ( ( flag, help) ) = if s == "compile-fail"
1309+ || s == "compile_fail"
1310+ || s == "compilefail"
1311+ {
1312+ Some ( (
1313+ "compile_fail" ,
1314+ "the code block will either not be tested if not marked as a rust one \
1315+ or won't fail if it compiles successfully",
1316+ ) )
1317+ } else if s == "should-panic" || s == "should_panic" || s == "shouldpanic" {
1318+ Some ( (
1319+ "should_panic" ,
1320+ "the code block will either not be tested if not marked as a rust one \
1321+ or won't fail if it doesn't panic when running",
1322+ ) )
1323+ } else if s == "no-run" || s == "no_run" || s == "norun" {
1324+ Some ( (
1325+ "no_run" ,
1326+ "the code block will either not be tested if not marked as a rust one \
1327+ or will be run (which you might not want)",
1328+ ) )
1329+ } else if s == "test-harness" || s == "test_harness" || s == "testharness" {
1330+ Some ( (
1331+ "test_harness" ,
1332+ "the code block will either not be tested if not marked as a rust one \
1333+ or the code will be wrapped inside a main function",
1334+ ) )
1335+ } else {
1336+ None
1337+ } {
1338+ if let Some ( extra) = extra {
1339+ extra. error_invalid_codeblock_attr_with_help (
1340+ format ! ( "unknown attribute `{x}`. Did you mean `{flag}`?" ) ,
1341+ help,
1342+ ) ;
1343+ }
13181344 }
1319- } else {
13201345 seen_other_tags = true ;
1346+ data. unknown . push ( x. to_owned ( ) ) ;
1347+ }
1348+ LangStringToken :: LangToken ( x) => {
1349+ seen_other_tags = true ;
1350+ data. unknown . push ( x. to_owned ( ) ) ;
1351+ }
1352+ LangStringToken :: KeyValueAttribute ( key, value) => {
1353+ if custom_code_classes_in_docs {
1354+ if key == "class" {
1355+ data. added_classes . push ( value. to_owned ( ) ) ;
1356+ } else if let Some ( extra) = extra {
1357+ extra. error_invalid_codeblock_attr ( format ! (
1358+ "unsupported attribute `{key}`"
1359+ ) ) ;
1360+ }
1361+ } else {
1362+ seen_other_tags = true ;
1363+ }
1364+ }
1365+ LangStringToken :: ClassAttribute ( class) => {
1366+ data. added_classes . push ( class. to_owned ( ) ) ;
13211367 }
1322- }
1323- LangStringToken :: ClassAttribute ( class) => {
1324- data. added_classes . push ( class. to_owned ( ) ) ;
13251368 }
13261369 }
1370+ } ;
1371+
1372+ if custom_code_classes_in_docs {
1373+ call ( & mut TagIterator :: new ( string, extra) . into_iter ( ) )
1374+ } else {
1375+ call ( & mut tokens ( string) )
13271376 }
13281377
13291378 // ignore-foo overrides ignore
0 commit comments