Allow more characters in custom classes

GuillaumeGomez · GuillaumeGomez · commit 6bed1c60619b · 2023-09-19T17:29:30.000+02:00
diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs
@@ -893,11 +893,13 @@ pub(crate) enum Ignore {
 /// ```eBNF
 /// lang-string = *(token-list / delimited-attribute-list / comment)
 ///
-/// bareword = CHAR *(CHAR)
+/// bareword = LEADINGCHAR *(CHAR)
+/// bareword-without-leading-char = CHAR *(CHAR)
 /// quoted-string = QUOTE *(NONQUOTE) QUOTE
 /// token = bareword / quoted-string
+/// token-without-leading-char = bareword-without-leading-char / quoted-string
 /// sep = COMMA/WS *(COMMA/WS)
-/// attribute = (DOT token)/(token EQUAL token)
+/// attribute = (DOT token)/(token EQUAL token-without-leading-char)
 /// attribute-list = [sep] attribute *(sep attribute) [sep]
 /// delimited-attribute-list = OPEN-CURLY-BRACKET attribute-list CLOSE-CURLY-BRACKET
 /// token-list = [sep] token *(sep token) [sep]
@@ -907,8 +909,15 @@ pub(crate) enum Ignore {
 /// CLOSE_PARENT = ")"
 /// OPEN-CURLY-BRACKET = "{"
 /// CLOSE-CURLY-BRACKET = "}"
-/// CHAR = ALPHA / DIGIT / "_" / "-" / ":"
-/// QUOTE = %x22
+/// LEADINGCHAR = ALPHA | DIGIT | "_" | "-" | ":"
+/// ; All ASCII punctuation except comma, quote, equals, backslash, grave (backquote) and braces.
+/// ; Comma is used to separate language tokens, so it can't be used in one.
+/// ; Quote is used to allow otherwise-disallowed characters in language tokens.
+/// ; Equals is used to make key=value pairs in attribute blocks.
+/// ; Backslash and grave are special Markdown characters.
+/// ; Braces are used to start an attribute block.
+/// CHAR = ALPHA | DIGIT | "_" | "-" | ":" | "." | "!" | "#" | "$" | "%" | "&" | "*" | "+" | "/" |
+///        ";" | "<" | ">" | "?" | "@" | "^" | "|" | "~"
 /// NONQUOTE = %x09 / %x20 / %x21 / %x23-7E ; TAB / SPACE / all printable characters except `"`
 /// COMMA = ","
 /// DOT = "."
@@ -932,9 +941,12 @@ pub(crate) enum LangStringToken<'a> {
     KeyValueAttribute(&'a str, &'a str),
 }
 
-fn is_bareword_char(c: char) -> bool {
+fn is_leading_char(c: char) -> bool {
     c == '_' || c == '-' || c == ':' || c.is_ascii_alphabetic() || c.is_ascii_digit()
 }
+fn is_bareword_char(c: char) -> bool {
+    is_leading_char(c) || ".!#$%&*+/;<>?@^|~".contains(c)
+}
 fn is_separator(c: char) -> bool {
     c == ' ' || c == ',' || c == '\t'
 }
@@ -1077,7 +1089,7 @@ impl<'a, 'tcx> TagIterator<'a, 'tcx> {
                 return self.next();
             } else if c == '.' {
                 return self.parse_class(pos);
-            } else if c == '"' || is_bareword_char(c) {
+            } else if c == '"' || is_leading_char(c) {
                 return self.parse_key_value(c, pos);
             } else {
                 self.emit_error(format!("unexpected character `{c}`"));
@@ -1107,16 +1119,18 @@ impl<'a, 'tcx> TagIterator<'a, 'tcx> {
                     return None;
                 }
                 let indices = self.parse_string(pos)?;
-                if let Some((_, c)) = self.inner.peek().copied() && c != '{' && !is_separator(c) && c != '(' {
+                if let Some((_, c)) = self.inner.peek().copied() &&
+                    c != '{' &&
+                    !is_separator(c) &&
+                    c != '('
+                {
                     self.emit_error(format!("expected ` `, `{{` or `,` after `\"`, found `{c}`"));
                     return None;
                 }
                 return Some(LangStringToken::LangToken(&self.data[indices.start..indices.end]));
             } else if c == '{' {
                 self.is_in_attribute_block = true;
                 return self.next();
-            } else if is_bareword_char(c) {
-                continue;
             } else if is_separator(c) {
                 if pos != start {
                     return Some(LangStringToken::LangToken(&self.data[start..pos]));
@@ -1130,6 +1144,10 @@ impl<'a, 'tcx> TagIterator<'a, 'tcx> {
                     return Some(LangStringToken::LangToken(&self.data[start..pos]));
                 }
                 return self.next();
+            } else if pos == start && is_leading_char(c) {
+                continue;
+            } else if pos != start && is_bareword_char(c) {
+                continue;
             } else {
                 self.emit_error(format!("unexpected character `{c}`"));
                 return None;
diff --git a/src/librustdoc/html/markdown/tests.rs b/src/librustdoc/html/markdown/tests.rs
@@ -226,13 +226,28 @@ fn test_lang_string_parse() {
         ..Default::default()
     });
     // error
-    t(LangString { original: "{.first.second}".into(), rust: true, ..Default::default() });
+    t(LangString {
+        original: "{.first.second}".into(),
+        rust: true,
+        added_classes: vec!["first.second".into()],
+        ..Default::default()
+    });
     // error
     t(LangString { original: "{class=first=second}".into(), rust: true, ..Default::default() });
     // error
-    t(LangString { original: "{class=first.second}".into(), rust: true, ..Default::default() });
+    t(LangString {
+        original: "{class=first.second}".into(),
+        rust: true,
+        added_classes: vec!["first.second".into()],
+        ..Default::default()
+    });
     // error
-    t(LangString { original: "{class=.first}".into(), rust: true, ..Default::default() });
+    t(LangString {
+        original: "{class=.first}".into(),
+        added_classes: vec![".first".into()],
+        rust: true,
+        ..Default::default()
+    });
     t(LangString {
         original: r#"{class="first"}"#.into(),
         added_classes: vec!["first".into()],