@@ -893,11 +893,13 @@ pub(crate) enum Ignore {
893893/// ```eBNF
894894/// lang-string = *(token-list / delimited-attribute-list / comment)
895895///
896- /// bareword = CHAR *(CHAR)
896+ /// bareword = LEADINGCHAR *(CHAR)
897+ /// bareword-without-leading-char = CHAR *(CHAR)
897898/// quoted-string = QUOTE *(NONQUOTE) QUOTE
898899/// token = bareword / quoted-string
900+ /// token-without-leading-char = bareword-without-leading-char / quoted-string
899901/// sep = COMMA/WS *(COMMA/WS)
900- /// attribute = (DOT token)/(token EQUAL token)
902+ /// attribute = (DOT token)/(token EQUAL token-without-leading-char)
901903/// attribute-list = [sep] attribute *(sep attribute) [sep]
902904/// delimited-attribute-list = OPEN-CURLY-BRACKET attribute-list CLOSE-CURLY-BRACKET
903905/// token-list = [sep] token *(sep token) [sep]
@@ -907,8 +909,15 @@ pub(crate) enum Ignore {
907909/// CLOSE_PARENT = ")"
908910/// OPEN-CURLY-BRACKET = "{"
909911/// CLOSE-CURLY-BRACKET = "}"
910- /// CHAR = ALPHA / DIGIT / "_" / "-" / ":"
911- /// QUOTE = %x22
912+ /// LEADINGCHAR = ALPHA / DIGIT / "_" / "-" / ":"
913+ /// ; All ASCII punctuation except comma, double quote, single quote, equals, backslash, grave (backquote), parentheses, square brackets and braces.
914+ /// ; Comma is used to separate language tokens, so it can't be used in one.
915+ /// ; Quote is used to allow otherwise-disallowed characters in language tokens.
916+ /// ; Equals is used to make key=value pairs in attribute blocks.
917+ /// ; Backslash and grave are special Markdown characters.
918+ /// ; Braces are used to start an attribute block.
919+ /// CHAR = ALPHA / DIGIT / "_" / "-" / ":" / "." / "!" / "#" / "$" / "%" / "&" / "*" / "+" / "/" /
920+ ///        ";" / "<" / ">" / "?" / "@" / "^" / "|" / "~"
912921/// NONQUOTE = %x09 / %x20 / %x21 / %x23-7E ; TAB / SPACE / all printable characters except `"`
913922/// COMMA = ","
914923/// DOT = "."
@@ -932,9 +941,12 @@ pub(crate) enum LangStringToken<'a> {
932941 KeyValueAttribute ( & ' a str , & ' a str ) ,
933942}
934943
935- fn is_bareword_char ( c : char ) -> bool {
/// Returns `true` if `c` may start a bareword (`LEADINGCHAR` in the grammar):
/// an ASCII letter, an ASCII digit, `_`, `-` or `:`.
fn is_leading_char(c: char) -> bool {
    c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | ':')
}
947+ fn is_bareword_char ( c : char ) -> bool {
948+ is_leading_char ( c) || ".!#$%&*+/;<>?@^|~" . contains ( c)
949+ }
/// Returns `true` if `c` separates tokens: a space, a comma or a tab.
fn is_separator(c: char) -> bool {
    matches!(c, ' ' | ',' | '\t')
}
@@ -1077,7 +1089,7 @@ impl<'a, 'tcx> TagIterator<'a, 'tcx> {
10771089 return self . next ( ) ;
10781090 } else if c == '.' {
10791091 return self . parse_class ( pos) ;
1080- } else if c == '"' || is_bareword_char ( c) {
1092+ } else if c == '"' || is_leading_char ( c) {
10811093 return self . parse_key_value ( c, pos) ;
10821094 } else {
10831095 self . emit_error ( format ! ( "unexpected character `{c}`" ) ) ;
@@ -1107,16 +1119,18 @@ impl<'a, 'tcx> TagIterator<'a, 'tcx> {
11071119 return None ;
11081120 }
11091121 let indices = self . parse_string ( pos) ?;
1110- if let Some ( ( _, c) ) = self . inner . peek ( ) . copied ( ) && c != '{' && !is_separator ( c) && c != '(' {
1122+ if let Some ( ( _, c) ) = self . inner . peek ( ) . copied ( ) &&
1123+ c != '{' &&
1124+ !is_separator ( c) &&
1125+ c != '('
1126+ {
11111127 self . emit_error ( format ! ( "expected ` `, `{{` or `,` after `\" `, found `{c}`" ) ) ;
11121128 return None ;
11131129 }
11141130 return Some ( LangStringToken :: LangToken ( & self . data [ indices. start ..indices. end ] ) ) ;
11151131 } else if c == '{' {
11161132 self . is_in_attribute_block = true ;
11171133 return self . next ( ) ;
1118- } else if is_bareword_char ( c) {
1119- continue ;
11201134 } else if is_separator ( c) {
11211135 if pos != start {
11221136 return Some ( LangStringToken :: LangToken ( & self . data [ start..pos] ) ) ;
@@ -1130,6 +1144,10 @@ impl<'a, 'tcx> TagIterator<'a, 'tcx> {
11301144 return Some ( LangStringToken :: LangToken ( & self . data [ start..pos] ) ) ;
11311145 }
11321146 return self . next ( ) ;
1147+ } else if pos == start && is_leading_char ( c) {
1148+ continue ;
1149+ } else if pos != start && is_bareword_char ( c) {
1150+ continue ;
11331151 } else {
11341152 self . emit_error ( format ! ( "unexpected character `{c}`" ) ) ;
11351153 return None ;
0 commit comments