@@ -862,19 +862,34 @@ pub(crate) struct TagIterator<'a, 'tcx> {
862862 extra : Option < & ' a ExtraInfo < ' tcx > > ,
863863}
864864
865- #[ derive( Debug , PartialEq ) ]
866- pub ( crate ) enum TokenKind < ' a > {
867- Token ( & ' a str ) ,
868- Attribute ( & ' a str ) ,
/// A single token produced while tokenizing a code block's language string.
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) enum LangStringToken<'a> {
    /// A token found outside of an attribute block (`{}`).
    LangToken(&'a str),
    /// A `.name` entry found inside an attribute block; holds the text after the `.`.
    ClassAttribute(&'a str),
    /// A `key=value` entry found inside an attribute block, stored as `(key, value)`.
    KeyValueAttribute(&'a str, &'a str),
}
870871
/// Returns `true` if `c` may appear in an unquoted ("bareword") token: an ASCII letter,
/// an ASCII digit, `_`, `-` or `:`.
fn is_bareword_char(c: char) -> bool {
    matches!(c, '_' | '-' | ':') || c.is_ascii_alphanumeric()
}
/// Returns `true` if `c` separates tokens: a space, a comma or a tab.
fn is_separator(c: char) -> bool {
    matches!(c, ' ' | ',' | '\t')
}
874878
/// A byte range into the tokenizer's input string.
struct Indices {
    /// Offset of the first byte of the range (inclusive).
    start: usize,
    /// Offset one past the last byte of the range (exclusive).
    end: usize,
}
883+
875884impl < ' a , ' tcx > TagIterator < ' a , ' tcx > {
876885 pub ( crate ) fn new ( data : & ' a str , extra : Option < & ' a ExtraInfo < ' tcx > > ) -> Self {
877- Self { inner : data. char_indices ( ) . peekable ( ) , data, extra, is_in_attribute_block : false }
886+ Self { inner : data. char_indices ( ) . peekable ( ) , data, is_in_attribute_block : false , extra }
887+ }
888+
889+ fn emit_error ( & self , err : & str ) {
890+ if let Some ( extra) = self . extra {
891+ extra. error_invalid_codeblock_attr ( err) ;
892+ }
878893 }
879894
880895 fn skip_separators ( & mut self ) -> Option < usize > {
@@ -887,84 +902,183 @@ impl<'a, 'tcx> TagIterator<'a, 'tcx> {
887902 None
888903 }
889904
890- fn emit_error ( & self , err : & str ) {
891- if let Some ( extra) = self . extra {
892- extra. error_invalid_codeblock_attr ( err) ;
905+ fn parse_string ( & mut self , start : usize ) -> Option < Indices > {
906+ while let Some ( ( pos, c) ) = self . inner . next ( ) {
907+ if c == '"' {
908+ return Some ( Indices { start : start + 1 , end : pos } ) ;
909+ }
893910 }
911+ self . emit_error ( "unclosed quote string `\" `" ) ;
912+ None
894913 }
895914
896- /// Returns false if the string is unfinished.
897- fn skip_string ( & mut self ) -> bool {
898- while let Some ( ( _, c) ) = self . inner . next ( ) {
899- if c == '"' {
900- return true ;
915+ fn parse_class ( & mut self , start : usize ) -> Option < LangStringToken < ' a > > {
916+ while let Some ( ( pos, c) ) = self . inner . peek ( ) . copied ( ) {
917+ if is_bareword_char ( c) {
918+ self . inner . next ( ) ;
919+ } else {
920+ let class = & self . data [ start + 1 ..pos] ;
921+ if class. is_empty ( ) {
922+ self . emit_error ( & format ! ( "unexpected `{c}` character after `.`" ) ) ;
923+ return None ;
924+ } else if self . check_after_token ( ) {
925+ return Some ( LangStringToken :: ClassAttribute ( class) ) ;
926+ } else {
927+ return None ;
928+ }
901929 }
902930 }
903- self . emit_error ( "unclosed quote string: missing `\" ` at the end" ) ;
904- false
931+ let class = & self . data [ start + 1 ..] ;
932+ if class. is_empty ( ) {
933+ self . emit_error ( "missing character after `.`" ) ;
934+ None
935+ } else if self . check_after_token ( ) {
936+ Some ( LangStringToken :: ClassAttribute ( class) )
937+ } else {
938+ None
939+ }
940+ }
941+
942+ fn parse_token ( & mut self , start : usize ) -> Option < Indices > {
943+ while let Some ( ( pos, c) ) = self . inner . peek ( ) {
944+ if !is_bareword_char ( * c) {
945+ return Some ( Indices { start, end : * pos } ) ;
946+ }
947+ self . inner . next ( ) ;
948+ }
949+ self . emit_error ( "unexpected end" ) ;
950+ None
951+ }
952+
953+ fn parse_key_value ( & mut self , c : char , start : usize ) -> Option < LangStringToken < ' a > > {
954+ let key_indices =
955+ if c == '"' { self . parse_string ( start) ? } else { self . parse_token ( start) ? } ;
956+ if key_indices. start == key_indices. end {
957+ self . emit_error ( "unexpected empty string as key" ) ;
958+ return None ;
959+ }
960+
961+ if let Some ( ( _, c) ) = self . inner . next ( ) {
962+ if c != '=' {
963+ self . emit_error ( & format ! ( "expected `=`, found `{}`" , c) ) ;
964+ return None ;
965+ }
966+ } else {
967+ self . emit_error ( "unexpected end" ) ;
968+ return None ;
969+ }
970+ let value_indices = match self . inner . next ( ) {
971+ Some ( ( pos, '"' ) ) => self . parse_string ( pos) ?,
972+ Some ( ( pos, c) ) if is_bareword_char ( c) => self . parse_token ( pos) ?,
973+ Some ( ( _, c) ) => {
974+ self . emit_error ( & format ! ( "unexpected `{c}` character after `=`" ) ) ;
975+ return None ;
976+ }
977+ None => {
978+ self . emit_error ( "expected value after `=`" ) ;
979+ return None ;
980+ }
981+ } ;
982+ if value_indices. start == value_indices. end {
983+ self . emit_error ( "unexpected empty string as value" ) ;
984+ None
985+ } else if self . check_after_token ( ) {
986+ Some ( LangStringToken :: KeyValueAttribute (
987+ & self . data [ key_indices. start ..key_indices. end ] ,
988+ & self . data [ value_indices. start ..value_indices. end ] ,
989+ ) )
990+ } else {
991+ None
992+ }
905993 }
906994
907- fn parse_in_attribute_block ( & mut self , start : usize ) -> Option < TokenKind < ' a > > {
995+ /// Returns `false` if an error was emitted.
996+ fn check_after_token ( & mut self ) -> bool {
997+ if let Some ( ( _, c) ) = self . inner . peek ( ) . copied ( ) {
998+ if c == '}' || is_separator ( c) || c == '(' {
999+ true
1000+ } else {
1001+ self . emit_error ( & format ! ( "unexpected `{c}` character" ) ) ;
1002+ false
1003+ }
1004+ } else {
1005+ // The error will be caught on the next iteration.
1006+ true
1007+ }
1008+ }
1009+
1010+ fn parse_in_attribute_block ( & mut self ) -> Option < LangStringToken < ' a > > {
9081011 while let Some ( ( pos, c) ) = self . inner . next ( ) {
909- if is_separator ( c) {
910- return Some ( TokenKind :: Attribute ( & self . data [ start..pos] ) ) ;
911- } else if c == '{' {
912- // There shouldn't be a nested block!
913- self . emit_error ( "unexpected `{` inside attribute block (`{}`)" ) ;
914- let attr = & self . data [ start..pos] ;
915- if attr. is_empty ( ) {
916- return self . next ( ) ;
917- }
918- self . inner . next ( ) ;
919- return Some ( TokenKind :: Attribute ( attr) ) ;
920- } else if c == '}' {
1012+ if c == '}' {
9211013 self . is_in_attribute_block = false ;
922- let attr = & self . data [ start..pos] ;
923- if attr. is_empty ( ) {
924- return self . next ( ) ;
925- }
926- return Some ( TokenKind :: Attribute ( attr) ) ;
927- } else if c == '"' && !self . skip_string ( ) {
1014+ return self . next ( ) ;
1015+ } else if c == '.' {
1016+ return self . parse_class ( pos) ;
1017+ } else if c == '"' || is_bareword_char ( c) {
1018+ return self . parse_key_value ( c, pos) ;
1019+ } else {
1020+ self . emit_error ( & format ! ( "unexpected character `{c}`" ) ) ;
9281021 return None ;
9291022 }
9301023 }
931- // Unclosed attribute block!
9321024 self . emit_error ( "unclosed attribute block (`{}`): missing `}` at the end" ) ;
933- let token = & self . data [ start..] ;
934- if token. is_empty ( ) { None } else { Some ( TokenKind :: Attribute ( token) ) }
1025+ None
9351026 }
9361027
937- fn parse_outside_attribute_block ( & mut self , start : usize ) -> Option < TokenKind < ' a > > {
1028+ /// Returns `false` if an error was emitted.
1029+ fn skip_paren_block ( & mut self ) -> bool {
1030+ while let Some ( ( _, c) ) = self . inner . next ( ) {
1031+ if c == ')' {
1032+ return true ;
1033+ }
1034+ }
1035+ self . emit_error ( "unclosed comment: missing `)` at the end" ) ;
1036+ false
1037+ }
1038+
1039+ fn parse_outside_attribute_block ( & mut self , start : usize ) -> Option < LangStringToken < ' a > > {
9381040 while let Some ( ( pos, c) ) = self . inner . next ( ) {
939- if is_separator ( c) {
940- return Some ( TokenKind :: Token ( & self . data [ start..pos] ) ) ;
1041+ if c == '"' {
1042+ if pos != start {
1043+ self . emit_error ( "expected ` `, `{` or `,` found `\" `" ) ;
1044+ return None ;
1045+ }
1046+ let indices = self . parse_string ( pos) ?;
1047+ if let Some ( ( _, c) ) = self . inner . peek ( ) . copied ( ) && c != '{' && !is_separator ( c) && c != '(' {
1048+ self . emit_error ( & format ! ( "expected ` `, `{{` or `,` after `\" `, found `{c}`" ) ) ;
1049+ return None ;
1050+ }
1051+ return Some ( LangStringToken :: LangToken ( & self . data [ indices. start ..indices. end ] ) ) ;
9411052 } else if c == '{' {
9421053 self . is_in_attribute_block = true ;
943- let token = & self . data [ start..pos] ;
944- if token. is_empty ( ) {
945- return self . next ( ) ;
1054+ return self . next ( ) ;
1055+ } else if is_bareword_char ( c) {
1056+ continue ;
1057+ } else if is_separator ( c) {
1058+ if pos != start {
1059+ return Some ( LangStringToken :: LangToken ( & self . data [ start..pos] ) ) ;
9461060 }
947- return Some ( TokenKind :: Token ( token) ) ;
948- } else if c == '}' {
949- // We're not in a block so it shouldn't be there!
950- self . emit_error ( "unexpected `}` outside attribute block (`{}`)" ) ;
951- let token = & self . data [ start..pos] ;
952- if token. is_empty ( ) {
953- return self . next ( ) ;
1061+ return self . next ( ) ;
1062+ } else if c == '(' {
1063+ if !self . skip_paren_block ( ) {
1064+ return None ;
9541065 }
955- self . inner . next ( ) ;
956- return Some ( TokenKind :: Attribute ( token) ) ;
957- } else if c == '"' && !self . skip_string ( ) {
1066+ if pos != start {
1067+ return Some ( LangStringToken :: LangToken ( & self . data [ start..pos] ) ) ;
1068+ }
1069+ return self . next ( ) ;
1070+ } else {
1071+ self . emit_error ( & format ! ( "unexpected character `{c}`" ) ) ;
9581072 return None ;
9591073 }
9601074 }
9611075 let token = & self . data [ start..] ;
962- if token. is_empty ( ) { None } else { Some ( TokenKind :: Token ( token ) ) }
1076+ if token. is_empty ( ) { None } else { Some ( LangStringToken :: LangToken ( & self . data [ start.. ] ) ) }
9631077 }
9641078}
9651079
9661080impl < ' a , ' tcx > Iterator for TagIterator < ' a , ' tcx > {
967- type Item = TokenKind < ' a > ;
1081+ type Item = LangStringToken < ' a > ;
9681082
9691083 fn next ( & mut self ) -> Option < Self :: Item > {
9701084 let Some ( start) = self . skip_separators ( ) else {
@@ -974,7 +1088,7 @@ impl<'a, 'tcx> Iterator for TagIterator<'a, 'tcx> {
9741088 return None ;
9751089 } ;
9761090 if self . is_in_attribute_block {
977- self . parse_in_attribute_block ( start )
1091+ self . parse_in_attribute_block ( )
9781092 } else {
9791093 self . parse_outside_attribute_block ( start)
9801094 }
@@ -999,16 +1113,6 @@ impl Default for LangString {
9991113 }
10001114}
10011115
1002- fn handle_class ( class : & str , after : & str , data : & mut LangString , extra : Option < & ExtraInfo < ' _ > > ) {
1003- if class. is_empty ( ) {
1004- if let Some ( extra) = extra {
1005- extra. error_invalid_codeblock_attr ( & format ! ( "missing class name after `{after}`" ) ) ;
1006- }
1007- } else {
1008- data. added_classes . push ( class. replace ( '"' , "" ) ) ;
1009- }
1010- }
1011-
10121116impl LangString {
10131117 fn parse_without_check (
10141118 string : & str ,
@@ -1034,41 +1138,41 @@ impl LangString {
10341138
10351139 for token in TagIterator :: new ( string, extra) {
10361140 match token {
1037- TokenKind :: Token ( "should_panic" ) => {
1141+ LangStringToken :: LangToken ( "should_panic" ) => {
10381142 data. should_panic = true ;
10391143 seen_rust_tags = !seen_other_tags;
10401144 }
1041- TokenKind :: Token ( "no_run" ) => {
1145+ LangStringToken :: LangToken ( "no_run" ) => {
10421146 data. no_run = true ;
10431147 seen_rust_tags = !seen_other_tags;
10441148 }
1045- TokenKind :: Token ( "ignore" ) => {
1149+ LangStringToken :: LangToken ( "ignore" ) => {
10461150 data. ignore = Ignore :: All ;
10471151 seen_rust_tags = !seen_other_tags;
10481152 }
1049- TokenKind :: Token ( x) if x. starts_with ( "ignore-" ) => {
1153+ LangStringToken :: LangToken ( x) if x. starts_with ( "ignore-" ) => {
10501154 if enable_per_target_ignores {
10511155 ignores. push ( x. trim_start_matches ( "ignore-" ) . to_owned ( ) ) ;
10521156 seen_rust_tags = !seen_other_tags;
10531157 }
10541158 }
1055- TokenKind :: Token ( "rust" ) => {
1159+ LangStringToken :: LangToken ( "rust" ) => {
10561160 data. rust = true ;
10571161 seen_rust_tags = true ;
10581162 }
1059- TokenKind :: Token ( "test_harness" ) => {
1163+ LangStringToken :: LangToken ( "test_harness" ) => {
10601164 data. test_harness = true ;
10611165 seen_rust_tags = !seen_other_tags || seen_rust_tags;
10621166 }
1063- TokenKind :: Token ( "compile_fail" ) => {
1167+ LangStringToken :: LangToken ( "compile_fail" ) => {
10641168 data. compile_fail = true ;
10651169 seen_rust_tags = !seen_other_tags || seen_rust_tags;
10661170 data. no_run = true ;
10671171 }
1068- TokenKind :: Token ( x) if x. starts_with ( "edition" ) => {
1172+ LangStringToken :: LangToken ( x) if x. starts_with ( "edition" ) => {
10691173 data. edition = x[ 7 ..] . parse :: < Edition > ( ) . ok ( ) ;
10701174 }
1071- TokenKind :: Token ( x)
1175+ LangStringToken :: LangToken ( x)
10721176 if allow_error_code_check && x. starts_with ( 'E' ) && x. len ( ) == 5 =>
10731177 {
10741178 if x[ 1 ..] . parse :: < u32 > ( ) . is_ok ( ) {
@@ -1078,7 +1182,7 @@ impl LangString {
10781182 seen_other_tags = true ;
10791183 }
10801184 }
1081- TokenKind :: Token ( x) if extra. is_some ( ) => {
1185+ LangStringToken :: LangToken ( x) if extra. is_some ( ) => {
10821186 let s = x. to_lowercase ( ) ;
10831187 if let Some ( ( flag, help) ) = if s == "compile-fail"
10841188 || s == "compile_fail"
@@ -1120,22 +1224,24 @@ impl LangString {
11201224 seen_other_tags = true ;
11211225 data. unknown . push ( x. to_owned ( ) ) ;
11221226 }
1123- TokenKind :: Token ( x) => {
1227+ LangStringToken :: LangToken ( x) => {
11241228 seen_other_tags = true ;
11251229 data. unknown . push ( x. to_owned ( ) ) ;
11261230 }
1127- TokenKind :: Attribute ( attr ) => {
1231+ LangStringToken :: KeyValueAttribute ( key , value ) => {
11281232 seen_other_tags = true ;
1129- if let Some ( class) = attr. strip_prefix ( '.' ) {
1130- handle_class ( class, "." , & mut data, extra) ;
1131- } else if let Some ( class) = attr. strip_prefix ( "class=" ) {
1132- handle_class ( class, "class=" , & mut data, extra) ;
1233+ if key == "class" {
1234+ data. added_classes . push ( value. to_owned ( ) ) ;
11331235 } else if let Some ( extra) = extra {
11341236 extra. error_invalid_codeblock_attr ( & format ! (
1135- "unsupported attribute `{attr }`"
1237+ "unsupported attribute `{key }`"
11361238 ) ) ;
11371239 }
11381240 }
1241+ LangStringToken :: ClassAttribute ( class) => {
1242+ seen_other_tags = true ;
1243+ data. added_classes . push ( class. to_owned ( ) ) ;
1244+ }
11391245 }
11401246 }
11411247
0 commit comments