@@ -2107,8 +2107,9 @@ impl<'a> Tokenizer<'a> {
     ) -> Result<Option<Token>, TokenizerError> {
         let mut s = String::new();
         let mut nested = 1;
+        let mut c_style_comments = false;
         let supports_nested_comments = self.dialect.supports_nested_comments();
-
+        let supports_c_style_comments = self.dialect.supports_c_style_comments();
         loop {
             match chars.next() {
                 Some('/') if matches!(chars.peek(), Some('*')) && supports_nested_comments => {
@@ -2117,10 +2118,40 @@ impl<'a> Tokenizer<'a> {
                     s.push('*');
                     nested += 1;
                 }
+                Some('!') if supports_c_style_comments => {
+                    c_style_comments = true;
+                    // consume the optional version digits and whitespace
+                    while let Some(&c) = chars.peek() {
+                        if c.is_ascii_digit() || c.is_whitespace() {
+                            chars.next();
+                        } else {
+                            break;
+                        }
+                    }
+                }
+                // consume all leading whitespace up to the '*/' when inside a C-style comment
+                Some(ch) if ch.is_whitespace() && c_style_comments => {
+                    let mut tmp_s = String::new();
+                    while let Some(c) = chars.next() {
+                        if c.is_whitespace() {
+                            tmp_s.push(c);
+                        } else if c == '*' && chars.peek() == Some(&'/') {
+                            chars.next(); // consume the '/'
+                            return Ok(Some(Token::make_word(&s, None)));
+                        } else {
+                            tmp_s.push(c);
+                            s.push_str(&tmp_s);
+                            break;
+                        }
+                    }
+                }
                 Some('*') if matches!(chars.peek(), Some('/')) => {
                     chars.next(); // consume the '/'
                     nested -= 1;
                     if nested == 0 {
+                        if c_style_comments {
+                            break Ok(Some(Token::make_word(&s, None)));
+                        }
                         break Ok(Some(Token::Whitespace(Whitespace::MultiLineComment(s))));
                     }
                     s.push('*');
@@ -4070,4 +4101,39 @@ mod tests {
             panic!("Tokenizer should have failed on {sql}, but it succeeded with {tokens:?}");
         }
     }
+    #[test]
+    fn tokenize_multiline_comment_with_c_style_comment() {
+        let sql = String::from("0/*! word */1");
+
+        let dialect = MySqlDialect {};
+        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
+        let expected = vec![
+            Token::Number("0".to_string(), false),
+            Token::Word(Word {
+                value: "word".to_string(),
+                quote_style: None,
+                keyword: Keyword::NoKeyword,
+            }),
+            Token::Number("1".to_string(), false),
+        ];
+        compare(expected, tokens);
+    }
+
+    #[test]
+    fn tokenize_multiline_comment_with_c_style_comment_and_version() {
+        let sql = String::from("0/*!8000000 word */1");
+
+        let dialect = MySqlDialect {};
+        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
+        let expected = vec![
+            Token::Number("0".to_string(), false),
+            Token::Word(Word {
+                value: "word".to_string(),
+                quote_style: None,
+                keyword: Keyword::NoKeyword,
+            }),
+            Token::Number("1".to_string(), false),
+        ];
+        compare(expected, tokens);
+    }
 }
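
For reference, a minimal usage sketch of the new behavior, assuming the diff above is applied to sqlparser-rs and that `supports_c_style_comments()` returns true for the MySQL dialect; the query, the `STRAIGHT_JOIN` hint, and the table name are illustrative only, not taken from the change itself:

```rust
use sqlparser::dialect::MySqlDialect;
use sqlparser::tokenizer::{Token, Tokenizer};

fn main() {
    // MySQL versioned comment: servers at or above the stated version execute the body.
    let sql = "SELECT /*!80000 STRAIGHT_JOIN */ col FROM t";
    let dialect = MySqlDialect {};
    let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();

    // With this change the comment body surfaces as a Word token instead of being
    // folded into Whitespace::MultiLineComment.
    assert!(tokens
        .iter()
        .any(|t| matches!(t, Token::Word(w) if w.value == "STRAIGHT_JOIN")));
    println!("{tokens:?}");
}
```

This mirrors the two tests added above, which cover the bare `/*! ... */` form and the versioned `/*!8000000 ... */` form.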