Skip to content

Commit c099883

Browse files
authored
Correctly tokenize nested comments in Databricks, Clickhouse, and ANSI (#2044)
1 parent a430838 commit c099883

File tree

4 files changed: +81 -76 lines changed

src/dialect/ansi.rs

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,4 +33,9 @@ impl Dialect for AnsiDialect {
3333
fn require_interval_qualifier(&self) -> bool {
3434
true
3535
}
36+
37+
/// The SQL standard explicitly states that block comments nest.
38+
fn supports_nested_comments(&self) -> bool {
39+
true
40+
}
3641
}

src/dialect/clickhouse.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,4 +94,10 @@ impl Dialect for ClickHouseDialect {
9494
fn supports_group_by_with_modifier(&self) -> bool {
9595
true
9696
}
97+
98+
/// Supported since 2020.
99+
/// See <https://clickhouse.com/docs/whats-new/changelog/2020#backward-incompatible-change-2>
100+
fn supports_nested_comments(&self) -> bool {
101+
true
102+
}
97103
}

src/dialect/databricks.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,4 +64,9 @@ impl Dialect for DatabricksDialect {
6464
fn supports_struct_literal(&self) -> bool {
6565
true
6666
}
67+
68+
/// See <https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-syntax-comment>
69+
fn supports_nested_comments(&self) -> bool {
70+
true
71+
}
6772
}

src/tokenizer.rs

Lines changed: 65 additions & 76 deletions
Original file line number | Diff line number | Diff line change
@@ -2419,7 +2419,7 @@ mod tests {
24192419
use crate::dialect::{
24202420
BigQueryDialect, ClickHouseDialect, HiveDialect, MsSqlDialect, MySqlDialect, SQLiteDialect,
24212421
};
2422-
use crate::test_utils::all_dialects_where;
2422+
use crate::test_utils::{all_dialects_except, all_dialects_where};
24232423
use core::fmt::Debug;
24242424

24252425
#[test]
@@ -3169,90 +3169,79 @@ mod tests {
31693169

31703170
#[test]
31713171
fn tokenize_nested_multiline_comment() {
3172-
let dialect = GenericDialect {};
3173-
let test_cases = vec![
3174-
(
3175-
"0/*multi-line\n* \n/* comment \n /*comment*/*/ */ /comment*/1",
3176-
vec![
3177-
Token::Number("0".to_string(), false),
3178-
Token::Whitespace(Whitespace::MultiLineComment(
3179-
"multi-line\n* \n/* comment \n /*comment*/*/ ".into(),
3180-
)),
3181-
Token::Whitespace(Whitespace::Space),
3182-
Token::Div,
3183-
Token::Word(Word {
3184-
value: "comment".to_string(),
3185-
quote_style: None,
3186-
keyword: Keyword::COMMENT,
3187-
}),
3188-
Token::Mul,
3189-
Token::Div,
3190-
Token::Number("1".to_string(), false),
3191-
],
3192-
),
3193-
(
3194-
"0/*multi-line\n* \n/* comment \n /*comment/**/ */ /comment*/*/1",
3195-
vec![
3196-
Token::Number("0".to_string(), false),
3197-
Token::Whitespace(Whitespace::MultiLineComment(
3198-
"multi-line\n* \n/* comment \n /*comment/**/ */ /comment*/".into(),
3199-
)),
3200-
Token::Number("1".to_string(), false),
3201-
],
3202-
),
3203-
(
3204-
"SELECT 1/* a /* b */ c */0",
3205-
vec![
3206-
Token::make_keyword("SELECT"),
3207-
Token::Whitespace(Whitespace::Space),
3208-
Token::Number("1".to_string(), false),
3209-
Token::Whitespace(Whitespace::MultiLineComment(" a /* b */ c ".to_string())),
3210-
Token::Number("0".to_string(), false),
3211-
],
3212-
),
3213-
];
3172+
all_dialects_where(|d| d.supports_nested_comments()).tokenizes_to(
3173+
"0/*multi-line\n* \n/* comment \n /*comment*/*/ */ /comment*/1",
3174+
vec![
3175+
Token::Number("0".to_string(), false),
3176+
Token::Whitespace(Whitespace::MultiLineComment(
3177+
"multi-line\n* \n/* comment \n /*comment*/*/ ".into(),
3178+
)),
3179+
Token::Whitespace(Whitespace::Space),
3180+
Token::Div,
3181+
Token::Word(Word {
3182+
value: "comment".to_string(),
3183+
quote_style: None,
3184+
keyword: Keyword::COMMENT,
3185+
}),
3186+
Token::Mul,
3187+
Token::Div,
3188+
Token::Number("1".to_string(), false),
3189+
],
3190+
);
32143191

3215-
for (sql, expected) in test_cases {
3216-
let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
3217-
compare(expected, tokens);
3218-
}
3192+
all_dialects_where(|d| d.supports_nested_comments()).tokenizes_to(
3193+
"0/*multi-line\n* \n/* comment \n /*comment/**/ */ /comment*/*/1",
3194+
vec![
3195+
Token::Number("0".to_string(), false),
3196+
Token::Whitespace(Whitespace::MultiLineComment(
3197+
"multi-line\n* \n/* comment \n /*comment/**/ */ /comment*/".into(),
3198+
)),
3199+
Token::Number("1".to_string(), false),
3200+
],
3201+
);
3202+
3203+
all_dialects_where(|d| d.supports_nested_comments()).tokenizes_to(
3204+
"SELECT 1/* a /* b */ c */0",
3205+
vec![
3206+
Token::make_keyword("SELECT"),
3207+
Token::Whitespace(Whitespace::Space),
3208+
Token::Number("1".to_string(), false),
3209+
Token::Whitespace(Whitespace::MultiLineComment(" a /* b */ c ".to_string())),
3210+
Token::Number("0".to_string(), false),
3211+
],
3212+
);
32193213
}
32203214

32213215
#[test]
32223216
fn tokenize_nested_multiline_comment_empty() {
3223-
let sql = "select 1/*/**/*/0";
3224-
3225-
let dialect = GenericDialect {};
3226-
let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
3227-
let expected = vec![
3228-
Token::make_keyword("select"),
3229-
Token::Whitespace(Whitespace::Space),
3230-
Token::Number("1".to_string(), false),
3231-
Token::Whitespace(Whitespace::MultiLineComment("/**/".to_string())),
3232-
Token::Number("0".to_string(), false),
3233-
];
3234-
3235-
compare(expected, tokens);
3217+
all_dialects_where(|d| d.supports_nested_comments()).tokenizes_to(
3218+
"select 1/*/**/*/0",
3219+
vec![
3220+
Token::make_keyword("select"),
3221+
Token::Whitespace(Whitespace::Space),
3222+
Token::Number("1".to_string(), false),
3223+
Token::Whitespace(Whitespace::MultiLineComment("/**/".to_string())),
3224+
Token::Number("0".to_string(), false),
3225+
],
3226+
);
32363227
}
32373228

32383229
#[test]
32393230
fn tokenize_nested_comments_if_not_supported() {
3240-
let dialect = SQLiteDialect {};
3241-
let sql = "SELECT 1/*/* nested comment */*/0";
3242-
let tokens = Tokenizer::new(&dialect, sql).tokenize();
3243-
let expected = vec![
3244-
Token::make_keyword("SELECT"),
3245-
Token::Whitespace(Whitespace::Space),
3246-
Token::Number("1".to_string(), false),
3247-
Token::Whitespace(Whitespace::MultiLineComment(
3248-
"/* nested comment ".to_string(),
3249-
)),
3250-
Token::Mul,
3251-
Token::Div,
3252-
Token::Number("0".to_string(), false),
3253-
];
3254-
3255-
compare(expected, tokens.unwrap());
3231+
all_dialects_except(|d| d.supports_nested_comments()).tokenizes_to(
3232+
"SELECT 1/*/* nested comment */*/0",
3233+
vec![
3234+
Token::make_keyword("SELECT"),
3235+
Token::Whitespace(Whitespace::Space),
3236+
Token::Number("1".to_string(), false),
3237+
Token::Whitespace(Whitespace::MultiLineComment(
3238+
"/* nested comment ".to_string(),
3239+
)),
3240+
Token::Mul,
3241+
Token::Div,
3242+
Token::Number("0".to_string(), false),
3243+
],
3244+
);
32563245
}
32573246

32583247
#[test]

0 commit comments

Comments (0)