Skip to content

Commit 8e323ac

Browse files
authored
fix(query): fix query function parsing nested conditions (#18940)
* fix(query): fix query function parsing nested conditions * fix * fix
1 parent 3b47ead commit 8e323ac

File tree

5 files changed

+129
-64
lines changed

5 files changed

+129
-64
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -503,6 +503,7 @@ tantivy = "0.25.0"
503503
tantivy-common = "0.10.0"
504504
tantivy-fst = "0.5"
505505
tantivy-jieba = "0.17.0"
506+
tantivy-query-grammar = "0.25.0"
506507
temp-env = "0.3.0"
507508
tempfile = "3.4.0"
508509
terminal_size = "0.4.2"
@@ -660,5 +661,6 @@ sub-cache = { git = "https://github.com/databendlabs/sub-cache", tag = "v0.2.1"
660661
tantivy = { git = "https://github.com/datafuse-extras/tantivy", rev = "9065a4d" }
661662
tantivy-common = { git = "https://github.com/datafuse-extras/tantivy", rev = "9065a4d", package = "tantivy-common" }
662663
tantivy-jieba = { git = "https://github.com/datafuse-extras/tantivy-jieba", rev = "ac27464" }
664+
tantivy-query-grammar = { git = "https://github.com/datafuse-extras/tantivy", rev = "9065a4d", package = "tantivy-query-grammar" }
663665
watcher = { git = "https://github.com/databendlabs/watcher", tag = "v0.4.2" }
664666
xorfilter-rs = { git = "https://github.com/datafuse-extras/xorfilter", tag = "databend-alpha.4" }

src/query/sql/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ serde_json = { workspace = true }
6868
sha2 = { workspace = true }
6969
similar = { workspace = true }
7070
simsearch = { workspace = true }
71+
tantivy-query-grammar = { workspace = true }
7172
unicase = { workspace = true }
7273
url = { workspace = true }
7374

src/query/sql/src/planner/semantic/type_check.rs

Lines changed: 66 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,9 @@ use jsonb::keypath::KeyPaths;
134134
use serde_json::json;
135135
use serde_json::to_string;
136136
use simsearch::SimSearch;
137+
use tantivy_query_grammar::parse_query_lenient;
138+
use tantivy_query_grammar::UserInputAst;
139+
use tantivy_query_grammar::UserInputLeaf;
137140
use unicase::Ascii;
138141

139142
use super::name_resolution::NameResolutionContext;
@@ -2657,28 +2660,75 @@ impl<'a> TypeChecker<'a> {
26572660
.set_span(query_scalar.span()));
26582661
};
26592662

2660-
let field_strs: Vec<&str> = query_text.split(' ').collect();
2661-
let mut column_refs = Vec::with_capacity(field_strs.len());
2662-
for field_str in field_strs {
2663-
if !field_str.contains(':') {
2664-
continue;
2663+
// Extract the first subfield from the query field as the field name,
2664+
// because a query may address a dot-separated key path when the field is of JSON type.
2665+
// For example, the value of the `info` field is: `{"tags":{"id":10,"env":"prod","name":"test"}}`
2666+
// The query can then be written as `info.tags.env:prod`, from which the field `info` is extracted.
2667+
fn extract_first_subfield(field: &str) -> String {
2668+
field.split('.').next().unwrap_or(field).to_string()
2669+
}
2670+
2671+
fn collect_fields(ast: &UserInputAst, fields: &mut HashSet<String>) {
2672+
match ast {
2673+
UserInputAst::Clause(clauses) => {
2674+
for (_, sub_ast) in clauses {
2675+
collect_fields(sub_ast, fields);
2676+
}
2677+
}
2678+
UserInputAst::Boost(inner_ast, _) => {
2679+
collect_fields(inner_ast, fields);
2680+
}
2681+
UserInputAst::Leaf(leaf) => match &**leaf {
2682+
UserInputLeaf::Literal(literal) => {
2683+
if let Some(field) = &literal.field_name {
2684+
fields.insert(extract_first_subfield(field));
2685+
}
2686+
}
2687+
UserInputLeaf::Range { field, .. } => {
2688+
if let Some(field) = field {
2689+
fields.insert(extract_first_subfield(field));
2690+
}
2691+
}
2692+
UserInputLeaf::Set { field, .. } => {
2693+
if let Some(field) = field {
2694+
fields.insert(extract_first_subfield(field));
2695+
}
2696+
}
2697+
UserInputLeaf::Exists { field } => {
2698+
fields.insert(extract_first_subfield(field));
2699+
}
2700+
UserInputLeaf::Regex { field, .. } => {
2701+
if let Some(field) = field {
2702+
fields.insert(extract_first_subfield(field));
2703+
}
2704+
}
2705+
UserInputLeaf::All => {}
2706+
},
26652707
}
2666-
let field_names: Vec<&str> = field_str.split(':').collect();
2667-
// if the field is JSON type, must specify the key path in the object
2668-
// for example:
2669-
// the field `info` has the value: `{"tags":{"id":10,"env":"prod","name":"test"}}`
2670-
// a query can be written like this `info.tags.env:prod`
2671-
let field_name = field_names[0].trim();
2672-
let sub_field_names: Vec<&str> = field_name.split('.').collect();
2708+
}
2709+
2710+
let (query_ast, errs) = parse_query_lenient(query_text);
2711+
if !errs.is_empty() {
2712+
let err_msg = errs
2713+
.into_iter()
2714+
.map(|err| format!("{} pos {}", err.message, err.pos))
2715+
.join(", ");
2716+
return Err(
2717+
ErrorCode::SemanticError(format!("invalid query: {err_msg}",))
2718+
.set_span(query_scalar.span()),
2719+
);
2720+
}
2721+
let mut fields = HashSet::new();
2722+
collect_fields(&query_ast, &mut fields);
2723+
2724+
let mut column_refs = Vec::with_capacity(fields.len());
2725+
for field in fields.into_iter() {
26732726
let column_expr = Expr::ColumnRef {
26742727
span: query_scalar.span(),
26752728
column: ColumnRef {
26762729
database: None,
26772730
table: None,
2678-
column: ColumnID::Name(Identifier::from_name(
2679-
query_scalar.span(),
2680-
sub_field_names[0].trim(),
2681-
)),
2731+
column: ColumnID::Name(Identifier::from_name(query_scalar.span(), field)),
26822732
},
26832733
};
26842734
let box (field_scalar, _) = self.resolve(&column_expr)?;

0 commit comments

Comments
 (0)