Skip to content

Commit 09c1042

Browse files
authored
Refactor column pruning and support WHERE predicates: LIKE, NOT LIKE, and IS NOT NULL (#93)
* refactor(column_pruning): reconstruct column clipping using Column detection under self-direction * feat(simplify): add `constant_calculation` compute constant calculations ahead of time to avoid double calculations * feat: support like operator * feat: support is not null operator * ci: nightly version * config: nightly version update * ci: config toolchain
1 parent 3dbe4a8 commit 09c1042

39 files changed

+676
-463
lines changed

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ integer-encoding = "3.0.4"
3030
strum_macros = "0.24"
3131
ordered-float = "3.0"
3232
petgraph = "0.6.3"
33-
futures-async-stream = "0.2.6"
33+
futures-async-stream = "0.2.9"
3434
futures = "0.3.25"
3535
ahash = "0.8.3"
3636
lazy_static = "1.4.0"
@@ -39,6 +39,7 @@ bytes = "1.5.0"
3939
kip_db = "0.1.2-alpha.17"
4040
rust_decimal = "1"
4141
csv = "1"
42+
regex = "1.10.2"
4243

4344
[dev-dependencies]
4445
tokio-test = "0.4.2"

README.md

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,27 @@
1-
# KipSQL
1+
<pre align="center">
2+
Built by @KipData
3+
4+
██╗ ██╗██╗██████╗ ███████╗ ██████╗ ██╗
5+
██║ ██╔╝██║██╔══██╗██╔════╝██╔═══██╗██║
6+
█████╔╝ ██║██████╔╝███████╗██║ ██║██║
7+
██╔═██╗ ██║██╔═══╝ ╚════██║██║▄▄ ██║██║
8+
██║ ██╗██║██║ ███████║╚██████╔╝███████╗
9+
╚═╝ ╚═╝╚═╝╚═╝ ╚══════╝ ╚══▀▀═╝ ╚══════╝
10+
-----------------------------------
11+
Embedded SQL DBMS
12+
</pre>
13+
<br/>
14+
15+
### Architecture
16+
Welcome to our website, powered by KipSQL:
17+
**http://www.kipdata.site/**
218

319
> Lightweight SQL calculation engine, as the SQL layer of KipDB, implemented with TalentPlan's TinySQL as the reference standard
420
5-
### Architecture
21+
622
![architecture](./static/images/architecture.png)
723

824
### Get Started
9-
#### 组件引入
1025
``` toml
1126
kip-sql = "0.0.1-alpha.0"
1227
```
@@ -79,6 +94,12 @@ implement_from_tuple!(Post, (
7994
- not null
8095
- null
8196
- unique
97+
- primary key
98+
- SQL where options
99+
- is null
100+
- is not null
101+
- like
102+
- not like
82103
- Supports index type
83104
- Unique Index
84105
- Supports multiple primary key types

rust-toolchain

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
nightly-2023-10-13
1+
nightly

src/binder/aggregate.rs

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,14 +92,20 @@ impl<'a, T: Transaction> Binder<'a, T> {
9292
expr: &mut ScalarExpression,
9393
is_select: bool,
9494
) -> Result<(), BindError> {
95+
let ref_columns = expr.referenced_columns();
96+
9597
match expr {
9698
ScalarExpression::AggCall {
9799
ty: return_type, ..
98100
} => {
99101
let ty = return_type.clone();
100102
if is_select {
101103
let index = self.context.input_ref_index(InputRefType::AggCall);
102-
let input_ref = ScalarExpression::InputRef { index, ty };
104+
let input_ref = ScalarExpression::InputRef {
105+
index,
106+
ty,
107+
ref_columns,
108+
};
103109
match std::mem::replace(expr, input_ref) {
104110
ScalarExpression::AggCall {
105111
kind,
@@ -124,14 +130,21 @@ impl<'a, T: Transaction> Binder<'a, T> {
124130
.find_position(|agg_expr| agg_expr == &expr)
125131
.ok_or_else(|| BindError::AggMiss(format!("{:?}", expr)))?;
126132

127-
let _ = std::mem::replace(expr, ScalarExpression::InputRef { index, ty });
133+
let _ = std::mem::replace(
134+
expr,
135+
ScalarExpression::InputRef {
136+
index,
137+
ty,
138+
ref_columns,
139+
},
140+
);
128141
}
129142
}
130143

131144
ScalarExpression::TypeCast { expr, .. } => {
132145
self.visit_column_agg_expr(expr, is_select)?
133146
}
134-
ScalarExpression::IsNull { expr } => self.visit_column_agg_expr(expr, is_select)?,
147+
ScalarExpression::IsNull { expr, .. } => self.visit_column_agg_expr(expr, is_select)?,
135148
ScalarExpression::Unary { expr, .. } => self.visit_column_agg_expr(expr, is_select)?,
136149
ScalarExpression::Alias { expr, .. } => self.visit_column_agg_expr(expr, is_select)?,
137150
ScalarExpression::Binary {
@@ -228,13 +241,15 @@ impl<'a, T: Transaction> Binder<'a, T> {
228241
}) {
229242
let index = self.context.input_ref_index(InputRefType::GroupBy);
230243
let mut select_item = &mut select_list[i];
244+
let ref_columns = select_item.referenced_columns();
231245
let return_type = select_item.return_type();
232246

233247
self.context.group_by_exprs.push(std::mem::replace(
234248
&mut select_item,
235249
ScalarExpression::InputRef {
236250
index,
237251
ty: return_type,
252+
ref_columns,
238253
},
239254
));
240255
return;
@@ -243,6 +258,8 @@ impl<'a, T: Transaction> Binder<'a, T> {
243258

244259
if let Some(i) = select_list.iter().position(|column| column == expr) {
245260
let expr = &mut select_list[i];
261+
let ref_columns = expr.referenced_columns();
262+
246263
match expr {
247264
ScalarExpression::Constant(_) | ScalarExpression::ColumnRef { .. } => {
248265
self.context.group_by_exprs.push(expr.clone())
@@ -255,6 +272,7 @@ impl<'a, T: Transaction> Binder<'a, T> {
255272
ScalarExpression::InputRef {
256273
index,
257274
ty: expr.return_type(),
275+
ref_columns,
258276
},
259277
))
260278
}
@@ -300,7 +318,7 @@ impl<'a, T: Transaction> Binder<'a, T> {
300318
}
301319

302320
ScalarExpression::TypeCast { expr, .. } => self.validate_having_orderby(expr),
303-
ScalarExpression::IsNull { expr } => self.validate_having_orderby(expr),
321+
ScalarExpression::IsNull { expr, .. } => self.validate_having_orderby(expr),
304322
ScalarExpression::Unary { expr, .. } => self.validate_having_orderby(expr),
305323
ScalarExpression::Binary {
306324
left_expr,

src/binder/create_table.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,13 +80,13 @@ mod tests {
8080
match plan1.operator {
8181
Operator::CreateTable(op) => {
8282
assert_eq!(op.table_name, Arc::new("t1".to_string()));
83-
assert_eq!(op.columns[0].name, "id".to_string());
83+
assert_eq!(op.columns[0].name(), "id");
8484
assert_eq!(op.columns[0].nullable, false);
8585
assert_eq!(
8686
op.columns[0].desc,
8787
ColumnDesc::new(LogicalType::Integer, true, false)
8888
);
89-
assert_eq!(op.columns[1].name, "name".to_string());
89+
assert_eq!(op.columns[1].name(), "name");
9090
assert_eq!(op.columns[1].nullable, true);
9191
assert_eq!(
9292
op.columns[1].desc,

src/binder/expr.rs

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use crate::binder::BindError;
2+
use crate::expression;
23
use crate::expression::agg::AggKind;
34
use itertools::Itertools;
45
use sqlparser::ast::{
@@ -25,13 +26,41 @@ impl<'a, T: Transaction> Binder<'a, T> {
2526
Expr::Function(func) => self.bind_agg_call(func),
2627
Expr::Nested(expr) => self.bind_expr(expr),
2728
Expr::UnaryOp { expr, op } => self.bind_unary_op_internal(expr, op),
28-
Expr::IsNull(expr) => self.bind_is_null(expr),
29+
Expr::Like {
30+
negated,
31+
expr,
32+
pattern,
33+
..
34+
} => self.bind_like(*negated, expr, pattern),
35+
Expr::IsNull(expr) => self.bind_is_null(expr, false),
36+
Expr::IsNotNull(expr) => self.bind_is_null(expr, true),
2937
_ => {
3038
todo!()
3139
}
3240
}
3341
}
3442

43+
pub fn bind_like(
44+
&mut self,
45+
negated: bool,
46+
expr: &Expr,
47+
pattern: &Expr,
48+
) -> Result<ScalarExpression, BindError> {
49+
let left_expr = Box::new(self.bind_expr(expr)?);
50+
let right_expr = Box::new(self.bind_expr(pattern)?);
51+
let op = if negated {
52+
expression::BinaryOperator::NotLike
53+
} else {
54+
expression::BinaryOperator::Like
55+
};
56+
Ok(ScalarExpression::Binary {
57+
op,
58+
left_expr,
59+
right_expr,
60+
ty: LogicalType::Boolean,
61+
})
62+
}
63+
3564
pub fn bind_column_ref_from_identifiers(
3665
&mut self,
3766
idents: &[Ident],
@@ -199,8 +228,9 @@ impl<'a, T: Transaction> Binder<'a, T> {
199228
})
200229
}
201230

202-
fn bind_is_null(&mut self, expr: &Expr) -> Result<ScalarExpression, BindError> {
231+
fn bind_is_null(&mut self, expr: &Expr, negated: bool) -> Result<ScalarExpression, BindError> {
203232
Ok(ScalarExpression::IsNull {
233+
negated,
204234
expr: Box::new(self.bind_expr(expr)?),
205235
})
206236
}

src/binder/select.rs

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -343,9 +343,7 @@ impl<'a, T: Transaction> Binder<'a, T> {
343343
select_list: Vec<ScalarExpression>,
344344
) -> LogicalPlan {
345345
LogicalPlan {
346-
operator: Operator::Project(ProjectOperator {
347-
columns: select_list,
348-
}),
346+
operator: Operator::Project(ProjectOperator { exprs: select_list }),
349347
childrens: vec![children],
350348
}
351349
}
@@ -431,7 +429,8 @@ impl<'a, T: Transaction> Binder<'a, T> {
431429

432430
for column in select_items {
433431
if let ScalarExpression::ColumnRef(col) = column {
434-
if let Some(nullable) = table_force_nullable.get(col.table_name.as_ref().unwrap()) {
432+
if let Some(nullable) = table_force_nullable.get(col.table_name().as_ref().unwrap())
433+
{
435434
let mut new_col = ColumnCatalog::clone(col);
436435
new_col.nullable = *nullable;
437436

@@ -504,12 +503,12 @@ impl<'a, T: Transaction> Binder<'a, T> {
504503
// example: foo = bar
505504
(ScalarExpression::ColumnRef(l), ScalarExpression::ColumnRef(r)) => {
506505
// reorder left and right joins keys to pattern: (left, right)
507-
if left_schema.contains_column(&l.name)
508-
&& right_schema.contains_column(&r.name)
506+
if left_schema.contains_column(l.name())
507+
&& right_schema.contains_column(r.name())
509508
{
510509
accum.push((left, right));
511-
} else if left_schema.contains_column(&r.name)
512-
&& right_schema.contains_column(&l.name)
510+
} else if left_schema.contains_column(r.name())
511+
&& right_schema.contains_column(l.name())
513512
{
514513
accum.push((right, left));
515514
} else {

src/catalog/column.rs

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use crate::catalog::TableName;
22
use crate::expression::ScalarExpression;
33
use serde::{Deserialize, Serialize};
44
use sqlparser::ast::{ColumnDef, ColumnOption};
5+
use std::hash::Hash;
56
use std::sync::Arc;
67

78
use crate::types::{ColumnId, LogicalType};
@@ -10,14 +11,19 @@ pub type ColumnRef = Arc<ColumnCatalog>;
1011

1112
#[derive(Debug, Clone, Serialize, Deserialize, Hash, Eq, PartialEq)]
1213
pub struct ColumnCatalog {
13-
pub id: Option<ColumnId>,
14-
pub name: String,
15-
pub table_name: Option<TableName>,
14+
pub summary: ColumnSummary,
1615
pub nullable: bool,
1716
pub desc: ColumnDesc,
1817
pub ref_expr: Option<ScalarExpression>,
1918
}
2019

20+
#[derive(Debug, Clone, Serialize, Deserialize, Hash, Eq, PartialEq)]
21+
pub struct ColumnSummary {
22+
pub id: Option<ColumnId>,
23+
pub name: String,
24+
pub table_name: Option<TableName>,
25+
}
26+
2127
impl ColumnCatalog {
2228
pub(crate) fn new(
2329
column_name: String,
@@ -26,9 +32,11 @@ impl ColumnCatalog {
2632
ref_expr: Option<ScalarExpression>,
2733
) -> ColumnCatalog {
2834
ColumnCatalog {
29-
id: None,
30-
name: column_name,
31-
table_name: None,
35+
summary: ColumnSummary {
36+
id: None,
37+
name: column_name,
38+
table_name: None,
39+
},
3240
nullable,
3341
desc: column_desc,
3442
ref_expr,
@@ -37,20 +45,39 @@ impl ColumnCatalog {
3745

3846
pub(crate) fn new_dummy(column_name: String) -> ColumnCatalog {
3947
ColumnCatalog {
40-
id: Some(0),
41-
name: column_name,
42-
table_name: None,
48+
summary: ColumnSummary {
49+
id: Some(0),
50+
name: column_name,
51+
table_name: None,
52+
},
4353
nullable: false,
4454
desc: ColumnDesc::new(LogicalType::Varchar(None), false, false),
4555
ref_expr: None,
4656
}
4757
}
4858

59+
pub(crate) fn summary(&self) -> &ColumnSummary {
60+
&self.summary
61+
}
62+
63+
pub(crate) fn id(&self) -> Option<ColumnId> {
64+
self.summary.id
65+
}
66+
67+
pub(crate) fn table_name(&self) -> Option<TableName> {
68+
self.summary.table_name.clone()
69+
}
70+
71+
pub(crate) fn name(&self) -> &str {
72+
&self.summary.name
73+
}
74+
4975
pub(crate) fn datatype(&self) -> &LogicalType {
5076
&self.desc.column_datatype
5177
}
5278

53-
pub fn desc(&self) -> &ColumnDesc {
79+
#[allow(dead_code)]
80+
pub(crate) fn desc(&self) -> &ColumnDesc {
5481
&self.desc
5582
}
5683
}

0 commit comments

Comments
 (0)