Skip to content

Commit 7f2fe3e

Browse files
committed
feat: add validation support for the Nested(...) type
1 parent 900129d commit 7f2fe3e

File tree

3 files changed

+191
-40
lines changed

3 files changed

+191
-40
lines changed

src/rowbinary/validation.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,10 @@ fn validate_impl<'de, 'cursor, R: Row>(
548548
root,
549549
kind: InnerDataTypeValidatorKind::Array(&DataTypeNode::LineString),
550550
}),
551+
DataTypeNode::Nested { as_tuple, .. } => Some(InnerDataTypeValidator {
552+
root,
553+
kind: InnerDataTypeValidatorKind::Array(as_tuple),
554+
}),
551555
_ => root.panic_on_schema_mismatch(data_type, serde_type, is_inner),
552556
},
553557
SerdeType::Tuple(len) => match data_type {

tests/it/nested.rs

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,110 @@ async fn smoke() {
5050

5151
assert_eq!(row, original_row);
5252
}
53+
54+
#[tokio::test]
55+
async fn no_flatten() {
56+
let client = prepare_database!().with_option("flatten_nested", "0");
57+
58+
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Row)]
59+
struct MyRow {
60+
no: i32,
61+
items: Vec<(String, u32)>,
62+
}
63+
64+
// `flatten_nested = 0` prevents flattening of nested columns, causing them to be stored as a
65+
// single array of tuples instead of as separate arrays
66+
67+
client
68+
.query(
69+
"
70+
CREATE TABLE test(
71+
no Int32,
72+
items Nested(
73+
name String,
74+
count UInt32
75+
)
76+
)
77+
ENGINE = MergeTree ORDER BY no
78+
",
79+
)
80+
.execute()
81+
.await
82+
.unwrap();
83+
84+
let original_row = MyRow {
85+
no: 42,
86+
items: vec![("foo".into(), 1), ("bar".into(), 5)],
87+
};
88+
89+
let mut insert = client.insert::<MyRow>("test").unwrap();
90+
insert.write(&original_row).await.unwrap();
91+
insert.end().await.unwrap();
92+
93+
let row = client
94+
.query("SELECT ?fields FROM test")
95+
.fetch_one::<MyRow>()
96+
.await
97+
.unwrap();
98+
99+
assert_eq!(row, original_row);
100+
}
101+
102+
#[tokio::test]
103+
async fn doubly_flattened() {
104+
let client = prepare_database!();
105+
106+
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Row)]
107+
struct MyRow {
108+
no: i32,
109+
#[serde(rename = "items.names")]
110+
items_names: Vec<Vec<(String, String)>>,
111+
#[serde(rename = "items.count")]
112+
items_count: Vec<u32>,
113+
}
114+
115+
// Only the first level is flattened and any more deeply nested columns are stored as an array
116+
// of tuples, so the table ends up with columns
117+
// - `no Int32`
118+
// - `items.names Array(Nested(first String, last String))`
119+
// (i.e. `Array(Array(Tuple(first String, last String)))`)
120+
// - `items.count Array(UInt32)`
121+
122+
client
123+
.query(
124+
"
125+
CREATE TABLE test(
126+
no Int32,
127+
items Nested(
128+
names Nested(first String, last String),
129+
count UInt32
130+
)
131+
)
132+
ENGINE = MergeTree ORDER BY no
133+
",
134+
)
135+
.execute()
136+
.await
137+
.unwrap();
138+
139+
let original_row = MyRow {
140+
no: 42,
141+
items_names: vec![
142+
vec![("foo".into(), "foo".into())],
143+
vec![("bar".into(), "bar".into())],
144+
],
145+
items_count: vec![1, 5],
146+
};
147+
148+
let mut insert = client.insert::<MyRow>("test").unwrap();
149+
insert.write(&original_row).await.unwrap();
150+
insert.end().await.unwrap();
151+
152+
let row = client
153+
.query("SELECT ?fields FROM test")
154+
.fetch_one::<MyRow>()
155+
.await
156+
.unwrap();
157+
158+
assert_eq!(row, original_row);
159+
}

types/src/data_types.rs

Lines changed: 80 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,12 @@ pub enum DataTypeNode {
102102
Polygon,
103103
MultiPolygon,
104104

105-
Nested(Vec<Column>),
105+
Nested {
106+
columns: Vec<Column>,
107+
// Hack: a copy of the column types from `columns`, in the same order, wrapped in a `Tuple`
108+
// node, so that data for this column can be validated as an array of tuples
109+
as_tuple: Box<DataTypeNode>,
110+
},
106111
}
107112

108113
impl DataTypeNode {
@@ -159,7 +164,7 @@ impl DataTypeNode {
159164
str if str.starts_with("Tuple") => parse_tuple(str),
160165
str if str.starts_with("Variant") => parse_variant(str),
161166

162-
str if str.starts_with("Nested") => Ok(Self::Nested(parse_nested(str)?)),
167+
str if str.starts_with("Nested") => parse_nested(str),
163168

164169
// ...
165170
str => Err(TypesError::TypeParsingError(format!(
@@ -280,7 +285,7 @@ impl Display for DataTypeNode {
280285
MultiLineString => write!(f, "MultiLineString"),
281286
Polygon => write!(f, "Polygon"),
282287
MultiPolygon => write!(f, "MultiPolygon"),
283-
Nested(columns) => {
288+
Nested { columns, .. } => {
284289
write!(f, "Nested(")?;
285290
for (i, column) in columns.iter().enumerate() {
286291
if i > 0 {
@@ -836,7 +841,7 @@ fn parse_enum_values_map(input: &str) -> Result<HashMap<i16, String>, TypesError
836841
.collect::<HashMap<i16, String>>())
837842
}
838843

839-
fn parse_nested(mut input: &str) -> Result<Vec<Column>, TypesError> {
844+
fn parse_nested(mut input: &str) -> Result<DataTypeNode, TypesError> {
840845
/// Removes the prefix `prefix` from `input`.
841846
fn parse_str(input: &mut &str, prefix: &str) -> Result<(), TypesError> {
842847
if input.starts_with(prefix) {
@@ -897,15 +902,18 @@ fn parse_nested(mut input: &str) -> Result<Vec<Column>, TypesError> {
897902
parse_str(&mut input, "Nested(")?;
898903

899904
let mut columns = Vec::new();
905+
let mut types = Vec::new();
906+
900907
while !input.starts_with(')') {
901908
let name = parse_identifier(&mut input)?;
902909
parse_str(&mut input, " ")?;
903910
let data_type = parse_inner_type(&mut input)?;
904911

905912
columns.push(Column {
906913
name: name.to_string(),
907-
data_type,
914+
data_type: data_type.clone(),
908915
});
916+
types.push(data_type);
909917

910918
if input.starts_with(',') {
911919
parse_str(&mut input, ", ")?;
@@ -919,7 +927,10 @@ fn parse_nested(mut input: &str) -> Result<Vec<Column>, TypesError> {
919927
}
920928

921929
parse_str(&mut input, ")")?;
922-
Ok(columns)
930+
Ok(DataTypeNode::Nested {
931+
columns,
932+
as_tuple: Box::new(DataTypeNode::Tuple(types)),
933+
})
923934
}
924935

925936
#[cfg(test)]
@@ -1576,50 +1587,73 @@ mod tests {
15761587
fn test_data_type_new_nested() {
15771588
assert_eq!(
15781589
DataTypeNode::new("Nested(foo UInt8)").unwrap(),
1579-
DataTypeNode::Nested(vec![Column::new("foo".to_string(), DataTypeNode::UInt8)])
1590+
DataTypeNode::Nested {
1591+
columns: vec![Column::new("foo".to_string(), DataTypeNode::UInt8)],
1592+
as_tuple: Box::new(DataTypeNode::Tuple(vec![DataTypeNode::UInt8])),
1593+
}
15801594
);
15811595
assert_eq!(
15821596
DataTypeNode::new("Nested(foo UInt8, bar String)").unwrap(),
1583-
DataTypeNode::Nested(vec![
1584-
Column::new("foo".to_string(), DataTypeNode::UInt8),
1585-
Column::new("bar".to_string(), DataTypeNode::String),
1586-
])
1597+
DataTypeNode::Nested {
1598+
columns: vec![
1599+
Column::new("foo".to_string(), DataTypeNode::UInt8),
1600+
Column::new("bar".to_string(), DataTypeNode::String),
1601+
],
1602+
as_tuple: Box::new(DataTypeNode::Tuple(vec![
1603+
DataTypeNode::UInt8,
1604+
DataTypeNode::String,
1605+
])),
1606+
}
15871607
);
15881608
assert_eq!(
15891609
DataTypeNode::new("Nested(foo UInt8, `bar` String)").unwrap(),
1590-
DataTypeNode::Nested(vec![
1591-
Column::new("foo".to_string(), DataTypeNode::UInt8),
1592-
Column::new("bar".to_string(), DataTypeNode::String),
1593-
])
1610+
DataTypeNode::Nested {
1611+
columns: vec![
1612+
Column::new("foo".to_string(), DataTypeNode::UInt8),
1613+
Column::new("bar".to_string(), DataTypeNode::String),
1614+
],
1615+
as_tuple: Box::new(DataTypeNode::Tuple(vec![
1616+
DataTypeNode::UInt8,
1617+
DataTypeNode::String,
1618+
])),
1619+
}
15941620
);
15951621
assert_eq!(
15961622
DataTypeNode::new("Nested(foo UInt8, `b a r` String)").unwrap(),
1597-
DataTypeNode::Nested(vec![
1598-
Column::new("foo".to_string(), DataTypeNode::UInt8),
1599-
Column::new("b a r".to_string(), DataTypeNode::String),
1600-
])
1623+
DataTypeNode::Nested {
1624+
columns: vec![
1625+
Column::new("foo".to_string(), DataTypeNode::UInt8),
1626+
Column::new("b a r".to_string(), DataTypeNode::String),
1627+
],
1628+
as_tuple: Box::new(DataTypeNode::Tuple(vec![
1629+
DataTypeNode::UInt8,
1630+
DataTypeNode::String,
1631+
])),
1632+
}
16011633
);
1634+
1635+
let foo = DataTypeNode::Enum(EnumType::Enum8, HashMap::from([(1, "f\\'(".to_string())]));
1636+
let baz = DataTypeNode::Tuple(vec![DataTypeNode::Enum(
1637+
EnumType::Enum8,
1638+
HashMap::from([(1, "f\\'()".to_string())]),
1639+
)]);
1640+
let bar = DataTypeNode::Nested {
1641+
columns: vec![Column::new("baz".to_string(), baz.clone())],
1642+
as_tuple: Box::new(DataTypeNode::Tuple(vec![baz])),
1643+
};
1644+
16021645
assert_eq!(
16031646
DataTypeNode::new(
1604-
"Nested(foo Enum8('f\\'(' = 1), `b a r` Nested(bar Tuple(Enum8('f\\'()' = 1))))"
1647+
"Nested(foo Enum8('f\\'(' = 1), `b a r` Nested(baz Tuple(Enum8('f\\'()' = 1))))"
16051648
)
16061649
.unwrap(),
1607-
DataTypeNode::Nested(vec![
1608-
Column::new(
1609-
"foo".to_string(),
1610-
DataTypeNode::Enum(EnumType::Enum8, HashMap::from([(1, "f\\'(".to_string())]),)
1611-
),
1612-
Column::new(
1613-
"b a r".to_string(),
1614-
DataTypeNode::Nested(vec![Column::new(
1615-
"bar".to_string(),
1616-
DataTypeNode::Tuple(vec![DataTypeNode::Enum(
1617-
EnumType::Enum8,
1618-
HashMap::from([(1, "f\\'()".to_string())]),
1619-
)]),
1620-
)])
1621-
),
1622-
])
1650+
DataTypeNode::Nested {
1651+
columns: vec![
1652+
Column::new("foo".to_string(), foo.clone()),
1653+
Column::new("b a r".to_string(), bar.clone()),
1654+
],
1655+
as_tuple: Box::new(DataTypeNode::Tuple(vec![foo, bar])),
1656+
}
16231657
);
16241658

16251659
assert!(DataTypeNode::new("Nested").is_err());
@@ -1740,10 +1774,16 @@ mod tests {
17401774
"Variant(UInt8, Bool)"
17411775
);
17421776
assert_eq!(
1743-
DataTypeNode::Nested(vec![
1744-
Column::new("foo".to_string(), DataTypeNode::UInt8),
1745-
Column::new("bar".to_string(), DataTypeNode::String),
1746-
])
1777+
DataTypeNode::Nested {
1778+
columns: vec![
1779+
Column::new("foo".to_string(), DataTypeNode::UInt8),
1780+
Column::new("bar".to_string(), DataTypeNode::String),
1781+
],
1782+
as_tuple: Box::new(DataTypeNode::Tuple(vec![
1783+
DataTypeNode::UInt8,
1784+
DataTypeNode::String
1785+
])),
1786+
}
17471787
.to_string(),
17481788
"Nested(foo UInt8, bar String)"
17491789
);

0 commit comments

Comments
 (0)