Skip to content

Commit 4998ec1

Browse files
committed
feat: add DataTypeNode::Nested
1 parent 6ce466a commit 4998ec1

File tree

1 file changed

+167
-0
lines changed

1 file changed

+167
-0
lines changed

types/src/data_types.rs

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,8 @@ pub enum DataTypeNode {
101101
MultiLineString,
102102
Polygon,
103103
MultiPolygon,
104+
105+
Nested(Vec<Column>),
104106
}
105107

106108
impl DataTypeNode {
@@ -157,6 +159,8 @@ impl DataTypeNode {
157159
str if str.starts_with("Tuple") => parse_tuple(str),
158160
str if str.starts_with("Variant") => parse_variant(str),
159161

162+
str if str.starts_with("Nested") => Ok(Self::Nested(parse_nested(str)?)),
163+
160164
// ...
161165
str => Err(TypesError::TypeParsingError(format!(
162166
"Unknown data type: {str}"
@@ -276,6 +280,16 @@ impl Display for DataTypeNode {
276280
MultiLineString => write!(f, "MultiLineString"),
277281
Polygon => write!(f, "Polygon"),
278282
MultiPolygon => write!(f, "MultiPolygon"),
283+
Nested(columns) => {
284+
write!(f, "Nested(")?;
285+
for (i, column) in columns.iter().enumerate() {
286+
if i > 0 {
287+
write!(f, ", ")?;
288+
}
289+
write!(f, "{} {}", column.name, column.data_type)?;
290+
}
291+
write!(f, ")")
292+
}
279293
}
280294
}
281295
}
@@ -826,6 +840,92 @@ fn parse_enum_values_map(input: &str) -> Result<HashMap<i16, String>, TypesError
826840
.collect::<HashMap<i16, String>>())
827841
}
828842

843+
fn parse_nested(mut input: &str) -> Result<Vec<Column>, TypesError> {
844+
/// Removes the prefix `prefix` from `input`.
845+
fn parse_str(input: &mut &str, prefix: &str) -> Result<(), TypesError> {
846+
if input.starts_with(prefix) {
847+
*input = &input[prefix.len()..];
848+
Ok(())
849+
} else {
850+
Err(TypesError::TypeParsingError(format!(
851+
"Expected {prefix:?}, got {input:?}"
852+
)))
853+
}
854+
}
855+
856+
/// Removes and returns the prefix of `input` up to the first character that does not match the
857+
/// predicate.
858+
fn parse_while<'a>(input: &mut &'a str, predicate: impl Fn(char) -> bool) -> &'a str {
859+
let index = input
860+
.char_indices()
861+
.find(|(_, c)| !predicate(*c))
862+
.map(|(i, _)| i)
863+
.unwrap_or(input.len());
864+
let (prefix, rest) = input.split_at(index);
865+
*input = rest;
866+
prefix
867+
}
868+
869+
/// Removes and returns a valid identifier from the start of `input`.
870+
fn parse_identifier<'a>(input: &mut &'a str) -> Result<&'a str, TypesError> {
871+
let original_input = *input;
872+
873+
if input.starts_with('`') {
874+
parse_str(input, "`")?;
875+
let mut is_escaping = false;
876+
877+
for (index, char) in input.char_indices() {
878+
match char {
879+
_ if is_escaping => is_escaping = false,
880+
'\\' => is_escaping = true,
881+
'`' => {
882+
let name = &input[..index];
883+
*input = &input[index + 1..];
884+
return Ok(name);
885+
}
886+
_ => {}
887+
}
888+
}
889+
890+
Err(TypesError::TypeParsingError(format!(
891+
"Unclosed backtick in name: {original_input}"
892+
)))
893+
} else {
894+
Ok(parse_while(input, |c| {
895+
c.is_ascii_alphanumeric() || c == '_'
896+
}))
897+
}
898+
}
899+
900+
let original_input = input;
901+
parse_str(&mut input, "Nested(")?;
902+
903+
let mut columns = Vec::new();
904+
while !input.starts_with(')') {
905+
let name = parse_identifier(&mut input)?;
906+
parse_str(&mut input, " ")?;
907+
let data_type = parse_inner_type(&mut input)?;
908+
909+
columns.push(Column {
910+
name: name.to_string(),
911+
data_type,
912+
});
913+
914+
if input.starts_with(',') {
915+
parse_str(&mut input, ", ")?;
916+
}
917+
}
918+
919+
if columns.is_empty() {
920+
return Err(TypesError::TypeParsingError(format!(
921+
"Expected at least one column in Nested from input {original_input}"
922+
)));
923+
}
924+
925+
parse_str(&mut input, ")")?;
926+
Ok(columns)
927+
}
928+
829929
#[cfg(test)]
830930
mod tests {
831931
use super::*;
@@ -1476,6 +1576,65 @@ mod tests {
14761576
);
14771577
}
14781578

1579+
#[test]
fn test_data_type_new_nested() {
    // Shorthand for an expected column.
    let col = |name: &str, data_type: DataTypeNode| Column::new(name.to_string(), data_type);
    // Shorthand for an `Enum8` with a single variant whose value is 1.
    let enum8 = |label: &str| {
        DataTypeNode::Enum(EnumType::Enum8, HashMap::from([(1, label.to_string())]))
    };

    // Type strings that must parse, paired with the expected columns.
    let valid_cases = vec![
        (
            "Nested(foo UInt8)",
            vec![col("foo", DataTypeNode::UInt8)],
        ),
        (
            "Nested(foo UInt8, bar String)",
            vec![
                col("foo", DataTypeNode::UInt8),
                col("bar", DataTypeNode::String),
            ],
        ),
        // Backticks around a plain name are stripped.
        (
            "Nested(foo UInt8, `bar` String)",
            vec![
                col("foo", DataTypeNode::UInt8),
                col("bar", DataTypeNode::String),
            ],
        ),
        // Backticks allow spaces inside a name.
        (
            "Nested(foo UInt8, `b a r` String)",
            vec![
                col("foo", DataTypeNode::UInt8),
                col("b a r", DataTypeNode::String),
            ],
        ),
        // Quotes and parentheses inside enum labels must not confuse the column splitting,
        // including inside a `Nested` within a `Nested`.
        (
            "Nested(foo Enum8('f\\'(' = 1), `b a r` Nested(bar Tuple(Enum8('f\\'()' = 1))))",
            vec![
                col("foo", enum8("f\\'(")),
                col(
                    "b a r",
                    DataTypeNode::Nested(vec![col(
                        "bar",
                        DataTypeNode::Tuple(vec![enum8("f\\'()")]),
                    )]),
                ),
            ],
        ),
    ];

    for (type_str, expected_columns) in valid_cases {
        assert_eq!(
            DataTypeNode::new(type_str).unwrap(),
            DataTypeNode::Nested(expected_columns)
        );
    }

    // Type strings that must be rejected.
    let invalid_cases = [
        "Nested",
        "Nested(",
        "Nested()",
        "Nested(,)",
        "Nested(String)",
        "Nested(Int32, String)",
        "Nested(foo Int32, String)",
    ];

    for type_str in invalid_cases {
        assert!(
            DataTypeNode::new(type_str).is_err(),
            "expected an error for {type_str:?}"
        );
    }
}
1637+
14791638
#[test]
14801639
fn test_data_type_to_string_simple() {
14811640
// Simple types
@@ -1584,6 +1743,14 @@ mod tests {
15841743
DataTypeNode::Variant(vec![DataTypeNode::UInt8, DataTypeNode::Bool]).to_string(),
15851744
"Variant(UInt8, Bool)"
15861745
);
1746+
assert_eq!(
1747+
DataTypeNode::Nested(vec![
1748+
Column::new("foo".to_string(), DataTypeNode::UInt8),
1749+
Column::new("bar".to_string(), DataTypeNode::String),
1750+
])
1751+
.to_string(),
1752+
"Nested(foo UInt8, bar String)"
1753+
);
15871754
}
15881755

15891756
#[test]

0 commit comments

Comments
 (0)