Skip to content

Commit 900129d

Browse files
committed
feat: add DataTypeNode::Nested
1 parent e0e4319 commit 900129d

File tree

1 file changed

+167
-0
lines changed

1 file changed

+167
-0
lines changed

types/src/data_types.rs

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,8 @@ pub enum DataTypeNode {
101101
MultiLineString,
102102
Polygon,
103103
MultiPolygon,
104+
105+
Nested(Vec<Column>),
104106
}
105107

106108
impl DataTypeNode {
@@ -157,6 +159,8 @@ impl DataTypeNode {
157159
str if str.starts_with("Tuple") => parse_tuple(str),
158160
str if str.starts_with("Variant") => parse_variant(str),
159161

162+
str if str.starts_with("Nested") => Ok(Self::Nested(parse_nested(str)?)),
163+
160164
// ...
161165
str => Err(TypesError::TypeParsingError(format!(
162166
"Unknown data type: {str}"
@@ -276,6 +280,16 @@ impl Display for DataTypeNode {
276280
MultiLineString => write!(f, "MultiLineString"),
277281
Polygon => write!(f, "Polygon"),
278282
MultiPolygon => write!(f, "MultiPolygon"),
283+
Nested(columns) => {
284+
write!(f, "Nested(")?;
285+
for (i, column) in columns.iter().enumerate() {
286+
if i > 0 {
287+
write!(f, ", ")?;
288+
}
289+
write!(f, "{} {}", column.name, column.data_type)?;
290+
}
291+
write!(f, ")")
292+
}
279293
}
280294
}
281295
}
@@ -822,6 +836,92 @@ fn parse_enum_values_map(input: &str) -> Result<HashMap<i16, String>, TypesError
822836
.collect::<HashMap<i16, String>>())
823837
}
824838

839+
fn parse_nested(mut input: &str) -> Result<Vec<Column>, TypesError> {
840+
/// Removes the prefix `prefix` from `input`.
841+
fn parse_str(input: &mut &str, prefix: &str) -> Result<(), TypesError> {
842+
if input.starts_with(prefix) {
843+
*input = &input[prefix.len()..];
844+
Ok(())
845+
} else {
846+
Err(TypesError::TypeParsingError(format!(
847+
"Expected {prefix:?}, got {input:?}"
848+
)))
849+
}
850+
}
851+
852+
/// Removes and returns the prefix of `input` up to the first character that does not match the
853+
/// predicate.
854+
fn parse_while<'a>(input: &mut &'a str, predicate: impl Fn(char) -> bool) -> &'a str {
855+
let index = input
856+
.char_indices()
857+
.find(|(_, c)| !predicate(*c))
858+
.map(|(i, _)| i)
859+
.unwrap_or(input.len());
860+
let (prefix, rest) = input.split_at(index);
861+
*input = rest;
862+
prefix
863+
}
864+
865+
/// Removes and returns a valid identifier from the start of `input`.
866+
fn parse_identifier<'a>(input: &mut &'a str) -> Result<&'a str, TypesError> {
867+
let original_input = *input;
868+
869+
if input.starts_with('`') {
870+
parse_str(input, "`")?;
871+
let mut is_escaping = false;
872+
873+
for (index, char) in input.char_indices() {
874+
match char {
875+
_ if is_escaping => is_escaping = false,
876+
'\\' => is_escaping = true,
877+
'`' => {
878+
let name = &input[..index];
879+
*input = &input[index + 1..];
880+
return Ok(name);
881+
}
882+
_ => {}
883+
}
884+
}
885+
886+
Err(TypesError::TypeParsingError(format!(
887+
"Unclosed backtick in name: {original_input}"
888+
)))
889+
} else {
890+
Ok(parse_while(input, |c| {
891+
c.is_ascii_alphanumeric() || c == '_'
892+
}))
893+
}
894+
}
895+
896+
let original_input = input;
897+
parse_str(&mut input, "Nested(")?;
898+
899+
let mut columns = Vec::new();
900+
while !input.starts_with(')') {
901+
let name = parse_identifier(&mut input)?;
902+
parse_str(&mut input, " ")?;
903+
let data_type = parse_inner_type(&mut input)?;
904+
905+
columns.push(Column {
906+
name: name.to_string(),
907+
data_type,
908+
});
909+
910+
if input.starts_with(',') {
911+
parse_str(&mut input, ", ")?;
912+
}
913+
}
914+
915+
if columns.is_empty() {
916+
return Err(TypesError::TypeParsingError(format!(
917+
"Expected at least one column in Nested from input {original_input}"
918+
)));
919+
}
920+
921+
parse_str(&mut input, ")")?;
922+
Ok(columns)
923+
}
924+
825925
#[cfg(test)]
826926
mod tests {
827927
use super::*;
@@ -1472,6 +1572,65 @@ mod tests {
14721572
);
14731573
}
14741574

1575+
/// Checks that `DataTypeNode::new` parses `Nested(...)` type strings: single and multiple
/// columns, backtick-quoted names, nested/escaped inner types, and malformed inputs.
#[test]
fn test_data_type_new_nested() {
    // Shorthand for building an expected column.
    let column = |name: &str, data_type: DataTypeNode| Column::new(name.to_string(), data_type);

    // A single column.
    let parsed = DataTypeNode::new("Nested(foo UInt8)").unwrap();
    let expected = DataTypeNode::Nested(vec![column("foo", DataTypeNode::UInt8)]);
    assert_eq!(parsed, expected);

    // Two columns; the second name is bare or backtick-quoted, both forms give the same result.
    let foo_bar = DataTypeNode::Nested(vec![
        column("foo", DataTypeNode::UInt8),
        column("bar", DataTypeNode::String),
    ]);
    assert_eq!(
        DataTypeNode::new("Nested(foo UInt8, bar String)").unwrap(),
        foo_bar
    );
    assert_eq!(
        DataTypeNode::new("Nested(foo UInt8, `bar` String)").unwrap(),
        foo_bar
    );

    // A quoted name may contain spaces.
    let parsed = DataTypeNode::new("Nested(foo UInt8, `b a r` String)").unwrap();
    let expected = DataTypeNode::Nested(vec![
        column("foo", DataTypeNode::UInt8),
        column("b a r", DataTypeNode::String),
    ]);
    assert_eq!(parsed, expected);

    // Inner types containing escaped quotes and parentheses, plus a Nested inside a Nested.
    let parsed = DataTypeNode::new(
        "Nested(foo Enum8('f\\'(' = 1), `b a r` Nested(bar Tuple(Enum8('f\\'()' = 1))))",
    )
    .unwrap();
    let outer_enum = DataTypeNode::Enum(
        EnumType::Enum8,
        HashMap::from([(1, "f\\'(".to_string())]),
    );
    let inner_enum = DataTypeNode::Enum(
        EnumType::Enum8,
        HashMap::from([(1, "f\\'()".to_string())]),
    );
    let inner_nested = DataTypeNode::Nested(vec![column(
        "bar",
        DataTypeNode::Tuple(vec![inner_enum]),
    )]);
    let expected = DataTypeNode::Nested(vec![
        column("foo", outer_enum),
        column("b a r", inner_nested),
    ]);
    assert_eq!(parsed, expected);

    // Malformed inputs must be rejected.
    for invalid in [
        "Nested",
        "Nested(",
        "Nested()",
        "Nested(,)",
        "Nested(String)",
        "Nested(Int32, String)",
        "Nested(foo Int32, String)",
    ] {
        assert!(DataTypeNode::new(invalid).is_err());
    }
}
1633+
14751634
#[test]
14761635
fn test_data_type_to_string_simple() {
14771636
// Simple types
@@ -1580,6 +1739,14 @@ mod tests {
15801739
DataTypeNode::Variant(vec![DataTypeNode::UInt8, DataTypeNode::Bool]).to_string(),
15811740
"Variant(UInt8, Bool)"
15821741
);
1742+
assert_eq!(
1743+
DataTypeNode::Nested(vec![
1744+
Column::new("foo".to_string(), DataTypeNode::UInt8),
1745+
Column::new("bar".to_string(), DataTypeNode::String),
1746+
])
1747+
.to_string(),
1748+
"Nested(foo UInt8, bar String)"
1749+
);
15831750
}
15841751

15851752
#[test]

0 commit comments

Comments
 (0)