Skip to content

Commit 31d109a

Browse files
committed
Draft RowBinaryWNAT/Native header parser
1 parent 72b3729 commit 31d109a

File tree

5 files changed

+1267
-0
lines changed

5 files changed

+1267
-0
lines changed

rowbinary/Cargo.toml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
[package]
2+
name = "clickhouse-rowbinary"
3+
version = "0.0.1"
4+
description = "RowBinaryWithNamesAndTypes format utils"
5+
authors = ["ClickHouse"]
6+
repository = "https://github.com/ClickHouse/clickhouse-rs"
7+
homepage = "https://clickhouse.com"
8+
edition = "2021"
9+
license = "MIT OR Apache-2.0"
10+
# update `Cargo.toml` and CI if changed
11+
rust-version = "1.73.0"
12+
13+
[lib]
14+
#proc-macro = true
15+
16+
[dependencies]
17+
thiserror = "1.0.16"

rowbinary/src/error.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#[derive(Debug, thiserror::Error)]
2+
pub enum ColumnsParserError {
3+
#[error("IO error: {0}")]
4+
IoError(#[from] std::io::Error),
5+
6+
#[error("Expected LF at position {0}")]
7+
ExpectedLF(usize),
8+
9+
#[error("Invalid integer encoding at position {0}")]
10+
InvalidIntegerEncoding(usize),
11+
12+
#[error("Incomplete column data at position {0}")]
13+
IncompleteColumnData(usize),
14+
15+
#[error("Invalid column spec at position {0}: {1}")]
16+
InvalidColumnSpec(usize, String),
17+
18+
#[error("Type parsing error: {0}")]
19+
TypeParsingError(String),
20+
}

rowbinary/src/leb128.rs

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
use std::io::{ErrorKind, Read};
2+
3+
use crate::error::ColumnsParserError;
4+
5+
pub fn decode_leb128<R: Read>(pos: &mut usize, reader: &mut R) -> Result<u64, ColumnsParserError> {
6+
let mut result: u64 = 0;
7+
let mut shift: u32 = 0;
8+
let mut buf = [0u8; 1];
9+
10+
loop {
11+
reader.read_exact(&mut buf).map_err(|e| {
12+
if e.kind() == ErrorKind::UnexpectedEof {
13+
ColumnsParserError::InvalidIntegerEncoding(*pos)
14+
} else {
15+
ColumnsParserError::IoError(e)
16+
}
17+
})?;
18+
19+
*pos += 1;
20+
21+
let byte = buf[0];
22+
result |= ((byte & 0x7f) as u64) << shift;
23+
24+
if byte & 0x80 == 0 {
25+
break;
26+
}
27+
28+
shift += 7;
29+
30+
if shift > 63 {
31+
return Err(ColumnsParserError::InvalidIntegerEncoding(*pos));
32+
}
33+
}
34+
35+
Ok(result)
36+
}
37+
38+
pub fn encode_leb128(value: u64) -> Vec<u8> {
39+
let mut result = Vec::new();
40+
let mut val = value;
41+
42+
loop {
43+
let mut byte = (val & 0x7f) as u8;
44+
val >>= 7;
45+
46+
if val != 0 {
47+
byte |= 0x80; // Set high bit to indicate more bytes follow
48+
}
49+
50+
result.push(byte);
51+
52+
if val == 0 {
53+
break;
54+
}
55+
}
56+
57+
result
58+
}
59+
60+
mod tests {
61+
#[test]
62+
fn test_decode_leb128() {
63+
let test_cases = vec![
64+
// (input bytes, expected value)
65+
(vec![0], 0),
66+
(vec![1], 1),
67+
(vec![127], 127),
68+
(vec![128, 1], 128),
69+
(vec![255, 1], 255),
70+
(vec![0x85, 0x91, 0x26], 624773),
71+
(vec![0xE5, 0x8E, 0x26], 624485),
72+
];
73+
74+
for (input, expected) in test_cases {
75+
let mut cursor = std::io::Cursor::new(input.clone());
76+
let mut pos = 0;
77+
let result = super::decode_leb128(&mut pos, &mut cursor).unwrap();
78+
assert_eq!(result, expected, "Failed decoding {:?}", input);
79+
}
80+
}
81+
82+
#[test]
83+
fn test_encode_decode_leb128() {
84+
let test_values = vec![
85+
0u64,
86+
1,
87+
127,
88+
128,
89+
255,
90+
624773,
91+
624485,
92+
300_000,
93+
10_000_000,
94+
u32::MAX as u64,
95+
(u32::MAX as u64) + 1,
96+
];
97+
98+
for value in test_values {
99+
let encoded = super::encode_leb128(value);
100+
let mut cursor = std::io::Cursor::new(&encoded);
101+
let mut pos = 0;
102+
let decoded = super::decode_leb128(&mut pos, &mut cursor).unwrap();
103+
104+
assert_eq!(
105+
decoded, value,
106+
"Failed round trip for {}: encoded as {:?}, decoded as {}",
107+
value, encoded, decoded
108+
);
109+
}
110+
}
111+
}

rowbinary/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
mod error;
2+
mod leb128;
3+
mod types;

0 commit comments

Comments
 (0)