Skip to content

Commit 466b873

Browse files
authored
feat!(query): RowBinaryWithNamesAndTypes (#221)
1 parent 69c3e7a commit 466b873

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+5736
-518
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ on:
44
push:
55
branches: [ main ]
66
pull_request:
7-
branches: [ main ]
7+
workflow_dispatch:
88

99
env:
1010
CARGO_TERM_COLOR: always

CHANGELOG.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,35 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99
## [Unreleased] - ReleaseDate
1010

1111
### Removed
12+
1213
- **BREAKING** watch: `Client::watch()` API is removed ([#245]).
1314
- **BREAKING** mock: `watch()` and `watch_only_events()` are removed ([#245]).
1415

16+
### Changed
17+
18+
- **BREAKING** query: `RowBinaryWithNamesAndTypes` is now used by default for query results. This may cause panics if
19+
the row struct definition does not match the database schema. Use `Client::with_validation(false)` to revert to the
20+
previous behavior which uses plain `RowBinary` format for fetching rows. ([#221])
21+
- **BREAKING** mock: when using `test-util` feature, it is now required to use `Client::with_mock(&mock)` to set up the
22+
mock server, so it properly handles the response format and automatically disables
23+
`RowBinaryWithNamesAndTypes` header parsing and validation. Additionally, it is not required to call `with_url`
24+
explicitly. See the [updated example](./examples/mock.rs).
25+
- query: due to `RowBinaryWithNamesAndTypes` format usage, there might be an impact on fetch performance, which largely
26+
depends on how the dataset is defined. If you notice decreased performance, consider disabling validation by using
27+
`Client::with_validation(false)`.
28+
- serde: it is now possible to deserialize Map ClickHouse type into `HashMap<K, V>` (or `BTreeMap`, `IndexMap`,
29+
`DashMap`, etc.).
30+
31+
### Added
32+
33+
- client: added `Client::with_validation` builder method. Validation is enabled by default, meaning that
34+
`RowBinaryWithNamesAndTypes` format will be used to fetch rows from the database. If validation is disabled,
35+
`RowBinary` format will be used, similarly to the previous versions. ([#221]).
36+
- types: a new crate `clickhouse-types` was added to the project workspace. This crate is required for
37+
`RowBinaryWithNamesAndTypes` struct definition validation, as it contains ClickHouse data types AST, as well as
38+
functions and utilities to parse the types out of the ClickHouse server response. ([#221]).
39+
40+
[#221]: https://github.com/ClickHouse/clickhouse-rs/pull/221
1541
[#245]: https://github.com/ClickHouse/clickhouse-rs/pull/245
1642

1743
## [0.13.3] - 2025-05-29

Cargo.toml

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,21 @@ homepage = "https://clickhouse.com"
99
license = "MIT OR Apache-2.0"
1010
readme = "README.md"
1111
edition = "2021"
12-
# update `derive/Cargo.toml` and CI if changed
12+
# update `workspace.package.rust-version` below and CI if changed
1313
# TODO: after bumping to v1.80, remove `--precise` in the "msrv" CI job
1414
rust-version = "1.73.0"
1515

16+
[workspace]
17+
members = ["derive", "types"]
18+
19+
[workspace.package]
20+
authors = ["ClickHouse Contributors", "Paul Loyd <pavelko95@gmail.com>"]
21+
repository = "https://github.com/ClickHouse/clickhouse-rs"
22+
homepage = "https://clickhouse.com"
23+
edition = "2021"
24+
license = "MIT OR Apache-2.0"
25+
rust-version = "1.73.0"
26+
1627
[lints.rust]
1728
rust_2018_idioms = { level = "warn", priority = -1 }
1829
unreachable_pub = "warn"
@@ -26,16 +37,21 @@ undocumented_unsafe_blocks = "warn"
2637
all-features = true
2738
rustdoc-args = ["--cfg", "docsrs"]
2839

40+
[[bench]]
41+
name = "select_nyc_taxi_data"
42+
harness = false
43+
required-features = ["time"]
44+
2945
[[bench]]
3046
name = "select_numbers"
3147
harness = false
3248

3349
[[bench]]
34-
name = "insert"
50+
name = "mocked_insert"
3551
harness = false
3652

3753
[[bench]]
38-
name = "select"
54+
name = "mocked_select"
3955
harness = false
4056

4157
[[example]]
@@ -97,6 +113,7 @@ rustls-tls-native-roots = [
97113

98114
[dependencies]
99115
clickhouse-derive = { version = "0.2.0", path = "derive" }
116+
clickhouse-types = { version = "0.1.0", path = "types" }
100117

101118
thiserror = "2.0"
102119
serde = "1.0.106"
@@ -128,6 +145,7 @@ quanta = { version = "0.12", optional = true }
128145
replace_with = { version = "0.1.7" }
129146

130147
[dev-dependencies]
148+
clickhouse-derive = { version = "0.2.0", path = "derive" }
131149
criterion = "0.6"
132150
serde = { version = "1.0.106", features = ["derive"] }
133151
tokio = { version = "1.0.1", features = ["full", "test-util"] }
@@ -136,6 +154,6 @@ serde_bytes = "0.11.4"
136154
serde_json = "1"
137155
serde_repr = "0.1.7"
138156
uuid = { version = "1", features = ["v4", "serde"] }
139-
time = { version = "0.3.17", features = ["macros", "rand"] }
157+
time = { version = "0.3.17", features = ["macros", "rand", "parsing"] }
140158
fixnum = { version = "0.9.2", features = ["serde", "i32", "i64", "i128"] }
141159
rand = { version = "0.9", features = ["small_rng"] }

README.md

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@ Official pure Rust typed client for ClickHouse DB.
1818

1919
* Uses `serde` for encoding/decoding rows.
2020
* Supports `serde` attributes: `skip_serializing`, `skip_deserializing`, `rename`.
21-
* Uses `RowBinary` encoding over HTTP transport.
22-
* There are plans to switch to `Native` over TCP.
21+
* Uses `RowBinaryWithNamesAndTypes` or `RowBinary` formats over HTTP transport.
22+
* By default, `RowBinaryWithNamesAndTypes` with database schema validation is used.
23+
* It is possible to switch to `RowBinary`, which can potentially lead to increased performance ([see below](#validation)).
24+
* There are plans to implement `Native` format over TCP.
2325
* Supports TLS (see `native-tls` and `rustls-tls` features below).
2426
* Supports compression and decompression (LZ4 and LZ4HC).
2527
* Provides API for selecting.
@@ -29,9 +31,30 @@ Official pure Rust typed client for ClickHouse DB.
2931

3032
Note: [ch2rs](https://github.com/ClickHouse/ch2rs) is useful to generate a row type from ClickHouse.
3133

34+
## Validation
35+
36+
Starting from 0.14.0, the crate uses the `RowBinaryWithNamesAndTypes` format by default, which allows validating row types
37+
against the ClickHouse schema. This enables clearer error messages in case of schema mismatch at the cost of
38+
performance. Additionally, with enabled validation, the crate supports structs with correct field names and matching
39+
types, but incorrect order of the fields, with an additional slight (5-10%) performance penalty.
40+
41+
If you are looking to maximize performance, you could disable validation using `Client::with_validation(false)`. When
42+
validation is disabled, the client switches to `RowBinary` format usage instead.
43+
44+
The downside with plain `RowBinary` is that instead of clearer error messages, a mismatch between `Row` and database
45+
schema will result in a `NotEnoughData` error without specific details.
46+
47+
However, disabling validation can yield a 1.1x to 3x performance improvement, though that highly depends on the
48+
shape and volume of the dataset.
49+
50+
It is always recommended to measure the performance impact of validation in your specific use case. Additionally,
51+
writing smoke tests to ensure that the row types match the ClickHouse schema is highly recommended, if you plan to
52+
disable validation in your application.
53+
3254
## Usage
3355

3456
To use the crate, add this to your `Cargo.toml`:
57+
3558
```toml
3659
[dependencies]
3760
clickhouse = "0.13.3"
@@ -43,16 +66,6 @@ clickhouse = { version = "0.13.3", features = ["test-util"] }
4366
<details>
4467
<summary>
4568

46-
### Note about ClickHouse prior to v22.6
47-
48-
</summary>
49-
50-
CH server older than v22.6 (2022-06-16) handles `RowBinary` [incorrectly](https://github.com/ClickHouse/ClickHouse/issues/37420) in some rare cases. Use 0.11 and enable `wa-37420` feature to solve this problem. Don't use it for newer versions.
51-
52-
</details>
53-
<details>
54-
<summary>
55-
5669
### Create a client
5770

5871
</summary>
@@ -249,7 +262,8 @@ How to choose between all these features? Here are some considerations:
249262
}
250263
```
251264
</details>
252-
* `Enum(8|16)` are supported using [serde_repr](https://docs.rs/serde_repr/latest/serde_repr/).
265+
* `Enum(8|16)` are supported using [serde_repr](https://docs.rs/serde_repr/latest/serde_repr/). You could use
266+
`#[repr(i8)]` for `Enum8` and `#[repr(i16)]` for `Enum16`.
253267
<details>
254268
<summary>Example</summary>
255269
@@ -262,7 +276,7 @@ How to choose between all these features? Here are some considerations:
262276
}
263277
264278
#[derive(Debug, Serialize_repr, Deserialize_repr)]
265-
#[repr(u8)]
279+
#[repr(i8)]
266280
enum Level {
267281
Debug = 1,
268282
Info = 2,
@@ -387,7 +401,7 @@ How to choose between all these features? Here are some considerations:
387401
</details>
388402
* `Tuple(A, B, ...)` maps to/from `(A, B, ...)` or a newtype around it.
389403
* `Array(_)` maps to/from any slice, e.g. `Vec<_>`, `&[_]`. Newtypes are also supported.
390-
* `Map(K, V)` behaves like `Array((K, V))`.
404+
* `Map(K, V)` can be deserialized as `HashMap<K, V>` or `Vec<(K, V)>`.
391405
* `LowCardinality(_)` is supported seamlessly.
392406
* `Nullable(_)` maps to/from `Option<_>`. For `clickhouse::serde::*` helpers add `::option`.
393407
<details>
@@ -416,7 +430,8 @@ How to choose between all these features? Here are some considerations:
416430
}
417431
```
418432
</details>
419-
* `Geo` types are supported. `Point` behaves like a tuple `(f64, f64)`, and the rest of the types are just slices of points.
433+
* `Geo` types are supported. `Point` behaves like a tuple `(f64, f64)`, and the rest of the types are just slices of
434+
points.
420435
<details>
421436
<summary>Example</summary>
422437

benches/README.md

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,31 +4,41 @@ All cases are run with `cargo bench --bench <case>`.
44

55
## With a mocked server
66

7-
These benchmarks are run against a mocked server, which is a simple HTTP server that responds with a fixed response. This is useful to measure the overhead of the client itself:
8-
* `select` checks throughput of `Client::query()`.
9-
* `insert` checks throughput of `Client::insert()` and `Client::inserter()` (if the `inserter` features is enabled).
7+
These benchmarks are run against a mocked server, which is a simple HTTP server that responds with a fixed response.
8+
This is useful to measure the overhead of the client itself.
9+
10+
### Scenarios
11+
12+
* [mocked_select](mocked_select.rs) checks throughput of `Client::query()`.
13+
* [mocked_insert](mocked_insert.rs) checks throughput of `Client::insert()` and `Client::inserter()`
14+
(requires `inserter` feature).
1015

1116
### How to collect perf data
1217

1318
The crate's code runs on the thread with the name `testee`:
19+
1420
```bash
1521
cargo bench --bench <name> &
1622
perf record -p `ps -AT | grep testee | awk '{print $2}'` --call-graph dwarf,65528 --freq 5000 -g -- sleep 5
1723
perf script > perf.script
1824
```
1925

20-
Then upload the `perf.script` file to [Firefox Profiler](https://profiler.firefox.com).
26+
Then upload the `perf.script` file to [Firefox Profiler].
2127

2228
## With a running ClickHouse server
2329

2430
These benchmarks are run against a real ClickHouse server, so it must be started:
31+
2532
```bash
2633
docker compose up -d
2734
cargo bench --bench <case>
2835
```
2936

30-
Cases:
31-
* `select_numbers` measures time of running a big SELECT query to the `system.numbers_mt` table.
37+
### Scenarios
38+
39+
* [select_numbers.rs](select_numbers.rs) measures time of running a big SELECT query to the `system.numbers_mt` table.
40+
* [select_nyc_taxi_data.rs](select_nyc_taxi_data.rs) measures time of running a fairly large SELECT query (approximately
41+
3 million records) to the `nyc_taxi_data` table using the [NYC taxi dataset].
3242

3343
### How to collect perf data
3444

@@ -38,4 +48,10 @@ perf record -p `ps -AT | grep <name> | awk '{print $2}'` --call-graph dwarf,6552
3848
perf script > perf.script
3949
```
4050

41-
Then upload the `perf.script` file to [Firefox Profiler](https://profiler.firefox.com).
51+
Then upload the `perf.script` file to [Firefox Profiler].
52+
53+
<!-- links -->
54+
55+
[Firefox Profiler]: https://profiler.firefox.com
56+
57+
[NYC taxi dataset]: https://clickhouse.com/docs/getting-started/example-datasets/nyc-taxi#create-the-table-trips

benches/common.rs

Lines changed: 46 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ use std::{
1111
};
1212

1313
use bytes::Bytes;
14+
use clickhouse::error::Result;
1415
use futures::stream::StreamExt;
1516
use http_body_util::BodyExt;
1617
use hyper::{
@@ -25,35 +26,65 @@ use tokio::{
2526
sync::{mpsc, oneshot},
2627
};
2728

28-
use clickhouse::error::Result;
29+
pub(crate) struct ServerHandle {
30+
handle: Option<thread::JoinHandle<()>>,
31+
shutdown_tx: Option<oneshot::Sender<()>>,
32+
}
2933

30-
pub(crate) struct ServerHandle;
34+
impl ServerHandle {
35+
fn shutdown(&mut self) {
36+
if let Some(tx) = self.shutdown_tx.take() {
37+
tx.send(()).unwrap();
38+
}
39+
if let Some(handle) = self.handle.take() {
40+
handle.join().unwrap();
41+
}
42+
}
43+
}
3144

32-
pub(crate) fn start_server<S, F, B>(addr: SocketAddr, serve: S) -> ServerHandle
45+
impl Drop for ServerHandle {
46+
fn drop(&mut self) {
47+
self.shutdown();
48+
}
49+
}
50+
51+
pub(crate) async fn start_server<S, F, B>(addr: SocketAddr, serve: S) -> ServerHandle
3352
where
3453
S: Fn(Request<Incoming>) -> F + Send + Sync + 'static,
3554
F: Future<Output = Response<B>> + Send,
3655
B: Body<Data = Bytes, Error = Infallible> + Send + 'static,
3756
{
57+
let (shutdown_tx, mut shutdown_rx) = oneshot::channel::<()>();
58+
let (ready_tx, ready_rx) = oneshot::channel::<()>();
59+
3860
let serving = async move {
3961
let listener = TcpListener::bind(addr).await.unwrap();
62+
ready_tx.send(()).unwrap();
4063

4164
loop {
4265
let (stream, _) = listener.accept().await.unwrap();
43-
44-
let service =
45-
service::service_fn(|request| async { Ok::<_, Infallible>(serve(request).await) });
46-
47-
// SELECT benchmark doesn't read the whole body, so ignore possible errors.
48-
let _ = conn::http1::Builder::new()
66+
let server_future = conn::http1::Builder::new()
4967
.timer(TokioTimer::new())
50-
.serve_connection(TokioIo::new(stream), service)
51-
.await;
68+
.serve_connection(
69+
TokioIo::new(stream),
70+
service::service_fn(|request| async {
71+
Ok::<_, Infallible>(serve(request).await)
72+
}),
73+
);
74+
tokio::select! {
75+
_ = server_future => {}
76+
_ = &mut shutdown_rx => { break; }
77+
}
5278
}
5379
};
5480

55-
run_on_st_runtime("server", serving);
56-
ServerHandle
81+
let handle = Some(run_on_st_runtime("server", serving));
82+
ready_rx.await.unwrap();
83+
84+
ServerHandle {
85+
handle,
86+
shutdown_tx: Some(shutdown_tx),
87+
}
5788
}
5889

5990
pub(crate) async fn skip_incoming(request: Request<Incoming>) {
@@ -105,7 +136,7 @@ pub(crate) fn start_runner() -> RunnerHandle {
105136
RunnerHandle { tx }
106137
}
107138

108-
fn run_on_st_runtime(name: &str, f: impl Future + Send + 'static) {
139+
fn run_on_st_runtime(name: &str, f: impl Future + Send + 'static) -> thread::JoinHandle<()> {
109140
let name = name.to_string();
110141
thread::Builder::new()
111142
.name(name.clone())
@@ -121,5 +152,5 @@ fn run_on_st_runtime(name: &str, f: impl Future + Send + 'static) {
121152
.unwrap()
122153
.block_on(f);
123154
})
124-
.unwrap();
155+
.unwrap()
125156
}

0 commit comments

Comments
 (0)