Skip to content

Commit 9fdab6c

Browse files
authored
fix: prepare the nyc_taxi database ahead of benchmark (#279)
1 parent 44525e4 commit 9fdab6c

File tree

1 file changed

+82
-1
lines changed

1 file changed

+82
-1
lines changed

benches/select_nyc_taxi_data.rs

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
use crate::common_select::{
44
do_select_bench, print_header, print_results, BenchmarkRow, WithAccessType, WithId,
55
};
6-
use clickhouse::{Compression, Row};
6+
use clickhouse::{Client, Compression, Row};
77
use serde::Deserialize;
88
use serde_repr::Deserialize_repr;
99
use time::OffsetDateTime;
@@ -74,6 +74,86 @@ struct TripSmallMapAccess {
7474
impl_benchmark_row!(TripSmallSeqAccess, trip_id, "seq");
7575
impl_benchmark_row!(TripSmallMapAccess, trip_id, "map");
7676

77+
// See https://clickhouse.com/docs/getting-started/example-datasets/nyc-taxi
78+
async fn prepare_data() {
79+
let client = Client::default().with_url("http://localhost:8123");
80+
81+
client
82+
.query("CREATE DATABASE IF NOT EXISTS nyc_taxi")
83+
.execute()
84+
.await
85+
.unwrap();
86+
client
87+
.query(
88+
r#"
89+
CREATE TABLE IF NOT EXISTS nyc_taxi.trips_small (
90+
trip_id UInt32,
91+
pickup_datetime DateTime,
92+
dropoff_datetime DateTime,
93+
pickup_longitude Nullable(Float64),
94+
pickup_latitude Nullable(Float64),
95+
dropoff_longitude Nullable(Float64),
96+
dropoff_latitude Nullable(Float64),
97+
passenger_count UInt8,
98+
trip_distance Float32,
99+
fare_amount Float32,
100+
extra Float32,
101+
tip_amount Float32,
102+
tolls_amount Float32,
103+
total_amount Float32,
104+
payment_type Enum('CSH' = 1, 'CRE' = 2, 'NOC' = 3, 'DIS' = 4, 'UNK' = 5),
105+
pickup_ntaname LowCardinality(String),
106+
dropoff_ntaname LowCardinality(String)
107+
)
108+
ENGINE = MergeTree
109+
PRIMARY KEY (pickup_datetime, dropoff_datetime)
110+
"#,
111+
)
112+
.execute()
113+
.await
114+
.unwrap();
115+
116+
let len = client
117+
.query("SELECT count() FROM nyc_taxi.trips_small")
118+
.fetch_one::<usize>()
119+
.await
120+
.unwrap();
121+
122+
if len == 0 {
123+
client
124+
.query(
125+
"
126+
INSERT INTO nyc_taxi.trips_small
127+
SELECT
128+
trip_id,
129+
pickup_datetime,
130+
dropoff_datetime,
131+
pickup_longitude,
132+
pickup_latitude,
133+
dropoff_longitude,
134+
dropoff_latitude,
135+
passenger_count,
136+
trip_distance,
137+
fare_amount,
138+
extra,
139+
tip_amount,
140+
tolls_amount,
141+
total_amount,
142+
payment_type,
143+
pickup_ntaname,
144+
dropoff_ntaname
145+
FROM gcs(
146+
'https://storage.googleapis.com/clickhouse-public-datasets/nyc-taxi/trips_{0..2}.gz',
147+
'TabSeparatedWithNames'
148+
);
149+
",
150+
)
151+
.execute()
152+
.await
153+
.unwrap();
154+
}
155+
}
156+
77157
async fn bench<T: BenchmarkRow>(compression: Compression, validation: bool) {
78158
let stats = do_select_bench::<T>(
79159
"SELECT * FROM nyc_taxi.trips_small ORDER BY trip_id DESC",
@@ -87,6 +167,7 @@ async fn bench<T: BenchmarkRow>(compression: Compression, validation: bool) {
87167

88168
#[tokio::main]
89169
async fn main() {
170+
prepare_data().await;
90171
print_header(Some(" access"));
91172
bench::<TripSmallSeqAccess>(Compression::None, false).await;
92173
bench::<TripSmallSeqAccess>(Compression::None, true).await;

0 commit comments

Comments
 (0)