@@ -13,18 +13,17 @@ boost::filesystem::path g_data_path;
1313size_t num_threads = 15 ;
1414size_t g_fragment_size = 160000000 / num_threads;
1515bool g_use_parquet{false };
16+ bool g_use_hot_data{false };
1617ExecutorDeviceType g_device_type{ExecutorDeviceType::GPU};
1718
1819using namespace TestHelpers ::ArrowSQLRunner;
1920
20- // #define USE_HOT_DATA
21+ #define USE_HOT_DATA
2122#define PARALLEL_IMPORT_ENABLED
2223
2324// when we want to measure storage latencies, read the csv files before starting the
2425// benchmark
25- #ifndef USE_HOT_DATA
2626std::vector<std::shared_ptr<arrow::Table>> g_taxi_data_files;
27- #endif
2827
2928std::istream& operator >>(std::istream& in, ExecutorDeviceType& device_type) {
3029 std::string token;
@@ -286,67 +285,66 @@ T v(const TargetValue& r) {
286285
287286static void table_count (benchmark::State& state) {
288287 for (auto _ : state) {
289- # ifndef USE_HOT_DATA
290- createTaxiTable ();
291- populateTaxiTable ();
292- # endif
288+ if (!g_use_hot_data) {
289+ createTaxiTable ();
290+ populateTaxiTable ();
291+ }
293292
294- auto res = v<int64_t >(run_simple_agg (" select count(*) from trips" , g_device_type));
293+ auto res = v<int64_t >(run_simple_agg (" select count(*) from trips; " , g_device_type));
295294 std::cout << " Number of loaded tuples: " << res << std::endl;
296295 }
297296}
298297
299298static void taxi_q1 (benchmark::State& state) {
300299 for (auto _ : state) {
301- # ifndef USE_HOT_DATA
302- createTaxiTable ();
303- populateTaxiTable ();
304- # endif
300+ if (!g_use_hot_data) {
301+ createTaxiTable ();
302+ populateTaxiTable ();
303+ }
305304
306- run_multiple_agg (" select cab_type, count(*) from trips group by cab_type" ,
305+ run_multiple_agg (" select cab_type, count(*) from trips group by cab_type; " ,
307306 g_device_type);
308307 }
309308}
310309
311310static void taxi_q2 (benchmark::State& state) {
312311 for (auto _ : state) {
313- # ifndef USE_HOT_DATA
314- createTaxiTable ();
315- populateTaxiTable ();
316- # endif
312+ if (!g_use_hot_data) {
313+ createTaxiTable ();
314+ populateTaxiTable ();
315+ }
317316
318317 run_multiple_agg (
319- " SELECT passenger_count, avg(total_amount) FROM trips GROUP BY passenger_count" ,
318+ " SELECT passenger_count, avg(total_amount) FROM trips GROUP BY passenger_count; " ,
320319 g_device_type);
321320 }
322321}
323322
324323static void taxi_q3 (benchmark::State& state) {
325324 for (auto _ : state) {
326- #ifndef USE_HOT_DATA
327- createTaxiTable ();
328- populateTaxiTable ();
329- #endif
330-
325+ if (!g_use_hot_data) {
326+ createTaxiTable ();
327+ populateTaxiTable ();
328+ }
331329 run_multiple_agg (
332330 " SELECT passenger_count, extract(year from pickup_datetime) AS pickup_year, "
333- " count(*) FROM trips GROUP BY passenger_count, pickup_year" ,
331+ " count(*) FROM trips GROUP BY passenger_count, pickup_year; " ,
334332 g_device_type);
335333 }
336334}
337335
338336static void taxi_q4 (benchmark::State& state) {
339337 for (auto _ : state) {
340- # ifndef USE_HOT_DATA
341- createTaxiTable ();
342- populateTaxiTable ();
343- # endif
338+ if (!g_use_hot_data) {
339+ createTaxiTable ();
340+ populateTaxiTable ();
341+ }
344342
345343 run_multiple_agg (
346344 " SELECT passenger_count, extract(year from pickup_datetime) AS pickup_year, "
347345 " cast(trip_distance as int) AS distance, count(*) AS the_count FROM trips GROUP "
348346 " BY passenger_count, pickup_year, distance ORDER BY pickup_year, the_count "
349- " desc" ,
347+ " desc; " ,
350348 g_device_type);
351349 }
352350}
@@ -406,6 +404,11 @@ int main(int argc, char* argv[]) {
406404 ->implicit_value (ExecutorDeviceType::GPU)
407405 ->default_value (ExecutorDeviceType::CPU),
408406 " Device type to use." );
407+ desc.add_options ()(" use-hot-data" ,
408+ po::value<bool >(&g_use_hot_data)
409+ ->implicit_value (true )
410+ ->default_value (g_use_hot_data),
411+ " Use prepopulated taxi data in queries." );
409412
410413 desc.add_options ()(
411414 " use-lazy-materialization" ,
@@ -434,22 +437,20 @@ int main(int argc, char* argv[]) {
434437 }
435438
436439 try {
437- #ifdef USE_HOT_DATA
438440 createTaxiTable ();
439- populateTaxiTable ();
440- #else
441- if (g_use_parquet) {
441+ if (!g_use_hot_data && g_use_parquet) {
442442 throw std::runtime_error (" Cannot use parquet files in cold data mode yet." );
443443 }
444- createTaxiTable ();
445444 auto table_info = getStorage ()->getTableInfo (getStorage ()->dbId (), " trips" );
446445 if (!table_info) {
447446 throw std::runtime_error (" Cannot find table \" trips\" , creation failed?" );
448447 }
449448
450449 auto col_infos = getStorage ()->listColumns (table_info->db_id , table_info->table_id );
451450 g_taxi_data_files = readTaxiFilesCsv (col_infos);
452- #endif
451+ if (g_use_hot_data) {
452+ loadTaxiArrowData ();
453+ }
453454 // warmup();
454455 ::benchmark::RunSpecifiedBenchmarks ();
455456 } catch (const std::exception& e) {
0 commit comments