Skip to content

Commit 3431da8

Browse files
madhav-madhusoodanan authored and LegNeato committed
chore: format code
1 parent 6a1128a commit 3431da8

File tree

2 files changed

+36
-31
lines changed

2 files changed

+36
-31
lines changed

examples/cuda/async_api/kernels/src/lib.rs

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -6,11 +6,8 @@ pub unsafe fn increment(g_data: *mut u32, inc_value: u32) {
66
// This can also be obtained directly as
77
//
88
// let idx: usize = cuda_std::thread::index() as usize;
9-
let idx: usize = (
10-
cuda_std::thread::block_dim().x
11-
* cuda_std::thread::block_idx().x
12-
+ cuda_std::thread::thread_idx().x
13-
) as usize;
9+
let idx: usize = (cuda_std::thread::block_dim().x * cuda_std::thread::block_idx().x
10+
+ cuda_std::thread::thread_idx().x) as usize;
1411

1512
let elem: &mut u32 = unsafe { &mut *g_data.add(idx) };
1613
*elem = *elem + inc_value;

examples/cuda/async_api/src/main.rs

Lines changed: 34 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -1,26 +1,22 @@
1-
2-
use cust::memory::{DeviceBuffer, LockedBuffer, AsyncCopyDestination};
1+
use cust::context::Context;
2+
use cust::device::Device;
33
use cust::event::{Event, EventFlags};
4+
use cust::function::{BlockSize, GridSize};
5+
use cust::memory::{AsyncCopyDestination, DeviceBuffer, LockedBuffer};
6+
use cust::module::Module;
47
use cust::prelude::EventStatus;
58
use cust::stream::{Stream, StreamFlags};
6-
use cust::module::Module;
7-
use cust::context::Context;
8-
use cust::{launch, CudaFlags};
9-
use cust::device::Device;
10-
use cust::function::{GridSize, BlockSize};
9+
use cust::{CudaFlags, launch};
1110
use std::time::Instant;
1211

1312
static PTX: &str = include_str!(concat!(env!("OUT_DIR"), "/kernels.ptx"));
1413

1514
fn correct_output(data: &[u32], x: u32) -> bool {
16-
let not_matching_element = data
17-
.iter()
18-
.enumerate()
19-
.find(|&(_, &elem)| elem != x);
15+
let not_matching_element = data.iter().enumerate().find(|&(_, &elem)| elem != x);
2016

2117
match not_matching_element {
2218
Some((index, elem)) => println!("Error! data[{index}] = {elem}, ref = {x}"),
23-
None => println!("All elements of the array match the value!")
19+
None => println!("All elements of the array match the value!"),
2420
}
2521

2622
not_matching_element.is_none()
@@ -29,15 +25,16 @@ fn correct_output(data: &[u32], x: u32) -> bool {
2925
fn main() -> Result<(), cust::error::CudaError> {
3026
cust::init(CudaFlags::empty()).expect("Couldn't initialize CUDA environment!");
3127

32-
let device = Device::get_device(0)
33-
.expect("Couldn't find Cuda supported devices!");
28+
let device = Device::get_device(0).expect("Couldn't find Cuda supported devices!");
3429

3530
println!("Device Name: {}", device.name().unwrap());
3631

3732
// Set up the context, load the module, and create a stream to run kernels in.
3833
let _ctx = Context::new(device);
3934
let module = Module::from_ptx(PTX, &[]).expect("Module couldn't be init!");
40-
let increment = module.get_function("increment").expect("Kernel function not found!");
35+
let increment = module
36+
.get_function("increment")
37+
.expect("Kernel function not found!");
4138
let stream = Stream::new(StreamFlags::NON_BLOCKING, None).expect("Stream couldn't be init!");
4239

4340
const N: usize = 16 * 1024 * 1024;
@@ -53,9 +50,12 @@ fn main() -> Result<(), cust::error::CudaError> {
5350
// Create buffers for data on host-side
5451
// Ideally should be page-locked for efficiency
5552
let mut host_a = LockedBuffer::new(&0u32, N).expect("host array couldn't be initialized!");
56-
let mut device_a = DeviceBuffer::from_slice(&[u32::MAX; N]).expect("device array couldn't be initialized!");
53+
let mut device_a =
54+
DeviceBuffer::from_slice(&[u32::MAX; N]).expect("device array couldn't be initialized!");
5755

58-
start_event.record(&stream).expect("Failed to record start_event in the CUDA stream!");
56+
start_event
57+
.record(&stream)
58+
.expect("Failed to record start_event in the CUDA stream!");
5959
let start = Instant::now();
6060

6161
// SAFETY: until the stop_event is triggered:
@@ -64,7 +64,9 @@ fn main() -> Result<(), cust::error::CudaError> {
6464
// 3. Until `stop_query` yields `EventStatus::Ready`, `device_a` is not involved in any other operation
6565
// other than those of the operations in the stream.
6666
unsafe {
67-
device_a.async_copy_from(&host_a, &stream).expect("Could not copy from host to device!");
67+
device_a
68+
.async_copy_from(&host_a, &stream)
69+
.expect("Could not copy from host to device!");
6870
}
6971

7072
// SAFETY: number of threads * number of blocks = total number of elements.
@@ -80,27 +82,33 @@ fn main() -> Result<(), cust::error::CudaError> {
8082
// SAFETY: until the stop_event is triggered:
8183
// 1. `device_a` is not being modified
8284
// 2. Both `device_a` and `host_a` are not deallocated
83-
// 3. At this point, until `stop_query` yields `EventStatus::Ready`,
85+
// 3. At this point, until `stop_query` yields `EventStatus::Ready`,
8486
// `host_a` is not involved in any other operation.
8587
unsafe {
86-
device_a.async_copy_to(&mut host_a, &stream).expect("Could not copy from device to host!");
88+
device_a
89+
.async_copy_to(&mut host_a, &stream)
90+
.expect("Could not copy from device to host!");
8791
}
8892

89-
stop_event.record(&stream).expect("Failed to record stop_event in the CUDA stream!");
93+
stop_event
94+
.record(&stream)
95+
.expect("Failed to record stop_event in the CUDA stream!");
9096
let cpu_time: u128 = start.elapsed().as_micros();
9197

9298
let mut counter: u64 = 0;
93-
while stop_event.query() != Ok(EventStatus::Ready) { counter += 1 }
99+
while stop_event.query() != Ok(EventStatus::Ready) {
100+
counter += 1
101+
}
94102

95103
let gpu_time: u128 = stop_event
96104
.elapsed(&start_event)
97105
.expect("Failed to calculate duration of GPU operations!")
98106
.as_micros();
99-
107+
100108
println!("Time spent executing by the GPU: {gpu_time} microseconds");
101109
println!("Time spent by CPU in CUDA calls: {cpu_time} microseconds");
102110
println!("CPU executed {counter} iterations while waiting for GPU to finish.");
103-
111+
104112
assert!(correct_output(host_a.as_slice(), value));
105113

106114
// Stream is synchronized as a safety measure
@@ -111,14 +119,14 @@ fn main() -> Result<(), cust::error::CudaError> {
111119
Ok(()) => println!("Successfully destroyed start_event"),
112120
Err((cuda_error, _event)) => {
113121
println!("Failed to destroy start_event: {:?}", cuda_error);
114-
},
122+
}
115123
}
116124

117125
match Event::drop(stop_event) {
118126
Ok(()) => println!("Successfully destroyed stop_event"),
119127
Err((cuda_error, _event)) => {
120128
println!("Failed to destroy stop_event: {:?}", cuda_error);
121-
},
129+
}
122130
}
123131

124132
DeviceBuffer::drop(device_a).expect("Couldn't drop device array!");

0 commit comments

Comments
 (0)