1-
2- use cust:: memory :: { DeviceBuffer , LockedBuffer , AsyncCopyDestination } ;
1+ use cust :: context :: Context ;
2+ use cust:: device :: Device ;
33use cust:: event:: { Event , EventFlags } ;
4+ use cust:: function:: { BlockSize , GridSize } ;
5+ use cust:: memory:: { AsyncCopyDestination , DeviceBuffer , LockedBuffer } ;
6+ use cust:: module:: Module ;
47use cust:: prelude:: EventStatus ;
58use cust:: stream:: { Stream , StreamFlags } ;
6- use cust:: module:: Module ;
7- use cust:: context:: Context ;
8- use cust:: { launch, CudaFlags } ;
9- use cust:: device:: Device ;
10- use cust:: function:: { GridSize , BlockSize } ;
9+ use cust:: { CudaFlags , launch} ;
1110use std:: time:: Instant ;
1211
1312static PTX : & str = include_str ! ( concat!( env!( "OUT_DIR" ) , "/kernels.ptx" ) ) ;
1413
1514fn correct_output ( data : & [ u32 ] , x : u32 ) -> bool {
16- let not_matching_element = data
17- . iter ( )
18- . enumerate ( )
19- . find ( |& ( _, & elem) | elem != x) ;
15+ let not_matching_element = data. iter ( ) . enumerate ( ) . find ( |& ( _, & elem) | elem != x) ;
2016
2117 match not_matching_element {
2218 Some ( ( index, elem) ) => println ! ( "Error! data[{index}] = {elem}, ref = {x}" ) ,
23- None => println ! ( "All elements of the array match the value!" )
19+ None => println ! ( "All elements of the array match the value!" ) ,
2420 }
2521
2622 not_matching_element. is_none ( )
@@ -29,15 +25,16 @@ fn correct_output(data: &[u32], x: u32) -> bool {
2925fn main ( ) -> Result < ( ) , cust:: error:: CudaError > {
3026 cust:: init ( CudaFlags :: empty ( ) ) . expect ( "Couldn't initialize CUDA environment!" ) ;
3127
32- let device = Device :: get_device ( 0 )
33- . expect ( "Couldn't find Cuda supported devices!" ) ;
28+ let device = Device :: get_device ( 0 ) . expect ( "Couldn't find Cuda supported devices!" ) ;
3429
3530 println ! ( "Device Name: {}" , device. name( ) . unwrap( ) ) ;
3631
3732 // Set up the context, load the module, and create a stream to run kernels in.
3833 let _ctx = Context :: new ( device) ;
3934 let module = Module :: from_ptx ( PTX , & [ ] ) . expect ( "Module couldn't be init!" ) ;
40- let increment = module. get_function ( "increment" ) . expect ( "Kernel function not found!" ) ;
35+ let increment = module
36+ . get_function ( "increment" )
37+ . expect ( "Kernel function not found!" ) ;
4138 let stream = Stream :: new ( StreamFlags :: NON_BLOCKING , None ) . expect ( "Stream couldn't be init!" ) ;
4239
4340 const N : usize = 16 * 1024 * 1024 ;
@@ -53,9 +50,12 @@ fn main() -> Result<(), cust::error::CudaError> {
5350 // Create buffers for data on host-side
5451 // Ideally should be page-locked for efficiency
5552 let mut host_a = LockedBuffer :: new ( & 0u32 , N ) . expect ( "host array couldn't be initialized!" ) ;
56- let mut device_a = DeviceBuffer :: from_slice ( & [ u32:: MAX ; N ] ) . expect ( "device array couldn't be initialized!" ) ;
53+ let mut device_a =
54+ DeviceBuffer :: from_slice ( & [ u32:: MAX ; N ] ) . expect ( "device array couldn't be initialized!" ) ;
5755
58- start_event. record ( & stream) . expect ( "Failed to record start_event in the CUDA stream!" ) ;
56+ start_event
57+ . record ( & stream)
58+ . expect ( "Failed to record start_event in the CUDA stream!" ) ;
5959 let start = Instant :: now ( ) ;
6060
6161 // SAFETY: until the stop_event is triggered:
@@ -64,7 +64,9 @@ fn main() -> Result<(), cust::error::CudaError> {
6464 // 3. Until `stop_query` yields `EventStatus::Ready`, `device_a` is not involved in any other operation
6565 // other than those of the operations in the stream.
6666 unsafe {
67- device_a. async_copy_from ( & host_a, & stream) . expect ( "Could not copy from host to device!" ) ;
67+ device_a
68+ . async_copy_from ( & host_a, & stream)
69+ . expect ( "Could not copy from host to device!" ) ;
6870 }
6971
7072 // SAFETY: number of threads * number of blocks = total number of elements.
@@ -80,27 +82,33 @@ fn main() -> Result<(), cust::error::CudaError> {
8082 // SAFETY: until the stop_event is triggered:
8183 // 1. `device_a` is not being modified
8284 // 2. Both `device_a` and `host_a` are not deallocated
83- // 3. At this point, until `stop_query` yields `EventStatus::Ready`,
85+ // 3. At this point, until `stop_query` yields `EventStatus::Ready`,
8486 // `host_a` is not involved in any other operation.
8587 unsafe {
86- device_a. async_copy_to ( & mut host_a, & stream) . expect ( "Could not copy from device to host!" ) ;
88+ device_a
89+ . async_copy_to ( & mut host_a, & stream)
90+ . expect ( "Could not copy from device to host!" ) ;
8791 }
8892
89- stop_event. record ( & stream) . expect ( "Failed to record stop_event in the CUDA stream!" ) ;
93+ stop_event
94+ . record ( & stream)
95+ . expect ( "Failed to record stop_event in the CUDA stream!" ) ;
9096 let cpu_time: u128 = start. elapsed ( ) . as_micros ( ) ;
9197
9298 let mut counter: u64 = 0 ;
93- while stop_event. query ( ) != Ok ( EventStatus :: Ready ) { counter += 1 }
99+ while stop_event. query ( ) != Ok ( EventStatus :: Ready ) {
100+ counter += 1
101+ }
94102
95103 let gpu_time: u128 = stop_event
96104 . elapsed ( & start_event)
97105 . expect ( "Failed to calculate duration of GPU operations!" )
98106 . as_micros ( ) ;
99-
107+
100108 println ! ( "Time spent executing by the GPU: {gpu_time} microseconds" ) ;
101109 println ! ( "Time spent by CPU in CUDA calls: {cpu_time} microseconds" ) ;
102110 println ! ( "CPU executed {counter} iterations while waiting for GPU to finish." ) ;
103-
111+
104112 assert ! ( correct_output( host_a. as_slice( ) , value) ) ;
105113
106114 // Stream is synchronized as a safety measure
@@ -111,14 +119,14 @@ fn main() -> Result<(), cust::error::CudaError> {
111119 Ok ( ( ) ) => println ! ( "Successfully destroyed start_event" ) ,
112120 Err ( ( cuda_error, _event) ) => {
113121 println ! ( "Failed to destroy start_event: {:?}" , cuda_error) ;
114- } ,
122+ }
115123 }
116124
117125 match Event :: drop ( stop_event) {
118126 Ok ( ( ) ) => println ! ( "Successfully destroyed stop_event" ) ,
119127 Err ( ( cuda_error, _event) ) => {
120128 println ! ( "Failed to destroy stop_event: {:?}" , cuda_error) ;
121- } ,
129+ }
122130 }
123131
124132 DeviceBuffer :: drop ( device_a) . expect ( "Couldn't drop device array!" ) ;
0 commit comments