Skip to content

Commit ab4b28b

Browse files
nnethercote authored and LegNeato committed
Fix and document `in_range!` usage points.
Every single one has an upper bound that is one higher than it should be.
- For `thread_idx_[xyz]`: indices are 0-indexed, so the maximum index is the `block_dim_[xyz]` maximum minus one. Changing `..=` to `..` fixes it.
- For `block_idx_[xyz]`: likewise, but relative to `grid_dim_[xyz]`.
- For `block_dim_[xyz]`: these were all one too big. Not sure why, perhaps a `..`/`..=` mix-up?
- For `grid_dim_[xyz]`: likewise. (Yes, these grid maximum dimensions are all of the form 2^N-1 even though the block maximum dimensions are all of the form 2^N. I don't know why, but it's what the CUDA docs say.)
1 parent dedb993 commit ab4b28b

File tree

1 file changed

+24
-12
lines changed

1 file changed

+24
-12
lines changed

crates/cuda_std/src/thread.rs

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -108,73 +108,85 @@ macro_rules! in_range {
108108
#[gpu_only]
109109
#[inline(always)]
110110
pub fn thread_idx_x() -> u32 {
111-
in_range!(__nvvm_thread_idx_x, 0..=1024)
111+
// Thread indices are 0-based, so the range is derived from the `block_dim_x` range (maximum 1024).
112+
in_range!(__nvvm_thread_idx_x, 0..1024)
112113
}
113114

114115
#[gpu_only]
115116
#[inline(always)]
116117
pub fn thread_idx_y() -> u32 {
117-
in_range!(__nvvm_thread_idx_y, 0..=1024)
118+
// Thread indices are 0-based, so the range is derived from the `block_dim_y` range (maximum 1024).
119+
in_range!(__nvvm_thread_idx_y, 0..1024)
118120
}
119121

120122
#[gpu_only]
121123
#[inline(always)]
122124
pub fn thread_idx_z() -> u32 {
123-
in_range!(__nvvm_thread_idx_z, 0..=64)
125+
// Thread indices are 0-based, so the range is derived from the `block_dim_z` range (maximum 64).
126+
in_range!(__nvvm_thread_idx_z, 0..64)
124127
}
125128

126129
#[gpu_only]
127130
#[inline(always)]
128131
pub fn block_idx_x() -> u32 {
129-
in_range!(__nvvm_block_idx_x, 0..=2147483647)
132+
// Block indices are 0-based, so the range is derived from the `grid_dim_x` range (maximum 2^31 - 1).
133+
in_range!(__nvvm_block_idx_x, 0..2147483647)
130134
}
131135

132136
#[gpu_only]
133137
#[inline(always)]
134138
pub fn block_idx_y() -> u32 {
135-
in_range!(__nvvm_block_idx_y, 0..=65535)
139+
// Block indices are 0-based, so the range is derived from the `grid_dim_y` range (maximum 65535).
140+
in_range!(__nvvm_block_idx_y, 0..65535)
136141
}
137142

138143
#[gpu_only]
139144
#[inline(always)]
140145
pub fn block_idx_z() -> u32 {
141-
in_range!(__nvvm_block_idx_z, 0..=65535)
146+
// Block indices are 0-based, so the range is derived from the `grid_dim_z` range (maximum 65535).
147+
in_range!(__nvvm_block_idx_z, 0..65535)
142148
}
143149

144150
#[gpu_only]
145151
#[inline(always)]
146152
pub fn block_dim_x() -> u32 {
147-
in_range!(__nvvm_block_dim_x, 1..=1025)
153+
// CUDA Compute Capabilities: "Maximum x- or y-dimensionality of a block" is 1024.
154+
in_range!(__nvvm_block_dim_x, 1..=1024)
148155
}
149156

150157
#[gpu_only]
151158
#[inline(always)]
152159
pub fn block_dim_y() -> u32 {
153-
in_range!(__nvvm_block_dim_y, 1..=1025)
160+
// CUDA Compute Capabilities: "Maximum x- or y-dimensionality of a block" is 1024.
161+
in_range!(__nvvm_block_dim_y, 1..=1024)
154162
}
155163

156164
#[gpu_only]
157165
#[inline(always)]
158166
pub fn block_dim_z() -> u32 {
159-
in_range!(__nvvm_block_dim_z, 1..=65)
167+
// CUDA Compute Capabilities: "Maximum z-dimension of a block" is 64.
168+
in_range!(__nvvm_block_dim_z, 1..=64)
160169
}
161170

162171
#[gpu_only]
163172
#[inline(always)]
164173
pub fn grid_dim_x() -> u32 {
165-
in_range!(__nvvm_grid_dim_x, 1..=2147483648)
174+
// CUDA Compute Capabilities: "Maximum x-dimension of a grid of thread blocks" is 2^31 - 1.
175+
in_range!(__nvvm_grid_dim_x, 1..=2147483647)
166176
}
167177

168178
#[gpu_only]
169179
#[inline(always)]
170180
pub fn grid_dim_y() -> u32 {
171-
in_range!(__nvvm_grid_dim_y, 1..=65536)
181+
// CUDA Compute Capabilities: "Maximum y- or z-dimension of a grid of thread blocks" is 65535.
182+
in_range!(__nvvm_grid_dim_y, 1..=65535)
172183
}
173184

174185
#[gpu_only]
175186
#[inline(always)]
176187
pub fn grid_dim_z() -> u32 {
177-
in_range!(__nvvm_grid_dim_z, 1..=65536)
188+
// CUDA Compute Capabilities: "Maximum y- or z-dimension of a grid of thread blocks" is 65535.
189+
in_range!(__nvvm_grid_dim_z, 1..=65535)
178190
}
179191

180192
/// Gets the 3d index of the thread currently executing the kernel.

0 commit comments

Comments
 (0)