We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent d341f2d, commit 6a2c51b. Copy full SHA for 6a2c51b
src/CUDAKernels.jl
@@ -175,7 +175,9 @@ end
175
176
177
function KI.kernel_max_work_group_size(::CUDABackend, kikern::KI.KIKernel{<:CUDABackend}; max_work_items::Int=typemax(Int))::Int
178
- Int(min(kikern.kern.pipeline.maxTotalThreadsPerThreadgroup, max_work_items))
+ kernel_config = launch_configuration(kikern.kern.fun)
179
+
180
+ Int(min(kernel_config.threads, max_work_items))
181
end
182
function KI.max_work_group_size(::CUDABackend)::Int
183
Int(attribute(device(), CUDA.DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK))
0 commit comments