|
| 1 | +# INCLUDE ROCM |
1 | 2 | using KernelAbstractions, Test |
2 | 3 | using KernelAbstractions: @atomic, @atomicswap, @atomicreplace |
3 | 4 | include(joinpath(@__DIR__, "utils.jl")) # Load backend |
@@ -65,8 +66,10 @@ function histogram!(histogram_output, input; |
65 | 66 |
|
66 | 67 | if isa(input, Array) |
67 | 68 | kernel! = histogram_kernel!(CPU(), numcores) |
68 | | - else |
| 69 | + elseif has_cuda |
69 | 70 | kernel! = histogram_kernel!(CUDADevice(), numthreads) |
| 71 | + elseif has_rocm |
| 72 | + kernel! = histogram_kernel!(ROCDevice(), numthreads) |
70 | 73 | end |
71 | 74 |
|
72 | 75 | kernel!(histogram_output, input, ndrange=size(input)) |
|
96 | 99 | @test isapprox(CPU_2_histogram, histogram_2_baseline) |
97 | 100 | end |
98 | 101 |
|
99 | | - if has_cuda_gpu() |
| 102 | + if has_cuda && has_cuda_gpu() |
100 | 103 | CUDA.allowscalar(false) |
101 | | - |
102 | | - GPU_rand_input = CuArray(rand_input) |
103 | | - GPU_linear_input = CuArray(linear_input) |
104 | | - GPU_2_input = CuArray(all_2) |
105 | | - |
106 | | - GPU_rand_histogram = CuArray(zeros(Int, 128)) |
107 | | - GPU_linear_histogram = CuArray(zeros(Int, 1024)) |
108 | | - GPU_2_histogram = CuArray(zeros(Int, 2)) |
| 104 | + GPUArray = CuArray |
| 105 | + has_gpu = true |
| 106 | + elseif has_rocm && AMDGPU.functional() |
| 107 | + AMDGPU.allowscalar(false) |
| 108 | + GPUArray = ROCArray |
| 109 | + has_gpu = true |
| 110 | + end |
| 111 | + if has_gpu |
| 112 | + GPU_rand_input = GPUArray(rand_input) |
| 113 | + GPU_linear_input = GPUArray(linear_input) |
| 114 | + GPU_2_input = GPUArray(all_2) |
| 115 | + |
| 116 | + GPU_rand_histogram = GPUArray(zeros(Int, 128)) |
| 117 | + GPU_linear_histogram = GPUArray(zeros(Int, 1024)) |
| 118 | + GPU_2_histogram = GPUArray(zeros(Int, 2)) |
109 | 119 |
|
110 | 120 | wait(histogram!(GPU_rand_histogram, GPU_rand_input)) |
111 | 121 | wait(histogram!(GPU_linear_histogram, GPU_linear_input)) |
|
0 commit comments