1- using KernelAbstractions, CUDAKernels, Test, CUDA
1+ using KernelAbstractions
2+ using CUDA
3+ using CUDAKernels
4+ using AMDGPU
5+ using ROCKernels
6+ using Test
27
# When a CUDA GPU is present, turn off scalar indexing of GPU arrays so that any
# accidental element-by-element access from the host raises an error.
has_cuda_gpu() && CUDA.allowscalar(false)
611
# Naive transpose kernel: every work-item handles a single element, writing
# `a[col, row]` into `b[row, col]`.
#
# `a` is the matrix that is read and `b` is the matrix that is written.
# Bounds checks are disabled with `@inbounds`, so the launch range has to stay
# within the axes of `b` (and, mirrored, within the axes of `a`).
@kernel function naive_transpose_kernel!(a, b)
    # Two-dimensional global index of this work-item within the launch range.
    row, col = @index(Global, NTuple)
    @inbounds b[row, col] = a[col, row]
end
1116
# Create a wrapper function that checks the inputs before launching the kernel
@@ -16,11 +21,17 @@ function naive_transpose!(a, b)
1621 println (" Matrix size mismatch!" )
1722 return nothing
1823 end
24+
1925 if isa (a, Array)
20- kernel! = naive_transpose_kernel! (CPU (),4 )
26+ kernel! = naive_transpose_kernel! (CPU (), 4 )
27+ elseif isa (a, CuArray)
28+ kernel! = naive_transpose_kernel! (CUDADevice (), 256 )
29+ elseif isa (a, ROCArray)
30+ kernel! = naive_transpose_kernel! (ROCDevice (), 256 )
2131 else
22- kernel! = naive_transpose_kernel! ( CUDADevice (), 256 )
32+ println ( " Unrecognized array type! " )
2333 end
34+
2435 kernel! (a, b, ndrange= size (a))
2536end
2637
@@ -49,3 +60,18 @@ if has_cuda_gpu()
4960
5061 @test a == transpose (b)
5162end
63+
64+
# Run the same transpose check on an AMD GPU when one is available.
if has_rocm_gpu()
    # Upload the input matrix and allocate a zero-filled output on the device.
    d_a = ROCArray(a)
    d_b = ROCArray(zeros(Float32, res, res))

    # Block on whatever the wrapper returns for the launch before reading results.
    ev = naive_transpose!(d_a, d_b)
    wait(ev)

    # Bring both matrices back to host memory for the comparison.
    a = Array(d_a)
    b = Array(d_b)

    @test a == transpose(b)
end
77+
0 commit comments