@@ -16,7 +16,7 @@ function _reverse(input::AnyGPUArray{T, N}, output::AnyGPUArray{T, N};
1616 nd_idx = CartesianIndices (input)
1717
1818 # # COV_EXCL_START
19- @kernel unsafe_indices= true function kernel (input:: AbstractArray{T, N} , output:: AbstractArray{T, N} ) where {T, N}
19+ @kernel unsafe_indices= true function kernel (input, output)
2020 offset_in = @groupsize ()[1 ] * (@index (Group, Linear) - 1 i32)
2121 index_in = offset_in + @index (Local, Linear)
2222
@@ -32,7 +32,7 @@ function _reverse(input::AnyGPUArray{T, N}, output::AnyGPUArray{T, N};
3232 nthreads = 256
3333 nblocks = cld (length (input), nthreads)
3434
35- kernel (get_backend (input), nblocks)(input, output; ndrange= length (nblocks ))
35+ kernel (get_backend (input), nblocks)(input, output; ndrange= length (input ))
3636end
3737
3838# in-place version, swapping elements on half the number of threads
@@ -52,10 +52,9 @@ function _reverse!(data::AnyGPUArray{T, N}; dims=1:ndims(data)) where {T, N}
5252 nd_idx = CartesianIndices (reduced_size)
5353
5454 # # COV_EXCL_START
55- @kernel unsafe_indices= true function kernel (data:: AbstractArray{T, N} ) where {T, N}
55+ @kernel unsafe_indices= true function kernel (data)
5656 offset_in = @groupsize ()[1 ] * (@index (Group, Linear) - 1 i32)
57-
58- index_in = offset_in + threadIdx (). x
57+ index_in = offset_in + @index (Local, Linear)
5958
6059 @inbounds if index_in <= reduced_length
6160 idx = Tuple (nd_idx[index_in])
@@ -80,7 +79,7 @@ function _reverse!(data::AnyGPUArray{T, N}; dims=1:ndims(data)) where {T, N}
8079 nthreads = 256
8180 nblocks = cld (prod (reduced_size), nthreads)
8281
83- kernel (get_backend (input ), nblocks)(input, output ; ndrange= length (nblocks ))
82+ kernel (get_backend (data ), nblocks)(data ; ndrange= length (data ))
8483end
8584
8685
0 commit comments