@@ -4,19 +4,19 @@ import Base.MultiplicativeInverses: SignedMultiplicativeInverse
44
55# CartesianIndex uses Int instead of Int32
66
# Placeholder inverse for a zero-sized dimension. It is assembled field by field
# with `Expr(:new, ...)` (divisor 0, multiplier `typemax(Int32)`, addmul 0, shift 0)
# rather than through the regular `SignedMultiplicativeInverse{Int32}` constructor.
@eval function EmptySMI()
    return $(Expr(:new, SignedMultiplicativeInverse{Int32}, Int32(0), typemax(Int32), zero(Int8), zero(UInt8)))
end

# Build the `Int32` multiplicative inverse for `i`; a zero `i` maps to `EmptySMI()`.
function SMI(i)
    if i == 0
        return EmptySMI()
    end
    return SignedMultiplicativeInverse{Int32}(i)
end
99
"""
    FastCartesianIndices{N} <: AbstractArray{CartesianIndex{N}, N}

`N`-dimensional array of `CartesianIndex{N}` that stores, for every dimension, a
`SignedMultiplicativeInverse{Int32}` in the `inverses` tuple.
"""
struct FastCartesianIndices{N} <: AbstractArray{CartesianIndex{N}, N}
    # One precomputed `Int32` multiplicative inverse per dimension.
    inverses::NTuple{N, SignedMultiplicativeInverse{Int32}}
end
1313
"""
    FastCartesianIndices(indices::NTuple{N})

Convert every entry of `indices` to `Int32`, turn it into a multiplicative inverse
with `SMI`, and wrap the resulting tuple in a `FastCartesianIndices`.
"""
function FastCartesianIndices(indices::NTuple{N}) where {N}
    inverses = map(indices) do extent
        SMI(Int32(extent))
    end
    return FastCartesianIndices(inverses)
end
1818
19- function Base. size (FCI:: FastCartesianIndices{N} ) where N
19+ function Base. size (FCI:: FastCartesianIndices{N} ) where {N}
2020 ntuple (Val (N)) do I
2121 FCI. inverses[I]. divisor
2222 end
2626 return CartesianIndex ()
2727end
2828
"""
    getindex(iter::FastCartesianIndices{N}, I::Vararg{Int, N})

Return the `CartesianIndex` at position `I`. Bounds are checked against `iter`
(unless elided by the caller); each coordinate is then read from
`Base.OneTo(divisor)` of the matching dimension.
"""
@inline function Base.getindex(iter::FastCartesianIndices{N}, I::Vararg{Int, N}) where {N}
    @boundscheck checkbounds(iter, I...)
    coords = map(iter.inverses, I) do smi, pos
        # The bounds check above already covers every dimension.
        @inbounds getindex(Base.OneTo(smi.divisor), pos)
    end
    return CartesianIndex(coords)
end
3636
# Base case of the linear-to-Cartesian recursion: with no dimensions left, the
# remaining zero-based index becomes the single one-based trailing coordinate.
# This method used to be defined under the misspelled name `_ind2sub_recuse`, so
# `_ind2sub_recurse` had no `Tuple{}` method and an empty tuple fell through to
# the generic method, which indexes `inds[1]`.
_ind2sub_recurse(::Tuple{}, ind) = (ind + 1,)
# Misspelled name kept as a forwarding alias for any existing caller.
_ind2sub_recuse(inds::Tuple{}, ind) = _ind2sub_recurse(inds, ind)
# Last remaining dimension: the leftover index is handed to `_lookup` together
# with that dimension's entry, and the result is returned as a 1-tuple.
function _ind2sub_recurse(indslast::NTuple{1}, ind)
    Base.@_inline_meta
    last_coord = _lookup(ind, first(indslast))
    return (last_coord,)
end
4242
# General step: `_div` on the leading entry yields the index carried to the
# remaining dimensions (`indnext`) plus an offset `f` and a length `l`. The
# coordinate for this dimension is `ind - l * indnext + f`; the rest of the tuple
# comes from recursing on the tail.
function _ind2sub_recurse(inds, ind)
    Base.@_inline_meta
    indnext, f, l = _div(ind, inds[1])
    coord = ind - l * indnext + f
    remaining = _ind2sub_recurse(Base.tail(inds), indnext)
    return (coord, remaining...)
end
4949
# Shift `ind` up by one; the inverse argument is only used for dispatch.
function _lookup(ind, ::SignedMultiplicativeInverse)
    return ind + 1
end
# Divide `ind` (truncated to `Int32`) by the inverse and return
# `(quotient, 1, divisor)`. Throws `DivideError` when the stored divisor is zero.
function _div(ind, inv::SignedMultiplicativeInverse)
    if inv.divisor == 0
        throw(DivideError())
    end
    quotient = div(ind % Int32, inv)
    return quotient, 1, inv.divisor
end
5555
# Linear-to-Cartesian conversion for `FastCartesianIndices`: subtract one from
# `ind`, then run the recursion over the stored inverses.
function Base._ind2sub(inv::FastCartesianIndices, ind)
    Base.@_inline_meta
    zero_based = ind - 1
    return _ind2sub_recurse(inv.inverses, zero_based)
end
6060
6161export _Size, StaticSize, DynamicSize, get
@@ -151,24 +151,7 @@ Base.length(range::NDRange) = length(blocks(range))
151151end
152152
"""
    expand(ndrange::NDRange{N}, groupidx::Integer, idx::Integer)

Look up entry `groupidx` of `blocks(ndrange)` and entry `idx` of
`workitems(ndrange)`, then forward both to the `expand` method that takes the
looked-up values.
"""
Base.@propagate_inbounds function expand(ndrange::NDRange{N}, groupidx::Integer, idx::Integer) where {N}
    group = blocks(ndrange)[groupidx]
    item = workitems(ndrange)[idx]
    return expand(ndrange, group, item)
end
173156
174157Base. @propagate_inbounds function expand (ndrange:: NDRange{N} , groupidx:: CartesianIndex{N} , idx:: Integer ) where {N}
0 commit comments