Skip to content

Commit af02364

Browse files
committed
Fix workgroup size determination
1 parent ec21cdd commit af02364

File tree

2 files changed

+3
-2
lines changed

2 files changed

+3
-2
lines changed

lib/JLArrays/Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "JLArrays"
22
uuid = "27aeb0d3-9eb9-45fb-866b-73c2ecf80fcb"
33
authors = ["Tim Besard <tim.besard@gmail.com>"]
4-
version = "0.3.0"
4+
version = "0.3.1"
55

66
[deps]
77
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"

lib/JLArrays/src/JLArrays.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -600,7 +600,7 @@ KernelAbstractions.allocate(::JLBackend, ::Type{T}, dims::Tuple) where T = JLArr
600600
end
601601

602602
if KernelAbstractions.workgroupsize(kernel) <: DynamicSize && workgroupsize === nothing
603-
workgroupsize = (1024,) # Vectorization, 4x unrolling, minimal grain size
603+
workgroupsize = (MAXTHREADS,) # Vectorization, 4x unrolling, minimal grain size
604604
end
605605
iterspace, dynamic = partition(kernel, ndrange, workgroupsize)
606606
# partition checked that the ndrange's agreed
@@ -626,6 +626,7 @@ else
626626
end
627627

628628
function (obj::Kernel{JLBackend})(args...; ndrange=nothing, workgroupsize=nothing)
629+
ndrange, workgroupsize, _, _ = launch_config(obj, ndrange, workgroupsize)
629630
device_args = jlconvert.(args)
630631
new_obj = convert_to_cpu(obj)
631632
new_obj(device_args...; ndrange, workgroupsize)

0 commit comments

Comments
 (0)