Skip to content

Commit a90ed05

Browse files
authored
Merge pull request #58 from christiangnrd/bbench
Refactor benchmarks
2 parents c146374 + 4ca5dd0 commit a90ed05

File tree

10 files changed

+1866
-362
lines changed

10 files changed

+1866
-362
lines changed

benchmark/Project.toml

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,10 @@
1 → 1
[deps]
2 → 2
AcceleratedKernels = "6a4ca0a5-0e36-4168-a932-d9be78d558f1"
3+
BenchmarkPlots = "ab8c0f59-4072-4e0d-8f91-a91e1495eb26"
4+
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
5+
FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
6+
GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
3 → 7
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
8+
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
9+
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
10+
StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"

benchmark/accumulate_1d.jl

Lines changed: 12 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,22 @@
1-
import AcceleratedKernels as AK
2-
using KernelAbstractions
1+
group = addgroup!(SUITE, "accumulate_1d")
3 → 2

4-
using BenchmarkTools
5-
using Random
6-
Random.seed!(0)
3+
acc_f(x, y) = sin(x) + cos(y)
7 → 4

8 → 5

9-
# Choose the Array backend:
10-
#
11-
# using CUDA
12-
# const ArrayType = CuArray
13-
#
14-
# using AMDGPU
15-
# const ArrayType = ROCArray
16-
#
17-
# using oneAPI
18-
# const ArrayType = oneArray
19-
#
20-
# using Metal
21-
# const ArrayType = MtlArray
22-
#
23-
# using OpenCL
24-
# const ArrayType = CLArray
25-
#
26-
const ArrayType = Array
27-
28-
29-
println("Using ArrayType: ", ArrayType)
30-
6+
GPUArrays.neutral_element(::typeof(acc_f), T) = T(0)
31 → 7

32 → 8
n = 1_000_000
33 → 9

10+
for T in [UInt32, Int64, Float32]
11+
local _group = addgroup!(group, "$T")
34 → 12

35-
println("\n===\nBenchmarking accumulate(+) on $n UInt32 - Base vs. AK")
36-
display(@benchmark Base.accumulate(+, v, init=UInt32(0)) setup=(v = ArrayType(rand(UInt32(1):UInt32(100), n))))
37-
display(@benchmark AK.accumulate(+, v, init=UInt32(0)) setup=(v = ArrayType(rand(UInt32(1):UInt32(100), n))))
38-
39-
40-
println("\n===\nBenchmarking accumulate(+) on $n Int64 - Base vs. AK")
41-
display(@benchmark Base.accumulate(+, v, init=Int64(0)) setup=(v = ArrayType(rand(Int64(1):Int64(100), n))))
42-
display(@benchmark AK.accumulate(+, v, init=Int64(0)) setup=(v = ArrayType(rand(Int64(1):Int64(100), n))))
43-
44-
45-
println("\n===\nBenchmarking accumulate(+) on $n Float32 - Base vs. AK")
46-
display(@benchmark Base.accumulate(+, v, init=Float32(0)) setup=(v = ArrayType(rand(Float32, n))))
47-
display(@benchmark AK.accumulate(+, v, init=Float32(0)) setup=(v = ArrayType(rand(Float32, n))))
13+
local randrange = T == Float32 ? T : T(1):T(100)
48 → 14

15+
_group["base_1d"] = @benchmarkable @sb(Base.accumulate(+, v; init=$T(0))) setup=(v = ArrayType(rand(rng, $randrange, n)))
16+
_group["acck_1d"] = @benchmarkable @sb(AK.accumulate(+, v; init=$T(0))) setup=(v = ArrayType(rand(rng, $randrange, n)))
49 → 17

50-
println("\n===\nBenchmarking accumulate((x, y) -> sin(x) + cos(y)) on $n Float32 - Base vs. AK")
51-
display(@benchmark Base.accumulate((x, y) -> sin(x) + cos(y), v, init=Float32(0)) setup=(v = ArrayType(rand(Float32, n))))
52-
display(@benchmark AK.accumulate((x, y) -> sin(x) + cos(y), v, init=Float32(0), neutral=Float32(0)) setup=(v = ArrayType(rand(Float32, n))))
18+
T == Float32 || continue
53 → 19

20+
_group["base_1d_sincos"] = @benchmarkable @sb(Base.accumulate(acc_f, v; init=$T(0))) setup=(v = ArrayType(rand(rng, $randrange, n)))
21+
_group["acck_1d_sincos"] = @benchmarkable @sb(AK.accumulate(acc_f, v; init=$T(0), neutral=$T(0))) setup=(v = ArrayType(rand(rng, $randrange, n)))
22+
end

benchmark/accumulate_nd.jl

Lines changed: 16 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1,75 +1,28 @@
1-
import AcceleratedKernels as AK
2-
using KernelAbstractions
1+
group = addgroup!(SUITE, "accumulate_nd")
3 → 2

4-
using BenchmarkTools
5-
using Random
6-
Random.seed!(0)
7-
8-
9-
# Choose the Array backend:
10-
#
11-
# using CUDA
12-
# const ArrayType = CuArray
13-
#
14-
# using AMDGPU
15-
# const ArrayType = ROCArray
16-
#
17-
# using oneAPI
18-
# const ArrayType = oneArray
19-
#
20-
# using Metal
21-
# const ArrayType = MtlArray
22-
#
23-
# using OpenCL
24-
# const ArrayType = CLArray
25-
#
26-
const ArrayType = Array
27-
28-
29-
println("Using ArrayType: ", ArrayType)
3+
acc_f(x, y) = sin(x) + cos(y)
30 → 4

5+
GPUArrays.neutral_element(::typeof(acc_f), T) = T(0)
31 → 6

32 → 7
n1 = 3
33 → 8
n2 = 1_000_000
34 → 9

10+
for T in [UInt32, Int64, Float32]
11+
local _group = addgroup!(group, "$T")
35 → 12

36-
println("\n===\nBenchmarking accumulate(+, dims=1) on $n1 × $n2 UInt32 - Base vs. AK")
37-
display(@benchmark Base.accumulate(+, v, init=UInt32(0), dims=1) setup=(v = ArrayType(rand(UInt32(1):UInt32(100), n1, n2))))
38-
display(@benchmark AK.accumulate(+, v, init=UInt32(0), dims=1) setup=(v = ArrayType(rand(UInt32(1):UInt32(100), n1, n2))))
39-
40-
println("\n===\nBenchmarking accumulate(+, dims=2) on $n1 × $n2 UInt32 - Base vs. AK")
41-
display(@benchmark Base.accumulate(+, v, init=UInt32(0), dims=2) setup=(v = ArrayType(rand(UInt32(1):UInt32(100), n1, n2))))
42-
display(@benchmark AK.accumulate(+, v, init=UInt32(0), dims=2) setup=(v = ArrayType(rand(UInt32(1):UInt32(100), n1, n2))))
43-
44-
45-
46-
47-
println("\n===\nBenchmarking accumulate(+, dims=1) on $n1 × $n2 Int64 - Base vs. AK")
48-
display(@benchmark Base.accumulate(+, v, init=Int64(0), dims=1) setup=(v = ArrayType(rand(Int64(1):Int64(100), n1, n2))))
49-
display(@benchmark AK.accumulate(+, v, init=Int64(0), dims=1) setup=(v = ArrayType(rand(Int64(1):Int64(100), n1, n2))))
50-
51-
println("\n===\nBenchmarking accumulate(+, dims=2) on $n1 × $n2 Int64 - Base vs. AK")
52-
display(@benchmark Base.accumulate(+, v, init=Int64(0), dims=2) setup=(v = ArrayType(rand(Int64(1):Int64(100), n1, n2))))
53-
display(@benchmark AK.accumulate(+, v, init=Int64(0), dims=2) setup=(v = ArrayType(rand(Int64(1):Int64(100), n1, n2))))
54-
55-
56-
57-
58-
println("\n===\nBenchmarking accumulate(+, dims=1) on $n1 × $n2 Float32 - Base vs. AK")
59-
display(@benchmark Base.accumulate(+, v, init=Float32(0), dims=1) setup=(v = ArrayType(rand(Float32, n1, n2))))
60-
display(@benchmark AK.accumulate(+, v, init=Float32(0), dims=1) setup=(v = ArrayType(rand(Float32, n1, n2))))
61-
62-
println("\n===\nBenchmarking accumulate(+, dims=2) on $n1 × $n2 Float32 - Base vs. AK")
63-
display(@benchmark Base.accumulate(+, v, init=Float32(0), dims=2) setup=(v = ArrayType(rand(Float32, n1, n2))))
64-
display(@benchmark AK.accumulate(+, v, init=Float32(0), dims=2) setup=(v = ArrayType(rand(Float32, n1, n2))))
13+
local randrange = T == Float32 ? T : T(1):T(100)
65 → 14

15+
_group["base_dims=1"] = @benchmarkable @sb(Base.accumulate(+, v, init=$T(0), dims=1)) setup=(v = ArrayType(rand(rng, $randrange, n1, n2)))
16+
_group["acck_dims=1"] = @benchmarkable @sb(AK.accumulate(+, v, init=$T(0), dims=1)) setup=(v = ArrayType(rand(rng, $randrange, n1, n2)))
66 → 17

18+
_group["base_dims=2"] = @benchmarkable @sb(Base.accumulate(+, v, init=$T(0), dims=2)) setup=(v = ArrayType(rand(rng, $randrange, n1, n2)))
19+
_group["acck_dims=2"] = @benchmarkable @sb(AK.accumulate(+, v, init=$T(0), dims=2)) setup=(v = ArrayType(rand(rng, $randrange, n1, n2)))
67 → 20

21+
T == Float32 || continue
68 → 22

69-
println("\n===\nBenchmarking accumulate((x, y) -> sin(x) + cos(y)), dims=1) on $n1 × $n2 Float32 - Base vs. AK")
70-
display(@benchmark Base.accumulate((x, y) -> sin(x) + cos(y), v, init=Float32(0), dims=1) setup=(v = ArrayType(rand(Float32, n1, n2))))
71-
display(@benchmark AK.accumulate((x, y) -> sin(x) + cos(y), v, init=Float32(0), neutral=Float32(0), dims=1) setup=(v = ArrayType(rand(Float32, n1, n2))))
23+
_group["base_sincos_dims=1"] = @benchmarkable @sb(Base.accumulate(acc_f, v, init=$T(0), dims=1)) setup=(v = ArrayType(rand(rng, $randrange, n1, n2)))
24+
_group["acck_sincos_dims=1"] = @benchmarkable @sb(AK.accumulate(acc_f, v, init=$T(0), neutral=$T(0), dims=1)) setup=(v = ArrayType(rand(rng, $randrange, n1, n2)))
72 → 25

73-
println("\n===\nBenchmarking accumulate((x, y) -> sin(x) + cos(y)), dims=2) on $n1 × $n2 Float32 - Base vs. AK")
74-
display(@benchmark Base.accumulate((x, y) -> sin(x) + cos(y), v, init=Float32(0), dims=2) setup=(v = ArrayType(rand(Float32, n1, n2))))
75-
display(@benchmark AK.accumulate((x, y) -> sin(x) + cos(y), v, init=Float32(0), neutral=Float32(0), dims=2) setup=(v = ArrayType(rand(Float32, n1, n2))))
26+
_group["base_sincos_dims=2"] = @benchmarkable @sb(Base.accumulate(acc_f, v, init=$T(0), dims=2)) setup=(v = ArrayType(rand(rng, $randrange, n1, n2)))
27+
_group["acck_sincos_dims=2"] = @benchmarkable @sb(AK.accumulate(acc_f, v, init=$T(0), neutral=$T(0), dims=2)) setup=(v = ArrayType(rand(rng, $randrange, n1, n2)))
28+
end

0 commit comments

Comments
 (0)