|
1 | | -import AcceleratedKernels as AK |
2 | | -using KernelAbstractions |
| 1 | +group = addgroup!(SUITE, "accumulate_nd") |
3 | 2 |
|
4 | | -using BenchmarkTools |
5 | | -using Random |
6 | | -Random.seed!(0) |
7 | | - |
8 | | - |
9 | | -# Choose the Array backend: |
10 | | -# |
11 | | -# using CUDA |
12 | | -# const ArrayType = CuArray |
13 | | -# |
14 | | -# using AMDGPU |
15 | | -# const ArrayType = ROCArray |
16 | | -# |
17 | | -# using oneAPI |
18 | | -# const ArrayType = oneArray |
19 | | -# |
20 | | -# using Metal |
21 | | -# const ArrayType = MtlArray |
22 | | -# |
23 | | -# using OpenCL |
24 | | -# const ArrayType = CLArray |
25 | | -# |
26 | | -const ArrayType = Array |
27 | | - |
28 | | - |
29 | | -println("Using ArrayType: ", ArrayType) |
| 3 | +acc_f(x, y) = sin(x) + cos(y) |
30 | 4 |
|
| 5 | +GPUArrays.neutral_element(::typeof(acc_f), T) = T(0) |
31 | 6 |
|
32 | 7 | n1 = 3 |
33 | 8 | n2 = 1_000_000 |
34 | 9 |
|
| 10 | +for T in [UInt32, Int64, Float32] |
| 11 | + local _group = addgroup!(group, "$T") |
35 | 12 |
|
36 | | -println("\n===\nBenchmarking accumulate(+, dims=1) on $n1 × $n2 UInt32 - Base vs. AK") |
37 | | -display(@benchmark Base.accumulate(+, v, init=UInt32(0), dims=1) setup=(v = ArrayType(rand(UInt32(1):UInt32(100), n1, n2)))) |
38 | | -display(@benchmark AK.accumulate(+, v, init=UInt32(0), dims=1) setup=(v = ArrayType(rand(UInt32(1):UInt32(100), n1, n2)))) |
39 | | - |
40 | | -println("\n===\nBenchmarking accumulate(+, dims=2) on $n1 × $n2 UInt32 - Base vs. AK") |
41 | | -display(@benchmark Base.accumulate(+, v, init=UInt32(0), dims=2) setup=(v = ArrayType(rand(UInt32(1):UInt32(100), n1, n2)))) |
42 | | -display(@benchmark AK.accumulate(+, v, init=UInt32(0), dims=2) setup=(v = ArrayType(rand(UInt32(1):UInt32(100), n1, n2)))) |
43 | | - |
44 | | - |
45 | | - |
46 | | - |
47 | | -println("\n===\nBenchmarking accumulate(+, dims=1) on $n1 × $n2 Int64 - Base vs. AK") |
48 | | -display(@benchmark Base.accumulate(+, v, init=Int64(0), dims=1) setup=(v = ArrayType(rand(Int64(1):Int64(100), n1, n2)))) |
49 | | -display(@benchmark AK.accumulate(+, v, init=Int64(0), dims=1) setup=(v = ArrayType(rand(Int64(1):Int64(100), n1, n2)))) |
50 | | - |
51 | | -println("\n===\nBenchmarking accumulate(+, dims=2) on $n1 × $n2 Int64 - Base vs. AK") |
52 | | -display(@benchmark Base.accumulate(+, v, init=Int64(0), dims=2) setup=(v = ArrayType(rand(Int64(1):Int64(100), n1, n2)))) |
53 | | -display(@benchmark AK.accumulate(+, v, init=Int64(0), dims=2) setup=(v = ArrayType(rand(Int64(1):Int64(100), n1, n2)))) |
54 | | - |
55 | | - |
56 | | - |
57 | | - |
58 | | -println("\n===\nBenchmarking accumulate(+, dims=1) on $n1 × $n2 Float32 - Base vs. AK") |
59 | | -display(@benchmark Base.accumulate(+, v, init=Float32(0), dims=1) setup=(v = ArrayType(rand(Float32, n1, n2)))) |
60 | | -display(@benchmark AK.accumulate(+, v, init=Float32(0), dims=1) setup=(v = ArrayType(rand(Float32, n1, n2)))) |
61 | | - |
62 | | -println("\n===\nBenchmarking accumulate(+, dims=2) on $n1 × $n2 Float32 - Base vs. AK") |
63 | | -display(@benchmark Base.accumulate(+, v, init=Float32(0), dims=2) setup=(v = ArrayType(rand(Float32, n1, n2)))) |
64 | | -display(@benchmark AK.accumulate(+, v, init=Float32(0), dims=2) setup=(v = ArrayType(rand(Float32, n1, n2)))) |
| 13 | + local randrange = T == Float32 ? T : T(1):T(100) |
65 | 14 |
|
| 15 | + _group["base_dims=1"] = @benchmarkable @sb(Base.accumulate(+, v, init=$T(0), dims=1)) setup=(v = ArrayType(rand(rng, $randrange, n1, n2))) |
| 16 | + _group["acck_dims=1"] = @benchmarkable @sb(AK.accumulate(+, v, init=$T(0), dims=1)) setup=(v = ArrayType(rand(rng, $randrange, n1, n2))) |
66 | 17 |
|
| 18 | + _group["base_dims=2"] = @benchmarkable @sb(Base.accumulate(+, v, init=$T(0), dims=2)) setup=(v = ArrayType(rand(rng, $randrange, n1, n2))) |
| 19 | + _group["acck_dims=2"] = @benchmarkable @sb(AK.accumulate(+, v, init=$T(0), dims=2)) setup=(v = ArrayType(rand(rng, $randrange, n1, n2))) |
67 | 20 |
|
| 21 | + T == Float32 || continue |
68 | 22 |
|
69 | | -println("\n===\nBenchmarking accumulate((x, y) -> sin(x) + cos(y)), dims=1) on $n1 × $n2 Float32 - Base vs. AK") |
70 | | -display(@benchmark Base.accumulate((x, y) -> sin(x) + cos(y), v, init=Float32(0), dims=1) setup=(v = ArrayType(rand(Float32, n1, n2)))) |
71 | | -display(@benchmark AK.accumulate((x, y) -> sin(x) + cos(y), v, init=Float32(0), neutral=Float32(0), dims=1) setup=(v = ArrayType(rand(Float32, n1, n2)))) |
| 23 | + _group["base_sincos_dims=1"] = @benchmarkable @sb(Base.accumulate(acc_f, v, init=$T(0), dims=1)) setup=(v = ArrayType(rand(rng, $randrange, n1, n2))) |
| 24 | + _group["acck_sincos_dims=1"] = @benchmarkable @sb(AK.accumulate(acc_f, v, init=$T(0), neutral=$T(0), dims=1)) setup=(v = ArrayType(rand(rng, $randrange, n1, n2))) |
72 | 25 |
|
73 | | -println("\n===\nBenchmarking accumulate((x, y) -> sin(x) + cos(y)), dims=2) on $n1 × $n2 Float32 - Base vs. AK") |
74 | | -display(@benchmark Base.accumulate((x, y) -> sin(x) + cos(y), v, init=Float32(0), dims=2) setup=(v = ArrayType(rand(Float32, n1, n2)))) |
75 | | -display(@benchmark AK.accumulate((x, y) -> sin(x) + cos(y), v, init=Float32(0), neutral=Float32(0), dims=2) setup=(v = ArrayType(rand(Float32, n1, n2)))) |
| 26 | + _group["base_sincos_dims=2"] = @benchmarkable @sb(Base.accumulate(acc_f, v, init=$T(0), dims=2)) setup=(v = ArrayType(rand(rng, $randrange, n1, n2))) |
| 27 | + _group["acck_sincos_dims=2"] = @benchmarkable @sb(AK.accumulate(acc_f, v, init=$T(0), neutral=$T(0), dims=2)) setup=(v = ArrayType(rand(rng, $randrange, n1, n2))) |
| 28 | +end |
0 commit comments