@@ -2,14 +2,18 @@ group = addgroup!(SUITE, "array")
22
33const m = 512
44const n = 1000
5+ const m_long = 3
6+ const n_long = 1_000_000
57
68# generate some arrays
79cpu_mat = rand (rng, Float32, m, n)
8- gpu_mat = CuArray {Float32} (undef, size (cpu_mat))
10+ gpu_mat = CuArray {Float32} (cpu_mat)
11+ gpu_mat_long = CuArray {Float32} (rand (rng, Float32, m_long, n_long))
912gpu_vec = reshape (gpu_mat, length (gpu_mat))
1013gpu_arr_3d = reshape (gpu_mat, (m, 40 , 25 ))
1114gpu_arr_4d = reshape (gpu_mat, (m, 10 , 10 , 10 ))
12- gpu_mat_ints = CuArray (rand (rng, Int, m, n))
15+ gpu_mat_ints = CuArray (rand (rng, - 10 : 10 , m, n))
16+ gpu_mat_long_ints = CuArray (rand (rng, - 10 : 10 , m_long, n_long))
1317gpu_vec_ints = reshape (gpu_mat_ints, length (gpu_mat_ints))
1418gpu_mat_bools = CuArray (rand (rng, Bool, m, n))
1519gpu_vec_bools = reshape (gpu_mat_bools, length (gpu_mat_bools))
@@ -57,19 +61,57 @@ group["broadcast"] = @async_benchmarkable $gpu_mat .= 0f0
5761
5862# no need to test inplace version, which performs the same operation (but with an alloc)
5963let group = addgroup! (group, " accumulate" )
60- group[" 1d" ] = @async_benchmarkable accumulate (+ , $ gpu_vec)
61- group[" 2d" ] = @async_benchmarkable accumulate (+ , $ gpu_mat; dims= 1 )
64+ let group = addgroup! (group, " Float32" )
65+ group[" 1d" ] = @async_benchmarkable accumulate (+ , $ gpu_vec)
66+ group[" dims=1" ] = @async_benchmarkable accumulate (+ , $ gpu_mat; dims= 1 )
67+ group[" dims=2" ] = @async_benchmarkable accumulate (+ , $ gpu_mat; dims= 2 )
68+
69+ group[" dims=1L" ] = @async_benchmarkable accumulate (+ , $ gpu_mat_long; dims= 1 )
70+ group[" dims=2L" ] = @async_benchmarkable accumulate (+ , $ gpu_mat_long; dims= 2 )
71+ end
72+ let group = addgroup! (group, " Int64" )
73+ group[" 1d" ] = @async_benchmarkable accumulate (+ , $ gpu_vec_ints)
74+ group[" dims=1" ] = @async_benchmarkable accumulate (+ , $ gpu_mat_ints; dims= 1 )
75+ group[" dims=2" ] = @async_benchmarkable accumulate (+ , $ gpu_mat_ints; dims= 2 )
76+
77+ group[" dims=1L" ] = @async_benchmarkable accumulate (+ , $ gpu_mat_long_ints; dims= 1 )
78+ group[" dims=2L" ] = @async_benchmarkable accumulate (+ , $ gpu_mat_long_ints; dims= 2 )
79+ end
6280end
6381
6482let group = addgroup! (group, " reductions" )
6583 let group = addgroup! (group, " reduce" )
66- group[" 1d" ] = @async_benchmarkable reduce (+ , $ gpu_vec)
67- group[" 2d" ] = @async_benchmarkable reduce (+ , $ gpu_mat; dims= 1 )
84+ let group = addgroup! (group, " Float32" )
85+ group[" 1d" ] = @async_benchmarkable reduce (+ , $ gpu_vec)
86+ group[" dims=1" ] = @async_benchmarkable reduce (+ , $ gpu_mat; dims= 1 )
87+ group[" dims=2" ] = @async_benchmarkable reduce (+ , $ gpu_mat; dims= 2 )
88+ group[" dims=1L" ] = @async_benchmarkable reduce (+ , $ gpu_mat_long; dims= 1 )
89+ group[" dims=2L" ] = @async_benchmarkable reduce (+ , $ gpu_mat_long; dims= 2 )
90+ end
91+ let group = addgroup! (group, " Int64" )
92+ group[" 1d" ] = @async_benchmarkable reduce (+ , $ gpu_vec_ints)
93+ group[" dims=1" ] = @async_benchmarkable reduce (+ , $ gpu_mat_ints; dims= 1 )
94+ group[" dims=2" ] = @async_benchmarkable reduce (+ , $ gpu_mat_ints; dims= 2 )
95+ group[" dims=1L" ] = @async_benchmarkable reduce (+ , $ gpu_mat_long_ints; dims= 1 )
96+ group[" dims=2L" ] = @async_benchmarkable reduce (+ , $ gpu_mat_long_ints; dims= 2 )
97+ end
6898 end
6999
70100 let group = addgroup! (group, " mapreduce" )
71- group[" 1d" ] = @async_benchmarkable mapreduce (x-> x+ 1 , + , $ gpu_vec)
72- group[" 2d" ] = @async_benchmarkable mapreduce (x-> x+ 1 , + , $ gpu_mat; dims= 1 )
101+ let group = addgroup! (group, " Float32" )
102+ group[" 1d" ] = @async_benchmarkable mapreduce (x-> x+ 1 , + , $ gpu_vec)
103+ group[" dims=1" ] = @async_benchmarkable mapreduce (x-> x+ 1 , + , $ gpu_mat; dims= 1 )
104+ group[" dims=2" ] = @async_benchmarkable mapreduce (x-> x+ 1 , + , $ gpu_mat; dims= 2 )
105+ group[" dims=1L" ] = @async_benchmarkable mapreduce (x-> x+ 1 , + , $ gpu_mat_long; dims= 1 )
106+ group[" dims=2L" ] = @async_benchmarkable mapreduce (x-> x+ 1 , + , $ gpu_mat_long; dims= 2 )
107+ end
108+ let group = addgroup! (group, " Int64" )
109+ group[" 1d" ] = @async_benchmarkable mapreduce (x-> x+ 1 , + , $ gpu_vec_ints)
110+ group[" dims=1" ] = @async_benchmarkable mapreduce (x-> x+ 1 , + , $ gpu_mat_ints; dims= 1 )
111+ group[" dims=2" ] = @async_benchmarkable mapreduce (x-> x+ 1 , + , $ gpu_mat_ints; dims= 2 )
112+ group[" dims=1L" ] = @async_benchmarkable mapreduce (x-> x+ 1 , + , $ gpu_mat_long_ints; dims= 1 )
113+ group[" dims=2L" ] = @async_benchmarkable mapreduce (x-> x+ 1 , + , $ gpu_mat_long_ints; dims= 2 )
114+ end
73115 end
74116
75117 # used by sum, prod, minimum, maximum, all, any, count
@@ -88,12 +130,10 @@ let group = addgroup!(group, "random")
88130
89131 let group = addgroup! (group, " randn" )
90132 group[" Float32" ] = @async_benchmarkable CUDA. randn (Float32, m* n)
91- # group["Int64"] = @async_benchmarkable CUDA.randn(Int64, m*n)
92133 end
93134
94135 let group = addgroup! (group, " randn!" )
95136 group[" Float32" ] = @async_benchmarkable CUDA. randn! ($ gpu_vec)
96- # group["Int64"] = @async_benchmarkable CUDA.randn!($gpu_vec_ints)
97137 end
98138end
99139
0 commit comments