|
28 | 28 | # possible to get a value of 312, then we will have 2 separate shmem blocks, |
29 | 29 | # one from 1->256, and another from 257->512 |
30 | 30 | @uniform max_element = 1 |
| 31 | + # @print("tid=$tid, lid=$lid, gs=$gs, N=$N, max_element=$max_element\n") |
| 32 | + |
31 | 33 | for min_element in 1:gs:N |
32 | 34 |
|
33 | 35 | # Setting shared_histogram to 0 |
|
41 | 43 |
|
42 | 44 | # Defining bin on shared memory and writing to it if possible |
43 | 45 | bin = input[tid] |
| 46 | + win = bin |
| 47 | + # if lid == 5 |
| 48 | + # end |
44 | 49 | if bin >= min_element && bin < max_element |
45 | 50 | bin -= min_element - 1 |
| 51 | + @print("tid=$tid, lid=$lid, bin=$win, gs=$gs, N=$N, max_element=$max_element, min_element=$min_element, bin=$(bin)\n") |
46 | 52 | @atomic shared_histogram[bin] += 1 |
47 | 53 | end |
48 | 54 |
|
|
53 | 59 | end |
54 | 60 |
|
55 | 61 | end |
| 62 | + # @print("tid=$tid, lid=$lid, gs=$gs, N=$N, max_element=$max_element\n") |
56 | 63 |
|
57 | 64 | end |
58 | 65 |
|
"""
    histogram!(histogram_output, input, groupsize=256)

Launch `histogram_kernel!` over `input`, using `histogram_output` as the
kernel's output array.

# Arguments
- `histogram_output`: output array; its backend (via `get_backend`) decides
  where the kernel is instantiated.
- `input`: array of values to process; `size(input)` is used as the `ndrange`
  of the launch.
- `groupsize`: static workgroup size handed to the kernel constructor.
  Defaults to `256`, which was the previously hard-coded value.

Returns `nothing`. This function does not synchronize; callers that read
`histogram_output` afterwards are expected to call
`KernelAbstractions.synchronize` on the backend first.
"""
function histogram!(histogram_output, input, groupsize=256)
    backend = get_backend(histogram_output)
    # The kernel needs a static block size, so the workgroup size is fixed
    # when the kernel object is constructed rather than at launch time.
    kernel! = histogram_kernel!(backend, (groupsize,))
    kernel!(histogram_output, input, ndrange = size(input))
    return
end
|
73 | 82 |
|
@testset "histogram tests" begin
    # Use Int32 as some backends don't support 64-bit atomics
    rand_input = Int32.(rand(1:128, 1000))
    linear_input = Int32.(rand(1:128, 1024))
    all_two = fill(Int32(2), 512)
    # Tiny input launched with a workgroup size (9) smaller than the value
    # range (1:20). NOTE(review): presumably this forces the kernel to make
    # more than one pass over its shared-memory window -- confirm against
    # the kernel body.
    small_input = Int32.(rand(1:20, 20))

    # Reference results computed on the host before the data is moved.
    histogram_rand_baseline = create_histogram(rand_input)
    histogram_linear_baseline = create_histogram(linear_input)
    histogram_two_baseline = create_histogram(all_two)
    histogram_small_baseline = create_histogram(small_input)

    rand_input = move(backend, rand_input)
    linear_input = move(backend, linear_input)
    all_two = move(backend, all_two)
    small_input = move(backend, small_input)

    # One output slot per possible value, i.e. up to the largest input value.
    rand_histogram = KernelAbstractions.zeros(backend, eltype(rand_input), maximum(rand_input))
    linear_histogram = KernelAbstractions.zeros(backend, eltype(linear_input), maximum(linear_input))
    two_histogram = KernelAbstractions.zeros(backend, eltype(all_two), maximum(all_two))
    small_histogram = KernelAbstractions.zeros(backend, eltype(small_input), maximum(small_input))

    histogram!(rand_histogram, rand_input)
    histogram!(linear_histogram, linear_input)
    histogram!(two_histogram, all_two)
    histogram!(small_histogram, small_input, 9)
    # Wait for all launches to finish before copying results back.
    KernelAbstractions.synchronize(backend)

    @test isapprox(Array(rand_histogram), histogram_rand_baseline)
    @test isapprox(Array(linear_histogram), histogram_linear_baseline)
    @test isapprox(Array(two_histogram), histogram_two_baseline)
    @test isapprox(Array(small_histogram), histogram_small_baseline)
end
0 commit comments