@@ -5,23 +5,13 @@ module accelerated_module
55 private
66 public :: co_dot_accelerated
77 public :: co_dot_unaccelerated
8- public :: co_dot_manually_accelerated
98 public :: co_dot_mapped_manually_accelerated
109 public :: CUDA,OpenACC,OpenMP
1110 public :: walltime
1211
1312 ! Explicit interfaces for procedures that wrap accelerated kernels
1413 interface
1514
16- ! Explicit C-interoperable interface to the hand-written wrapper a programmer would have to write today to manually accelerate calculations
17- subroutine manual_cudaDot (a ,b ,partial_dot ,n ,img ) bind(C, name= " manual_cudaDot" ) ! body lives outside Fortran; linked through the bind(C) name
18- use iso_c_binding, only : c_float,c_int
19- real (c_float) :: a(* ),b(* ) ! assumed-size c_float arrays; the module's caller passes its x and y here
20- real (c_float) :: partial_dot ! scalar passed by reference (no value attribute); presumably receives the partial result -- confirm against the C side
21- integer (c_int),value :: n ! passed by value; the module's caller supplies size(x)
22- integer (c_int),value :: img ! passed by value; the module's caller supplies this_image()-1
23- end subroutine
24-
2515 subroutine manual_mapped_cudaDot (a ,b ,partial_dot ,n ,img ) bind(C, name= " manual_mapped_cudaDot" )
2616 use iso_c_binding, only : c_float,c_int
2717 real (c_float) :: a(* ),b(* )
@@ -59,14 +49,6 @@ subroutine co_dot_unaccelerated(x,y,x_dot_y)
5949 call co_sum(x_dot_y) ! Call Fortran 2015 collective sum
6050 end subroutine
6151
62- ! Parallel collective dot product using manual acceleration: each image calls manual_cudaDot on its own x and y, then co_sum combines x_dot_y across images
63- subroutine co_dot_manually_accelerated (x ,y ,x_dot_y )
64- real (c_float), intent (in ) :: x(:),y(:) ! this image's vectors; only size(x) is forwarded as the length
65- real (c_float), intent (out ) :: x_dot_y ! set by manual_cudaDot, then replaced by co_sum with the sum over all images
66- call manual_cudaDot(x,y,x_dot_y,size (x),this_image()- 1 ) ! last argument is the image index shifted to start at 0; NOTE(review): its use on the C side is not visible here
67- call co_sum(x_dot_y) ! Call the Fortran 2015 collective sum
68- end subroutine
69-
7052 subroutine co_dot_mapped_manually_accelerated (x ,y ,x_dot_y )
7153 real (c_float), intent (in ) :: x(:),y(:)
7254 real (c_float), intent (out ) :: x_dot_y
@@ -106,7 +88,7 @@ program cu_dot_test
10688 real (c_float) :: dot
10789 real (c_double) :: t_start, t_end
10890
109- ! Compiler/library-accelerated variables
91+ ! Library-accelerated variables
11092 real (c_float), allocatable :: a_acc(:)[:], b_acc(:)[:]
11193 real (c_float) :: dot_acc[* ]
11294
@@ -127,7 +109,7 @@ program cu_dot_test
127109 sync all
128110
129111 block
130- ! use accelerated_module, only : co_dot_accelerated,co_dot_unaccelerated,co_dot_manually_accelerated, CUDA,walltime,co_dot_mapped_manually_accelerated
112+ ! use accelerated_module, only : co_dot_accelerated,co_dot_unaccelerated,CUDA,walltime,co_dot_mapped_manually_accelerated
131113 use accelerated_module
132114
133115 ! Parallel execution
@@ -138,13 +120,6 @@ program cu_dot_test
138120
139121 sync all
140122
141- t_start = walltime()
142- call co_dot_manually_accelerated(a_man,b_man,dot_man)
143- t_end = walltime()
144- if (me== 1 ) print * , ' Manually accelerated dot_prod' ,dot_man,' time:' ,t_end- t_start
145-
146- sync all
147-
148123 ! Serial execution
149124 t_start = walltime()
150125 call co_dot_unaccelerated(a_man,b_man,dot)
0 commit comments