@@ -31,19 +31,6 @@ function __kernel(expr, generate_cpu = true, force_inbounds = false)
3131 constargs[i] = false
3232 end
3333
34- # create two functions
35- # 1. GPU function
36- # 2. CPU function with work-group loops inserted
37- #
38- # Without the deepcopy we might accidentially modify expr shared between CPU and GPU
39- cpu_name = Symbol (:cpu_ , name)
40- if generate_cpu
41- def_cpu = deepcopy (def)
42- def_cpu[:name ] = cpu_name
43- transform_cpu! (def_cpu, constargs, force_inbounds)
44- cpu_function = combinedef (def_cpu)
45- end
46-
4734 def_gpu = deepcopy (def)
4835 def_gpu[:name ] = gpu_name = Symbol (:gpu_ , name)
4936 transform_gpu! (def_gpu, constargs, force_inbounds)
@@ -56,24 +43,12 @@ function __kernel(expr, generate_cpu = true, force_inbounds = false)
5643 $ name (dev, size) = $ name (dev, $ StaticSize (size), $ DynamicSize ())
5744 $ name (dev, size, range) = $ name (dev, $ StaticSize (size), $ StaticSize (range))
5845 function $name (dev:: Dev , sz:: S , range:: NDRange ) where {Dev, S <: $_Size , NDRange <: $_Size }
59- if $ isgpu (dev)
60- return $ construct (dev, sz, range, $ gpu_name)
61- else
62- if $ generate_cpu
63- return $ construct (dev, sz, range, $ cpu_name)
64- else
65- error (" This kernel is unavailable for backend CPU" )
66- end
67- end
46+ return $ construct (dev, sz, range, $ gpu_name)
6847 end
6948 end
7049 end
7150
72- if generate_cpu
73- return Expr (:block , esc (cpu_function), esc (gpu_function), esc (constructors))
74- else
75- return Expr (:block , esc (gpu_function), esc (constructors))
76- end
51+ return Expr (:block , esc (gpu_function), esc (constructors))
7752end
7853
7954# The easy case, transform the function for GPU execution
@@ -94,42 +69,7 @@ function transform_gpu!(def, constargs, force_inbounds)
9469 if force_inbounds
9570 push! (new_stmts, Expr (:inbounds , true ))
9671 end
97- append! (new_stmts, split (emit_gpu, body. args))
98- if force_inbounds
99- push! (new_stmts, Expr (:inbounds , :pop ))
100- end
101- push! (new_stmts, Expr (:popaliasscope ))
102- push! (new_stmts, :(return nothing ))
103- def[:body ] = Expr (
104- :let ,
105- Expr (:block , let_constargs... ),
106- Expr (:block , new_stmts... ),
107- )
108- return
109- end
110-
111- # The hard case, transform the function for CPU execution
112- # - mark constant arguments by applying `constify`.
113- # - insert aliasscope markers
114- # - insert implied loop bodys
115- # - handle indicies
116- # - hoist workgroup definitions
117- # - hoist uniform variables
118- function transform_cpu! (def, constargs, force_inbounds)
119- let_constargs = Expr[]
120- for (i, arg) in enumerate (def[:args ])
121- if constargs[i]
122- push! (let_constargs, :($ arg = $ constify ($ arg)))
123- end
124- end
125- pushfirst! (def[:args ], :__ctx__ )
126- new_stmts = Expr[]
127- body = MacroTools. flatten (def[:body ])
128- push! (new_stmts, Expr (:aliasscope ))
129- if force_inbounds
130- push! (new_stmts, Expr (:inbounds , true ))
131- end
132- append! (new_stmts, split (emit_cpu, body. args))
72+ append! (new_stmts, split (body. args))
13373 if force_inbounds
13474 push! (new_stmts, Expr (:inbounds , :pop ))
13575 end
169109
170110# TODO proper handling of LineInfo
171111function split (
172- emit,
173112 stmts,
174113 indicies = Any[], private = Set {Symbol} (),
175114 )
@@ -249,62 +188,7 @@ function split(
249188 return new_stmts
250189end
251190
252- function emit_cpu (loop)
253- idx = gensym (:I )
254- for stmt in loop. indicies
255- # splice index into the i = @index(Cartesian, $idx)
256- @assert stmt. head === :(= )
257- rhs = stmt. args[2 ]
258- push! (rhs. args, idx)
259- end
260- stmts = Any[]
261- append! (stmts, loop. allocations)
262-
263- # private_allocations turn into lhs = ntuple(i->rhs, length(__workitems_iterspace()))
264- N = gensym (:N )
265- push! (stmts, :($ N = length ($ __workitems_iterspace (__ctx__))))
266-
267- for stmt in loop. private_allocations
268- if @capture (stmt, lhs_ = rhs_)
269- push! (stmts, :($ lhs = ntuple (_ -> $ rhs, $ N)))
270- else
271- error (" @private $stmt not an assignment" )
272- end
273- end
274-
275- # don't emit empty loops
276- if ! (isempty (loop. stmts) || all (s -> s isa LineNumberNode, loop. stmts))
277- body = Expr (:block , loop. stmts... )
278- body = postwalk (body) do expr
279- if @capture (expr, lhs_ = rhs_)
280- if lhs in loop. private
281- error (" Can't assign to variables marked private" )
282- end
283- elseif @capture (expr, A_[i__])
284- if A in loop. private
285- return :($ A[$ __index_Local_Linear (__ctx__, $ (idx))][$ (i... )])
286- end
287- elseif expr isa Symbol
288- if expr in loop. private
289- return :($ expr[$ __index_Local_Linear (__ctx__, $ (idx))])
290- end
291- end
292- return expr
293- end
294- loopexpr = quote
295- for $ idx in $ __workitems_iterspace (__ctx__)
296- $ __validindex (__ctx__, $ idx) || continue
297- $ (loop. indicies... )
298- $ (unblock (body))
299- end
300- end
301- push! (stmts, loopexpr)
302- end
303-
304- return unblock (Expr (:block , stmts... ))
305- end
306-
307- function emit_gpu (loop)
191+ function emit (loop)
308192 stmts = Any[]
309193 append! (stmts, loop. allocations)
310194 for stmt in loop. private_allocations
0 commit comments