@@ -86,22 +86,35 @@ function transform_gpu!(def, constargs, force_inbounds)
8686 end
8787 end
8888 pushfirst! (def[:args ], :__ctx__ )
89- body = def[:body ]
89+ new_stmts = Expr[]
90+ body = MacroTools. flatten (def[:body ])
91+ stmts = body. args
92+ push! (new_stmts, Expr (:aliasscope ))
93+ push! (new_stmts, :(__active_lane__ = $ __validindex (__ctx__)))
9094 if force_inbounds
91- body = quote
92- @inbounds $ (body)
93- end
95+ push! (new_stmts, Expr (:inbounds , true ))
9496 end
95- body = quote
96- if $ __validindex (__ctx__)
97- $ (body)
97+
98+ # fix convergence
99+ active_stmts = Any[]
100+ for stmt in stmts
101+ push! (active_stmts, stmt)
102+ has_sync = find_sync (stmt)
103+ if has_sync
104+ push! (new_stmts, Expr (:if , :__active_lane__ , Expr (:block , active_stmts... )))
105+ empty! (active_stmts)
98106 end
99- return nothing
100107 end
108+ push! (new_stmts, Expr (:if , :__active_lane__ , Expr (:block , active_stmts... )))
109+ if force_inbounds
110+ push! (new_stmts, Expr (:inbounds , :pop ))
111+ end
112+ push! (new_stmts, Expr (:popaliasscope ))
113+ push! (new_stmts, :(return nothing ))
101114 def[:body ] = Expr (
102115 :let ,
103116 Expr (:block , let_constargs... ),
104- body ,
117+ Expr ( :block , new_stmts ... ) ,
105118 )
106119 return
107120end
0 commit comments