99#ifndef __OMPX_H
1010#define __OMPX_H
1111
/// __WARP_SIZE: expands to __AMDGCN_WAVEFRONT_SIZE when the compiler defines
/// that macro, and to the literal 32 otherwise. It is used further down as
/// the default `width` argument of the C++ shfl_down_sync overloads.
12+ #ifdef __AMDGCN_WAVEFRONT_SIZE
13+ #define __WARP_SIZE __AMDGCN_WAVEFRONT_SIZE
14+ #else
15+ #define __WARP_SIZE 32
16+ #endif
17+
/// Local alias used for the lane masks in this header.
/// NOTE(review): this assumes `unsigned long` is 64 bits wide on every target
/// that includes this header -- confirm for LLP64 platforms.
1218typedef unsigned long uint64_t;
1319
1420#ifdef __cplusplus
@@ -75,28 +81,44 @@ _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(grid_dim, 1)
7581 static inline RETTY ompx_##NAME(ARGS) { BODY; }
7682
/// Instantiates the three ompx_sync_block* definitions through
/// _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C (RETTY, NAME, ARGS, BODY):
///   - ompx_sync_block(int Ordering):           body is `omp barrier`.
///   - ompx_sync_block_acq_rel(void):           calls ompx_sync_block(ompx_acq_rel).
///   - ompx_sync_block_divergent(int Ordering): calls ompx_sync_block(Ordering).
/// The `Ordering` argument of ompx_sync_block itself is not referenced by its
/// body. In this diff the '-' lines are the old invocations with a trailing
/// ';' and the '+' lines are the replacements without it.
/// The helper macro is #undef'd once the three functions are emitted.
7783_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block, int Ordering,
78- _Pragma("omp barrier"));
84+ _Pragma("omp barrier"))
7985_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_acq_rel, void,
80- ompx_sync_block(ompx_acq_rel));
86+ ompx_sync_block(ompx_acq_rel))
8187_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_divergent, int Ordering,
82- ompx_sync_block(Ordering));
88+ ompx_sync_block(Ordering))
8389#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C
8490///}
8591
/// Definition of ompx_ballot_sync inside the declare-variant region that is
/// closed by `#pragma omp end declare variant` below. It ignores both `mask`
/// and `pred` and unconditionally executes __builtin_trap(), so it never
/// produces a return value.
8692static inline uint64_t ompx_ballot_sync(uint64_t mask, int pred) {
8793 __builtin_trap();
8894}
8995
96+ /// ompx_shfl_down_sync_{i,f,l,d}
97+ ///{
/// _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(TYPE, TY) emits
///   static inline TYPE ompx_shfl_down_sync_<TY>(uint64_t mask, TYPE var,
///                                               unsigned delta, int width)
/// whose whole body is __builtin_trap(); none of the parameters are read and
/// no value is returned. It is instantiated for int (i), float (f), long (l)
/// and double (d), then #undef'd so the helper name does not leak.
98+ #define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(TYPE, TY) \
99+ static inline TYPE ompx_shfl_down_sync_##TY(uint64_t mask, TYPE var, \
100+ unsigned delta, int width) { \
101+ __builtin_trap(); \
102+ }
103+
104+ _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(int, i)
105+ _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(float, f)
106+ _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(long, l)
107+ _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(double, d)
108+
109+ #undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL
110+ ///}
111+
90112#pragma omp end declare variant
91113
92114/// ompx_{sync_block}_{,divergent}
93115///{
/// _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(RETTY, NAME, ARGS) expands to the
/// prototype `RETTY ompx_<NAME>(ARGS);` -- a declaration only, no body.
/// The macro supplies the terminating ';' itself, so the '+' invocations
/// below omit it; the '-' lines are the old invocations that added a second
/// one. Declared: ompx_sync_block(int Ordering), ompx_sync_block_acq_rel(void)
/// and ompx_sync_block_divergent(int Ordering).
94116#define _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(RETTY, NAME, ARGS) \
95117 RETTY ompx_##NAME(ARGS);
96118
97- _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block, int Ordering);
98- _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_acq_rel, void);
99- _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_divergent, int Ordering);
119+ _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block, int Ordering)
120+ _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_acq_rel, void)
121+ _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_divergent, int Ordering)
100122#undef _TGT_KERNEL_LANGUAGE_DECL_SYNC_C
101123///}
102124
@@ -117,6 +139,20 @@ _TGT_KERNEL_LANGUAGE_DECL_GRID_C(grid_dim)
117139
/// Prototype only; the definition is not part of this section.
/// NOTE(review): by its name this presumably returns a per-lane bitmask of
/// `pred` across the lanes selected by `mask` -- confirm against the
/// implementation before relying on that.
118140uint64_t ompx_ballot_sync(uint64_t mask, int pred);
119141
142+ /// ompx_shfl_down_sync_{i,f,l,d}
143+ ///{
/// _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(TYPE, TY) expands to the prototype
///   TYPE ompx_shfl_down_sync_<TY>(uint64_t mask, TYPE var, unsigned delta,
///                                 int width);
/// including the terminating ';', which is why the invocations carry none.
/// Prototypes are emitted for int (i), float (f), long (l) and double (d);
/// the helper macro is #undef'd afterwards.
/// NOTE(review): the shuffle semantics (meaning of mask/delta/width) are not
/// visible here; presumably they follow the usual warp shuffle-down
/// convention -- confirm against the implementation.
144+ #define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(TYPE, TY) \
145+ TYPE ompx_shfl_down_sync_##TY(uint64_t mask, TYPE var, unsigned delta, \
146+ int width);
147+
148+ _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(int, i)
149+ _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(float, f)
150+ _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(long, l)
151+ _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(double, d)
152+
153+ #undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC
154+ ///}
155+
120156#ifdef __cplusplus
121157}
122158#endif
@@ -162,16 +198,32 @@ _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(grid_dim)
162198 }
163199
/// Instantiates the C++ sync_block and sync_block_divergent entry points via
/// _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX. Both take `int Ordering` with
/// `acc_rel` as the default argument and hand `Ordering` to the macro as its
/// last argument. The '-' lines are the old invocations with a trailing ';',
/// the '+' lines the replacements without it.
/// NOTE(review): the macro definition lies outside this excerpt; presumably
/// each wrapper forwards to the matching ompx_sync_block* function -- confirm.
164200_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block, int Ordering = acc_rel,
165- Ordering);
201+ Ordering)
166202_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block_divergent,
167- int Ordering = acc_rel, Ordering);
203+ int Ordering = acc_rel, Ordering)
168204#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX
169205///}
170206
/// C++ convenience wrapper in namespace ompx: passes `mask` and `pred`
/// unchanged to ompx_ballot_sync and returns its result.
171207static inline uint64_t ballot_sync(uint64_t mask, int pred) {
172208 return ompx_ballot_sync(mask, pred);
173209}
174210
211+ /// shfl_down_sync
212+ ///{
/// _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(TYPE, TY) emits one overload of
///   static inline TYPE shfl_down_sync(uint64_t mask, TYPE var,
///                                     unsigned delta, int width = __WARP_SIZE)
/// that forwards all four arguments to ompx_shfl_down_sync_<TY> and returns
/// its result. Overloads are generated for int (i), float (f), long (l) and
/// double (d), so the C++ caller selects the variant by the type of `var`
/// instead of by name suffix. The helper macro is #undef'd afterwards.
213+ #define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(TYPE, TY) \
214+ static inline TYPE shfl_down_sync(uint64_t mask, TYPE var, unsigned delta, \
215+ int width = __WARP_SIZE) { \
216+ return ompx_shfl_down_sync_##TY(mask, var, delta, width); \
217+ }
218+
219+ _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(int, i)
220+ _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(float, f)
221+ _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(long, l)
222+ _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(double, d)
223+
224+ #undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC
225+ ///}
226+
175227} // namespace ompx
176228#endif
177229
0 commit comments