99#ifndef __OMPX_H
1010#define __OMPX_H
1111
12- #ifdef __AMDGCN_WAVEFRONT_SIZE
13- #define __WARP_SIZE __AMDGCN_WAVEFRONT_SIZE
14- #else
15- #define __WARP_SIZE 32
16- #endif
17-
/// Local alias named like the <stdint.h> type, defined directly as `unsigned long`.
/// NOTE(review): the C standard only guarantees `unsigned long` to be at least
/// 32 bits wide (it is 32 bits on LLP64 targets) - confirm that every target
/// this header is compiled for uses a 64-bit `unsigned long`.
1812typedef unsigned long uint64_t;
1913
2014#ifdef __cplusplus
@@ -81,44 +75,28 @@ _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(grid_dim, 1)
8175 static inline RETTY ompx_##NAME(ARGS) { BODY; }
8276
/// Host-side ompx_sync_block* definitions, generated through
/// _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C. The start of that macro's #define is
/// outside this chunk; presumably it expands to
/// `static inline RETTY ompx_NAME(ARGS) { BODY; }` - confirm against the definition.
///  - sync_block:           body is `_Pragma("omp barrier")`, i.e. an OpenMP
///                          barrier; `Ordering` does not appear in the body.
///  - sync_block_acq_rel:   calls ompx_sync_block(ompx_acq_rel).
///  - sync_block_divergent: passes `Ordering` straight to ompx_sync_block.
/// The '-' / '+' line pairs are the two sides of the diff: the '+' side only
/// appends a `;` after each invocation.
/// The macro is #undef'd right after its last use so it does not leak to includers.
8377_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block, int Ordering,
84- _Pragma("omp barrier"))
78+ _Pragma("omp barrier"));
8579_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_acq_rel, void,
86- ompx_sync_block(ompx_acq_rel))
80+ ompx_sync_block(ompx_acq_rel));
8781_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_divergent, int Ordering,
88- ompx_sync_block(Ordering))
82+ ompx_sync_block(Ordering));
8983#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C
9084///}
9185
/// Trapping stand-in for ompx_ballot_sync: `mask` and `pred` are ignored and
/// the body only calls __builtin_trap(), so no value is ever returned to the
/// caller.
/// NOTE(review): this appears to sit inside the declare-variant region that is
/// closed by the `end declare variant` pragma below; the matching `begin` is
/// not visible in this chunk - confirm which context selects this variant.
9286static inline uint64_t ompx_ballot_sync(uint64_t mask, int pred) {
9387 __builtin_trap();
9488}
9589
96- /// ompx_shfl_down_sync_{i,f,l,d}
97- ///{
98- #define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(TYPE, TY) \
99- static inline TYPE ompx_shfl_down_sync_##TY(uint64_t mask, TYPE var, \
100- unsigned delta, int width) { \
101- __builtin_trap(); \
102- }
103-
104- _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(int, i)
105- _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(float, f)
106- _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(long, l)
107- _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(double, d)
108-
109- #undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL
110- ///}
111-
11290#pragma omp end declare variant
11391
11492/// ompx_{sync_block}_{,divergent}
11593///{
/// Helper macro that expands to the prototype `RETTY ompx_NAME(ARGS);` and is
/// used to declare ompx_sync_block, ompx_sync_block_acq_rel and
/// ompx_sync_block_divergent. It is #undef'd directly after the last use.
/// The expansion already ends in `;`, so the `;` appended on the '+' side of
/// the diff leaves an extra empty declaration after each prototype.
/// NOTE(review): pedantic C builds may warn about an extra `;` at file scope -
/// confirm the added semicolons are intentional (e.g. to help formatting tools).
11694#define _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(RETTY, NAME, ARGS) \
11795 RETTY ompx_##NAME(ARGS);
11896
119- _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block, int Ordering)
120- _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_acq_rel, void)
121- _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_divergent, int Ordering)
97+ _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block, int Ordering);
98+ _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_acq_rel, void);
99+ _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_divergent, int Ordering);
122100#undef _TGT_KERNEL_LANGUAGE_DECL_SYNC_C
123101///}
124102
@@ -139,20 +117,6 @@ _TGT_KERNEL_LANGUAGE_DECL_GRID_C(grid_dim)
139117
/// Prototype only: takes a 64-bit `mask` and an integer `pred` and returns a
/// 64-bit value. No definition for this declaration is visible in this chunk.
/// NOTE(review): presumably a warp/wavefront-level vote over `pred` restricted
/// to the lanes in `mask` - confirm against the device runtime implementation.
140118uint64_t ompx_ballot_sync(uint64_t mask, int pred);
141119
142- /// ompx_shfl_down_sync_{i,f,l,d}
143- ///{
144- #define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(TYPE, TY) \
145- TYPE ompx_shfl_down_sync_##TY(uint64_t mask, TYPE var, unsigned delta, \
146- int width);
147-
148- _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(int, i)
149- _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(float, f)
150- _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(long, l)
151- _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(double, d)
152-
153- #undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC
154- ///}
155-
156120#ifdef __cplusplus
157121}
158122#endif
@@ -198,32 +162,16 @@ _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(grid_dim)
198162 }
199163
/// C++ sync_block and sync_block_divergent, generated through
/// _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX (its #define is outside this chunk).
/// Both take `int Ordering` with the default value `acc_rel` and hand
/// `Ordering` to the macro as its last argument.
/// NOTE(review): presumably the macro forwards that last argument to the
/// matching ompx_* C function - confirm against the macro definition.
/// The '-' / '+' line pairs are the two sides of the diff: the '+' side only
/// appends a `;` after each invocation.
/// The macro is #undef'd right after its last use.
200164_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block, int Ordering = acc_rel,
201- Ordering)
165+ Ordering);
202166_TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block_divergent,
203- int Ordering = acc_rel, Ordering)
167+ int Ordering = acc_rel, Ordering);
204168#undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX
205169///}
206170
/// C++ convenience wrapper: forwards `mask` and `pred` unchanged to
/// ompx_ballot_sync and returns its result. Adds no logic of its own.
207171static inline uint64_t ballot_sync(uint64_t mask, int pred) {
208172 return ompx_ballot_sync(mask, pred);
209173}
210174
211- /// shfl_down_sync
212- ///{
213- #define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(TYPE, TY) \
214- static inline TYPE shfl_down_sync(uint64_t mask, TYPE var, unsigned delta, \
215- int width = __WARP_SIZE) { \
216- return ompx_shfl_down_sync_##TY(mask, var, delta, width); \
217- }
218-
219- _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(int, i)
220- _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(float, f)
221- _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(long, l)
222- _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(double, d)
223-
224- #undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC
225- ///}
226-
227175} // namespace ompx
228176#endif
229177
0 commit comments