@@ -22,17 +22,34 @@ namespace hlsl
2222namespace subgroup
2323{
2424
25- namespace native
25+ namespace impl
2626{
27-
28- template<class Binop, typename T=typename Binop::type_t>
27+ template<class Binop, bool native> // primary templates: the `native` flag picks which partial specialization is used; might need a 3rd default param `typename T=typename Binop::type_t`
2928struct reduction;
30- template<class Binop, typename T=typename Binop::type_t >
29+ template<class Binop, bool native >
3130struct inclusive_scan;
32- template<class Binop, typename T=typename Binop::type_t >
31+ template<class Binop, bool native >
3332struct exclusive_scan;
3433
35- #define SPECIALIZE (NAME,BINOP,SUBGROUP_OP) template<typename T> struct NAME<BINOP<T>,T> \
34+ // native: forward declarations of the partial specializations taken when `native` is true
35+ template<class Binop>
36+ struct reduction<Binop, true >;
37+ template<class Binop>
38+ struct inclusive_scan<Binop, true >;
39+ template<class Binop>
40+ struct exclusive_scan<Binop, true >;
41+
42+ // portability: forward declarations of the partial specializations taken when `native` is false
43+ template<class Binop>
44+ struct reduction<Binop, false >;
45+ template<class Binop>
46+ struct inclusive_scan<Binop, false >;
47+ template<class Binop>
48+ struct exclusive_scan<Binop, false >;
49+
50+ // specialize native
51+
52+ #define SPECIALIZE (NAME,BINOP,SUBGROUP_OP) template<typename T> struct NAME<BINOP<T>,true > \
3653{ \
3754 using type_t = T; \
3855 \
@@ -56,19 +73,16 @@ SPECIALIZE_ALL(maximum,Max);
5673#undef SPECIALIZE_ALL
5774#undef SPECIALIZE
5875
59- }
76+ // specialize portability
6077
61- namespace portability
62- {
63-
6478// WARNING
6579// THIS PORTABILITY IMPLEMENTATION USES SHUFFLE OPS
6680// Shuffles that attempt to read an inactive lane return garbage,
6781// which means that our portability reductions and prefix sums will also return garbage/UB/UV
6882// Always use the native subgroup_arithmetic extensions if supported
6983
7084template<class Binop>
71- struct inclusive_scan
85+ struct inclusive_scan<Binop, false >
7286{
7387 using type_t = typename Binop::type_t;
7488
@@ -97,13 +111,13 @@ struct inclusive_scan
97111};
98112
99113template<class Binop>
100- struct exclusive_scan
114+ struct exclusive_scan<Binop, false >
101115{
102116 using type_t = typename Binop::type_t;
103117
104118 type_t operator ()(type_t value)
105119 {
106- value = inclusive_scan<Binop>::__call (value);
120+ value = inclusive_scan<Binop, false >::__call (value);
107121 // can't risk getting short-circuited, need to store to a var
108122 type_t left = glsl::subgroupShuffleUp<type_t>(value,1 );
109123 // the first invocation doesn't have anything to its left, so we set it to the binop's identity value for the exclusive scan
@@ -112,16 +126,17 @@ struct exclusive_scan
112126};
113127
// Portability (`native == false`) reduction: passes `value` through the portability
// inclusive scan (`inclusive_scan<Binop,false>::__call`) and returns what `BroadcastLast` yields.
114128template<class Binop>
115- struct reduction
129+ struct reduction<Binop, false >
116130{
117131 using type_t = typename Binop::type_t;
118132
119133 type_t operator ()(NBL_CONST_REF_ARG (type_t) value)
120134 {
121135 // the last invocation's inclusive-scan result covers the whole subgroup, so take the last subgroup invocation's value for the reduction
122- return BroadcastLast<type_t>(inclusive_scan<Binop>::__call (value));
136+ return BroadcastLast<type_t>(inclusive_scan<Binop, false >::__call (value));
123137 }
124138};
139+
125140}
126141
127142}
0 commit comments