@@ -12,6 +12,7 @@ namespace hlsl
1212namespace prefix_sum_blur
1313{
1414
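15+ // Requires an *inclusive* prefix sum: element `i` of the accessor must hold the sum of inputs `0..i` (both ends included), so the last element is the total over the whole line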
1516template<typename PrefixSumAccessor, typename T>
1617struct BoxSampler
1718{
@@ -20,7 +21,6 @@ struct BoxSampler
2021 PrefixSumAccessor prefixSumAccessor;
2122 uint16_t wrapMode;
2223 uint16_t linearSize;
23- T normalizationFactor;
2424
2525 T operator ()(float32_t ix, float32_t radius, float32_t borderColor)
2626 {
@@ -33,7 +33,8 @@ struct BoxSampler
3333 const int32_t leftFlIdx = (int32_t)floor (leftIdx);
3434 const int32_t leftClIdx = (int32_t)ceil (leftIdx);
3535
36- assert (linearSize > 1 );
36+ assert (linearSize > 1 && radius >= 0 );
37+ assert (borderColor >= 0 && borderColor <= 1 );
3738
3839 T result = 0 ;
3940 if (rightClIdx < linearSize)
@@ -45,10 +46,15 @@ struct BoxSampler
4546 switch (wrapMode) {
4647 case ETC_REPEAT:
4748 {
49+ const uint32_t flooredMod = rightFlIdx % linearSize;
50+ const uint32_t ceiledMod = rightClIdx % linearSize;
4851 const T last = prefixSumAccessor.template get<T, uint32_t>(lastIdx);
49- const T floored = prefixSumAccessor.template get<T, uint32_t>(rightFlIdx % linearSize) + last;
50- const T ceiled = prefixSumAccessor.template get<T, uint32_t>(rightClIdx % linearSize) + last;
51- result += lerp (floored, ceiled, alpha);
52+ const T periodicOffset = (T (rightFlIdx) / linearSize) * last;
53+ const T floored = prefixSumAccessor.template get<T, uint32_t>(flooredMod);
54+ T ceiled = prefixSumAccessor.template get<T, uint32_t>(ceiledMod);
55+ if (flooredMod == lastIdx && ceiledMod == 0 )
56+ ceiled += last;
57+ result += lerp (floored, ceiled, alpha) + periodicOffset;
5258 break ;
5359 }
5460 case ETC_CLAMP_TO_BORDER:
@@ -114,10 +120,15 @@ struct BoxSampler
114120 switch (wrapMode) {
115121 case ETC_REPEAT:
116122 {
123+ const uint32_t flooredMod = (linearSize + leftFlIdx) % linearSize;
124+ const uint32_t ceiledMod = (linearSize + leftClIdx) % linearSize;
117125 const T last = prefixSumAccessor.template get<T, uint32_t>(lastIdx);
118- const T floored = prefixSumAccessor.template get<T, uint32_t>((lastIdx + leftFlIdx) % linearSize) + floor (T (leftFlIdx) / linearSize) * last;
119- const T ceiled = prefixSumAccessor.template get<T, uint32_t>((lastIdx + leftClIdx) % linearSize) + floor (T (leftClIdx) / linearSize) * last;
120- result -= lerp (floored, ceiled, alpha);
126+ const T periodicOffset = (T (linearSize + leftClIdx) / T (linearSize)) * last;
127+ const T floored = prefixSumAccessor.template get<T, uint32_t>(flooredMod);
128+ T ceiled = prefixSumAccessor.template get<T, uint32_t>(ceiledMod);
129+ if (flooredMod == lastIdx && ceiledMod == 0 )
130+ ceiled += last;
131+ result -= lerp (floored, ceiled, alpha) - periodicOffset;
121132 break ;
122133 }
123134 case ETC_CLAMP_TO_BORDER:
@@ -127,36 +138,36 @@ struct BoxSampler
127138 }
128139 case ETC_CLAMP_TO_EDGE:
129140 {
130- result -= (1 - abs (leftIdx) ) * prefixSumAccessor.template get<T, uint32_t>(0 );
141+ result -= (leftIdx + 1 ) * prefixSumAccessor.template get<T, uint32_t>(0 );
131142 break ;
132143 }
133144 case ETC_MIRROR:
134145 {
135146 const T last = prefixSumAccessor.template get<T, uint32_t>(lastIdx);
136147 T floored, ceiled;
137148
138- if (abs (leftFlIdx + 1 ) % (2 * linearSize) == 0 )
139- floored = -(abs (leftFlIdx + 1 ) / linearSize) * last;
149+ if (abs (leftFlIdx) % (2 * linearSize) == 0 )
150+ floored = -(abs (leftFlIdx) / linearSize) * last;
140151 else
141152 {
142- const uint32_t period = uint32_t (ceil (float32_t (abs (leftFlIdx + 1 )) / linearSize));
153+ const uint32_t period = uint32_t (ceil (float32_t (abs (leftFlIdx)) / linearSize));
143154 if ((period & 0x1u) == 1 )
144- floored = -(period - 1 ) * last - prefixSumAccessor.template get<T, uint32_t>((abs (leftFlIdx + 1 ) - 1 ) % linearSize);
155+ floored = -(period - 1 ) * last - prefixSumAccessor.template get<T, uint32_t>((abs (leftFlIdx) - 1 ) % linearSize);
145156 else
146- floored = -(period - 1 ) * last - (last - prefixSumAccessor.template get<T, uint32_t>(( leftFlIdx + 1 ) % linearSize - 1 ));
157+ floored = -(period - 1 ) * last - (last - prefixSumAccessor.template get<T, uint32_t>(leftFlIdx % linearSize - 1 ));
147158 }
148159
149160 if (leftClIdx == 0 ) // Special case, wouldn't be possible for `floored` above
150161 ceiled = 0 ;
151- else if (abs (leftClIdx + 1 ) % (2 * linearSize) == 0 )
152- ceiled = -(abs (leftClIdx + 1 ) / linearSize) * last;
162+ else if (abs (leftClIdx) % (2 * linearSize) == 0 )
163+ ceiled = -(abs (leftClIdx) / linearSize) * last;
153164 else
154165 {
155- const uint32_t period = uint32_t (ceil (float32_t (abs (leftClIdx + 1 )) / linearSize));
166+ const uint32_t period = uint32_t (ceil (float32_t (abs (leftClIdx)) / linearSize));
156167 if ((period & 0x1u) == 1 )
157- ceiled = -(period - 1 ) * last - prefixSumAccessor.template get<T, uint32_t>((abs (leftClIdx + 1 ) - 1 ) % linearSize);
168+ ceiled = -(period - 1 ) * last - prefixSumAccessor.template get<T, uint32_t>((abs (leftClIdx) - 1 ) % linearSize);
158169 else
159- ceiled = -(period - 1 ) * last - (last - prefixSumAccessor.template get<T, uint32_t>(( leftClIdx + 1 ) % linearSize - 1 ));
170+ ceiled = -(period - 1 ) * last - (last - prefixSumAccessor.template get<T, uint32_t>(leftClIdx % linearSize - 1 ));
160171 }
161172
162173 result -= lerp (floored, ceiled, alpha);
@@ -166,13 +177,13 @@ struct BoxSampler
166177 {
167178 const T last = prefixSumAccessor.template get<T, uint32_t>(lastIdx);
168179 const T lastMinusOne = prefixSumAccessor.template get<T, uint32_t>(lastIdx - 1 );
169- result -= (1 - abs (leftIdx) ) * (last - lastMinusOne);
180+ result -= (leftIdx + 1 ) * (last - lastMinusOne);
170181 break ;
171182 }
172183 }
173184 }
174185
175- return result * normalizationFactor ;
186+ return result / ( 2 * radius + 1 ) ;
176187 }
177188};
178189
0 commit comments