Skip to content

Commit f1b7d17

Browse files
committed
Add median_luma_meter
1 parent ce2ca41 commit f1b7d17

File tree

1 file changed

+145
-0
lines changed

1 file changed

+145
-0
lines changed

include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,151 @@ struct geom_meter {
141141
float_t sampleCount;
142142
float_t2 lumaMinMax;
143143
};
144+
145+
// Luma meter that bins clamped luma samples into a workgroup-shared histogram, scans the bins
// and accumulates the scanned values into a global histogram (`sampleLuma`), then derives a single
// luma value from two fixed bins of that histogram (`gatherLuma`).
//
// Template parameters:
//   GroupSize         - number of invocations per workgroup; also the stride of the per-bin loops.
//   BinCount          - number of histogram bins.
//   HistogramAccessor - global histogram storage; must provide `atomicAdd(index, value)` and `get(index)`.
//   SharedAccessor    - workgroup scratch storage; must provide `type`, `get`, `set`, `atomicAdd`
//                       and `workgroupExecutionAndMemoryBarrier()`.
//   TexAccessor       - source image; must provide `get(uv)` and a static `toXYZ(color)`.
//
// NOTE(review): `sampleCount` is stored by `create` but is not read by any method visible here.
template<uint32_t GroupSize, uint16_t BinCount, typename HistogramAccessor, typename SharedAccessor, typename TexAccessor>
struct median_meter {
    using int_t = typename SharedAccessor::type;
    using float_t = float32_t;
    using float_t2 = typename conditional<is_same_v<float_t, float32_t>, float32_t2, float16_t2>::type;
    using float_t3 = typename conditional<is_same_v<float_t, float32_t>, float32_t3, float16_t3>::type;
    using this_t = median_meter<GroupSize, BinCount, HistogramAccessor, SharedAccessor, TexAccessor>;

    // Builds a meter from the luma clamp range (x = min, y = max) and the sample count.
    static this_t create(float_t2 lumaMinMax, float_t sampleCount) {
        this_t retval;
        retval.lumaMinMax = lumaMinMax;
        retval.sampleCount = sampleCount;
        return retval;
    }

    // Workgroup-wide inclusive prefix sum (integer addition) of `value`, using `sdata` as scratch.
    // The parameter is `int_t` so integer bin contents are not round-tripped through float32,
    // which would lose precision above 2^24; float arguments still convert implicitly.
    int_t inclusive_scan(int_t value, NBL_REF_ARG(SharedAccessor) sdata) {
        return workgroup::inclusive_scan < plus < int_t >, GroupSize >::
            template __call <SharedAccessor>(value, sdata);
    }

    // Samples the texture at `shiftedCoord` remapped by the metering window, converts the colour with
    // `TexAccessor::toXYZ` and returns the result clamped to [lumaMinMax.x, lumaMinMax.y].
    float_t computeLuma(
        NBL_CONST_REF_ARG(MeteringWindow) window,
        NBL_REF_ARG(TexAccessor) tex,
        float_t2 shiftedCoord
    ) {
        const float_t2 uvPos = shiftedCoord * window.meteringWindowScale + window.meteringWindowOffset;
        const float_t3 color = tex.get(uvPos);
        const float_t luma = (float_t)TexAccessor::toXYZ(color);

        return clamp(luma, lumaMinMax.x, lumaMinMax.y);
    }

    // Maps `val` to an integer as `(val - minLog2) * rangeLog2`, clamped to
    // [0, 2^fixedPointBitsLeft - 1]. `int2Float` is the inverse mapping.
    int_t float2Int(
        float_t val,
        float_t minLog2,
        float_t rangeLog2
    ) {
        const uint32_t3 workGroupCount = glsl::gl_NumWorkGroups();
        const uint32_t totalWorkGroups = workGroupCount.x * workGroupCount.y * workGroupCount.z;
        const uint32_t workGroupBits = uint32_t(ceil(log2(float32_t(totalWorkGroups))));

        // For small dispatches `32 - workGroupBits + subgroupSizeLog2` reaches 32 or more, and shifting a
        // 32-bit value by >= 32 is not well defined, so the shift amount is capped at 31.
        const uint32_t fixedPointBitsLeft = min(32u - workGroupBits + uint32_t(glsl::gl_SubgroupSizeLog2()), 31u);
        const float_t maxValue = float32_t((1u << fixedPointBitsLeft) - 1u);

        return int_t(clamp((val - minLog2) * rangeLog2, 0.f, maxValue));
    }

    // Inverse of the unclamped part of `float2Int`: `val / rangeLog2 + minLog2`.
    float_t int2Float(
        int_t val,
        float_t minLog2,
        float_t rangeLog2
    ) {
        return val / rangeLog2 + minLog2;
    }

    // Per-invocation sampling pass. Each invocation:
    //   1. helps zero the `BinCount` shared bins,
    //   2. samples one texel (Morton-decoded from its index, offset by `tileOffset`, normalised by
    //      `viewportSize`) and atomically adds its quantised luma to the matching shared bin,
    //   3. takes part in an inclusive scan over the bins, `GroupSize` bins at a time, atomically adding
    //      each scanned value to the global histogram.
    //
    // NOTE(review): the scanned sums are passed through `float2Int` before reaching `histo`; that is
    // kept as in the original, but confirm it is intended (the mapping is affine, not linear).
    void sampleLuma(
        NBL_CONST_REF_ARG(MeteringWindow) window,
        NBL_REF_ARG(HistogramAccessor) histo,
        NBL_REF_ARG(TexAccessor) tex,
        NBL_REF_ARG(SharedAccessor) sdata,
        float_t2 tileOffset,
        float_t2 viewportSize
    ) {
        const uint32_t tid = workgroup::SubgroupContiguousIndex();
        const float_t lumaRange = lumaMinMax.y - lumaMinMax.x;

        // clear the shared bins before anyone accumulates into them
        for (uint32_t vid = tid; vid < BinCount; vid += GroupSize) {
            sdata.set(vid, 0);
        }

        sdata.workgroupExecutionAndMemoryBarrier();

        uint32_t2 coord = {
            morton2d_decode_x(tid),
            morton2d_decode_y(tid)
        };

        const float_t2 shiftedCoord = (tileOffset + (float32_t2)(coord)) / viewportSize;
        const float_t luma = computeLuma(window, tex, shiftedCoord);

        // `luma` is clamped to lumaMinMax, so luma == lumaMinMax.y would land exactly on index BinCount;
        // clamp to the last bin to keep the shared-memory atomic in range.
        const float_t binSize = lumaRange / BinCount;
        const uint32_t binIndex = min((uint32_t)((luma - lumaMinMax.x) / binSize), uint32_t(BinCount) - 1u);

        sdata.atomicAdd(binIndex, float2Int(luma, lumaMinMax.x, lumaRange));

        sdata.workgroupExecutionAndMemoryBarrier();

        int_t histogramValue;
        sdata.get(tid, histogramValue);

        // every invocation must have read its bin before the scan starts using `sdata`
        sdata.workgroupExecutionAndMemoryBarrier();

        int_t sum = inclusive_scan(histogramValue, sdata);
        // same range guard as inside the loop below: with BinCount < GroupSize some invocations own no bin
        if (tid < BinCount) {
            histo.atomicAdd(tid, float2Int(float_t(sum), lumaMinMax.x, lumaRange));
        }

        const bool isLastWgInvocation = tid == (GroupSize - 1);
        const static uint32_t RoundedBinCount = 1 + (BinCount - 1) / GroupSize;

        for (uint32_t i = 1; i < RoundedBinCount; i++) {
            const uint32_t keyBucketStart = GroupSize * i;
            const uint32_t vid = tid + keyBucketStart;

            // no if statement about the last iteration needed
            if (isLastWgInvocation) {
                // carry the running total of the previous chunk into the head of this chunk
                int_t beforeSum;
                sdata.get(keyBucketStart, beforeSum);
                sdata.set(keyBucketStart, beforeSum + sum);
            }

            // propagate last block tail to next block head and protect against subsequent scans stepping on each other's toes
            sdata.workgroupExecutionAndMemoryBarrier();

            // no aliasing anymore
            int_t atVid;
            sdata.get(vid, atVid);
            sum = inclusive_scan(atVid, sdata);
            if (vid < BinCount) {
                histo.atomicAdd(vid, float2Int(float_t(sum), lumaMinMax.x, lumaRange));
            }
        }
    }

    // Copies the global histogram into shared memory, then returns the average of the values stored
    // at the bins located at 40% and 60% of `BinCount`, each mapped back through `int2Float`.
    //
    // NOTE(review): the lookup is by bin position, not by searching for the bin where the cumulative
    // value crosses 40%/60% of the total; confirm this matches the intended median estimate.
    float_t gatherLuma(
        NBL_REF_ARG(HistogramAccessor) histo,
        NBL_REF_ARG(SharedAccessor) sdata
    ) {
        const uint32_t tid = workgroup::SubgroupContiguousIndex();

        // `vid < BinCount` already holds here, so the histogram is indexed directly
        for (uint32_t vid = tid; vid < BinCount; vid += GroupSize) {
            sdata.set(vid, histo.get(vid));
        }

        sdata.workgroupExecutionAndMemoryBarrier();

        // floor(BinCount * 0.4) and floor(BinCount * 0.6), in exact integer arithmetic
        const uint32_t percentile40Bin = (uint32_t(BinCount) * 2u) / 5u;
        const uint32_t percentile60Bin = (uint32_t(BinCount) * 3u) / 5u;

        int_t percentile40, percentile60;
        sdata.get(percentile40Bin, percentile40);
        sdata.get(percentile60Bin, percentile60);

        const float_t lumaRange = lumaMinMax.y - lumaMinMax.x;
        const float_t lower = int2Float(percentile40, lumaMinMax.x, lumaRange);
        const float_t upper = int2Float(percentile60, lumaMinMax.x, lumaRange);

        return (lower + upper) / 2;
    }

    float_t sampleCount;   // stored by `create`; not read by the methods in this struct
    float_t2 lumaMinMax;   // x = minimum luma, y = maximum luma used for clamping and binning
};
288+
144289
}
145290
}
146291
}

0 commit comments

Comments
 (0)