@@ -36,13 +36,13 @@ struct geom_meter {
3636 return retval;
3737 }
3838
// Patch note: this hunk only renames the helper `reduction` -> `__reduction`;
// the body lines below are unchanged context.
// The helper forwards `value` and the `sdata` accessor to
// workgroup::reduction<plus<float_t>, GroupSize>::__call<SharedAccessor> and
// returns whatever that call produces (presumably the workgroup-wide sum of
// `value` -- confirm against the workgroup::reduction documentation).
39- float_t reduction (float_t value, NBL_REF_ARG (SharedAccessor) sdata)
39+ float_t __reduction (float_t value, NBL_REF_ARG (SharedAccessor) sdata)
4040 {
4141 return workgroup::reduction < plus < float_t >, GroupSize >::
4242 template __call <SharedAccessor>(value, sdata);
4343 }
4444
45- float_t computeLumaLog2 (
45+ float_t __computeLumaLog2 (
4646 NBL_CONST_REF_ARG (MeteringWindow) window,
4747 NBL_REF_ARG (TexAccessor) tex,
4848 float_t2 shiftedCoord
@@ -54,26 +54,26 @@ struct geom_meter {
5454
5555 luma = clamp (luma, lumaMinMax.x, lumaMinMax.y);
5656
57- return max ( log2 (luma), log2 (lumaMinMax.x) );
57+ return log2 (luma);
5858 }
5959
// Patch note: renames `uploadFloat` -> `__uploadFloat`, removes the `index`
// parameter, and computes the destination slot (`workgroupIndex`) inside the
// function instead of receiving it from the caller.
//
// Converts `val` to an unsigned fixed-point pattern and atomically adds it to
// one slot of `val_accessor`.
//   val_accessor - accessor receiving the atomicAdd
//   val          - value to quantise
//   minLog2      - subtracted from `val` before scaling
//   rangeLog2    - factor the shifted value is multiplied by
60- void uploadFloat (
60+ void __uploadFloat (
6161 NBL_REF_ARG (ValueAccessor) val_accessor,
62- uint32_t index,
6362 float_t val,
6463 float_t minLog2,
6564 float_t rangeLog2
6665 )
6766 {
6867 uint32_t3 workGroupCount = glsl::gl_NumWorkGroups ();
// NOTE(review): this value is built only from gl_NumWorkGroups(), i.e. the
// total workgroup count divided by 64, so it presumably evaluates to the same
// number in every workgroup of a dispatch -- confirm whether a per-workgroup
// ID was intended here. (The removed caller code computed the same expression.)
68+ uint32_t workgroupIndex = (workGroupCount.x * workGroupCount.y * workGroupCount.z) / 64 ;
// Bits available for the fixed-point value: 32 minus ceil(log2(total workgroup
// count)), plus the subgroup-size log2.
6969 uint32_t fixedPointBitsLeft = 32 - uint32_t (ceil (log2 (workGroupCount.x * workGroupCount.y * workGroupCount.z))) + glsl::gl_SubgroupSizeLog2 ();
7070
// (val - minLog2) * rangeLog2, clamped to [0, (1 << fixedPointBitsLeft) - 1],
// then converted to uint32_t.
// NOTE(review): the shifted value is multiplied by `rangeLog2`; if the intent
// is to normalise into [0,1] before scaling to fixed point, a division would
// be expected -- confirm against __downloadFloat.
7171 uint32_t lumaSumBitPattern = uint32_t (clamp ((val - minLog2) * rangeLog2, 0.f , float32_t ((1 << fixedPointBitsLeft) - 1 )));
7272
// The slot index is masked to its low gl_SubgroupSizeLog2() bits before the add.
73- val_accessor.atomicAdd (index & ((1 << glsl::gl_SubgroupSizeLog2 ()) - 1 ), lumaSumBitPattern);
73+ val_accessor.atomicAdd (workgroupIndex & ((1 << glsl::gl_SubgroupSizeLog2 ()) - 1 ), lumaSumBitPattern);
7474 }
7575
76- float_t downloadFloat (
76+ float_t __downloadFloat (
7777 NBL_REF_ARG (ValueAccessor) val_accessor,
7878 uint32_t index,
7979 float_t minLog2,
@@ -101,17 +101,13 @@ struct geom_meter {
101101
102102 float_t luma = 0.0f ;
103103 float_t2 shiftedCoord = (tileOffset + (float32_t2)(coord)) / viewportSize;
104- luma = computeLumaLog2 (window, tex, shiftedCoord);
105- float_t lumaSum = reduction (luma, sdata);
106-
107- if (tid == GroupSize - 1 ) {
108- uint32_t3 workgroupCount = glsl::gl_NumWorkGroups ();
109- uint32_t workgroupIndex = (workgroupCount.x * workgroupCount.y * workgroupCount.z) / 64 ;
104+ float_t lumaLog2 = __computeLumaLog2 (window, tex, shiftedCoord);
105+ float_t lumaLog2Sum = __reduction (lumaLog2, sdata);
110106
111- uploadFloat (
107+ if (tid == 0 ) {
108+ __uploadFloat (
112109 val,
113- workgroupIndex,
114- lumaSum,
110+ lumaLog2Sum,
115111 log2 (lumaMinMax.x),
116112 log2 (lumaMinMax.y / lumaMinMax.x)
117113 );
@@ -124,7 +120,7 @@ struct geom_meter {
124120 {
125121 uint32_t tid = glsl::gl_SubgroupInvocationID ();
126122 float_t luma = glsl::subgroupAdd (
127- downloadFloat (
123+ __downloadFloat (
128124 val,
129125 tid,
130126 log2 (lumaMinMax.x),
@@ -150,19 +146,18 @@ struct median_meter {
// float_t3 resolves to float32_t3 when float_t is float32_t, otherwise to float16_t3.
150146 using float_t3 = typename conditional<is_same_v<float_t, float32_t>, float32_t3, float16_t3>::type;
// Shorthand for this exact median_meter specialisation; used as the return type of create().
151147 using this_t = median_meter<GroupSize, BinCount, HistogramAccessor, SharedAccessor, TexAccessor>;
152148
// Patch note: drops the `sampleCount` parameter and the matching member
// assignment; after this change create() takes only `lumaMinMax`.
// NOTE(review): this changes the signature, so any existing caller that still
// passes two arguments must be updated -- callers are not visible in this diff.
//
// Factory: returns a this_t whose lumaMinMax member is set to the argument.
153- static this_t create (float_t2 lumaMinMax, float_t sampleCount ) {
149+ static this_t create (float_t2 lumaMinMax) {
154150 this_t retval;
155151 retval.lumaMinMax = lumaMinMax;
156- retval.sampleCount = sampleCount;
157152 return retval;
158153 }
159154
// Patch note: this hunk only renames `inclusive_scan` -> `__inclusive_scan`;
// the body lines are unchanged context.
// NOTE(review): an unchanged context line further down in this same diff still
// reads `sum = inclusive_scan (atVid, sdata);` with the old name -- confirm
// that call site is updated, otherwise it no longer refers to this helper.
// NOTE(review): `value` is declared float_t while the scan is instantiated
// with plus<int_t> and the result is int_t -- confirm the implicit
// conversion is intended.
//
// Forwards `value` and the `sdata` accessor to
// workgroup::inclusive_scan<plus<int_t>, GroupSize>::__call<SharedAccessor>
// and returns its result.
160- int_t inclusive_scan (float_t value, NBL_REF_ARG (SharedAccessor) sdata) {
155+ int_t __inclusive_scan (float_t value, NBL_REF_ARG (SharedAccessor) sdata) {
161156 return workgroup::inclusive_scan < plus < int_t >, GroupSize >::
162157 template __call <SharedAccessor>(value, sdata);
163158 }
164159
165- float_t computeLuma (
160+ float_t __computeLuma (
166161 NBL_CONST_REF_ARG (MeteringWindow) window,
167162 NBL_REF_ARG (TexAccessor) tex,
168163 float_t2 shiftedCoord
@@ -174,7 +169,7 @@ struct median_meter {
174169 return clamp (luma, lumaMinMax.x, lumaMinMax.y);
175170 }
176171
177- int_t float2Int (
172+ int_t __float2Int (
178173 float_t val,
179174 float_t minLog2,
180175 float_t rangeLog2
@@ -185,7 +180,7 @@ struct median_meter {
185180 return int_t (clamp ((val - minLog2) * rangeLog2, 0.f , float32_t ((1 << fixedPointBitsLeft) - 1 )));
186181 }
187182
188- float_t int2Float (
183+ float_t __int2Float (
189184 int_t val,
190185 float_t minLog2,
191186 float_t rangeLog2
@@ -216,7 +211,7 @@ struct median_meter {
216211
217212 float_t luma = 0.0f ;
218213 float_t2 shiftedCoord = (tileOffset + (float32_t2)(coord)) / viewportSize;
219- luma = computeLuma (window, tex, shiftedCoord);
214+ luma = __computeLuma (window, tex, shiftedCoord);
220215
221216 float_t binSize = (lumaMinMax.y - lumaMinMax.x) / BinCount;
222217 uint32_t binIndex = (uint32_t)((luma - lumaMinMax.x) / binSize);
@@ -255,7 +250,7 @@ struct median_meter {
255250 sdata.get (vid, atVid);
256251 sum = inclusive_scan (atVid, sdata);
257252 if (vid < BinCount) {
258- histo.atomicAdd (vid, float2Int (sum, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x));
253+ histo.atomicAdd (vid, __float2Int (sum, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x));
259254 }
260255 }
261256 }
@@ -279,10 +274,9 @@ struct median_meter {
279274 sdata.get (BinCount * 0.4 , percentile40);
280275 sdata.get (BinCount * 0.6 , percentile60);
281276
282- return (int2Float (percentile40, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x) + int2Float (percentile60, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x)) / 2 ;
277+ return (__int2Float (percentile40, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x) + __int2Float (percentile60, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x)) / 2 ;
283278 }
284279
285- float_t sampleCount;
286280 float_t2 lumaMinMax;
287281};
288282
0 commit comments