Skip to content

Commit 2b5e502

Browse files
committed
Make changes to luma_meter
1 parent 83ac633 commit 2b5e502

File tree

2 files changed

+34
-34
lines changed

2 files changed

+34
-34
lines changed

include/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl

Lines changed: 21 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,13 @@ struct geom_meter {
3636
return retval;
3737
}
3838

// Internal helper: folds `value` through workgroup::reduction with the
// plus<float_t> operator, parameterised on GroupSize, and hands back the result.
// `sdata` is the accessor forwarded to the reduction as its scratch storage.
float_t __reduction(float_t value, NBL_REF_ARG(SharedAccessor) sdata)
{
    const float_t workgroupSum =
        workgroup::reduction<plus<float_t>, GroupSize>::template __call<SharedAccessor>(value, sdata);
    return workgroupSum;
}
4444

45-
float_t computeLumaLog2(
45+
float_t __computeLumaLog2(
4646
NBL_CONST_REF_ARG(MeteringWindow) window,
4747
NBL_REF_ARG(TexAccessor) tex,
4848
float_t2 shiftedCoord
@@ -54,26 +54,26 @@ struct geom_meter {
5454

5555
luma = clamp(luma, lumaMinMax.x, lumaMinMax.y);
5656

57-
return max(log2(luma), log2(lumaMinMax.x));
57+
return log2(luma);
5858
}
5959

// Internal helper: quantises `val` to an unsigned fixed-point bit pattern and
// atomically accumulates it into one slot of `val_accessor`.
//
// val_accessor  destination accessor; only `atomicAdd(index, value)` is used.
// val           value to upload (the caller passes a workgroup-reduced sum).
// minLog2       offset subtracted from `val` before scaling.
// rangeLog2     factor the offset value is multiplied by before clamping.
//
// The slot is the linear index of the current workgroup, masked to
// [0, subgroup size). The previous code derived the "index" from
// gl_NumWorkGroups() alone, which is identical for every workgroup, so all
// workgroups hammered the same slot.
void __uploadFloat(
    NBL_REF_ARG(ValueAccessor) val_accessor,
    float_t val,
    float_t minLog2,
    float_t rangeLog2
)
{
    const uint32_t3 workGroupCount = glsl::gl_NumWorkGroups();
    const uint32_t3 workGroupID = glsl::gl_WorkGroupID();

    // Row-major flattening of the 3D workgroup ID: unique per workgroup in the dispatch.
    const uint32_t workgroupIndex =
        (workGroupID.z * workGroupCount.y + workGroupID.y) * workGroupCount.x + workGroupID.x;

    // NOTE(review): when the dispatch has fewer than 2^SubgroupSizeLog2 workgroups this is >= 32
    // and the shifts below overflow - confirm the intended lower bound on dispatch size.
    uint32_t fixedPointBitsLeft = 32 - uint32_t(ceil(log2(workGroupCount.x * workGroupCount.y * workGroupCount.z))) + glsl::gl_SubgroupSizeLog2();

    // Clamp into the representable fixed-point range before converting to an integer.
    uint32_t lumaSumBitPattern = uint32_t(clamp((val - minLog2) * rangeLog2, 0.f, float32_t((1 << fixedPointBitsLeft) - 1)));

    // Spread the atomics over one slot per subgroup lane.
    val_accessor.atomicAdd(workgroupIndex & ((1 << glsl::gl_SubgroupSizeLog2()) - 1), lumaSumBitPattern);
}
7575

76-
float_t downloadFloat(
76+
float_t __downloadFloat(
7777
NBL_REF_ARG(ValueAccessor) val_accessor,
7878
uint32_t index,
7979
float_t minLog2,
@@ -101,17 +101,13 @@ struct geom_meter {
101101

102102
float_t luma = 0.0f;
103103
float_t2 shiftedCoord = (tileOffset + (float32_t2)(coord)) / viewportSize;
104-
luma = computeLumaLog2(window, tex, shiftedCoord);
105-
float_t lumaSum = reduction(luma, sdata);
106-
107-
if (tid == GroupSize - 1) {
108-
uint32_t3 workgroupCount = glsl::gl_NumWorkGroups();
109-
uint32_t workgroupIndex = (workgroupCount.x * workgroupCount.y * workgroupCount.z) / 64;
104+
float_t lumaLog2 = __computeLumaLog2(window, tex, shiftedCoord);
105+
float_t lumaLog2Sum = __reduction(lumaLog2, sdata);
110106

111-
uploadFloat(
107+
if (tid == 0) {
108+
__uploadFloat(
112109
val,
113-
workgroupIndex,
114-
lumaSum,
110+
lumaLog2Sum,
115111
log2(lumaMinMax.x),
116112
log2(lumaMinMax.y / lumaMinMax.x)
117113
);
@@ -124,7 +120,7 @@ struct geom_meter {
124120
{
125121
uint32_t tid = glsl::gl_SubgroupInvocationID();
126122
float_t luma = glsl::subgroupAdd(
127-
downloadFloat(
123+
__downloadFloat(
128124
val,
129125
tid,
130126
log2(lumaMinMax.x),
@@ -150,19 +146,18 @@ struct median_meter {
150146
using float_t3 = typename conditional<is_same_v<float_t, float32_t>, float32_t3, float16_t3>::type;
151147
using this_t = median_meter<GroupSize, BinCount, HistogramAccessor, SharedAccessor, TexAccessor>;
152148

// Factory: builds a median_meter whose `lumaMinMax` member is set to the
// supplied pair; no other member is assigned here.
static this_t create(float_t2 lumaMinMax) {
    this_t meter;
    meter.lumaMinMax = lumaMinMax;
    return meter;
}

// Internal helper: runs workgroup::inclusive_scan with the plus<int_t> operator,
// parameterised on GroupSize, over `value`, using `sdata` as the accessor
// forwarded to the scan. Note that `value` arrives as float_t while the scan
// operator and the return type are int_t.
int_t __inclusive_scan(float_t value, NBL_REF_ARG(SharedAccessor) sdata) {
    const int_t scanned =
        workgroup::inclusive_scan<plus<int_t>, GroupSize>::template __call<SharedAccessor>(value, sdata);
    return scanned;
}

165-
float_t computeLuma(
160+
float_t __computeLuma(
166161
NBL_CONST_REF_ARG(MeteringWindow) window,
167162
NBL_REF_ARG(TexAccessor) tex,
168163
float_t2 shiftedCoord
@@ -174,7 +169,7 @@ struct median_meter {
174169
return clamp(luma, lumaMinMax.x, lumaMinMax.y);
175170
}
176171

177-
int_t float2Int(
172+
int_t __float2Int(
178173
float_t val,
179174
float_t minLog2,
180175
float_t rangeLog2
@@ -185,7 +180,7 @@ struct median_meter {
185180
return int_t(clamp((val - minLog2) * rangeLog2, 0.f, float32_t((1 << fixedPointBitsLeft) - 1)));
186181
}
187182

188-
float_t int2Float(
183+
float_t __int2Float(
189184
int_t val,
190185
float_t minLog2,
191186
float_t rangeLog2
@@ -216,7 +211,7 @@ struct median_meter {
216211

217212
float_t luma = 0.0f;
218213
float_t2 shiftedCoord = (tileOffset + (float32_t2)(coord)) / viewportSize;
219-
luma = computeLuma(window, tex, shiftedCoord);
214+
luma = __computeLuma(window, tex, shiftedCoord);
220215

221216
float_t binSize = (lumaMinMax.y - lumaMinMax.x) / BinCount;
222217
uint32_t binIndex = (uint32_t)((luma - lumaMinMax.x) / binSize);
@@ -255,7 +250,7 @@ struct median_meter {
255250
sdata.get(vid, atVid);
256251
sum = inclusive_scan(atVid, sdata);
257252
if (vid < BinCount) {
258-
histo.atomicAdd(vid, float2Int(sum, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x));
253+
histo.atomicAdd(vid, __float2Int(sum, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x));
259254
}
260255
}
261256
}
@@ -279,10 +274,9 @@ struct median_meter {
279274
sdata.get(BinCount * 0.4, percentile40);
280275
sdata.get(BinCount * 0.6, percentile60);
281276

282-
return (int2Float(percentile40, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x) + int2Float(percentile60, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x)) / 2;
277+
return (__int2Float(percentile40, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x) + __int2Float(percentile60, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x)) / 2;
283278
}
284279

285-
float_t sampleCount;
286280
float_t2 lumaMinMax;
287281
};
288282

include/nbl/builtin/hlsl/tonemapper/operators.hlsl

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,20 +19,25 @@ template<typename T = float32_t>
1919
struct Reinhard
2020
{
2121
using float_t = enable_if_t<is_floating_point<T>::value, T>;
22-
using float_t3 = typename conditional<is_same_v<float_t, float32_t>, float32_t3, float16_t3>::type;
22+
using float_t3 = vector<float_t, 3>;
2323
using this_t = Reinhard<float_t>;
24+
// Factory for the Reinhard operator.
//   EV                 exposure value; contributes a factor of exp2(EV).
//   key                multiplier applied to exp2(EV) (default 0.18).
//   WhitePointRelToEV  white point; its squared reciprocal is cached (default 16).
static this_t create(float_t EV, float_t key = 0.18f, float_t WhitePointRelToEV = 16.f)
{
    // Typed constant so the division stays in float_t for both 16- and 32-bit instantiations.
    const float_t one = 1.0;
    const float_t whiteSquared = WhitePointRelToEV * WhitePointRelToEV;

    this_t tonemapper;
    tonemapper.keyAndManualLinearExposure = key * exp2(EV);
    tonemapper.rcpWhite2 = one / whiteSquared;
    return tonemapper;
}
3135

// Applies the operator to a colour: the .y component is scaled by the stored
// exposure, a scalar multiplier is derived from it and `rcpWhite2`, and the
// whole input colour is returned scaled by that multiplier.
float_t3 operator()(float_t3 rawCIEXYZcolor) {
    // Typed constant keeps the arithmetic in float_t.
    const float_t one = 1.0;

    const float_t exposure = keyAndManualLinearExposure;
    const float_t exposedLuma = rawCIEXYZcolor.y * exposure;

    const float_t numerator = exposure * (one + exposedLuma * rcpWhite2);
    const float_t denominator = one + exposedLuma;

    return rawCIEXYZcolor * (numerator / denominator);
}
3843

@@ -44,8 +49,8 @@ template<typename T = float32_t>
4449
struct ACES
4550
{
4651
using float_t = enable_if_t<is_floating_point<T>::value, T>;
47-
using float_t3 = typename conditional<is_same_v<float_t, float32_t>, float32_t3, float16_t3>::type;
48-
using float_t3x3 = typename conditional<is_same_v<float_t, float32_t>, float32_t3x3, float16_t3x3>::type;
52+
using float_t3 = vector<float_t, 3>;
53+
using float_t3x3 = matrix<float_t, 3, 3>;
4954

5055
using this_t = ACES<T>;
5156
static this_t create(float_t EV, float_t key = 0.18f, float_t Contrast = 1.f) {
@@ -57,9 +62,10 @@ struct ACES
5762
}
5863

5964
float_t3 operator()(float_t3 rawCIEXYZcolor) {
65+
const float_t unit = 1.0;
6066
float_t3 tonemapped = rawCIEXYZcolor;
61-
if (tonemapped.y > 1.175494351e-38)
62-
tonemapped *= exp2(log2(tonemapped.y) * (gamma - 1.0) + (exposure) * gamma);
67+
if (tonemapped.y > bit_cast<float_t>(numeric_limits<float_t>::min))
68+
tonemapped *= exp2(log2(tonemapped.y) * (gamma - unit) + (exposure) * gamma);
6369

6470
// XYZ => RRT_SAT
6571
// this seems to be a matrix for some hybrid colorspace, coefficients are somewhere inbetween BT2020 and ACEScc(t)

0 commit comments

Comments
 (0)