Skip to content

Commit 5f93cec

Browse files
committed
reverted virtual index, fix hlsl colors
1 parent 3cdfb4b commit 5f93cec

File tree

3 files changed

+128
-144
lines changed

3 files changed

+128
-144
lines changed

31_HLSLPathTracer/app_resources/glsl/common.glsl

Lines changed: 83 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ vec2 getTexCoords() {
3535
#include <nbl/builtin/glsl/limits/numeric.glsl>
3636
#include <nbl/builtin/glsl/math/constants.glsl>
3737
#include <nbl/builtin/glsl/utils/common.glsl>
38-
#include <nbl/builtin/glsl/utils/morton.glsl>
3938

4039
#include <nbl/builtin/glsl/sampling/box_muller_transform.glsl>
4140

@@ -689,115 +688,109 @@ bool closestHitProgram(in uint depth, in uint _sample, inout Ray_t ray, inout nb
689688
void main()
690689
{
691690
const ivec2 imageExtents = imageSize(outImage);
691+
const ivec2 coords = getCoordinates();
692+
vec2 texCoord = vec2(coords) / vec2(imageExtents);
693+
texCoord.y = 1.0 - texCoord.y;
692694

693-
uint virtualThreadIndex;
694-
for (uint virtualThreadBase = gl_WorkGroupID.x * _NBL_GLSL_WORKGROUP_SIZE_; virtualThreadBase < 1920*1080; virtualThreadBase += gl_NumWorkGroups.x * _NBL_GLSL_WORKGROUP_SIZE_) // not sure why 1280*720 doesn't cover entire window
695+
if (false == (all(lessThanEqual(ivec2(0),coords)) && all(greaterThan(imageExtents,coords)))) {
696+
return;
697+
}
698+
699+
if (((PTPushConstant.depth-1)>>MAX_DEPTH_LOG2)>0 || ((PTPushConstant.sampleCount-1)>>MAX_SAMPLES_LOG2)>0)
695700
{
696-
virtualThreadIndex = virtualThreadBase + gl_LocalInvocationIndex.x;
697-
const ivec2 coords = ivec2(nbl_glsl_morton_decode2d32b(virtualThreadIndex)); // getCoordinates();
698-
vec2 texCoord = vec2(coords) / vec2(imageExtents);
699-
texCoord.y = 1.0 - texCoord.y;
701+
vec4 pixelCol = vec4(1.0,0.0,0.0,1.0);
702+
imageStore(outImage, coords, pixelCol);
703+
return;
704+
}
700705

701-
if (false == (all(lessThanEqual(ivec2(0),coords)) && all(greaterThan(imageExtents,coords)))) {
702-
continue;
703-
}
706+
nbl_glsl_xoroshiro64star_state_t scramble_start_state = texelFetch(scramblebuf,coords,0).rg;
707+
const vec2 pixOffsetParam = vec2(1.0)/vec2(textureSize(scramblebuf,0));
704708

705-
if (((PTPushConstant.depth-1)>>MAX_DEPTH_LOG2)>0 || ((PTPushConstant.sampleCount-1)>>MAX_SAMPLES_LOG2)>0)
706-
{
707-
vec4 pixelCol = vec4(1.0,0.0,0.0,1.0);
708-
imageStore(outImage, coords, pixelCol);
709-
continue;
710-
}
711709

712-
nbl_glsl_xoroshiro64star_state_t scramble_start_state = texelFetch(scramblebuf,coords,0).rg;
713-
const vec2 pixOffsetParam = vec2(1.0)/vec2(textureSize(scramblebuf,0));
710+
const mat4 invMVP = PTPushConstant.invMVP;
714711

712+
vec4 NDC = vec4(texCoord*vec2(2.0,-2.0)+vec2(-1.0,1.0),0.0,1.0);
713+
vec3 camPos;
714+
{
715+
vec4 tmp = invMVP*NDC;
716+
camPos = tmp.xyz/tmp.w;
717+
NDC.z = 1.0;
718+
}
715719

716-
const mat4 invMVP = PTPushConstant.invMVP;
720+
vec3 color = vec3(0.0);
721+
float meanLumaSquared = 0.0;
722+
// TODO: if we collapse the nested for loop, then all GPUs will get `PTPushConstant.depth` factor speedup, not just NV with separate PC
723+
for (int i=0; i<PTPushConstant.sampleCount; i++)
724+
{
725+
nbl_glsl_xoroshiro64star_state_t scramble_state = scramble_start_state;
717726

718-
vec4 NDC = vec4(texCoord*vec2(2.0,-2.0)+vec2(-1.0,1.0),0.0,1.0);
719-
vec3 camPos;
727+
Ray_t ray;
728+
// raygen
720729
{
721-
vec4 tmp = invMVP*NDC;
722-
camPos = tmp.xyz/tmp.w;
723-
NDC.z = 1.0;
724-
}
730+
ray._immutable.origin = camPos;
731+
732+
vec4 tmp = NDC;
733+
// apply stochastic reconstruction filter
734+
const float gaussianFilterCutoff = 2.5;
735+
const float truncation = exp(-0.5*gaussianFilterCutoff*gaussianFilterCutoff);
736+
vec2 remappedRand = rand3d(0u,i,scramble_state)[0].xy;
737+
remappedRand.x *= 1.0-truncation;
738+
remappedRand.x += truncation;
739+
tmp.xy += pixOffsetParam*nbl_glsl_BoxMullerTransform(remappedRand,1.5);
740+
// for depth of field we could do another stochastic point-pick
741+
tmp = invMVP*tmp;
742+
ray._immutable.direction = normalize(tmp.xyz/tmp.w-camPos);
725743

726-
vec3 color = vec3(0.0);
727-
float meanLumaSquared = 0.0;
728-
// TODO: if we collapse the nested for loop, then all GPUs will get `PTPushConstant.depth` factor speedup, not just NV with separate PC
729-
for (int i=0; i<PTPushConstant.sampleCount; i++)
730-
{
731-
nbl_glsl_xoroshiro64star_state_t scramble_state = scramble_start_state;
744+
#if POLYGON_METHOD==2
745+
ray._immutable.normalAtOrigin = vec3(0.0,0.0,0.0);
746+
ray._immutable.wasBSDFAtOrigin = false;
747+
#endif
732748

733-
Ray_t ray;
734-
// raygen
735-
{
736-
ray._immutable.origin = camPos;
737-
738-
vec4 tmp = NDC;
739-
// apply stochastic reconstruction filter
740-
const float gaussianFilterCutoff = 2.5;
741-
const float truncation = exp(-0.5*gaussianFilterCutoff*gaussianFilterCutoff);
742-
vec2 remappedRand = rand3d(0u,i,scramble_state)[0].xy;
743-
remappedRand.x *= 1.0-truncation;
744-
remappedRand.x += truncation;
745-
tmp.xy += pixOffsetParam*nbl_glsl_BoxMullerTransform(remappedRand,1.5);
746-
// for depth of field we could do another stochastic point-pick
747-
tmp = invMVP*tmp;
748-
ray._immutable.direction = normalize(tmp.xyz/tmp.w-camPos);
749-
750-
#if POLYGON_METHOD==2
751-
ray._immutable.normalAtOrigin = vec3(0.0,0.0,0.0);
752-
ray._immutable.wasBSDFAtOrigin = false;
753-
#endif
754-
755-
ray._payload.accumulation = vec3(0.0);
756-
ray._payload.otherTechniqueHeuristic = 0.0; // needed for direct eye-light paths
757-
ray._payload.throughput = vec3(1.0);
758-
#ifdef KILL_DIFFUSE_SPECULAR_PATHS
759-
ray._payload.hasDiffuse = false;
760-
#endif
761-
}
749+
ray._payload.accumulation = vec3(0.0);
750+
ray._payload.otherTechniqueHeuristic = 0.0; // needed for direct eye-light paths
751+
ray._payload.throughput = vec3(1.0);
752+
#ifdef KILL_DIFFUSE_SPECULAR_PATHS
753+
ray._payload.hasDiffuse = false;
754+
#endif
755+
}
762756

763-
// bounces
757+
// bounces
758+
{
759+
bool hit = true; bool rayAlive = true;
760+
for (int d=1; d<=PTPushConstant.depth && hit && rayAlive; d+=2)
764761
{
765-
bool hit = true; bool rayAlive = true;
766-
for (int d=1; d<=PTPushConstant.depth && hit && rayAlive; d+=2)
767-
{
768-
ray._mutable.intersectionT = nbl_glsl_FLT_MAX;
769-
ray._mutable.objectID = traceRay(ray._mutable.intersectionT,ray._immutable.origin,ray._immutable.direction);
770-
hit = ray._mutable.objectID!=-1;
771-
if (hit)
772-
rayAlive = closestHitProgram(d, i, ray, scramble_state);
773-
}
774-
// was last trace a miss?
775-
if (!hit)
776-
missProgram(ray._immutable,ray._payload);
762+
ray._mutable.intersectionT = nbl_glsl_FLT_MAX;
763+
ray._mutable.objectID = traceRay(ray._mutable.intersectionT,ray._immutable.origin,ray._immutable.direction);
764+
hit = ray._mutable.objectID!=-1;
765+
if (hit)
766+
rayAlive = closestHitProgram(d, i, ray, scramble_state);
777767
}
768+
// was last trace a miss?
769+
if (!hit)
770+
missProgram(ray._immutable,ray._payload);
771+
}
778772

779-
vec3 accumulation = ray._payload.accumulation;
780-
781-
float rcpSampleSize = 1.0/float(i+1);
782-
color += (accumulation-color)*rcpSampleSize;
773+
vec3 accumulation = ray._payload.accumulation;
783774

784-
#ifdef VISUALIZE_HIGH_VARIANCE
785-
float luma = getLuma(accumulation);
786-
meanLumaSquared += (luma*luma-meanLumaSquared)*rcpSampleSize;
787-
#endif
788-
}
775+
float rcpSampleSize = 1.0/float(i+1);
776+
color += (accumulation-color)*rcpSampleSize;
789777

790778
#ifdef VISUALIZE_HIGH_VARIANCE
791-
float variance = getLuma(color);
792-
variance *= variance;
793-
variance = meanLumaSquared-variance;
794-
if (variance>5.0)
795-
color = vec3(1.0,0.0,0.0);
779+
float luma = getLuma(accumulation);
780+
meanLumaSquared += (luma*luma-meanLumaSquared)*rcpSampleSize;
796781
#endif
797-
798-
vec4 pixelCol = vec4(color, 1.0);
799-
imageStore(outImage, coords, pixelCol);
800782
}
783+
784+
#ifdef VISUALIZE_HIGH_VARIANCE
785+
float variance = getLuma(color);
786+
variance *= variance;
787+
variance = meanLumaSquared-variance;
788+
if (variance>5.0)
789+
color = vec3(1.0,0.0,0.0);
790+
#endif
791+
792+
vec4 pixelCol = vec4(color, 1.0);
793+
imageStore(outImage, coords, pixelCol);
801794
}
802795
/** TODO: Improving Rendering
803796

31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl

Lines changed: 44 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
#include "nbl/builtin/hlsl/glsl_compat/core.hlsl"
33
#include "nbl/builtin/hlsl/random/pcg.hlsl"
44
#include "nbl/builtin/hlsl/random/xoroshiro.hlsl"
5-
#include "nbl/builtin/hlsl/math/morton.hlsl"
65

76
#include "nbl/builtin/hlsl/bxdf/reflection.hlsl"
87
#include "nbl/builtin/hlsl/bxdf/transmission.hlsl"
@@ -140,9 +139,9 @@ static const bxdfnode_type bxdfs[BXDF_COUNT] = {
140139
bxdfnode_type::create(ext::MaterialSystem::MaterialType::DIFFUSE, false, float2(0,0), spectral_t(0.8,0.8,0.8)),
141140
bxdfnode_type::create(ext::MaterialSystem::MaterialType::DIFFUSE, false, float2(0,0), spectral_t(0.8,0.4,0.4)),
142141
bxdfnode_type::create(ext::MaterialSystem::MaterialType::DIFFUSE, false, float2(0,0), spectral_t(0.4,0.8,0.4)),
143-
bxdfnode_type::create(ext::MaterialSystem::MaterialType::CONDUCTOR, false, float2(0,0), spectral_t(1,1,1), spectral_t(0.98,0.98,0.77)),
144-
bxdfnode_type::create(ext::MaterialSystem::MaterialType::CONDUCTOR, false, float2(0,0), spectral_t(1,1,1), spectral_t(0.98,0.77,0.98)),
145-
bxdfnode_type::create(ext::MaterialSystem::MaterialType::CONDUCTOR, false, float2(0.15,0.15), spectral_t(1,1,1), spectral_t(0.98,0.77,0.98)),
142+
bxdfnode_type::create(ext::MaterialSystem::MaterialType::CONDUCTOR, false, float2(0,0), spectral_t(1.02,1.02,1.3), spectral_t(1.0,1.0,2.0)),
143+
bxdfnode_type::create(ext::MaterialSystem::MaterialType::CONDUCTOR, false, float2(0,0), spectral_t(1.02,1.3,1.02), spectral_t(1.0,2.0,1.0)),
144+
bxdfnode_type::create(ext::MaterialSystem::MaterialType::CONDUCTOR, false, float2(0.15,0.15), spectral_t(1.02,1.3,1.02), spectral_t(1.0,2.0,1.0)),
146145
bxdfnode_type::create(ext::MaterialSystem::MaterialType::DIELECTRIC, false, float2(0.0625,0.0625), spectral_t(1,1,1), spectral_t(0.71,0.69,0.67))
147146
};
148147

@@ -157,55 +156,48 @@ void main(uint32_t3 threadID : SV_DispatchThreadID)
157156
{
158157
uint32_t width, height;
159158
outImage.GetDimensions(width, height);
159+
const int32_t2 coords = getCoordinates();
160+
float32_t2 texCoord = float32_t2(coords) / float32_t2(width, height);
161+
texCoord.y = 1.0 - texCoord.y;
160162

161-
uint32_t virtualThreadIndex;
162-
[loop]
163-
for (uint32_t virtualThreadBase = glsl::gl_WorkGroupID().x * WorkgroupSize; virtualThreadBase < 1920*1080; virtualThreadBase += glsl::gl_NumWorkGroups().x * WorkgroupSize) // not sure why 1280*720 doesn't cover entire window
163+
if (false == (all((int32_t2)0 < coords)) && all(int32_t2(width, height) < coords)) {
164+
return;
165+
}
166+
167+
if (((pc.depth - 1) >> MAX_DEPTH_LOG2) > 0 || ((pc.sampleCount - 1) >> MAX_SAMPLES_LOG2) > 0)
168+
{
169+
float32_t4 pixelCol = float32_t4(1.0,0.0,0.0,1.0);
170+
outImage[coords] = pixelCol;
171+
return;
172+
}
173+
174+
int flatIdx = glsl::gl_GlobalInvocationID().y * glsl::gl_NumWorkGroups().x * WorkgroupSize + glsl::gl_GlobalInvocationID().x;
175+
176+
// set up path tracer
177+
ext::PathTracer::PathTracerCreationParams<create_params_t, float> ptCreateParams;
178+
ptCreateParams.rngState = scramblebuf[coords].rg;
179+
180+
uint2 scrambleDim;
181+
scramblebuf.GetDimensions(scrambleDim.x, scrambleDim.y);
182+
ptCreateParams.pixOffsetParam = (float2)1.0 / float2(scrambleDim);
183+
184+
float4 NDC = float4(texCoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
164185
{
165-
virtualThreadIndex = virtualThreadBase + glsl::gl_LocalInvocationIndex().x;
166-
const int32_t2 coords = (int32_t2)math::Morton<uint32_t>::decode2d(virtualThreadIndex); // getCoordinates();
167-
float32_t2 texCoord = float32_t2(coords) / float32_t2(width, height);
168-
texCoord.y = 1.0 - texCoord.y;
169-
170-
if (false == (hlsl::all((int32_t2)0 < coords)) && hlsl::all(int32_t2(width, height) < coords)) {
171-
continue;
172-
}
173-
174-
if (((pc.depth - 1) >> MAX_DEPTH_LOG2) > 0 || ((pc.sampleCount - 1) >> MAX_SAMPLES_LOG2) > 0)
175-
{
176-
float32_t4 pixelCol = float32_t4(1.0,0.0,0.0,1.0);
177-
outImage[coords] = pixelCol;
178-
continue;
179-
}
180-
181-
int flatIdx = glsl::gl_GlobalInvocationID().y * glsl::gl_NumWorkGroups().x * WorkgroupSize + glsl::gl_GlobalInvocationID().x;
182-
183-
// set up path tracer
184-
ext::PathTracer::PathTracerCreationParams<create_params_t, float> ptCreateParams;
185-
ptCreateParams.rngState = scramblebuf[coords].rg;
186-
187-
uint2 scrambleDim;
188-
scramblebuf.GetDimensions(scrambleDim.x, scrambleDim.y);
189-
ptCreateParams.pixOffsetParam = (float2)1.0 / float2(scrambleDim);
190-
191-
float4 NDC = float4(texCoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
192-
{
193-
float4 tmp = hlsl::mul(pc.invMVP, NDC);
194-
ptCreateParams.camPos = tmp.xyz / tmp.w;
195-
NDC.z = 1.0;
196-
}
197-
198-
ptCreateParams.NDC = NDC;
199-
ptCreateParams.invMVP = pc.invMVP;
200-
201-
ptCreateParams.diffuseParams = bxdfs[0].params;
202-
ptCreateParams.conductorParams = bxdfs[3].params;
203-
ptCreateParams.dielectricParams = bxdfs[6].params;
204-
205-
pathtracer_type pathtracer = pathtracer_type::create(ptCreateParams);
206-
207-
float32_t3 color = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene);
208-
float32_t4 pixCol = float32_t4(color, 1.0);
209-
outImage[coords] = pixCol;
186+
float4 tmp = mul(pc.invMVP, NDC);
187+
ptCreateParams.camPos = tmp.xyz / tmp.w;
188+
NDC.z = 1.0;
210189
}
190+
191+
ptCreateParams.NDC = NDC;
192+
ptCreateParams.invMVP = pc.invMVP;
193+
194+
ptCreateParams.diffuseParams = bxdfs[0].params;
195+
ptCreateParams.conductorParams = bxdfs[3].params;
196+
ptCreateParams.dielectricParams = bxdfs[6].params;
197+
198+
pathtracer_type pathtracer = pathtracer_type::create(ptCreateParams);
199+
200+
float32_t3 color = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene);
201+
float32_t4 pixCol = float32_t4(color, 1.0);
202+
outImage[coords] = pixCol;
211203
}

31_HLSLPathTracer/main.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1068,8 +1068,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication,
10681068
cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get());
10691069
cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 2u, 1u, &m_descriptorSet2.get());
10701070
cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(PTPushConstant), &pc);
1071-
uint32_t dispatchSize = m_physicalDevice->getLimits().computeOptimalPersistentWorkgroupDispatchSize(WindowDimensions.x * WindowDimensions.y, DefaultWorkGroupSize);
1072-
cmdbuf->dispatch(dispatchSize, 1u, 1u);
1071+
cmdbuf->dispatch(1 + (WindowDimensions.x * WindowDimensions.y - 1) / DefaultWorkGroupSize, 1u, 1u);
10731072
}
10741073

10751074
// TRANSITION m_outImgView to READ (because of descriptorSets0 -> ComputeShader Writes into the image)

0 commit comments

Comments
 (0)