Skip to content

Commit 3cdfb4b

Browse files
committed
use morton and virtual indexing
1 parent 56994a9 commit 3cdfb4b

File tree

3 files changed

+156
-139
lines changed

3 files changed

+156
-139
lines changed

31_HLSLPathTracer/app_resources/glsl/common.glsl

Lines changed: 103 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
// debug
1010
//#define NEE_ONLY
1111

12-
layout(set = 2, binding = 0) uniform sampler2D envMap;
12+
layout(set = 2, binding = 0) uniform sampler2D envMap;
1313
layout(set = 2, binding = 1) uniform usamplerBuffer sampleSequence;
1414
layout(set = 2, binding = 2) uniform usampler2D scramblebuf;
1515

@@ -35,6 +35,7 @@ vec2 getTexCoords() {
3535
#include <nbl/builtin/glsl/limits/numeric.glsl>
3636
#include <nbl/builtin/glsl/math/constants.glsl>
3737
#include <nbl/builtin/glsl/utils/common.glsl>
38+
#include <nbl/builtin/glsl/utils/morton.glsl>
3839

3940
#include <nbl/builtin/glsl/sampling/box_muller_transform.glsl>
4041

@@ -51,7 +52,7 @@ struct Sphere
5152
vec3 position;
5253
float radius2;
5354
uint bsdfLightIDs;
54-
};
55+
};
5556

5657
Sphere Sphere_Sphere(in vec3 position, in float radius, in uint bsdfID, in uint lightID)
5758
{
@@ -188,7 +189,7 @@ void Rectangle_getNormalBasis(in Rectangle rect, out mat3 basis, out vec2 extent
188189
basis[0] = rect.edge0/extents[0];
189190
basis[1] = rect.edge1/extents[1];
190191
basis[2] = normalize(cross(basis[0],basis[1]));
191-
}
192+
}
192193

193194
// return intersection distance if found, nbl_glsl_FLT_NAN otherwise
194195
float Rectangle_intersect(in Rectangle rect, in vec3 origin, in vec3 direction)
@@ -222,7 +223,7 @@ vec3 Rectangle_getNormalTimesArea(in Rectangle rect)
222223
#define OP_BITS_OFFSET 0
223224
#define OP_BITS_SIZE 2
224225
struct BSDFNode
225-
{
226+
{
226227
uvec4 data[2];
227228
};
228229

@@ -386,13 +387,13 @@ vec2 SampleSphericalMap(vec3 v)
386387
{
387388
vec2 uv = vec2(atan(v.z, v.x), asin(v.y));
388389
uv *= nbl_glsl_RECIPROCAL_PI*0.5;
389-
uv += 0.5;
390+
uv += 0.5;
390391
return uv;
391392
}
392393

393394
void missProgram(in ImmutableRay_t _immutable, inout Payload_t _payload)
394395
{
395-
vec3 finalContribution = _payload.throughput;
396+
vec3 finalContribution = _payload.throughput;
396397
// #define USE_ENVMAP
397398
#ifdef USE_ENVMAP
398399
vec2 uv = SampleSphericalMap(_immutable.direction);
@@ -415,7 +416,7 @@ nbl_glsl_LightSample nbl_glsl_bsdf_cos_generate(in nbl_glsl_AnisotropicViewSurfa
415416
{
416417
const float a = BSDFNode_getRoughness(bsdf);
417418
const mat2x3 ior = BSDFNode_getEta(bsdf);
418-
419+
419420
// fresnel stuff for dielectrics
420421
float orientedEta, rcpOrientedEta;
421422
const bool viewerInsideMedium = nbl_glsl_getOrientedEtas(orientedEta,rcpOrientedEta,interaction.isotropic.NdotV,monochromeEta);
@@ -519,7 +520,7 @@ int traceRay(inout float intersectionT, in vec3 origin, in vec3 direction)
519520

520521
intersectionT = closerIntersection ? t : intersectionT;
521522
objectID = closerIntersection ? i:objectID;
522-
523+
523524
// allowing early out results in a performance regression, WTF!?
524525
//if (anyHit && closerIntersection)
525526
//break;
@@ -543,7 +544,7 @@ nbl_glsl_LightSample nbl_glsl_light_generate_and_remainder_and_pdf(out vec3 rema
543544
{
544545
// normally we'd pick from set of lights, using `xi.z`
545546
const Light light = lights[0];
546-
547+
547548
vec3 L = nbl_glsl_light_generate_and_pdf(pdf,newRayMaxT,origin,interaction,isBSDF,xi,Light_getObjectID(light));
548549

549550
newRayMaxT *= getEndTolerance(depth);
@@ -663,15 +664,15 @@ bool closestHitProgram(in uint depth, in uint _sample, inout Ray_t ray, inout nb
663664
//
664665
bsdfSampleL = bsdf_sample.L;
665666
}
666-
667+
667668
// additional threshold
668669
const float lumaThroughputThreshold = lumaContributionThreshold;
669670
if (bsdfPdf>bsdfPdfThreshold && getLuma(throughput)>lumaThroughputThreshold)
670671
{
671672
ray._payload.throughput = throughput;
672673
ray._payload.otherTechniqueHeuristic = neeProbability/bsdfPdf; // numerically stable, don't touch
673674
ray._payload.otherTechniqueHeuristic *= ray._payload.otherTechniqueHeuristic;
674-
675+
675676
// trace new ray
676677
ray._immutable.origin = intersection+bsdfSampleL*(1.0/*kSceneSize*/)*getStartTolerance(depth);
677678
ray._immutable.direction = bsdfSampleL;
@@ -688,109 +689,115 @@ bool closestHitProgram(in uint depth, in uint _sample, inout Ray_t ray, inout nb
688689
void main()
689690
{
690691
const ivec2 imageExtents = imageSize(outImage);
691-
const ivec2 coords = getCoordinates();
692-
vec2 texCoord = vec2(coords) / vec2(imageExtents);
693-
texCoord.y = 1.0 - texCoord.y;
694-
695-
if (false == (all(lessThanEqual(ivec2(0),coords)) && all(greaterThan(imageExtents,coords)))) {
696-
return;
697-
}
698692

699-
if (((PTPushConstant.depth-1)>>MAX_DEPTH_LOG2)>0 || ((PTPushConstant.sampleCount-1)>>MAX_SAMPLES_LOG2)>0)
693+
uint virtualThreadIndex;
694+
for (uint virtualThreadBase = gl_WorkGroupID.x * _NBL_GLSL_WORKGROUP_SIZE_; virtualThreadBase < 1920*1080; virtualThreadBase += gl_NumWorkGroups.x * _NBL_GLSL_WORKGROUP_SIZE_) // not sure why 1280*720 doesn't cover entire window
700695
{
701-
vec4 pixelCol = vec4(1.0,0.0,0.0,1.0);
702-
imageStore(outImage, coords, pixelCol);
703-
return;
704-
}
705-
706-
nbl_glsl_xoroshiro64star_state_t scramble_start_state = texelFetch(scramblebuf,coords,0).rg;
707-
const vec2 pixOffsetParam = vec2(1.0)/vec2(textureSize(scramblebuf,0));
696+
virtualThreadIndex = virtualThreadBase + gl_LocalInvocationIndex.x;
697+
const ivec2 coords = ivec2(nbl_glsl_morton_decode2d32b(virtualThreadIndex)); // getCoordinates();
698+
vec2 texCoord = vec2(coords) / vec2(imageExtents);
699+
texCoord.y = 1.0 - texCoord.y;
708700

701+
if (false == (all(lessThanEqual(ivec2(0),coords)) && all(greaterThan(imageExtents,coords)))) {
702+
continue;
703+
}
709704

710-
const mat4 invMVP = PTPushConstant.invMVP;
711-
712-
vec4 NDC = vec4(texCoord*vec2(2.0,-2.0)+vec2(-1.0,1.0),0.0,1.0);
713-
vec3 camPos;
714-
{
715-
vec4 tmp = invMVP*NDC;
716-
camPos = tmp.xyz/tmp.w;
717-
NDC.z = 1.0;
718-
}
705+
if (((PTPushConstant.depth-1)>>MAX_DEPTH_LOG2)>0 || ((PTPushConstant.sampleCount-1)>>MAX_SAMPLES_LOG2)>0)
706+
{
707+
vec4 pixelCol = vec4(1.0,0.0,0.0,1.0);
708+
imageStore(outImage, coords, pixelCol);
709+
continue;
710+
}
719711

720-
vec3 color = vec3(0.0);
721-
float meanLumaSquared = 0.0;
722-
// TODO: if we collapse the nested for loop, then all GPUs will get `PTPushConstant.depth` factor speedup, not just NV with separate PC
723-
for (int i=0; i<PTPushConstant.sampleCount; i++)
724-
{
725-
nbl_glsl_xoroshiro64star_state_t scramble_state = scramble_start_state;
712+
nbl_glsl_xoroshiro64star_state_t scramble_start_state = texelFetch(scramblebuf,coords,0).rg;
713+
const vec2 pixOffsetParam = vec2(1.0)/vec2(textureSize(scramblebuf,0));
726714

727-
Ray_t ray;
728-
// raygen
729-
{
730-
ray._immutable.origin = camPos;
731-
732-
vec4 tmp = NDC;
733-
// apply stochastic reconstruction filter
734-
const float gaussianFilterCutoff = 2.5;
735-
const float truncation = exp(-0.5*gaussianFilterCutoff*gaussianFilterCutoff);
736-
vec2 remappedRand = rand3d(0u,i,scramble_state)[0].xy;
737-
remappedRand.x *= 1.0-truncation;
738-
remappedRand.x += truncation;
739-
tmp.xy += pixOffsetParam*nbl_glsl_BoxMullerTransform(remappedRand,1.5);
740-
// for depth of field we could do another stochastic point-pick
741-
tmp = invMVP*tmp;
742-
ray._immutable.direction = normalize(tmp.xyz/tmp.w-camPos);
743715

744-
#if POLYGON_METHOD==2
745-
ray._immutable.normalAtOrigin = vec3(0.0,0.0,0.0);
746-
ray._immutable.wasBSDFAtOrigin = false;
747-
#endif
716+
const mat4 invMVP = PTPushConstant.invMVP;
748717

749-
ray._payload.accumulation = vec3(0.0);
750-
ray._payload.otherTechniqueHeuristic = 0.0; // needed for direct eye-light paths
751-
ray._payload.throughput = vec3(1.0);
752-
#ifdef KILL_DIFFUSE_SPECULAR_PATHS
753-
ray._payload.hasDiffuse = false;
754-
#endif
718+
vec4 NDC = vec4(texCoord*vec2(2.0,-2.0)+vec2(-1.0,1.0),0.0,1.0);
719+
vec3 camPos;
720+
{
721+
vec4 tmp = invMVP*NDC;
722+
camPos = tmp.xyz/tmp.w;
723+
NDC.z = 1.0;
755724
}
756725

757-
// bounces
726+
vec3 color = vec3(0.0);
727+
float meanLumaSquared = 0.0;
728+
// TODO: if we collapse the nested for loop, then all GPUs will get `PTPushConstant.depth` factor speedup, not just NV with separate PC
729+
for (int i=0; i<PTPushConstant.sampleCount; i++)
758730
{
759-
bool hit = true; bool rayAlive = true;
760-
for (int d=1; d<=PTPushConstant.depth && hit && rayAlive; d+=2)
731+
nbl_glsl_xoroshiro64star_state_t scramble_state = scramble_start_state;
732+
733+
Ray_t ray;
734+
// raygen
761735
{
762-
ray._mutable.intersectionT = nbl_glsl_FLT_MAX;
763-
ray._mutable.objectID = traceRay(ray._mutable.intersectionT,ray._immutable.origin,ray._immutable.direction);
764-
hit = ray._mutable.objectID!=-1;
765-
if (hit)
766-
rayAlive = closestHitProgram(d, i, ray, scramble_state);
736+
ray._immutable.origin = camPos;
737+
738+
vec4 tmp = NDC;
739+
// apply stochastic reconstruction filter
740+
const float gaussianFilterCutoff = 2.5;
741+
const float truncation = exp(-0.5*gaussianFilterCutoff*gaussianFilterCutoff);
742+
vec2 remappedRand = rand3d(0u,i,scramble_state)[0].xy;
743+
remappedRand.x *= 1.0-truncation;
744+
remappedRand.x += truncation;
745+
tmp.xy += pixOffsetParam*nbl_glsl_BoxMullerTransform(remappedRand,1.5);
746+
// for depth of field we could do another stochastic point-pick
747+
tmp = invMVP*tmp;
748+
ray._immutable.direction = normalize(tmp.xyz/tmp.w-camPos);
749+
750+
#if POLYGON_METHOD==2
751+
ray._immutable.normalAtOrigin = vec3(0.0,0.0,0.0);
752+
ray._immutable.wasBSDFAtOrigin = false;
753+
#endif
754+
755+
ray._payload.accumulation = vec3(0.0);
756+
ray._payload.otherTechniqueHeuristic = 0.0; // needed for direct eye-light paths
757+
ray._payload.throughput = vec3(1.0);
758+
#ifdef KILL_DIFFUSE_SPECULAR_PATHS
759+
ray._payload.hasDiffuse = false;
760+
#endif
761+
}
762+
763+
// bounces
764+
{
765+
bool hit = true; bool rayAlive = true;
766+
for (int d=1; d<=PTPushConstant.depth && hit && rayAlive; d+=2)
767+
{
768+
ray._mutable.intersectionT = nbl_glsl_FLT_MAX;
769+
ray._mutable.objectID = traceRay(ray._mutable.intersectionT,ray._immutable.origin,ray._immutable.direction);
770+
hit = ray._mutable.objectID!=-1;
771+
if (hit)
772+
rayAlive = closestHitProgram(d, i, ray, scramble_state);
773+
}
774+
// was last trace a miss?
775+
if (!hit)
776+
missProgram(ray._immutable,ray._payload);
767777
}
768-
// was last trace a miss?
769-
if (!hit)
770-
missProgram(ray._immutable,ray._payload);
771-
}
772778

773-
vec3 accumulation = ray._payload.accumulation;
779+
vec3 accumulation = ray._payload.accumulation;
780+
781+
float rcpSampleSize = 1.0/float(i+1);
782+
color += (accumulation-color)*rcpSampleSize;
783+
784+
#ifdef VISUALIZE_HIGH_VARIANCE
785+
float luma = getLuma(accumulation);
786+
meanLumaSquared += (luma*luma-meanLumaSquared)*rcpSampleSize;
787+
#endif
788+
}
774789

775-
float rcpSampleSize = 1.0/float(i+1);
776-
color += (accumulation-color)*rcpSampleSize;
777-
778790
#ifdef VISUALIZE_HIGH_VARIANCE
779-
float luma = getLuma(accumulation);
780-
meanLumaSquared += (luma*luma-meanLumaSquared)*rcpSampleSize;
791+
float variance = getLuma(color);
792+
variance *= variance;
793+
variance = meanLumaSquared-variance;
794+
if (variance>5.0)
795+
color = vec3(1.0,0.0,0.0);
781796
#endif
782-
}
783797

784-
#ifdef VISUALIZE_HIGH_VARIANCE
785-
float variance = getLuma(color);
786-
variance *= variance;
787-
variance = meanLumaSquared-variance;
788-
if (variance>5.0)
789-
color = vec3(1.0,0.0,0.0);
790-
#endif
791-
792-
vec4 pixelCol = vec4(color, 1.0);
793-
imageStore(outImage, coords, pixelCol);
798+
vec4 pixelCol = vec4(color, 1.0);
799+
imageStore(outImage, coords, pixelCol);
800+
}
794801
}
795802
/** TODO: Improving Rendering
796803

0 commit comments

Comments
 (0)