@@ -35,7 +35,6 @@ vec2 getTexCoords() {
3535#include < nbl/ builtin/ glsl/ limits/ numeric.glsl>
3636#include < nbl/ builtin/ glsl/ math/ constants.glsl>
3737#include < nbl/ builtin/ glsl/ utils/ common.glsl>
38- #include < nbl/ builtin/ glsl/ utils/ morton.glsl>
3938
4039#include < nbl/ builtin/ glsl/ sampling/ box_muller_transform.glsl>
4140
@@ -689,115 +688,109 @@ bool closestHitProgram(in uint depth, in uint _sample, inout Ray_t ray, inout nb
// Shader entry point, shown here as a diff: lines marked `-` are the removed variant that
// looped over "virtual threads" and decoded pixel coordinates with nbl_glsl_morton_decode2d32b,
// lines marked `+` are the replacement that handles a single pixel obtained from getCoordinates().
// For its pixel the `+` variant:
//   1. returns early when the coordinates fall outside the extents of `outImage`,
//   2. stores a solid red pixel and returns when PTPushConstant.depth / sampleCount exceed the
//      limits encoded by MAX_DEPTH_LOG2 / MAX_SAMPLES_LOG2,
//   3. otherwise traces PTPushConstant.sampleCount paths, averages them and stores the result.
689688void main()
690689{
691690 const ivec2 imageExtents = imageSize(outImage);
691+ const ivec2 coords = getCoordinates();
692+ vec2 texCoord = vec2 (coords) / vec2 (imageExtents);
693+ texCoord.y = 1.0 - texCoord.y;
692694
693- uint virtualThreadIndex;
694- for (uint virtualThreadBase = gl_WorkGroupID.x * _NBL_GLSL_WORKGROUP_SIZE_; virtualThreadBase < 1920 * 1080 ; virtualThreadBase += gl_NumWorkGroups.x * _NBL_GLSL_WORKGROUP_SIZE_) // not sure why 1280*720 doesn't cover entire window
// bounds check: bail out unless 0 <= coords < imageExtents on both axes
695+ if (false == (all (lessThanEqual (ivec2 (0 ),coords)) && all (greaterThan (imageExtents,coords)))) {
696+ return ;
697+ }
698+
// `(x-1) >> LOG2` is non-zero once x is larger than 2^LOG2, i.e. the requested depth or
// sample count does not fit the supported range; such pixels are painted red as an error marker
699+ if (((PTPushConstant.depth- 1 )>> MAX_DEPTH_LOG2)> 0 || ((PTPushConstant.sampleCount- 1 )>> MAX_SAMPLES_LOG2)> 0 )
695700 {
696- virtualThreadIndex = virtualThreadBase + gl_LocalInvocationIndex.x ;
697- const ivec2 coords = ivec2 (nbl_glsl_morton_decode2d32b(virtualThreadIndex)); // getCoordinates( );
698- vec2 texCoord = vec2 (coords) / vec2 (imageExtents) ;
699- texCoord.y = 1.0 - texCoord.y;
701+ vec4 pixelCol = vec4 ( 1.0 , 0.0 , 0.0 , 1.0 ) ;
702+ imageStore(outImage, coords, pixelCol );
703+ return ;
704+ }
700705
701- if (false == (all (lessThanEqual (ivec2 (0 ),coords)) && all (greaterThan (imageExtents,coords)))) {
702- continue ;
703- }
// per-pixel random state read from `scramblebuf`; every sample restarts from this same state
706+ nbl_glsl_xoroshiro64star_state_t scramble_start_state = texelFetch(scramblebuf,coords,0 ).rg;
// reciprocal of the scramble texture size, used below to scale the filter jitter added to NDC.xy
707+ const vec2 pixOffsetParam = vec2 (1.0 )/ vec2 (textureSize(scramblebuf,0 ));
704708
705- if (((PTPushConstant.depth- 1 )>> MAX_DEPTH_LOG2)> 0 || ((PTPushConstant.sampleCount- 1 )>> MAX_SAMPLES_LOG2)> 0 )
706- {
707- vec4 pixelCol = vec4 (1.0 ,0.0 ,0.0 ,1.0 );
708- imageStore(outImage, coords, pixelCol);
709- continue ;
710- }
711709
712- nbl_glsl_xoroshiro64star_state_t scramble_start_state = texelFetch(scramblebuf,coords,0 ).rg;
713- const vec2 pixOffsetParam = vec2 (1.0 )/ vec2 (textureSize(scramblebuf,0 ));
710+ const mat4 invMVP = PTPushConstant.invMVP;
714711
// NDC of this pixel at z = 0.0; transformed by invMVP (with perspective divide) it yields the
// ray origin `camPos`. NDC.z is then switched to 1.0 so the same vector can serve as the
// end-point from which each sample's ray direction is derived.
712+ vec4 NDC = vec4 (texCoord* vec2 (2.0 ,- 2.0 )+ vec2 (- 1.0 ,1.0 ),0.0 ,1.0 );
713+ vec3 camPos;
714+ {
715+ vec4 tmp = invMVP* NDC;
716+ camPos = tmp.xyz/ tmp.w;
717+ NDC.z = 1.0 ;
718+ }
715719
716- const mat4 invMVP = PTPushConstant.invMVP;
// running mean of the sample radiance and (only with VISUALIZE_HIGH_VARIANCE) of luma squared
720+ vec3 color = vec3 (0.0 );
721+ float meanLumaSquared = 0.0 ;
722+ // TODO: if we collapse the nested for loop, then all GPUs will get `PTPushConstant.depth` factor speedup, not just NV with separate PC
723+ for (int i= 0 ; i< PTPushConstant.sampleCount; i++ )
724+ {
725+ nbl_glsl_xoroshiro64star_state_t scramble_state = scramble_start_state;
717726
718- vec4 NDC = vec4 (texCoord * vec2 ( 2.0 , - 2.0 ) + vec2 ( - 1.0 , 1.0 ), 0.0 , 1.0 ) ;
719- vec3 camPos;
727+ Ray_t ray ;
728+ // raygen
720729 {
721- vec4 tmp = invMVP* NDC;
722- camPos = tmp.xyz/ tmp.w;
723- NDC.z = 1.0 ;
724- }
730+ ray._immutable.origin = camPos;
731+
732+ vec4 tmp = NDC;
733+ // apply stochastic reconstruction filter
734+ const float gaussianFilterCutoff = 2.5 ;
735+ const float truncation = exp (- 0.5 * gaussianFilterCutoff* gaussianFilterCutoff);
736+ vec2 remappedRand = rand3d(0u,i,scramble_state)[0 ].xy;
// remap the first random number from [0,1] into [truncation,1] before the Box-Muller transform
// NOTE(review): presumably this caps the Gaussian offset at `gaussianFilterCutoff` sigmas - confirm against nbl_glsl_BoxMullerTransform
737+ remappedRand.x *= 1.0 - truncation;
738+ remappedRand.x += truncation;
739+ tmp.xy += pixOffsetParam* nbl_glsl_BoxMullerTransform(remappedRand,1.5 );
740+ // for depth of field we could do another stochastic point-pick
741+ tmp = invMVP* tmp;
742+ ray._immutable.direction = normalize (tmp.xyz/ tmp.w- camPos);
725743
726- vec3 color = vec3 (0.0 );
727- float meanLumaSquared = 0.0 ;
728- // TODO: if we collapse the nested for loop, then all GPUs will get `PTPushConstant.depth` factor speedup, not just NV with separate PC
729- for (int i= 0 ; i< PTPushConstant.sampleCount; i++ )
730- {
731- nbl_glsl_xoroshiro64star_state_t scramble_state = scramble_start_state;
744+ #if POLYGON_METHOD== 2
745+ ray._immutable.normalAtOrigin = vec3 (0.0 ,0.0 ,0.0 );
746+ ray._immutable.wasBSDFAtOrigin = false;
747+ #endif
732748
733- Ray_t ray;
734- // raygen
735- {
736- ray._immutable.origin = camPos;
737-
738- vec4 tmp = NDC;
739- // apply stochastic reconstruction filter
740- const float gaussianFilterCutoff = 2.5 ;
741- const float truncation = exp (- 0.5 * gaussianFilterCutoff* gaussianFilterCutoff);
742- vec2 remappedRand = rand3d(0u,i,scramble_state)[0 ].xy;
743- remappedRand.x *= 1.0 - truncation;
744- remappedRand.x += truncation;
745- tmp.xy += pixOffsetParam* nbl_glsl_BoxMullerTransform(remappedRand,1.5 );
746- // for depth of field we could do another stochastic point-pick
747- tmp = invMVP* tmp;
748- ray._immutable.direction = normalize (tmp.xyz/ tmp.w- camPos);
749-
750- #if POLYGON_METHOD== 2
751- ray._immutable.normalAtOrigin = vec3 (0.0 ,0.0 ,0.0 );
752- ray._immutable.wasBSDFAtOrigin = false;
753- #endif
754-
755- ray._payload.accumulation = vec3 (0.0 );
756- ray._payload.otherTechniqueHeuristic = 0.0 ; // needed for direct eye-light paths
757- ray._payload.throughput = vec3 (1.0 );
758- #ifdef KILL_DIFFUSE_SPECULAR_PATHS
759- ray._payload.hasDiffuse = false;
760- #endif
761- }
749+ ray._payload.accumulation = vec3 (0.0 );
750+ ray._payload.otherTechniqueHeuristic = 0.0 ; // needed for direct eye-light paths
751+ ray._payload.throughput = vec3 (1.0 );
752+ #ifdef KILL_DIFFUSE_SPECULAR_PATHS
753+ ray._payload.hasDiffuse = false;
754+ #endif
755+ }
762756
763- // bounces
757+ // bounces
758+ {
// trace until the depth budget is used up, a trace misses, or closestHitProgram reports the ray as dead
// NOTE(review): `d` advances by 2 per trace - the reason is not visible in this hunk, confirm against closestHitProgram
759+ bool hit = true; bool rayAlive = true;
760+ for (int d= 1 ; d<= PTPushConstant.depth && hit && rayAlive; d+= 2 )
764761 {
765- bool hit = true; bool rayAlive = true;
766- for (int d= 1 ; d<= PTPushConstant.depth && hit && rayAlive; d+= 2 )
767- {
768- ray._mutable.intersectionT = nbl_glsl_FLT_MAX;
769- ray._mutable.objectID = traceRay(ray._mutable.intersectionT,ray._immutable.origin,ray._immutable.direction);
770- hit = ray._mutable.objectID!=-1 ;
771- if (hit)
772- rayAlive = closestHitProgram(d, i, ray, scramble_state);
773- }
774- // was last trace a miss?
775- if (! hit)
776- missProgram(ray._immutable,ray._payload);
762+ ray._mutable.intersectionT = nbl_glsl_FLT_MAX;
763+ ray._mutable.objectID = traceRay(ray._mutable.intersectionT,ray._immutable.origin,ray._immutable.direction);
764+ hit = ray._mutable.objectID!=-1 ;
765+ if (hit)
766+ rayAlive = closestHitProgram(d, i, ray, scramble_state);
777767 }
768+ // was last trace a miss?
769+ if (! hit)
770+ missProgram(ray._immutable,ray._payload);
771+ }
778772
779- vec3 accumulation = ray._payload.accumulation;
780-
781- float rcpSampleSize = 1.0 / float (i+ 1 );
782- color += (accumulation- color)* rcpSampleSize;
773+ vec3 accumulation = ray._payload.accumulation;
783774
784- #ifdef VISUALIZE_HIGH_VARIANCE
785- float luma = getLuma(accumulation);
786- meanLumaSquared += (luma* luma- meanLumaSquared)* rcpSampleSize;
787- #endif
788- }
// incremental mean: after this update `color` is the average of the first i+1 sample accumulations
775+ float rcpSampleSize = 1.0 / float (i+ 1 );
776+ color += (accumulation- color)* rcpSampleSize;
789777
790778 #ifdef VISUALIZE_HIGH_VARIANCE
791- float variance = getLuma(color);
792- variance *= variance;
793- variance = meanLumaSquared- variance;
794- if (variance> 5.0 )
795- color = vec3 (1.0 ,0.0 ,0.0 );
779+ float luma = getLuma(accumulation);
780+ meanLumaSquared += (luma* luma- meanLumaSquared)* rcpSampleSize;
796781 #endif
797-
798- vec4 pixelCol = vec4 (color, 1.0 );
799- imageStore(outImage, coords, pixelCol);
800782 }
783+
// debug aid: variance = mean(luma^2) - luma(mean color)^2; pixels above the 5.0 threshold are shown in red
784+ #ifdef VISUALIZE_HIGH_VARIANCE
785+ float variance = getLuma(color);
786+ variance *= variance;
787+ variance = meanLumaSquared- variance;
788+ if (variance> 5.0 )
789+ color = vec3 (1.0 ,0.0 ,0.0 );
790+ #endif
791+
// write the averaged colour with alpha fixed to 1.0
792+ vec4 pixelCol = vec4 (color, 1.0 );
793+ imageStore(outImage, coords, pixelCol);
801794}
802795/* * TODO: Improving Rendering
803796
0 commit comments