@@ -323,7 +323,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication,
323323 m_presentDescriptorSet = presentDSPool->createDescriptorSet (gpuPresentDescriptorSetLayout);
324324
325325 // Create Shaders
326- auto loadAndCompileGLSLShader = [&](const std::string& pathToShader) -> smart_refctd_ptr<IGPUShader>
326+ auto loadAndCompileGLSLShader = [&](const std::string& pathToShader, bool persistentWorkGroups = false ) -> smart_refctd_ptr<IGPUShader>
327327 {
328328 IAssetLoader::SAssetLoadParams lp = {};
329329 lp.workingDirectory = localInputCWD;
@@ -339,6 +339,27 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication,
339339 // The down-cast should not fail!
340340 assert (source);
341341
342+ auto compiler = make_smart_refctd_ptr<asset::CGLSLCompiler>(smart_refctd_ptr (m_system));
343+ CGLSLCompiler::SOptions options = {};
344+ options.stage = IShader::E_SHADER_STAGE::ESS_COMPUTE; // every shader routed through this lambda is compiled as a compute shader
345+ options.targetSpirvVersion = m_device->getPhysicalDevice ()->getLimits ().spirvVersion ;
346+ options.spirvOptimizer = nullptr ;
347+ #ifndef _NBL_DEBUG
348+ ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO;
349+ auto opt = make_smart_refctd_ptr<ISPIRVOptimizer>(std::span<ISPIRVOptimizer::E_OPTIMIZER_PASS>(&optPasses, 1 )); // optimizer exists only when _NBL_DEBUG is not defined and runs the single debug-info stripping pass
350+ options.spirvOptimizer = opt.get (); // raw, non-owning pointer: opt has to stay alive until compileToSPIRV below has run
351+ #endif
352+ options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT;
353+ options.preprocessorOptions .sourceIdentifier = source->getFilepathHint ();
354+ options.preprocessorOptions .logger = m_logger.get ();
355+ options.preprocessorOptions .includeFinder = compiler->getDefaultIncludeFinder ();
356+
357+ const IShaderCompiler::SMacroDefinition persistentDefine = { " PERSISTENT_WORKGROUPS" , " 1" }; // the only extra define this lambda can inject
358+ if (persistentWorkGroups) // without the flag extraDefines keeps its default value
359+ options.preprocessorOptions .extraDefines = { &persistentDefine, &persistentDefine + 1 };
360+
361+ source = compiler->compileToSPIRV ((const char *)source->getContent ()->getPointer (), options); // overwrites the loaded source with the compiler output; NOTE(review): the result is not checked before createShader - confirm a failed compile ends up in the !shader path
362+
342363 // this time we skip the use of the asset converter since the ICPUShader->IGPUShader path is quick and simple
343364 auto shader = m_device->createShader (source.get ());
344365 if (!shader)
@@ -350,7 +371,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication,
350371 return shader;
351372 };
352373
353- auto loadAndCompileHLSLShader = [&](const std::string& pathToShader, const std::string& defineMacro) -> smart_refctd_ptr<IGPUShader>
374+ auto loadAndCompileHLSLShader = [&](const std::string& pathToShader, const std::string& defineMacro = " " , bool persistentWorkGroups = false ) -> smart_refctd_ptr<IGPUShader>
354375 {
355376 IAssetLoader::SAssetLoadParams lp = {};
356377 lp.workingDirectory = localInputCWD;
@@ -368,7 +389,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication,
368389
369390 auto compiler = make_smart_refctd_ptr<asset::CHLSLCompiler>(smart_refctd_ptr (m_system));
370391 CHLSLCompiler::SOptions options = {};
371- options.stage = IShader::E_SHADER_STAGE::ESS_COMPUTE; // should be compute
392+ options.stage = IShader::E_SHADER_STAGE::ESS_COMPUTE;
372393 options.targetSpirvVersion = m_device->getPhysicalDevice ()->getLimits ().spirvVersion ;
373394 options.spirvOptimizer = nullptr ;
374395#ifndef _NBL_DEBUG
@@ -381,8 +402,11 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication,
381402 options.preprocessorOptions .logger = m_logger.get ();
382403 options.preprocessorOptions .includeFinder = compiler->getDefaultIncludeFinder ();
383404
384- const IShaderCompiler::SMacroDefinition variantDefine = { defineMacro, " " };
385- options.preprocessorOptions .extraDefines = { &variantDefine, &variantDefine + 1 };
405+ const IShaderCompiler::SMacroDefinition defines[2 ] = { {defineMacro, " " }, { " PERSISTENT_WORKGROUPS" , " 1" } };
406+ // defines[0] is the optional variant macro, defines[1] the persistent-workgroup switch. Pass only the requested sub-range [firstDefine, lastDefine) so an empty variant name is never emitted as a macro and no longer suppresses PERSISTENT_WORKGROUPS.
407+ const IShaderCompiler::SMacroDefinition* const firstDefine = defineMacro.empty() ? defines + 1 : defines;
408+ const IShaderCompiler::SMacroDefinition* const lastDefine = persistentWorkGroups ? defines + 2 : defines + 1;
409+ if (firstDefine != lastDefine) options.preprocessorOptions.extraDefines = { firstDefine, lastDefine }; // empty range: leave extraDefines at its default, as before
386410
387411 source = compiler->compileToSPIRV ((const char *)source->getContent ()->getPointer (), options);
388412
@@ -441,6 +465,34 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication,
441465 if (!m_device->createComputePipelines (nullptr , { ¶ms, 1 }, m_PTHLSLPipelines.data () + index))
442466 return logFail (" Failed to create HLSL compute pipeline!\n " );
443467 }
468+
469+ // Persistent-workgroup pipeline variants: the path tracer shaders are compiled again with the persistentWorkGroups flag set and stored in the dedicated *PersistentWGPipelines arrays.
470+ { // GLSL variant
471+ auto ptShader = loadAndCompileGLSLShader (PTGLSLShaderPaths[index], true ); // true -> persistentWorkGroups; NOTE(review): ptShader is not null-checked here - confirm createComputePipelines rejects a null shader
472+
473+ IGPUComputePipeline::SCreationParams params = {};
474+ params.layout = ptPipelineLayout.get ();
475+ params.shader .shader = ptShader.get ();
476+ params.shader .entryPoint = " main" ;
477+ params.shader .entries = nullptr ;
478+ params.shader .requireFullSubgroups = true ;
479+ params.shader .requiredSubgroupSize = static_cast <IGPUShader::SSpecInfo::SUBGROUP_SIZE>(5 ); // NOTE(review): 5 is presumably log2 of the subgroup size (32 invocations) - confirm against SUBGROUP_SIZE
480+ if (!m_device->createComputePipelines (nullptr , { &params, 1 }, m_PTGLSLPersistentWGPipelines.data () + index)) // one pipeline, written into slot `index`
481+ return logFail (" Failed to create GLSL PersistentWG compute pipeline!\n " );
482+ }
483+ { // HLSL persistent-workgroup pipeline for light geometry slot `index`
484+ auto ptShader = loadAndCompileHLSLShader (PTHLSLShaderPath, PTHLSLShaderVariants[index], true ); // variant macro plus persistentWorkGroups = true; NOTE(review): ptShader is not null-checked here - confirm createComputePipelines rejects a null shader
485+
486+ IGPUComputePipeline::SCreationParams params = {};
487+ params.layout = ptPipelineLayout.get ();
488+ params.shader .shader = ptShader.get ();
489+ params.shader .entryPoint = " main" ;
490+ params.shader .entries = nullptr ;
491+ params.shader .requireFullSubgroups = true ;
492+ params.shader .requiredSubgroupSize = static_cast <IGPUShader::SSpecInfo::SUBGROUP_SIZE>(5 ); // NOTE(review): 5 is presumably log2 of the subgroup size (32 invocations) - confirm against SUBGROUP_SIZE
493+ if (!m_device->createComputePipelines (nullptr , { &params, 1 }, m_PTHLSLPersistentWGPipelines.data () + index)) // one pipeline, written into slot `index`
494+ return logFail (" Failed to create HLSL PersistentWG compute pipeline!\n " );
495+ }
444496 }
445497 }
446498
@@ -452,7 +504,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication,
452504 return logFail (" Failed to create Full Screen Triangle protopipeline or load its vertex shader!" );
453505
454506 // Load Fragment Shader (used for presenting; now routed through the HLSL compile lambda with no variant macro and no persistent-workgroup define)
455- auto fragmentShader = loadAndCompileGLSLShader (PresentShaderPath);
507+ auto fragmentShader = loadAndCompileHLSLShader (PresentShaderPath); // NOTE(review): loadAndCompileHLSLShader sets options.stage to ESS_COMPUTE unconditionally - confirm this shader is still compiled for the fragment stage
456508 if (!fragmentShader)
457509 return logFail (" Failed to Load and Compile Fragment Shader: lumaMeterShader!" );
458510
@@ -944,6 +996,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication,
944996 ImGui::Combo (" Render Mode" , &renderMode, shaderTypes, E_RENDER_MODE::ERM_COUNT);
945997 ImGui::SliderInt (" SPP" , &spp, 1 , MaxBufferSamples);
946998 ImGui::SliderInt (" Depth" , &depth, 1 , MaxBufferDimensions / 3 );
999+ ImGui::Checkbox (" Persistent WorkGroups" , &usePersistentWorkGroups);
9471000
9481001 ImGui::Text (" X: %f Y: %f" , io.MousePos .x , io.MousePos .y );
9491002
@@ -1069,12 +1122,22 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication,
10691122
10701123 // cube envmap handle
10711124 {
1072- auto pipeline = renderMode == E_RENDER_MODE::ERM_HLSL ? m_PTHLSLPipelines[PTPipeline].get () : m_PTGLSLPipelines[PTPipeline].get ();
1125+ IGPUComputePipeline* pipeline; // picked from four pipeline sets: {HLSL, GLSL} x {persistent, regular}; always assigned by the if/else below
1126+ if (usePersistentWorkGroups)
1127+ pipeline = renderMode == E_RENDER_MODE::ERM_HLSL ? m_PTHLSLPersistentWGPipelines[PTPipeline].get () : m_PTGLSLPersistentWGPipelines[PTPipeline].get ();
1128+ else
1129+ pipeline = renderMode == E_RENDER_MODE::ERM_HLSL ? m_PTHLSLPipelines[PTPipeline].get () : m_PTGLSLPipelines[PTPipeline].get ();
10731130 cmdbuf->bindComputePipeline (pipeline);
10741131 cmdbuf->bindDescriptorSets (EPBP_COMPUTE, pipeline->getLayout (), 0u , 1u , &m_descriptorSet0.get ());
10751132 cmdbuf->bindDescriptorSets (EPBP_COMPUTE, pipeline->getLayout (), 2u , 1u , &m_descriptorSet2.get ());
10761133 cmdbuf->pushConstants (pipeline->getLayout (), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0 , sizeof (PTPushConstant), &pc);
1077- cmdbuf->dispatch (1 + (WindowDimensions.x * WindowDimensions.y - 1 ) / DefaultWorkGroupSize, 1u , 1u );
1134+ if (usePersistentWorkGroups) // must agree with the pipeline choice above: both read the same flag
1135+ {
1136+ uint32_t dispatchSize = m_physicalDevice->getLimits ().computeOptimalPersistentWorkgroupDispatchSize (WindowDimensions.x * WindowDimensions.y , DefaultWorkGroupSize); // workgroup count comes from the device limits helper, given the pixel count and the workgroup size
1137+ cmdbuf->dispatch (dispatchSize, 1u , 1u );
1138+ }
1139+ else
1140+ cmdbuf->dispatch (1 + (WindowDimensions.x * WindowDimensions.y - 1 ) / DefaultWorkGroupSize, 1u , 1u ); // regular mode: pixel count rounded up to whole workgroups (assuming integer operands)
10781141 }
10791142
10801143 // TRANSITION m_outImgView to READ (because of descriptorSets0 -> ComputeShader Writes into the image)
@@ -1306,6 +1369,8 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication,
13061369 smart_refctd_ptr<IGPUCommandPool> m_cmdPool;
13071370 std::array<smart_refctd_ptr<IGPUComputePipeline>, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTGLSLPipelines;
13081371 std::array<smart_refctd_ptr<IGPUComputePipeline>, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPipelines;
1372+ std::array<smart_refctd_ptr<IGPUComputePipeline>, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTGLSLPersistentWGPipelines; // GLSL path tracer pipelines built with persistentWorkGroups = true, one per light geometry
1373+ std::array<smart_refctd_ptr<IGPUComputePipeline>, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPersistentWGPipelines; // HLSL path tracer pipelines built with persistentWorkGroups = true, one per light geometry
13091374 smart_refctd_ptr<IGPUGraphicsPipeline> m_presentPipeline;
13101375 uint64_t m_realFrameIx = 0 ;
13111376 std::array<smart_refctd_ptr<IGPUCommandBuffer>, MaxFramesInFlight> m_cmdBufs;
@@ -1357,6 +1422,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication,
13571422 int renderMode = E_RENDER_MODE::ERM_HLSL;
13581423 int spp = 32 ;
13591424 int depth = 3 ;
1425+ bool usePersistentWorkGroups = false ; // bound to the ImGui checkbox; selects the persistent pipeline set and the persistent dispatch size when true
13601426
13611427 bool m_firstFrame = true ;
13621428 IGPUCommandBuffer::SClearColorValue clearColor = { .float32 = {0 .f ,0 .f ,0 .f ,1 .f } };
0 commit comments