Skip to content

Commit 6277569

Browse files
committed
Support for Spirv Quad Ops in Compute Shader Debugging
Use a linear 4x1x1 layout for the quads. CS derivatives in quad scope are defined to be 0.0 if ComputeDerivativeMode is None, i.e. the execution mode is not DerivativeGroupQuadsKHR or DerivativeGroupLinearKHR.
1 parent d514df9 commit 6277569

File tree

4 files changed

+59
-26
lines changed

4 files changed

+59
-26
lines changed

renderdoc/driver/shaders/spirv/spirv_debug.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -174,8 +174,9 @@ static ShaderVariable MakeIdentity(const rdcspv::DataType &type, float val, bool
174174

175175
namespace rdcspv
176176
{
177-
ThreadState::ThreadState(Debugger &debug, const GlobalState &globalState, ShaderStage stage)
178-
: debugger(debug), global(globalState)
177+
ThreadState::ThreadState(Debugger &debug, const GlobalState &globalState, ShaderStage stage,
178+
ShaderFeatures shaderFeatures)
179+
: debugger(debug), global(globalState), features(shaderFeatures)
179180
{
180181
// Default to Coarse, choose Fine for compute shaders
181182
defaultDeriveType = DerivType::Coarse;
@@ -680,6 +681,14 @@ ShaderVariable ThreadState::CalcDeriv(ThreadState::DerivDir dir, ThreadState::De
680681
debugger.GetHumanName(val).c_str()));
681682
return ShaderVariable("", 0.0f, 0.0f, 0.0f, 0.0f);
682683
}
684+
if(!(features & ShaderFeatures::Derivatives))
685+
{
686+
debugger.AddDebugMessage(
687+
MessageCategory::Execution, MessageSeverity::High, MessageSource::RuntimeWarning,
688+
StringFormat::Fmt("Derivative calculation within shader without support for derivatives %s",
689+
debugger.GetHumanName(val).c_str()));
690+
return ShaderVariable("", 0.0f, 0.0f, 0.0f, 0.0f);
691+
}
683692

684693
RDCASSERT(quadNeighbours[0] < workgroup.size(), quadNeighbours[0], workgroup.size());
685694
RDCASSERT(quadNeighbours[1] < workgroup.size(), quadNeighbours[1], workgroup.size());

renderdoc/driver/shaders/spirv/spirv_debug.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,11 +244,20 @@ struct GpuSampleGatherOperation
244244
ShaderVariable *result = NULL;
245245
};
246246

247+
enum class ShaderFeatures : uint32_t
248+
{
249+
None = 0,
250+
Derivatives = 1 << 0,
251+
};
252+
253+
BITMASK_OPERATORS(ShaderFeatures);
254+
247255
class Debugger;
248256

249257
struct ThreadState
250258
{
251-
ThreadState(Debugger &debug, const GlobalState &globalState, ShaderStage stage);
259+
ThreadState(Debugger &debug, const GlobalState &globalState, ShaderStage stage,
260+
ShaderFeatures shaderFeatures);
252261
~ThreadState();
253262

254263
void EnterEntryPoint(bool useDebugState);
@@ -454,6 +463,7 @@ struct ThreadState
454463
AtomicStore(&atomic_pendingResultStatus, (int32_t)status);
455464
}
456465

466+
ShaderFeatures features;
457467
DerivType defaultDeriveType;
458468
ShaderDebugState pendingDebugState;
459469
bool hasDebugState = false;

renderdoc/driver/shaders/spirv/spirv_debug_setup.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1052,6 +1052,10 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *api, const ShaderStage s
10521052
subgroupSize = threadsInSubgroup;
10531053
stage = shaderStage;
10541054
apiWrapper = api;
1055+
ShaderFeatures shaderFeatures = ShaderFeatures::None;
1056+
if((stage == ShaderStage::Fragment) ||
1057+
((stage == ShaderStage::Compute) && patchData.derivativeMode != ComputeDerivativeMode::None))
1058+
shaderFeatures |= ShaderFeatures::Derivatives;
10551059

10561060
queuedDeviceThreadSteps.resize(threadsInWorkgroup);
10571061
queuedGpuMathOps.resize(threadsInWorkgroup);
@@ -1060,7 +1064,7 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *api, const ShaderStage s
10601064
queuedJobs.resize(threadsInWorkgroup);
10611065
for(uint32_t i = 0; i < threadsInWorkgroup; i++)
10621066
{
1063-
workgroup.push_back(ThreadState(*this, global, stage));
1067+
workgroup.push_back(ThreadState(*this, global, stage, shaderFeatures));
10641068
queuedDeviceThreadSteps[i] = false;
10651069
queuedGpuMathOps[i] = false;
10661070
queuedGpuSampleGatherOps[i] = false;

renderdoc/driver/vulkan/vk_shaderdebug.cpp

Lines changed: 32 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6490,14 +6490,15 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
64906490

64916491
uint32_t numThreads = 1;
64926492

6493+
bool hasQuadScope = (shadRefl.patchData.threadScope & rdcspv::ThreadScope::Quad) ? true : false;
64936494
bool hasQuadDerivatives =
64946495
(shadRefl.patchData.derivativeMode != rdcspv::ComputeDerivativeMode::None);
64956496
bool hasSubgroupScoope =
64966497
(shadRefl.patchData.threadScope & rdcspv::ThreadScope::Subgroup) ? true : false;
64976498
bool hasWorkgroupScope =
64986499
(shadRefl.patchData.threadScope & rdcspv::ThreadScope::Workgroup) ? true : false;
64996500

6500-
if(hasQuadDerivatives)
6501+
if(hasQuadDerivatives || hasQuadScope)
65016502
numThreads = RDCMAX(numThreads, 4U);
65026503
if(hasSubgroupScoope)
65036504
numThreads = RDCMAX(numThreads, maxSubgroupSize);
@@ -6535,7 +6536,19 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
65356536
quadH = quadHeights[quadDerivMode];
65366537
countQuadX = threadDim[0] / quadW;
65376538
countQuadY = threadDim[1] / quadH;
6539+
hasQuadScope = true;
6540+
}
6541+
else if(hasQuadScope)
6542+
{
6543+
// Choose linear layout
6544+
quadW = 4;
6545+
quadH = 1;
6546+
countQuadX = threadDim[0] / quadW;
6547+
countQuadY = threadDim[1] / quadH;
6548+
}
65386549

6550+
if(hasQuadScope)
6551+
{
65396552
RDCASSERTEQUAL(threadDim[0], countQuadX * quadW);
65406553
RDCASSERTEQUAL(threadDim[1], countQuadY * quadH);
65416554
}
@@ -6695,7 +6708,7 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
66956708
RDCASSERTNOTEQUAL(subgroupSize, 0);
66966709
numThreads = RDCMAX(numThreads, subgroupSize);
66976710

6698-
if(hasQuadDerivatives)
6711+
if(hasQuadScope)
66996712
RDCASSERT(numThreads >= 4);
67006713

67016714
if(hasWorkgroupScope)
@@ -6724,7 +6737,7 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
67246737

67256738
uint32_t quadId = ~0U;
67266739
uint32_t quadLaneIndex = ~0U;
6727-
if(hasQuadDerivatives)
6740+
if(hasQuadScope)
67286741
{
67296742
uint32_t quadX = (compData->threadid[0] / quadW);
67306743
uint32_t quadY = (compData->threadid[1] / quadH);
@@ -6739,9 +6752,9 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
67396752

67406753
if(hasWorkgroupScope)
67416754
{
6742-
// When quad derivatives are enabled, use the quad derivative layout
6743-
if(hasQuadDerivatives)
6755+
if(hasQuadScope)
67446756
{
6757+
// quad scope, derive the lane from the quad layout
67456758
lane = quadId * 4 + quadLaneIndex;
67466759
}
67476760
else
@@ -6795,9 +6808,9 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
67956808
uint32_t quadLaneIndex = ~0U;
67966809

67976810
uint32_t lane = ~0U;
6798-
if(hasQuadDerivatives)
6811+
if(hasQuadScope)
67996812
{
6800-
// When quad derivatives are enabled, use the quad derivative layout
6813+
// quad scope, derive the lane from the quad layout
68016814
uint32_t quadX = (tx / quadW);
68026815
uint32_t quadY = (ty / quadH);
68036816
uint32_t quadZ = tz;
@@ -6833,7 +6846,7 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
68336846
RDCASSERTEQUAL(thread_builtins[ShaderBuiltin::SubgroupIndexInWorkgroup].value.u32v[0],
68346847
lane / subgroupSize);
68356848

6836-
if(hasQuadDerivatives)
6849+
if(hasQuadScope)
68376850
{
68386851
RDCASSERTEQUAL(
68396852
apiWrapper->thread_props[lane][(size_t)rdcspv::ThreadProperty::QuadLane],
@@ -6856,7 +6869,7 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
68566869
apiWrapper->thread_props[lane][(size_t)rdcspv::ThreadProperty::SubgroupId] =
68576870
lane % subgroupSize;
68586871

6859-
if(hasQuadDerivatives)
6872+
if(hasQuadScope)
68606873
{
68616874
apiWrapper->thread_props[lane][(size_t)rdcspv::ThreadProperty::QuadLane] =
68626875
quadLaneIndex;
@@ -6931,7 +6944,7 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
69316944
rdcstr(), tz * threadDim[0] * threadDim[1] + ty * threadDim[0] + tx, 0U, 0U, 0U);
69326945
apiWrapper->thread_props[i][(size_t)rdcspv::ThreadProperty::Active] = 1;
69336946

6934-
if(hasQuadDerivatives)
6947+
if(hasQuadScope)
69356948
{
69366949
uint32_t quadX = (tx / quadW);
69376950
uint32_t quadY = (ty / quadH);
@@ -6954,7 +6967,7 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
69546967
}
69556968
}
69566969
}
6957-
else if(hasQuadDerivatives)
6970+
else if(hasQuadScope)
69586971
{
69596972
// need to simulate the whole quad, do not readback from the GPU like we do with subgroups
69606973
// the quad is guaranteed to be in the same subgroup
@@ -6978,17 +6991,14 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
69786991
rdcstr(), tz * threadDim[0] * threadDim[1] + ty * threadDim[0] + tx, 0U, 0U, 0U);
69796992
apiWrapper->thread_props[i][(size_t)rdcspv::ThreadProperty::Active] = 1;
69806993

6981-
if(hasQuadDerivatives)
6982-
{
6983-
uint32_t quadX = (tx / quadW);
6984-
uint32_t quadY = (ty / quadH);
6985-
uint32_t quadId =
6986-
quadIdOffset + quadX + (quadY * countQuadX) + (quadZ * countQuadY * countQuadX);
6987-
uint32_t quadLaneIndex = (tx % quadW) + (ty % quadH) * 2;
6988-
6989-
apiWrapper->thread_props[i][(size_t)rdcspv::ThreadProperty::QuadLane] = quadLaneIndex;
6990-
apiWrapper->thread_props[i][(size_t)rdcspv::ThreadProperty::QuadId] = quadId;
6991-
}
6994+
uint32_t quadX = (tx / quadW);
6995+
uint32_t quadY = (ty / quadH);
6996+
uint32_t quadId =
6997+
quadIdOffset + quadX + (quadY * countQuadX) + (quadZ * countQuadY * countQuadX);
6998+
uint32_t quadLaneIndex = (tx % quadW) + (ty % quadH) * 2;
6999+
7000+
apiWrapper->thread_props[i][(size_t)rdcspv::ThreadProperty::QuadLane] = quadLaneIndex;
7001+
apiWrapper->thread_props[i][(size_t)rdcspv::ThreadProperty::QuadId] = quadId;
69927002

69937003
if(rdcfixedarray<uint32_t, 3>({tx, ty, tz}) == threadid)
69947004
laneIndex = i;

0 commit comments

Comments
 (0)