@@ -6490,14 +6490,15 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
 
   uint32_t numThreads = 1;
 
+  bool hasQuadScope = (shadRefl.patchData.threadScope & rdcspv::ThreadScope::Quad) ? true : false;
   bool hasQuadDerivatives =
       (shadRefl.patchData.derivativeMode != rdcspv::ComputeDerivativeMode::None);
   bool hasSubgroupScoope =
       (shadRefl.patchData.threadScope & rdcspv::ThreadScope::Subgroup) ? true : false;
   bool hasWorkgroupScope =
       (shadRefl.patchData.threadScope & rdcspv::ThreadScope::Workgroup) ? true : false;
 
-  if(hasQuadDerivatives)
+  if(hasQuadDerivatives || hasQuadScope)
     numThreads = RDCMAX(numThreads, 4U);
   if(hasSubgroupScoope)
     numThreads = RDCMAX(numThreads, maxSubgroupSize);
@@ -6535,7 +6536,19 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
     quadH = quadHeights[quadDerivMode];
     countQuadX = threadDim[0] / quadW;
     countQuadY = threadDim[1] / quadH;
+    hasQuadScope = true;
+  }
+  else if(hasQuadScope)
+  {
+    // Choose linear layout
+    quadW = 4;
+    quadH = 1;
+    countQuadX = threadDim[0] / quadW;
+    countQuadY = threadDim[1] / quadH;
+  }
 
+  if(hasQuadScope)
+  {
     RDCASSERTEQUAL(threadDim[0], countQuadX * quadW);
     RDCASSERTEQUAL(threadDim[1], countQuadY * quadH);
   }
@@ -6695,7 +6708,7 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
     RDCASSERTNOTEQUAL(subgroupSize, 0);
     numThreads = RDCMAX(numThreads, subgroupSize);
 
-    if(hasQuadDerivatives)
+    if(hasQuadScope)
       RDCASSERT(numThreads >= 4);
 
     if(hasWorkgroupScope)
@@ -6724,7 +6737,7 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
 
     uint32_t quadId = ~0U;
     uint32_t quadLaneIndex = ~0U;
-    if(hasQuadDerivatives)
+    if(hasQuadScope)
     {
       uint32_t quadX = (compData->threadid[0] / quadW);
       uint32_t quadY = (compData->threadid[1] / quadH);
@@ -6739,9 +6752,9 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
 
     if(hasWorkgroupScope)
     {
-      // When quad derivatives are enabled, use the quad derivative layout
-      if(hasQuadDerivatives)
+      if(hasQuadScope)
       {
+        // quad scope, derive the lane from the quad layout
         lane = quadId * 4 + quadLaneIndex;
       }
       else
@@ -6795,9 +6808,9 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
       uint32_t quadLaneIndex = ~0U;
 
       uint32_t lane = ~0U;
-      if(hasQuadDerivatives)
+      if(hasQuadScope)
       {
-        // When quad derivatives are enabled, use the quad derivative layout
+        // quad scope, derive the lane from the quad layout
         uint32_t quadX = (tx / quadW);
         uint32_t quadY = (ty / quadH);
         uint32_t quadZ = tz;
@@ -6833,7 +6846,7 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
       RDCASSERTEQUAL(thread_builtins[ShaderBuiltin::SubgroupIndexInWorkgroup].value.u32v[0],
                      lane / subgroupSize);
 
-      if(hasQuadDerivatives)
+      if(hasQuadScope)
       {
         RDCASSERTEQUAL(
             apiWrapper->thread_props[lane][(size_t)rdcspv::ThreadProperty::QuadLane],
@@ -6856,7 +6869,7 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
       apiWrapper->thread_props[lane][(size_t)rdcspv::ThreadProperty::SubgroupId] =
           lane % subgroupSize;
 
-      if(hasQuadDerivatives)
+      if(hasQuadScope)
       {
         apiWrapper->thread_props[lane][(size_t)rdcspv::ThreadProperty::QuadLane] =
             quadLaneIndex;
@@ -6931,7 +6944,7 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
             rdcstr(), tz * threadDim[0] * threadDim[1] + ty * threadDim[0] + tx, 0U, 0U, 0U);
         apiWrapper->thread_props[i][(size_t)rdcspv::ThreadProperty::Active] = 1;
 
-        if(hasQuadDerivatives)
+        if(hasQuadScope)
         {
           uint32_t quadX = (tx / quadW);
           uint32_t quadY = (ty / quadH);
@@ -6954,7 +6967,7 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
         }
       }
     }
-    else if(hasQuadDerivatives)
+    else if(hasQuadScope)
     {
       // need to simulate the whole quad, do not readback from the GPU like we do with subgroups
       // the quad is guaranteed to be in the same subgroup
@@ -6978,17 +6991,14 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
             rdcstr(), tz * threadDim[0] * threadDim[1] + ty * threadDim[0] + tx, 0U, 0U, 0U);
         apiWrapper->thread_props[i][(size_t)rdcspv::ThreadProperty::Active] = 1;
 
-        if(hasQuadDerivatives)
-        {
-          uint32_t quadX = (tx / quadW);
-          uint32_t quadY = (ty / quadH);
-          uint32_t quadId =
-              quadIdOffset + quadX + (quadY * countQuadX) + (quadZ * countQuadY * countQuadX);
-          uint32_t quadLaneIndex = (tx % quadW) + (ty % quadH) * 2;
-
-          apiWrapper->thread_props[i][(size_t)rdcspv::ThreadProperty::QuadLane] = quadLaneIndex;
-          apiWrapper->thread_props[i][(size_t)rdcspv::ThreadProperty::QuadId] = quadId;
-        }
+        uint32_t quadX = (tx / quadW);
+        uint32_t quadY = (ty / quadH);
+        uint32_t quadId =
+            quadIdOffset + quadX + (quadY * countQuadX) + (quadZ * countQuadY * countQuadX);
+        uint32_t quadLaneIndex = (tx % quadW) + (ty % quadH) * 2;
+
+        apiWrapper->thread_props[i][(size_t)rdcspv::ThreadProperty::QuadLane] = quadLaneIndex;
+        apiWrapper->thread_props[i][(size_t)rdcspv::ThreadProperty::QuadId] = quadId;
 
         if(rdcfixedarray<uint32_t, 3>({tx, ty, tz}) == threadid)
           laneIndex = i;
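
For reference, below is a minimal standalone C++ sketch of the quad id / quad lane arithmetic this patch routes through hasQuadScope. It is an illustration only, not the RenderDoc implementation: it assumes an 8x8x1 workgroup, takes quadIdOffset as 0, and mirrors the patch's variable names (quadW, quadH, countQuadX, countQuadY) purely for readability.

// Standalone sketch of the quad id / lane-index mapping used in the patch.
// Assumes an 8x8x1 workgroup and quadIdOffset == 0; not RenderDoc code.
#include <cstdint>
#include <cstdio>

struct QuadLayout
{
  uint32_t quadW, quadH;           // 2x2 for derivatives, 4x1 linear for plain quad scope
  uint32_t countQuadX, countQuadY; // quads per row / per column of the workgroup
};

static void MapThreadToQuad(const QuadLayout &l, uint32_t tx, uint32_t ty, uint32_t tz,
                            uint32_t &quadId, uint32_t &quadLaneIndex)
{
  uint32_t quadX = tx / l.quadW;
  uint32_t quadY = ty / l.quadH;
  uint32_t quadZ = tz;
  // Same formulas as the patch: quads are numbered row-major across the
  // workgroup, and the lane within a quad is (x % W) + (y % H) * 2.
  quadId = quadX + (quadY * l.countQuadX) + (quadZ * l.countQuadY * l.countQuadX);
  quadLaneIndex = (tx % l.quadW) + (ty % l.quadH) * 2;
}

int main()
{
  QuadLayout deriv = {2, 2, 8 / 2, 8 / 2};  // derivative layout: 2x2 quads
  QuadLayout linear = {4, 1, 8 / 4, 8 / 1}; // quad scope without derivatives: 4x1 quads

  uint32_t id = 0, lane = 0;
  MapThreadToQuad(deriv, 3, 5, 0, id, lane);
  printf("2x2 layout: thread (3,5,0) -> quad %u, lane %u\n", id, lane); // quad 9, lane 3
  MapThreadToQuad(linear, 3, 5, 0, id, lane);
  printf("4x1 layout: thread (3,5,0) -> quad %u, lane %u\n", id, lane); // quad 10, lane 3
  return 0;
}

With the linear 4x1 layout chosen when only quad scope (and no derivatives) is present, the (ty % quadH) * 2 term is always zero, so the lane index reduces to tx % 4 and the same formula serves both layouts.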