@@ -14,13 +14,13 @@ namespace property_pools
1414template<bool Fill, bool SrcIndexIota, bool DstIndexIota, uint64_t SrcIndexSizeLog2, uint64_t DstIndexSizeLog2>
1515struct TransferLoop
1616{
17- void iteration (uint propertyId, TransferRequest transferRequest, uint invocationIndex)
17+ void iteration (uint propertyId, TransferRequest transferRequest, uint64_t invocationIndex)
1818 {
1919 const uint64_t srcIndexSize = uint64_t (1 ) << SrcIndexSizeLog2;
2020 const uint64_t dstIndexSize = uint64_t (1 ) << DstIndexSizeLog2;
2121
22- const uint64_t srcOffset = uint64_t ( invocationIndex) * srcIndexSize * transferRequest.propertySize;
23- const uint64_t dstOffset = uint64_t ( invocationIndex) * dstIndexSize * transferRequest.propertySize;
22+ const uint64_t srcOffset = invocationIndex * srcIndexSize * transferRequest.propertySize;
23+ const uint64_t dstOffset = invocationIndex * dstIndexSize * transferRequest.propertySize;
2424
2525 const uint64_t srcIndexAddress = Fill ? transferRequest.srcIndexAddr + srcOffset : transferRequest.srcIndexAddr;
2626 const uint64_t dstIndexAddress = Fill ? transferRequest.dstIndexAddr + dstOffset : transferRequest.dstIndexAddr;
@@ -112,26 +112,28 @@ void main(uint32_t3 dispatchId)
112112 // Loading transfer request from the pointer (can't use struct
113113 // with BDA on HLSL SPIRV)
114114 TransferRequest transferRequest;
115- transferRequest.srcAddr = vk::RawBufferLoad<uint64_t >(globals.transferCommandsAddress);
115+ transferRequest.srcAddr = vk::RawBufferLoad<uint >(globals.transferCommandsAddress) | vk::RawBufferLoad< uint >(globals.transferCommandsAddress + sizeof ( uint )) << 32 ;
116116 transferRequest.dstAddr = vk::RawBufferLoad<uint64_t>(globals.transferCommandsAddress + sizeof (uint64_t));
117117 transferRequest.srcIndexAddr = vk::RawBufferLoad<uint64_t>(globals.transferCommandsAddress + sizeof (uint64_t) * 2 );
118118 transferRequest.dstIndexAddr = vk::RawBufferLoad<uint64_t>(globals.transferCommandsAddress + sizeof (uint64_t) * 3 );
119119 // Remaining elements are part of the same bitfield
120120 // TODO: Do this only using raw buffer load?
121- uint2 bitfieldType = vk::RawBufferLoad<uint2 >(globals.transferCommandsAddress + sizeof (uint64_t) * 4 );
122- transferRequest.elementCount32 = bitfieldType;
123- transferRequest.elementCountExtra = bitfieldType;
124- transferRequest.propertySize = bitfieldType >> 3 ;
125- transferRequest.fill = bitfieldType >> (3 + 24 );
126- transferRequest.srcIndexSizeLog2 = bitfieldType >> (3 + 24 + 1 );
127- transferRequest.dstIndexSizeLog2 = bitfieldType >> (3 + 24 + 1 + 2 );
121+ uint64_t bitfieldType = vk::RawBufferLoad<uint64_t >(globals.transferCommandsAddress + sizeof (uint64_t) * 4 );
122+ transferRequest.elementCount32 = uint32_t ( bitfieldType) ;
123+ transferRequest.elementCountExtra = uint32_t ( bitfieldType) ;
124+ transferRequest.propertySize = uint32_t ( bitfieldType >> 3 ) ;
125+ transferRequest.fill = uint32_t ( bitfieldType >> (3 + 24 ) );
126+ transferRequest.srcIndexSizeLog2 = uint32_t ( bitfieldType >> (3 + 24 + 1 ) );
127+ transferRequest.dstIndexSizeLog2 = uint32_t ( bitfieldType >> (3 + 24 + 1 + 2 ) );
128128
129129 const uint dispatchSize = nbl::hlsl::device_capabilities_traits<device_capabilities>::maxOptimallyResidentWorkgroupInvocations;
130130 const bool fill = transferRequest.fill == 1 ;
131131
132- vk::RawBufferStore<uint32_t>(transferRequest.dstAddr, 69 );
133- // if (fill) { TransferLoopPermutationFill<true> loop; loop.copyLoop(invocationIndex, propertyId, transferRequest, dispatchSize); }
134- // else { TransferLoopPermutationFill<false> loop; loop.copyLoop(invocationIndex, propertyId, transferRequest, dispatchSize); }
132+ vk::RawBufferStore<uint64_t>(globals.transferCommandsAddress + 40 * 3 , transferRequest.srcAddr);
133+ vk::RawBufferStore<uint64_t>(globals.transferCommandsAddress + 40 * 4 , transferRequest.dstAddr);
134+ vk::RawBufferStore<uint >(globals.transferCommandsAddress + 40 * 5 , vk::RawBufferLoad<uint >(transferRequest.srcAddr + sizeof (uint16_t) * 3 ));
135+ //if (fill) { TransferLoopPermutationFill<true> loop; loop.copyLoop(invocationIndex, propertyId, transferRequest, dispatchSize); }
136+ //else { TransferLoopPermutationFill<false> loop; loop.copyLoop(invocationIndex, propertyId, transferRequest, dispatchSize); }
135137}
136138
137139}
0 commit comments