@@ -67,7 +67,7 @@ IGPUQueue::SSubmitInfo IUtilities::updateImageViaStagingBuffer(
6767 return intendedNextSubmit;
6868 }
6969
70- ImageRegionIterator regionIterator = ImageRegionIterator (regions, queueFamProps, srcBuffer, srcFormat, dstImage, 64u /* limits.optimalBufferCopyRowPitchAlignment*/ );
70+ ImageRegionIterator regionIterator = ImageRegionIterator (regions, queueFamProps, srcBuffer, srcFormat, dstImage, limits.optimalBufferCopyRowPitchAlignment );
7171
7272 // Assuming each thread can handle one minImageTransferGranularity-sized group of texel blocks:
7373 const uint32_t maxResidentImageTransferSize = limits.maxResidentInvocations * texelBlockInfo.getBlockByteSize () * (minImageTransferGranularity.width * minImageTransferGranularity.height * minImageTransferGranularity.depth );
@@ -86,10 +86,9 @@ IGPUQueue::SSubmitInfo IUtilities::updateImageViaStagingBuffer(
8686 uint32_t memoryLowerBound = maxResidentImageTransferSize;
8787 {
8888 const asset::IImage::SBufferCopy & region = regions[regionIterator.getCurrentRegion ()];
89- auto imageExtent = core::vector3du32_SIMD (region.imageExtent .width , region.imageExtent .height , region.imageExtent .depth );
90- auto imageExtentInBlocks = texelBlockInfo.convertTexelsToBlocks (imageExtent);
91- auto imageExtentBlockStridesInBytes = texelBlockInfo.convert3DBlockStridesTo1DByteStrides (imageExtentInBlocks);
92- memoryLowerBound = core::max (memoryLowerBound, imageExtentBlockStridesInBytes[1 ]); // rowPitch = imageExtentBlockStridesInBytes[1]
89+ const auto copyTexelStrides = regionIterator.getOptimalCopyTexelStrides (region.imageExtent );
90+ const auto byteStrides = texelBlockInfo.convert3DTexelStridesTo1DByteStrides (copyTexelStrides);
91+ memoryLowerBound = core::max (memoryLowerBound, byteStrides[1 ]); // max of memoryLowerBound and copy rowPitch
9392 }
9493
9594 uint32_t localOffset = video::StreamingTransientDataBufferMT<>::invalid_value;
@@ -218,6 +217,7 @@ ImageRegionIterator::ImageRegionIterator(
218217 , currentSliceInLayer(0u )
219218 , currentLayerInRegion(0u )
220219 , currentRegion(0u )
220+ , optimalRowPitchAlignment(optimalRowPitchAlignment)
221221{
222222 dstImageFormat = dstImage->getCreationParameters ().format ;
223223 if (srcImageFormat == asset::EF_UNKNOWN)
@@ -328,15 +328,12 @@ size_t ImageRegionIterator::getMemoryNeededForRemainingRegions() const
328328 {
329329 const asset::IImage::SBufferCopy & region = regions[i];
330330
331- // auto optimalRegion = region;
332- // optimalRegion.bufferRowLength = core::alignUp(optimalRegion.bufferRowLength, optimalRowPitchAlignment);
333- auto imageExtent = core::vector3du32_SIMD (region.imageExtent .width , region.imageExtent .height , region.imageExtent .depth );
334- auto imageExtentInBlocks = dstImageTexelBlockInfo.convertTexelsToBlocks (imageExtent);
335-
336- // TODO: This needs to change with optimal rowpitch
337- auto imageExtentBlockStridesInBytes = dstImageTexelBlockInfo.convert3DBlockStridesTo1DByteStrides (imageExtentInBlocks);
331+ auto imageExtentInBlocks = dstImageTexelBlockInfo.convertTexelsToBlocks (core::vector3du32_SIMD (region.imageExtent .width , region.imageExtent .height , region.imageExtent .depth ));
332+
333+ const auto copyTexelStrides = getOptimalCopyTexelStrides (region.imageExtent );
334+ const core::vector4du32_SIMD copyByteStrides = dstImageTexelBlockInfo.convert3DTexelStridesTo1DByteStrides (copyTexelStrides);
338335
339- if (i == currentRegion)
336+ if (i == currentRegion)
340337 {
341338 auto remainingBlocksInRow = imageExtentInBlocks.x - currentBlockInRow;
342339 auto remainingRowsInSlice = imageExtentInBlocks.y - currentRowInSlice;
@@ -345,42 +342,42 @@ size_t ImageRegionIterator::getMemoryNeededForRemainingRegions() const
345342
346343 if (currentBlockInRow == 0 && currentRowInSlice == 0 && currentSliceInLayer == 0 && remainingLayersInRegion > 0 )
347344 {
348- incrementMemoryNeeded (imageExtentBlockStridesInBytes [3 ] * remainingLayersInRegion);
345+ incrementMemoryNeeded (copyByteStrides [3 ] * remainingLayersInRegion);
349346 }
350347 else if (currentBlockInRow == 0 && currentRowInSlice == 0 && currentSliceInLayer > 0 )
351348 {
352- incrementMemoryNeeded (imageExtentBlockStridesInBytes [2 ] * remainingSlicesInLayer);
349+ incrementMemoryNeeded (copyByteStrides [2 ] * remainingSlicesInLayer);
353350 if (remainingLayersInRegion > 1u )
354- incrementMemoryNeeded (imageExtentBlockStridesInBytes [3 ] * (remainingLayersInRegion - 1u ));
351+ incrementMemoryNeeded (copyByteStrides [3 ] * (remainingLayersInRegion - 1u ));
355352 }
356353 else if (currentBlockInRow == 0 && currentRowInSlice > 0 )
357354 {
358- incrementMemoryNeeded (imageExtentBlockStridesInBytes [1 ] * remainingRowsInSlice);
355+ incrementMemoryNeeded (copyByteStrides [1 ] * remainingRowsInSlice);
359356
360- if (remainingSlicesInLayer > 1u )
361- incrementMemoryNeeded (imageExtentBlockStridesInBytes [2 ] * (remainingSlicesInLayer - 1u ));
362- if (remainingLayersInRegion > 1u )
363- incrementMemoryNeeded (imageExtentBlockStridesInBytes [3 ] * (remainingLayersInRegion - 1u ));
357+ if (remainingSlicesInLayer > 1u )
358+ incrementMemoryNeeded (copyByteStrides [2 ] * (remainingSlicesInLayer - 1u ));
359+ if (remainingLayersInRegion > 1u )
360+ incrementMemoryNeeded (copyByteStrides [3 ] * (remainingLayersInRegion - 1u ));
364361 }
365362 else if (currentBlockInRow > 0 )
366363 {
367364 // want to first fill the remaining blocks in current row
368- incrementMemoryNeeded (imageExtentBlockStridesInBytes [0 ] * remainingBlocksInRow);
365+ incrementMemoryNeeded (copyByteStrides [0 ] * remainingBlocksInRow);
369366 // then fill the remaining rows in current slice
370- if (remainingRowsInSlice > 1u )
371- incrementMemoryNeeded (imageExtentBlockStridesInBytes [1 ] * (remainingRowsInSlice - 1u ));
367+ if (remainingRowsInSlice > 1u )
368+ incrementMemoryNeeded (copyByteStrides [1 ] * (remainingRowsInSlice - 1u ));
372369 // then fill the remaining slices in current layer
373- if (remainingSlicesInLayer > 1u )
374- incrementMemoryNeeded (imageExtentBlockStridesInBytes [2 ] * (remainingSlicesInLayer - 1u ));
370+ if (remainingSlicesInLayer > 1u )
371+ incrementMemoryNeeded (copyByteStrides [2 ] * (remainingSlicesInLayer - 1u ));
375372 // then fill the remaining layers in current region
376- if (remainingLayersInRegion > 1u )
377- incrementMemoryNeeded (imageExtentBlockStridesInBytes [3 ] * (remainingLayersInRegion - 1u ));
373+ if (remainingLayersInRegion > 1u )
374+ incrementMemoryNeeded (copyByteStrides [3 ] * (remainingLayersInRegion - 1u ));
378375 }
379376 }
380377 else
381378 {
382379 // we want to fill the whole layers in the region
383- incrementMemoryNeeded (imageExtentBlockStridesInBytes [3 ] * region.imageSubresource .layerCount ); // = blockByteSize * imageExtentInBlocks.x * imageExtentInBlocks.y * imageExtentInBlocks.z * region.imageSubresource.layerCount
380+ incrementMemoryNeeded (copyByteStrides [3 ] * region.imageSubresource .layerCount ); // = per-layer byte stride (derived from the optimal copy texel strides, not the tightly packed imageExtentInBlocks product) * region.imageSubresource.layerCount
384381 }
385382 }
386383 return memoryNeededForRemainingRegions;
@@ -495,11 +492,9 @@ bool ImageRegionIterator::advanceAndCopyToStagingBuffer(asset::IImage::SBufferCo
495492 }
496493
497494 const asset::TexelBlockInfo dstImageTexelBlockInfo (dstImageFormat);
498- const asset::TexelBlockInfo srcImageTexelBlockInfo (srcImageFormat);
499495
500496 // ! Current Region that may break down into smaller regions (the first smaller region is nextRegionToCopy)
501497 const asset::IImage::SBufferCopy & mainRegion = regions[currentRegion];
502- const core::vector4du32_SIMD srcBufferByteStrides = mainRegion.getByteStrides (srcImageTexelBlockInfo);
503498
504499 // ! We only need subresourceSize for validations and assertions about minImageTransferGranularity because granularity requirements can be ignored if region fits against the right corner of the subresource (described in more detail below)
505500 const auto subresourceSize = dstImage->getMipSize (mainRegion.imageSubresource .mipLevel );
@@ -511,9 +506,9 @@ bool ImageRegionIterator::advanceAndCopyToStagingBuffer(asset::IImage::SBufferCo
511506 const auto imageOffsetInBlocks = dstImageTexelBlockInfo.convertTexelsToBlocks (core::vector3du32_SIMD (mainRegion.imageOffset .x , mainRegion.imageOffset .y , mainRegion.imageOffset .z ));
512507 const auto imageExtentInBlocks = dstImageTexelBlockInfo.convertTexelsToBlocks (core::vector3du32_SIMD (mainRegion.imageExtent .width , mainRegion.imageExtent .height , mainRegion.imageExtent .depth ));
513508
514- // TODO: This needs to change with optimal rowpitch
515- const core::vector4du32_SIMD imageExtentBlockStridesInBytes = dstImageTexelBlockInfo.convert3DBlockStridesTo1DByteStrides (imageExtentInBlocks );
516-
509+ const auto copyTexelStrides = getOptimalCopyTexelStrides (mainRegion. imageExtent );
510+ const core::vector4du32_SIMD copyByteStrides = dstImageTexelBlockInfo.convert3DTexelStridesTo1DByteStrides (copyTexelStrides );
511+
517512 // region <-> region.imageSubresource.layerCount <-> imageExtentInBlocks.z <-> imageExtentInBlocks.y <-> imageExtentInBlocks.x
518513 auto updateCurrentOffsets = [&]() -> void
519514 {
@@ -542,10 +537,10 @@ bool ImageRegionIterator::advanceAndCopyToStagingBuffer(asset::IImage::SBufferCo
542537 }
543538 };
544539
545- uint32_t eachBlockNeededMemory = imageExtentBlockStridesInBytes [0 ]; // = blockByteSize
546- uint32_t eachRowNeededMemory = imageExtentBlockStridesInBytes [1 ]; // = blockByteSize * imageExtentInBlocks .x
547- uint32_t eachSliceNeededMemory = imageExtentBlockStridesInBytes [2 ]; // = blockByteSize * imageExtentInBlocks .x * imageExtentInBlocks .y
548- uint32_t eachLayerNeededMemory = imageExtentBlockStridesInBytes [3 ]; // = blockByteSize * imageExtentInBlocks .x * imageExtentInBlocks .y * imageExtentInBlocks .z
540+ uint32_t eachBlockNeededMemory = copyByteStrides [0 ]; // = blockByteSize
541+ uint32_t eachRowNeededMemory = copyByteStrides [1 ]; // = blockByteSize * copyBlockStrides .x
542+ uint32_t eachSliceNeededMemory = copyByteStrides [2 ]; // = blockByteSize * copyBlockStrides .x * copyBlockStrides .y
543+ uint32_t eachLayerNeededMemory = copyByteStrides [3 ]; // = blockByteSize * copyBlockStrides .x * copyBlockStrides .y * copyBlockStrides .z
549544
550545 // There is remaining layers in region that needs copying
551546 uint32_t uploadableArrayLayers = availableMemory / eachLayerNeededMemory;
@@ -606,8 +601,8 @@ bool ImageRegionIterator::advanceAndCopyToStagingBuffer(asset::IImage::SBufferCo
606601 uint32_t layersToUploadMemorySize = eachLayerNeededMemory * uploadableArrayLayers;
607602
608603 regionToCopyNext.bufferOffset = stagingBufferOffset;
609- regionToCopyNext.bufferRowLength = imageExtentInBlocks. x * texelBlockDim .x ;
610- regionToCopyNext.bufferImageHeight = imageExtentInBlocks. y * texelBlockDim .y ;
604+ regionToCopyNext.bufferRowLength = copyTexelStrides .x ;
605+ regionToCopyNext.bufferImageHeight = copyTexelStrides .y ;
611606 regionToCopyNext.imageSubresource .aspectMask = mainRegion.imageSubresource .aspectMask ;
612607 regionToCopyNext.imageSubresource .mipLevel = mainRegion.imageSubresource .mipLevel ;
613608 regionToCopyNext.imageSubresource .baseArrayLayer = mainRegion.imageSubresource .baseArrayLayer + currentLayerInRegion;
@@ -645,8 +640,8 @@ bool ImageRegionIterator::advanceAndCopyToStagingBuffer(asset::IImage::SBufferCo
645640 uint32_t slicesToUploadMemorySize = eachSliceNeededMemory * uploadableSlices;
646641
647642 regionToCopyNext.bufferOffset = stagingBufferOffset;
648- regionToCopyNext.bufferRowLength = imageExtentInBlocks. x * texelBlockDim .x ;
649- regionToCopyNext.bufferImageHeight = imageExtentInBlocks. y * texelBlockDim .y ;
643+ regionToCopyNext.bufferRowLength = copyTexelStrides .x ;
644+ regionToCopyNext.bufferImageHeight = copyTexelStrides .y ;
650645 regionToCopyNext.imageSubresource .aspectMask = mainRegion.imageSubresource .aspectMask ;
651646 regionToCopyNext.imageSubresource .mipLevel = mainRegion.imageSubresource .mipLevel ;
652647 regionToCopyNext.imageSubresource .baseArrayLayer = mainRegion.imageSubresource .baseArrayLayer + currentLayerInRegion;
@@ -684,8 +679,8 @@ bool ImageRegionIterator::advanceAndCopyToStagingBuffer(asset::IImage::SBufferCo
684679 uint32_t rowsToUploadMemorySize = eachRowNeededMemory * uploadableRows;
685680
686681 regionToCopyNext.bufferOffset = stagingBufferOffset;
687- regionToCopyNext.bufferRowLength = imageExtentInBlocks. x * texelBlockDim .x ;
688- regionToCopyNext.bufferImageHeight = imageExtentInBlocks. y * texelBlockDim .y ;
682+ regionToCopyNext.bufferRowLength = copyTexelStrides .x ;
683+ regionToCopyNext.bufferImageHeight = copyTexelStrides .y ;
689684 regionToCopyNext.imageSubresource .aspectMask = mainRegion.imageSubresource .aspectMask ;
690685 regionToCopyNext.imageSubresource .mipLevel = mainRegion.imageSubresource .mipLevel ;
691686 regionToCopyNext.imageSubresource .baseArrayLayer = mainRegion.imageSubresource .baseArrayLayer + currentLayerInRegion;
@@ -724,8 +719,8 @@ bool ImageRegionIterator::advanceAndCopyToStagingBuffer(asset::IImage::SBufferCo
724719 uint32_t blocksToUploadMemorySize = eachBlockNeededMemory * uploadableBlocks;
725720
726721 regionToCopyNext.bufferOffset = stagingBufferOffset;
727- regionToCopyNext.bufferRowLength = imageExtentInBlocks. x * texelBlockDim .x ;
728- regionToCopyNext.bufferImageHeight = imageExtentInBlocks. y * texelBlockDim .y ;
722+ regionToCopyNext.bufferRowLength = copyTexelStrides .x ;
723+ regionToCopyNext.bufferImageHeight = copyTexelStrides .y ;
729724 regionToCopyNext.imageSubresource .aspectMask = mainRegion.imageSubresource .aspectMask ;
730725 regionToCopyNext.imageSubresource .mipLevel = mainRegion.imageSubresource .mipLevel ;
731726 regionToCopyNext.imageSubresource .baseArrayLayer = mainRegion.imageSubresource .baseArrayLayer + currentLayerInRegion;
0 commit comments