Skip to content

Commit 54ded60

Browse files
fix(dx12): align tex. <-> buf. copies via intermediate buffer if !UnrestrictedBufferTextureCopyPitchSupported
1 parent 53f3da3 commit 54ded60

File tree

7 files changed

+164
-26
lines changed

7 files changed

+164
-26
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,10 @@ SamplerDescriptor {
8686
- WebGPU device requests now support the required limits `maxColorAttachments` and `maxColorAttachmentBytesPerSample`. By @evilpie in [#8328](https://github.com/gfx-rs/wgpu/pull/8328)
8787
- Reject binding indices that exceed `wgpu_types::Limits::max_bindings_per_bind_group` when deriving a bind group layout for a pipeline. By @jimblandy in [#8325](https://github.com/gfx-rs/wgpu/pull/8325).
8888

89+
#### DX12
90+
91+
- Align copies between textures and buffers via a single intermediate buffer per copy when `D3D12_FEATURE_DATA_D3D12_OPTIONS13.UnrestrictedBufferTextureCopyPitchSupported` is `false`. By @ErichDonGubler in [#7721](https://github.com/gfx-rs/wgpu/pull/7721).
92+
8993
## v27.0.2 (2025-10-03)
9094

9195
### Bug Fixes

cts_runner/test.lst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,13 @@ webgpu:api,operation,command_buffer,copyTextureToTexture:copy_depth_stencil:form
1515
// Fails with OOM in CI.
1616
fails-if(dx12) webgpu:api,operation,command_buffer,image_copy:offsets_and_sizes:*
1717
webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="WriteTexture";checkMethod="FullCopyT2B";dimension="1d"
18-
fails-if(dx12) webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="WriteTexture";checkMethod="FullCopyT2B";dimension="2d"
18+
webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="WriteTexture";checkMethod="FullCopyT2B";dimension="2d"
1919
webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="WriteTexture";checkMethod="FullCopyT2B";dimension="3d"
2020
webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="WriteTexture";checkMethod="PartialCopyT2B";dimension="1d"
2121
fails-if(dx12) webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="WriteTexture";checkMethod="PartialCopyT2B";dimension="2d"
2222
webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="WriteTexture";checkMethod="PartialCopyT2B";dimension="3d"
2323
webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="CopyB2T";checkMethod="FullCopyT2B";dimension="1d"
24-
fails-if(dx12) webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="CopyB2T";checkMethod="FullCopyT2B";dimension="2d"
24+
webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="CopyB2T";checkMethod="FullCopyT2B";dimension="2d"
2525
fails-if(dx12,vulkan,metal) webgpu:api,operation,command_buffer,image_copy:undefined_params:initMethod="CopyB2T";checkMethod="FullCopyT2B";dimension="3d"
2626
webgpu:api,operation,compute,basic:memcpy:*
2727
//FAIL: webgpu:api,operation,compute,basic:large_dispatch:*

tests/tests/wgpu-gpu/regression/issue_6827.rs

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,17 +25,7 @@ static TEST_SCATTER: GpuTestConfiguration = GpuTestConfiguration::new()
2525
.expect_fail(FailureCase::backend_adapter(
2626
wgpu::Backends::METAL,
2727
"Apple Paravirtual device", // CI on M1
28-
))
29-
.expect_fail(
30-
// Unfortunately this depends on if `D3D12_FEATURE_DATA_D3D12_OPTIONS13.UnrestrictedBufferTextureCopyPitchSupported`
31-
// is true, which we have no way to encode. This reproduces in CI though, so not too worried about it.
32-
FailureCase::backend(wgpu::Backends::DX12)
33-
.flaky()
34-
.validation_error(
35-
"D3D12_PLACED_SUBRESOURCE_FOOTPRINT::Offset must be a multiple of 512",
36-
)
37-
.panic("GraphicsCommandList::close failed: The parameter is incorrect"),
38-
),
28+
)),
3929
)
4030
.run_async(|ctx| async move { run_test(ctx, true).await });
4131

wgpu-hal/src/dx12/adapter.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -319,8 +319,7 @@ impl super::Adapter {
319319
suballocation_supported: !info.name.contains("Iris(R) Xe"),
320320
shader_model,
321321
max_sampler_descriptor_heap_size,
322-
_unrestricted_buffer_texture_copy_pitch_supported:
323-
unrestricted_buffer_texture_copy_pitch_supported,
322+
unrestricted_buffer_texture_copy_pitch_supported,
324323
};
325324

326325
// Theoretically vram limited, but in practice 2^20 is the limit

wgpu-hal/src/dx12/command.rs

Lines changed: 137 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ use crate::{
1414
dxgi::{name::ObjectExt, result::HResult as _},
1515
},
1616
dx12::borrow_interface_temporarily,
17-
AccelerationStructureEntries,
17+
AccelerationStructureEntries, CommandEncoder as _,
1818
};
1919

2020
fn make_box(origin: &wgt::Origin3d, size: &crate::CopyExtent) -> Direct3D12::D3D12_BOX {
@@ -312,6 +312,78 @@ impl super::CommandEncoder {
312312
}
313313
}
314314
}
315+
316+
unsafe fn buf_tex_intermediate<T>(
317+
&mut self,
318+
region: crate::BufferTextureCopy,
319+
tex_fmt: wgt::TextureFormat,
320+
copy_op: impl FnOnce(&mut Self, &super::Buffer, wgt::BufferSize, crate::BufferTextureCopy) -> T,
321+
) -> (T, super::Buffer) {
322+
let size = {
323+
let copy_info = region.buffer_layout.get_buffer_texture_copy_info(
324+
tex_fmt,
325+
region.texture_base.aspect.map(),
326+
&region.size.into(),
327+
);
328+
copy_info.unwrap().bytes_in_copy
329+
};
330+
331+
let size = wgt::BufferSize::new(size).unwrap();
332+
333+
let buffer = {
334+
let (resource, allocation) =
335+
super::suballocation::DeviceAllocationContext::from(&*self)
336+
.create_buffer(&crate::BufferDescriptor {
337+
label: None,
338+
size: size.get(),
339+
usage: wgt::BufferUses::COPY_SRC | wgt::BufferUses::COPY_DST,
340+
memory_flags: crate::MemoryFlags::empty(),
341+
})
342+
.expect(concat!(
343+
"internal error: ",
344+
"failed to allocate intermediate buffer ",
345+
"for offset alignment"
346+
));
347+
super::Buffer {
348+
resource,
349+
size: size.get(),
350+
allocation,
351+
}
352+
};
353+
354+
let mut region = region;
355+
region.buffer_layout.offset = 0;
356+
357+
unsafe {
358+
self.transition_buffers(
359+
[crate::BufferBarrier {
360+
buffer: &buffer,
361+
usage: crate::StateTransition {
362+
from: wgt::BufferUses::empty(),
363+
to: wgt::BufferUses::COPY_DST,
364+
},
365+
}]
366+
.into_iter(),
367+
)
368+
};
369+
370+
let t = copy_op(self, &buffer, size, region);
371+
372+
unsafe {
373+
self.transition_buffers(
374+
[crate::BufferBarrier {
375+
buffer: &buffer,
376+
usage: crate::StateTransition {
377+
from: wgt::BufferUses::COPY_DST,
378+
to: wgt::BufferUses::COPY_SRC,
379+
},
380+
}]
381+
.into_iter(),
382+
)
383+
};
384+
385+
(t, buffer)
386+
}
315387
}
316388

317389
impl crate::CommandEncoder for super::CommandEncoder {
@@ -366,6 +438,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
366438
Ok(super::CommandBuffer { raw })
367439
}
368440
unsafe fn reset_all<I: Iterator<Item = super::CommandBuffer>>(&mut self, command_buffers: I) {
441+
self.intermediate_copy_bufs.clear();
369442
for cmd_buf in command_buffers {
370443
self.free_lists.push(cmd_buf.raw);
371444
}
@@ -612,31 +685,59 @@ impl crate::CommandEncoder for super::CommandEncoder {
612685
) where
613686
T: Iterator<Item = crate::BufferTextureCopy>,
614687
{
615-
for r in regions {
688+
let offset_alignment = self.shared.private_caps.texture_data_placement_alignment();
689+
690+
for naive_copy_region in regions {
691+
let is_offset_aligned = naive_copy_region.buffer_layout.offset % offset_alignment == 0;
692+
let (final_copy_region, src) = if is_offset_aligned {
693+
(naive_copy_region, src)
694+
} else {
695+
let (intermediate_to_dst_region, intermediate_buf) = unsafe {
696+
let src_offset = naive_copy_region.buffer_layout.offset;
697+
self.buf_tex_intermediate(
698+
naive_copy_region,
699+
dst.format,
700+
|this, buf, size, intermediate_to_dst_region| {
701+
let layout = crate::BufferCopy {
702+
src_offset,
703+
dst_offset: 0,
704+
size,
705+
};
706+
this.copy_buffer_to_buffer(src, buf, [layout].into_iter());
707+
intermediate_to_dst_region
708+
},
709+
)
710+
};
711+
self.intermediate_copy_bufs.push(intermediate_buf);
712+
let intermediate_buf = self.intermediate_copy_bufs.last().unwrap();
713+
(intermediate_to_dst_region, intermediate_buf)
714+
};
715+
616716
let list = self.list.as_ref().unwrap();
617717

618718
let src_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION {
619719
pResource: unsafe { borrow_interface_temporarily(&src.resource) },
620720
Type: Direct3D12::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
621721
Anonymous: Direct3D12::D3D12_TEXTURE_COPY_LOCATION_0 {
622-
PlacedFootprint: r.to_subresource_footprint(dst.format),
722+
PlacedFootprint: final_copy_region.to_subresource_footprint(dst.format),
623723
},
624724
};
625725
let dst_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION {
626726
pResource: unsafe { borrow_interface_temporarily(&dst.resource) },
627727
Type: Direct3D12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX,
628728
Anonymous: Direct3D12::D3D12_TEXTURE_COPY_LOCATION_0 {
629-
SubresourceIndex: dst.calc_subresource_for_copy(&r.texture_base),
729+
SubresourceIndex: dst
730+
.calc_subresource_for_copy(&final_copy_region.texture_base),
630731
},
631732
};
632733

633-
let src_box = make_box(&wgt::Origin3d::ZERO, &r.size);
734+
let src_box = make_box(&wgt::Origin3d::ZERO, &final_copy_region.size);
634735
unsafe {
635736
list.CopyTextureRegion(
636737
&dst_location,
637-
r.texture_base.origin.x,
638-
r.texture_base.origin.y,
639-
r.texture_base.origin.z,
738+
final_copy_region.texture_base.origin.x,
739+
final_copy_region.texture_base.origin.y,
740+
final_copy_region.texture_base.origin.z,
640741
&src_location,
641742
Some(&src_box),
642743
)
@@ -680,8 +781,35 @@ impl crate::CommandEncoder for super::CommandEncoder {
680781
};
681782
};
682783

784+
let offset_alignment = self.shared.private_caps.texture_data_placement_alignment();
785+
683786
for r in regions {
684-
copy_aligned(this, src, dst, r);
787+
let is_offset_aligned = r.buffer_layout.offset % offset_alignment == 0;
788+
if is_offset_aligned {
789+
copy_aligned(self, src, dst, r)
790+
} else {
791+
let orig_offset = r.buffer_layout.offset;
792+
let (intermediate_to_dst_region, src) = unsafe {
793+
self.buf_tex_intermediate(
794+
r,
795+
src.format,
796+
|this, buf, size, intermediate_region| {
797+
copy_aligned(this, src, buf, intermediate_region);
798+
crate::BufferCopy {
799+
src_offset: 0,
800+
dst_offset: orig_offset,
801+
size,
802+
}
803+
},
804+
)
805+
};
806+
807+
unsafe {
808+
self.copy_buffer_to_buffer(&src, dst, [intermediate_to_dst_region].into_iter());
809+
}
810+
811+
self.intermediate_copy_bufs.push(src);
812+
};
685813
}
686814
}
687815

wgpu-hal/src/dx12/device.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -796,6 +796,7 @@ impl crate::Device for super::Device {
796796
mem_allocator: self.mem_allocator.clone(),
797797
rtv_pool: Arc::clone(&self.rtv_pool),
798798
temp_rtv_handles: Vec::new(),
799+
intermediate_copy_bufs: Vec::new(),
799800
null_rtv_handle: self.null_rtv_handle,
800801
list: None,
801802
free_lists: Vec::new(),

wgpu-hal/src/dx12/mod.rs

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,11 @@ use windows::{
9696
core::{Free, Interface},
9797
Win32::{
9898
Foundation,
99-
Graphics::{Direct3D, Direct3D12, DirectComposition, Dxgi},
99+
Graphics::{
100+
Direct3D,
101+
Direct3D12::{self, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT},
102+
DirectComposition, Dxgi,
103+
},
100104
System::Threading,
101105
},
102106
};
@@ -601,7 +605,17 @@ struct PrivateCapabilities {
601605
suballocation_supported: bool,
602606
shader_model: naga::back::hlsl::ShaderModel,
603607
max_sampler_descriptor_heap_size: u32,
604-
_unrestricted_buffer_texture_copy_pitch_supported: bool,
608+
unrestricted_buffer_texture_copy_pitch_supported: bool,
609+
}
610+
611+
impl PrivateCapabilities {
612+
fn texture_data_placement_alignment(&self) -> u64 {
613+
if self.unrestricted_buffer_texture_copy_pitch_supported {
614+
4
615+
} else {
616+
D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT.into()
617+
}
618+
}
605619
}
606620

607621
#[derive(Default)]
@@ -841,6 +855,8 @@ pub struct CommandEncoder {
841855
rtv_pool: Arc<Mutex<descriptor::CpuPool>>,
842856
temp_rtv_handles: Vec<descriptor::Handle>,
843857

858+
intermediate_copy_bufs: Vec<Buffer>,
859+
844860
null_rtv_handle: descriptor::Handle,
845861
list: Option<Direct3D12::ID3D12GraphicsCommandList>,
846862
free_lists: Vec<Direct3D12::ID3D12GraphicsCommandList>,

0 commit comments

Comments
 (0)