Skip to content

Commit 480aafc

Browse files
committed
Use Inline ASM for SSE4a nontemporal stores
1 parent fc5ac13 commit 480aafc

File tree

1 file changed

+12
-6
lines changed

1 file changed

+12
-6
lines changed

crates/core_arch/src/x86/sse4a.rs

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -15,10 +15,6 @@ unsafe extern "C" {
1515
fn insertq(x: i64x2, y: i64x2) -> i64x2;
1616
#[link_name = "llvm.x86.sse4a.insertqi"]
1717
fn insertqi(x: i64x2, y: i64x2, len: u8, idx: u8) -> i64x2;
18-
#[link_name = "llvm.x86.sse4a.movnt.sd"]
19-
fn movntsd(x: *mut f64, y: __m128d);
20-
#[link_name = "llvm.x86.sse4a.movnt.ss"]
21-
fn movntss(x: *mut f32, y: __m128);
2218
}
2319

2420
/// Extracts the bit range specified by `y` from the lower 64 bits of `x`.
@@ -114,7 +110,12 @@ pub fn _mm_inserti_si64<const LEN: i32, const IDX: i32>(x: __m128i, y: __m128i)
114110
#[cfg_attr(test, assert_instr(movntsd))]
115111
#[stable(feature = "simd_x86", since = "1.27.0")]
116112
pub unsafe fn _mm_stream_sd(p: *mut f64, a: __m128d) {
117-
movntsd(p, a);
113+
crate::arch::asm!(
114+
vps!("movntsd", ",{a}"),
115+
p = in(reg) p,
116+
a = in(xmm_reg) a,
117+
options(nostack, preserves_flags),
118+
);
118119
}
119120

120121
/// Non-temporal store of `a.0` into `p`.
@@ -134,7 +135,12 @@ pub unsafe fn _mm_stream_sd(p: *mut f64, a: __m128d) {
134135
#[cfg_attr(test, assert_instr(movntss))]
135136
#[stable(feature = "simd_x86", since = "1.27.0")]
136137
pub unsafe fn _mm_stream_ss(p: *mut f32, a: __m128) {
137-
movntss(p, a);
138+
crate::arch::asm!(
139+
vps!("movntss", ",{a}"),
140+
p = in(reg) p,
141+
a = in(xmm_reg) a,
142+
options(nostack, preserves_flags),
143+
);
138144
}
139145

140146
#[cfg(test)]

0 commit comments

Comments
 (0)