@@ -7,6 +7,128 @@ pub const NOTICE: &str = "\
77pub const F16_FORMATTING_DEF : & str = r#"
88use std::arch::x86_64::*;
99
10+ #[inline]
11+ unsafe fn _mm_loadu_ph_to___m128i(mem_addr: *const f16) -> __m128i {
12+ _mm_castph_si128(_mm_loadu_ph(mem_addr))
13+ }
14+
15+ #[inline]
16+ unsafe fn _mm256_loadu_ph_to___m256i(mem_addr: *const f16) -> __m256i {
17+ _mm256_castph_si256(_mm256_loadu_ph(mem_addr))
18+ }
19+
20+ #[inline]
21+ unsafe fn _mm512_loadu_ph_to___mm512i(mem_addr: *const f16) -> __m512i {
22+ _mm512_castph_si512(_mm512_loadu_ph(mem_addr))
23+ }
24+
25+
26+ #[inline]
27+ unsafe fn _mm_loadu_ps_to___m128h(mem_addr: *const f32) -> __m128h {
28+ _mm_castps_ph(_mm_loadu_ps(mem_addr))
29+ }
30+
31+ #[inline]
32+ unsafe fn _mm256_loadu_ps_to___m256h(mem_addr: *const f32) -> __m256h {
33+ _mm256_castps_ph(_mm256_loadu_ps(mem_addr))
34+ }
35+
36+ #[inline]
37+ unsafe fn _mm512_loadu_ps_to___m512h(mem_addr: *const f32) -> __m512h {
38+ _mm512_castps_ph(_mm512_loadu_ps(mem_addr))
39+ }
40+
41+ #[inline]
42+ unsafe fn _mm_loadu_epi16_to___m128d(mem_addr: *const i16) -> __m128d {
43+ _mm_castsi128_pd(_mm_loadu_epi16(mem_addr))
44+ }
45+
46+ #[inline]
47+ unsafe fn _mm256_loadu_epi16_to___m256d(mem_addr: *const i16) -> __m256d {
48+ _mm256_castsi256_pd(_mm256_loadu_epi16(mem_addr))
49+ }
50+
51+ #[inline]
52+ unsafe fn _mm512_loadu_epi16_to___m512d(mem_addr: *const i16) -> __m512d {
53+ _mm512_castsi512_pd(_mm512_loadu_epi16(mem_addr))
54+ }
55+
56+ #[inline]
57+ unsafe fn _mm_loadu_epi32_to___m128d(mem_addr: *const i32) -> __m128d {
58+ _mm_castsi128_pd(_mm_loadu_epi32(mem_addr))
59+ }
60+
61+ #[inline]
62+ unsafe fn _mm256_loadu_epi32_to___m256d(mem_addr: *const i32) -> __m256d {
63+ _mm256_castsi256_pd(_mm256_loadu_epi32(mem_addr))
64+ }
65+
66+ #[inline]
67+ unsafe fn _mm512_loadu_epi32_to___m512d(mem_addr: *const i32) -> __m512d {
68+ _mm512_castsi512_pd(_mm512_loadu_epi32(mem_addr))
69+ }
70+
71+ #[inline]
72+ unsafe fn _mm_loadu_epi64_to___m128d(mem_addr: *const i64) -> __m128d {
73+ _mm_castsi128_pd(_mm_loadu_epi64(mem_addr))
74+ }
75+
76+ #[inline]
77+ unsafe fn _mm256_loadu_epi64_to___m256d(mem_addr: *const i64) -> __m256d {
78+ _mm256_castsi256_pd(_mm256_loadu_epi64(mem_addr))
79+ }
80+
81+ #[inline]
82+ unsafe fn _mm512_loadu_epi64_to___m512d(mem_addr: *const i64) -> __m512d {
83+ _mm512_castsi512_pd(_mm512_loadu_epi64(mem_addr))
84+ }
85+
// === Integer loads cast to `__m128` / `__m256` / `__m512` ===
87+ #[inline]
88+ unsafe fn _mm_loadu_epi16_to___m128(mem_addr: *const i16) -> __m128 {
89+ _mm_castsi128_ps(_mm_loadu_epi16(mem_addr))
90+ }
91+
92+ #[inline]
93+ unsafe fn _mm256_loadu_epi16_to___m256(mem_addr: *const i16) -> __m256 {
94+ _mm256_castsi256_ps(_mm256_loadu_epi16(mem_addr))
95+ }
96+
97+ #[inline]
98+ unsafe fn _mm512_loadu_epi16_to___m512(mem_addr: *const i16) -> __m512 {
99+ _mm512_castsi512_ps(_mm512_loadu_epi16(mem_addr))
100+ }
101+
102+ #[inline]
103+ unsafe fn _mm_loadu_epi32_to___m128(mem_addr: *const i32) -> __m128 {
104+ _mm_castsi128_ps(_mm_loadu_epi32(mem_addr))
105+ }
106+
107+ #[inline]
108+ unsafe fn _mm256_loadu_epi32_to___m256(mem_addr: *const i32) -> __m256 {
109+ _mm256_castsi256_ps(_mm256_loadu_epi32(mem_addr))
110+ }
111+
112+ #[inline]
113+ unsafe fn _mm512_loadu_epi32_to___m512(mem_addr: *const i32) -> __m512 {
114+ _mm512_castsi512_ps(_mm512_loadu_epi32(mem_addr))
115+ }
116+
117+ #[inline]
118+ unsafe fn _mm_loadu_epi64_to___m128(mem_addr: *const i64) -> __m128 {
119+ _mm_castsi128_ps(_mm_loadu_epi64(mem_addr))
120+ }
121+
122+ #[inline]
123+ unsafe fn _mm256_loadu_epi64_to___m256(mem_addr: *const i64) -> __m256 {
124+ _mm256_castsi256_ps(_mm256_loadu_epi64(mem_addr))
125+ }
126+
127+ #[inline]
128+ unsafe fn _mm512_loadu_epi64_to___m512(mem_addr: *const i64) -> __m512 {
129+ _mm512_castsi512_ps(_mm512_loadu_epi64(mem_addr))
130+ }
131+
10132#[inline]
11133fn debug_simd_finish<T: core::fmt::Debug, const N: usize>(
12134 formatter: &mut core::fmt::Formatter<'_>,
@@ -50,19 +172,40 @@ impl DebugHexF16 for __m128h {
50172 }
51173}
52174
175+ impl DebugHexF16 for __m128i {
176+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
177+ let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 8]>(*self) };
178+ debug_simd_finish(f, "__m128i", &array)
179+ }
180+ }
181+
53182impl DebugHexF16 for __m256h {
54183 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
55184 let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 16]>(*self) };
56185 debug_simd_finish(f, "__m256h", &array)
57186 }
58187}
59188
189+ impl DebugHexF16 for __m256i {
190+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
191+ let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 16]>(*self) };
192+ debug_simd_finish(f, "__m256i", &array)
193+ }
194+ }
195+
60196impl DebugHexF16 for __m512h {
61197 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
62198 let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 32]>(*self) };
63199 debug_simd_finish(f, "__m512h", &array)
64200 }
65201}
202+
203+ impl DebugHexF16 for __m512i {
204+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
205+ let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 32]>(*self) };
206+ debug_simd_finish(f, "__m512i", &array)
207+ }
208+ }
66209 "# ;
67210
68211pub const LANE_FUNCTION_HELPERS : & str = r#"
0 commit comments