Skip to content

Commit 11fa0a3

Browse files
committed
Start cleaning up aarch64
1 parent ed3101a commit 11fa0a3

File tree

4 files changed

+329
-24
lines changed

4 files changed

+329
-24
lines changed

crates/core_arch/src/aarch64/neon/generated.rs

Lines changed: 138 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -265,6 +265,90 @@ pub unsafe fn vcgeq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
265265
simd_ge(a, b)
266266
}
267267

268+
/// Halving add
269+
#[inline]
270+
#[target_feature(enable = "neon")]
271+
#[cfg_attr(test, assert_instr(uhadd))]
272+
pub unsafe fn vhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
273+
274+
#[allow(improper_ctypes)]
275+
extern "C" {
276+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v8i8")]
277+
fn vhadd_u8_(a: uint8x8_t, a: uint8x8_t) -> uint8x8_t;
278+
}
279+
vhadd_u8_(a, b)
280+
}
281+
282+
/// Halving add
283+
#[inline]
284+
#[target_feature(enable = "neon")]
285+
#[cfg_attr(test, assert_instr(uhadd))]
286+
pub unsafe fn vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
287+
288+
#[allow(improper_ctypes)]
289+
extern "C" {
290+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v16i8")]
291+
fn vhaddq_u8_(a: uint8x16_t, a: uint8x16_t) -> uint8x16_t;
292+
}
293+
vhaddq_u8_(a, b)
294+
}
295+
296+
/// Halving add
297+
#[inline]
298+
#[target_feature(enable = "neon")]
299+
#[cfg_attr(test, assert_instr(uhadd))]
300+
pub unsafe fn vhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
301+
302+
#[allow(improper_ctypes)]
303+
extern "C" {
304+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v4i16")]
305+
fn vhadd_u16_(a: uint16x4_t, a: uint16x4_t) -> uint16x4_t;
306+
}
307+
vhadd_u16_(a, b)
308+
}
309+
310+
/// Halving add
311+
#[inline]
312+
#[target_feature(enable = "neon")]
313+
#[cfg_attr(test, assert_instr(uhadd))]
314+
pub unsafe fn vhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
315+
316+
#[allow(improper_ctypes)]
317+
extern "C" {
318+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v8i16")]
319+
fn vhaddq_u16_(a: uint16x8_t, a: uint16x8_t) -> uint16x8_t;
320+
}
321+
vhaddq_u16_(a, b)
322+
}
323+
324+
/// Halving add
325+
#[inline]
326+
#[target_feature(enable = "neon")]
327+
#[cfg_attr(test, assert_instr(uhadd))]
328+
pub unsafe fn vhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
329+
330+
#[allow(improper_ctypes)]
331+
extern "C" {
332+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v2i32")]
333+
fn vhadd_u32_(a: uint32x2_t, a: uint32x2_t) -> uint32x2_t;
334+
}
335+
vhadd_u32_(a, b)
336+
}
337+
338+
/// Halving add
339+
#[inline]
340+
#[target_feature(enable = "neon")]
341+
#[cfg_attr(test, assert_instr(uhadd))]
342+
pub unsafe fn vhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
343+
344+
#[allow(improper_ctypes)]
345+
extern "C" {
346+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v4i32")]
347+
fn vhaddq_u32_(a: uint32x4_t, a: uint32x4_t) -> uint32x4_t;
348+
}
349+
vhaddq_u32_(a, b)
350+
}
351+
268352
/// Multiply
269353
#[inline]
270354
#[target_feature(enable = "neon")]
@@ -628,6 +712,60 @@ mod test {
628712
assert_eq!(r, e);
629713
}
630714

715+
#[simd_test(enable = "neon")]
716+
unsafe fn test_vhadd_u8() {
717+
let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
718+
let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
719+
let e: u8x8 = u8x8::new(21, 22, 22, 23, 23, 24, 24, 25);
720+
let r: u8x8 = transmute(vhadd_u8(transmute(a), transmute(b)));
721+
assert_eq!(r, e);
722+
}
723+
724+
#[simd_test(enable = "neon")]
725+
unsafe fn test_vhaddq_u8() {
726+
let a: u8x16 = u8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42);
727+
let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
728+
let e: u8x16 = u8x16::new(21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29);
729+
let r: u8x16 = transmute(vhaddq_u8(transmute(a), transmute(b)));
730+
assert_eq!(r, e);
731+
}
732+
733+
#[simd_test(enable = "neon")]
734+
unsafe fn test_vhadd_u16() {
735+
let a: u16x4 = u16x4::new(42, 42, 42, 42);
736+
let b: u16x4 = u16x4::new(1, 2, 3, 4);
737+
let e: u16x4 = u16x4::new(21, 22, 22, 23);
738+
let r: u16x4 = transmute(vhadd_u16(transmute(a), transmute(b)));
739+
assert_eq!(r, e);
740+
}
741+
742+
#[simd_test(enable = "neon")]
743+
unsafe fn test_vhaddq_u16() {
744+
let a: u16x8 = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42);
745+
let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
746+
let e: u16x8 = u16x8::new(21, 22, 22, 23, 23, 24, 24, 25);
747+
let r: u16x8 = transmute(vhaddq_u16(transmute(a), transmute(b)));
748+
assert_eq!(r, e);
749+
}
750+
751+
#[simd_test(enable = "neon")]
752+
unsafe fn test_vhadd_u32() {
753+
let a: u32x2 = u32x2::new(42, 42);
754+
let b: u32x2 = u32x2::new(1, 2);
755+
let e: u32x2 = u32x2::new(21, 22);
756+
let r: u32x2 = transmute(vhadd_u32(transmute(a), transmute(b)));
757+
assert_eq!(r, e);
758+
}
759+
760+
#[simd_test(enable = "neon")]
761+
unsafe fn test_vhaddq_u32() {
762+
let a: u32x4 = u32x4::new(42, 42, 42, 42);
763+
let b: u32x4 = u32x4::new(1, 2, 3, 4);
764+
let e: u32x4 = u32x4::new(21, 22, 22, 23);
765+
let r: u32x4 = transmute(vhaddq_u32(transmute(a), transmute(b)));
766+
assert_eq!(r, e);
767+
}
768+
631769
#[simd_test(enable = "neon")]
632770
unsafe fn test_vmul_f64() {
633771
let a: f64 = 1.0;

0 commit comments

Comments
 (0)