Fix first hive of intrinsic changes

Licenser · Licenser · commit 83d2b012ebe1 · 2020-04-05T18:13:51.000+02:00
diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs
@@ -1,3 +1,10 @@
+// This code is automatically generated. DO NOT MODIFY.
+//
+// Instead, modify `crates/stdarch-gen/neon.spec` and run the following command to re-generate this file:
+//
+// ```
+// OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen -- crates/stdarch-gen/neon.spec
+// ```
 use super::*;
 #[cfg(test)]
 use stdarch_test::assert_instr;
diff --git a/crates/core_arch/src/arm/neon/generated.rs b/crates/core_arch/src/arm/neon/generated.rs
diff --git a/crates/core_arch/src/arm/neon/mod.rs b/crates/core_arch/src/arm/neon/mod.rs
@@ -189,7 +189,7 @@ extern "C" {
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovnu))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn.u64))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqxtn))]
 pub unsafe fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t {
     vqmovn_u64_(a)
@@ -1161,7 +1161,7 @@ pub unsafe fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_args_required_const(1)]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(mov, imm5 = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov.32, imm5 = 1))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mov, imm5 = 1))]
 // Based on the discussioj in https://github.com/rust-lang/stdarch/pull/792
 // `mov` seems to be an acceptable intrinsic to compile to
@@ -1179,10 +1179,9 @@ pub unsafe fn vgetq_lane_u64(v: uint64x2_t, imm5: i32) -> u64 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_args_required_const(1)]
-#[cfg_attr(test, assert_instr(fmov, imm5 = 0))]
-// gcc also turns this into a fmov instead of a umove
-// https://clang.godbolt.org/z/J5xS2T
-// #[cfg_attr(test, assert_instr(umov, imm5 = 0))]
+#[cfg_attr(test, assert_instr(vmov.32, imm5 = 0))]
+// FIXME: on 32-bit ARM this seems to be turned into two `vmov.32` instructions;
+// the correctness of this assertion still needs to be validated.
 pub unsafe fn vget_lane_u64(v: uint64x1_t, imm5: i32) -> u64 {
     if imm5 != 0 {
         unreachable_unchecked()
@@ -1195,7 +1194,7 @@ pub unsafe fn vget_lane_u64(v: uint64x1_t, imm5: i32) -> u64 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_args_required_const(1)]
-#[cfg_attr(test, assert_instr(umov, imm5 = 0))]
+#[cfg_attr(test, assert_instr(vmov.u16, imm5 = 0))]
 pub unsafe fn vgetq_lane_u16(v: uint16x8_t, imm5: i32) -> u16 {
     if (imm5) < 0 || (imm5) > 7 {
         unreachable_unchecked()
@@ -1209,9 +1208,7 @@ pub unsafe fn vgetq_lane_u16(v: uint16x8_t, imm5: i32) -> u16 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_args_required_const(1)]
-// see: https://clang.godbolt.org/z/J5xS2T
-//#[cfg_attr(test, assert_instr(umov, imm5 = 0))]
-#[cfg_attr(test, assert_instr(fmov, imm5 = 0))]
+#[cfg_attr(test, assert_instr(vmov.32, imm5 = 0))]
 pub unsafe fn vgetq_lane_u32(v: uint32x4_t, imm5: i32) -> u32 {
     if (imm5) < 0 || (imm5) > 3 {
         unreachable_unchecked()
@@ -1225,7 +1222,7 @@ pub unsafe fn vgetq_lane_u32(v: uint32x4_t, imm5: i32) -> u32 {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[rustc_args_required_const(1)]
-#[cfg_attr(test, assert_instr(umov, imm5 = 0))]
+#[cfg_attr(test, assert_instr(vmov.u8, imm5 = 0))]
 pub unsafe fn vget_lane_u8(v: uint8x8_t, imm5: i32) -> u8 {
     if (imm5) < 0 || (imm5) > 7 {
         unreachable_unchecked()
@@ -1238,7 +1235,7 @@ pub unsafe fn vget_lane_u8(v: uint8x8_t, imm5: i32) -> u8 {
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(dup))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vdup.8))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))]
 pub unsafe fn vdupq_n_s8(value: i8) -> int8x16_t {
     int8x16_t(
@@ -1251,7 +1248,7 @@ pub unsafe fn vdupq_n_s8(value: i8) -> int8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(dup))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vdup.8))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))]
 pub unsafe fn vdupq_n_u8(value: u8) -> uint8x16_t {
     uint8x16_t(
@@ -1264,7 +1261,7 @@ pub unsafe fn vdupq_n_u8(value: u8) -> uint8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(dup))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vdup.8))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))]
 pub unsafe fn vmovq_n_u8(value: u8) -> uint8x16_t {
     vdupq_n_u8(value)
@@ -1328,7 +1325,7 @@ pub unsafe fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(ushr, imm3 = 1))]
+#[cfg_attr(test, assert_instr(vshr.u8, imm3 = 1))]
 #[rustc_args_required_const(1)]
 pub unsafe fn vshrq_n_u8(a: uint8x16_t, imm3: i32) -> uint8x16_t {
     if imm3 < 0 || imm3 > 7 {
@@ -1359,7 +1356,7 @@ pub unsafe fn vshrq_n_u8(a: uint8x16_t, imm3: i32) -> uint8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(shl, imm3 = 1))]
+#[cfg_attr(test, assert_instr(vshl.s8, imm3 = 1))]
 #[rustc_args_required_const(1)]
 pub unsafe fn vshlq_n_u8(a: uint8x16_t, imm3: i32) -> uint8x16_t {
     if imm3 < 0 || imm3 > 7 {
@@ -1390,7 +1387,7 @@ pub unsafe fn vshlq_n_u8(a: uint8x16_t, imm3: i32) -> uint8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(ext, n = 3))]
+#[cfg_attr(test, assert_instr(vext.8, n = 3))]
 #[rustc_args_required_const(2)]
 pub unsafe fn vextq_s8(a: int8x16_t, b: int8x16_t, n: i32) -> int8x16_t {
     if n < 0 || n > 15 {
@@ -1495,7 +1492,7 @@ pub unsafe fn vextq_s8(a: int8x16_t, b: int8x16_t, n: i32) -> int8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(ext, n = 3))]
+#[cfg_attr(test, assert_instr(vext.8, n = 3))]
 #[rustc_args_required_const(2)]
 pub unsafe fn vextq_u8(a: uint8x16_t, b: uint8x16_t, n: i32) -> uint8x16_t {
     if n < 0 || n > 15 {
diff --git a/crates/stdarch-gen/README.md b/crates/stdarch-gen/README.md
@@ -1,8 +1,8 @@
 # Neon intrinsic code generator
 
-A small toll that allows to quickly generate intrinsics for the NEON architecture.
+A small tool that allows you to quickly generate intrinsics for the NEON architecture.
 
-The specificaiton for the intrinsics can be found in `neon.spce`.
+The specification for the intrinsics can be found in `neon.spec`.
 
 To run and re-generate the code run the following from the root of the `stdarch` crate.
 
diff --git a/crates/stdarch-gen/neon.spec b/crates/stdarch-gen/neon.spec
@@ -115,7 +115,7 @@ validate TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE,
 aarch64 = cmeq
 generate uint64x*_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t, poly64x1_t:uint64x1_t, poly64x2_t:uint64x2_t
 
-arm = cmeq
+arm = vceq.
 generate uint*_t, int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t
 
 /// Floating-point compare equal
@@ -128,7 +128,7 @@ validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
 aarch64 = fcmeq
 generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
 
-arm = fcmeq
+arm = vceq.
 // we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
 generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
 
@@ -145,7 +145,7 @@ validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
 aarch64 = cmgt
 generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
 
-arm = cmgt
+arm = vcgt.
 generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t
 
 /// Compare unsigned highe
@@ -158,7 +158,7 @@ validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
 aarch64 = cmhi
 generate uint64x*_t
 
-arm = cmhi
+arm = vcgt.
 generate uint*_t
 
 /// Floating-point compare greater than
@@ -171,7 +171,7 @@ validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
 aarch64 = fcmgt
 generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
 
-arm = fcmgt
+arm = vcgt.
 // we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
 generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
 
@@ -188,7 +188,7 @@ validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
 aarch64 = cmgt
 generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
 
-arm = cmgt
+arm = vcgt.
 generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t
 
 /// Compare unsigned less than
@@ -201,7 +201,7 @@ validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
 aarch64 = cmhi
 generate uint64x*_t
 
-arm = cmhi
+arm = vcgt.
 generate uint*_t
 
 /// Floating-point compare less than
@@ -214,7 +214,7 @@ validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
 aarch64 = fcmgt
 generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
 
-arm = fcmgt
+arm = vcgt.
 // we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
 generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
 
@@ -232,7 +232,7 @@ validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
 aarch64 = cmge
 generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
 
-arm = cmge
+arm = vcge.
 generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t
 
 /// Compare unsigned less than or equal
@@ -245,7 +245,7 @@ validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
 aarch64 = cmhs
 generate uint64x*_t
 
-arm = cmhs
+arm = vcge.
 generate uint*_t
 
 /// Floating-point compare less than or equal
@@ -258,7 +258,7 @@ aarch64 = fcmge
 generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
 
 // we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
-arm = fcmge
+arm = vcge.
 generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
 
 ////////////////////
@@ -275,7 +275,7 @@ validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
 aarch64 = cmge
 generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
 
-arm = cmge
+arm = vcge.
 generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t
 
 /// Compare unsigned greater than or equal
@@ -288,7 +288,7 @@ validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
 aarch64 = cmhs
 generate uint64x*_t
 
-arm = cmhs
+arm = vcge.
 generate uint*_t
 
 /// Floating-point compare greater than or equal
@@ -301,7 +301,7 @@ validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
 aarch64 = fcmge
 generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
 
-arm = fcmge
+arm = vcge.
 // we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
 generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
 
@@ -316,7 +316,7 @@ link-arm = vqsubu._EXT_
 link-aarch64 = uqsub._EXT_
 generate uint*_t
 
-arm = sqsub
+arm = vqsub.
 link-arm = vqsubs._EXT_
 link-aarch64 = sqsub._EXT_
 generate int*_t
@@ -332,7 +332,7 @@ link-arm = vhaddu._EXT_
 link-aarch64 = uhadd._EXT_
 generate uint*_t
 
-arm = shadd
+arm = vhadd.
 link-arm = vhadds._EXT_
 link-aarch64 = shadd._EXT_
 generate int*_t
@@ -348,7 +348,7 @@ link-arm = vrhaddu._EXT_
 link-aarch64 = urhadd._EXT_
 generate uint*_t
 
-arm = srhadd
+arm = vrhadd.
 link-arm = vrhadds._EXT_
 link-aarch64 = srhadd._EXT_
 generate int*_t
@@ -364,7 +364,7 @@ link-arm = vqaddu._EXT_
 link-aarch64 = uqadd._EXT_
 generate uint*_t
 
-arm = sqadd
+arm = vqadd.
 link-arm = vqadds._EXT_
 link-aarch64 = sqadd._EXT_
 generate int*_t
@@ -393,7 +393,7 @@ name = vmul
 a = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
 b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
 validate 1, 4, 3, 8, 5, 12, 7, 16, 9, 20, 11, 24, 13, 28, 15, 32
-arm = mul
+arm = vmul.
 fn = simd_mul
 generate int*_t, uint*_t
 
@@ -407,7 +407,7 @@ validate 2.0, 6.0, 4.0, 10.0
 aarch64 = fmul
 generate float64x*_t
 
-arm = fmul
+arm = vmul.
 generate float*_t
 
 
@@ -430,7 +430,7 @@ validate 0.0, 2.0, 0.0, 4.0
 aarch64 = fsub
 generate float64x*_t
 
-arm = fsub
+arm = vsub.
 generate float*_t
 
 
@@ -445,7 +445,7 @@ link-arm = vhsubu._EXT_
 link-aarch64 = uhsub._EXT_
 generate uint*_t
 
-arm = shsub
+arm = vhsub.
 link-arm = vhsubs._EXT_
 link-aarch64 = shsub._EXT_
 generate int*_t
diff --git a/crates/stdarch-gen/src/main.rs b/crates/stdarch-gen/src/main.rs