Skip to content

Commit 83d2b01

Browse files
committed
Fix first hive of intrinsic changes
1 parent 3823ad0 commit 83d2b01

File tree

6 files changed

+265
-203
lines changed

6 files changed

+265
-203
lines changed

crates/core_arch/src/aarch64/neon/generated.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
// This code is automatically generated. DO NOT MODIFY.
2+
//
3+
// Instead, modify `crates/stdarch-gen/neon.spec` and run the following command to re-generate this file:
4+
//
5+
// ```
6+
// OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen -- crates/stdarch-gen/neon.spec
7+
// ```
18
use super::*;
29
#[cfg(test)]
310
use stdarch_test::assert_instr;

crates/core_arch/src/arm/neon/generated.rs

Lines changed: 165 additions & 158 deletions
Large diffs are not rendered by default.

crates/core_arch/src/arm/neon/mod.rs

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ extern "C" {
189189
#[inline]
190190
#[target_feature(enable = "neon")]
191191
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
192-
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovnu))]
192+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn.u64))]
193193
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqxtn))]
194194
pub unsafe fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t {
195195
vqmovn_u64_(a)
@@ -1161,7 +1161,7 @@ pub unsafe fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t
11611161
#[target_feature(enable = "neon")]
11621162
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
11631163
#[rustc_args_required_const(1)]
1164-
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(mov, imm5 = 1))]
1164+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov.32, imm5 = 1))]
11651165
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mov, imm5 = 1))]
11661166
// Based on the discussion in https://github.com/rust-lang/stdarch/pull/792
11671167
// `mov` seems to be an acceptable instruction to compile to
@@ -1179,10 +1179,9 @@ pub unsafe fn vgetq_lane_u64(v: uint64x2_t, imm5: i32) -> u64 {
11791179
#[target_feature(enable = "neon")]
11801180
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
11811181
#[rustc_args_required_const(1)]
1182-
#[cfg_attr(test, assert_instr(fmov, imm5 = 0))]
1183-
// gcc also turns this into a fmov instead of a umove
1184-
// https://clang.godbolt.org/z/J5xS2T
1185-
// #[cfg_attr(test, assert_instr(umov, imm5 = 0))]
1182+
#[cfg_attr(test, assert_instr(vmov.32, imm5 = 0))]
1183+
// FIXME: on 32-bit this seems to be turned into two vmov.32 instructions;
1184+
// validate correctness
11861185
pub unsafe fn vget_lane_u64(v: uint64x1_t, imm5: i32) -> u64 {
11871186
if imm5 != 0 {
11881187
unreachable_unchecked()
@@ -1195,7 +1194,7 @@ pub unsafe fn vget_lane_u64(v: uint64x1_t, imm5: i32) -> u64 {
11951194
#[target_feature(enable = "neon")]
11961195
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
11971196
#[rustc_args_required_const(1)]
1198-
#[cfg_attr(test, assert_instr(umov, imm5 = 0))]
1197+
#[cfg_attr(test, assert_instr(vmov.u16, imm5 = 0))]
11991198
pub unsafe fn vgetq_lane_u16(v: uint16x8_t, imm5: i32) -> u16 {
12001199
if (imm5) < 0 || (imm5) > 7 {
12011200
unreachable_unchecked()
@@ -1209,9 +1208,7 @@ pub unsafe fn vgetq_lane_u16(v: uint16x8_t, imm5: i32) -> u16 {
12091208
#[target_feature(enable = "neon")]
12101209
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
12111210
#[rustc_args_required_const(1)]
1212-
// see: https://clang.godbolt.org/z/J5xS2T
1213-
//#[cfg_attr(test, assert_instr(umov, imm5 = 0))]
1214-
#[cfg_attr(test, assert_instr(fmov, imm5 = 0))]
1211+
#[cfg_attr(test, assert_instr(vmov.32, imm5 = 0))]
12151212
pub unsafe fn vgetq_lane_u32(v: uint32x4_t, imm5: i32) -> u32 {
12161213
if (imm5) < 0 || (imm5) > 3 {
12171214
unreachable_unchecked()
@@ -1225,7 +1222,7 @@ pub unsafe fn vgetq_lane_u32(v: uint32x4_t, imm5: i32) -> u32 {
12251222
#[target_feature(enable = "neon")]
12261223
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
12271224
#[rustc_args_required_const(1)]
1228-
#[cfg_attr(test, assert_instr(umov, imm5 = 0))]
1225+
#[cfg_attr(test, assert_instr(vmov.u8, imm5 = 0))]
12291226
pub unsafe fn vget_lane_u8(v: uint8x8_t, imm5: i32) -> u8 {
12301227
if (imm5) < 0 || (imm5) > 7 {
12311228
unreachable_unchecked()
@@ -1238,7 +1235,7 @@ pub unsafe fn vget_lane_u8(v: uint8x8_t, imm5: i32) -> u8 {
12381235
#[inline]
12391236
#[target_feature(enable = "neon")]
12401237
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1241-
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(dup))]
1238+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vdup.8))]
12421239
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))]
12431240
pub unsafe fn vdupq_n_s8(value: i8) -> int8x16_t {
12441241
int8x16_t(
@@ -1251,7 +1248,7 @@ pub unsafe fn vdupq_n_s8(value: i8) -> int8x16_t {
12511248
#[inline]
12521249
#[target_feature(enable = "neon")]
12531250
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1254-
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(dup))]
1251+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vdup.8))]
12551252
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))]
12561253
pub unsafe fn vdupq_n_u8(value: u8) -> uint8x16_t {
12571254
uint8x16_t(
@@ -1264,7 +1261,7 @@ pub unsafe fn vdupq_n_u8(value: u8) -> uint8x16_t {
12641261
#[inline]
12651262
#[target_feature(enable = "neon")]
12661263
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1267-
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(dup))]
1264+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vdup.8))]
12681265
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))]
12691266
pub unsafe fn vmovq_n_u8(value: u8) -> uint8x16_t {
12701267
vdupq_n_u8(value)
@@ -1328,7 +1325,7 @@ pub unsafe fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t {
13281325
#[inline]
13291326
#[target_feature(enable = "neon")]
13301327
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1331-
#[cfg_attr(test, assert_instr(ushr, imm3 = 1))]
1328+
#[cfg_attr(test, assert_instr(vshr.u8, imm3 = 1))]
13321329
#[rustc_args_required_const(1)]
13331330
pub unsafe fn vshrq_n_u8(a: uint8x16_t, imm3: i32) -> uint8x16_t {
13341331
if imm3 < 0 || imm3 > 7 {
@@ -1359,7 +1356,7 @@ pub unsafe fn vshrq_n_u8(a: uint8x16_t, imm3: i32) -> uint8x16_t {
13591356
#[inline]
13601357
#[target_feature(enable = "neon")]
13611358
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1362-
#[cfg_attr(test, assert_instr(shl, imm3 = 1))]
1359+
#[cfg_attr(test, assert_instr(vshl.s8, imm3 = 1))]
13631360
#[rustc_args_required_const(1)]
13641361
pub unsafe fn vshlq_n_u8(a: uint8x16_t, imm3: i32) -> uint8x16_t {
13651362
if imm3 < 0 || imm3 > 7 {
@@ -1390,7 +1387,7 @@ pub unsafe fn vshlq_n_u8(a: uint8x16_t, imm3: i32) -> uint8x16_t {
13901387
#[inline]
13911388
#[target_feature(enable = "neon")]
13921389
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1393-
#[cfg_attr(test, assert_instr(ext, n = 3))]
1390+
#[cfg_attr(test, assert_instr(vext.8, n = 3))]
13941391
#[rustc_args_required_const(2)]
13951392
pub unsafe fn vextq_s8(a: int8x16_t, b: int8x16_t, n: i32) -> int8x16_t {
13961393
if n < 0 || n > 15 {
@@ -1495,7 +1492,7 @@ pub unsafe fn vextq_s8(a: int8x16_t, b: int8x16_t, n: i32) -> int8x16_t {
14951492
#[inline]
14961493
#[target_feature(enable = "neon")]
14971494
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1498-
#[cfg_attr(test, assert_instr(ext, n = 3))]
1495+
#[cfg_attr(test, assert_instr(vext.8, n = 3))]
14991496
#[rustc_args_required_const(2)]
15001497
pub unsafe fn vextq_u8(a: uint8x16_t, b: uint8x16_t, n: i32) -> uint8x16_t {
15011498
if n < 0 || n > 15 {

crates/stdarch-gen/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
# Neon intrinsic code generator
22

3-
A small toll that allows to quickly generate intrinsics for the NEON architecture.
3+
A small tool that lets you quickly generate intrinsics for the NEON architecture.
44

5-
The specificaiton for the intrinsics can be found in `neon.spce`.
5+
The specification for the intrinsics can be found in `neon.spec`.
66

77
To run and re-generate the code run the following from the root of the `stdarch` crate.
88

crates/stdarch-gen/neon.spec

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ validate TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE,
115115
aarch64 = cmeq
116116
generate uint64x*_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t, poly64x1_t:uint64x1_t, poly64x2_t:uint64x2_t
117117

118-
arm = cmeq
118+
arm = vceq.
119119
generate uint*_t, int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t
120120

121121
/// Floating-point compare equal
@@ -128,7 +128,7 @@ validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
128128
aarch64 = fcmeq
129129
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
130130

131-
arm = fcmeq
131+
arm = vceq.
132132
// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
133133
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
134134

@@ -145,7 +145,7 @@ validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
145145
aarch64 = cmgt
146146
generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
147147

148-
arm = cmgt
148+
arm = vcgt.
149149
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t
150150

151151
/// Compare unsigned higher
@@ -158,7 +158,7 @@ validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
158158
aarch64 = cmhi
159159
generate uint64x*_t
160160

161-
arm = cmhi
161+
arm = vcgt.
162162
generate uint*_t
163163

164164
/// Floating-point compare greater than
@@ -171,7 +171,7 @@ validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
171171
aarch64 = fcmgt
172172
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
173173

174-
arm = fcmgt
174+
arm = vcgt.
175175
// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
176176
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
177177

@@ -188,7 +188,7 @@ validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
188188
aarch64 = cmgt
189189
generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
190190

191-
arm = cmgt
191+
arm = vcgt.
192192
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t
193193

194194
/// Compare unsigned less than
@@ -201,7 +201,7 @@ validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
201201
aarch64 = cmhi
202202
generate uint64x*_t
203203

204-
arm = cmhi
204+
arm = vcgt.
205205
generate uint*_t
206206

207207
/// Floating-point compare less than
@@ -214,7 +214,7 @@ validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
214214
aarch64 = fcmgt
215215
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
216216

217-
arm = fcmgt
217+
arm = vcgt.
218218
// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
219219
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
220220

@@ -232,7 +232,7 @@ validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
232232
aarch64 = cmge
233233
generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
234234

235-
arm = cmge
235+
arm = vcge.
236236
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t
237237

238238
/// Compare unsigned less than or equal
@@ -245,7 +245,7 @@ validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
245245
aarch64 = cmhs
246246
generate uint64x*_t
247247

248-
arm = cmhs
248+
arm = vcge.
249249
generate uint*_t
250250

251251
/// Floating-point compare less than or equal
@@ -258,7 +258,7 @@ aarch64 = fcmge
258258
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
259259

260260
// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
261-
arm = fcmge
261+
arm = vcge.
262262
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
263263

264264
////////////////////
@@ -275,7 +275,7 @@ validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
275275
aarch64 = cmge
276276
generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
277277

278-
arm = cmge
278+
arm = vcge.
279279
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t
280280

281281
/// Compare unsigned greater than or equal
@@ -288,7 +288,7 @@ validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
288288
aarch64 = cmhs
289289
generate uint64x*_t
290290

291-
arm = cmhs
291+
arm = vcge.
292292
generate uint*_t
293293

294294
/// Floating-point compare greater than or equal
@@ -301,7 +301,7 @@ validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
301301
aarch64 = fcmge
302302
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
303303

304-
arm = fcmge
304+
arm = vcge.
305305
// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
306306
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
307307

@@ -316,7 +316,7 @@ link-arm = vqsubu._EXT_
316316
link-aarch64 = uqsub._EXT_
317317
generate uint*_t
318318

319-
arm = sqsub
319+
arm = vqsub.
320320
link-arm = vqsubs._EXT_
321321
link-aarch64 = sqsub._EXT_
322322
generate int*_t
@@ -332,7 +332,7 @@ link-arm = vhaddu._EXT_
332332
link-aarch64 = uhadd._EXT_
333333
generate uint*_t
334334

335-
arm = shadd
335+
arm = vhadd.
336336
link-arm = vhadds._EXT_
337337
link-aarch64 = shadd._EXT_
338338
generate int*_t
@@ -348,7 +348,7 @@ link-arm = vrhaddu._EXT_
348348
link-aarch64 = urhadd._EXT_
349349
generate uint*_t
350350

351-
arm = srhadd
351+
arm = vrhadd.
352352
link-arm = vrhadds._EXT_
353353
link-aarch64 = srhadd._EXT_
354354
generate int*_t
@@ -364,7 +364,7 @@ link-arm = vqaddu._EXT_
364364
link-aarch64 = uqadd._EXT_
365365
generate uint*_t
366366

367-
arm = sqadd
367+
arm = vqadd.
368368
link-arm = vqadds._EXT_
369369
link-aarch64 = sqadd._EXT_
370370
generate int*_t
@@ -393,7 +393,7 @@ name = vmul
393393
a = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
394394
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
395395
validate 1, 4, 3, 8, 5, 12, 7, 16, 9, 20, 11, 24, 13, 28, 15, 32
396-
arm = mul
396+
arm = vmul.
397397
fn = simd_mul
398398
generate int*_t, uint*_t
399399

@@ -407,7 +407,7 @@ validate 2.0, 6.0, 4.0, 10.0
407407
aarch64 = fmul
408408
generate float64x*_t
409409

410-
arm = fmul
410+
arm = vmul.
411411
generate float*_t
412412

413413

@@ -430,7 +430,7 @@ validate 0.0, 2.0, 0.0, 4.0
430430
aarch64 = fsub
431431
generate float64x*_t
432432

433-
arm = fsub
433+
arm = vsub.
434434
generate float*_t
435435

436436

@@ -445,7 +445,7 @@ link-arm = vhsubu._EXT_
445445
link-aarch64 = uhsub._EXT_
446446
generate uint*_t
447447

448-
arm = shsub
448+
arm = vhsub.
449449
link-arm = vhsubs._EXT_
450450
link-aarch64 = shsub._EXT_
451451
generate int*_t

0 commit comments

Comments
 (0)