Skip to content

Commit 2702942

Browse files
CL/aarch64 back end: implement the wasm SIMD bitmask instructions
The `bitmask.{8x16,16x8,32x4}` instructions do not map neatly to any single AArch64 SIMD instruction, and instead need a sequence of around ten instructions. Because of this, this patch is somewhat longer and more complex than it would be for (e.g.) x64.

Main changes are:

* the relevant testsuite test (`simd_boolean.wast`) has been enabled on aarch64.
* at the CLIF level, add a new instruction `vhigh_bits`, into which these wasm instructions are to be translated.
* in the wasm->CLIF translation (code_translator.rs), translate into `vhigh_bits`. This is straightforward.
* in the CLIF->AArch64 translation (lower_inst.rs), translate `vhigh_bits` into equivalent sequences of AArch64 instructions. There is a different sequence for each of the `{8x16, 16x8, 32x4}` variants.

All other changes are AArch64-specific, and add instruction definitions needed by the previous step:

* Add two new families of AArch64 instructions: `VecShiftImm` (vector shift by immediate) and `VecExtract` (effectively a double-length vector shift).
* To the existing AArch64 family `VecRRR`, add a `zip1` variant. To the `VecLanesOp` family add an `addv` variant.
* Add supporting code for the above changes to AArch64 instructions:
  - getting the register uses (`aarch64_get_regs`)
  - mapping the registers (`aarch64_map_regs`)
  - printing instructions
  - emitting instructions (`impl MachInstEmit for Inst`). The handling of `VecShiftImm` is a bit complex.
  - emission tests for new instructions and variants.
1 parent b10e027 commit 2702942

File tree

8 files changed

+570
-5
lines changed

8 files changed

+570
-5
lines changed

build.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,8 +229,12 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
229229
return env::var("CARGO_CFG_TARGET_ARCH").unwrap() != "x86_64";
230230
}
231231

232+
// This is only implemented on aarch64.
233+
("simd", "simd_boolean") => {
234+
return env::var("CARGO_CFG_TARGET_ARCH").unwrap() != "aarch64";
235+
}
236+
232237
// These tests have simd operators which aren't implemented yet.
233-
("simd", "simd_boolean") => return true,
234238
("simd", "simd_f32x4_pmin_pmax") => return true,
235239
("simd", "simd_f32x4_rounding") => return true,
236240
("simd", "simd_f64x2_pmin_pmax") => return true,

cranelift/codegen/meta/src/shared/instructions.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2193,6 +2193,24 @@ pub(crate) fn define(
21932193
.operands_out(vec![s]),
21942194
);
21952195

2196+
let a = &Operand::new("a", TxN);
2197+
let x = &Operand::new("x", Int);
2198+
2199+
ig.push(
2200+
Inst::new(
2201+
"vhigh_bits",
2202+
r#"
2203+
Reduce a vector to a scalar integer.
2204+
2205+
Return a scalar integer, consisting of the concatenation of the most significant bit
2206+
of each lane of ``a``.
2207+
"#,
2208+
&formats.unary,
2209+
)
2210+
.operands_in(vec![a])
2211+
.operands_out(vec![x]),
2212+
);
2213+
21962214
let a = &Operand::new("a", &Int.as_bool());
21972215
let Cond = &Operand::new("Cond", &imm.intcc);
21982216
let x = &Operand::new("x", Int);

cranelift/codegen/src/isa/aarch64/inst/emit.rs

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1441,9 +1441,67 @@ impl MachInstEmit for Inst {
14411441
};
14421442
let (u, opcode) = match op {
14431443
VecLanesOp::Uminv => (0b1, 0b11010),
1444+
VecLanesOp::Addv => (0b0, 0b11011),
14441445
};
14451446
sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn));
14461447
}
1448+
&Inst::VecShiftImm {
1449+
op,
1450+
rd,
1451+
rn,
1452+
size,
1453+
imm,
1454+
} => {
1455+
let (is_shr, template) = match op {
1456+
VecShiftImmOp::Ushr => (true, 0b_011_011110_0000_000_000001_00000_00000_u32),
1457+
VecShiftImmOp::Sshr => (true, 0b_010_011110_0000_000_000001_00000_00000_u32),
1458+
VecShiftImmOp::Shl => (false, 0b_010_011110_0000_000_010101_00000_00000_u32),
1459+
};
1460+
let imm = imm as u32;
1461+
// Deal with the somewhat strange encoding scheme for, and limits on,
1462+
// the shift amount.
1463+
let immh_immb = match (size, is_shr) {
1464+
(VectorSize::Size64x2, true) if imm >= 1 && imm <= 64 => {
1465+
0b_1000_000_u32 | (64 - imm)
1466+
}
1467+
(VectorSize::Size32x4, true) if imm >= 1 && imm <= 32 => {
1468+
0b_0100_000_u32 | (32 - imm)
1469+
}
1470+
(VectorSize::Size16x8, true) if imm >= 1 && imm <= 16 => {
1471+
0b_0010_000_u32 | (16 - imm)
1472+
}
1473+
(VectorSize::Size8x16, true) if imm >= 1 && imm <= 8 => {
1474+
0b_0001_000_u32 | (8 - imm)
1475+
}
1476+
(VectorSize::Size64x2, false) if imm <= 63 => 0b_1000_000_u32 | imm,
1477+
(VectorSize::Size32x4, false) if imm <= 31 => 0b_0100_000_u32 | imm,
1478+
(VectorSize::Size16x8, false) if imm <= 15 => 0b_0010_000_u32 | imm,
1479+
(VectorSize::Size8x16, false) if imm <= 7 => 0b_0001_000_u32 | imm,
1480+
_ => panic!(
1481+
"aarch64: Inst::VecShiftImm: emit: invalid op/size/imm {:?}, {:?}, {:?}",
1482+
op, size, imm
1483+
),
1484+
};
1485+
let rn_enc = machreg_to_vec(rn);
1486+
let rd_enc = machreg_to_vec(rd.to_reg());
1487+
sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
1488+
}
1489+
&Inst::VecExtract { rd, rn, rm, imm4 } => {
1490+
if imm4 < 16 {
1491+
let template = 0b_01_101110_000_00000_0_0000_0_00000_00000_u32;
1492+
let rm_enc = machreg_to_vec(rm);
1493+
let rn_enc = machreg_to_vec(rn);
1494+
let rd_enc = machreg_to_vec(rd.to_reg());
1495+
sink.put4(
1496+
template | (rm_enc << 16) | ((imm4 as u32) << 11) | (rn_enc << 5) | rd_enc,
1497+
);
1498+
} else {
1499+
panic!(
1500+
"aarch64: Inst::VecExtract: emit: invalid extract index {}",
1501+
imm4
1502+
);
1503+
}
1504+
}
14471505
&Inst::VecTbl {
14481506
rd,
14491507
rn,
@@ -1827,6 +1885,7 @@ impl MachInstEmit for Inst {
18271885
debug_assert!(!size.is_128bits());
18281886
(0b001_01110_00_1 | enc_size << 1, 0b100000)
18291887
}
1888+
VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
18301889
};
18311890
let top11 = if is_float {
18321891
top11 | enc_float_size << 1

cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs

Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3175,6 +3175,54 @@ fn test_aarch64_binemit() {
31753175
"umlal v9.2d, v20.2s, v17.2s",
31763176
));
31773177

3178+
insns.push((
3179+
Inst::VecRRR {
3180+
alu_op: VecALUOp::Zip1,
3181+
rd: writable_vreg(16),
3182+
rn: vreg(12),
3183+
rm: vreg(1),
3184+
size: VectorSize::Size8x16,
3185+
},
3186+
"9039014E",
3187+
"zip1 v16.16b, v12.16b, v1.16b",
3188+
));
3189+
3190+
insns.push((
3191+
Inst::VecRRR {
3192+
alu_op: VecALUOp::Zip1,
3193+
rd: writable_vreg(2),
3194+
rn: vreg(13),
3195+
rm: vreg(6),
3196+
size: VectorSize::Size16x8,
3197+
},
3198+
"A239464E",
3199+
"zip1 v2.8h, v13.8h, v6.8h",
3200+
));
3201+
3202+
insns.push((
3203+
Inst::VecRRR {
3204+
alu_op: VecALUOp::Zip1,
3205+
rd: writable_vreg(8),
3206+
rn: vreg(12),
3207+
rm: vreg(14),
3208+
size: VectorSize::Size32x4,
3209+
},
3210+
"88398E4E",
3211+
"zip1 v8.4s, v12.4s, v14.4s",
3212+
));
3213+
3214+
insns.push((
3215+
Inst::VecRRR {
3216+
alu_op: VecALUOp::Zip1,
3217+
rd: writable_vreg(9),
3218+
rn: vreg(20),
3219+
rm: vreg(17),
3220+
size: VectorSize::Size64x2,
3221+
},
3222+
"893AD14E",
3223+
"zip1 v9.2d, v20.2d, v17.2d",
3224+
));
3225+
31783226
insns.push((
31793227
Inst::VecMisc {
31803228
op: VecMisc2::Not,
@@ -3461,6 +3509,168 @@ fn test_aarch64_binemit() {
34613509
"uminv s18, v4.4s",
34623510
));
34633511

3512+
insns.push((
3513+
Inst::VecLanes {
3514+
op: VecLanesOp::Addv,
3515+
rd: writable_vreg(2),
3516+
rn: vreg(29),
3517+
size: VectorSize::Size8x16,
3518+
},
3519+
"A2BB314E",
3520+
"addv b2, v29.16b",
3521+
));
3522+
3523+
insns.push((
3524+
Inst::VecLanes {
3525+
op: VecLanesOp::Addv,
3526+
rd: writable_vreg(3),
3527+
rn: vreg(21),
3528+
size: VectorSize::Size16x8,
3529+
},
3530+
"A3BA714E",
3531+
"addv h3, v21.8h",
3532+
));
3533+
3534+
insns.push((
3535+
Inst::VecLanes {
3536+
op: VecLanesOp::Addv,
3537+
rd: writable_vreg(18),
3538+
rn: vreg(5),
3539+
size: VectorSize::Size32x4,
3540+
},
3541+
"B2B8B14E",
3542+
"addv s18, v5.4s",
3543+
));
3544+
3545+
insns.push((
3546+
Inst::VecShiftImm {
3547+
op: VecShiftImmOp::Shl,
3548+
rd: writable_vreg(27),
3549+
rn: vreg(5),
3550+
imm: 7,
3551+
size: VectorSize::Size8x16,
3552+
},
3553+
"BB540F4F",
3554+
"shl v27.16b, v5.16b, #7",
3555+
));
3556+
3557+
insns.push((
3558+
Inst::VecShiftImm {
3559+
op: VecShiftImmOp::Shl,
3560+
rd: writable_vreg(1),
3561+
rn: vreg(30),
3562+
imm: 0,
3563+
size: VectorSize::Size8x16,
3564+
},
3565+
"C157084F",
3566+
"shl v1.16b, v30.16b, #0",
3567+
));
3568+
3569+
insns.push((
3570+
Inst::VecShiftImm {
3571+
op: VecShiftImmOp::Sshr,
3572+
rd: writable_vreg(26),
3573+
rn: vreg(6),
3574+
imm: 16,
3575+
size: VectorSize::Size16x8,
3576+
},
3577+
"DA04104F",
3578+
"sshr v26.8h, v6.8h, #16",
3579+
));
3580+
3581+
insns.push((
3582+
Inst::VecShiftImm {
3583+
op: VecShiftImmOp::Sshr,
3584+
rd: writable_vreg(3),
3585+
rn: vreg(19),
3586+
imm: 1,
3587+
size: VectorSize::Size16x8,
3588+
},
3589+
"63061F4F",
3590+
"sshr v3.8h, v19.8h, #1",
3591+
));
3592+
3593+
insns.push((
3594+
Inst::VecShiftImm {
3595+
op: VecShiftImmOp::Ushr,
3596+
rd: writable_vreg(25),
3597+
rn: vreg(6),
3598+
imm: 32,
3599+
size: VectorSize::Size32x4,
3600+
},
3601+
"D904206F",
3602+
"ushr v25.4s, v6.4s, #32",
3603+
));
3604+
3605+
insns.push((
3606+
Inst::VecShiftImm {
3607+
op: VecShiftImmOp::Ushr,
3608+
rd: writable_vreg(5),
3609+
rn: vreg(21),
3610+
imm: 1,
3611+
size: VectorSize::Size32x4,
3612+
},
3613+
"A5063F6F",
3614+
"ushr v5.4s, v21.4s, #1",
3615+
));
3616+
3617+
insns.push((
3618+
Inst::VecShiftImm {
3619+
op: VecShiftImmOp::Shl,
3620+
rd: writable_vreg(22),
3621+
rn: vreg(13),
3622+
imm: 63,
3623+
size: VectorSize::Size64x2,
3624+
},
3625+
"B6557F4F",
3626+
"shl v22.2d, v13.2d, #63",
3627+
));
3628+
3629+
insns.push((
3630+
Inst::VecShiftImm {
3631+
op: VecShiftImmOp::Shl,
3632+
rd: writable_vreg(23),
3633+
rn: vreg(9),
3634+
imm: 0,
3635+
size: VectorSize::Size64x2,
3636+
},
3637+
"3755404F",
3638+
"shl v23.2d, v9.2d, #0",
3639+
));
3640+
3641+
insns.push((
3642+
Inst::VecExtract {
3643+
rd: writable_vreg(1),
3644+
rn: vreg(30),
3645+
rm: vreg(17),
3646+
imm4: 0,
3647+
},
3648+
"C103116E",
3649+
"ext v1.16b, v30.16b, v17.16b, #0",
3650+
));
3651+
3652+
insns.push((
3653+
Inst::VecExtract {
3654+
rd: writable_vreg(1),
3655+
rn: vreg(30),
3656+
rm: vreg(17),
3657+
imm4: 8,
3658+
},
3659+
"C143116E",
3660+
"ext v1.16b, v30.16b, v17.16b, #8",
3661+
));
3662+
3663+
insns.push((
3664+
Inst::VecExtract {
3665+
rd: writable_vreg(1),
3666+
rn: vreg(30),
3667+
rm: vreg(17),
3668+
imm4: 15,
3669+
},
3670+
"C17B116E",
3671+
"ext v1.16b, v30.16b, v17.16b, #15",
3672+
));
3673+
34643674
insns.push((
34653675
Inst::VecTbl {
34663676
rd: writable_vreg(0),

0 commit comments

Comments
 (0)