Skip to content

Commit 6ee2e37

Browse files
Wunkologibbed
authored and committed
[x64] Add AVX512 optimizations for OPCODE_VECTOR_COMPARE_UGT(Integer)
AVX512 has native unsigned integer comparison instructions, removing the need to XOR the most-significant bit with a constant in memory in order to use the signed comparison instructions. These instructions only write to a k-mask register, though, and need an additional `vpmovm2*` instruction to turn the mask register into a vector mask. As of Icelake: `vpcmpu*` is all L3/T1; `vpmovm2d` is L1/T0.33; `vpmovm2{b,w}` is L3/T0.33. As of Zen4: `vpcmpu*` is all L3/T0.50; `vpmovm2*` is all L1/T0.25.
1 parent 121bf93 commit 6ee2e37

File tree

1 file changed

+37
-0
lines changed

1 file changed

+37
-0
lines changed

src/xenia/cpu/backend/x64/x64_seq_vector.cc

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,43 @@ struct VECTOR_COMPARE_UGT_V128
409409
: Sequence<VECTOR_COMPARE_UGT_V128,
410410
I<OPCODE_VECTOR_COMPARE_UGT, V128Op, V128Op, V128Op>> {
411411
static void Emit(X64Emitter& e, const EmitArgType& i) {
412+
if (e.IsFeatureEnabled(kX64EmitAVX512Ortho | kX64EmitAVX512BW |
413+
kX64EmitAVX512DQ) &&
414+
(i.instr->flags != FLOAT32_TYPE)) {
415+
Xmm src1 = e.xmm0;
416+
if (i.src1.is_constant) {
417+
e.LoadConstantXmm(src1, i.src1.constant());
418+
} else {
419+
src1 = i.src1;
420+
}
421+
422+
Xmm src2 = e.xmm1;
423+
if (i.src2.is_constant) {
424+
e.LoadConstantXmm(src2, i.src2.constant());
425+
} else {
426+
src2 = i.src2;
427+
}
428+
429+
switch (i.instr->flags) {
430+
case INT8_TYPE:
431+
e.vpcmpub(e.k1, src1, src2, 0x6);
432+
e.vpmovm2b(i.dest, e.k1);
433+
break;
434+
case INT16_TYPE:
435+
e.vpcmpuw(e.k1, src1, src2, 0x6);
436+
e.vpmovm2w(i.dest, e.k1);
437+
break;
438+
case INT32_TYPE:
439+
e.vpcmpud(e.k1, src1, src2, 0x6);
440+
e.vpmovm2d(i.dest, e.k1);
441+
break;
442+
default:
443+
assert_always();
444+
break;
445+
}
446+
return;
447+
}
448+
412449
Xbyak::Address sign_addr = e.ptr[e.rax]; // dummy
413450
switch (i.instr->flags) {
414451
case INT8_TYPE:

0 commit comments

Comments
 (0)