Skip to content

Commit 4a1e02b

Browse files
author
Peter Zijlstra
committed
x86,retpoline: Optimize patch_retpoline()
Currently the very common retpoline: "CS CALL __x86_indirect_thunk_r11" is transformed into "CALL *R11; NOP3" for eIBRS/BHI_NO parts. Similarly, paranoid fineibt has: "CALL *R11; NOP". Recognise that CS stuffing can avoid the extra NOP. However, due to prefix decode penalties, make sure to not emit too many CS prefixes. Notably: "CS CALL __x86_indirect_thunk_rax" must not become "CS CS CS CS CALL *RAX". Prefix decode penalties are typically many more cycles than decoding an extra NOP. Additionally, if the retpoline is a tail-call, the "JMP *%\reg" should be followed by INT3 for straight-line-speculation mitigation. Since emit_indirect() now has a length argument, move this into emit_indirect() such that other users (paranoid-fineibt) also do this. Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lkml.kernel.org/r/20250902104627.GM4068168@noisy.programming.kicks-ass.net
1 parent 85a2d4a commit 4a1e02b

File tree

1 file changed

+26
-16
lines changed

1 file changed

+26
-16
lines changed

arch/x86/kernel/alternative.c

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -713,27 +713,43 @@ static inline bool is_jcc32(struct insn *insn)
713713
#if defined(CONFIG_MITIGATION_RETPOLINE) && defined(CONFIG_OBJTOOL)
714714

715715
/*
716-
* CALL/JMP *%\reg
716+
* [CS]{,3} CALL/JMP *%\reg [INT3]*
717717
*/
718-
static int emit_indirect(int op, int reg, u8 *bytes)
718+
static int emit_indirect(int op, int reg, u8 *bytes, int len)
719719
{
720+
int cs = 0, bp = 0;
720721
int i = 0;
721722
u8 modrm;
722723

724+
/*
725+
* Set @len to the excess bytes after writing the instruction.
726+
*/
727+
len -= 2 + (reg >= 8);
728+
WARN_ON_ONCE(len < 0);
729+
723730
switch (op) {
724731
case CALL_INSN_OPCODE:
725732
modrm = 0x10; /* Reg = 2; CALL r/m */
733+
/*
734+
* Additional NOP is better than prefix decode penalty.
735+
*/
736+
if (len <= 3)
737+
cs = len;
726738
break;
727739

728740
case JMP32_INSN_OPCODE:
729741
modrm = 0x20; /* Reg = 4; JMP r/m */
742+
bp = len;
730743
break;
731744

732745
default:
733746
WARN_ON_ONCE(1);
734747
return -1;
735748
}
736749

750+
while (cs--)
751+
bytes[i++] = 0x2e; /* CS-prefix */
752+
737753
if (reg >= 8) {
738754
bytes[i++] = 0x41; /* REX.B prefix */
739755
reg -= 8;
@@ -745,6 +761,9 @@ static int emit_indirect(int op, int reg, u8 *bytes)
745761
bytes[i++] = 0xff; /* opcode */
746762
bytes[i++] = modrm;
747763

764+
while (bp--)
765+
bytes[i++] = 0xcc; /* INT3 */
766+
748767
return i;
749768
}
750769

@@ -918,20 +937,11 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
918937
return emit_its_trampoline(addr, insn, reg, bytes);
919938
#endif
920939

921-
ret = emit_indirect(op, reg, bytes + i);
940+
ret = emit_indirect(op, reg, bytes + i, insn->length - i);
922941
if (ret < 0)
923942
return ret;
924943
i += ret;
925944

926-
/*
927-
* The compiler is supposed to EMIT an INT3 after every unconditional
928-
* JMP instruction due to AMD BTC. However, if the compiler is too old
929-
* or MITIGATION_SLS isn't enabled, we still need an INT3 after
930-
* indirect JMPs even on Intel.
931-
*/
932-
if (op == JMP32_INSN_OPCODE && i < insn->length)
933-
bytes[i++] = INT3_INSN_OPCODE;
934-
935945
for (; i < insn->length;)
936946
bytes[i++] = BYTES_NOP1;
937947

@@ -1421,8 +1431,7 @@ asm( ".pushsection .rodata \n"
14211431
"#fineibt_caller_size: \n"
14221432
" jne fineibt_paranoid_start+0xd \n"
14231433
"fineibt_paranoid_ind: \n"
1424-
" call *%r11 \n"
1425-
" nop \n"
1434+
" cs call *%r11 \n"
14261435
"fineibt_paranoid_end: \n"
14271436
".popsection \n"
14281437
);
@@ -1724,8 +1733,9 @@ static int cfi_rewrite_callers(s32 *start, s32 *end)
17241733
emit_paranoid_trampoline(addr + fineibt_caller_size,
17251734
&insn, 11, bytes + fineibt_caller_size);
17261735
} else {
1727-
ret = emit_indirect(op, 11, bytes + fineibt_paranoid_ind);
1728-
if (WARN_ON_ONCE(ret != 3))
1736+
int len = fineibt_paranoid_size - fineibt_paranoid_ind;
1737+
ret = emit_indirect(op, 11, bytes + fineibt_paranoid_ind, len);
1738+
if (WARN_ON_ONCE(ret != len))
17291739
continue;
17301740
}
17311741

0 commit comments

Comments
 (0)