Skip to content

Commit 85a2d4a

Browse files
author
Peter Zijlstra
committed
x86,ibt: Use UDB instead of 0xEA
A while ago [0] FineIBT started using the 0xEA instruction to raise #UD. All existing parts will generate #UD in 64bit mode on that instruction. However; Intel/AMD have not blessed using this instruction, it is on their 'reserved' opcode list for future use. Peter Anvin worked the committees and got use of 0xD6 blessed, it shall be called UDB (per the next SDM or so), and it being a single byte instruction is easy to slip into a single byte immediate -- as is done by this very patch. Reworking the FineIBT code to use UDB wasn't entirely trivial. Notably the FineIBT-BHI1 case ran out of bytes. In order to condense the encoding some it was required to move the hash register from R10D to EAX (thanks hpa!). Per the x86_64 ABI, RAX is used to pass the number of vector registers for vararg function calls -- something that should not happen in the kernel. More so, the kernel is built with -mskip-rax-setup, which should leave RAX completely unused, allowing its re-use. [ For BPF; while the bpf2bpf tail-call uses RAX in its calling convention, that does not use CFI and is unaffected. Only the 'regular' C->BPF transition is covered by CFI. ] The ENDBR poison value is changed from 'OSP NOP3' to 'NOPL -42(%RAX)', this is basically NOP4 but with UDB as its immediate. As such it is still a non-standard NOP value unique to prior ENDBR sites, but now also provides UDB. Per Agner Fog's optimization guide, Jcc is assumed not-taken. That is, the expected path should be the fallthrough case for improved throughput. Since the preamble now relies on the ENDBR poison to provide UDB, the code is changed to write the poison right along with the initial preamble -- this is possible because the ITS mitigation already disabled IBT over rewriting the CFI scheme. 
The scheme in detail: Preamble: FineIBT FineIBT-BHI1 FineIBT-BHI __cfi_\func: __cfi_\func: __cfi_\func: endbr endbr endbr subl $0x12345678, %eax subl $0x12345678, %eax subl $0x12345678, %eax jne.d32,pn \func+3 cmovne %rax, %rdi cs cs call __bhi_args_N jne.d8,pn \func+3 \func: \func: \func: nopl -42(%rax) nopl -42(%rax) nopl -42(%rax) Notably there are 7 bytes available after the SUBL; this enables the BHI1 case to fit without the nasty overlapping case it had previously. The !BHI case uses Jcc.d32,pn to consume all 7 bytes without the need for an additional NOP, while the BHI case uses CS padding to align the CALL with the end of the preamble such that it returns to \func+0. Caller: FineIBT Paranoid-FineIBT fineibt_caller: fineibt_caller: mov $0x12345678, %eax mov $0x12345678, %eax lea -0x10(%r11), %r11 cmp -0x11(%r11), %eax nop5 cs lea -0x10(%r11), %r11 retpoline: retpoline: cs call __x86_indirect_thunk_r11 jne fineibt_caller+0xd call *%r11 nop Notably this is before apply_retpolines() which will fix up the retpoline call -- since all parts with IBT also have eIBRS (let's ignore ITS). Typically the retpoline site is rewritten (when still intact) into: call *%r11 nop3 [0] 06926c6 ("x86/ibt: Optimize the FineIBT instruction sequence") Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lkml.kernel.org/r/20250901191307.GI4067720@noisy.programming.kicks-ass.net
1 parent 0b81582 commit 85a2d4a

File tree

8 files changed

+178
-148
lines changed

8 files changed

+178
-148
lines changed

arch/x86/include/asm/bug.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,19 @@
55
#include <linux/stringify.h>
66
#include <linux/instrumentation.h>
77
#include <linux/objtool.h>
8+
#include <asm/asm.h>
89

910
/*
1011
* Despite that some emulators terminate on UD2, we use it for WARN().
1112
*/
12-
#define ASM_UD2 ".byte 0x0f, 0x0b"
13+
#define ASM_UD2 _ASM_BYTES(0x0f, 0x0b)
1314
#define INSN_UD2 0x0b0f
1415
#define LEN_UD2 2
1516

17+
#define ASM_UDB _ASM_BYTES(0xd6)
18+
#define INSN_UDB 0xd6
19+
#define LEN_UDB 1
20+
1621
/*
1722
* In clang we have UD1s reporting UBSAN failures on X86, 64 and 32bit.
1823
*/
@@ -26,7 +31,7 @@
2631
#define BUG_UD2 0xfffe
2732
#define BUG_UD1 0xfffd
2833
#define BUG_UD1_UBSAN 0xfffc
29-
#define BUG_EA 0xffea
34+
#define BUG_UDB 0xffd6
3035
#define BUG_LOCK 0xfff0
3136

3237
#ifdef CONFIG_GENERIC_BUG

arch/x86/include/asm/cfi.h

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -71,12 +71,10 @@
7171
*
7272
* __cfi_foo:
7373
* endbr64
74-
* subl 0x12345678, %r10d
75-
* jz foo
76-
* ud2
77-
* nop
74+
* subl $0x12345678, %eax
75+
* jne.d32,pn foo+3
7876
* foo:
79-
* osp nop3 # was endbr64
77+
* nopl -42(%rax) # was endbr64
8078
* ... code here ...
8179
* ret
8280
*
@@ -86,9 +84,9 @@
8684
* indirect caller:
8785
* lea foo(%rip), %r11
8886
* ...
89-
* movl $0x12345678, %r10d
90-
* subl $16, %r11
91-
* nop4
87+
* movl $0x12345678, %eax
88+
* lea -0x10(%r11), %r11
89+
* nop5
9290
* call *%r11
9391
*
9492
*/

arch/x86/include/asm/ibt.h

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -59,21 +59,17 @@ static __always_inline __attribute_const__ u32 gen_endbr(void)
5959
static __always_inline __attribute_const__ u32 gen_endbr_poison(void)
6060
{
6161
/*
62-
* 4 byte NOP that isn't NOP4 (in fact it is OSP NOP3), such that it
63-
* will be unique to (former) ENDBR sites.
62+
* 4 byte NOP that isn't NOP4, such that it will be unique to (former)
63+
* ENDBR sites. Additionally it carries UDB as immediate.
6464
*/
65-
return 0x001f0f66; /* osp nopl (%rax) */
65+
return 0xd6401f0f; /* nopl -42(%rax) */
6666
}
6767

6868
static inline bool __is_endbr(u32 val)
6969
{
7070
if (val == gen_endbr_poison())
7171
return true;
7272

73-
/* See cfi_fineibt_bhi_preamble() */
74-
if (IS_ENABLED(CONFIG_FINEIBT_BHI) && val == 0x001f0ff5)
75-
return true;
76-
7773
val &= ~0x01000000U; /* ENDBR32 -> ENDBR64 */
7874
return val == gen_endbr();
7975
}

0 commit comments

Comments
 (0)