
Commit b124bea

x86/bhi: Add support for clearing branch history at syscall entry
jira LE-2015
cve CVE-2024-2201
Rebuild_History Non-Buildable kernel-5.14.0-427.42.1.el9_4
commit-author Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
commit 7390db8
Empty-Commit: Cherry-Pick Conflicts during history rebuild. Will be included
in final tarball splat. Ref for failed cherry-pick at:
ciq/ciq_backports/kernel-5.14.0-427.42.1.el9_4/7390db8a.failed

Branch History Injection (BHI) attacks may allow a malicious application to
influence indirect branch prediction in the kernel by poisoning the branch
history. eIBRS isolates indirect branch targets in ring0. The BHB can still
influence the choice of indirect branch predictor entry, and although branch
predictor entries are isolated between modes when eIBRS is enabled, the BHB
itself is not isolated between modes.

Alder Lake and newer processors support a hardware control, BHI_DIS_S, to
mitigate BHI. For older parts that don't support BHI_DIS_S, Intel has
released a software sequence to clear the branch history. Add support to
execute the software sequence at syscall entry and VMexit to overwrite the
branch history.

For now, branch history is not cleared at interrupt entry, as malicious
applications are not believed to have sufficient control over the registers,
since previous register state is cleared at interrupt entry. Researchers
continue to poke at this area and it may become necessary to clear at
interrupt entry as well in the future.

This mitigation is only defined here. It is enabled later.

Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Co-developed-by: Daniel Sneddon <daniel.sneddon@linux.intel.com>
Signed-off-by: Daniel Sneddon <daniel.sneddon@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Alexandre Chartre <alexandre.chartre@oracle.com>
Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
(cherry picked from commit 7390db8)
Signed-off-by: Jonathan Maple <jmaple@ciq.com>

# Conflicts:
#	arch/x86/include/asm/syscall.h
1 parent c1d09b6 commit b124bea
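
Once the enablement patches land, whether a running kernel ended up using the
SW loop (or the BHI_DIS_S hardware control) is reported through the standard
spectre_v2 sysfs file. A minimal check in C — note this is an illustration,
and the exact "BHI: ..." wording appended to that file varies by kernel
version:

#include <stdio.h>

/* Print the spectre_v2 mitigation line; kernels carrying the BHI work
 * append a "BHI: ..." clause (e.g. "BHI: SW loop"). */
int main(void)
{
	char line[256];
	FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/spectre_v2", "r");

	if (!f) {
		perror("spectre_v2");
		return 1;
	}
	if (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}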

1 file changed: 259 additions, 0 deletions

ciq/ciq_backports/kernel-5.14.0-427.42.1.el9_4/7390db8a.failed
@@ -0,0 +1,259 @@
x86/bhi: Add support for clearing branch history at syscall entry

jira LE-2015
cve CVE-2024-2201
Rebuild_History Non-Buildable kernel-5.14.0-427.42.1.el9_4
commit-author Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
commit 7390db8aea0d64e9deb28b8e1ce716f5020c7ee5
Empty-Commit: Cherry-Pick Conflicts during history rebuild.
Will be included in final tarball splat. Ref for failed cherry-pick at:
ciq/ciq_backports/kernel-5.14.0-427.42.1.el9_4/7390db8a.failed

Branch History Injection (BHI) attacks may allow a malicious application to
influence indirect branch prediction in kernel by poisoning the branch
history. eIBRS isolates indirect branch targets in ring0. The BHB can
still influence the choice of indirect branch predictor entry, and although
branch predictor entries are isolated between modes when eIBRS is enabled,
the BHB itself is not isolated between modes.

Alder Lake and new processors supports a hardware control BHI_DIS_S to
mitigate BHI. For older processors Intel has released a software sequence
to clear the branch history on parts that don't support BHI_DIS_S. Add
support to execute the software sequence at syscall entry and VMexit to
overwrite the branch history.

For now, branch history is not cleared at interrupt entry, as malicious
applications are not believed to have sufficient control over the
registers, since previous register state is cleared at interrupt
entry. Researchers continue to poke at this area and it may become
necessary to clear at interrupt entry as well in the future.

This mitigation is only defined here. It is enabled later.

Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Co-developed-by: Daniel Sneddon <daniel.sneddon@linux.intel.com>
Signed-off-by: Daniel Sneddon <daniel.sneddon@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Alexandre Chartre <alexandre.chartre@oracle.com>
Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>

(cherry picked from commit 7390db8aea0d64e9deb28b8e1ce716f5020c7ee5)
Signed-off-by: Jonathan Maple <jmaple@ciq.com>

# Conflicts:
#	arch/x86/include/asm/syscall.h
diff --cc arch/x86/include/asm/syscall.h
index c7e25c940f1a,2fc7bc3863ff..000000000000
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@@ -126,7 -124,8 +126,12 @@@ static inline int syscall_get_arch(stru
  			? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
  }
  
++<<<<<<< HEAD
 +void do_syscall_64(struct pt_regs *regs, int nr);
++=======
+ bool do_syscall_64(struct pt_regs *regs, int nr);
+ void do_int80_emulation(struct pt_regs *regs);
++>>>>>>> 7390db8aea0d (x86/bhi: Add support for clearing branch history at syscall entry)
  
  #endif /* CONFIG_X86_32 */
  
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 3ec0eebc70f0..a542f2e0cf61 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -148,7 +148,7 @@ static __always_inline bool int80_is_external(void)
 }
 
 /**
- * int80_emulation - 32-bit legacy syscall entry
+ * do_int80_emulation - 32-bit legacy syscall C entry from asm
  *
  * This entry point can be used by 32-bit and 64-bit programs to perform
  * 32-bit system calls. Instances of INT $0x80 can be found inline in
@@ -166,7 +166,7 @@ static __always_inline bool int80_is_external(void)
  * eax: system call number
  * ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6
  */
-DEFINE_IDTENTRY_RAW(int80_emulation)
+__visible noinstr void do_int80_emulation(struct pt_regs *regs)
 {
 	int nr;
 
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 38642ee01012..651b4f5b6d33 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -116,6 +116,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
 	/* clobbers %rax, make sure it is after saving the syscall nr */
 	IBRS_ENTER
 	UNTRAIN_RET
+	CLEAR_BRANCH_HISTORY
 
 	call	do_syscall_64		/* returns with IRQs disabled */
 
@@ -1551,3 +1552,63 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead)
 	call	make_task_dead
 SYM_CODE_END(rewind_stack_and_make_dead)
 .popsection
+
+/*
+ * This sequence executes branches in order to remove user branch information
+ * from the branch history tracker in the Branch Predictor, therefore removing
+ * user influence on subsequent BTB lookups.
+ *
+ * It should be used on parts prior to Alder Lake. Newer parts should use the
+ * BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being
+ * virtualized on newer hardware the VMM should protect against BHI attacks by
+ * setting BHI_DIS_S for the guests.
+ *
+ * CALLs/RETs are necessary to prevent Loop Stream Detector(LSD) from engaging
+ * and not clearing the branch history. The call tree looks like:
+ *
+ * call 1
+ *    call 2
+ *      call 2
+ *        call 2
+ *          call 2
+ *            call 2
+ *            ret
+ *          ret
+ *        ret
+ *      ret
+ *    ret
+ * ret
+ *
+ * This means that the stack is non-constant and ORC can't unwind it with %rsp
+ * alone. Therefore we unconditionally set up the frame pointer, which allows
+ * ORC to unwind properly.
+ *
+ * The alignment is for performance and not for safety, and may be safely
+ * refactored in the future if needed.
+ */
+SYM_FUNC_START(clear_bhb_loop)
+	push	%rbp
+	mov	%rsp, %rbp
+	movl	$5, %ecx
+	ANNOTATE_INTRA_FUNCTION_CALL
+	call	1f
+	jmp	5f
+	.align 64, 0xcc
+	ANNOTATE_INTRA_FUNCTION_CALL
+1:	call	2f
+	RET
+	.align 64, 0xcc
+2:	movl	$5, %eax
+3:	jmp	4f
+	nop
+4:	sub	$1, %eax
+	jnz	3b
+	sub	$1, %ecx
+	jnz	1b
+	RET
+5:	lfence
+	pop	%rbp
+	RET
+SYM_FUNC_END(clear_bhb_loop)
+EXPORT_SYMBOL_GPL(clear_bhb_loop)
+STACK_FRAME_NON_STANDARD(clear_bhb_loop)
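
To make the comment's call-tree arithmetic concrete, here is a small
standalone C model of the sequence's control flow — an illustration only,
since the BHB can only be overwritten by the real instruction sequence on
real hardware. It mirrors the 5 outer iterations (each a nested CALL) and
the 5-iteration inner jmp/jnz loop, and counts the taken branches:

#include <stdio.h>

int main(void)
{
	unsigned calls = 0, rets = 0, jumps = 0;

	calls++;                               /* call 1f */
	for (int ecx = 5; ecx > 0; ecx--) {
		calls++;                       /* 1: call 2f */
		for (int eax = 5; eax > 0; eax--) {
			jumps++;               /* 3: jmp 4f */
			if (eax > 1)
				jumps++;       /* 4: jnz 3b, taken until %eax hits 0 */
		}
		if (ecx > 1)
			jumps++;               /* jnz 1b, taken until %ecx hits 0 */
	}
	rets += 6;                             /* unwind: 5 nested calls + call 1f */
	jumps++;                               /* jmp 5f */
	rets++;                                /* final RET */

	printf("calls=%u rets=%u jumps=%u total taken branches=%u\n",
	       calls, rets, jumps, calls + rets + jumps);
	return 0;
}

Under this model the sequence executes on the order of sixty taken branches,
which is the point: enough kernel-controlled branches to displace any
user-planted branch history.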
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 8cf35d45d588..9fb12c9a50aa 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -92,6 +92,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
 
 	IBRS_ENTER
 	UNTRAIN_RET
+	CLEAR_BRANCH_HISTORY
 
 	/*
 	 * SYSENTER doesn't filter flags, so we need to clear NT and AC
@@ -209,6 +210,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
 
 	IBRS_ENTER
 	UNTRAIN_RET
+	CLEAR_BRANCH_HISTORY
 
 	movq	%rsp, %rdi
 	call	do_fast_syscall_32
@@ -274,3 +276,17 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL)
 	sysretl
 SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
 SYM_CODE_END(entry_SYSCALL_compat)
+
+/*
+ * int 0x80 is used by 32 bit mode as a system call entry. Normally idt entries
+ * point to C routines, however since this is a system call interface the branch
+ * history needs to be scrubbed to protect against BHI attacks, and that
+ * scrubbing needs to take place in assembly code prior to entering any C
+ * routines.
+ */
+SYM_CODE_START(int80_emulation)
+	ANNOTATE_NOENDBR
+	UNWIND_HINT_FUNC
+	CLEAR_BRANCH_HISTORY
+	jmp	do_int80_emulation
+SYM_CODE_END(int80_emulation)
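
The shape of that stub — scrub first, then a tail-jump into the C body — is
why common.c above converts int80_emulation from DEFINE_IDTENTRY_RAW into a
plain do_int80_emulation() function. A hypothetical C rendering of the
ordering (stand-in names; the real stub must be assembly, because the
guarantee has to hold before any compiler-generated code runs):

#include <stdio.h>

struct pt_regs { long ax; };	/* stand-in for the real register frame */

static void clear_branch_history(void)	/* models CLEAR_BRANCH_HISTORY */
{
	puts("branch history scrubbed");
}

static void do_int80_emulation_body(struct pt_regs *regs)	/* models the C handler */
{
	printf("emulating int 0x80, nr=%ld\n", regs->ax);
}

/* Scrub, then hand off; the asm 'jmp do_int80_emulation' is this tail call. */
static void int80_emulation_stub(struct pt_regs *regs)
{
	clear_branch_history();
	do_int80_emulation_body(regs);
}

int main(void)
{
	struct pt_regs regs = { .ax = 1 };
	int80_emulation_stub(&regs);
	return 0;
}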
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 86f4c8a3ef63..2ad77049c123 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -457,11 +457,12 @@
 
 /*
  * Extended auxiliary flags: Linux defined - for features scattered in various
- * CPUID levels like 0x80000022, etc.
+ * CPUID levels like 0x80000022, etc and Linux defined features.
  *
  * Reuse free bits when adding new feature flags!
  */
 #define X86_FEATURE_AMD_LBR_PMC_FREEZE	(21*32+ 0) /* AMD LBR and PMC Freeze */
+#define X86_FEATURE_CLEAR_BHB_LOOP	(21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */
 
 /*
  * BUG word(s)
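
For reference, cpufeature numbers encode as word*32 + bit, so the new flag
sits in word 21, bit 1, directly after AMD_LBR_PMC_FREEZE. A trivial
standalone check of that arithmetic (illustration, not kernel code):

#include <stdio.h>

int main(void)
{
	unsigned feature = 21 * 32 + 1;	/* X86_FEATURE_CLEAR_BHB_LOOP */

	printf("feature number %u -> word %u, bit %u\n",
	       feature, feature / 32, feature % 32);
	return 0;	/* prints: feature number 673 -> word 21, bit 1 */
}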
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index ae26633e4bec..3f9cdd078863 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -310,6 +310,14 @@
 	ALTERNATIVE __stringify(verw _ASM_RIP(mds_verw_sel)), "", ALT_NOT(X86_FEATURE_CLEAR_CPU_BUF)
 .endm
 
+#ifdef CONFIG_X86_64
+.macro CLEAR_BRANCH_HISTORY
+	ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
+.endm
+#else
+#define CLEAR_BRANCH_HISTORY
+#endif
+
 #else /* __ASSEMBLY__ */
 
 #define ANNOTATE_RETPOLINE_SAFE \
@@ -350,6 +358,10 @@ extern void srso_alias_return_thunk(void);
 extern void entry_untrain_ret(void);
 extern void entry_ibpb(void);
 
+#ifdef CONFIG_X86_64
+extern void clear_bhb_loop(void);
+#endif
+
 extern void (*x86_return_thunk)(void);
 
 #ifdef CONFIG_CALL_DEPTH_TRACKING
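
ALTERNATIVE here means the CLEAR_BRANCH_HISTORY sites cost nothing unless the
CPU sets X86_FEATURE_CLEAR_BHB_LOOP, in which case the patched-in text becomes
"call clear_bhb_loop" at boot. A loose userspace analogy of that
select-once-then-dispatch pattern (hypothetical names; the kernel rewrites the
call site in place rather than using a function pointer):

#include <stdio.h>
#include <stdbool.h>

static bool cpu_has_clear_bhb_loop(void)	/* stand-in for boot-time detection */
{
	return true;	/* pretend the feature flag is set */
}

static void clear_bhb_loop_sw(void) { puts("scrubbing branch history"); }
static void no_op(void)             { }

/* Selected once at startup, like ALTERNATIVE patching the call site at boot. */
static void (*clear_branch_history)(void) = no_op;

static void feature_init(void)
{
	if (cpu_has_clear_bhb_loop())
		clear_branch_history = clear_bhb_loop_sw;
}

int main(void)
{
	feature_init();
	clear_branch_history();	/* no-op or scrub, decided by the feature flag */
	return 0;
}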
* Unmerged path arch/x86/include/asm/syscall.h
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 139960deb736..040bd8e5670b 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -275,6 +275,8 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)
 
 	call vmx_spec_ctrl_restore_host
 
+	CLEAR_BRANCH_HISTORY
+
 	/* Put return value in AX */
 	mov %_ASM_BX, %_ASM_AX