|
| 1 | +x86/bhi: Add support for clearing branch history at syscall entry |
| 2 | + |
| 3 | +jira LE-2015 |
| 4 | +cve CVE-2024-2201 |
| 5 | +Rebuild_History Non-Buildable kernel-5.14.0-427.42.1.el9_4 |
| 6 | +commit-author Pawan Gupta <pawan.kumar.gupta@linux.intel.com> |
| 7 | +commit 7390db8aea0d64e9deb28b8e1ce716f5020c7ee5 |
| 8 | +Empty-Commit: Cherry-Pick Conflicts during history rebuild. |
| 9 | +Will be included in final tarball splat. Ref for failed cherry-pick at: |
| 10 | +ciq/ciq_backports/kernel-5.14.0-427.42.1.el9_4/7390db8a.failed |
| 11 | + |
| 12 | +Branch History Injection (BHI) attacks may allow a malicious application to |
| 13 | +influence indirect branch prediction in the kernel by poisoning the branch
| 14 | +history. eIBRS isolates indirect branch targets in ring0. The BHB can |
| 15 | +still influence the choice of indirect branch predictor entry, and although |
| 16 | +branch predictor entries are isolated between modes when eIBRS is enabled, |
| 17 | +the BHB itself is not isolated between modes. |
| 18 | + |
| 19 | +Alder Lake and newer processors support a hardware control, BHI_DIS_S, to
| 20 | +mitigate BHI. For older parts that don't support BHI_DIS_S, Intel has
| 21 | +released a software sequence to clear the branch history. Add support for
| 22 | +executing the software sequence at syscall entry and VMexit to overwrite
| 23 | +the branch history.
| 24 | + |
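|  | +A hedged sketch of how the eventual selection between the two mitigations
|  | +might look (the enablement itself lands in a later commit):
|  | +bhi_apply_mitigation() is a hypothetical name, X86_FEATURE_BHI_CTRL comes
|  | +from elsewhere in the series, and the MSR write is elided:
|  | +
|  | +	/* Sketch only -- not part of this patch. */
|  | +	static void __init bhi_apply_mitigation(void)
|  | +	{
|  | +		if (boot_cpu_has(X86_FEATURE_BHI_CTRL)) {
|  | +			/* Newer parts: hardware isolates branch history. */
|  | +			/* ... set BHI_DIS_S via MSR_IA32_SPEC_CTRL ... */
|  | +		} else {
|  | +			/* Older parts: patch in the software clearing loop. */
|  | +			setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP);
|  | +		}
|  | +	}
|  | +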
| 25 | +For now, branch history is not cleared at interrupt entry: malicious
| 26 | +applications are not believed to have sufficient control over the
| 27 | +registers there, since previous register state is cleared at interrupt
| 28 | +entry. Researchers continue to probe this area, and clearing at
| 29 | +interrupt entry as well may become necessary in the future.
| 30 | + |
| 31 | +This mitigation is only defined here. It is enabled later. |
| 32 | + |
| 33 | + Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com> |
| 34 | + Co-developed-by: Daniel Sneddon <daniel.sneddon@linux.intel.com>
| 35 | + Signed-off-by: Daniel Sneddon <daniel.sneddon@linux.intel.com> |
| 36 | + Signed-off-by: Thomas Gleixner <tglx@linutronix.de> |
| 37 | + Reviewed-by: Alexandre Chartre <alexandre.chartre@oracle.com> |
| 38 | + Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> |
| 39 | + |
| 40 | +(cherry picked from commit 7390db8aea0d64e9deb28b8e1ce716f5020c7ee5) |
| 41 | + Signed-off-by: Jonathan Maple <jmaple@ciq.com> |
| 42 | + |
| 43 | +# Conflicts: |
| 44 | +# arch/x86/include/asm/syscall.h |
| 45 | +diff --cc arch/x86/include/asm/syscall.h |
| 46 | +index c7e25c940f1a,2fc7bc3863ff..000000000000 |
| 47 | +--- a/arch/x86/include/asm/syscall.h |
| 48 | ++++ b/arch/x86/include/asm/syscall.h |
| 49 | +@@@ -126,7 -124,8 +126,12 @@@ static inline int syscall_get_arch(stru |
| 50 | + ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; |
| 51 | + } |
| 52 | + |
| 53 | +++<<<<<<< HEAD |
| 54 | + +void do_syscall_64(struct pt_regs *regs, int nr); |
| 55 | +++======= |
| 56 | ++ bool do_syscall_64(struct pt_regs *regs, int nr); |
| 57 | ++ void do_int80_emulation(struct pt_regs *regs); |
| 58 | +++>>>>>>> 7390db8aea0d (x86/bhi: Add support for clearing branch history at syscall entry) |
| 59 | + |
| 60 | + #endif /* CONFIG_X86_32 */ |
| 61 | + |
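|  | +One plausible manual resolution of the conflict above -- an assumption,
|  | +not the recorded outcome -- keeps the backport's existing do_syscall_64()
|  | +return type and adds only the new declaration the patch needs:
|  | +
|  | +	void do_syscall_64(struct pt_regs *regs, int nr);
|  | +	void do_int80_emulation(struct pt_regs *regs);
|  | +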
| 62 | +diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c |
| 63 | +index 3ec0eebc70f0..a542f2e0cf61 100644 |
| 64 | +--- a/arch/x86/entry/common.c |
| 65 | ++++ b/arch/x86/entry/common.c |
| 66 | +@@ -148,7 +148,7 @@ static __always_inline bool int80_is_external(void) |
| 67 | + } |
| 68 | + |
| 69 | + /** |
| 70 | +- * int80_emulation - 32-bit legacy syscall entry |
| 71 | ++ * do_int80_emulation - 32-bit legacy syscall C entry from asm |
| 72 | + * |
| 73 | + * This entry point can be used by 32-bit and 64-bit programs to perform |
| 74 | + * 32-bit system calls. Instances of INT $0x80 can be found inline in |
| 75 | +@@ -166,7 +166,7 @@ static __always_inline bool int80_is_external(void) |
| 76 | + * eax: system call number |
| 77 | + * ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6 |
| 78 | + */ |
| 79 | +-DEFINE_IDTENTRY_RAW(int80_emulation) |
| 80 | ++__visible noinstr void do_int80_emulation(struct pt_regs *regs) |
| 81 | + { |
| 82 | + int nr; |
| 83 | + |
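|  | +For intuition, DEFINE_IDTENTRY_RAW(int80_emulation) expanded (simplified
|  | +here) to the first prototype below; the patch spells the function out so
|  | +that an asm stub can scrub the branch history before any C runs:
|  | +
|  | +	/* Simplified expansion of the old macro form: */
|  | +	__visible noinstr void int80_emulation(struct pt_regs *regs);
|  | +
|  | +	/* New explicit C entry, reached only from the asm stub that has
|  | +	 * already executed CLEAR_BRANCH_HISTORY: */
|  | +	__visible noinstr void do_int80_emulation(struct pt_regs *regs);
|  | +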
| 84 | +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S |
| 85 | +index 38642ee01012..651b4f5b6d33 100644 |
| 86 | +--- a/arch/x86/entry/entry_64.S |
| 87 | ++++ b/arch/x86/entry/entry_64.S |
| 88 | +@@ -116,6 +116,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) |
| 89 | + /* clobbers %rax, make sure it is after saving the syscall nr */ |
| 90 | + IBRS_ENTER |
| 91 | + UNTRAIN_RET |
| 92 | ++ CLEAR_BRANCH_HISTORY |
| 93 | + |
| 94 | + call do_syscall_64 /* returns with IRQs disabled */ |
| 95 | + |
| 96 | +@@ -1551,3 +1552,63 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead) |
| 97 | + call make_task_dead |
| 98 | + SYM_CODE_END(rewind_stack_and_make_dead) |
| 99 | + .popsection |
| 100 | ++ |
| 101 | ++/* |
| 102 | ++ * This sequence executes branches in order to remove user branch information |
| 103 | ++ * from the branch history tracker in the Branch Predictor, therefore removing |
| 104 | ++ * user influence on subsequent BTB lookups. |
| 105 | ++ * |
| 106 | ++ * It should be used on parts prior to Alder Lake. Newer parts should use the |
| 107 | ++ * BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being |
| 108 | ++ * virtualized on newer hardware the VMM should protect against BHI attacks by |
| 109 | ++ * setting BHI_DIS_S for the guests. |
| 110 | ++ * |
| 111 | ++ * CALLs/RETs are necessary to prevent Loop Stream Detector(LSD) from engaging |
| 112 | ++ * and not clearing the branch history. The call tree looks like: |
| 113 | ++ * |
| 114 | ++ * call 1 |
| 115 | ++ * call 2 |
| 116 | ++ * call 2 |
| 117 | ++ * call 2 |
| 118 | ++ * call 2 |
| 119 | ++ * call 2 |
| 120 | ++ * ret |
| 121 | ++ * ret |
| 122 | ++ * ret |
| 123 | ++ * ret |
| 124 | ++ * ret |
| 125 | ++ * ret |
| 126 | ++ * |
| 127 | ++ * This means that the stack is non-constant and ORC can't unwind it with %rsp |
| 128 | ++ * alone. Therefore we unconditionally set up the frame pointer, which allows |
| 129 | ++ * ORC to unwind properly. |
| 130 | ++ * |
| 131 | ++ * The alignment is for performance and not for safety, and may be safely |
| 132 | ++ * refactored in the future if needed. |
| 133 | ++ */ |
| 134 | ++SYM_FUNC_START(clear_bhb_loop) |
| 135 | ++ push %rbp |
| 136 | ++ mov %rsp, %rbp |
| 137 | ++ movl $5, %ecx |
| 138 | ++ ANNOTATE_INTRA_FUNCTION_CALL |
| 139 | ++ call 1f |
| 140 | ++ jmp 5f |
| 141 | ++ .align 64, 0xcc |
| 142 | ++ ANNOTATE_INTRA_FUNCTION_CALL |
| 143 | ++1: call 2f |
| 144 | ++ RET |
| 145 | ++ .align 64, 0xcc |
| 146 | ++2: movl $5, %eax |
| 147 | ++3: jmp 4f |
| 148 | ++ nop |
| 149 | ++4: sub $1, %eax |
| 150 | ++ jnz 3b |
| 151 | ++ sub $1, %ecx |
| 152 | ++ jnz 1b |
| 153 | ++ RET |
| 154 | ++5: lfence |
| 155 | ++ pop %rbp |
| 156 | ++ RET |
| 157 | ++SYM_FUNC_END(clear_bhb_loop) |
| 158 | ++EXPORT_SYMBOL_GPL(clear_bhb_loop) |
| 159 | ++STACK_FRAME_NON_STANDARD(clear_bhb_loop) |
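|  | +The shape of the sequence is easier to see in C. The model below is
|  | +illustrative only (the asm above is the implementation, and a compiler
|  | +owes this code no particular branch pattern): calls nest without
|  | +returning early, then the RETs unwind in a burst, matching the call
|  | +tree in the comment:
|  | +
|  | +	/* Illustrative model of clear_bhb_loop; depth plays the role of %ecx. */
|  | +	static void clear_bhb_level(int depth)
|  | +	{
|  | +		int i;
|  | +
|  | +		if (!depth)
|  | +			return;
|  | +		for (i = 5; i; i--)	/* inner %eax loop: taken jumps */
|  | +			barrier();
|  | +		clear_bhb_level(depth - 1);	/* the CALL at label 1 */
|  | +	}
|  | +
|  | +	/* entry: clear_bhb_level(5), followed by an LFENCE in the asm */
|  | +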
| 160 | +diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S |
| 161 | +index 8cf35d45d588..9fb12c9a50aa 100644 |
| 162 | +--- a/arch/x86/entry/entry_64_compat.S |
| 163 | ++++ b/arch/x86/entry/entry_64_compat.S |
| 164 | +@@ -92,6 +92,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) |
| 165 | + |
| 166 | + IBRS_ENTER |
| 167 | + UNTRAIN_RET |
| 168 | ++ CLEAR_BRANCH_HISTORY |
| 169 | + |
| 170 | + /* |
| 171 | + * SYSENTER doesn't filter flags, so we need to clear NT and AC |
| 172 | +@@ -209,6 +210,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL) |
| 173 | + |
| 174 | + IBRS_ENTER |
| 175 | + UNTRAIN_RET |
| 176 | ++ CLEAR_BRANCH_HISTORY |
| 177 | + |
| 178 | + movq %rsp, %rdi |
| 179 | + call do_fast_syscall_32 |
| 180 | +@@ -274,3 +276,17 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL) |
| 181 | + sysretl |
| 182 | + SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) |
| 183 | + SYM_CODE_END(entry_SYSCALL_compat) |
| 184 | ++ |
| 185 | ++/* |
| 186 | ++ * int 0x80 is used by 32 bit mode as a system call entry. Normally idt entries |
| 187 | ++ * point to C routines, however since this is a system call interface the branch |
| 188 | ++ * history needs to be scrubbed to protect against BHI attacks, and that |
| 189 | ++ * scrubbing needs to take place in assembly code prior to entering any C |
| 190 | ++ * routines. |
| 191 | ++ */ |
| 192 | ++SYM_CODE_START(int80_emulation) |
| 193 | ++ ANNOTATE_NOENDBR |
| 194 | ++ UNWIND_HINT_FUNC |
| 195 | ++ CLEAR_BRANCH_HISTORY |
| 196 | ++ jmp do_int80_emulation |
| 197 | ++SYM_CODE_END(int80_emulation) |
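|  | +The comment's point can be made concrete: ordinary compiler output is
|  | +full of indirect branches whose prediction flows through the BTB and is
|  | +steered in part by the BHB. An illustrative fragment (not kernel code):
|  | +
|  | +	/* An indirect call a poisoned BHB could help mispredict. */
|  | +	long dispatch(unsigned int nr, long (*const table[])(void))
|  | +	{
|  | +		return table[nr]();
|  | +	}
|  | +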
| 198 | +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h |
| 199 | +index 86f4c8a3ef63..2ad77049c123 100644 |
| 200 | +--- a/arch/x86/include/asm/cpufeatures.h |
| 201 | ++++ b/arch/x86/include/asm/cpufeatures.h |
| 202 | +@@ -457,11 +457,12 @@ |
| 203 | + |
| 204 | + /* |
| 205 | + * Extended auxiliary flags: Linux defined - for features scattered in various |
| 206 | +- * CPUID levels like 0x80000022, etc. |
| 207 | ++ * CPUID levels like 0x80000022, etc and Linux defined features. |
| 208 | + * |
| 209 | + * Reuse free bits when adding new feature flags! |
| 210 | + */ |
| 211 | + #define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ |
| 212 | ++#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */ |
| 213 | + |
| 214 | + /* |
| 215 | + * BUG word(s) |
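|  | +The new bit is a synthetic, Linux-defined flag; the "" at the start of
|  | +its comment keeps the name out of /proc/cpuinfo. A hedged sketch of
|  | +forcing and testing it from C (the function and message are
|  | +illustrative, not from this patch):
|  | +
|  | +	static void __init example_select_bhb_clearing(void)
|  | +	{
|  | +		setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP);
|  | +		if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP))
|  | +			pr_info("x86/bhi: SW branch history clearing selected\n");
|  | +	}
|  | +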
| 216 | +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h |
| 217 | +index ae26633e4bec..3f9cdd078863 100644 |
| 218 | +--- a/arch/x86/include/asm/nospec-branch.h |
| 219 | ++++ b/arch/x86/include/asm/nospec-branch.h |
| 220 | +@@ -310,6 +310,14 @@ |
| 221 | + ALTERNATIVE __stringify(verw _ASM_RIP(mds_verw_sel)), "", ALT_NOT(X86_FEATURE_CLEAR_CPU_BUF) |
| 222 | + .endm |
| 223 | + |
| 224 | ++#ifdef CONFIG_X86_64 |
| 225 | ++.macro CLEAR_BRANCH_HISTORY |
| 226 | ++ ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP |
| 227 | ++.endm |
| 228 | ++#else |
| 229 | ++#define CLEAR_BRANCH_HISTORY |
| 230 | ++#endif |
| 231 | ++ |
| 232 | + #else /* __ASSEMBLY__ */ |
| 233 | + |
| 234 | + #define ANNOTATE_RETPOLINE_SAFE \ |
| 235 | +@@ -350,6 +358,10 @@ extern void srso_alias_return_thunk(void); |
| 236 | + extern void entry_untrain_ret(void); |
| 237 | + extern void entry_ibpb(void); |
| 238 | + |
| 239 | ++#ifdef CONFIG_X86_64 |
| 240 | ++extern void clear_bhb_loop(void); |
| 241 | ++#endif |
| 242 | ++ |
| 243 | + extern void (*x86_return_thunk)(void); |
| 244 | + |
| 245 | + #ifdef CONFIG_CALL_DEPTH_TRACKING |
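|  | +Semantically, the CLEAR_BRANCH_HISTORY alternative behaves like the
|  | +boot-time-patched check sketched below. Entry code has to stay in asm
|  | +(the scrub must run before any C), so this C form is for intuition only:
|  | +
|  | +	/* Sketch: the selection the ALTERNATIVE performs, seen from C. */
|  | +	static __always_inline void clear_branch_history(void)
|  | +	{
|  | +		if (static_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP))
|  | +			clear_bhb_loop();	/* declared above; exported for modules */
|  | +	}
|  | +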
| 246 | +* Unmerged path arch/x86/include/asm/syscall.h |
| 247 | +diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S |
| 248 | +index 139960deb736..040bd8e5670b 100644 |
| 249 | +--- a/arch/x86/kvm/vmx/vmenter.S |
| 250 | ++++ b/arch/x86/kvm/vmx/vmenter.S |
| 251 | +@@ -275,6 +275,8 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL) |
| 252 | + |
| 253 | + call vmx_spec_ctrl_restore_host |
| 254 | + |
| 255 | ++ CLEAR_BRANCH_HISTORY |
| 256 | ++ |
| 257 | + /* Put return value in AX */ |
| 258 | + mov %_ASM_BX, %_ASM_AX |
| 259 | + |