diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index d9415c851cc82..a8fc8134da9e6 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -499,6 +499,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/spectre_v2 /sys/devices/system/cpu/vulnerabilities/srbds /sys/devices/system/cpu/vulnerabilities/tsx_async_abort + /sys/devices/system/cpu/vulnerabilities/vmscape Date: January 2018 Contact: Linux kernel mailing list Description: Information about CPU vulnerabilities diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst index 2189a6a3b22b8..f73a10373e6ff 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -19,3 +19,4 @@ are configurable at compile, boot or run time. srso gather_data_sampling reg-file-data-sampling + vmscape diff --git a/Documentation/admin-guide/hw-vuln/vmscape.rst b/Documentation/admin-guide/hw-vuln/vmscape.rst new file mode 100644 index 0000000000000..d9b9a2b6c114c --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/vmscape.rst @@ -0,0 +1,110 @@ +.. SPDX-License-Identifier: GPL-2.0 + +VMSCAPE +======= + +VMSCAPE is a vulnerability that may allow a guest to influence the branch +prediction in host userspace. It particularly affects hypervisors like QEMU. + +Even if a hypervisor does not hold any sensitive data like disk encryption keys, +guest userspace may be able to attack the guest kernel by using the hypervisor as +a confused deputy. + +Affected processors +------------------- + +The following CPU families are affected by VMSCAPE: + +**Intel processors:** + - Skylake generation (Parts without Enhanced-IBRS) + - Cascade Lake generation (Parts affected by ITS guest/host separation) + - Alder Lake and newer (Parts affected by BHI) + +Note that BHI-affected parts that use the BHB clearing software mitigation, e.g. +Ice Lake, are not vulnerable to VMSCAPE. + +**AMD processors:** + - Zen series (families 0x17, 0x19, 0x1a) + +**Hygon processors:** + - Family 0x18 + +Mitigation +---------- + +Conditional IBPB +---------------- + +The kernel tracks when a CPU has run a potentially malicious guest and issues an +IBPB before the first exit to userspace after a VM-exit. If userspace did not run +between VM-exit and the next VM-entry, no IBPB is issued. + +Note that the existing userspace mitigations against Spectre-v2 are effective in +protecting userspace, but they are insufficient to protect userspace VMMs +from a malicious guest. This is because Spectre-v2 mitigations are applied at +context switch time, while the userspace VMM can run after a VM-exit without a +context switch. + +Vulnerability enumeration and mitigation are not applied inside a guest. This is +because nested hypervisors should already be deploying IBPB to isolate +themselves from nested guests. + +SMT considerations +------------------ + +When Simultaneous Multi-Threading (SMT) is enabled, hypervisors can be +vulnerable to cross-thread attacks. For complete protection against VMSCAPE +attacks in SMT environments, STIBP should be enabled. + +The kernel will issue a warning if SMT is enabled without adequate STIBP +protection.
Warning is not issued when: + +- SMT is disabled +- STIBP is enabled system-wide +- Intel eIBRS is enabled (which implies STIBP protection) + +System information and options +------------------------------ + +The sysfs file showing VMSCAPE mitigation status is: + + /sys/devices/system/cpu/vulnerabilities/vmscape + +The possible values in this file are: + + * 'Not affected': + + The processor is not vulnerable to VMSCAPE attacks. + + * 'Vulnerable': + + The processor is vulnerable and no mitigation has been applied. + + * 'Mitigation: IBPB before exit to userspace': + + Conditional IBPB mitigation is enabled. The kernel tracks when a CPU has + run a potentially malicious guest and issues an IBPB before the first + exit to userspace after VM-exit. + + * 'Mitigation: IBPB on VMEXIT': + + IBPB is issued on every VM-exit. This occurs when other mitigations like + RETBLEED or SRSO are already issuing IBPB on VM-exit. + +Mitigation control on the kernel command line +---------------------------------------------- + +The mitigation can be controlled via the ``vmscape=`` command line parameter: + + * ``vmscape=off``: + + Disable the VMSCAPE mitigation. + + * ``vmscape=ibpb``: + + Enable conditional IBPB mitigation (default when CONFIG_MITIGATION_VMSCAPE=y). + + * ``vmscape=force``: + + Force vulnerability detection and mitigation even on processors that are + not known to be affected. diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index cf789f4181a4d..9bf32bfc1905c 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2874,6 +2874,7 @@ srbds=off [X86,INTEL] ssbd=force-off [ARM64] tsx_async_abort=off [X86] + vmscape=off [X86] Exceptions: This does not have any effect on @@ -6019,6 +6020,16 @@ vmpoff= [KNL,S390] Perform z/VM CP command after power off. Format: + vmscape= [X86] Controls mitigation for VMscape attacks. + VMscape attacks can leak information from a userspace + hypervisor to a guest via speculative side-channels. + + off - disable the mitigation + ibpb - use Indirect Branch Prediction Barrier + (IBPB) mitigation (default) + force - force vulnerability detection even on + unaffected processors + vsyscall= [X86-64] Controls the behavior of vsyscalls (i.e. calls to fixed addresses of 0xffffffffff600x00 from legacy diff --git a/Makefile.rhelver b/Makefile.rhelver index 707406aa9cbf9..7d5509eb3662f 100644 --- a/Makefile.rhelver +++ b/Makefile.rhelver @@ -12,7 +12,7 @@ RHEL_MINOR = 10 # # Use this spot to avoid future merge conflicts. # Do not trim this comment. -RHEL_RELEASE = 553.82.1 +RHEL_RELEASE = 553.83.1 # # ZSTREAM diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index c3402f7971c73..2369d0275ef3e 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -225,7 +225,7 @@ static inline int __pcilg_mio_inuser( [ioaddr_len] "+&d" (ioaddr_len.pair), [cc] "+d" (cc), [val] "=d" (val), [dst] "+a" (dst), [cnt] "+d" (cnt), [tmp] "=d" (tmp), - [shift] "+d" (shift) + [shift] "+a" (shift) :: "cc", "memory"); /* did we write everything to the user space buffer? */ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 92821623048aa..af1b960273941 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2626,6 +2626,14 @@ config MITIGATION_SPECTRE_BHI indirect branches. See +config MITIGATION_VMSCAPE + bool "Mitigate VMSCAPE" + depends on KVM + default y + help + Enable mitigation for VMSCAPE attacks. 
VMSCAPE is a hardware security + vulnerability on Intel and AMD CPUs that may allow a guest to do + Spectre v2 style attacks on the userspace hypervisor. endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index ec087d4e663ae..6630f12eaeb00 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -222,6 +222,13 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); #endif + /* Avoid unnecessary reads of 'x86_ibpb_exit_to_user' */ + if (cpu_feature_enabled(X86_FEATURE_IBPB_EXIT_TO_USER) && + this_cpu_read(x86_ibpb_exit_to_user)) { + indirect_branch_prediction_barrier(); + this_cpu_write(x86_ibpb_exit_to_user, false); + } + user_enter_irqoff(); amd_clear_divider(); diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b1dbe87085a69..874eeaa780e0c 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -445,6 +445,7 @@ #define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */ #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */ #define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */ +#define X86_FEATURE_IBPB_EXIT_TO_USER (21*32+14) /* "" Use IBPB on exit-to-userspace, see VMSCAPE bug */ /* RHEL specific auxillary flags, word 22 */ #define X86_FEATURE_IBRS_EXIT_SET (22*32+ 0) /* "" Set IBRS on kernel exit */ #define X86_FEATURE_IBRS_EXIT_SKIP (22*32+ 1) /* "" Skip SPEC_CTRL MSR write on exit */ @@ -497,4 +498,5 @@ #define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ #define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */ #define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */ +#define X86_BUG_VMSCAPE X86_BUG(1*32 +10) /* "vmscape" CPU is affected by VMSCAPE attacks from guests */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 5a91989c9d98d..ef3a53b35903e 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -371,6 +371,8 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) extern u64 x86_pred_cmd; +DECLARE_PER_CPU(bool, x86_ibpb_exit_to_user); + static inline void indirect_branch_prediction_barrier(void) { alternative_msr_write(MSR_IA32_PRED_CMD, x86_pred_cmd, X86_FEATURE_USE_IBPB); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index a556e8ade6748..f2dd27d070dc5 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -51,6 +51,9 @@ static void __init mmio_select_mitigation(void); static void __init srbds_select_mitigation(void); static void __init srso_select_mitigation(void); static void __init gds_select_mitigation(void); +static void __init vmscape_select_mitigation(void); +static void __init vmscape_update_mitigation(void); +static void __init vmscape_apply_mitigation(void); /* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; @@ -60,6 +63,14 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); DEFINE_PER_CPU(u64, x86_spec_ctrl_current); EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); +/* + * Set when the CPU has run a potentially malicious guest. An IBPB will + * be needed before running userspace. That IBPB will flush the branch + * predictor content.
+ */ +DEFINE_PER_CPU(bool, x86_ibpb_exit_to_user); +EXPORT_PER_CPU_SYMBOL_GPL(x86_ibpb_exit_to_user); + u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB; EXPORT_SYMBOL_GPL(x86_pred_cmd); @@ -185,6 +196,10 @@ void __init check_bugs(void) srso_select_mitigation(); gds_select_mitigation(); + vmscape_select_mitigation(); + vmscape_update_mitigation(); + vmscape_apply_mitigation(); + arch_smt_update(); #ifdef CONFIG_X86_32 @@ -1939,66 +1954,6 @@ static void update_mds_branch_idle(void) } } -#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" -#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n" -#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n" - -void cpu_bugs_smt_update(void) -{ - mutex_lock(&spec_ctrl_mutex); - - if (sched_smt_active() && unprivileged_ebpf_enabled() && - spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) - pr_warn_once(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG); - - switch (spectre_v2_user_stibp) { - case SPECTRE_V2_USER_NONE: - break; - case SPECTRE_V2_USER_STRICT: - case SPECTRE_V2_USER_STRICT_PREFERRED: - update_stibp_strict(); - break; - case SPECTRE_V2_USER_PRCTL: - case SPECTRE_V2_USER_SECCOMP: - update_indir_branch_cond(); - break; - } - - switch (mds_mitigation) { - case MDS_MITIGATION_FULL: - case MDS_MITIGATION_VMWERV: - if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY)) - pr_warn_once(MDS_MSG_SMT); - update_mds_branch_idle(); - break; - case MDS_MITIGATION_OFF: - break; - } - - switch (taa_mitigation) { - case TAA_MITIGATION_VERW: - case TAA_MITIGATION_UCODE_NEEDED: - if (sched_smt_active()) - pr_warn_once(TAA_MSG_SMT); - break; - case TAA_MITIGATION_TSX_DISABLED: - case TAA_MITIGATION_OFF: - break; - } - - switch (mmio_mitigation) { - case MMIO_MITIGATION_VERW: - case MMIO_MITIGATION_UCODE_NEEDED: - if (sched_smt_active()) - pr_warn_once(MMIO_MSG_SMT); - break; - case MMIO_MITIGATION_OFF: - break; - } - - mutex_unlock(&spec_ctrl_mutex); -} - #ifdef CONFIG_DEBUG_FS /* * Provide a debugfs file to dump SPEC_CTRL MSRs of all the CPUs @@ -2704,9 +2659,163 @@ static void __init srso_select_mitigation(void) pr_info("%s\n", srso_strings[srso_mitigation]); } +#undef pr_fmt +#define pr_fmt(fmt) "VMSCAPE: " fmt + +enum vmscape_mitigations { + VMSCAPE_MITIGATION_NONE, + VMSCAPE_MITIGATION_AUTO, + VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER, + VMSCAPE_MITIGATION_IBPB_ON_VMEXIT, +}; + +static const char * const vmscape_strings[] = { + [VMSCAPE_MITIGATION_NONE] = "Vulnerable", + /* [VMSCAPE_MITIGATION_AUTO] */ + [VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER] = "Mitigation: IBPB before exit to userspace", + [VMSCAPE_MITIGATION_IBPB_ON_VMEXIT] = "Mitigation: IBPB on VMEXIT", +}; + +static enum vmscape_mitigations vmscape_mitigation __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_VMSCAPE) ? 
VMSCAPE_MITIGATION_AUTO : VMSCAPE_MITIGATION_NONE; + +static int __init vmscape_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) { + vmscape_mitigation = VMSCAPE_MITIGATION_NONE; + } else if (!strcmp(str, "ibpb")) { + vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER; + } else if (!strcmp(str, "force")) { + setup_force_cpu_bug(X86_BUG_VMSCAPE); + vmscape_mitigation = VMSCAPE_MITIGATION_AUTO; + } else { + pr_err("Ignoring unknown vmscape=%s option.\n", str); + } + + return 0; +} +early_param("vmscape", vmscape_parse_cmdline); + +static void __init vmscape_select_mitigation(void) +{ + if (cpu_mitigations_off() || + !boot_cpu_has_bug(X86_BUG_VMSCAPE) || + !boot_cpu_has(X86_FEATURE_IBPB)) { + vmscape_mitigation = VMSCAPE_MITIGATION_NONE; + return; + } + + if (vmscape_mitigation == VMSCAPE_MITIGATION_AUTO) + vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER; +} + +static void __init vmscape_update_mitigation(void) +{ + if (!boot_cpu_has_bug(X86_BUG_VMSCAPE)) + return; + + if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB || + srso_mitigation == SRSO_MITIGATION_IBPB_ON_VMEXIT) + vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_ON_VMEXIT; + + pr_info("%s\n", vmscape_strings[vmscape_mitigation]); +} + +static void __init vmscape_apply_mitigation(void) +{ + if (vmscape_mitigation == VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER) + setup_force_cpu_cap(X86_FEATURE_IBPB_EXIT_TO_USER); +} + #undef pr_fmt #define pr_fmt(fmt) fmt +#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" +#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n" +#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n" +#define VMSCAPE_MSG_SMT "VMSCAPE: SMT on, STIBP is required for full protection. 
See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/vmscape.html for more details.\n" + +void cpu_bugs_smt_update(void) +{ + mutex_lock(&spec_ctrl_mutex); + + if (sched_smt_active() && unprivileged_ebpf_enabled() && + spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) + pr_warn_once(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG); + + switch (spectre_v2_user_stibp) { + case SPECTRE_V2_USER_NONE: + break; + case SPECTRE_V2_USER_STRICT: + case SPECTRE_V2_USER_STRICT_PREFERRED: + update_stibp_strict(); + break; + case SPECTRE_V2_USER_PRCTL: + case SPECTRE_V2_USER_SECCOMP: + update_indir_branch_cond(); + break; + } + + switch (mds_mitigation) { + case MDS_MITIGATION_FULL: + case MDS_MITIGATION_VMWERV: + if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY)) + pr_warn_once(MDS_MSG_SMT); + update_mds_branch_idle(); + break; + case MDS_MITIGATION_OFF: + break; + } + + switch (taa_mitigation) { + case TAA_MITIGATION_VERW: + case TAA_MITIGATION_UCODE_NEEDED: + if (sched_smt_active()) + pr_warn_once(TAA_MSG_SMT); + break; + case TAA_MITIGATION_TSX_DISABLED: + case TAA_MITIGATION_OFF: + break; + } + + switch (mmio_mitigation) { + case MMIO_MITIGATION_VERW: + case MMIO_MITIGATION_UCODE_NEEDED: + if (sched_smt_active()) + pr_warn_once(MMIO_MSG_SMT); + break; + case MMIO_MITIGATION_OFF: + break; + } + + switch (vmscape_mitigation) { + case VMSCAPE_MITIGATION_NONE: + case VMSCAPE_MITIGATION_AUTO: + break; + case VMSCAPE_MITIGATION_IBPB_ON_VMEXIT: + case VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER: + /* + * Hypervisors can be attacked across-threads, warn for SMT when + * STIBP is not already enabled system-wide. + * + * Intel eIBRS (!AUTOIBRS) implies STIBP on. + */ + if (!sched_smt_active() || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED || + (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && + !boot_cpu_has(X86_FEATURE_AUTOIBRS))) + break; + pr_warn_once(VMSCAPE_MSG_SMT); + break; + } + + mutex_unlock(&spec_ctrl_mutex); +} + #ifdef CONFIG_SYSFS #define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion" @@ -2940,6 +3049,11 @@ static ssize_t gds_show_state(char *buf) return sysfs_emit(buf, "%s\n", gds_strings[gds_mitigation]); } +static ssize_t vmscape_show_state(char *buf) +{ + return sysfs_emit(buf, "%s\n", vmscape_strings[vmscape_mitigation]); +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -2998,6 +3112,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_RFDS: return rfds_show_state(buf); + case X86_BUG_VMSCAPE: + return vmscape_show_state(buf); + default: break; } @@ -3077,4 +3194,9 @@ ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attrib { return cpu_show_common(dev, attr, buf, X86_BUG_RFDS); } + +ssize_t cpu_show_vmscape(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_VMSCAPE); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index dbb5dee28ca27..4130d43c6435a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1164,39 +1164,48 @@ static const __initconst struct x86_cpu_id_v2 cpu_vuln_whitelist[] = { #define GDS BIT(6) /* CPU is affected by Register File Data Sampling */ #define RFDS BIT(7) +/* CPU is affected by VMSCAPE */ +#define VMSCAPE BIT(11) static const struct x86_cpu_id_v2 cpu_vuln_blacklist[] __initconst = { - VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, 
SRBDS), - VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO), - VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPING_ANY, MMIO), - VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), - VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), - VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), - VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), - VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED), + VULNBL_INTEL_STEPPINGS(SANDYBRIDGE_X, X86_STEPPING_ANY, VMSCAPE), + VULNBL_INTEL_STEPPINGS(SANDYBRIDGE, X86_STEPPING_ANY, VMSCAPE), + VULNBL_INTEL_STEPPINGS(IVYBRIDGE_X, X86_STEPPING_ANY, VMSCAPE), + VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS | VMSCAPE), + VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS | VMSCAPE), + VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS | VMSCAPE), + VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS | VMSCAPE), + VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO | VMSCAPE), + VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPING_ANY, MMIO | VMSCAPE), + VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS | VMSCAPE), + VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO | VMSCAPE), + VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS | VMSCAPE), + VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | VMSCAPE), + VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE), + VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE), + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE), + VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED | VMSCAPE), VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO | GDS), VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO | GDS), - VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), - VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED), - VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), + VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS | VMSCAPE), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED | VMSCAPE), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS | VMSCAPE), VULNBL_INTEL_STEPPINGS(TIGERLAKE_L, X86_STEPPING_ANY, GDS), VULNBL_INTEL_STEPPINGS(TIGERLAKE, X86_STEPPING_ANY, GDS), VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), - VULNBL_INTEL_STEPPINGS(ALDERLAKE, X86_STEPPING_ANY, RFDS), - VULNBL_INTEL_STEPPINGS(ALDERLAKE_L, 
X86_STEPPING_ANY, RFDS), - VULNBL_INTEL_STEPPINGS(RAPTORLAKE, X86_STEPPING_ANY, RFDS), - VULNBL_INTEL_STEPPINGS(RAPTORLAKE_P, X86_STEPPING_ANY, RFDS), - VULNBL_INTEL_STEPPINGS(RAPTORLAKE_S, X86_STEPPING_ANY, RFDS), - VULNBL_INTEL_STEPPINGS(ATOM_GRACEMONT, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPPINGS(ALDERLAKE, X86_STEPPING_ANY, RFDS | VMSCAPE), + VULNBL_INTEL_STEPPINGS(ALDERLAKE_L, X86_STEPPING_ANY, RFDS | VMSCAPE), + VULNBL_INTEL_STEPPINGS(RAPTORLAKE, X86_STEPPING_ANY, RFDS | VMSCAPE), + VULNBL_INTEL_STEPPINGS(RAPTORLAKE_P, X86_STEPPING_ANY, RFDS | VMSCAPE), + VULNBL_INTEL_STEPPINGS(RAPTORLAKE_S, X86_STEPPING_ANY, RFDS | VMSCAPE), + VULNBL_INTEL_STEPPINGS(METEORLAKE_L, X86_STEPPING_ANY, VMSCAPE), + VULNBL_INTEL_STEPPINGS(SAPPHIRERAPIDS_X,X86_STEPPING_ANY, VMSCAPE), + VULNBL_INTEL_STEPPINGS(GRANITERAPIDS_X, X86_STEPPING_ANY, VMSCAPE), + VULNBL_INTEL_STEPPINGS(EMERALDRAPIDS_X, X86_STEPPING_ANY, VMSCAPE), + VULNBL_INTEL_STEPPINGS(ATOM_GRACEMONT, X86_STEPPING_ANY, RFDS | VMSCAPE), VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS), VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO | RFDS), VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS), @@ -1206,9 +1215,9 @@ static const struct x86_cpu_id_v2 cpu_vuln_blacklist[] __initconst = { VULNBL_AMD(0x15, RETBLEED), VULNBL_AMD(0x16, RETBLEED), - VULNBL_AMD(0x17, RETBLEED | SRSO), - VULNBL_HYGON(0x18, RETBLEED), - VULNBL_AMD(0x19, SRSO), + VULNBL_AMD(0x17, RETBLEED | SRSO | VMSCAPE), + VULNBL_HYGON(0x18, RETBLEED | VMSCAPE), + VULNBL_AMD(0x19, SRSO | VMSCAPE), {} }; @@ -1371,6 +1380,14 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) boot_cpu_has(X86_FEATURE_HYPERVISOR))) setup_force_cpu_bug(X86_BUG_BHI); + /* + * Set the bug only on bare-metal. A nested hypervisor should already be + * deploying IBPB to isolate itself from nested guests. + */ + if (cpu_matches(cpu_vuln_blacklist, VMSCAPE) && + !boot_cpu_has(X86_FEATURE_HYPERVISOR)) + setup_force_cpu_bug(X86_BUG_VMSCAPE); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 70a0121884333..f195373052256 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10037,6 +10037,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (vcpu->arch.guest_fpu.xfd_err) wrmsrl(MSR_IA32_XFD_ERR, 0); + /* + * Mark this CPU as needing a branch predictor flush before running + * userspace. Must be done before enabling preemption to ensure it gets + * set for the CPU that actually ran the guest, and not the CPU that it + * may migrate to. + */ + if (cpu_feature_enabled(X86_FEATURE_IBPB_EXIT_TO_USER)) + this_cpu_write(x86_ibpb_exit_to_user, true); + /* * Consume any pending interrupts, including the possible source of * VM-Exit on SVM and any ticks that occur between VM-Exit and now. diff --git a/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/2e488f13.failed b/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/2e488f13.failed new file mode 100644 index 0000000000000..cb395783eac0b --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/2e488f13.failed @@ -0,0 +1,54 @@ +fs: fix UAF/GPF bug in nilfs_mdt_destroy + +jira LE-4704 +cve CVE-2022-50367 +Rebuild_History Non-Buildable kernel-4.18.0-553.83.1.el8_10 +commit-author Dongliang Mu +commit 2e488f13755ffbb60f307e991b27024716a33b29 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. 
Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/2e488f13.failed + +In alloc_inode, inode_init_always() could return -ENOMEM if +security_inode_alloc() fails, which causes inode->i_private +uninitialized. Then nilfs_is_metadata_file_inode() returns +true and nilfs_free_inode() wrongly calls nilfs_mdt_destroy(), +which frees the uninitialized inode->i_private +and leads to crashes(e.g., UAF/GPF). + +Fix this by moving security_inode_alloc just prior to +this_cpu_inc(nr_inodes) + +Link: https://lkml.kernel.org/r/CAFcO6XOcf1Jj2SeGt=jJV59wmhESeSKpfR0omdFRq+J9nD1vfQ@mail.gmail.com + Reported-by: butt3rflyh4ck + Reported-by: Hao Sun + Reported-by: Jiacheng Xu + Reviewed-by: Christian Brauner (Microsoft) + Signed-off-by: Dongliang Mu + Cc: Al Viro + Cc: stable@vger.kernel.org + Signed-off-by: Al Viro +(cherry picked from commit 2e488f13755ffbb60f307e991b27024716a33b29) + Signed-off-by: Jonathan Maple + +# Conflicts: +# fs/inode.c +diff --cc fs/inode.c +index 024853e8ceb1,5559a2983341..000000000000 +--- a/fs/inode.c ++++ b/fs/inode.c +@@@ -166,10 -192,6 +166,13 @@@ int inode_init_always(struct super_bloc + inode->i_wb_frn_history = 0; + #endif + +++<<<<<<< HEAD + + inode->rh_reserved2 = 0; + + + + if (security_inode_alloc(inode)) + + goto out; +++======= +++>>>>>>> 2e488f13755f (fs: fix UAF/GPF bug in nilfs_mdt_destroy) + spin_lock_init(&inode->i_lock); + lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key); + +* Unmerged path fs/inode.c diff --git a/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/2f8f1734.failed b/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/2f8f1734.failed new file mode 100644 index 0000000000000..6ed9e614f6c21 --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/2f8f1734.failed @@ -0,0 +1,142 @@ +x86/vmscape: Add conditional IBPB mitigation + +jira LE-4704 +cve CVE-2025-40300 +Rebuild_History Non-Buildable kernel-4.18.0-553.83.1.el8_10 +commit-author Pawan Gupta +commit 2f8f173413f1cbf52660d04df92d0069c4306d25 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/2f8f1734.failed + +VMSCAPE is a vulnerability that exploits insufficient branch predictor +isolation between a guest and a userspace hypervisor (like QEMU). Existing +mitigations already protect kernel/KVM from a malicious guest. Userspace +can additionally be protected by flushing the branch predictors after a +VMexit. + +Since it is the userspace that consumes the poisoned branch predictors, +conditionally issue an IBPB after a VMexit and before returning to +userspace. Workloads that frequently switch between hypervisor and +userspace will incur the most overhead from the new IBPB. + +This new IBPB is not integrated with the existing IBPB sites. For +instance, a task can use the existing speculation control prctl() to +get an IBPB at context switch time. With this implementation, the +IBPB is doubled up: one at context switch and another before running +userspace. + +The intent is to integrate and optimize these cases post-embargo. 
+ +[ dhansen: elaborate on suboptimal IBPB solution ] + + Suggested-by: Dave Hansen + Signed-off-by: Pawan Gupta + Signed-off-by: Dave Hansen + Reviewed-by: Dave Hansen + Reviewed-by: Borislav Petkov (AMD) + Acked-by: Sean Christopherson +(cherry picked from commit 2f8f173413f1cbf52660d04df92d0069c4306d25) + Signed-off-by: Jonathan Maple + +# Conflicts: +# arch/x86/include/asm/cpufeatures.h +# arch/x86/include/asm/entry-common.h +# arch/x86/include/asm/nospec-branch.h +diff --cc arch/x86/include/asm/cpufeatures.h +index b1dbe87085a6,c8e177016cc4..000000000000 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@@ -440,15 -480,21 +440,33 @@@ + * + * Reuse free bits when adding new feature flags! + */ +++<<<<<<< HEAD + +#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ + +#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */ + +#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */ + +#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */ + +#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */ + + + +/* RHEL specific auxillary flags, word 22 */ + +#define X86_FEATURE_IBRS_EXIT_SET (22*32+ 0) /* "" Set IBRS on kernel exit */ + +#define X86_FEATURE_IBRS_EXIT_SKIP (22*32+ 1) /* "" Skip SPEC_CTRL MSR write on exit */ +++======= ++ #define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* "amd_lbr_pmc_freeze" AMD LBR and PMC Freeze */ ++ #define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* Clear branch history at syscall entry using SW loop */ ++ #define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */ ++ #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */ ++ #define X86_FEATURE_CLEAR_BHB_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ ++ #define X86_FEATURE_AMD_FAST_CPPC (21*32+ 5) /* Fast CPPC */ ++ #define X86_FEATURE_AMD_HTR_CORES (21*32+ 6) /* Heterogeneous Core Topology */ ++ #define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32+ 7) /* Workload Classification */ ++ #define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */ ++ #define X86_FEATURE_APX (21*32+ 9) /* Advanced Performance Extensions */ ++ #define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+10) /* Use thunk for indirect branches in lower half of cacheline */ ++ #define X86_FEATURE_TSA_SQ_NO (21*32+11) /* AMD CPU not vulnerable to TSA-SQ */ ++ #define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */ ++ #define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */ ++ #define X86_FEATURE_IBPB_EXIT_TO_USER (21*32+14) /* Use IBPB on exit-to-userspace, see VMSCAPE bug */ +++>>>>>>> 2f8f173413f1 (x86/vmscape: Add conditional IBPB mitigation) + + /* + * BUG word(s) +diff --cc arch/x86/include/asm/nospec-branch.h +index 5a91989c9d98,e29f82466f43..000000000000 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@@ -369,7 -530,7 +369,11 @@@ void alternative_msr_write(unsigned in + : "memory"); + } + +++<<<<<<< HEAD + +extern u64 x86_pred_cmd; +++======= ++ DECLARE_PER_CPU(bool, x86_ibpb_exit_to_user); +++>>>>>>> 2f8f173413f1 (x86/vmscape: Add conditional IBPB mitigation) + + static inline void indirect_branch_prediction_barrier(void) + { +* Unmerged path arch/x86/include/asm/entry-common.h +* Unmerged path arch/x86/include/asm/cpufeatures.h +* Unmerged 
path arch/x86/include/asm/entry-common.h +* Unmerged path arch/x86/include/asm/nospec-branch.h +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index a556e8ade674..f262135cfb48 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -60,6 +60,14 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); + DEFINE_PER_CPU(u64, x86_spec_ctrl_current); + EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); + ++/* ++ * Set when the CPU has run a potentially malicious guest. An IBPB will ++ * be needed to before running userspace. That IBPB will flush the branch ++ * predictor content. ++ */ ++DEFINE_PER_CPU(bool, x86_ibpb_exit_to_user); ++EXPORT_PER_CPU_SYMBOL_GPL(x86_ibpb_exit_to_user); ++ + u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB; + EXPORT_SYMBOL_GPL(x86_pred_cmd); + +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 70a012188433..f19537305225 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -10037,6 +10037,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) + if (vcpu->arch.guest_fpu.xfd_err) + wrmsrl(MSR_IA32_XFD_ERR, 0); + ++ /* ++ * Mark this CPU as needing a branch predictor flush before running ++ * userspace. Must be done before enabling preemption to ensure it gets ++ * set for the CPU that actually ran the guest, and not the CPU that it ++ * may migrate to. ++ */ ++ if (cpu_feature_enabled(X86_FEATURE_IBPB_EXIT_TO_USER)) ++ this_cpu_write(x86_ibpb_exit_to_user, true); ++ + /* + * Consume any pending interrupts, including the possible source of + * VM-Exit on SVM and any ticks that occur between VM-Exit and now. diff --git a/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/503f1c72.failed b/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/503f1c72.failed new file mode 100644 index 0000000000000..e444a42d1f5a4 --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/503f1c72.failed @@ -0,0 +1,180 @@ +i40e: fix Jumbo Frame support after iPXE boot + +jira LE-4704 +Rebuild_History Non-Buildable kernel-4.18.0-553.83.1.el8_10 +commit-author Jacob Keller +commit 503f1c72c31bbee21e669a08cf65c49e96d42755 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/503f1c72.failed + +The i40e hardware has multiple hardware settings which define the Maximum +Frame Size (MFS) of the physical port. The firmware has an AdminQ command +(0x0603) to configure the MFS, but the i40e Linux driver never issues this +command. + +In most cases this is no problem, as the NVM default value has the device +configured for its maximum value of 9728. Unfortunately, recent versions of +the iPXE intelxl driver now issue the 0x0603 Set Mac Config command, +modifying the MFS and reducing it from its default value of 9728. + +This occurred as part of iPXE commit 6871a7de705b ("[intelxl] Use admin +queue to set port MAC address and maximum frame size"), a prerequisite +change for supporting the E800 series hardware in iPXE. Both the E700 and +E800 firmware support the AdminQ command, and the iPXE code shares much of +the logic between the two device drivers. + +The ice E800 Linux driver already issues the 0x0603 Set Mac Config command +early during probe, and is thus unaffected by the iPXE change. + +Since commit 3a2c6ced90e1 ("i40e: Add a check to see if MFS is set"), the +i40e driver does check the I40E_PRTGL_SAH register, but it only logs a +warning message if its value is below the 9728 default. 
This register also +only covers received packets and not transmitted packets. A warning can +inform system administrators, but does not correct the issue. No +interactions from userspace cause the driver to write to PRTGL_SAH or issue +the 0x0603 AdminQ command. Only a GLOBR reset will restore the value to its +default value. There is no obvious method to trigger a GLOBR reset from +user space. + +To fix this, introduce the i40e_aq_set_mac_config() function, similar to +the one from the ice driver. Call this during early probe to ensure that +the device configuration matches driver expectation. Unlike E800, the E700 +firmware also has a bit to control whether the MAC should append CRC data. +It is on by default, but setting a 0 to this bit would disable CRC. The +i40e implementation must set this bit to ensure CRC will be appended by the +MAC. + +In addition to the AQ command, instead of just checking the I40E_PRTGL_SAH +register, update its value to the 9728 default and write it back. This +ensures that the hardware is in the expected state, regardless of whether +the iPXE (or any other early boot driver) has modified this state. + +This is a better user experience, as we now fix the issues with larger MTU +instead of merely warning. It also aligns with the way the ice E800 series +driver works. + +A final note: The Fixes tag provided here is not strictly accurate. The +issue occurs as a result of an external entity (the iPXE intelxl driver), +and this is not a regression specifically caused by the mentioned change. +However, I believe the original change to just warn about PRTGL_SAH being +too low was an insufficient fix. + +Fixes: 3a2c6ced90e1 ("i40e: Add a check to see if MFS is set") +Link: https://github.com/ipxe/ipxe/commit/6871a7de705b6f6a4046f0d19da9bcd689c3bc8e + Signed-off-by: Jacob Keller + Signed-off-by: Aleksandr Loktionov + Reviewed-by: Michal Schmidt + Tested-by: Rinitha S (A Contingent worker at Intel) + Signed-off-by: Tony Nguyen +(cherry picked from commit 503f1c72c31bbee21e669a08cf65c49e96d42755) + Signed-off-by: Jonathan Maple + +# Conflicts: +# drivers/net/ethernet/intel/i40e/i40e_main.c +diff --cc drivers/net/ethernet/intel/i40e/i40e_main.c +index 3c1e2c4f82f1,b14019d44b58..000000000000 +--- a/drivers/net/ethernet/intel/i40e/i40e_main.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c +@@@ -16191,16 -16043,19 +16191,28 @@@ static int i40e_probe(struct pci_dev *p + err = i40e_aq_get_phy_capabilities(hw, false, true, &abilities, NULL); + if (err) + dev_dbg(&pf->pdev->dev, "get supported phy types ret = %pe last_status = %s\n", + - ERR_PTR(err), libie_aq_str(pf->hw.aq.asq_last_status)); + + ERR_PTR(err), + + i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + +- /* make sure the MFS hasn't been set lower than the default */ + #define MAX_FRAME_SIZE_DEFAULT 0x2600 +++<<<<<<< HEAD + + val = (rd32(&pf->hw, I40E_PRTGL_SAH) & + + I40E_PRTGL_SAH_MFS_MASK) >> I40E_PRTGL_SAH_MFS_SHIFT; + + if (val < MAX_FRAME_SIZE_DEFAULT) + + dev_warn(&pdev->dev, "MFS for port %x (%d) has been set below the default (%d)\n", + + pf->hw.port, val, MAX_FRAME_SIZE_DEFAULT); +++======= ++ ++ err = i40e_aq_set_mac_config(hw, MAX_FRAME_SIZE_DEFAULT, NULL); ++ if (err) ++ dev_warn(&pdev->dev, "set mac config ret = %pe last_status = %s\n", ++ ERR_PTR(err), libie_aq_str(pf->hw.aq.asq_last_status)); ++ ++ /* Make sure the MFS is set to the expected value */ ++ val = rd32(hw, I40E_PRTGL_SAH); ++ FIELD_MODIFY(I40E_PRTGL_SAH_MFS_MASK, &val, MAX_FRAME_SIZE_DEFAULT); ++ wr32(hw, I40E_PRTGL_SAH, val); 
+++>>>>>>> 503f1c72c31b (i40e: fix Jumbo Frame support after iPXE boot) + + /* Add a filter to drop all Flow control frames from any VSI from being + * transmitted. By doing so we stop a malicious VF from sending out +diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +index 3357d65a906b..fa0a2ff0ddc7 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +@@ -1709,6 +1709,7 @@ I40E_CHECK_CMD_LENGTH(i40e_aq_set_phy_config); + struct i40e_aq_set_mac_config { + __le16 max_frame_size; + u8 params; ++#define I40E_AQ_SET_MAC_CONFIG_CRC_EN BIT(2) + u8 tx_timer_priority; /* bitmap */ + __le16 tx_timer_value; + __le16 fc_refresh_threshold; +diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c +index 30e15a6fc0ce..3be380b0f00e 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_common.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_common.c +@@ -1503,6 +1503,40 @@ int i40e_set_fc(struct i40e_hw *hw, u8 *aq_failures, + return status; + } + ++/** ++ * i40e_aq_set_mac_config - Configure MAC settings ++ * @hw: pointer to the hw struct ++ * @max_frame_size: Maximum Frame Size to be supported by the port ++ * @cmd_details: pointer to command details structure or NULL ++ * ++ * Set MAC configuration (0x0603). Note that max_frame_size must be greater ++ * than zero. ++ * ++ * Return: 0 on success, or a negative error code on failure. ++ */ ++int i40e_aq_set_mac_config(struct i40e_hw *hw, u16 max_frame_size, ++ struct i40e_asq_cmd_details *cmd_details) ++{ ++ struct i40e_aq_set_mac_config *cmd; ++ struct libie_aq_desc desc; ++ ++ cmd = libie_aq_raw(&desc); ++ ++ if (max_frame_size == 0) ++ return -EINVAL; ++ ++ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_set_mac_config); ++ ++ cmd->max_frame_size = cpu_to_le16(max_frame_size); ++ cmd->params = I40E_AQ_SET_MAC_CONFIG_CRC_EN; ++ ++#define I40E_AQ_SET_MAC_CONFIG_FC_DEFAULT_THRESHOLD 0x7FFF ++ cmd->fc_refresh_threshold = ++ cpu_to_le16(I40E_AQ_SET_MAC_CONFIG_FC_DEFAULT_THRESHOLD); ++ ++ return i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); ++} ++ + /** + * i40e_aq_clear_pxe_mode + * @hw: pointer to the hw struct +* Unmerged path drivers/net/ethernet/intel/i40e/i40e_main.c +diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h +index fe845987d99a..7da3ec6b100f 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h ++++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h +@@ -109,6 +109,8 @@ int i40e_aq_set_mac_loopback(struct i40e_hw *hw, + struct i40e_asq_cmd_details *cmd_details); + int i40e_aq_set_phy_int_mask(struct i40e_hw *hw, u16 mask, + struct i40e_asq_cmd_details *cmd_details); ++int i40e_aq_set_mac_config(struct i40e_hw *hw, u16 max_frame_size, ++ struct i40e_asq_cmd_details *cmd_details); + int i40e_aq_clear_pxe_mode(struct i40e_hw *hw, + struct i40e_asq_cmd_details *cmd_details); + int i40e_aq_set_link_restart_an(struct i40e_hw *hw, diff --git a/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/556c1ad6.failed b/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/556c1ad6.failed new file mode 100644 index 0000000000000..1a50522927d2e --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/556c1ad6.failed @@ -0,0 +1,724 @@ +x86/vmscape: Enable the mitigation + +jira LE-4704 +cve CVE-2025-40300 +Rebuild_History Non-Buildable kernel-4.18.0-553.83.1.el8_10 +commit-author Pawan Gupta 
+commit 556c1ad666ad90c50ec8fccb930dd5046cfbecfb +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/556c1ad6.failed + +Enable the previously added mitigation for VMscape. Add the cmdline +vmscape={off|ibpb|force} and sysfs reporting. + + Signed-off-by: Pawan Gupta + Signed-off-by: Dave Hansen + Reviewed-by: Borislav Petkov (AMD) + Reviewed-by: Dave Hansen +(cherry picked from commit 556c1ad666ad90c50ec8fccb930dd5046cfbecfb) + Signed-off-by: Jonathan Maple + +# Conflicts: +# Documentation/admin-guide/kernel-parameters.txt +# arch/x86/Kconfig +# arch/x86/kernel/cpu/bugs.c +# drivers/base/cpu.c +# include/linux/cpu.h +diff --cc Documentation/admin-guide/kernel-parameters.txt +index cf789f4181a4,5a7a83c411e9..000000000000 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@@ -6019,7 -8042,17 +6020,21 @@@ + vmpoff= [KNL,S390] Perform z/VM CP command after power off. + Format: + +++<<<<<<< HEAD + + vsyscall= [X86-64] +++======= ++ vmscape= [X86] Controls mitigation for VMscape attacks. ++ VMscape attacks can leak information from a userspace ++ hypervisor to a guest via speculative side-channels. ++ ++ off - disable the mitigation ++ ibpb - use Indirect Branch Prediction Barrier ++ (IBPB) mitigation (default) ++ force - force vulnerability detection even on ++ unaffected processors ++ ++ vsyscall= [X86-64,EARLY] +++>>>>>>> 556c1ad666ad (x86/vmscape: Enable the mitigation) + Controls the behavior of vsyscalls (i.e. calls to + fixed addresses of 0xffffffffff600x00 from legacy + code). Most statically-linked binaries and older +diff --cc arch/x86/Kconfig +index 92821623048a,52c8910ba2ef..000000000000 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@@ -2626,6 -2579,137 +2626,140 @@@ config MITIGATION_SPECTRE_BH + indirect branches. + See + +++<<<<<<< HEAD +++======= ++ config MITIGATION_MDS ++ bool "Mitigate Microarchitectural Data Sampling (MDS) hardware bug" ++ depends on CPU_SUP_INTEL ++ default y ++ help ++ Enable mitigation for Microarchitectural Data Sampling (MDS). MDS is ++ a hardware vulnerability which allows unprivileged speculative access ++ to data which is available in various CPU internal buffers. ++ See also ++ ++ config MITIGATION_TAA ++ bool "Mitigate TSX Asynchronous Abort (TAA) hardware bug" ++ depends on CPU_SUP_INTEL ++ default y ++ help ++ Enable mitigation for TSX Asynchronous Abort (TAA). TAA is a hardware ++ vulnerability that allows unprivileged speculative access to data ++ which is available in various CPU internal buffers by using ++ asynchronous aborts within an Intel TSX transactional region. ++ See also ++ ++ config MITIGATION_MMIO_STALE_DATA ++ bool "Mitigate MMIO Stale Data hardware bug" ++ depends on CPU_SUP_INTEL ++ default y ++ help ++ Enable mitigation for MMIO Stale Data hardware bugs. Processor MMIO ++ Stale Data Vulnerabilities are a class of memory-mapped I/O (MMIO) ++ vulnerabilities that can expose data. The vulnerabilities require the ++ attacker to have access to MMIO. ++ See also ++ ++ ++ config MITIGATION_L1TF ++ bool "Mitigate L1 Terminal Fault (L1TF) hardware bug" ++ depends on CPU_SUP_INTEL ++ default y ++ help ++ Mitigate L1 Terminal Fault (L1TF) hardware bug. L1 Terminal Fault is a ++ hardware vulnerability which allows unprivileged speculative access to data ++ available in the Level 1 Data Cache. 
++ See ++ ++ config MITIGATION_SPECTRE_V2 ++ bool "Mitigate SPECTRE V2 hardware bug" ++ default y ++ help ++ Enable mitigation for Spectre V2 (Branch Target Injection). Spectre ++ V2 is a class of side channel attacks that takes advantage of ++ indirect branch predictors inside the processor. In Spectre variant 2 ++ attacks, the attacker can steer speculative indirect branches in the ++ victim to gadget code by poisoning the branch target buffer of a CPU ++ used for predicting indirect branch addresses. ++ See also ++ ++ config MITIGATION_SRBDS ++ bool "Mitigate Special Register Buffer Data Sampling (SRBDS) hardware bug" ++ depends on CPU_SUP_INTEL ++ default y ++ help ++ Enable mitigation for Special Register Buffer Data Sampling (SRBDS). ++ SRBDS is a hardware vulnerability that allows Microarchitectural Data ++ Sampling (MDS) techniques to infer values returned from special ++ register accesses. An unprivileged user can extract values returned ++ from RDRAND and RDSEED executed on another core or sibling thread ++ using MDS techniques. ++ See also ++ ++ ++ config MITIGATION_SSB ++ bool "Mitigate Speculative Store Bypass (SSB) hardware bug" ++ default y ++ help ++ Enable mitigation for Speculative Store Bypass (SSB). SSB is a ++ hardware security vulnerability and its exploitation takes advantage ++ of speculative execution in a similar way to the Meltdown and Spectre ++ security vulnerabilities. ++ ++ config MITIGATION_ITS ++ bool "Enable Indirect Target Selection mitigation" ++ depends on CPU_SUP_INTEL && X86_64 ++ depends on MITIGATION_RETPOLINE && MITIGATION_RETHUNK ++ select EXECMEM ++ default y ++ help ++ Enable Indirect Target Selection (ITS) mitigation. ITS is a bug in ++ BPU on some Intel CPUs that may allow Spectre V2 style attacks. If ++ disabled, mitigation cannot be enabled via cmdline. ++ See ++ ++ config MITIGATION_TSA ++ bool "Mitigate Transient Scheduler Attacks" ++ depends on CPU_SUP_AMD ++ default y ++ help ++ Enable mitigation for Transient Scheduler Attacks. TSA is a hardware ++ security vulnerability on AMD CPUs which can lead to forwarding of ++ invalid info to subsequent instructions and thus can affect their ++ timing and thereby cause a leakage. ++ ++ config MITIGATION_VMSCAPE ++ bool "Mitigate VMSCAPE" ++ depends on KVM ++ default y ++ help ++ Enable mitigation for VMSCAPE attacks. VMSCAPE is a hardware security ++ vulnerability on Intel and AMD CPUs that may allow a guest to do ++ Spectre v2 style attacks on userspace hypervisor. +++>>>>>>> 556c1ad666ad (x86/vmscape: Enable the mitigation) + endif + + config ARCH_HAS_ADD_PAGES +diff --cc arch/x86/kernel/cpu/bugs.c +index a556e8ade674,c81024dfc4c8..000000000000 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@@ -37,20 -33,72 +37,35 @@@ + + #include "cpu.h" + + -/* + - * Speculation Vulnerability Handling + - * + - * Each vulnerability is handled with the following functions: + - * _select_mitigation() -- Selects a mitigation to use. This should + - * take into account all relevant command line + - * options. + - * _update_mitigation() -- This is called after all vulnerabilities have + - * selected a mitigation, in case the selection + - * may want to change based on other choices + - * made. This function is optional. + - * _apply_mitigation() -- Enable the selected mitigation. + - * + - * The compile-time mitigation in all cases should be AUTO. An explicit + - * command-line option can override AUTO. 
If no such option is + - * provided, _select_mitigation() will override AUTO to the best + - * mitigation option. + - */ + - + static void __init spectre_v1_select_mitigation(void); + -static void __init spectre_v1_apply_mitigation(void); + static void __init spectre_v2_select_mitigation(void); + -static void __init spectre_v2_update_mitigation(void); + -static void __init spectre_v2_apply_mitigation(void); + static void __init retbleed_select_mitigation(void); + -static void __init retbleed_update_mitigation(void); + -static void __init retbleed_apply_mitigation(void); + static void __init spectre_v2_user_select_mitigation(void); + -static void __init spectre_v2_user_update_mitigation(void); + -static void __init spectre_v2_user_apply_mitigation(void); + static void __init ssb_select_mitigation(void); + -static void __init ssb_apply_mitigation(void); + static void __init l1tf_select_mitigation(void); + -static void __init l1tf_apply_mitigation(void); + static void __init mds_select_mitigation(void); + -static void __init mds_update_mitigation(void); + -static void __init mds_apply_mitigation(void); + +static void __init md_clear_update_mitigation(void); + +static void __init md_clear_select_mitigation(void); + static void __init taa_select_mitigation(void); + -static void __init taa_update_mitigation(void); + -static void __init taa_apply_mitigation(void); + static void __init mmio_select_mitigation(void); + -static void __init mmio_update_mitigation(void); + -static void __init mmio_apply_mitigation(void); + -static void __init rfds_select_mitigation(void); + -static void __init rfds_update_mitigation(void); + -static void __init rfds_apply_mitigation(void); + static void __init srbds_select_mitigation(void); + -static void __init srbds_apply_mitigation(void); + -static void __init l1d_flush_select_mitigation(void); + static void __init srso_select_mitigation(void); + -static void __init srso_update_mitigation(void); + -static void __init srso_apply_mitigation(void); + static void __init gds_select_mitigation(void); +++<<<<<<< HEAD +++======= ++ static void __init gds_apply_mitigation(void); ++ static void __init bhi_select_mitigation(void); ++ static void __init bhi_update_mitigation(void); ++ static void __init bhi_apply_mitigation(void); ++ static void __init its_select_mitigation(void); ++ static void __init its_update_mitigation(void); ++ static void __init its_apply_mitigation(void); ++ static void __init tsa_select_mitigation(void); ++ static void __init tsa_apply_mitigation(void); ++ static void __init vmscape_select_mitigation(void); ++ static void __init vmscape_update_mitigation(void); ++ static void __init vmscape_apply_mitigation(void); +++>>>>>>> 556c1ad666ad (x86/vmscape: Enable the mitigation) + + /* The base value of the SPEC_CTRL MSR without task-specific bits set */ + u64 x86_spec_ctrl_base; +@@@ -175,48 -262,68 +190,103 @@@ void __init check_bugs(void + spectre_v2_user_select_mitigation(); + ssb_select_mitigation(); + l1tf_select_mitigation(); + - mds_select_mitigation(); + - taa_select_mitigation(); + - mmio_select_mitigation(); + - rfds_select_mitigation(); + + md_clear_select_mitigation(); + srbds_select_mitigation(); + - l1d_flush_select_mitigation(); + + + + /* + + * srso_select_mitigation() depends and must run after + + * retbleed_select_mitigation(). + + */ + srso_select_mitigation(); + gds_select_mitigation(); +++<<<<<<< HEAD + + + + arch_smt_update(); + + + +#ifdef CONFIG_X86_32 + + /* + + * Check whether we are able to run this kernel safely on SMP. 
+ + * + + * - i386 is no longer supported. + + * - In order to run on anything without a TSC, we need to be + + * compiled for a i486. + + */ + + if (boot_cpu_data.x86 < 4) + + panic("Kernel requires i486+ for 'invlpg' and other features"); + + + + init_utsname()->machine[1] = + + '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); + + alternative_instructions(); + + + + fpu__init_check_bugs(); + +#else /* CONFIG_X86_64 */ + + alternative_instructions(); +++======= ++ its_select_mitigation(); ++ bhi_select_mitigation(); ++ tsa_select_mitigation(); ++ vmscape_select_mitigation(); +++>>>>>>> 556c1ad666ad (x86/vmscape: Enable the mitigation) + + /* + - * After mitigations are selected, some may need to update their + - * choices. + + * Make sure the first 2MB area is not mapped by huge pages + + * There are typically fixed size MTRRs in there and overlapping + + * MTRRs into large pages causes slow downs. + + * + + * Right now we don't do that with gbpages because there seems + + * very little benefit for that case. + */ +++<<<<<<< HEAD + + if (!direct_gbpages) + + set_memory_4k((unsigned long)__va(0), 1); + +#endif +++======= ++ spectre_v2_update_mitigation(); ++ /* ++ * retbleed_update_mitigation() relies on the state set by ++ * spectre_v2_update_mitigation(); specifically it wants to know about ++ * spectre_v2=ibrs. ++ */ ++ retbleed_update_mitigation(); ++ /* ++ * its_update_mitigation() depends on spectre_v2_update_mitigation() ++ * and retbleed_update_mitigation(). ++ */ ++ its_update_mitigation(); ++ ++ /* ++ * spectre_v2_user_update_mitigation() depends on ++ * retbleed_update_mitigation(), specifically the STIBP ++ * selection is forced for UNRET or IBPB. ++ */ ++ spectre_v2_user_update_mitigation(); ++ mds_update_mitigation(); ++ taa_update_mitigation(); ++ mmio_update_mitigation(); ++ rfds_update_mitigation(); ++ bhi_update_mitigation(); ++ /* srso_update_mitigation() depends on retbleed_update_mitigation(). */ ++ srso_update_mitigation(); ++ vmscape_update_mitigation(); ++ ++ spectre_v1_apply_mitigation(); ++ spectre_v2_apply_mitigation(); ++ retbleed_apply_mitigation(); ++ spectre_v2_user_apply_mitigation(); ++ ssb_apply_mitigation(); ++ l1tf_apply_mitigation(); ++ mds_apply_mitigation(); ++ taa_apply_mitigation(); ++ mmio_apply_mitigation(); ++ rfds_apply_mitigation(); ++ srbds_apply_mitigation(); ++ srso_apply_mitigation(); ++ gds_apply_mitigation(); ++ its_apply_mitigation(); ++ bhi_apply_mitigation(); ++ tsa_apply_mitigation(); ++ vmscape_apply_mitigation(); +++>>>>>>> 556c1ad666ad (x86/vmscape: Enable the mitigation) + } + + /* +@@@ -2644,66 -3284,121 +2714,137 @@@ static void __init srso_select_mitigati + if (boot_cpu_has(X86_FEATURE_SBPB)) + x86_pred_cmd = PRED_CMD_SBPB; + return; + - } + + - switch (srso_mitigation) { + - case SRSO_MITIGATION_SAFE_RET: + - case SRSO_MITIGATION_SAFE_RET_UCODE_NEEDED: + - /* + - * Enable the return thunk for generated code + - * like ftrace, static_call, etc. + - */ + - setup_force_cpu_cap(X86_FEATURE_RETHUNK); + - setup_force_cpu_cap(X86_FEATURE_UNRET); + + case SRSO_CMD_MICROCODE: + + if (has_microcode) { + + srso_mitigation = SRSO_MITIGATION_MICROCODE; + + pr_warn(SRSO_NOTICE); + + } + + break; + + - if (boot_cpu_data.x86 == 0x19) { + - setup_force_cpu_cap(X86_FEATURE_SRSO_ALIAS); + - set_return_thunk(srso_alias_return_thunk); + + case SRSO_CMD_SAFE_RET: + + if (IS_ENABLED(CONFIG_CPU_SRSO)) { + + /* + + * Enable the return thunk for generated code + + * like ftrace, static_call, etc. 
+ + */ + + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + + setup_force_cpu_cap(X86_FEATURE_UNRET); + + + + if (boot_cpu_data.x86 == 0x19) { + + setup_force_cpu_cap(X86_FEATURE_SRSO_ALIAS); + + x86_return_thunk = srso_alias_return_thunk; + + } else { + + setup_force_cpu_cap(X86_FEATURE_SRSO); + + x86_return_thunk = srso_return_thunk; + + } + + if (has_microcode) + + srso_mitigation = SRSO_MITIGATION_SAFE_RET; + + else + + srso_mitigation = SRSO_MITIGATION_SAFE_RET_UCODE_NEEDED; + } else { + - setup_force_cpu_cap(X86_FEATURE_SRSO); + - set_return_thunk(srso_return_thunk); + + pr_err("WARNING: kernel not compiled with CPU_SRSO.\n"); + } + break; + - case SRSO_MITIGATION_IBPB: + - setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); + - /* + - * IBPB on entry already obviates the need for + - * software-based untraining so clear those in case some + - * other mitigation like Retbleed has selected them. + - */ + - setup_clear_cpu_cap(X86_FEATURE_UNRET); + - setup_clear_cpu_cap(X86_FEATURE_RETHUNK); + - fallthrough; + - case SRSO_MITIGATION_IBPB_ON_VMEXIT: + - setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT); + - /* + - * There is no need for RSB filling: entry_ibpb() ensures + - * all predictions, including the RSB, are invalidated, + - * regardless of IBPB implementation. + - */ + - setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT); + + + + case SRSO_CMD_IBPB: + + if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { + + if (has_microcode) { + + setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); + + srso_mitigation = SRSO_MITIGATION_IBPB; + + } + + } else { + + pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n"); + + } + break; + - default: + + + + case SRSO_CMD_IBPB_ON_VMEXIT: + + if (IS_ENABLED(CONFIG_CPU_SRSO)) { + + if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) { + + setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT); + + srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT; + + } + + } else { + + pr_err("WARNING: kernel not compiled with CPU_SRSO.\n"); + + } + break; + } + + + +out: + + pr_info("%s\n", srso_strings[srso_mitigation]); + } + ++ #undef pr_fmt ++ #define pr_fmt(fmt) "VMSCAPE: " fmt ++ ++ enum vmscape_mitigations { ++ VMSCAPE_MITIGATION_NONE, ++ VMSCAPE_MITIGATION_AUTO, ++ VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER, ++ VMSCAPE_MITIGATION_IBPB_ON_VMEXIT, ++ }; ++ ++ static const char * const vmscape_strings[] = { ++ [VMSCAPE_MITIGATION_NONE] = "Vulnerable", ++ /* [VMSCAPE_MITIGATION_AUTO] */ ++ [VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER] = "Mitigation: IBPB before exit to userspace", ++ [VMSCAPE_MITIGATION_IBPB_ON_VMEXIT] = "Mitigation: IBPB on VMEXIT", ++ }; ++ ++ static enum vmscape_mitigations vmscape_mitigation __ro_after_init = ++ IS_ENABLED(CONFIG_MITIGATION_VMSCAPE) ? 
VMSCAPE_MITIGATION_AUTO : VMSCAPE_MITIGATION_NONE; ++ ++ static int __init vmscape_parse_cmdline(char *str) ++ { ++ if (!str) ++ return -EINVAL; ++ ++ if (!strcmp(str, "off")) { ++ vmscape_mitigation = VMSCAPE_MITIGATION_NONE; ++ } else if (!strcmp(str, "ibpb")) { ++ vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER; ++ } else if (!strcmp(str, "force")) { ++ setup_force_cpu_bug(X86_BUG_VMSCAPE); ++ vmscape_mitigation = VMSCAPE_MITIGATION_AUTO; ++ } else { ++ pr_err("Ignoring unknown vmscape=%s option.\n", str); ++ } ++ ++ return 0; ++ } ++ early_param("vmscape", vmscape_parse_cmdline); ++ ++ static void __init vmscape_select_mitigation(void) ++ { ++ if (cpu_mitigations_off() || ++ !boot_cpu_has_bug(X86_BUG_VMSCAPE) || ++ !boot_cpu_has(X86_FEATURE_IBPB)) { ++ vmscape_mitigation = VMSCAPE_MITIGATION_NONE; ++ return; ++ } ++ ++ if (vmscape_mitigation == VMSCAPE_MITIGATION_AUTO) ++ vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER; ++ } ++ ++ static void __init vmscape_update_mitigation(void) ++ { ++ if (!boot_cpu_has_bug(X86_BUG_VMSCAPE)) ++ return; ++ ++ if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB || ++ srso_mitigation == SRSO_MITIGATION_IBPB_ON_VMEXIT) ++ vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_ON_VMEXIT; ++ ++ pr_info("%s\n", vmscape_strings[vmscape_mitigation]); ++ } ++ ++ static void __init vmscape_apply_mitigation(void) ++ { ++ if (vmscape_mitigation == VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER) ++ setup_force_cpu_cap(X86_FEATURE_IBPB_EXIT_TO_USER); ++ } ++ + #undef pr_fmt + #define pr_fmt(fmt) fmt + +@@@ -2940,6 -3642,16 +3081,19 @@@ static ssize_t gds_show_state(char *buf + return sysfs_emit(buf, "%s\n", gds_strings[gds_mitigation]); + } + +++<<<<<<< HEAD +++======= ++ static ssize_t tsa_show_state(char *buf) ++ { ++ return sysfs_emit(buf, "%s\n", tsa_strings[tsa_mitigation]); ++ } ++ ++ static ssize_t vmscape_show_state(char *buf) ++ { ++ return sysfs_emit(buf, "%s\n", vmscape_strings[vmscape_mitigation]); ++ } ++ +++>>>>>>> 556c1ad666ad (x86/vmscape: Enable the mitigation) + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, + char *buf, unsigned int bug) + { +@@@ -2998,6 -3709,18 +3152,21 @@@ + case X86_BUG_RFDS: + return rfds_show_state(buf); + +++<<<<<<< HEAD +++======= ++ case X86_BUG_OLD_MICROCODE: ++ return old_microcode_show_state(buf); ++ ++ case X86_BUG_ITS: ++ return its_show_state(buf); ++ ++ case X86_BUG_TSA: ++ return tsa_show_state(buf); ++ ++ case X86_BUG_VMSCAPE: ++ return vmscape_show_state(buf); ++ +++>>>>>>> 556c1ad666ad (x86/vmscape: Enable the mitigation) + default: + break; + } +@@@ -3077,4 -3797,29 +3246,27 @@@ ssize_t cpu_show_reg_file_data_sampling + { + return cpu_show_common(dev, attr, buf, X86_BUG_RFDS); + } +++<<<<<<< HEAD +++======= ++ ++ ssize_t cpu_show_old_microcode(struct device *dev, struct device_attribute *attr, char *buf) ++ { ++ return cpu_show_common(dev, attr, buf, X86_BUG_OLD_MICROCODE); ++ } ++ ++ ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_attribute *attr, char *buf) ++ { ++ return cpu_show_common(dev, attr, buf, X86_BUG_ITS); ++ } ++ ++ ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf) ++ { ++ return cpu_show_common(dev, attr, buf, X86_BUG_TSA); ++ } ++ ++ ssize_t cpu_show_vmscape(struct device *dev, struct device_attribute *attr, char *buf) ++ { ++ return cpu_show_common(dev, attr, buf, X86_BUG_VMSCAPE); ++ } +++>>>>>>> 556c1ad666ad (x86/vmscape: Enable the mitigation) + #endif + - + -void __warn_thunk(void) + -{ + - 
WARN_ONCE(1, "Unpatched return thunk in use. This should not happen!\n"); + -} +diff --cc drivers/base/cpu.c +index aae1dde82781,008da0354fba..000000000000 +--- a/drivers/base/cpu.c ++++ b/drivers/base/cpu.c +@@@ -551,6 -599,11 +551,14 @@@ CPU_SHOW_VULN_FALLBACK(retbleed) + CPU_SHOW_VULN_FALLBACK(spec_rstack_overflow); + CPU_SHOW_VULN_FALLBACK(gds); + CPU_SHOW_VULN_FALLBACK(reg_file_data_sampling); +++<<<<<<< HEAD +++======= ++ CPU_SHOW_VULN_FALLBACK(ghostwrite); ++ CPU_SHOW_VULN_FALLBACK(old_microcode); ++ CPU_SHOW_VULN_FALLBACK(indirect_target_selection); ++ CPU_SHOW_VULN_FALLBACK(tsa); ++ CPU_SHOW_VULN_FALLBACK(vmscape); +++>>>>>>> 556c1ad666ad (x86/vmscape: Enable the mitigation) + + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); + static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); +@@@ -566,6 -619,11 +574,14 @@@ static DEVICE_ATTR(retbleed, 0444, cpu_ + static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL); + static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL); + static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling, NULL); +++<<<<<<< HEAD +++======= ++ static DEVICE_ATTR(ghostwrite, 0444, cpu_show_ghostwrite, NULL); ++ static DEVICE_ATTR(old_microcode, 0444, cpu_show_old_microcode, NULL); ++ static DEVICE_ATTR(indirect_target_selection, 0444, cpu_show_indirect_target_selection, NULL); ++ static DEVICE_ATTR(tsa, 0444, cpu_show_tsa, NULL); ++ static DEVICE_ATTR(vmscape, 0444, cpu_show_vmscape, NULL); +++>>>>>>> 556c1ad666ad (x86/vmscape: Enable the mitigation) + + static struct attribute *cpu_root_vulnerabilities_attrs[] = { + &dev_attr_meltdown.attr, +@@@ -582,6 -640,11 +598,14 @@@ + &dev_attr_spec_rstack_overflow.attr, + &dev_attr_gather_data_sampling.attr, + &dev_attr_reg_file_data_sampling.attr, +++<<<<<<< HEAD +++======= ++ &dev_attr_ghostwrite.attr, ++ &dev_attr_old_microcode.attr, ++ &dev_attr_indirect_target_selection.attr, ++ &dev_attr_tsa.attr, ++ &dev_attr_vmscape.attr, +++>>>>>>> 556c1ad666ad (x86/vmscape: Enable the mitigation) + NULL + }; + +diff --cc include/linux/cpu.h +index 1e897b9cf3a6,487b3bf2e1ea..000000000000 +--- a/include/linux/cpu.h ++++ b/include/linux/cpu.h +@@@ -76,6 -77,13 +76,16 @@@ extern ssize_t cpu_show_gds(struct devi + struct device_attribute *attr, char *buf); + extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev, + struct device_attribute *attr, char *buf); +++<<<<<<< HEAD +++======= ++ extern ssize_t cpu_show_ghostwrite(struct device *dev, struct device_attribute *attr, char *buf); ++ extern ssize_t cpu_show_old_microcode(struct device *dev, ++ struct device_attribute *attr, char *buf); ++ extern ssize_t cpu_show_indirect_target_selection(struct device *dev, ++ struct device_attribute *attr, char *buf); ++ extern ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf); ++ extern ssize_t cpu_show_vmscape(struct device *dev, struct device_attribute *attr, char *buf); +++>>>>>>> 556c1ad666ad (x86/vmscape: Enable the mitigation) + + extern __printf(4, 5) + struct device *cpu_device_create(struct device *parent, void *drvdata, +diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu +index d9415c851cc8..a8fc8134da9e 100644 +--- a/Documentation/ABI/testing/sysfs-devices-system-cpu ++++ b/Documentation/ABI/testing/sysfs-devices-system-cpu +@@ -499,6 +499,7 @@ What: /sys/devices/system/cpu/vulnerabilities + /sys/devices/system/cpu/vulnerabilities/spectre_v2 
+ /sys/devices/system/cpu/vulnerabilities/srbds + /sys/devices/system/cpu/vulnerabilities/tsx_async_abort ++ /sys/devices/system/cpu/vulnerabilities/vmscape + Date: January 2018 + Contact: Linux kernel mailing list + Description: Information about CPU vulnerabilities +* Unmerged path Documentation/admin-guide/kernel-parameters.txt +* Unmerged path arch/x86/Kconfig +* Unmerged path arch/x86/kernel/cpu/bugs.c +* Unmerged path drivers/base/cpu.c +* Unmerged path include/linux/cpu.h diff --git a/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/6449f5ba.failed b/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/6449f5ba.failed new file mode 100644 index 0000000000000..c01725fb7d51f --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/6449f5ba.failed @@ -0,0 +1,288 @@ +x86/bugs: Move cpu_bugs_smt_update() down + +jira LE-4704 +cve CVE-2025-40300 +Rebuild_History Non-Buildable kernel-4.18.0-553.83.1.el8_10 +commit-author Pawan Gupta +commit 6449f5baf9c78a7a442d64f4a61378a21c5db113 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/6449f5ba.failed + +cpu_bugs_smt_update() uses global variables from different mitigations. For +SMT updates it can't currently use vmscape_mitigation that is defined after +it. + +Since cpu_bugs_smt_update() depends on many other mitigations, move it +after all mitigations are defined. With that, it can use vmscape_mitigation +in a moment. + +No functional change. + + Signed-off-by: Pawan Gupta + Signed-off-by: Dave Hansen + Reviewed-by: Dave Hansen +(cherry picked from commit 6449f5baf9c78a7a442d64f4a61378a21c5db113) + Signed-off-by: Jonathan Maple + +# Conflicts: +# arch/x86/kernel/cpu/bugs.c +diff --cc arch/x86/kernel/cpu/bugs.c +index a556e8ade674,1f8c1c51d057..000000000000 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@@ -1877,206 -2492,65 +1877,209 @@@ static void __init spectre_v2_select_mi + setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); + pr_info("Enabling Restricted Speculation for firmware calls\n"); + } + -} + + -static void update_stibp_msr(void * __unused) + -{ + - u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP); + - update_spec_ctrl(val); + + /* Set up IBPB and STIBP depending on the general spectre V2 command */ + + spectre_v2_cmd = cmd; + +} + + + +static void update_stibp_msr(void * __unused) + +{ + + u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP); + + update_spec_ctrl(val); + +} + + + +/* Update x86_spec_ctrl_base in case SMT state changed. */ + +static void update_stibp_strict(void) + +{ + + u64 mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; + + + + if (sched_smt_active()) + + mask |= SPEC_CTRL_STIBP; + + + + if (mask == x86_spec_ctrl_base) + + return; + + + + pr_info("Update user space SMT mitigation: STIBP %s\n", + + mask & SPEC_CTRL_STIBP ? 
"always-on" : "off"); + + x86_spec_ctrl_base = mask; + + on_each_cpu(update_stibp_msr, NULL, 1); + +} + + + +/* Update the static key controlling the evaluation of TIF_SPEC_IB */ + +static void update_indir_branch_cond(void) + +{ + + if (sched_smt_active()) + + static_branch_enable(&switch_to_cond_stibp); + + else + + static_branch_disable(&switch_to_cond_stibp); + +} + + + +#undef pr_fmt + +#define pr_fmt(fmt) fmt + + + +/* Update the static key controlling the MDS CPU buffer clear in idle */ + +static void update_mds_branch_idle(void) + +{ + + /* + + * Enable the idle clearing if SMT is active on CPUs which are + + * affected only by MSBDS and not any other MDS variant. + + * + + * The other variants cannot be mitigated when SMT is enabled, so + + * clearing the buffers on idle just to prevent the Store Buffer + + * repartitioning leak would be a window dressing exercise. + + */ + + if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY)) + + return; + + + + if (sched_smt_active()) { + + static_branch_enable(&mds_idle_clear); + + } else if (mmio_mitigation == MMIO_MITIGATION_OFF || + + (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) { + + static_branch_disable(&mds_idle_clear); + + } + +} + + +++<<<<<<< HEAD + +#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" + +#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n" + +#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n" + + + +void cpu_bugs_smt_update(void) + +{ + + mutex_lock(&spec_ctrl_mutex); + + + + if (sched_smt_active() && unprivileged_ebpf_enabled() && + + spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) + + pr_warn_once(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG); + + + + switch (spectre_v2_user_stibp) { + + case SPECTRE_V2_USER_NONE: + + break; + + case SPECTRE_V2_USER_STRICT: + + case SPECTRE_V2_USER_STRICT_PREFERRED: + + update_stibp_strict(); + + break; + + case SPECTRE_V2_USER_PRCTL: + + case SPECTRE_V2_USER_SECCOMP: + + update_indir_branch_cond(); + + break; + + } + + + + switch (mds_mitigation) { + + case MDS_MITIGATION_FULL: + + case MDS_MITIGATION_VMWERV: + + if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY)) + + pr_warn_once(MDS_MSG_SMT); + + update_mds_branch_idle(); + + break; + + case MDS_MITIGATION_OFF: + + break; + + } + + + + switch (taa_mitigation) { + + case TAA_MITIGATION_VERW: + + case TAA_MITIGATION_UCODE_NEEDED: + + if (sched_smt_active()) + + pr_warn_once(TAA_MSG_SMT); + + break; + + case TAA_MITIGATION_TSX_DISABLED: + + case TAA_MITIGATION_OFF: + + break; + + } + + + + switch (mmio_mitigation) { + + case MMIO_MITIGATION_VERW: + + case MMIO_MITIGATION_UCODE_NEEDED: + + if (sched_smt_active()) + + pr_warn_once(MMIO_MSG_SMT); + + break; + + case MMIO_MITIGATION_OFF: + + break; + + } + + + + mutex_unlock(&spec_ctrl_mutex); + } + + -/* Update x86_spec_ctrl_base in case SMT state changed. */ + -static void update_stibp_strict(void) + +#ifdef CONFIG_DEBUG_FS + +/* + + * Provide a debugfs file to dump SPEC_CTRL MSRs of all the CPUs + + * Consecutive MSR values are collapsed together if they are the same. 
+ + */ + +static ssize_t spec_ctrl_msrs_read(struct file *file, char __user *user_buf, + + size_t count, loff_t *ppos) + { + - u64 mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; + + int bufsiz = min(count, PAGE_SIZE); + + int cpu, prev_cpu, len, cnt = 0; + + u64 val, prev_val; + + char *buf; + + - if (sched_smt_active()) + - mask |= SPEC_CTRL_STIBP; + + /* + + * The MSRs info should be small enough that the whole buffer is + + * copied out in one call. However, user space may read it again + + * to see if there is any data left. Rereading the cached SPEC_CTRL + + * MSR values may produce a different result causing corruption in + + * output data. So skipping the call if *ppos is not starting from 0. + + */ + + if (*ppos) + + return 0; + + - if (mask == x86_spec_ctrl_base) + - return; + + buf = kmalloc(bufsiz, GFP_KERNEL); + + if (!buf) + + return -ENOMEM; + + - pr_info("Update user space SMT mitigation: STIBP %s\n", + - mask & SPEC_CTRL_STIBP ? "always-on" : "off"); + - x86_spec_ctrl_base = mask; + - on_each_cpu(update_stibp_msr, NULL, 1); + -} + + for_each_possible_cpu(cpu) { + + val = per_cpu(x86_spec_ctrl_current, cpu); + + -/* Update the static key controlling the evaluation of TIF_SPEC_IB */ + -static void update_indir_branch_cond(void) + -{ + - if (sched_smt_active()) + - static_branch_enable(&switch_to_cond_stibp); + + if (!cpu) + + goto next; + + + + if (val == prev_val) + + continue; + + + + if (prev_cpu == cpu - 1) + + len = snprintf(buf + cnt, bufsiz - cnt, "CPU %d: 0x%llx\n", + + prev_cpu, prev_val); + + else + + len = snprintf(buf + cnt, bufsiz - cnt, "CPUs %d-%d: 0x%llx\n", + + prev_cpu, cpu - 1, prev_val); + + + + cnt += len; + + if (!len) + + break; /* Out of buffer */ + +next: + + prev_cpu = cpu; + + prev_val = val; + + } + + + + if (prev_cpu == cpu - 1) + + cnt += snprintf(buf + cnt, bufsiz - cnt, "CPU %d: 0x%llx\n", + + prev_cpu, prev_val); + else + - static_branch_disable(&switch_to_cond_stibp); + + cnt += snprintf(buf + cnt, bufsiz - cnt, "CPUs %d-%d: 0x%llx\n", + + prev_cpu, cpu - 1, prev_val); + + + + count = simple_read_from_buffer(user_buf, count, ppos, buf, cnt); + + kfree(buf); + + return count; + } + + -#undef pr_fmt + -#define pr_fmt(fmt) fmt + +static const struct file_operations fops_spec_ctrl = { + + .read = spec_ctrl_msrs_read, + + .llseek = default_llseek, + +}; + + -/* Update the static key controlling the MDS CPU buffer clear in idle */ + -static void update_mds_branch_idle(void) + +static int __init init_spec_ctrl_debugfs(void) + { + - /* + - * Enable the idle clearing if SMT is active on CPUs which are + - * affected only by MSBDS and not any other MDS variant. + - * + - * The other variants cannot be mitigated when SMT is enabled, so + - * clearing the buffers on idle just to prevent the Store Buffer + - * repartitioning leak would be a window dressing exercise. 
+ - */ + - if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY)) + - return; + - + - if (sched_smt_active()) { + - static_branch_enable(&cpu_buf_idle_clear); + - } else if (mmio_mitigation == MMIO_MITIGATION_OFF || + - (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) { + - static_branch_disable(&cpu_buf_idle_clear); + - } + + if (!debugfs_create_file("spec_ctrl_msrs", 0400, arch_debugfs_dir, + + NULL, &fops_spec_ctrl)) + + return -ENOMEM; + + return 0; + } + +fs_initcall(init_spec_ctrl_debugfs); + +#endif + +++======= +++>>>>>>> 6449f5baf9c7 (x86/bugs: Move cpu_bugs_smt_update() down) + #undef pr_fmt + #define pr_fmt(fmt) "Speculative Store Bypass: " fmt + +* Unmerged path arch/x86/kernel/cpu/bugs.c diff --git a/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/8a68d64b.failed b/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/8a68d64b.failed new file mode 100644 index 0000000000000..0654de96b7b78 --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/8a68d64b.failed @@ -0,0 +1,137 @@ +x86/vmscape: Add old Intel CPUs to affected list + +jira LE-4704 +cve CVE-2025-40300 +Rebuild_History Non-Buildable kernel-4.18.0-553.83.1.el8_10 +commit-author Pawan Gupta +commit 8a68d64bb10334426834e8c273319601878e961e +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/8a68d64b.failed + +These old CPUs are not tested against VMSCAPE, but are likely vulnerable. + + Signed-off-by: Pawan Gupta + Signed-off-by: Dave Hansen +(cherry picked from commit 8a68d64bb10334426834e8c273319601878e961e) + Signed-off-by: Jonathan Maple + +# Conflicts: +# arch/x86/kernel/cpu/common.c +diff --cc arch/x86/kernel/cpu/common.c +index dbb5dee28ca2,f98ec9c7fc07..000000000000 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@@ -1164,45 -1230,70 +1164,103 @@@ static const __initconst struct x86_cpu + #define GDS BIT(6) + /* CPU is affected by Register File Data Sampling */ + #define RFDS BIT(7) + -/* CPU is affected by Indirect Target Selection */ + -#define ITS BIT(8) + -/* CPU is affected by Indirect Target Selection, but guest-host isolation is not affected */ + -#define ITS_NATIVE_ONLY BIT(9) + -/* CPU is affected by Transient Scheduler Attacks */ + -#define TSA BIT(10) + -/* CPU is affected by VMSCAPE */ + -#define VMSCAPE BIT(11) + +++<<<<<<< HEAD + +static const struct x86_cpu_id_v2 cpu_vuln_blacklist[] __initconst = { + + VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), + + VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS), + + VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS), + + VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS), + + VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO), + + VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPING_ANY, MMIO), + + VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), + + VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), + + VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), + + VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), + + VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), + + VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), + + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), + + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), + + VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED), + + 
VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), + + VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO | GDS), + + VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO | GDS), + + VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), + + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED), + + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), + + VULNBL_INTEL_STEPPINGS(TIGERLAKE_L, X86_STEPPING_ANY, GDS), + + VULNBL_INTEL_STEPPINGS(TIGERLAKE, X86_STEPPING_ANY, GDS), + + VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + + VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), + + VULNBL_INTEL_STEPPINGS(ALDERLAKE, X86_STEPPING_ANY, RFDS), + + VULNBL_INTEL_STEPPINGS(ALDERLAKE_L, X86_STEPPING_ANY, RFDS), + + VULNBL_INTEL_STEPPINGS(RAPTORLAKE, X86_STEPPING_ANY, RFDS), + + VULNBL_INTEL_STEPPINGS(RAPTORLAKE_P, X86_STEPPING_ANY, RFDS), + + VULNBL_INTEL_STEPPINGS(RAPTORLAKE_S, X86_STEPPING_ANY, RFDS), + + VULNBL_INTEL_STEPPINGS(ATOM_GRACEMONT, X86_STEPPING_ANY, RFDS), + + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS), + + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO | RFDS), + + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS), + + VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT, X86_STEPPING_ANY, RFDS), + + VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT_D, X86_STEPPING_ANY, RFDS), + + VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT_PLUS, X86_STEPPING_ANY, RFDS), +++======= ++ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { ++ VULNBL_INTEL_STEPS(INTEL_SANDYBRIDGE_X, X86_STEP_MAX, VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_SANDYBRIDGE, X86_STEP_MAX, VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE_X, X86_STEP_MAX, VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_HASWELL, X86_STEP_MAX, SRBDS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_HASWELL_L, X86_STEP_MAX, SRBDS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_HASWELL_G, X86_STEP_MAX, SRBDS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_HASWELL_X, X86_STEP_MAX, MMIO | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_BROADWELL_D, X86_STEP_MAX, MMIO | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_BROADWELL_X, X86_STEP_MAX, MMIO | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_BROADWELL_G, X86_STEP_MAX, SRBDS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_BROADWELL, X86_STEP_MAX, SRBDS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, 0x5, MMIO | RETBLEED | GDS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_SKYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_SKYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, 0xb, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_KABYLAKE, 0xc, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_KABYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_CANNONLAKE_L, X86_STEP_MAX, RETBLEED | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_ICELAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY), ++ VULNBL_INTEL_STEPS(INTEL_ICELAKE_D, X86_STEP_MAX, MMIO | GDS | ITS | 
ITS_NATIVE_ONLY), ++ VULNBL_INTEL_STEPS(INTEL_ICELAKE_X, X86_STEP_MAX, MMIO | GDS | ITS | ITS_NATIVE_ONLY), ++ VULNBL_INTEL_STEPS(INTEL_COMETLAKE, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, 0x0, MMIO | RETBLEED | ITS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_TIGERLAKE_L, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY), ++ VULNBL_INTEL_STEPS(INTEL_TIGERLAKE, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY), ++ VULNBL_INTEL_STEPS(INTEL_LAKEFIELD, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED), ++ VULNBL_INTEL_STEPS(INTEL_ROCKETLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY), ++ VULNBL_INTEL_TYPE(INTEL_ALDERLAKE, ATOM, RFDS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_ALDERLAKE, X86_STEP_MAX, VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_ALDERLAKE_L, X86_STEP_MAX, RFDS | VMSCAPE), ++ VULNBL_INTEL_TYPE(INTEL_RAPTORLAKE, ATOM, RFDS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE, X86_STEP_MAX, VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_P, X86_STEP_MAX, RFDS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_S, X86_STEP_MAX, RFDS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_METEORLAKE_L, X86_STEP_MAX, VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_ARROWLAKE_H, X86_STEP_MAX, VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_ARROWLAKE, X86_STEP_MAX, VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_ARROWLAKE_U, X86_STEP_MAX, VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_LUNARLAKE_M, X86_STEP_MAX, VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_SAPPHIRERAPIDS_X, X86_STEP_MAX, VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_GRANITERAPIDS_X, X86_STEP_MAX, VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_EMERALDRAPIDS_X, X86_STEP_MAX, VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_ATOM_GRACEMONT, X86_STEP_MAX, RFDS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT, X86_STEP_MAX, MMIO | MMIO_SBDS | RFDS), ++ VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT_D, X86_STEP_MAX, MMIO | RFDS), ++ VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RFDS), ++ VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT, X86_STEP_MAX, RFDS), ++ VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT_D, X86_STEP_MAX, RFDS), ++ VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT_PLUS, X86_STEP_MAX, RFDS), ++ VULNBL_INTEL_STEPS(INTEL_ATOM_CRESTMONT_X, X86_STEP_MAX, VMSCAPE), +++>>>>>>> 8a68d64bb103 (x86/vmscape: Add old Intel CPUs to affected list) + + VULNBL_AMD(0x15, RETBLEED), + VULNBL_AMD(0x16, RETBLEED), +* Unmerged path arch/x86/kernel/cpu/common.c diff --git a/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/9969779d.failed b/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/9969779d.failed new file mode 100644 index 0000000000000..92ed3e49a4166 --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/9969779d.failed @@ -0,0 +1,157 @@ +Documentation/hw-vuln: Add VMSCAPE documentation + +jira LE-4704 +cve CVE-2025-40300 +Rebuild_History Non-Buildable kernel-4.18.0-553.83.1.el8_10 +commit-author Pawan Gupta +commit 9969779d0803f5dcd4460ae7aca2bc3fd91bff12 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/9969779d.failed + +VMSCAPE is a vulnerability that may allow a guest to influence the branch +prediction in host userspace, particularly affecting hypervisors like QEMU. + +Add the documentation. 
+ + Signed-off-by: Pawan Gupta + Signed-off-by: Dave Hansen + Reviewed-by: Borislav Petkov (AMD) + Reviewed-by: Dave Hansen +(cherry picked from commit 9969779d0803f5dcd4460ae7aca2bc3fd91bff12) + Signed-off-by: Jonathan Maple + +# Conflicts: +# Documentation/admin-guide/hw-vuln/index.rst +diff --cc Documentation/admin-guide/hw-vuln/index.rst +index 2189a6a3b22b,55d747511f83..000000000000 +--- a/Documentation/admin-guide/hw-vuln/index.rst ++++ b/Documentation/admin-guide/hw-vuln/index.rst +@@@ -19,3 -22,8 +19,10 @@@ are configurable at compile, boot or ru + srso + gather_data_sampling + reg-file-data-sampling +++<<<<<<< HEAD +++======= ++ rsb ++ old_microcode ++ indirect-target-selection ++ vmscape +++>>>>>>> 9969779d0803 (Documentation/hw-vuln: Add VMSCAPE documentation) +* Unmerged path Documentation/admin-guide/hw-vuln/index.rst +diff --git a/Documentation/admin-guide/hw-vuln/vmscape.rst b/Documentation/admin-guide/hw-vuln/vmscape.rst +new file mode 100644 +index 000000000000..d9b9a2b6c114 +--- /dev/null ++++ b/Documentation/admin-guide/hw-vuln/vmscape.rst +@@ -0,0 +1,110 @@ ++.. SPDX-License-Identifier: GPL-2.0 ++ ++VMSCAPE ++======= ++ ++VMSCAPE is a vulnerability that may allow a guest to influence the branch ++prediction in host userspace. It particularly affects hypervisors like QEMU. ++ ++Even if a hypervisor may not have any sensitive data like disk encryption keys, ++guest-userspace may be able to attack the guest-kernel using the hypervisor as ++a confused deputy. ++ ++Affected processors ++------------------- ++ ++The following CPU families are affected by VMSCAPE: ++ ++**Intel processors:** ++ - Skylake generation (Parts without Enhanced-IBRS) ++ - Cascade Lake generation - (Parts affected by ITS guest/host separation) ++ - Alder Lake and newer (Parts affected by BHI) ++ ++Note that, BHI affected parts that use BHB clearing software mitigation e.g. ++Icelake are not vulnerable to VMSCAPE. ++ ++**AMD processors:** ++ - Zen series (families 0x17, 0x19, 0x1a) ++ ++** Hygon processors:** ++ - Family 0x18 ++ ++Mitigation ++---------- ++ ++Conditional IBPB ++---------------- ++ ++Kernel tracks when a CPU has run a potentially malicious guest and issues an ++IBPB before the first exit to userspace after VM-exit. If userspace did not run ++between VM-exit and the next VM-entry, no IBPB is issued. ++ ++Note that the existing userspace mitigation against Spectre-v2 is effective in ++protecting the userspace. They are insufficient to protect the userspace VMMs ++from a malicious guest. This is because Spectre-v2 mitigations are applied at ++context switch time, while the userspace VMM can run after a VM-exit without a ++context switch. ++ ++Vulnerability enumeration and mitigation is not applied inside a guest. This is ++because nested hypervisors should already be deploying IBPB to isolate ++themselves from nested guests. ++ ++SMT considerations ++------------------ ++ ++When Simultaneous Multi-Threading (SMT) is enabled, hypervisors can be ++vulnerable to cross-thread attacks. For complete protection against VMSCAPE ++attacks in SMT environments, STIBP should be enabled. ++ ++The kernel will issue a warning if SMT is enabled without adequate STIBP ++protection. 
Warning is not issued when: ++ ++- SMT is disabled ++- STIBP is enabled system-wide ++- Intel eIBRS is enabled (which implies STIBP protection) ++ ++System information and options ++------------------------------ ++ ++The sysfs file showing VMSCAPE mitigation status is: ++ ++ /sys/devices/system/cpu/vulnerabilities/vmscape ++ ++The possible values in this file are: ++ ++ * 'Not affected': ++ ++ The processor is not vulnerable to VMSCAPE attacks. ++ ++ * 'Vulnerable': ++ ++ The processor is vulnerable and no mitigation has been applied. ++ ++ * 'Mitigation: IBPB before exit to userspace': ++ ++ Conditional IBPB mitigation is enabled. The kernel tracks when a CPU has ++ run a potentially malicious guest and issues an IBPB before the first ++ exit to userspace after VM-exit. ++ ++ * 'Mitigation: IBPB on VMEXIT': ++ ++ IBPB is issued on every VM-exit. This occurs when other mitigations like ++ RETBLEED or SRSO are already issuing IBPB on VM-exit. ++ ++Mitigation control on the kernel command line ++---------------------------------------------- ++ ++The mitigation can be controlled via the ``vmscape=`` command line parameter: ++ ++ * ``vmscape=off``: ++ ++ Disable the VMSCAPE mitigation. ++ ++ * ``vmscape=ibpb``: ++ ++ Enable conditional IBPB mitigation (default when CONFIG_MITIGATION_VMSCAPE=y). ++ ++ * ``vmscape=force``: ++ ++ Force vulnerability detection and mitigation even on processors that are ++ not known to be affected. diff --git a/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/a508cec6.failed b/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/a508cec6.failed new file mode 100644 index 0000000000000..9a2aa20495b46 --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/a508cec6.failed @@ -0,0 +1,277 @@ +x86/vmscape: Enumerate VMSCAPE bug + +jira LE-4704 +cve CVE-2025-40300 +Rebuild_History Non-Buildable kernel-4.18.0-553.83.1.el8_10 +commit-author Pawan Gupta +commit a508cec6e5215a3fbc7e73ae86a5c5602187934d +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/a508cec6.failed + +The VMSCAPE vulnerability may allow a guest to cause Branch Target +Injection (BTI) in userspace hypervisors. + +Kernels (both host and guest) have existing defenses against direct BTI +attacks from guests. There are also inter-process BTI mitigations which +prevent processes from attacking each other. However, the threat in this +case is to a userspace hypervisor within the same process as the attacker. + +Userspace hypervisors have access to their own sensitive data like disk +encryption keys and also typically have access to all guest data. This +means guest userspace may use the hypervisor as a confused deputy to attack +sensitive guest kernel data. There are no existing mitigations for these +attacks. + +Introduce X86_BUG_VMSCAPE for this vulnerability and set it on affected +Intel and AMD CPUs. + + Signed-off-by: Pawan Gupta + Signed-off-by: Dave Hansen + Reviewed-by: Borislav Petkov (AMD) +(cherry picked from commit a508cec6e5215a3fbc7e73ae86a5c5602187934d) + Signed-off-by: Jonathan Maple + +# Conflicts: +# arch/x86/include/asm/cpufeatures.h +# arch/x86/kernel/cpu/common.c +diff --cc arch/x86/include/asm/cpufeatures.h +index b1dbe87085a6,b6fa5c33c85d..000000000000 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@@ -469,32 -514,41 +469,46 @@@ + * 64-bit kernels don't use X86_BUG_ESPFIX. 
Make the define conditional + * to avoid confusion. + */ + -#define X86_BUG_ESPFIX X86_BUG(9) /* IRET to 16-bit SS corrupts ESP/RSP high bits */ + +#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ + #endif + -#define X86_BUG_NULL_SEG X86_BUG(10) /* "null_seg" Nulling a selector preserves the base */ + -#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* "swapgs_fence" SWAPGS without input dep on GS */ + -#define X86_BUG_MONITOR X86_BUG(12) /* "monitor" IPI required to wake up remote CPU */ + -#define X86_BUG_AMD_E400 X86_BUG(13) /* "amd_e400" CPU is among the affected by Erratum 400 */ + -#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* "cpu_meltdown" CPU is affected by meltdown attack and needs kernel page table isolation */ + -#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* "spectre_v1" CPU is affected by Spectre variant 1 attack with conditional branches */ + -#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* "spectre_v2" CPU is affected by Spectre variant 2 attack with indirect branches */ + -#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* "spec_store_bypass" CPU is affected by speculative store bypass attack */ + -#define X86_BUG_L1TF X86_BUG(18) /* "l1tf" CPU is affected by L1 Terminal Fault */ + -#define X86_BUG_MDS X86_BUG(19) /* "mds" CPU is affected by Microarchitectural data sampling */ + -#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* "msbds_only" CPU is only affected by the MSDBS variant of BUG_MDS */ + -#define X86_BUG_SWAPGS X86_BUG(21) /* "swapgs" CPU is affected by speculation through SWAPGS */ + -#define X86_BUG_TAA X86_BUG(22) /* "taa" CPU is affected by TSX Async Abort(TAA) */ + -#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* "itlb_multihit" CPU may incur MCE during certain page attribute changes */ + -#define X86_BUG_SRBDS X86_BUG(24) /* "srbds" CPU may leak RNG bits if not mitigated */ + -#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* "mmio_stale_data" CPU is affected by Processor MMIO Stale Data vulnerabilities */ + -/* unused, was #define X86_BUG_MMIO_UNKNOWN X86_BUG(26) "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */ + -#define X86_BUG_RETBLEED X86_BUG(27) /* "retbleed" CPU is affected by RETBleed */ + -#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* "eibrs_pbrsb" EIBRS is vulnerable to Post Barrier RSB Predictions */ + -#define X86_BUG_SMT_RSB X86_BUG(29) /* "smt_rsb" CPU is vulnerable to Cross-Thread Return Address Predictions */ + -#define X86_BUG_GDS X86_BUG(30) /* "gds" CPU is affected by Gather Data Sampling */ + -#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* "tdx_pw_mce" CPU may incur #MC if non-TD software does partial write to TDX private memory */ + +#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ + +#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ + +#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ + +#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ + +#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ + +#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ + +#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ + +#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ + +#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ + +#define X86_BUG_MDS 
X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ + +#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ + +#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ + +#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ + +#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ + +#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ + +#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ + +#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */ + +#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */ + +#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ + +#define X86_BUG_GDS X86_BUG(30) /* CPU is affected by Gather Data Sampling */ + + /* BUG word 2 */ +++<<<<<<< HEAD + +#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ + +#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ + +#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */ + +#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */ +++======= ++ #define X86_BUG_SRSO X86_BUG( 1*32+ 0) /* "srso" AMD SRSO bug */ ++ #define X86_BUG_DIV0 X86_BUG( 1*32+ 1) /* "div0" AMD DIV0 speculation bug */ ++ #define X86_BUG_RFDS X86_BUG( 1*32+ 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */ ++ #define X86_BUG_BHI X86_BUG( 1*32+ 3) /* "bhi" CPU is affected by Branch History Injection */ ++ #define X86_BUG_IBPB_NO_RET X86_BUG( 1*32+ 4) /* "ibpb_no_ret" IBPB omits return target predictions */ ++ #define X86_BUG_SPECTRE_V2_USER X86_BUG( 1*32+ 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */ ++ #define X86_BUG_OLD_MICROCODE X86_BUG( 1*32+ 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */ ++ #define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */ ++ #define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ ++ #define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */ ++ #define X86_BUG_VMSCAPE X86_BUG( 1*32+10) /* "vmscape" CPU is affected by VMSCAPE attacks from guests */ +++>>>>>>> a508cec6e521 (x86/vmscape: Enumerate VMSCAPE bug) + #endif /* _ASM_X86_CPUFEATURES_H */ +diff --cc arch/x86/kernel/cpu/common.c +index dbb5dee28ca2,2b87c93e6609..000000000000 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@@ -1164,51 -1230,74 +1164,122 @@@ static const __initconst struct x86_cpu + #define GDS BIT(6) + /* CPU is affected by Register File Data Sampling */ + #define RFDS BIT(7) +++<<<<<<< HEAD + + + +static const struct x86_cpu_id_v2 cpu_vuln_blacklist[] __initconst = { + + VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), + + VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS), + + VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS), + + VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS), + + VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO), + + VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPING_ANY, MMIO), + + VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), + + VULNBL_INTEL_STEPPINGS(BROADWELL_X, 
X86_STEPPING_ANY, MMIO), + + VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), + + VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), + + VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), + + VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), + + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), + + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), + + VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED), + + VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), + + VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO | GDS), + + VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO | GDS), + + VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), + + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED), + + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), + + VULNBL_INTEL_STEPPINGS(TIGERLAKE_L, X86_STEPPING_ANY, GDS), + + VULNBL_INTEL_STEPPINGS(TIGERLAKE, X86_STEPPING_ANY, GDS), + + VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + + VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), + + VULNBL_INTEL_STEPPINGS(ALDERLAKE, X86_STEPPING_ANY, RFDS), + + VULNBL_INTEL_STEPPINGS(ALDERLAKE_L, X86_STEPPING_ANY, RFDS), + + VULNBL_INTEL_STEPPINGS(RAPTORLAKE, X86_STEPPING_ANY, RFDS), + + VULNBL_INTEL_STEPPINGS(RAPTORLAKE_P, X86_STEPPING_ANY, RFDS), + + VULNBL_INTEL_STEPPINGS(RAPTORLAKE_S, X86_STEPPING_ANY, RFDS), + + VULNBL_INTEL_STEPPINGS(ATOM_GRACEMONT, X86_STEPPING_ANY, RFDS), + + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS), + + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO | RFDS), + + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS), + + VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT, X86_STEPPING_ANY, RFDS), + + VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT_D, X86_STEPPING_ANY, RFDS), + + VULNBL_INTEL_STEPPINGS(ATOM_GOLDMONT_PLUS, X86_STEPPING_ANY, RFDS), + + + + VULNBL_AMD(0x15, RETBLEED), + + VULNBL_AMD(0x16, RETBLEED), + + VULNBL_AMD(0x17, RETBLEED | SRSO), + + VULNBL_HYGON(0x18, RETBLEED), + + VULNBL_AMD(0x19, SRSO), +++======= ++ /* CPU is affected by Indirect Target Selection */ ++ #define ITS BIT(8) ++ /* CPU is affected by Indirect Target Selection, but guest-host isolation is not affected */ ++ #define ITS_NATIVE_ONLY BIT(9) ++ /* CPU is affected by Transient Scheduler Attacks */ ++ #define TSA BIT(10) ++ /* CPU is affected by VMSCAPE */ ++ #define VMSCAPE BIT(11) ++ ++ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { ++ VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS), ++ VULNBL_INTEL_STEPS(INTEL_HASWELL, X86_STEP_MAX, SRBDS), ++ VULNBL_INTEL_STEPS(INTEL_HASWELL_L, X86_STEP_MAX, SRBDS), ++ VULNBL_INTEL_STEPS(INTEL_HASWELL_G, X86_STEP_MAX, SRBDS), ++ VULNBL_INTEL_STEPS(INTEL_HASWELL_X, X86_STEP_MAX, MMIO), ++ VULNBL_INTEL_STEPS(INTEL_BROADWELL_D, X86_STEP_MAX, MMIO), ++ VULNBL_INTEL_STEPS(INTEL_BROADWELL_G, X86_STEP_MAX, SRBDS), ++ VULNBL_INTEL_STEPS(INTEL_BROADWELL_X, X86_STEP_MAX, MMIO), ++ VULNBL_INTEL_STEPS(INTEL_BROADWELL, X86_STEP_MAX, SRBDS), ++ VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, 0x5, MMIO | RETBLEED | GDS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS | VMSCAPE), ++ 
VULNBL_INTEL_STEPS(INTEL_SKYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_SKYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, 0xb, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_KABYLAKE, 0xc, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_KABYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_CANNONLAKE_L, X86_STEP_MAX, RETBLEED | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_ICELAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY), ++ VULNBL_INTEL_STEPS(INTEL_ICELAKE_D, X86_STEP_MAX, MMIO | GDS | ITS | ITS_NATIVE_ONLY), ++ VULNBL_INTEL_STEPS(INTEL_ICELAKE_X, X86_STEP_MAX, MMIO | GDS | ITS | ITS_NATIVE_ONLY), ++ VULNBL_INTEL_STEPS(INTEL_COMETLAKE, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, 0x0, MMIO | RETBLEED | ITS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_TIGERLAKE_L, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY), ++ VULNBL_INTEL_STEPS(INTEL_TIGERLAKE, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY), ++ VULNBL_INTEL_STEPS(INTEL_LAKEFIELD, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED), ++ VULNBL_INTEL_STEPS(INTEL_ROCKETLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY), ++ VULNBL_INTEL_TYPE(INTEL_ALDERLAKE, ATOM, RFDS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_ALDERLAKE, X86_STEP_MAX, VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_ALDERLAKE_L, X86_STEP_MAX, RFDS | VMSCAPE), ++ VULNBL_INTEL_TYPE(INTEL_RAPTORLAKE, ATOM, RFDS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE, X86_STEP_MAX, VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_P, X86_STEP_MAX, RFDS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_S, X86_STEP_MAX, RFDS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_METEORLAKE_L, X86_STEP_MAX, VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_ARROWLAKE_H, X86_STEP_MAX, VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_ARROWLAKE, X86_STEP_MAX, VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_ARROWLAKE_U, X86_STEP_MAX, VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_LUNARLAKE_M, X86_STEP_MAX, VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_SAPPHIRERAPIDS_X, X86_STEP_MAX, VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_GRANITERAPIDS_X, X86_STEP_MAX, VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_EMERALDRAPIDS_X, X86_STEP_MAX, VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_ATOM_GRACEMONT, X86_STEP_MAX, RFDS | VMSCAPE), ++ VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT, X86_STEP_MAX, MMIO | MMIO_SBDS | RFDS), ++ VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT_D, X86_STEP_MAX, MMIO | RFDS), ++ VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RFDS), ++ VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT, X86_STEP_MAX, RFDS), ++ VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT_D, X86_STEP_MAX, RFDS), ++ VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT_PLUS, X86_STEP_MAX, RFDS), ++ VULNBL_INTEL_STEPS(INTEL_ATOM_CRESTMONT_X, X86_STEP_MAX, VMSCAPE), ++ ++ VULNBL_AMD(0x15, RETBLEED), ++ VULNBL_AMD(0x16, RETBLEED), ++ VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO | VMSCAPE), ++ VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO | VMSCAPE), ++ VULNBL_AMD(0x19, SRSO | TSA | VMSCAPE), ++ VULNBL_AMD(0x1a, SRSO | VMSCAPE), +++>>>>>>> a508cec6e521 (x86/vmscape: Enumerate VMSCAPE bug) + {} + }; + +@@@ -1371,6 -1537,33 +1442,36 @@@ static void __init 
cpu_set_bug_bits(str + boot_cpu_has(X86_FEATURE_HYPERVISOR))) + setup_force_cpu_bug(X86_BUG_BHI); + +++<<<<<<< HEAD +++======= ++ if (cpu_has(c, X86_FEATURE_AMD_IBPB) && !cpu_has(c, X86_FEATURE_AMD_IBPB_RET)) ++ setup_force_cpu_bug(X86_BUG_IBPB_NO_RET); ++ ++ if (vulnerable_to_its(x86_arch_cap_msr)) { ++ setup_force_cpu_bug(X86_BUG_ITS); ++ if (cpu_matches(cpu_vuln_blacklist, ITS_NATIVE_ONLY)) ++ setup_force_cpu_bug(X86_BUG_ITS_NATIVE_ONLY); ++ } ++ ++ if (c->x86_vendor == X86_VENDOR_AMD) { ++ if (!cpu_has(c, X86_FEATURE_TSA_SQ_NO) || ++ !cpu_has(c, X86_FEATURE_TSA_L1_NO)) { ++ if (cpu_matches(cpu_vuln_blacklist, TSA) || ++ /* Enable bug on Zen guests to allow for live migration. */ ++ (cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_ZEN))) ++ setup_force_cpu_bug(X86_BUG_TSA); ++ } ++ } ++ ++ /* ++ * Set the bug only on bare-metal. A nested hypervisor should already be ++ * deploying IBPB to isolate itself from nested guests. ++ */ ++ if (cpu_matches(cpu_vuln_blacklist, VMSCAPE) && ++ !boot_cpu_has(X86_FEATURE_HYPERVISOR)) ++ setup_force_cpu_bug(X86_BUG_VMSCAPE); ++ +++>>>>>>> a508cec6e521 (x86/vmscape: Enumerate VMSCAPE bug) + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) + return; + +* Unmerged path arch/x86/include/asm/cpufeatures.h +* Unmerged path arch/x86/kernel/cpu/common.c diff --git a/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/b7cc9887.failed b/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/b7cc9887.failed new file mode 100644 index 0000000000000..481109e1a9b07 --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/b7cc9887.failed @@ -0,0 +1,151 @@ +x86/vmscape: Warn when STIBP is disabled with SMT + +jira LE-4704 +cve CVE-2025-40300 +Rebuild_History Non-Buildable kernel-4.18.0-553.83.1.el8_10 +commit-author Pawan Gupta +commit b7cc9887231526ca4fa89f3fa4119e47c2dc7b1e +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/b7cc9887.failed + +Cross-thread attacks are generally harder as they require the victim to be +co-located on a core. However, with VMSCAPE the adversary targets belong to +the same guest execution, that are more likely to get co-located. In +particular, a thread that is currently executing userspace hypervisor +(after the IBPB) may still be targeted by a guest execution from a sibling +thread. + +Issue a warning about the potential risk, except when: + +- SMT is disabled +- STIBP is enabled system-wide +- Intel eIBRS is enabled (which implies STIBP protection) + + Signed-off-by: Pawan Gupta + Signed-off-by: Dave Hansen +(cherry picked from commit b7cc9887231526ca4fa89f3fa4119e47c2dc7b1e) + Signed-off-by: Jonathan Maple + +# Conflicts: +# arch/x86/kernel/cpu/bugs.c +diff --cc arch/x86/kernel/cpu/bugs.c +index a556e8ade674,fa32615db71d..000000000000 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@@ -2707,6 -3320,111 +2707,114 @@@ out + #undef pr_fmt + #define pr_fmt(fmt) fmt + +++<<<<<<< HEAD +++======= ++ #define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" ++ #define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n" ++ #define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. 
See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n" ++ #define VMSCAPE_MSG_SMT "VMSCAPE: SMT on, STIBP is required for full protection. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/vmscape.html for more details.\n" ++ ++ void cpu_bugs_smt_update(void) ++ { ++ mutex_lock(&spec_ctrl_mutex); ++ ++ if (sched_smt_active() && unprivileged_ebpf_enabled() && ++ spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) ++ pr_warn_once(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG); ++ ++ switch (spectre_v2_user_stibp) { ++ case SPECTRE_V2_USER_NONE: ++ break; ++ case SPECTRE_V2_USER_STRICT: ++ case SPECTRE_V2_USER_STRICT_PREFERRED: ++ update_stibp_strict(); ++ break; ++ case SPECTRE_V2_USER_PRCTL: ++ case SPECTRE_V2_USER_SECCOMP: ++ update_indir_branch_cond(); ++ break; ++ } ++ ++ switch (mds_mitigation) { ++ case MDS_MITIGATION_FULL: ++ case MDS_MITIGATION_AUTO: ++ case MDS_MITIGATION_VMWERV: ++ if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY)) ++ pr_warn_once(MDS_MSG_SMT); ++ update_mds_branch_idle(); ++ break; ++ case MDS_MITIGATION_OFF: ++ break; ++ } ++ ++ switch (taa_mitigation) { ++ case TAA_MITIGATION_VERW: ++ case TAA_MITIGATION_AUTO: ++ case TAA_MITIGATION_UCODE_NEEDED: ++ if (sched_smt_active()) ++ pr_warn_once(TAA_MSG_SMT); ++ break; ++ case TAA_MITIGATION_TSX_DISABLED: ++ case TAA_MITIGATION_OFF: ++ break; ++ } ++ ++ switch (mmio_mitigation) { ++ case MMIO_MITIGATION_VERW: ++ case MMIO_MITIGATION_AUTO: ++ case MMIO_MITIGATION_UCODE_NEEDED: ++ if (sched_smt_active()) ++ pr_warn_once(MMIO_MSG_SMT); ++ break; ++ case MMIO_MITIGATION_OFF: ++ break; ++ } ++ ++ switch (tsa_mitigation) { ++ case TSA_MITIGATION_USER_KERNEL: ++ case TSA_MITIGATION_VM: ++ case TSA_MITIGATION_AUTO: ++ case TSA_MITIGATION_FULL: ++ /* ++ * TSA-SQ can potentially lead to info leakage between ++ * SMT threads. ++ */ ++ if (sched_smt_active()) ++ static_branch_enable(&cpu_buf_idle_clear); ++ else ++ static_branch_disable(&cpu_buf_idle_clear); ++ break; ++ case TSA_MITIGATION_NONE: ++ case TSA_MITIGATION_UCODE_NEEDED: ++ break; ++ } ++ ++ switch (vmscape_mitigation) { ++ case VMSCAPE_MITIGATION_NONE: ++ case VMSCAPE_MITIGATION_AUTO: ++ break; ++ case VMSCAPE_MITIGATION_IBPB_ON_VMEXIT: ++ case VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER: ++ /* ++ * Hypervisors can be attacked across-threads, warn for SMT when ++ * STIBP is not already enabled system-wide. ++ * ++ * Intel eIBRS (!AUTOIBRS) implies STIBP on. 
++ */ ++ if (!sched_smt_active() || ++ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || ++ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED || ++ (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && ++ !boot_cpu_has(X86_FEATURE_AUTOIBRS))) ++ break; ++ pr_warn_once(VMSCAPE_MSG_SMT); ++ break; ++ } ++ ++ mutex_unlock(&spec_ctrl_mutex); ++ } ++ +++>>>>>>> b7cc98872315 (x86/vmscape: Warn when STIBP is disabled with SMT) + #ifdef CONFIG_SYSFS + + #define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion" +* Unmerged path arch/x86/kernel/cpu/bugs.c diff --git a/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/c4abe623.failed b/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/c4abe623.failed new file mode 100644 index 0000000000000..500667e6d0629 --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/c4abe623.failed @@ -0,0 +1,95 @@ +s390/pci: Fix __pcilg_mio_inuser() inline assembly + +jira LE-4704 +Rebuild_History Non-Buildable kernel-4.18.0-553.83.1.el8_10 +commit-author Heiko Carstens +commit c4abe6234246c75cdc43326415d9cff88b7cf06c +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/c4abe623.failed + +Use "a" constraint for the shift operand of the __pcilg_mio_inuser() inline +assembly. The used "d" constraint allows the compiler to use any general +purpose register for the shift operand, including register zero. + +If register zero is used this my result in incorrect code generation: + + 8f6: a7 0a ff f8 ahi %r0,-8 + 8fa: eb 32 00 00 00 0c srlg %r3,%r2,0 <---- + +If register zero is selected to contain the shift value, the srlg +instruction ignores the contents of the register and always shifts zero +bits. Therefore use the "a" constraint which does not permit to select +register zero. + +Fixes: f058599e22d5 ("s390/pci: Fix s390_mmio_read/write with MIO") + Cc: stable@vger.kernel.org + Reported-by: Niklas Schnelle + Reviewed-by: Niklas Schnelle + Signed-off-by: Heiko Carstens +(cherry picked from commit c4abe6234246c75cdc43326415d9cff88b7cf06c) + Signed-off-by: Jonathan Maple + +# Conflicts: +# arch/s390/pci/pci_mmio.c +diff --cc arch/s390/pci/pci_mmio.c +index c3402f7971c7,51e7a28af899..000000000000 +--- a/arch/s390/pci/pci_mmio.c ++++ b/arch/s390/pci/pci_mmio.c +@@@ -207,27 -221,34 +207,34 @@@ static inline int __pcilg_mio_inuser + * user space) into a register using pcilg then store these bytes at + * user address @dst + */ + - exception = 1; + - sacf_flag = enable_sacf_uaccess(); + - asm_inline volatile ( + - " sacf 256\n" + - "0: .insn rre,0xb9d60000,%[val],%[ioaddr_len]\n" + - "1: lhi %[exc],0\n" + - " jne 4f\n" + - "2: ahi %[shift],-8\n" + - " srlg %[tmp],%[val],0(%[shift])\n" + - "3: stc %[tmp],0(%[dst])\n" + + asm volatile ( + + " sacf 256\n" + + "0: .insn rre,0xb9d60000,%[val],%[ioaddr_len]\n" + + "1: ipm %[cc]\n" + + " srl %[cc],28\n" + + " ltr %[cc],%[cc]\n" + + " jne 4f\n" + + "2: ahi %[shift],-8\n" + + " srlg %[tmp],%[val],0(%[shift])\n" + + "3: stc %[tmp],0(%[dst])\n" + "5: aghi %[dst],1\n" + - " brctg %[cnt],2b\n" + - /* + - * Use xr to clear exc and set condition code to zero + - * to ensure flag output is correct for this branch. 
+ - */ + - " xr %[exc],%[exc]\n" + - "4: sacf 768\n" + - CC_IPM(cc) + + " brctg %[cnt],2b\n" + + "4: sacf 768\n" + EX_TABLE(0b, 4b) EX_TABLE(1b, 4b) EX_TABLE(3b, 4b) EX_TABLE(5b, 4b) +++<<<<<<< HEAD +++======= ++ : [ioaddr_len] "+&d" (ioaddr_len.pair), [exc] "+d" (exception), ++ CC_OUT(cc, cc), [val] "=d" (val), ++ [dst] "+a" (dst), [cnt] "+d" (cnt), [tmp] "=d" (tmp), ++ [shift] "+a" (shift) +++>>>>>>> c4abe6234246 (s390/pci: Fix __pcilg_mio_inuser() inline assembly) + : + - : CC_CLOBBER_LIST("memory")); + - disable_sacf_uaccess(sacf_flag); + - cc = exception ? -ENXIO : CC_TRANSFORM(cc); + + [ioaddr_len] "+&d" (ioaddr_len.pair), + + [cc] "+d" (cc), [val] "=d" (val), + + [dst] "+a" (dst), [cnt] "+d" (cnt), [tmp] "=d" (tmp), + + [shift] "+d" (shift) + + :: "cc", "memory"); + + + /* did we write everything to the user space buffer? */ + if (!cc && cnt != 0) + cc = -EFAULT; +* Unmerged path arch/s390/pci/pci_mmio.c diff --git a/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/e3b63e96.failed b/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/e3b63e96.failed new file mode 100644 index 0000000000000..1ee59e12a044f --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/e3b63e96.failed @@ -0,0 +1,192 @@ +mm: zswap: fix missing folio cleanup in writeback race path + +jira LE-4704 +cve CVE-2023-53178 +Rebuild_History Non-Buildable kernel-4.18.0-553.83.1.el8_10 +commit-author Yosry Ahmed +commit e3b63e966cac0bf78aaa1efede1827a252815a1d +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/e3b63e96.failed + +In zswap_writeback_entry(), after we get a folio from +__read_swap_cache_async(), we grab the tree lock again to check that the +swap entry was not invalidated and recycled. If it was, we delete the +folio we just added to the swap cache and exit. + +However, __read_swap_cache_async() returns the folio locked when it is +newly allocated, which is always true for this path, and the folio is +ref'd. Make sure to unlock and put the folio before returning. + +This was discovered by code inspection, probably because this path handles +a race condition that should not happen often, and the bug would not crash +the system, it will only strand the folio indefinitely. + +Link: https://lkml.kernel.org/r/20240125085127.1327013-1-yosryahmed@google.com +Fixes: 04fc7816089c ("mm: fix zswap writeback race condition") + Signed-off-by: Yosry Ahmed + Reviewed-by: Chengming Zhou + Acked-by: Johannes Weiner + Reviewed-by: Nhat Pham + Cc: Domenico Cerasuolo + Cc: + Signed-off-by: Andrew Morton +(cherry picked from commit e3b63e966cac0bf78aaa1efede1827a252815a1d) + Signed-off-by: Jonathan Maple + +# Conflicts: +# mm/zswap.c +diff --cc mm/zswap.c +index 8d38573d911e,d2423247acfd..000000000000 +--- a/mm/zswap.c ++++ b/mm/zswap.c +@@@ -905,110 -1412,53 +905,117 @@@ static int zswap_writeback_entry(struc + .sync_mode = WB_SYNC_NONE, + }; + + - /* try to allocate swap cache folio */ + - mpol = get_task_policy(current); + - folio = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol, + - NO_INTERLEAVE_INDEX, &folio_was_allocated, true); + - if (!folio) + - return -ENOMEM; + + /* extract swpentry from data */ + + zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO); + + swpentry = zhdr->swpentry; /* here */ + + zpool_unmap_handle(pool, handle); + + tree = zswap_trees[swp_type(swpentry)]; + + offset = swp_offset(swpentry); + + - /* + - * Found an existing folio, we raced with load/swapin. 
We generally + - * writeback cold folios from zswap, and swapin means the folio just + - * became hot. Skip this folio and let the caller find another one. + - */ + - if (!folio_was_allocated) { + - folio_put(folio); + - return -EEXIST; + - } + - + - /* + - * folio is locked, and the swapcache is now secured against + - * concurrent swapping to and from the slot. Verify that the + - * swap entry hasn't been invalidated and recycled behind our + - * backs (our zswap_entry reference doesn't prevent that), to + - * avoid overwriting a new swap folio with old compressed data. + - */ + + /* find and ref zswap entry */ + spin_lock(&tree->lock); + - if (zswap_rb_search(&tree->rbroot, swp_offset(entry->swpentry)) != entry) { + + entry = zswap_entry_find_get(&tree->rbroot, offset); + + if (!entry) { + + /* entry was invalidated */ + spin_unlock(&tree->lock); +++<<<<<<< HEAD + + return 0; +++======= ++ delete_from_swap_cache(folio); ++ folio_unlock(folio); ++ folio_put(folio); ++ return -ENOMEM; +++>>>>>>> e3b63e966cac (mm: zswap: fix missing folio cleanup in writeback race path) + } + spin_unlock(&tree->lock); + + BUG_ON(offset != entry->offset); + + - __zswap_load(entry, &folio->page); + + /* try to allocate swap cache page */ + + switch (zswap_get_swap_cache_page(swpentry, &page)) { + + case ZSWAP_SWAPCACHE_FAIL: /* no memory or invalidate happened */ + + ret = -ENOMEM; + + goto fail; + + + + case ZSWAP_SWAPCACHE_EXIST: + + /* page is already in the swap cache, ignore for now */ + + put_page(page); + + ret = -EEXIST; + + goto fail; + + + + case ZSWAP_SWAPCACHE_NEW: /* page is locked */ + + /* + + * Having a local reference to the zswap entry doesn't exclude + + * swapping from invalidating and recycling the swap slot. Once + + * the swapcache is secured against concurrent swapping to and + + * from the slot, recheck that the entry is still current before + + * writing. 
+ + */ + + spin_lock(&tree->lock); + + if (zswap_rb_search(&tree->rbroot, entry->offset) != entry) { + + spin_unlock(&tree->lock); + + delete_from_swap_cache(page_folio(page)); + + ret = -ENOMEM; + + goto fail; + + } + + spin_unlock(&tree->lock); + + - /* folio is up to date */ + - folio_mark_uptodate(folio); + + /* decompress */ + + dlen = PAGE_SIZE; + + src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle, + + ZPOOL_MM_RO) + sizeof(struct zswap_header); + + dst = kmap_atomic(page); + + tfm = *get_cpu_ptr(entry->pool->tfm); + + ret = crypto_comp_decompress(tfm, src, entry->length, + + dst, &dlen); + + put_cpu_ptr(entry->pool->tfm); + + kunmap_atomic(dst); + + zpool_unmap_handle(entry->pool->zpool, entry->handle); + + BUG_ON(ret); + + BUG_ON(dlen != PAGE_SIZE); + + + + /* page is up to date */ + + SetPageUptodate(page); + + } + + /* move it to the tail of the inactive list after end_writeback */ + - folio_set_reclaim(folio); + + SetPageReclaim(page); + + /* start writeback */ + - __swap_writepage(folio, &wbc); + - folio_put(folio); + + __swap_writepage(page, &wbc, end_swap_bio_write); + + put_page(page); + + zswap_written_back_pages++; + + - return 0; + + spin_lock(&tree->lock); + + /* drop local reference */ + + zswap_entry_put(tree, entry); + + + + /* + + * There are two possible situations for entry here: + + * (1) refcount is 1(normal case), entry is valid and on the tree + + * (2) refcount is 0, entry is freed and not on the tree + + * because invalidate happened during writeback + + * search the tree and free the entry if find entry + + */ + + if (entry == zswap_rb_search(&tree->rbroot, offset)) + + zswap_entry_put(tree, entry); + + spin_unlock(&tree->lock); + + + + goto end; + + + + /* + + * if we get here due to ZSWAP_SWAPCACHE_EXIST + + * a load may be happening concurrently. + + * it is safe and okay to not free the entry. 
+ + * if we free the entry in the following put + + * it is also okay to return !0 + + */ + +fail: + + spin_lock(&tree->lock); + + zswap_entry_put(tree, entry); + + spin_unlock(&tree->lock); + + + +end: + + return ret; + } + + static int zswap_is_page_same_filled(void *ptr, unsigned long *value) +* Unmerged path mm/zswap.c diff --git a/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/rebuild.details.txt b/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/rebuild.details.txt new file mode 100644 index 0000000000000..57a6ae907aaf6 --- /dev/null +++ b/ciq/ciq_backports/kernel-4.18.0-553.83.1.el8_10/rebuild.details.txt @@ -0,0 +1,35 @@ +Rebuild_History BUILDABLE +Rebuilding Kernel from rpm changelog with Fuzz Limit: 87.50% +Number of commits in upstream range v4.18~1..kernel-mainline: 567757 +Number of commits in rpm: 27 +Number of commits matched with upstream: 19 (70.37%) +Number of commits in upstream but not in rpm: 567738 +Number of commits NOT found in upstream: 8 (29.63%) + +Rebuilding Kernel on Branch rocky8_10_rebuild_kernel-4.18.0-553.83.1.el8_10 for kernel-4.18.0-553.83.1.el8_10 +Clean Cherry Picks: 8 (42.11%) +Empty Cherry Picks: 11 (57.89%) +_______________________________ + +__EMPTY COMMITS__________________________ +e3b63e966cac0bf78aaa1efede1827a252815a1d mm: zswap: fix missing folio cleanup in writeback race path +c4abe6234246c75cdc43326415d9cff88b7cf06c s390/pci: Fix __pcilg_mio_inuser() inline assembly +503f1c72c31bbee21e669a08cf65c49e96d42755 i40e: fix Jumbo Frame support after iPXE boot +9969779d0803f5dcd4460ae7aca2bc3fd91bff12 Documentation/hw-vuln: Add VMSCAPE documentation +a508cec6e5215a3fbc7e73ae86a5c5602187934d x86/vmscape: Enumerate VMSCAPE bug +2f8f173413f1cbf52660d04df92d0069c4306d25 x86/vmscape: Add conditional IBPB mitigation +556c1ad666ad90c50ec8fccb930dd5046cfbecfb x86/vmscape: Enable the mitigation +6449f5baf9c78a7a442d64f4a61378a21c5db113 x86/bugs: Move cpu_bugs_smt_update() down +b7cc9887231526ca4fa89f3fa4119e47c2dc7b1e x86/vmscape: Warn when STIBP is disabled with SMT +8a68d64bb10334426834e8c273319601878e961e x86/vmscape: Add old Intel CPUs to affected list +2e488f13755ffbb60f307e991b27024716a33b29 fs: fix UAF/GPF bug in nilfs_mdt_destroy + +__CHANGES NOT IN UPSTREAM________________ +Adding prod certs and changed cert date to 20210620 +Adding Rocky secure boot certs +Fixing vmlinuz removal +Fixing UEFI CA path +Porting to 8.10, debranding and Rocky branding +Fixing pesign_key_name values +redhat/configs: Enable CONFIG_MITIGATION_VMSCAPE for x86_64 +fanotify: add watchdog for permission events diff --git a/configs/kernel-4.18.0-x86_64-debug.config b/configs/kernel-4.18.0-x86_64-debug.config index 61c5583397279..8c8a005c1e9ff 100644 --- a/configs/kernel-4.18.0-x86_64-debug.config +++ b/configs/kernel-4.18.0-x86_64-debug.config @@ -757,6 +757,7 @@ CONFIG_SLS=y # CONFIG_GDS_FORCE_MITIGATION is not set CONFIG_MITIGATION_RFDS=y CONFIG_MITIGATION_SPECTRE_BHI=y +CONFIG_MITIGATION_VMSCAPE=y CONFIG_ARCH_HAS_ADD_PAGES=y CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y diff --git a/configs/kernel-4.18.0-x86_64.config b/configs/kernel-4.18.0-x86_64.config index 7f320ca792b14..a3441083602f2 100644 --- a/configs/kernel-4.18.0-x86_64.config +++ b/configs/kernel-4.18.0-x86_64.config @@ -758,6 +758,7 @@ CONFIG_SLS=y # CONFIG_GDS_FORCE_MITIGATION is not set CONFIG_MITIGATION_RFDS=y CONFIG_MITIGATION_SPECTRE_BHI=y +CONFIG_MITIGATION_VMSCAPE=y CONFIG_ARCH_HAS_ADD_PAGES=y CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y diff --git 
a/configs/kernel-x86_64-debug.config b/configs/kernel-x86_64-debug.config index 5591fb4bf9db3..0050c12408bb0 100644 --- a/configs/kernel-x86_64-debug.config +++ b/configs/kernel-x86_64-debug.config @@ -3936,6 +3936,7 @@ CONFIG_MISDN_L1OIP=m CONFIG_MISDN_NETJET=m CONFIG_MISDN_SPEEDFAX=m CONFIG_MISDN_W6692=m +CONFIG_MITIGATION_VMSCAPE=y CONFIG_MLX4_EN=m CONFIG_MLX4_EN_DCB=y CONFIG_MLX4_INFINIBAND=m diff --git a/configs/kernel-x86_64.config b/configs/kernel-x86_64.config index d3dad0962de13..626a6d72a64f0 100644 --- a/configs/kernel-x86_64.config +++ b/configs/kernel-x86_64.config @@ -3936,6 +3936,7 @@ CONFIG_MISDN_L1OIP=m CONFIG_MISDN_NETJET=m CONFIG_MISDN_SPEEDFAX=m CONFIG_MISDN_W6692=m +CONFIG_MITIGATION_VMSCAPE=y CONFIG_MLX4_EN=m CONFIG_MLX4_EN_DCB=y CONFIG_MLX4_INFINIBAND=m diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index aae1dde82781d..60a2417637509 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -551,6 +551,7 @@ CPU_SHOW_VULN_FALLBACK(retbleed); CPU_SHOW_VULN_FALLBACK(spec_rstack_overflow); CPU_SHOW_VULN_FALLBACK(gds); CPU_SHOW_VULN_FALLBACK(reg_file_data_sampling); +CPU_SHOW_VULN_FALLBACK(vmscape); static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); @@ -566,6 +567,7 @@ static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL); static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL); static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling, NULL); +static DEVICE_ATTR(vmscape, 0444, cpu_show_vmscape, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -582,6 +584,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_spec_rstack_overflow.attr, &dev_attr_gather_data_sampling.attr, &dev_attr_reg_file_data_sampling.attr, + &dev_attr_vmscape.attr, NULL }; diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 3357d65a906bf..fa0a2ff0ddc72 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -1709,6 +1709,7 @@ I40E_CHECK_CMD_LENGTH(i40e_aq_set_phy_config); struct i40e_aq_set_mac_config { __le16 max_frame_size; u8 params; +#define I40E_AQ_SET_MAC_CONFIG_CRC_EN BIT(2) u8 tx_timer_priority; /* bitmap */ __le16 tx_timer_value; __le16 fc_refresh_threshold; diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 30e15a6fc0ce2..4262afe8d142f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1503,6 +1503,40 @@ int i40e_set_fc(struct i40e_hw *hw, u8 *aq_failures, return status; } +/** + * i40e_aq_set_mac_config - Configure MAC settings + * @hw: pointer to the hw struct + * @max_frame_size: Maximum Frame Size to be supported by the port + * @cmd_details: pointer to command details structure or NULL + * + * Set MAC configuration (0x0603). Note that max_frame_size must be greater + * than zero. + * + * Return: 0 on success, or a negative error code on failure. 
+ */ +int i40e_aq_set_mac_config(struct i40e_hw *hw, u16 max_frame_size, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_set_mac_config *cmd; + struct i40e_aq_desc desc; + + cmd = (struct i40e_aq_set_mac_config *)&desc.params.raw; + + if (max_frame_size == 0) + return -EINVAL; + + i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_set_mac_config); + + cmd->max_frame_size = cpu_to_le16(max_frame_size); + cmd->params = I40E_AQ_SET_MAC_CONFIG_CRC_EN; + +#define I40E_AQ_SET_MAC_CONFIG_FC_DEFAULT_THRESHOLD 0x7FFF + cmd->fc_refresh_threshold = + cpu_to_le16(I40E_AQ_SET_MAC_CONFIG_FC_DEFAULT_THRESHOLD); + + return i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); +} + /** * i40e_aq_clear_pxe_mode * @hw: pointer to the hw struct diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 12fdcf3858fb0..4d2bf0c6a6e8f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -16194,13 +16194,17 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) ERR_PTR(err), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); - /* make sure the MFS hasn't been set lower than the default */ #define MAX_FRAME_SIZE_DEFAULT 0x2600 - val = (rd32(&pf->hw, I40E_PRTGL_SAH) & - I40E_PRTGL_SAH_MFS_MASK) >> I40E_PRTGL_SAH_MFS_SHIFT; - if (val < MAX_FRAME_SIZE_DEFAULT) - dev_warn(&pdev->dev, "MFS for port %x has been set below the default: %x\n", - i, val); + + err = i40e_aq_set_mac_config(hw, MAX_FRAME_SIZE_DEFAULT, NULL); + if (err) + dev_warn(&pdev->dev, "set mac config ret = %pe last_status = %s\n", + ERR_PTR(err), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + + /* Make sure the MFS is set to the expected value */ + val = rd32(hw, I40E_PRTGL_SAH); + FIELD_MODIFY(I40E_PRTGL_SAH_MFS_MASK, &val, MAX_FRAME_SIZE_DEFAULT); + wr32(hw, I40E_PRTGL_SAH, val); /* Add a filter to drop all Flow control frames from any VSI from being * transmitted. 
By doing so we stop a malicious VF from sending out diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index fe845987d99a5..7da3ec6b100f5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -109,6 +109,8 @@ int i40e_aq_set_mac_loopback(struct i40e_hw *hw, struct i40e_asq_cmd_details *cmd_details); int i40e_aq_set_phy_int_mask(struct i40e_hw *hw, u16 mask, struct i40e_asq_cmd_details *cmd_details); +int i40e_aq_set_mac_config(struct i40e_hw *hw, u16 max_frame_size, + struct i40e_asq_cmd_details *cmd_details); int i40e_aq_clear_pxe_mode(struct i40e_hw *hw, struct i40e_asq_cmd_details *cmd_details); int i40e_aq_set_link_restart_an(struct i40e_hw *hw, diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index a116fbc597252..ec98eb2fe5224 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -1903,7 +1903,7 @@ void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats) { u32 i; - if (!cdev) { + if (!cdev || cdev->recov_in_prog) { memset(stats, 0, sizeof(*stats)); return; } diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index df3bbc6b4e0df..886055501482f 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -280,6 +280,10 @@ struct qede_dev { #define QEDE_ERR_WARN 3 struct qede_dump_info dump_info; + struct delayed_work periodic_task; + unsigned long stats_coal_ticks; + u32 stats_coal_usecs; + spinlock_t stats_lock; /* lock for vport stats access */ }; enum QEDE_STATE { diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 44df1aa300ef1..7968b5f25f38a 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -430,6 +430,8 @@ static void qede_get_ethtool_stats(struct net_device *dev, } } + spin_lock(&edev->stats_lock); + for (i = 0; i < QEDE_NUM_STATS; i++) { if (qede_is_irrelevant_stat(edev, i)) continue; @@ -439,6 +441,8 @@ static void qede_get_ethtool_stats(struct net_device *dev, buf++; } + spin_unlock(&edev->stats_lock); + __qede_unlock(edev); } @@ -830,6 +834,7 @@ static int qede_get_coalesce(struct net_device *dev, coal->rx_coalesce_usecs = rx_coal; coal->tx_coalesce_usecs = tx_coal; + coal->stats_block_coalesce_usecs = edev->stats_coal_usecs; return rc; } @@ -844,6 +849,19 @@ int qede_set_coalesce(struct net_device *dev, int i, rc = 0; u16 rxc, txc; + if (edev->stats_coal_usecs != coal->stats_block_coalesce_usecs) { + edev->stats_coal_usecs = coal->stats_block_coalesce_usecs; + if (edev->stats_coal_usecs) { + edev->stats_coal_ticks = usecs_to_jiffies(edev->stats_coal_usecs); + schedule_delayed_work(&edev->periodic_task, 0); + + DP_INFO(edev, "Configured stats coal ticks=%lu jiffies\n", + edev->stats_coal_ticks); + } else { + cancel_delayed_work_sync(&edev->periodic_task); + } + } + if (!netif_running(dev)) { DP_INFO(edev, "Interface is down\n"); return -EINVAL; @@ -2254,7 +2272,8 @@ static int qede_get_per_coalesce(struct net_device *dev, } static const struct ethtool_ops qede_ethtool_ops = { - .supported_coalesce_params = ETHTOOL_COALESCE_USECS, + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_STATS_BLOCK_USECS, .get_link_ksettings = qede_get_link_ksettings, .set_link_ksettings = qede_set_link_ksettings, 
.get_drvinfo = qede_get_drvinfo, @@ -2305,7 +2324,8 @@ static const struct ethtool_ops qede_ethtool_ops = { }; static const struct ethtool_ops qede_vf_ethtool_ops = { - .supported_coalesce_params = ETHTOOL_COALESCE_USECS, + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_STATS_BLOCK_USECS, .get_link_ksettings = qede_get_link_ksettings, .get_drvinfo = qede_get_drvinfo, .get_msglevel = qede_get_msglevel, diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index fd591ce22c42a..232e7c359f349 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -312,6 +312,8 @@ void qede_fill_by_demand_stats(struct qede_dev *edev) edev->ops->get_vport_stats(edev->cdev, &stats); + spin_lock(&edev->stats_lock); + p_common->no_buff_discards = stats.common.no_buff_discards; p_common->packet_too_big_discard = stats.common.packet_too_big_discard; p_common->ttl0_discard = stats.common.ttl0_discard; @@ -409,6 +411,8 @@ void qede_fill_by_demand_stats(struct qede_dev *edev) p_ah->tx_1519_to_max_byte_packets = stats.ah.tx_1519_to_max_byte_packets; } + + spin_unlock(&edev->stats_lock); } static void qede_get_stats64(struct net_device *dev, @@ -417,9 +421,10 @@ static void qede_get_stats64(struct net_device *dev, struct qede_dev *edev = netdev_priv(dev); struct qede_stats_common *p_common; - qede_fill_by_demand_stats(edev); p_common = &edev->stats.common; + spin_lock(&edev->stats_lock); + stats->rx_packets = p_common->rx_ucast_pkts + p_common->rx_mcast_pkts + p_common->rx_bcast_pkts; stats->tx_packets = p_common->tx_ucast_pkts + p_common->tx_mcast_pkts + @@ -439,6 +444,8 @@ static void qede_get_stats64(struct net_device *dev, stats->collisions = edev->stats.bb.tx_total_collisions; stats->rx_crc_errors = p_common->rx_crc_errors; stats->rx_frame_errors = p_common->rx_align_errors; + + spin_unlock(&edev->stats_lock); } #ifdef CONFIG_QED_SRIOV @@ -1071,6 +1078,23 @@ static void qede_unlock(struct qede_dev *edev) rtnl_unlock(); } +static void qede_periodic_task(struct work_struct *work) +{ + struct qede_dev *edev = container_of(work, struct qede_dev, + periodic_task.work); + + qede_fill_by_demand_stats(edev); + schedule_delayed_work(&edev->periodic_task, edev->stats_coal_ticks); +} + +static void qede_init_periodic_task(struct qede_dev *edev) +{ + INIT_DELAYED_WORK(&edev->periodic_task, qede_periodic_task); + spin_lock_init(&edev->stats_lock); + edev->stats_coal_usecs = USEC_PER_SEC; + edev->stats_coal_ticks = usecs_to_jiffies(USEC_PER_SEC); +} + static void qede_sp_task(struct work_struct *work) { struct qede_dev *edev = container_of(work, struct qede_dev, @@ -1090,6 +1114,7 @@ static void qede_sp_task(struct work_struct *work) */ if (test_and_clear_bit(QEDE_SP_RECOVERY, &edev->sp_flags)) { + cancel_delayed_work_sync(&edev->periodic_task); #ifdef CONFIG_QED_SRIOV /* SRIOV must be disabled outside the lock to avoid a deadlock. * The recovery of the active VFs is currently not supported. 
@@ -1284,6 +1309,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, */ INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task); mutex_init(&edev->qede_lock); + qede_init_periodic_task(edev); rc = register_netdev(edev->ndev); if (rc) { @@ -1308,6 +1334,11 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, edev->rx_copybreak = QEDE_RX_HDR_SIZE; qede_log_probe(edev); + + /* retain user config (for example - after recovery) */ + if (edev->stats_coal_usecs) + schedule_delayed_work(&edev->periodic_task, 0); + return 0; err4: @@ -1376,6 +1407,7 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) unregister_netdev(ndev); cancel_delayed_work_sync(&edev->sp_task); + cancel_delayed_work_sync(&edev->periodic_task); edev->ops->common->set_power_state(cdev, PCI_D0); diff --git a/fs/inode.c b/fs/inode.c index 024853e8ceb1e..983fbc0a7acf4 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -168,8 +168,6 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->rh_reserved2 = 0; - if (security_inode_alloc(inode)) - goto out; spin_lock_init(&inode->i_lock); lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key); @@ -200,11 +198,12 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_fsnotify_mask = 0; #endif inode->i_flctx = NULL; + + if (unlikely(security_inode_alloc(inode))) + return -ENOMEM; this_cpu_inc(nr_inodes); return 0; -out: - return -ENOMEM; } EXPORT_SYMBOL(inode_init_always); diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index ef6e7ab297f0b..1b67524746a61 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -41,7 +41,9 @@ struct fanotify_perm_event { struct fanotify_event fae; u32 response; /* userspace answer to the event */ unsigned short state; /* state of the event */ + unsigned short watchdog_cnt; /* already scanned by watchdog? 
*/ int fd; /* fd we passed to userspace for this event */ + pid_t recv_pid; /* pid of task receiving the event */ union { struct fanotify_response_info_header hdr; struct fanotify_response_info_audit_rule audit_rule; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index c2aa95174fdd2..26d87e2267042 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -28,6 +28,117 @@ #define FANOTIFY_DEFAULT_MAX_MARKS 8192 #define FANOTIFY_DEFAULT_MAX_LISTENERS 128 +static int perm_group_timeout __read_mostly; + +#ifdef CONFIG_SYSCTL + +#include + +static struct ctl_table fanotify_table[] = { + { + .procname = "watchdog_timeout", + .data = &perm_group_timeout, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + }, + { } +}; + +static void __init fanotify_sysctls_init(void) +{ + register_sysctl("fs/fanotify", fanotify_table); +} +#else +#define fanotify_sysctls_init() do { } while (0) +#endif /* CONFIG_SYSCTL */ + +static LIST_HEAD(perm_group_list); +static DEFINE_SPINLOCK(perm_group_lock); +static void perm_group_watchdog(struct work_struct *work); +static DECLARE_DELAYED_WORK(perm_group_work, perm_group_watchdog); + +static void perm_group_watchdog_schedule(void) +{ + schedule_delayed_work(&perm_group_work, secs_to_jiffies(perm_group_timeout)); +} + +static void perm_group_watchdog(struct work_struct *work) +{ + struct fsnotify_group *group; + struct fanotify_perm_event *event; + struct task_struct *task; + pid_t failed_pid = 0; + + guard(spinlock)(&perm_group_lock); + if (list_empty(&perm_group_list)) + return; + + list_for_each_entry(group, &perm_group_list, + fanotify_data.perm_grp_list) { + /* + * Ok to test without lock, racing with an addition is + * fine, will deal with it next round + */ + if (list_empty(&group->fanotify_data.access_list)) + continue; + + spin_lock(&group->notification_lock); + list_for_each_entry(event, &group->fanotify_data.access_list, + fae.fse.list) { + if (likely(event->watchdog_cnt == 0)) { + event->watchdog_cnt = 1; + } else if (event->watchdog_cnt == 1) { + /* Report on event only once */ + event->watchdog_cnt = 2; + + /* Do not report same pid repeatedly */ + if (event->recv_pid == failed_pid) + continue; + + failed_pid = event->recv_pid; + rcu_read_lock(); + task = find_task_by_pid_ns(event->recv_pid, + &init_pid_ns); + pr_warn_ratelimited( + "PID %u (%s) failed to respond to fanotify queue for more than %d seconds\n", + event->recv_pid, + task ? task->comm : NULL, + perm_group_timeout); + rcu_read_unlock(); + } + } + spin_unlock(&group->notification_lock); + } + perm_group_watchdog_schedule(); +} + +static void fanotify_perm_watchdog_group_remove(struct fsnotify_group *group) +{ + if (!list_empty(&group->fanotify_data.perm_grp_list)) { + /* Perm event watchdog can no longer scan this group. */ + spin_lock(&perm_group_lock); + list_del_init(&group->fanotify_data.perm_grp_list); + spin_unlock(&perm_group_lock); + } +} + +static void fanotify_perm_watchdog_group_add(struct fsnotify_group *group) +{ + if (!perm_group_timeout) + return; + + spin_lock(&perm_group_lock); + if (list_empty(&group->fanotify_data.perm_grp_list)) { + /* Add to perm_group_list for monitoring by watchdog. */ + if (list_empty(&perm_group_list)) + perm_group_watchdog_schedule(); + list_add_tail(&group->fanotify_data.perm_grp_list, &perm_group_list); + } + spin_unlock(&perm_group_lock); +} + /* * All flags that may be specified in parameter event_f_flags of fanotify_init. 
* @@ -375,6 +486,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, spin_lock(&group->notification_lock); list_add_tail(&kevent->list, &group->fanotify_data.access_list); + FANOTIFY_PE(kevent)->recv_pid = current->pid; spin_unlock(&group->notification_lock); } } @@ -435,6 +547,8 @@ static int fanotify_release(struct inode *ignored, struct file *file) */ fsnotify_group_stop_queueing(group); + fanotify_perm_watchdog_group_remove(group); + /* * Process all permission events on access_list and notification queue * and simulate reply from userspace. @@ -698,6 +812,10 @@ static int fanotify_add_mark(struct fsnotify_group *group, mutex_unlock(&group->mark_mutex); fsnotify_put_mark(fsn_mark); + + if (mask & FANOTIFY_PERM_EVENTS) + fanotify_perm_watchdog_group_add(group); + return 0; } @@ -806,6 +924,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) group->fanotify_data.f_flags = event_f_flags; init_waitqueue_head(&group->fanotify_data.access_waitq); INIT_LIST_HEAD(&group->fanotify_data.access_list); + INIT_LIST_HEAD(&group->fanotify_data.perm_grp_list); switch (flags & FANOTIFY_CLASS_BITS) { case FAN_CLASS_NOTIF: group->priority = FS_PRIO_0; @@ -1015,6 +1134,7 @@ static int __init fanotify_user_setup(void) fanotify_perm_event_cachep = KMEM_CACHE(fanotify_perm_event, SLAB_PANIC); } + fanotify_sysctls_init(); return 0; } diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index dac8a547b048e..62b170a34d0e1 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -16,6 +16,7 @@ #define _LINUX_BITFIELD_H #include +#include #include /* @@ -43,8 +44,7 @@ * FIELD_PREP(REG_FIELD_D, 0x40); * * Modify: - * reg &= ~REG_FIELD_C; - * reg |= FIELD_PREP(REG_FIELD_C, c); + * FIELD_MODIFY(REG_FIELD_C, ®, c); */ #define __bf_shf(x) (__builtin_ffsll(x) - 1) @@ -117,6 +117,23 @@ (typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \ }) +/** + * FIELD_MODIFY() - modify a bitfield element + * @_mask: shifted mask defining the field's length and position + * @_reg_p: pointer to the memory that should be updated + * @_val: value to store in the bitfield + * + * FIELD_MODIFY() modifies the set of bits in @_reg_p specified by @_mask, + * by replacing them with the bitfield value passed in as @_val. 
+ */ +#define FIELD_MODIFY(_mask, _reg_p, _val) \ + ({ \ + typecheck_pointer(_reg_p); \ + __BF_FIELD_CHECK(_mask, *(_reg_p), _val, "FIELD_MODIFY: "); \ + *(_reg_p) &= ~(_mask); \ + *(_reg_p) |= (((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask)); \ + }) + extern void __compiletime_error("value doesn't fit into mask") __field_overflow(void); extern void __compiletime_error("bad bitfield mask") diff --git a/include/linux/bitops.h b/include/linux/bitops.h index fb58ab0e5b38d..0efbba9d3980d 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -4,6 +4,7 @@ #include #include +#include #include @@ -255,6 +256,55 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr, __clear_bit(nr, addr); } +/** + * __ptr_set_bit - Set bit in a pointer's value + * @nr: the bit to set + * @addr: the address of the pointer variable + * + * Example: + * void *p = foo(); + * __ptr_set_bit(bit, &p); + */ +#define __ptr_set_bit(nr, addr) \ + ({ \ + typecheck_pointer(*(addr)); \ + __set_bit(nr, (unsigned long *)(addr)); \ + }) + +/** + * __ptr_clear_bit - Clear bit in a pointer's value + * @nr: the bit to clear + * @addr: the address of the pointer variable + * + * Example: + * void *p = foo(); + * __ptr_clear_bit(bit, &p); + */ +#define __ptr_clear_bit(nr, addr) \ + ({ \ + typecheck_pointer(*(addr)); \ + __clear_bit(nr, (unsigned long *)(addr)); \ + }) + +/** + * __ptr_test_bit - Test bit in a pointer's value + * @nr: the bit to test + * @addr: the address of the pointer variable + * + * Example: + * void *p = foo(); + * if (__ptr_test_bit(bit, &p)) { + * ... + * } else { + * ... + * } + */ +#define __ptr_test_bit(nr, addr) \ + ({ \ + typecheck_pointer(*(addr)); \ + test_bit(nr, (unsigned long *)(addr)); \ + }) + #ifdef __KERNEL__ #ifndef set_mask_bits diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 1e897b9cf3a68..7c2a0e5ab7789 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -76,6 +76,7 @@ extern ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_vmscape(struct device *dev, struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 0c8372f208199..55d3a6dccfdad 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -203,6 +203,8 @@ struct fsnotify_group { int f_flags; /* event_f_flags from fanotify_init() */ unsigned int max_marks; struct user_struct *user; + /* chained on perm_group_list */ + struct list_head perm_grp_list; } fanotify_data; #endif /* CONFIG_FANOTIFY */ }; diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 4b9262761eac6..8749f03b07e0e 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -371,6 +371,19 @@ static __always_inline unsigned long msecs_to_jiffies(const unsigned int m) } } +/** + * secs_to_jiffies: - convert seconds to jiffies + * @_secs: time in seconds + * + * Conversion is done by simple multiplication with HZ + * + * secs_to_jiffies() is defined as a macro rather than a static inline + * function so it can be used in static initializers. 
+ * + * Return: jiffies value + */ +#define secs_to_jiffies(_secs) (unsigned long)((_secs) * HZ) + extern unsigned long __usecs_to_jiffies(const unsigned int u); #if !(USEC_PER_SEC % HZ) static inline unsigned long _usecs_to_jiffies(const unsigned int u) diff --git a/include/linux/typecheck.h b/include/linux/typecheck.h index 20d310331eb51..46b15e2aaefb4 100644 --- a/include/linux/typecheck.h +++ b/include/linux/typecheck.h @@ -22,4 +22,13 @@ (void)__tmp; \ }) +/* + * Check at compile time that something is a pointer type. + */ +#define typecheck_pointer(x) \ +({ typeof(x) __dummy; \ + (void)sizeof(*__dummy); \ + 1; \ +}) + #endif /* TYPECHECK_H_INCLUDED */ diff --git a/mm/zswap.c b/mm/zswap.c index b9881c118529e..093a13fed33d5 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -936,6 +936,24 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) goto fail; case ZSWAP_SWAPCACHE_NEW: /* page is locked */ + /* + * Having a local reference to the zswap entry doesn't exclude + * swapping from invalidating and recycling the swap slot. Once + * the swapcache is secured against concurrent swapping to and + * from the slot, recheck that the entry is still current before + * writing. + */ + spin_lock(&tree->lock); + if (zswap_rb_search(&tree->rbroot, entry->offset) != entry) { + spin_unlock(&tree->lock); + delete_from_swap_cache(page); + unlock_page(page); + put_page(page); + ret = -ENOMEM; + goto fail; + } + spin_unlock(&tree->lock); + /* decompress */ dlen = PAGE_SIZE; src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle, diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 7a9451aafdd0f..b6a91d0b0c05b 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -43,8 +43,6 @@ #define ZERO_KEY "\x00\x00\x00\x00\x00\x00\x00\x00" \ "\x00\x00\x00\x00\x00\x00\x00\x00" -#define secs_to_jiffies(_secs) msecs_to_jiffies((_secs) * 1000) - /* Handle HCI Event packets */ static void *hci_ev_skb_pull(struct hci_dev *hdev, struct sk_buff *skb,