Skip to content

Commit fef417b

Browse files
author
Maxim Levitsky
committed
KVM: VMX: Preserve host's DEBUGCTLMSR_FREEZE_IN_SMM while running the guest
JIRA: https://issues.redhat.com/browse/RHEL-47242 commit 6b1dd26 Author: Maxim Levitsky <mlevitsk@redhat.com> Date: Tue Jun 10 16:20:10 2025 -0700 KVM: VMX: Preserve host's DEBUGCTLMSR_FREEZE_IN_SMM while running the guest Set/clear DEBUGCTLMSR_FREEZE_IN_SMM in GUEST_IA32_DEBUGCTL based on the host's pre-VM-Enter value, i.e. preserve the host's FREEZE_IN_SMM setting while running the guest. When running with the "default treatment of SMIs" in effect (the only mode KVM supports), SMIs do not generate a VM-Exit that is visible to host (non-SMM) software, and instead transitions directly from VMX non-root to SMM. And critically, DEBUGCTL isn't context switched by hardware on SMI or RSM, i.e. SMM will run with whatever value was resident in hardware at the time of the SMI. Failure to preserve FREEZE_IN_SMM results in the PMU unexpectedly counting events while the CPU is executing in SMM, which can pollute profiling and potentially leak information into the guest. Check for changes in FREEZE_IN_SMM prior to every entry into KVM's inner run loop, as the bit can be toggled in IRQ context via IPI callback (SMP function call), by way of /sys/devices/cpu/freeze_on_smi. Add a field in kvm_x86_ops to communicate which DEBUGCTL bits need to be preserved, as FREEZE_IN_SMM is only supported and defined for Intel CPUs, i.e. explicitly checking FREEZE_IN_SMM in common x86 is at best weird, and at worst could lead to undesirable behavior in the future if AMD CPUs ever happened to pick up a collision with the bit. Exempt TDX vCPUs, i.e. protected guests, from the check, as the TDX Module owns and controls GUEST_IA32_DEBUGCTL. WARN in SVM if KVM_RUN_LOAD_DEBUGCTL is set, mostly to document that the lack of handling isn't a KVM bug (TDX already WARNs on any run_flag). Lastly, explicitly reload GUEST_IA32_DEBUGCTL on a VM-Fail that is missed by KVM but detected by hardware, i.e. in nested_vmx_restore_host_state(). 
Doing so avoids the need to track host_debugctl on a per-VMCS basis, as GUEST_IA32_DEBUGCTL is unconditionally written by prepare_vmcs02() and load_vmcs12_host_state(). For the VM-Fail case, even though KVM won't have actually entered the guest, vcpu_enter_guest() will have run with vmcs02 active and thus could result in vmcs01 being run with a stale value. Cc: stable@vger.kernel.org Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com> Co-developed-by: Sean Christopherson <seanjc@google.com> Link: https://lore.kernel.org/r/20250610232010.162191-9-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc@google.com> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
1 parent 748ca18 commit fef417b

File tree

6 files changed

+41
-3
lines changed

6 files changed

+41
-3
lines changed

arch/x86/include/asm/kvm_host.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1675,6 +1675,7 @@ static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical)
 enum kvm_x86_run_flags {
 	KVM_RUN_FORCE_IMMEDIATE_EXIT	= BIT(0),
 	KVM_RUN_LOAD_GUEST_DR6		= BIT(1),
+	KVM_RUN_LOAD_DEBUGCTL		= BIT(2),
 };
 
 struct kvm_x86_ops {
@@ -1705,6 +1706,12 @@ struct kvm_x86_ops {
 	void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
 	void (*vcpu_put)(struct kvm_vcpu *vcpu);
 
+	/*
+	 * Mask of DEBUGCTL bits that are owned by the host, i.e. that need to
+	 * match the host's value even while the guest is active.
+	 */
+	const u64 HOST_OWNED_DEBUGCTL;
+
 	void (*update_exception_bitmap)(struct kvm_vcpu *vcpu);
 	int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
 	int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);

arch/x86/kvm/vmx/main.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -915,6 +915,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
 	.vcpu_load = vt_op(vcpu_load),
 	.vcpu_put = vt_op(vcpu_put),
 
+	.HOST_OWNED_DEBUGCTL = DEBUGCTLMSR_FREEZE_IN_SMM,
+
 	.update_exception_bitmap = vt_op(update_exception_bitmap),
 	.get_feature_msr = vmx_get_feature_msr,
 	.get_msr = vt_op(get_msr),

arch/x86/kvm/vmx/nested.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4851,6 +4851,9 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
 		WARN_ON(kvm_set_dr(vcpu, 7, vmcs_readl(GUEST_DR7)));
 	}
 
+	/* Reload DEBUGCTL to ensure vmcs01 has a fresh FREEZE_IN_SMM value. */
+	vmx_reload_guest_debugctl(vcpu);
+
 	/*
 	 * Note that calling vmx_set_{efer,cr0,cr4} is important as they
 	 * handle a variety of side effects to KVM's software model.

arch/x86/kvm/vmx/vmx.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7378,6 +7378,9 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
 	if (run_flags & KVM_RUN_LOAD_GUEST_DR6)
 		set_debugreg(vcpu->arch.dr6, 6);
 
+	if (run_flags & KVM_RUN_LOAD_DEBUGCTL)
+		vmx_reload_guest_debugctl(vcpu);
+
 	/*
 	 * Refresh vmcs.HOST_CR3 if necessary. This must be done immediately
 	 * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time

arch/x86/kvm/vmx/vmx.h

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -420,12 +420,25 @@ bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated)
 
 static inline void vmx_guest_debugctl_write(struct kvm_vcpu *vcpu, u64 val)
 {
+	WARN_ON_ONCE(val & DEBUGCTLMSR_FREEZE_IN_SMM);
+
+	val |= vcpu->arch.host_debugctl & DEBUGCTLMSR_FREEZE_IN_SMM;
 	vmcs_write64(GUEST_IA32_DEBUGCTL, val);
 }
 
 static inline u64 vmx_guest_debugctl_read(void)
 {
-	return vmcs_read64(GUEST_IA32_DEBUGCTL);
+	return vmcs_read64(GUEST_IA32_DEBUGCTL) & ~DEBUGCTLMSR_FREEZE_IN_SMM;
+}
+
+static inline void vmx_reload_guest_debugctl(struct kvm_vcpu *vcpu)
+{
+	u64 val = vmcs_read64(GUEST_IA32_DEBUGCTL);
+
+	if (!((val ^ vcpu->arch.host_debugctl) & DEBUGCTLMSR_FREEZE_IN_SMM))
+		return;
+
+	vmx_guest_debugctl_write(vcpu, val & ~DEBUGCTLMSR_FREEZE_IN_SMM);
 }
 
 /*

arch/x86/kvm/x86.c

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10761,7 +10761,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		dm_request_for_irq_injection(vcpu) &&
 		kvm_cpu_accept_dm_intr(vcpu);
 	fastpath_t exit_fastpath;
-	u64 run_flags;
+	u64 run_flags, debug_ctl;
 
 	bool req_immediate_exit = false;
 
@@ -11033,7 +11033,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		set_debugreg(0, 7);
 	}
 
-	vcpu->arch.host_debugctl = get_debugctlmsr();
+	/*
+	 * Refresh the host DEBUGCTL snapshot after disabling IRQs, as DEBUGCTL
+	 * can be modified in IRQ context, e.g. via SMP function calls.  Inform
+	 * vendor code if any host-owned bits were changed, e.g. so that the
+	 * value loaded into hardware while running the guest can be updated.
+	 */
+	debug_ctl = get_debugctlmsr();
+	if ((debug_ctl ^ vcpu->arch.host_debugctl) & kvm_x86_ops.HOST_OWNED_DEBUGCTL &&
+	    !vcpu->arch.guest_state_protected)
+		run_flags |= KVM_RUN_LOAD_DEBUGCTL;
+	vcpu->arch.host_debugctl = debug_ctl;
 
 	guest_timing_enter_irqoff();

0 commit comments

Comments
 (0)