@@ -2578,6 +2578,34 @@ static u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr)
 	return ctl_opt & allowed;
 }
 
+#define vmx_check_entry_exit_pairs(pairs, entry_controls, exit_controls)	\
+({	\
+	int i, r = 0;	\
+	\
+	BUILD_BUG_ON(sizeof(pairs[0].entry_control) != sizeof(entry_controls));	\
+	BUILD_BUG_ON(sizeof(pairs[0].exit_control) != sizeof(exit_controls));	\
+	\
+	for (i = 0; i < ARRAY_SIZE(pairs); i++) {	\
+		typeof(entry_controls) n_ctrl = pairs[i].entry_control;	\
+		typeof(exit_controls) x_ctrl = pairs[i].exit_control;	\
+	\
+		if (!(entry_controls & n_ctrl) == !(exit_controls & x_ctrl))	\
+			continue;	\
+	\
+		pr_warn_once("Inconsistent VM-Entry/VM-Exit pair, "	\
+			     "entry = %llx (%llx), exit = %llx (%llx)\n",	\
+			     (u64)(entry_controls & n_ctrl), (u64)n_ctrl,	\
+			     (u64)(exit_controls & x_ctrl), (u64)x_ctrl);	\
+	\
+		if (error_on_inconsistent_vmcs_config)	\
+			r = -EIO;	\
+	\
+		entry_controls &= ~n_ctrl;	\
+		exit_controls &= ~x_ctrl;	\
+	}	\
+	r;	\
+})
+
 static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
 			     struct vmx_capability *vmx_cap)
 {
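For context, the pairs table the macro iterates over is not part of this hunk. Judging by the fields it dereferences, an entry presumably looks roughly like the sketch below; the struct layout and the specific control pairs are illustrative, not taken from this diff.

static const struct {
	u32 entry_control;	/* VM-Entry control that loads a guest field */
	u32 exit_control;	/* paired VM-Exit control for the same field */
} vmcs_entry_exit_pairs[] = {
	{ VM_ENTRY_LOAD_IA32_EFER,	VM_EXIT_LOAD_IA32_EFER },
	{ VM_ENTRY_LOAD_IA32_PAT,	VM_EXIT_LOAD_IA32_PAT },
};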
@@ -2589,7 +2617,6 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
 	u32 _vmentry_control = 0;
 	u64 basic_msr;
 	u64 misc_msr;
-	int i;
 
 	/*
 	 * LOAD/SAVE_DEBUG_CONTROLS are absent because both are mandatory.
@@ -2693,22 +2720,9 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
 					&_vmentry_control))
 		return -EIO;
 
-	for (i = 0; i < ARRAY_SIZE(vmcs_entry_exit_pairs); i++) {
-		u32 n_ctrl = vmcs_entry_exit_pairs[i].entry_control;
-		u32 x_ctrl = vmcs_entry_exit_pairs[i].exit_control;
-
-		if (!(_vmentry_control & n_ctrl) == !(_vmexit_control & x_ctrl))
-			continue;
-
-		pr_warn_once("Inconsistent VM-Entry/VM-Exit pair, entry = %x, exit = %x\n",
-			     _vmentry_control & n_ctrl, _vmexit_control & x_ctrl);
-
-		if (error_on_inconsistent_vmcs_config)
-			return -EIO;
-
-		_vmentry_control &= ~n_ctrl;
-		_vmexit_control &= ~x_ctrl;
-	}
+	if (vmx_check_entry_exit_pairs(vmcs_entry_exit_pairs,
+				       _vmentry_control, _vmexit_control))
+		return -EIO;
 
 	/*
 	 * Some cpus support VM_{ENTRY,EXIT}_IA32_PERF_GLOBAL_CTRL but they
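The call above replaces the removed open-coded loop one-for-one for the u32 primary controls. The likely reason this is a statement-expression macro rather than a helper function is the typeof()/BUILD_BUG_ON() combination, which lets the same code validate a pair table of a different width. A hypothetical 64-bit table (all names below are invented for illustration) would be accepted unchanged:

static const struct {
	u64 entry_control;
	u64 exit_control;
} vmcs_entry_exit_secondary_pairs[] = {
	{ 0, 0 },	/* placeholder; real 64-bit control pairs would go here */
};

static int check_secondary_pairs(u64 *entry_controls, u64 *exit_controls)
{
	/* Same invocation pattern as above, just with u64 controls. */
	return vmx_check_entry_exit_pairs(vmcs_entry_exit_secondary_pairs,
					  *entry_controls, *exit_controls);
}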
@@ -5211,6 +5225,12 @@ bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu)
 	       (kvm_get_rflags(vcpu) & X86_EFLAGS_AC);
 }
 
+static bool is_xfd_nm_fault(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.guest_fpu.fpstate->xfd &&
+	       !kvm_is_cr0_bit_set(vcpu, X86_CR0_TS);
+}
+
 static int handle_exception_nmi(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -5237,7 +5257,8 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
 	 * point.
 	 */
 	if (is_nm_fault(intr_info)) {
-		kvm_queue_exception(vcpu, NM_VECTOR);
+		kvm_queue_exception_p(vcpu, NM_VECTOR,
+				      is_xfd_nm_fault(vcpu) ? vcpu->arch.guest_fpu.xfd_err : 0);
 		return 1;
 	}
 
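kvm_queue_exception_p() queues the #NM with a payload; how that payload reaches the guest's XFD_ERR is handled by common x86 code that is not part of this diff. A plausible consumer, by analogy with #PF payloads becoming CR2 and #DB payloads becoming DR6, might be a new arm in the payload-delivery switch along these lines (hypothetical sketch; the variable names are assumed, not taken from the patch):

	case NM_VECTOR:
		/* Mirror the queued payload into XFD_ERR before injecting #NM. */
		vcpu->arch.guest_fpu.xfd_err = payload;
		break;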
@@ -5817,7 +5838,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 	error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR)
 		      ? PFERR_FETCH_MASK : 0;
 	/* ept page table entry is present? */
-	error_code |= (exit_qualification & EPT_VIOLATION_RWX_MASK)
+	error_code |= (exit_qualification & EPT_VIOLATION_PROT_MASK)
 		      ? PFERR_PRESENT_MASK : 0;
 
 	if (error_code & EPT_VIOLATION_GVA_IS_VALID)
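The rename from EPT_VIOLATION_RWX_MASK reflects what these bits actually report: per the SDM, bits 3..5 of the EPT-violation exit qualification describe the permissions of the faulting translation (readable/writable/executable), not the access type. The renamed mask presumably still covers exactly those bits, roughly as sketched below; if any of them is set, the GPA was mapped with some permissions, hence PFERR_PRESENT_MASK.

/*
 * Illustrative definitions; bit positions per the SDM's EPT-violation
 * exit qualification, not copied from this diff.
 */
#define EPT_VIOLATION_PROT_READ		BIT(3)
#define EPT_VIOLATION_PROT_WRITE	BIT(4)
#define EPT_VIOLATION_PROT_EXEC		BIT(5)
#define EPT_VIOLATION_PROT_MASK		(EPT_VIOLATION_PROT_READ | \
					 EPT_VIOLATION_PROT_WRITE | \
					 EPT_VIOLATION_PROT_EXEC)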
@@ -5871,11 +5892,35 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
-static bool vmx_emulation_required_with_pending_exception(struct kvm_vcpu *vcpu)
+/*
+ * Returns true if emulation is required (due to the vCPU having invalid state
+ * with unrestricted guest mode disabled) and KVM can't faithfully emulate the
+ * current vCPU state.
+ */
+static bool vmx_unhandleable_emulation_required(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-	return vmx->emulation_required && !vmx->rmode.vm86_active &&
+	if (!vmx->emulation_required)
+		return false;
+
+	/*
+	 * It is architecturally impossible for emulation to be required when a
+	 * nested VM-Enter is pending completion, as VM-Enter will VM-Fail if
+	 * guest state is invalid and unrestricted guest is disabled, i.e. KVM
+	 * should synthesize VM-Fail instead of emulating L2 code.  This path
+	 * is only reachable if userspace modifies L2 guest state after KVM has
+	 * performed the nested VM-Enter consistency checks.
+	 */
+	if (vmx->nested.nested_run_pending)
+		return true;
+
+	/*
+	 * KVM only supports emulating exceptions if the vCPU is in Real Mode.
+	 * If emulation is required, KVM can't perform a successful VM-Enter to
+	 * inject the exception.
+	 */
+	return !vmx->rmode.vm86_active &&
 	       (kvm_is_exception_pending(vcpu) || vcpu->arch.exception.injected);
 }
 
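The nested_run_pending comment describes a state that only userspace can create. A rough sketch of such a sequence from a VMM's point of view (hypothetical and abbreviated; assumes unrestricted guest is disabled so that invalid segment state forces emulation):

	struct kvm_nested_state state = { 0 };	/* saved L2 state with VM-Enter pending */
	struct kvm_sregs sregs;

	ioctl(vcpu_fd, KVM_SET_NESTED_STATE, &state);	/* nested_run_pending set */
	ioctl(vcpu_fd, KVM_GET_SREGS, &sregs);
	sregs.tr.unusable = 1;				/* stuff invalid guest state */
	ioctl(vcpu_fd, KVM_SET_SREGS, &sregs);
	ioctl(vcpu_fd, KVM_RUN, NULL);			/* KVM reports an emulation error */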
@@ -5898,7 +5943,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 	if (!kvm_emulate_instruction(vcpu, 0))
 		return 0;
 
-	if (vmx_emulation_required_with_pending_exception(vcpu)) {
+	if (vmx_unhandleable_emulation_required(vcpu)) {
 		kvm_prepare_emulation_failure_exit(vcpu);
 		return 0;
 	}
@@ -5922,7 +5967,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 
 int vmx_vcpu_pre_run(struct kvm_vcpu *vcpu)
 {
-	if (vmx_emulation_required_with_pending_exception(vcpu)) {
+	if (vmx_unhandleable_emulation_required(vcpu)) {
 		kvm_prepare_emulation_failure_exit(vcpu);
 		return 0;
 	}
@@ -6997,16 +7042,15 @@ static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
 	 * MSR value is not clobbered by the host activity before the guest
 	 * has chance to consume it.
 	 *
-	 * Do not blindly read xfd_err here, since this exception might
-	 * be caused by L1 interception on a platform which doesn't
-	 * support xfd at all.
-	 *
-	 * Do it conditionally upon guest_fpu::xfd. xfd_err matters
-	 * only when xfd contains a non-zero value.
+	 * Update the guest's XFD_ERR if and only if XFD is enabled, as the #NM
+	 * interception may have been caused by L1 interception.  Per the SDM,
+	 * XFD_ERR is not modified for non-XFD #NM, i.e. if CR0.TS=1.
 	 *
-	 * Queuing exception is done in vmx_handle_exit. See comment there.
+	 * Note, XFD_ERR is updated _before_ the #NM interception check, i.e.
+	 * unlike CR2 and DR6, the value is not a payload that is attached to
+	 * the #NM exception.
 	 */
-	if (vcpu->arch.guest_fpu.fpstate->xfd)
+	if (is_xfd_nm_fault(vcpu))
 		rdmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
 }
 
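handle_nm_fault_irqoff() runs in the IRQs-disabled window right after VM-Exit, which is what keeps host activity from clobbering MSR_IA32_XFD_ERR before it is snapshotted. A rough sketch of the ordering (simplified; the surrounding callers are assumed from context and are not shown in this diff):

/*
 *   vmx_vcpu_run(vcpu);         // VM-Exit occurs, IRQs still disabled
 *   handle_exit_irqoff(vcpu);   // -> handle_nm_fault_irqoff() snapshots
 *                               //    MSR_IA32_XFD_ERR into guest_fpu.xfd_err
 *   local_irq_enable();         // host activity may now clobber XFD_ERR
 */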