Skip to content

Commit d99e990

Browse files
committed
KVM: s390: Rework guest entry logic
JIRA: https://issues.redhat.com/browse/RHEL-113440

commit 57d88f0
Author: Mark Rutland <mark.rutland@arm.com>
Date: Tue Jul 8 19:27:42 2025 +1000

KVM: s390: Rework guest entry logic

In __vcpu_run() and do_vsie_run(), we enter an RCU extended quiescent state (EQS) by calling guest_enter_irqoff(), which lasts until __vcpu_run() calls guest_exit_irqoff(). However, between the two we enable interrupts and may handle interrupts during the EQS. As the IRQ entry code will not wake RCU in this case, we may run the core IRQ code and IRQ handler without RCU watching, leading to various potential problems.

It is necessary to unmask (host) interrupts around entering the guest, as entering the guest via SIE will not automatically unmask these. When a host interrupt is taken from a guest, it is taken via its regular host IRQ handler rather than being treated as a direct exit from SIE. Due to this, we cannot simply mask interrupts around guest entry, and must handle interrupts during this window, waking RCU as required.

Additionally, between guest_enter_irqoff() and guest_exit_irqoff(), we use local_irq_enable() and local_irq_disable() to unmask interrupts, violating the ordering requirements for RCU/lockdep/tracing around entry/exit sequences. Further, since this occurs in an instrumentable function, it's possible that instrumented code runs during this window, with potential usage of RCU, etc.

To fix the RCU wakeup problem, an s390 implementation of arch_in_rcu_eqs() is added which checks for PF_VCPU in current->flags. PF_VCPU is set/cleared by guest_timing_{enter,exit}_irqoff(), which surround the actual guest entry.

To fix the remaining issues, the lower-level guest entry logic is moved into a shared noinstr helper function using the guest_state_{enter,exit}_irqoff() helpers. These perform all the lockdep/RCU/tracing manipulation necessary, but as sie64a() does not enable/disable interrupts, we must do this explicitly with the non-instrumented arch_local_irq_{enable,disable}() helpers:

    guest_state_enter_irqoff()

    arch_local_irq_enable();
    sie64a(...);
    arch_local_irq_disable();

    guest_state_exit_irqoff();

[ajd@linux.ibm.com: rebase, fix commit message]

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Frederic Weisbecker <frederic@kernel.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Janosch Frank <frankja@linux.ibm.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Claudio Imbrenda <imbrenda@linux.ibm.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Andrew Donnellan <ajd@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Link: https://lore.kernel.org/r/20250708092742.104309-3-ajd@linux.ibm.com
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Message-ID: <20250708092742.104309-3-ajd@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
1 parent e4ea10e commit d99e990

File tree

4 files changed

+59
-22
lines changed

4 files changed

+59
-22
lines changed

arch/s390/include/asm/entry-common.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,4 +59,14 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
5959

6060
#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
6161

62+
static __always_inline bool arch_in_rcu_eqs(void)
63+
{
64+
if (IS_ENABLED(CONFIG_KVM))
65+
return current->flags & PF_VCPU;
66+
67+
return false;
68+
}
69+
70+
#define arch_in_rcu_eqs arch_in_rcu_eqs
71+
6272
#endif

arch/s390/include/asm/kvm_host.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -716,6 +716,9 @@ extern char sie_exit;
716716
bool kvm_s390_pv_is_protected(struct kvm *kvm);
717717
bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu);
718718

719+
extern int kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb,
720+
u64 *gprs, unsigned long gasce);
721+
719722
extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc);
720723
extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc);
721724

arch/s390/kvm/kvm-s390.c

Lines changed: 39 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5060,6 +5060,30 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
50605060
return vcpu_post_run_handle_fault(vcpu);
50615061
}
50625062

5063+
int noinstr kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb,
5064+
u64 *gprs, unsigned long gasce)
5065+
{
5066+
int ret;
5067+
5068+
guest_state_enter_irqoff();
5069+
5070+
/*
5071+
* The guest_state_{enter,exit}_irqoff() functions inform lockdep and
5072+
* tracing that entry to the guest will enable host IRQs, and exit from
5073+
* the guest will disable host IRQs.
5074+
*
5075+
* We must not use lockdep/tracing/RCU in this critical section, so we
5076+
* use the low-level arch_local_irq_*() helpers to enable/disable IRQs.
5077+
*/
5078+
arch_local_irq_enable();
5079+
ret = sie64a(scb, gprs, gasce);
5080+
arch_local_irq_disable();
5081+
5082+
guest_state_exit_irqoff();
5083+
5084+
return ret;
5085+
}
5086+
50635087
#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
50645088
static int __vcpu_run(struct kvm_vcpu *vcpu)
50655089
{
@@ -5080,20 +5104,27 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
50805104
kvm_vcpu_srcu_read_unlock(vcpu);
50815105
/*
50825106
* As PF_VCPU will be used in fault handler, between
5083-
* guest_enter and guest_exit should be no uaccess.
5107+
* guest_timing_enter_irqoff and guest_timing_exit_irqoff
5108+
* should be no uaccess.
50845109
*/
5085-
local_irq_disable();
5086-
guest_enter_irqoff();
5087-
__disable_cpu_timer_accounting(vcpu);
5088-
local_irq_enable();
50895110
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
50905111
memcpy(sie_page->pv_grregs,
50915112
vcpu->run->s.regs.gprs,
50925113
sizeof(sie_page->pv_grregs));
50935114
}
5094-
exit_reason = sie64a(vcpu->arch.sie_block,
5095-
vcpu->run->s.regs.gprs,
5096-
vcpu->arch.gmap->asce);
5115+
5116+
local_irq_disable();
5117+
guest_timing_enter_irqoff();
5118+
__disable_cpu_timer_accounting(vcpu);
5119+
5120+
exit_reason = kvm_s390_enter_exit_sie(vcpu->arch.sie_block,
5121+
vcpu->run->s.regs.gprs,
5122+
vcpu->arch.gmap->asce);
5123+
5124+
__enable_cpu_timer_accounting(vcpu);
5125+
guest_timing_exit_irqoff();
5126+
local_irq_enable();
5127+
50975128
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
50985129
memcpy(vcpu->run->s.regs.gprs,
50995130
sie_page->pv_grregs,
@@ -5109,10 +5140,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
51095140
vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
51105141
}
51115142
}
5112-
local_irq_disable();
5113-
__enable_cpu_timer_accounting(vcpu);
5114-
guest_exit_irqoff();
5115-
local_irq_enable();
51165143
kvm_vcpu_srcu_read_lock(vcpu);
51175144

51185145
rc = vcpu_post_run(vcpu, exit_reason);

arch/s390/kvm/vsie.c

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1170,10 +1170,6 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
11701170
vcpu->arch.sie_block->fpf & FPF_BPBC)
11711171
set_thread_flag(TIF_ISOLATE_BP_GUEST);
11721172

1173-
local_irq_disable();
1174-
guest_enter_irqoff();
1175-
local_irq_enable();
1176-
11771173
/*
11781174
* Simulate a SIE entry of the VCPU (see sie64a), so VCPU blocking
11791175
* and VCPU requests also hinder the vSIE from running and lead
@@ -1183,15 +1179,16 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
11831179
vcpu->arch.sie_block->prog0c |= PROG_IN_SIE;
11841180
current->thread.gmap_int_code = 0;
11851181
barrier();
1186-
if (!kvm_s390_vcpu_sie_inhibited(vcpu))
1187-
rc = sie64a(scb_s, vcpu->run->s.regs.gprs, vsie_page->gmap->asce);
1182+
if (!kvm_s390_vcpu_sie_inhibited(vcpu)) {
1183+
local_irq_disable();
1184+
guest_timing_enter_irqoff();
1185+
rc = kvm_s390_enter_exit_sie(scb_s, vcpu->run->s.regs.gprs, vsie_page->gmap->asce);
1186+
guest_timing_exit_irqoff();
1187+
local_irq_enable();
1188+
}
11881189
barrier();
11891190
vcpu->arch.sie_block->prog0c &= ~PROG_IN_SIE;
11901191

1191-
local_irq_disable();
1192-
guest_exit_irqoff();
1193-
local_irq_enable();
1194-
11951192
/* restore guest state for bp isolation override */
11961193
if (!guest_bp_isolation)
11971194
clear_thread_flag(TIF_ISOLATE_BP_GUEST);

0 commit comments

Comments
 (0)