author    Paolo Bonzini <pbonzini@redhat.com>  2025-07-30 13:56:09 -0400
committer Paolo Bonzini <pbonzini@redhat.com>  2025-07-30 13:56:09 -0400
commit    196d9e72c4b0bd68b74a4ec7f52d248f37d0f030 (patch)
tree      51293a46fba8b9b7283781ada645b8a2ae4c3f68
parent    6836e1f30fe90e4c19f6a3749e97ba1e44a840ef (diff)
parent    57d88f02eb4449d96dfee3af4b7cd4287998bdbd (diff)
Merge tag 'kvm-s390-next-6.17-1' of https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD
RCU wakeup fix for KVM s390 guest entry
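
In short: while a vCPU thread is between guest_timing_enter_irqoff() and
guest_timing_exit_irqoff(), RCU may regard the CPU as being in an extended
quiescent state (EQS), yet an interrupt taken while sie64a() runs the guest
previously went through irqentry_enter() without waking RCU. The hunks below
move the SIE entry/exit into a noinstr helper and add a generic
arch_in_rcu_eqs() hook so IRQ entry treats such tasks like the idle task.
The hazard, as a timeline sketch (not literal code; the real hunks follow):

/*
 *   guest_timing_enter_irqoff();    // sets PF_VCPU
 *   ...                             // RCU may stop watching (EQS)
 *   <IRQs enabled, guest runs via sie64a()>
 *       host interrupt -> irqentry_enter()
 *       // before this merge: RCU was not woken here
 *   ...                             // RCU watching again
 *   guest_timing_exit_irqoff();     // clears PF_VCPU
 */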
-rw-r--r--  arch/s390/include/asm/entry-common.h | 10
-rw-r--r--  arch/s390/include/asm/kvm_host.h     |  3
-rw-r--r--  arch/s390/kvm/kvm-s390.c             | 51
-rw-r--r--  arch/s390/kvm/vsie.c                 | 17
-rw-r--r--  include/linux/entry-common.h         | 16
-rw-r--r--  kernel/entry/common.c                |  3
6 files changed, 77 insertions(+), 23 deletions(-)
diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h
index 35555c944630..979af986a8fe 100644
--- a/arch/s390/include/asm/entry-common.h
+++ b/arch/s390/include/asm/entry-common.h
@@ -59,4 +59,14 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
+static __always_inline bool arch_in_rcu_eqs(void)
+{
+ if (IS_ENABLED(CONFIG_KVM))
+ return current->flags & PF_VCPU;
+
+ return false;
+}
+
+#define arch_in_rcu_eqs arch_in_rcu_eqs
+
#endif
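
Why current->flags & PF_VCPU is a safe test here: PF_VCPU is set and
cleared by the generic guest timing helpers (via vtime_account_guest_enter()
and vtime_account_guest_exit()), so it over-approximates every window in
which this task can be inside SIE with RCU not watching. Simplified sketch
of those helpers, not the verbatim kernel definitions:

/* include/linux/kvm_host.h -- simplified sketch */
static __always_inline void guest_timing_enter_irqoff(void)
{
	vtime_account_guest_enter();	/* current->flags |= PF_VCPU */
}

static __always_inline void guest_timing_exit_irqoff(void)
{
	vtime_account_guest_exit();	/* current->flags &= ~PF_VCPU */
}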
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index cb89e54ada25..f870d09515cc 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -716,6 +716,9 @@ extern char sie_exit;
bool kvm_s390_pv_is_protected(struct kvm *kvm);
bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu);
+extern int kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb,
+ u64 *gprs, unsigned long gasce);
+
extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc);
extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index d5ad10791c25..bfe9ba5c4f45 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -5062,6 +5062,30 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
return vcpu_post_run_handle_fault(vcpu);
}
+int noinstr kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb,
+ u64 *gprs, unsigned long gasce)
+{
+ int ret;
+
+ guest_state_enter_irqoff();
+
+ /*
+ * The guest_state_{enter,exit}_irqoff() functions inform lockdep and
+ * tracing that entry to the guest will enable host IRQs, and exit from
+ * the guest will disable host IRQs.
+ *
+ * We must not use lockdep/tracing/RCU in this critical section, so we
+ * use the low-level arch_local_irq_*() helpers to enable/disable IRQs.
+ */
+ arch_local_irq_enable();
+ ret = sie64a(scb, gprs, gasce);
+ arch_local_irq_disable();
+
+ guest_state_exit_irqoff();
+
+ return ret;
+}
+
#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
@@ -5082,20 +5106,27 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
kvm_vcpu_srcu_read_unlock(vcpu);
/*
* As PF_VCPU will be used in fault handler, between
- * guest_enter and guest_exit should be no uaccess.
+ * guest_timing_enter_irqoff and guest_timing_exit_irqoff
+	 * there should be no uaccess.
*/
- local_irq_disable();
- guest_enter_irqoff();
- __disable_cpu_timer_accounting(vcpu);
- local_irq_enable();
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
memcpy(sie_page->pv_grregs,
vcpu->run->s.regs.gprs,
sizeof(sie_page->pv_grregs));
}
- exit_reason = sie64a(vcpu->arch.sie_block,
- vcpu->run->s.regs.gprs,
- vcpu->arch.gmap->asce);
+
+ local_irq_disable();
+ guest_timing_enter_irqoff();
+ __disable_cpu_timer_accounting(vcpu);
+
+ exit_reason = kvm_s390_enter_exit_sie(vcpu->arch.sie_block,
+ vcpu->run->s.regs.gprs,
+ vcpu->arch.gmap->asce);
+
+ __enable_cpu_timer_accounting(vcpu);
+ guest_timing_exit_irqoff();
+ local_irq_enable();
+
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
memcpy(vcpu->run->s.regs.gprs,
sie_page->pv_grregs,
@@ -5111,10 +5142,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
}
}
- local_irq_disable();
- __enable_cpu_timer_accounting(vcpu);
- guest_exit_irqoff();
- local_irq_enable();
kvm_vcpu_srcu_read_lock(vcpu);
rc = vcpu_post_run(vcpu, exit_reason);
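
Taken together, the __vcpu_run() hunks produce this per-entry sequence
(condensed sketch of the code above, with kvm_s390_enter_exit_sie()
inlined for illustration):

local_irq_disable();
guest_timing_enter_irqoff();		/* accounting; sets PF_VCPU */
__disable_cpu_timer_accounting(vcpu);

guest_state_enter_irqoff();		/* lockdep/tracing: IRQs on in guest */
arch_local_irq_enable();		/* raw helper: no lockdep/tracing/RCU */
exit_reason = sie64a(scb, gprs, gasce);
arch_local_irq_disable();
guest_state_exit_irqoff();		/* lockdep/tracing: back in host */

__enable_cpu_timer_accounting(vcpu);
guest_timing_exit_irqoff();		/* clears PF_VCPU */
local_irq_enable();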
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 13a9661d2b28..347268f89f2f 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -1170,10 +1170,6 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
vcpu->arch.sie_block->fpf & FPF_BPBC)
set_thread_flag(TIF_ISOLATE_BP_GUEST);
- local_irq_disable();
- guest_enter_irqoff();
- local_irq_enable();
-
/*
* Simulate a SIE entry of the VCPU (see sie64a), so VCPU blocking
* and VCPU requests also hinder the vSIE from running and lead
@@ -1183,15 +1179,16 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
vcpu->arch.sie_block->prog0c |= PROG_IN_SIE;
current->thread.gmap_int_code = 0;
barrier();
- if (!kvm_s390_vcpu_sie_inhibited(vcpu))
- rc = sie64a(scb_s, vcpu->run->s.regs.gprs, vsie_page->gmap->asce);
+ if (!kvm_s390_vcpu_sie_inhibited(vcpu)) {
+ local_irq_disable();
+ guest_timing_enter_irqoff();
+ rc = kvm_s390_enter_exit_sie(scb_s, vcpu->run->s.regs.gprs, vsie_page->gmap->asce);
+ guest_timing_exit_irqoff();
+ local_irq_enable();
+ }
barrier();
vcpu->arch.sie_block->prog0c &= ~PROG_IN_SIE;
- local_irq_disable();
- guest_exit_irqoff();
- local_irq_enable();
-
/* restore guest state for bp isolation override */
if (!guest_bp_isolation)
clear_thread_flag(TIF_ISOLATE_BP_GUEST);
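
Note the structural change in do_vsie_run(): guest_enter_irqoff() and
guest_exit_irqoff() used to bracket the whole section, including the case
where SIE is inhibited; now the timing and state transitions happen only
around an actual SIE entry. Condensed from the hunks above:

if (!kvm_s390_vcpu_sie_inhibited(vcpu)) {
	local_irq_disable();
	guest_timing_enter_irqoff();	/* PF_VCPU only while really entering */
	rc = kvm_s390_enter_exit_sie(scb_s, vcpu->run->s.regs.gprs,
				     vsie_page->gmap->asce);
	guest_timing_exit_irqoff();
	local_irq_enable();
}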
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index f94f3fdf15fc..3bf99cbad8a3 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -87,6 +87,22 @@ static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) {}
#endif
/**
+ * arch_in_rcu_eqs - Architecture specific check for RCU extended quiescent
+ * states.
+ *
+ * Returns: true if the CPU is potentially in an RCU EQS, false otherwise.
+ *
+ * Architectures only need to define this if threads other than the idle thread
+ * may have an interruptible EQS. This does not need to handle idle threads. It
+ * is safe to over-estimate at the cost of redundant RCU management work.
+ *
+ * Invoked from irqentry_enter()
+ */
+#ifndef arch_in_rcu_eqs
+static __always_inline bool arch_in_rcu_eqs(void) { return false; }
+#endif
+
+/**
* enter_from_user_mode - Establish state when coming from user mode
*
* Syscall/interrupt entry disables interrupts, but user mode is traced as
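
The hook follows the usual generic-entry override convention: an
architecture provides the function plus a matching #define in its
asm/entry-common.h, which makes the false-returning default above compile
out. A hypothetical example for some other architecture, where
my_arch_vcpu_running() is an invented helper used purely for illustration:

/* arch/<arch>/include/asm/entry-common.h -- hypothetical sketch */
static __always_inline bool arch_in_rcu_eqs(void)
{
	/* Over-estimating is fine; it only costs redundant RCU work. */
	return my_arch_vcpu_running(current);	/* invented helper */
}
#define arch_in_rcu_eqs arch_in_rcu_eqs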
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index a8dd1f27417c..eb52d38e8099 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -220,7 +220,8 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
* TINY_RCU does not support EQS, so let the compiler eliminate
* this part when enabled.
*/
- if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) {
+ if (!IS_ENABLED(CONFIG_TINY_RCU) &&
+ (is_idle_task(current) || arch_in_rcu_eqs())) {
/*
* If RCU is not watching then the same careful
* sequence vs. lockdep and tracing is required
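
For context, the branch guarded by this condition continues approximately
as follows (reconstructed from kernel/entry/common.c; details such as
KMSAN handling are elided and may differ in the actual tree):

	lockdep_hardirqs_off(CALLER_ADDR0);
	ct_irq_enter();			/* make RCU watch before any tracing */
	instrumentation_begin();
	trace_hardirqs_off_finish();
	instrumentation_end();

	ret.exit_rcu = true;		/* irqentry_exit() undoes ct_irq_enter() */
	return ret;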