summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Paolo Bonzini <pbonzini@redhat.com> 2025-06-02 03:05:29 -0400
committer Paolo Bonzini <pbonzini@redhat.com> 2025-06-02 03:05:29 -0400
commit 61374cc145f4a56377eaf87c7409a97ec7a34041 (patch)
tree c067f28e1650e8a5022afccbc8074e72ee586867
parent 438e22801b1958f86883812af70d402eda29c4f5 (diff)
parent 4d62121ce9b58ea23c8d62207cbc604e98ecdc0a (diff)
Merge tag 'kvmarm-fixes-6.16-1' of https://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 fixes for 6.16, take #1

- Make the irqbypass hooks resilient to changes in the GSI<->MSI routing, avoiding stale vLPI mappings being left behind. The fix is to resolve the VGIC IRQ using the host IRQ (which is stable) and nuke the vLPI mapping upon a routing change.

- Close another VGIC race where vCPU creation races with VGIC creation, leading to in-flight vCPUs entering the kernel without private IRQs allocated.

- Fix a build issue triggered by the recently added workaround for Ampere's AC04_CPU_23 erratum.

- Correctly sign-extend the VA when emulating a TLBI instruction potentially targeting a VNCR mapping.

- Avoid dereferencing a NULL pointer in the VGIC debug code, which can happen if the device doesn't have any mapping yet.
-rw-r--r-- arch/arm64/include/asm/sysreg.h 1
-rw-r--r-- arch/arm64/kvm/arm.c 26
-rw-r--r-- arch/arm64/kvm/nested.c 6
-rw-r--r-- arch/arm64/kvm/vgic/vgic-debug.c 5
-rw-r--r-- arch/arm64/kvm/vgic/vgic-init.c 27
-rw-r--r-- arch/arm64/kvm/vgic/vgic-its.c 48
-rw-r--r-- arch/arm64/kvm/vgic/vgic-v4.c 92
-rw-r--r-- include/kvm/arm_vgic.h 3
8 files changed, 134 insertions, 74 deletions
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index cd853801a8f7b..f1bb0d10c39a3 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -12,6 +12,7 @@
#include <linux/bits.h>
#include <linux/stringify.h>
#include <linux/kasan-tags.h>
+#include <linux/kconfig.h>
#include <asm/gpr-num.h>
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 248e257e988b6..de2b4e9c9f9fb 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -2747,6 +2747,7 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
return kvm_vgic_v4_set_forwarding(irqfd->kvm, prod->irq,
&irqfd->irq_entry);
}
+
void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
struct irq_bypass_producer *prod)
{
@@ -2757,8 +2758,29 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
if (irq_entry->type != KVM_IRQ_ROUTING_MSI)
return;
- kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq,
- &irqfd->irq_entry);
+ kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq);
+}
+
+bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old,
+ struct kvm_kernel_irq_routing_entry *new)
+{
+ if (new->type != KVM_IRQ_ROUTING_MSI)
+ return true;
+
+ return memcmp(&old->msi, &new->msi, sizeof(new->msi));
+}
+
+int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
+ uint32_t guest_irq, bool set)
+{
+ /*
+ * Remapping the vLPI requires taking the its_lock mutex to resolve
+ * the new translation. We're in spinlock land at this point, so no
+ * chance of resolving the translation.
+ *
+ * Unmap the vLPI and fall back to software LPI injection.
+ */
+ return kvm_vgic_v4_unset_forwarding(kvm, host_irq);
}
void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons)
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 291dbe38eb5cf..4a53e4147fb01 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -918,6 +918,8 @@ static void invalidate_vncr_va(struct kvm *kvm,
}
}
+#define tlbi_va_s1_to_va(v) (u64)sign_extend64((v) << 12, 48)
+
static void compute_s1_tlbi_range(struct kvm_vcpu *vcpu, u32 inst, u64 val,
struct s1e2_tlbi_scope *scope)
{
@@ -964,7 +966,7 @@ static void compute_s1_tlbi_range(struct kvm_vcpu *vcpu, u32 inst, u64 val,
scope->size = ttl_to_size(FIELD_GET(TLBI_TTL_MASK, val));
if (!scope->size)
scope->size = SZ_1G;
- scope->va = (val << 12) & ~(scope->size - 1);
+ scope->va = tlbi_va_s1_to_va(val) & ~(scope->size - 1);
scope->asid = FIELD_GET(TLBIR_ASID_MASK, val);
break;
case OP_TLBI_ASIDE1:
@@ -992,7 +994,7 @@ static void compute_s1_tlbi_range(struct kvm_vcpu *vcpu, u32 inst, u64 val,
scope->size = ttl_to_size(FIELD_GET(TLBI_TTL_MASK, val));
if (!scope->size)
scope->size = SZ_1G;
- scope->va = (val << 12) & ~(scope->size - 1);
+ scope->va = tlbi_va_s1_to_va(val) & ~(scope->size - 1);
break;
case OP_TLBI_RVAE2:
case OP_TLBI_RVAE2IS:
diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c
index f8425f381de97..2684f273d9e17 100644
--- a/arch/arm64/kvm/vgic/vgic-debug.c
+++ b/arch/arm64/kvm/vgic/vgic-debug.c
@@ -490,6 +490,9 @@ static int vgic_its_debug_show(struct seq_file *s, void *v)
struct its_device *dev = iter->dev;
struct its_ite *ite = iter->ite;
+ if (!ite)
+ return 0;
+
if (list_is_first(&ite->ite_list, &dev->itt_head)) {
seq_printf(s, "\n");
seq_printf(s, "Device ID: 0x%x, Event ID Range: [0 - %llu]\n",
@@ -498,7 +501,7 @@ static int vgic_its_debug_show(struct seq_file *s, void *v)
seq_printf(s, "-----------------------------------------------\n");
}
- if (ite && ite->irq && ite->collection) {
+ if (ite->irq && ite->collection) {
seq_printf(s, "%8u %8u %8u %8u %8u %2d\n",
ite->event_id, ite->irq->intid, ite->irq->hwintid,
ite->collection->target_addr,
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 6a426d403a6b3..eb1205654ac89 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -84,15 +84,40 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
!kvm_vgic_global_state.can_emulate_gicv2)
return -ENODEV;
- /* Must be held to avoid race with vCPU creation */
+ /*
+ * Ensure mutual exclusion with vCPU creation and any vCPU ioctls by:
+ *
+ * - Holding kvm->lock to prevent KVM_CREATE_VCPU from reaching
+ * kvm_arch_vcpu_precreate() and ensuring created_vcpus is stable.
+ * This alone is insufficient, as kvm_vm_ioctl_create_vcpu() drops
+ * the kvm->lock before completing the vCPU creation.
+ */
lockdep_assert_held(&kvm->lock);
+ /*
+ * - Acquiring the vCPU mutex for every *online* vCPU to prevent
+ * concurrent vCPU ioctls for vCPUs already visible to userspace.
+ */
ret = -EBUSY;
if (kvm_trylock_all_vcpus(kvm))
return ret;
+ /*
+ * - Taking the config_lock which protects VGIC data structures such
+ * as the per-vCPU arrays of private IRQs (SGIs, PPIs).
+ */
mutex_lock(&kvm->arch.config_lock);
+ /*
+ * - Bailing on the entire thing if a vCPU is in the middle of creation,
+ * dropped the kvm->lock, but hasn't reached kvm_arch_vcpu_create().
+ *
+ * The whole combination of this guarantees that no vCPU can get into
+ * KVM with a VGIC configuration inconsistent with the VM's VGIC.
+ */
+ if (kvm->created_vcpus != atomic_read(&kvm->online_vcpus))
+ goto out_unlock;
+
if (irqchip_in_kernel(kvm)) {
ret = -EEXIST;
goto out_unlock;
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 2eb3e023f66a8..534049c7c94b2 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -306,39 +306,34 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
}
}
- raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
-
if (irq->hw)
- return its_prop_update_vlpi(irq->host_irq, prop, needs_inv);
+ ret = its_prop_update_vlpi(irq->host_irq, prop, needs_inv);
- return 0;
+ raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+ return ret;
}
static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu)
{
- int ret = 0;
- unsigned long flags;
+ struct its_vlpi_map map;
+ int ret;
- raw_spin_lock_irqsave(&irq->irq_lock, flags);
+ guard(raw_spinlock_irqsave)(&irq->irq_lock);
irq->target_vcpu = vcpu;
- raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
- if (irq->hw) {
- struct its_vlpi_map map;
-
- ret = its_get_vlpi(irq->host_irq, &map);
- if (ret)
- return ret;
+ if (!irq->hw)
+ return 0;
- if (map.vpe)
- atomic_dec(&map.vpe->vlpi_count);
- map.vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
- atomic_inc(&map.vpe->vlpi_count);
+ ret = its_get_vlpi(irq->host_irq, &map);
+ if (ret)
+ return ret;
- ret = its_map_vlpi(irq->host_irq, &map);
- }
+ if (map.vpe)
+ atomic_dec(&map.vpe->vlpi_count);
- return ret;
+ map.vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
+ atomic_inc(&map.vpe->vlpi_count);
+ return its_map_vlpi(irq->host_irq, &map);
}
static struct kvm_vcpu *collection_to_vcpu(struct kvm *kvm,
@@ -756,12 +751,17 @@ int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
/* Requires the its_lock to be held. */
static void its_free_ite(struct kvm *kvm, struct its_ite *ite)
{
+ struct vgic_irq *irq = ite->irq;
list_del(&ite->ite_list);
/* This put matches the get in vgic_add_lpi. */
- if (ite->irq) {
- if (ite->irq->hw)
- WARN_ON(its_unmap_vlpi(ite->irq->host_irq));
+ if (irq) {
+ scoped_guard(raw_spinlock_irqsave, &irq->irq_lock) {
+ if (irq->hw)
+ WARN_ON(its_unmap_vlpi(ite->irq->host_irq));
+
+ irq->hw = false;
+ }
vgic_put_irq(kvm, ite->irq);
}
diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
index c7de6154627c4..1939461081923 100644
--- a/arch/arm64/kvm/vgic/vgic-v4.c
+++ b/arch/arm64/kvm/vgic/vgic-v4.c
@@ -444,7 +444,7 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
if (IS_ERR(its))
return 0;
- mutex_lock(&its->its_lock);
+ guard(mutex)(&its->its_lock);
/*
* Perform the actual DevID/EventID -> LPI translation.
@@ -455,11 +455,13 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
*/
if (vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid,
irq_entry->msi.data, &irq))
- goto out;
+ return 0;
+
+ raw_spin_lock_irqsave(&irq->irq_lock, flags);
/* Silently exit if the vLPI is already mapped */
if (irq->hw)
- goto out;
+ goto out_unlock_irq;
/*
* Emit the mapping request. If it fails, the ITS probably
@@ -479,68 +481,74 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
ret = its_map_vlpi(virq, &map);
if (ret)
- goto out;
+ goto out_unlock_irq;
irq->hw = true;
irq->host_irq = virq;
atomic_inc(&map.vpe->vlpi_count);
/* Transfer pending state */
- raw_spin_lock_irqsave(&irq->irq_lock, flags);
- if (irq->pending_latch) {
- ret = irq_set_irqchip_state(irq->host_irq,
- IRQCHIP_STATE_PENDING,
- irq->pending_latch);
- WARN_RATELIMIT(ret, "IRQ %d", irq->host_irq);
+ if (!irq->pending_latch)
+ goto out_unlock_irq;
- /*
- * Clear pending_latch and communicate this state
- * change via vgic_queue_irq_unlock.
- */
- irq->pending_latch = false;
- vgic_queue_irq_unlock(kvm, irq, flags);
- } else {
- raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
- }
+ ret = irq_set_irqchip_state(irq->host_irq, IRQCHIP_STATE_PENDING,
+ irq->pending_latch);
+ WARN_RATELIMIT(ret, "IRQ %d", irq->host_irq);
-out:
- mutex_unlock(&its->its_lock);
+ /*
+ * Clear pending_latch and communicate this state
+ * change via vgic_queue_irq_unlock.
+ */
+ irq->pending_latch = false;
+ vgic_queue_irq_unlock(kvm, irq, flags);
+ return ret;
+
+out_unlock_irq:
+ raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
return ret;
}
-int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq,
- struct kvm_kernel_irq_routing_entry *irq_entry)
+static struct vgic_irq *__vgic_host_irq_get_vlpi(struct kvm *kvm, int host_irq)
{
- struct vgic_its *its;
struct vgic_irq *irq;
- int ret;
+ unsigned long idx;
+
+ guard(rcu)();
+ xa_for_each(&kvm->arch.vgic.lpi_xa, idx, irq) {
+ if (!irq->hw || irq->host_irq != host_irq)
+ continue;
+
+ if (!vgic_try_get_irq_kref(irq))
+ return NULL;
+
+ return irq;
+ }
+
+ return NULL;
+}
+
+int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int host_irq)
+{
+ struct vgic_irq *irq;
+ unsigned long flags;
+ int ret = 0;
if (!vgic_supports_direct_msis(kvm))
return 0;
- /*
- * Get the ITS, and escape early on error (not a valid
- * doorbell for any of our vITSs).
- */
- its = vgic_get_its(kvm, irq_entry);
- if (IS_ERR(its))
+ irq = __vgic_host_irq_get_vlpi(kvm, host_irq);
+ if (!irq)
return 0;
- mutex_lock(&its->its_lock);
-
- ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid,
- irq_entry->msi.data, &irq);
- if (ret)
- goto out;
-
- WARN_ON(irq->hw && irq->host_irq != virq);
+ raw_spin_lock_irqsave(&irq->irq_lock, flags);
+ WARN_ON(irq->hw && irq->host_irq != host_irq);
if (irq->hw) {
atomic_dec(&irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count);
irq->hw = false;
- ret = its_unmap_vlpi(virq);
+ ret = its_unmap_vlpi(host_irq);
}
-out:
- mutex_unlock(&its->its_lock);
+ raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+ vgic_put_irq(kvm, irq);
return ret;
}
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 714cef854c1c3..4a34f7f0a8648 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -434,8 +434,7 @@ struct kvm_kernel_irq_routing_entry;
int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int irq,
struct kvm_kernel_irq_routing_entry *irq_entry);
-int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq,
- struct kvm_kernel_irq_routing_entry *irq_entry);
+int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int host_irq);
int vgic_v4_load(struct kvm_vcpu *vcpu);
void vgic_v4_commit(struct kvm_vcpu *vcpu);