summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/svm/avic.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2025-10-06 12:37:34 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2025-10-06 12:37:34 -0700
commit 256e3417065b2721f77bcd37331796b59483ef3b (patch)
tree 616ede12953ce1dfc226208a864b5a7ce63f8d2f /arch/x86/kvm/svm/avic.c
parent fb5bc347311b1d78dc608c91c2d68327b0a1d1d4 (diff)
parent 6b36119b94d0b2bb8cea9d512017efafd461d6ac (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull x86 kvm updates from Paolo Bonzini: "Generic: - Rework almost all of KVM's exports to expose symbols only to KVM's x86 vendor modules (kvm-{amd,intel}.ko and PPC's kvm-{pr,hv}.ko) x86: - Rework almost all of KVM x86's exports to expose symbols only to KVM's vendor modules, i.e. to kvm-{amd,intel}.ko - Add support for virtualizing Control-flow Enforcement Technology (CET) on Intel (Shadow Stacks and Indirect Branch Tracking) and AMD (Shadow Stacks). It is worth noting that while SHSTK and IBT can be enabled separately in CPUID, it is not really possible to virtualize them separately. Therefore, Intel processors will really allow both SHSTK and IBT under the hood if either is made visible in the guest's CPUID. The alternative would be to intercept XSAVES/XRSTORS, which is not feasible for performance reasons - Fix a variety of fuzzing WARNs all caused by checking L1 intercepts when completing userspace I/O. KVM has already committed to allowing L2 to perform I/O at that point - Emulate PERF_CNTR_GLOBAL_STATUS_SET for PerfMonV2 guests, as the MSR is supposed to exist for v2 PMUs - Allow Centaur CPU leaves (base 0xC000_0000) for Zhaoxin CPUs - Add support for the immediate forms of RDMSR and WRMSRNS, sans full emulator support (KVM should never need to emulate the MSRs outside of forced emulation and other contrived testing scenarios) - Clean up the MSR APIs in preparation for CET and FRED virtualization, as well as mediated vPMU support - Clean up a pile of PMU code in anticipation of adding support for mediated vPMUs - Reject in-kernel IOAPIC/PIT for TDX VMs, as KVM can't obtain EOI vmexits needed to faithfully emulate an I/O APIC for such guests - Many cleanups and minor fixes - Recover possible NX huge pages within the TDP MMU under read lock to reduce guest jitter when restoring NX huge pages - Return -EAGAIN during prefault if userspace concurrently deletes/moves the relevant memslot, to fix an issue where prefaulting could deadlock with the memslot update 
x86 (AMD): - Enable AVIC by default for Zen4+ if x2AVIC (and other prereqs) is supported - Require a minimum GHCB version of 2 when starting SEV-SNP guests via KVM_SEV_INIT2 so that invalid GHCB versions result in immediate errors instead of latent guest failures - Add support for SEV-SNP's CipherText Hiding, an opt-in feature that prevents unauthorized CPU accesses from reading the ciphertext of SNP guest private memory, e.g. to attempt an offline attack. This feature splits the shared SEV-ES/SEV-SNP ASID space into separate ranges for SEV-ES and SEV-SNP guests, therefore a new module parameter is needed to control the number of ASIDs that can be used for VMs with CipherText Hiding vs. how many can be used to run SEV-ES guests - Add support for Secure TSC for SEV-SNP guests, which prevents the untrusted host from tampering with the guest's TSC frequency, while still allowing the VMM to configure the guest's TSC frequency prior to launch - Validate the XCR0 provided by the guest (via the GHCB) to avoid bugs resulting from bogus XCR0 values - Save an SEV guest's policy if and only if LAUNCH_START fully succeeds to avoid leaving behind stale state (thankfully not consumed in KVM) - Explicitly reject non-positive effective lengths during SNP's LAUNCH_UPDATE instead of subtly relying on guest_memfd to deal with them - Reload the pre-VMRUN TSC_AUX on #VMEXIT for SEV-ES guests, not the host's desired TSC_AUX, to fix a bug where KVM was keeping a different vCPU's TSC_AUX in the host MSR until return to userspace KVM (Intel): - Preparation for FRED support - Don't retry in TDX's anti-zero-step mitigation if the target memslot is invalid, i.e. 
is being deleted or moved, to fix a deadlock scenario similar to the aforementioned prefaulting case - Misc bugfixes and minor cleanups" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (142 commits) KVM: x86: Export KVM-internal symbols for sub-modules only KVM: x86: Drop pointless exports of kvm_arch_xxx() hooks KVM: x86: Move kvm_intr_is_single_vcpu() to lapic.c KVM: Export KVM-internal symbols for sub-modules only KVM: s390/vfio-ap: Use kvm_is_gpa_in_memslot() instead of open coded equivalent KVM: VMX: Make CR4.CET a guest owned bit KVM: selftests: Verify MSRs are (not) in save/restore list when (un)supported KVM: selftests: Add coverage for KVM-defined registers in MSRs test KVM: selftests: Add KVM_{G,S}ET_ONE_REG coverage to MSRs test KVM: selftests: Extend MSRs test to validate vCPUs without supported features KVM: selftests: Add support for MSR_IA32_{S,U}_CET to MSRs test KVM: selftests: Add an MSR test to exercise guest/host and read/write KVM: x86: Define AMD's #HV, #VC, and #SX exception vectors KVM: x86: Define Control Protection Exception (#CP) vector KVM: x86: Add human friendly formatting for #XM, and #VE KVM: SVM: Enable shadow stack virtualization for SVM KVM: SEV: Synchronize MSR_IA32_XSS from the GHCB when it's valid KVM: SVM: Pass through shadow stack MSRs as appropriate KVM: SVM: Update dump_vmcb with shadow stack save area additions KVM: nSVM: Save/load CET Shadow Stack state to/from vmcb12/vmcb02 ...
Diffstat (limited to 'arch/x86/kvm/svm/avic.c')
-rw-r--r-- arch/x86/kvm/svm/avic.c 151
1 files changed, 125 insertions, 26 deletions
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index a34c5c3b164e..f286b5706d7c 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -64,6 +64,34 @@
static_assert(__AVIC_GATAG(AVIC_VM_ID_MASK, AVIC_VCPU_IDX_MASK) == -1u);
+#define AVIC_AUTO_MODE -1
+
+static int avic_param_set(const char *val, const struct kernel_param *kp)
+{
+ if (val && sysfs_streq(val, "auto")) {
+ *(int *)kp->arg = AVIC_AUTO_MODE;
+ return 0;
+ }
+
+ return param_set_bint(val, kp);
+}
+
+static const struct kernel_param_ops avic_ops = {
+ .flags = KERNEL_PARAM_OPS_FL_NOARG,
+ .set = avic_param_set,
+ .get = param_get_bool,
+};
+
+/*
+ * Enable / disable AVIC. In "auto" mode (default behavior), AVIC is enabled
+ * for Zen4+ CPUs with x2AVIC (and all other criteria for enablement are met).
+ */
+static int avic = AVIC_AUTO_MODE;
+module_param_cb(avic, &avic_ops, &avic, 0444);
+__MODULE_PARM_TYPE(avic, "bool");
+
+module_param(enable_ipiv, bool, 0444);
+
static bool force_avic;
module_param_unsafe(force_avic, bool, 0444);
@@ -77,7 +105,58 @@ static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
static u32 next_vm_id = 0;
static bool next_vm_id_wrapped = 0;
static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
-bool x2avic_enabled;
+static bool x2avic_enabled;
+
+
+static void avic_set_x2apic_msr_interception(struct vcpu_svm *svm,
+ bool intercept)
+{
+ static const u32 x2avic_passthrough_msrs[] = {
+ X2APIC_MSR(APIC_ID),
+ X2APIC_MSR(APIC_LVR),
+ X2APIC_MSR(APIC_TASKPRI),
+ X2APIC_MSR(APIC_ARBPRI),
+ X2APIC_MSR(APIC_PROCPRI),
+ X2APIC_MSR(APIC_EOI),
+ X2APIC_MSR(APIC_RRR),
+ X2APIC_MSR(APIC_LDR),
+ X2APIC_MSR(APIC_DFR),
+ X2APIC_MSR(APIC_SPIV),
+ X2APIC_MSR(APIC_ISR),
+ X2APIC_MSR(APIC_TMR),
+ X2APIC_MSR(APIC_IRR),
+ X2APIC_MSR(APIC_ESR),
+ X2APIC_MSR(APIC_ICR),
+ X2APIC_MSR(APIC_ICR2),
+
+ /*
+ * Note! Always intercept LVTT, as TSC-deadline timer mode
+ * isn't virtualized by hardware, and the CPU will generate a
+ * #GP instead of a #VMEXIT.
+ */
+ X2APIC_MSR(APIC_LVTTHMR),
+ X2APIC_MSR(APIC_LVTPC),
+ X2APIC_MSR(APIC_LVT0),
+ X2APIC_MSR(APIC_LVT1),
+ X2APIC_MSR(APIC_LVTERR),
+ X2APIC_MSR(APIC_TMICT),
+ X2APIC_MSR(APIC_TMCCT),
+ X2APIC_MSR(APIC_TDCR),
+ };
+ int i;
+
+ if (intercept == svm->x2avic_msrs_intercepted)
+ return;
+
+ if (!x2avic_enabled)
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(x2avic_passthrough_msrs); i++)
+ svm_set_intercept_for_msr(&svm->vcpu, x2avic_passthrough_msrs[i],
+ MSR_TYPE_RW, intercept);
+
+ svm->x2avic_msrs_intercepted = intercept;
+}
static void avic_activate_vmcb(struct vcpu_svm *svm)
{
@@ -99,7 +178,7 @@ static void avic_activate_vmcb(struct vcpu_svm *svm)
vmcb->control.int_ctl |= X2APIC_MODE_MASK;
vmcb->control.avic_physical_id |= X2AVIC_MAX_PHYSICAL_ID;
/* Disabling MSR intercept for x2APIC registers */
- svm_set_x2apic_msr_interception(svm, false);
+ avic_set_x2apic_msr_interception(svm, false);
} else {
/*
* Flush the TLB, the guest may have inserted a non-APIC
@@ -110,7 +189,7 @@ static void avic_activate_vmcb(struct vcpu_svm *svm)
/* For xAVIC and hybrid-xAVIC modes */
vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID;
/* Enabling MSR intercept for x2APIC registers */
- svm_set_x2apic_msr_interception(svm, true);
+ avic_set_x2apic_msr_interception(svm, true);
}
}
@@ -130,7 +209,7 @@ static void avic_deactivate_vmcb(struct vcpu_svm *svm)
return;
/* Enabling MSR intercept for x2APIC registers */
- svm_set_x2apic_msr_interception(svm, true);
+ avic_set_x2apic_msr_interception(svm, true);
}
/* Note:
@@ -1090,23 +1169,27 @@ void avic_vcpu_unblocking(struct kvm_vcpu *vcpu)
avic_vcpu_load(vcpu, vcpu->cpu);
}
-/*
- * Note:
- * - The module param avic enable both xAPIC and x2APIC mode.
- * - Hypervisor can support both xAVIC and x2AVIC in the same guest.
- * - The mode can be switched at run-time.
- */
-bool avic_hardware_setup(void)
+static bool __init avic_want_avic_enabled(void)
{
- if (!npt_enabled)
+ /*
+ * In "auto" mode, enable AVIC by default for Zen4+ if x2AVIC is
+ * supported (to avoid enabling partial support by default, and because
+ * x2AVIC should be supported by all Zen4+ CPUs). Explicitly check for
+ * families newer than 0x19 (Zen5+), as the kernel's synthetic ZenX flags
+ * aren't inclusive of previous generations, i.e. the kernel will set
+ * at most one ZenX feature flag.
+ */
+ if (avic == AVIC_AUTO_MODE)
+ avic = boot_cpu_has(X86_FEATURE_X2AVIC) &&
+ (boot_cpu_data.x86 > 0x19 || cpu_feature_enabled(X86_FEATURE_ZEN4));
+
+ if (!avic || !npt_enabled)
return false;
/* AVIC is a prerequisite for x2AVIC. */
if (!boot_cpu_has(X86_FEATURE_AVIC) && !force_avic) {
- if (boot_cpu_has(X86_FEATURE_X2AVIC)) {
- pr_warn(FW_BUG "Cannot support x2AVIC due to AVIC is disabled");
- pr_warn(FW_BUG "Try enable AVIC using force_avic option");
- }
+ if (boot_cpu_has(X86_FEATURE_X2AVIC))
+ pr_warn(FW_BUG "Cannot enable x2AVIC, AVIC is unsupported\n");
return false;
}
@@ -1116,21 +1199,37 @@ bool avic_hardware_setup(void)
return false;
}
- if (boot_cpu_has(X86_FEATURE_AVIC)) {
- pr_info("AVIC enabled\n");
- } else if (force_avic) {
- /*
- * Some older systems does not advertise AVIC support.
- * See Revision Guide for specific AMD processor for more detail.
- */
- pr_warn("AVIC is not supported in CPUID but force enabled");
- pr_warn("Your system might crash and burn");
- }
+ /*
+ * Print a scary message if AVIC is force enabled to make it abundantly
+ * clear that ignoring CPUID could have repercussions. See Revision
+ * Guide for specific AMD processor for more details.
+ */
+ if (!boot_cpu_has(X86_FEATURE_AVIC))
+ pr_warn("AVIC unsupported in CPUID but force enabled, your system might crash and burn\n");
+
+ return true;
+}
+
+/*
+ * Note:
+ * - The module param avic enables both xAPIC and x2APIC modes.
+ * - Hypervisor can support both xAVIC and x2AVIC in the same guest.
+ * - The mode can be switched at run-time.
+ */
+bool __init avic_hardware_setup(void)
+{
+ avic = avic_want_avic_enabled();
+ if (!avic)
+ return false;
+
+ pr_info("AVIC enabled\n");
/* AVIC is a prerequisite for x2AVIC. */
x2avic_enabled = boot_cpu_has(X86_FEATURE_X2AVIC);
if (x2avic_enabled)
pr_info("x2AVIC enabled\n");
+ else
+ svm_x86_ops.allow_apicv_in_x2apic_without_x2apic_virtualization = true;
/*
* Disable IPI virtualization for AMD Family 17h CPUs (Zen1 and Zen2)