diff options
Diffstat (limited to 'arch/x86')
25 files changed, 477 insertions, 175 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 58d890fe2100..52c8910ba2ef 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2701,6 +2701,15 @@ config MITIGATION_TSA security vulnerability on AMD CPUs which can lead to forwarding of invalid info to subsequent instructions and thus can affect their timing and thereby cause a leakage. + +config MITIGATION_VMSCAPE + bool "Mitigate VMSCAPE" + depends on KVM + default y + help + Enable mitigation for VMSCAPE attacks. VMSCAPE is a hardware security + vulnerability on Intel and AMD CPUs that may allow a guest to do + Spectre v2 style attacks on userspace hypervisor. endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c index 916bac09b464..63e037e94e4c 100644 --- a/arch/x86/boot/cpuflags.c +++ b/arch/x86/boot/cpuflags.c @@ -106,5 +106,18 @@ void get_cpuflags(void) cpuid(0x80000001, &ignored, &ignored, &cpu.flags[6], &cpu.flags[1]); } + + if (max_amd_level >= 0x8000001f) { + u32 ebx; + + /* + * The X86_FEATURE_COHERENCY_SFW_NO feature bit is in + * the virtualization flags entry (word 8) and set by + * scattered.c, so the bit needs to be explicitly set. + */ + cpuid(0x8000001f, &ignored, &ebx, &ignored, &ignored); + if (ebx & BIT(31)) + set_bit(X86_FEATURE_COHERENCY_SFW_NO, cpu.flags); + } } } diff --git a/arch/x86/boot/startup/sev-shared.c b/arch/x86/boot/startup/sev-shared.c index 7a706db87b93..a34cd19796f9 100644 --- a/arch/x86/boot/startup/sev-shared.c +++ b/arch/x86/boot/startup/sev-shared.c @@ -785,6 +785,7 @@ static void __head svsm_pval_4k_page(unsigned long paddr, bool validate) pc->entry[0].page_size = RMP_PG_SIZE_4K; pc->entry[0].action = validate; pc->entry[0].ignore_cf = 0; + pc->entry[0].rsvd = 0; pc->entry[0].pfn = paddr >> PAGE_SHIFT; /* Protocol 0, Call ID 1 */ @@ -810,6 +811,13 @@ static void __head pvalidate_4k_page(unsigned long vaddr, unsigned long paddr, if (ret) sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE); } + + /* + * If validating memory (making it private) and affected by the + * cache-coherency vulnerability, perform the cache eviction mitigation. + */ + if (validate && !has_cpuflag(X86_FEATURE_COHERENCY_SFW_NO)) + sev_evict_cache((void *)vaddr, 1); } /* diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index fc59ce78c477..14ef5908fb27 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -227,6 +227,7 @@ static u64 svsm_build_ca_from_pfn_range(u64 pfn, u64 pfn_end, bool action, pe->page_size = RMP_PG_SIZE_4K; pe->action = action; pe->ignore_cf = 0; + pe->rsvd = 0; pe->pfn = pfn; pe++; @@ -257,6 +258,7 @@ static int svsm_build_ca_from_psc_desc(struct snp_psc_desc *desc, unsigned int d pe->page_size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K; pe->action = e->operation == SNP_PAGE_STATE_PRIVATE; pe->ignore_cf = 0; + pe->rsvd = 0; pe->pfn = e->gfn; pe++; @@ -358,10 +360,31 @@ static void svsm_pval_pages(struct snp_psc_desc *desc) static void pvalidate_pages(struct snp_psc_desc *desc) { + struct psc_entry *e; + unsigned int i; + if (snp_vmpl) svsm_pval_pages(desc); else pval_pages(desc); + + /* + * If not affected by the cache-coherency vulnerability there is no need + * to perform the cache eviction mitigation. + */ + if (cpu_feature_enabled(X86_FEATURE_COHERENCY_SFW_NO)) + return; + + for (i = 0; i <= desc->hdr.end_entry; i++) { + e = &desc->entries[i]; + + /* + * If validating memory (making it private) perform the cache + * eviction mitigation. + */ + if (e->operation == SNP_PAGE_STATE_PRIVATE) + sev_evict_cache(pfn_to_kaddr(e->gfn), e->pagesize ? 512 : 1); + } } static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc) diff --git a/arch/x86/coco/sev/vc-handle.c b/arch/x86/coco/sev/vc-handle.c index faf1fce89ed4..c3b4acbde0d8 100644 --- a/arch/x86/coco/sev/vc-handle.c +++ b/arch/x86/coco/sev/vc-handle.c @@ -371,29 +371,30 @@ static enum es_result __vc_handle_msr_caa(struct pt_regs *regs, bool write) * executing with Secure TSC enabled, so special handling is required for * accesses of MSR_IA32_TSC and MSR_AMD64_GUEST_TSC_FREQ. */ -static enum es_result __vc_handle_secure_tsc_msrs(struct pt_regs *regs, bool write) +static enum es_result __vc_handle_secure_tsc_msrs(struct es_em_ctxt *ctxt, bool write) { + struct pt_regs *regs = ctxt->regs; u64 tsc; /* - * GUEST_TSC_FREQ should not be intercepted when Secure TSC is enabled. - * Terminate the SNP guest when the interception is enabled. + * Writing to MSR_IA32_TSC can cause subsequent reads of the TSC to + * return undefined values, and GUEST_TSC_FREQ is read-only. Generate + * a #GP on all writes. */ - if (regs->cx == MSR_AMD64_GUEST_TSC_FREQ) - return ES_VMM_ERROR; + if (write) { + ctxt->fi.vector = X86_TRAP_GP; + ctxt->fi.error_code = 0; + return ES_EXCEPTION; + } /* - * Writes: Writing to MSR_IA32_TSC can cause subsequent reads of the TSC - * to return undefined values, so ignore all writes. - * - * Reads: Reads of MSR_IA32_TSC should return the current TSC value, use - * the value returned by rdtsc_ordered(). + * GUEST_TSC_FREQ read should not be intercepted when Secure TSC is + * enabled. Terminate the guest if a read is attempted. */ - if (write) { - WARN_ONCE(1, "TSC MSR writes are verboten!\n"); - return ES_OK; - } + if (regs->cx == MSR_AMD64_GUEST_TSC_FREQ) + return ES_VMM_ERROR; + /* Reads of MSR_IA32_TSC should return the current TSC value. */ tsc = rdtsc_ordered(); regs->ax = lower_32_bits(tsc); regs->dx = upper_32_bits(tsc); @@ -416,7 +417,7 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) case MSR_IA32_TSC: case MSR_AMD64_GUEST_TSC_FREQ: if (sev_status & MSR_AMD64_SNP_SECURE_TSC) - return __vc_handle_secure_tsc_msrs(regs, write); + return __vc_handle_secure_tsc_msrs(ctxt, write); break; default: break; diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 602957dd2609..751ca35386b0 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -218,6 +218,7 @@ #define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* "flexpriority" Intel FlexPriority */ #define X86_FEATURE_EPT ( 8*32+ 2) /* "ept" Intel Extended Page Table */ #define X86_FEATURE_VPID ( 8*32+ 3) /* "vpid" Intel Virtual Processor ID */ +#define X86_FEATURE_COHERENCY_SFW_NO ( 8*32+ 4) /* SNP cache coherency software work around not needed */ #define X86_FEATURE_VMMCALL ( 8*32+15) /* "vmmcall" Prefer VMMCALL to VMCALL */ #define X86_FEATURE_XENPV ( 8*32+16) /* Xen paravirtual guest */ @@ -494,6 +495,7 @@ #define X86_FEATURE_TSA_SQ_NO (21*32+11) /* AMD CPU not vulnerable to TSA-SQ */ #define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */ #define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */ +#define X86_FEATURE_IBPB_EXIT_TO_USER (21*32+14) /* Use IBPB on exit-to-userspace, see VMSCAPE bug */ /* * BUG word(s) @@ -550,4 +552,5 @@ #define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */ #define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ #define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */ +#define X86_BUG_VMSCAPE X86_BUG( 1*32+10) /* "vmscape" CPU is affected by VMSCAPE attacks from guests */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/cpuid.h b/arch/x86/include/asm/cpuid.h deleted file mode 100644 index d5749b25fa10..000000000000 --- a/arch/x86/include/asm/cpuid.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#ifndef _ASM_X86_CPUID_H -#define _ASM_X86_CPUID_H - -#include <asm/cpuid/api.h> - -#endif /* _ASM_X86_CPUID_H */ diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index d535a97c7284..ce3eb6d5fdf9 100644 --- a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -93,6 +93,13 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, * 8 (ia32) bits. */ choose_random_kstack_offset(rdtsc()); + + /* Avoid unnecessary reads of 'x86_ibpb_exit_to_user' */ + if (cpu_feature_enabled(X86_FEATURE_IBPB_EXIT_TO_USER) && + this_cpu_read(x86_ibpb_exit_to_user)) { + indirect_branch_prediction_barrier(); + this_cpu_write(x86_ibpb_exit_to_user, false); + } } #define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 10f261678749..e29f82466f43 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -530,6 +530,8 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) : "memory"); } +DECLARE_PER_CPU(bool, x86_ibpb_exit_to_user); + static inline void indirect_branch_prediction_barrier(void) { asm_inline volatile(ALTERNATIVE("", "call write_ibpb", X86_FEATURE_IBPB) diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 4604f924d8b8..7eb61ef6a185 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -36,6 +36,9 @@ static inline bool pgtable_l5_enabled(void) #define pgtable_l5_enabled() cpu_feature_enabled(X86_FEATURE_LA57) #endif /* USE_EARLY_PGTABLE_L5 */ +#define ARCH_PAGE_TABLE_SYNC_MASK \ + (pgtable_l5_enabled() ? PGTBL_PGD_MODIFIED : PGTBL_P4D_MODIFIED) + extern unsigned int pgdir_shift; extern unsigned int ptrs_per_p4d; diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 89075ff19afa..02236962fdb1 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -619,6 +619,24 @@ int rmp_make_shared(u64 pfn, enum pg_level level); void snp_leak_pages(u64 pfn, unsigned int npages); void kdump_sev_callback(void); void snp_fixup_e820_tables(void); + +static inline void sev_evict_cache(void *va, int npages) +{ + volatile u8 val __always_unused; + u8 *bytes = va; + int page_idx; + + /* + * For SEV guests, a read from the first/last cache-lines of a 4K page + * using the guest key is sufficient to cause a flush of all cache-lines + * associated with that 4K page without incurring all the overhead of a + * full CLFLUSH sequence. + */ + for (page_idx = 0; page_idx < npages; page_idx++) { + val = bytes[page_idx * PAGE_SIZE]; + val = bytes[page_idx * PAGE_SIZE + PAGE_SIZE - 1]; + } +} #else static inline bool snp_probe_rmptable_info(void) { return false; } static inline int snp_rmptable_init(void) { return -ENOSYS; } @@ -634,6 +652,7 @@ static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV static inline void snp_leak_pages(u64 pfn, unsigned int npages) {} static inline void kdump_sev_callback(void) { } static inline void snp_fixup_e820_tables(void) {} +static inline void sev_evict_cache(void *va, int npages) {} #endif #endif diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 59a62c3780a2..a16d4631547c 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -94,12 +94,13 @@ DECLARE_STATIC_CALL(xen_hypercall, xen_hypercall_func); #ifdef MODULE #define __ADDRESSABLE_xen_hypercall #else -#define __ADDRESSABLE_xen_hypercall __ADDRESSABLE_ASM_STR(__SCK__xen_hypercall) +#define __ADDRESSABLE_xen_hypercall \ + __stringify(.global STATIC_CALL_KEY(xen_hypercall);) #endif #define __HYPERCALL \ __ADDRESSABLE_xen_hypercall \ - "call __SCT__xen_hypercall" + __stringify(call STATIC_CALL_TRAMP(xen_hypercall)) #define __HYPERCALL_ENTRY(x) "a" (x) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a5ece6ebe8a7..a6f88ca1a6b4 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1326,8 +1326,8 @@ static const char * const s5_reset_reason_txt[] = { static __init int print_s5_reset_status_mmio(void) { - unsigned long value; void __iomem *addr; + u32 value; int i; if (!cpu_feature_enabled(X86_FEATURE_ZEN)) @@ -1340,12 +1340,16 @@ static __init int print_s5_reset_status_mmio(void) value = ioread32(addr); iounmap(addr); + /* Value with "all bits set" is an error response and should be ignored. */ + if (value == U32_MAX) + return 0; + for (i = 0; i < ARRAY_SIZE(s5_reset_reason_txt); i++) { if (!(value & BIT(i))) continue; if (s5_reset_reason_txt[i]) { - pr_info("x86/amd: Previous system reset reason [0x%08lx]: %s\n", + pr_info("x86/amd: Previous system reset reason [0x%08x]: %s\n", value, s5_reset_reason_txt[i]); } } diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b74bf937cd9f..36dcfc5105be 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -96,6 +96,9 @@ static void __init its_update_mitigation(void); static void __init its_apply_mitigation(void); static void __init tsa_select_mitigation(void); static void __init tsa_apply_mitigation(void); +static void __init vmscape_select_mitigation(void); +static void __init vmscape_update_mitigation(void); +static void __init vmscape_apply_mitigation(void); /* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; @@ -105,6 +108,14 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); DEFINE_PER_CPU(u64, x86_spec_ctrl_current); EXPORT_PER_CPU_SYMBOL_GPL(x86_spec_ctrl_current); +/* + * Set when the CPU has run a potentially malicious guest. An IBPB will + * be needed to before running userspace. That IBPB will flush the branch + * predictor content. + */ +DEFINE_PER_CPU(bool, x86_ibpb_exit_to_user); +EXPORT_PER_CPU_SYMBOL_GPL(x86_ibpb_exit_to_user); + u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB; static u64 __ro_after_init x86_arch_cap_msr; @@ -262,6 +273,7 @@ void __init cpu_select_mitigations(void) its_select_mitigation(); bhi_select_mitigation(); tsa_select_mitigation(); + vmscape_select_mitigation(); /* * After mitigations are selected, some may need to update their @@ -293,6 +305,7 @@ void __init cpu_select_mitigations(void) bhi_update_mitigation(); /* srso_update_mitigation() depends on retbleed_update_mitigation(). */ srso_update_mitigation(); + vmscape_update_mitigation(); spectre_v1_apply_mitigation(); spectre_v2_apply_mitigation(); @@ -310,6 +323,7 @@ void __init cpu_select_mitigations(void) its_apply_mitigation(); bhi_apply_mitigation(); tsa_apply_mitigation(); + vmscape_apply_mitigation(); } /* @@ -386,7 +400,6 @@ static bool __init should_mitigate_vuln(unsigned int bug) case X86_BUG_SPECTRE_V2: case X86_BUG_RETBLEED: - case X86_BUG_SRSO: case X86_BUG_L1TF: case X86_BUG_ITS: return cpu_attack_vector_mitigated(CPU_MITIGATE_USER_KERNEL) || @@ -417,6 +430,10 @@ static bool __init should_mitigate_vuln(unsigned int bug) cpu_attack_vector_mitigated(CPU_MITIGATE_USER_USER) || cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_GUEST) || (smt_mitigations != SMT_MITIGATIONS_OFF); + + case X86_BUG_SPEC_STORE_BYPASS: + return cpu_attack_vector_mitigated(CPU_MITIGATE_USER_USER); + default: WARN(1, "Unknown bug %x\n", bug); return false; @@ -1069,10 +1086,8 @@ static void __init gds_select_mitigation(void) if (gds_mitigation == GDS_MITIGATION_AUTO) { if (should_mitigate_vuln(X86_BUG_GDS)) gds_mitigation = GDS_MITIGATION_FULL; - else { + else gds_mitigation = GDS_MITIGATION_OFF; - return; - } } /* No microcode */ @@ -2537,88 +2552,6 @@ static void update_mds_branch_idle(void) } } -#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" -#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n" -#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n" - -void cpu_bugs_smt_update(void) -{ - mutex_lock(&spec_ctrl_mutex); - - if (sched_smt_active() && unprivileged_ebpf_enabled() && - spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) - pr_warn_once(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG); - - switch (spectre_v2_user_stibp) { - case SPECTRE_V2_USER_NONE: - break; - case SPECTRE_V2_USER_STRICT: - case SPECTRE_V2_USER_STRICT_PREFERRED: - update_stibp_strict(); - break; - case SPECTRE_V2_USER_PRCTL: - case SPECTRE_V2_USER_SECCOMP: - update_indir_branch_cond(); - break; - } - - switch (mds_mitigation) { - case MDS_MITIGATION_FULL: - case MDS_MITIGATION_AUTO: - case MDS_MITIGATION_VMWERV: - if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY)) - pr_warn_once(MDS_MSG_SMT); - update_mds_branch_idle(); - break; - case MDS_MITIGATION_OFF: - break; - } - - switch (taa_mitigation) { - case TAA_MITIGATION_VERW: - case TAA_MITIGATION_AUTO: - case TAA_MITIGATION_UCODE_NEEDED: - if (sched_smt_active()) - pr_warn_once(TAA_MSG_SMT); - break; - case TAA_MITIGATION_TSX_DISABLED: - case TAA_MITIGATION_OFF: - break; - } - - switch (mmio_mitigation) { - case MMIO_MITIGATION_VERW: - case MMIO_MITIGATION_AUTO: - case MMIO_MITIGATION_UCODE_NEEDED: - if (sched_smt_active()) - pr_warn_once(MMIO_MSG_SMT); - break; - case MMIO_MITIGATION_OFF: - break; - } - - switch (tsa_mitigation) { - case TSA_MITIGATION_USER_KERNEL: - case TSA_MITIGATION_VM: - case TSA_MITIGATION_AUTO: - case TSA_MITIGATION_FULL: - /* - * TSA-SQ can potentially lead to info leakage between - * SMT threads. - */ - if (sched_smt_active()) - static_branch_enable(&cpu_buf_idle_clear); - else - static_branch_disable(&cpu_buf_idle_clear); - break; - case TSA_MITIGATION_NONE: - case TSA_MITIGATION_UCODE_NEEDED: - break; - } - - mutex_unlock(&spec_ctrl_mutex); -} - #undef pr_fmt #define pr_fmt(fmt) "Speculative Store Bypass: " fmt @@ -2713,6 +2646,11 @@ static void __init ssb_select_mitigation(void) ssb_mode = SPEC_STORE_BYPASS_DISABLE; break; case SPEC_STORE_BYPASS_CMD_AUTO: + if (should_mitigate_vuln(X86_BUG_SPEC_STORE_BYPASS)) + ssb_mode = SPEC_STORE_BYPASS_PRCTL; + else + ssb_mode = SPEC_STORE_BYPASS_NONE; + break; case SPEC_STORE_BYPASS_CMD_PRCTL: ssb_mode = SPEC_STORE_BYPASS_PRCTL; break; @@ -3184,8 +3122,18 @@ static void __init srso_select_mitigation(void) } if (srso_mitigation == SRSO_MITIGATION_AUTO) { - if (should_mitigate_vuln(X86_BUG_SRSO)) { + /* + * Use safe-RET if user->kernel or guest->host protection is + * required. Otherwise the 'microcode' mitigation is sufficient + * to protect the user->user and guest->guest vectors. + */ + if (cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_HOST) || + (cpu_attack_vector_mitigated(CPU_MITIGATE_USER_KERNEL) && + !boot_cpu_has(X86_FEATURE_SRSO_USER_KERNEL_NO))) { srso_mitigation = SRSO_MITIGATION_SAFE_RET; + } else if (cpu_attack_vector_mitigated(CPU_MITIGATE_USER_USER) || + cpu_attack_vector_mitigated(CPU_MITIGATE_GUEST_GUEST)) { + srso_mitigation = SRSO_MITIGATION_MICROCODE; } else { srso_mitigation = SRSO_MITIGATION_NONE; return; @@ -3315,8 +3263,184 @@ static void __init srso_apply_mitigation(void) } #undef pr_fmt +#define pr_fmt(fmt) "VMSCAPE: " fmt + +enum vmscape_mitigations { + VMSCAPE_MITIGATION_NONE, + VMSCAPE_MITIGATION_AUTO, + VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER, + VMSCAPE_MITIGATION_IBPB_ON_VMEXIT, +}; + +static const char * const vmscape_strings[] = { + [VMSCAPE_MITIGATION_NONE] = "Vulnerable", + /* [VMSCAPE_MITIGATION_AUTO] */ + [VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER] = "Mitigation: IBPB before exit to userspace", + [VMSCAPE_MITIGATION_IBPB_ON_VMEXIT] = "Mitigation: IBPB on VMEXIT", +}; + +static enum vmscape_mitigations vmscape_mitigation __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_VMSCAPE) ? VMSCAPE_MITIGATION_AUTO : VMSCAPE_MITIGATION_NONE; + +static int __init vmscape_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) { + vmscape_mitigation = VMSCAPE_MITIGATION_NONE; + } else if (!strcmp(str, "ibpb")) { + vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER; + } else if (!strcmp(str, "force")) { + setup_force_cpu_bug(X86_BUG_VMSCAPE); + vmscape_mitigation = VMSCAPE_MITIGATION_AUTO; + } else { + pr_err("Ignoring unknown vmscape=%s option.\n", str); + } + + return 0; +} +early_param("vmscape", vmscape_parse_cmdline); + +static void __init vmscape_select_mitigation(void) +{ + if (cpu_mitigations_off() || + !boot_cpu_has_bug(X86_BUG_VMSCAPE) || + !boot_cpu_has(X86_FEATURE_IBPB)) { + vmscape_mitigation = VMSCAPE_MITIGATION_NONE; + return; + } + + if (vmscape_mitigation == VMSCAPE_MITIGATION_AUTO) + vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER; +} + +static void __init vmscape_update_mitigation(void) +{ + if (!boot_cpu_has_bug(X86_BUG_VMSCAPE)) + return; + + if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB || + srso_mitigation == SRSO_MITIGATION_IBPB_ON_VMEXIT) + vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_ON_VMEXIT; + + pr_info("%s\n", vmscape_strings[vmscape_mitigation]); +} + +static void __init vmscape_apply_mitigation(void) +{ + if (vmscape_mitigation == VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER) + setup_force_cpu_cap(X86_FEATURE_IBPB_EXIT_TO_USER); +} + +#undef pr_fmt #define pr_fmt(fmt) fmt +#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" +#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n" +#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n" +#define VMSCAPE_MSG_SMT "VMSCAPE: SMT on, STIBP is required for full protection. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/vmscape.html for more details.\n" + +void cpu_bugs_smt_update(void) +{ + mutex_lock(&spec_ctrl_mutex); + + if (sched_smt_active() && unprivileged_ebpf_enabled() && + spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) + pr_warn_once(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG); + + switch (spectre_v2_user_stibp) { + case SPECTRE_V2_USER_NONE: + break; + case SPECTRE_V2_USER_STRICT: + case SPECTRE_V2_USER_STRICT_PREFERRED: + update_stibp_strict(); + break; + case SPECTRE_V2_USER_PRCTL: + case SPECTRE_V2_USER_SECCOMP: + update_indir_branch_cond(); + break; + } + + switch (mds_mitigation) { + case MDS_MITIGATION_FULL: + case MDS_MITIGATION_AUTO: + case MDS_MITIGATION_VMWERV: + if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY)) + pr_warn_once(MDS_MSG_SMT); + update_mds_branch_idle(); + break; + case MDS_MITIGATION_OFF: + break; + } + + switch (taa_mitigation) { + case TAA_MITIGATION_VERW: + case TAA_MITIGATION_AUTO: + case TAA_MITIGATION_UCODE_NEEDED: + if (sched_smt_active()) + pr_warn_once(TAA_MSG_SMT); + break; + case TAA_MITIGATION_TSX_DISABLED: + case TAA_MITIGATION_OFF: + break; + } + + switch (mmio_mitigation) { + case MMIO_MITIGATION_VERW: + case MMIO_MITIGATION_AUTO: + case MMIO_MITIGATION_UCODE_NEEDED: + if (sched_smt_active()) + pr_warn_once(MMIO_MSG_SMT); + break; + case MMIO_MITIGATION_OFF: + break; + } + + switch (tsa_mitigation) { + case TSA_MITIGATION_USER_KERNEL: + case TSA_MITIGATION_VM: + case TSA_MITIGATION_AUTO: + case TSA_MITIGATION_FULL: + /* + * TSA-SQ can potentially lead to info leakage between + * SMT threads. + */ + if (sched_smt_active()) + static_branch_enable(&cpu_buf_idle_clear); + else + static_branch_disable(&cpu_buf_idle_clear); + break; + case TSA_MITIGATION_NONE: + case TSA_MITIGATION_UCODE_NEEDED: + break; + } + + switch (vmscape_mitigation) { + case VMSCAPE_MITIGATION_NONE: + case VMSCAPE_MITIGATION_AUTO: + break; + case VMSCAPE_MITIGATION_IBPB_ON_VMEXIT: + case VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER: + /* + * Hypervisors can be attacked across-threads, warn for SMT when + * STIBP is not already enabled system-wide. + * + * Intel eIBRS (!AUTOIBRS) implies STIBP on. + */ + if (!sched_smt_active() || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED || + (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && + !boot_cpu_has(X86_FEATURE_AUTOIBRS))) + break; + pr_warn_once(VMSCAPE_MSG_SMT); + break; + } + + mutex_unlock(&spec_ctrl_mutex); +} + #ifdef CONFIG_SYSFS #define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion" @@ -3562,6 +3686,11 @@ static ssize_t tsa_show_state(char *buf) return sysfs_emit(buf, "%s\n", tsa_strings[tsa_mitigation]); } +static ssize_t vmscape_show_state(char *buf) +{ + return sysfs_emit(buf, "%s\n", vmscape_strings[vmscape_mitigation]); +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -3628,6 +3757,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_TSA: return tsa_show_state(buf); + case X86_BUG_VMSCAPE: + return vmscape_show_state(buf); + default: break; } @@ -3719,6 +3851,11 @@ ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *bu { return cpu_show_common(dev, attr, buf, X86_BUG_TSA); } + +ssize_t cpu_show_vmscape(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_VMSCAPE); +} #endif void __warn_thunk(void) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 34a054181c4d..f98ec9c7fc07 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1236,55 +1236,71 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define ITS_NATIVE_ONLY BIT(9) /* CPU is affected by Transient Scheduler Attacks */ #define TSA BIT(10) +/* CPU is affected by VMSCAPE */ +#define VMSCAPE BIT(11) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { - VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS), - VULNBL_INTEL_STEPS(INTEL_HASWELL, X86_STEP_MAX, SRBDS), - VULNBL_INTEL_STEPS(INTEL_HASWELL_L, X86_STEP_MAX, SRBDS), - VULNBL_INTEL_STEPS(INTEL_HASWELL_G, X86_STEP_MAX, SRBDS), - VULNBL_INTEL_STEPS(INTEL_HASWELL_X, X86_STEP_MAX, MMIO), - VULNBL_INTEL_STEPS(INTEL_BROADWELL_D, X86_STEP_MAX, MMIO), - VULNBL_INTEL_STEPS(INTEL_BROADWELL_G, X86_STEP_MAX, SRBDS), - VULNBL_INTEL_STEPS(INTEL_BROADWELL_X, X86_STEP_MAX, MMIO), - VULNBL_INTEL_STEPS(INTEL_BROADWELL, X86_STEP_MAX, SRBDS), - VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, 0x5, MMIO | RETBLEED | GDS), - VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS), - VULNBL_INTEL_STEPS(INTEL_SKYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS), - VULNBL_INTEL_STEPS(INTEL_SKYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS), - VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, 0xb, MMIO | RETBLEED | GDS | SRBDS), - VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS), - VULNBL_INTEL_STEPS(INTEL_KABYLAKE, 0xc, MMIO | RETBLEED | GDS | SRBDS), - VULNBL_INTEL_STEPS(INTEL_KABYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS), - VULNBL_INTEL_STEPS(INTEL_CANNONLAKE_L, X86_STEP_MAX, RETBLEED), + VULNBL_INTEL_STEPS(INTEL_SANDYBRIDGE_X, X86_STEP_MAX, VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_SANDYBRIDGE, X86_STEP_MAX, VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE_X, X86_STEP_MAX, VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_HASWELL, X86_STEP_MAX, SRBDS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_HASWELL_L, X86_STEP_MAX, SRBDS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_HASWELL_G, X86_STEP_MAX, SRBDS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_HASWELL_X, X86_STEP_MAX, MMIO | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_BROADWELL_D, X86_STEP_MAX, MMIO | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_BROADWELL_X, X86_STEP_MAX, MMIO | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_BROADWELL_G, X86_STEP_MAX, SRBDS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_BROADWELL, X86_STEP_MAX, SRBDS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, 0x5, MMIO | RETBLEED | GDS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_SKYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_SKYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, 0xb, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_KABYLAKE, 0xc, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_KABYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_CANNONLAKE_L, X86_STEP_MAX, RETBLEED | VMSCAPE), VULNBL_INTEL_STEPS(INTEL_ICELAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY), VULNBL_INTEL_STEPS(INTEL_ICELAKE_D, X86_STEP_MAX, MMIO | GDS | ITS | ITS_NATIVE_ONLY), VULNBL_INTEL_STEPS(INTEL_ICELAKE_X, X86_STEP_MAX, MMIO | GDS | ITS | ITS_NATIVE_ONLY), - VULNBL_INTEL_STEPS(INTEL_COMETLAKE, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS), - VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, 0x0, MMIO | RETBLEED | ITS), - VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS), + VULNBL_INTEL_STEPS(INTEL_COMETLAKE, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, 0x0, MMIO | RETBLEED | ITS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | VMSCAPE), VULNBL_INTEL_STEPS(INTEL_TIGERLAKE_L, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY), VULNBL_INTEL_STEPS(INTEL_TIGERLAKE, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY), VULNBL_INTEL_STEPS(INTEL_LAKEFIELD, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED), VULNBL_INTEL_STEPS(INTEL_ROCKETLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY), - VULNBL_INTEL_TYPE(INTEL_ALDERLAKE, ATOM, RFDS), - VULNBL_INTEL_STEPS(INTEL_ALDERLAKE_L, X86_STEP_MAX, RFDS), - VULNBL_INTEL_TYPE(INTEL_RAPTORLAKE, ATOM, RFDS), - VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_P, X86_STEP_MAX, RFDS), - VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_S, X86_STEP_MAX, RFDS), - VULNBL_INTEL_STEPS(INTEL_ATOM_GRACEMONT, X86_STEP_MAX, RFDS), + VULNBL_INTEL_TYPE(INTEL_ALDERLAKE, ATOM, RFDS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_ALDERLAKE, X86_STEP_MAX, VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_ALDERLAKE_L, X86_STEP_MAX, RFDS | VMSCAPE), + VULNBL_INTEL_TYPE(INTEL_RAPTORLAKE, ATOM, RFDS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE, X86_STEP_MAX, VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_P, X86_STEP_MAX, RFDS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_S, X86_STEP_MAX, RFDS | VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_METEORLAKE_L, X86_STEP_MAX, VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_ARROWLAKE_H, X86_STEP_MAX, VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_ARROWLAKE, X86_STEP_MAX, VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_ARROWLAKE_U, X86_STEP_MAX, VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_LUNARLAKE_M, X86_STEP_MAX, VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_SAPPHIRERAPIDS_X, X86_STEP_MAX, VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_GRANITERAPIDS_X, X86_STEP_MAX, VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_EMERALDRAPIDS_X, X86_STEP_MAX, VMSCAPE), + VULNBL_INTEL_STEPS(INTEL_ATOM_GRACEMONT, X86_STEP_MAX, RFDS | VMSCAPE), VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT, X86_STEP_MAX, MMIO | MMIO_SBDS | RFDS), VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT_D, X86_STEP_MAX, MMIO | RFDS), VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RFDS), VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT, X86_STEP_MAX, RFDS), VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT_D, X86_STEP_MAX, RFDS), VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT_PLUS, X86_STEP_MAX, RFDS), + VULNBL_INTEL_STEPS(INTEL_ATOM_CRESTMONT_X, X86_STEP_MAX, VMSCAPE), VULNBL_AMD(0x15, RETBLEED), VULNBL_AMD(0x16, RETBLEED), - VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO), - VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO), - VULNBL_AMD(0x19, SRSO | TSA), - VULNBL_AMD(0x1a, SRSO), + VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO | VMSCAPE), + VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO | VMSCAPE), + VULNBL_AMD(0x19, SRSO | TSA | VMSCAPE), + VULNBL_AMD(0x1a, SRSO | VMSCAPE), {} }; @@ -1543,6 +1559,14 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) } } + /* + * Set the bug only on bare-metal. A nested hypervisor should already be + * deploying IBPB to isolate itself from nested guests. + */ + if (cpu_matches(cpu_vuln_blacklist, VMSCAPE) && + !boot_cpu_has(X86_FEATURE_HYPERVISOR)) + setup_force_cpu_bug(X86_BUG_VMSCAPE); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index 2154f12766fb..1fda6c3a2b65 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -16,6 +16,7 @@ #include <asm/spec-ctrl.h> #include <asm/delay.h> #include <asm/msr.h> +#include <asm/resctrl.h> #include "cpu.h" @@ -117,6 +118,8 @@ static void bsp_init_hygon(struct cpuinfo_x86 *c) x86_amd_ls_cfg_ssbd_mask = 1ULL << 10; } } + + resctrl_cpu_detect(c); } static void early_init_hygon(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 076eaa41b8c8..98ae4c37c93e 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -262,7 +262,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - } else if ((c->x86_vfm >= INTEL_P4_PRESCOTT && c->x86_vfm <= INTEL_P4_WILLAMETTE) || + } else if ((c->x86_vfm >= INTEL_P4_PRESCOTT && c->x86_vfm <= INTEL_P4_CEDARMILL) || (c->x86_vfm >= INTEL_CORE_YONAH && c->x86_vfm <= INTEL_IVYBRIDGE)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); } diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 097e39327942..514f63340880 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -171,8 +171,28 @@ static int cmp_id(const void *key, const void *elem) return 1; } +static u32 cpuid_to_ucode_rev(unsigned int val) +{ + union zen_patch_rev p = {}; + union cpuid_1_eax c; + + c.full = val; + + p.stepping = c.stepping; + p.model = c.model; + p.ext_model = c.ext_model; + p.ext_fam = c.ext_fam; + + return p.ucode_rev; +} + static bool need_sha_check(u32 cur_rev) { + if (!cur_rev) { + cur_rev = cpuid_to_ucode_rev(bsp_cpuid_1_eax); + pr_info_once("No current revision, generating the lowest one: 0x%x\n", cur_rev); + } + switch (cur_rev >> 8) { case 0x80012: return cur_rev <= 0x800126f; break; case 0x80082: return cur_rev <= 0x800820f; break; @@ -749,8 +769,6 @@ static struct ucode_patch *cache_find_patch(struct ucode_cpu_info *uci, u16 equi n.equiv_cpu = equiv_cpu; n.patch_id = uci->cpu_sig.rev; - WARN_ON_ONCE(!n.patch_id); - list_for_each_entry(p, µcode_cache, plist) if (patch_cpus_equivalent(p, &n, false)) return p; diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index b4a1f6732a3a..6b868afb26c3 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -48,6 +48,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, { X86_FEATURE_AMD_FAST_CPPC, CPUID_EDX, 15, 0x80000007, 0 }, { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, + { X86_FEATURE_COHERENCY_SFW_NO, CPUID_EBX, 31, 0x8000001f, 0 }, { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 }, { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, { X86_FEATURE_TSA_SQ_NO, CPUID_ECX, 1, 0x80000021, 0 }, diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c index 843b1655ab45..827dd0dbb6e9 100644 --- a/arch/x86/kernel/cpu/topology_amd.c +++ b/arch/x86/kernel/cpu/topology_amd.c @@ -81,20 +81,25 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_topoext) cpuid_leaf(0x8000001e, &leaf); - tscan->c->topo.initial_apicid = leaf.ext_apic_id; - /* - * If leaf 0xb is available, then the domain shifts are set - * already and nothing to do here. Only valid for family >= 0x17. + * If leaf 0xb/0x26 is available, then the APIC ID and the domain + * shifts are set already. */ - if (!has_topoext && tscan->c->x86 >= 0x17) { + if (!has_topoext) { + tscan->c->topo.initial_apicid = leaf.ext_apic_id; + /* - * Leaf 0x80000008 set the CORE domain shift already. - * Update the SMT domain, but do not propagate it. + * Leaf 0x8000008 sets the CORE domain shift but not the + * SMT domain shift. On CPUs with family >= 0x17, there + * might be hyperthreads. */ - unsigned int nthreads = leaf.core_nthreads + 1; + if (tscan->c->x86 >= 0x17) { + /* Update the SMT domain, but do not propagate it. */ + unsigned int nthreads = leaf.core_nthreads + 1; - topology_update_dom(tscan, TOPO_SMT_DOMAIN, get_count_order(nthreads), nthreads); + topology_update_dom(tscan, TOPO_SMT_DOMAIN, + get_count_order(nthreads), nthreads); + } } store_node(tscan, leaf.nnodes_per_socket + 1, leaf.node_id); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 12ed75c1b567..28e4fd65c9da 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1881,19 +1881,20 @@ long fpu_xstate_prctl(int option, unsigned long arg2) #ifdef CONFIG_PROC_PID_ARCH_STATUS /* * Report the amount of time elapsed in millisecond since last AVX512 - * use in the task. + * use in the task. Report -1 if no AVX-512 usage. */ static void avx512_status(struct seq_file *m, struct task_struct *task) { - unsigned long timestamp = READ_ONCE(x86_task_fpu(task)->avx512_timestamp); - long delta; + unsigned long timestamp; + long delta = -1; - if (!timestamp) { - /* - * Report -1 if no AVX512 usage - */ - delta = -1; - } else { + /* AVX-512 usage is not tracked for kernel threads. Don't report anything. */ + if (task->flags & (PF_KTHREAD | PF_USER_WORKER)) + return; + + timestamp = READ_ONCE(x86_task_fpu(task)->avx512_timestamp); + + if (timestamp) { delta = (long)(jiffies - timestamp); /* * Cap to LONG_MAX if time difference > LONG_MAX diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 8172c2042dd6..5fc437341e03 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -810,6 +810,8 @@ static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map, if (min > map->max_apic_id) return 0; + min = array_index_nospec(min, map->max_apic_id + 1); + for_each_set_bit(i, ipi_bitmap, min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) { if (map->phys_map[min + i]) { diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 2fbdebf79fbb..0635bd71c10e 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -719,13 +719,6 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages) static void sev_writeback_caches(struct kvm *kvm) { /* - * Note, the caller is responsible for ensuring correctness if the mask - * can be modified, e.g. if a CPU could be doing VMRUN. - */ - if (cpumask_empty(to_kvm_sev_info(kvm)->have_run_cpus)) - return; - - /* * Ensure that all dirty guest tagged cache entries are written back * before releasing the pages back to the system for use. CLFLUSH will * not do this without SME_COHERENT, and flushing many cache lines @@ -739,6 +732,9 @@ static void sev_writeback_caches(struct kvm *kvm) * serializing multiple calls and having responding CPUs (to the IPI) * mark themselves as still running if they are running (or about to * run) a vCPU for the VM. + * + * Note, the caller is responsible for ensuring correctness if the mask + * can be modified, e.g. if a CPU could be doing VMRUN. */ wbnoinvd_on_cpus_mask(to_kvm_sev_info(kvm)->have_run_cpus); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a1c49bc681c4..706b6fd56d3c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9908,8 +9908,11 @@ static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id) rcu_read_lock(); map = rcu_dereference(vcpu->kvm->arch.apic_map); - if (likely(map) && dest_id <= map->max_apic_id && map->phys_map[dest_id]) - target = map->phys_map[dest_id]->vcpu; + if (likely(map) && dest_id <= map->max_apic_id) { + dest_id = array_index_nospec(dest_id, map->max_apic_id + 1); + if (map->phys_map[dest_id]) + target = map->phys_map[dest_id]->vcpu; + } rcu_read_unlock(); @@ -11008,6 +11011,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) wrmsrq(MSR_IA32_XFD_ERR, 0); /* + * Mark this CPU as needing a branch predictor flush before running + * userspace. Must be done before enabling preemption to ensure it gets + * set for the CPU that actually ran the guest, and not the CPU that it + * may migrate to. + */ + if (cpu_feature_enabled(X86_FEATURE_IBPB_EXIT_TO_USER)) + this_cpu_write(x86_ibpb_exit_to_user, true); + + /* * Consume any pending interrupts, including the possible source of * VM-Exit on SVM and any ticks that occur between VM-Exit and now. * An instruction is required after local_irq_enable() to fully unblock diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 76e33bd7c556..b9426fce5f3e 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -224,6 +224,24 @@ static void sync_global_pgds(unsigned long start, unsigned long end) } /* + * Make kernel mappings visible in all page tables in the system. + * This is necessary except when the init task populates kernel mappings + * during the boot process. In that case, all processes originating from + * the init task copies the kernel mappings, so there is no issue. + * Otherwise, missing synchronization could lead to kernel crashes due + * to missing page table entries for certain kernel mappings. + * + * Synchronization is performed at the top level, which is the PGD in + * 5-level paging systems. But in 4-level paging systems, however, + * pgd_populate() is a no-op, so synchronization is done at the P4D level. + * sync_global_pgds() handles this difference between paging levels. + */ +void arch_sync_kernel_mappings(unsigned long start, unsigned long end) +{ + sync_global_pgds(start, end); +} + +/* * NOTE: This function is marked __ref because it calls __init function * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0. */ |