| field | value | date |
|---|---|---|
| author | Jakub Kicinski <kuba@kernel.org> | 2025-09-11 17:37:09 -0700 |
| committer | Jakub Kicinski <kuba@kernel.org> | 2025-09-11 17:40:13 -0700 |
| commit | fc3a2810412c163b5df1b377d332e048860f45db (patch) | |
| tree | 9eeb81c7f965176a32ca3062aefcc3532c637b01 /arch | |
| parent | 5f790208d68fe1526c751dc2af366c7b552b8631 (diff) | |
| parent | db87bd2ad1f736c2f7ab231f9b40c885934f6b2c (diff) | |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR (net-6.17-rc6).
Conflicts:
net/netfilter/nft_set_pipapo.c
net/netfilter/nft_set_pipapo_avx2.c
  c4eaca2e1052 ("netfilter: nft_set_pipapo: don't check genbit from packetpath lookups")
  84c1da7b38d9 ("netfilter: nft_set_pipapo: use avx2 algorithm for insertions too")
Only trivial adjacent changes (in a doc and a Makefile).
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'arch')
29 files changed, 350 insertions, 143 deletions
| diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 79550b22ba19..fb9b88eebeb1 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -19,6 +19,7 @@ struct mod_arch_specific {  	/* for CONFIG_DYNAMIC_FTRACE */  	struct plt_entry	*ftrace_trampolines; +	struct plt_entry	*init_ftrace_trampolines;  };  u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs, diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h index b9ae8349e35d..fb944b46846d 100644 --- a/arch/arm64/include/asm/module.lds.h +++ b/arch/arm64/include/asm/module.lds.h @@ -2,6 +2,7 @@ SECTIONS {  	.plt 0 : { BYTE(0) }  	.init.plt 0 : { BYTE(0) }  	.text.ftrace_trampoline 0 : { BYTE(0) } +	.init.text.ftrace_trampoline 0 : { BYTE(0) }  #ifdef CONFIG_KASAN_SW_TAGS  	/* diff --git a/arch/arm64/include/uapi/asm/bitsperlong.h b/arch/arm64/include/uapi/asm/bitsperlong.h index 485d60bee26c..d59730975f30 100644 --- a/arch/arm64/include/uapi/asm/bitsperlong.h +++ b/arch/arm64/include/uapi/asm/bitsperlong.h @@ -17,7 +17,12 @@  #ifndef __ASM_BITSPERLONG_H  #define __ASM_BITSPERLONG_H +#if defined(__KERNEL__) && !defined(__aarch64__) +/* Used by the compat vDSO */ +#define __BITS_PER_LONG 32 +#else  #define __BITS_PER_LONG 64 +#endif  #include <asm-generic/bitsperlong.h> diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 5a890714ee2e..5adad37ab4fa 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -258,10 +258,17 @@ int ftrace_update_ftrace_func(ftrace_func_t func)  	return ftrace_modify_code(pc, 0, new, false);  } -static struct plt_entry *get_ftrace_plt(struct module *mod) +static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)  {  #ifdef CONFIG_MODULES -	struct plt_entry *plt = mod->arch.ftrace_trampolines; +	struct plt_entry *plt = NULL; + +	if (within_module_mem_type(addr, mod, MOD_INIT_TEXT)) +		plt = mod->arch.init_ftrace_trampolines; +	else if (within_module_mem_type(addr, mod, MOD_TEXT)) +		plt = mod->arch.ftrace_trampolines; +	else +		return NULL;  	return &plt[FTRACE_PLT_IDX];  #else @@ -332,7 +339,7 @@ static bool ftrace_find_callable_addr(struct dyn_ftrace *rec,  	if (WARN_ON(!mod))  		return false; -	plt = get_ftrace_plt(mod); +	plt = get_ftrace_plt(mod, pc);  	if (!plt) {  		pr_err("ftrace: no module PLT for %ps\n", (void *)*addr);  		return false; diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index af1ca875c52c..410060ebd86d 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -94,7 +94,7 @@ int load_other_segments(struct kimage *image,  			char *initrd, unsigned long initrd_len,  			char *cmdline)  { -	struct kexec_buf kbuf; +	struct kexec_buf kbuf = {};  	void *dtb = NULL;  	unsigned long initrd_load_addr = 0, dtb_len,  		      orig_segments = image->nr_segments; diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index bde32979c06a..7afd370da9f4 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -283,7 +283,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,  	unsigned long core_plts = 0;  	unsigned long init_plts = 0;  	Elf64_Sym *syms = NULL; -	Elf_Shdr *pltsec, *tramp = NULL; +	Elf_Shdr *pltsec, *tramp = NULL, *init_tramp = NULL;  	int i;  	/* @@ -298,6 +298,9 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,  		else if (!strcmp(secstrings + 
sechdrs[i].sh_name,  				 ".text.ftrace_trampoline"))  			tramp = sechdrs + i; +		else if (!strcmp(secstrings + sechdrs[i].sh_name, +				 ".init.text.ftrace_trampoline")) +			init_tramp = sechdrs + i;  		else if (sechdrs[i].sh_type == SHT_SYMTAB)  			syms = (Elf64_Sym *)sechdrs[i].sh_addr;  	} @@ -363,5 +366,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,  		tramp->sh_size = NR_FTRACE_PLTS * sizeof(struct plt_entry);  	} +	if (init_tramp) { +		init_tramp->sh_type = SHT_NOBITS; +		init_tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC; +		init_tramp->sh_addralign = __alignof__(struct plt_entry); +		init_tramp->sh_size = NR_FTRACE_PLTS * sizeof(struct plt_entry); +	} +  	return 0;  } diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 40148d2725ce..d6d443c4a01a 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -466,6 +466,17 @@ static int module_init_ftrace_plt(const Elf_Ehdr *hdr,  	__init_plt(&plts[FTRACE_PLT_IDX], FTRACE_ADDR);  	mod->arch.ftrace_trampolines = plts; + +	s = find_section(hdr, sechdrs, ".init.text.ftrace_trampoline"); +	if (!s) +		return -ENOEXEC; + +	plts = (void *)s->sh_addr; + +	__init_plt(&plts[FTRACE_PLT_IDX], FTRACE_ADDR); + +	mod->arch.init_ftrace_trampolines = plts; +  #endif  	return 0;  } diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index a4b233a0659e..51dcd8eaa243 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -65,7 +65,7 @@ config RISCV  	select ARCH_SUPPORTS_HUGE_PFNMAP if TRANSPARENT_HUGEPAGE  	select ARCH_SUPPORTS_HUGETLBFS if MMU  	# LLD >= 14: https://github.com/llvm/llvm-project/issues/50505 -	select ARCH_SUPPORTS_LTO_CLANG if LLD_VERSION >= 140000 +	select ARCH_SUPPORTS_LTO_CLANG if LLD_VERSION >= 140000 && CMODEL_MEDANY  	select ARCH_SUPPORTS_LTO_CLANG_THIN if LLD_VERSION >= 140000  	select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS if 64BIT && MMU  	select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index a8a2af6dfe9d..2a16e88e13de 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -91,7 +91,7 @@  #endif  .macro asm_per_cpu dst sym tmp -	REG_L \tmp, TASK_TI_CPU_NUM(tp) +	lw    \tmp, TASK_TI_CPU_NUM(tp)  	slli  \tmp, \tmp, PER_CPU_OFFSET_SHIFT  	la    \dst, __per_cpu_offset  	add   \dst, \dst, \tmp diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index b88a6218b7f2..f5f4f7f85543 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -209,7 +209,7 @@ do {									\  		err = 0;						\  		break;							\  __gu_failed:								\ -		x = 0;							\ +		x = (__typeof__(x))0;					\  		err = -EFAULT;						\  } while (0) @@ -311,7 +311,7 @@ do {								\  do {								\  	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&	\  	    !IS_ALIGNED((uintptr_t)__gu_ptr, sizeof(*__gu_ptr))) {	\ -		__inttype(x) ___val = (__inttype(x))x;			\ +		__typeof__(*(__gu_ptr)) ___val = (x);		\  		if (__asm_copy_to_user_sum_enabled(__gu_ptr, &(___val), sizeof(*__gu_ptr))) \  			goto label;				\  		break;						\ @@ -438,10 +438,10 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)  }  #define __get_kernel_nofault(dst, src, type, err_label)			\ -	__get_user_nocheck(*((type *)(dst)), (type *)(src), err_label) +	__get_user_nocheck(*((type *)(dst)), (__force __user type *)(src), err_label)  #define __put_kernel_nofault(dst, src, type, err_label)			\ -	__put_user_nocheck(*((type *)(src)), (type *)(dst), err_label) +	
__put_user_nocheck(*((type *)(src)), (__force __user type *)(dst), err_label)  static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len)  { diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 3a0ec6fd5956..d0ded2438533 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -46,7 +46,7 @@  	 * a0 = &new_vmalloc[BIT_WORD(cpu)]  	 * a1 = BIT_MASK(cpu)  	 */ -	REG_L 	a2, TASK_TI_CPU(tp) +	lw	a2, TASK_TI_CPU(tp)  	/*  	 * Compute the new_vmalloc element position:  	 * (cpu / 64) * 8 = (cpu >> 6) << 3 diff --git a/arch/riscv/kernel/kexec_elf.c b/arch/riscv/kernel/kexec_elf.c index 56444c7bd34e..531d348db84d 100644 --- a/arch/riscv/kernel/kexec_elf.c +++ b/arch/riscv/kernel/kexec_elf.c @@ -28,7 +28,7 @@ static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr,  	int i;  	int ret = 0;  	size_t size; -	struct kexec_buf kbuf; +	struct kexec_buf kbuf = {};  	const struct elf_phdr *phdr;  	kbuf.image = image; @@ -66,7 +66,7 @@ static int elf_find_pbase(struct kimage *image, unsigned long kernel_len,  {  	int i;  	int ret; -	struct kexec_buf kbuf; +	struct kexec_buf kbuf = {};  	const struct elf_phdr *phdr;  	unsigned long lowest_paddr = ULONG_MAX;  	unsigned long lowest_vaddr = ULONG_MAX; diff --git a/arch/riscv/kernel/kexec_image.c b/arch/riscv/kernel/kexec_image.c index 26a81774a78a..8f2eb900910b 100644 --- a/arch/riscv/kernel/kexec_image.c +++ b/arch/riscv/kernel/kexec_image.c @@ -41,7 +41,7 @@ static void *image_load(struct kimage *image,  	struct riscv_image_header *h;  	u64 flags;  	bool be_image, be_kernel; -	struct kexec_buf kbuf; +	struct kexec_buf kbuf = {};  	int ret;  	/* Check Image header */ diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c index e36104af2e24..b9eb41b0a975 100644 --- a/arch/riscv/kernel/machine_kexec_file.c +++ b/arch/riscv/kernel/machine_kexec_file.c @@ -261,7 +261,7 @@ int load_extra_segments(struct kimage *image, unsigned long kernel_start,  	int ret;  	void *fdt;  	unsigned long initrd_pbase = 0UL; -	struct kexec_buf kbuf; +	struct kexec_buf kbuf = {};  	char *modified_cmdline = NULL;  	kbuf.image = image; diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 10e01ff06312..9883a55d61b5 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -1356,7 +1356,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,  				emit_mv(rd, rs, ctx);  #ifdef CONFIG_SMP  			/* Load current CPU number in T1 */ -			emit_ld(RV_REG_T1, offsetof(struct thread_info, cpu), +			emit_lw(RV_REG_T1, offsetof(struct thread_info, cpu),  				RV_REG_TP, ctx);  			/* Load address of __per_cpu_offset array in T2 */  			emit_addr(RV_REG_T2, (u64)&__per_cpu_offset, extra_pass, ctx); @@ -1763,7 +1763,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,  		 */  		if (insn->src_reg == 0 && insn->imm == BPF_FUNC_get_smp_processor_id) {  			/* Load current CPU number in R0 */ -			emit_ld(bpf_to_rv_reg(BPF_REG_0, ctx), offsetof(struct thread_info, cpu), +			emit_lw(bpf_to_rv_reg(BPF_REG_0, ctx), offsetof(struct thread_info, cpu),  				RV_REG_TP, ctx);  			break;  		} diff --git a/arch/s390/kernel/kexec_elf.c b/arch/s390/kernel/kexec_elf.c index 4d364de43799..143e34a4eca5 100644 --- a/arch/s390/kernel/kexec_elf.c +++ b/arch/s390/kernel/kexec_elf.c @@ -16,7 +16,7 @@  static int kexec_file_add_kernel_elf(struct kimage *image,  				     struct s390_load_data 
*data)  { -	struct kexec_buf buf; +	struct kexec_buf buf = {};  	const Elf_Ehdr *ehdr;  	const Elf_Phdr *phdr;  	Elf_Addr entry; diff --git a/arch/s390/kernel/kexec_image.c b/arch/s390/kernel/kexec_image.c index a32ce8bea745..9a439175723c 100644 --- a/arch/s390/kernel/kexec_image.c +++ b/arch/s390/kernel/kexec_image.c @@ -16,7 +16,7 @@  static int kexec_file_add_kernel_image(struct kimage *image,  				       struct s390_load_data *data)  { -	struct kexec_buf buf; +	struct kexec_buf buf = {};  	buf.image = image; diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index c2bac14dd668..a36d7311c668 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -129,7 +129,7 @@ static int kexec_file_update_purgatory(struct kimage *image,  static int kexec_file_add_purgatory(struct kimage *image,  				    struct s390_load_data *data)  { -	struct kexec_buf buf; +	struct kexec_buf buf = {};  	int ret;  	buf.image = image; @@ -152,7 +152,7 @@ static int kexec_file_add_purgatory(struct kimage *image,  static int kexec_file_add_initrd(struct kimage *image,  				 struct s390_load_data *data)  { -	struct kexec_buf buf; +	struct kexec_buf buf = {};  	int ret;  	buf.image = image; @@ -184,7 +184,7 @@ static int kexec_file_add_ipl_report(struct kimage *image,  {  	__u32 *lc_ipl_parmblock_ptr;  	unsigned int len, ncerts; -	struct kexec_buf buf; +	struct kexec_buf buf = {};  	unsigned long addr;  	void *ptr, *end;  	int ret; diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 4d09954ebf49..04457d88e589 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -760,8 +760,6 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)  		break;  	case PERF_TYPE_HARDWARE: -		if (is_sampling_event(event))	/* No sampling support */ -			return -ENOENT;  		ev = attr->config;  		if (!attr->exclude_user && attr->exclude_kernel) {  			/* @@ -859,6 +857,8 @@ static int cpumf_pmu_event_init(struct perf_event *event)  	unsigned int type = event->attr.type;  	int err = -ENOENT; +	if (is_sampling_event(event))	/* No sampling support */ +		return err;  	if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW)  		err = __hw_perf_event_init(event, type);  	else if (event->pmu->type == type) diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index f373a1009c45..9455f213dc20 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -285,10 +285,10 @@ static int paicrypt_event_init(struct perf_event *event)  	/* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */  	if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type)  		return -ENOENT; -	/* PAI crypto event must be in valid range */ +	/* PAI crypto event must be in valid range, try others if not */  	if (a->config < PAI_CRYPTO_BASE ||  	    a->config > PAI_CRYPTO_BASE + paicrypt_cnt) -		return -EINVAL; +		return -ENOENT;  	/* Allow only CRYPTO_ALL for sampling */  	if (a->sample_period && a->config != PAI_CRYPTO_BASE)  		return -EINVAL; diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index d827473e7f87..7b32935273ce 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -265,7 +265,7 @@ static int paiext_event_valid(struct perf_event *event)  		event->hw.config_base = offsetof(struct paiext_cb, acc);  		return 0;  	} -	return -EINVAL; +	return -ENOENT;  }  /* Might be called on 
different CPU than the one the event is intended for. */ diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 60688be4e876..50eb57c976bc 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -335,7 +335,6 @@ pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,  	int nodat;  	struct mm_struct *mm = vma->vm_mm; -	preempt_disable();  	pgste = ptep_xchg_start(mm, addr, ptep);  	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);  	old = ptep_flush_lazy(mm, addr, ptep, nodat); @@ -360,7 +359,6 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,  	} else {  		set_pte(ptep, pte);  	} -	preempt_enable();  }  static inline void pmdp_idte_local(struct mm_struct *mm, diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 58d890fe2100..52c8910ba2ef 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2701,6 +2701,15 @@ config MITIGATION_TSA  	  security vulnerability on AMD CPUs which can lead to forwarding of  	  invalid info to subsequent instructions and thus can affect their  	  timing and thereby cause a leakage. + +config MITIGATION_VMSCAPE +	bool "Mitigate VMSCAPE" +	depends on KVM +	default y +	help +	  Enable mitigation for VMSCAPE attacks. VMSCAPE is a hardware security +	  vulnerability on Intel and AMD CPUs that may allow a guest to do +	  Spectre v2 style attacks on userspace hypervisor.  endif  config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 06fc0479a23f..751ca35386b0 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -495,6 +495,7 @@  #define X86_FEATURE_TSA_SQ_NO		(21*32+11) /* AMD CPU not vulnerable to TSA-SQ */  #define X86_FEATURE_TSA_L1_NO		(21*32+12) /* AMD CPU not vulnerable to TSA-L1 */  #define X86_FEATURE_CLEAR_CPU_BUF_VM	(21*32+13) /* Clear CPU buffers using VERW before VMRUN */ +#define X86_FEATURE_IBPB_EXIT_TO_USER	(21*32+14) /* Use IBPB on exit-to-userspace, see VMSCAPE bug */  /*   * BUG word(s) @@ -551,4 +552,5 @@  #define X86_BUG_ITS			X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */  #define X86_BUG_ITS_NATIVE_ONLY		X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */  #define X86_BUG_TSA			X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */ +#define X86_BUG_VMSCAPE			X86_BUG( 1*32+10) /* "vmscape" CPU is affected by VMSCAPE attacks from guests */  #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index d535a97c7284..ce3eb6d5fdf9 100644 --- a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -93,6 +93,13 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,  	 * 8 (ia32) bits.  	 
*/  	choose_random_kstack_offset(rdtsc()); + +	/* Avoid unnecessary reads of 'x86_ibpb_exit_to_user' */ +	if (cpu_feature_enabled(X86_FEATURE_IBPB_EXIT_TO_USER) && +	    this_cpu_read(x86_ibpb_exit_to_user)) { +		indirect_branch_prediction_barrier(); +		this_cpu_write(x86_ibpb_exit_to_user, false); +	}  }  #define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 10f261678749..e29f82466f43 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -530,6 +530,8 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)  		: "memory");  } +DECLARE_PER_CPU(bool, x86_ibpb_exit_to_user); +  static inline void indirect_branch_prediction_barrier(void)  {  	asm_inline volatile(ALTERNATIVE("", "call write_ibpb", X86_FEATURE_IBPB) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index af838b8d845c..36dcfc5105be 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -96,6 +96,9 @@ static void __init its_update_mitigation(void);  static void __init its_apply_mitigation(void);  static void __init tsa_select_mitigation(void);  static void __init tsa_apply_mitigation(void); +static void __init vmscape_select_mitigation(void); +static void __init vmscape_update_mitigation(void); +static void __init vmscape_apply_mitigation(void);  /* The base value of the SPEC_CTRL MSR without task-specific bits set */  u64 x86_spec_ctrl_base; @@ -105,6 +108,14 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);  DEFINE_PER_CPU(u64, x86_spec_ctrl_current);  EXPORT_PER_CPU_SYMBOL_GPL(x86_spec_ctrl_current); +/* + * Set when the CPU has run a potentially malicious guest. An IBPB will + * be needed to before running userspace. That IBPB will flush the branch + * predictor content. + */ +DEFINE_PER_CPU(bool, x86_ibpb_exit_to_user); +EXPORT_PER_CPU_SYMBOL_GPL(x86_ibpb_exit_to_user); +  u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB;  static u64 __ro_after_init x86_arch_cap_msr; @@ -262,6 +273,7 @@ void __init cpu_select_mitigations(void)  	its_select_mitigation();  	bhi_select_mitigation();  	tsa_select_mitigation(); +	vmscape_select_mitigation();  	/*  	 * After mitigations are selected, some may need to update their @@ -293,6 +305,7 @@ void __init cpu_select_mitigations(void)  	bhi_update_mitigation();  	/* srso_update_mitigation() depends on retbleed_update_mitigation(). */  	srso_update_mitigation(); +	vmscape_update_mitigation();  	spectre_v1_apply_mitigation();  	spectre_v2_apply_mitigation(); @@ -310,6 +323,7 @@ void __init cpu_select_mitigations(void)  	its_apply_mitigation();  	bhi_apply_mitigation();  	tsa_apply_mitigation(); +	vmscape_apply_mitigation();  }  /* @@ -2538,88 +2552,6 @@ static void update_mds_branch_idle(void)  	}  } -#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" -#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n" -#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. 
See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n" - -void cpu_bugs_smt_update(void) -{ -	mutex_lock(&spec_ctrl_mutex); - -	if (sched_smt_active() && unprivileged_ebpf_enabled() && -	    spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) -		pr_warn_once(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG); - -	switch (spectre_v2_user_stibp) { -	case SPECTRE_V2_USER_NONE: -		break; -	case SPECTRE_V2_USER_STRICT: -	case SPECTRE_V2_USER_STRICT_PREFERRED: -		update_stibp_strict(); -		break; -	case SPECTRE_V2_USER_PRCTL: -	case SPECTRE_V2_USER_SECCOMP: -		update_indir_branch_cond(); -		break; -	} - -	switch (mds_mitigation) { -	case MDS_MITIGATION_FULL: -	case MDS_MITIGATION_AUTO: -	case MDS_MITIGATION_VMWERV: -		if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY)) -			pr_warn_once(MDS_MSG_SMT); -		update_mds_branch_idle(); -		break; -	case MDS_MITIGATION_OFF: -		break; -	} - -	switch (taa_mitigation) { -	case TAA_MITIGATION_VERW: -	case TAA_MITIGATION_AUTO: -	case TAA_MITIGATION_UCODE_NEEDED: -		if (sched_smt_active()) -			pr_warn_once(TAA_MSG_SMT); -		break; -	case TAA_MITIGATION_TSX_DISABLED: -	case TAA_MITIGATION_OFF: -		break; -	} - -	switch (mmio_mitigation) { -	case MMIO_MITIGATION_VERW: -	case MMIO_MITIGATION_AUTO: -	case MMIO_MITIGATION_UCODE_NEEDED: -		if (sched_smt_active()) -			pr_warn_once(MMIO_MSG_SMT); -		break; -	case MMIO_MITIGATION_OFF: -		break; -	} - -	switch (tsa_mitigation) { -	case TSA_MITIGATION_USER_KERNEL: -	case TSA_MITIGATION_VM: -	case TSA_MITIGATION_AUTO: -	case TSA_MITIGATION_FULL: -		/* -		 * TSA-SQ can potentially lead to info leakage between -		 * SMT threads. -		 */ -		if (sched_smt_active()) -			static_branch_enable(&cpu_buf_idle_clear); -		else -			static_branch_disable(&cpu_buf_idle_clear); -		break; -	case TSA_MITIGATION_NONE: -	case TSA_MITIGATION_UCODE_NEEDED: -		break; -	} - -	mutex_unlock(&spec_ctrl_mutex); -} -  #undef pr_fmt  #define pr_fmt(fmt)	"Speculative Store Bypass: " fmt @@ -3331,8 +3263,184 @@ static void __init srso_apply_mitigation(void)  }  #undef pr_fmt +#define pr_fmt(fmt)	"VMSCAPE: " fmt + +enum vmscape_mitigations { +	VMSCAPE_MITIGATION_NONE, +	VMSCAPE_MITIGATION_AUTO, +	VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER, +	VMSCAPE_MITIGATION_IBPB_ON_VMEXIT, +}; + +static const char * const vmscape_strings[] = { +	[VMSCAPE_MITIGATION_NONE]		= "Vulnerable", +	/* [VMSCAPE_MITIGATION_AUTO] */ +	[VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER]	= "Mitigation: IBPB before exit to userspace", +	[VMSCAPE_MITIGATION_IBPB_ON_VMEXIT]	= "Mitigation: IBPB on VMEXIT", +}; + +static enum vmscape_mitigations vmscape_mitigation __ro_after_init = +	IS_ENABLED(CONFIG_MITIGATION_VMSCAPE) ? 
VMSCAPE_MITIGATION_AUTO : VMSCAPE_MITIGATION_NONE; + +static int __init vmscape_parse_cmdline(char *str) +{ +	if (!str) +		return -EINVAL; + +	if (!strcmp(str, "off")) { +		vmscape_mitigation = VMSCAPE_MITIGATION_NONE; +	} else if (!strcmp(str, "ibpb")) { +		vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER; +	} else if (!strcmp(str, "force")) { +		setup_force_cpu_bug(X86_BUG_VMSCAPE); +		vmscape_mitigation = VMSCAPE_MITIGATION_AUTO; +	} else { +		pr_err("Ignoring unknown vmscape=%s option.\n", str); +	} + +	return 0; +} +early_param("vmscape", vmscape_parse_cmdline); + +static void __init vmscape_select_mitigation(void) +{ +	if (cpu_mitigations_off() || +	    !boot_cpu_has_bug(X86_BUG_VMSCAPE) || +	    !boot_cpu_has(X86_FEATURE_IBPB)) { +		vmscape_mitigation = VMSCAPE_MITIGATION_NONE; +		return; +	} + +	if (vmscape_mitigation == VMSCAPE_MITIGATION_AUTO) +		vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER; +} + +static void __init vmscape_update_mitigation(void) +{ +	if (!boot_cpu_has_bug(X86_BUG_VMSCAPE)) +		return; + +	if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB || +	    srso_mitigation == SRSO_MITIGATION_IBPB_ON_VMEXIT) +		vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_ON_VMEXIT; + +	pr_info("%s\n", vmscape_strings[vmscape_mitigation]); +} + +static void __init vmscape_apply_mitigation(void) +{ +	if (vmscape_mitigation == VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER) +		setup_force_cpu_cap(X86_FEATURE_IBPB_EXIT_TO_USER); +} + +#undef pr_fmt  #define pr_fmt(fmt) fmt +#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" +#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n" +#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n" +#define VMSCAPE_MSG_SMT "VMSCAPE: SMT on, STIBP is required for full protection. 
See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/vmscape.html for more details.\n" + +void cpu_bugs_smt_update(void) +{ +	mutex_lock(&spec_ctrl_mutex); + +	if (sched_smt_active() && unprivileged_ebpf_enabled() && +	    spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) +		pr_warn_once(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG); + +	switch (spectre_v2_user_stibp) { +	case SPECTRE_V2_USER_NONE: +		break; +	case SPECTRE_V2_USER_STRICT: +	case SPECTRE_V2_USER_STRICT_PREFERRED: +		update_stibp_strict(); +		break; +	case SPECTRE_V2_USER_PRCTL: +	case SPECTRE_V2_USER_SECCOMP: +		update_indir_branch_cond(); +		break; +	} + +	switch (mds_mitigation) { +	case MDS_MITIGATION_FULL: +	case MDS_MITIGATION_AUTO: +	case MDS_MITIGATION_VMWERV: +		if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY)) +			pr_warn_once(MDS_MSG_SMT); +		update_mds_branch_idle(); +		break; +	case MDS_MITIGATION_OFF: +		break; +	} + +	switch (taa_mitigation) { +	case TAA_MITIGATION_VERW: +	case TAA_MITIGATION_AUTO: +	case TAA_MITIGATION_UCODE_NEEDED: +		if (sched_smt_active()) +			pr_warn_once(TAA_MSG_SMT); +		break; +	case TAA_MITIGATION_TSX_DISABLED: +	case TAA_MITIGATION_OFF: +		break; +	} + +	switch (mmio_mitigation) { +	case MMIO_MITIGATION_VERW: +	case MMIO_MITIGATION_AUTO: +	case MMIO_MITIGATION_UCODE_NEEDED: +		if (sched_smt_active()) +			pr_warn_once(MMIO_MSG_SMT); +		break; +	case MMIO_MITIGATION_OFF: +		break; +	} + +	switch (tsa_mitigation) { +	case TSA_MITIGATION_USER_KERNEL: +	case TSA_MITIGATION_VM: +	case TSA_MITIGATION_AUTO: +	case TSA_MITIGATION_FULL: +		/* +		 * TSA-SQ can potentially lead to info leakage between +		 * SMT threads. +		 */ +		if (sched_smt_active()) +			static_branch_enable(&cpu_buf_idle_clear); +		else +			static_branch_disable(&cpu_buf_idle_clear); +		break; +	case TSA_MITIGATION_NONE: +	case TSA_MITIGATION_UCODE_NEEDED: +		break; +	} + +	switch (vmscape_mitigation) { +	case VMSCAPE_MITIGATION_NONE: +	case VMSCAPE_MITIGATION_AUTO: +		break; +	case VMSCAPE_MITIGATION_IBPB_ON_VMEXIT: +	case VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER: +		/* +		 * Hypervisors can be attacked across-threads, warn for SMT when +		 * STIBP is not already enabled system-wide. +		 * +		 * Intel eIBRS (!AUTOIBRS) implies STIBP on. 
+		 */ +		if (!sched_smt_active() || +		    spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || +		    spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED || +		    (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && +		     !boot_cpu_has(X86_FEATURE_AUTOIBRS))) +			break; +		pr_warn_once(VMSCAPE_MSG_SMT); +		break; +	} + +	mutex_unlock(&spec_ctrl_mutex); +} +  #ifdef CONFIG_SYSFS  #define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion" @@ -3578,6 +3686,11 @@ static ssize_t tsa_show_state(char *buf)  	return sysfs_emit(buf, "%s\n", tsa_strings[tsa_mitigation]);  } +static ssize_t vmscape_show_state(char *buf) +{ +	return sysfs_emit(buf, "%s\n", vmscape_strings[vmscape_mitigation]); +} +  static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,  			       char *buf, unsigned int bug)  { @@ -3644,6 +3757,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr  	case X86_BUG_TSA:  		return tsa_show_state(buf); +	case X86_BUG_VMSCAPE: +		return vmscape_show_state(buf); +  	default:  		break;  	} @@ -3735,6 +3851,11 @@ ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *bu  {  	return cpu_show_common(dev, attr, buf, X86_BUG_TSA);  } + +ssize_t cpu_show_vmscape(struct device *dev, struct device_attribute *attr, char *buf) +{ +	return cpu_show_common(dev, attr, buf, X86_BUG_VMSCAPE); +}  #endif  void __warn_thunk(void) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 34a054181c4d..f98ec9c7fc07 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1236,55 +1236,71 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {  #define ITS_NATIVE_ONLY	BIT(9)  /* CPU is affected by Transient Scheduler Attacks */  #define TSA		BIT(10) +/* CPU is affected by VMSCAPE */ +#define VMSCAPE		BIT(11)  static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { -	VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE,	     X86_STEP_MAX,	SRBDS), -	VULNBL_INTEL_STEPS(INTEL_HASWELL,	     X86_STEP_MAX,	SRBDS), -	VULNBL_INTEL_STEPS(INTEL_HASWELL_L,	     X86_STEP_MAX,	SRBDS), -	VULNBL_INTEL_STEPS(INTEL_HASWELL_G,	     X86_STEP_MAX,	SRBDS), -	VULNBL_INTEL_STEPS(INTEL_HASWELL_X,	     X86_STEP_MAX,	MMIO), -	VULNBL_INTEL_STEPS(INTEL_BROADWELL_D,	     X86_STEP_MAX,	MMIO), -	VULNBL_INTEL_STEPS(INTEL_BROADWELL_G,	     X86_STEP_MAX,	SRBDS), -	VULNBL_INTEL_STEPS(INTEL_BROADWELL_X,	     X86_STEP_MAX,	MMIO), -	VULNBL_INTEL_STEPS(INTEL_BROADWELL,	     X86_STEP_MAX,	SRBDS), -	VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X,		      0x5,	MMIO | RETBLEED | GDS), -	VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X,	     X86_STEP_MAX,	MMIO | RETBLEED | GDS | ITS), -	VULNBL_INTEL_STEPS(INTEL_SKYLAKE_L,	     X86_STEP_MAX,	MMIO | RETBLEED | GDS | SRBDS), -	VULNBL_INTEL_STEPS(INTEL_SKYLAKE,	     X86_STEP_MAX,	MMIO | RETBLEED | GDS | SRBDS), -	VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L,		      0xb,	MMIO | RETBLEED | GDS | SRBDS), -	VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L,	     X86_STEP_MAX,	MMIO | RETBLEED | GDS | SRBDS | ITS), -	VULNBL_INTEL_STEPS(INTEL_KABYLAKE,		      0xc,	MMIO | RETBLEED | GDS | SRBDS), -	VULNBL_INTEL_STEPS(INTEL_KABYLAKE,	     X86_STEP_MAX,	MMIO | RETBLEED | GDS | SRBDS | ITS), -	VULNBL_INTEL_STEPS(INTEL_CANNONLAKE_L,	     X86_STEP_MAX,	RETBLEED), +	VULNBL_INTEL_STEPS(INTEL_SANDYBRIDGE_X,	     X86_STEP_MAX,	VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_SANDYBRIDGE,	     X86_STEP_MAX,	VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE_X,	     X86_STEP_MAX,	VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE,	     X86_STEP_MAX,	SRBDS | 
VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_HASWELL,	     X86_STEP_MAX,	SRBDS | VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_HASWELL_L,	     X86_STEP_MAX,	SRBDS | VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_HASWELL_G,	     X86_STEP_MAX,	SRBDS | VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_HASWELL_X,	     X86_STEP_MAX,	MMIO | VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_BROADWELL_D,	     X86_STEP_MAX,	MMIO | VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_BROADWELL_X,	     X86_STEP_MAX,	MMIO | VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_BROADWELL_G,	     X86_STEP_MAX,	SRBDS | VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_BROADWELL,	     X86_STEP_MAX,	SRBDS | VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X,		      0x5,	MMIO | RETBLEED | GDS | VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X,	     X86_STEP_MAX,	MMIO | RETBLEED | GDS | ITS | VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_SKYLAKE_L,	     X86_STEP_MAX,	MMIO | RETBLEED | GDS | SRBDS | VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_SKYLAKE,	     X86_STEP_MAX,	MMIO | RETBLEED | GDS | SRBDS | VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L,		      0xb,	MMIO | RETBLEED | GDS | SRBDS | VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L,	     X86_STEP_MAX,	MMIO | RETBLEED | GDS | SRBDS | ITS | VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_KABYLAKE,		      0xc,	MMIO | RETBLEED | GDS | SRBDS | VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_KABYLAKE,	     X86_STEP_MAX,	MMIO | RETBLEED | GDS | SRBDS | ITS | VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_CANNONLAKE_L,	     X86_STEP_MAX,	RETBLEED | VMSCAPE),  	VULNBL_INTEL_STEPS(INTEL_ICELAKE_L,	     X86_STEP_MAX,	MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY),  	VULNBL_INTEL_STEPS(INTEL_ICELAKE_D,	     X86_STEP_MAX,	MMIO | GDS | ITS | ITS_NATIVE_ONLY),  	VULNBL_INTEL_STEPS(INTEL_ICELAKE_X,	     X86_STEP_MAX,	MMIO | GDS | ITS | ITS_NATIVE_ONLY), -	VULNBL_INTEL_STEPS(INTEL_COMETLAKE,	     X86_STEP_MAX,	MMIO | MMIO_SBDS | RETBLEED | GDS | ITS), -	VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L,		      0x0,	MMIO | RETBLEED | ITS), -	VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L,	     X86_STEP_MAX,	MMIO | MMIO_SBDS | RETBLEED | GDS | ITS), +	VULNBL_INTEL_STEPS(INTEL_COMETLAKE,	     X86_STEP_MAX,	MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L,		      0x0,	MMIO | RETBLEED | ITS | VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L,	     X86_STEP_MAX,	MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | VMSCAPE),  	VULNBL_INTEL_STEPS(INTEL_TIGERLAKE_L,	     X86_STEP_MAX,	GDS | ITS | ITS_NATIVE_ONLY),  	VULNBL_INTEL_STEPS(INTEL_TIGERLAKE,	     X86_STEP_MAX,	GDS | ITS | ITS_NATIVE_ONLY),  	VULNBL_INTEL_STEPS(INTEL_LAKEFIELD,	     X86_STEP_MAX,	MMIO | MMIO_SBDS | RETBLEED),  	VULNBL_INTEL_STEPS(INTEL_ROCKETLAKE,	     X86_STEP_MAX,	MMIO | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY), -	VULNBL_INTEL_TYPE(INTEL_ALDERLAKE,		     ATOM,	RFDS), -	VULNBL_INTEL_STEPS(INTEL_ALDERLAKE_L,	     X86_STEP_MAX,	RFDS), -	VULNBL_INTEL_TYPE(INTEL_RAPTORLAKE,		     ATOM,	RFDS), -	VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_P,	     X86_STEP_MAX,	RFDS), -	VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_S,	     X86_STEP_MAX,	RFDS), -	VULNBL_INTEL_STEPS(INTEL_ATOM_GRACEMONT,     X86_STEP_MAX,	RFDS), +	VULNBL_INTEL_TYPE(INTEL_ALDERLAKE,		     ATOM,	RFDS | VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_ALDERLAKE,	     X86_STEP_MAX,	VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_ALDERLAKE_L,	     X86_STEP_MAX,	RFDS | VMSCAPE), +	VULNBL_INTEL_TYPE(INTEL_RAPTORLAKE,		     ATOM,	RFDS | VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE,	     X86_STEP_MAX,	VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_P,	     X86_STEP_MAX,	RFDS | VMSCAPE), +	
VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_S,	     X86_STEP_MAX,	RFDS | VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_METEORLAKE_L,	     X86_STEP_MAX,	VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_ARROWLAKE_H,	     X86_STEP_MAX,	VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_ARROWLAKE,	     X86_STEP_MAX,	VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_ARROWLAKE_U,	     X86_STEP_MAX,	VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_LUNARLAKE_M,	     X86_STEP_MAX,	VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_SAPPHIRERAPIDS_X,   X86_STEP_MAX,	VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_GRANITERAPIDS_X,    X86_STEP_MAX,	VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_EMERALDRAPIDS_X,    X86_STEP_MAX,	VMSCAPE), +	VULNBL_INTEL_STEPS(INTEL_ATOM_GRACEMONT,     X86_STEP_MAX,	RFDS | VMSCAPE),  	VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT,	     X86_STEP_MAX,	MMIO | MMIO_SBDS | RFDS),  	VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT_D,     X86_STEP_MAX,	MMIO | RFDS),  	VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT_L,     X86_STEP_MAX,	MMIO | MMIO_SBDS | RFDS),  	VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT,      X86_STEP_MAX,	RFDS),  	VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT_D,    X86_STEP_MAX,	RFDS),  	VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT_PLUS, X86_STEP_MAX,	RFDS), +	VULNBL_INTEL_STEPS(INTEL_ATOM_CRESTMONT_X,   X86_STEP_MAX,	VMSCAPE),  	VULNBL_AMD(0x15, RETBLEED),  	VULNBL_AMD(0x16, RETBLEED), -	VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO), -	VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO), -	VULNBL_AMD(0x19, SRSO | TSA), -	VULNBL_AMD(0x1a, SRSO), +	VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO | VMSCAPE), +	VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO | VMSCAPE), +	VULNBL_AMD(0x19, SRSO | TSA | VMSCAPE), +	VULNBL_AMD(0x1a, SRSO | VMSCAPE),  	{}  }; @@ -1543,6 +1559,14 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)  		}  	} +	/* +	 * Set the bug only on bare-metal. A nested hypervisor should already be +	 * deploying IBPB to isolate itself from nested guests. +	 */ +	if (cpu_matches(cpu_vuln_blacklist, VMSCAPE) && +	    !boot_cpu_has(X86_FEATURE_HYPERVISOR)) +		setup_force_cpu_bug(X86_BUG_VMSCAPE); +  	if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))  		return; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 604490b1cb19..706b6fd56d3c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11011,6 +11011,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)  		wrmsrq(MSR_IA32_XFD_ERR, 0);  	/* +	 * Mark this CPU as needing a branch predictor flush before running +	 * userspace. Must be done before enabling preemption to ensure it gets +	 * set for the CPU that actually ran the guest, and not the CPU that it +	 * may migrate to. +	 */ +	if (cpu_feature_enabled(X86_FEATURE_IBPB_EXIT_TO_USER)) +		this_cpu_write(x86_ibpb_exit_to_user, true); + +	/*  	 * Consume any pending interrupts, including the possible source of  	 * VM-Exit on SVM and any ticks that occur between VM-Exit and now.  	 * An instruction is required after local_irq_enable() to fully unblock | 
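
The x86 side of this merge is dominated by the VMSCAPE mitigation: `vcpu_enter_guest()` sets the per-CPU flag `x86_ibpb_exit_to_user` after a guest has run, and `arch_exit_to_user_mode_prepare()` issues an IBPB and clears the flag before the hypervisor process returns to userspace. Below is a minimal user-space sketch of that "mark at the event, flush lazily at the next boundary" pattern; the thread-local flag and the `run_guest()`/`flush_branch_predictor()` stubs are illustrative stand-ins, not kernel APIs.

```c
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the per-CPU x86_ibpb_exit_to_user flag (illustrative only). */
static _Thread_local bool ibpb_pending;

/* Stand-in for running a guest; mirrors the write in vcpu_enter_guest(). */
static void run_guest(void)
{
	/* ... guest executes, possibly polluting the branch predictors ... */
	ibpb_pending = true;
}

/* Stand-in for indirect_branch_prediction_barrier(). */
static void flush_branch_predictor(void)
{
	puts("IBPB issued");
}

/* Mirror of arch_exit_to_user_mode_prepare(): flush once, then clear. */
static void exit_to_user_mode_prepare(void)
{
	if (ibpb_pending) {
		flush_branch_predictor();
		ibpb_pending = false;
	}
}

int main(void)
{
	run_guest();
	exit_to_user_mode_prepare();	/* flushes */
	exit_to_user_mode_prepare();	/* no-op: flag already consumed */
	return 0;
}
```

Deferring the barrier to exit-to-user rather than doing one on every VMEXIT means back-to-back guest entries on the same CPU pay for at most one IBPB, and writing the flag before preemption is re-enabled (as the new comment in `vcpu_enter_guest()` notes) ensures it lands on the CPU that actually ran the guest.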
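
A recurring small fix in the kexec_file loaders (arm64, riscv, s390) is changing `struct kexec_buf kbuf;` to `struct kexec_buf kbuf = {};`, so that members the individual loader never assigns read as zero rather than stack garbage by the time `kexec_add_buffer()` looks at them. A minimal illustration of the difference, using an invented parameter block rather than the real `struct kexec_buf`:

```c
#include <stdio.h>

/* Hypothetical stand-in for a growing parameter block like struct kexec_buf. */
struct load_params {
	const char *image;
	unsigned long mem_min;
	unsigned long mem_max;
	int top_down;		/* newer field that older callers never set */
};

static void add_buffer(const struct load_params *p)
{
	/* A consumer that reads every field, including the newer one. */
	printf("image=%s range=[%#lx, %#lx] top_down=%d\n",
	       p->image, p->mem_min, p->mem_max, p->top_down);
}

int main(void)
{
	struct load_params p = {};	/* all members start at zero */

	/* The caller fills in only what it knows about ... */
	p.image = "vmlinuz";
	p.mem_max = 0x10000000UL;

	/* ... and the unset fields are still well defined. */
	add_buffer(&p);
	return 0;
}
```

Plain `= {}` is a GNU extension before C23 (the kernel relies on it); strictly conforming pre-C23 code would spell it `= {0}`, with the same effect of zero-initializing every member.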
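
The riscv changes to `asm_per_cpu`, the `new_vmalloc` entry path and the BPF JIT all replace a register-width load (`REG_L`, which is `ld` on rv64, or `emit_ld()`) of `thread_info.cpu` with a 32-bit `lw`/`emit_lw()`, because `cpu` is an `int`: an 8-byte load also drags in whatever field sits next to it and corrupts the per-CPU index computed from the value. The sketch below fakes the over-wide read with `memcpy`; the struct layout is a simplified stand-in, not the kernel's `struct thread_info`.

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Simplified stand-in: a 32-bit field followed by other data. */
struct thread_info_like {
	int32_t cpu;		/* the field the code actually wants    */
	int32_t flags;		/* neighbour that a 64-bit load drags in */
};

int main(void)
{
	struct thread_info_like ti = { .cpu = 3, .flags = -1 };
	uint64_t wide;
	uint32_t narrow;

	/* "ld"-style access: 8 bytes starting at &ti.cpu */
	memcpy(&wide, &ti.cpu, sizeof(wide));
	/* "lw"-style access: only the 4 bytes of the field itself */
	memcpy(&narrow, &ti.cpu, sizeof(narrow));

	printf("64-bit load sees %#llx, 32-bit load sees %u\n",
	       (unsigned long long)wide, (unsigned)narrow);
	return 0;
}
```

With the shift-and-add arithmetic that follows in `asm_per_cpu` and the JIT, those stray upper bits would index the wrong slot of `__per_cpu_offset`, which is why the narrower load matters.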
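
On arm64, ftrace call sites in a module's init text now get their own trampoline PLT (`.init.text.ftrace_trampoline`, exposed as `mod->arch.init_ftrace_trampolines`): `get_ftrace_plt()` takes the patched address and returns the trampoline that lives in the same allocation, since an AArch64 `BL` only reaches about +/-128 MiB and the init and core regions are allocated separately. A small sketch of the selection logic; the `mod_layout` descriptor and its range bounds are hypothetical stand-ins for the module fields consulted by the real `get_ftrace_plt(mod, addr)`.

```c
#include <stdint.h>
#include <stdio.h>

/* Hypothetical module layout: two separately allocated text regions,
 * each with its own trampoline (mirrors mod->arch.*_ftrace_trampolines). */
struct mod_layout {
	uintptr_t core_start, core_end;
	uintptr_t init_start, init_end;
	void *core_tramp;
	void *init_tramp;
};

static int within(uintptr_t addr, uintptr_t start, uintptr_t end)
{
	return addr >= start && addr < end;
}

/* Pick the trampoline reachable from the patched call site. */
static void *pick_ftrace_tramp(const struct mod_layout *m, uintptr_t addr)
{
	if (within(addr, m->init_start, m->init_end))
		return m->init_tramp;
	if (within(addr, m->core_start, m->core_end))
		return m->core_tramp;
	return NULL;	/* not module text at all */
}

int main(void)
{
	char core[16], init[16], core_plt, init_plt;
	struct mod_layout m = {
		.core_start = (uintptr_t)core, .core_end = (uintptr_t)core + sizeof(core),
		.init_start = (uintptr_t)init, .init_end = (uintptr_t)init + sizeof(init),
		.core_tramp = &core_plt, .init_tramp = &init_plt,
	};

	printf("init site -> init PLT?  %d\n",
	       pick_ftrace_tramp(&m, (uintptr_t)init + 4) == &init_plt);
	printf("core site -> core PLT?  %d\n",
	       pick_ftrace_tramp(&m, (uintptr_t)core + 4) == &core_plt);
	return 0;
}
```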
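
The s390 perf changes (cpum_cf, pai_crypto, pai_ext) return `-ENOENT` instead of `-EINVAL` when an event's config falls outside the PMU's range. In perf's `event_init` contract, `-ENOENT` means "not my event, let the core try the next PMU", while `-EINVAL` means "my event, but the attributes are invalid" and ends the search. The dispatcher below models that contract; the two toy PMUs and their config ranges are invented for illustration.

```c
#include <errno.h>
#include <stddef.h>
#include <stdio.h>

struct toy_event { unsigned long config; };

/* Each init hook follows the perf convention:
 *   0       -> claimed and configured
 *   -ENOENT -> not this PMU's event, try the next one
 *   other   -> this PMU's event but unusable, stop probing */
static int pmu_a_init(struct toy_event *e)
{
	if (e->config < 0x1000 || e->config > 0x10ff)
		return -ENOENT;		/* out of range: let others try */
	return 0;
}

static int pmu_b_init(struct toy_event *e)
{
	if (e->config < 0x2000 || e->config > 0x20ff)
		return -ENOENT;
	if (e->config & 1)
		return -EINVAL;		/* ours, but malformed: hard error */
	return 0;
}

static int (* const pmus[])(struct toy_event *) = { pmu_a_init, pmu_b_init };

static int open_event(struct toy_event *e)
{
	for (size_t i = 0; i < sizeof(pmus) / sizeof(pmus[0]); i++) {
		int err = pmus[i](e);

		if (err != -ENOENT)
			return err;	/* claimed (0) or hard failure */
	}
	return -ENOENT;			/* nobody claimed it */
}

int main(void)
{
	struct toy_event in_b = { .config = 0x2002 };
	struct toy_event nowhere = { .config = 0x9999 };

	printf("0x2002 -> %d (claimed by pmu_b)\n", open_event(&in_b));
	printf("0x9999 -> %d (no PMU claims it)\n", open_event(&nowhere));
	return 0;
}
```

Returning the wrong one of the two is not cosmetic: `-EINVAL` from a range check would make `perf_event_open()` fail outright for events that another PMU (for example the raw CPU counter PMU) could have handled.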
