summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Zijlstra <peterz@infradead.org>2024-10-14 10:05:48 -0700
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2025-05-18 08:24:11 +0200
commit3b2234cd50a9b4ed664324054901b6f763890104 (patch)
tree56da30cd0f98d209bd7471ac6758c60e28658a56
parent6699bf27a47156baafde7e3f4a732aab2ad9f8cb (diff)
x86/its: Use dynamic thunks for indirect branches
commit 872df34d7c51a79523820ea6a14860398c639b87 upstream. ITS mitigation moves the unsafe indirect branches to a safe thunk. This could degrade the prediction accuracy as the source address of indirect branches becomes same for different execution paths. To improve the predictions, and hence the performance, assign a separate thunk for each indirect callsite. This is also a defense-in-depth measure to avoid indirect branches aliasing with each other. As an example, 5000 dynamic thunks would utilize around 16 bits of the address space, thereby gaining entropy. For a BTB that uses 32 bits for indexing, dynamic thunks could provide better prediction accuracy over fixed thunks. Have ITS thunks be variable sized and use EXECMEM_MODULE_TEXT such that they are both more flexible (got to extend them later) and live in 2M TLBs, just like kernel code, avoiding undue TLB pressure. [ pawan: CONFIG_EXECMEM and CONFIG_EXECMEM_ROX are not supported on backport kernel, made changes to use module_alloc() and set_memory_*() for dynamic thunks. ] Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com> Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> Reviewed-by: Alexandre Chartre <alexandre.chartre@oracle.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--arch/x86/include/asm/alternative.h10
-rw-r--r--arch/x86/kernel/alternative.c132
-rw-r--r--arch/x86/kernel/module.c7
-rw-r--r--include/linux/module.h5
4 files changed, 151 insertions, 3 deletions
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 7e879b521726..7a27a9e1df59 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -130,6 +130,16 @@ static __always_inline int x86_call_depth_emit_accounting(u8 **pprog,
}
#endif
+#ifdef CONFIG_MITIGATION_ITS
+extern void its_init_mod(struct module *mod);
+extern void its_fini_mod(struct module *mod);
+extern void its_free_mod(struct module *mod);
+#else /* CONFIG_MITIGATION_ITS */
+static inline void its_init_mod(struct module *mod) { }
+static inline void its_fini_mod(struct module *mod) { }
+static inline void its_free_mod(struct module *mod) { }
+#endif
+
#if defined(CONFIG_RETHUNK) && defined(CONFIG_OBJTOOL)
extern bool cpu_wants_rethunk(void);
extern bool cpu_wants_rethunk_at(void *addr);
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index a179cfff3903..6085919d3b3e 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -18,6 +18,8 @@
#include <linux/mmu_context.h>
#include <linux/bsearch.h>
#include <linux/sync_core.h>
+#include <linux/moduleloader.h>
+#include <linux/cleanup.h>
#include <asm/text-patching.h>
#include <asm/alternative.h>
#include <asm/sections.h>
@@ -31,6 +33,7 @@
#include <asm/paravirt.h>
#include <asm/asm-prototypes.h>
#include <asm/cfi.h>
+#include <asm/set_memory.h>
int __read_mostly alternatives_patched;
@@ -124,6 +127,125 @@ const unsigned char * const x86_nops[ASM_NOP_MAX+1] =
#endif
};
+#ifdef CONFIG_MITIGATION_ITS
+
+static struct module *its_mod;
+static void *its_page;
+static unsigned int its_offset;
+
+/* Initialize a thunk with the "jmp *reg; int3" instructions. */
+static void *its_init_thunk(void *thunk, int reg)
+{
+ u8 *bytes = thunk;
+ int i = 0;
+
+ if (reg >= 8) {
+ bytes[i++] = 0x41; /* REX.B prefix */
+ reg -= 8;
+ }
+ bytes[i++] = 0xff;
+ bytes[i++] = 0xe0 + reg; /* jmp *reg */
+ bytes[i++] = 0xcc;
+
+ return thunk;
+}
+
+void its_init_mod(struct module *mod)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS))
+ return;
+
+ mutex_lock(&text_mutex);
+ its_mod = mod;
+ its_page = NULL;
+}
+
+void its_fini_mod(struct module *mod)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS))
+ return;
+
+ WARN_ON_ONCE(its_mod != mod);
+
+ its_mod = NULL;
+ its_page = NULL;
+ mutex_unlock(&text_mutex);
+
+ for (int i = 0; i < mod->its_num_pages; i++) {
+ void *page = mod->its_page_array[i];
+ set_memory_rox((unsigned long)page, 1);
+ }
+}
+
+void its_free_mod(struct module *mod)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS))
+ return;
+
+ for (int i = 0; i < mod->its_num_pages; i++) {
+ void *page = mod->its_page_array[i];
+ module_memfree(page);
+ }
+ kfree(mod->its_page_array);
+}
+
+DEFINE_FREE(its_execmem, void *, if (_T) module_memfree(_T));
+
+static void *its_alloc(void)
+{
+ void *page __free(its_execmem) = module_alloc(PAGE_SIZE);
+
+ if (!page)
+ return NULL;
+
+ if (its_mod) {
+ void *tmp = krealloc(its_mod->its_page_array,
+ (its_mod->its_num_pages+1) * sizeof(void *),
+ GFP_KERNEL);
+ if (!tmp)
+ return NULL;
+
+ its_mod->its_page_array = tmp;
+ its_mod->its_page_array[its_mod->its_num_pages++] = page;
+ }
+
+ return no_free_ptr(page);
+}
+
+static void *its_allocate_thunk(int reg)
+{
+ int size = 3 + (reg / 8);
+ void *thunk;
+
+ if (!its_page || (its_offset + size - 1) >= PAGE_SIZE) {
+ its_page = its_alloc();
+ if (!its_page) {
+ pr_err("ITS page allocation failed\n");
+ return NULL;
+ }
+ memset(its_page, INT3_INSN_OPCODE, PAGE_SIZE);
+ its_offset = 32;
+ }
+
+ /*
+ * If the indirect branch instruction will be in the lower half
+ * of a cacheline, then update the offset to reach the upper half.
+ */
+ if ((its_offset + size - 1) % 64 < 32)
+ its_offset = ((its_offset - 1) | 0x3F) + 33;
+
+ thunk = its_page + its_offset;
+ its_offset += size;
+
+ set_memory_rw((unsigned long)its_page, 1);
+ thunk = its_init_thunk(thunk, reg);
+ set_memory_rox((unsigned long)its_page, 1);
+
+ return thunk;
+}
+
+#endif
+
/*
* Fill the buffer with a single effective instruction of size @len.
*
@@ -577,9 +699,13 @@ static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8
#ifdef CONFIG_MITIGATION_ITS
static int emit_its_trampoline(void *addr, struct insn *insn, int reg, u8 *bytes)
{
- return __emit_trampoline(addr, insn, bytes,
- __x86_indirect_its_thunk_array[reg],
- __x86_indirect_its_thunk_array[reg]);
+ u8 *thunk = __x86_indirect_its_thunk_array[reg];
+ u8 *tmp = its_allocate_thunk(reg);
+
+ if (tmp)
+ thunk = tmp;
+
+ return __emit_trampoline(addr, insn, bytes, thunk, thunk);
}
/* Check if an indirect branch is at ITS-unsafe address */
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 5f71a0cf4399..cfb7163cf90e 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -312,6 +312,9 @@ int module_finalize(const Elf_Ehdr *hdr,
void *pseg = (void *)para->sh_addr;
apply_paravirt(pseg, pseg + para->sh_size);
}
+
+ its_init_mod(me);
+
if (retpolines || cfi) {
void *rseg = NULL, *cseg = NULL;
unsigned int rsize = 0, csize = 0;
@@ -332,6 +335,9 @@ int module_finalize(const Elf_Ehdr *hdr,
void *rseg = (void *)retpolines->sh_addr;
apply_retpolines(rseg, rseg + retpolines->sh_size);
}
+
+ its_fini_mod(me);
+
if (returns) {
void *rseg = (void *)returns->sh_addr;
apply_returns(rseg, rseg + returns->sh_size);
@@ -379,4 +385,5 @@ int module_finalize(const Elf_Ehdr *hdr,
void module_arch_cleanup(struct module *mod)
{
alternatives_smp_module_del(mod);
+ its_free_mod(mod);
}
diff --git a/include/linux/module.h b/include/linux/module.h
index f2a8624eef1e..f58d1eb260fa 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -572,6 +572,11 @@ struct module {
atomic_t refcnt;
#endif
+#ifdef CONFIG_MITIGATION_ITS
+ int its_num_pages;
+ void **its_page_array;
+#endif
+
#ifdef CONFIG_CONSTRUCTORS
/* Constructor functions. */
ctor_fn_t *ctors;