From 91b7fbf3936f5c27d1673264dc24a713290e2165 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Mon, 22 Jul 2024 16:38:37 -0700 Subject: bpf, x86, riscv, arm: no_caller_saved_registers for bpf_get_smp_processor_id() The function bpf_get_smp_processor_id() is processed in a different way, depending on the arch: - on x86 verifier replaces call to bpf_get_smp_processor_id() with a sequence of instructions that modify only r0; - on riscv64 jit replaces call to bpf_get_smp_processor_id() with a sequence of instructions that modify only r0; - on arm64 jit replaces call to bpf_get_smp_processor_id() with a sequence of instructions that modify only r0 and tmp registers. These rewrites satisfy attribute no_caller_saved_registers contract. Allow rewrite of no_caller_saved_registers patterns for bpf_get_smp_processor_id() in order to use this function as a canary for no_caller_saved_registers tests. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20240722233844.1406874-4-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko --- kernel/bpf/helpers.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index b5f0adae8293..d02ae323996b 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -158,6 +158,7 @@ const struct bpf_func_proto bpf_get_smp_processor_id_proto = { .func = bpf_get_smp_processor_id, .gpl_only = false, .ret_type = RET_INTEGER, + .allow_nocsr = true, }; BPF_CALL_0(bpf_get_numa_node_id) -- cgit v1.2.3 From 67666479edf1e2b732f4d0ac797885e859a78de4 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Mon, 19 Aug 2024 18:28:04 +0200 Subject: bpf: Enable generic kfuncs for BPF_CGROUP_* programs These kfuncs are enabled even in BPF_PROG_TYPE_TRACING, so they should be safe also in BPF_CGROUP_* programs. Since all BPF_CGROUP_* programs share the same hook, call register_btf_kfunc_id_set() only once. In enum btf_kfunc_hook, rename BTF_KFUNC_HOOK_CGROUP_SKB to a more generic BTF_KFUNC_HOOK_CGROUP, since it's used for all the cgroup related program types. 
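For illustration only (this sketch is not part of the patch): with the generic kfunc set registered for the cgroup hook, a cgroup program can call generic kfuncs such as bpf_cgroup_from_id()/bpf_cgroup_release(). The section name, the cgroup id and the printed field are illustrative, the kfunc declarations normally come from vmlinux.h/bpf_kfuncs.h, and a sufficiently privileged loader is assumed for bpf_printk().

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>

    /* kfuncs from the generic set, spelled out here to stay self-contained */
    extern struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
    extern void bpf_cgroup_release(struct cgroup *cgrp) __ksym;

    SEC("cgroup_skb/egress")
    int peek_at_cgroup(struct __sk_buff *skb)
    {
            struct cgroup *cgrp;

            cgrp = bpf_cgroup_from_id(1);   /* id 1 assumed to be the root cgroup */
            if (!cgrp)
                    return 1;               /* allow the packet */
            bpf_printk("cgroup level %d", cgrp->level);
            bpf_cgroup_release(cgrp);
            return 1;                       /* allow the packet */
    }

    char _license[] SEC("license") = "GPL";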
Signed-off-by: Matteo Croce Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240819162805.78235-2-technoboy85@gmail.com --- kernel/bpf/btf.c | 8 ++++++-- kernel/bpf/helpers.c | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index bfb0d89ccc8b..b12db397303e 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -212,7 +212,7 @@ enum btf_kfunc_hook { BTF_KFUNC_HOOK_TRACING, BTF_KFUNC_HOOK_SYSCALL, BTF_KFUNC_HOOK_FMODRET, - BTF_KFUNC_HOOK_CGROUP_SKB, + BTF_KFUNC_HOOK_CGROUP, BTF_KFUNC_HOOK_SCHED_ACT, BTF_KFUNC_HOOK_SK_SKB, BTF_KFUNC_HOOK_SOCKET_FILTER, @@ -8307,8 +8307,12 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) case BPF_PROG_TYPE_SYSCALL: return BTF_KFUNC_HOOK_SYSCALL; case BPF_PROG_TYPE_CGROUP_SKB: + case BPF_PROG_TYPE_CGROUP_SOCK: + case BPF_PROG_TYPE_CGROUP_DEVICE: case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: - return BTF_KFUNC_HOOK_CGROUP_SKB; + case BPF_PROG_TYPE_CGROUP_SOCKOPT: + case BPF_PROG_TYPE_CGROUP_SYSCTL: + return BTF_KFUNC_HOOK_CGROUP; case BPF_PROG_TYPE_SCHED_ACT: return BTF_KFUNC_HOOK_SCHED_ACT; case BPF_PROG_TYPE_SK_SKB: diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index d02ae323996b..26b9649ab4ce 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -3052,6 +3052,7 @@ static int __init kfunc_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &generic_kfunc_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &generic_kfunc_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &generic_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SKB, &generic_kfunc_set); ret = ret ?: register_btf_id_dtor_kfuncs(generic_dtors, ARRAY_SIZE(generic_dtors), THIS_MODULE); -- cgit v1.2.3 From 7f6287417baf57754f47687c6ea1a749a0686ab0 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Mon, 19 Aug 2024 18:28:05 +0200 Subject: bpf: Allow bpf_current_task_under_cgroup() with BPF_CGROUP_* The helper bpf_current_task_under_cgroup() currently is only allowed for tracing programs, allow its usage also in the BPF_CGROUP_* program types. Move the code from kernel/trace/bpf_trace.c to kernel/bpf/helpers.c, so it compiles also without CONFIG_BPF_EVENTS. 
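The sysctl use case mentioned in the next paragraph would look roughly like the sketch below (not part of the patch; program and map names are illustrative). User space stores its own cgroup fd at index 0 of a BPF_MAP_TYPE_CGROUP_ARRAY map, and the program then only lets tasks from that cgroup write:

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>

    struct {
            __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
            __uint(max_entries, 1);
            __uint(key_size, sizeof(__u32));
            __uint(value_size, sizeof(__u32));
    } own_cgroup SEC(".maps");

    SEC("cgroup/sysctl")
    int filter_sysctl_writes(struct bpf_sysctl *ctx)
    {
            if (!ctx->write)
                    return 1;       /* always allow reads */
            /* returns 1 when current runs in the cgroup stored at index 0 */
            if (bpf_current_task_under_cgroup(&own_cgroup, 0) == 1)
                    return 1;       /* our own write: allow */
            return 0;               /* anything else: reject with -EPERM */
    }

    char _license[] SEC("license") = "GPL";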
This will be used in systemd-networkd to monitor the sysctl writes, and filter it's own writes from others: https://github.com/systemd/systemd/pull/32212 Signed-off-by: Matteo Croce Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240819162805.78235-3-technoboy85@gmail.com --- include/linux/bpf.h | 1 + kernel/bpf/cgroup.c | 2 ++ kernel/bpf/helpers.c | 23 +++++++++++++++++++++++ kernel/trace/bpf_trace.c | 27 ++------------------------- 4 files changed, 28 insertions(+), 25 deletions(-) (limited to 'kernel/bpf/helpers.c') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b9425e410bcb..f0192c173ed8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3206,6 +3206,7 @@ extern const struct bpf_func_proto bpf_sock_hash_update_proto; extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto; extern const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto; extern const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto; +extern const struct bpf_func_proto bpf_current_task_under_cgroup_proto; extern const struct bpf_func_proto bpf_msg_redirect_hash_proto; extern const struct bpf_func_proto bpf_msg_redirect_map_proto; extern const struct bpf_func_proto bpf_sk_redirect_hash_proto; diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 8ba73042a239..e7113d700b87 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -2581,6 +2581,8 @@ cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_get_cgroup_classid: return &bpf_get_cgroup_classid_curr_proto; #endif + case BPF_FUNC_current_task_under_cgroup: + return &bpf_current_task_under_cgroup_proto; default: return NULL; } diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 26b9649ab4ce..12e3aa40b180 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2458,6 +2458,29 @@ __bpf_kfunc long bpf_task_under_cgroup(struct task_struct *task, return ret; } +BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct cgroup *cgrp; + + if (unlikely(idx >= array->map.max_entries)) + return -E2BIG; + + cgrp = READ_ONCE(array->ptrs[idx]); + if (unlikely(!cgrp)) + return -EAGAIN; + + return task_under_cgroup_hierarchy(current, cgrp); +} + +const struct bpf_func_proto bpf_current_task_under_cgroup_proto = { + .func = bpf_current_task_under_cgroup, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_ANYTHING, +}; + /** * bpf_task_get_cgroup1 - Acquires the associated cgroup of a task within a * specific cgroup1 hierarchy. 
The cgroup1 hierarchy is identified by its diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index d557bb11e0ff..b69a39316c0c 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -797,29 +797,6 @@ const struct bpf_func_proto bpf_task_pt_regs_proto = { .ret_btf_id = &bpf_task_pt_regs_ids[0], }; -BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx) -{ - struct bpf_array *array = container_of(map, struct bpf_array, map); - struct cgroup *cgrp; - - if (unlikely(idx >= array->map.max_entries)) - return -E2BIG; - - cgrp = READ_ONCE(array->ptrs[idx]); - if (unlikely(!cgrp)) - return -EAGAIN; - - return task_under_cgroup_hierarchy(current, cgrp); -} - -static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = { - .func = bpf_current_task_under_cgroup, - .gpl_only = false, - .ret_type = RET_INTEGER, - .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_ANYTHING, -}; - struct send_signal_irq_work { struct irq_work irq_work; struct task_struct *task; @@ -1480,8 +1457,6 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_numa_node_id_proto; case BPF_FUNC_perf_event_read: return &bpf_perf_event_read_proto; - case BPF_FUNC_current_task_under_cgroup: - return &bpf_current_task_under_cgroup_proto; case BPF_FUNC_get_prandom_u32: return &bpf_get_prandom_u32_proto; case BPF_FUNC_probe_write_user: @@ -1510,6 +1485,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_cgrp_storage_get_proto; case BPF_FUNC_cgrp_storage_delete: return &bpf_cgrp_storage_delete_proto; + case BPF_FUNC_current_task_under_cgroup: + return &bpf_current_task_under_cgroup_proto; #endif case BPF_FUNC_send_signal: return &bpf_send_signal_proto; -- cgit v1.2.3 From 6d641ca50d7ec7d5e4e889c3f8ea22afebc2a403 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Sun, 11 Aug 2024 18:13:33 +0200 Subject: bpf: Fix percpu address space issues In arraymap.c: In bpf_array_map_seq_start() and bpf_array_map_seq_next() cast return values from the __percpu address space to the generic address space via uintptr_t [1]. Correct the declaration of pptr pointer in __bpf_array_map_seq_show() to void __percpu * and cast the value from the generic address space to the __percpu address space via uintptr_t [1]. In hashtab.c: Assign the return value from bpf_mem_cache_alloc() to void pointer and cast the value to void __percpu ** (void pointer to percpu void pointer) before dereferencing. In memalloc.c: Explicitly declare __percpu variables. Cast obj to void __percpu **. In helpers.c: Cast ptr in BPF_CALL_1 and BPF_CALL_2 from generic address space to __percpu address space via const uintptr_t [1]. Found by GCC's named address space checks. There were no changes in the resulting object files. 
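Condensed to a stand-alone sketch (the helper below is made up; only the cast idiom is taken from the patch): a plain void * that actually carries a per-cpu address is laundered through uintptr_t before it is treated as __percpu, which satisfies the named address space checks while leaving the generated code untouched.

    #include <linux/types.h>
    #include <linux/percpu.h>

    /* 'ptr' arrives as a generic pointer (e.g. a helper argument) but really
     * holds an address that came from alloc_percpu() */
    static u64 read_percpu_slot(const void *ptr, int cpu)
    {
            const u64 __percpu *pcpu = (const u64 __percpu *)(uintptr_t)ptr;

            return *per_cpu_ptr(pcpu, cpu);
    }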
[1] https://sparse.docs.kernel.org/en/latest/annotations.html#address-space-name Signed-off-by: Uros Bizjak Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Andrii Nakryiko Cc: Martin KaFai Lau Cc: Eduard Zingerman Cc: Song Liu Cc: Yonghong Song Cc: John Fastabend Cc: KP Singh Cc: Stanislav Fomichev Cc: Hao Luo Cc: Jiri Olsa Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20240811161414.56744-1-ubizjak@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/arraymap.c | 8 ++++---- kernel/bpf/hashtab.c | 9 +++++---- kernel/bpf/helpers.c | 4 ++-- kernel/bpf/memalloc.c | 12 ++++++------ 4 files changed, 17 insertions(+), 16 deletions(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 188e3c2effb2..a43e62e2a8bb 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -600,7 +600,7 @@ static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos) array = container_of(map, struct bpf_array, map); index = info->index & array->index_mask; if (info->percpu_value_buf) - return array->pptrs[index]; + return (void *)(uintptr_t)array->pptrs[index]; return array_map_elem_ptr(array, index); } @@ -619,7 +619,7 @@ static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos) array = container_of(map, struct bpf_array, map); index = info->index & array->index_mask; if (info->percpu_value_buf) - return array->pptrs[index]; + return (void *)(uintptr_t)array->pptrs[index]; return array_map_elem_ptr(array, index); } @@ -632,7 +632,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v) struct bpf_iter_meta meta; struct bpf_prog *prog; int off = 0, cpu = 0; - void __percpu **pptr; + void __percpu *pptr; u32 size; meta.seq = seq; @@ -648,7 +648,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v) if (!info->percpu_value_buf) { ctx.value = v; } else { - pptr = v; + pptr = (void __percpu *)(uintptr_t)v; size = array->elem_size; for_each_possible_cpu(cpu) { copy_map_value_long(map, info->percpu_value_buf + off, diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index be1f64c20125..45c7195b65ba 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -1049,14 +1049,15 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, pptr = htab_elem_get_ptr(l_new, key_size); } else { /* alloc_percpu zero-fills */ - pptr = bpf_mem_cache_alloc(&htab->pcpu_ma); - if (!pptr) { + void *ptr = bpf_mem_cache_alloc(&htab->pcpu_ma); + + if (!ptr) { bpf_mem_cache_free(&htab->ma, l_new); l_new = ERR_PTR(-ENOMEM); goto dec_count; } - l_new->ptr_to_pptr = pptr; - pptr = *(void **)pptr; + l_new->ptr_to_pptr = ptr; + pptr = *(void __percpu **)ptr; } pcpu_init_value(htab, pptr, value, onallcpus); diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 12e3aa40b180..ccca6fe0367c 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -715,7 +715,7 @@ BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu) if (cpu >= nr_cpu_ids) return (unsigned long)NULL; - return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu); + return (unsigned long)per_cpu_ptr((const void __percpu *)(const uintptr_t)ptr, cpu); } const struct bpf_func_proto bpf_per_cpu_ptr_proto = { @@ -728,7 +728,7 @@ const struct bpf_func_proto bpf_per_cpu_ptr_proto = { BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr) { - return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr); + return (unsigned long)this_cpu_ptr((const void __percpu *)(const uintptr_t)percpu_ptr); } const 
struct bpf_func_proto bpf_this_cpu_ptr_proto = { diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index dec892ded031..b3858a76e0b3 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -138,8 +138,8 @@ static struct llist_node notrace *__llist_del_first(struct llist_head *head) static void *__alloc(struct bpf_mem_cache *c, int node, gfp_t flags) { if (c->percpu_size) { - void **obj = kmalloc_node(c->percpu_size, flags, node); - void *pptr = __alloc_percpu_gfp(c->unit_size, 8, flags); + void __percpu **obj = kmalloc_node(c->percpu_size, flags, node); + void __percpu *pptr = __alloc_percpu_gfp(c->unit_size, 8, flags); if (!obj || !pptr) { free_percpu(pptr); @@ -253,7 +253,7 @@ static void alloc_bulk(struct bpf_mem_cache *c, int cnt, int node, bool atomic) static void free_one(void *obj, bool percpu) { if (percpu) { - free_percpu(((void **)obj)[1]); + free_percpu(((void __percpu **)obj)[1]); kfree(obj); return; } @@ -509,8 +509,8 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu) */ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) { - struct bpf_mem_caches *cc, __percpu *pcc; - struct bpf_mem_cache *c, __percpu *pc; + struct bpf_mem_caches *cc; struct bpf_mem_caches __percpu *pcc; + struct bpf_mem_cache *c; struct bpf_mem_cache __percpu *pc; struct obj_cgroup *objcg = NULL; int cpu, i, unit_size, percpu_size = 0; @@ -591,7 +591,7 @@ int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size) { - struct bpf_mem_caches *cc, __percpu *pcc; + struct bpf_mem_caches *cc; struct bpf_mem_caches __percpu *pcc; int cpu, i, unit_size, percpu_size; struct obj_cgroup *objcg; struct bpf_mem_cache *c; -- cgit v1.2.3 From ae010757a55b57c8b82628e8ea9b7da2269131d9 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 22 Aug 2024 01:41:07 -0700 Subject: bpf: rename nocsr -> bpf_fastcall in verifier Attribute used by LLVM implementation of the feature had been changed from no_caller_saved_registers to bpf_fastcall (see [1]). This commit replaces references to nocsr by references to bpf_fastcall to keep LLVM and Kernel parts in sync. 
[1] https://github.com/llvm/llvm-project/pull/105417 Acked-by: Yonghong Song Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20240822084112.3257995-2-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 6 +- include/linux/bpf_verifier.h | 18 +++--- kernel/bpf/helpers.c | 2 +- kernel/bpf/verifier.c | 143 +++++++++++++++++++++---------------------- 4 files changed, 84 insertions(+), 85 deletions(-) (limited to 'kernel/bpf/helpers.c') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f0192c173ed8..00dc4dd28cbd 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -808,12 +808,12 @@ struct bpf_func_proto { bool gpl_only; bool pkt_access; bool might_sleep; - /* set to true if helper follows contract for gcc/llvm - * attribute no_caller_saved_registers: + /* set to true if helper follows contract for llvm + * attribute bpf_fastcall: * - void functions do not scratch r0 * - functions taking N arguments scratch only registers r1-rN */ - bool allow_nocsr; + bool allow_fastcall; enum bpf_return_type ret_type; union { struct { diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 5cea15c81b8a..634a302a39e3 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -577,13 +577,13 @@ struct bpf_insn_aux_data { bool call_with_percpu_alloc_ptr; /* {this,per}_cpu_ptr() with prog percpu alloc */ u8 alu_state; /* used in combination with alu_limit */ /* true if STX or LDX instruction is a part of a spill/fill - * pattern for a no_caller_saved_registers call. + * pattern for a bpf_fastcall call. */ - u8 nocsr_pattern:1; + u8 fastcall_pattern:1; /* for CALL instructions, a number of spill/fill pairs in the - * no_caller_saved_registers pattern. + * bpf_fastcall pattern. */ - u8 nocsr_spills_num:3; + u8 fastcall_spills_num:3; /* below fields are initialized once */ unsigned int orig_idx; /* original instruction index */ @@ -653,10 +653,10 @@ struct bpf_subprog_info { u32 linfo_idx; /* The idx to the main_prog->aux->linfo */ u16 stack_depth; /* max. stack depth used by this function */ u16 stack_extra; - /* offsets in range [stack_depth .. nocsr_stack_off) - * are used for no_caller_saved_registers spills and fills. + /* offsets in range [stack_depth .. fastcall_stack_off) + * are used for bpf_fastcall spills and fills. 
*/ - s16 nocsr_stack_off; + s16 fastcall_stack_off; bool has_tail_call: 1; bool tail_call_reachable: 1; bool has_ld_abs: 1; @@ -664,8 +664,8 @@ struct bpf_subprog_info { bool is_async_cb: 1; bool is_exception_cb: 1; bool args_cached: 1; - /* true if nocsr stack region is used by functions that can't be inlined */ - bool keep_nocsr_stack: 1; + /* true if bpf_fastcall stack region is used by functions that can't be inlined */ + bool keep_fastcall_stack: 1; u8 arg_cnt; struct bpf_subprog_arg_info args[MAX_BPF_FUNC_REG_ARGS]; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index ccca6fe0367c..4105b4af6e03 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -158,7 +158,7 @@ const struct bpf_func_proto bpf_get_smp_processor_id_proto = { .func = bpf_get_smp_processor_id, .gpl_only = false, .ret_type = RET_INTEGER, - .allow_nocsr = true, + .allow_fastcall = true, }; BPF_CALL_0(bpf_get_numa_node_id) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 920d7c5fe944..0dfd91f36417 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4579,28 +4579,28 @@ static int get_reg_width(struct bpf_reg_state *reg) return fls64(reg->umax_value); } -/* See comment for mark_nocsr_pattern_for_call() */ -static void check_nocsr_stack_contract(struct bpf_verifier_env *env, struct bpf_func_state *state, - int insn_idx, int off) +/* See comment for mark_fastcall_pattern_for_call() */ +static void check_fastcall_stack_contract(struct bpf_verifier_env *env, + struct bpf_func_state *state, int insn_idx, int off) { struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno]; struct bpf_insn_aux_data *aux = env->insn_aux_data; int i; - if (subprog->nocsr_stack_off <= off || aux[insn_idx].nocsr_pattern) + if (subprog->fastcall_stack_off <= off || aux[insn_idx].fastcall_pattern) return; - /* access to the region [max_stack_depth .. nocsr_stack_off) - * from something that is not a part of the nocsr pattern, - * disable nocsr rewrites for current subprogram by setting - * nocsr_stack_off to a value smaller than any possible offset. + /* access to the region [max_stack_depth .. fastcall_stack_off) + * from something that is not a part of the fastcall pattern, + * disable fastcall rewrites for current subprogram by setting + * fastcall_stack_off to a value smaller than any possible offset. */ - subprog->nocsr_stack_off = S16_MIN; - /* reset nocsr aux flags within subprogram, + subprog->fastcall_stack_off = S16_MIN; + /* reset fastcall aux flags within subprogram, * happens at most once per subprogram */ for (i = subprog->start; i < (subprog + 1)->start; ++i) { - aux[i].nocsr_spills_num = 0; - aux[i].nocsr_pattern = 0; + aux[i].fastcall_spills_num = 0; + aux[i].fastcall_pattern = 0; } } @@ -4652,7 +4652,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, if (err) return err; - check_nocsr_stack_contract(env, state, insn_idx, off); + check_fastcall_stack_contract(env, state, insn_idx, off); mark_stack_slot_scratched(env, spi); if (reg && !(off % BPF_REG_SIZE) && reg->type == SCALAR_VALUE && env->bpf_capable) { bool reg_value_fits; @@ -4787,7 +4787,7 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env, return err; } - check_nocsr_stack_contract(env, state, insn_idx, min_off); + check_fastcall_stack_contract(env, state, insn_idx, min_off); /* Variable offset writes destroy any spilled pointers in range. 
*/ for (i = min_off; i < max_off; i++) { u8 new_type, *stype; @@ -4926,7 +4926,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, reg = ®_state->stack[spi].spilled_ptr; mark_stack_slot_scratched(env, spi); - check_nocsr_stack_contract(env, state, env->insn_idx, off); + check_fastcall_stack_contract(env, state, env->insn_idx, off); if (is_spilled_reg(®_state->stack[spi])) { u8 spill_size = 1; @@ -5087,7 +5087,7 @@ static int check_stack_read_var_off(struct bpf_verifier_env *env, min_off = reg->smin_value + off; max_off = reg->smax_value + off; mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno); - check_nocsr_stack_contract(env, ptr_state, env->insn_idx, min_off); + check_fastcall_stack_contract(env, ptr_state, env->insn_idx, min_off); return 0; } @@ -6804,13 +6804,13 @@ static int check_stack_slot_within_bounds(struct bpf_verifier_env *env, struct bpf_insn_aux_data *aux = &env->insn_aux_data[env->insn_idx]; int min_valid_off, max_bpf_stack; - /* If accessing instruction is a spill/fill from nocsr pattern, + /* If accessing instruction is a spill/fill from bpf_fastcall pattern, * add room for all caller saved registers below MAX_BPF_STACK. - * In case if nocsr rewrite won't happen maximal stack depth + * In case if bpf_fastcall rewrite won't happen maximal stack depth * would be checked by check_max_stack_depth_subprog(). */ max_bpf_stack = MAX_BPF_STACK; - if (aux->nocsr_pattern) + if (aux->fastcall_pattern) max_bpf_stack += CALLER_SAVED_REGS * BPF_REG_SIZE; if (t == BPF_WRITE || env->allow_uninit_stack) @@ -16118,12 +16118,12 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns, /* Return a bitmask specifying which caller saved registers are * clobbered by a call to a helper *as if* this helper follows - * no_caller_saved_registers contract: + * bpf_fastcall contract: * - includes R0 if function is non-void; * - includes R1-R5 if corresponding parameter has is described * in the function prototype. */ -static u32 helper_nocsr_clobber_mask(const struct bpf_func_proto *fn) +static u32 helper_fastcall_clobber_mask(const struct bpf_func_proto *fn) { u8 mask; int i; @@ -16138,8 +16138,8 @@ static u32 helper_nocsr_clobber_mask(const struct bpf_func_proto *fn) } /* True if do_misc_fixups() replaces calls to helper number 'imm', - * replacement patch is presumed to follow no_caller_saved_registers contract - * (see mark_nocsr_pattern_for_call() below). + * replacement patch is presumed to follow bpf_fastcall contract + * (see mark_fastcall_pattern_for_call() below). */ static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm) { @@ -16153,7 +16153,7 @@ static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm) } } -/* GCC and LLVM define a no_caller_saved_registers function attribute. +/* LLVM define a bpf_fastcall function attribute. * This attribute means that function scratches only some of * the caller saved registers defined by ABI. 
* For BPF the set of such registers could be defined as follows: @@ -16163,13 +16163,12 @@ static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm) * * The contract between kernel and clang allows to simultaneously use * such functions and maintain backwards compatibility with old - * kernels that don't understand no_caller_saved_registers calls - * (nocsr for short): + * kernels that don't understand bpf_fastcall calls: * - * - for nocsr calls clang allocates registers as-if relevant r0-r5 + * - for bpf_fastcall calls clang allocates registers as-if relevant r0-r5 * registers are not scratched by the call; * - * - as a post-processing step, clang visits each nocsr call and adds + * - as a post-processing step, clang visits each bpf_fastcall call and adds * spill/fill for every live r0-r5; * * - stack offsets used for the spill/fill are allocated as lowest @@ -16177,11 +16176,11 @@ static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm) * purposes; * * - when kernel loads a program, it looks for such patterns - * (nocsr function surrounded by spills/fills) and checks if - * spill/fill stack offsets are used exclusively in nocsr patterns; + * (bpf_fastcall function surrounded by spills/fills) and checks if + * spill/fill stack offsets are used exclusively in fastcall patterns; * * - if so, and if verifier or current JIT inlines the call to the - * nocsr function (e.g. a helper call), kernel removes unnecessary + * bpf_fastcall function (e.g. a helper call), kernel removes unnecessary * spill/fill pairs; * * - when old kernel loads a program, presence of spill/fill pairs @@ -16200,22 +16199,22 @@ static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm) * r0 += r2; * exit; * - * The purpose of mark_nocsr_pattern_for_call is to: + * The purpose of mark_fastcall_pattern_for_call is to: * - look for such patterns; - * - mark spill and fill instructions in env->insn_aux_data[*].nocsr_pattern; - * - mark set env->insn_aux_data[*].nocsr_spills_num for call instruction; - * - update env->subprog_info[*]->nocsr_stack_off to find an offset - * at which nocsr spill/fill stack slots start; - * - update env->subprog_info[*]->keep_nocsr_stack. + * - mark spill and fill instructions in env->insn_aux_data[*].fastcall_pattern; + * - mark set env->insn_aux_data[*].fastcall_spills_num for call instruction; + * - update env->subprog_info[*]->fastcall_stack_off to find an offset + * at which bpf_fastcall spill/fill stack slots start; + * - update env->subprog_info[*]->keep_fastcall_stack. * - * The .nocsr_pattern and .nocsr_stack_off are used by - * check_nocsr_stack_contract() to check if every stack access to - * nocsr spill/fill stack slot originates from spill/fill - * instructions, members of nocsr patterns. + * The .fastcall_pattern and .fastcall_stack_off are used by + * check_fastcall_stack_contract() to check if every stack access to + * fastcall spill/fill stack slot originates from spill/fill + * instructions, members of fastcall patterns. * - * If such condition holds true for a subprogram, nocsr patterns could - * be rewritten by remove_nocsr_spills_fills(). - * Otherwise nocsr patterns are not changed in the subprogram + * If such condition holds true for a subprogram, fastcall patterns could + * be rewritten by remove_fastcall_spills_fills(). + * Otherwise bpf_fastcall patterns are not changed in the subprogram * (code, presumably, generated by an older clang version). 
* * For example, it is *not* safe to remove spill/fill below: @@ -16228,9 +16227,9 @@ static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm) * r0 += r1; exit; * exit; */ -static void mark_nocsr_pattern_for_call(struct bpf_verifier_env *env, - struct bpf_subprog_info *subprog, - int insn_idx, s16 lowest_off) +static void mark_fastcall_pattern_for_call(struct bpf_verifier_env *env, + struct bpf_subprog_info *subprog, + int insn_idx, s16 lowest_off) { struct bpf_insn *insns = env->prog->insnsi, *stx, *ldx; struct bpf_insn *call = &env->prog->insnsi[insn_idx]; @@ -16245,8 +16244,8 @@ static void mark_nocsr_pattern_for_call(struct bpf_verifier_env *env, if (get_helper_proto(env, call->imm, &fn) < 0) /* error would be reported later */ return; - clobbered_regs_mask = helper_nocsr_clobber_mask(fn); - can_be_inlined = fn->allow_nocsr && + clobbered_regs_mask = helper_fastcall_clobber_mask(fn); + can_be_inlined = fn->allow_fastcall && (verifier_inlines_helper_call(env, call->imm) || bpf_jit_inlines_helper_call(call->imm)); } @@ -16289,36 +16288,36 @@ static void mark_nocsr_pattern_for_call(struct bpf_verifier_env *env, if (stx->off != off || ldx->off != off) break; expected_regs_mask &= ~BIT(stx->src_reg); - env->insn_aux_data[insn_idx - i].nocsr_pattern = 1; - env->insn_aux_data[insn_idx + i].nocsr_pattern = 1; + env->insn_aux_data[insn_idx - i].fastcall_pattern = 1; + env->insn_aux_data[insn_idx + i].fastcall_pattern = 1; } if (i == 1) return; - /* Conditionally set 'nocsr_spills_num' to allow forward + /* Conditionally set 'fastcall_spills_num' to allow forward * compatibility when more helper functions are marked as - * nocsr at compile time than current kernel supports, e.g: + * bpf_fastcall at compile time than current kernel supports, e.g: * * 1: *(u64 *)(r10 - 8) = r1 - * 2: call A ;; assume A is nocsr for current kernel + * 2: call A ;; assume A is bpf_fastcall for current kernel * 3: r1 = *(u64 *)(r10 - 8) * 4: *(u64 *)(r10 - 8) = r1 - * 5: call B ;; assume B is not nocsr for current kernel + * 5: call B ;; assume B is not bpf_fastcall for current kernel * 6: r1 = *(u64 *)(r10 - 8) * - * There is no need to block nocsr rewrite for such program. - * Set 'nocsr_pattern' for both calls to keep check_nocsr_stack_contract() happy, - * don't set 'nocsr_spills_num' for call B so that remove_nocsr_spills_fills() + * There is no need to block bpf_fastcall rewrite for such program. + * Set 'fastcall_pattern' for both calls to keep check_fastcall_stack_contract() happy, + * don't set 'fastcall_spills_num' for call B so that remove_fastcall_spills_fills() * does not remove spill/fill pair {4,6}. 
*/ if (can_be_inlined) - env->insn_aux_data[insn_idx].nocsr_spills_num = i - 1; + env->insn_aux_data[insn_idx].fastcall_spills_num = i - 1; else - subprog->keep_nocsr_stack = 1; - subprog->nocsr_stack_off = min(subprog->nocsr_stack_off, off); + subprog->keep_fastcall_stack = 1; + subprog->fastcall_stack_off = min(subprog->fastcall_stack_off, off); } -static int mark_nocsr_patterns(struct bpf_verifier_env *env) +static int mark_fastcall_patterns(struct bpf_verifier_env *env) { struct bpf_subprog_info *subprog = env->subprog_info; struct bpf_insn *insn; @@ -16335,12 +16334,12 @@ static int mark_nocsr_patterns(struct bpf_verifier_env *env) continue; lowest_off = min(lowest_off, insn->off); } - /* use this offset to find nocsr patterns */ + /* use this offset to find fastcall patterns */ for (i = subprog->start; i < (subprog + 1)->start; ++i) { insn = env->prog->insnsi + i; if (insn->code != (BPF_JMP | BPF_CALL)) continue; - mark_nocsr_pattern_for_call(env, subprog, i, lowest_off); + mark_fastcall_pattern_for_call(env, subprog, i, lowest_off); } } return 0; @@ -21244,10 +21243,10 @@ static int optimize_bpf_loop(struct bpf_verifier_env *env) return 0; } -/* Remove unnecessary spill/fill pairs, members of nocsr pattern, +/* Remove unnecessary spill/fill pairs, members of fastcall pattern, * adjust subprograms stack depth when possible. */ -static int remove_nocsr_spills_fills(struct bpf_verifier_env *env) +static int remove_fastcall_spills_fills(struct bpf_verifier_env *env) { struct bpf_subprog_info *subprog = env->subprog_info; struct bpf_insn_aux_data *aux = env->insn_aux_data; @@ -21258,8 +21257,8 @@ static int remove_nocsr_spills_fills(struct bpf_verifier_env *env) int i, j; for (i = 0; i < insn_cnt; i++, insn++) { - if (aux[i].nocsr_spills_num > 0) { - spills_num = aux[i].nocsr_spills_num; + if (aux[i].fastcall_spills_num > 0) { + spills_num = aux[i].fastcall_spills_num; /* NOPs would be removed by opt_remove_nops() */ for (j = 1; j <= spills_num; ++j) { *(insn - j) = NOP; @@ -21268,8 +21267,8 @@ static int remove_nocsr_spills_fills(struct bpf_verifier_env *env) modified = true; } if ((subprog + 1)->start == i + 1) { - if (modified && !subprog->keep_nocsr_stack) - subprog->stack_depth = -subprog->nocsr_stack_off; + if (modified && !subprog->keep_fastcall_stack) + subprog->stack_depth = -subprog->fastcall_stack_off; subprog++; modified = false; } @@ -22192,7 +22191,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (ret < 0) goto skip_full_check; - ret = mark_nocsr_patterns(env); + ret = mark_fastcall_patterns(env); if (ret < 0) goto skip_full_check; @@ -22209,7 +22208,7 @@ skip_full_check: * allocate additional slots. */ if (ret == 0) - ret = remove_nocsr_spills_fills(env); + ret = remove_fastcall_spills_fills(env); if (ret == 0) ret = check_max_stack_depth(env); -- cgit v1.2.3 From d59232afb0344e33e9399f308d9b4a03876e7676 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 13 Aug 2024 21:24:22 +0000 Subject: bpf: Rename ARG_PTR_TO_KPTR -> ARG_KPTR_XCHG_DEST ARG_PTR_TO_KPTR is currently only used by the bpf_kptr_xchg helper. Although it limits reg types for that helper's first arg to PTR_TO_MAP_VALUE, any arbitrary mapval won't do: further custom verification logic ensures that the mapval reg being xchgd-into is pointing to a kptr field. If this is not the case, it's not safe to xchg into that reg's pointee. Let's rename the bpf_arg_type to more accurately describe the fairly specific expectations that this arg type encodes. 
This is a nonfunctional change. Acked-by: Martin KaFai Lau Signed-off-by: Dave Marchevsky Signed-off-by: Amery Hung Link: https://lore.kernel.org/r/20240813212424.2871455-4-amery.hung@bytedance.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 +- kernel/bpf/helpers.c | 2 +- kernel/bpf/verifier.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/bpf/helpers.c') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 00dc4dd28cbd..dc63083f76b7 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -744,7 +744,7 @@ enum bpf_arg_type { ARG_PTR_TO_STACK, /* pointer to stack */ ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ ARG_PTR_TO_TIMER, /* pointer to bpf_timer */ - ARG_PTR_TO_KPTR, /* pointer to referenced kptr */ + ARG_KPTR_XCHG_DEST, /* pointer to destination that kptrs are bpf_kptr_xchg'd into */ ARG_PTR_TO_DYNPTR, /* pointer to bpf_dynptr. See bpf_type_flag for dynptr type */ __BPF_ARG_TYPE_MAX, diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 4105b4af6e03..bf55b81b3e06 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1636,7 +1636,7 @@ static const struct bpf_func_proto bpf_kptr_xchg_proto = { .gpl_only = false, .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, .ret_btf_id = BPF_PTR_POISON, - .arg1_type = ARG_PTR_TO_KPTR, + .arg1_type = ARG_KPTR_XCHG_DEST, .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL | OBJ_RELEASE, .arg2_btf_id = BPF_PTR_POISON, }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 148536446457..5fe2a8978bb8 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8412,7 +8412,7 @@ static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } }; static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } }; static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } }; static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } }; -static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } }; +static const struct bpf_reg_types kptr_xchg_dest_types = { .types = { PTR_TO_MAP_VALUE } }; static const struct bpf_reg_types dynptr_types = { .types = { PTR_TO_STACK, @@ -8444,7 +8444,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_STACK] = &stack_ptr_types, [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types, [ARG_PTR_TO_TIMER] = &timer_types, - [ARG_PTR_TO_KPTR] = &kptr_types, + [ARG_KPTR_XCHG_DEST] = &kptr_xchg_dest_types, [ARG_PTR_TO_DYNPTR] = &dynptr_types, }; @@ -9044,7 +9044,7 @@ skip_type_check: return err; break; } - case ARG_PTR_TO_KPTR: + case ARG_KPTR_XCHG_DEST: err = process_kptr_func(env, regno, meta); if (err) return err; -- cgit v1.2.3 From b0966c724584a5a9fd7fb529de19807c31f27a45 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 13 Aug 2024 21:24:23 +0000 Subject: bpf: Support bpf_kptr_xchg into local kptr Currently, users can only stash kptr into map values with bpf_kptr_xchg(). This patch further supports stashing kptr into local kptr by adding local kptr as a valid destination type. When stashing into local kptr, btf_record in program BTF is used instead of btf_record in map to search for the btf_field of the local kptr. The local kptr specific checks in check_reg_type() only apply when the source argument of bpf_kptr_xchg() is local kptr. Therefore, we make the scope of the check explicit as the destination now can also be local kptr. 
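A sketch of the newly allowed destination (not part of the patch; struct, field and section names are illustrative, bpf_obj_new()/bpf_obj_drop() are assumed to come from the selftests' bpf_experimental.h, and bpf_task_from_pid() is just a convenient way to obtain a referenced task):

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>
    #include "bpf_experimental.h"

    struct task_stash {
            struct task_struct __kptr *task;        /* referenced kptr field */
    };

    extern struct task_struct *bpf_task_from_pid(s32 pid) __ksym;
    extern void bpf_task_release(struct task_struct *p) __ksym;

    SEC("tc")
    int stash_into_local_kptr(struct __sk_buff *skb)
    {
            struct task_struct *t, *old;
            struct task_stash *s;

            s = bpf_obj_new(typeof(*s));
            if (!s)
                    return 0;

            t = bpf_task_from_pid(1);               /* pid 1 is illustrative */
            if (t) {
                    /* destination is a local kptr, not a map value */
                    old = bpf_kptr_xchg(&s->task, t);
                    if (old)
                            bpf_task_release(old);
            }

            /* move the reference back out before freeing the local object */
            old = bpf_kptr_xchg(&s->task, NULL);
            if (old)
                    bpf_task_release(old);
            bpf_obj_drop(s);
            return 0;
    }

    char _license[] SEC("license") = "GPL";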
Acked-by: Martin KaFai Lau Signed-off-by: Dave Marchevsky Signed-off-by: Amery Hung Link: https://lore.kernel.org/r/20240813212424.2871455-5-amery.hung@bytedance.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 9 +++++---- kernel/bpf/helpers.c | 4 ++-- kernel/bpf/verifier.c | 44 ++++++++++++++++++++++++++++++-------------- 3 files changed, 37 insertions(+), 20 deletions(-) (limited to 'kernel/bpf/helpers.c') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 35bcf52dbc65..e2629457d72d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5519,11 +5519,12 @@ union bpf_attr { * **-EOPNOTSUPP** if the hash calculation failed or **-EINVAL** if * invalid arguments are passed. * - * void *bpf_kptr_xchg(void *map_value, void *ptr) + * void *bpf_kptr_xchg(void *dst, void *ptr) * Description - * Exchange kptr at pointer *map_value* with *ptr*, and return the - * old value. *ptr* can be NULL, otherwise it must be a referenced - * pointer which will be released when this helper is called. + * Exchange kptr at pointer *dst* with *ptr*, and return the old value. + * *dst* can be map value or local kptr. *ptr* can be NULL, otherwise + * it must be a referenced pointer which will be released when this helper + * is called. * Return * The old value of kptr (which can be NULL). The returned pointer * if not NULL, is a reference which must be released using its diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index bf55b81b3e06..f12f075b70c5 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1619,9 +1619,9 @@ void bpf_wq_cancel_and_free(void *val) schedule_work(&work->delete_work); } -BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr) +BPF_CALL_2(bpf_kptr_xchg, void *, dst, void *, ptr) { - unsigned long *kptr = map_value; + unsigned long *kptr = dst; /* This helper may be inlined by verifier. */ return xchg(kptr, (unsigned long)ptr); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5fe2a8978bb8..33270b363689 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7803,29 +7803,38 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno, struct bpf_call_arg_meta *meta) { struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; - struct bpf_map *map_ptr = reg->map_ptr; struct btf_field *kptr_field; + struct bpf_map *map_ptr; + struct btf_record *rec; u32 kptr_off; + if (type_is_ptr_alloc_obj(reg->type)) { + rec = reg_btf_record(reg); + } else { /* PTR_TO_MAP_VALUE */ + map_ptr = reg->map_ptr; + if (!map_ptr->btf) { + verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n", + map_ptr->name); + return -EINVAL; + } + rec = map_ptr->record; + meta->map_ptr = map_ptr; + } + if (!tnum_is_const(reg->var_off)) { verbose(env, "R%d doesn't have constant offset. 
kptr has to be at the constant offset\n", regno); return -EINVAL; } - if (!map_ptr->btf) { - verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n", - map_ptr->name); - return -EINVAL; - } - if (!btf_record_has_field(map_ptr->record, BPF_KPTR)) { - verbose(env, "map '%s' has no valid kptr\n", map_ptr->name); + + if (!btf_record_has_field(rec, BPF_KPTR)) { + verbose(env, "R%d has no valid kptr\n", regno); return -EINVAL; } - meta->map_ptr = map_ptr; kptr_off = reg->off + reg->var_off.value; - kptr_field = btf_record_find(map_ptr->record, kptr_off, BPF_KPTR); + kptr_field = btf_record_find(rec, kptr_off, BPF_KPTR); if (!kptr_field) { verbose(env, "off=%d doesn't point to kptr\n", kptr_off); return -EACCES; @@ -8412,7 +8421,12 @@ static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } }; static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } }; static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } }; static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } }; -static const struct bpf_reg_types kptr_xchg_dest_types = { .types = { PTR_TO_MAP_VALUE } }; +static const struct bpf_reg_types kptr_xchg_dest_types = { + .types = { + PTR_TO_MAP_VALUE, + PTR_TO_BTF_ID | MEM_ALLOC + } +}; static const struct bpf_reg_types dynptr_types = { .types = { PTR_TO_STACK, @@ -8483,7 +8497,8 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno, if (base_type(arg_type) == ARG_PTR_TO_MEM) type &= ~DYNPTR_TYPE_FLAG_MASK; - if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type)) { + /* Local kptr types are allowed as the source argument of bpf_kptr_xchg */ + if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type) && regno == BPF_REG_2) { type &= ~MEM_ALLOC; type &= ~MEM_PERCPU; } @@ -8576,7 +8591,8 @@ found: verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n"); return -EFAULT; } - if (meta->func_id == BPF_FUNC_kptr_xchg) { + /* Check if local kptr in src arg matches kptr in dst arg */ + if (meta->func_id == BPF_FUNC_kptr_xchg && regno == BPF_REG_2) { if (map_kptr_match_type(env, meta->kptr_field, reg, regno)) return -EACCES; } @@ -8887,7 +8903,7 @@ skip_type_check: meta->release_regno = regno; } - if (reg->ref_obj_id) { + if (reg->ref_obj_id && base_type(arg_type) != ARG_KPTR_XCHG_DEST) { if (meta->ref_obj_id) { verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", regno, reg->ref_obj_id, -- cgit v1.2.3 From 65ab5ac4df012388481d0414fcac1d5ac1721fb3 Mon Sep 17 00:00:00 2001 From: Jordan Rome Date: Fri, 23 Aug 2024 12:51:00 -0700 Subject: bpf: Add bpf_copy_from_user_str kfunc This adds a kfunc wrapper around strncpy_from_user, which can be called from sleepable BPF programs. This matches the non-sleepable 'bpf_probe_read_user_str' helper except it includes an additional 'flags' param, which allows consumers to clear the entire destination buffer on success or failure. 
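A minimal sketch of a caller (not part of the patch): a sleepable BPF_PROG_TYPE_SYSCALL program run via BPF_PROG_TEST_RUN, with the user-space string address handed in through an illustrative context struct. The extern mirrors the kernel-side signature:

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>

    extern int bpf_copy_from_user_str(void *dst, u32 dst__sz,
                                      const void *unsafe_ptr__ign, u64 flags) __ksym;

    struct copy_args {
            __u64 ustr;     /* user-space address of a NUL-terminated string */
    };

    SEC("syscall")          /* sleepable, so the KF_SLEEPABLE kfunc is allowed */
    int copy_user_str(struct copy_args *ctx)
    {
            char buf[64];
            int n;

            /* BPF_F_PAD_ZEROS: zero the tail on success, the whole buffer on failure */
            n = bpf_copy_from_user_str(buf, sizeof(buf),
                                       (const void *)(unsigned long)ctx->ustr,
                                       BPF_F_PAD_ZEROS);
            if (n < 0)
                    return n;
            bpf_printk("copied %d bytes: %s", n, buf);
            return 0;
    }

    char _license[] SEC("license") = "GPL";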
Signed-off-by: Jordan Rome Link: https://lore.kernel.org/r/20240823195101.3621028-1-linux@jordanrome.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 9 +++++++++ kernel/bpf/helpers.c | 42 ++++++++++++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 9 +++++++++ 3 files changed, 60 insertions(+) (limited to 'kernel/bpf/helpers.c') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e2629457d72d..c3a5728db115 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -7513,4 +7513,13 @@ struct bpf_iter_num { __u64 __opaque[1]; } __attribute__((aligned(8))); +/* + * Flags to control BPF kfunc behaviour. + * - BPF_F_PAD_ZEROS: Pad destination buffer with zeros. (See the respective + * helper documentation for details.) + */ +enum bpf_kfunc_flags { + BPF_F_PAD_ZEROS = (1ULL << 0), +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index f12f075b70c5..7e7761c537f8 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2962,6 +2962,47 @@ __bpf_kfunc void bpf_iter_bits_destroy(struct bpf_iter_bits *it) bpf_mem_free(&bpf_global_ma, kit->bits); } +/** + * bpf_copy_from_user_str() - Copy a string from an unsafe user address + * @dst: Destination address, in kernel space. This buffer must be + * at least @dst__sz bytes long. + * @dst__sz: Maximum number of bytes to copy, includes the trailing NUL. + * @unsafe_ptr__ign: Source address, in user space. + * @flags: The only supported flag is BPF_F_PAD_ZEROS + * + * Copies a NUL-terminated string from userspace to BPF space. If user string is + * too long this will still ensure zero termination in the dst buffer unless + * buffer size is 0. + * + * If BPF_F_PAD_ZEROS flag is set, memset the tail of @dst to 0 on success and + * memset all of @dst on failure. + */ +__bpf_kfunc int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void __user *unsafe_ptr__ign, u64 flags) +{ + int ret; + + if (unlikely(flags & ~BPF_F_PAD_ZEROS)) + return -EINVAL; + + if (unlikely(!dst__sz)) + return 0; + + ret = strncpy_from_user(dst, unsafe_ptr__ign, dst__sz - 1); + if (ret < 0) { + if (flags & BPF_F_PAD_ZEROS) + memset((char *)dst, 0, dst__sz); + + return ret; + } + + if (flags & BPF_F_PAD_ZEROS) + memset((char *)dst + ret, 0, dst__sz - ret); + else + ((char *)dst)[ret] = '\0'; + + return ret + 1; +} + __bpf_kfunc_end_defs(); BTF_KFUNCS_START(generic_btf_ids) @@ -3047,6 +3088,7 @@ BTF_ID_FLAGS(func, bpf_preempt_enable) BTF_ID_FLAGS(func, bpf_iter_bits_new, KF_ITER_NEW) BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY) +BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE) BTF_KFUNCS_END(common_btf_ids) static const struct btf_kfunc_id_set common_kfunc_set = { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 35bcf52dbc65..f329ee44627a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -7512,4 +7512,13 @@ struct bpf_iter_num { __u64 __opaque[1]; } __attribute__((aligned(8))); +/* + * Flags to control BPF kfunc behaviour. + * - BPF_F_PAD_ZEROS: Pad destination buffer with zeros. (See the respective + * helper documentation for details.) 
+ */ +enum bpf_kfunc_flags { + BPF_F_PAD_ZEROS = (1ULL << 0), +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From 866d571e6201cb8ccb18cb8407ab3ad3adb474b8 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 29 Aug 2024 14:08:26 -0700 Subject: bpf: Export bpf_base_func_proto The bpf_testmod needs to use the bpf_tail_call helper in a later selftest patch. This patch is to EXPORT_GPL_SYMBOL the bpf_base_func_proto. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20240829210833.388152-5-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 7e7761c537f8..3956be5d6440 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2034,6 +2034,7 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return NULL; } } +EXPORT_SYMBOL_GPL(bpf_base_func_proto); void bpf_list_head_free(const struct btf_field *field, void *list_head, struct bpf_spin_lock *spin_lock) -- cgit v1.2.3 From cfe69c50b05510b24e26ccb427c7cc70beafd6c1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 13 Sep 2024 21:17:46 +0200 Subject: bpf: Fix bpf_strtol and bpf_strtoul helpers for 32bit The bpf_strtol() and bpf_strtoul() helpers are currently broken on 32bit: The argument type ARG_PTR_TO_LONG is BPF-side "long", not kernel-side "long" and therefore always considered fixed 64bit no matter if 64 or 32bit underlying architecture. This contract breaks in case of the two mentioned helpers since their BPF_CALL definition for the helpers was added with {unsigned,}long *res. Meaning, the transition from BPF-side "long" (BPF program) to kernel-side "long" (BPF helper) breaks here. Both helpers call __bpf_strtoll() with "long long" correctly, but later assigning the result into 32-bit "*(long *)" on 32bit architectures. From a BPF program point of view, this means upper bits will be seen as uninitialised. Therefore, fix both BPF_CALL signatures to {s,u}64 types to fix this situation. Now, changing also uapi/bpf.h helper documentation which generates bpf_helper_defs.h for BPF programs is tricky: Changing signatures there to __{s,u}64 would trigger compiler warnings (incompatible pointer types passing 'long *' to parameter of type '__s64 *' (aka 'long long *')) for existing BPF programs. Leaving the signatures as-is would be fine as from BPF program point of view it is still BPF-side "long" and thus equivalent to __{s,u}64 on 64 or 32bit underlying architectures. Note that bpf_strtol() and bpf_strtoul() are the only helpers with this issue. 
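What this preserves on the BPF side, as a sketch (not from the patch; the policy and names are made up): the result variable is a BPF "long", i.e. always 8 bytes, so the kernel-side helper has to produce a full s64 no matter what the host's native long is.

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>

    SEC("cgroup/sysctl")
    int sanitize_sysctl_write(struct bpf_sysctl *ctx)
    {
            char buf[16] = {};
            long val = 0;   /* 64 bit in BPF, even on a 32-bit kernel */

            if (!ctx->write)
                    return 1;
            if (bpf_sysctl_get_new_value(ctx, buf, sizeof(buf)) <= 0)
                    return 0;
            if (bpf_strtol(buf, sizeof(buf), 0, &val) < 0)
                    return 0;                       /* reject unparseable input */
            return val >= 0 && val <= 100;          /* allow only sane values */
    }

    char _license[] SEC("license") = "GPL";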
Fixes: d7a4cb9b6705 ("bpf: Introduce bpf_strtol and bpf_strtoul helpers") Reported-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/481fcec8-c12c-9abb-8ecb-76c71c009959@iogearbox.net Link: https://lore.kernel.org/r/20240913191754.13290-1-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 3956be5d6440..0cf42be52890 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -518,7 +518,7 @@ static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags, } BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags, - long *, res) + s64 *, res) { long long _res; int err; @@ -543,7 +543,7 @@ const struct bpf_func_proto bpf_strtol_proto = { }; BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags, - unsigned long *, res) + u64 *, res) { unsigned long long _res; bool is_negative; -- cgit v1.2.3 From 7d71f59e028028f1160602121f40f45e89b3664e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 13 Sep 2024 21:17:47 +0200 Subject: bpf: Remove truncation test in bpf_strtol and bpf_strtoul helpers Both bpf_strtol() and bpf_strtoul() helpers passed a temporary "long long" respectively "unsigned long long" to __bpf_strtoll() / __bpf_strtoull(). Later, the result was checked for truncation via _res != ({unsigned,} long)_res as the destination buffer for the BPF helpers was of type {unsigned,} long which is 32bit on 32bit architectures. Given the latter was a bug in the helper signatures where the destination buffer got adjusted to {s,u}64, the truncation check can now be removed. Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240913191754.13290-2-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 0cf42be52890..5404bb964d83 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -526,8 +526,6 @@ BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags, err = __bpf_strtoll(buf, buf_len, flags, &_res); if (err < 0) return err; - if (_res != (long)_res) - return -ERANGE; *res = _res; return err; } @@ -554,8 +552,6 @@ BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags, return err; if (is_negative) return -EINVAL; - if (_res != (unsigned long)_res) - return -ERANGE; *res = _res; return err; } -- cgit v1.2.3 From 32556ce93bc45c730829083cb60f95a2728ea48b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 13 Sep 2024 21:17:48 +0200 Subject: bpf: Fix helper writes to read-only maps Lonial found an issue that despite user- and BPF-side frozen BPF map (like in case of .rodata), it was still possible to write into it from a BPF program side through specific helpers having ARG_PTR_TO_{LONG,INT} as arguments. In check_func_arg() when the argument is as mentioned, the meta->raw_mode is never set. Later, check_helper_mem_access(), under the case of PTR_TO_MAP_VALUE as register base type, it assumes BPF_READ for the subsequent call to check_map_access_type() and given the BPF map is read-only it succeeds. The helpers really need to be annotated as ARG_PTR_TO_{LONG,INT} | MEM_UNINIT when results are written into them as opposed to read out of them. 
The latter indicates that it's okay to pass a pointer to uninitialized memory as the memory is written to anyway. However, ARG_PTR_TO_{LONG,INT} is a special case of ARG_PTR_TO_FIXED_SIZE_MEM just with additional alignment requirement. So it is better to just get rid of the ARG_PTR_TO_{LONG,INT} special cases altogether and reuse the fixed size memory types. For this, add MEM_ALIGNED to additionally ensure alignment given these helpers write directly into the args via * = val. The .arg*_size has been initialized reflecting the actual sizeof(*). MEM_ALIGNED can only be used in combination with MEM_FIXED_SIZE annotated argument types, since in !MEM_FIXED_SIZE cases the verifier does not know the buffer size a priori and therefore cannot blindly write * = val. Fixes: 57c3bb725a3d ("bpf: Introduce ARG_PTR_TO_{INT,LONG} arg types") Reported-by: Lonial Con Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Shung-Hsi Yu Link: https://lore.kernel.org/r/20240913191754.13290-3-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 7 +++++-- kernel/bpf/helpers.c | 6 ++++-- kernel/bpf/syscall.c | 3 ++- kernel/bpf/verifier.c | 41 +++++------------------------------------ kernel/trace/bpf_trace.c | 6 ++++-- net/core/filter.c | 6 ++++-- 6 files changed, 24 insertions(+), 45 deletions(-) (limited to 'kernel/bpf/helpers.c') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 7f3e8477d5e7..0c3893c47171 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -695,6 +695,11 @@ enum bpf_type_flag { /* DYNPTR points to xdp_buff */ DYNPTR_TYPE_XDP = BIT(16 + BPF_BASE_TYPE_BITS), + /* Memory must be aligned on some architectures, used in combination with + * MEM_FIXED_SIZE. + */ + MEM_ALIGNED = BIT(17 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; @@ -732,8 +737,6 @@ enum bpf_arg_type { ARG_ANYTHING, /* any (initialized) argument is ok */ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ - ARG_PTR_TO_INT, /* pointer to int */ - ARG_PTR_TO_LONG, /* pointer to long */ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */ ARG_PTR_TO_RINGBUF_MEM, /* pointer to dynamically reserved ringbuf memory */ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 5404bb964d83..5e97fdc6b20f 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -537,7 +537,8 @@ const struct bpf_func_proto bpf_strtol_proto = { .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg2_type = ARG_CONST_SIZE, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_LONG, + .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, + .arg4_size = sizeof(s64), }; BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags, @@ -563,7 +564,8 @@ const struct bpf_func_proto bpf_strtoul_proto = { .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg2_type = ARG_CONST_SIZE, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_LONG, + .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, + .arg4_size = sizeof(u64), }; BPF_CALL_3(bpf_strncmp, const char *, s1, u32, s1_sz, const char *, s2) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 6988e432fc3d..a7cd3719e26a 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -5954,7 +5954,8 @@ static const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = { .arg1_type = ARG_PTR_TO_MEM, .arg2_type = ARG_CONST_SIZE_OR_ZERO, .arg3_type = 
ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_LONG, + .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, + .arg4_size = sizeof(u64), }; static const struct bpf_func_proto * diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 068f763dcefb..7d62fd576089 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8301,16 +8301,6 @@ static bool arg_type_is_dynptr(enum bpf_arg_type type) return base_type(type) == ARG_PTR_TO_DYNPTR; } -static int int_ptr_type_to_size(enum bpf_arg_type type) -{ - if (type == ARG_PTR_TO_INT) - return sizeof(u32); - else if (type == ARG_PTR_TO_LONG) - return sizeof(u64); - - return -EINVAL; -} - static int resolve_map_arg_type(struct bpf_verifier_env *env, const struct bpf_call_arg_meta *meta, enum bpf_arg_type *arg_type) @@ -8383,16 +8373,6 @@ static const struct bpf_reg_types mem_types = { }, }; -static const struct bpf_reg_types int_ptr_types = { - .types = { - PTR_TO_STACK, - PTR_TO_PACKET, - PTR_TO_PACKET_META, - PTR_TO_MAP_KEY, - PTR_TO_MAP_VALUE, - }, -}; - static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE, @@ -8453,8 +8433,6 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types, [ARG_PTR_TO_MEM] = &mem_types, [ARG_PTR_TO_RINGBUF_MEM] = &ringbuf_mem_types, - [ARG_PTR_TO_INT] = &int_ptr_types, - [ARG_PTR_TO_LONG] = &int_ptr_types, [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types, [ARG_PTR_TO_FUNC] = &func_ptr_types, [ARG_PTR_TO_STACK] = &stack_ptr_types, @@ -9017,9 +8995,11 @@ skip_type_check: */ meta->raw_mode = arg_type & MEM_UNINIT; if (arg_type & MEM_FIXED_SIZE) { - err = check_helper_mem_access(env, regno, - fn->arg_size[arg], false, - meta); + err = check_helper_mem_access(env, regno, fn->arg_size[arg], false, meta); + if (err) + return err; + if (arg_type & MEM_ALIGNED) + err = check_ptr_alignment(env, reg, 0, fn->arg_size[arg], true); } break; case ARG_CONST_SIZE: @@ -9044,17 +9024,6 @@ skip_type_check: if (err) return err; break; - case ARG_PTR_TO_INT: - case ARG_PTR_TO_LONG: - { - int size = int_ptr_type_to_size(arg_type); - - err = check_helper_mem_access(env, regno, size, false, meta); - if (err) - return err; - err = check_ptr_alignment(env, reg, 0, size, true); - break; - } case ARG_PTR_TO_CONST_STR: { err = check_reg_const_str(env, reg, regno); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 68b5905c6eb5..b3283c250e27 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1202,7 +1202,8 @@ static const struct bpf_func_proto bpf_get_func_arg_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, - .arg3_type = ARG_PTR_TO_LONG, + .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, + .arg3_size = sizeof(u64), }; BPF_CALL_2(get_func_ret, void *, ctx, u64 *, value) @@ -1218,7 +1219,8 @@ static const struct bpf_func_proto bpf_get_func_ret_proto = { .func = get_func_ret, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_LONG, + .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, + .arg2_size = sizeof(u64), }; BPF_CALL_1(get_func_arg_cnt, void *, ctx) diff --git a/net/core/filter.c b/net/core/filter.c index ecf2ddf633bf..d0550c87e520 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6346,7 +6346,8 @@ static const struct bpf_func_proto bpf_skb_check_mtu_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, - .arg3_type = ARG_PTR_TO_INT, + 
.arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, + .arg3_size = sizeof(u32), .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; @@ -6357,7 +6358,8 @@ static const struct bpf_func_proto bpf_xdp_check_mtu_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, - .arg3_type = ARG_PTR_TO_INT, + .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, + .arg3_size = sizeof(u32), .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, }; -- cgit v1.2.3 From 4b3786a6c5397dc220b1483d8e2f4867743e966f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 13 Sep 2024 21:17:50 +0200 Subject: bpf: Zero former ARG_PTR_TO_{LONG,INT} args in case of error For all non-tracing helpers which formerly had ARG_PTR_TO_{LONG,INT} as input arguments, zero the value in the error case, as otherwise it could leak memory. For tracing, this is not needed given CAP_PERFMON can already read all kernel memory anyway, hence bpf_get_func_arg() and bpf_get_func_ret() are skipped here. Also, the MTU helpers' mtu_len pointer value is both written and read. Technically, MEM_UNINIT should not be there, in order to always force initialization. Removing MEM_UNINIT needs more verifier rework, though: right now MEM_UNINIT implies two things: i) the memory is written to, ii) the memory does not have to be initialized. If we lift MEM_UNINIT, it then becomes: i) the memory is read, ii) the memory must be initialized. This means that for bpf_*_check_mtu() we would be re-adding the issue we are trying to fix, that is, the helper would again be able to write back into things like .rodata BPF maps. Follow-up work will rework the MEM_UNINIT semantics such that the intent can be better expressed. For now, just clear *mtu_len on the error path; this can be lifted again later.
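
A minimal caller-side sketch (not part of the patch) of what this buys a BPF program: the program type, section name, function name and input string below are illustrative assumptions, only the bpf_strtol() signature and the zero-on-error behaviour come from the patches in this log. Before this change a failed parse could leave res holding stale data; now the helper writes 0 into it before doing anything else.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

SEC("cgroup/sysctl")
int sysctl_parse(struct bpf_sysctl *ctx)
{
	const char buf[] = "12345";
	long res;	/* may stay uninitialized before the call: MEM_UNINIT */

	/* The low bits of flags select the numeric base (10 here). On a
	 * parse error the helper returns a negative value and, after this
	 * patch, also writes 0 into res instead of leaving it untouched.
	 */
	if (bpf_strtol(buf, sizeof(buf) - 1, 10, &res) < 0)
		return 0;	/* reject the sysctl access on parse error */

	return res > 0;		/* 1 = allow, 0 = reject */
}

From the verifier's point of view, res is a PTR_TO_STACK of exactly sizeof(long) bytes that does not need to be initialized before the call, which is precisely what ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED with .arg4_size = sizeof(s64) expresses on the helper proto.
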
Fixes: 8a67f2de9b1d ("bpf: expose bpf_strtol and bpf_strtoul to all program types") Fixes: d7a4cb9b6705 ("bpf: Introduce bpf_strtol and bpf_strtoul helpers") Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/e5edd241-59e7-5e39-0ee5-a51e31b6840a@iogearbox.net Link: https://lore.kernel.org/r/20240913191754.13290-5-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 2 ++ kernel/bpf/syscall.c | 1 + net/core/filter.c | 44 +++++++++++++++++++++++--------------------- 3 files changed, 26 insertions(+), 21 deletions(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 5e97fdc6b20f..1a43d06eab28 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -523,6 +523,7 @@ BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags, long long _res; int err; + *res = 0; err = __bpf_strtoll(buf, buf_len, flags, &_res); if (err < 0) return err; @@ -548,6 +549,7 @@ BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags, bool is_negative; int err; + *res = 0; err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative); if (err < 0) return err; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index a7cd3719e26a..593822d3d393 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -5934,6 +5934,7 @@ static const struct bpf_func_proto bpf_sys_close_proto = { BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flags, u64 *, res) { + *res = 0; if (flags) return -EINVAL; diff --git a/net/core/filter.c b/net/core/filter.c index d0550c87e520..e4a4454df5f9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6262,20 +6262,25 @@ BPF_CALL_5(bpf_skb_check_mtu, struct sk_buff *, skb, int ret = BPF_MTU_CHK_RET_FRAG_NEEDED; struct net_device *dev = skb->dev; int skb_len, dev_len; - int mtu; + int mtu = 0; - if (unlikely(flags & ~(BPF_MTU_CHK_SEGS))) - return -EINVAL; + if (unlikely(flags & ~(BPF_MTU_CHK_SEGS))) { + ret = -EINVAL; + goto out; + } - if (unlikely(flags & BPF_MTU_CHK_SEGS && (len_diff || *mtu_len))) - return -EINVAL; + if (unlikely(flags & BPF_MTU_CHK_SEGS && (len_diff || *mtu_len))) { + ret = -EINVAL; + goto out; + } dev = __dev_via_ifindex(dev, ifindex); - if (unlikely(!dev)) - return -ENODEV; + if (unlikely(!dev)) { + ret = -ENODEV; + goto out; + } mtu = READ_ONCE(dev->mtu); - dev_len = mtu + dev->hard_header_len; /* If set use *mtu_len as input, L3 as iph->tot_len (like fib_lookup) */ @@ -6293,15 +6298,12 @@ BPF_CALL_5(bpf_skb_check_mtu, struct sk_buff *, skb, */ if (skb_is_gso(skb)) { ret = BPF_MTU_CHK_RET_SUCCESS; - if (flags & BPF_MTU_CHK_SEGS && !skb_gso_validate_network_len(skb, mtu)) ret = BPF_MTU_CHK_RET_SEGS_TOOBIG; } out: - /* BPF verifier guarantees valid pointer */ *mtu_len = mtu; - return ret; } @@ -6311,19 +6313,21 @@ BPF_CALL_5(bpf_xdp_check_mtu, struct xdp_buff *, xdp, struct net_device *dev = xdp->rxq->dev; int xdp_len = xdp->data_end - xdp->data; int ret = BPF_MTU_CHK_RET_SUCCESS; - int mtu, dev_len; + int mtu = 0, dev_len; /* XDP variant doesn't support multi-buffer segment check (yet) */ - if (unlikely(flags)) - return -EINVAL; + if (unlikely(flags)) { + ret = -EINVAL; + goto out; + } dev = __dev_via_ifindex(dev, ifindex); - if (unlikely(!dev)) - return -ENODEV; + if (unlikely(!dev)) { + ret = -ENODEV; + goto out; + } mtu = READ_ONCE(dev->mtu); - - /* Add L2-header as dev MTU is L3 size */ dev_len = mtu + dev->hard_header_len; /* Use *mtu_len as input, L3 as iph->tot_len (like fib_lookup) */ @@ -6333,10 +6337,8 @@ 
BPF_CALL_5(bpf_xdp_check_mtu, struct xdp_buff *, xdp, xdp_len += len_diff; /* minus result pass check */ if (xdp_len > dev_len) ret = BPF_MTU_CHK_RET_FRAG_NEEDED; - - /* BPF verifier guarantees valid pointer */ +out: *mtu_len = mtu; - return ret; } -- cgit v1.2.3
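
Taken together, the ARG_PTR_TO_{LONG,INT} removal and the zero-on-error patch suggest a single pattern for helpers that write a fixed-size scalar result through a pointer argument. The sketch below shows that pattern on a hypothetical helper: bpf_fill_u64() and its proto do not exist in the tree; only the flag combination, the .arg*_size initialization and the up-front zeroing mirror the changes above.

#include <linux/bpf.h>
#include <linux/errno.h>
#include <linux/filter.h>

/* Hypothetical helper, for illustration only; not part of the kernel tree. */
BPF_CALL_2(bpf_fill_u64, u64, input, u64 *, res)
{
	*res = 0;		/* zero first so an error cannot leak stale data */
	if (!input)
		return -EINVAL;
	*res = input * 2;
	return 0;
}

static const struct bpf_func_proto bpf_fill_u64_proto = {
	.func		= bpf_fill_u64,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
	/* fixed-size destination, may be uninitialized, pointer must be
	 * aligned to the access size
	 */
	.arg2_type	= ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
	.arg2_size	= sizeof(u64),
};

With MEM_FIXED_SIZE the verifier learns the write size from .arg2_size, and MEM_ALIGNED additionally lets it reject unaligned pointers before the helper performs the direct *res store.
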