-rw-r--r--  Documentation/bpf/bpf_devel_QA.rst | 7
-rw-r--r--  Documentation/bpf/standardization/instruction-set.rst | 6
-rw-r--r--  arch/arm64/net/bpf_jit.h | 5
-rw-r--r--  arch/arm64/net/bpf_jit_comp.c | 28
-rw-r--r--  arch/powerpc/net/bpf_jit_comp64.c | 79
-rw-r--r--  include/linux/bpf-cgroup-defs.h | 1
-rw-r--r--  include/linux/bpf.h | 11
-rw-r--r--  include/linux/bpf_verifier.h | 80
-rw-r--r--  include/linux/btf.h | 2
-rw-r--r--  include/linux/filter.h | 2
-rw-r--r--  include/linux/tnum.h | 2
-rw-r--r--  include/linux/uaccess.h | 2
-rw-r--r--  include/uapi/linux/bpf.h | 13
-rw-r--r--  kernel/bpf/arraymap.c | 11
-rw-r--r--  kernel/bpf/bpf_iter.c | 11
-rw-r--r--  kernel/bpf/bpf_local_storage.c | 8
-rw-r--r--  kernel/bpf/btf.c | 56
-rw-r--r--  kernel/bpf/cgroup.c | 182
-rw-r--r--  kernel/bpf/core.c | 32
-rw-r--r--  kernel/bpf/helpers.c | 382
-rw-r--r--  kernel/bpf/local_storage.c | 9
-rw-r--r--  kernel/bpf/syscall.c | 180
-rw-r--r--  kernel/bpf/tnum.c | 5
-rw-r--r--  kernel/bpf/trampoline.c | 41
-rw-r--r--  kernel/bpf/verifier.c | 1785
-rw-r--r--  kernel/cgroup/cgroup.c | 5
-rw-r--r--  kernel/trace/bpf_trace.c | 2
-rw-r--r--  net/bpf/test_run.c | 2
-rw-r--r--  tools/bpf/bpf_jit_disasm.c | 2
-rw-r--r--  tools/bpf/bpftool/btf.c | 8
-rw-r--r--  tools/bpf/bpftool/common.c | 59
-rw-r--r--  tools/bpf/bpftool/iter.c | 2
-rw-r--r--  tools/bpf/bpftool/link.c | 8
-rw-r--r--  tools/bpf/bpftool/main.c | 6
-rw-r--r--  tools/bpf/bpftool/main.h | 13
-rw-r--r--  tools/bpf/bpftool/map.c | 56
-rw-r--r--  tools/bpf/bpftool/map_perf_ring.c | 3
-rw-r--r--  tools/bpf/bpftool/net.c | 15
-rw-r--r--  tools/bpf/bpftool/prog.c | 4
-rw-r--r--  tools/include/uapi/linux/bpf.h | 13
-rw-r--r--  tools/lib/bpf/bpf.c | 44
-rw-r--r--  tools/lib/bpf/bpf.h | 5
-rw-r--r--  tools/lib/bpf/btf.h | 3
-rw-r--r--  tools/lib/bpf/btf_dump.c | 55
-rw-r--r--  tools/lib/bpf/libbpf.c | 28
-rw-r--r--  tools/lib/bpf/libbpf.h | 15
-rw-r--r--  tools/lib/bpf/libbpf.map | 1
-rw-r--r--  tools/lib/bpf/usdt.c | 10
-rw-r--r--  tools/testing/selftests/bpf/Makefile | 6
-rw-r--r--  tools/testing/selftests/bpf/bpf_kfuncs.h | 2
-rw-r--r--  tools/testing/selftests/bpf/config.ppc64el | 93
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/bpf_cookie.c | 50
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/btf_dump.c | 118
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c | 617
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c | 77
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/fd_array.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/log_buf.c | 4
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c | 9
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/ringbuf.c | 4
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/sockmap_listen.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/string_kfuncs.c | 65
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/tailcalls.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/tc_helpers.h | 28
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/usdt.c | 14
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/user_ringbuf.c | 10
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c | 114
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c | 13
-rw-r--r--  tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c | 22
-rw-r--r--  tools/testing/selftests/bpf/progs/bpf_misc.h | 18
-rw-r--r--  tools/testing/selftests/bpf/progs/cgroup_mprog.c | 30
-rw-r--r--  tools/testing/selftests/bpf/progs/compute_live_registers.c | 16
-rw-r--r--  tools/testing/selftests/bpf/progs/iters.c | 277
-rw-r--r--  tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c | 136
-rw-r--r--  tools/testing/selftests/bpf/progs/rbtree.c | 14
-rw-r--r--  tools/testing/selftests/bpf/progs/rcu_read_lock.c | 5
-rw-r--r--  tools/testing/selftests/bpf/progs/security_bpf_map.c | 69
-rw-r--r--  tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c | 87
-rw-r--r--  tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c | 23
-rw-r--r--  tools/testing/selftests/bpf/progs/string_kfuncs_success.c | 37
-rw-r--r--  tools/testing/selftests/bpf/progs/test_lookup_key.c | 4
-rw-r--r--  tools/testing/selftests/bpf/progs/test_ringbuf_write.c | 4
-rw-r--r--  tools/testing/selftests/bpf/progs/test_sig_in_xattr.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c | 9
-rw-r--r--  tools/testing/selftests/bpf/progs/test_tc_change_tail.c | 14
-rw-r--r--  tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c | 8
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_and.c | 8
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_bounds.c | 222
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c | 11
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_div_overflow.c | 4
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_map_in_map.c | 118
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_movsx.c | 16
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_precision.c | 70
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_ref_tracking.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_unpriv.c | 84
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c | 38
-rwxr-xr-x  tools/testing/selftests/bpf/test_bpftool_map.sh | 398
-rw-r--r--  tools/testing/selftests/bpf/test_loader.c | 30
-rw-r--r--  tools/testing/selftests/bpf/test_progs.h | 28
-rw-r--r--  tools/testing/selftests/bpf/unpriv_helpers.c | 94
-rw-r--r--  tools/testing/selftests/bpf/verifier/dead_code.c | 3
-rw-r--r--  tools/testing/selftests/bpf/verifier/jmp32.c | 33
-rw-r--r--  tools/testing/selftests/bpf/verifier/jset.c | 10
-rw-r--r--  tools/testing/selftests/bpf/veristat.c | 250
-rwxr-xr-x  tools/testing/selftests/bpf/vmtest.sh | 9
106 files changed, 5686 insertions, 1050 deletions
diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst
index 0acb4c9b8d90..45bc5c5cd793 100644
--- a/Documentation/bpf/bpf_devel_QA.rst
+++ b/Documentation/bpf/bpf_devel_QA.rst
@@ -611,9 +611,10 @@ Q: I have added a new BPF instruction to the kernel, how can I integrate
it into LLVM?
A: LLVM has a ``-mcpu`` selector for the BPF back end in order to allow
-the selection of BPF instruction set extensions. By default the
-``generic`` processor target is used, which is the base instruction set
-(v1) of BPF.
+the selection of BPF instruction set extensions. Before llvm version 20,
+the ``generic`` processor target is used, which is the base instruction
+set (v1) of BPF. Since llvm 20, the default processor target has changed
+to instruction set v3.
LLVM has an option to select ``-mcpu=probe`` where it will probe the host
kernel for supported BPF instruction set extensions and selects the
diff --git a/Documentation/bpf/standardization/instruction-set.rst b/Documentation/bpf/standardization/instruction-set.rst
index fbe975585236..39c74611752b 100644
--- a/Documentation/bpf/standardization/instruction-set.rst
+++ b/Documentation/bpf/standardization/instruction-set.rst
@@ -350,9 +350,9 @@ Underflow and overflow are allowed during arithmetic operations, meaning
the 64-bit or 32-bit value will wrap. If BPF program execution would
result in division by zero, the destination register is instead set to zero.
Otherwise, for ``ALU64``, if execution would result in ``LLONG_MIN``
-dividing -1, the desination register is instead set to ``LLONG_MIN``. For
-``ALU``, if execution would result in ``INT_MIN`` dividing -1, the
-desination register is instead set to ``INT_MIN``.
+divided by -1, the destination register is instead set to ``LLONG_MIN``. For
+``ALU``, if execution would result in ``INT_MIN`` divided by -1, the
+destination register is instead set to ``INT_MIN``.
If execution would result in modulo by zero, for ``ALU64`` the value of
the destination register is unchanged whereas for ``ALU`` the upper
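
For illustration only (not part of this patch), the signed-division edge cases described in the hunk above can be modeled in plain C roughly as follows; the helper name is hypothetical.

#include <limits.h>

/* Model of the ALU64 signed-division rules quoted above: divide-by-zero
 * yields 0, and the LLONG_MIN / -1 overflow case yields LLONG_MIN instead
 * of trapping. The 32-bit ALU case is analogous with INT_MIN.
 */
static long long bpf_alu64_sdiv_model(long long dst, long long src)
{
	if (src == 0)
		return 0;
	if (dst == LLONG_MIN && src == -1)
		return LLONG_MIN;
	return dst / src;
}
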
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index a3b0e693a125..bbea4f36f9f2 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -325,4 +325,9 @@
#define A64_MRS_SP_EL0(Rt) \
aarch64_insn_gen_mrs(Rt, AARCH64_INSN_SYSREG_SP_EL0)
+/* Barriers */
+#define A64_SB aarch64_insn_get_sb_value()
+#define A64_DSB_NSH (aarch64_insn_get_dsb_base_value() | 0x7 << 8)
+#define A64_ISB aarch64_insn_get_isb_value()
+
#endif /* _BPF_JIT_H */
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index da8b89dd2910..b6c42b5c9668 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -1630,17 +1630,14 @@ emit_cond_jmp:
return ret;
break;
- /* speculation barrier */
+ /* speculation barrier against v1 and v4 */
case BPF_ST | BPF_NOSPEC:
- /*
- * Nothing required here.
- *
- * In case of arm64, we rely on the firmware mitigation of
- * Speculative Store Bypass as controlled via the ssbd kernel
- * parameter. Whenever the mitigation is enabled, it works
- * for all of the kernel code with no need to provide any
- * additional instructions.
- */
+ if (alternative_has_cap_likely(ARM64_HAS_SB)) {
+ emit(A64_SB, ctx);
+ } else {
+ emit(A64_DSB_NSH, ctx);
+ emit(A64_ISB, ctx);
+ }
break;
/* ST: *(size *)(dst + off) = imm */
@@ -2911,6 +2908,17 @@ bool bpf_jit_supports_percpu_insn(void)
return true;
}
+bool bpf_jit_bypass_spec_v4(void)
+{
+ /* In case of arm64, we rely on the firmware mitigation of Speculative
+ * Store Bypass as controlled via the ssbd kernel parameter. Whenever
+ * the mitigation is enabled, it works for all of the kernel code with
+ * no need to provide any additional instructions. Therefore, skip
+ * inserting nospec insns against Spectre v4.
+ */
+ return true;
+}
+
bool bpf_jit_inlines_helper_call(s32 imm)
{
switch (imm) {
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 5daa77aee7f7..a25a6ffe7d7c 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -370,6 +370,23 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o
return 0;
}
+bool bpf_jit_bypass_spec_v1(void)
+{
+#if defined(CONFIG_PPC_E500) || defined(CONFIG_PPC_BOOK3S_64)
+ return !(security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+ security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR));
+#else
+ return true;
+#endif
+}
+
+bool bpf_jit_bypass_spec_v4(void)
+{
+ return !(security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+ security_ftr_enabled(SEC_FTR_STF_BARRIER) &&
+ stf_barrier_type_get() != STF_BARRIER_NONE);
+}
+
/*
* We spill into the redzone always, even if the bpf program has its own stackframe.
* Offsets hardcoded based on BPF_PPC_STACK_SAVE -- see bpf_jit_stack_local()
@@ -397,6 +414,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
u32 *addrs, int pass, bool extra_pass)
{
enum stf_barrier_type stf_barrier = stf_barrier_type_get();
+ bool sync_emitted, ori31_emitted;
const struct bpf_insn *insn = fp->insnsi;
int flen = fp->len;
int i, ret;
@@ -789,30 +807,51 @@ emit_clear:
/*
* BPF_ST NOSPEC (speculation barrier)
+ *
+ * The following must act as a barrier against both Spectre v1
+ * and v4 if we requested both mitigations. Therefore, also emit
+ * 'isync; sync' on E500 or 'ori31' on BOOK3S_64 in addition to
+ * the insns needed for a Spectre v4 barrier.
+ *
+ * If we requested only !bypass_spec_v1 OR only !bypass_spec_v4,
+ * we can skip the respective other barrier type as an
+ * optimization.
*/
case BPF_ST | BPF_NOSPEC:
- if (!security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) ||
- !security_ftr_enabled(SEC_FTR_STF_BARRIER))
- break;
-
- switch (stf_barrier) {
- case STF_BARRIER_EIEIO:
- EMIT(PPC_RAW_EIEIO() | 0x02000000);
- break;
- case STF_BARRIER_SYNC_ORI:
+ sync_emitted = false;
+ ori31_emitted = false;
+ if (IS_ENABLED(CONFIG_PPC_E500) &&
+ !bpf_jit_bypass_spec_v1()) {
+ EMIT(PPC_RAW_ISYNC());
EMIT(PPC_RAW_SYNC());
- EMIT(PPC_RAW_LD(tmp1_reg, _R13, 0));
- EMIT(PPC_RAW_ORI(_R31, _R31, 0));
- break;
- case STF_BARRIER_FALLBACK:
- ctx->seen |= SEEN_FUNC;
- PPC_LI64(_R12, dereference_kernel_function_descriptor(bpf_stf_barrier));
- EMIT(PPC_RAW_MTCTR(_R12));
- EMIT(PPC_RAW_BCTRL());
- break;
- case STF_BARRIER_NONE:
- break;
+ sync_emitted = true;
}
+ if (!bpf_jit_bypass_spec_v4()) {
+ switch (stf_barrier) {
+ case STF_BARRIER_EIEIO:
+ EMIT(PPC_RAW_EIEIO() | 0x02000000);
+ break;
+ case STF_BARRIER_SYNC_ORI:
+ if (!sync_emitted)
+ EMIT(PPC_RAW_SYNC());
+ EMIT(PPC_RAW_LD(tmp1_reg, _R13, 0));
+ EMIT(PPC_RAW_ORI(_R31, _R31, 0));
+ ori31_emitted = true;
+ break;
+ case STF_BARRIER_FALLBACK:
+ ctx->seen |= SEEN_FUNC;
+ PPC_LI64(_R12, dereference_kernel_function_descriptor(bpf_stf_barrier));
+ EMIT(PPC_RAW_MTCTR(_R12));
+ EMIT(PPC_RAW_BCTRL());
+ break;
+ case STF_BARRIER_NONE:
+ break;
+ }
+ }
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) &&
+ !bpf_jit_bypass_spec_v1() &&
+ !ori31_emitted)
+ EMIT(PPC_RAW_ORI(_R31, _R31, 0));
break;
/*
diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h
index 0985221d5478..c9e6b26abab6 100644
--- a/include/linux/bpf-cgroup-defs.h
+++ b/include/linux/bpf-cgroup-defs.h
@@ -63,6 +63,7 @@ struct cgroup_bpf {
*/
struct hlist_head progs[MAX_CGROUP_BPF_ATTACH_TYPE];
u8 flags[MAX_CGROUP_BPF_ATTACH_TYPE];
+ u64 revisions[MAX_CGROUP_BPF_ATTACH_TYPE];
/* list of cgroup shared storages */
struct list_head storages;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5b25d278409b..5dd556e89cce 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2288,6 +2288,9 @@ bpf_prog_run_array_uprobe(const struct bpf_prog_array *array,
return ret;
}
+bool bpf_jit_bypass_spec_v1(void);
+bool bpf_jit_bypass_spec_v4(void);
+
#ifdef CONFIG_BPF_SYSCALL
DECLARE_PER_CPU(int, bpf_prog_active);
extern struct mutex bpf_stats_enabled_mutex;
@@ -2475,12 +2478,16 @@ static inline bool bpf_allow_uninit_stack(const struct bpf_token *token)
static inline bool bpf_bypass_spec_v1(const struct bpf_token *token)
{
- return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON);
+ return bpf_jit_bypass_spec_v1() ||
+ cpu_mitigations_off() ||
+ bpf_token_capable(token, CAP_PERFMON);
}
static inline bool bpf_bypass_spec_v4(const struct bpf_token *token)
{
- return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON);
+ return bpf_jit_bypass_spec_v4() ||
+ cpu_mitigations_off() ||
+ bpf_token_capable(token, CAP_PERFMON);
}
int bpf_map_new_fd(struct bpf_map *map, int flags);
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 256274acb1d8..7e459e839f8b 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -344,7 +344,7 @@ struct bpf_func_state {
#define MAX_CALL_FRAMES 8
-/* instruction history flags, used in bpf_insn_hist_entry.flags field */
+/* instruction history flags, used in bpf_jmp_history_entry.flags field */
enum {
/* instruction references stack slot through PTR_TO_STACK register;
* we also store stack's frame number in lower 3 bits (MAX_CALL_FRAMES is 8)
@@ -366,7 +366,7 @@ enum {
static_assert(INSN_F_FRAMENO_MASK + 1 >= MAX_CALL_FRAMES);
static_assert(INSN_F_SPI_MASK + 1 >= MAX_BPF_STACK / 8);
-struct bpf_insn_hist_entry {
+struct bpf_jmp_history_entry {
u32 idx;
/* insn idx can't be bigger than 1 million */
u32 prev_idx : 20;
@@ -449,32 +449,20 @@ struct bpf_verifier_state {
/* first and last insn idx of this verifier state */
u32 first_insn_idx;
u32 last_insn_idx;
- /* If this state is a part of states loop this field points to some
- * parent of this state such that:
- * - it is also a member of the same states loop;
- * - DFS states traversal starting from initial state visits loop_entry
- * state before this state.
- * Used to compute topmost loop entry for state loops.
- * State loops might appear because of open coded iterators logic.
- * See get_loop_entry() for more information.
+ /* if this state is a backedge state then equal_state
+ * records cached state to which this state is equal.
*/
- struct bpf_verifier_state *loop_entry;
- /* Sub-range of env->insn_hist[] corresponding to this state's
- * instruction history.
- * Backtracking is using it to go from last to first.
- * For most states instruction history is short, 0-3 instructions.
+ struct bpf_verifier_state *equal_state;
+ /* jmp history recorded from first to last.
+ * backtracking is using it to go from last to first.
+ * For most states jmp_history_cnt is [0-3].
* For loops can go up to ~40.
*/
- u32 insn_hist_start;
- u32 insn_hist_end;
+ struct bpf_jmp_history_entry *jmp_history;
+ u32 jmp_history_cnt;
u32 dfs_depth;
u32 callback_unroll_depth;
u32 may_goto_depth;
- /* If this state was ever pointed-to by other state's loop_entry field
- * this flag would be set to true. Used to avoid freeing such states
- * while they are still in use.
- */
- u32 used_as_loop_entry;
};
#define bpf_get_spilled_reg(slot, frame, mask) \
@@ -580,7 +568,8 @@ struct bpf_insn_aux_data {
u64 map_key_state; /* constant (32 bit) key tracking for maps */
int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
u32 seen; /* this insn was processed by the verifier at env->pass_cnt */
- bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */
+ bool nospec; /* do not execute this instruction speculatively */
+ bool nospec_result; /* result is unsafe under speculation, nospec must follow */
bool zext_dst; /* this insn zero extends dst reg */
bool needs_zext; /* alu op needs to clear upper bits */
bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */
@@ -609,6 +598,11 @@ struct bpf_insn_aux_data {
* accepts callback function as a parameter.
*/
bool calls_callback;
+ /*
+ * CFG strongly connected component this instruction belongs to,
+ * zero if it is a singleton SCC.
+ */
+ u32 scc;
/* registers alive before this instruction. */
u16 live_regs_before;
};
@@ -718,6 +712,38 @@ struct bpf_idset {
u32 ids[BPF_ID_MAP_SIZE];
};
+/* see verifier.c:compute_scc_callchain() */
+struct bpf_scc_callchain {
+ /* call sites from bpf_verifier_state->frame[*]->callsite leading to this SCC */
+ u32 callsites[MAX_CALL_FRAMES - 1];
+ /* last frame in a chain is identified by SCC id */
+ u32 scc;
+};
+
+/* verifier state waiting for propagate_backedges() */
+struct bpf_scc_backedge {
+ struct bpf_scc_backedge *next;
+ struct bpf_verifier_state state;
+};
+
+struct bpf_scc_visit {
+ struct bpf_scc_callchain callchain;
+ /* first state in current verification path that entered SCC
+ * identified by the callchain
+ */
+ struct bpf_verifier_state *entry_state;
+ struct bpf_scc_backedge *backedges; /* list of backedges */
+ u32 num_backedges;
+};
+
+/* An array of bpf_scc_visit structs sharing the same bpf_scc_callchain->scc
+ * but having different bpf_scc_callchain->callsites.
+ */
+struct bpf_scc_info {
+ u32 num_visits;
+ struct bpf_scc_visit visits[];
+};
+
/* single container for all structs
* one verifier_env per bpf_check() call
*/
@@ -775,9 +801,7 @@ struct bpf_verifier_env {
int cur_postorder;
} cfg;
struct backtrack_state bt;
- struct bpf_insn_hist_entry *insn_hist;
- struct bpf_insn_hist_entry *cur_hist_ent;
- u32 insn_hist_cap;
+ struct bpf_jmp_history_entry *cur_hist_ent;
u32 pass_cnt; /* number of times do_check() was called */
u32 subprog_cnt;
/* number of instructions analyzed by the verifier */
@@ -799,6 +823,7 @@ struct bpf_verifier_env {
u32 longest_mark_read_walk;
u32 free_list_size;
u32 explored_states_size;
+ u32 num_backedges;
bpfptr_t fd_array;
/* bit mask to keep track of whether a register has been accessed
@@ -816,6 +841,9 @@ struct bpf_verifier_env {
char tmp_str_buf[TMP_STR_BUF_LEN];
struct bpf_insn insn_buf[INSN_BUF_SIZE];
struct bpf_insn epilogue_buf[INSN_BUF_SIZE];
+ /* array of pointers to bpf_scc_info indexed by SCC id */
+ struct bpf_scc_info **scc_info;
+ u32 scc_cnt;
};
static inline struct bpf_func_info_aux *subprog_aux(struct bpf_verifier_env *env, int subprog)
diff --git a/include/linux/btf.h b/include/linux/btf.h
index b2983706292f..a40beb9cf160 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -221,6 +221,8 @@ bool btf_is_vmlinux(const struct btf *btf);
struct module *btf_try_get_module(const struct btf *btf);
u32 btf_nr_types(const struct btf *btf);
struct btf *btf_base_btf(const struct btf *btf);
+bool btf_type_is_i32(const struct btf_type *t);
+bool btf_type_is_i64(const struct btf_type *t);
bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
const struct btf_member *m,
u32 expected_offset, u32 expected_size);
diff --git a/include/linux/filter.h b/include/linux/filter.h
index f5cf4d35d83e..eca229752cbe 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -82,7 +82,7 @@ struct ctl_table_header;
#define BPF_CALL_ARGS 0xe0
/* unused opcode to mark speculation barrier for mitigating
- * Speculative Store Bypass
+ * Spectre v1 and v4
*/
#define BPF_NOSPEC 0xc0
diff --git a/include/linux/tnum.h b/include/linux/tnum.h
index 3c13240077b8..57ed3035cc30 100644
--- a/include/linux/tnum.h
+++ b/include/linux/tnum.h
@@ -40,6 +40,8 @@ struct tnum tnum_arshift(struct tnum a, u8 min_shift, u8 insn_bitness);
struct tnum tnum_add(struct tnum a, struct tnum b);
/* Subtract two tnums, return @a - @b */
struct tnum tnum_sub(struct tnum a, struct tnum b);
+/* Neg of a tnum, return 0 - @a */
+struct tnum tnum_neg(struct tnum a);
/* Bitwise-AND, return @a & @b */
struct tnum tnum_and(struct tnum a, struct tnum b);
/* Bitwise-OR, return @a | @b */
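
The kernel/bpf/tnum.c hunk implementing tnum_neg() is not shown in this excerpt; a minimal sketch consistent with the "return 0 - @a" contract above, reusing the existing tnum_sub() and TNUM() helpers, could look like this (an assumption, not the patch's actual body):

/* Negation expressed as subtraction from the constant tnum 0. */
struct tnum tnum_neg(struct tnum a)
{
	return tnum_sub(TNUM(0, 0), a);
}
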
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 7c06f4795670..1beb5b395d81 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -296,6 +296,8 @@ static inline bool pagefault_disabled(void)
*/
#define faulthandler_disabled() (pagefault_disabled() || in_atomic())
+DEFINE_LOCK_GUARD_0(pagefault, pagefault_disable(), pagefault_enable())
+
#ifndef CONFIG_ARCH_HAS_SUBPAGE_FAULTS
/**
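
DEFINE_LOCK_GUARD_0() makes guard(pagefault)() available as a scope-based replacement for explicit pagefault_disable()/pagefault_enable() pairs, which the string kfuncs later in this series rely on. A hedged sketch of the pattern follows; the helper name peek_byte_nofault is invented for illustration.

/* Page faults stay disabled until the enclosing scope is left, on every
 * return path, so no explicit pagefault_enable() call is needed.
 */
static int peek_byte_nofault(const char *src, char *dst)
{
	guard(pagefault)();

	__get_kernel_nofault(dst, src, char, err_out);
	return 0;
err_out:
	return -EFAULT;
}
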
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0b4a2f124d11..719ba230032f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1794,6 +1794,13 @@ union bpf_attr {
};
__u64 expected_revision;
} netkit;
+ struct {
+ union {
+ __u32 relative_fd;
+ __u32 relative_id;
+ };
+ __u64 expected_revision;
+ } cgroup;
};
} link_create;
@@ -2403,7 +2410,7 @@ union bpf_attr {
* into it. An example is available in file
* *samples/bpf/trace_output_user.c* in the Linux kernel source
* tree (the eBPF program counterpart is in
- * *samples/bpf/trace_output_kern.c*).
+ * *samples/bpf/trace_output.bpf.c*).
*
* **bpf_perf_event_output**\ () achieves better performance
* than **bpf_trace_printk**\ () for sharing data with user
@@ -6653,11 +6660,15 @@ struct bpf_link_info {
struct {
__aligned_u64 tp_name; /* in/out: tp_name buffer ptr */
__u32 tp_name_len; /* in/out: tp_name buffer len */
+ __u32 :32;
+ __u64 cookie;
} raw_tracepoint;
struct {
__u32 attach_type;
__u32 target_obj_id; /* prog_id for PROG_EXT, otherwise btf object id */
__u32 target_btf_id; /* BTF type id inside the object */
+ __u32 :32;
+ __u64 cookie;
} tracing;
struct {
__u64 cgroup_id;
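
For illustration only, the new link_create.cgroup fields above could be driven through the raw bpf(2) syscall roughly as below to attach a cgroup link ordered before an existing one. The helper name and attach type are arbitrary; this is an assumption about usage, not code from the patch.

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* Sketch: attach prog_fd to cgroup_fd so it runs before the program owned
 * by anchor_link_fd. The attach fails with -ESTALE if the attach list
 * changed since 'revision' was read via BPF_PROG_QUERY.
 */
static int attach_before_link(int prog_fd, int cgroup_fd,
			      int anchor_link_fd, __u64 revision)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.link_create.prog_fd = prog_fd;
	attr.link_create.target_fd = cgroup_fd;
	attr.link_create.attach_type = BPF_CGROUP_INET_INGRESS;
	attr.link_create.flags = BPF_F_BEFORE | BPF_F_LINK;
	attr.link_create.cgroup.relative_fd = anchor_link_fd;
	attr.link_create.cgroup.expected_revision = revision;

	return syscall(__NR_bpf, BPF_LINK_CREATE, &attr, sizeof(attr));
}
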
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index eb28c0f219ee..3d080916faf9 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -530,8 +530,6 @@ static int array_map_check_btf(const struct bpf_map *map,
const struct btf_type *key_type,
const struct btf_type *value_type)
{
- u32 int_data;
-
/* One exception for keyless BTF: .bss/.data/.rodata map */
if (btf_type_is_void(key_type)) {
if (map->map_type != BPF_MAP_TYPE_ARRAY ||
@@ -544,14 +542,11 @@ static int array_map_check_btf(const struct bpf_map *map,
return 0;
}
- if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
- return -EINVAL;
-
- int_data = *(u32 *)(key_type + 1);
- /* bpf array can only take a u32 key. This check makes sure
+ /*
+ * Bpf array can only take a u32 key. This check makes sure
* that the btf matches the attr used during map_create.
*/
- if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
+ if (!btf_type_is_i32(key_type))
return -EINVAL;
return 0;
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 380e9a7cac75..303ab1f42d3a 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -38,8 +38,7 @@ static DEFINE_MUTEX(link_mutex);
/* incremented on every opened seq_file */
static atomic64_t session_id;
-static int prepare_seq_file(struct file *file, struct bpf_iter_link *link,
- const struct bpf_iter_seq_info *seq_info);
+static int prepare_seq_file(struct file *file, struct bpf_iter_link *link);
static void bpf_iter_inc_seq_num(struct seq_file *seq)
{
@@ -257,7 +256,7 @@ static int iter_open(struct inode *inode, struct file *file)
{
struct bpf_iter_link *link = inode->i_private;
- return prepare_seq_file(file, link, __get_seq_info(link));
+ return prepare_seq_file(file, link);
}
static int iter_release(struct inode *inode, struct file *file)
@@ -586,9 +585,9 @@ static void init_seq_meta(struct bpf_iter_priv_data *priv_data,
priv_data->done_stop = false;
}
-static int prepare_seq_file(struct file *file, struct bpf_iter_link *link,
- const struct bpf_iter_seq_info *seq_info)
+static int prepare_seq_file(struct file *file, struct bpf_iter_link *link)
{
+ const struct bpf_iter_seq_info *seq_info = __get_seq_info(link);
struct bpf_iter_priv_data *priv_data;
struct bpf_iter_target_info *tinfo;
struct bpf_prog *prog;
@@ -653,7 +652,7 @@ int bpf_iter_new_fd(struct bpf_link *link)
}
iter_link = container_of(link, struct bpf_iter_link, link);
- err = prepare_seq_file(file, iter_link, __get_seq_info(iter_link));
+ err = prepare_seq_file(file, iter_link);
if (err)
goto free_file;
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index fa56c30833ff..b931fbceb54d 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -722,13 +722,7 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map,
const struct btf_type *key_type,
const struct btf_type *value_type)
{
- u32 int_data;
-
- if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
- return -EINVAL;
-
- int_data = *(u32 *)(key_type + 1);
- if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
+ if (!btf_type_is_i32(key_type))
return -EINVAL;
return 0;
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 1d2cf898e21e..05fd64a371af 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -858,26 +858,37 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id)
EXPORT_SYMBOL_GPL(btf_type_by_id);
/*
- * Regular int is not a bit field and it must be either
- * u8/u16/u32/u64 or __int128.
+ * Check that the type @t is a regular int. This means that @t is not
+ * a bit field and it has the same size as either of u8/u16/u32/u64
+ * or __int128. If @expected_size is not zero, then size of @t should
+ * be the same. A caller should already have checked that the type @t
+ * is an integer.
*/
+static bool __btf_type_int_is_regular(const struct btf_type *t, size_t expected_size)
+{
+ u32 int_data = btf_type_int(t);
+ u8 nr_bits = BTF_INT_BITS(int_data);
+ u8 nr_bytes = BITS_ROUNDUP_BYTES(nr_bits);
+
+ return BITS_PER_BYTE_MASKED(nr_bits) == 0 &&
+ BTF_INT_OFFSET(int_data) == 0 &&
+ (nr_bytes <= 16 && is_power_of_2(nr_bytes)) &&
+ (expected_size == 0 || nr_bytes == expected_size);
+}
+
static bool btf_type_int_is_regular(const struct btf_type *t)
{
- u8 nr_bits, nr_bytes;
- u32 int_data;
+ return __btf_type_int_is_regular(t, 0);
+}
- int_data = btf_type_int(t);
- nr_bits = BTF_INT_BITS(int_data);
- nr_bytes = BITS_ROUNDUP_BYTES(nr_bits);
- if (BITS_PER_BYTE_MASKED(nr_bits) ||
- BTF_INT_OFFSET(int_data) ||
- (nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) &&
- nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64) &&
- nr_bytes != (2 * sizeof(u64)))) {
- return false;
- }
+bool btf_type_is_i32(const struct btf_type *t)
+{
+ return btf_type_is_int(t) && __btf_type_int_is_regular(t, 4);
+}
- return true;
+bool btf_type_is_i64(const struct btf_type *t)
+{
+ return btf_type_is_int(t) && __btf_type_int_is_regular(t, 8);
}
/*
@@ -3443,7 +3454,8 @@ btf_find_graph_root(const struct btf *btf, const struct btf_type *pt,
node_field_name = strstr(value_type, ":");
if (!node_field_name)
return -EINVAL;
- value_type = kstrndup(value_type, node_field_name - value_type, GFP_KERNEL | __GFP_NOWARN);
+ value_type = kstrndup(value_type, node_field_name - value_type,
+ GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
if (!value_type)
return -ENOMEM;
id = btf_find_by_name_kind(btf, value_type, BTF_KIND_STRUCT);
@@ -3958,7 +3970,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
/* This needs to be kzalloc to zero out padding and unused fields, see
* comment in btf_record_equal.
*/
- rec = kzalloc(struct_size(rec, fields, cnt), GFP_KERNEL | __GFP_NOWARN);
+ rec = kzalloc(struct_size(rec, fields, cnt), GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
if (!rec)
return ERR_PTR(-ENOMEM);
@@ -9019,7 +9031,7 @@ static struct bpf_cand_cache *populate_cand_cache(struct bpf_cand_cache *cands,
bpf_free_cands_from_cache(*cc);
*cc = NULL;
}
- new_cands = kmemdup(cands, sizeof_cands(cands->cnt), GFP_KERNEL);
+ new_cands = kmemdup(cands, sizeof_cands(cands->cnt), GFP_KERNEL_ACCOUNT);
if (!new_cands) {
bpf_free_cands(cands);
return ERR_PTR(-ENOMEM);
@@ -9027,7 +9039,7 @@ static struct bpf_cand_cache *populate_cand_cache(struct bpf_cand_cache *cands,
/* strdup the name, since it will stay in cache.
* the cands->name points to strings in prog's BTF and the prog can be unloaded.
*/
- new_cands->name = kmemdup_nul(cands->name, cands->name_len, GFP_KERNEL);
+ new_cands->name = kmemdup_nul(cands->name, cands->name_len, GFP_KERNEL_ACCOUNT);
bpf_free_cands(cands);
if (!new_cands->name) {
kfree(new_cands);
@@ -9111,7 +9123,7 @@ bpf_core_add_cands(struct bpf_cand_cache *cands, const struct btf *targ_btf,
continue;
/* most of the time there is only one candidate for a given kind+name pair */
- new_cands = kmalloc(sizeof_cands(cands->cnt + 1), GFP_KERNEL);
+ new_cands = kmalloc(sizeof_cands(cands->cnt + 1), GFP_KERNEL_ACCOUNT);
if (!new_cands) {
bpf_free_cands(cands);
return ERR_PTR(-ENOMEM);
@@ -9228,7 +9240,7 @@ int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo,
/* ~4k of temp memory necessary to convert LLVM spec like "0:1:0:5"
* into arrays of btf_ids of struct fields and array indices.
*/
- specs = kcalloc(3, sizeof(*specs), GFP_KERNEL);
+ specs = kcalloc(3, sizeof(*specs), GFP_KERNEL_ACCOUNT);
if (!specs)
return -ENOMEM;
@@ -9253,7 +9265,7 @@ int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo,
goto out;
}
if (cc->cnt) {
- cands.cands = kcalloc(cc->cnt, sizeof(*cands.cands), GFP_KERNEL);
+ cands.cands = kcalloc(cc->cnt, sizeof(*cands.cands), GFP_KERNEL_ACCOUNT);
if (!cands.cands) {
err = -ENOMEM;
goto out;
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index f4885514f007..cd220e861d67 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -658,6 +658,116 @@ static struct bpf_prog_list *find_attach_entry(struct hlist_head *progs,
return NULL;
}
+static struct bpf_link *bpf_get_anchor_link(u32 flags, u32 id_or_fd)
+{
+ struct bpf_link *link = ERR_PTR(-EINVAL);
+
+ if (flags & BPF_F_ID)
+ link = bpf_link_by_id(id_or_fd);
+ else if (id_or_fd)
+ link = bpf_link_get_from_fd(id_or_fd);
+ return link;
+}
+
+static struct bpf_prog *bpf_get_anchor_prog(u32 flags, u32 id_or_fd)
+{
+ struct bpf_prog *prog = ERR_PTR(-EINVAL);
+
+ if (flags & BPF_F_ID)
+ prog = bpf_prog_by_id(id_or_fd);
+ else if (id_or_fd)
+ prog = bpf_prog_get(id_or_fd);
+ return prog;
+}
+
+static struct bpf_prog_list *get_prog_list(struct hlist_head *progs, struct bpf_prog *prog,
+ struct bpf_cgroup_link *link, u32 flags, u32 id_or_fd)
+{
+ bool is_link = flags & BPF_F_LINK, is_id = flags & BPF_F_ID;
+ struct bpf_prog_list *pltmp, *pl = ERR_PTR(-EINVAL);
+ bool preorder = flags & BPF_F_PREORDER;
+ struct bpf_link *anchor_link = NULL;
+ struct bpf_prog *anchor_prog = NULL;
+ bool is_before, is_after;
+
+ is_before = flags & BPF_F_BEFORE;
+ is_after = flags & BPF_F_AFTER;
+ if (is_link || is_id || id_or_fd) {
+ /* flags must have either BPF_F_BEFORE or BPF_F_AFTER */
+ if (is_before == is_after)
+ return ERR_PTR(-EINVAL);
+ if ((is_link && !link) || (!is_link && !prog))
+ return ERR_PTR(-EINVAL);
+ } else if (!hlist_empty(progs)) {
+ /* flags cannot have both BPF_F_BEFORE and BPF_F_AFTER */
+ if (is_before && is_after)
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (is_link) {
+ anchor_link = bpf_get_anchor_link(flags, id_or_fd);
+ if (IS_ERR(anchor_link))
+ return ERR_PTR(PTR_ERR(anchor_link));
+ } else if (is_id || id_or_fd) {
+ anchor_prog = bpf_get_anchor_prog(flags, id_or_fd);
+ if (IS_ERR(anchor_prog))
+ return ERR_PTR(PTR_ERR(anchor_prog));
+ }
+
+ if (!anchor_prog && !anchor_link) {
+ /* if there is no anchor_prog/anchor_link, then BPF_F_PREORDER
+ * doesn't matter since either prepend or append to a combined
+ * list of progs will end up with correct result.
+ */
+ hlist_for_each_entry(pltmp, progs, node) {
+ if (is_before)
+ return pltmp;
+ if (pltmp->node.next)
+ continue;
+ return pltmp;
+ }
+ return NULL;
+ }
+
+ hlist_for_each_entry(pltmp, progs, node) {
+ if ((anchor_prog && anchor_prog == pltmp->prog) ||
+ (anchor_link && anchor_link == &pltmp->link->link)) {
+ if (!!(pltmp->flags & BPF_F_PREORDER) != preorder)
+ goto out;
+ pl = pltmp;
+ goto out;
+ }
+ }
+
+ pl = ERR_PTR(-ENOENT);
+out:
+ if (anchor_link)
+ bpf_link_put(anchor_link);
+ else
+ bpf_prog_put(anchor_prog);
+ return pl;
+}
+
+static int insert_pl_to_hlist(struct bpf_prog_list *pl, struct hlist_head *progs,
+ struct bpf_prog *prog, struct bpf_cgroup_link *link,
+ u32 flags, u32 id_or_fd)
+{
+ struct bpf_prog_list *pltmp;
+
+ pltmp = get_prog_list(progs, prog, link, flags, id_or_fd);
+ if (IS_ERR(pltmp))
+ return PTR_ERR(pltmp);
+
+ if (!pltmp)
+ hlist_add_head(&pl->node, progs);
+ else if (flags & BPF_F_BEFORE)
+ hlist_add_before(&pl->node, &pltmp->node);
+ else
+ hlist_add_behind(&pl->node, &pltmp->node);
+
+ return 0;
+}
+
/**
* __cgroup_bpf_attach() - Attach the program or the link to a cgroup, and
* propagate the change to descendants
@@ -667,6 +777,8 @@ static struct bpf_prog_list *find_attach_entry(struct hlist_head *progs,
* @replace_prog: Previously attached program to replace if BPF_F_REPLACE is set
* @type: Type of attach operation
* @flags: Option flags
+ * @id_or_fd: Relative prog id or fd
+ * @revision: bpf_prog_list revision
*
* Exactly one of @prog or @link can be non-null.
* Must be called with cgroup_mutex held.
@@ -674,7 +786,8 @@ static struct bpf_prog_list *find_attach_entry(struct hlist_head *progs,
static int __cgroup_bpf_attach(struct cgroup *cgrp,
struct bpf_prog *prog, struct bpf_prog *replace_prog,
struct bpf_cgroup_link *link,
- enum bpf_attach_type type, u32 flags)
+ enum bpf_attach_type type, u32 flags, u32 id_or_fd,
+ u64 revision)
{
u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI));
struct bpf_prog *old_prog = NULL;
@@ -690,6 +803,9 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
((flags & BPF_F_REPLACE) && !(flags & BPF_F_ALLOW_MULTI)))
/* invalid combination */
return -EINVAL;
+ if ((flags & BPF_F_REPLACE) && (flags & (BPF_F_BEFORE | BPF_F_AFTER)))
+ /* only either replace or insertion with before/after */
+ return -EINVAL;
if (link && (prog || replace_prog))
/* only either link or prog/replace_prog can be specified */
return -EINVAL;
@@ -700,6 +816,8 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
atype = bpf_cgroup_atype_find(type, new_prog->aux->attach_btf_id);
if (atype < 0)
return -EINVAL;
+ if (revision && revision != cgrp->bpf.revisions[atype])
+ return -ESTALE;
progs = &cgrp->bpf.progs[atype];
@@ -728,22 +846,18 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
if (pl) {
old_prog = pl->prog;
} else {
- struct hlist_node *last = NULL;
-
pl = kmalloc(sizeof(*pl), GFP_KERNEL);
if (!pl) {
bpf_cgroup_storages_free(new_storage);
return -ENOMEM;
}
- if (hlist_empty(progs))
- hlist_add_head(&pl->node, progs);
- else
- hlist_for_each(last, progs) {
- if (last->next)
- continue;
- hlist_add_behind(&pl->node, last);
- break;
- }
+
+ err = insert_pl_to_hlist(pl, progs, prog, link, flags, id_or_fd);
+ if (err) {
+ kfree(pl);
+ bpf_cgroup_storages_free(new_storage);
+ return err;
+ }
}
pl->prog = prog;
@@ -762,6 +876,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
if (err)
goto cleanup_trampoline;
+ cgrp->bpf.revisions[atype] += 1;
if (old_prog) {
if (type == BPF_LSM_CGROUP)
bpf_trampoline_unlink_cgroup_shim(old_prog);
@@ -793,12 +908,13 @@ static int cgroup_bpf_attach(struct cgroup *cgrp,
struct bpf_prog *prog, struct bpf_prog *replace_prog,
struct bpf_cgroup_link *link,
enum bpf_attach_type type,
- u32 flags)
+ u32 flags, u32 id_or_fd, u64 revision)
{
int ret;
cgroup_lock();
- ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags);
+ ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags,
+ id_or_fd, revision);
cgroup_unlock();
return ret;
}
@@ -886,6 +1002,7 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp,
if (!found)
return -ENOENT;
+ cgrp->bpf.revisions[atype] += 1;
old_prog = xchg(&link->link.prog, new_prog);
replace_effective_prog(cgrp, atype, link);
bpf_prog_put(old_prog);
@@ -1011,12 +1128,14 @@ found:
* @prog: A program to detach or NULL
* @link: A link to detach or NULL
* @type: Type of detach operation
+ * @revision: bpf_prog_list revision
*
* At most one of @prog or @link can be non-NULL.
* Must be called with cgroup_mutex held.
*/
static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
- struct bpf_cgroup_link *link, enum bpf_attach_type type)
+ struct bpf_cgroup_link *link, enum bpf_attach_type type,
+ u64 revision)
{
enum cgroup_bpf_attach_type atype;
struct bpf_prog *old_prog;
@@ -1034,6 +1153,9 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
if (atype < 0)
return -EINVAL;
+ if (revision && revision != cgrp->bpf.revisions[atype])
+ return -ESTALE;
+
progs = &cgrp->bpf.progs[atype];
flags = cgrp->bpf.flags[atype];
@@ -1059,6 +1181,7 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
/* now can actually delete it from this cgroup list */
hlist_del(&pl->node);
+ cgrp->bpf.revisions[atype] += 1;
kfree(pl);
if (hlist_empty(progs))
@@ -1074,12 +1197,12 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
}
static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
- enum bpf_attach_type type)
+ enum bpf_attach_type type, u64 revision)
{
int ret;
cgroup_lock();
- ret = __cgroup_bpf_detach(cgrp, prog, NULL, type);
+ ret = __cgroup_bpf_detach(cgrp, prog, NULL, type, revision);
cgroup_unlock();
return ret;
}
@@ -1097,6 +1220,7 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
struct bpf_prog_array *effective;
int cnt, ret = 0, i;
int total_cnt = 0;
+ u64 revision = 0;
u32 flags;
if (effective_query && prog_attach_flags)
@@ -1134,6 +1258,10 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
return -EFAULT;
if (copy_to_user(&uattr->query.prog_cnt, &total_cnt, sizeof(total_cnt)))
return -EFAULT;
+ if (!effective_query && from_atype == to_atype)
+ revision = cgrp->bpf.revisions[from_atype];
+ if (copy_to_user(&uattr->query.revision, &revision, sizeof(revision)))
+ return -EFAULT;
if (attr->query.prog_cnt == 0 || !prog_ids || !total_cnt)
/* return early if user requested only program count + flags */
return 0;
@@ -1216,7 +1344,8 @@ int cgroup_bpf_prog_attach(const union bpf_attr *attr,
}
ret = cgroup_bpf_attach(cgrp, prog, replace_prog, NULL,
- attr->attach_type, attr->attach_flags);
+ attr->attach_type, attr->attach_flags,
+ attr->relative_fd, attr->expected_revision);
if (replace_prog)
bpf_prog_put(replace_prog);
@@ -1238,7 +1367,7 @@ int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
if (IS_ERR(prog))
prog = NULL;
- ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type);
+ ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, attr->expected_revision);
if (prog)
bpf_prog_put(prog);
@@ -1267,7 +1396,7 @@ static void bpf_cgroup_link_release(struct bpf_link *link)
}
WARN_ON(__cgroup_bpf_detach(cg_link->cgroup, NULL, cg_link,
- cg_link->type));
+ cg_link->type, 0));
if (cg_link->type == BPF_LSM_CGROUP)
bpf_trampoline_unlink_cgroup_shim(cg_link->link.prog);
@@ -1339,6 +1468,13 @@ static const struct bpf_link_ops bpf_cgroup_link_lops = {
.fill_link_info = bpf_cgroup_link_fill_link_info,
};
+#define BPF_F_LINK_ATTACH_MASK \
+ (BPF_F_ID | \
+ BPF_F_BEFORE | \
+ BPF_F_AFTER | \
+ BPF_F_PREORDER | \
+ BPF_F_LINK)
+
int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
struct bpf_link_primer link_primer;
@@ -1346,7 +1482,7 @@ int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
struct cgroup *cgrp;
int err;
- if (attr->link_create.flags)
+ if (attr->link_create.flags & (~BPF_F_LINK_ATTACH_MASK))
return -EINVAL;
cgrp = cgroup_get_from_fd(attr->link_create.target_fd);
@@ -1370,7 +1506,9 @@ int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
}
err = cgroup_bpf_attach(cgrp, NULL, NULL, link,
- link->type, BPF_F_ALLOW_MULTI);
+ link->type, BPF_F_ALLOW_MULTI | attr->link_create.flags,
+ attr->link_create.cgroup.relative_fd,
+ attr->link_create.cgroup.expected_revision);
if (err) {
bpf_link_cleanup(&link_primer);
goto out_put_cgroup;
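
As a companion to the attach sketch shown earlier (and reusing its includes), a hedged user-space sketch of reading the per-attach-type revision that __cgroup_bpf_query() now reports, so it can be passed back as expected_revision:

/* Sketch: query the current bpf_prog_list revision for one attach type. */
static int query_cgroup_revision(int cgroup_fd, __u64 *revision)
{
	union bpf_attr attr;
	int err;

	memset(&attr, 0, sizeof(attr));
	attr.query.target_fd = cgroup_fd;
	attr.query.attach_type = BPF_CGROUP_INET_INGRESS;

	err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, sizeof(attr));
	if (err)
		return err;

	*revision = attr.query.revision;
	return 0;
}
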
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index c20babbf998f..e536a34a32c8 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2102,14 +2102,15 @@ out:
#undef COND_JMP
/* ST, STX and LDX*/
ST_NOSPEC:
- /* Speculation barrier for mitigating Speculative Store Bypass.
- * In case of arm64, we rely on the firmware mitigation as
- * controlled via the ssbd kernel parameter. Whenever the
- * mitigation is enabled, it works for all of the kernel code
- * with no need to provide any additional instructions here.
- * In case of x86, we use 'lfence' insn for mitigation. We
- * reuse preexisting logic from Spectre v1 mitigation that
- * happens to produce the required code on x86 for v4 as well.
+ /* Speculation barrier for mitigating Speculative Store Bypass,
+ * Bounds-Check Bypass and Type Confusion. In case of arm64, we
+ * rely on the firmware mitigation as controlled via the ssbd
+ * kernel parameter. Whenever the mitigation is enabled, it
+ * works for all of the kernel code with no need to provide any
+ * additional instructions here. In case of x86, we use 'lfence'
+ * insn for mitigation. We reuse preexisting logic from Spectre
+ * v1 mitigation that happens to produce the required code on
+ * x86 for v4 as well.
*/
barrier_nospec();
CONT;
@@ -3034,6 +3035,21 @@ bool __weak bpf_jit_needs_zext(void)
return false;
}
+/* By default, enable the verifier's mitigations against Spectre v1 and v4 for
+ * all archs. The value returned must not change at runtime as there is
+ * currently no support for reloading programs that were loaded without
+ * mitigations.
+ */
+bool __weak bpf_jit_bypass_spec_v1(void)
+{
+ return false;
+}
+
+bool __weak bpf_jit_bypass_spec_v4(void)
+{
+ return false;
+}
+
/* Return true if the JIT inlines the call to the helper corresponding to
* the imm.
*
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index b71e428ad936..2cdcf7b2c91e 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -24,6 +24,7 @@
#include <linux/bpf_mem_alloc.h>
#include <linux/kasan.h>
#include <linux/bpf_verifier.h>
+#include <linux/uaccess.h>
#include "../../lib/kstrtox.h"
@@ -3278,6 +3279,376 @@ __bpf_kfunc void __bpf_trap(void)
{
}
+/*
+ * Kfuncs for string operations.
+ *
+ * Since strings are not necessarily %NUL-terminated, we cannot directly call
+ * in-kernel implementations. Instead, we open-code the implementations using
+ * __get_kernel_nofault instead of plain dereference to make them safe.
+ */
+
+/**
+ * bpf_strcmp - Compare two strings
+ * @s1__ign: One string
+ * @s2__ign: Another string
+ *
+ * Return:
+ * * %0 - Strings are equal
+ * * %-1 - @s1__ign is smaller
+ * * %1 - @s2__ign is smaller
+ * * %-EFAULT - Cannot read one of the strings
+ * * %-E2BIG - One of the strings is too large
+ * * %-ERANGE - One of the strings is outside of kernel address space
+ */
+__bpf_kfunc int bpf_strcmp(const char *s1__ign, const char *s2__ign)
+{
+ char c1, c2;
+ int i;
+
+ if (!copy_from_kernel_nofault_allowed(s1__ign, 1) ||
+ !copy_from_kernel_nofault_allowed(s2__ign, 1)) {
+ return -ERANGE;
+ }
+
+ guard(pagefault)();
+ for (i = 0; i < XATTR_SIZE_MAX; i++) {
+ __get_kernel_nofault(&c1, s1__ign, char, err_out);
+ __get_kernel_nofault(&c2, s2__ign, char, err_out);
+ if (c1 != c2)
+ return c1 < c2 ? -1 : 1;
+ if (c1 == '\0')
+ return 0;
+ s1__ign++;
+ s2__ign++;
+ }
+ return -E2BIG;
+err_out:
+ return -EFAULT;
+}
+
+/**
+ * bpf_strnchr - Find a character in a length limited string
+ * @s__ign: The string to be searched
+ * @count: The number of characters to be searched
+ * @c: The character to search for
+ *
+ * Note that the %NUL-terminator is considered part of the string, and can
+ * be searched for.
+ *
+ * Return:
+ * * >=0 - Index of the first occurrence of @c within @s__ign
+ * * %-ENOENT - @c not found in the first @count characters of @s__ign
+ * * %-EFAULT - Cannot read @s__ign
+ * * %-E2BIG - @s__ign is too large
+ * * %-ERANGE - @s__ign is outside of kernel address space
+ */
+__bpf_kfunc int bpf_strnchr(const char *s__ign, size_t count, char c)
+{
+ char sc;
+ int i;
+
+ if (!copy_from_kernel_nofault_allowed(s__ign, 1))
+ return -ERANGE;
+
+ guard(pagefault)();
+ for (i = 0; i < count && i < XATTR_SIZE_MAX; i++) {
+ __get_kernel_nofault(&sc, s__ign, char, err_out);
+ if (sc == c)
+ return i;
+ if (sc == '\0')
+ return -ENOENT;
+ s__ign++;
+ }
+ return i == XATTR_SIZE_MAX ? -E2BIG : -ENOENT;
+err_out:
+ return -EFAULT;
+}
+
+/**
+ * bpf_strchr - Find the first occurrence of a character in a string
+ * @s__ign: The string to be searched
+ * @c: The character to search for
+ *
+ * Note that the %NUL-terminator is considered part of the string, and can
+ * be searched for.
+ *
+ * Return:
+ * * >=0 - The index of the first occurrence of @c within @s__ign
+ * * %-ENOENT - @c not found in @s__ign
+ * * %-EFAULT - Cannot read @s__ign
+ * * %-E2BIG - @s__ign is too large
+ * * %-ERANGE - @s__ign is outside of kernel address space
+ */
+__bpf_kfunc int bpf_strchr(const char *s__ign, char c)
+{
+ return bpf_strnchr(s__ign, XATTR_SIZE_MAX, c);
+}
+
+/**
+ * bpf_strchrnul - Find and return a character in a string, or end of string
+ * @s__ign: The string to be searched
+ * @c: The character to search for
+ *
+ * Return:
+ * * >=0 - Index of the first occurrence of @c within @s__ign or index of
+ * the null byte at the end of @s__ign when @c is not found
+ * * %-EFAULT - Cannot read @s__ign
+ * * %-E2BIG - @s__ign is too large
+ * * %-ERANGE - @s__ign is outside of kernel address space
+ */
+__bpf_kfunc int bpf_strchrnul(const char *s__ign, char c)
+{
+ char sc;
+ int i;
+
+ if (!copy_from_kernel_nofault_allowed(s__ign, 1))
+ return -ERANGE;
+
+ guard(pagefault)();
+ for (i = 0; i < XATTR_SIZE_MAX; i++) {
+ __get_kernel_nofault(&sc, s__ign, char, err_out);
+ if (sc == '\0' || sc == c)
+ return i;
+ s__ign++;
+ }
+ return -E2BIG;
+err_out:
+ return -EFAULT;
+}
+
+/**
+ * bpf_strrchr - Find the last occurrence of a character in a string
+ * @s__ign: The string to be searched
+ * @c: The character to search for
+ *
+ * Return:
+ * * >=0 - Index of the last occurrence of @c within @s__ign
+ * * %-ENOENT - @c not found in @s__ign
+ * * %-EFAULT - Cannot read @s__ign
+ * * %-E2BIG - @s__ign is too large
+ * * %-ERANGE - @s__ign is outside of kernel address space
+ */
+__bpf_kfunc int bpf_strrchr(const char *s__ign, int c)
+{
+ char sc;
+ int i, last = -ENOENT;
+
+ if (!copy_from_kernel_nofault_allowed(s__ign, 1))
+ return -ERANGE;
+
+ guard(pagefault)();
+ for (i = 0; i < XATTR_SIZE_MAX; i++) {
+ __get_kernel_nofault(&sc, s__ign, char, err_out);
+ if (sc == c)
+ last = i;
+ if (sc == '\0')
+ return last;
+ s__ign++;
+ }
+ return -E2BIG;
+err_out:
+ return -EFAULT;
+}
+
+/**
+ * bpf_strnlen - Calculate the length of a length-limited string
+ * @s__ign: The string
+ * @count: The maximum number of characters to count
+ *
+ * Return:
+ * * >=0 - The length of @s__ign
+ * * %-EFAULT - Cannot read @s__ign
+ * * %-E2BIG - @s__ign is too large
+ * * %-ERANGE - @s__ign is outside of kernel address space
+ */
+__bpf_kfunc int bpf_strnlen(const char *s__ign, size_t count)
+{
+ char c;
+ int i;
+
+ if (!copy_from_kernel_nofault_allowed(s__ign, 1))
+ return -ERANGE;
+
+ guard(pagefault)();
+ for (i = 0; i < count && i < XATTR_SIZE_MAX; i++) {
+ __get_kernel_nofault(&c, s__ign, char, err_out);
+ if (c == '\0')
+ return i;
+ s__ign++;
+ }
+ return i == XATTR_SIZE_MAX ? -E2BIG : i;
+err_out:
+ return -EFAULT;
+}
+
+/**
+ * bpf_strlen - Calculate the length of a string
+ * @s__ign: The string
+ *
+ * Return:
+ * * >=0 - The length of @s__ign
+ * * %-EFAULT - Cannot read @s__ign
+ * * %-E2BIG - @s__ign is too large
+ * * %-ERANGE - @s__ign is outside of kernel address space
+ */
+__bpf_kfunc int bpf_strlen(const char *s__ign)
+{
+ return bpf_strnlen(s__ign, XATTR_SIZE_MAX);
+}
+
+/**
+ * bpf_strspn - Calculate the length of the initial substring of @s__ign which
+ * only contains letters in @accept__ign
+ * @s__ign: The string to be searched
+ * @accept__ign: The string to search for
+ *
+ * Return:
+ * * >=0 - The length of the initial substring of @s__ign which only
+ * contains letters from @accept__ign
+ * * %-EFAULT - Cannot read one of the strings
+ * * %-E2BIG - One of the strings is too large
+ * * %-ERANGE - One of the strings is outside of kernel address space
+ */
+__bpf_kfunc int bpf_strspn(const char *s__ign, const char *accept__ign)
+{
+ char cs, ca;
+ int i, j;
+
+ if (!copy_from_kernel_nofault_allowed(s__ign, 1) ||
+ !copy_from_kernel_nofault_allowed(accept__ign, 1)) {
+ return -ERANGE;
+ }
+
+ guard(pagefault)();
+ for (i = 0; i < XATTR_SIZE_MAX; i++) {
+ __get_kernel_nofault(&cs, s__ign, char, err_out);
+ if (cs == '\0')
+ return i;
+ for (j = 0; j < XATTR_SIZE_MAX; j++) {
+ __get_kernel_nofault(&ca, accept__ign + j, char, err_out);
+ if (cs == ca || ca == '\0')
+ break;
+ }
+ if (j == XATTR_SIZE_MAX)
+ return -E2BIG;
+ if (ca == '\0')
+ return i;
+ s__ign++;
+ }
+ return -E2BIG;
+err_out:
+ return -EFAULT;
+}
+
+/**
+ * bpf_strcspn - Calculate the length of the initial substring of @s__ign which
+ * does not contain letters in @reject__ign
+ * @s__ign: The string to be searched
+ * @reject__ign: The string to search for
+ *
+ * Return:
+ * * >=0 - The length of the initial substring of @s__ign which does not
+ * contain letters from @reject__ign
+ * * %-EFAULT - Cannot read one of the strings
+ * * %-E2BIG - One of the strings is too large
+ * * %-ERANGE - One of the strings is outside of kernel address space
+ */
+__bpf_kfunc int bpf_strcspn(const char *s__ign, const char *reject__ign)
+{
+ char cs, cr;
+ int i, j;
+
+ if (!copy_from_kernel_nofault_allowed(s__ign, 1) ||
+ !copy_from_kernel_nofault_allowed(reject__ign, 1)) {
+ return -ERANGE;
+ }
+
+ guard(pagefault)();
+ for (i = 0; i < XATTR_SIZE_MAX; i++) {
+ __get_kernel_nofault(&cs, s__ign, char, err_out);
+ if (cs == '\0')
+ return i;
+ for (j = 0; j < XATTR_SIZE_MAX; j++) {
+ __get_kernel_nofault(&cr, reject__ign + j, char, err_out);
+ if (cs == cr || cr == '\0')
+ break;
+ }
+ if (j == XATTR_SIZE_MAX)
+ return -E2BIG;
+ if (cr != '\0')
+ return i;
+ s__ign++;
+ }
+ return -E2BIG;
+err_out:
+ return -EFAULT;
+}
+
+/**
+ * bpf_strnstr - Find the first substring in a length-limited string
+ * @s1__ign: The string to be searched
+ * @s2__ign: The string to search for
+ * @len: the maximum number of characters to search
+ *
+ * Return:
+ * * >=0 - Index of the first character of the first occurrence of @s2__ign
+ * within the first @len characters of @s1__ign
+ * * %-ENOENT - @s2__ign not found in the first @len characters of @s1__ign
+ * * %-EFAULT - Cannot read one of the strings
+ * * %-E2BIG - One of the strings is too large
+ * * %-ERANGE - One of the strings is outside of kernel address space
+ */
+__bpf_kfunc int bpf_strnstr(const char *s1__ign, const char *s2__ign, size_t len)
+{
+ char c1, c2;
+ int i, j;
+
+ if (!copy_from_kernel_nofault_allowed(s1__ign, 1) ||
+ !copy_from_kernel_nofault_allowed(s2__ign, 1)) {
+ return -ERANGE;
+ }
+
+ guard(pagefault)();
+ for (i = 0; i < XATTR_SIZE_MAX; i++) {
+ for (j = 0; i + j < len && j < XATTR_SIZE_MAX; j++) {
+ __get_kernel_nofault(&c2, s2__ign + j, char, err_out);
+ if (c2 == '\0')
+ return i;
+ __get_kernel_nofault(&c1, s1__ign + j, char, err_out);
+ if (c1 == '\0')
+ return -ENOENT;
+ if (c1 != c2)
+ break;
+ }
+ if (j == XATTR_SIZE_MAX)
+ return -E2BIG;
+ if (i + j == len)
+ return -ENOENT;
+ s1__ign++;
+ }
+ return -E2BIG;
+err_out:
+ return -EFAULT;
+}
+
+/**
+ * bpf_strstr - Find the first substring in a string
+ * @s1__ign: The string to be searched
+ * @s2__ign: The string to search for
+ *
+ * Return:
+ * * >=0 - Index of the first character of the first occurrence of @s2__ign
+ * within @s1__ign
+ * * %-ENOENT - @s2__ign is not a substring of @s1__ign
+ * * %-EFAULT - Cannot read one of the strings
+ * * %-E2BIG - One of the strings is too large
+ * * %-ERANGE - One of the strings is outside of kernel address space
+ */
+__bpf_kfunc int bpf_strstr(const char *s1__ign, const char *s2__ign)
+{
+ return bpf_strnstr(s1__ign, s2__ign, XATTR_SIZE_MAX);
+}
+
__bpf_kfunc_end_defs();
BTF_KFUNCS_START(generic_btf_ids)
@@ -3397,6 +3768,17 @@ BTF_ID_FLAGS(func, bpf_iter_dmabuf_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPAB
BTF_ID_FLAGS(func, bpf_iter_dmabuf_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
#endif
BTF_ID_FLAGS(func, __bpf_trap)
+BTF_ID_FLAGS(func, bpf_strcmp);
+BTF_ID_FLAGS(func, bpf_strchr);
+BTF_ID_FLAGS(func, bpf_strchrnul);
+BTF_ID_FLAGS(func, bpf_strnchr);
+BTF_ID_FLAGS(func, bpf_strrchr);
+BTF_ID_FLAGS(func, bpf_strlen);
+BTF_ID_FLAGS(func, bpf_strnlen);
+BTF_ID_FLAGS(func, bpf_strspn);
+BTF_ID_FLAGS(func, bpf_strcspn);
+BTF_ID_FLAGS(func, bpf_strstr);
+BTF_ID_FLAGS(func, bpf_strnstr);
BTF_KFUNCS_END(common_btf_ids)
static const struct btf_kfunc_id_set common_kfunc_set = {
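
Purely as a sketch of how a BPF program might call the new string kfuncs once they are exposed through vmlinux BTF; the __ksym externs, section name, and program shape are assumptions, not lines taken from the selftests in this series.

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

/* Declarations mirror the kernel signatures added above. */
extern int bpf_strcmp(const char *s1, const char *s2) __ksym;
extern int bpf_strstr(const char *s1, const char *s2) __ksym;

char _license[] SEC("license") = "GPL";

SEC("syscall")
int string_kfuncs_demo(void *ctx)
{
	static const char haystack[] = "bpf string kfuncs";

	/* Negative return values carry -EFAULT/-E2BIG/-ERANGE as documented above. */
	if (bpf_strcmp(haystack, "bpf string kfuncs") != 0)
		return -1;
	return bpf_strstr(haystack, "kfuncs"); /* index of the substring */
}
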
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 3969eb0382af..632d51b05fe9 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -394,17 +394,10 @@ static int cgroup_storage_check_btf(const struct bpf_map *map,
if (!btf_member_is_reg_int(btf, key_type, m, offset, size))
return -EINVAL;
} else {
- u32 int_data;
-
/*
* Key is expected to be u64, which stores the cgroup_inode_id
*/
-
- if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
- return -EINVAL;
-
- int_data = *(u32 *)(key_type + 1);
- if (BTF_INT_BITS(int_data) != 64 || BTF_INT_OFFSET(int_data))
+ if (!btf_type_is_i64(key_type))
return -EINVAL;
}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index dd5304c6ac3c..56500381c28a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3403,10 +3403,12 @@ static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link,
seq_printf(seq,
"attach_type:\t%d\n"
"target_obj_id:\t%u\n"
- "target_btf_id:\t%u\n",
+ "target_btf_id:\t%u\n"
+ "cookie:\t%llu\n",
tr_link->attach_type,
target_obj_id,
- target_btf_id);
+ target_btf_id,
+ tr_link->link.cookie);
}
static int bpf_tracing_link_fill_link_info(const struct bpf_link *link,
@@ -3416,6 +3418,7 @@ static int bpf_tracing_link_fill_link_info(const struct bpf_link *link,
container_of(link, struct bpf_tracing_link, link.link);
info->tracing.attach_type = tr_link->attach_type;
+ info->tracing.cookie = tr_link->link.cookie;
bpf_trampoline_unpack_key(tr_link->trampoline->key,
&info->tracing.target_obj_id,
&info->tracing.target_btf_id);
@@ -3651,8 +3654,10 @@ static void bpf_raw_tp_link_show_fdinfo(const struct bpf_link *link,
container_of(link, struct bpf_raw_tp_link, link);
seq_printf(seq,
- "tp_name:\t%s\n",
- raw_tp_link->btp->tp->name);
+ "tp_name:\t%s\n"
+ "cookie:\t%llu\n",
+ raw_tp_link->btp->tp->name,
+ raw_tp_link->cookie);
}
static int bpf_copy_to_user(char __user *ubuf, const char *buf, u32 ulen,
@@ -3688,6 +3693,7 @@ static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link,
return -EINVAL;
info->raw_tracepoint.tp_name_len = tp_len + 1;
+ info->raw_tracepoint.cookie = raw_tp_link->cookie;
if (!ubuf)
return 0;
@@ -3794,6 +3800,32 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event,
info->perf_event.kprobe.cookie = event->bpf_cookie;
return 0;
}
+
+static void bpf_perf_link_fdinfo_kprobe(const struct perf_event *event,
+ struct seq_file *seq)
+{
+ const char *name;
+ int err;
+ u32 prog_id, type;
+ u64 offset, addr;
+ unsigned long missed;
+
+ err = bpf_get_perf_event_info(event, &prog_id, &type, &name,
+ &offset, &addr, &missed);
+ if (err)
+ return;
+
+ seq_printf(seq,
+ "name:\t%s\n"
+ "offset:\t%#llx\n"
+ "missed:\t%lu\n"
+ "addr:\t%#llx\n"
+ "event_type:\t%s\n"
+ "cookie:\t%llu\n",
+ name, offset, missed, addr,
+ type == BPF_FD_TYPE_KRETPROBE ? "kretprobe" : "kprobe",
+ event->bpf_cookie);
+}
#endif
#ifdef CONFIG_UPROBE_EVENTS
@@ -3822,6 +3854,31 @@ static int bpf_perf_link_fill_uprobe(const struct perf_event *event,
info->perf_event.uprobe.ref_ctr_offset = ref_ctr_offset;
return 0;
}
+
+static void bpf_perf_link_fdinfo_uprobe(const struct perf_event *event,
+ struct seq_file *seq)
+{
+ const char *name;
+ int err;
+ u32 prog_id, type;
+ u64 offset, ref_ctr_offset;
+ unsigned long missed;
+
+ err = bpf_get_perf_event_info(event, &prog_id, &type, &name,
+ &offset, &ref_ctr_offset, &missed);
+ if (err)
+ return;
+
+ seq_printf(seq,
+ "name:\t%s\n"
+ "offset:\t%#llx\n"
+ "ref_ctr_offset:\t%#llx\n"
+ "event_type:\t%s\n"
+ "cookie:\t%llu\n",
+ name, offset, ref_ctr_offset,
+ type == BPF_FD_TYPE_URETPROBE ? "uretprobe" : "uprobe",
+ event->bpf_cookie);
+}
#endif
static int bpf_perf_link_fill_probe(const struct perf_event *event,
@@ -3890,10 +3947,79 @@ static int bpf_perf_link_fill_link_info(const struct bpf_link *link,
}
}
+static void bpf_perf_event_link_show_fdinfo(const struct perf_event *event,
+ struct seq_file *seq)
+{
+ seq_printf(seq,
+ "type:\t%u\n"
+ "config:\t%llu\n"
+ "event_type:\t%s\n"
+ "cookie:\t%llu\n",
+ event->attr.type, event->attr.config,
+ "event", event->bpf_cookie);
+}
+
+static void bpf_tracepoint_link_show_fdinfo(const struct perf_event *event,
+ struct seq_file *seq)
+{
+ int err;
+ const char *name;
+ u32 prog_id;
+
+ err = bpf_get_perf_event_info(event, &prog_id, NULL, &name, NULL,
+ NULL, NULL);
+ if (err)
+ return;
+
+ seq_printf(seq,
+ "tp_name:\t%s\n"
+ "event_type:\t%s\n"
+ "cookie:\t%llu\n",
+ name, "tracepoint", event->bpf_cookie);
+}
+
+static void bpf_probe_link_show_fdinfo(const struct perf_event *event,
+ struct seq_file *seq)
+{
+#ifdef CONFIG_KPROBE_EVENTS
+ if (event->tp_event->flags & TRACE_EVENT_FL_KPROBE)
+ return bpf_perf_link_fdinfo_kprobe(event, seq);
+#endif
+
+#ifdef CONFIG_UPROBE_EVENTS
+ if (event->tp_event->flags & TRACE_EVENT_FL_UPROBE)
+ return bpf_perf_link_fdinfo_uprobe(event, seq);
+#endif
+}
+
+static void bpf_perf_link_show_fdinfo(const struct bpf_link *link,
+ struct seq_file *seq)
+{
+ struct bpf_perf_link *perf_link;
+ const struct perf_event *event;
+
+ perf_link = container_of(link, struct bpf_perf_link, link);
+ event = perf_get_event(perf_link->perf_file);
+ if (IS_ERR(event))
+ return;
+
+ switch (event->prog->type) {
+ case BPF_PROG_TYPE_PERF_EVENT:
+ return bpf_perf_event_link_show_fdinfo(event, seq);
+ case BPF_PROG_TYPE_TRACEPOINT:
+ return bpf_tracepoint_link_show_fdinfo(event, seq);
+ case BPF_PROG_TYPE_KPROBE:
+ return bpf_probe_link_show_fdinfo(event, seq);
+ default:
+ return;
+ }
+}
+
static const struct bpf_link_ops bpf_perf_link_lops = {
.release = bpf_perf_link_release,
.dealloc = bpf_perf_link_dealloc,
.fill_link_info = bpf_perf_link_fill_link_info,
+ .show_fdinfo = bpf_perf_link_show_fdinfo,
};
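With the show_fdinfo callback wired up, reading /proc/<pid>/fdinfo/<link-fd> for, say, a kprobe-backed perf link yields roughly the following (values are illustrative; the first four lines come from the generic bpf_link_show_fdinfo()):

link_type:	perf
link_id:	42
prog_tag:	0000000000000000
prog_id:	103
name:	do_unlinkat
offset:	0x0
missed:	0
addr:	0x0
event_type:	kprobe
cookie:	4242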
static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
@@ -4185,6 +4311,25 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
}
}
+static bool is_cgroup_prog_type(enum bpf_prog_type ptype, enum bpf_attach_type atype,
+ bool check_atype)
+{
+ switch (ptype) {
+ case BPF_PROG_TYPE_CGROUP_DEVICE:
+ case BPF_PROG_TYPE_CGROUP_SKB:
+ case BPF_PROG_TYPE_CGROUP_SOCK:
+ case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
+ case BPF_PROG_TYPE_CGROUP_SOCKOPT:
+ case BPF_PROG_TYPE_CGROUP_SYSCTL:
+ case BPF_PROG_TYPE_SOCK_OPS:
+ return true;
+ case BPF_PROG_TYPE_LSM:
+ return check_atype ? atype == BPF_LSM_CGROUP : true;
+ default:
+ return false;
+ }
+}
+
#define BPF_PROG_ATTACH_LAST_FIELD expected_revision
#define BPF_F_ATTACH_MASK_BASE \
@@ -4215,6 +4360,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
if (bpf_mprog_supported(ptype)) {
if (attr->attach_flags & ~BPF_F_ATTACH_MASK_MPROG)
return -EINVAL;
+ } else if (is_cgroup_prog_type(ptype, 0, false)) {
+ if (attr->attach_flags & ~(BPF_F_ATTACH_MASK_BASE | BPF_F_ATTACH_MASK_MPROG))
+ return -EINVAL;
} else {
if (attr->attach_flags & ~BPF_F_ATTACH_MASK_BASE)
return -EINVAL;
@@ -4232,6 +4380,11 @@ static int bpf_prog_attach(const union bpf_attr *attr)
return -EINVAL;
}
+ if (is_cgroup_prog_type(ptype, prog->expected_attach_type, true)) {
+ ret = cgroup_bpf_prog_attach(attr, ptype, prog);
+ goto out;
+ }
+
switch (ptype) {
case BPF_PROG_TYPE_SK_SKB:
case BPF_PROG_TYPE_SK_MSG:
@@ -4243,20 +4396,6 @@ static int bpf_prog_attach(const union bpf_attr *attr)
case BPF_PROG_TYPE_FLOW_DISSECTOR:
ret = netns_bpf_prog_attach(attr, prog);
break;
- case BPF_PROG_TYPE_CGROUP_DEVICE:
- case BPF_PROG_TYPE_CGROUP_SKB:
- case BPF_PROG_TYPE_CGROUP_SOCK:
- case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
- case BPF_PROG_TYPE_CGROUP_SOCKOPT:
- case BPF_PROG_TYPE_CGROUP_SYSCTL:
- case BPF_PROG_TYPE_SOCK_OPS:
- case BPF_PROG_TYPE_LSM:
- if (ptype == BPF_PROG_TYPE_LSM &&
- prog->expected_attach_type != BPF_LSM_CGROUP)
- ret = -EINVAL;
- else
- ret = cgroup_bpf_prog_attach(attr, ptype, prog);
- break;
case BPF_PROG_TYPE_SCHED_CLS:
if (attr->attach_type == BPF_TCX_INGRESS ||
attr->attach_type == BPF_TCX_EGRESS)
@@ -4267,7 +4406,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
default:
ret = -EINVAL;
}
-
+out:
if (ret)
bpf_prog_put(prog);
return ret;
@@ -4295,6 +4434,9 @@ static int bpf_prog_detach(const union bpf_attr *attr)
if (IS_ERR(prog))
return PTR_ERR(prog);
}
+ } else if (is_cgroup_prog_type(ptype, 0, false)) {
+ if (attr->attach_flags || attr->relative_fd)
+ return -EINVAL;
} else if (attr->attach_flags ||
attr->relative_fd ||
attr->expected_revision) {
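With all cgroup program types now routed through cgroup_bpf_prog_attach() and the mprog flag mask accepted for them, userspace can request ordered attachment much like for tcx. A sketch using libbpf's attach opts; the field names follow libbpf's bpf_prog_attach_opts and should be treated as assumptions as far as this patch itself is concerned:

#include <bpf/bpf.h>

static int attach_before(int prog_fd, int anchor_prog_fd, int cgroup_fd)
{
	LIBBPF_OPTS(bpf_prog_attach_opts, opts,
		.flags = BPF_F_BEFORE,
		.relative_fd = anchor_prog_fd,
		.expected_revision = 0);	/* 0: don't enforce a revision */

	return bpf_prog_attach_opts(prog_fd, cgroup_fd,
				    BPF_CGROUP_INET_INGRESS, &opts);
}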
diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c
index 9dbc31b25e3d..fa353c5d550f 100644
--- a/kernel/bpf/tnum.c
+++ b/kernel/bpf/tnum.c
@@ -83,6 +83,11 @@ struct tnum tnum_sub(struct tnum a, struct tnum b)
return TNUM(dv & ~mu, mu);
}
+struct tnum tnum_neg(struct tnum a)
+{
+ return tnum_sub(TNUM(0, 0), a);
+}
+
struct tnum tnum_and(struct tnum a, struct tnum b)
{
u64 alpha, beta, v;
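tnum_neg() is literally 0 - a computed with tnum_sub(), so its precision follows directly from the subtraction rules. Two worked cases for illustration:

/* a = TNUM(5, 0), a constant:
 *	tnum_neg(a) = TNUM(-5, 0), still a constant (two's complement).
 * a = TNUM(0, 1), the set {0, 1}:
 *	0 - {0, 1} = {0, -1}; the two results differ in every bit, so the
 *	best representable tnum is TNUM(0, ~0), i.e. fully unknown.
 */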
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index c4b1a98ff726..b1e358c16eeb 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -911,27 +911,32 @@ static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tram
return bpf_prog_start_time();
}
-static void notrace update_prog_stats(struct bpf_prog *prog,
- u64 start)
+static void notrace __update_prog_stats(struct bpf_prog *prog, u64 start)
{
struct bpf_prog_stats *stats;
+ unsigned long flags;
+ u64 duration;
- if (static_branch_unlikely(&bpf_stats_enabled_key) &&
- /* static_key could be enabled in __bpf_prog_enter*
- * and disabled in __bpf_prog_exit*.
- * And vice versa.
- * Hence check that 'start' is valid.
- */
- start > NO_START_TIME) {
- u64 duration = sched_clock() - start;
- unsigned long flags;
-
- stats = this_cpu_ptr(prog->stats);
- flags = u64_stats_update_begin_irqsave(&stats->syncp);
- u64_stats_inc(&stats->cnt);
- u64_stats_add(&stats->nsecs, duration);
- u64_stats_update_end_irqrestore(&stats->syncp, flags);
- }
+ /*
+ * static_key could be enabled in __bpf_prog_enter* and disabled in
+ * __bpf_prog_exit*. And vice versa. Check that 'start' is valid.
+ */
+ if (start <= NO_START_TIME)
+ return;
+
+ duration = sched_clock() - start;
+ stats = this_cpu_ptr(prog->stats);
+ flags = u64_stats_update_begin_irqsave(&stats->syncp);
+ u64_stats_inc(&stats->cnt);
+ u64_stats_add(&stats->nsecs, duration);
+ u64_stats_update_end_irqrestore(&stats->syncp, flags);
+}
+
+static __always_inline void notrace update_prog_stats(struct bpf_prog *prog,
+ u64 start)
+{
+ if (static_branch_unlikely(&bpf_stats_enabled_key))
+ __update_prog_stats(prog, start);
}
static void notrace __bpf_prog_exit_recur(struct bpf_prog *prog, u64 start,
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 169845710c7e..dd670ba41667 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -44,6 +44,11 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
#undef BPF_LINK_TYPE
};
+enum bpf_features {
+ BPF_FEAT_RDONLY_CAST_TO_VOID = 0,
+ __MAX_BPF_FEAT,
+};
+
struct bpf_mem_alloc bpf_global_percpu_ma;
static bool bpf_global_percpu_ma_set;
@@ -405,7 +410,8 @@ static bool reg_not_null(const struct bpf_reg_state *reg)
type == PTR_TO_MAP_KEY ||
type == PTR_TO_SOCK_COMMON ||
(type == PTR_TO_BTF_ID && is_trusted_reg(reg)) ||
- type == PTR_TO_MEM;
+ type == PTR_TO_MEM ||
+ type == CONST_PTR_TO_MAP;
}
static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
@@ -1403,7 +1409,7 @@ static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
goto out;
alloc_size = kmalloc_size_roundup(size_mul(new_n, size));
- new_arr = krealloc(arr, alloc_size, GFP_KERNEL);
+ new_arr = krealloc(arr, alloc_size, GFP_KERNEL_ACCOUNT);
if (!new_arr) {
kfree(arr);
return NULL;
@@ -1420,7 +1426,7 @@ out:
static int copy_reference_state(struct bpf_verifier_state *dst, const struct bpf_verifier_state *src)
{
dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
- sizeof(struct bpf_reference_state), GFP_KERNEL);
+ sizeof(struct bpf_reference_state), GFP_KERNEL_ACCOUNT);
if (!dst->refs)
return -ENOMEM;
@@ -1439,7 +1445,7 @@ static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_st
size_t n = src->allocated_stack / BPF_REG_SIZE;
dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
- GFP_KERNEL);
+ GFP_KERNEL_ACCOUNT);
if (!dst->stack)
return -ENOMEM;
@@ -1647,7 +1653,7 @@ static void update_peak_states(struct bpf_verifier_env *env)
{
u32 cur_states;
- cur_states = env->explored_states_size + env->free_list_size;
+ cur_states = env->explored_states_size + env->free_list_size + env->num_backedges;
env->peak_states = max(env->peak_states, cur_states);
}
@@ -1659,6 +1665,13 @@ static void free_func_state(struct bpf_func_state *state)
kfree(state);
}
+static void clear_jmp_history(struct bpf_verifier_state *state)
+{
+ kfree(state->jmp_history);
+ state->jmp_history = NULL;
+ state->jmp_history_cnt = 0;
+}
+
static void free_verifier_state(struct bpf_verifier_state *state,
bool free_self)
{
@@ -1669,11 +1682,12 @@ static void free_verifier_state(struct bpf_verifier_state *state,
state->frame[i] = NULL;
}
kfree(state->refs);
+ clear_jmp_history(state);
if (free_self)
kfree(state);
}
-/* struct bpf_verifier_state->{parent,loop_entry} refer to states
+/* struct bpf_verifier_state->parent refers to states
 * that are in either of env->{explored_states,free_list}.
* In both cases the state is contained in struct bpf_verifier_state_list.
*/
@@ -1684,37 +1698,24 @@ static struct bpf_verifier_state_list *state_parent_as_list(struct bpf_verifier_
return NULL;
}
-static struct bpf_verifier_state_list *state_loop_entry_as_list(struct bpf_verifier_state *st)
-{
- if (st->loop_entry)
- return container_of(st->loop_entry, struct bpf_verifier_state_list, state);
- return NULL;
-}
+static bool incomplete_read_marks(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *st);
/* A state can be freed if it is no longer referenced:
* - is in the env->free_list;
* - has no children states;
- * - is not used as loop_entry.
- *
- * Freeing a state can make it's loop_entry free-able.
*/
static void maybe_free_verifier_state(struct bpf_verifier_env *env,
struct bpf_verifier_state_list *sl)
{
- struct bpf_verifier_state_list *loop_entry_sl;
-
- while (sl && sl->in_free_list &&
- sl->state.branches == 0 &&
- sl->state.used_as_loop_entry == 0) {
- loop_entry_sl = state_loop_entry_as_list(&sl->state);
- if (loop_entry_sl)
- loop_entry_sl->state.used_as_loop_entry--;
- list_del(&sl->node);
- free_verifier_state(&sl->state, false);
- kfree(sl);
- env->free_list_size--;
- sl = loop_entry_sl;
- }
+ if (!sl->in_free_list
+ || sl->state.branches != 0
+ || incomplete_read_marks(env, &sl->state))
+ return;
+ list_del(&sl->node);
+ free_verifier_state(&sl->state, false);
+ kfree(sl);
+ env->free_list_size--;
}
/* copy verifier state from src to dst growing dst stack space
@@ -1733,6 +1734,13 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
struct bpf_func_state *dst;
int i, err;
+ dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
+ src->jmp_history_cnt, sizeof(*dst_state->jmp_history),
+ GFP_KERNEL_ACCOUNT);
+ if (!dst_state->jmp_history)
+ return -ENOMEM;
+ dst_state->jmp_history_cnt = src->jmp_history_cnt;
+
 /* if dst has more stack frames than src frame, free them, this is also
* necessary in case of exceptional exits using bpf_throw.
*/
@@ -1750,17 +1758,14 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
dst_state->parent = src->parent;
dst_state->first_insn_idx = src->first_insn_idx;
dst_state->last_insn_idx = src->last_insn_idx;
- dst_state->insn_hist_start = src->insn_hist_start;
- dst_state->insn_hist_end = src->insn_hist_end;
dst_state->dfs_depth = src->dfs_depth;
dst_state->callback_unroll_depth = src->callback_unroll_depth;
- dst_state->used_as_loop_entry = src->used_as_loop_entry;
dst_state->may_goto_depth = src->may_goto_depth;
- dst_state->loop_entry = src->loop_entry;
+ dst_state->equal_state = src->equal_state;
for (i = 0; i <= src->curframe; i++) {
dst = dst_state->frame[i];
if (!dst) {
- dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+ dst = kzalloc(sizeof(*dst), GFP_KERNEL_ACCOUNT);
if (!dst)
return -ENOMEM;
dst_state->frame[i] = dst;
@@ -1799,173 +1804,232 @@ static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_sta
return true;
}
-/* Open coded iterators allow back-edges in the state graph in order to
- * check unbounded loops that iterators.
- *
- * In is_state_visited() it is necessary to know if explored states are
- * part of some loops in order to decide whether non-exact states
- * comparison could be used:
- * - non-exact states comparison establishes sub-state relation and uses
- * read and precision marks to do so, these marks are propagated from
- * children states and thus are not guaranteed to be final in a loop;
- * - exact states comparison just checks if current and explored states
- * are identical (and thus form a back-edge).
- *
- * Paper "A New Algorithm for Identifying Loops in Decompilation"
- * by Tao Wei, Jian Mao, Wei Zou and Yu Chen [1] presents a convenient
- * algorithm for loop structure detection and gives an overview of
- * relevant terminology. It also has helpful illustrations.
- *
- * [1] https://api.semanticscholar.org/CorpusID:15784067
- *
- * We use a similar algorithm but because loop nested structure is
- * irrelevant for verifier ours is significantly simpler and resembles
- * strongly connected components algorithm from Sedgewick's textbook.
- *
- * Define topmost loop entry as a first node of the loop traversed in a
- * depth first search starting from initial state. The goal of the loop
- * tracking algorithm is to associate topmost loop entries with states
- * derived from these entries.
- *
- * For each step in the DFS states traversal algorithm needs to identify
- * the following situations:
- *
- * initial initial initial
- * | | |
- * V V V
- * ... ... .---------> hdr
- * | | | |
- * V V | V
- * cur .-> succ | .------...
- * | | | | | |
- * V | V | V V
- * succ '-- cur | ... ...
- * | | |
- * | V V
- * | succ <- cur
- * | |
- * | V
- * | ...
- * | |
- * '----'
- *
- * (A) successor state of cur (B) successor state of cur or it's entry
- * not yet traversed are in current DFS path, thus cur and succ
- * are members of the same outermost loop
- *
- * initial initial
- * | |
- * V V
- * ... ...
- * | |
- * V V
- * .------... .------...
- * | | | |
- * V V V V
- * .-> hdr ... ... ...
- * | | | | |
- * | V V V V
- * | succ <- cur succ <- cur
- * | | |
- * | V V
- * | ... ...
- * | | |
- * '----' exit
- *
- * (C) successor state of cur is a part of some loop but this loop
- * does not include cur or successor state is not in a loop at all.
- *
- * Algorithm could be described as the following python code:
- *
- * traversed = set() # Set of traversed nodes
- * entries = {} # Mapping from node to loop entry
- * depths = {} # Depth level assigned to graph node
- * path = set() # Current DFS path
- *
- * # Find outermost loop entry known for n
- * def get_loop_entry(n):
- * h = entries.get(n, None)
- * while h in entries:
- * h = entries[h]
- * return h
- *
- * # Update n's loop entry if h comes before n in current DFS path.
- * def update_loop_entry(n, h):
- * if h in path and depths[entries.get(n, n)] < depths[n]:
- * entries[n] = h1
+/* Return IP for a given frame in a call stack */
+static u32 frame_insn_idx(struct bpf_verifier_state *st, u32 frame)
+{
+ return frame == st->curframe
+ ? st->insn_idx
+ : st->frame[frame + 1]->callsite;
+}
+
+/* For state @st look for a topmost frame with frame_insn_idx() in some SCC,
+ * if such frame exists, form a corresponding @callchain as an array of
+ * call sites leading to this frame and SCC id.
+ * E.g.:
*
- * def dfs(n, depth):
- * traversed.add(n)
- * path.add(n)
- * depths[n] = depth
- * for succ in G.successors(n):
- * if succ not in traversed:
- * # Case A: explore succ and update cur's loop entry
- * # only if succ's entry is in current DFS path.
- * dfs(succ, depth + 1)
- * h = entries.get(succ, None)
- * update_loop_entry(n, h)
- * else:
- * # Case B or C depending on `h1 in path` check in update_loop_entry().
- * update_loop_entry(n, succ)
- * path.remove(n)
+ * void foo() { A: loop {... SCC#1 ...}; }
+ * void bar() { B: loop { C: foo(); ... SCC#2 ... }
+ * D: loop { E: foo(); ... SCC#3 ... } }
+ * void main() { F: bar(); }
*
- * To adapt this algorithm for use with verifier:
- * - use st->branch == 0 as a signal that DFS of succ had been finished
- * and cur's loop entry has to be updated (case A), handle this in
- * update_branch_counts();
- * - use st->branch > 0 as a signal that st is in the current DFS path;
- * - handle cases B and C in is_state_visited().
+ * @callchain at (A) would be either (F,SCC#2) or (F,SCC#3) depending
+ * on @st frame call sites being (F,C,A) or (F,E,A).
*/
-static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_env *env,
- struct bpf_verifier_state *st)
+static bool compute_scc_callchain(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *st,
+ struct bpf_scc_callchain *callchain)
{
- struct bpf_verifier_state *topmost = st->loop_entry;
- u32 steps = 0;
+ u32 i, scc, insn_idx;
- while (topmost && topmost->loop_entry) {
- if (verifier_bug_if(steps++ > st->dfs_depth, env, "infinite loop"))
- return ERR_PTR(-EFAULT);
- topmost = topmost->loop_entry;
+ memset(callchain, 0, sizeof(*callchain));
+ for (i = 0; i <= st->curframe; i++) {
+ insn_idx = frame_insn_idx(st, i);
+ scc = env->insn_aux_data[insn_idx].scc;
+ if (scc) {
+ callchain->scc = scc;
+ break;
+ } else if (i < st->curframe) {
+ callchain->callsites[i] = insn_idx;
+ } else {
+ return false;
+ }
}
- return topmost;
+ return true;
}
-static void update_loop_entry(struct bpf_verifier_env *env,
- struct bpf_verifier_state *cur, struct bpf_verifier_state *hdr)
+/* Check if bpf_scc_visit instance for @callchain exists. */
+static struct bpf_scc_visit *scc_visit_lookup(struct bpf_verifier_env *env,
+ struct bpf_scc_callchain *callchain)
{
- /* The hdr->branches check decides between cases B and C in
- * comment for get_loop_entry(). If hdr->branches == 0 then
- * head's topmost loop entry is not in current DFS path,
- * hence 'cur' and 'hdr' are not in the same loop and there is
- * no need to update cur->loop_entry.
- */
- if (hdr->branches && hdr->dfs_depth < (cur->loop_entry ?: cur)->dfs_depth) {
- if (cur->loop_entry) {
- cur->loop_entry->used_as_loop_entry--;
- maybe_free_verifier_state(env, state_loop_entry_as_list(cur));
- }
- cur->loop_entry = hdr;
- hdr->used_as_loop_entry++;
+ struct bpf_scc_info *info = env->scc_info[callchain->scc];
+	struct bpf_scc_visit *visits = info ? info->visits : NULL;
+ u32 i;
+
+ if (!info)
+ return NULL;
+ for (i = 0; i < info->num_visits; i++)
+ if (memcmp(callchain, &visits[i].callchain, sizeof(*callchain)) == 0)
+ return &visits[i];
+ return NULL;
+}
+
+/* Allocate a new bpf_scc_visit instance corresponding to @callchain.
+ * Allocated instances are alive for a duration of the do_check_common()
+ * call and are freed by free_states().
+ */
+static struct bpf_scc_visit *scc_visit_alloc(struct bpf_verifier_env *env,
+ struct bpf_scc_callchain *callchain)
+{
+ struct bpf_scc_visit *visit;
+ struct bpf_scc_info *info;
+ u32 scc, num_visits;
+ u64 new_sz;
+
+ scc = callchain->scc;
+ info = env->scc_info[scc];
+ num_visits = info ? info->num_visits : 0;
+ new_sz = sizeof(*info) + sizeof(struct bpf_scc_visit) * (num_visits + 1);
+ info = kvrealloc(env->scc_info[scc], new_sz, GFP_KERNEL_ACCOUNT);
+ if (!info)
+ return NULL;
+ env->scc_info[scc] = info;
+ info->num_visits = num_visits + 1;
+ visit = &info->visits[num_visits];
+ memset(visit, 0, sizeof(*visit));
+ memcpy(&visit->callchain, callchain, sizeof(*callchain));
+ return visit;
+}
+
+/* Form a string '(callsite#1,callsite#2,...,scc)' in env->tmp_str_buf */
+static char *format_callchain(struct bpf_verifier_env *env, struct bpf_scc_callchain *callchain)
+{
+ char *buf = env->tmp_str_buf;
+ int i, delta = 0;
+
+ delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "(");
+ for (i = 0; i < ARRAY_SIZE(callchain->callsites); i++) {
+ if (!callchain->callsites[i])
+ break;
+ delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "%u,",
+ callchain->callsites[i]);
+ }
+ delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "%u)", callchain->scc);
+ return env->tmp_str_buf;
+}
+
+/* If callchain for @st exists (@st is in some SCC), ensure that
+ * bpf_scc_visit instance for this callchain exists.
+ * If instance does not exist or is empty, assign visit->entry_state to @st.
+ */
+static int maybe_enter_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
+{
+ struct bpf_scc_callchain callchain;
+ struct bpf_scc_visit *visit;
+
+ if (!compute_scc_callchain(env, st, &callchain))
+ return 0;
+ visit = scc_visit_lookup(env, &callchain);
+ visit = visit ?: scc_visit_alloc(env, &callchain);
+ if (!visit)
+ return -ENOMEM;
+ if (!visit->entry_state) {
+ visit->entry_state = st;
+ if (env->log.level & BPF_LOG_LEVEL2)
+ verbose(env, "SCC enter %s\n", format_callchain(env, &callchain));
+ }
+ return 0;
+}
+
+static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visit *visit);
+
+/* If callchain for @st exists (@st is in some SCC), make it empty:
+ * - set visit->entry_state to NULL;
+ * - flush accumulated backedges.
+ */
+static int maybe_exit_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
+{
+ struct bpf_scc_callchain callchain;
+ struct bpf_scc_visit *visit;
+
+ if (!compute_scc_callchain(env, st, &callchain))
+ return 0;
+ visit = scc_visit_lookup(env, &callchain);
+ if (!visit) {
+ verifier_bug(env, "scc exit: no visit info for call chain %s",
+ format_callchain(env, &callchain));
+ return -EFAULT;
+ }
+ if (visit->entry_state != st)
+ return 0;
+ if (env->log.level & BPF_LOG_LEVEL2)
+ verbose(env, "SCC exit %s\n", format_callchain(env, &callchain));
+ visit->entry_state = NULL;
+ env->num_backedges -= visit->num_backedges;
+ visit->num_backedges = 0;
+ update_peak_states(env);
+ return propagate_backedges(env, visit);
+}
+
+/* Lookup a bpf_scc_visit instance corresponding to @st callchain
+ * and add @backedge to visit->backedges. @st callchain must exist.
+ */
+static int add_scc_backedge(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *st,
+ struct bpf_scc_backedge *backedge)
+{
+ struct bpf_scc_callchain callchain;
+ struct bpf_scc_visit *visit;
+
+ if (!compute_scc_callchain(env, st, &callchain)) {
+ verifier_bug(env, "add backedge: no SCC in verification path, insn_idx %d",
+ st->insn_idx);
+ return -EFAULT;
+ }
+ visit = scc_visit_lookup(env, &callchain);
+ if (!visit) {
+ verifier_bug(env, "add backedge: no visit info for call chain %s",
+ format_callchain(env, &callchain));
+ return -EFAULT;
+ }
+ if (env->log.level & BPF_LOG_LEVEL2)
+ verbose(env, "SCC backedge %s\n", format_callchain(env, &callchain));
+ backedge->next = visit->backedges;
+ visit->backedges = backedge;
+ visit->num_backedges++;
+ env->num_backedges++;
+ update_peak_states(env);
+ return 0;
+}
+
+/* bpf_reg_state->live marks for registers in a state @st are incomplete,
+ * if state @st is in some SCC and not all execution paths starting at this
+ * SCC are fully explored.
+ */
+static bool incomplete_read_marks(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *st)
+{
+ struct bpf_scc_callchain callchain;
+ struct bpf_scc_visit *visit;
+
+ if (!compute_scc_callchain(env, st, &callchain))
+ return false;
+ visit = scc_visit_lookup(env, &callchain);
+ if (!visit)
+ return false;
+ return !!visit->backedges;
+}
+
+static void free_backedges(struct bpf_scc_visit *visit)
+{
+ struct bpf_scc_backedge *backedge, *next;
+
+ for (backedge = visit->backedges; backedge; backedge = next) {
+ free_verifier_state(&backedge->state, false);
+ next = backedge->next;
+ kvfree(backedge);
}
+ visit->backedges = NULL;
}
-static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
+static int update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
{
struct bpf_verifier_state_list *sl = NULL, *parent_sl;
struct bpf_verifier_state *parent;
+ int err;
while (st) {
u32 br = --st->branches;
- /* br == 0 signals that DFS exploration for 'st' is finished,
- * thus it is necessary to update parent's loop entry if it
- * turned out that st is a part of some loop.
- * This is a part of 'case A' in get_loop_entry() comment.
- */
- if (br == 0 && st->parent && st->loop_entry)
- update_loop_entry(env, st->parent, st->loop_entry);
-
/* WARN_ON(br > 1) technically makes sense here,
* but see comment in push_stack(), hence:
*/
@@ -1974,6 +2038,9 @@ static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifi
br);
if (br)
break;
+ err = maybe_exit_scc(env, st);
+ if (err)
+ return err;
parent = st->parent;
parent_sl = state_parent_as_list(st);
if (sl)
@@ -1981,6 +2048,7 @@ static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifi
st = parent;
sl = parent_sl;
}
+ return 0;
}
static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
@@ -2012,6 +2080,18 @@ static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
return 0;
}
+static bool error_recoverable_with_nospec(int err)
+{
+ /* Should only return true for non-fatal errors that are allowed to
+ * occur during speculative verification. For these we can insert a
+ * nospec and the program might still be accepted. Do not include
+ * something like ENOMEM because it is likely to re-occur for the next
+ * architectural path once it has been recovered-from in all speculative
+ * paths.
+ */
+ return err == -EPERM || err == -EACCES || err == -EINVAL;
+}
+
static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
int insn_idx, int prev_insn_idx,
bool speculative)
@@ -2020,9 +2100,9 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
struct bpf_verifier_stack_elem *elem;
int err;
- elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
+ elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL_ACCOUNT);
if (!elem)
- goto err;
+ return NULL;
elem->insn_idx = insn_idx;
elem->prev_insn_idx = prev_insn_idx;
@@ -2032,12 +2112,12 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
env->stack_size++;
err = copy_verifier_state(&elem->st, cur);
if (err)
- goto err;
+ return NULL;
elem->st.speculative |= speculative;
if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
verbose(env, "The sequence of %d jumps is too complex.\n",
env->stack_size);
- goto err;
+ return NULL;
}
if (elem->st.parent) {
++elem->st.parent->branches;
@@ -2052,12 +2132,6 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
*/
}
return &elem->st;
-err:
- free_verifier_state(env->cur_state, true);
- env->cur_state = NULL;
- /* pop all elements and return */
- while (!pop_stack(env, NULL, NULL, false));
- return NULL;
}
#define CALLER_SAVED_REGS 6
@@ -2787,9 +2861,9 @@ static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
struct bpf_verifier_stack_elem *elem;
struct bpf_func_state *frame;
- elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
+ elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL_ACCOUNT);
if (!elem)
- goto err;
+ return NULL;
elem->insn_idx = insn_idx;
elem->prev_insn_idx = prev_insn_idx;
@@ -2801,35 +2875,24 @@ static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
verbose(env,
"The sequence of %d jumps is too complex for async cb.\n",
env->stack_size);
- goto err;
+ return NULL;
}
/* Unlike push_stack() do not copy_verifier_state().
* The caller state doesn't matter.
* This is async callback. It starts in a fresh stack.
* Initialize it similar to do_check_common().
- * But we do need to make sure to not clobber insn_hist, so we keep
- * chaining insn_hist_start/insn_hist_end indices as for a normal
- * child state.
*/
elem->st.branches = 1;
elem->st.in_sleepable = is_sleepable;
- elem->st.insn_hist_start = env->cur_state->insn_hist_end;
- elem->st.insn_hist_end = elem->st.insn_hist_start;
- frame = kzalloc(sizeof(*frame), GFP_KERNEL);
+ frame = kzalloc(sizeof(*frame), GFP_KERNEL_ACCOUNT);
if (!frame)
- goto err;
+ return NULL;
init_func_state(env, frame,
BPF_MAIN_FUNC /* callsite */,
0 /* frameno within this callchain */,
subprog /* subprog number within this prog */);
elem->st.frame[0] = frame;
return &elem->st;
-err:
- free_verifier_state(env->cur_state, true);
- env->cur_state = NULL;
- /* pop all elements and return */
- while (!pop_stack(env, NULL, NULL, false));
- return NULL;
}
@@ -3167,7 +3230,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
return -EINVAL;
}
- tab = kzalloc(sizeof(*tab), GFP_KERNEL);
+ tab = kzalloc(sizeof(*tab), GFP_KERNEL_ACCOUNT);
if (!tab)
return -ENOMEM;
prog_aux->kfunc_tab = tab;
@@ -3183,7 +3246,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
return 0;
if (!btf_tab && offset) {
- btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL);
+ btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL_ACCOUNT);
if (!btf_tab)
return -ENOMEM;
prog_aux->kfunc_btf_tab = btf_tab;
@@ -3843,10 +3906,11 @@ static void linked_regs_unpack(u64 val, struct linked_regs *s)
}
/* for any branch, call, exit record the history of jmps in the given state */
-static int push_insn_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur,
- int insn_flags, u64 linked_regs)
+static int push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur,
+ int insn_flags, u64 linked_regs)
{
- struct bpf_insn_hist_entry *p;
+ u32 cnt = cur->jmp_history_cnt;
+ struct bpf_jmp_history_entry *p;
size_t alloc_size;
/* combine instruction flags if we already recorded this instruction */
@@ -3866,32 +3930,29 @@ static int push_insn_history(struct bpf_verifier_env *env, struct bpf_verifier_s
return 0;
}
- if (cur->insn_hist_end + 1 > env->insn_hist_cap) {
- alloc_size = size_mul(cur->insn_hist_end + 1, sizeof(*p));
- p = kvrealloc(env->insn_hist, alloc_size, GFP_USER);
- if (!p)
- return -ENOMEM;
- env->insn_hist = p;
- env->insn_hist_cap = alloc_size / sizeof(*p);
- }
+ cnt++;
+ alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p)));
+ p = krealloc(cur->jmp_history, alloc_size, GFP_KERNEL_ACCOUNT);
+ if (!p)
+ return -ENOMEM;
+ cur->jmp_history = p;
- p = &env->insn_hist[cur->insn_hist_end];
+ p = &cur->jmp_history[cnt - 1];
p->idx = env->insn_idx;
p->prev_idx = env->prev_insn_idx;
p->flags = insn_flags;
p->linked_regs = linked_regs;
-
- cur->insn_hist_end++;
+ cur->jmp_history_cnt = cnt;
env->cur_hist_ent = p;
return 0;
}
-static struct bpf_insn_hist_entry *get_insn_hist_entry(struct bpf_verifier_env *env,
- u32 hist_start, u32 hist_end, int insn_idx)
+static struct bpf_jmp_history_entry *get_jmp_hist_entry(struct bpf_verifier_state *st,
+ u32 hist_end, int insn_idx)
{
- if (hist_end > hist_start && env->insn_hist[hist_end - 1].idx == insn_idx)
- return &env->insn_hist[hist_end - 1];
+ if (hist_end > 0 && st->jmp_history[hist_end - 1].idx == insn_idx)
+ return &st->jmp_history[hist_end - 1];
return NULL;
}
@@ -3908,26 +3969,25 @@ static struct bpf_insn_hist_entry *get_insn_hist_entry(struct bpf_verifier_env *
* history entry recording a jump from last instruction of parent state and
* first instruction of given state.
*/
-static int get_prev_insn_idx(const struct bpf_verifier_env *env,
- struct bpf_verifier_state *st,
- int insn_idx, u32 hist_start, u32 *hist_endp)
+static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
+ u32 *history)
{
- u32 hist_end = *hist_endp;
- u32 cnt = hist_end - hist_start;
+ u32 cnt = *history;
- if (insn_idx == st->first_insn_idx) {
+ if (i == st->first_insn_idx) {
if (cnt == 0)
return -ENOENT;
- if (cnt == 1 && env->insn_hist[hist_start].idx == insn_idx)
+ if (cnt == 1 && st->jmp_history[0].idx == i)
return -ENOENT;
}
- if (cnt && env->insn_hist[hist_end - 1].idx == insn_idx) {
- (*hist_endp)--;
- return env->insn_hist[hist_end - 1].prev_idx;
+ if (cnt && st->jmp_history[cnt - 1].idx == i) {
+ i = st->jmp_history[cnt - 1].prev_idx;
+ (*history)--;
} else {
- return insn_idx - 1;
+ i--;
}
+ return i;
}
static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
@@ -4108,7 +4168,7 @@ static void fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask)
/* If any register R in hist->linked_regs is marked as precise in bt,
* do bt_set_frame_{reg,slot}(bt, R) for all registers in hist->linked_regs.
*/
-static void bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_insn_hist_entry *hist)
+static void bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_jmp_history_entry *hist)
{
struct linked_regs linked_regs;
bool some_precise = false;
@@ -4153,7 +4213,7 @@ static bool calls_callback(struct bpf_verifier_env *env, int insn_idx);
* - *was* processed previously during backtracking.
*/
static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
- struct bpf_insn_hist_entry *hist, struct backtrack_state *bt)
+ struct bpf_jmp_history_entry *hist, struct backtrack_state *bt)
{
struct bpf_insn *insn = env->prog->insnsi + idx;
u8 class = BPF_CLASS(insn->code);
@@ -4571,7 +4631,7 @@ static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_
* SCALARS, as well as any other registers and slots that contribute to
* a tracked state of given registers/stack slots, depending on specific BPF
* assembly instructions (see backtrack_insns() for exact instruction handling
- * logic). This backtracking relies on recorded insn_hist and is able to
+ * logic). This backtracking relies on recorded jmp_history and is able to
* traverse entire chain of parent states. This process ends only when all the
* necessary registers/slots and their transitive dependencies are marked as
* precise.
@@ -4651,23 +4711,27 @@ static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_
* mark_all_scalars_imprecise() to hopefully get more permissive and generic
* finalized states which help in short circuiting more future states.
*/
-static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
+static int __mark_chain_precision(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *starting_state,
+ int regno,
+ bool *changed)
{
+ struct bpf_verifier_state *st = starting_state;
struct backtrack_state *bt = &env->bt;
- struct bpf_verifier_state *st = env->cur_state;
int first_idx = st->first_insn_idx;
- int last_idx = env->insn_idx;
+ int last_idx = starting_state->insn_idx;
int subseq_idx = -1;
struct bpf_func_state *func;
+ bool tmp, skip_first = true;
struct bpf_reg_state *reg;
- bool skip_first = true;
int i, fr, err;
if (!env->bpf_capable)
return 0;
+ changed = changed ?: &tmp;
/* set frame number from which we are starting to backtrack */
- bt_init(bt, env->cur_state->curframe);
+ bt_init(bt, starting_state->curframe);
/* Do sanity checks against current state of register and/or stack
* slot, but don't set precise flag in current state, as precision
@@ -4688,9 +4752,8 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
for (;;) {
DECLARE_BITMAP(mask, 64);
- u32 hist_start = st->insn_hist_start;
- u32 hist_end = st->insn_hist_end;
- struct bpf_insn_hist_entry *hist;
+ u32 history = st->jmp_history_cnt;
+ struct bpf_jmp_history_entry *hist;
if (env->log.level & BPF_LOG_LEVEL2) {
verbose(env, "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d \n",
@@ -4712,8 +4775,10 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
for_each_set_bit(i, mask, 32) {
reg = &st->frame[0]->regs[i];
bt_clear_reg(bt, i);
- if (reg->type == SCALAR_VALUE)
+ if (reg->type == SCALAR_VALUE) {
reg->precise = true;
+ *changed = true;
+ }
}
return 0;
}
@@ -4728,11 +4793,11 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
err = 0;
skip_first = false;
} else {
- hist = get_insn_hist_entry(env, hist_start, hist_end, i);
+ hist = get_jmp_hist_entry(st, history, i);
err = backtrack_insn(env, i, subseq_idx, hist, bt);
}
if (err == -ENOTSUPP) {
- mark_all_scalars_precise(env, env->cur_state);
+ mark_all_scalars_precise(env, starting_state);
bt_reset(bt);
return 0;
} else if (err) {
@@ -4745,7 +4810,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
*/
return 0;
subseq_idx = i;
- i = get_prev_insn_idx(env, st, i, hist_start, &hist_end);
+ i = get_prev_insn_idx(st, i, &history);
if (i == -ENOENT)
break;
if (i >= env->prog->len) {
@@ -4772,10 +4837,12 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
bt_clear_frame_reg(bt, fr, i);
continue;
}
- if (reg->precise)
+ if (reg->precise) {
bt_clear_frame_reg(bt, fr, i);
- else
+ } else {
reg->precise = true;
+ *changed = true;
+ }
}
bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr));
@@ -4790,10 +4857,12 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
continue;
}
reg = &func->stack[i].spilled_ptr;
- if (reg->precise)
+ if (reg->precise) {
bt_clear_frame_slot(bt, fr, i);
- else
+ } else {
reg->precise = true;
+ *changed = true;
+ }
}
if (env->log.level & BPF_LOG_LEVEL2) {
fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
@@ -4820,7 +4889,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
* fallback to marking all precise
*/
if (!bt_empty(bt)) {
- mark_all_scalars_precise(env, env->cur_state);
+ mark_all_scalars_precise(env, starting_state);
bt_reset(bt);
}
@@ -4829,15 +4898,16 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
int mark_chain_precision(struct bpf_verifier_env *env, int regno)
{
- return __mark_chain_precision(env, regno);
+ return __mark_chain_precision(env, env->cur_state, regno, NULL);
}
/* mark_chain_precision_batch() assumes that env->bt is set in the caller to
* desired reg and stack masks across all relevant frames
*/
-static int mark_chain_precision_batch(struct bpf_verifier_env *env)
+static int mark_chain_precision_batch(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *starting_state)
{
- return __mark_chain_precision(env, -1);
+ return __mark_chain_precision(env, starting_state, -1, NULL);
}
static bool is_spillable_regtype(enum bpf_reg_type type)
@@ -5026,7 +5096,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
}
if (sanitize)
- env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
+ env->insn_aux_data[insn_idx].nospec_result = true;
}
err = destroy_if_dynptr_stack_slot(env, state, spi);
@@ -5109,7 +5179,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
}
if (insn_flags)
- return push_insn_history(env, env->cur_state, insn_flags, 0);
+ return push_jmp_history(env, env->cur_state, insn_flags, 0);
return 0;
}
@@ -5416,7 +5486,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
insn_flags = 0; /* we are not restoring spilled register */
}
if (insn_flags)
- return push_insn_history(env, env->cur_state, insn_flags, 0);
+ return push_jmp_history(env, env->cur_state, insn_flags, 0);
return 0;
}
@@ -7469,6 +7539,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
}
} else if (base_type(reg->type) == PTR_TO_MEM) {
bool rdonly_mem = type_is_rdonly_mem(reg->type);
+ bool rdonly_untrusted = rdonly_mem && (reg->type & PTR_UNTRUSTED);
if (type_may_be_null(reg->type)) {
verbose(env, "R%d invalid mem access '%s'\n", regno,
@@ -7488,8 +7559,13 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
return -EACCES;
}
- err = check_mem_region_access(env, regno, off, size,
- reg->mem_size, false);
+ /*
+ * Accesses to untrusted PTR_TO_MEM are done through probe
+ * instructions, hence no need to check bounds in that case.
+ */
+ if (!rdonly_untrusted)
+ err = check_mem_region_access(env, regno, off, size,
+ reg->mem_size, false);
if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
mark_reg_unknown(env, regs, value_regno);
} else if (reg->type == PTR_TO_CTX) {
@@ -8953,7 +9029,7 @@ static int resolve_map_arg_type(struct bpf_verifier_env *env,
if (!meta->map_ptr) {
/* kernel subsystem misconfigured verifier */
verbose(env, "invalid map_ptr to access map->type\n");
- return -EACCES;
+ return -EFAULT;
}
switch (meta->map_ptr->map_type) {
@@ -9489,7 +9565,7 @@ static int get_constant_map_key(struct bpf_verifier_env *env,
* to prevent pruning on it.
*/
bt_set_frame_slot(&env->bt, key->frameno, spi);
- err = mark_chain_precision_batch(env);
+ err = mark_chain_precision_batch(env, env->cur_state);
if (err < 0)
return err;
@@ -9641,7 +9717,7 @@ skip_type_check:
* that kernel subsystem misconfigured verifier
*/
verbose(env, "invalid map_ptr to access map->key\n");
- return -EACCES;
+ return -EFAULT;
}
key_size = meta->map_ptr->key_size;
err = check_helper_mem_access(env, regno, key_size, BPF_READ, false, NULL);
@@ -9668,7 +9744,7 @@ skip_type_check:
if (!meta->map_ptr) {
/* kernel subsystem misconfigured verifier */
verbose(env, "invalid map_ptr to access map->value\n");
- return -EACCES;
+ return -EFAULT;
}
meta->raw_mode = arg_type & MEM_UNINIT;
err = check_helper_mem_access(env, regno, meta->map_ptr->value_size,
@@ -10278,7 +10354,7 @@ static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int calls
}
caller = state->frame[state->curframe];
- callee = kzalloc(sizeof(*callee), GFP_KERNEL);
+ callee = kzalloc(sizeof(*callee), GFP_KERNEL_ACCOUNT);
if (!callee)
return -ENOMEM;
state->frame[state->curframe + 1] = callee;
@@ -10964,7 +11040,7 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
if (map == NULL) {
verbose(env, "kernel subsystem misconfigured verifier\n");
- return -EINVAL;
+ return -EFAULT;
}
/* In case of read-only, some additional restrictions
@@ -11003,7 +11079,7 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
return 0;
if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
verbose(env, "kernel subsystem misconfigured verifier\n");
- return -EINVAL;
+ return -EFAULT;
}
reg = &regs[BPF_REG_3];
@@ -11145,7 +11221,7 @@ static int check_get_func_ip(struct bpf_verifier_env *env)
return -ENOTSUPP;
}
-static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
+static struct bpf_insn_aux_data *cur_aux(const struct bpf_verifier_env *env)
{
return &env->insn_aux_data[env->insn_idx];
}
@@ -11257,7 +11333,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
func_id_name(func_id), func_id);
- return -EINVAL;
+ return -EFAULT;
}
memset(&meta, 0, sizeof(meta));
@@ -11559,7 +11635,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
if (meta.map_ptr == NULL) {
verbose(env,
"kernel subsystem misconfigured verifier\n");
- return -EINVAL;
+ return -EFAULT;
}
if (func_id == BPF_FUNC_map_lookup_elem &&
@@ -11652,7 +11728,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
verbose(env, "verifier internal error:");
verbose(env, "func %s has non-overwritten BPF_PTR_POISON return type\n",
func_id_name(func_id));
- return -EINVAL;
+ return -EFAULT;
}
ret_btf = btf_vmlinux;
ret_btf_id = *fn->ret_btf_id;
@@ -13536,16 +13612,24 @@ static int check_special_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_ca
regs[BPF_REG_0].btf_id = meta->ret_btf_id;
} else if (meta->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
ret_t = btf_type_by_id(desc_btf, meta->arg_constant.value);
- if (!ret_t || !btf_type_is_struct(ret_t)) {
+ if (!ret_t) {
+ verbose(env, "Unknown type ID %lld passed to kfunc bpf_rdonly_cast\n",
+ meta->arg_constant.value);
+ return -EINVAL;
+ } else if (btf_type_is_struct(ret_t)) {
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
+ regs[BPF_REG_0].btf = desc_btf;
+ regs[BPF_REG_0].btf_id = meta->arg_constant.value;
+ } else if (btf_type_is_void(ret_t)) {
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ regs[BPF_REG_0].type = PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED;
+ regs[BPF_REG_0].mem_size = 0;
+ } else {
verbose(env,
- "kfunc bpf_rdonly_cast type ID argument must be of a struct\n");
+ "kfunc bpf_rdonly_cast type ID argument must be of a struct or void\n");
return -EINVAL;
}
-
- mark_reg_known_zero(env, regs, BPF_REG_0);
- regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
- regs[BPF_REG_0].btf = desc_btf;
- regs[BPF_REG_0].btf_id = meta->arg_constant.value;
} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice] ||
meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
enum bpf_type_flag type_flag = get_dynptr_type_flag(meta->initialized_dynptr.type);
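The new void case is what makes the untrusted read-only memory handled earlier reachable from programs: casting to BTF type id 0 produces a pointer whose loads are emitted as probe instructions, so no bounds tracking is needed. A sketch, with the kfunc prototype assumed here (the selftests carry the canonical declaration):

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

extern void *bpf_rdonly_cast(const void *obj, __u32 btf_id) __ksym;

char _license[] SEC("license") = "GPL";

SEC("tp_btf/sys_enter")
int read_unchecked(u64 *ctx)
{
	struct task_struct *task = bpf_get_current_task_btf();
	const char *p;

	/* BTF id 0 is void: the result is PTR_TO_MEM | MEM_RDONLY |
	 * PTR_UNTRUSTED, so neither offset nor size is bounds-checked;
	 * a faulting access simply reads back as zero.
	 */
	p = bpf_rdonly_cast(task, 0);
	return p[0] + p[4096];
}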
@@ -14013,7 +14097,9 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
const struct bpf_insn *insn)
{
- return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
+ return env->bypass_spec_v1 ||
+ BPF_SRC(insn->code) == BPF_K ||
+ cur_aux(env)->nospec;
}
static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
@@ -14213,7 +14299,7 @@ static int sanitize_err(struct bpf_verifier_env *env,
case REASON_STACK:
verbose(env, "R%d could not be pushed for speculative verification, %s\n",
dst, err);
- break;
+ return -ENOMEM;
default:
verbose(env, "verifier internal error: unknown reason (%d)\n",
reason);
@@ -14283,7 +14369,7 @@ static int sanitize_check_bounds(struct bpf_verifier_env *env,
}
break;
default:
- break;
+ return -EOPNOTSUPP;
}
return 0;
@@ -14310,7 +14396,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
struct bpf_sanitize_info info = {};
u8 opcode = BPF_OP(insn->code);
u32 dst = insn->dst_reg;
- int ret;
+ int ret, bounds_ret;
dst_reg = &regs[dst];
@@ -14342,6 +14428,13 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
return -EACCES;
}
+ /*
+ * Accesses to untrusted PTR_TO_MEM are done through probe
+ * instructions, hence no need to track offsets.
+ */
+ if (base_type(ptr_reg->type) == PTR_TO_MEM && (ptr_reg->type & PTR_UNTRUSTED))
+ return 0;
+
switch (base_type(ptr_reg->type)) {
case PTR_TO_CTX:
case PTR_TO_MAP_VALUE:
@@ -14510,11 +14603,19 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
return -EINVAL;
reg_bounds_sync(dst_reg);
- if (sanitize_check_bounds(env, insn, dst_reg) < 0)
- return -EACCES;
+ bounds_ret = sanitize_check_bounds(env, insn, dst_reg);
+ if (bounds_ret == -EACCES)
+ return bounds_ret;
if (sanitize_needed(opcode)) {
ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
&info, true);
+ if (verifier_bug_if(!can_skip_alu_sanitation(env, insn)
+ && !env->cur_state->speculative
+ && bounds_ret
+ && !ret,
+ env, "Pointer type unsupported by sanitize_check_bounds() not rejected by retrieve_ptr_limit() as required")) {
+ return -EFAULT;
+ }
if (ret < 0)
return sanitize_err(env, insn, ret, off_reg, dst_reg);
}
@@ -14529,14 +14630,25 @@ static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
s32 *dst_smax = &dst_reg->s32_max_value;
u32 *dst_umin = &dst_reg->u32_min_value;
u32 *dst_umax = &dst_reg->u32_max_value;
+ u32 umin_val = src_reg->u32_min_value;
+ u32 umax_val = src_reg->u32_max_value;
+ bool min_overflow, max_overflow;
if (check_add_overflow(*dst_smin, src_reg->s32_min_value, dst_smin) ||
check_add_overflow(*dst_smax, src_reg->s32_max_value, dst_smax)) {
*dst_smin = S32_MIN;
*dst_smax = S32_MAX;
}
- if (check_add_overflow(*dst_umin, src_reg->u32_min_value, dst_umin) ||
- check_add_overflow(*dst_umax, src_reg->u32_max_value, dst_umax)) {
+
+ /* If either all additions overflow or no additions overflow, then
+ * it is okay to set: dst_umin = dst_umin + src_umin, dst_umax =
+ * dst_umax + src_umax. Otherwise (some additions overflow), set
+ * the output bounds to unbounded.
+ */
+ min_overflow = check_add_overflow(*dst_umin, umin_val, dst_umin);
+ max_overflow = check_add_overflow(*dst_umax, umax_val, dst_umax);
+
+ if (!min_overflow && max_overflow) {
*dst_umin = 0;
*dst_umax = U32_MAX;
}
@@ -14549,14 +14661,25 @@ static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
s64 *dst_smax = &dst_reg->smax_value;
u64 *dst_umin = &dst_reg->umin_value;
u64 *dst_umax = &dst_reg->umax_value;
+ u64 umin_val = src_reg->umin_value;
+ u64 umax_val = src_reg->umax_value;
+ bool min_overflow, max_overflow;
if (check_add_overflow(*dst_smin, src_reg->smin_value, dst_smin) ||
check_add_overflow(*dst_smax, src_reg->smax_value, dst_smax)) {
*dst_smin = S64_MIN;
*dst_smax = S64_MAX;
}
- if (check_add_overflow(*dst_umin, src_reg->umin_value, dst_umin) ||
- check_add_overflow(*dst_umax, src_reg->umax_value, dst_umax)) {
+
+ /* If either all additions overflow or no additions overflow, then
+ * it is okay to set: dst_umin = dst_umin + src_umin, dst_umax =
+ * dst_umax + src_umax. Otherwise (some additions overflow), set
+ * the output bounds to unbounded.
+ */
+ min_overflow = check_add_overflow(*dst_umin, umin_val, dst_umin);
+ max_overflow = check_add_overflow(*dst_umax, umax_val, dst_umax);
+
+ if (!min_overflow && max_overflow) {
*dst_umin = 0;
*dst_umax = U64_MAX;
}
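The only way a partial overflow can occur is the minimal sum staying in range while the maximal one wraps, which is exactly the !min_overflow && max_overflow test. Two concrete u64 cases:

/* dst in [10, U64_MAX], src in [1, 5]:
 *	10 + 1 fits but U64_MAX + 5 wraps -> results are no longer ordered,
 *	so the bounds collapse to [0, U64_MAX].
 * dst in [U64_MAX - 1, U64_MAX], src in [2, 5]:
 *	every possible sum wraps exactly once, so the wrapped bounds [0, 4]
 *	remain a valid (and tight) range.
 */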
@@ -14567,8 +14690,11 @@ static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
{
s32 *dst_smin = &dst_reg->s32_min_value;
s32 *dst_smax = &dst_reg->s32_max_value;
+ u32 *dst_umin = &dst_reg->u32_min_value;
+ u32 *dst_umax = &dst_reg->u32_max_value;
u32 umin_val = src_reg->u32_min_value;
u32 umax_val = src_reg->u32_max_value;
+ bool min_underflow, max_underflow;
if (check_sub_overflow(*dst_smin, src_reg->s32_max_value, dst_smin) ||
check_sub_overflow(*dst_smax, src_reg->s32_min_value, dst_smax)) {
@@ -14576,14 +14702,18 @@ static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
*dst_smin = S32_MIN;
*dst_smax = S32_MAX;
}
- if (dst_reg->u32_min_value < umax_val) {
- /* Overflow possible, we know nothing */
- dst_reg->u32_min_value = 0;
- dst_reg->u32_max_value = U32_MAX;
- } else {
- /* Cannot overflow (as long as bounds are consistent) */
- dst_reg->u32_min_value -= umax_val;
- dst_reg->u32_max_value -= umin_val;
+
+ /* If either all subtractions underflow or no subtractions
+ * underflow, it is okay to set: dst_umin = dst_umin - src_umax,
+ * dst_umax = dst_umax - src_umin. Otherwise (some subtractions
+ * underflow), set the output bounds to unbounded.
+ */
+ min_underflow = check_sub_overflow(*dst_umin, umax_val, dst_umin);
+ max_underflow = check_sub_overflow(*dst_umax, umin_val, dst_umax);
+
+ if (min_underflow && !max_underflow) {
+ *dst_umin = 0;
+ *dst_umax = U32_MAX;
}
}
@@ -14592,8 +14722,11 @@ static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
{
s64 *dst_smin = &dst_reg->smin_value;
s64 *dst_smax = &dst_reg->smax_value;
+ u64 *dst_umin = &dst_reg->umin_value;
+ u64 *dst_umax = &dst_reg->umax_value;
u64 umin_val = src_reg->umin_value;
u64 umax_val = src_reg->umax_value;
+ bool min_underflow, max_underflow;
if (check_sub_overflow(*dst_smin, src_reg->smax_value, dst_smin) ||
check_sub_overflow(*dst_smax, src_reg->smin_value, dst_smax)) {
@@ -14601,14 +14734,18 @@ static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
*dst_smin = S64_MIN;
*dst_smax = S64_MAX;
}
- if (dst_reg->umin_value < umax_val) {
- /* Overflow possible, we know nothing */
- dst_reg->umin_value = 0;
- dst_reg->umax_value = U64_MAX;
- } else {
- /* Cannot overflow (as long as bounds are consistent) */
- dst_reg->umin_value -= umax_val;
- dst_reg->umax_value -= umin_val;
+
+ /* If either all subtractions underflow or no subtractions
+ * underflow, it is okay to set: dst_umin = dst_umin - src_umax,
+ * dst_umax = dst_umax - src_umin. Otherwise (some subtractions
+ * underflow), set the output bounds to unbounded.
+ */
+ min_underflow = check_sub_overflow(*dst_umin, umax_val, dst_umin);
+ max_underflow = check_sub_overflow(*dst_umax, umin_val, dst_umax);
+
+ if (min_underflow && !max_underflow) {
+ *dst_umin = 0;
+ *dst_umax = U64_MAX;
}
}
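Subtraction mirrors this: a partial wrap is only possible when the minimal difference underflows while the maximal one does not. For example:

/* dst in [3, 5], src in [4, 10]:
 *	3 - 10 wraps but 5 - 4 = 1 does not -> bounds become [0, U64_MAX].
 * dst in [3, 5], src in [10, 20]:
 *	both 3 - 20 and 5 - 10 wrap, so the wrapped range
 *	[U64_MAX - 16, U64_MAX - 4] is kept and stays correct.
 */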
@@ -15070,6 +15207,7 @@ static bool is_safe_to_compute_dst_reg_range(struct bpf_insn *insn,
switch (BPF_OP(insn->code)) {
case BPF_ADD:
case BPF_SUB:
+ case BPF_NEG:
case BPF_AND:
case BPF_XOR:
case BPF_OR:
@@ -15138,6 +15276,13 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
scalar_min_max_sub(dst_reg, &src_reg);
dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
break;
+ case BPF_NEG:
+ env->fake_reg[0] = *dst_reg;
+ __mark_reg_known(dst_reg, 0);
+ scalar32_min_max_sub(dst_reg, &env->fake_reg[0]);
+ scalar_min_max_sub(dst_reg, &env->fake_reg[0]);
+ dst_reg->var_off = tnum_neg(env->fake_reg[0].var_off);
+ break;
case BPF_MUL:
dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
scalar32_min_max_mul(dst_reg, &src_reg);
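The BPF_NEG case above reuses the same machinery by rewriting -x as 0 - x against a copy of the old range (env->fake_reg), plus tnum_neg() for the known-bits tracking. For instance:

/* r1 in [1, 3] before "r1 = -r1": the signed bounds become [-3, -1];
 * both unsigned subtractions wrap (see scalar_min_max_sub() above), so
 * the wrapped range [U64_MAX - 2, U64_MAX] is kept, which is exactly
 * the unsigned view of -3..-1.
 */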
@@ -15277,12 +15422,12 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
if (WARN_ON_ONCE(ptr_reg)) {
print_verifier_state(env, vstate, vstate->curframe, true);
verbose(env, "verifier internal error: unexpected ptr_reg\n");
- return -EINVAL;
+ return -EFAULT;
}
if (WARN_ON(!src_reg)) {
print_verifier_state(env, vstate, vstate->curframe, true);
verbose(env, "verifier internal error: no src_reg\n");
- return -EINVAL;
+ return -EFAULT;
}
err = adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
if (err)
@@ -15361,7 +15506,14 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
}
/* check dest operand */
- err = check_reg_arg(env, insn->dst_reg, DST_OP);
+ if (opcode == BPF_NEG) {
+ err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
+ err = err ?: adjust_scalar_min_max_vals(env, insn,
+ &regs[insn->dst_reg],
+ regs[insn->dst_reg]);
+ } else {
+ err = check_reg_arg(env, insn->dst_reg, DST_OP);
+ }
if (err)
return err;
@@ -16472,7 +16624,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
}
if (insn_flags) {
- err = push_insn_history(env, this_branch, insn_flags, 0);
+ err = push_jmp_history(env, this_branch, insn_flags, 0);
if (err)
return err;
}
@@ -16530,7 +16682,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
if (dst_reg->type == SCALAR_VALUE && dst_reg->id)
collect_linked_regs(this_branch, dst_reg->id, &linked_regs);
if (linked_regs.cnt > 1) {
- err = push_insn_history(env, this_branch, 0, linked_regs_pack(&linked_regs));
+ err = push_jmp_history(env, this_branch, 0, linked_regs_pack(&linked_regs));
if (err)
return err;
}
@@ -16728,7 +16880,7 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
dst_reg->type = CONST_PTR_TO_MAP;
} else {
verbose(env, "bpf verifier is misconfigured\n");
- return -EINVAL;
+ return -EFAULT;
}
return 0;
@@ -16775,7 +16927,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
if (!env->ops->gen_ld_abs) {
verbose(env, "bpf verifier is misconfigured\n");
- return -EINVAL;
+ return -EFAULT;
}
if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
@@ -17605,17 +17757,18 @@ static int check_cfg(struct bpf_verifier_env *env)
int *insn_stack, *insn_state, *insn_postorder;
int ex_insn_beg, i, ret = 0;
- insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
+ insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
if (!insn_state)
return -ENOMEM;
- insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
+ insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
if (!insn_stack) {
kvfree(insn_state);
return -ENOMEM;
}
- insn_postorder = env->cfg.insn_postorder = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
+ insn_postorder = env->cfg.insn_postorder =
+ kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
if (!insn_postorder) {
kvfree(insn_state);
kvfree(insn_stack);
@@ -17749,7 +17902,7 @@ static int check_btf_func_early(struct bpf_verifier_env *env,
urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
min_size = min_t(u32, krec_size, urec_size);
- krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
+ krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
if (!krecord)
return -ENOMEM;
@@ -17849,7 +18002,7 @@ static int check_btf_func(struct bpf_verifier_env *env,
urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
krecord = prog->aux->func_info;
- info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
+ info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
if (!info_aux)
return -ENOMEM;
@@ -17935,7 +18088,7 @@ static int check_btf_line(struct bpf_verifier_env *env,
* pass in a smaller bpf_line_info object.
*/
linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
- GFP_KERNEL | __GFP_NOWARN);
+ GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
if (!linfo)
return -ENOMEM;
@@ -18258,10 +18411,6 @@ static void clean_verifier_state(struct bpf_verifier_env *env,
{
int i;
- if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
- /* all regs in this state in all frames were already marked */
- return;
-
for (i = 0; i <= st->curframe; i++)
clean_func_state(env, st->frame[i]);
}
@@ -18301,7 +18450,6 @@ static void clean_verifier_state(struct bpf_verifier_env *env,
static void clean_live_states(struct bpf_verifier_env *env, int insn,
struct bpf_verifier_state *cur)
{
- struct bpf_verifier_state *loop_entry;
struct bpf_verifier_state_list *sl;
struct list_head *pos, *head;
@@ -18310,12 +18458,14 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn,
sl = container_of(pos, struct bpf_verifier_state_list, node);
if (sl->state.branches)
continue;
- loop_entry = get_loop_entry(env, &sl->state);
- if (!IS_ERR_OR_NULL(loop_entry) && loop_entry->branches)
- continue;
if (sl->state.insn_idx != insn ||
!same_callsites(&sl->state, cur))
continue;
+ if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE)
+ /* all regs in this state in all frames were already marked */
+ continue;
+ if (incomplete_read_marks(env, &sl->state))
+ continue;
clean_verifier_state(env, &sl->state);
}
}
@@ -18762,9 +18912,7 @@ static bool states_equal(struct bpf_verifier_env *env,
* and all frame states need to be equivalent
*/
for (i = 0; i <= old->curframe; i++) {
- insn_idx = i == old->curframe
- ? env->insn_idx
- : old->frame[i + 1]->callsite;
+ insn_idx = frame_insn_idx(old, i);
if (old->frame[i]->callsite != cur->frame[i]->callsite)
return false;
if (!func_states_equal(env, old->frame[i], cur->frame[i], insn_idx, exact))
@@ -18811,12 +18959,15 @@ static int propagate_liveness_reg(struct bpf_verifier_env *env,
*/
static int propagate_liveness(struct bpf_verifier_env *env,
const struct bpf_verifier_state *vstate,
- struct bpf_verifier_state *vparent)
+ struct bpf_verifier_state *vparent,
+ bool *changed)
{
struct bpf_reg_state *state_reg, *parent_reg;
struct bpf_func_state *state, *parent;
int i, frame, err = 0;
+ bool tmp = false;
+ changed = changed ?: &tmp;
if (vparent->curframe != vstate->curframe) {
WARN(1, "propagate_live: parent frame %d current frame %d\n",
vparent->curframe, vstate->curframe);
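The new `changed` parameter may be NULL when a caller does not need the feedback; `changed = changed ?: &tmp;` (a GNU C extension) simply redirects it to a throwaway local so the body can update it unconditionally. A small stand-alone sketch of the idiom with hypothetical names, for gcc or clang:

#include <stdbool.h>
#include <stdio.h>

/* 'changed' may be NULL; point it at a scratch local in that case */
static int do_work(bool *changed)
{
	bool tmp = false;

	changed = changed ?: &tmp;
	*changed |= true;	/* report progress without NULL checks */
	return 0;
}

int main(void)
{
	bool c = false;

	do_work(NULL);			/* caller ignores the flag */
	do_work(&c);			/* caller consumes the flag */
	printf("changed = %d\n", c);
	return 0;
}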
@@ -18835,6 +18986,7 @@ static int propagate_liveness(struct bpf_verifier_env *env,
&parent_reg[i]);
if (err < 0)
return err;
+ *changed |= err > 0;
if (err == REG_LIVE_READ64)
mark_insn_zext(env, &parent_reg[i]);
}
@@ -18846,6 +18998,7 @@ static int propagate_liveness(struct bpf_verifier_env *env,
state_reg = &state->stack[i].spilled_ptr;
err = propagate_liveness_reg(env, state_reg,
parent_reg);
+ *changed |= err > 0;
if (err < 0)
return err;
}
@@ -18857,7 +19010,9 @@ static int propagate_liveness(struct bpf_verifier_env *env,
* propagate them into the current state
*/
static int propagate_precision(struct bpf_verifier_env *env,
- const struct bpf_verifier_state *old)
+ const struct bpf_verifier_state *old,
+ struct bpf_verifier_state *cur,
+ bool *changed)
{
struct bpf_reg_state *state_reg;
struct bpf_func_state *state;
@@ -18905,13 +19060,53 @@ static int propagate_precision(struct bpf_verifier_env *env,
verbose(env, "\n");
}
- err = mark_chain_precision_batch(env);
+ err = __mark_chain_precision(env, cur, -1, changed);
if (err < 0)
return err;
return 0;
}
+#define MAX_BACKEDGE_ITERS 64
+
+/* Propagate read and precision marks from visit->backedges[*].state->equal_state
+ * to the corresponding parent states of visit->backedges[*].state until a fixed point
+ * is reached, then free visit->backedges.
+ * After execution of this function incomplete_read_marks() will return false
+ * for all states corresponding to @visit->callchain.
+ */
+static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visit *visit)
+{
+ struct bpf_scc_backedge *backedge;
+ struct bpf_verifier_state *st;
+ bool changed;
+ int i, err;
+
+ i = 0;
+ do {
+ if (i++ > MAX_BACKEDGE_ITERS) {
+ if (env->log.level & BPF_LOG_LEVEL2)
+ verbose(env, "%s: too many iterations\n", __func__);
+ for (backedge = visit->backedges; backedge; backedge = backedge->next)
+ mark_all_scalars_precise(env, &backedge->state);
+ break;
+ }
+ changed = false;
+ for (backedge = visit->backedges; backedge; backedge = backedge->next) {
+ st = &backedge->state;
+ err = propagate_liveness(env, st->equal_state, st, &changed);
+ if (err)
+ return err;
+ err = propagate_precision(env, st->equal_state, st, &changed);
+ if (err)
+ return err;
+ }
+ } while (changed);
+
+ free_backedges(visit);
+ return 0;
+}
+
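propagate_backedges() is an instance of the usual fixed-point recipe: sweep the backedge list until no propagation call reports a change, and fall back to a conservative over-approximation once the iteration cap is hit. A stripped-down user-space sketch of that recipe, with hypothetical node and propagation stubs standing in for the verifier state machinery:

#include <stdbool.h>

struct node { struct node *next; };	/* stand-in for a backedge state */

#define MAX_ITERS 64

static bool propagate_one(struct node *n) { (void)n; return false; }	/* true if 'n' changed */
static void mark_conservative(struct node *n) { (void)n; }

static void propagate_fixed_point(struct node *list)
{
	struct node *n;
	bool changed;
	int i = 0;

	do {
		if (i++ > MAX_ITERS) {
			for (n = list; n; n = n->next)
				mark_conservative(n);	/* give up: over-approximate */
			return;
		}
		changed = false;
		for (n = list; n; n = n->next)
			changed |= propagate_one(n);
	} while (changed);
}

int main(void)
{
	propagate_fixed_point(NULL);	/* empty list: terminates immediately */
	return 0;
}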
static bool states_maybe_looping(struct bpf_verifier_state *old,
struct bpf_verifier_state *cur)
{
@@ -19021,14 +19216,14 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
{
struct bpf_verifier_state_list *new_sl;
struct bpf_verifier_state_list *sl;
- struct bpf_verifier_state *cur = env->cur_state, *new, *loop_entry;
+ struct bpf_verifier_state *cur = env->cur_state, *new;
+ bool force_new_state, add_new_state, loop;
int i, j, n, err, states_cnt = 0;
- bool force_new_state, add_new_state, force_exact;
struct list_head *pos, *tmp, *head;
force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx) ||
/* Avoid accumulating infinitely long jmp history */
- cur->insn_hist_end - cur->insn_hist_start > 40;
+ cur->jmp_history_cnt > 40;
/* bpf progs typically have pruning point every 4 instructions
* http://vger.kernel.org/bpfconf2019.html#session-1
@@ -19045,6 +19240,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
clean_live_states(env, insn_idx, cur);
+ loop = false;
head = explored_state(env, insn_idx);
list_for_each_safe(pos, tmp, head) {
sl = container_of(pos, struct bpf_verifier_state_list, node);
@@ -19124,7 +19320,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
spi = __get_spi(iter_reg->off + iter_reg->var_off.value);
iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr;
if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) {
- update_loop_entry(env, cur, &sl->state);
+ loop = true;
goto hit;
}
}
@@ -19133,7 +19329,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
if (is_may_goto_insn_at(env, insn_idx)) {
if (sl->state.may_goto_depth != cur->may_goto_depth &&
states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
- update_loop_entry(env, cur, &sl->state);
+ loop = true;
goto hit;
}
}
@@ -19175,38 +19371,9 @@ skip_inf_loop_check:
add_new_state = false;
goto miss;
}
- /* If sl->state is a part of a loop and this loop's entry is a part of
- * current verification path then states have to be compared exactly.
- * 'force_exact' is needed to catch the following case:
- *
- * initial Here state 'succ' was processed first,
- * | it was eventually tracked to produce a
- * V state identical to 'hdr'.
- * .---------> hdr All branches from 'succ' had been explored
- * | | and thus 'succ' has its .branches == 0.
- * | V
- * | .------... Suppose states 'cur' and 'succ' correspond
- * | | | to the same instruction + callsites.
- * | V V In such case it is necessary to check
- * | ... ... if 'succ' and 'cur' are states_equal().
- * | | | If 'succ' and 'cur' are a part of the
- * | V V same loop exact flag has to be set.
- * | succ <- cur To check if that is the case, verify
- * | | if loop entry of 'succ' is in current
- * | V DFS path.
- * | ...
- * | |
- * '----'
- *
- * Additional details are in the comment before get_loop_entry().
- */
- loop_entry = get_loop_entry(env, &sl->state);
- if (IS_ERR(loop_entry))
- return PTR_ERR(loop_entry);
- force_exact = loop_entry && loop_entry->branches > 0;
- if (states_equal(env, &sl->state, cur, force_exact ? RANGE_WITHIN : NOT_EXACT)) {
- if (force_exact)
- update_loop_entry(env, cur, loop_entry);
+ /* See comments for mark_all_regs_read_and_precise() */
+ loop = incomplete_read_marks(env, &sl->state);
+ if (states_equal(env, &sl->state, cur, loop ? RANGE_WITHIN : NOT_EXACT)) {
hit:
sl->hit_cnt++;
/* reached equivalent register/stack state,
@@ -19219,7 +19386,7 @@ hit:
* they'll be immediately forgotten as we're pruning
* this state and will pop a new one.
*/
- err = propagate_liveness(env, &sl->state, cur);
+ err = propagate_liveness(env, &sl->state, cur, NULL);
/* if previous state reached the exit with precision and
* current state is equivalent to it (except precision marks)
@@ -19227,10 +19394,98 @@ hit:
* the current state.
*/
if (is_jmp_point(env, env->insn_idx))
- err = err ? : push_insn_history(env, cur, 0, 0);
- err = err ? : propagate_precision(env, &sl->state);
+ err = err ? : push_jmp_history(env, cur, 0, 0);
+ err = err ? : propagate_precision(env, &sl->state, cur, NULL);
if (err)
return err;
+ /* When processing iterator-based loops, the propagate_liveness and
+ * propagate_precision calls above are not sufficient to transfer all
+ * relevant read and precision marks. E.g. consider the following case:
+ *
+ * .-> A --. Assume the states are visited in the order A, B, C.
+ * | | | Assume that state B reaches a state equivalent to state A.
+ * | v v At this point, state C is not processed yet, so state A
+ * '-- B C has not received any read or precision marks from C.
+ * Thus, marks propagated from A to B are incomplete.
+ *
+ * The verifier mitigates this by performing the following steps:
+ *
+ * - Prior to the main verification pass, strongly connected components
+ * (SCCs) are computed over the program's control flow graph,
+ * intraprocedurally.
+ *
+ * - During the main verification pass, `maybe_enter_scc()` checks
+ * whether the current verifier state is entering an SCC. If so, an
+ * instance of a `bpf_scc_visit` object is created, and the state
+ * entering the SCC is recorded as the entry state.
+ *
+ * - This instance is associated not with the SCC itself, but with a
+ * `bpf_scc_callchain`: a tuple consisting of the call sites leading to
+ * the SCC and the SCC id. See `compute_scc_callchain()`.
+ *
+ * - When a verification path encounters a `states_equal(...,
+ * RANGE_WITHIN)` condition, there exists a call chain describing the
+ * current state and a corresponding `bpf_scc_visit` instance. A copy
+ * of the current state is created and added to
+ * `bpf_scc_visit->backedges`.
+ *
+ * - When a verification path terminates, `maybe_exit_scc()` is called
+ * from `update_branch_counts()`. For states with `branches == 0`, it
+ * checks whether the state is the entry state of any `bpf_scc_visit`
+ * instance. If it is, this indicates that all paths originating from
+ * this SCC visit have been explored. `propagate_backedges()` is then
+ * called, which propagates read and precision marks through the
+ * backedges until a fixed point is reached.
+ * (In the earlier example, this would propagate marks from A to B,
+ * from C to A, and then again from A to B.)
+ *
+ * A note on callchains
+ * --------------------
+ *
+ * Consider the following example:
+ *
+ * void foo() { loop { ... SCC#1 ... } }
+ * void main() {
+ * A: foo();
+ * B: ...
+ * C: foo();
+ * }
+ *
+ * Here, there are two distinct callchains leading to SCC#1:
+ * - (A, SCC#1)
+ * - (C, SCC#1)
+ *
+ * Each callchain identifies a separate `bpf_scc_visit` instance that
+ * accumulates backedge states. The `propagate_{liveness,precision}()`
+ * functions traverse the parent state of each backedge state, which
+ * means these parent states must remain valid (i.e., not freed) while
+ * the corresponding `bpf_scc_visit` instance exists.
+ *
+ * Associating `bpf_scc_visit` instances directly with SCCs instead of
+ * callchains would break this invariant:
+ * - States explored during `C: foo()` would contribute backedges to
+ * SCC#1, but SCC#1 would only be exited once the exploration of
+ * `A: foo()` completes.
+ * - By that time, the states explored between `A: foo()` and `C: foo()`
+ * (i.e., `B: ...`) may have already been freed, causing the parent
+ * links for states from `C: foo()` to become invalid.
+ */
+ if (loop) {
+ struct bpf_scc_backedge *backedge;
+
+ backedge = kzalloc(sizeof(*backedge), GFP_KERNEL_ACCOUNT);
+ if (!backedge)
+ return -ENOMEM;
+ err = copy_verifier_state(&backedge->state, cur);
+ backedge->state.equal_state = &sl->state;
+ backedge->state.insn_idx = insn_idx;
+ err = err ?: add_scc_backedge(env, &sl->state, backedge);
+ if (err) {
+ free_verifier_state(&backedge->state, false);
+ kvfree(backedge);
+ return err;
+ }
+ }
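As a purely illustrative counterpart to the callchain example in the comment above, a BPF-side C program with the described shape could look like the sketch below. The function, section and license names are placeholders, and clang may unroll such a small loop, so treat it only as an approximation of the CFG shape (one looping subprogram, two call sites) the verifier would see:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

static __noinline int foo(int n)
{
	int i, sum = 0;

	for (i = 0; i < n && i < 64; i++)	/* bounded loop: SCC#1 */
		sum += i;
	return sum;
}

SEC("socket")
int main_prog(void *ctx)
{
	int a = foo(16);	/* callchain (A, SCC#1) */
	int b = foo(32);	/* callchain (C, SCC#1) */

	return a + b;
}

char _license[] SEC("license") = "GPL";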
return 1;
}
miss:
@@ -19282,7 +19537,7 @@ miss:
* When looping the sl->state.branches will be > 0 and this state
* will not be considered for equivalence until branches == 0.
*/
- new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
+ new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL_ACCOUNT);
if (!new_sl)
return -ENOMEM;
env->total_states++;
@@ -19306,11 +19561,17 @@ miss:
new->insn_idx = insn_idx;
WARN_ONCE(new->branches != 1,
"BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
+ err = maybe_enter_scc(env, new);
+ if (err) {
+ free_verifier_state(new, false);
+ kvfree(new_sl);
+ return err;
+ }
cur->parent = new;
cur->first_insn_idx = insn_idx;
- cur->insn_hist_start = cur->insn_hist_end;
cur->dfs_depth = new->dfs_depth + 1;
+ clear_jmp_history(cur);
list_add(&new_sl->node, head);
/* connect new state to parentage chain. Current frame needs all
@@ -19382,10 +19643,27 @@ static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
!reg_type_mismatch_ok(prev));
}
+static bool is_ptr_to_mem_or_btf_id(enum bpf_reg_type type)
+{
+ switch (base_type(type)) {
+ case PTR_TO_MEM:
+ case PTR_TO_BTF_ID:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool is_ptr_to_mem(enum bpf_reg_type type)
+{
+ return base_type(type) == PTR_TO_MEM;
+}
+
static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
bool allow_trust_mismatch)
{
enum bpf_reg_type *prev_type = &env->insn_aux_data[env->insn_idx].ptr_type;
+ enum bpf_reg_type merged_type;
if (*prev_type == NOT_INIT) {
/* Saw a valid insn
@@ -19402,15 +19680,24 @@ static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type typ
* Reject it.
*/
if (allow_trust_mismatch &&
- base_type(type) == PTR_TO_BTF_ID &&
- base_type(*prev_type) == PTR_TO_BTF_ID) {
+ is_ptr_to_mem_or_btf_id(type) &&
+ is_ptr_to_mem_or_btf_id(*prev_type)) {
/*
* Have to support a use case when one path through
* the program yields TRUSTED pointer while another
* is UNTRUSTED. Fallback to UNTRUSTED to generate
* BPF_PROBE_MEM/BPF_PROBE_MEMSX.
+ * The same behavior applies to the MEM_RDONLY flag.
*/
- *prev_type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
+ if (is_ptr_to_mem(type) || is_ptr_to_mem(*prev_type))
+ merged_type = PTR_TO_MEM;
+ else
+ merged_type = PTR_TO_BTF_ID;
+ if ((type & PTR_UNTRUSTED) || (*prev_type & PTR_UNTRUSTED))
+ merged_type |= PTR_UNTRUSTED;
+ if ((type & MEM_RDONLY) || (*prev_type & MEM_RDONLY))
+ merged_type |= MEM_RDONLY;
+ *prev_type = merged_type;
} else {
verbose(env, "same insn cannot be used with different pointers\n");
return -EINVAL;
@@ -19420,20 +19707,223 @@ static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type typ
return 0;
}
+enum {
+ PROCESS_BPF_EXIT = 1
+};
+
+static int process_bpf_exit_full(struct bpf_verifier_env *env,
+ bool *do_print_state,
+ bool exception_exit)
+{
+ /* We must do check_reference_leak here before
+ * prepare_func_exit to handle the case when
+ * state->curframe > 0, it may be a callback function,
+ * for which reference_state must match caller reference
+ * state when it exits.
+ */
+ int err = check_resource_leak(env, exception_exit,
+ !env->cur_state->curframe,
+ "BPF_EXIT instruction in main prog");
+ if (err)
+ return err;
+
+ /* The side effect of the prepare_func_exit which is
+ * being skipped is that it frees bpf_func_state.
+ * Typically, process_bpf_exit will only be hit with
+ * outermost exit. copy_verifier_state in pop_stack will
+ * handle freeing of any extra bpf_func_state left over
+ * from not processing all nested function exits. We
+ * also skip return code checks as they are not needed
+ * for exceptional exits.
+ */
+ if (exception_exit)
+ return PROCESS_BPF_EXIT;
+
+ if (env->cur_state->curframe) {
+ /* exit from nested function */
+ err = prepare_func_exit(env, &env->insn_idx);
+ if (err)
+ return err;
+ *do_print_state = true;
+ return 0;
+ }
+
+ err = check_return_code(env, BPF_REG_0, "R0");
+ if (err)
+ return err;
+ return PROCESS_BPF_EXIT;
+}
+
+static int do_check_insn(struct bpf_verifier_env *env, bool *do_print_state)
+{
+ int err;
+ struct bpf_insn *insn = &env->prog->insnsi[env->insn_idx];
+ u8 class = BPF_CLASS(insn->code);
+
+ if (class == BPF_ALU || class == BPF_ALU64) {
+ err = check_alu_op(env, insn);
+ if (err)
+ return err;
+
+ } else if (class == BPF_LDX) {
+ bool is_ldsx = BPF_MODE(insn->code) == BPF_MEMSX;
+
+ /* Check for reserved fields is already done in
+ * resolve_pseudo_ldimm64().
+ */
+ err = check_load_mem(env, insn, false, is_ldsx, true, "ldx");
+ if (err)
+ return err;
+ } else if (class == BPF_STX) {
+ if (BPF_MODE(insn->code) == BPF_ATOMIC) {
+ err = check_atomic(env, insn);
+ if (err)
+ return err;
+ env->insn_idx++;
+ return 0;
+ }
+
+ if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
+ verbose(env, "BPF_STX uses reserved fields\n");
+ return -EINVAL;
+ }
+
+ err = check_store_reg(env, insn, false);
+ if (err)
+ return err;
+ } else if (class == BPF_ST) {
+ enum bpf_reg_type dst_reg_type;
+
+ if (BPF_MODE(insn->code) != BPF_MEM ||
+ insn->src_reg != BPF_REG_0) {
+ verbose(env, "BPF_ST uses reserved fields\n");
+ return -EINVAL;
+ }
+ /* check src operand */
+ err = check_reg_arg(env, insn->dst_reg, SRC_OP);
+ if (err)
+ return err;
+
+ dst_reg_type = cur_regs(env)[insn->dst_reg].type;
+
+ /* check that memory (dst_reg + off) is writeable */
+ err = check_mem_access(env, env->insn_idx, insn->dst_reg,
+ insn->off, BPF_SIZE(insn->code),
+ BPF_WRITE, -1, false, false);
+ if (err)
+ return err;
+
+ err = save_aux_ptr_type(env, dst_reg_type, false);
+ if (err)
+ return err;
+ } else if (class == BPF_JMP || class == BPF_JMP32) {
+ u8 opcode = BPF_OP(insn->code);
+
+ env->jmps_processed++;
+ if (opcode == BPF_CALL) {
+ if (BPF_SRC(insn->code) != BPF_K ||
+ (insn->src_reg != BPF_PSEUDO_KFUNC_CALL &&
+ insn->off != 0) ||
+ (insn->src_reg != BPF_REG_0 &&
+ insn->src_reg != BPF_PSEUDO_CALL &&
+ insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
+ insn->dst_reg != BPF_REG_0 || class == BPF_JMP32) {
+ verbose(env, "BPF_CALL uses reserved fields\n");
+ return -EINVAL;
+ }
+
+ if (env->cur_state->active_locks) {
+ if ((insn->src_reg == BPF_REG_0 &&
+ insn->imm != BPF_FUNC_spin_unlock) ||
+ (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
+ (insn->off != 0 || !kfunc_spin_allowed(insn->imm)))) {
+ verbose(env,
+ "function calls are not allowed while holding a lock\n");
+ return -EINVAL;
+ }
+ }
+ if (insn->src_reg == BPF_PSEUDO_CALL) {
+ err = check_func_call(env, insn, &env->insn_idx);
+ } else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+ err = check_kfunc_call(env, insn, &env->insn_idx);
+ if (!err && is_bpf_throw_kfunc(insn))
+ return process_bpf_exit_full(env, do_print_state, true);
+ } else {
+ err = check_helper_call(env, insn, &env->insn_idx);
+ }
+ if (err)
+ return err;
+
+ mark_reg_scratched(env, BPF_REG_0);
+ } else if (opcode == BPF_JA) {
+ if (BPF_SRC(insn->code) != BPF_K ||
+ insn->src_reg != BPF_REG_0 ||
+ insn->dst_reg != BPF_REG_0 ||
+ (class == BPF_JMP && insn->imm != 0) ||
+ (class == BPF_JMP32 && insn->off != 0)) {
+ verbose(env, "BPF_JA uses reserved fields\n");
+ return -EINVAL;
+ }
+
+ if (class == BPF_JMP)
+ env->insn_idx += insn->off + 1;
+ else
+ env->insn_idx += insn->imm + 1;
+ return 0;
+ } else if (opcode == BPF_EXIT) {
+ if (BPF_SRC(insn->code) != BPF_K ||
+ insn->imm != 0 ||
+ insn->src_reg != BPF_REG_0 ||
+ insn->dst_reg != BPF_REG_0 ||
+ class == BPF_JMP32) {
+ verbose(env, "BPF_EXIT uses reserved fields\n");
+ return -EINVAL;
+ }
+ return process_bpf_exit_full(env, do_print_state, false);
+ } else {
+ err = check_cond_jmp_op(env, insn, &env->insn_idx);
+ if (err)
+ return err;
+ }
+ } else if (class == BPF_LD) {
+ u8 mode = BPF_MODE(insn->code);
+
+ if (mode == BPF_ABS || mode == BPF_IND) {
+ err = check_ld_abs(env, insn);
+ if (err)
+ return err;
+
+ } else if (mode == BPF_IMM) {
+ err = check_ld_imm(env, insn);
+ if (err)
+ return err;
+
+ env->insn_idx++;
+ sanitize_mark_insn_seen(env);
+ } else {
+ verbose(env, "invalid BPF_LD mode\n");
+ return -EINVAL;
+ }
+ } else {
+ verbose(env, "unknown insn class %d\n", class);
+ return -EINVAL;
+ }
+
+ env->insn_idx++;
+ return 0;
+}
+
static int do_check(struct bpf_verifier_env *env)
{
bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
struct bpf_verifier_state *state = env->cur_state;
struct bpf_insn *insns = env->prog->insnsi;
- struct bpf_reg_state *regs;
int insn_cnt = env->prog->len;
bool do_print_state = false;
int prev_insn_idx = -1;
for (;;) {
- bool exception_exit = false;
struct bpf_insn *insn;
- u8 class;
int err;
/* reset current history entry on each new instruction */
@@ -19447,7 +19937,6 @@ static int do_check(struct bpf_verifier_env *env)
}
insn = &insns[env->insn_idx];
- class = BPF_CLASS(insn->code);
if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
verbose(env,
@@ -19457,6 +19946,7 @@ static int do_check(struct bpf_verifier_env *env)
}
state->last_insn_idx = env->prev_insn_idx;
+ state->insn_idx = env->insn_idx;
if (is_prune_point(env, env->insn_idx)) {
err = is_state_visited(env, env->insn_idx);
@@ -19478,7 +19968,7 @@ static int do_check(struct bpf_verifier_env *env)
}
if (is_jmp_point(env, env->insn_idx)) {
- err = push_insn_history(env, state, 0, 0);
+ err = push_jmp_history(env, state, 0, 0);
if (err)
return err;
}
@@ -19517,215 +20007,60 @@ static int do_check(struct bpf_verifier_env *env)
return err;
}
- regs = cur_regs(env);
sanitize_mark_insn_seen(env);
prev_insn_idx = env->insn_idx;
- if (class == BPF_ALU || class == BPF_ALU64) {
- err = check_alu_op(env, insn);
- if (err)
- return err;
-
- } else if (class == BPF_LDX) {
- bool is_ldsx = BPF_MODE(insn->code) == BPF_MEMSX;
+ /* Reduce verification complexity by stopping speculative path
+ * verification when a nospec is encountered.
+ */
+ if (state->speculative && cur_aux(env)->nospec)
+ goto process_bpf_exit;
- /* Check for reserved fields is already done in
- * resolve_pseudo_ldimm64().
+ err = do_check_insn(env, &do_print_state);
+ if (error_recoverable_with_nospec(err) && state->speculative) {
+ /* Prevent this speculative path from ever reaching the
+ * insn that would have been unsafe to execute.
*/
- err = check_load_mem(env, insn, false, is_ldsx, true,
- "ldx");
- if (err)
- return err;
- } else if (class == BPF_STX) {
- if (BPF_MODE(insn->code) == BPF_ATOMIC) {
- err = check_atomic(env, insn);
- if (err)
- return err;
- env->insn_idx++;
- continue;
- }
-
- if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
- verbose(env, "BPF_STX uses reserved fields\n");
- return -EINVAL;
- }
-
- err = check_store_reg(env, insn, false);
- if (err)
- return err;
- } else if (class == BPF_ST) {
- enum bpf_reg_type dst_reg_type;
-
- if (BPF_MODE(insn->code) != BPF_MEM ||
- insn->src_reg != BPF_REG_0) {
- verbose(env, "BPF_ST uses reserved fields\n");
- return -EINVAL;
- }
- /* check src operand */
- err = check_reg_arg(env, insn->dst_reg, SRC_OP);
- if (err)
- return err;
-
- dst_reg_type = regs[insn->dst_reg].type;
-
- /* check that memory (dst_reg + off) is writeable */
- err = check_mem_access(env, env->insn_idx, insn->dst_reg,
- insn->off, BPF_SIZE(insn->code),
- BPF_WRITE, -1, false, false);
- if (err)
- return err;
-
- err = save_aux_ptr_type(env, dst_reg_type, false);
+ cur_aux(env)->nospec = true;
+ /* If it was an ADD/SUB insn, potentially remove any
+ * markings for alu sanitization.
+ */
+ cur_aux(env)->alu_state = 0;
+ goto process_bpf_exit;
+ } else if (err < 0) {
+ return err;
+ } else if (err == PROCESS_BPF_EXIT) {
+ goto process_bpf_exit;
+ }
+ WARN_ON_ONCE(err);
+
+ if (state->speculative && cur_aux(env)->nospec_result) {
+ /* If we are on a path that performed a jump-op, this
+ * may skip a nospec patched-in after the jump. This can
+ * currently never happen because nospec_result is only
+ * used for the write-ops
+ * `*(size*)(dst_reg+off)=src_reg|imm32` which must
+ * never skip the following insn. Still, add a warning
+ * to document this in case nospec_result is used
+ * elsewhere in the future.
+ */
+ WARN_ON_ONCE(env->insn_idx != prev_insn_idx + 1);
+process_bpf_exit:
+ mark_verifier_state_scratched(env);
+ err = update_branch_counts(env, env->cur_state);
if (err)
return err;
- } else if (class == BPF_JMP || class == BPF_JMP32) {
- u8 opcode = BPF_OP(insn->code);
-
- env->jmps_processed++;
- if (opcode == BPF_CALL) {
- if (BPF_SRC(insn->code) != BPF_K ||
- (insn->src_reg != BPF_PSEUDO_KFUNC_CALL
- && insn->off != 0) ||
- (insn->src_reg != BPF_REG_0 &&
- insn->src_reg != BPF_PSEUDO_CALL &&
- insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
- insn->dst_reg != BPF_REG_0 ||
- class == BPF_JMP32) {
- verbose(env, "BPF_CALL uses reserved fields\n");
- return -EINVAL;
- }
-
- if (env->cur_state->active_locks) {
- if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) ||
- (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
- (insn->off != 0 || !kfunc_spin_allowed(insn->imm)))) {
- verbose(env, "function calls are not allowed while holding a lock\n");
- return -EINVAL;
- }
- }
- if (insn->src_reg == BPF_PSEUDO_CALL) {
- err = check_func_call(env, insn, &env->insn_idx);
- } else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
- err = check_kfunc_call(env, insn, &env->insn_idx);
- if (!err && is_bpf_throw_kfunc(insn)) {
- exception_exit = true;
- goto process_bpf_exit_full;
- }
- } else {
- err = check_helper_call(env, insn, &env->insn_idx);
- }
- if (err)
- return err;
-
- mark_reg_scratched(env, BPF_REG_0);
- } else if (opcode == BPF_JA) {
- if (BPF_SRC(insn->code) != BPF_K ||
- insn->src_reg != BPF_REG_0 ||
- insn->dst_reg != BPF_REG_0 ||
- (class == BPF_JMP && insn->imm != 0) ||
- (class == BPF_JMP32 && insn->off != 0)) {
- verbose(env, "BPF_JA uses reserved fields\n");
- return -EINVAL;
- }
-
- if (class == BPF_JMP)
- env->insn_idx += insn->off + 1;
- else
- env->insn_idx += insn->imm + 1;
- continue;
-
- } else if (opcode == BPF_EXIT) {
- if (BPF_SRC(insn->code) != BPF_K ||
- insn->imm != 0 ||
- insn->src_reg != BPF_REG_0 ||
- insn->dst_reg != BPF_REG_0 ||
- class == BPF_JMP32) {
- verbose(env, "BPF_EXIT uses reserved fields\n");
- return -EINVAL;
- }
-process_bpf_exit_full:
- /* We must do check_reference_leak here before
- * prepare_func_exit to handle the case when
- * state->curframe > 0, it may be a callback
- * function, for which reference_state must
- * match caller reference state when it exits.
- */
- err = check_resource_leak(env, exception_exit, !env->cur_state->curframe,
- "BPF_EXIT instruction in main prog");
- if (err)
- return err;
-
- /* The side effect of the prepare_func_exit
- * which is being skipped is that it frees
- * bpf_func_state. Typically, process_bpf_exit
- * will only be hit with outermost exit.
- * copy_verifier_state in pop_stack will handle
- * freeing of any extra bpf_func_state left over
- * from not processing all nested function
- * exits. We also skip return code checks as
- * they are not needed for exceptional exits.
- */
- if (exception_exit)
- goto process_bpf_exit;
-
- if (state->curframe) {
- /* exit from nested function */
- err = prepare_func_exit(env, &env->insn_idx);
- if (err)
- return err;
- do_print_state = true;
- continue;
- }
-
- err = check_return_code(env, BPF_REG_0, "R0");
- if (err)
- return err;
-process_bpf_exit:
- mark_verifier_state_scratched(env);
- update_branch_counts(env, env->cur_state);
- err = pop_stack(env, &prev_insn_idx,
- &env->insn_idx, pop_log);
- if (err < 0) {
- if (err != -ENOENT)
- return err;
- break;
- } else {
- if (verifier_bug_if(env->cur_state->loop_entry, env,
- "broken loop detection"))
- return -EFAULT;
- do_print_state = true;
- continue;
- }
- } else {
- err = check_cond_jmp_op(env, insn, &env->insn_idx);
- if (err)
- return err;
- }
- } else if (class == BPF_LD) {
- u8 mode = BPF_MODE(insn->code);
-
- if (mode == BPF_ABS || mode == BPF_IND) {
- err = check_ld_abs(env, insn);
- if (err)
- return err;
-
- } else if (mode == BPF_IMM) {
- err = check_ld_imm(env, insn);
- if (err)
+ err = pop_stack(env, &prev_insn_idx, &env->insn_idx,
+ pop_log);
+ if (err < 0) {
+ if (err != -ENOENT)
return err;
-
- env->insn_idx++;
- sanitize_mark_insn_seen(env);
+ break;
} else {
- verbose(env, "invalid BPF_LD mode\n");
- return -EINVAL;
+ do_print_state = true;
+ continue;
}
- } else {
- verbose(env, "unknown insn class %d\n", class);
- return -EINVAL;
}
-
- env->insn_idx++;
}
return 0;
@@ -20797,7 +21132,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
-(subprogs[0].stack_depth + 8));
if (epilogue_cnt >= INSN_BUF_SIZE) {
verbose(env, "bpf verifier is misconfigured\n");
- return -EINVAL;
+ return -EFAULT;
} else if (epilogue_cnt) {
/* Save the ARG_PTR_TO_CTX for the epilogue to use */
cnt = 0;
@@ -20820,13 +21155,13 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
if (ops->gen_prologue || env->seen_direct_write) {
if (!ops->gen_prologue) {
verbose(env, "bpf verifier is misconfigured\n");
- return -EINVAL;
+ return -EFAULT;
}
cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
env->prog);
if (cnt >= INSN_BUF_SIZE) {
verbose(env, "bpf verifier is misconfigured\n");
- return -EINVAL;
+ return -EFAULT;
} else if (cnt) {
new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
if (!new_prog)
@@ -20853,6 +21188,29 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
bpf_convert_ctx_access_t convert_ctx_access;
u8 mode;
+ if (env->insn_aux_data[i + delta].nospec) {
+ WARN_ON_ONCE(env->insn_aux_data[i + delta].alu_state);
+ struct bpf_insn patch[] = {
+ BPF_ST_NOSPEC(),
+ *insn,
+ };
+
+ cnt = ARRAY_SIZE(patch);
+ new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ /* This cannot be easily merged with the
+ * nospec_result case, because an insn may require a
+ * nospec both before and after itself. Therefore do not
+ * 'continue' here but potentially apply further
+ * patching to insn. *insn should equal patch[1] now.
+ */
+ }
+
if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
@@ -20902,7 +21260,10 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
}
if (type == BPF_WRITE &&
- env->insn_aux_data[i + delta].sanitize_stack_spill) {
+ env->insn_aux_data[i + delta].nospec_result) {
+ /* nospec_result is only used to mitigate Spectre v4 and
+ * to limit verification time for Spectre v1.
+ */
struct bpf_insn patch[] = {
*insn,
BPF_ST_NOSPEC(),
@@ -20944,6 +21305,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
* for this case.
*/
case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED:
+ case PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED:
if (type == BPF_READ) {
if (BPF_MODE(insn->code) == BPF_MEM)
insn->code = BPF_LDX | BPF_PROBE_MEM |
@@ -20983,7 +21345,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
if (type == BPF_WRITE) {
verbose(env, "bpf verifier narrow ctx access misconfigured\n");
- return -EINVAL;
+ return -EFAULT;
}
size_code = BPF_H;
@@ -21002,7 +21364,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
if (cnt == 0 || cnt >= INSN_BUF_SIZE ||
(ctx_field_size && !target_size)) {
verbose(env, "bpf verifier is misconfigured\n");
- return -EINVAL;
+ return -EFAULT;
}
if (is_narrower_load && size < target_size) {
@@ -21010,7 +21372,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
off, size, size_default) * 8;
if (shift && cnt + 1 >= INSN_BUF_SIZE) {
verbose(env, "bpf verifier narrow ctx load misconfigured\n");
- return -EINVAL;
+ return -EFAULT;
}
if (ctx_field_size <= 4) {
if (shift)
@@ -21775,7 +22137,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
cnt = env->ops->gen_ld_abs(insn, insn_buf);
if (cnt == 0 || cnt >= INSN_BUF_SIZE) {
verbose(env, "bpf verifier is misconfigured\n");
- return -EINVAL;
+ return -EFAULT;
}
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
@@ -22111,7 +22473,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
goto patch_map_ops_generic;
if (cnt <= 0 || cnt >= INSN_BUF_SIZE) {
verbose(env, "bpf verifier is misconfigured\n");
- return -EINVAL;
+ return -EFAULT;
}
new_prog = bpf_patch_insn_data(env, i + delta,
@@ -22471,7 +22833,7 @@ next_insn:
!map_ptr->ops->map_poke_untrack ||
!map_ptr->ops->map_poke_run) {
verbose(env, "bpf verifier is misconfigured\n");
- return -EINVAL;
+ return -EFAULT;
}
ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
@@ -22661,7 +23023,12 @@ static void free_states(struct bpf_verifier_env *env)
{
struct bpf_verifier_state_list *sl;
struct list_head *head, *pos, *tmp;
- int i;
+ struct bpf_scc_info *info;
+ int i, j;
+
+ free_verifier_state(env->cur_state, true);
+ env->cur_state = NULL;
+ while (!pop_stack(env, NULL, NULL, false));
list_for_each_safe(pos, tmp, &env->free_list) {
sl = container_of(pos, struct bpf_verifier_state_list, node);
@@ -22670,6 +23037,14 @@ static void free_states(struct bpf_verifier_env *env)
}
INIT_LIST_HEAD(&env->free_list);
+ for (i = 0; i < env->scc_cnt; ++i) {
+ info = env->scc_info[i];
+ for (j = 0; j < info->num_visits; j++)
+ free_backedges(&info->visits[j]);
+ kvfree(info);
+ env->scc_info[i] = NULL;
+ }
+
if (!env->explored_states)
return;
@@ -22697,13 +23072,13 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
env->prev_linfo = NULL;
env->pass_cnt++;
- state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
+ state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL_ACCOUNT);
if (!state)
return -ENOMEM;
state->curframe = 0;
state->speculative = false;
state->branches = 1;
- state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
+ state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL_ACCOUNT);
if (!state->frame[0]) {
kfree(state);
return -ENOMEM;
@@ -22806,14 +23181,6 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
ret = do_check(env);
out:
- /* check for NULL is necessary, since cur_state can be freed inside
- * do_check() under memory pressure.
- */
- if (env->cur_state) {
- free_verifier_state(env->cur_state, true);
- env->cur_state = NULL;
- }
- while (!pop_stack(env, NULL, NULL, false));
if (!ret && pop_log)
bpf_vlog_reset(&env->log, 0);
free_states(env);
@@ -22929,7 +23296,7 @@ static void print_verification_stats(struct bpf_verifier_env *env)
int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog,
const struct bpf_ctx_arg_aux *info, u32 cnt)
{
- prog->aux->ctx_arg_info = kmemdup_array(info, cnt, sizeof(*info), GFP_KERNEL);
+ prog->aux->ctx_arg_info = kmemdup_array(info, cnt, sizeof(*info), GFP_KERNEL_ACCOUNT);
prog->aux->ctx_arg_info_size = cnt;
return prog->aux->ctx_arg_info ? 0 : -ENOMEM;
@@ -23671,6 +24038,7 @@ static bool can_jump(struct bpf_insn *insn)
case BPF_JSLT:
case BPF_JSLE:
case BPF_JCOND:
+ case BPF_JSET:
return true;
}
@@ -23873,7 +24241,7 @@ static int compute_live_registers(struct bpf_verifier_env *env)
* - repeat the computation while {in,out} fields changes for
* any instruction.
*/
- state = kvcalloc(insn_cnt, sizeof(*state), GFP_KERNEL);
+ state = kvcalloc(insn_cnt, sizeof(*state), GFP_KERNEL_ACCOUNT);
if (!state) {
err = -ENOMEM;
goto out;
@@ -23911,6 +24279,10 @@ static int compute_live_registers(struct bpf_verifier_env *env)
if (env->log.level & BPF_LOG_LEVEL2) {
verbose(env, "Live regs before insn:\n");
for (i = 0; i < insn_cnt; ++i) {
+ if (env->insn_aux_data[i].scc)
+ verbose(env, "%3d ", env->insn_aux_data[i].scc);
+ else
+ verbose(env, " ");
verbose(env, "%3d: ", i);
for (j = BPF_REG_0; j < BPF_REG_10; ++j)
if (insn_aux[i].live_regs_before & BIT(j))
@@ -23932,6 +24304,185 @@ out:
return err;
}
+/*
+ * Compute strongly connected components (SCCs) on the CFG.
+ * Assign an SCC number to each instruction, recorded in env->insn_aux[*].scc.
+ * If instruction is a sole member of its SCC and there are no self edges,
+ * assign it SCC number of zero.
+ * Uses a non-recursive adaptation of Tarjan's algorithm for SCC computation.
+ */
+static int compute_scc(struct bpf_verifier_env *env)
+{
+ const u32 NOT_ON_STACK = U32_MAX;
+
+ struct bpf_insn_aux_data *aux = env->insn_aux_data;
+ const u32 insn_cnt = env->prog->len;
+ int stack_sz, dfs_sz, err = 0;
+ u32 *stack, *pre, *low, *dfs;
+ u32 succ_cnt, i, j, t, w;
+ u32 next_preorder_num;
+ u32 next_scc_id;
+ bool assign_scc;
+ u32 succ[2];
+
+ next_preorder_num = 1;
+ next_scc_id = 1;
+ /*
+ * - 'stack' accumulates vertices in DFS order, see invariant comment below;
+ * - 'pre[t] == p' => preorder number of vertex 't' is 'p';
+ * - 'low[t] == n' => smallest preorder number of the vertex reachable from 't' is 'n';
+ * - 'dfs' DFS traversal stack, used to emulate explicit recursion.
+ */
+ stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
+ pre = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
+ low = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
+ dfs = kvcalloc(insn_cnt, sizeof(*dfs), GFP_KERNEL_ACCOUNT);
+ if (!stack || !pre || !low || !dfs) {
+ err = -ENOMEM;
+ goto exit;
+ }
+ /*
+ * References:
+ * [1] R. Tarjan "Depth-First Search and Linear Graph Algorithms"
+ * [2] D. J. Pearce "A Space-Efficient Algorithm for Finding Strongly Connected Components"
+ *
+ * The algorithm maintains the following invariant:
+ * - suppose there is a path 'u' ~> 'v', such that 'pre[v] < pre[u]';
+ * - then, vertex 'u' remains on stack while vertex 'v' is on stack.
+ *
+ * Consequently:
+ * - If 'low[v] < pre[v]', there is a path from 'v' to some vertex 'u',
+ * such that 'pre[u] == low[v]'; vertex 'u' is currently on the stack,
+ * and thus there is an SCC (loop) containing both 'u' and 'v'.
+ * - If 'low[v] == pre[v]', loops containing 'v' have been explored,
+ * and 'v' can be considered the root of some SCC.
+ *
+ * Here is a pseudo-code for an explicitly recursive version of the algorithm:
+ *
+ * NOT_ON_STACK = insn_cnt + 1
+ * pre = [0] * insn_cnt
+ * low = [0] * insn_cnt
+ * scc = [0] * insn_cnt
+ * stack = []
+ *
+ * next_preorder_num = 1
+ * next_scc_id = 1
+ *
+ * def recur(w):
+ * nonlocal next_preorder_num
+ * nonlocal next_scc_id
+ *
+ * pre[w] = next_preorder_num
+ * low[w] = next_preorder_num
+ * next_preorder_num += 1
+ * stack.append(w)
+ * for s in successors(w):
+ * # Note: for the classic algorithm the block below would look like:
+ * #
+ * # if pre[s] == 0:
+ * # recur(s)
+ * # low[w] = min(low[w], low[s])
+ * # elif low[s] != NOT_ON_STACK:
+ * # low[w] = min(low[w], pre[s])
+ * #
+ * # But replacing both 'min' instructions with 'low[w] = min(low[w], low[s])'
+ * # does not break the invariant and makes the iterative version of the
+ * # algorithm simpler. See 'Algorithm #3' from [2].
+ *
+ * # 's' not yet visited
+ * if pre[s] == 0:
+ * recur(s)
+ * # if 's' is on stack, pick lowest reachable preorder number from it;
+ * # if 's' is not on stack 'low[s] == NOT_ON_STACK > low[w]',
+ * # so 'min' would be a noop.
+ * low[w] = min(low[w], low[s])
+ *
+ * if low[w] == pre[w]:
+ * # 'w' is the root of an SCC, pop all vertices
+ * # below 'w' on stack and assign same SCC to them.
+ * while True:
+ * t = stack.pop()
+ * low[t] = NOT_ON_STACK
+ * scc[t] = next_scc_id
+ * if t == w:
+ * break
+ * next_scc_id += 1
+ *
+ * for i in range(0, insn_cnt):
+ * if pre[i] == 0:
+ * recur(i)
+ *
+ * The implementation below replaces explicit recursion with the array 'dfs'.
+ */
+ for (i = 0; i < insn_cnt; i++) {
+ if (pre[i])
+ continue;
+ stack_sz = 0;
+ dfs_sz = 1;
+ dfs[0] = i;
+dfs_continue:
+ while (dfs_sz) {
+ w = dfs[dfs_sz - 1];
+ if (pre[w] == 0) {
+ low[w] = next_preorder_num;
+ pre[w] = next_preorder_num;
+ next_preorder_num++;
+ stack[stack_sz++] = w;
+ }
+ /* Visit 'w' successors */
+ succ_cnt = insn_successors(env->prog, w, succ);
+ for (j = 0; j < succ_cnt; ++j) {
+ if (pre[succ[j]]) {
+ low[w] = min(low[w], low[succ[j]]);
+ } else {
+ dfs[dfs_sz++] = succ[j];
+ goto dfs_continue;
+ }
+ }
+ /*
+ * Preserve the invariant: if some vertex above in the stack
+ * is reachable from 'w', keep 'w' on the stack.
+ */
+ if (low[w] < pre[w]) {
+ dfs_sz--;
+ goto dfs_continue;
+ }
+ /*
+ * Assign SCC number only if component has two or more elements,
+ * or if component has a self reference.
+ */
+ assign_scc = stack[stack_sz - 1] != w;
+ for (j = 0; j < succ_cnt; ++j) {
+ if (succ[j] == w) {
+ assign_scc = true;
+ break;
+ }
+ }
+ /* Pop component elements from stack */
+ do {
+ t = stack[--stack_sz];
+ low[t] = NOT_ON_STACK;
+ if (assign_scc)
+ aux[t].scc = next_scc_id;
+ } while (t != w);
+ if (assign_scc)
+ next_scc_id++;
+ dfs_sz--;
+ }
+ }
+ env->scc_info = kvcalloc(next_scc_id, sizeof(*env->scc_info), GFP_KERNEL_ACCOUNT);
+ if (!env->scc_info) {
+ err = -ENOMEM;
+ goto exit;
+ }
+exit:
+ kvfree(stack);
+ kvfree(pre);
+ kvfree(low);
+ kvfree(dfs);
+ return err;
+}
+
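For experimentation outside the kernel, the same iterative Tarjan/Pearce scheme can be reproduced as a small user-space program. The demo graph, the two-successor-per-node limit (mirroring insn_successors()), and the array sizes below are assumptions made for the sketch, not verifier code:

#include <stdio.h>

#define N		6
#define NOT_ON_STACK	(N + 1)

/* demo CFG: 0 -> 1 -> 2 -> 0 form one SCC, 3 <-> 4 form another, 5 stands alone */
static const int edges[N][2] = {
	{ 1, -1 }, { 2, -1 }, { 0, -1 }, { 4, -1 }, { 3, -1 }, { -1, -1 },
};

static int pre[N], low[N], scc[N], stack[N], dfs[N];

int main(void)
{
	int next_pre = 1, next_scc = 1;
	int stack_sz, dfs_sz, i, j, w, t;

	for (i = 0; i < N; i++) {
		if (pre[i])
			continue;
		stack_sz = 0;
		dfs_sz = 1;
		dfs[0] = i;
		while (dfs_sz) {
			int pushed = 0, assign;

			w = dfs[dfs_sz - 1];
			if (!pre[w]) {
				pre[w] = low[w] = next_pre++;
				stack[stack_sz++] = w;
			}
			/* visit successors, descending into the first unvisited one */
			for (j = 0; j < 2 && edges[w][j] >= 0; j++) {
				int s = edges[w][j];

				if (!pre[s]) {
					dfs[dfs_sz++] = s;
					pushed = 1;
					break;
				}
				if (low[s] < low[w])
					low[w] = low[s];
			}
			if (pushed)
				continue;
			if (low[w] < pre[w]) {
				/* 'w' reaches an on-stack vertex with a smaller
				 * preorder number, so it is not an SCC root yet
				 */
				dfs_sz--;
				continue;
			}
			/* 'w' is an SCC root: number the component only if it loops */
			assign = stack[stack_sz - 1] != w;
			for (j = 0; j < 2 && edges[w][j] >= 0; j++)
				if (edges[w][j] == w)
					assign = 1;
			do {
				t = stack[--stack_sz];
				low[t] = NOT_ON_STACK;
				if (assign)
					scc[t] = next_scc;
			} while (t != w);
			if (assign)
				next_scc++;
			dfs_sz--;
		}
	}
	for (i = 0; i < N; i++)
		printf("node %d: scc %d\n", i, scc[i]);
	return 0;
}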
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
{
u64 start_time = ktime_get_ns();
@@ -23940,6 +24491,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
u32 log_true_size;
bool is_priv;
+ BTF_TYPE_EMIT(enum bpf_features);
+
/* no program is valid */
if (ARRAY_SIZE(bpf_verifier_ops) == 0)
return -EINVAL;
@@ -23947,7 +24500,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
/* 'struct bpf_verifier_env' can be global, but since it's not small,
* allocate/free it every time bpf_check() is called
*/
- env = kvzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
+ env = kvzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL_ACCOUNT);
if (!env)
return -ENOMEM;
@@ -24010,7 +24563,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
env->explored_states = kvcalloc(state_htab_size(env),
sizeof(struct list_head),
- GFP_USER);
+ GFP_KERNEL_ACCOUNT);
ret = -ENOMEM;
if (!env->explored_states)
goto skip_full_check;
@@ -24053,6 +24606,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
if (ret)
goto skip_full_check;
+ ret = compute_scc(env);
+ if (ret < 0)
+ goto skip_full_check;
+
ret = compute_live_registers(env);
if (ret < 0)
goto skip_full_check;
@@ -24137,7 +24694,7 @@ skip_full_check:
/* if program passed verifier, update used_maps in bpf_prog_info */
env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
sizeof(env->used_maps[0]),
- GFP_KERNEL);
+ GFP_KERNEL_ACCOUNT);
if (!env->prog->aux->used_maps) {
ret = -ENOMEM;
@@ -24152,7 +24709,7 @@ skip_full_check:
/* if program passed verifier, update used_btfs in bpf_prog_aux */
env->prog->aux->used_btfs = kmalloc_array(env->used_btf_cnt,
sizeof(env->used_btfs[0]),
- GFP_KERNEL);
+ GFP_KERNEL_ACCOUNT);
if (!env->prog->aux->used_btfs) {
ret = -ENOMEM;
goto err_release_maps;
@@ -24193,9 +24750,9 @@ err_unlock:
if (!is_priv)
mutex_unlock(&bpf_verifier_lock);
vfree(env->insn_aux_data);
- kvfree(env->insn_hist);
err_free_env:
kvfree(env->cfg.insn_postorder);
+ kvfree(env->scc_info);
kvfree(env);
return ret;
}
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index a723b7dc6e4e..312c6a8b55bb 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2074,6 +2074,11 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
for_each_subsys(ss, ssid)
INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
+#ifdef CONFIG_CGROUP_BPF
+ for (int i = 0; i < ARRAY_SIZE(cgrp->bpf.revisions); i++)
+ cgrp->bpf.revisions[i] = 1;
+#endif
+
init_waitqueue_head(&cgrp->offline_waitq);
INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent);
}
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 132c8be6f635..0a06ea6638fe 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1270,7 +1270,7 @@ __bpf_kfunc_start_defs();
* Return: a bpf_key pointer with a valid key pointer if the key is found, a
* NULL pointer otherwise.
*/
-__bpf_kfunc struct bpf_key *bpf_lookup_user_key(u32 serial, u64 flags)
+__bpf_kfunc struct bpf_key *bpf_lookup_user_key(s32 serial, u64 flags)
{
key_ref_t key_ref;
struct bpf_key *bkey;
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index aaf13a7d58ed..9728dbd4c66c 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -1255,7 +1255,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
headroom -= ctx->data;
}
- max_data_sz = 4096 - headroom - tailroom;
+ max_data_sz = PAGE_SIZE - headroom - tailroom;
if (size > max_data_sz) {
/* disallow live data mode for jumbo frames */
if (do_live)
diff --git a/tools/bpf/bpf_jit_disasm.c b/tools/bpf/bpf_jit_disasm.c
index 1baee9e2aba9..5ab8f80e2834 100644
--- a/tools/bpf/bpf_jit_disasm.c
+++ b/tools/bpf/bpf_jit_disasm.c
@@ -45,6 +45,8 @@ static void get_exec_path(char *tpath, size_t size)
assert(path);
len = readlink(path, tpath, size);
+ if (len < 0)
+ len = 0;
tpath[len] = 0;
free(path);
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 6b14cbfa58aa..946612029dee 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -905,7 +905,8 @@ static int do_dump(int argc, char **argv)
return -1;
}
- fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
+ fd = map_parse_fd_and_info(&argc, &argv, &info, &len,
+ BPF_F_RDONLY);
if (fd < 0)
return -1;
@@ -1118,10 +1119,13 @@ build_btf_type_table(struct hashmap *tab, enum bpf_obj_type type,
[BPF_OBJ_PROG] = "prog",
[BPF_OBJ_MAP] = "map",
};
+ LIBBPF_OPTS(bpf_get_fd_by_id_opts, opts_ro);
__u32 btf_id, id = 0;
int err;
int fd;
+ opts_ro.open_flags = BPF_F_RDONLY;
+
while (true) {
switch (type) {
case BPF_OBJ_PROG:
@@ -1151,7 +1155,7 @@ build_btf_type_table(struct hashmap *tab, enum bpf_obj_type type,
fd = bpf_prog_get_fd_by_id(id);
break;
case BPF_OBJ_MAP:
- fd = bpf_map_get_fd_by_id(id);
+ fd = bpf_map_get_fd_by_id_opts(id, &opts_ro);
break;
default:
err = -1;
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index ecfa790adc13..b07317d2842f 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -4,6 +4,7 @@
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
+#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
@@ -193,7 +194,8 @@ int mount_tracefs(const char *target)
return err;
}
-int open_obj_pinned(const char *path, bool quiet)
+int open_obj_pinned(const char *path, bool quiet,
+ const struct bpf_obj_get_opts *opts)
{
char *pname;
int fd = -1;
@@ -205,7 +207,7 @@ int open_obj_pinned(const char *path, bool quiet)
goto out_ret;
}
- fd = bpf_obj_get(pname);
+ fd = bpf_obj_get_opts(pname, opts);
if (fd < 0) {
if (!quiet)
p_err("bpf obj get (%s): %s", pname,
@@ -221,12 +223,13 @@ out_ret:
return fd;
}
-int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type)
+int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type,
+ const struct bpf_obj_get_opts *opts)
{
enum bpf_obj_type type;
int fd;
- fd = open_obj_pinned(path, false);
+ fd = open_obj_pinned(path, false, opts);
if (fd < 0)
return -1;
@@ -555,7 +558,7 @@ static int do_build_table_cb(const char *fpath, const struct stat *sb,
if (typeflag != FTW_F)
goto out_ret;
- fd = open_obj_pinned(fpath, true);
+ fd = open_obj_pinned(fpath, true, NULL);
if (fd < 0)
goto out_ret;
@@ -928,7 +931,7 @@ int prog_parse_fds(int *argc, char ***argv, int **fds)
path = **argv;
NEXT_ARGP();
- (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_PROG);
+ (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_PROG, NULL);
if ((*fds)[0] < 0)
return -1;
return 1;
@@ -965,7 +968,8 @@ exit_free:
return fd;
}
-static int map_fd_by_name(char *name, int **fds)
+static int map_fd_by_name(char *name, int **fds,
+ const struct bpf_get_fd_by_id_opts *opts)
{
unsigned int id = 0;
int fd, nb_fds = 0;
@@ -973,6 +977,7 @@ static int map_fd_by_name(char *name, int **fds)
int err;
while (true) {
+ LIBBPF_OPTS(bpf_get_fd_by_id_opts, opts_ro);
struct bpf_map_info info = {};
__u32 len = sizeof(info);
@@ -985,7 +990,9 @@ static int map_fd_by_name(char *name, int **fds)
return nb_fds;
}
- fd = bpf_map_get_fd_by_id(id);
+ /* Request a read-only fd to query the map info */
+ opts_ro.open_flags = BPF_F_RDONLY;
+ fd = bpf_map_get_fd_by_id_opts(id, &opts_ro);
if (fd < 0) {
p_err("can't get map by id (%u): %s",
id, strerror(errno));
@@ -1004,6 +1011,19 @@ static int map_fd_by_name(char *name, int **fds)
continue;
}
+ /* Get an fd with the requested options, if they differ
+ * from the read-only options used to get the fd above.
+ */
+ if (memcmp(opts, &opts_ro, sizeof(opts_ro))) {
+ close(fd);
+ fd = bpf_map_get_fd_by_id_opts(id, opts);
+ if (fd < 0) {
+ p_err("can't get map by id (%u): %s", id,
+ strerror(errno));
+ goto err_close_fds;
+ }
+ }
+
if (nb_fds > 0) {
tmp = realloc(*fds, (nb_fds + 1) * sizeof(int));
if (!tmp) {
@@ -1023,8 +1043,13 @@ err_close_fds:
return -1;
}
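The read-only open pattern used by map_fd_by_name() above is available to any libbpf application, not just bpftool; a minimal sketch, where the map id is a placeholder supplied by the caller:

#include <bpf/bpf.h>

/* Return an fd that can read but not update the map, or a negative value on error. */
static int open_map_rdonly(__u32 map_id)
{
	LIBBPF_OPTS(bpf_get_fd_by_id_opts, opts,
		.open_flags = BPF_F_RDONLY,
	);

	return bpf_map_get_fd_by_id_opts(map_id, &opts);
}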
-int map_parse_fds(int *argc, char ***argv, int **fds)
+int map_parse_fds(int *argc, char ***argv, int **fds, __u32 open_flags)
{
+ LIBBPF_OPTS(bpf_get_fd_by_id_opts, opts);
+
+ assert((open_flags & ~BPF_F_RDONLY) == 0);
+ opts.open_flags = open_flags;
+
if (is_prefix(**argv, "id")) {
unsigned int id;
char *endptr;
@@ -1038,7 +1063,7 @@ int map_parse_fds(int *argc, char ***argv, int **fds)
}
NEXT_ARGP();
- (*fds)[0] = bpf_map_get_fd_by_id(id);
+ (*fds)[0] = bpf_map_get_fd_by_id_opts(id, &opts);
if ((*fds)[0] < 0) {
p_err("get map by id (%u): %s", id, strerror(errno));
return -1;
@@ -1056,16 +1081,18 @@ int map_parse_fds(int *argc, char ***argv, int **fds)
}
NEXT_ARGP();
- return map_fd_by_name(name, fds);
+ return map_fd_by_name(name, fds, &opts);
} else if (is_prefix(**argv, "pinned")) {
char *path;
+ LIBBPF_OPTS(bpf_obj_get_opts, get_opts);
+ get_opts.file_flags = open_flags;
NEXT_ARGP();
path = **argv;
NEXT_ARGP();
- (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_MAP);
+ (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_MAP, &get_opts);
if ((*fds)[0] < 0)
return -1;
return 1;
@@ -1075,7 +1102,7 @@ int map_parse_fds(int *argc, char ***argv, int **fds)
return -1;
}
-int map_parse_fd(int *argc, char ***argv)
+int map_parse_fd(int *argc, char ***argv, __u32 open_flags)
{
int *fds = NULL;
int nb_fds, fd;
@@ -1085,7 +1112,7 @@ int map_parse_fd(int *argc, char ***argv)
p_err("mem alloc failed");
return -1;
}
- nb_fds = map_parse_fds(argc, argv, &fds);
+ nb_fds = map_parse_fds(argc, argv, &fds, open_flags);
if (nb_fds != 1) {
if (nb_fds > 1) {
p_err("several maps match this handle");
@@ -1103,12 +1130,12 @@ exit_free:
}
int map_parse_fd_and_info(int *argc, char ***argv, struct bpf_map_info *info,
- __u32 *info_len)
+ __u32 *info_len, __u32 open_flags)
{
int err;
int fd;
- fd = map_parse_fd(argc, argv);
+ fd = map_parse_fd(argc, argv, open_flags);
if (fd < 0)
return -1;
diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c
index 5c39c2ed36a2..df5f0d1e07e8 100644
--- a/tools/bpf/bpftool/iter.c
+++ b/tools/bpf/bpftool/iter.c
@@ -37,7 +37,7 @@ static int do_pin(int argc, char **argv)
return -1;
}
- map_fd = map_parse_fd(&argc, &argv);
+ map_fd = map_parse_fd(&argc, &argv, BPF_F_RDONLY);
if (map_fd < 0)
return -1;
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index 3535afc80a49..a773e05d5ade 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -117,7 +117,7 @@ static int link_parse_fd(int *argc, char ***argv)
path = **argv;
NEXT_ARGP();
- return open_obj_pinned_any(path, BPF_OBJ_LINK);
+ return open_obj_pinned_any(path, BPF_OBJ_LINK, NULL);
}
p_err("expected 'id' or 'pinned', got: '%s'?", **argv);
@@ -485,6 +485,7 @@ static int show_link_close_json(int fd, struct bpf_link_info *info)
case BPF_LINK_TYPE_RAW_TRACEPOINT:
jsonw_string_field(json_wtr, "tp_name",
u64_to_ptr(info->raw_tracepoint.tp_name));
+ jsonw_uint_field(json_wtr, "cookie", info->raw_tracepoint.cookie);
break;
case BPF_LINK_TYPE_TRACING:
err = get_prog_info(info->prog_id, &prog_info);
@@ -502,6 +503,7 @@ static int show_link_close_json(int fd, struct bpf_link_info *info)
json_wtr);
jsonw_uint_field(json_wtr, "target_obj_id", info->tracing.target_obj_id);
jsonw_uint_field(json_wtr, "target_btf_id", info->tracing.target_btf_id);
+ jsonw_uint_field(json_wtr, "cookie", info->tracing.cookie);
break;
case BPF_LINK_TYPE_CGROUP:
jsonw_lluint_field(json_wtr, "cgroup_id",
@@ -879,6 +881,8 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info)
case BPF_LINK_TYPE_RAW_TRACEPOINT:
printf("\n\ttp '%s' ",
(const char *)u64_to_ptr(info->raw_tracepoint.tp_name));
+ if (info->raw_tracepoint.cookie)
+ printf("cookie %llu ", info->raw_tracepoint.cookie);
break;
case BPF_LINK_TYPE_TRACING:
err = get_prog_info(info->prog_id, &prog_info);
@@ -897,6 +901,8 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info)
printf("\n\ttarget_obj_id %u target_btf_id %u ",
info->tracing.target_obj_id,
info->tracing.target_btf_id);
+ if (info->tracing.cookie)
+ printf("\n\tcookie %llu ", info->tracing.cookie);
break;
case BPF_LINK_TYPE_CGROUP:
printf("\n\tcgroup_id %zu ", (size_t)info->cgroup.cgroup_id);
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index cd5963cb6058..2b7f2bd3a7db 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -534,9 +534,9 @@ int main(int argc, char **argv)
usage();
if (version_requested)
- return do_version(argc, argv);
-
- ret = cmd_select(commands, argc, argv, do_help);
+ ret = do_version(argc, argv);
+ else
+ ret = cmd_select(commands, argc, argv, do_help);
if (json_output)
jsonw_destroy(&json_wtr);
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 9eb764fe4cc8..6db704fda5c0 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -15,6 +15,7 @@
#include <bpf/hashmap.h>
#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
#include "json_writer.h"
@@ -140,8 +141,10 @@ void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd,
int get_fd_type(int fd);
const char *get_fd_type_name(enum bpf_obj_type type);
char *get_fdinfo(int fd, const char *key);
-int open_obj_pinned(const char *path, bool quiet);
-int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type);
+int open_obj_pinned(const char *path, bool quiet,
+ const struct bpf_obj_get_opts *opts);
+int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type,
+ const struct bpf_obj_get_opts *opts);
int mount_bpffs_for_file(const char *file_name);
int create_and_mount_bpffs_dir(const char *dir_name);
int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***));
@@ -167,10 +170,10 @@ int do_iter(int argc, char **argv) __weak;
int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what);
int prog_parse_fd(int *argc, char ***argv);
int prog_parse_fds(int *argc, char ***argv, int **fds);
-int map_parse_fd(int *argc, char ***argv);
-int map_parse_fds(int *argc, char ***argv, int **fds);
+int map_parse_fd(int *argc, char ***argv, __u32 open_flags);
+int map_parse_fds(int *argc, char ***argv, int **fds, __u32 open_flags);
int map_parse_fd_and_info(int *argc, char ***argv, struct bpf_map_info *info,
- __u32 *info_len);
+ __u32 *info_len, __u32 open_flags);
struct bpf_prog_linfo;
#if defined(HAVE_LLVM_SUPPORT) || defined(HAVE_LIBBFD_SUPPORT)
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 81cc668b4b05..c9de44a45778 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -337,9 +337,9 @@ static void fill_per_cpu_value(struct bpf_map_info *info, void *value)
memcpy(value + i * step, value, info->value_size);
}
-static int parse_elem(char **argv, struct bpf_map_info *info,
- void *key, void *value, __u32 key_size, __u32 value_size,
- __u32 *flags, __u32 **value_fd)
+static int parse_elem(char **argv, struct bpf_map_info *info, void *key,
+ void *value, __u32 key_size, __u32 value_size,
+ __u32 *flags, __u32 **value_fd, __u32 open_flags)
{
if (!*argv) {
if (!key && !value)
@@ -362,7 +362,7 @@ static int parse_elem(char **argv, struct bpf_map_info *info,
return -1;
return parse_elem(argv, info, NULL, value, key_size, value_size,
- flags, value_fd);
+ flags, value_fd, open_flags);
} else if (is_prefix(*argv, "value")) {
int fd;
@@ -388,7 +388,7 @@ static int parse_elem(char **argv, struct bpf_map_info *info,
return -1;
}
- fd = map_parse_fd(&argc, &argv);
+ fd = map_parse_fd(&argc, &argv, open_flags);
if (fd < 0)
return -1;
@@ -424,7 +424,7 @@ static int parse_elem(char **argv, struct bpf_map_info *info,
}
return parse_elem(argv, info, key, NULL, key_size, value_size,
- flags, NULL);
+ flags, NULL, open_flags);
} else if (is_prefix(*argv, "any") || is_prefix(*argv, "noexist") ||
is_prefix(*argv, "exist")) {
if (!flags) {
@@ -440,7 +440,7 @@ static int parse_elem(char **argv, struct bpf_map_info *info,
*flags = BPF_EXIST;
return parse_elem(argv + 1, info, key, value, key_size,
- value_size, NULL, value_fd);
+ value_size, NULL, value_fd, open_flags);
}
p_err("expected key or value, got: %s", *argv);
@@ -639,7 +639,7 @@ static int do_show_subset(int argc, char **argv)
p_err("mem alloc failed");
return -1;
}
- nb_fds = map_parse_fds(&argc, &argv, &fds);
+ nb_fds = map_parse_fds(&argc, &argv, &fds, BPF_F_RDONLY);
if (nb_fds < 1)
goto exit_free;
@@ -672,12 +672,15 @@ exit_free:
static int do_show(int argc, char **argv)
{
+ LIBBPF_OPTS(bpf_get_fd_by_id_opts, opts);
struct bpf_map_info info = {};
__u32 len = sizeof(info);
__u32 id = 0;
int err;
int fd;
+ opts.open_flags = BPF_F_RDONLY;
+
if (show_pinned) {
map_table = hashmap__new(hash_fn_for_key_as_id,
equal_fn_for_key_as_id, NULL);
@@ -707,7 +710,7 @@ static int do_show(int argc, char **argv)
break;
}
- fd = bpf_map_get_fd_by_id(id);
+ fd = bpf_map_get_fd_by_id_opts(id, &opts);
if (fd < 0) {
if (errno == ENOENT)
continue;
@@ -909,7 +912,7 @@ static int do_dump(int argc, char **argv)
p_err("mem alloc failed");
return -1;
}
- nb_fds = map_parse_fds(&argc, &argv, &fds);
+ nb_fds = map_parse_fds(&argc, &argv, &fds, BPF_F_RDONLY);
if (nb_fds < 1)
goto exit_free;
@@ -997,7 +1000,7 @@ static int do_update(int argc, char **argv)
if (argc < 2)
usage();
- fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
+ fd = map_parse_fd_and_info(&argc, &argv, &info, &len, 0);
if (fd < 0)
return -1;
@@ -1006,7 +1009,7 @@ static int do_update(int argc, char **argv)
goto exit_free;
err = parse_elem(argv, &info, key, value, info.key_size,
- info.value_size, &flags, &value_fd);
+ info.value_size, &flags, &value_fd, 0);
if (err)
goto exit_free;
@@ -1076,7 +1079,7 @@ static int do_lookup(int argc, char **argv)
if (argc < 2)
usage();
- fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
+ fd = map_parse_fd_and_info(&argc, &argv, &info, &len, BPF_F_RDONLY);
if (fd < 0)
return -1;
@@ -1084,7 +1087,8 @@ static int do_lookup(int argc, char **argv)
if (err)
goto exit_free;
- err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, NULL);
+ err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, NULL,
+ BPF_F_RDONLY);
if (err)
goto exit_free;
@@ -1127,7 +1131,7 @@ static int do_getnext(int argc, char **argv)
if (argc < 2)
usage();
- fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
+ fd = map_parse_fd_and_info(&argc, &argv, &info, &len, BPF_F_RDONLY);
if (fd < 0)
return -1;
@@ -1140,8 +1144,8 @@ static int do_getnext(int argc, char **argv)
}
if (argc) {
- err = parse_elem(argv, &info, key, NULL, info.key_size, 0,
- NULL, NULL);
+ err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL,
+ NULL, BPF_F_RDONLY);
if (err)
goto exit_free;
} else {
@@ -1198,7 +1202,7 @@ static int do_delete(int argc, char **argv)
if (argc < 2)
usage();
- fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
+ fd = map_parse_fd_and_info(&argc, &argv, &info, &len, 0);
if (fd < 0)
return -1;
@@ -1209,7 +1213,8 @@ static int do_delete(int argc, char **argv)
goto exit_free;
}
- err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, NULL);
+ err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, NULL,
+ 0);
if (err)
goto exit_free;
@@ -1226,11 +1231,16 @@ exit_free:
return err;
}
+static int map_parse_read_only_fd(int *argc, char ***argv)
+{
+ return map_parse_fd(argc, argv, BPF_F_RDONLY);
+}
+
static int do_pin(int argc, char **argv)
{
int err;
- err = do_pin_any(argc, argv, map_parse_fd);
+ err = do_pin_any(argc, argv, map_parse_read_only_fd);
if (!err && json_output)
jsonw_null(json_wtr);
return err;
@@ -1319,7 +1329,7 @@ offload_dev:
if (!REQ_ARGS(2))
usage();
inner_map_fd = map_parse_fd_and_info(&argc, &argv,
- &info, &len);
+ &info, &len, BPF_F_RDONLY);
if (inner_map_fd < 0)
return -1;
attr.inner_map_fd = inner_map_fd;
@@ -1368,7 +1378,7 @@ static int do_pop_dequeue(int argc, char **argv)
if (argc < 2)
usage();
- fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
+ fd = map_parse_fd_and_info(&argc, &argv, &info, &len, 0);
if (fd < 0)
return -1;
@@ -1407,7 +1417,7 @@ static int do_freeze(int argc, char **argv)
if (!REQ_ARGS(2))
return -1;
- fd = map_parse_fd(&argc, &argv);
+ fd = map_parse_fd(&argc, &argv, 0);
if (fd < 0)
return -1;
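
The map.c changes above thread an open_flags argument through map_parse_fd()/parse_elem() so that read-only commands (show, dump, lookup, getnext, pin) open map fds with BPF_F_RDONLY while mutating commands keep 0. A minimal sketch of the underlying libbpf call, with a placeholder map id:

#include <bpf/bpf.h>

/* Sketch (not part of the patch): obtain a read-only fd for a map by id. */
static int open_map_rdonly(__u32 map_id)
{
	LIBBPF_OPTS(bpf_get_fd_by_id_opts, opts, .open_flags = BPF_F_RDONLY);

	/* The returned fd should allow lookup/get_next_key but reject writes. */
	return bpf_map_get_fd_by_id_opts(map_id, &opts);
}
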
diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c
index 552b4ca40c27..bcb767e2d673 100644
--- a/tools/bpf/bpftool/map_perf_ring.c
+++ b/tools/bpf/bpftool/map_perf_ring.c
@@ -128,7 +128,8 @@ int do_event_pipe(int argc, char **argv)
int err, map_fd;
map_info_len = sizeof(map_info);
- map_fd = map_parse_fd_and_info(&argc, &argv, &map_info, &map_info_len);
+ map_fd = map_parse_fd_and_info(&argc, &argv, &map_info, &map_info_len,
+ 0);
if (map_fd < 0)
return -1;
diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
index 64f958f437b0..cfc6f944f7c3 100644
--- a/tools/bpf/bpftool/net.c
+++ b/tools/bpf/bpftool/net.c
@@ -366,17 +366,18 @@ static int dump_link_nlmsg(void *cookie, void *msg, struct nlattr **tb)
{
struct bpf_netdev_t *netinfo = cookie;
struct ifinfomsg *ifinfo = msg;
+ struct ip_devname_ifindex *tmp;
if (netinfo->filter_idx > 0 && netinfo->filter_idx != ifinfo->ifi_index)
return 0;
if (netinfo->used_len == netinfo->array_len) {
- netinfo->devices = realloc(netinfo->devices,
- (netinfo->array_len + 16) *
- sizeof(struct ip_devname_ifindex));
- if (!netinfo->devices)
+ tmp = realloc(netinfo->devices,
+ (netinfo->array_len + 16) * sizeof(struct ip_devname_ifindex));
+ if (!tmp)
return -ENOMEM;
+ netinfo->devices = tmp;
netinfo->array_len += 16;
}
netinfo->devices[netinfo->used_len].ifindex = ifinfo->ifi_index;
@@ -395,6 +396,7 @@ static int dump_class_qdisc_nlmsg(void *cookie, void *msg, struct nlattr **tb)
{
struct bpf_tcinfo_t *tcinfo = cookie;
struct tcmsg *info = msg;
+ struct tc_kind_handle *tmp;
if (tcinfo->is_qdisc) {
/* skip clsact qdisc */
@@ -406,11 +408,12 @@ static int dump_class_qdisc_nlmsg(void *cookie, void *msg, struct nlattr **tb)
}
if (tcinfo->used_len == tcinfo->array_len) {
- tcinfo->handle_array = realloc(tcinfo->handle_array,
+ tmp = realloc(tcinfo->handle_array,
(tcinfo->array_len + 16) * sizeof(struct tc_kind_handle));
- if (!tcinfo->handle_array)
+ if (!tmp)
return -ENOMEM;
+ tcinfo->handle_array = tmp;
tcinfo->array_len += 16;
}
tcinfo->handle_array[tcinfo->used_len].handle = info->tcm_handle;
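
Both net.c hunks apply the usual safe-realloc idiom: the result goes into a temporary pointer so the original buffer is not leaked when realloc() fails. A generic sketch of the same pattern, with illustrative names:

#include <stdlib.h>
#include <errno.h>

/* Grow an int array by 'step' elements without losing the old buffer on failure. */
static int grow_array(int **arr, size_t *cap, size_t step)
{
	int *tmp = realloc(*arr, (*cap + step) * sizeof(**arr));

	if (!tmp)
		return -ENOMEM;	/* *arr is untouched and can still be freed by the caller */
	*arr = tmp;
	*cap += step;
	return 0;
}
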
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 96eea8a67225..deeaa5c1ed7d 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -1062,7 +1062,7 @@ static int parse_attach_detach_args(int argc, char **argv, int *progfd,
if (!REQ_ARGS(2))
return -EINVAL;
- *mapfd = map_parse_fd(&argc, &argv);
+ *mapfd = map_parse_fd(&argc, &argv, 0);
if (*mapfd < 0)
return *mapfd;
@@ -1608,7 +1608,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
}
NEXT_ARG();
- fd = map_parse_fd(&argc, &argv);
+ fd = map_parse_fd(&argc, &argv, 0);
if (fd < 0)
goto err_free_reuse_maps;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 0b4a2f124d11..719ba230032f 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1794,6 +1794,13 @@ union bpf_attr {
};
__u64 expected_revision;
} netkit;
+ struct {
+ union {
+ __u32 relative_fd;
+ __u32 relative_id;
+ };
+ __u64 expected_revision;
+ } cgroup;
};
} link_create;
@@ -2403,7 +2410,7 @@ union bpf_attr {
* into it. An example is available in file
* *samples/bpf/trace_output_user.c* in the Linux kernel source
* tree (the eBPF program counterpart is in
- * *samples/bpf/trace_output_kern.c*).
+ * *samples/bpf/trace_output.bpf.c*).
*
* **bpf_perf_event_output**\ () achieves better performance
* than **bpf_trace_printk**\ () for sharing data with user
@@ -6653,11 +6660,15 @@ struct bpf_link_info {
struct {
__aligned_u64 tp_name; /* in/out: tp_name buffer ptr */
__u32 tp_name_len; /* in/out: tp_name buffer len */
+ __u32 :32;
+ __u64 cookie;
} raw_tracepoint;
struct {
__u32 attach_type;
__u32 target_obj_id; /* prog_id for PROG_EXT, otherwise btf object id */
__u32 target_btf_id; /* BTF type id inside the object */
+ __u32 :32;
+ __u64 cookie;
} tracing;
struct {
__u64 cgroup_id;
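
The new cookie fields in struct bpf_link_info are filled in by the kernel and can be read back with bpf_link_get_info_by_fd(); a hedged sketch for the tracing case, assuming link_fd is a valid tracing link:

#include <string.h>
#include <bpf/bpf.h>

/* Sketch: read the attach cookie reported for a tracing link. */
static __u64 tracing_link_cookie(int link_fd)
{
	struct bpf_link_info info;
	__u32 len = sizeof(info);

	memset(&info, 0, sizeof(info));
	if (bpf_link_get_info_by_fd(link_fd, &info, &len))
		return 0;
	if (info.type != BPF_LINK_TYPE_TRACING)
		return 0;
	return info.tracing.cookie;
}
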
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index a9c3e33d0f8a..6eb421ccf91b 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -837,6 +837,50 @@ int bpf_link_create(int prog_fd, int target_fd,
if (!OPTS_ZEROED(opts, netkit))
return libbpf_err(-EINVAL);
break;
+ case BPF_CGROUP_INET_INGRESS:
+ case BPF_CGROUP_INET_EGRESS:
+ case BPF_CGROUP_INET_SOCK_CREATE:
+ case BPF_CGROUP_INET_SOCK_RELEASE:
+ case BPF_CGROUP_INET4_BIND:
+ case BPF_CGROUP_INET6_BIND:
+ case BPF_CGROUP_INET4_POST_BIND:
+ case BPF_CGROUP_INET6_POST_BIND:
+ case BPF_CGROUP_INET4_CONNECT:
+ case BPF_CGROUP_INET6_CONNECT:
+ case BPF_CGROUP_UNIX_CONNECT:
+ case BPF_CGROUP_INET4_GETPEERNAME:
+ case BPF_CGROUP_INET6_GETPEERNAME:
+ case BPF_CGROUP_UNIX_GETPEERNAME:
+ case BPF_CGROUP_INET4_GETSOCKNAME:
+ case BPF_CGROUP_INET6_GETSOCKNAME:
+ case BPF_CGROUP_UNIX_GETSOCKNAME:
+ case BPF_CGROUP_UDP4_SENDMSG:
+ case BPF_CGROUP_UDP6_SENDMSG:
+ case BPF_CGROUP_UNIX_SENDMSG:
+ case BPF_CGROUP_UDP4_RECVMSG:
+ case BPF_CGROUP_UDP6_RECVMSG:
+ case BPF_CGROUP_UNIX_RECVMSG:
+ case BPF_CGROUP_SOCK_OPS:
+ case BPF_CGROUP_DEVICE:
+ case BPF_CGROUP_SYSCTL:
+ case BPF_CGROUP_GETSOCKOPT:
+ case BPF_CGROUP_SETSOCKOPT:
+ case BPF_LSM_CGROUP:
+ relative_fd = OPTS_GET(opts, cgroup.relative_fd, 0);
+ relative_id = OPTS_GET(opts, cgroup.relative_id, 0);
+ if (relative_fd && relative_id)
+ return libbpf_err(-EINVAL);
+ if (relative_id) {
+ attr.link_create.cgroup.relative_id = relative_id;
+ attr.link_create.flags |= BPF_F_ID;
+ } else {
+ attr.link_create.cgroup.relative_fd = relative_fd;
+ }
+ attr.link_create.cgroup.expected_revision =
+ OPTS_GET(opts, cgroup.expected_revision, 0);
+ if (!OPTS_ZEROED(opts, cgroup))
+ return libbpf_err(-EINVAL);
+ break;
default:
if (!OPTS_ZEROED(opts, flags))
return libbpf_err(-EINVAL);
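
At the syscall-wrapper level, the new cgroup member of bpf_link_create_opts carries the mprog anchor: relative_fd and relative_id are mutually exclusive, and relative_id additionally sets BPF_F_ID in link_create.flags. A sketch of an ordered attach mirroring the selftests below, with assumed-valid fds:

#include <bpf/bpf.h>

/* Sketch: create a cgroup link inserted right before an existing link. */
static int attach_before_link(int prog_fd, int cgroup_fd, int anchor_link_fd)
{
	LIBBPF_OPTS(bpf_link_create_opts, opts,
		.flags = BPF_F_BEFORE | BPF_F_LINK,
		.cgroup.relative_fd = anchor_link_fd,
	);

	return bpf_link_create(prog_fd, cgroup_fd, BPF_CGROUP_GETSOCKOPT, &opts);
}
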
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 777627d33d25..1342564214c8 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -438,6 +438,11 @@ struct bpf_link_create_opts {
__u32 relative_id;
__u64 expected_revision;
} netkit;
+ struct {
+ __u32 relative_fd;
+ __u32 relative_id;
+ __u64 expected_revision;
+ } cgroup;
};
size_t :0;
};
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 4392451d634b..ccfd905f03df 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -326,9 +326,10 @@ struct btf_dump_type_data_opts {
bool compact; /* no newlines/indentation */
bool skip_names; /* skip member/type names */
bool emit_zeroes; /* show 0-valued fields */
+ bool emit_strings; /* print char arrays as strings */
size_t :0;
};
-#define btf_dump_type_data_opts__last_field emit_zeroes
+#define btf_dump_type_data_opts__last_field emit_strings
LIBBPF_API int
btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 0381f209920a..f09f25eccf3c 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -68,6 +68,7 @@ struct btf_dump_data {
bool compact;
bool skip_names;
bool emit_zeroes;
+ bool emit_strings;
__u8 indent_lvl; /* base indent level */
char indent_str[BTF_DATA_INDENT_STR_LEN];
/* below are used during iteration */
@@ -2031,6 +2032,52 @@ static int btf_dump_var_data(struct btf_dump *d,
return btf_dump_dump_type_data(d, NULL, t, type_id, data, 0, 0);
}
+static int btf_dump_string_data(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data)
+{
+ const struct btf_array *array = btf_array(t);
+ const char *chars = data;
+ __u32 i;
+
+ /* Make sure it is a NUL-terminated string. */
+ for (i = 0; i < array->nelems; i++) {
+ if ((void *)(chars + i) >= d->typed_dump->data_end)
+ return -E2BIG;
+ if (chars[i] == '\0')
+ break;
+ }
+ if (i == array->nelems) {
+ /* The caller will print this as a regular array. */
+ return -EINVAL;
+ }
+
+ btf_dump_data_pfx(d);
+ btf_dump_printf(d, "\"");
+
+ for (i = 0; i < array->nelems; i++) {
+ char c = chars[i];
+
+ if (c == '\0') {
+ /*
+ * When printing character arrays as strings, NUL bytes
+ * are always treated as string terminators; they are
+ * never printed.
+ */
+ break;
+ }
+ if (isprint(c))
+ btf_dump_printf(d, "%c", c);
+ else
+ btf_dump_printf(d, "\\x%02x", (__u8)c);
+ }
+
+ btf_dump_printf(d, "\"");
+
+ return 0;
+}
+
static int btf_dump_array_data(struct btf_dump *d,
const struct btf_type *t,
__u32 id,
@@ -2058,8 +2105,13 @@ static int btf_dump_array_data(struct btf_dump *d,
* char arrays, so if size is 1 and element is
* printable as a char, we'll do that.
*/
- if (elem_size == 1)
+ if (elem_size == 1) {
+ if (d->typed_dump->emit_strings &&
+ btf_dump_string_data(d, t, id, data) == 0) {
+ return 0;
+ }
d->typed_dump->is_array_char = true;
+ }
}
/* note that we increment depth before calling btf_dump_print() below;
@@ -2547,6 +2599,7 @@ int btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
d->typed_dump->compact = OPTS_GET(opts, compact, false);
d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false);
d->typed_dump->emit_zeroes = OPTS_GET(opts, emit_zeroes, false);
+ d->typed_dump->emit_strings = OPTS_GET(opts, emit_strings, false);
ret = btf_dump_dump_type_data(d, NULL, t, id, data, 0, 0);
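
With emit_strings set, char arrays that are NUL-terminated within the dumped data are rendered as quoted strings; anything else falls back to the existing per-element array output. A minimal sketch of turning the option on, assuming the dumper and type id already exist:

#include <bpf/btf.h>

/* Sketch: dump a char-array typed value as a string. */
static int dump_char_array(struct btf_dump *d, __u32 array_type_id,
			   const void *data, size_t data_sz)
{
	LIBBPF_OPTS(btf_dump_type_data_opts, opts,
		.compact = true,
		.emit_strings = true,	/* the option introduced above */
	);

	return btf_dump__dump_type_data(d, array_type_id, data, data_sz, &opts);
}
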
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 52e353368f58..aee36402f0a3 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -12842,6 +12842,34 @@ struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifi
}
struct bpf_link *
+bpf_program__attach_cgroup_opts(const struct bpf_program *prog, int cgroup_fd,
+ const struct bpf_cgroup_opts *opts)
+{
+ LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
+ __u32 relative_id;
+ int relative_fd;
+
+ if (!OPTS_VALID(opts, bpf_cgroup_opts))
+ return libbpf_err_ptr(-EINVAL);
+
+ relative_id = OPTS_GET(opts, relative_id, 0);
+ relative_fd = OPTS_GET(opts, relative_fd, 0);
+
+ if (relative_fd && relative_id) {
+ pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
+ prog->name);
+ return libbpf_err_ptr(-EINVAL);
+ }
+
+ link_create_opts.cgroup.expected_revision = OPTS_GET(opts, expected_revision, 0);
+ link_create_opts.cgroup.relative_fd = relative_fd;
+ link_create_opts.cgroup.relative_id = relative_id;
+ link_create_opts.flags = OPTS_GET(opts, flags, 0);
+
+ return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", &link_create_opts);
+}
+
+struct bpf_link *
bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex,
const struct bpf_tcx_opts *opts)
{
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 1137e7d2e1b5..d1cf813a057b 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -877,6 +877,21 @@ LIBBPF_API struct bpf_link *
bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex,
const struct bpf_netkit_opts *opts);
+struct bpf_cgroup_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+ __u32 flags;
+ __u32 relative_fd;
+ __u32 relative_id;
+ __u64 expected_revision;
+ size_t :0;
+};
+#define bpf_cgroup_opts__last_field expected_revision
+
+LIBBPF_API struct bpf_link *
+bpf_program__attach_cgroup_opts(const struct bpf_program *prog, int cgroup_fd,
+ const struct bpf_cgroup_opts *opts);
+
struct bpf_map;
LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map);
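
bpf_cgroup_opts mirrors the tcx/netkit opts layout, so the high-level attach reads much like bpf_program__attach_tcx(). A hedged usage sketch, assuming an already-loaded program and an open cgroup fd:

#include <bpf/libbpf.h>

/* Sketch: attach a cgroup program at the head of the mprog chain. */
static struct bpf_link *attach_cgroup_first(const struct bpf_program *prog, int cgroup_fd)
{
	LIBBPF_OPTS(bpf_cgroup_opts, opts,
		.flags = BPF_F_BEFORE,	/* no relative fd/id: insert in front of existing progs */
	);

	return bpf_program__attach_cgroup_opts(prog, cgroup_fd, &opts);
}
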
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 1205f9a4fe04..c7fc0bde5648 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -437,6 +437,7 @@ LIBBPF_1.6.0 {
bpf_linker__add_fd;
bpf_linker__new_fd;
bpf_object__prepare;
+ bpf_program__attach_cgroup_opts;
bpf_program__func_info;
bpf_program__func_info_cnt;
bpf_program__line_info;
diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
index 4e4a52742b01..3373b9d45ac4 100644
--- a/tools/lib/bpf/usdt.c
+++ b/tools/lib/bpf/usdt.c
@@ -59,7 +59,7 @@
*
* STAP_PROBE3(my_usdt_provider, my_usdt_probe_name, 123, x, &y);
*
- * USDT is identified by it's <provider-name>:<probe-name> pair of names. Each
+ * USDT is identified by its <provider-name>:<probe-name> pair of names. Each
* individual USDT has a fixed number of arguments (3 in the above example)
* and specifies values of each argument as if it was a function call.
*
@@ -81,7 +81,7 @@
* NOP instruction that kernel can replace with an interrupt instruction to
* trigger instrumentation code (BPF program for all that we care about).
*
- * Semaphore above is and optional feature. It records an address of a 2-byte
+ * Semaphore above is an optional feature. It records an address of a 2-byte
* refcount variable (normally in '.probes' ELF section) used for signaling if
* there is anything that is attached to USDT. This is useful for user
* applications if, for example, they need to prepare some arguments that are
@@ -121,7 +121,7 @@
* a uprobe BPF program (which for kernel, at least currently, is just a kprobe
* program, so BPF_PROG_TYPE_KPROBE program type). With the only difference
* that uprobe is usually attached at the function entry, while USDT will
- * normally will be somewhere inside the function. But it should always be
+ * normally be somewhere inside the function. But it should always be
* pointing to NOP instruction, which makes such uprobes the fastest uprobe
* kind.
*
@@ -151,7 +151,7 @@
* libbpf sets to spec ID during attach time, or, if kernel is too old to
* support BPF cookie, through IP-to-spec-ID map that libbpf maintains in such
* case. The latter means that some modes of operation can't be supported
- * without BPF cookie. Such mode is attaching to shared library "generically",
+ * without BPF cookie. Such a mode is attaching to shared library "generically",
* without specifying target process. In such case, it's impossible to
* calculate absolute IP addresses for IP-to-spec-ID map, and thus such mode
* is not supported without BPF cookie support.
@@ -185,7 +185,7 @@
* as even if USDT spec string is the same, USDT cookie value can be
* different. It was deemed excessive to try to deduplicate across independent
* USDT attachments by taking into account USDT spec string *and* USDT cookie
- * value, which would complicated spec ID accounting significantly for little
+ * value, which would complicate spec ID accounting significantly for little
* gain.
*/
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 910d8d6402ef..4863106034df 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -109,6 +109,7 @@ TEST_PROGS := test_kmod.sh \
test_xdping.sh \
test_bpftool_build.sh \
test_bpftool.sh \
+ test_bpftool_map.sh \
test_bpftool_metadata.sh \
test_doc_build.sh \
test_xsk.sh \
@@ -840,6 +841,11 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
+# This works around GCC warning about snprintf truncating strings like:
+#
+# char a[PATH_MAX], b[PATH_MAX];
+# snprintf(a, sizeof(a), "%s/foo", b); // triggers -Wformat-truncation
+$(OUTPUT)/veristat.o: CFLAGS += -Wno-format-truncation
$(OUTPUT)/veristat.o: $(BPFOBJ)
$(OUTPUT)/veristat: $(OUTPUT)/veristat.o
$(call msg,BINARY,,$@)
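
For reference, a small self-contained example of the pattern the Makefile comment above describes; the names here are illustrative rather than taken from veristat:

#include <limits.h>
#include <stdio.h>

void build_path(const char *dir)
{
	char in[PATH_MAX], out[PATH_MAX];

	snprintf(in, sizeof(in), "%s", dir);
	/* "in" may already hold up to PATH_MAX - 1 characters, so appending
	 * "/foo" may not fit in "out"; GCC reports this via -Wformat-truncation.
	 */
	snprintf(out, sizeof(out), "%s/foo", in);
}
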
diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h
index 8215c9b3115e..9386dfe8b884 100644
--- a/tools/testing/selftests/bpf/bpf_kfuncs.h
+++ b/tools/testing/selftests/bpf/bpf_kfuncs.h
@@ -69,7 +69,7 @@ extern int bpf_get_file_xattr(struct file *file, const char *name,
struct bpf_dynptr *value_ptr) __ksym;
extern int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr *digest_ptr) __ksym;
-extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
+extern struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym;
extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym;
extern void bpf_key_put(struct bpf_key *key) __ksym;
extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr,
diff --git a/tools/testing/selftests/bpf/config.ppc64el b/tools/testing/selftests/bpf/config.ppc64el
new file mode 100644
index 000000000000..9acf389dc4ce
--- /dev/null
+++ b/tools/testing/selftests/bpf/config.ppc64el
@@ -0,0 +1,93 @@
+CONFIG_ALTIVEC=y
+CONFIG_AUDIT=y
+CONFIG_BLK_CGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BONDING=y
+CONFIG_BPF_JIT_ALWAYS_ON=y
+CONFIG_BPF_PRELOAD_UMD=y
+CONFIG_BPF_PRELOAD=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CGROUP_NET_CLASSID=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CGROUPS=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=hvc0 wg.success=hvc1 panic_on_warn=1"
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_CPUSETS=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DEBUG_FS=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_DEVTMPFS=y
+CONFIG_EXPERT=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_FRAME_POINTER=y
+CONFIG_FRAME_WARN=1280
+CONFIG_HARDLOCKUP_DETECTOR=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_HUGETLBFS=y
+CONFIG_HVC_CONSOLE=y
+CONFIG_INET=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IPV6_SEG6_LWTUNNEL=y
+CONFIG_JUMP_LABEL=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_KPROBES=y
+CONFIG_MEMCG=y
+CONFIG_NAMESPACES=y
+CONFIG_NET_ACT_BPF=y
+CONFIG_NETDEVICES=y
+CONFIG_NETFILTER_XT_MATCH_BPF=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_VRF=y
+CONFIG_NET=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NONPORTABLE=y
+CONFIG_NR_CPUS=256
+CONFIG_PACKET=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_PCI_HOST_GENERIC=y
+CONFIG_PCI=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_PPC64=y
+CONFIG_PPC_OF_BOOT_TRAMPOLINE=y
+CONFIG_PPC_PSERIES=y
+CONFIG_PPC_RADIX_MMU=y
+CONFIG_PRINTK_TIME=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROFILING=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_SECTION_MISMATCH_WARN_ONLY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_SMP=y
+CONFIG_SOC_VIRT=y
+CONFIG_SYSVIPC=y
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_THREAD_SHIFT=14
+CONFIG_TLS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_TMPFS=y
+CONFIG_TUN=y
+CONFIG_UNIX=y
+CONFIG_UPROBES=y
+CONFIG_USER_NS=y
+CONFIG_VETH=y
+CONFIG_VLAN_8021Q=y
+CONFIG_VSOCKETS_LOOPBACK=y
+CONFIG_VSX=y
+CONFIG_XFRM_USER=y
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
index 6befa870434b..4a0670c056ba 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -489,10 +489,28 @@ cleanup:
bpf_link__destroy(link);
}
+static int verify_tracing_link_info(int fd, u64 cookie)
+{
+ struct bpf_link_info info;
+ int err;
+ u32 len = sizeof(info);
+
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ if (!ASSERT_OK(err, "get_link_info"))
+ return -1;
+
+ if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_TRACING, "link_type"))
+ return -1;
+
+ ASSERT_EQ(info.tracing.cookie, cookie, "tracing_cookie");
+
+ return 0;
+}
+
static void tracing_subtest(struct test_bpf_cookie *skel)
{
__u64 cookie;
- int prog_fd;
+ int prog_fd, err;
int fentry_fd = -1, fexit_fd = -1, fmod_ret_fd = -1;
LIBBPF_OPTS(bpf_test_run_opts, opts);
LIBBPF_OPTS(bpf_link_create_opts, link_opts);
@@ -507,6 +525,10 @@ static void tracing_subtest(struct test_bpf_cookie *skel)
if (!ASSERT_GE(fentry_fd, 0, "fentry.link_create"))
goto cleanup;
+ err = verify_tracing_link_info(fentry_fd, cookie);
+ if (!ASSERT_OK(err, "verify_tracing_link_info"))
+ goto cleanup;
+
cookie = 0x20000000000000L;
prog_fd = bpf_program__fd(skel->progs.fexit_test1);
link_opts.tracing.cookie = cookie;
@@ -635,10 +657,29 @@ cleanup:
bpf_link__destroy(link);
}
+static int verify_raw_tp_link_info(int fd, u64 cookie)
+{
+ struct bpf_link_info info;
+ int err;
+ u32 len = sizeof(info);
+
+ memset(&info, 0, sizeof(info));
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ if (!ASSERT_OK(err, "get_link_info"))
+ return -1;
+
+ if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_RAW_TRACEPOINT, "link_type"))
+ return -1;
+
+ ASSERT_EQ(info.raw_tracepoint.cookie, cookie, "raw_tp_cookie");
+
+ return 0;
+}
+
static void raw_tp_subtest(struct test_bpf_cookie *skel)
{
__u64 cookie;
- int prog_fd, link_fd = -1;
+ int err, prog_fd, link_fd = -1;
struct bpf_link *link = NULL;
LIBBPF_OPTS(bpf_raw_tp_opts, raw_tp_opts);
LIBBPF_OPTS(bpf_raw_tracepoint_opts, opts);
@@ -656,6 +697,11 @@ static void raw_tp_subtest(struct test_bpf_cookie *skel)
goto cleanup;
usleep(1); /* trigger */
+
+ err = verify_raw_tp_link_info(link_fd, cookie);
+ if (!ASSERT_OK(err, "verify_raw_tp_link_info"))
+ goto cleanup;
+
close(link_fd); /* detach */
link_fd = -1;
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
index fe2c502e5089..ecc3d47919ad 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
@@ -78,7 +78,7 @@ static int test_setup_uffd(void *fault_addr)
}
uffd_register.range.start = (unsigned long)fault_addr;
- uffd_register.range.len = 4096;
+ uffd_register.range.len = getpagesize();
uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING;
if (ioctl(uffd, UFFDIO_REGISTER, &uffd_register)) {
close(uffd);
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index c0a776feec23..82903585c870 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -879,6 +879,122 @@ static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d,
"static int bpf_cgrp_storage_busy = (int)2", 2);
}
+struct btf_dump_string_ctx {
+ struct btf *btf;
+ struct btf_dump *d;
+ char *str;
+ struct btf_dump_type_data_opts *opts;
+ int array_id;
+};
+
+static int btf_dump_one_string(struct btf_dump_string_ctx *ctx,
+ char *ptr, size_t ptr_sz,
+ const char *expected_val)
+{
+ size_t type_sz;
+ int ret;
+
+ ctx->str[0] = '\0';
+ type_sz = btf__resolve_size(ctx->btf, ctx->array_id);
+ ret = btf_dump__dump_type_data(ctx->d, ctx->array_id, ptr, ptr_sz, ctx->opts);
+ if (type_sz <= ptr_sz) {
+ if (!ASSERT_EQ(ret, type_sz, "failed/unexpected type_sz"))
+ return -EINVAL;
+ }
+ if (!ASSERT_STREQ(ctx->str, expected_val, "ensure expected/actual match"))
+ return -EFAULT;
+ return 0;
+}
+
+static void btf_dump_strings(struct btf_dump_string_ctx *ctx)
+{
+ struct btf_dump_type_data_opts *opts = ctx->opts;
+
+ opts->emit_strings = true;
+
+ opts->compact = true;
+ opts->emit_zeroes = false;
+
+ opts->skip_names = false;
+ btf_dump_one_string(ctx, "foo", 4, "(char[4])\"foo\"");
+
+ opts->skip_names = true;
+ btf_dump_one_string(ctx, "foo", 4, "\"foo\"");
+
+ /* This should have no effect. */
+ opts->emit_zeroes = false;
+ btf_dump_one_string(ctx, "foo", 4, "\"foo\"");
+
+ /* This should have no effect. */
+ opts->compact = false;
+ btf_dump_one_string(ctx, "foo", 4, "\"foo\"");
+
+ /* Non-printable characters come out as hex. */
+ btf_dump_one_string(ctx, "fo\xff", 4, "\"fo\\xff\"");
+ btf_dump_one_string(ctx, "fo\x7", 4, "\"fo\\x07\"");
+
+ /*
+ * Strings that are too long for the specified type ("char[4]")
+ * should fall back to the current behavior.
+ */
+ opts->compact = true;
+ btf_dump_one_string(ctx, "abcde", 6, "['a','b','c','d',]");
+
+ /*
+ * Strings that are too short for the specified type ("char[4]")
+ * should work normally.
+ */
+ btf_dump_one_string(ctx, "ab", 3, "\"ab\"");
+
+ /* Non-NUL-terminated arrays don't get printed as strings. */
+ char food[4] = { 'f', 'o', 'o', 'd' };
+ char bye[3] = { 'b', 'y', 'e' };
+
+ btf_dump_one_string(ctx, food, 4, "['f','o','o','d',]");
+ btf_dump_one_string(ctx, bye, 3, "['b','y','e',]");
+
+ /* The embedded NUL should terminate the string. */
+ char embed[4] = { 'f', 'o', '\0', 'd' };
+
+ btf_dump_one_string(ctx, embed, 4, "\"fo\"");
+}
+
+static void test_btf_dump_string_data(void)
+{
+ struct test_ctx t = {};
+ char str[STRSIZE];
+ struct btf_dump *d;
+ DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+ struct btf_dump_string_ctx ctx;
+ int char_id, int_id, array_id;
+
+ if (test_ctx__init(&t))
+ return;
+
+ d = btf_dump__new(t.btf, btf_dump_snprintf, str, NULL);
+ if (!ASSERT_OK_PTR(d, "could not create BTF dump"))
+ return;
+
+ /* Generate BTF for a four-element char array. */
+ char_id = btf__add_int(t.btf, "char", 1, BTF_INT_CHAR);
+ ASSERT_EQ(char_id, 1, "char_id");
+ int_id = btf__add_int(t.btf, "int", 4, BTF_INT_SIGNED);
+ ASSERT_EQ(int_id, 2, "int_id");
+ array_id = btf__add_array(t.btf, int_id, char_id, 4);
+ ASSERT_EQ(array_id, 3, "array_id");
+
+ ctx.btf = t.btf;
+ ctx.d = d;
+ ctx.str = str;
+ ctx.opts = &opts;
+ ctx.array_id = array_id;
+
+ btf_dump_strings(&ctx);
+
+ btf_dump__free(d);
+ test_ctx__free(&t);
+}
+
static void test_btf_datasec(struct btf *btf, struct btf_dump *d, char *str,
const char *name, const char *expected_val,
void *data, size_t data_sz)
@@ -970,6 +1086,8 @@ void test_btf_dump() {
test_btf_dump_struct_data(btf, d, str);
if (test__start_subtest("btf_dump: var_data"))
test_btf_dump_var_data(btf, d, str);
+ if (test__start_subtest("btf_dump: string_data"))
+ test_btf_dump_string_data();
btf_dump__free(d);
btf__free(btf);
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c
new file mode 100644
index 000000000000..bb60704a3ef9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c
@@ -0,0 +1,617 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "cgroup_mprog.skel.h"
+
+static void assert_mprog_count(int cg, int atype, int expected)
+{
+ __u32 count = 0, attach_flags = 0;
+ int err;
+
+ err = bpf_prog_query(cg, atype, 0, &attach_flags,
+ NULL, &count);
+ ASSERT_EQ(count, expected, "count");
+ ASSERT_EQ(err, 0, "prog_query");
+}
+
+static void test_prog_attach_detach(int atype)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct cgroup_mprog *skel;
+ __u32 prog_ids[10];
+ int cg, err;
+
+ cg = test__join_cgroup("/prog_attach_detach");
+ if (!ASSERT_GE(cg, 0, "join_cgroup /prog_attach_detach"))
+ return;
+
+ skel = cgroup_mprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.getsockopt_1);
+ fd2 = bpf_program__fd(skel->progs.getsockopt_2);
+ fd3 = bpf_program__fd(skel->progs.getsockopt_3);
+ fd4 = bpf_program__fd(skel->progs.getsockopt_4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER,
+ .expected_revision = 1,
+ );
+
+ /* ordering: [fd1] */
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE,
+ .expected_revision = 2,
+ );
+
+ /* ordering: [fd2, fd1] */
+ err = bpf_prog_attach_opts(fd2, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER,
+ .relative_fd = fd2,
+ .expected_revision = 3,
+ );
+
+ /* ordering: [fd2, fd3, fd1] */
+ err = bpf_prog_attach_opts(fd3, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 3);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI,
+ .expected_revision = 4,
+ );
+
+ /* ordering: [fd2, fd3, fd1, fd4] */
+ err = bpf_prog_attach_opts(fd4, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(cg, atype, 4);
+
+ /* retrieve optq.prog_cnt */
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ /* optq.prog_cnt will be used in the query below */
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.prog_ids = prog_ids;
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id1, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(optq.link_ids, NULL, "link_ids");
+
+cleanup4:
+ optd.expected_revision = 5;
+ err = bpf_prog_detach_opts(fd4, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 3);
+
+cleanup3:
+ LIBBPF_OPTS_RESET(optd);
+ err = bpf_prog_detach_opts(fd3, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 2);
+
+ /* Check revision after two detach operations */
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ ASSERT_OK(err, "prog_query");
+ ASSERT_EQ(optq.revision, 7, "revision");
+
+cleanup2:
+ err = bpf_prog_detach_opts(fd2, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 1);
+
+cleanup1:
+ err = bpf_prog_detach_opts(fd1, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 0);
+
+cleanup:
+ cgroup_mprog__destroy(skel);
+ close(cg);
+}
+
+static void test_link_attach_detach(int atype)
+{
+ LIBBPF_OPTS(bpf_cgroup_opts, opta);
+ LIBBPF_OPTS(bpf_cgroup_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ struct bpf_link *link1, *link2, *link3, *link4;
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct cgroup_mprog *skel;
+ __u32 prog_ids[10];
+ int cg, err;
+
+ cg = test__join_cgroup("/link_attach_detach");
+ if (!ASSERT_GE(cg, 0, "join_cgroup /link_attach_detach"))
+ return;
+
+ skel = cgroup_mprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.getsockopt_1);
+ fd2 = bpf_program__fd(skel->progs.getsockopt_2);
+ fd3 = bpf_program__fd(skel->progs.getsockopt_3);
+ fd4 = bpf_program__fd(skel->progs.getsockopt_4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 1,
+ );
+
+ /* ordering: [fd1] */
+ link1 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_1, cg, &opta);
+ if (!ASSERT_OK_PTR(link1, "link_attach"))
+ goto cleanup;
+
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE | BPF_F_LINK,
+ .relative_id = id_from_link_fd(bpf_link__fd(link1)),
+ .expected_revision = 2,
+ );
+
+ /* ordering: [fd2, fd1] */
+ link2 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_2, cg, &opta);
+ if (!ASSERT_OK_PTR(link2, "link_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER | BPF_F_LINK,
+ .relative_fd = bpf_link__fd(link2),
+ .expected_revision = 3,
+ );
+
+ /* ordering: [fd2, fd3, fd1] */
+ link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta);
+ if (!ASSERT_OK_PTR(link3, "link_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 3);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 4,
+ );
+
+ /* ordering: [fd2, fd3, fd1, fd4] */
+ link4 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_4, cg, &opta);
+ if (!ASSERT_OK_PTR(link4, "link_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(cg, atype, 4);
+
+ /* retrieve optq.prog_cnt */
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ /* optq.prog_cnt will be used in the query below */
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.prog_ids = prog_ids;
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id1, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(optq.link_ids, NULL, "link_ids");
+
+cleanup4:
+ bpf_link__destroy(link4);
+ assert_mprog_count(cg, atype, 3);
+
+cleanup3:
+ bpf_link__destroy(link3);
+ assert_mprog_count(cg, atype, 2);
+
+ /* Check revision after two detach operations */
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ ASSERT_OK(err, "prog_query");
+ ASSERT_EQ(optq.revision, 7, "revision");
+
+cleanup2:
+ bpf_link__destroy(link2);
+ assert_mprog_count(cg, atype, 1);
+
+cleanup1:
+ bpf_link__destroy(link1);
+ assert_mprog_count(cg, atype, 0);
+
+cleanup:
+ cgroup_mprog__destroy(skel);
+ close(cg);
+}
+
+static void test_preorder_prog_attach_detach(int atype)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ __u32 fd1, fd2, fd3, fd4;
+ struct cgroup_mprog *skel;
+ int cg, err;
+
+ cg = test__join_cgroup("/preorder_prog_attach_detach");
+ if (!ASSERT_GE(cg, 0, "join_cgroup /preorder_prog_attach_detach"))
+ return;
+
+ skel = cgroup_mprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.getsockopt_1);
+ fd2 = bpf_program__fd(skel->progs.getsockopt_2);
+ fd3 = bpf_program__fd(skel->progs.getsockopt_3);
+ fd4 = bpf_program__fd(skel->progs.getsockopt_4);
+
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI,
+ .expected_revision = 1,
+ );
+
+ /* ordering: [fd1] */
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER,
+ .expected_revision = 2,
+ );
+
+ /* ordering: [fd1, fd2] */
+ err = bpf_prog_attach_opts(fd2, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER,
+ .relative_fd = fd2,
+ .expected_revision = 3,
+ );
+
+ err = bpf_prog_attach_opts(fd3, cg, atype, &opta);
+ if (!ASSERT_EQ(err, -EINVAL, "prog_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER | BPF_F_PREORDER,
+ .relative_fd = fd2,
+ .expected_revision = 3,
+ );
+
+ /* ordering: [fd1, fd2, fd3] */
+ err = bpf_prog_attach_opts(fd3, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 3);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI,
+ .expected_revision = 4,
+ );
+
+ /* ordering: [fd2, fd3, fd1, fd4] */
+ err = bpf_prog_attach_opts(fd4, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(cg, atype, 4);
+
+ err = bpf_prog_detach_opts(fd4, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 3);
+
+cleanup3:
+ err = bpf_prog_detach_opts(fd3, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 2);
+
+cleanup2:
+ err = bpf_prog_detach_opts(fd2, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 1);
+
+cleanup1:
+ err = bpf_prog_detach_opts(fd1, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 0);
+
+cleanup:
+ cgroup_mprog__destroy(skel);
+ close(cg);
+}
+
+static void test_preorder_link_attach_detach(int atype)
+{
+ LIBBPF_OPTS(bpf_cgroup_opts, opta);
+ struct bpf_link *link1, *link2, *link3, *link4;
+ struct cgroup_mprog *skel;
+ __u32 fd2;
+ int cg;
+
+ cg = test__join_cgroup("/preorder_link_attach_detach");
+ if (!ASSERT_GE(cg, 0, "join_cgroup /preorder_link_attach_detach"))
+ return;
+
+ skel = cgroup_mprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd2 = bpf_program__fd(skel->progs.getsockopt_2);
+
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 1,
+ );
+
+ /* ordering: [fd1] */
+ link1 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_1, cg, &opta);
+ if (!ASSERT_OK_PTR(link1, "link_attach"))
+ goto cleanup;
+
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_PREORDER,
+ .expected_revision = 2,
+ );
+
+ /* ordering: [fd1, fd2] */
+ link2 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_2, cg, &opta);
+ if (!ASSERT_OK_PTR(link2, "link_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd2,
+ .expected_revision = 3,
+ );
+
+ link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta);
+ if (!ASSERT_ERR_PTR(link3, "link_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER | BPF_F_PREORDER | BPF_F_LINK,
+ .relative_fd = bpf_link__fd(link2),
+ .expected_revision = 3,
+ );
+
+ /* ordering: [fd1, fd2, fd3] */
+ link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta);
+ if (!ASSERT_OK_PTR(link3, "link_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 3);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 4,
+ );
+
+ /* ordering: [fd2, fd3, fd1, fd4] */
+ link4 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_4, cg, &opta);
+ if (!ASSERT_OK_PTR(link4, "prog_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(cg, atype, 4);
+
+ bpf_link__destroy(link4);
+ assert_mprog_count(cg, atype, 3);
+
+cleanup3:
+ bpf_link__destroy(link3);
+ assert_mprog_count(cg, atype, 2);
+
+cleanup2:
+ bpf_link__destroy(link2);
+ assert_mprog_count(cg, atype, 1);
+
+cleanup1:
+ bpf_link__destroy(link1);
+ assert_mprog_count(cg, atype, 0);
+
+cleanup:
+ cgroup_mprog__destroy(skel);
+ close(cg);
+}
+
+static void test_invalid_attach_detach(int atype)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ __u32 fd1, fd2, id2;
+ struct cgroup_mprog *skel;
+ int cg, err;
+
+ cg = test__join_cgroup("/invalid_attach_detach");
+ if (!ASSERT_GE(cg, 0, "join_cgroup /invalid_attach_detach"))
+ return;
+
+ skel = cgroup_mprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.getsockopt_1);
+ fd2 = bpf_program__fd(skel->progs.getsockopt_2);
+
+ id2 = id_from_prog_fd(fd2);
+
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER,
+ .relative_id = id2,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_ID,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER | BPF_F_ID,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER,
+ .relative_id = id2,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_LINK,
+ .relative_id = id2,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI,
+ .relative_id = id2,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE | BPF_F_AFTER,
+ .replace_prog_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 1);
+cleanup:
+ cgroup_mprog__destroy(skel);
+ close(cg);
+}
+
+void test_cgroup_mprog_opts(void)
+{
+ if (test__start_subtest("prog_attach_detach"))
+ test_prog_attach_detach(BPF_CGROUP_GETSOCKOPT);
+ if (test__start_subtest("link_attach_detach"))
+ test_link_attach_detach(BPF_CGROUP_GETSOCKOPT);
+ if (test__start_subtest("preorder_prog_attach_detach"))
+ test_preorder_prog_attach_detach(BPF_CGROUP_GETSOCKOPT);
+ if (test__start_subtest("preorder_link_attach_detach"))
+ test_preorder_link_attach_detach(BPF_CGROUP_GETSOCKOPT);
+ if (test__start_subtest("invalid_attach_detach"))
+ test_invalid_attach_detach(BPF_CGROUP_GETSOCKOPT);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c
new file mode 100644
index 000000000000..a36d2e968bc5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "cgroup_preorder.skel.h"
+
+static int run_getsockopt_test(int cg_parent, int sock_fd, bool has_relative_fd)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opts);
+ enum bpf_attach_type prog_p_atype, prog_p2_atype;
+ int prog_p_fd, prog_p2_fd;
+ struct cgroup_preorder *skel = NULL;
+ struct bpf_program *prog;
+ __u8 *result, buf;
+ socklen_t optlen = 1;
+ int err = 0;
+
+ skel = cgroup_preorder__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "cgroup_preorder__open_and_load"))
+ return 0;
+
+ LIBBPF_OPTS_RESET(opts);
+ opts.flags = BPF_F_ALLOW_MULTI;
+ prog = skel->progs.parent;
+ prog_p_fd = bpf_program__fd(prog);
+ prog_p_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_p_fd, cg_parent, prog_p_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent"))
+ goto close_skel;
+
+ opts.flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE;
+ if (has_relative_fd)
+ opts.relative_fd = prog_p_fd;
+ prog = skel->progs.parent_2;
+ prog_p2_fd = bpf_program__fd(prog);
+ prog_p2_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_p2_fd, cg_parent, prog_p2_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent_2"))
+ goto detach_parent;
+
+ err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+ if (!ASSERT_OK(err, "getsockopt"))
+ goto detach_parent_2;
+
+ result = skel->bss->result;
+ ASSERT_TRUE(result[0] == 4 && result[1] == 3, "result values");
+
+detach_parent_2:
+ ASSERT_OK(bpf_prog_detach2(prog_p2_fd, cg_parent, prog_p2_atype),
+ "bpf_prog_detach2-parent_2");
+detach_parent:
+ ASSERT_OK(bpf_prog_detach2(prog_p_fd, cg_parent, prog_p_atype),
+ "bpf_prog_detach2-parent");
+close_skel:
+ cgroup_preorder__destroy(skel);
+ return err;
+}
+
+void test_cgroup_mprog_ordering(void)
+{
+ int cg_parent = -1, sock_fd = -1;
+
+ cg_parent = test__join_cgroup("/parent");
+ if (!ASSERT_GE(cg_parent, 0, "join_cgroup /parent"))
+ goto out;
+
+ sock_fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (!ASSERT_GE(sock_fd, 0, "socket"))
+ goto out;
+
+ ASSERT_OK(run_getsockopt_test(cg_parent, sock_fd, false), "getsockopt_test_1");
+ ASSERT_OK(run_getsockopt_test(cg_parent, sock_fd, true), "getsockopt_test_2");
+
+out:
+ close(sock_fd);
+ close(cg_parent);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fd_array.c b/tools/testing/selftests/bpf/prog_tests/fd_array.c
index 9add890c2d37..241b2c8c6e0f 100644
--- a/tools/testing/selftests/bpf/prog_tests/fd_array.c
+++ b/tools/testing/selftests/bpf/prog_tests/fd_array.c
@@ -312,7 +312,7 @@ static void check_fd_array_cnt__referenced_btfs(void)
/* btf should still exist when original file descriptor is closed */
err = get_btf_id_by_fd(extra_fds[0], &btf_id);
- if (!ASSERT_GE(err, 0, "get_btf_id_by_fd"))
+ if (!ASSERT_EQ(err, 0, "get_btf_id_by_fd"))
goto cleanup;
Close(extra_fds[0]);
diff --git a/tools/testing/selftests/bpf/prog_tests/log_buf.c b/tools/testing/selftests/bpf/prog_tests/log_buf.c
index 169ce689b97c..d6f14a232002 100644
--- a/tools/testing/selftests/bpf/prog_tests/log_buf.c
+++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c
@@ -7,6 +7,10 @@
#include "test_log_buf.skel.h"
#include "bpf_util.h"
+#if !defined(__clang__)
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#endif
+
static size_t libbpf_log_pos;
static char libbpf_log_buf[1024 * 1024];
static bool libbpf_log_error;
diff --git a/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c b/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c
new file mode 100644
index 000000000000..40d4f687bd9c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <test_progs.h>
+#include "mem_rdonly_untrusted.skel.h"
+
+void test_mem_rdonly_untrusted(void)
+{
+ RUN_TESTS(mem_rdonly_untrusted);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index da430df45aa4..d1e4cb28a72c 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -97,7 +97,7 @@ static void ringbuf_write_subtest(void)
if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
- skel->maps.ringbuf.max_entries = 0x4000;
+ skel->maps.ringbuf.max_entries = 0x40000;
err = test_ringbuf_write_lskel__load(skel);
if (!ASSERT_OK(err, "skel_load"))
@@ -108,7 +108,7 @@ static void ringbuf_write_subtest(void)
mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0);
if (!ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos"))
goto cleanup;
- *mmap_ptr = 0x3000;
+ *mmap_ptr = 0x30000;
ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw");
skel->bss->pid = getpid();
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 1d98eee7a2c3..f1bdccc7e4e7 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -924,6 +924,8 @@ static void redir_partial(int family, int sotype, int sock_map, int parser_map)
goto close;
n = xsend(c1, buf, sizeof(buf), 0);
+ if (n == -1)
+ goto close;
if (n < sizeof(buf))
FAIL("incomplete write");
diff --git a/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c
new file mode 100644
index 000000000000..35af8044d059
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Red Hat, Inc.*/
+#include <test_progs.h>
+#include "string_kfuncs_success.skel.h"
+#include "string_kfuncs_failure1.skel.h"
+#include "string_kfuncs_failure2.skel.h"
+#include <sys/mman.h>
+
+static const char * const test_cases[] = {
+ "strcmp",
+ "strchr",
+ "strchrnul",
+ "strnchr",
+ "strrchr",
+ "strlen",
+ "strnlen",
+ "strspn_str",
+ "strspn_accept",
+ "strcspn_str",
+ "strcspn_reject",
+ "strstr",
+ "strnstr",
+};
+
+void run_too_long_tests(void)
+{
+ struct string_kfuncs_failure2 *skel;
+ struct bpf_program *prog;
+ char test_name[256];
+ int err, i;
+
+ skel = string_kfuncs_failure2__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "string_kfuncs_failure2__open_and_load"))
+ return;
+
+ memset(skel->bss->long_str, 'a', sizeof(skel->bss->long_str));
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ sprintf(test_name, "test_%s_too_long", test_cases[i]);
+ if (!test__start_subtest(test_name))
+ continue;
+
+ prog = bpf_object__find_program_by_name(skel->obj, test_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto cleanup;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ err = bpf_prog_test_run_opts(bpf_program__fd(prog), &topts);
+ if (!ASSERT_OK(err, "bpf_prog_test_run"))
+ goto cleanup;
+
+ ASSERT_EQ(topts.retval, -E2BIG, "reading too long string fails with -E2BIG");
+ }
+
+cleanup:
+ string_kfuncs_failure2__destroy(skel);
+}
+
+void test_string_kfuncs(void)
+{
+ RUN_TESTS(string_kfuncs_success);
+ RUN_TESTS(string_kfuncs_failure1);
+
+ run_too_long_tests();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index 66a900327f91..0ab36503c3b2 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -1195,7 +1195,7 @@ static void test_tailcall_hierarchy_count(const char *which, bool test_fentry,
bool test_fexit,
bool test_fentry_entry)
{
- int err, map_fd, prog_fd, main_data_fd, fentry_data_fd, fexit_data_fd, i, val;
+ int err, map_fd, prog_fd, main_data_fd, fentry_data_fd = 0, fexit_data_fd = 0, i, val;
struct bpf_object *obj = NULL, *fentry_obj = NULL, *fexit_obj = NULL;
struct bpf_link *fentry_link = NULL, *fexit_link = NULL;
struct bpf_program *prog, *fentry_prog;
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h
index 924d0e25320c..d52a62af77bf 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h
@@ -8,34 +8,6 @@
# define loopback 1
#endif
-static inline __u32 id_from_prog_fd(int fd)
-{
- struct bpf_prog_info prog_info = {};
- __u32 prog_info_len = sizeof(prog_info);
- int err;
-
- err = bpf_obj_get_info_by_fd(fd, &prog_info, &prog_info_len);
- if (!ASSERT_OK(err, "id_from_prog_fd"))
- return 0;
-
- ASSERT_NEQ(prog_info.id, 0, "prog_info.id");
- return prog_info.id;
-}
-
-static inline __u32 id_from_link_fd(int fd)
-{
- struct bpf_link_info link_info = {};
- __u32 link_info_len = sizeof(link_info);
- int err;
-
- err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len);
- if (!ASSERT_OK(err, "id_from_link_fd"))
- return 0;
-
- ASSERT_NEQ(link_info.id, 0, "link_info.id");
- return link_info.id;
-}
-
static inline __u32 ifindex_from_link_fd(int fd)
{
struct bpf_link_info link_info = {};
diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c
index 495d66414b57..9057e983cc54 100644
--- a/tools/testing/selftests/bpf/prog_tests/usdt.c
+++ b/tools/testing/selftests/bpf/prog_tests/usdt.c
@@ -270,8 +270,16 @@ static void subtest_multispec_usdt(void)
*/
trigger_300_usdts();
- /* we'll reuse usdt_100 BPF program for usdt_300 test */
bpf_link__destroy(skel->links.usdt_100);
+
+ bss->usdt_100_called = 0;
+ bss->usdt_100_sum = 0;
+
+	/* If built with clang on arm64, there will be far fewer specs
+	 * for the usdt_300 call sites.
+	 */
+#if !defined(__aarch64__) || !defined(__clang__)
+ /* we'll reuse usdt_100 BPF program for usdt_300 test */
skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1, "/proc/self/exe",
"test", "usdt_300", NULL);
err = -errno;
@@ -282,13 +290,11 @@ static void subtest_multispec_usdt(void)
/* let's check that there are no "dangling" BPF programs attached due
* to partial success of the above test:usdt_300 attachment
*/
- bss->usdt_100_called = 0;
- bss->usdt_100_sum = 0;
-
f300(777); /* this is 301st instance of usdt_300 */
ASSERT_EQ(bss->usdt_100_called, 0, "usdt_301_called");
ASSERT_EQ(bss->usdt_100_sum, 0, "usdt_301_sum");
+#endif
/* This time we have USDT with 400 inlined invocations, but arg specs
* should be the same across all sites, so libbpf will only need to
diff --git a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
index d424e7ecbd12..9fd3ae987321 100644
--- a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
@@ -21,8 +21,7 @@
#include "../progs/test_user_ringbuf.h"
static const long c_sample_size = sizeof(struct sample) + BPF_RINGBUF_HDR_SZ;
-static const long c_ringbuf_size = 1 << 12; /* 1 small page */
-static const long c_max_entries = c_ringbuf_size / c_sample_size;
+static long c_ringbuf_size, c_max_entries;
static void drain_current_samples(void)
{
@@ -424,7 +423,9 @@ static void test_user_ringbuf_loop(void)
uint32_t remaining_samples = total_samples;
int err;
- BUILD_BUG_ON(total_samples <= c_max_entries);
+ if (!ASSERT_LT(c_max_entries, total_samples, "compare_c_max_entries"))
+ return;
+
err = load_skel_create_user_ringbuf(&skel, &ringbuf);
if (err)
return;
@@ -686,6 +687,9 @@ void test_user_ringbuf(void)
{
int i;
+ c_ringbuf_size = getpagesize(); /* 1 page */
+ c_max_entries = c_ringbuf_size / c_sample_size;
+
for (i = 0; i < ARRAY_SIZE(success_tests); i++) {
if (!test__start_subtest(success_tests[i].test_name))
continue;
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
index b2b2d85dbb1b..43264347e7d7 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -37,21 +37,26 @@ static void test_xdp_adjust_tail_shrink(void)
bpf_object__close(obj);
}
-static void test_xdp_adjust_tail_grow(void)
+static void test_xdp_adjust_tail_grow(bool is_64k_pagesize)
{
const char *file = "./test_xdp_adjust_tail_grow.bpf.o";
struct bpf_object *obj;
- char buf[4096]; /* avoid segfault: large buf to hold grow results */
+ char buf[8192]; /* avoid segfault: large buf to hold grow results */
__u32 expect_sz;
int err, prog_fd;
LIBBPF_OPTS(bpf_test_run_opts, topts,
.data_in = &pkt_v4,
- .data_size_in = sizeof(pkt_v4),
.data_out = buf,
.data_size_out = sizeof(buf),
.repeat = 1,
);
+	/* Use topts.data_size_in as a special signal to the BPF prog */
+ if (is_64k_pagesize)
+ topts.data_size_in = sizeof(pkt_v4) - 1;
+ else
+ topts.data_size_in = sizeof(pkt_v4);
+
err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
if (!ASSERT_OK(err, "test_xdp_adjust_tail_grow"))
return;
@@ -208,7 +213,7 @@ out:
bpf_object__close(obj);
}
-static void test_xdp_adjust_frags_tail_grow(void)
+static void test_xdp_adjust_frags_tail_grow_4k(void)
{
const char *file = "./test_xdp_adjust_tail_grow.bpf.o";
__u32 exp_size;
@@ -246,14 +251,20 @@ static void test_xdp_adjust_frags_tail_grow(void)
ASSERT_EQ(topts.retval, XDP_TX, "9Kb+10b retval");
ASSERT_EQ(topts.data_size_out, exp_size, "9Kb+10b size");
- for (i = 0; i < 9000; i++)
- ASSERT_EQ(buf[i], 1, "9Kb+10b-old");
+ for (i = 0; i < 9000; i++) {
+ if (buf[i] != 1)
+ ASSERT_EQ(buf[i], 1, "9Kb+10b-old");
+ }
- for (i = 9000; i < 9010; i++)
- ASSERT_EQ(buf[i], 0, "9Kb+10b-new");
+ for (i = 9000; i < 9010; i++) {
+ if (buf[i] != 0)
+ ASSERT_EQ(buf[i], 0, "9Kb+10b-new");
+ }
- for (i = 9010; i < 16384; i++)
- ASSERT_EQ(buf[i], 1, "9Kb+10b-untouched");
+ for (i = 9010; i < 16384; i++) {
+ if (buf[i] != 1)
+ ASSERT_EQ(buf[i], 1, "9Kb+10b-untouched");
+ }
/* Test a too large grow */
memset(buf, 1, 16384);
@@ -273,16 +284,93 @@ out:
bpf_object__close(obj);
}
+static void test_xdp_adjust_frags_tail_grow_64k(void)
+{
+ const char *file = "./test_xdp_adjust_tail_grow.bpf.o";
+ __u32 exp_size;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int err, i, prog_fd;
+ __u8 *buf;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ obj = bpf_object__open(file);
+ if (libbpf_get_error(obj))
+ return;
+
+ prog = bpf_object__next_program(obj, NULL);
+ if (bpf_object__load(obj))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+
+ buf = malloc(262144);
+ if (!ASSERT_OK_PTR(buf, "alloc buf 256Kb"))
+ goto out;
+
+ /* Test case add 10 bytes to last frag */
+ memset(buf, 1, 262144);
+ exp_size = 90000 + 10;
+
+ topts.data_in = buf;
+ topts.data_out = buf;
+ topts.data_size_in = 90000;
+ topts.data_size_out = 262144;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ ASSERT_OK(err, "90Kb+10b");
+ ASSERT_EQ(topts.retval, XDP_TX, "90Kb+10b retval");
+ ASSERT_EQ(topts.data_size_out, exp_size, "90Kb+10b size");
+
+ for (i = 0; i < 90000; i++) {
+ if (buf[i] != 1)
+ ASSERT_EQ(buf[i], 1, "90Kb+10b-old");
+ }
+
+ for (i = 90000; i < 90010; i++) {
+ if (buf[i] != 0)
+ ASSERT_EQ(buf[i], 0, "90Kb+10b-new");
+ }
+
+ for (i = 90010; i < 262144; i++) {
+ if (buf[i] != 1)
+ ASSERT_EQ(buf[i], 1, "90Kb+10b-untouched");
+ }
+
+ /* Test a too large grow */
+ memset(buf, 1, 262144);
+ exp_size = 90001;
+
+ topts.data_in = topts.data_out = buf;
+ topts.data_size_in = 90001;
+ topts.data_size_out = 262144;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ ASSERT_OK(err, "90Kb+10b");
+ ASSERT_EQ(topts.retval, XDP_DROP, "90Kb+10b retval");
+ ASSERT_EQ(topts.data_size_out, exp_size, "90Kb+10b size");
+
+ free(buf);
+out:
+ bpf_object__close(obj);
+}
+
void test_xdp_adjust_tail(void)
{
+ int page_size = getpagesize();
+
if (test__start_subtest("xdp_adjust_tail_shrink"))
test_xdp_adjust_tail_shrink();
if (test__start_subtest("xdp_adjust_tail_grow"))
- test_xdp_adjust_tail_grow();
+ test_xdp_adjust_tail_grow(page_size == 65536);
if (test__start_subtest("xdp_adjust_tail_grow2"))
test_xdp_adjust_tail_grow2();
if (test__start_subtest("xdp_adjust_frags_tail_shrink"))
test_xdp_adjust_frags_tail_shrink();
- if (test__start_subtest("xdp_adjust_frags_tail_grow"))
- test_xdp_adjust_frags_tail_grow();
+ if (test__start_subtest("xdp_adjust_frags_tail_grow")) {
+ if (page_size == 65536)
+ test_xdp_adjust_frags_tail_grow_64k();
+ else
+ test_xdp_adjust_frags_tail_grow_4k();
+ }
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
index 7dac044664ac..dd34b0cc4b4e 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
@@ -66,16 +66,25 @@ static int attach_tc_prog(struct bpf_tc_hook *hook, int fd)
#else
#define MAX_PKT_SIZE 3408
#endif
+
+#define PAGE_SIZE_4K 4096
+#define PAGE_SIZE_64K 65536
+
static void test_max_pkt_size(int fd)
{
- char data[MAX_PKT_SIZE + 1] = {};
+ char data[PAGE_SIZE_64K + 1] = {};
int err;
DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
.data_in = &data,
- .data_size_in = MAX_PKT_SIZE,
.flags = BPF_F_TEST_XDP_LIVE_FRAMES,
.repeat = 1,
);
+
+ if (getpagesize() == PAGE_SIZE_64K)
+ opts.data_size_in = MAX_PKT_SIZE + PAGE_SIZE_64K - PAGE_SIZE_4K;
+ else
+ opts.data_size_in = MAX_PKT_SIZE;
+
err = bpf_prog_test_run_opts(fd, &opts);
ASSERT_OK(err, "prog_run_max_size");
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c b/tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c
new file mode 100644
index 000000000000..2f20485e0de3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 value_sum = 0;
+
+SEC("iter/bpf_map_elem")
+int dump_bpf_map_values(struct bpf_iter__bpf_map_elem *ctx)
+{
+ __u32 value = 0;
+
+ if (ctx->value == (void *)0)
+ return 0;
+
+ bpf_probe_read_kernel(&value, sizeof(value), ctx->value);
+ value_sum += value;
+ return 0;
+}
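For context on how a map-element iterator like dump_bpf_map_values is driven: userspace pins the program to a specific map fd and then read()s the iterator fd, which runs the program once per element. The corresponding prog_tests code is not part of this hunk; the following is only a rough libbpf sketch, where the skeleton header/type names are assumed from the file name and the target map fd passed in is hypothetical.

#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "bpf_iter_map_elem.skel.h"	/* generated skeleton (assumed) */

static int sum_map_values(int map_fd)
{
	struct bpf_iter_map_elem *skel;
	union bpf_iter_link_info linfo = { .map.map_fd = map_fd };
	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts,
			    .link_info = &linfo,
			    .link_info_len = sizeof(linfo));
	struct bpf_link *link;
	char buf[64];
	int iter_fd, sum = -1;

	skel = bpf_iter_map_elem__open_and_load();
	if (!skel)
		return -1;

	/* Bind the iterator program to the map we want to walk. */
	link = bpf_program__attach_iter(skel->progs.dump_bpf_map_values, &opts);
	if (!link)
		goto out;

	/* Creating and reading the iterator fd runs the program once per element. */
	iter_fd = bpf_iter_create(bpf_link__fd(link));
	if (iter_fd >= 0) {
		while (read(iter_fd, buf, sizeof(buf)) > 0)
			;
		sum = skel->bss->value_sum;
		close(iter_fd);
	}
	bpf_link__destroy(link);
out:
	bpf_iter_map_elem__destroy(skel);
	return sum;
}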
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index 6e208e24ba3b..20dce508d8e0 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -83,9 +83,11 @@
* expect return value to match passed parameter:
* - a decimal number
* - a hexadecimal number, when starts from 0x
- * - literal INT_MIN
- * - literal POINTER_VALUE (see definition below)
- * - literal TEST_DATA_LEN (see definition below)
+ * - a macro which expands to one of the above
+ * - literal _INT_MIN (expands to INT_MIN)
+ * In addition, two special macros are defined below:
+ * - POINTER_VALUE
+ * - TEST_DATA_LEN
* __retval_unpriv Same, but load program in unprivileged mode.
*
* __description Text to be used instead of a program name for display
@@ -125,8 +127,8 @@
#define __success_unpriv __attribute__((btf_decl_tag("comment:test_expect_success_unpriv")))
#define __log_level(lvl) __attribute__((btf_decl_tag("comment:test_log_level="#lvl)))
#define __flag(flag) __attribute__((btf_decl_tag("comment:test_prog_flags="#flag)))
-#define __retval(val) __attribute__((btf_decl_tag("comment:test_retval="#val)))
-#define __retval_unpriv(val) __attribute__((btf_decl_tag("comment:test_retval_unpriv="#val)))
+#define __retval(val) __attribute__((btf_decl_tag("comment:test_retval="XSTR(val))))
+#define __retval_unpriv(val) __attribute__((btf_decl_tag("comment:test_retval_unpriv="XSTR(val))))
#define __auxiliary __attribute__((btf_decl_tag("comment:test_auxiliary")))
#define __auxiliary_unpriv __attribute__((btf_decl_tag("comment:test_auxiliary_unpriv")))
#define __btf_path(path) __attribute__((btf_decl_tag("comment:test_btf_path=" path)))
@@ -155,7 +157,7 @@
#define __imm_insn(name, expr) [name]"i"(*(long *)&(expr))
/* Magic constants used with __retval() */
-#define POINTER_VALUE 0xcafe4all
+#define POINTER_VALUE 0xbadcafe
#define TEST_DATA_LEN 64
#ifndef __used
@@ -231,4 +233,8 @@
#define CAN_USE_LOAD_ACQ_STORE_REL
#endif
+#if defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)
+#define SPEC_V1
+#endif
+
#endif
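The switch from `#val` to `XSTR(val)` in __retval()/__retval_unpriv() is what allows a macro such as _INT_MIN to be passed and expanded before being embedded in the decl tag string. The hunk does not show XSTR itself; assuming it follows the classic two-level stringification idiom, a minimal standalone illustration of the difference is:

#include <stdio.h>

#define STR(x)  #x	/* stringifies the argument token as written */
#define XSTR(x) STR(x)	/* expands macros in the argument first, then stringifies */

#define ANSWER 42

int main(void)
{
	printf("%s\n", STR(ANSWER));	/* prints "ANSWER" */
	printf("%s\n", XSTR(ANSWER));	/* prints "42" */
	return 0;
}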
diff --git a/tools/testing/selftests/bpf/progs/cgroup_mprog.c b/tools/testing/selftests/bpf/progs/cgroup_mprog.c
new file mode 100644
index 000000000000..6a0ea02c4de2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_mprog.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("cgroup/getsockopt")
+int getsockopt_1(struct bpf_sockopt *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int getsockopt_2(struct bpf_sockopt *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int getsockopt_3(struct bpf_sockopt *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int getsockopt_4(struct bpf_sockopt *ctx)
+{
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/compute_live_registers.c b/tools/testing/selftests/bpf/progs/compute_live_registers.c
index f3d79aecbf93..6884ab99a421 100644
--- a/tools/testing/selftests/bpf/progs/compute_live_registers.c
+++ b/tools/testing/selftests/bpf/progs/compute_live_registers.c
@@ -240,6 +240,22 @@ __naked void if2(void)
::: __clobber_all);
}
+/* Verifier misses that r2 is alive if jset is not handled properly */
+SEC("socket")
+__log_level(2)
+__msg("2: 012....... (45) if r1 & 0x7 goto pc+1")
+__naked void if3_jset_bug(void)
+{
+ asm volatile (
+ "r0 = 1;"
+ "r2 = 2;"
+ "if r1 & 0x7 goto +1;"
+ "exit;"
+ "r0 = r2;"
+ "exit;"
+ ::: __clobber_all);
+}
+
SEC("socket")
__log_level(2)
__msg("0: .......... (b7) r1 = 0")
diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
index 76adf4a8f2da..7dd92a303bf6 100644
--- a/tools/testing/selftests/bpf/progs/iters.c
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -1649,4 +1649,281 @@ int clean_live_states(const void *ctx)
return 0;
}
+SEC("?raw_tp")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("misaligned stack access off 0+-31+0 size 8")
+__naked int absent_mark_in_the_middle_state(void)
+{
+ /* This is equivalent to C program below.
+ *
+ * r8 = bpf_get_prandom_u32();
+ * r6 = -32;
+ * bpf_iter_num_new(&fp[-8], 0, 10);
+ * if (unlikely(bpf_get_prandom_u32()))
+ * r6 = -31;
+ * while (bpf_iter_num_next(&fp[-8])) {
+ * if (unlikely(bpf_get_prandom_u32()))
+ * *(fp + r6) = 7;
+ * }
+ * bpf_iter_num_destroy(&fp[-8])
+ * return 0
+ */
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r8 = r0;"
+ "r7 = 0;"
+ "r6 = -32;"
+ "r0 = 0;"
+ "*(u64 *)(r10 - 16) = r0;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto change_r6_%=;"
+ "loop_%=:"
+ "call noop;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto loop_end_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto use_r6_%=;"
+ "goto loop_%=;"
+ "loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ "use_r6_%=:"
+ "r0 = r10;"
+ "r0 += r6;"
+ "r1 = 7;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "goto loop_%=;"
+ "change_r6_%=:"
+ "r6 = -31;"
+ "goto loop_%=;"
+ :
+ : __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+
+__used __naked
+static int noop(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "exit;"
+ );
+}
+
+SEC("?raw_tp")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("misaligned stack access off 0+-31+0 size 8")
+__naked int absent_mark_in_the_middle_state2(void)
+{
+ /* This is equivalent to C program below.
+ *
+ * r8 = bpf_get_prandom_u32();
+ * r6 = -32;
+ * bpf_iter_num_new(&fp[-8], 0, 10);
+ * if (unlikely(bpf_get_prandom_u32())) {
+ * r6 = -31;
+ * jump_into_loop:
+ * goto +0;
+ * goto loop;
+ * }
+ * if (unlikely(bpf_get_prandom_u32()))
+ * goto jump_into_loop;
+ * loop:
+ * while (bpf_iter_num_next(&fp[-8])) {
+ * if (unlikely(bpf_get_prandom_u32()))
+ * *(fp + r6) = 7;
+ * }
+ * bpf_iter_num_destroy(&fp[-8])
+ * return 0
+ */
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r8 = r0;"
+ "r7 = 0;"
+ "r6 = -32;"
+ "r0 = 0;"
+ "*(u64 *)(r10 - 16) = r0;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto change_r6_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto jump_into_loop_%=;"
+ "loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto loop_end_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto use_r6_%=;"
+ "goto loop_%=;"
+ "loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ "use_r6_%=:"
+ "r0 = r10;"
+ "r0 += r6;"
+ "r1 = 7;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "goto loop_%=;"
+ "change_r6_%=:"
+ "r6 = -31;"
+ "jump_into_loop_%=: "
+ "goto +0;"
+ "goto loop_%=;"
+ :
+ : __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+
+SEC("?raw_tp")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("misaligned stack access off 0+-31+0 size 8")
+__naked int absent_mark_in_the_middle_state3(void)
+{
+ /*
+ * bpf_iter_num_new(&fp[-8], 0, 10)
+ * loop1(-32, &fp[-8])
+ * loop1_wrapper(&fp[-8])
+ * bpf_iter_num_destroy(&fp[-8])
+ */
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ /* call #1 */
+ "r1 = -32;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "call loop1;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ /* call #2 */
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call loop1_wrapper;"
+ /* return */
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_destroy),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+
+__used __naked
+static int loop1(void)
+{
+ /*
+ * int loop1(num, iter) {
+ * r6 = num;
+ * r7 = iter;
+ * while (bpf_iter_num_next(r7)) {
+ * if (unlikely(bpf_get_prandom_u32()))
+ * *(fp + r6) = 7;
+ * }
+ * return 0
+ * }
+ */
+ asm volatile (
+ "r6 = r1;"
+ "r7 = r2;"
+ "call %[bpf_get_prandom_u32];"
+ "r8 = r0;"
+ "loop_%=:"
+ "r1 = r7;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto loop_end_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto use_r6_%=;"
+ "goto loop_%=;"
+ "loop_end_%=:"
+ "r0 = 0;"
+ "exit;"
+ "use_r6_%=:"
+ "r0 = r10;"
+ "r0 += r6;"
+ "r1 = 7;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "goto loop_%=;"
+ :
+ : __imm(bpf_iter_num_next),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+
+__used __naked
+static int loop1_wrapper(void)
+{
+ /*
+ * int loop1_wrapper(iter) {
+ * r6 = -32;
+ * r7 = iter;
+ * if (unlikely(bpf_get_prandom_u32()))
+ * r6 = -31;
+ * loop1(r6, r7);
+ * return 0;
+ * }
+ */
+ asm volatile (
+ "r6 = -32;"
+ "r7 = r1;"
+ "call %[bpf_get_prandom_u32];"
+ "r8 = r0;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto change_r6_%=;"
+ "loop_%=:"
+ "r1 = r6;"
+ "r2 = r7;"
+ "call loop1;"
+ "r0 = 0;"
+ "exit;"
+ "change_r6_%=:"
+ "r6 = -31;"
+ "goto loop_%=;"
+ :
+ : __imm(bpf_iter_num_next),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c
new file mode 100644
index 000000000000..00604755e698
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+SEC("socket")
+__success
+__retval(0)
+int ldx_is_ok_bad_addr(void *ctx)
+{
+ char *p;
+
+ if (!bpf_core_enum_value_exists(enum bpf_features, BPF_FEAT_RDONLY_CAST_TO_VOID))
+ return 42;
+
+ p = bpf_rdonly_cast(0, 0);
+ return p[0x7fff];
+}
+
+SEC("socket")
+__success
+__retval(1)
+int ldx_is_ok_good_addr(void *ctx)
+{
+ int v, *p;
+
+ v = 1;
+ p = bpf_rdonly_cast(&v, 0);
+ return *p;
+}
+
+SEC("socket")
+__success
+int offset_not_tracked(void *ctx)
+{
+ int *p, i, s;
+
+ p = bpf_rdonly_cast(0, 0);
+ s = 0;
+ bpf_for(i, 0, 1000 * 1000 * 1000) {
+ p++;
+ s += *p;
+ }
+ return s;
+}
+
+SEC("socket")
+__failure
+__msg("cannot write into rdonly_untrusted_mem")
+int stx_not_ok(void *ctx)
+{
+ int v, *p;
+
+ v = 1;
+ p = bpf_rdonly_cast(&v, 0);
+ *p = 1;
+ return 0;
+}
+
+SEC("socket")
+__failure
+__msg("cannot write into rdonly_untrusted_mem")
+int atomic_not_ok(void *ctx)
+{
+ int v, *p;
+
+ v = 1;
+ p = bpf_rdonly_cast(&v, 0);
+ __sync_fetch_and_add(p, 1);
+ return 0;
+}
+
+SEC("socket")
+__failure
+__msg("cannot write into rdonly_untrusted_mem")
+int atomic_rmw_not_ok(void *ctx)
+{
+ long v, *p;
+
+ v = 1;
+ p = bpf_rdonly_cast(&v, 0);
+ return __sync_val_compare_and_swap(p, 0, 42);
+}
+
+SEC("socket")
+__failure
+__msg("invalid access to memory, mem_size=0 off=0 size=4")
+__msg("R1 min value is outside of the allowed memory range")
+int kfunc_param_not_ok(void *ctx)
+{
+ int *p;
+
+ p = bpf_rdonly_cast(0, 0);
+ bpf_kfunc_trusted_num_test(p);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure
+__msg("R1 type=rdonly_untrusted_mem expected=")
+int helper_param_not_ok(void *ctx)
+{
+ char *p;
+
+ p = bpf_rdonly_cast(0, 0);
+ /*
+	 * Any helper with an ARG_CONST_SIZE_OR_ZERO constraint will do;
+	 * it is the most permissive constraint.
+ */
+ bpf_copy_from_user(p, 0, (void *)42);
+ return 0;
+}
+
+static __noinline u64 *get_some_addr(void)
+{
+ if (bpf_get_prandom_u32())
+ return bpf_rdonly_cast(0, bpf_core_type_id_kernel(struct sock));
+ else
+ return bpf_rdonly_cast(0, 0);
+}
+
+SEC("socket")
+__success
+__retval(0)
+int mixed_mem_type(void *ctx)
+{
+ u64 *p;
+
+	/* Use a __noinline func to keep the compiler from hoisting the load into the if branches. */
+ p = get_some_addr();
+ return *p;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/rbtree.c b/tools/testing/selftests/bpf/progs/rbtree.c
index a3620c15c136..49fe93d7e059 100644
--- a/tools/testing/selftests/bpf/progs/rbtree.c
+++ b/tools/testing/selftests/bpf/progs/rbtree.c
@@ -61,19 +61,19 @@ static long __add_three(struct bpf_rb_root *root, struct bpf_spin_lock *lock)
}
m->key = 1;
- bpf_spin_lock(&glock);
- bpf_rbtree_add(&groot, &n->node, less);
- bpf_rbtree_add(&groot, &m->node, less);
- bpf_spin_unlock(&glock);
+ bpf_spin_lock(lock);
+ bpf_rbtree_add(root, &n->node, less);
+ bpf_rbtree_add(root, &m->node, less);
+ bpf_spin_unlock(lock);
n = bpf_obj_new(typeof(*n));
if (!n)
return 3;
n->key = 3;
- bpf_spin_lock(&glock);
- bpf_rbtree_add(&groot, &n->node, less);
- bpf_spin_unlock(&glock);
+ bpf_spin_lock(lock);
+ bpf_rbtree_add(root, &n->node, less);
+ bpf_spin_unlock(lock);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
index 43637ee2cdcd..3a868a199349 100644
--- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
@@ -16,10 +16,11 @@ struct {
__type(value, long);
} map_a SEC(".maps");
-__u32 user_data, key_serial, target_pid;
+__u32 user_data, target_pid;
+__s32 key_serial;
__u64 flags, task_storage_val, cgroup_id;
-struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
+struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym;
void bpf_key_put(struct bpf_key *key) __ksym;
void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;
diff --git a/tools/testing/selftests/bpf/progs/security_bpf_map.c b/tools/testing/selftests/bpf/progs/security_bpf_map.c
new file mode 100644
index 000000000000..7216b3450e96
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/security_bpf_map.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define EPERM 1 /* Operation not permitted */
+
+/* From include/linux/fs.h. */
+#define FMODE_WRITE 0x2
+
+struct map;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 1);
+} prot_status_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 3);
+} prot_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 3);
+} not_prot_map SEC(".maps");
+
+SEC("fmod_ret/security_bpf_map")
+int BPF_PROG(fmod_bpf_map, struct bpf_map *map, int fmode)
+{
+ __u32 key = 0;
+ __u32 *status_ptr = bpf_map_lookup_elem(&prot_status_map, &key);
+
+ if (!status_ptr || !*status_ptr)
+ return 0;
+
+ if (map == &prot_map) {
+ /* Allow read-only access */
+ if (fmode & FMODE_WRITE)
+ return -EPERM;
+ }
+
+ return 0;
+}
+
+/*
+ * This program keeps references to maps. This is needed to prevent
+ * optimizing them out.
+ */
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(fentry_dummy1, int a)
+{
+ __u32 key = 0;
+ __u32 val1 = a;
+ __u32 val2 = a + 1;
+
+ bpf_map_update_elem(&prot_map, &key, &val1, BPF_ANY);
+ bpf_map_update_elem(&not_prot_map, &key, &val2, BPF_ANY);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c
new file mode 100644
index 000000000000..53af438bd998
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Red Hat, Inc.*/
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <linux/limits.h>
+#include "bpf_misc.h"
+#include "errno.h"
+
+char *user_ptr = (char *)1;
+char *invalid_kern_ptr = (char *)-1;
+
+/*
+ * When passing userspace pointers, the error code differs based on arch:
+ * -ERANGE on arches with non-overlapping address spaces
+ * -EFAULT on other arches
+ */
+#if defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_loongarch) || \
+ defined(__TARGET_ARCH_powerpc) || defined(__TARGET_ARCH_x86)
+#define USER_PTR_ERR -ERANGE
+#else
+#define USER_PTR_ERR -EFAULT
+#endif
+
+/*
+ * On s390, __get_kernel_nofault (used in string kfuncs) returns 0 for NULL and
+ * user_ptr (instead of causing an exception) so the below two groups of tests
+ * are not applicable.
+ */
+#ifndef __TARGET_ARCH_s390
+
+/* Passing NULL to string kfuncs (treated as a userspace ptr) */
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_null1(void *ctx) { return bpf_strcmp(NULL, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strcmp_null2(void *ctx) { return bpf_strcmp("hello", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strchr_null(void *ctx) { return bpf_strchr(NULL, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strchrnul_null(void *ctx) { return bpf_strchrnul(NULL, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strnchr_null(void *ctx) { return bpf_strnchr(NULL, 1, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strrchr_null(void *ctx) { return bpf_strrchr(NULL, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strlen_null(void *ctx) { return bpf_strlen(NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strnlen_null(void *ctx) { return bpf_strnlen(NULL, 1); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strspn_null1(void *ctx) { return bpf_strspn(NULL, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strspn_null2(void *ctx) { return bpf_strspn("hello", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strcspn_null1(void *ctx) { return bpf_strcspn(NULL, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strcspn_null2(void *ctx) { return bpf_strcspn("hello", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strstr_null1(void *ctx) { return bpf_strstr(NULL, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strstr_null2(void *ctx) { return bpf_strstr("hello", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strnstr_null1(void *ctx) { return bpf_strnstr(NULL, "hello", 1); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strnstr_null2(void *ctx) { return bpf_strnstr("hello", NULL, 1); }
+
+/* Passing userspace ptr to string kfuncs */
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr1(void *ctx) { return bpf_strcmp(user_ptr, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr2(void *ctx) { return bpf_strcmp("hello", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strchr_user_ptr(void *ctx) { return bpf_strchr(user_ptr, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strchrnul_user_ptr(void *ctx) { return bpf_strchrnul(user_ptr, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strnchr_user_ptr(void *ctx) { return bpf_strnchr(user_ptr, 1, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strrchr_user_ptr(void *ctx) { return bpf_strrchr(user_ptr, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strlen_user_ptr(void *ctx) { return bpf_strlen(user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strnlen_user_ptr(void *ctx) { return bpf_strnlen(user_ptr, 1); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strspn_user_ptr1(void *ctx) { return bpf_strspn(user_ptr, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strspn_user_ptr2(void *ctx) { return bpf_strspn("hello", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcspn_user_ptr1(void *ctx) { return bpf_strcspn(user_ptr, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcspn_user_ptr2(void *ctx) { return bpf_strcspn("hello", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strstr_user_ptr1(void *ctx) { return bpf_strstr(user_ptr, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strstr_user_ptr2(void *ctx) { return bpf_strstr("hello", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strnstr_user_ptr1(void *ctx) { return bpf_strnstr(user_ptr, "hello", 1); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strnstr_user_ptr2(void *ctx) { return bpf_strnstr("hello", user_ptr, 1); }
+
+#endif /* __TARGET_ARCH_s390 */
+
+/* Passing invalid kernel ptr to string kfuncs should always return -EFAULT */
+SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault1(void *ctx) { return bpf_strcmp(invalid_kern_ptr, "hello"); }
+SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault2(void *ctx) { return bpf_strcmp("hello", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strchr_pagefault(void *ctx) { return bpf_strchr(invalid_kern_ptr, 'a'); }
+SEC("syscall") __retval(-EFAULT) int test_strchrnul_pagefault(void *ctx) { return bpf_strchrnul(invalid_kern_ptr, 'a'); }
+SEC("syscall") __retval(-EFAULT) int test_strnchr_pagefault(void *ctx) { return bpf_strnchr(invalid_kern_ptr, 1, 'a'); }
+SEC("syscall") __retval(-EFAULT) int test_strrchr_pagefault(void *ctx) { return bpf_strrchr(invalid_kern_ptr, 'a'); }
+SEC("syscall") __retval(-EFAULT) int test_strlen_pagefault(void *ctx) { return bpf_strlen(invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strnlen_pagefault(void *ctx) { return bpf_strnlen(invalid_kern_ptr, 1); }
+SEC("syscall") __retval(-EFAULT) int test_strspn_pagefault1(void *ctx) { return bpf_strspn(invalid_kern_ptr, "hello"); }
+SEC("syscall") __retval(-EFAULT) int test_strspn_pagefault2(void *ctx) { return bpf_strspn("hello", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strcspn_pagefault1(void *ctx) { return bpf_strcspn(invalid_kern_ptr, "hello"); }
+SEC("syscall") __retval(-EFAULT) int test_strcspn_pagefault2(void *ctx) { return bpf_strcspn("hello", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strstr_pagefault1(void *ctx) { return bpf_strstr(invalid_kern_ptr, "hello"); }
+SEC("syscall") __retval(-EFAULT) int test_strstr_pagefault2(void *ctx) { return bpf_strstr("hello", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strnstr_pagefault1(void *ctx) { return bpf_strnstr(invalid_kern_ptr, "hello", 1); }
+SEC("syscall") __retval(-EFAULT) int test_strnstr_pagefault2(void *ctx) { return bpf_strnstr("hello", invalid_kern_ptr, 1); }
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c
new file mode 100644
index 000000000000..89fb4669b0e9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Red Hat, Inc.*/
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <linux/limits.h>
+
+char long_str[XATTR_SIZE_MAX + 1];
+
+SEC("syscall") int test_strcmp_too_long(void *ctx) { return bpf_strcmp(long_str, long_str); }
+SEC("syscall") int test_strchr_too_long(void *ctx) { return bpf_strchr(long_str, 'b'); }
+SEC("syscall") int test_strchrnul_too_long(void *ctx) { return bpf_strchrnul(long_str, 'b'); }
+SEC("syscall") int test_strnchr_too_long(void *ctx) { return bpf_strnchr(long_str, sizeof(long_str), 'b'); }
+SEC("syscall") int test_strrchr_too_long(void *ctx) { return bpf_strrchr(long_str, 'b'); }
+SEC("syscall") int test_strlen_too_long(void *ctx) { return bpf_strlen(long_str); }
+SEC("syscall") int test_strnlen_too_long(void *ctx) { return bpf_strnlen(long_str, sizeof(long_str)); }
+SEC("syscall") int test_strspn_str_too_long(void *ctx) { return bpf_strspn(long_str, "a"); }
+SEC("syscall") int test_strspn_accept_too_long(void *ctx) { return bpf_strspn("b", long_str); }
+SEC("syscall") int test_strcspn_str_too_long(void *ctx) { return bpf_strcspn(long_str, "b"); }
+SEC("syscall") int test_strcspn_reject_too_long(void *ctx) { return bpf_strcspn("b", long_str); }
+SEC("syscall") int test_strstr_too_long(void *ctx) { return bpf_strstr(long_str, "hello"); }
+SEC("syscall") int test_strnstr_too_long(void *ctx) { return bpf_strnstr(long_str, "hello", sizeof(long_str)); }
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c
new file mode 100644
index 000000000000..46697f381878
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Red Hat, Inc.*/
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "errno.h"
+
+char str[] = "hello world";
+
+#define __test(retval) SEC("syscall") __success __retval(retval)
+
+/* Functional tests */
+__test(0) int test_strcmp_eq(void *ctx) { return bpf_strcmp(str, "hello world"); }
+__test(1) int test_strcmp_neq(void *ctx) { return bpf_strcmp(str, "hello"); }
+__test(1) int test_strchr_found(void *ctx) { return bpf_strchr(str, 'e'); }
+__test(11) int test_strchr_null(void *ctx) { return bpf_strchr(str, '\0'); }
+__test(-ENOENT) int test_strchr_notfound(void *ctx) { return bpf_strchr(str, 'x'); }
+__test(1) int test_strchrnul_found(void *ctx) { return bpf_strchrnul(str, 'e'); }
+__test(11) int test_strchrnul_notfound(void *ctx) { return bpf_strchrnul(str, 'x'); }
+__test(1) int test_strnchr_found(void *ctx) { return bpf_strnchr(str, 5, 'e'); }
+__test(11) int test_strnchr_null(void *ctx) { return bpf_strnchr(str, 12, '\0'); }
+__test(-ENOENT) int test_strnchr_notfound(void *ctx) { return bpf_strnchr(str, 5, 'w'); }
+__test(9) int test_strrchr_found(void *ctx) { return bpf_strrchr(str, 'l'); }
+__test(11) int test_strrchr_null(void *ctx) { return bpf_strrchr(str, '\0'); }
+__test(-ENOENT) int test_strrchr_notfound(void *ctx) { return bpf_strrchr(str, 'x'); }
+__test(11) int test_strlen(void *ctx) { return bpf_strlen(str); }
+__test(11) int test_strnlen(void *ctx) { return bpf_strnlen(str, 12); }
+__test(5) int test_strspn(void *ctx) { return bpf_strspn(str, "ehlo"); }
+__test(2) int test_strcspn(void *ctx) { return bpf_strcspn(str, "lo"); }
+__test(6) int test_strstr_found(void *ctx) { return bpf_strstr(str, "world"); }
+__test(-ENOENT) int test_strstr_notfound(void *ctx) { return bpf_strstr(str, "hi"); }
+__test(0) int test_strstr_empty(void *ctx) { return bpf_strstr(str, ""); }
+__test(0) int test_strnstr_found(void *ctx) { return bpf_strnstr(str, "hello", 6); }
+__test(-ENOENT) int test_strnstr_notfound(void *ctx) { return bpf_strnstr(str, "hi", 10); }
+__test(0) int test_strnstr_empty(void *ctx) { return bpf_strnstr(str, "", 1); }
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_lookup_key.c b/tools/testing/selftests/bpf/progs/test_lookup_key.c
index cdbbb12f1491..1f7e1e59b073 100644
--- a/tools/testing/selftests/bpf/progs/test_lookup_key.c
+++ b/tools/testing/selftests/bpf/progs/test_lookup_key.c
@@ -14,11 +14,11 @@
char _license[] SEC("license") = "GPL";
__u32 monitored_pid;
-__u32 key_serial;
+__s32 key_serial;
__u32 key_id;
__u64 flags;
-extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
+extern struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym;
extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym;
extern void bpf_key_put(struct bpf_key *key) __ksym;
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_write.c b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c
index 350513c0e4c9..f063a0013f85 100644
--- a/tools/testing/selftests/bpf/progs/test_ringbuf_write.c
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c
@@ -26,11 +26,11 @@ int test_ringbuf_write(void *ctx)
if (cur_pid != pid)
return 0;
- sample1 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0);
+ sample1 = bpf_ringbuf_reserve(&ringbuf, 0x30000, 0);
if (!sample1)
return 0;
/* first one can pass */
- sample2 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0);
+ sample2 = bpf_ringbuf_reserve(&ringbuf, 0x30000, 0);
if (!sample2) {
bpf_ringbuf_discard(sample1, 0);
__sync_fetch_and_add(&discarded, 1);
diff --git a/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c
index 8ef6b39335b6..34b30e2603f0 100644
--- a/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c
+++ b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c
@@ -40,7 +40,7 @@ char digest[MAGIC_SIZE + SIZEOF_STRUCT_FSVERITY_DIGEST + SHA256_DIGEST_SIZE];
__u32 monitored_pid;
char sig[MAX_SIG_SIZE];
__u32 sig_size;
-__u32 user_keyring_serial;
+__s32 user_keyring_serial;
SEC("lsm.s/file_open")
int BPF_PROG(test_file_open, struct file *f)
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c
index 2796dd8545eb..1c7941a4ad00 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c
@@ -1,8 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2024 ByteDance */
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
+#ifndef PAGE_SIZE
+#define PAGE_SIZE __PAGE_SIZE
+#endif
+#define BPF_SKB_MAX_LEN (PAGE_SIZE << 2)
+
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, 1);
@@ -31,7 +36,7 @@ int prog_skb_verdict(struct __sk_buff *skb)
change_tail_ret = bpf_skb_change_tail(skb, skb->len + 1, 0);
return SK_PASS;
} else if (data[0] == 'E') { /* Error */
- change_tail_ret = bpf_skb_change_tail(skb, 65535, 0);
+ change_tail_ret = bpf_skb_change_tail(skb, BPF_SKB_MAX_LEN, 0);
return SK_PASS;
}
return SK_PASS;
diff --git a/tools/testing/selftests/bpf/progs/test_tc_change_tail.c b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c
index 28edafe803f0..fcba8299f0bc 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_change_tail.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c
@@ -1,11 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
-#include <linux/if_ether.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <linux/pkt_cls.h>
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE __PAGE_SIZE
+#endif
+#define BPF_SKB_MAX_LEN (PAGE_SIZE << 2)
long change_tail_ret = 1;
@@ -94,7 +94,7 @@ int change_tail(struct __sk_buff *skb)
bpf_skb_change_tail(skb, len, 0);
return TCX_PASS;
} else if (payload[0] == 'E') { /* Error */
- change_tail_ret = bpf_skb_change_tail(skb, 65535, 0);
+ change_tail_ret = bpf_skb_change_tail(skb, BPF_SKB_MAX_LEN, 0);
return TCX_PASS;
} else if (payload[0] == 'Z') { /* Zero */
change_tail_ret = bpf_skb_change_tail(skb, 0, 0);
diff --git a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
index e96d09e11115..ff8d755548b9 100644
--- a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
+++ b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
@@ -17,7 +17,7 @@
#define MAX_SIG_SIZE 1024
__u32 monitored_pid;
-__u32 user_keyring_serial;
+__s32 user_keyring_serial;
__u64 system_keyring_id;
struct data {
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
index dc74d8cf9e3f..5904f45cfbc4 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
@@ -19,7 +19,9 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp)
/* Data length determine test case */
if (data_len == 54) { /* sizeof(pkt_v4) */
- offset = 4096; /* test too large offset */
+ offset = 4096; /* test too large offset, 4k page size */
+ } else if (data_len == 53) { /* sizeof(pkt_v4) - 1 */
+ offset = 65536; /* test too large offset, 64k page size */
} else if (data_len == 74) { /* sizeof(pkt_v6) */
offset = 40;
} else if (data_len == 64) {
@@ -31,6 +33,10 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp)
offset = 10;
} else if (data_len == 9001) {
offset = 4096;
+ } else if (data_len == 90000) {
+ offset = 10; /* test a small offset, 64k page size */
+ } else if (data_len == 90001) {
+ offset = 65536; /* test too large offset, 64k page size */
} else {
return XDP_ABORTED; /* No matching test */
}
diff --git a/tools/testing/selftests/bpf/progs/verifier_and.c b/tools/testing/selftests/bpf/progs/verifier_and.c
index e97e518516b6..2b4fdca162be 100644
--- a/tools/testing/selftests/bpf/progs/verifier_and.c
+++ b/tools/testing/selftests/bpf/progs/verifier_and.c
@@ -85,8 +85,14 @@ l0_%=: r0 = r0; \
SEC("socket")
__description("check known subreg with unknown reg")
-__success __failure_unpriv __msg_unpriv("R1 !read_ok")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if w0 < 0x1 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R1 !read_ok'` */
+__xlated_unpriv("goto pc-1") /* `r1 = *(u32*)(r1 + 512)`, sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
__naked void known_subreg_with_unknown_reg(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c
index 0eb33bb801b5..e52a24e15b34 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bounds.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c
@@ -620,8 +620,14 @@ l1_%=: exit; \
SEC("socket")
__description("bounds check mixed 32bit and 64bit arithmetic. test1")
-__success __failure_unpriv __msg_unpriv("R0 invalid mem access 'scalar'")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 invalid mem access 'scalar'` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("exit")
+#endif
__naked void _32bit_and_64bit_arithmetic_test1(void)
{
asm volatile (" \
@@ -643,8 +649,14 @@ l1_%=: exit; \
SEC("socket")
__description("bounds check mixed 32bit and 64bit arithmetic. test2")
-__success __failure_unpriv __msg_unpriv("R0 invalid mem access 'scalar'")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 invalid mem access 'scalar'` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("exit")
+#endif
__naked void _32bit_and_64bit_arithmetic_test2(void)
{
asm volatile (" \
@@ -691,9 +703,14 @@ l0_%=: r0 = 0; \
SEC("socket")
__description("bounds check for reg = 0, reg xor 1")
-__success __failure_unpriv
-__msg_unpriv("R0 min value is outside of the allowed memory range")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if r1 != 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
__naked void reg_0_reg_xor_1(void)
{
asm volatile (" \
@@ -719,9 +736,14 @@ l1_%=: r0 = 0; \
SEC("socket")
__description("bounds check for reg32 = 0, reg32 xor 1")
-__success __failure_unpriv
-__msg_unpriv("R0 min value is outside of the allowed memory range")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if w1 != 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
__naked void reg32_0_reg32_xor_1(void)
{
asm volatile (" \
@@ -747,9 +769,14 @@ l1_%=: r0 = 0; \
SEC("socket")
__description("bounds check for reg = 2, reg xor 3")
-__success __failure_unpriv
-__msg_unpriv("R0 min value is outside of the allowed memory range")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if r1 > 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
__naked void reg_2_reg_xor_3(void)
{
asm volatile (" \
@@ -829,9 +856,14 @@ l1_%=: r0 = 0; \
SEC("socket")
__description("bounds check for reg > 0, reg xor 3")
-__success __failure_unpriv
-__msg_unpriv("R0 min value is outside of the allowed memory range")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if r1 >= 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
__naked void reg_0_reg_xor_3(void)
{
asm volatile (" \
@@ -858,9 +890,14 @@ l1_%=: r0 = 0; \
SEC("socket")
__description("bounds check for reg32 > 0, reg32 xor 3")
-__success __failure_unpriv
-__msg_unpriv("R0 min value is outside of the allowed memory range")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if w1 >= 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
__naked void reg32_0_reg32_xor_3(void)
{
asm volatile (" \
@@ -1334,4 +1371,165 @@ __naked void mult_sign_ovf(void)
__imm(bpf_skb_store_bytes)
: __clobber_all);
}
+
+SEC("socket")
+__description("64-bit addition, all outcomes overflow")
+__success __log_level(2)
+__msg("5: (0f) r3 += r3 {{.*}} R3_w=scalar(umin=0x4000000000000000,umax=0xfffffffffffffffe)")
+__retval(0)
+__naked void add64_full_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r4 = r0;"
+ "r3 = 0xa000000000000000 ll;"
+ "r3 |= r4;"
+ "r3 += r3;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("64-bit addition, partial overflow, result in unbounded reg")
+__success __log_level(2)
+__msg("4: (0f) r3 += r3 {{.*}} R3_w=scalar()")
+__retval(0)
+__naked void add64_partial_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r4 = r0;"
+ "r3 = 2;"
+ "r3 |= r4;"
+ "r3 += r3;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("32-bit addition overflow, all outcomes overflow")
+__success __log_level(2)
+__msg("4: (0c) w3 += w3 {{.*}} R3_w=scalar(smin=umin=umin32=0x40000000,smax=umax=umax32=0xfffffffe,var_off=(0x0; 0xffffffff))")
+__retval(0)
+__naked void add32_full_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "w4 = w0;"
+ "w3 = 0xa0000000;"
+ "w3 |= w4;"
+ "w3 += w3;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("32-bit addition, partial overflow, result in unbounded u32 bounds")
+__success __log_level(2)
+__msg("4: (0c) w3 += w3 {{.*}} R3_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))")
+__retval(0)
+__naked void add32_partial_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "w4 = w0;"
+ "w3 = 2;"
+ "w3 |= w4;"
+ "w3 += w3;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("64-bit subtraction, all outcomes underflow")
+__success __log_level(2)
+__msg("6: (1f) r3 -= r1 {{.*}} R3_w=scalar(umin=1,umax=0x8000000000000000)")
+__retval(0)
+__naked void sub64_full_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r1 = r0;"
+ "r2 = 0x8000000000000000 ll;"
+ "r1 |= r2;"
+ "r3 = 0;"
+ "r3 -= r1;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("64-bit subtration, partial overflow, result in unbounded reg")
+__success __log_level(2)
+__msg("3: (1f) r3 -= r2 {{.*}} R3_w=scalar()")
+__retval(0)
+__naked void sub64_partial_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r3 = r0;"
+ "r2 = 1;"
+ "r3 -= r2;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("32-bit subtraction overflow, all outcomes underflow")
+__success __log_level(2)
+__msg("5: (1c) w3 -= w1 {{.*}} R3_w=scalar(smin=umin=umin32=1,smax=umax=umax32=0x80000000,var_off=(0x0; 0xffffffff))")
+__retval(0)
+__naked void sub32_full_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "w1 = w0;"
+ "w2 = 0x80000000;"
+ "w1 |= w2;"
+ "w3 = 0;"
+ "w3 -= w1;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("32-bit subtration, partial overflow, result in unbounded u32 bounds")
+__success __log_level(2)
+__msg("3: (1c) w3 -= w2 {{.*}} R3_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))")
+__retval(0)
+__naked void sub32_partial_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "w3 = w0;"
+ "w2 = 1;"
+ "w3 -= w2;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c
index c506afbdd936..260a6df264e3 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c
@@ -159,13 +159,16 @@ __failure_unpriv
__naked void deducing_bounds_from_const_10(void)
{
asm volatile (" \
+ r6 = r1; \
r0 = 0; \
if r0 s<= 0 goto l0_%=; \
-l0_%=: /* Marks reg as unknown. */ \
- r0 = -r0; \
- r0 -= r1; \
+l0_%=: /* Marks r0 as unknown. */ \
+ call %[bpf_get_prandom_u32]; \
+ r0 -= r6; \
exit; \
-" ::: __clobber_all);
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
}
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_div_overflow.c b/tools/testing/selftests/bpf/progs/verifier_div_overflow.c
index 458984da804c..34e0c012ee76 100644
--- a/tools/testing/selftests/bpf/progs/verifier_div_overflow.c
+++ b/tools/testing/selftests/bpf/progs/verifier_div_overflow.c
@@ -77,7 +77,7 @@ l0_%=: exit; \
SEC("tc")
__description("MOD32 overflow, check 1")
-__success __retval(INT_MIN)
+__success __retval(_INT_MIN)
__naked void mod32_overflow_check_1(void)
{
asm volatile (" \
@@ -92,7 +92,7 @@ __naked void mod32_overflow_check_1(void)
SEC("tc")
__description("MOD32 overflow, check 2")
-__success __retval(INT_MIN)
+__success __retval(_INT_MIN)
__naked void mod32_overflow_check_2(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c
index 7d088ba99ea5..16b761e510f0 100644
--- a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c
+++ b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c
@@ -139,4 +139,122 @@ __naked void on_the_inner_map_pointer(void)
: __clobber_all);
}
+SEC("socket")
+__description("map_ptr is never null")
+__success
+__naked void map_ptr_is_never_null(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ r1 = %[map_in_map] ll; \
+ if r1 != 0 goto l0_%=; \
+ r10 = 42; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_in_map)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map_ptr is never null inner")
+__success
+__naked void map_ptr_is_never_null_inner(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_in_map] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ if r0 != 0 goto l0_%=; \
+ r10 = 42; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_in_map)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map_ptr is never null inner spill fill")
+__success
+__naked void map_ptr_is_never_null_inner_spill_fill(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_in_map] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: *(u64 *)(r10 -16) = r0; \
+ r1 = *(u64 *)(r10 -16); \
+ if r1 == 0 goto l1_%=; \
+ exit; \
+l1_%=: r10 = 42; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_in_map)
+ : __clobber_all);
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 64 * 1024);
+ });
+} rb_in_map SEC(".maps");
+
+struct rb_ctx {
+ void *rb;
+ struct bpf_dynptr dptr;
+};
+
+static __always_inline struct rb_ctx __rb_event_reserve(__u32 sz)
+{
+ struct rb_ctx rb_ctx = {};
+ void *rb;
+ __u32 cpu = bpf_get_smp_processor_id();
+ __u32 rb_slot = cpu & 1;
+
+ rb = bpf_map_lookup_elem(&rb_in_map, &rb_slot);
+ if (!rb)
+ return rb_ctx;
+
+ rb_ctx.rb = rb;
+ bpf_ringbuf_reserve_dynptr(rb, sz, 0, &rb_ctx.dptr);
+
+ return rb_ctx;
+}
+
+static __noinline void __rb_event_submit(struct rb_ctx *ctx)
+{
+ if (!ctx->rb)
+ return;
+
+ /* If the verifier (incorrectly) concludes that ctx->rb can be
+ * NULL at this point, we'll get "BPF_EXIT instruction in main
+ * prog would lead to reference leak" error
+ */
+ bpf_ringbuf_submit_dynptr(&ctx->dptr, 0);
+}
+
+SEC("socket")
+int map_ptr_is_never_null_rb(void *ctx)
+{
+ struct rb_ctx event_ctx = __rb_event_reserve(256);
+ __rb_event_submit(&event_ctx);
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c
index 994bbc346d25..a4d8814eb5ed 100644
--- a/tools/testing/selftests/bpf/progs/verifier_movsx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c
@@ -245,7 +245,13 @@ l0_%=: \
SEC("socket")
__description("MOV32SX, S8, var_off not u32_max, positive after s8 extension")
__success __retval(0)
-__failure_unpriv __msg_unpriv("frame pointer is read only")
+__success_unpriv
+#ifdef SPEC_V1
+__xlated_unpriv("w0 = 0")
+__xlated_unpriv("exit")
+__xlated_unpriv("nospec") /* inserted to prevent `frame pointer is read only` */
+__xlated_unpriv("goto pc-1")
+#endif
__naked void mov64sx_s32_varoff_2(void)
{
asm volatile (" \
@@ -267,7 +273,13 @@ l0_%=: \
SEC("socket")
__description("MOV32SX, S8, var_off not u32_max, negative after s8 extension")
__success __retval(0)
-__failure_unpriv __msg_unpriv("frame pointer is read only")
+__success_unpriv
+#ifdef SPEC_V1
+__xlated_unpriv("w0 = 0")
+__xlated_unpriv("exit")
+__xlated_unpriv("nospec") /* inserted to prevent `frame pointer is read only` */
+__xlated_unpriv("goto pc-1")
+#endif
__naked void mov64sx_s32_varoff_3(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c
index 9fe5d255ee37..73fee2aec698 100644
--- a/tools/testing/selftests/bpf/progs/verifier_precision.c
+++ b/tools/testing/selftests/bpf/progs/verifier_precision.c
@@ -231,4 +231,74 @@ __naked void bpf_cond_op_not_r10(void)
::: __clobber_all);
}
+SEC("lsm.s/socket_connect")
+__success __log_level(2)
+__msg("0: (b7) r0 = 1 ; R0_w=1")
+__msg("1: (84) w0 = -w0 ; R0_w=0xffffffff")
+__msg("mark_precise: frame0: last_idx 2 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r0 stack= before 1: (84) w0 = -w0")
+__msg("mark_precise: frame0: regs=r0 stack= before 0: (b7) r0 = 1")
+__naked int bpf_neg_2(void)
+{
+ /*
+ * lsm.s/socket_connect requires a return value within [-4095, 0].
+ * Returning -1 is allowed
+ */
+ asm volatile (
+ "r0 = 1;"
+ "w0 = -w0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm.s/socket_connect")
+__failure __msg("At program exit the register R0 has")
+__naked int bpf_neg_3(void)
+{
+ /*
+ * lsm.s/socket_connect requires a return value within [-4095, 0].
+ * Returning -10000 is not allowed.
+ */
+ asm volatile (
+ "r0 = 10000;"
+ "w0 = -w0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm.s/socket_connect")
+__success __log_level(2)
+__msg("0: (b7) r0 = 1 ; R0_w=1")
+__msg("1: (87) r0 = -r0 ; R0_w=-1")
+__msg("mark_precise: frame0: last_idx 2 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r0 stack= before 1: (87) r0 = -r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 0: (b7) r0 = 1")
+__naked int bpf_neg_4(void)
+{
+ /*
+ * lsm.s/socket_connect requires a return value within [-4095, 0].
+ * Returning -1 is allowed
+ */
+ asm volatile (
+ "r0 = 1;"
+ "r0 = -r0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm.s/socket_connect")
+__failure __msg("At program exit the register R0 has")
+__naked int bpf_neg_5(void)
+{
+ /*
+ * lsm.s/socket_connect requires a return value within [-4095, 0].
+ * Returning -10000 is not allowed.
+ */
+ asm volatile (
+ "r0 = 10000;"
+ "r0 = -r0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c b/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c
index 683a882b3e6d..910365201f68 100644
--- a/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c
+++ b/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c
@@ -27,7 +27,7 @@ struct bpf_key {} __attribute__((preserve_access_index));
extern void bpf_key_put(struct bpf_key *key) __ksym;
extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym;
-extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
+extern struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym;
/* BTF FUNC records are not generated for kfuncs referenced
* from inline assembly. These records are necessary for
diff --git a/tools/testing/selftests/bpf/progs/verifier_unpriv.c b/tools/testing/selftests/bpf/progs/verifier_unpriv.c
index a4a5e2071604..4470541b5e71 100644
--- a/tools/testing/selftests/bpf/progs/verifier_unpriv.c
+++ b/tools/testing/selftests/bpf/progs/verifier_unpriv.c
@@ -572,8 +572,14 @@ l0_%=: exit; \
SEC("socket")
__description("alu32: mov u32 const")
-__success __failure_unpriv __msg_unpriv("R7 invalid mem access 'scalar'")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if r0 == 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R7 invalid mem access 'scalar'` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("exit")
+#endif
__naked void alu32_mov_u32_const(void)
{
asm volatile (" \
@@ -619,12 +625,11 @@ __naked void pass_pointer_to_tail_call(void)
SEC("socket")
__description("unpriv: cmp map pointer with zero")
-__success __failure_unpriv __msg_unpriv("R1 pointer comparison")
+__success __success_unpriv
__retval(0)
__naked void cmp_map_pointer_with_zero(void)
{
asm volatile (" \
- r1 = 0; \
r1 = %[map_hash_8b] ll; \
if r1 == 0 goto l0_%=; \
l0_%=: r0 = 0; \
@@ -635,6 +640,22 @@ l0_%=: r0 = 0; \
}
SEC("socket")
+__description("unpriv: cmp map pointer with const")
+__success __failure_unpriv __msg_unpriv("R1 pointer comparison prohibited")
+__retval(0)
+__naked void cmp_map_pointer_with_const(void)
+{
+ asm volatile (" \
+ r1 = %[map_hash_8b] ll; \
+ if r1 == 0x0000beef goto l0_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
__description("unpriv: write into frame pointer")
__failure __msg("frame pointer is read only")
__failure_unpriv
@@ -723,4 +744,61 @@ l0_%=: r0 = 0; \
" ::: __clobber_all);
}
+SEC("socket")
+__description("unpriv: Spectre v1 path-based type confusion of scalar as stack-ptr")
+__success __success_unpriv __retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if r0 != 0x1 goto pc+2")
+/* This nospec prevents the exploit because it forces the mispredicted (not
+ * taken) `if r0 != 0x0 goto l0_%=` to resolve before using r6 as a pointer.
+ * This causes the CPU to realize that `r6 = r9` should have never executed. It
+ * ensures that r6 always contains a readable stack slot ptr when the insn after
+ * the nospec executes.
+ */
+__xlated_unpriv("nospec")
+__xlated_unpriv("r9 = *(u8 *)(r6 +0)")
+#endif
+__naked void unpriv_spec_v1_type_confusion(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ /* r0: pointer to a map array entry */ \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ /* r1, r2: prepared call args */ \
+ r6 = r10; \
+ r6 += -8; \
+ /* r6: pointer to readable stack slot */ \
+ r9 = 0xffffc900; \
+ r9 <<= 32; \
+ /* r9: scalar controlled by attacker */ \
+ r0 = *(u64 *)(r0 + 0); /* cache miss */ \
+ if r0 != 0x0 goto l0_%=; \
+ r6 = r9; \
+l0_%=: if r0 != 0x1 goto l1_%=; \
+ r9 = *(u8 *)(r6 + 0); \
+l1_%=: /* leak r9 */ \
+ r9 &= 1; \
+ r9 <<= 9; \
+ *(u64*)(r10 - 8) = r9; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ /* leak secret into is_cached(map[0|512]): */ \
+ r0 = *(u64 *)(r0 + 0); \
+l2_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c b/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c
index 5ba6e53571c8..af7938ce56cb 100644
--- a/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c
+++ b/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c
@@ -231,6 +231,10 @@ __retval(1)
__naked void ptr_unknown_vs_unknown_lt(void)
{
asm volatile (" \
+ r8 = r1; \
+ call %[bpf_get_prandom_u32]; \
+ r9 = r0; \
+ r1 = r8; \
r0 = *(u32*)(r1 + %[__sk_buff_len]); \
r1 = 0; \
*(u64*)(r10 - 8) = r1; \
@@ -245,11 +249,11 @@ l1_%=: call %[bpf_map_lookup_elem]; \
r4 = *(u8*)(r0 + 0); \
if r4 == 1 goto l3_%=; \
r1 = 6; \
- r1 = -r1; \
+ r1 = r9; \
r1 &= 0x3; \
goto l4_%=; \
l3_%=: r1 = 6; \
- r1 = -r1; \
+ r1 = r9; \
r1 &= 0x7; \
l4_%=: r1 += r0; \
r0 = *(u8*)(r1 + 0); \
@@ -259,7 +263,8 @@ l2_%=: r0 = 1; \
: __imm(bpf_map_lookup_elem),
__imm_addr(map_array_48b),
__imm_addr(map_hash_16b),
- __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len))
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)),
+ __imm(bpf_get_prandom_u32)
: __clobber_all);
}
@@ -271,6 +276,10 @@ __retval(1)
__naked void ptr_unknown_vs_unknown_gt(void)
{
asm volatile (" \
+ r8 = r1; \
+ call %[bpf_get_prandom_u32]; \
+ r9 = r0; \
+ r1 = r8; \
r0 = *(u32*)(r1 + %[__sk_buff_len]); \
r1 = 0; \
*(u64*)(r10 - 8) = r1; \
@@ -285,11 +294,11 @@ l1_%=: call %[bpf_map_lookup_elem]; \
r4 = *(u8*)(r0 + 0); \
if r4 == 1 goto l3_%=; \
r1 = 6; \
- r1 = -r1; \
+ r1 = r9; \
r1 &= 0x7; \
goto l4_%=; \
l3_%=: r1 = 6; \
- r1 = -r1; \
+ r1 = r9; \
r1 &= 0x3; \
l4_%=: r1 += r0; \
r0 = *(u8*)(r1 + 0); \
@@ -299,7 +308,8 @@ l2_%=: r0 = 1; \
: __imm(bpf_map_lookup_elem),
__imm_addr(map_array_48b),
__imm_addr(map_hash_16b),
- __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len))
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)),
+ __imm(bpf_get_prandom_u32)
: __clobber_all);
}
@@ -398,7 +408,8 @@ l2_%=: r0 = 1; \
SEC("socket")
__description("map access: mixing value pointer and scalar, 1")
-__success __failure_unpriv __msg_unpriv("R2 pointer comparison prohibited")
+__success __failure_unpriv
+__msg_unpriv("R2 tried to add from different maps, paths or scalars, pointer arithmetic with it prohibited for !root")
__retval(0)
__naked void value_pointer_and_scalar_1(void)
{
@@ -433,6 +444,7 @@ l2_%=: /* common instruction */ \
l3_%=: /* branch B */ \
r0 = 0x13371337; \
/* verifier follows fall-through */ \
+ /* unpriv: nospec (inserted to prevent `R2 pointer comparison prohibited`) */\
if r2 != 0x100000 goto l4_%=; \
r0 = 0; \
exit; \
@@ -450,7 +462,8 @@ l4_%=: /* fake-dead code; targeted from branch A to \
SEC("socket")
__description("map access: mixing value pointer and scalar, 2")
-__success __failure_unpriv __msg_unpriv("R0 invalid mem access 'scalar'")
+__success __failure_unpriv
+__msg_unpriv("R2 tried to add from different maps, paths or scalars, pointer arithmetic with it prohibited for !root")
__retval(0)
__naked void value_pointer_and_scalar_2(void)
{
@@ -492,6 +505,7 @@ l4_%=: /* fake-dead code; targeted from branch A to \
* prevent dead code sanitization, rejected \
* via branch B however \
*/ \
+ /* unpriv: nospec (inserted to prevent `R0 invalid mem access 'scalar'`) */\
r0 = *(u8*)(r0 + 0); \
r0 = 0; \
exit; \
@@ -1296,9 +1310,13 @@ l0_%=: r0 = 1; \
SEC("socket")
__description("map access: value_ptr -= unknown scalar, 2")
-__success __failure_unpriv
-__msg_unpriv("R0 pointer arithmetic of map value goes out of range")
+__success __success_unpriv
__retval(1)
+#ifdef SPEC_V1
+__xlated_unpriv("r1 &= 7")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 pointer arithmetic of map value goes out of range` */
+__xlated_unpriv("r0 -= r1")
+#endif
__naked void value_ptr_unknown_scalar_2_2(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/test_bpftool_map.sh b/tools/testing/selftests/bpf/test_bpftool_map.sh
new file mode 100755
index 000000000000..515b1df0501e
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_bpftool_map.sh
@@ -0,0 +1,398 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+TESTNAME="bpftool_map"
+BPF_FILE="security_bpf_map.bpf.o"
+BPF_ITER_FILE="bpf_iter_map_elem.bpf.o"
+PROTECTED_MAP_NAME="prot_map"
+NOT_PROTECTED_MAP_NAME="not_prot_map"
+BPF_FS_TMP_PARENT="/tmp"
+BPF_FS_PARENT=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts)
+BPF_FS_PARENT=${BPF_FS_PARENT:-$BPF_FS_TMP_PARENT}
+# bpftool will mount bpf file system under BPF_DIR if it is not mounted
+# under BPF_FS_PARENT.
+BPF_DIR="$BPF_FS_PARENT/test_$TESTNAME"
+SCRIPT_DIR=$(dirname $(realpath "$0"))
+BPF_FILE_PATH="$SCRIPT_DIR/$BPF_FILE"
+BPF_ITER_FILE_PATH="$SCRIPT_DIR/$BPF_ITER_FILE"
+BPFTOOL_PATH="bpftool"
+# Assume the script is located under tools/testing/selftests/bpf/
+KDIR_ROOT_DIR=$(realpath "$SCRIPT_DIR"/../../../../)
+
+_cleanup()
+{
+ set +eu
+
+ # If BPF_DIR is a mount point this will not remove the mount point itself.
+ [ -d "$BPF_DIR" ] && rm -rf "$BPF_DIR" 2> /dev/null
+
+ # Unmount if BPF filesystem was temporarily created.
+ if [ "$BPF_FS_PARENT" = "$BPF_FS_TMP_PARENT" ]; then
+ # A loop and recursive unmount are required as bpftool might
+ # create multiple mounts. For example, a bind mount of the directory
+ # to itself. The bind mount is created to change mount propagation
+ # flags on an actual mount point.
+ max_attempts=3
+ attempt=0
+ while mountpoint -q "$BPF_DIR" && [ $attempt -lt $max_attempts ]; do
+ umount -R "$BPF_DIR" 2>/dev/null
+ attempt=$((attempt+1))
+ done
+
+ # The directory still exists. Remove it now.
+ [ -d "$BPF_DIR" ] && rm -rf "$BPF_DIR" 2>/dev/null
+ fi
+}
+
+cleanup_skip()
+{
+ echo "selftests: $TESTNAME [SKIP]"
+ _cleanup
+
+ exit $ksft_skip
+}
+
+cleanup()
+{
+ if [ "$?" = 0 ]; then
+ echo "selftests: $TESTNAME [PASS]"
+ else
+ echo "selftests: $TESTNAME [FAILED]"
+ fi
+ _cleanup
+}
+
+check_root_privileges() {
+ if [ $(id -u) -ne 0 ]; then
+ echo "Need root privileges"
+ exit $ksft_skip
+ fi
+}
+
+# Function to verify bpftool path.
+# Parameters:
+# $1: bpftool path
+verify_bpftool_path() {
+ local bpftool_path="$1"
+ if ! "$bpftool_path" version > /dev/null 2>&1; then
+ echo "Could not run test without bpftool"
+ exit $ksft_skip
+ fi
+}
+
+# Function to verify BTF support.
+# The test requires BTF support for fmod_ret programs.
+verify_btf_support() {
+ if [ ! -f /sys/kernel/btf/vmlinux ]; then
+ echo "Could not run test without BTF support"
+ exit $ksft_skip
+ fi
+}
+
+# Function to initialize map entries with keys [0..2]; each value's last byte is set to the key.
+# Parameters:
+# $1: Map name
+# $2: bpftool path
+initialize_map_entries() {
+ local map_name="$1"
+ local bpftool_path="$2"
+
+ for key in 0 1 2; do
+ "$bpftool_path" map update name "$map_name" key $key 0 0 0 value 0 0 0 $key
+ done
+}
+
+# Test read access to the map.
+# Parameters:
+# $1: Name command (name/pinned)
+# $2: Map name
+# $3: bpftool path
+# $4: key
+access_for_read() {
+ local name_cmd="$1"
+ local map_name="$2"
+ local bpftool_path="$3"
+ local key="$4"
+
+ # Test read access to the map.
+ if ! "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then
+ echo " Read access to $key in $map_name failed"
+ exit 1
+ fi
+
+ # Test read access to map's BTF data.
+ if ! "$bpftool_path" btf dump map "$name_cmd" "$map_name" 1>/dev/null; then
+ echo " Read access to $map_name for BTF data failed"
+ exit 1
+ fi
+}
+
+# Test write access to the map.
+# Parameters:
+# $1: Name command (name/pinned)
+# $2: Map name
+# $3: bpftool path
+# $4: key
+# $5: Whether write should succeed (true/false)
+access_for_write() {
+ local name_cmd="$1"
+ local map_name="$2"
+ local bpftool_path="$3"
+ local key="$4"
+ local write_should_succeed="$5"
+ local value="1 1 1 1"
+
+ if "$bpftool_path" map update "$name_cmd" "$map_name" key $key value \
+ $value 2>/dev/null; then
+ if [ "$write_should_succeed" = "false" ]; then
+ echo " Write access to $key in $map_name succeeded but should have failed"
+ exit 1
+ fi
+ else
+ if [ "$write_should_succeed" = "true" ]; then
+ echo " Write access to $key in $map_name failed but should have succeeded"
+ exit 1
+ fi
+ fi
+}
+
+# Test entry deletion for the map.
+# Parameters:
+# $1: Name command (name/pinned)
+# $2: Map name
+# $3: bpftool path
+# $4: key
+# $5: Whether write should succeed (true/false)
+access_for_deletion() {
+ local name_cmd="$1"
+ local map_name="$2"
+ local bpftool_path="$3"
+ local key="$4"
+ local write_should_succeed="$5"
+ local value="1 1 1 1"
+
+ # Test deletion by key for the map.
+ # Before deleting, check the key exists.
+ if ! "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then
+ echo " Key $key does not exist in $map_name"
+ exit 1
+ fi
+
+ # Delete by key.
+ if "$bpftool_path" map delete "$name_cmd" "$map_name" key $key 2>/dev/null; then
+ if [ "$write_should_succeed" = "false" ]; then
+ echo " Deletion for $key in $map_name succeeded but should have failed"
+ exit 1
+ fi
+ else
+ if [ "$write_should_succeed" = "true" ]; then
+ echo " Deletion for $key in $map_name failed but should have succeeded"
+ exit 1
+ fi
+ fi
+
+ # After the deletion attempt, check that the entry's existence matches the expected outcome.
+ if "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then
+ if [ "$write_should_succeed" = "true" ]; then
+ echo " Key $key for $map_name was not deleted but should have been deleted"
+ exit 1
+ fi
+ else
+ if [ "$write_should_succeed" = "false" ]; then
+ echo "Key $key for $map_name was deleted but should have not been deleted"
+ exit 1
+ fi
+ fi
+
+ # Test re-creation of the map's deleted entry if the deletion was successful.
+ # Otherwise, the entry still exists.
+ if "$bpftool_path" map update "$name_cmd" "$map_name" key $key value \
+ $value 2>/dev/null; then
+ if [ "$write_should_succeed" = "false" ]; then
+ echo " Write access to $key in $map_name succeeded after deletion attempt but should have failed"
+ exit 1
+ fi
+ else
+ if [ "$write_should_succeed" = "true" ]; then
+ echo " Write access to $key in $map_name failed after deletion attempt but should have succeeded"
+ exit 1
+ fi
+ fi
+}
+
+# Test map elements iterator.
+# Parameters:
+# $1: Name command (name/pinned)
+# $2: Map name
+# $3: bpftool path
+# $4: BPF_DIR
+# $5: bpf iterator object file path
+iterate_map_elem() {
+ local name_cmd="$1"
+ local map_name="$2"
+ local bpftool_path="$3"
+ local bpf_dir="$4"
+ local bpf_file="$5"
+ local pin_path="$bpf_dir/map_iterator"
+
+ "$bpftool_path" iter pin "$bpf_file" "$pin_path" map "$name_cmd" "$map_name"
+ if [ ! -f "$pin_path" ]; then
+ echo " Failed to pin iterator to $pin_path"
+ exit 1
+ fi
+
+ cat "$pin_path" 1>/dev/null
+ rm "$pin_path" 2>/dev/null
+}
+
+# Function to test map access with configurable write expectations
+# Parameters:
+# $1: Name command (name/pinned)
+# $2: Map name
+# $3: bpftool path
+# $4: key for rw
+# $5: key to delete
+# $6: Whether write should succeed (true/false)
+# $7: BPF_DIR
+# $8: bpf iterator object file path
+access_map() {
+ local name_cmd="$1"
+ local map_name="$2"
+ local bpftool_path="$3"
+ local key_for_rw="$4"
+ local key_to_del="$5"
+ local write_should_succeed="$6"
+ local bpf_dir="$7"
+ local bpf_iter_file_path="$8"
+
+ access_for_read "$name_cmd" "$map_name" "$bpftool_path" "$key_for_rw"
+ access_for_write "$name_cmd" "$map_name" "$bpftool_path" "$key_for_rw" \
+ "$write_should_succeed"
+ access_for_deletion "$name_cmd" "$map_name" "$bpftool_path" "$key_to_del" \
+ "$write_should_succeed"
+ iterate_map_elem "$name_cmd" "$map_name" "$bpftool_path" "$bpf_dir" \
+ "$bpf_iter_file_path"
+}
+
+# Function to test map access, both by name and by pinned path, with configurable write expectations
+# Parameters:
+# $1: Map name
+# $2: bpftool path
+# $3: BPF_DIR
+# $4: Whether write should succeed (true/false)
+# $5: bpf iterator object file path
+test_map_access() {
+ local map_name="$1"
+ local bpftool_path="$2"
+ local bpf_dir="$3"
+ local pin_path="$bpf_dir/${map_name}_pinned"
+ local write_should_succeed="$4"
+ local bpf_iter_file_path="$5"
+
+ # Test access to the map by name.
+ access_map "name" "$map_name" "$bpftool_path" "0 0 0 0" "1 0 0 0" \
+ "$write_should_succeed" "$bpf_dir" "$bpf_iter_file_path"
+
+ # Pin the map to the BPF filesystem
+ "$bpftool_path" map pin name "$map_name" "$pin_path"
+ if [ ! -e "$pin_path" ]; then
+ echo " Failed to pin $map_name"
+ exit 1
+ fi
+
+ # Test access to the pinned map.
+ access_map "pinned" "$pin_path" "$bpftool_path" "0 0 0 0" "2 0 0 0" \
+ "$write_should_succeed" "$bpf_dir" "$bpf_iter_file_path"
+}
+
+# Function to test map creation and map-of-maps
+# Parameters:
+# $1: bpftool path
+# $2: BPF_DIR
+test_map_creation_and_map_of_maps() {
+ local bpftool_path="$1"
+ local bpf_dir="$2"
+ local outer_map_name="outer_map_tt"
+ local inner_map_name="inner_map_tt"
+
+ "$bpftool_path" map create "$bpf_dir/$inner_map_name" type array key 4 \
+ value 4 entries 4 name "$inner_map_name"
+ if [ ! -f "$bpf_dir/$inner_map_name" ]; then
+ echo " Failed to create inner map file at $bpf_dir/$outer_map_name"
+ return 1
+ fi
+
+ "$bpftool_path" map create "$bpf_dir/$outer_map_name" type hash_of_maps \
+ key 4 value 4 entries 2 name "$outer_map_name" inner_map name "$inner_map_name"
+ if [ ! -f "$bpf_dir/$outer_map_name" ]; then
+ echo " Failed to create outer map file at $bpf_dir/$outer_map_name"
+ return 1
+ fi
+
+ # Add entries to the outer map by name and by pinned path.
+ "$bpftool_path" map update pinned "$bpf_dir/$outer_map_name" key 0 0 0 0 \
+ value pinned "$bpf_dir/$inner_map_name"
+ "$bpftool_path" map update name "$outer_map_name" key 1 0 0 0 value \
+ name "$inner_map_name"
+
+ # The outer map should be full by now.
+ # The following map update command is expected to fail.
+ if "$bpftool_path" map update name "$outer_map_name" key 2 0 0 0 value name \
+ "$inner_map_name" 2>/dev/null; then
+ echo " Update for $outer_map_name succeeded but should have failed"
+ exit 1
+ fi
+}
+
+# Function to test map access with the btf list command
+# Parameters:
+# $1: bpftool path
+test_map_access_with_btf_list() {
+ local bpftool_path="$1"
+
+ # The btf list command iterates over maps for
+ # loaded BPF programs.
+ if ! "$bpftool_path" btf list 1>/dev/null; then
+ echo " Failed to access btf data"
+ exit 1
+ fi
+}
+
+set -eu
+
+trap cleanup_skip EXIT
+
+check_root_privileges
+
+verify_bpftool_path "$BPFTOOL_PATH"
+
+verify_btf_support
+
+trap cleanup EXIT
+
+# Load and attach the BPF programs to control maps access.
+"$BPFTOOL_PATH" prog loadall "$BPF_FILE_PATH" "$BPF_DIR" autoattach
+
+initialize_map_entries "$PROTECTED_MAP_NAME" "$BPFTOOL_PATH"
+initialize_map_entries "$NOT_PROTECTED_MAP_NAME" "$BPFTOOL_PATH"
+
+# Activate the map protection mechanism. Protection status is controlled
+# by a value stored in the prot_status_map at index 0.
+"$BPFTOOL_PATH" map update name prot_status_map key 0 0 0 0 value 1 0 0 0
+
+# Test protected map (write should fail).
+test_map_access "$PROTECTED_MAP_NAME" "$BPFTOOL_PATH" "$BPF_DIR" "false" \
+ "$BPF_ITER_FILE_PATH"
+
+# Test not protected map (write should succeed).
+test_map_access "$NOT_PROTECTED_MAP_NAME" "$BPFTOOL_PATH" "$BPF_DIR" "true" \
+ "$BPF_ITER_FILE_PATH"
+
+test_map_creation_and_map_of_maps "$BPFTOOL_PATH" "$BPF_DIR"
+
+test_map_access_with_btf_list "$BPFTOOL_PATH"
+
+exit 0
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index 9551d8d5f8f9..78423cf89e01 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -40,7 +40,7 @@
#define TEST_TAG_LOAD_MODE_PFX "comment:load_mode="
/* Warning: duplicated in bpf_misc.h */
-#define POINTER_VALUE 0xcafe4all
+#define POINTER_VALUE 0xbadcafe
#define TEST_DATA_LEN 64
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
@@ -318,20 +318,14 @@ static int parse_caps(const char *str, __u64 *val, const char *name)
static int parse_retval(const char *str, int *val, const char *name)
{
- struct {
- char *name;
- int val;
- } named_values[] = {
- { "INT_MIN" , INT_MIN },
- { "POINTER_VALUE", POINTER_VALUE },
- { "TEST_DATA_LEN", TEST_DATA_LEN },
- };
- int i;
-
- for (i = 0; i < ARRAY_SIZE(named_values); ++i) {
- if (strcmp(str, named_values[i].name) != 0)
- continue;
- *val = named_values[i].val;
+ /*
+ * INT_MIN is defined as (-INT_MAX -1), i.e. it doesn't expand to a
+ * single int and cannot be parsed with strtol, so we handle it
+ * separately here. In addition, it expands to different expressions in
+ * different compilers so we use a prefixed _INT_MIN instead.
+ */
+ if (strcmp(str, "_INT_MIN") == 0) {
+ *val = INT_MIN;
return 0;
}
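
As a hypothetical illustration (not part of this patch), a test spec can now spell the boundary value with the prefixed name; the bpf_misc.h macros are assumed:

SEC("socket")
__description("example: named INT_MIN retval")
__success __retval(_INT_MIN)
int example_ret_int_min(void *ctx)
{
        return -2147483647 - 1;  /* INT_MIN without relying on <limits.h> */
}
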
@@ -1103,9 +1097,9 @@ void run_subtest(struct test_loader *tester,
}
}
- do_prog_test_run(bpf_program__fd(tprog), &retval,
- bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false);
- if (retval != subspec->retval && subspec->retval != POINTER_VALUE) {
+ err = do_prog_test_run(bpf_program__fd(tprog), &retval,
+ bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false);
+ if (!err && retval != subspec->retval && subspec->retval != POINTER_VALUE) {
PRINT_FAIL("Unexpected retval: %d != %d\n", retval, subspec->retval);
goto tobj_cleanup;
}
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 870694f2a359..df2222a1806f 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -460,6 +460,34 @@ static inline void *u64_to_ptr(__u64 ptr)
return (void *) (unsigned long) ptr;
}
+static inline __u32 id_from_prog_fd(int fd)
+{
+ struct bpf_prog_info prog_info = {};
+ __u32 prog_info_len = sizeof(prog_info);
+ int err;
+
+ err = bpf_obj_get_info_by_fd(fd, &prog_info, &prog_info_len);
+ if (!ASSERT_OK(err, "id_from_prog_fd"))
+ return 0;
+
+ ASSERT_NEQ(prog_info.id, 0, "prog_info.id");
+ return prog_info.id;
+}
+
+static inline __u32 id_from_link_fd(int fd)
+{
+ struct bpf_link_info link_info = {};
+ __u32 link_info_len = sizeof(link_info);
+ int err;
+
+ err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len);
+ if (!ASSERT_OK(err, "id_from_link_fd"))
+ return 0;
+
+ ASSERT_NEQ(link_info.id, 0, "link_info.id");
+ return link_info.id;
+}
+
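
A hypothetical call site (not from this patch) showing how a test might use the two helpers; the skeleton and the ids returned by a kernel-side query are assumptions:

/* "skel" is an assumed skeleton; query_prog_id/query_link_id would come from
 * e.g. a BPF_PROG_QUERY or bpf_link_get_next_id() based lookup.
 */
__u32 prog_id = id_from_prog_fd(bpf_program__fd(skel->progs.example));
__u32 link_id = id_from_link_fd(bpf_link__fd(skel->links.example));

ASSERT_EQ(query_prog_id, prog_id, "prog id matches");
ASSERT_EQ(query_link_id, link_id, "link id matches");
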
int bpf_find_map(const char *test, struct bpf_object *obj, const char *name);
int compare_map_keys(int map1_fd, int map2_fd);
int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len);
diff --git a/tools/testing/selftests/bpf/unpriv_helpers.c b/tools/testing/selftests/bpf/unpriv_helpers.c
index 220f6a963813..f997d7ec8fd0 100644
--- a/tools/testing/selftests/bpf/unpriv_helpers.c
+++ b/tools/testing/selftests/bpf/unpriv_helpers.c
@@ -1,15 +1,76 @@
// SPDX-License-Identifier: GPL-2.0-only
+#include <errno.h>
+#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
+#include <sys/utsname.h>
#include <unistd.h>
#include <fcntl.h>
+#include <zlib.h>
#include "unpriv_helpers.h"
-static bool get_mitigations_off(void)
+static gzFile open_config(void)
+{
+ struct utsname uts;
+ char buf[PATH_MAX];
+ gzFile config;
+
+ if (uname(&uts)) {
+ perror("uname");
+ goto config_gz;
+ }
+
+ snprintf(buf, sizeof(buf), "/boot/config-%s", uts.release);
+ config = gzopen(buf, "rb");
+ if (config)
+ return config;
+ fprintf(stderr, "gzopen %s: %s\n", buf, strerror(errno));
+
+config_gz:
+ config = gzopen("/proc/config.gz", "rb");
+ if (!config)
+ perror("gzopen /proc/config.gz");
+ return config;
+}
+
+static int config_contains(const char *pat)
+{
+ const char *msg;
+ char buf[1024];
+ gzFile config;
+ int n, err;
+
+ config = open_config();
+ if (!config)
+ return -1;
+
+ for (;;) {
+ if (!gzgets(config, buf, sizeof(buf))) {
+ msg = gzerror(config, &err);
+ if (err == Z_ERRNO)
+ perror("gzgets /proc/config.gz");
+ else if (err != Z_OK)
+ fprintf(stderr, "gzgets /proc/config.gz: %s", msg);
+ gzclose(config);
+ return -1;
+ }
+ n = strlen(buf);
+ if (buf[n - 1] == '\n')
+ buf[n - 1] = 0;
+ if (strcmp(buf, pat) == 0) {
+ gzclose(config);
+ return 1;
+ }
+ }
+ gzclose(config);
+ return 0;
+}
+
+static bool cmdline_contains(const char *pat)
{
char cmdline[4096], *c;
int fd, ret = false;
@@ -27,7 +88,7 @@ static bool get_mitigations_off(void)
cmdline[sizeof(cmdline) - 1] = '\0';
for (c = strtok(cmdline, " \n"); c; c = strtok(NULL, " \n")) {
- if (strncmp(c, "mitigations=off", strlen(c)))
+ if (strncmp(c, pat, strlen(c)))
continue;
ret = true;
break;
@@ -37,8 +98,21 @@ out:
return ret;
}
+static int get_mitigations_off(void)
+{
+ int enabled_in_config;
+
+ if (cmdline_contains("mitigations=off"))
+ return 1;
+ enabled_in_config = config_contains("CONFIG_CPU_MITIGATIONS=y");
+ if (enabled_in_config < 0)
+ return -1;
+ return !enabled_in_config;
+}
+
bool get_unpriv_disabled(void)
{
+ int mitigations_off;
bool disabled;
char buf[2];
FILE *fd;
@@ -52,5 +126,19 @@ bool get_unpriv_disabled(void)
disabled = true;
}
- return disabled ? true : get_mitigations_off();
+ if (disabled)
+ return true;
+
+ /*
+ * Some unpriv tests rely on Spectre mitigations being on.
+ * If mitigations are off or their status can't be determined,
+ * assume that unpriv tests are disabled.
+ */
+ mitigations_off = get_mitigations_off();
+ if (mitigations_off < 0) {
+ fprintf(stderr,
+ "Can't determine if mitigations are enabled, disabling unpriv tests.");
+ return true;
+ }
+ return mitigations_off;
}
diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c
index ee454327e5c6..77207b498c6f 100644
--- a/tools/testing/selftests/bpf/verifier/dead_code.c
+++ b/tools/testing/selftests/bpf/verifier/dead_code.c
@@ -2,14 +2,13 @@
"dead code: start",
.insns = {
BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_JMP_IMM(BPF_JA, 0, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 7),
BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 10, -4),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R9 !read_ok",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 7,
},
diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c
index 43776f6f92f4..91d83e9cb148 100644
--- a/tools/testing/selftests/bpf/verifier/jmp32.c
+++ b/tools/testing/selftests/bpf/verifier/jmp32.c
@@ -84,11 +84,10 @@
BPF_JMP32_IMM(BPF_JSET, BPF_REG_7, 0x10, 1),
BPF_EXIT_INSN(),
BPF_JMP32_IMM(BPF_JGE, BPF_REG_7, 0x10, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R9 !read_ok",
- .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -149,11 +148,10 @@
BPF_JMP32_IMM(BPF_JEQ, BPF_REG_7, 0x10, 1),
BPF_EXIT_INSN(),
BPF_JMP32_IMM(BPF_JSGE, BPF_REG_7, 0xf, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R9 !read_ok",
- .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -214,11 +212,10 @@
BPF_JMP32_IMM(BPF_JNE, BPF_REG_7, 0x10, 1),
BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x10, 1),
BPF_EXIT_INSN(),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R9 !read_ok",
- .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -283,11 +280,10 @@
BPF_JMP32_REG(BPF_JGE, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP32_IMM(BPF_JGE, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -354,11 +350,10 @@
BPF_JMP32_REG(BPF_JGT, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -425,11 +420,10 @@
BPF_JMP32_REG(BPF_JLE, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP32_IMM(BPF_JLE, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -496,11 +490,10 @@
BPF_JMP32_REG(BPF_JLT, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -567,11 +560,10 @@
BPF_JMP32_REG(BPF_JSGE, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -638,11 +630,10 @@
BPF_JMP32_REG(BPF_JSGT, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, -2, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -709,11 +700,10 @@
BPF_JMP32_REG(BPF_JSLE, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JSLE, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -780,11 +770,10 @@
BPF_JMP32_REG(BPF_JSLT, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP32_IMM(BPF_JSLT, BPF_REG_7, -1, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
diff --git a/tools/testing/selftests/bpf/verifier/jset.c b/tools/testing/selftests/bpf/verifier/jset.c
index 11fc68da735e..e901eefd774a 100644
--- a/tools/testing/selftests/bpf/verifier/jset.c
+++ b/tools/testing/selftests/bpf/verifier/jset.c
@@ -78,12 +78,11 @@
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "R9 !read_ok",
- .result_unpriv = REJECT,
.retval = 1,
.result = ACCEPT,
},
@@ -136,13 +135,12 @@
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
BPF_ALU64_IMM(BPF_OR, BPF_REG_0, 2),
BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 3, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "R9 !read_ok",
- .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -154,16 +152,16 @@
BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xff),
BPF_JMP_IMM(BPF_JSET, BPF_REG_1, 0xf0, 3),
BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 0x10, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JSET, BPF_REG_1, 0x10, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0x10, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "R9 !read_ok",
- .result_unpriv = REJECT,
.result = ACCEPT,
},
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index b2bb20b00952..4da627ca5749 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -49,6 +49,7 @@ enum stat_id {
STACK,
PROG_TYPE,
ATTACH_TYPE,
+ MEMORY_PEAK,
FILE_NAME,
PROG_NAME,
@@ -208,6 +209,9 @@ static struct env {
int top_src_lines;
struct var_preset *presets;
int npresets;
+ char orig_cgroup[PATH_MAX];
+ char stat_cgroup[PATH_MAX];
+ int memory_peak_fd;
} env;
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
@@ -219,6 +223,22 @@ static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va
return vfprintf(stderr, format, args);
}
+#define log_errno(fmt, ...) log_errno_aux(__FILE__, __LINE__, fmt, ##__VA_ARGS__)
+
+__printf(3, 4)
+static int log_errno_aux(const char *file, int line, const char *fmt, ...)
+{
+ int err = -errno;
+ va_list ap;
+
+ va_start(ap, fmt);
+ fprintf(stderr, "%s:%d: ", file, line);
+ vfprintf(stderr, fmt, ap);
+ fprintf(stderr, " failed with error '%s'.\n", strerror(errno));
+ va_end(ap);
+ return err;
+}
+
#ifndef VERISTAT_VERSION
#define VERISTAT_VERSION "<kernel>"
#endif
@@ -344,6 +364,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
fprintf(stderr, "invalid top N specifier: %s\n", arg);
argp_usage(state);
}
+ break;
case 'C':
env.comparison_mode = true;
break;
@@ -734,13 +755,13 @@ cleanup:
}
static const struct stat_specs default_csv_output_spec = {
- .spec_cnt = 14,
+ .spec_cnt = 15,
.ids = {
FILE_NAME, PROG_NAME, VERDICT, DURATION,
TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
MAX_STATES_PER_INSN, MARK_READ_MAX_LEN,
SIZE, JITED_SIZE, PROG_TYPE, ATTACH_TYPE,
- STACK,
+ STACK, MEMORY_PEAK,
},
};
@@ -781,6 +802,7 @@ static struct stat_def {
[STACK] = {"Stack depth", {"stack_depth", "stack"}, },
[PROG_TYPE] = { "Program type", {"prog_type"}, },
[ATTACH_TYPE] = { "Attach type", {"attach_type", }, },
+ [MEMORY_PEAK] = { "Peak memory (MiB)", {"mem_peak", }, },
};
static bool parse_stat_id_var(const char *name, size_t len, int *id,
@@ -1182,6 +1204,7 @@ static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const ch
case BPF_MAP_TYPE_TASK_STORAGE:
case BPF_MAP_TYPE_INODE_STORAGE:
case BPF_MAP_TYPE_CGROUP_STORAGE:
+ case BPF_MAP_TYPE_CGRP_STORAGE:
break;
case BPF_MAP_TYPE_STRUCT_OPS:
mask_unrelated_struct_ops_progs(obj, map, prog);
@@ -1278,16 +1301,214 @@ static int max_verifier_log_size(void)
return log_size;
}
+static bool output_stat_enabled(int id)
+{
+ int i;
+
+ for (i = 0; i < env.output_spec.spec_cnt; i++)
+ if (env.output_spec.ids[i] == id)
+ return true;
+ return false;
+}
+
+__printf(2, 3)
+static int write_one_line(const char *file, const char *fmt, ...)
+{
+ int err, saved_errno;
+ va_list ap;
+ FILE *f;
+
+ f = fopen(file, "w");
+ if (!f)
+ return -1;
+
+ va_start(ap, fmt);
+ errno = 0;
+ err = vfprintf(f, fmt, ap);
+ saved_errno = errno;
+ va_end(ap);
+ fclose(f);
+ errno = saved_errno;
+ return err < 0 ? -1 : 0;
+}
+
+__scanf(3, 4)
+static int scanf_one_line(const char *file, int fields_expected, const char *fmt, ...)
+{
+ int res = 0, saved_errno = 0;
+ char *line = NULL;
+ size_t line_len;
+ va_list ap;
+ FILE *f;
+
+ f = fopen(file, "r");
+ if (!f)
+ return -1;
+
+ va_start(ap, fmt);
+ while (getline(&line, &line_len, f) > 0) {
+ res = vsscanf(line, fmt, ap);
+ if (res == fields_expected)
+ goto out;
+ }
+ if (ferror(f)) {
+ saved_errno = errno;
+ res = -1;
+ }
+
+out:
+ va_end(ap);
+ free(line);
+ fclose(f);
+ errno = saved_errno;
+ return res;
+}
+
+static void destroy_stat_cgroup(void)
+{
+ char buf[PATH_MAX];
+ int err;
+
+ close(env.memory_peak_fd);
+
+ if (env.orig_cgroup[0]) {
+ snprintf(buf, sizeof(buf), "%s/cgroup.procs", env.orig_cgroup);
+ err = write_one_line(buf, "%d\n", getpid());
+ if (err < 0)
+ log_errno("moving self to original cgroup %s\n", env.orig_cgroup);
+ }
+
+ if (env.stat_cgroup[0]) {
+ err = rmdir(env.stat_cgroup);
+ if (err < 0)
+ log_errno("deletion of cgroup %s", env.stat_cgroup);
+ }
+
+ env.memory_peak_fd = -1;
+ env.orig_cgroup[0] = 0;
+ env.stat_cgroup[0] = 0;
+}
+
+/*
+ * Creates a cgroup at /sys/fs/cgroup/veristat-accounting-<pid>,
+ * moves current process to this cgroup.
+ */
+static void create_stat_cgroup(void)
+{
+ char cgroup_fs_mount[4096];
+ char buf[4096];
+ int err;
+
+ env.memory_peak_fd = -1;
+
+ if (!output_stat_enabled(MEMORY_PEAK))
+ return;
+
+ err = scanf_one_line("/proc/self/mounts", 2, "%*s %4095s cgroup2 %s",
+ cgroup_fs_mount, buf);
+ if (err != 2) {
+ if (err < 0)
+ log_errno("reading /proc/self/mounts");
+ else if (!env.quiet)
+ fprintf(stderr, "Can't find cgroupfs v2 mount point.\n");
+ goto err_out;
+ }
+
+ /* cgroup-v2.rst promises the line "0::<group>" for cgroups v2 */
+ err = scanf_one_line("/proc/self/cgroup", 1, "0::%4095s", buf);
+ if (err != 1) {
+ if (err < 0)
+ log_errno("reading /proc/self/cgroup");
+ else if (!env.quiet)
+ fprintf(stderr, "Can't infer veristat process cgroup.");
+ goto err_out;
+ }
+
+ snprintf(env.orig_cgroup, sizeof(env.orig_cgroup), "%s/%s", cgroup_fs_mount, buf);
+
+ snprintf(buf, sizeof(buf), "%s/veristat-accounting-%d", cgroup_fs_mount, getpid());
+ err = mkdir(buf, 0777);
+ if (err < 0) {
+ log_errno("creation of cgroup %s", buf);
+ goto err_out;
+ }
+ strcpy(env.stat_cgroup, buf);
+
+ snprintf(buf, sizeof(buf), "%s/cgroup.procs", env.stat_cgroup);
+ err = write_one_line(buf, "%d\n", getpid());
+ if (err < 0) {
+ log_errno("entering cgroup %s", buf);
+ goto err_out;
+ }
+
+ snprintf(buf, sizeof(buf), "%s/memory.peak", env.stat_cgroup);
+ env.memory_peak_fd = open(buf, O_RDWR | O_APPEND);
+ if (env.memory_peak_fd < 0) {
+ log_errno("opening %s", buf);
+ goto err_out;
+ }
+
+ return;
+
+err_out:
+ if (!env.quiet)
+ fprintf(stderr, "Memory usage metric unavailable.\n");
+ destroy_stat_cgroup();
+}
+
+/* Current value of /sys/fs/cgroup/veristat-accounting-<pid>/memory.peak */
+static long cgroup_memory_peak(void)
+{
+ long err, memory_peak;
+ char buf[32];
+
+ if (env.memory_peak_fd < 0)
+ return -1;
+
+ err = pread(env.memory_peak_fd, buf, sizeof(buf) - 1, 0);
+ if (err <= 0) {
+ log_errno("pread(%s/memory.peak)", env.stat_cgroup);
+ return -1;
+ }
+
+ buf[err] = 0;
+ errno = 0;
+ memory_peak = strtoll(buf, NULL, 10);
+ if (errno) {
+ log_errno("%s/memory.peak:strtoll(%s)", env.stat_cgroup, buf);
+ return -1;
+ }
+
+ return memory_peak;
+}
+
+static int reset_stat_cgroup(void)
+{
+ char buf[] = "r\n";
+ int err;
+
+ if (env.memory_peak_fd < 0)
+ return -1;
+
+ err = pwrite(env.memory_peak_fd, buf, sizeof(buf), 0);
+ if (err <= 0) {
+ log_errno("pwrite(%s/memory.peak)", env.stat_cgroup);
+ return -1;
+ }
+ return 0;
+}
+
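
Together with create_stat_cgroup(), these helpers enable the reset/measure pattern used in process_prog() below; a minimal hypothetical wrapper (not part of the patch) makes the sequence explicit:

/* Peak memory, in MiB, charged to the veristat cgroup while running an
 * arbitrary callback. Assumes create_stat_cgroup() has already succeeded.
 */
static long measure_peak_mib(void (*workload)(void))
{
        long before, after;

        if (reset_stat_cgroup())
                return -1;
        before = cgroup_memory_peak();
        workload();
        after = cgroup_memory_peak();
        if (before < 0 || after < 0)
                return -1;
        return (after - before) / (1024 * 1024);
}
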
static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog)
{
const char *base_filename = basename(strdupa(filename));
const char *prog_name = bpf_program__name(prog);
+ long mem_peak_a, mem_peak_b, mem_peak = -1;
char *buf;
int buf_sz, log_level;
struct verif_stats *stats;
struct bpf_prog_info info;
__u32 info_len = sizeof(info);
- int err = 0;
+ int err = 0, cgroup_err;
void *tmp;
int fd;
@@ -1332,7 +1553,15 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
if (env.force_reg_invariants)
bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS);
- err = bpf_object__load(obj);
+ err = bpf_object__prepare(obj);
+ if (!err) {
+ cgroup_err = reset_stat_cgroup();
+ mem_peak_a = cgroup_memory_peak();
+ err = bpf_object__load(obj);
+ mem_peak_b = cgroup_memory_peak();
+ if (!cgroup_err && mem_peak_a >= 0 && mem_peak_b >= 0)
+ mem_peak = mem_peak_b - mem_peak_a;
+ }
env.progs_processed++;
stats->file_name = strdup(base_filename);
@@ -1341,6 +1570,7 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
stats->stats[SIZE] = bpf_program__insn_cnt(prog);
stats->stats[PROG_TYPE] = bpf_program__type(prog);
stats->stats[ATTACH_TYPE] = bpf_program__expected_attach_type(prog);
+ stats->stats[MEMORY_PEAK] = mem_peak < 0 ? -1 : mem_peak / (1024 * 1024);
memset(&info, 0, info_len);
fd = bpf_program__fd(prog);
@@ -1824,6 +2054,7 @@ static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2,
case TOTAL_STATES:
case PEAK_STATES:
case MAX_STATES_PER_INSN:
+ case MEMORY_PEAK:
case MARK_READ_MAX_LEN: {
long v1 = s1->stats[id];
long v2 = s2->stats[id];
@@ -2053,6 +2284,7 @@ static void prepare_value(const struct verif_stats *s, enum stat_id id,
case STACK:
case SIZE:
case JITED_SIZE:
+ case MEMORY_PEAK:
*val = s ? s->stats[id] : 0;
break;
default:
@@ -2139,6 +2371,7 @@ static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats
case MARK_READ_MAX_LEN:
case SIZE:
case JITED_SIZE:
+ case MEMORY_PEAK:
case STACK: {
long val;
int err, n;
@@ -2776,7 +3009,7 @@ static void output_prog_stats(void)
static int handle_verif_mode(void)
{
- int i, err;
+ int i, err = 0;
if (env.filename_cnt == 0) {
fprintf(stderr, "Please provide path to BPF object file!\n\n");
@@ -2784,11 +3017,12 @@ static int handle_verif_mode(void)
return -EINVAL;
}
+ create_stat_cgroup();
for (i = 0; i < env.filename_cnt; i++) {
err = process_obj(env.filenames[i]);
if (err) {
fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err);
- return err;
+ goto out;
}
}
@@ -2796,7 +3030,9 @@ static int handle_verif_mode(void)
output_prog_stats();
- return 0;
+out:
+ destroy_stat_cgroup();
+ return err;
}
static int handle_replay_mode(void)
diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh
index 79505d294c44..2f869daf8a06 100755
--- a/tools/testing/selftests/bpf/vmtest.sh
+++ b/tools/testing/selftests/bpf/vmtest.sh
@@ -43,6 +43,15 @@ riscv64)
BZIMAGE="arch/riscv/boot/Image"
ARCH="riscv"
;;
+ppc64el)
+ QEMU_BINARY=qemu-system-ppc64
+ QEMU_CONSOLE="hvc0"
+ # KVM could not be tested on powerpc, so it is not enabled for now.
+ HOST_FLAGS=(-machine pseries -cpu POWER9)
+ CROSS_FLAGS=(-machine pseries -cpu POWER9)
+ BZIMAGE="vmlinux"
+ ARCH="powerpc"
+ ;;
*)
echo "Unsupported architecture"
exit 1