Diffstat (limited to 'tools/lib/bpf')
 -rw-r--r--  tools/lib/bpf/Build              |    2
 -rw-r--r--  tools/lib/bpf/Makefile           |   10
 -rw-r--r--  tools/lib/bpf/bpf.c              |   32
 -rw-r--r--  tools/lib/bpf/bpf.h              |    8
 -rw-r--r--  tools/lib/bpf/btf.c              |   47
 -rw-r--r--  tools/lib/bpf/btf.h              |   31
 -rw-r--r--  tools/lib/bpf/btf_dump.c         |  871
 -rw-r--r--  tools/lib/bpf/libbpf.c           | 1756
 -rw-r--r--  tools/lib/bpf/libbpf.h           |   76
 -rw-r--r--  tools/lib/bpf/libbpf.map         |   11
 -rw-r--r--  tools/lib/bpf/libbpf_internal.h  |  113
 -rw-r--r--  tools/lib/bpf/relo_core.c        | 1295
 -rw-r--r--  tools/lib/bpf/relo_core.h        |  100
 13 files changed, 2849 insertions(+), 1503 deletions(-)
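
For context, here is a minimal caller-side sketch (not part of the patch) of the renamed BTF-loading APIs this series introduces: btf__load_vmlinux_btf(), btf__load_module_btf(), and btf__load_from_kernel_by_id(). It assumes a kernel exposing /sys/kernel/btf; the "nf_conntrack" module name and the BTF object ID 1 are placeholders, and error handling is abbreviated:

#include <stdio.h>
#include <bpf/btf.h>
#include <bpf/libbpf.h>

int main(void)
{
	struct btf *vmlinux_btf, *module_btf, *by_id_btf;
	int err;

	/* btf__load_vmlinux_btf() is the new name for libbpf_find_kernel_btf() */
	vmlinux_btf = btf__load_vmlinux_btf();
	err = libbpf_get_error(vmlinux_btf);
	if (err) {
		fprintf(stderr, "failed to load vmlinux BTF: %d\n", err);
		return 1;
	}

	/* new helper: parse a module's split BTF on top of vmlinux BTF;
	 * "nf_conntrack" is only an example module name
	 */
	module_btf = btf__load_module_btf("nf_conntrack", vmlinux_btf);
	if (libbpf_get_error(module_btf))
		module_btf = NULL;	/* module may not be loaded; not fatal here */

	/* btf__load_from_kernel_by_id() replaces btf__get_from_id(); the BTF
	 * object ID would normally come from prog/map info, 1 is a placeholder
	 */
	by_id_btf = btf__load_from_kernel_by_id(1);
	if (libbpf_get_error(by_id_btf))
		by_id_btf = NULL;

	btf__free(by_id_btf);
	btf__free(module_btf);
	btf__free(vmlinux_btf);
	return 0;
}

As the btf.c hunks below show, the old entry points (btf__load(), libbpf_find_kernel_btf(), btf__get_from_id()) are kept as aliases or thin wrappers for backward compatibility. The same series also adds btf_dump__dump_type_data() for dumping typed data and a perf_event.bpf_cookie option to bpf_link_create_opts, both visible in the hunks that follow.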
| diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 430f6874fa41..94f0a146bb7b 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1,3 +1,3 @@  libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \  	    netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \ -	    btf_dump.o ringbuf.o strset.o linker.o gen_loader.o +	    btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index ec14aa725bb0..74c3b73a5fbe 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -4,8 +4,9 @@  RM ?= rm  srctree = $(abs_srctree) +VERSION_SCRIPT := libbpf.map  LIBBPF_VERSION := $(shell \ -	grep -oE '^LIBBPF_([0-9.]+)' libbpf.map | \ +	grep -oE '^LIBBPF_([0-9.]+)' $(VERSION_SCRIPT) | \  	sort -rV | head -n1 | cut -d'_' -f2)  LIBBPF_MAJOR_VERSION := $(firstword $(subst ., ,$(LIBBPF_VERSION))) @@ -110,7 +111,6 @@ SHARED_OBJDIR	:= $(OUTPUT)sharedobjs/  STATIC_OBJDIR	:= $(OUTPUT)staticobjs/  BPF_IN_SHARED	:= $(SHARED_OBJDIR)libbpf-in.o  BPF_IN_STATIC	:= $(STATIC_OBJDIR)libbpf-in.o -VERSION_SCRIPT	:= libbpf.map  BPF_HELPER_DEFS	:= $(OUTPUT)bpf_helper_defs.h  LIB_TARGET	:= $(addprefix $(OUTPUT),$(LIB_TARGET)) @@ -163,10 +163,10 @@ $(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h  $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION) -$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED) +$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED) $(VERSION_SCRIPT)  	$(QUIET_LINK)$(CC) $(LDFLAGS) \  		--shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \ -		-Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -lz -o $@ +		-Wl,--version-script=$(VERSION_SCRIPT) $< -lelf -lz -o $@  	@ln -sf $(@F) $(OUTPUT)libbpf.so  	@ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION) @@ -181,7 +181,7 @@ $(OUTPUT)libbpf.pc:  check: check_abi -check_abi: $(OUTPUT)libbpf.so +check_abi: $(OUTPUT)libbpf.so $(VERSION_SCRIPT)  	@if [ "$(GLOBAL_SYM_COUNT)" != "$(VERSIONED_SYM_COUNT)" ]; then	 \  		echo "Warning: Num of global symbols in $(BPF_IN_SHARED)"	 \  		     "($(GLOBAL_SYM_COUNT)) does NOT match with num of"	 \ diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 86dcac44f32f..2401fad090c5 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -684,8 +684,13 @@ int bpf_link_create(int prog_fd, int target_fd,  	iter_info_len = OPTS_GET(opts, iter_info_len, 0);  	target_btf_id = OPTS_GET(opts, target_btf_id, 0); -	if (iter_info_len && target_btf_id) -		return libbpf_err(-EINVAL); +	/* validate we don't have unexpected combinations of non-zero fields */ +	if (iter_info_len || target_btf_id) { +		if (iter_info_len && target_btf_id) +			return libbpf_err(-EINVAL); +		if (!OPTS_ZEROED(opts, target_btf_id)) +			return libbpf_err(-EINVAL); +	}  	memset(&attr, 0, sizeof(attr));  	attr.link_create.prog_fd = prog_fd; @@ -693,14 +698,27 @@ int bpf_link_create(int prog_fd, int target_fd,  	attr.link_create.attach_type = attach_type;  	attr.link_create.flags = OPTS_GET(opts, flags, 0); -	if (iter_info_len) { -		attr.link_create.iter_info = -			ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0)); -		attr.link_create.iter_info_len = iter_info_len; -	} else if (target_btf_id) { +	if (target_btf_id) {  		attr.link_create.target_btf_id = target_btf_id; +		goto proceed;  	} +	switch (attach_type) { +	case BPF_TRACE_ITER: +		attr.link_create.iter_info = ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0)); +		attr.link_create.iter_info_len = iter_info_len; +		break; +	case BPF_PERF_EVENT: +		
attr.link_create.perf_event.bpf_cookie = OPTS_GET(opts, perf_event.bpf_cookie, 0); +		if (!OPTS_ZEROED(opts, perf_event)) +			return libbpf_err(-EINVAL); +		break; +	default: +		if (!OPTS_ZEROED(opts, flags)) +			return libbpf_err(-EINVAL); +		break; +	} +proceed:  	fd = sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));  	return libbpf_err_errno(fd);  } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 4f758f8f50cd..6fffb3cdf39b 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -177,8 +177,14 @@ struct bpf_link_create_opts {  	union bpf_iter_link_info *iter_info;  	__u32 iter_info_len;  	__u32 target_btf_id; +	union { +		struct { +			__u64 bpf_cookie; +		} perf_event; +	}; +	size_t :0;  }; -#define bpf_link_create_opts__last_field target_btf_id +#define bpf_link_create_opts__last_field perf_event  LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,  			       enum bpf_attach_type attach_type, diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 7ff3d5ce44f9..77dc24d58302 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1179,7 +1179,7 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf)  static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian); -int btf__load(struct btf *btf) +int btf__load_into_kernel(struct btf *btf)  {  	__u32 log_buf_size = 0, raw_size;  	char *log_buf = NULL; @@ -1227,6 +1227,7 @@ done:  	free(log_buf);  	return libbpf_err(err);  } +int btf__load(struct btf *) __attribute__((alias("btf__load_into_kernel")));  int btf__fd(const struct btf *btf)  { @@ -1381,21 +1382,35 @@ exit_free:  	return btf;  } -int btf__get_from_id(__u32 id, struct btf **btf) +struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf)  { -	struct btf *res; -	int err, btf_fd; +	struct btf *btf; +	int btf_fd; -	*btf = NULL;  	btf_fd = bpf_btf_get_fd_by_id(id);  	if (btf_fd < 0) -		return libbpf_err(-errno); - -	res = btf_get_from_fd(btf_fd, NULL); -	err = libbpf_get_error(res); +		return libbpf_err_ptr(-errno); +	btf = btf_get_from_fd(btf_fd, base_btf);  	close(btf_fd); +	return libbpf_ptr(btf); +} + +struct btf *btf__load_from_kernel_by_id(__u32 id) +{ +	return btf__load_from_kernel_by_id_split(id, NULL); +} + +int btf__get_from_id(__u32 id, struct btf **btf) +{ +	struct btf *res; +	int err; + +	*btf = NULL; +	res = btf__load_from_kernel_by_id(id); +	err = libbpf_get_error(res); +  	if (err)  		return libbpf_err(err); @@ -4020,7 +4035,7 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d)  		 */  		if (d->hypot_adjust_canon)  			continue; -		 +  		if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD)  			d->map[t_id] = c_id; @@ -4393,7 +4408,7 @@ static int btf_dedup_remap_types(struct btf_dedup *d)   * Probe few well-known locations for vmlinux kernel image and try to load BTF   * data out of it to use for target BTF.   
*/ -struct btf *libbpf_find_kernel_btf(void) +struct btf *btf__load_vmlinux_btf(void)  {  	struct {  		const char *path_fmt; @@ -4439,6 +4454,16 @@ struct btf *libbpf_find_kernel_btf(void)  	return libbpf_err_ptr(-ESRCH);  } +struct btf *libbpf_find_kernel_btf(void) __attribute__((alias("btf__load_vmlinux_btf"))); + +struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf) +{ +	char path[80]; + +	snprintf(path, sizeof(path), "/sys/kernel/btf/%s", module_name); +	return btf__parse_split(path, vmlinux_btf); +} +  int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx)  {  	int i, n, err; diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index b54f1c3ebd57..4a711f990904 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -44,8 +44,17 @@ LIBBPF_API struct btf *btf__parse_elf_split(const char *path, struct btf *base_b  LIBBPF_API struct btf *btf__parse_raw(const char *path);  LIBBPF_API struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf); +LIBBPF_API struct btf *btf__load_vmlinux_btf(void); +LIBBPF_API struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf); +LIBBPF_API struct btf *libbpf_find_kernel_btf(void); + +LIBBPF_API struct btf *btf__load_from_kernel_by_id(__u32 id); +LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf); +LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); +  LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);  LIBBPF_API int btf__load(struct btf *btf); +LIBBPF_API int btf__load_into_kernel(struct btf *btf);  LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,  				   const char *type_name);  LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf, @@ -66,7 +75,6 @@ LIBBPF_API void btf__set_fd(struct btf *btf, int fd);  LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size);  LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);  LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset); -LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);  LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,  				    __u32 expected_key_size,  				    __u32 expected_value_size, @@ -89,8 +97,6 @@ int btf_ext__reloc_line_info(const struct btf *btf,  LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);  LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); -LIBBPF_API struct btf *libbpf_find_kernel_btf(void); -  LIBBPF_API int btf__find_str(struct btf *btf, const char *s);  LIBBPF_API int btf__add_str(struct btf *btf, const char *s);  LIBBPF_API int btf__add_type(struct btf *btf, const struct btf *src_btf, @@ -184,6 +190,25 @@ LIBBPF_API int  btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,  			 const struct btf_dump_emit_type_decl_opts *opts); + +struct btf_dump_type_data_opts { +	/* size of this struct, for forward/backward compatibility */ +	size_t sz; +	const char *indent_str; +	int indent_level; +	/* below match "show" flags for bpf_show_snprintf() */ +	bool compact;		/* no newlines/indentation */ +	bool skip_names;	/* skip member/type names */ +	bool emit_zeroes;	/* show 0-valued fields */ +	size_t :0; +}; +#define btf_dump_type_data_opts__last_field emit_zeroes + +LIBBPF_API int +btf_dump__dump_type_data(struct btf_dump *d, __u32 id, +			 const void *data, size_t data_sz, +			 const struct btf_dump_type_data_opts *opts); +  /*   * A 
set of helpers for easier BTF types handling   */ diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 5dc6b5172bb3..e4b483f15fb9 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -10,6 +10,8 @@  #include <stddef.h>  #include <stdlib.h>  #include <string.h> +#include <ctype.h> +#include <endian.h>  #include <errno.h>  #include <linux/err.h>  #include <linux/btf.h> @@ -53,6 +55,26 @@ struct btf_dump_type_aux_state {  	__u8 referenced: 1;  }; +/* indent string length; one indent string is added for each indent level */ +#define BTF_DATA_INDENT_STR_LEN			32 + +/* + * Common internal data for BTF type data dump operations. + */ +struct btf_dump_data { +	const void *data_end;		/* end of valid data to show */ +	bool compact; +	bool skip_names; +	bool emit_zeroes; +	__u8 indent_lvl;	/* base indent level */ +	char indent_str[BTF_DATA_INDENT_STR_LEN]; +	/* below are used during iteration */ +	int depth; +	bool is_array_member; +	bool is_array_terminated; +	bool is_array_char; +}; +  struct btf_dump {  	const struct btf *btf;  	const struct btf_ext *btf_ext; @@ -60,6 +82,7 @@ struct btf_dump {  	struct btf_dump_opts opts;  	int ptr_sz;  	bool strip_mods; +	bool skip_anon_defs;  	int last_id;  	/* per-type auxiliary state */ @@ -89,6 +112,10 @@ struct btf_dump {  	 * name occurrences  	 */  	struct hashmap *ident_names; +	/* +	 * data for typed display; allocated if needed. +	 */ +	struct btf_dump_data *typed_dump;  };  static size_t str_hash_fn(const void *key, void *ctx) @@ -765,11 +792,11 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)  		break;  	case BTF_KIND_FUNC_PROTO: {  		const struct btf_param *p = btf_params(t); -		__u16 vlen = btf_vlen(t); +		__u16 n = btf_vlen(t);  		int i;  		btf_dump_emit_type(d, t->type, cont_id); -		for (i = 0; i < vlen; i++, p++) +		for (i = 0; i < n; i++, p++)  			btf_dump_emit_type(d, p->type, cont_id);  		break; @@ -852,8 +879,9 @@ static void btf_dump_emit_bit_padding(const struct btf_dump *d,  static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id,  				     const struct btf_type *t)  { -	btf_dump_printf(d, "%s %s", +	btf_dump_printf(d, "%s%s%s",  			btf_is_struct(t) ? "struct" : "union", +			t->name_off ? 
" " : "",  			btf_dump_type_name(d, id));  } @@ -1259,7 +1287,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,  		case BTF_KIND_UNION:  			btf_dump_emit_mods(d, decls);  			/* inline anonymous struct/union */ -			if (t->name_off == 0) +			if (t->name_off == 0 && !d->skip_anon_defs)  				btf_dump_emit_struct_def(d, id, t, lvl);  			else  				btf_dump_emit_struct_fwd(d, id, t); @@ -1267,7 +1295,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,  		case BTF_KIND_ENUM:  			btf_dump_emit_mods(d, decls);  			/* inline anonymous enum */ -			if (t->name_off == 0) +			if (t->name_off == 0 && !d->skip_anon_defs)  				btf_dump_emit_enum_def(d, id, t, lvl);  			else  				btf_dump_emit_enum_fwd(d, id, t); @@ -1392,6 +1420,39 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,  	btf_dump_emit_name(d, fname, last_was_ptr);  } +/* show type name as (type_name) */ +static void btf_dump_emit_type_cast(struct btf_dump *d, __u32 id, +				    bool top_level) +{ +	const struct btf_type *t; + +	/* for array members, we don't bother emitting type name for each +	 * member to avoid the redundancy of +	 * .name = (char[4])[(char)'f',(char)'o',(char)'o',] +	 */ +	if (d->typed_dump->is_array_member) +		return; + +	/* avoid type name specification for variable/section; it will be done +	 * for the associated variable value(s). +	 */ +	t = btf__type_by_id(d->btf, id); +	if (btf_is_var(t) || btf_is_datasec(t)) +		return; + +	if (top_level) +		btf_dump_printf(d, "("); + +	d->skip_anon_defs = true; +	d->strip_mods = true; +	btf_dump_emit_type_decl(d, id, "", 0); +	d->strip_mods = false; +	d->skip_anon_defs = false; + +	if (top_level) +		btf_dump_printf(d, ")"); +} +  /* return number of duplicates (occurrences) of a given name */  static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map,  				 const char *orig_name) @@ -1442,3 +1503,803 @@ static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id)  {  	return btf_dump_resolve_name(d, id, d->ident_names);  } + +static int btf_dump_dump_type_data(struct btf_dump *d, +				   const char *fname, +				   const struct btf_type *t, +				   __u32 id, +				   const void *data, +				   __u8 bits_offset, +				   __u8 bit_sz); + +static const char *btf_dump_data_newline(struct btf_dump *d) +{ +	return d->typed_dump->compact || d->typed_dump->depth == 0 ? "" : "\n"; +} + +static const char *btf_dump_data_delim(struct btf_dump *d) +{ +	return d->typed_dump->depth == 0 ? "" : ","; +} + +static void btf_dump_data_pfx(struct btf_dump *d) +{ +	int i, lvl = d->typed_dump->indent_lvl + d->typed_dump->depth; + +	if (d->typed_dump->compact) +		return; + +	for (i = 0; i < lvl; i++) +		btf_dump_printf(d, "%s", d->typed_dump->indent_str); +} + +/* A macro is used here as btf_type_value[s]() appends format specifiers + * to the format specifier passed in; these do the work of appending + * delimiters etc while the caller simply has to specify the type values + * in the format specifier + value(s). + */ +#define btf_dump_type_values(d, fmt, ...)				
\ +	btf_dump_printf(d, fmt "%s%s",					\ +			##__VA_ARGS__,					\ +			btf_dump_data_delim(d),				\ +			btf_dump_data_newline(d)) + +static int btf_dump_unsupported_data(struct btf_dump *d, +				     const struct btf_type *t, +				     __u32 id) +{ +	btf_dump_printf(d, "<unsupported kind:%u>", btf_kind(t)); +	return -ENOTSUP; +} + +static int btf_dump_get_bitfield_value(struct btf_dump *d, +				       const struct btf_type *t, +				       const void *data, +				       __u8 bits_offset, +				       __u8 bit_sz, +				       __u64 *value) +{ +	__u16 left_shift_bits, right_shift_bits; +	__u8 nr_copy_bits, nr_copy_bytes; +	const __u8 *bytes = data; +	int sz = t->size; +	__u64 num = 0; +	int i; + +	/* Maximum supported bitfield size is 64 bits */ +	if (sz > 8) { +		pr_warn("unexpected bitfield size %d\n", sz); +		return -EINVAL; +	} + +	/* Bitfield value retrieval is done in two steps; first relevant bytes are +	 * stored in num, then we left/right shift num to eliminate irrelevant bits. +	 */ +	nr_copy_bits = bit_sz + bits_offset; +	nr_copy_bytes = t->size; +#if __BYTE_ORDER == __LITTLE_ENDIAN +	for (i = nr_copy_bytes - 1; i >= 0; i--) +		num = num * 256 + bytes[i]; +#elif __BYTE_ORDER == __BIG_ENDIAN +	for (i = 0; i < nr_copy_bytes; i++) +		num = num * 256 + bytes[i]; +#else +# error "Unrecognized __BYTE_ORDER__" +#endif +	left_shift_bits = 64 - nr_copy_bits; +	right_shift_bits = 64 - bit_sz; + +	*value = (num << left_shift_bits) >> right_shift_bits; + +	return 0; +} + +static int btf_dump_bitfield_check_zero(struct btf_dump *d, +					const struct btf_type *t, +					const void *data, +					__u8 bits_offset, +					__u8 bit_sz) +{ +	__u64 check_num; +	int err; + +	err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz, &check_num); +	if (err) +		return err; +	if (check_num == 0) +		return -ENODATA; +	return 0; +} + +static int btf_dump_bitfield_data(struct btf_dump *d, +				  const struct btf_type *t, +				  const void *data, +				  __u8 bits_offset, +				  __u8 bit_sz) +{ +	__u64 print_num; +	int err; + +	err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz, &print_num); +	if (err) +		return err; + +	btf_dump_type_values(d, "0x%llx", (unsigned long long)print_num); + +	return 0; +} + +/* ints, floats and ptrs */ +static int btf_dump_base_type_check_zero(struct btf_dump *d, +					 const struct btf_type *t, +					 __u32 id, +					 const void *data) +{ +	static __u8 bytecmp[16] = {}; +	int nr_bytes; + +	/* For pointer types, pointer size is not defined on a per-type basis. +	 * On dump creation however, we store the pointer size. +	 */ +	if (btf_kind(t) == BTF_KIND_PTR) +		nr_bytes = d->ptr_sz; +	else +		nr_bytes = t->size; + +	if (nr_bytes < 1 || nr_bytes > 16) { +		pr_warn("unexpected size %d for id [%u]\n", nr_bytes, id); +		return -EINVAL; +	} + +	if (memcmp(data, bytecmp, nr_bytes) == 0) +		return -ENODATA; +	return 0; +} + +static bool ptr_is_aligned(const void *data, int data_sz) +{ +	return ((uintptr_t)data) % data_sz == 0; +} + +static int btf_dump_int_data(struct btf_dump *d, +			     const struct btf_type *t, +			     __u32 type_id, +			     const void *data, +			     __u8 bits_offset) +{ +	__u8 encoding = btf_int_encoding(t); +	bool sign = encoding & BTF_INT_SIGNED; +	int sz = t->size; + +	if (sz == 0) { +		pr_warn("unexpected size %d for id [%u]\n", sz, type_id); +		return -EINVAL; +	} + +	/* handle packed int data - accesses of integers not aligned on +	 * int boundaries can cause problems on some platforms. 
+	 */ +	if (!ptr_is_aligned(data, sz)) +		return btf_dump_bitfield_data(d, t, data, 0, 0); + +	switch (sz) { +	case 16: { +		const __u64 *ints = data; +		__u64 lsi, msi; + +		/* avoid use of __int128 as some 32-bit platforms do not +		 * support it. +		 */ +#if __BYTE_ORDER == __LITTLE_ENDIAN +		lsi = ints[0]; +		msi = ints[1]; +#elif __BYTE_ORDER == __BIG_ENDIAN +		lsi = ints[1]; +		msi = ints[0]; +#else +# error "Unrecognized __BYTE_ORDER__" +#endif +		if (msi == 0) +			btf_dump_type_values(d, "0x%llx", (unsigned long long)lsi); +		else +			btf_dump_type_values(d, "0x%llx%016llx", (unsigned long long)msi, +					     (unsigned long long)lsi); +		break; +	} +	case 8: +		if (sign) +			btf_dump_type_values(d, "%lld", *(long long *)data); +		else +			btf_dump_type_values(d, "%llu", *(unsigned long long *)data); +		break; +	case 4: +		if (sign) +			btf_dump_type_values(d, "%d", *(__s32 *)data); +		else +			btf_dump_type_values(d, "%u", *(__u32 *)data); +		break; +	case 2: +		if (sign) +			btf_dump_type_values(d, "%d", *(__s16 *)data); +		else +			btf_dump_type_values(d, "%u", *(__u16 *)data); +		break; +	case 1: +		if (d->typed_dump->is_array_char) { +			/* check for null terminator */ +			if (d->typed_dump->is_array_terminated) +				break; +			if (*(char *)data == '\0') { +				d->typed_dump->is_array_terminated = true; +				break; +			} +			if (isprint(*(char *)data)) { +				btf_dump_type_values(d, "'%c'", *(char *)data); +				break; +			} +		} +		if (sign) +			btf_dump_type_values(d, "%d", *(__s8 *)data); +		else +			btf_dump_type_values(d, "%u", *(__u8 *)data); +		break; +	default: +		pr_warn("unexpected sz %d for id [%u]\n", sz, type_id); +		return -EINVAL; +	} +	return 0; +} + +union float_data { +	long double ld; +	double d; +	float f; +}; + +static int btf_dump_float_data(struct btf_dump *d, +			       const struct btf_type *t, +			       __u32 type_id, +			       const void *data) +{ +	const union float_data *flp = data; +	union float_data fl; +	int sz = t->size; + +	/* handle unaligned data; copy to local union */ +	if (!ptr_is_aligned(data, sz)) { +		memcpy(&fl, data, sz); +		flp = &fl; +	} + +	switch (sz) { +	case 16: +		btf_dump_type_values(d, "%Lf", flp->ld); +		break; +	case 8: +		btf_dump_type_values(d, "%lf", flp->d); +		break; +	case 4: +		btf_dump_type_values(d, "%f", flp->f); +		break; +	default: +		pr_warn("unexpected size %d for id [%u]\n", sz, type_id); +		return -EINVAL; +	} +	return 0; +} + +static int btf_dump_var_data(struct btf_dump *d, +			     const struct btf_type *v, +			     __u32 id, +			     const void *data) +{ +	enum btf_func_linkage linkage = btf_var(v)->linkage; +	const struct btf_type *t; +	const char *l; +	__u32 type_id; + +	switch (linkage) { +	case BTF_FUNC_STATIC: +		l = "static "; +		break; +	case BTF_FUNC_EXTERN: +		l = "extern "; +		break; +	case BTF_FUNC_GLOBAL: +	default: +		l = ""; +		break; +	} + +	/* format of output here is [linkage] [type] [varname] = (type)value, +	 * for example "static int cpu_profile_flip = (int)1" +	 */ +	btf_dump_printf(d, "%s", l); +	type_id = v->type; +	t = btf__type_by_id(d->btf, type_id); +	btf_dump_emit_type_cast(d, type_id, false); +	btf_dump_printf(d, " %s = ", btf_name_of(d, v->name_off)); +	return btf_dump_dump_type_data(d, NULL, t, type_id, data, 0, 0); +} + +static int btf_dump_array_data(struct btf_dump *d, +			       const struct btf_type *t, +			       __u32 id, +			       const void *data) +{ +	const struct btf_array *array = btf_array(t); +	const struct btf_type *elem_type; +	__u32 i, elem_size = 0, 
elem_type_id; +	bool is_array_member; + +	elem_type_id = array->type; +	elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL); +	elem_size = btf__resolve_size(d->btf, elem_type_id); +	if (elem_size <= 0) { +		pr_warn("unexpected elem size %d for array type [%u]\n", elem_size, id); +		return -EINVAL; +	} + +	if (btf_is_int(elem_type)) { +		/* +		 * BTF_INT_CHAR encoding never seems to be set for +		 * char arrays, so if size is 1 and element is +		 * printable as a char, we'll do that. +		 */ +		if (elem_size == 1) +			d->typed_dump->is_array_char = true; +	} + +	/* note that we increment depth before calling btf_dump_print() below; +	 * this is intentional.  btf_dump_data_newline() will not print a +	 * newline for depth 0 (since this leaves us with trailing newlines +	 * at the end of typed display), so depth is incremented first. +	 * For similar reasons, we decrement depth before showing the closing +	 * parenthesis. +	 */ +	d->typed_dump->depth++; +	btf_dump_printf(d, "[%s", btf_dump_data_newline(d)); + +	/* may be a multidimensional array, so store current "is array member" +	 * status so we can restore it correctly later. +	 */ +	is_array_member = d->typed_dump->is_array_member; +	d->typed_dump->is_array_member = true; +	for (i = 0; i < array->nelems; i++, data += elem_size) { +		if (d->typed_dump->is_array_terminated) +			break; +		btf_dump_dump_type_data(d, NULL, elem_type, elem_type_id, data, 0, 0); +	} +	d->typed_dump->is_array_member = is_array_member; +	d->typed_dump->depth--; +	btf_dump_data_pfx(d); +	btf_dump_type_values(d, "]"); + +	return 0; +} + +static int btf_dump_struct_data(struct btf_dump *d, +				const struct btf_type *t, +				__u32 id, +				const void *data) +{ +	const struct btf_member *m = btf_members(t); +	__u16 n = btf_vlen(t); +	int i, err; + +	/* note that we increment depth before calling btf_dump_print() below; +	 * this is intentional.  btf_dump_data_newline() will not print a +	 * newline for depth 0 (since this leaves us with trailing newlines +	 * at the end of typed display), so depth is incremented first. +	 * For similar reasons, we decrement depth before showing the closing +	 * parenthesis. 
+	 */ +	d->typed_dump->depth++; +	btf_dump_printf(d, "{%s", btf_dump_data_newline(d)); + +	for (i = 0; i < n; i++, m++) { +		const struct btf_type *mtype; +		const char *mname; +		__u32 moffset; +		__u8 bit_sz; + +		mtype = btf__type_by_id(d->btf, m->type); +		mname = btf_name_of(d, m->name_off); +		moffset = btf_member_bit_offset(t, i); + +		bit_sz = btf_member_bitfield_size(t, i); +		err = btf_dump_dump_type_data(d, mname, mtype, m->type, data + moffset / 8, +					      moffset % 8, bit_sz); +		if (err < 0) +			return err; +	} +	d->typed_dump->depth--; +	btf_dump_data_pfx(d); +	btf_dump_type_values(d, "}"); +	return err; +} + +union ptr_data { +	unsigned int p; +	unsigned long long lp; +}; + +static int btf_dump_ptr_data(struct btf_dump *d, +			      const struct btf_type *t, +			      __u32 id, +			      const void *data) +{ +	if (ptr_is_aligned(data, d->ptr_sz) && d->ptr_sz == sizeof(void *)) { +		btf_dump_type_values(d, "%p", *(void **)data); +	} else { +		union ptr_data pt; + +		memcpy(&pt, data, d->ptr_sz); +		if (d->ptr_sz == 4) +			btf_dump_type_values(d, "0x%x", pt.p); +		else +			btf_dump_type_values(d, "0x%llx", pt.lp); +	} +	return 0; +} + +static int btf_dump_get_enum_value(struct btf_dump *d, +				   const struct btf_type *t, +				   const void *data, +				   __u32 id, +				   __s64 *value) +{ +	int sz = t->size; + +	/* handle unaligned enum value */ +	if (!ptr_is_aligned(data, sz)) { +		__u64 val; +		int err; + +		err = btf_dump_get_bitfield_value(d, t, data, 0, 0, &val); +		if (err) +			return err; +		*value = (__s64)val; +		return 0; +	} + +	switch (t->size) { +	case 8: +		*value = *(__s64 *)data; +		return 0; +	case 4: +		*value = *(__s32 *)data; +		return 0; +	case 2: +		*value = *(__s16 *)data; +		return 0; +	case 1: +		*value = *(__s8 *)data; +		return 0; +	default: +		pr_warn("unexpected size %d for enum, id:[%u]\n", t->size, id); +		return -EINVAL; +	} +} + +static int btf_dump_enum_data(struct btf_dump *d, +			      const struct btf_type *t, +			      __u32 id, +			      const void *data) +{ +	const struct btf_enum *e; +	__s64 value; +	int i, err; + +	err = btf_dump_get_enum_value(d, t, data, id, &value); +	if (err) +		return err; + +	for (i = 0, e = btf_enum(t); i < btf_vlen(t); i++, e++) { +		if (value != e->val) +			continue; +		btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off)); +		return 0; +	} + +	btf_dump_type_values(d, "%d", value); +	return 0; +} + +static int btf_dump_datasec_data(struct btf_dump *d, +				 const struct btf_type *t, +				 __u32 id, +				 const void *data) +{ +	const struct btf_var_secinfo *vsi; +	const struct btf_type *var; +	__u32 i; +	int err; + +	btf_dump_type_values(d, "SEC(\"%s\") ", btf_name_of(d, t->name_off)); + +	for (i = 0, vsi = btf_var_secinfos(t); i < btf_vlen(t); i++, vsi++) { +		var = btf__type_by_id(d->btf, vsi->type); +		err = btf_dump_dump_type_data(d, NULL, var, vsi->type, data + vsi->offset, 0, 0); +		if (err < 0) +			return err; +		btf_dump_printf(d, ";"); +	} +	return 0; +} + +/* return size of type, or if base type overflows, return -E2BIG. 
*/ +static int btf_dump_type_data_check_overflow(struct btf_dump *d, +					     const struct btf_type *t, +					     __u32 id, +					     const void *data, +					     __u8 bits_offset) +{ +	__s64 size = btf__resolve_size(d->btf, id); + +	if (size < 0 || size >= INT_MAX) { +		pr_warn("unexpected size [%zu] for id [%u]\n", +			(size_t)size, id); +		return -EINVAL; +	} + +	/* Only do overflow checking for base types; we do not want to +	 * avoid showing part of a struct, union or array, even if we +	 * do not have enough data to show the full object.  By +	 * restricting overflow checking to base types we can ensure +	 * that partial display succeeds, while avoiding overflowing +	 * and using bogus data for display. +	 */ +	t = skip_mods_and_typedefs(d->btf, id, NULL); +	if (!t) { +		pr_warn("unexpected error skipping mods/typedefs for id [%u]\n", +			id); +		return -EINVAL; +	} + +	switch (btf_kind(t)) { +	case BTF_KIND_INT: +	case BTF_KIND_FLOAT: +	case BTF_KIND_PTR: +	case BTF_KIND_ENUM: +		if (data + bits_offset / 8 + size > d->typed_dump->data_end) +			return -E2BIG; +		break; +	default: +		break; +	} +	return (int)size; +} + +static int btf_dump_type_data_check_zero(struct btf_dump *d, +					 const struct btf_type *t, +					 __u32 id, +					 const void *data, +					 __u8 bits_offset, +					 __u8 bit_sz) +{ +	__s64 value; +	int i, err; + +	/* toplevel exceptions; we show zero values if +	 * - we ask for them (emit_zeros) +	 * - if we are at top-level so we see "struct empty { }" +	 * - or if we are an array member and the array is non-empty and +	 *   not a char array; we don't want to be in a situation where we +	 *   have an integer array 0, 1, 0, 1 and only show non-zero values. +	 *   If the array contains zeroes only, or is a char array starting +	 *   with a '\0', the array-level check_zero() will prevent showing it; +	 *   we are concerned with determining zero value at the array member +	 *   level here. +	 */ +	if (d->typed_dump->emit_zeroes || d->typed_dump->depth == 0 || +	    (d->typed_dump->is_array_member && +	     !d->typed_dump->is_array_char)) +		return 0; + +	t = skip_mods_and_typedefs(d->btf, id, NULL); + +	switch (btf_kind(t)) { +	case BTF_KIND_INT: +		if (bit_sz) +			return btf_dump_bitfield_check_zero(d, t, data, bits_offset, bit_sz); +		return btf_dump_base_type_check_zero(d, t, id, data); +	case BTF_KIND_FLOAT: +	case BTF_KIND_PTR: +		return btf_dump_base_type_check_zero(d, t, id, data); +	case BTF_KIND_ARRAY: { +		const struct btf_array *array = btf_array(t); +		const struct btf_type *elem_type; +		__u32 elem_type_id, elem_size; +		bool ischar; + +		elem_type_id = array->type; +		elem_size = btf__resolve_size(d->btf, elem_type_id); +		elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL); + +		ischar = btf_is_int(elem_type) && elem_size == 1; + +		/* check all elements; if _any_ element is nonzero, all +		 * of array is displayed.  We make an exception however +		 * for char arrays where the first element is 0; these +		 * are considered zeroed also, even if later elements are +		 * non-zero because the string is terminated. 
+		 */ +		for (i = 0; i < array->nelems; i++) { +			if (i == 0 && ischar && *(char *)data == 0) +				return -ENODATA; +			err = btf_dump_type_data_check_zero(d, elem_type, +							    elem_type_id, +							    data + +							    (i * elem_size), +							    bits_offset, 0); +			if (err != -ENODATA) +				return err; +		} +		return -ENODATA; +	} +	case BTF_KIND_STRUCT: +	case BTF_KIND_UNION: { +		const struct btf_member *m = btf_members(t); +		__u16 n = btf_vlen(t); + +		/* if any struct/union member is non-zero, the struct/union +		 * is considered non-zero and dumped. +		 */ +		for (i = 0; i < n; i++, m++) { +			const struct btf_type *mtype; +			__u32 moffset; + +			mtype = btf__type_by_id(d->btf, m->type); +			moffset = btf_member_bit_offset(t, i); + +			/* btf_int_bits() does not store member bitfield size; +			 * bitfield size needs to be stored here so int display +			 * of member can retrieve it. +			 */ +			bit_sz = btf_member_bitfield_size(t, i); +			err = btf_dump_type_data_check_zero(d, mtype, m->type, data + moffset / 8, +							    moffset % 8, bit_sz); +			if (err != ENODATA) +				return err; +		} +		return -ENODATA; +	} +	case BTF_KIND_ENUM: +		err = btf_dump_get_enum_value(d, t, data, id, &value); +		if (err) +			return err; +		if (value == 0) +			return -ENODATA; +		return 0; +	default: +		return 0; +	} +} + +/* returns size of data dumped, or error. */ +static int btf_dump_dump_type_data(struct btf_dump *d, +				   const char *fname, +				   const struct btf_type *t, +				   __u32 id, +				   const void *data, +				   __u8 bits_offset, +				   __u8 bit_sz) +{ +	int size, err; + +	size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset); +	if (size < 0) +		return size; +	err = btf_dump_type_data_check_zero(d, t, id, data, bits_offset, bit_sz); +	if (err) { +		/* zeroed data is expected and not an error, so simply skip +		 * dumping such data.  Record other errors however. 
+		 */ +		if (err == -ENODATA) +			return size; +		return err; +	} +	btf_dump_data_pfx(d); + +	if (!d->typed_dump->skip_names) { +		if (fname && strlen(fname) > 0) +			btf_dump_printf(d, ".%s = ", fname); +		btf_dump_emit_type_cast(d, id, true); +	} + +	t = skip_mods_and_typedefs(d->btf, id, NULL); + +	switch (btf_kind(t)) { +	case BTF_KIND_UNKN: +	case BTF_KIND_FWD: +	case BTF_KIND_FUNC: +	case BTF_KIND_FUNC_PROTO: +		err = btf_dump_unsupported_data(d, t, id); +		break; +	case BTF_KIND_INT: +		if (bit_sz) +			err = btf_dump_bitfield_data(d, t, data, bits_offset, bit_sz); +		else +			err = btf_dump_int_data(d, t, id, data, bits_offset); +		break; +	case BTF_KIND_FLOAT: +		err = btf_dump_float_data(d, t, id, data); +		break; +	case BTF_KIND_PTR: +		err = btf_dump_ptr_data(d, t, id, data); +		break; +	case BTF_KIND_ARRAY: +		err = btf_dump_array_data(d, t, id, data); +		break; +	case BTF_KIND_STRUCT: +	case BTF_KIND_UNION: +		err = btf_dump_struct_data(d, t, id, data); +		break; +	case BTF_KIND_ENUM: +		/* handle bitfield and int enum values */ +		if (bit_sz) { +			__u64 print_num; +			__s64 enum_val; + +			err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz, +							  &print_num); +			if (err) +				break; +			enum_val = (__s64)print_num; +			err = btf_dump_enum_data(d, t, id, &enum_val); +		} else +			err = btf_dump_enum_data(d, t, id, data); +		break; +	case BTF_KIND_VAR: +		err = btf_dump_var_data(d, t, id, data); +		break; +	case BTF_KIND_DATASEC: +		err = btf_dump_datasec_data(d, t, id, data); +		break; +	default: +		pr_warn("unexpected kind [%u] for id [%u]\n", +			BTF_INFO_KIND(t->info), id); +		return -EINVAL; +	} +	if (err < 0) +		return err; +	return size; +} + +int btf_dump__dump_type_data(struct btf_dump *d, __u32 id, +			     const void *data, size_t data_sz, +			     const struct btf_dump_type_data_opts *opts) +{ +	struct btf_dump_data typed_dump = {}; +	const struct btf_type *t; +	int ret; + +	if (!OPTS_VALID(opts, btf_dump_type_data_opts)) +		return libbpf_err(-EINVAL); + +	t = btf__type_by_id(d->btf, id); +	if (!t) +		return libbpf_err(-ENOENT); + +	d->typed_dump = &typed_dump; +	d->typed_dump->data_end = data + data_sz; +	d->typed_dump->indent_lvl = OPTS_GET(opts, indent_level, 0); + +	/* default indent string is a tab */ +	if (!opts->indent_str) +		d->typed_dump->indent_str[0] = '\t'; +	else +		strncat(d->typed_dump->indent_str, opts->indent_str, +			sizeof(d->typed_dump->indent_str) - 1); + +	d->typed_dump->compact = OPTS_GET(opts, compact, false); +	d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false); +	d->typed_dump->emit_zeroes = OPTS_GET(opts, emit_zeroes, false); + +	ret = btf_dump_dump_type_data(d, NULL, t, id, data, 0, 0); + +	d->typed_dump = NULL; + +	return libbpf_err(ret); +} diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 6f5e2757bb3c..88d8825fc6f6 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -193,6 +193,8 @@ enum kern_feature_id {  	FEAT_MODULE_BTF,  	/* BTF_KIND_FLOAT support */  	FEAT_BTF_FLOAT, +	/* BPF perf link support */ +	FEAT_PERF_LINK,  	__FEAT_CNT,  }; @@ -498,6 +500,10 @@ struct bpf_object {  	 * it at load time.  	 */  	struct btf *btf_vmlinux; +	/* Path to the custom BTF to be used for BPF CO-RE relocations as an +	 * override for vmlinux BTF. 
+	 */ +	char *btf_custom_path;  	/* vmlinux BTF override for CO-RE relocations */  	struct btf *btf_vmlinux_override;  	/* Lazily initialized kernel module BTFs */ @@ -591,11 +597,6 @@ static bool insn_is_subprog_call(const struct bpf_insn *insn)  	       insn->off == 0;  } -static bool is_ldimm64_insn(struct bpf_insn *insn) -{ -	return insn->code == (BPF_LD | BPF_IMM | BPF_DW); -} -  static bool is_call_insn(const struct bpf_insn *insn)  {  	return insn->code == (BPF_JMP | BPF_CALL); @@ -2645,8 +2646,10 @@ static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)  	struct bpf_program *prog;  	int i; -	/* CO-RE relocations need kernel BTF */ -	if (obj->btf_ext && obj->btf_ext->core_relo_info.len) +	/* CO-RE relocations need kernel BTF, only when btf_custom_path +	 * is not specified +	 */ +	if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path)  		return true;  	/* Support for typed ksyms needs kernel BTF */ @@ -2679,7 +2682,7 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)  	if (!force && !obj_needs_vmlinux_btf(obj))  		return 0; -	obj->btf_vmlinux = libbpf_find_kernel_btf(); +	obj->btf_vmlinux = btf__load_vmlinux_btf();  	err = libbpf_get_error(obj->btf_vmlinux);  	if (err) {  		pr_warn("Error loading vmlinux BTF: %d\n", err); @@ -2768,7 +2771,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)  		 */  		btf__set_fd(kern_btf, 0);  	} else { -		err = btf__load(kern_btf); +		err = btf__load_into_kernel(kern_btf);  	}  	if (sanitize) {  		if (!err) { @@ -3894,6 +3897,42 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)  	return 0;  } +static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info) +{ +	char file[PATH_MAX], buff[4096]; +	FILE *fp; +	__u32 val; +	int err; + +	snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd); +	memset(info, 0, sizeof(*info)); + +	fp = fopen(file, "r"); +	if (!fp) { +		err = -errno; +		pr_warn("failed to open %s: %d. 
No procfs support?\n", file, +			err); +		return err; +	} + +	while (fgets(buff, sizeof(buff), fp)) { +		if (sscanf(buff, "map_type:\t%u", &val) == 1) +			info->type = val; +		else if (sscanf(buff, "key_size:\t%u", &val) == 1) +			info->key_size = val; +		else if (sscanf(buff, "value_size:\t%u", &val) == 1) +			info->value_size = val; +		else if (sscanf(buff, "max_entries:\t%u", &val) == 1) +			info->max_entries = val; +		else if (sscanf(buff, "map_flags:\t%i", &val) == 1) +			info->map_flags = val; +	} + +	fclose(fp); + +	return 0; +} +  int bpf_map__reuse_fd(struct bpf_map *map, int fd)  {  	struct bpf_map_info info = {}; @@ -3902,6 +3941,8 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd)  	char *new_name;  	err = bpf_obj_get_info_by_fd(fd, &info, &len); +	if (err && errno == EINVAL) +		err = bpf_get_map_info_from_fdinfo(fd, &info);  	if (err)  		return libbpf_err(err); @@ -4298,6 +4339,37 @@ static int probe_module_btf(void)  	return !err;  } +static int probe_perf_link(void) +{ +	struct bpf_load_program_attr attr; +	struct bpf_insn insns[] = { +		BPF_MOV64_IMM(BPF_REG_0, 0), +		BPF_EXIT_INSN(), +	}; +	int prog_fd, link_fd, err; + +	memset(&attr, 0, sizeof(attr)); +	attr.prog_type = BPF_PROG_TYPE_TRACEPOINT; +	attr.insns = insns; +	attr.insns_cnt = ARRAY_SIZE(insns); +	attr.license = "GPL"; +	prog_fd = bpf_load_program_xattr(&attr, NULL, 0); +	if (prog_fd < 0) +		return -errno; + +	/* use invalid perf_event FD to get EBADF, if link is supported; +	 * otherwise EINVAL should be returned +	 */ +	link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL); +	err = -errno; /* close() can clobber errno */ + +	if (link_fd >= 0) +		close(link_fd); +	close(prog_fd); + +	return link_fd < 0 && err == -EBADF; +} +  enum kern_feature_result {  	FEAT_UNKNOWN = 0,  	FEAT_SUPPORTED = 1, @@ -4348,6 +4420,9 @@ static struct kern_feature_desc {  	[FEAT_BTF_FLOAT] = {  		"BTF_KIND_FLOAT support", probe_kern_btf_float,  	}, +	[FEAT_PERF_LINK] = { +		"BPF perf link support", probe_perf_link, +	},  };  static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) @@ -4381,12 +4456,16 @@ static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)  	struct bpf_map_info map_info = {};  	char msg[STRERR_BUFSIZE];  	__u32 map_info_len; +	int err;  	map_info_len = sizeof(map_info); -	if (bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len)) { -		pr_warn("failed to get map info for map FD %d: %s\n", -			map_fd, libbpf_strerror_r(errno, msg, sizeof(msg))); +	err = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len); +	if (err && errno == EINVAL) +		err = bpf_get_map_info_from_fdinfo(map_fd, &map_info); +	if (err) { +		pr_warn("failed to get map info for map FD %d: %s\n", map_fd, +			libbpf_strerror_r(errno, msg, sizeof(msg)));  		return false;  	} @@ -4479,6 +4558,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b  {  	struct bpf_create_map_attr create_attr;  	struct bpf_map_def *def = &map->def; +	int err = 0;  	memset(&create_attr, 0, sizeof(create_attr)); @@ -4521,8 +4601,6 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b  	if (bpf_map_type__is_map_in_map(def->type)) {  		if (map->inner_map) { -			int err; -  			err = bpf_object__create_map(obj, map->inner_map, true);  			if (err) {  				pr_warn("map '%s': failed to create inner map: %d\n", @@ -4547,8 +4625,8 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b  	if (map->fd < 0 && (create_attr.btf_key_type_id || 
 			    create_attr.btf_value_type_id)) {  		char *cp, errmsg[STRERR_BUFSIZE]; -		int err = -errno; +		err = -errno;  		cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));  		pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",  			map->name, cp, err); @@ -4560,8 +4638,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b  		map->fd = bpf_create_map_xattr(&create_attr);  	} -	if (map->fd < 0) -		return -errno; +	err = map->fd < 0 ? -errno : 0;  	if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {  		if (obj->gen_loader) @@ -4570,7 +4647,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b  		zfree(&map->inner_map);  	} -	return 0; +	return err;  }  static int init_map_slots(struct bpf_object *obj, struct bpf_map *map) @@ -4616,10 +4693,13 @@ bpf_object__create_maps(struct bpf_object *obj)  	char *cp, errmsg[STRERR_BUFSIZE];  	unsigned int i, j;  	int err; +	bool retried;  	for (i = 0; i < obj->nr_maps; i++) {  		map = &obj->maps[i]; +		retried = false; +retry:  		if (map->pin_path) {  			err = bpf_object__reuse_map(map);  			if (err) { @@ -4627,6 +4707,12 @@ bpf_object__create_maps(struct bpf_object *obj)  					map->name);  				goto err_out;  			} +			if (retried && map->fd < 0) { +				pr_warn("map '%s': cannot find pinned map\n", +					map->name); +				err = -ENOENT; +				goto err_out; +			}  		}  		if (map->fd >= 0) { @@ -4660,9 +4746,13 @@ bpf_object__create_maps(struct bpf_object *obj)  		if (map->pin_path && !map->pinned) {  			err = bpf_map__pin(map, NULL);  			if (err) { +				zclose(map->fd); +				if (!retried && err == -EEXIST) { +					retried = true; +					goto retry; +				}  				pr_warn("map '%s': failed to auto-pin at '%s': %d\n",  					map->name, map->pin_path, err); -				zclose(map->fd);  				goto err_out;  			}  		} @@ -4679,279 +4769,6 @@ err_out:  	return err;  } -#define BPF_CORE_SPEC_MAX_LEN 64 - -/* represents BPF CO-RE field or array element accessor */ -struct bpf_core_accessor { -	__u32 type_id;		/* struct/union type or array element type */ -	__u32 idx;		/* field index or array index */ -	const char *name;	/* field name or NULL for array accessor */ -}; - -struct bpf_core_spec { -	const struct btf *btf; -	/* high-level spec: named fields and array indices only */ -	struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN]; -	/* original unresolved (no skip_mods_or_typedefs) root type ID */ -	__u32 root_type_id; -	/* CO-RE relocation kind */ -	enum bpf_core_relo_kind relo_kind; -	/* high-level spec length */ -	int len; -	/* raw, low-level spec: 1-to-1 with accessor spec string */ -	int raw_spec[BPF_CORE_SPEC_MAX_LEN]; -	/* raw spec length */ -	int raw_len; -	/* field bit offset represented by spec */ -	__u32 bit_offset; -}; - -static bool str_is_empty(const char *s) -{ -	return !s || !s[0]; -} - -static bool is_flex_arr(const struct btf *btf, -			const struct bpf_core_accessor *acc, -			const struct btf_array *arr) -{ -	const struct btf_type *t; - -	/* not a flexible array, if not inside a struct or has non-zero size */ -	if (!acc->name || arr->nelems > 0) -		return false; - -	/* has to be the last member of enclosing struct */ -	t = btf__type_by_id(btf, acc->type_id); -	return acc->idx == btf_vlen(t) - 1; -} - -static const char *core_relo_kind_str(enum bpf_core_relo_kind kind) -{ -	switch (kind) { -	case BPF_FIELD_BYTE_OFFSET: return "byte_off"; -	case BPF_FIELD_BYTE_SIZE: return "byte_sz"; -	case BPF_FIELD_EXISTS: return "field_exists"; -	case BPF_FIELD_SIGNED: return 
"signed"; -	case BPF_FIELD_LSHIFT_U64: return "lshift_u64"; -	case BPF_FIELD_RSHIFT_U64: return "rshift_u64"; -	case BPF_TYPE_ID_LOCAL: return "local_type_id"; -	case BPF_TYPE_ID_TARGET: return "target_type_id"; -	case BPF_TYPE_EXISTS: return "type_exists"; -	case BPF_TYPE_SIZE: return "type_size"; -	case BPF_ENUMVAL_EXISTS: return "enumval_exists"; -	case BPF_ENUMVAL_VALUE: return "enumval_value"; -	default: return "unknown"; -	} -} - -static bool core_relo_is_field_based(enum bpf_core_relo_kind kind) -{ -	switch (kind) { -	case BPF_FIELD_BYTE_OFFSET: -	case BPF_FIELD_BYTE_SIZE: -	case BPF_FIELD_EXISTS: -	case BPF_FIELD_SIGNED: -	case BPF_FIELD_LSHIFT_U64: -	case BPF_FIELD_RSHIFT_U64: -		return true; -	default: -		return false; -	} -} - -static bool core_relo_is_type_based(enum bpf_core_relo_kind kind) -{ -	switch (kind) { -	case BPF_TYPE_ID_LOCAL: -	case BPF_TYPE_ID_TARGET: -	case BPF_TYPE_EXISTS: -	case BPF_TYPE_SIZE: -		return true; -	default: -		return false; -	} -} - -static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) -{ -	switch (kind) { -	case BPF_ENUMVAL_EXISTS: -	case BPF_ENUMVAL_VALUE: -		return true; -	default: -		return false; -	} -} - -/* - * Turn bpf_core_relo into a low- and high-level spec representation, - * validating correctness along the way, as well as calculating resulting - * field bit offset, specified by accessor string. Low-level spec captures - * every single level of nestedness, including traversing anonymous - * struct/union members. High-level one only captures semantically meaningful - * "turning points": named fields and array indicies. - * E.g., for this case: - * - *   struct sample { - *       int __unimportant; - *       struct { - *           int __1; - *           int __2; - *           int a[7]; - *       }; - *   }; - * - *   struct sample *s = ...; - * - *   int x = &s->a[3]; // access string = '0:1:2:3' - * - * Low-level spec has 1:1 mapping with each element of access string (it's - * just a parsed access string representation): [0, 1, 2, 3]. - * - * High-level spec will capture only 3 points: - *   - intial zero-index access by pointer (&s->... is the same as &s[0]...); - *   - field 'a' access (corresponds to '2' in low-level spec); - *   - array element #3 access (corresponds to '3' in low-level spec). - * - * Type-based relocations (TYPE_EXISTS/TYPE_SIZE, - * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their - * spec and raw_spec are kept empty. - * - * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access - * string to specify enumerator's value index that need to be relocated. 
- */ -static int bpf_core_parse_spec(const struct btf *btf, -			       __u32 type_id, -			       const char *spec_str, -			       enum bpf_core_relo_kind relo_kind, -			       struct bpf_core_spec *spec) -{ -	int access_idx, parsed_len, i; -	struct bpf_core_accessor *acc; -	const struct btf_type *t; -	const char *name; -	__u32 id; -	__s64 sz; - -	if (str_is_empty(spec_str) || *spec_str == ':') -		return -EINVAL; - -	memset(spec, 0, sizeof(*spec)); -	spec->btf = btf; -	spec->root_type_id = type_id; -	spec->relo_kind = relo_kind; - -	/* type-based relocations don't have a field access string */ -	if (core_relo_is_type_based(relo_kind)) { -		if (strcmp(spec_str, "0")) -			return -EINVAL; -		return 0; -	} - -	/* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */ -	while (*spec_str) { -		if (*spec_str == ':') -			++spec_str; -		if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1) -			return -EINVAL; -		if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) -			return -E2BIG; -		spec_str += parsed_len; -		spec->raw_spec[spec->raw_len++] = access_idx; -	} - -	if (spec->raw_len == 0) -		return -EINVAL; - -	t = skip_mods_and_typedefs(btf, type_id, &id); -	if (!t) -		return -EINVAL; - -	access_idx = spec->raw_spec[0]; -	acc = &spec->spec[0]; -	acc->type_id = id; -	acc->idx = access_idx; -	spec->len++; - -	if (core_relo_is_enumval_based(relo_kind)) { -		if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t)) -			return -EINVAL; - -		/* record enumerator name in a first accessor */ -		acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off); -		return 0; -	} - -	if (!core_relo_is_field_based(relo_kind)) -		return -EINVAL; - -	sz = btf__resolve_size(btf, id); -	if (sz < 0) -		return sz; -	spec->bit_offset = access_idx * sz * 8; - -	for (i = 1; i < spec->raw_len; i++) { -		t = skip_mods_and_typedefs(btf, id, &id); -		if (!t) -			return -EINVAL; - -		access_idx = spec->raw_spec[i]; -		acc = &spec->spec[spec->len]; - -		if (btf_is_composite(t)) { -			const struct btf_member *m; -			__u32 bit_offset; - -			if (access_idx >= btf_vlen(t)) -				return -EINVAL; - -			bit_offset = btf_member_bit_offset(t, access_idx); -			spec->bit_offset += bit_offset; - -			m = btf_members(t) + access_idx; -			if (m->name_off) { -				name = btf__name_by_offset(btf, m->name_off); -				if (str_is_empty(name)) -					return -EINVAL; - -				acc->type_id = id; -				acc->idx = access_idx; -				acc->name = name; -				spec->len++; -			} - -			id = m->type; -		} else if (btf_is_array(t)) { -			const struct btf_array *a = btf_array(t); -			bool flex; - -			t = skip_mods_and_typedefs(btf, a->type, &id); -			if (!t) -				return -EINVAL; - -			flex = is_flex_arr(btf, acc - 1, a); -			if (!flex && access_idx >= a->nelems) -				return -EINVAL; - -			spec->spec[spec->len].type_id = id; -			spec->spec[spec->len].idx = access_idx; -			spec->len++; - -			sz = btf__resolve_size(btf, id); -			if (sz < 0) -				return sz; -			spec->bit_offset += access_idx * sz * 8; -		} else { -			pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n", -				type_id, spec_str, i, id, btf_kind_str(t)); -			return -EINVAL; -		} -	} - -	return 0; -} -  static bool bpf_core_is_flavor_sep(const char *s)  {  	/* check X___Y name pattern, where X and Y are not underscores */ @@ -4964,7 +4781,7 @@ static bool bpf_core_is_flavor_sep(const char *s)   * before last triple underscore. Struct name part after last triple   * underscore is ignored by BPF CO-RE relocation during relocation matching.   
*/ -static size_t bpf_core_essential_name_len(const char *name) +size_t bpf_core_essential_name_len(const char *name)  {  	size_t n = strlen(name);  	int i; @@ -4976,34 +4793,20 @@ static size_t bpf_core_essential_name_len(const char *name)  	return n;  } -struct core_cand -{ -	const struct btf *btf; -	const struct btf_type *t; -	const char *name; -	__u32 id; -}; - -/* dynamically sized list of type IDs and its associated struct btf */ -struct core_cand_list { -	struct core_cand *cands; -	int len; -}; - -static void bpf_core_free_cands(struct core_cand_list *cands) +static void bpf_core_free_cands(struct bpf_core_cand_list *cands)  {  	free(cands->cands);  	free(cands);  } -static int bpf_core_add_cands(struct core_cand *local_cand, +static int bpf_core_add_cands(struct bpf_core_cand *local_cand,  			      size_t local_essent_len,  			      const struct btf *targ_btf,  			      const char *targ_btf_name,  			      int targ_start_id, -			      struct core_cand_list *cands) +			      struct bpf_core_cand_list *cands)  { -	struct core_cand *new_cands, *cand; +	struct bpf_core_cand *new_cands, *cand;  	const struct btf_type *t;  	const char *targ_name;  	size_t targ_essent_len; @@ -5139,11 +4942,11 @@ err_out:  	return 0;  } -static struct core_cand_list * +static struct bpf_core_cand_list *  bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)  { -	struct core_cand local_cand = {}; -	struct core_cand_list *cands; +	struct bpf_core_cand local_cand = {}; +	struct bpf_core_cand_list *cands;  	const struct btf *main_btf;  	size_t local_essent_len;  	int err, i; @@ -5197,165 +5000,6 @@ err_out:  	return ERR_PTR(err);  } -/* Check two types for compatibility for the purpose of field access - * relocation. const/volatile/restrict and typedefs are skipped to ensure we - * are relocating semantically compatible entities: - *   - any two STRUCTs/UNIONs are compatible and can be mixed; - *   - any two FWDs are compatible, if their names match (modulo flavor suffix); - *   - any two PTRs are always compatible; - *   - for ENUMs, names should be the same (ignoring flavor suffix) or at - *     least one of enums should be anonymous; - *   - for ENUMs, check sizes, names are ignored; - *   - for INT, size and signedness are ignored; - *   - any two FLOATs are always compatible; - *   - for ARRAY, dimensionality is ignored, element types are checked for - *     compatibility recursively; - *   - everything else shouldn't be ever a target of relocation. - * These rules are not set in stone and probably will be adjusted as we get - * more experience with using BPF CO-RE relocations. 
- */ -static int bpf_core_fields_are_compat(const struct btf *local_btf, -				      __u32 local_id, -				      const struct btf *targ_btf, -				      __u32 targ_id) -{ -	const struct btf_type *local_type, *targ_type; - -recur: -	local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id); -	targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); -	if (!local_type || !targ_type) -		return -EINVAL; - -	if (btf_is_composite(local_type) && btf_is_composite(targ_type)) -		return 1; -	if (btf_kind(local_type) != btf_kind(targ_type)) -		return 0; - -	switch (btf_kind(local_type)) { -	case BTF_KIND_PTR: -	case BTF_KIND_FLOAT: -		return 1; -	case BTF_KIND_FWD: -	case BTF_KIND_ENUM: { -		const char *local_name, *targ_name; -		size_t local_len, targ_len; - -		local_name = btf__name_by_offset(local_btf, -						 local_type->name_off); -		targ_name = btf__name_by_offset(targ_btf, targ_type->name_off); -		local_len = bpf_core_essential_name_len(local_name); -		targ_len = bpf_core_essential_name_len(targ_name); -		/* one of them is anonymous or both w/ same flavor-less names */ -		return local_len == 0 || targ_len == 0 || -		       (local_len == targ_len && -			strncmp(local_name, targ_name, local_len) == 0); -	} -	case BTF_KIND_INT: -		/* just reject deprecated bitfield-like integers; all other -		 * integers are by default compatible between each other -		 */ -		return btf_int_offset(local_type) == 0 && -		       btf_int_offset(targ_type) == 0; -	case BTF_KIND_ARRAY: -		local_id = btf_array(local_type)->type; -		targ_id = btf_array(targ_type)->type; -		goto recur; -	default: -		pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n", -			btf_kind(local_type), local_id, targ_id); -		return 0; -	} -} - -/* - * Given single high-level named field accessor in local type, find - * corresponding high-level accessor for a target type. Along the way, - * maintain low-level spec for target as well. Also keep updating target - * bit offset. - * - * Searching is performed through recursive exhaustive enumeration of all - * fields of a struct/union. If there are any anonymous (embedded) - * structs/unions, they are recursively searched as well. If field with - * desired name is found, check compatibility between local and target types, - * before returning result. - * - * 1 is returned, if field is found. - * 0 is returned if no compatible field is found. - * <0 is returned on error. 
- */ -static int bpf_core_match_member(const struct btf *local_btf, -				 const struct bpf_core_accessor *local_acc, -				 const struct btf *targ_btf, -				 __u32 targ_id, -				 struct bpf_core_spec *spec, -				 __u32 *next_targ_id) -{ -	const struct btf_type *local_type, *targ_type; -	const struct btf_member *local_member, *m; -	const char *local_name, *targ_name; -	__u32 local_id; -	int i, n, found; - -	targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); -	if (!targ_type) -		return -EINVAL; -	if (!btf_is_composite(targ_type)) -		return 0; - -	local_id = local_acc->type_id; -	local_type = btf__type_by_id(local_btf, local_id); -	local_member = btf_members(local_type) + local_acc->idx; -	local_name = btf__name_by_offset(local_btf, local_member->name_off); - -	n = btf_vlen(targ_type); -	m = btf_members(targ_type); -	for (i = 0; i < n; i++, m++) { -		__u32 bit_offset; - -		bit_offset = btf_member_bit_offset(targ_type, i); - -		/* too deep struct/union/array nesting */ -		if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) -			return -E2BIG; - -		/* speculate this member will be the good one */ -		spec->bit_offset += bit_offset; -		spec->raw_spec[spec->raw_len++] = i; - -		targ_name = btf__name_by_offset(targ_btf, m->name_off); -		if (str_is_empty(targ_name)) { -			/* embedded struct/union, we need to go deeper */ -			found = bpf_core_match_member(local_btf, local_acc, -						      targ_btf, m->type, -						      spec, next_targ_id); -			if (found) /* either found or error */ -				return found; -		} else if (strcmp(local_name, targ_name) == 0) { -			/* matching named field */ -			struct bpf_core_accessor *targ_acc; - -			targ_acc = &spec->spec[spec->len++]; -			targ_acc->type_id = targ_id; -			targ_acc->idx = i; -			targ_acc->name = targ_name; - -			*next_targ_id = m->type; -			found = bpf_core_fields_are_compat(local_btf, -							   local_member->type, -							   targ_btf, m->type); -			if (!found) -				spec->len--; /* pop accessor */ -			return found; -		} -		/* member turned out not to be what we looked for */ -		spec->bit_offset -= bit_offset; -		spec->raw_len--; -	} - -	return 0; -} -  /* Check local and target types for compatibility. This check is used for   * type-based CO-RE relocations and follow slightly different rules than   * field-based relocations. This function assumes that root types were already @@ -5375,8 +5019,8 @@ static int bpf_core_match_member(const struct btf *local_btf,   * These rules are not set in stone and probably will be adjusted as we get   * more experience with using BPF CO-RE relocations.   */ -static int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, -				     const struct btf *targ_btf, __u32 targ_id) +int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, +			      const struct btf *targ_btf, __u32 targ_id)  {  	const struct btf_type *local_type, *targ_type;  	int depth = 32; /* max recursion depth */ @@ -5450,671 +5094,6 @@ recur:  	}  } -/* - * Try to match local spec to a target type and, if successful, produce full - * target spec (high-level, low-level + bit offset). 
- */ -static int bpf_core_spec_match(struct bpf_core_spec *local_spec, -			       const struct btf *targ_btf, __u32 targ_id, -			       struct bpf_core_spec *targ_spec) -{ -	const struct btf_type *targ_type; -	const struct bpf_core_accessor *local_acc; -	struct bpf_core_accessor *targ_acc; -	int i, sz, matched; - -	memset(targ_spec, 0, sizeof(*targ_spec)); -	targ_spec->btf = targ_btf; -	targ_spec->root_type_id = targ_id; -	targ_spec->relo_kind = local_spec->relo_kind; - -	if (core_relo_is_type_based(local_spec->relo_kind)) { -		return bpf_core_types_are_compat(local_spec->btf, -						 local_spec->root_type_id, -						 targ_btf, targ_id); -	} - -	local_acc = &local_spec->spec[0]; -	targ_acc = &targ_spec->spec[0]; - -	if (core_relo_is_enumval_based(local_spec->relo_kind)) { -		size_t local_essent_len, targ_essent_len; -		const struct btf_enum *e; -		const char *targ_name; - -		/* has to resolve to an enum */ -		targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id); -		if (!btf_is_enum(targ_type)) -			return 0; - -		local_essent_len = bpf_core_essential_name_len(local_acc->name); - -		for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) { -			targ_name = btf__name_by_offset(targ_spec->btf, e->name_off); -			targ_essent_len = bpf_core_essential_name_len(targ_name); -			if (targ_essent_len != local_essent_len) -				continue; -			if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) { -				targ_acc->type_id = targ_id; -				targ_acc->idx = i; -				targ_acc->name = targ_name; -				targ_spec->len++; -				targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx; -				targ_spec->raw_len++; -				return 1; -			} -		} -		return 0; -	} - -	if (!core_relo_is_field_based(local_spec->relo_kind)) -		return -EINVAL; - -	for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) { -		targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, -						   &targ_id); -		if (!targ_type) -			return -EINVAL; - -		if (local_acc->name) { -			matched = bpf_core_match_member(local_spec->btf, -							local_acc, -							targ_btf, targ_id, -							targ_spec, &targ_id); -			if (matched <= 0) -				return matched; -		} else { -			/* for i=0, targ_id is already treated as array element -			 * type (because it's the original struct), for others -			 * we should find array element type first -			 */ -			if (i > 0) { -				const struct btf_array *a; -				bool flex; - -				if (!btf_is_array(targ_type)) -					return 0; - -				a = btf_array(targ_type); -				flex = is_flex_arr(targ_btf, targ_acc - 1, a); -				if (!flex && local_acc->idx >= a->nelems) -					return 0; -				if (!skip_mods_and_typedefs(targ_btf, a->type, -							    &targ_id)) -					return -EINVAL; -			} - -			/* too deep struct/union/array nesting */ -			if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN) -				return -E2BIG; - -			targ_acc->type_id = targ_id; -			targ_acc->idx = local_acc->idx; -			targ_acc->name = NULL; -			targ_spec->len++; -			targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx; -			targ_spec->raw_len++; - -			sz = btf__resolve_size(targ_btf, targ_id); -			if (sz < 0) -				return sz; -			targ_spec->bit_offset += local_acc->idx * sz * 8; -		} -	} - -	return 1; -} - -static int bpf_core_calc_field_relo(const struct bpf_program *prog, -				    const struct bpf_core_relo *relo, -				    const struct bpf_core_spec *spec, -				    __u32 *val, __u32 *field_sz, __u32 *type_id, -				    bool *validate) -{ -	const struct bpf_core_accessor *acc; -	const struct btf_type *t; -	__u32 byte_off, byte_sz, bit_off, 
bit_sz, field_type_id; -	const struct btf_member *m; -	const struct btf_type *mt; -	bool bitfield; -	__s64 sz; - -	*field_sz = 0; - -	if (relo->kind == BPF_FIELD_EXISTS) { -		*val = spec ? 1 : 0; -		return 0; -	} - -	if (!spec) -		return -EUCLEAN; /* request instruction poisoning */ - -	acc = &spec->spec[spec->len - 1]; -	t = btf__type_by_id(spec->btf, acc->type_id); - -	/* a[n] accessor needs special handling */ -	if (!acc->name) { -		if (relo->kind == BPF_FIELD_BYTE_OFFSET) { -			*val = spec->bit_offset / 8; -			/* remember field size for load/store mem size */ -			sz = btf__resolve_size(spec->btf, acc->type_id); -			if (sz < 0) -				return -EINVAL; -			*field_sz = sz; -			*type_id = acc->type_id; -		} else if (relo->kind == BPF_FIELD_BYTE_SIZE) { -			sz = btf__resolve_size(spec->btf, acc->type_id); -			if (sz < 0) -				return -EINVAL; -			*val = sz; -		} else { -			pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n", -				prog->name, relo->kind, relo->insn_off / 8); -			return -EINVAL; -		} -		if (validate) -			*validate = true; -		return 0; -	} - -	m = btf_members(t) + acc->idx; -	mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id); -	bit_off = spec->bit_offset; -	bit_sz = btf_member_bitfield_size(t, acc->idx); - -	bitfield = bit_sz > 0; -	if (bitfield) { -		byte_sz = mt->size; -		byte_off = bit_off / 8 / byte_sz * byte_sz; -		/* figure out smallest int size necessary for bitfield load */ -		while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) { -			if (byte_sz >= 8) { -				/* bitfield can't be read with 64-bit read */ -				pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n", -					prog->name, relo->kind, relo->insn_off / 8); -				return -E2BIG; -			} -			byte_sz *= 2; -			byte_off = bit_off / 8 / byte_sz * byte_sz; -		} -	} else { -		sz = btf__resolve_size(spec->btf, field_type_id); -		if (sz < 0) -			return -EINVAL; -		byte_sz = sz; -		byte_off = spec->bit_offset / 8; -		bit_sz = byte_sz * 8; -	} - -	/* for bitfields, all the relocatable aspects are ambiguous and we -	 * might disagree with compiler, so turn off validation of expected -	 * value, except for signedness -	 */ -	if (validate) -		*validate = !bitfield; - -	switch (relo->kind) { -	case BPF_FIELD_BYTE_OFFSET: -		*val = byte_off; -		if (!bitfield) { -			*field_sz = byte_sz; -			*type_id = field_type_id; -		} -		break; -	case BPF_FIELD_BYTE_SIZE: -		*val = byte_sz; -		break; -	case BPF_FIELD_SIGNED: -		/* enums will be assumed unsigned */ -		*val = btf_is_enum(mt) || -		       (btf_int_encoding(mt) & BTF_INT_SIGNED); -		if (validate) -			*validate = true; /* signedness is never ambiguous */ -		break; -	case BPF_FIELD_LSHIFT_U64: -#if __BYTE_ORDER == __LITTLE_ENDIAN -		*val = 64 - (bit_off + bit_sz - byte_off  * 8); -#else -		*val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8); -#endif -		break; -	case BPF_FIELD_RSHIFT_U64: -		*val = 64 - bit_sz; -		if (validate) -			*validate = true; /* right shift is never ambiguous */ -		break; -	case BPF_FIELD_EXISTS: -	default: -		return -EOPNOTSUPP; -	} - -	return 0; -} - -static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, -				   const struct bpf_core_spec *spec, -				   __u32 *val) -{ -	__s64 sz; - -	/* type-based relos return zero when target type is not found */ -	if (!spec) { -		*val = 0; -		return 0; -	} - -	switch (relo->kind) { -	case BPF_TYPE_ID_TARGET: -		*val = spec->root_type_id; -		break; -	case BPF_TYPE_EXISTS: -		*val = 1; -		break; -	case BPF_TYPE_SIZE: -		sz = btf__resolve_size(spec->btf, 
spec->root_type_id); -		if (sz < 0) -			return -EINVAL; -		*val = sz; -		break; -	case BPF_TYPE_ID_LOCAL: -	/* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */ -	default: -		return -EOPNOTSUPP; -	} - -	return 0; -} - -static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo, -				      const struct bpf_core_spec *spec, -				      __u32 *val) -{ -	const struct btf_type *t; -	const struct btf_enum *e; - -	switch (relo->kind) { -	case BPF_ENUMVAL_EXISTS: -		*val = spec ? 1 : 0; -		break; -	case BPF_ENUMVAL_VALUE: -		if (!spec) -			return -EUCLEAN; /* request instruction poisoning */ -		t = btf__type_by_id(spec->btf, spec->spec[0].type_id); -		e = btf_enum(t) + spec->spec[0].idx; -		*val = e->val; -		break; -	default: -		return -EOPNOTSUPP; -	} - -	return 0; -} - -struct bpf_core_relo_res -{ -	/* expected value in the instruction, unless validate == false */ -	__u32 orig_val; -	/* new value that needs to be patched up to */ -	__u32 new_val; -	/* relocation unsuccessful, poison instruction, but don't fail load */ -	bool poison; -	/* some relocations can't be validated against orig_val */ -	bool validate; -	/* for field byte offset relocations or the forms: -	 *     *(T *)(rX + <off>) = rY -	 *     rX = *(T *)(rY + <off>), -	 * we remember original and resolved field size to adjust direct -	 * memory loads of pointers and integers; this is necessary for 32-bit -	 * host kernel architectures, but also allows to automatically -	 * relocate fields that were resized from, e.g., u32 to u64, etc. -	 */ -	bool fail_memsz_adjust; -	__u32 orig_sz; -	__u32 orig_type_id; -	__u32 new_sz; -	__u32 new_type_id; -}; - -/* Calculate original and target relocation values, given local and target - * specs and relocation kind. These values are calculated for each candidate. - * If there are multiple candidates, resulting values should all be consistent - * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity. - * If instruction has to be poisoned, *poison will be set to true. - */ -static int bpf_core_calc_relo(const struct bpf_program *prog, -			      const struct bpf_core_relo *relo, -			      int relo_idx, -			      const struct bpf_core_spec *local_spec, -			      const struct bpf_core_spec *targ_spec, -			      struct bpf_core_relo_res *res) -{ -	int err = -EOPNOTSUPP; - -	res->orig_val = 0; -	res->new_val = 0; -	res->poison = false; -	res->validate = true; -	res->fail_memsz_adjust = false; -	res->orig_sz = res->new_sz = 0; -	res->orig_type_id = res->new_type_id = 0; - -	if (core_relo_is_field_based(relo->kind)) { -		err = bpf_core_calc_field_relo(prog, relo, local_spec, -					       &res->orig_val, &res->orig_sz, -					       &res->orig_type_id, &res->validate); -		err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec, -						      &res->new_val, &res->new_sz, -						      &res->new_type_id, NULL); -		if (err) -			goto done; -		/* Validate if it's safe to adjust load/store memory size. -		 * Adjustments are performed only if original and new memory -		 * sizes differ. 
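The memory-size adjustment validated below is only considered safe when zero-extension preserves the value. A host-side C sketch (illustrative values) of why unsigned integers and pointers survive a u32 -> u64 field resize while signed fields would not:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t u = 0xfffffff0u;		/* unsigned 32-bit field value */
	int32_t  s = -16;			/* signed 32-bit field value */

	uint64_t u_wide = u;			/* zero-extended: still 0xfffffff0 */
	uint64_t s_wide = (uint32_t)s;		/* zero-extended: 0xfffffff0, not -16 */

	printf("unsigned ok: %#llx, signed corrupted: %lld\n",
	       (unsigned long long)u_wide, (long long)s_wide);
	return 0;
}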
-		 */ -		res->fail_memsz_adjust = false; -		if (res->orig_sz != res->new_sz) { -			const struct btf_type *orig_t, *new_t; - -			orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id); -			new_t = btf__type_by_id(targ_spec->btf, res->new_type_id); - -			/* There are two use cases in which it's safe to -			 * adjust load/store's mem size: -			 *   - reading a 32-bit kernel pointer, while on BPF -			 *   size pointers are always 64-bit; in this case -			 *   it's safe to "downsize" instruction size due to -			 *   pointer being treated as unsigned integer with -			 *   zero-extended upper 32-bits; -			 *   - reading unsigned integers, again due to -			 *   zero-extension is preserving the value correctly. -			 * -			 * In all other cases it's incorrect to attempt to -			 * load/store field because read value will be -			 * incorrect, so we poison relocated instruction. -			 */ -			if (btf_is_ptr(orig_t) && btf_is_ptr(new_t)) -				goto done; -			if (btf_is_int(orig_t) && btf_is_int(new_t) && -			    btf_int_encoding(orig_t) != BTF_INT_SIGNED && -			    btf_int_encoding(new_t) != BTF_INT_SIGNED) -				goto done; - -			/* mark as invalid mem size adjustment, but this will -			 * only be checked for LDX/STX/ST insns -			 */ -			res->fail_memsz_adjust = true; -		} -	} else if (core_relo_is_type_based(relo->kind)) { -		err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val); -		err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val); -	} else if (core_relo_is_enumval_based(relo->kind)) { -		err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val); -		err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val); -	} - -done: -	if (err == -EUCLEAN) { -		/* EUCLEAN is used to signal instruction poisoning request */ -		res->poison = true; -		err = 0; -	} else if (err == -EOPNOTSUPP) { -		/* EOPNOTSUPP means unknown/unsupported relocation */ -		pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n", -			prog->name, relo_idx, core_relo_kind_str(relo->kind), -			relo->kind, relo->insn_off / 8); -	} - -	return err; -} - -/* - * Turn instruction for which CO_RE relocation failed into invalid one with - * distinct signature. - */ -static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx, -				 int insn_idx, struct bpf_insn *insn) -{ -	pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n", -		 prog->name, relo_idx, insn_idx); -	insn->code = BPF_JMP | BPF_CALL; -	insn->dst_reg = 0; -	insn->src_reg = 0; -	insn->off = 0; -	/* if this instruction is reachable (not a dead code), -	 * verifier will complain with the following message: -	 * invalid func unknown#195896080 -	 */ -	insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */ -} - -static int insn_bpf_size_to_bytes(struct bpf_insn *insn) -{ -	switch (BPF_SIZE(insn->code)) { -	case BPF_DW: return 8; -	case BPF_W: return 4; -	case BPF_H: return 2; -	case BPF_B: return 1; -	default: return -1; -	} -} - -static int insn_bytes_to_bpf_size(__u32 sz) -{ -	switch (sz) { -	case 8: return BPF_DW; -	case 4: return BPF_W; -	case 2: return BPF_H; -	case 1: return BPF_B; -	default: return -1; -	} -} - -/* - * Patch relocatable BPF instruction. - * - * Patched value is determined by relocation kind and target specification. - * For existence relocations target spec will be NULL if field/type is not found. - * Expected insn->imm value is determined using relocation kind and local - * spec, and is checked before patching instruction. 
If actual insn->imm value - * is wrong, bail out with error. - * - * Currently supported classes of BPF instruction are: - * 1. rX = <imm> (assignment with immediate operand); - * 2. rX += <imm> (arithmetic operations with immediate operand); - * 3. rX = <imm64> (load with 64-bit immediate value); - * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64}; - * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64}; - * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}. - */ -static int bpf_core_patch_insn(struct bpf_program *prog, -			       const struct bpf_core_relo *relo, -			       int relo_idx, -			       const struct bpf_core_relo_res *res) -{ -	__u32 orig_val, new_val; -	struct bpf_insn *insn; -	int insn_idx; -	__u8 class; - -	if (relo->insn_off % BPF_INSN_SZ) -		return -EINVAL; -	insn_idx = relo->insn_off / BPF_INSN_SZ; -	/* adjust insn_idx from section frame of reference to the local -	 * program's frame of reference; (sub-)program code is not yet -	 * relocated, so it's enough to just subtract in-section offset -	 */ -	insn_idx = insn_idx - prog->sec_insn_off; -	insn = &prog->insns[insn_idx]; -	class = BPF_CLASS(insn->code); - -	if (res->poison) { -poison: -		/* poison second part of ldimm64 to avoid confusing error from -		 * verifier about "unknown opcode 00" -		 */ -		if (is_ldimm64_insn(insn)) -			bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1); -		bpf_core_poison_insn(prog, relo_idx, insn_idx, insn); -		return 0; -	} - -	orig_val = res->orig_val; -	new_val = res->new_val; - -	switch (class) { -	case BPF_ALU: -	case BPF_ALU64: -		if (BPF_SRC(insn->code) != BPF_K) -			return -EINVAL; -		if (res->validate && insn->imm != orig_val) { -			pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n", -				prog->name, relo_idx, -				insn_idx, insn->imm, orig_val, new_val); -			return -EINVAL; -		} -		orig_val = insn->imm; -		insn->imm = new_val; -		pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n", -			 prog->name, relo_idx, insn_idx, -			 orig_val, new_val); -		break; -	case BPF_LDX: -	case BPF_ST: -	case BPF_STX: -		if (res->validate && insn->off != orig_val) { -			pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n", -				prog->name, relo_idx, insn_idx, insn->off, orig_val, new_val); -			return -EINVAL; -		} -		if (new_val > SHRT_MAX) { -			pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n", -				prog->name, relo_idx, insn_idx, new_val); -			return -ERANGE; -		} -		if (res->fail_memsz_adjust) { -			pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. 
" -				"Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n", -				prog->name, relo_idx, insn_idx); -			goto poison; -		} - -		orig_val = insn->off; -		insn->off = new_val; -		pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n", -			 prog->name, relo_idx, insn_idx, orig_val, new_val); - -		if (res->new_sz != res->orig_sz) { -			int insn_bytes_sz, insn_bpf_sz; - -			insn_bytes_sz = insn_bpf_size_to_bytes(insn); -			if (insn_bytes_sz != res->orig_sz) { -				pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n", -					prog->name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz); -				return -EINVAL; -			} - -			insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz); -			if (insn_bpf_sz < 0) { -				pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n", -					prog->name, relo_idx, insn_idx, res->new_sz); -				return -EINVAL; -			} - -			insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code); -			pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n", -				 prog->name, relo_idx, insn_idx, res->orig_sz, res->new_sz); -		} -		break; -	case BPF_LD: { -		__u64 imm; - -		if (!is_ldimm64_insn(insn) || -		    insn[0].src_reg != 0 || insn[0].off != 0 || -		    insn_idx + 1 >= prog->insns_cnt || -		    insn[1].code != 0 || insn[1].dst_reg != 0 || -		    insn[1].src_reg != 0 || insn[1].off != 0) { -			pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n", -				prog->name, relo_idx, insn_idx); -			return -EINVAL; -		} - -		imm = insn[0].imm + ((__u64)insn[1].imm << 32); -		if (res->validate && imm != orig_val) { -			pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n", -				prog->name, relo_idx, -				insn_idx, (unsigned long long)imm, -				orig_val, new_val); -			return -EINVAL; -		} - -		insn[0].imm = new_val; -		insn[1].imm = 0; /* currently only 32-bit values are supported */ -		pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n", -			 prog->name, relo_idx, insn_idx, -			 (unsigned long long)imm, new_val); -		break; -	} -	default: -		pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n", -			prog->name, relo_idx, insn_idx, insn->code, -			insn->src_reg, insn->dst_reg, insn->off, insn->imm); -		return -EINVAL; -	} - -	return 0; -} - -/* Output spec definition in the format: - * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>, - * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b - */ -static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) -{ -	const struct btf_type *t; -	const struct btf_enum *e; -	const char *s; -	__u32 type_id; -	int i; - -	type_id = spec->root_type_id; -	t = btf__type_by_id(spec->btf, type_id); -	s = btf__name_by_offset(spec->btf, t->name_off); - -	libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? 
"<anon>" : s); - -	if (core_relo_is_type_based(spec->relo_kind)) -		return; - -	if (core_relo_is_enumval_based(spec->relo_kind)) { -		t = skip_mods_and_typedefs(spec->btf, type_id, NULL); -		e = btf_enum(t) + spec->raw_spec[0]; -		s = btf__name_by_offset(spec->btf, e->name_off); - -		libbpf_print(level, "::%s = %u", s, e->val); -		return; -	} - -	if (core_relo_is_field_based(spec->relo_kind)) { -		for (i = 0; i < spec->len; i++) { -			if (spec->spec[i].name) -				libbpf_print(level, ".%s", spec->spec[i].name); -			else if (i > 0 || spec->spec[i].idx > 0) -				libbpf_print(level, "[%u]", spec->spec[i].idx); -		} - -		libbpf_print(level, " ("); -		for (i = 0; i < spec->raw_len; i++) -			libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]); - -		if (spec->bit_offset % 8) -			libbpf_print(level, " @ offset %u.%u)", -				     spec->bit_offset / 8, spec->bit_offset % 8); -		else -			libbpf_print(level, " @ offset %u)", spec->bit_offset / 8); -		return; -	} -} -  static size_t bpf_core_hash_fn(const void *key, void *ctx)  {  	return (size_t)key; @@ -6130,73 +5109,33 @@ static void *u32_as_hash_key(__u32 x)  	return (void *)(uintptr_t)x;  } -/* - * CO-RE relocate single instruction. - * - * The outline and important points of the algorithm: - * 1. For given local type, find corresponding candidate target types. - *    Candidate type is a type with the same "essential" name, ignoring - *    everything after last triple underscore (___). E.g., `sample`, - *    `sample___flavor_one`, `sample___flavor_another_one`, are all candidates - *    for each other. Names with triple underscore are referred to as - *    "flavors" and are useful, among other things, to allow to - *    specify/support incompatible variations of the same kernel struct, which - *    might differ between different kernel versions and/or build - *    configurations. - * - *    N.B. Struct "flavors" could be generated by bpftool's BTF-to-C - *    converter, when deduplicated BTF of a kernel still contains more than - *    one different types with the same name. In that case, ___2, ___3, etc - *    are appended starting from second name conflict. But start flavors are - *    also useful to be defined "locally", in BPF program, to extract same - *    data from incompatible changes between different kernel - *    versions/configurations. For instance, to handle field renames between - *    kernel versions, one can use two flavors of the struct name with the - *    same common name and use conditional relocations to extract that field, - *    depending on target kernel version. - * 2. For each candidate type, try to match local specification to this - *    candidate target type. Matching involves finding corresponding - *    high-level spec accessors, meaning that all named fields should match, - *    as well as all array accesses should be within the actual bounds. Also, - *    types should be compatible (see bpf_core_fields_are_compat for details). - * 3. It is supported and expected that there might be multiple flavors - *    matching the spec. As long as all the specs resolve to the same set of - *    offsets across all candidates, there is no error. If there is any - *    ambiguity, CO-RE relocation will fail. This is necessary to accomodate - *    imprefection of BTF deduplication, which can cause slight duplication of - *    the same BTF type, if some directly or indirectly referenced (by - *    pointer) type gets resolved to different actual types in different - *    object files. 
If such situation occurs, deduplicated BTF will end up - *    with two (or more) structurally identical types, which differ only in - *    types they refer to through pointer. This should be OK in most cases and - *    is not an error. - * 4. Candidate types search is performed by linearly scanning through all - *    types in target BTF. It is anticipated that this is overall more - *    efficient memory-wise and not significantly worse (if not better) - *    CPU-wise compared to prebuilding a map from all local type names to - *    a list of candidate type names. It's also sped up by caching resolved - *    list of matching candidates per each local "root" type ID, that has at - *    least one bpf_core_relo associated with it. This list is shared - *    between multiple relocations for the same type ID and is updated as some - *    of the candidates are pruned due to structural incompatibility. - */  static int bpf_core_apply_relo(struct bpf_program *prog,  			       const struct bpf_core_relo *relo,  			       int relo_idx,  			       const struct btf *local_btf,  			       struct hashmap *cand_cache)  { -	struct bpf_core_spec local_spec, cand_spec, targ_spec = {};  	const void *type_key = u32_as_hash_key(relo->type_id); -	struct bpf_core_relo_res cand_res, targ_res; +	struct bpf_core_cand_list *cands = NULL; +	const char *prog_name = prog->name;  	const struct btf_type *local_type;  	const char *local_name; -	struct core_cand_list *cands = NULL; -	__u32 local_id; -	const char *spec_str; -	int i, j, err; +	__u32 local_id = relo->type_id; +	struct bpf_insn *insn; +	int insn_idx, err; + +	if (relo->insn_off % BPF_INSN_SZ) +		return -EINVAL; +	insn_idx = relo->insn_off / BPF_INSN_SZ; +	/* adjust insn_idx from section frame of reference to the local +	 * program's frame of reference; (sub-)program code is not yet +	 * relocated, so it's enough to just subtract in-section offset +	 */ +	insn_idx = insn_idx - prog->sec_insn_off; +	if (insn_idx > prog->insns_cnt) +		return -EINVAL; +	insn = &prog->insns[insn_idx]; -	local_id = relo->type_id;  	local_type = btf__type_by_id(local_btf, local_id);  	if (!local_type)  		return -EINVAL; @@ -6205,51 +5144,19 @@ static int bpf_core_apply_relo(struct bpf_program *prog,  	if (!local_name)  		return -EINVAL; -	spec_str = btf__name_by_offset(local_btf, relo->access_str_off); -	if (str_is_empty(spec_str)) -		return -EINVAL; -  	if (prog->obj->gen_loader) { -		pr_warn("// TODO core_relo: prog %td insn[%d] %s %s kind %d\n", +		pr_warn("// TODO core_relo: prog %td insn[%d] %s kind %d\n",  			prog - prog->obj->programs, relo->insn_off / 8, -			local_name, spec_str, relo->kind); +			local_name, relo->kind);  		return -ENOTSUP;  	} -	err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec); -	if (err) { -		pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n", -			prog->name, relo_idx, local_id, btf_kind_str(local_type), -			str_is_empty(local_name) ? 
"<anon>" : local_name, -			spec_str, err); -		return -EINVAL; -	} - -	pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog->name, -		 relo_idx, core_relo_kind_str(relo->kind), relo->kind); -	bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec); -	libbpf_print(LIBBPF_DEBUG, "\n"); -	/* TYPE_ID_LOCAL relo is special and doesn't need candidate search */ -	if (relo->kind == BPF_TYPE_ID_LOCAL) { -		targ_res.validate = true; -		targ_res.poison = false; -		targ_res.orig_val = local_spec.root_type_id; -		targ_res.new_val = local_spec.root_type_id; -		goto patch_insn; -	} - -	/* libbpf doesn't support candidate search for anonymous types */ -	if (str_is_empty(spec_str)) { -		pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n", -			prog->name, relo_idx, core_relo_kind_str(relo->kind), relo->kind); -		return -EOPNOTSUPP; -	} - -	if (!hashmap__find(cand_cache, type_key, (void **)&cands)) { +	if (relo->kind != BPF_TYPE_ID_LOCAL && +	    !hashmap__find(cand_cache, type_key, (void **)&cands)) {  		cands = bpf_core_find_cands(prog->obj, local_btf, local_id);  		if (IS_ERR(cands)) {  			pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n", -				prog->name, relo_idx, local_id, btf_kind_str(local_type), +				prog_name, relo_idx, local_id, btf_kind_str(local_type),  				local_name, PTR_ERR(cands));  			return PTR_ERR(cands);  		} @@ -6260,97 +5167,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog,  		}  	} -	for (i = 0, j = 0; i < cands->len; i++) { -		err = bpf_core_spec_match(&local_spec, cands->cands[i].btf, -					  cands->cands[i].id, &cand_spec); -		if (err < 0) { -			pr_warn("prog '%s': relo #%d: error matching candidate #%d ", -				prog->name, relo_idx, i); -			bpf_core_dump_spec(LIBBPF_WARN, &cand_spec); -			libbpf_print(LIBBPF_WARN, ": %d\n", err); -			return err; -		} - -		pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog->name, -			 relo_idx, err == 0 ? "non-matching" : "matching", i); -		bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec); -		libbpf_print(LIBBPF_DEBUG, "\n"); - -		if (err == 0) -			continue; - -		err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, &cand_spec, &cand_res); -		if (err) -			return err; - -		if (j == 0) { -			targ_res = cand_res; -			targ_spec = cand_spec; -		} else if (cand_spec.bit_offset != targ_spec.bit_offset) { -			/* if there are many field relo candidates, they -			 * should all resolve to the same bit offset -			 */ -			pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n", -				prog->name, relo_idx, cand_spec.bit_offset, -				targ_spec.bit_offset); -			return -EINVAL; -		} else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) { -			/* all candidates should result in the same relocation -			 * decision and value, otherwise it's dangerous to -			 * proceed due to ambiguity -			 */ -			pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n", -				prog->name, relo_idx, -				cand_res.poison ? "failure" : "success", cand_res.new_val, -				targ_res.poison ? "failure" : "success", targ_res.new_val); -			return -EINVAL; -		} - -		cands->cands[j++] = cands->cands[i]; -	} - -	/* -	 * For BPF_FIELD_EXISTS relo or when used BPF program has field -	 * existence checks or kernel version/config checks, it's expected -	 * that we might not find any candidates. 
In this case, if field -	 * wasn't found in any candidate, the list of candidates shouldn't -	 * change at all, we'll just handle relocating appropriately, -	 * depending on relo's kind. -	 */ -	if (j > 0) -		cands->len = j; - -	/* -	 * If no candidates were found, it might be both a programmer error, -	 * as well as expected case, depending whether instruction w/ -	 * relocation is guarded in some way that makes it unreachable (dead -	 * code) if relocation can't be resolved. This is handled in -	 * bpf_core_patch_insn() uniformly by replacing that instruction with -	 * BPF helper call insn (using invalid helper ID). If that instruction -	 * is indeed unreachable, then it will be ignored and eliminated by -	 * verifier. If it was an error, then verifier will complain and point -	 * to a specific instruction number in its log. -	 */ -	if (j == 0) { -		pr_debug("prog '%s': relo #%d: no matching targets found\n", -			 prog->name, relo_idx); - -		/* calculate single target relo result explicitly */ -		err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, NULL, &targ_res); -		if (err) -			return err; -	} - -patch_insn: -	/* bpf_core_patch_insn() should know how to handle missing targ_spec */ -	err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res); -	if (err) { -		pr_warn("prog '%s': relo #%d: failed to patch insn #%zu: %d\n", -			prog->name, relo_idx, relo->insn_off / BPF_INSN_SZ, err); -		return -EINVAL; -	} - -	return 0; +	return bpf_core_apply_relo_insn(prog_name, insn, insn_idx, relo, relo_idx, local_btf, cands);  }  static int @@ -6496,11 +5313,11 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)  				}  				insn[1].imm = ext->kcfg.data_off;  			} else /* EXT_KSYM */ { -				if (ext->ksym.type_id) { /* typed ksyms */ +				if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */  					insn[0].src_reg = BPF_PSEUDO_BTF_ID;  					insn[0].imm = ext->ksym.kernel_btf_id;  					insn[1].imm = ext->ksym.kernel_btf_obj_fd; -				} else { /* typeless ksyms */ +				} else { /* typeless ksyms or unresolved typed ksyms */  					insn[0].imm = (__u32)ext->ksym.addr;  					insn[1].imm = ext->ksym.addr >> 32;  				} @@ -7190,7 +6007,7 @@ static int bpf_object__collect_relos(struct bpf_object *obj)  	for (i = 0; i < obj->nr_programs; i++) {  		struct bpf_program *p = &obj->programs[i]; -		 +  		if (!p->nr_reloc)  			continue; @@ -7554,7 +6371,7 @@ static struct bpf_object *  __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,  		   const struct bpf_object_open_opts *opts)  { -	const char *obj_name, *kconfig; +	const char *obj_name, *kconfig, *btf_tmp_path;  	struct bpf_program *prog;  	struct bpf_object *obj;  	char tmp_name[64]; @@ -7585,11 +6402,26 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,  	if (IS_ERR(obj))  		return obj; +	btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL); +	if (btf_tmp_path) { +		if (strlen(btf_tmp_path) >= PATH_MAX) { +			err = -ENAMETOOLONG; +			goto out; +		} +		obj->btf_custom_path = strdup(btf_tmp_path); +		if (!obj->btf_custom_path) { +			err = -ENOMEM; +			goto out; +		} +	} +  	kconfig = OPTS_GET(opts, kconfig, NULL);  	if (kconfig) {  		obj->kconfig = strdup(kconfig); -		if (!obj->kconfig) -			return ERR_PTR(-ENOMEM); +		if (!obj->kconfig) { +			err = -ENOMEM; +			goto out; +		}  	}  	err = bpf_object__elf_init(obj); @@ -7812,11 +6644,8 @@ static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,  				break;  		}  	} -	if (id <= 0) { -		pr_warn("extern (%s 
ksym) '%s': failed to find BTF ID in kernel BTF(s).\n", -			__btf_kind_str(kind), ksym_name); +	if (id <= 0)  		return -ESRCH; -	}  	*res_btf = btf;  	*res_btf_fd = btf_fd; @@ -7833,8 +6662,13 @@ static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,  	struct btf *btf = NULL;  	id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &btf_fd); -	if (id < 0) +	if (id == -ESRCH && ext->is_weak) { +		return 0; +	} else if (id < 0) { +		pr_warn("extern (var ksym) '%s': not found in kernel BTF\n", +			ext->name);  		return id; +	}  	/* find local type_id */  	local_type_id = ext->ksym.type_id; @@ -8055,7 +6889,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)  	err = err ? : bpf_object__sanitize_maps(obj);  	err = err ? : bpf_object__init_kern_struct_ops_maps(obj);  	err = err ? : bpf_object__create_maps(obj); -	err = err ? : bpf_object__relocate(obj, attr->target_btf_path); +	err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : attr->target_btf_path);  	err = err ? : bpf_object__load_progs(obj, attr->log_level);  	if (obj->gen_loader) { @@ -8450,6 +7284,11 @@ const char *bpf_map__get_pin_path(const struct bpf_map *map)  	return map->pin_path;  } +const char *bpf_map__pin_path(const struct bpf_map *map) +{ +	return map->pin_path; +} +  bool bpf_map__is_pinned(const struct bpf_map *map)  {  	return map->pinned; @@ -8702,6 +7541,7 @@ void bpf_object__close(struct bpf_object *obj)  	for (i = 0; i < obj->nr_maps; i++)  		bpf_map__destroy(&obj->maps[i]); +	zfree(&obj->btf_custom_path);  	zfree(&obj->kconfig);  	zfree(&obj->externs);  	obj->nr_extern = 0; @@ -9471,7 +8311,7 @@ static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,  	ret = snprintf(btf_type_name, sizeof(btf_type_name),  		       "%s%s", prefix, name);  	/* snprintf returns the number of characters written excluding the -	 * the terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it +	 * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it  	 * indicates truncation.  	 
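The snprintf() contract relied on here, checked in isolation (illustrative buffer and strings): the return value is the length the formatted string would have had, excluding the terminating NUL, so a return value >= the buffer size means the output was truncated:

#include <stdio.h>

int main(void)
{
	char buf[16];
	int ret = snprintf(buf, sizeof(buf), "%s%s", "btf_trace_", "sched_switch");

	if (ret < 0 || ret >= (int)sizeof(buf))
		printf("truncated to \"%s\" (needed %d bytes)\n", buf, ret + 1);
	return 0;
}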
*/  	if (ret < 0 || ret >= sizeof(btf_type_name)) @@ -9495,7 +8335,7 @@ int libbpf_find_vmlinux_btf_id(const char *name,  	struct btf *btf;  	int err; -	btf = libbpf_find_kernel_btf(); +	btf = btf__load_vmlinux_btf();  	err = libbpf_get_error(btf);  	if (err) {  		pr_warn("vmlinux BTF is not found\n"); @@ -9514,8 +8354,8 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)  {  	struct bpf_prog_info_linear *info_linear;  	struct bpf_prog_info *info; -	struct btf *btf = NULL; -	int err = -EINVAL; +	struct btf *btf; +	int err;  	info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0);  	err = libbpf_get_error(info_linear); @@ -9524,12 +8364,15 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)  			attach_prog_fd);  		return err;  	} + +	err = -EINVAL;  	info = &info_linear->info;  	if (!info->btf_id) {  		pr_warn("The target program doesn't have BTF\n");  		goto out;  	} -	if (btf__get_from_id(info->btf_id, &btf)) { +	btf = btf__load_from_kernel_by_id(info->btf_id); +	if (libbpf_get_error(btf)) {  		pr_warn("Failed to get BTF of the program\n");  		goto out;  	} @@ -10003,7 +8846,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,  struct bpf_link {  	int (*detach)(struct bpf_link *link); -	int (*destroy)(struct bpf_link *link); +	void (*dealloc)(struct bpf_link *link);  	char *pin_path;		/* NULL, if not pinned */  	int fd;			/* hook FD, -1 if not applicable */  	bool disconnected; @@ -10013,7 +8856,7 @@ struct bpf_link {  int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)  {  	int ret; -	 +  	ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);  	return libbpf_err_errno(ret);  } @@ -10042,11 +8885,12 @@ int bpf_link__destroy(struct bpf_link *link)  	if (!link->disconnected && link->detach)  		err = link->detach(link); -	if (link->destroy) -		link->destroy(link);  	if (link->pin_path)  		free(link->pin_path); -	free(link); +	if (link->dealloc) +		link->dealloc(link); +	else +		free(link);  	return libbpf_err(err);  } @@ -10143,23 +8987,42 @@ int bpf_link__unpin(struct bpf_link *link)  	return 0;  } -static int bpf_link__detach_perf_event(struct bpf_link *link) +struct bpf_link_perf { +	struct bpf_link link; +	int perf_event_fd; +}; + +static int bpf_link_perf_detach(struct bpf_link *link)  { -	int err; +	struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); +	int err = 0; -	err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0); -	if (err) +	if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0)  		err = -errno; +	if (perf_link->perf_event_fd != link->fd) +		close(perf_link->perf_event_fd);  	close(link->fd); +  	return libbpf_err(err);  } -struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pfd) +static void bpf_link_perf_dealloc(struct bpf_link *link) +{ +	struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); + +	free(perf_link); +} + +struct bpf_link *bpf_program__attach_perf_event_opts(struct bpf_program *prog, int pfd, +						     const struct bpf_perf_event_opts *opts)  {  	char errmsg[STRERR_BUFSIZE]; -	struct bpf_link *link; -	int prog_fd, err; +	struct bpf_link_perf *link; +	int prog_fd, link_fd = -1, err; + +	if (!OPTS_VALID(opts, bpf_perf_event_opts)) +		return libbpf_err_ptr(-EINVAL);  	if (pfd < 0) {  		pr_warn("prog '%s': invalid perf event FD %d\n", @@ -10176,27 +9039,59 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pf  	link = 
calloc(1, sizeof(*link));  	if (!link)  		return libbpf_err_ptr(-ENOMEM); -	link->detach = &bpf_link__detach_perf_event; -	link->fd = pfd; +	link->link.detach = &bpf_link_perf_detach; +	link->link.dealloc = &bpf_link_perf_dealloc; +	link->perf_event_fd = pfd; -	if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) { -		err = -errno; -		free(link); -		pr_warn("prog '%s': failed to attach to pfd %d: %s\n", -			prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); -		if (err == -EPROTO) -			pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n", -				prog->name, pfd); -		return libbpf_err_ptr(err); +	if (kernel_supports(prog->obj, FEAT_PERF_LINK)) { +		DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts, +			.perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0)); + +		link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts); +		if (link_fd < 0) { +			err = -errno; +			pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n", +				prog->name, pfd, +				err, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); +			goto err_out; +		} +		link->link.fd = link_fd; +	} else { +		if (OPTS_GET(opts, bpf_cookie, 0)) { +			pr_warn("prog '%s': user context value is not supported\n", prog->name); +			err = -EOPNOTSUPP; +			goto err_out; +		} + +		if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) { +			err = -errno; +			pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n", +				prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); +			if (err == -EPROTO) +				pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n", +					prog->name, pfd); +			goto err_out; +		} +		link->link.fd = pfd;  	}  	if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {  		err = -errno; -		free(link); -		pr_warn("prog '%s': failed to enable pfd %d: %s\n", +		pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n",  			prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); -		return libbpf_err_ptr(err); +		goto err_out;  	} -	return link; + +	return &link->link; +err_out: +	if (link_fd >= 0) +		close(link_fd); +	free(link); +	return libbpf_err_ptr(err); +} + +struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pfd) +{ +	return bpf_program__attach_perf_event_opts(prog, pfd, NULL);  }  /* @@ -10257,13 +9152,19 @@ static int determine_uprobe_retprobe_bit(void)  	return parse_uint_from_file(file, "config:%d\n");  } +#define PERF_UPROBE_REF_CTR_OFFSET_BITS 32 +#define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32 +  static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, -				 uint64_t offset, int pid) +				 uint64_t offset, int pid, size_t ref_ctr_off)  {  	struct perf_event_attr attr = {};  	char errmsg[STRERR_BUFSIZE];  	int type, pfd, err; +	if (ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS)) +		return -EINVAL; +  	type = uprobe ? 
determine_uprobe_perf_type()  		      : determine_kprobe_perf_type();  	if (type < 0) { @@ -10286,6 +9187,7 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,  	}  	attr.size = sizeof(attr);  	attr.type = type; +	attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT;  	attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */  	attr.config2 = offset;		 /* kprobe_addr or probe_offset */ @@ -10304,23 +9206,34 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,  	return pfd;  } -struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog, -					    bool retprobe, -					    const char *func_name) +struct bpf_link * +bpf_program__attach_kprobe_opts(struct bpf_program *prog, +				const char *func_name, +				const struct bpf_kprobe_opts *opts)  { +	DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);  	char errmsg[STRERR_BUFSIZE];  	struct bpf_link *link; +	unsigned long offset; +	bool retprobe;  	int pfd, err; +	if (!OPTS_VALID(opts, bpf_kprobe_opts)) +		return libbpf_err_ptr(-EINVAL); + +	retprobe = OPTS_GET(opts, retprobe, false); +	offset = OPTS_GET(opts, offset, 0); +	pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); +  	pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name, -				    0 /* offset */, -1 /* pid */); +				    offset, -1 /* pid */, 0 /* ref_ctr_off */);  	if (pfd < 0) {  		pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",  			prog->name, retprobe ? "kretprobe" : "kprobe", func_name,  			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));  		return libbpf_err_ptr(pfd);  	} -	link = bpf_program__attach_perf_event(prog, pfd); +	link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);  	err = libbpf_get_error(link);  	if (err) {  		close(pfd); @@ -10332,29 +9245,70 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,  	return link;  } +struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog, +					    bool retprobe, +					    const char *func_name) +{ +	DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts, +		.retprobe = retprobe, +	); + +	return bpf_program__attach_kprobe_opts(prog, func_name, &opts); +} +  static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,  				      struct bpf_program *prog)  { +	DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts); +	unsigned long offset = 0; +	struct bpf_link *link;  	const char *func_name; -	bool retprobe; +	char *func; +	int n, err;  	func_name = prog->sec_name + sec->len; -	retprobe = strcmp(sec->sec, "kretprobe/") == 0; +	opts.retprobe = strcmp(sec->sec, "kretprobe/") == 0; + +	n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset); +	if (n < 1) { +		err = -EINVAL; +		pr_warn("kprobe name is invalid: %s\n", func_name); +		return libbpf_err_ptr(err); +	} +	if (opts.retprobe && offset != 0) { +		free(func); +		err = -EINVAL; +		pr_warn("kretprobes do not support offset specification\n"); +		return libbpf_err_ptr(err); +	} -	return bpf_program__attach_kprobe(prog, retprobe, func_name); +	opts.offset = offset; +	link = bpf_program__attach_kprobe_opts(prog, func, &opts); +	free(func); +	return link;  } -struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog, -					    bool retprobe, pid_t pid, -					    const char *binary_path, -					    size_t func_offset) +LIBBPF_API struct bpf_link * +bpf_program__attach_uprobe_opts(struct bpf_program *prog, pid_t pid, +				const char *binary_path, size_t func_offset, +				const struct bpf_uprobe_opts *opts)  { +	
DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);  	char errmsg[STRERR_BUFSIZE];  	struct bpf_link *link; +	size_t ref_ctr_off;  	int pfd, err; +	bool retprobe; + +	if (!OPTS_VALID(opts, bpf_uprobe_opts)) +		return libbpf_err_ptr(-EINVAL); -	pfd = perf_event_open_probe(true /* uprobe */, retprobe, -				    binary_path, func_offset, pid); +	retprobe = OPTS_GET(opts, retprobe, false); +	ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0); +	pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); + +	pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, +				    func_offset, pid, ref_ctr_off);  	if (pfd < 0) {  		pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",  			prog->name, retprobe ? "uretprobe" : "uprobe", @@ -10362,7 +9316,7 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,  			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));  		return libbpf_err_ptr(pfd);  	} -	link = bpf_program__attach_perf_event(prog, pfd); +	link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);  	err = libbpf_get_error(link);  	if (err) {  		close(pfd); @@ -10375,6 +9329,16 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,  	return link;  } +struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog, +					    bool retprobe, pid_t pid, +					    const char *binary_path, +					    size_t func_offset) +{ +	DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe); + +	return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts); +} +  static int determine_tracepoint_id(const char *tp_category,  				   const char *tp_name)  { @@ -10425,14 +9389,21 @@ static int perf_event_open_tracepoint(const char *tp_category,  	return pfd;  } -struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog, -						const char *tp_category, -						const char *tp_name) +struct bpf_link *bpf_program__attach_tracepoint_opts(struct bpf_program *prog, +						     const char *tp_category, +						     const char *tp_name, +						     const struct bpf_tracepoint_opts *opts)  { +	DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);  	char errmsg[STRERR_BUFSIZE];  	struct bpf_link *link;  	int pfd, err; +	if (!OPTS_VALID(opts, bpf_tracepoint_opts)) +		return libbpf_err_ptr(-EINVAL); + +	pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); +  	pfd = perf_event_open_tracepoint(tp_category, tp_name);  	if (pfd < 0) {  		pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n", @@ -10440,7 +9411,7 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,  			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));  		return libbpf_err_ptr(pfd);  	} -	link = bpf_program__attach_perf_event(prog, pfd); +	link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);  	err = libbpf_get_error(link);  	if (err) {  		close(pfd); @@ -10452,6 +9423,13 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,  	return link;  } +struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog, +						const char *tp_category, +						const char *tp_name) +{ +	return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL); +} +  static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,  				  struct bpf_program *prog)  { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 6e61342ba56c..f177d897c5f7 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -94,8 +94,15 @@ struct bpf_object_open_opts {  	 * system Kconfig for 
CONFIG_xxx externs.  	 */  	const char *kconfig; +	/* Path to the custom BTF to be used for BPF CO-RE relocations. +	 * This custom BTF completely replaces the use of vmlinux BTF +	 * for the purpose of CO-RE relocations. +	 * NOTE: any other BPF feature (e.g., fentry/fexit programs, +	 * struct_ops, etc) will need actual kernel BTF at /sys/kernel/btf/vmlinux. +	 */ +	const char *btf_custom_path;  }; -#define bpf_object_open_opts__last_field kconfig +#define bpf_object_open_opts__last_field btf_custom_path  LIBBPF_API struct bpf_object *bpf_object__open(const char *path);  LIBBPF_API struct bpf_object * @@ -237,20 +244,86 @@ LIBBPF_API int bpf_link__destroy(struct bpf_link *link);  LIBBPF_API struct bpf_link *  bpf_program__attach(struct bpf_program *prog); + +struct bpf_perf_event_opts { +	/* size of this struct, for forward/backward compatiblity */ +	size_t sz; +	/* custom user-provided value fetchable through bpf_get_attach_cookie() */ +	__u64 bpf_cookie; +}; +#define bpf_perf_event_opts__last_field bpf_cookie +  LIBBPF_API struct bpf_link *  bpf_program__attach_perf_event(struct bpf_program *prog, int pfd); + +LIBBPF_API struct bpf_link * +bpf_program__attach_perf_event_opts(struct bpf_program *prog, int pfd, +				    const struct bpf_perf_event_opts *opts); + +struct bpf_kprobe_opts { +	/* size of this struct, for forward/backward compatiblity */ +	size_t sz; +	/* custom user-provided value fetchable through bpf_get_attach_cookie() */ +	__u64 bpf_cookie; +	/* function's offset to install kprobe to */ +	unsigned long offset; +	/* kprobe is return probe */ +	bool retprobe; +	size_t :0; +}; +#define bpf_kprobe_opts__last_field retprobe +  LIBBPF_API struct bpf_link *  bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe,  			   const char *func_name);  LIBBPF_API struct bpf_link * +bpf_program__attach_kprobe_opts(struct bpf_program *prog, +                                const char *func_name, +                                const struct bpf_kprobe_opts *opts); + +struct bpf_uprobe_opts { +	/* size of this struct, for forward/backward compatiblity */ +	size_t sz; +	/* offset of kernel reference counted USDT semaphore, added in +	 * a6ca88b241d5 ("trace_uprobe: support reference counter in fd-based uprobe") +	 */ +	size_t ref_ctr_offset; +	/* custom user-provided value fetchable through bpf_get_attach_cookie() */ +	__u64 bpf_cookie; +	/* uprobe is return probe, invoked at function return time */ +	bool retprobe; +	size_t :0; +}; +#define bpf_uprobe_opts__last_field retprobe + +LIBBPF_API struct bpf_link *  bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe,  			   pid_t pid, const char *binary_path,  			   size_t func_offset);  LIBBPF_API struct bpf_link * +bpf_program__attach_uprobe_opts(struct bpf_program *prog, pid_t pid, +				const char *binary_path, size_t func_offset, +				const struct bpf_uprobe_opts *opts); + +struct bpf_tracepoint_opts { +	/* size of this struct, for forward/backward compatiblity */ +	size_t sz; +	/* custom user-provided value fetchable through bpf_get_attach_cookie() */ +	__u64 bpf_cookie; +}; +#define bpf_tracepoint_opts__last_field bpf_cookie + +LIBBPF_API struct bpf_link *  bpf_program__attach_tracepoint(struct bpf_program *prog,  			       const char *tp_category,  			       const char *tp_name);  LIBBPF_API struct bpf_link * +bpf_program__attach_tracepoint_opts(struct bpf_program *prog, +				    const char *tp_category, +				    const char *tp_name, +				    const struct bpf_tracepoint_opts *opts); + +LIBBPF_API struct 
bpf_link *  bpf_program__attach_raw_tracepoint(struct bpf_program *prog,  				   const char *tp_name);  LIBBPF_API struct bpf_link * @@ -477,6 +550,7 @@ LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);  LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);  LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path);  LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map); +LIBBPF_API const char *bpf_map__pin_path(const struct bpf_map *map);  LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map);  LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);  LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 944c99d1ded3..bbc53bb25f68 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -371,7 +371,18 @@ LIBBPF_0.4.0 {  LIBBPF_0.5.0 {  	global:  		bpf_map__initial_value; +		bpf_map__pin_path;  		bpf_map_lookup_and_delete_elem_flags; +		bpf_program__attach_kprobe_opts; +		bpf_program__attach_perf_event_opts; +		bpf_program__attach_tracepoint_opts; +		bpf_program__attach_uprobe_opts;  		bpf_object__gen_loader; +		btf__load_from_kernel_by_id; +		btf__load_from_kernel_by_id_split; +		btf__load_into_kernel; +		btf__load_module_btf; +		btf__load_vmlinux_btf; +		btf_dump__dump_type_data;  		libbpf_set_strict_mode;  } LIBBPF_0.4.0; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 016ca7cb4f8a..533b0211f40a 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -14,6 +14,7 @@  #include <errno.h>  #include <linux/err.h>  #include "libbpf_legacy.h" +#include "relo_core.h"  /* make sure libbpf doesn't use kernel-only integer typedefs */  #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 @@ -195,6 +196,17 @@ void *libbpf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,  		     size_t cur_cnt, size_t max_cnt, size_t add_cnt);  int libbpf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt); +static inline bool libbpf_is_mem_zeroed(const char *p, ssize_t len) +{ +	while (len > 0) { +		if (*p) +			return false; +		p++; +		len--; +	} +	return true; +} +  static inline bool libbpf_validate_opts(const char *opts,  					size_t opts_sz, size_t user_sz,  					const char *type_name) @@ -203,16 +215,9 @@ static inline bool libbpf_validate_opts(const char *opts,  		pr_warn("%s size (%zu) is too small\n", type_name, user_sz);  		return false;  	} -	if (user_sz > opts_sz) { -		size_t i; - -		for (i = opts_sz; i < user_sz; i++) { -			if (opts[i]) { -				pr_warn("%s has non-zero extra bytes\n", -					type_name); -				return false; -			} -		} +	if (!libbpf_is_mem_zeroed(opts + opts_sz, (ssize_t)user_sz - opts_sz)) { +		pr_warn("%s has non-zero extra bytes\n", type_name); +		return false;  	}  	return true;  } @@ -232,6 +237,14 @@ static inline bool libbpf_validate_opts(const char *opts,  			(opts)->field = value;	\  	} while (0) +#define OPTS_ZEROED(opts, last_nonzero_field)				      \ +({									      \ +	ssize_t __off = offsetofend(typeof(*(opts)), last_nonzero_field);     \ +	!(opts) || libbpf_is_mem_zeroed((const void *)opts + __off,	      \ +					(opts)->sz - __off);		      \ +}) + +  int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz);  int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz);  int libbpf__load_raw_btf(const char *raw_types, size_t types_len, @@ -366,76 +379,6 @@ struct bpf_line_info_min {  	
__u32	line_col;  }; -/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value - * has to be adjusted by relocations. - */ -enum bpf_core_relo_kind { -	BPF_FIELD_BYTE_OFFSET = 0,	/* field byte offset */ -	BPF_FIELD_BYTE_SIZE = 1,	/* field size in bytes */ -	BPF_FIELD_EXISTS = 2,		/* field existence in target kernel */ -	BPF_FIELD_SIGNED = 3,		/* field signedness (0 - unsigned, 1 - signed) */ -	BPF_FIELD_LSHIFT_U64 = 4,	/* bitfield-specific left bitshift */ -	BPF_FIELD_RSHIFT_U64 = 5,	/* bitfield-specific right bitshift */ -	BPF_TYPE_ID_LOCAL = 6,		/* type ID in local BPF object */ -	BPF_TYPE_ID_TARGET = 7,		/* type ID in target kernel */ -	BPF_TYPE_EXISTS = 8,		/* type existence in target kernel */ -	BPF_TYPE_SIZE = 9,		/* type size in bytes */ -	BPF_ENUMVAL_EXISTS = 10,	/* enum value existence in target kernel */ -	BPF_ENUMVAL_VALUE = 11,		/* enum value integer value */ -}; - -/* The minimum bpf_core_relo checked by the loader - * - * CO-RE relocation captures the following data: - * - insn_off - instruction offset (in bytes) within a BPF program that needs - *   its insn->imm field to be relocated with actual field info; - * - type_id - BTF type ID of the "root" (containing) entity of a relocatable - *   type or field; - * - access_str_off - offset into corresponding .BTF string section. String - *   interpretation depends on specific relocation kind: - *     - for field-based relocations, string encodes an accessed field using - *     a sequence of field and array indices, separated by colon (:). It's - *     conceptually very close to LLVM's getelementptr ([0]) instruction's - *     arguments for identifying offset to a field. - *     - for type-based relocations, strings is expected to be just "0"; - *     - for enum value-based relocations, string contains an index of enum - *     value within its enum type; - * - * Example to provide a better feel. - * - *   struct sample { - *       int a; - *       struct { - *           int b[10]; - *       }; - *   }; - * - *   struct sample *s = ...; - *   int x = &s->a;     // encoded as "0:0" (a is field #0) - *   int y = &s->b[5];  // encoded as "0:1:0:5" (anon struct is field #1,  - *                      // b is field #0 inside anon struct, accessing elem #5) - *   int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array) - * - * type_id for all relocs in this example  will capture BTF type id of - * `struct sample`. - * - * Such relocation is emitted when using __builtin_preserve_access_index() - * Clang built-in, passing expression that captures field address, e.g.: - * - * bpf_probe_read(&dst, sizeof(dst), - *		  __builtin_preserve_access_index(&src->a.b.c)); - * - * In this case Clang will emit field relocation recording necessary data to - * be able to find offset of embedded `a.b.c` field within `src` struct. 
- * - *   [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction - */ -struct bpf_core_relo { -	__u32   insn_off; -	__u32   type_id; -	__u32   access_str_off; -	enum bpf_core_relo_kind kind; -};  typedef int (*type_id_visit_fn)(__u32 *type_id, void *ctx);  typedef int (*str_off_visit_fn)(__u32 *str_off, void *ctx); @@ -494,4 +437,14 @@ static inline void *libbpf_ptr(void *ret)  	return ret;  } +static inline bool str_is_empty(const char *s) +{ +	return !s || !s[0]; +} + +static inline bool is_ldimm64_insn(struct bpf_insn *insn) +{ +	return insn->code == (BPF_LD | BPF_IMM | BPF_DW); +} +  #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c new file mode 100644 index 000000000000..4016ed492d0c --- /dev/null +++ b/tools/lib/bpf/relo_core.c @@ -0,0 +1,1295 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2019 Facebook */ + +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <ctype.h> +#include <linux/err.h> + +#include "libbpf.h" +#include "bpf.h" +#include "btf.h" +#include "str_error.h" +#include "libbpf_internal.h" + +#define BPF_CORE_SPEC_MAX_LEN 64 + +/* represents BPF CO-RE field or array element accessor */ +struct bpf_core_accessor { +	__u32 type_id;		/* struct/union type or array element type */ +	__u32 idx;		/* field index or array index */ +	const char *name;	/* field name or NULL for array accessor */ +}; + +struct bpf_core_spec { +	const struct btf *btf; +	/* high-level spec: named fields and array indices only */ +	struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN]; +	/* original unresolved (no skip_mods_or_typedefs) root type ID */ +	__u32 root_type_id; +	/* CO-RE relocation kind */ +	enum bpf_core_relo_kind relo_kind; +	/* high-level spec length */ +	int len; +	/* raw, low-level spec: 1-to-1 with accessor spec string */ +	int raw_spec[BPF_CORE_SPEC_MAX_LEN]; +	/* raw spec length */ +	int raw_len; +	/* field bit offset represented by spec */ +	__u32 bit_offset; +}; + +static bool is_flex_arr(const struct btf *btf, +			const struct bpf_core_accessor *acc, +			const struct btf_array *arr) +{ +	const struct btf_type *t; + +	/* not a flexible array, if not inside a struct or has non-zero size */ +	if (!acc->name || arr->nelems > 0) +		return false; + +	/* has to be the last member of enclosing struct */ +	t = btf__type_by_id(btf, acc->type_id); +	return acc->idx == btf_vlen(t) - 1; +} + +static const char *core_relo_kind_str(enum bpf_core_relo_kind kind) +{ +	switch (kind) { +	case BPF_FIELD_BYTE_OFFSET: return "byte_off"; +	case BPF_FIELD_BYTE_SIZE: return "byte_sz"; +	case BPF_FIELD_EXISTS: return "field_exists"; +	case BPF_FIELD_SIGNED: return "signed"; +	case BPF_FIELD_LSHIFT_U64: return "lshift_u64"; +	case BPF_FIELD_RSHIFT_U64: return "rshift_u64"; +	case BPF_TYPE_ID_LOCAL: return "local_type_id"; +	case BPF_TYPE_ID_TARGET: return "target_type_id"; +	case BPF_TYPE_EXISTS: return "type_exists"; +	case BPF_TYPE_SIZE: return "type_size"; +	case BPF_ENUMVAL_EXISTS: return "enumval_exists"; +	case BPF_ENUMVAL_VALUE: return "enumval_value"; +	default: return "unknown"; +	} +} + +static bool core_relo_is_field_based(enum bpf_core_relo_kind kind) +{ +	switch (kind) { +	case BPF_FIELD_BYTE_OFFSET: +	case BPF_FIELD_BYTE_SIZE: +	case BPF_FIELD_EXISTS: +	case BPF_FIELD_SIGNED: +	case BPF_FIELD_LSHIFT_U64: +	case BPF_FIELD_RSHIFT_U64: +		return true; +	default: +		return false; +	} +} + +static bool core_relo_is_type_based(enum bpf_core_relo_kind kind) +{ +	switch (kind) { +	
case BPF_TYPE_ID_LOCAL: +	case BPF_TYPE_ID_TARGET: +	case BPF_TYPE_EXISTS: +	case BPF_TYPE_SIZE: +		return true; +	default: +		return false; +	} +} + +static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) +{ +	switch (kind) { +	case BPF_ENUMVAL_EXISTS: +	case BPF_ENUMVAL_VALUE: +		return true; +	default: +		return false; +	} +} + +/* + * Turn bpf_core_relo into a low- and high-level spec representation, + * validating correctness along the way, as well as calculating resulting + * field bit offset, specified by accessor string. Low-level spec captures + * every single level of nestedness, including traversing anonymous + * struct/union members. High-level one only captures semantically meaningful + * "turning points": named fields and array indicies. + * E.g., for this case: + * + *   struct sample { + *       int __unimportant; + *       struct { + *           int __1; + *           int __2; + *           int a[7]; + *       }; + *   }; + * + *   struct sample *s = ...; + * + *   int x = &s->a[3]; // access string = '0:1:2:3' + * + * Low-level spec has 1:1 mapping with each element of access string (it's + * just a parsed access string representation): [0, 1, 2, 3]. + * + * High-level spec will capture only 3 points: + *   - intial zero-index access by pointer (&s->... is the same as &s[0]...); + *   - field 'a' access (corresponds to '2' in low-level spec); + *   - array element #3 access (corresponds to '3' in low-level spec). + * + * Type-based relocations (TYPE_EXISTS/TYPE_SIZE, + * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their + * spec and raw_spec are kept empty. + * + * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access + * string to specify enumerator's value index that need to be relocated. 
+ */ +static int bpf_core_parse_spec(const struct btf *btf, +			       __u32 type_id, +			       const char *spec_str, +			       enum bpf_core_relo_kind relo_kind, +			       struct bpf_core_spec *spec) +{ +	int access_idx, parsed_len, i; +	struct bpf_core_accessor *acc; +	const struct btf_type *t; +	const char *name; +	__u32 id; +	__s64 sz; + +	if (str_is_empty(spec_str) || *spec_str == ':') +		return -EINVAL; + +	memset(spec, 0, sizeof(*spec)); +	spec->btf = btf; +	spec->root_type_id = type_id; +	spec->relo_kind = relo_kind; + +	/* type-based relocations don't have a field access string */ +	if (core_relo_is_type_based(relo_kind)) { +		if (strcmp(spec_str, "0")) +			return -EINVAL; +		return 0; +	} + +	/* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */ +	while (*spec_str) { +		if (*spec_str == ':') +			++spec_str; +		if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1) +			return -EINVAL; +		if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) +			return -E2BIG; +		spec_str += parsed_len; +		spec->raw_spec[spec->raw_len++] = access_idx; +	} + +	if (spec->raw_len == 0) +		return -EINVAL; + +	t = skip_mods_and_typedefs(btf, type_id, &id); +	if (!t) +		return -EINVAL; + +	access_idx = spec->raw_spec[0]; +	acc = &spec->spec[0]; +	acc->type_id = id; +	acc->idx = access_idx; +	spec->len++; + +	if (core_relo_is_enumval_based(relo_kind)) { +		if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t)) +			return -EINVAL; + +		/* record enumerator name in a first accessor */ +		acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off); +		return 0; +	} + +	if (!core_relo_is_field_based(relo_kind)) +		return -EINVAL; + +	sz = btf__resolve_size(btf, id); +	if (sz < 0) +		return sz; +	spec->bit_offset = access_idx * sz * 8; + +	for (i = 1; i < spec->raw_len; i++) { +		t = skip_mods_and_typedefs(btf, id, &id); +		if (!t) +			return -EINVAL; + +		access_idx = spec->raw_spec[i]; +		acc = &spec->spec[spec->len]; + +		if (btf_is_composite(t)) { +			const struct btf_member *m; +			__u32 bit_offset; + +			if (access_idx >= btf_vlen(t)) +				return -EINVAL; + +			bit_offset = btf_member_bit_offset(t, access_idx); +			spec->bit_offset += bit_offset; + +			m = btf_members(t) + access_idx; +			if (m->name_off) { +				name = btf__name_by_offset(btf, m->name_off); +				if (str_is_empty(name)) +					return -EINVAL; + +				acc->type_id = id; +				acc->idx = access_idx; +				acc->name = name; +				spec->len++; +			} + +			id = m->type; +		} else if (btf_is_array(t)) { +			const struct btf_array *a = btf_array(t); +			bool flex; + +			t = skip_mods_and_typedefs(btf, a->type, &id); +			if (!t) +				return -EINVAL; + +			flex = is_flex_arr(btf, acc - 1, a); +			if (!flex && access_idx >= a->nelems) +				return -EINVAL; + +			spec->spec[spec->len].type_id = id; +			spec->spec[spec->len].idx = access_idx; +			spec->len++; + +			sz = btf__resolve_size(btf, id); +			if (sz < 0) +				return sz; +			spec->bit_offset += access_idx * sz * 8; +		} else { +			pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n", +				type_id, spec_str, i, id, btf_kind_str(t)); +			return -EINVAL; +		} +	} + +	return 0; +} + +/* Check two types for compatibility for the purpose of field access + * relocation. 
const/volatile/restrict and typedefs are skipped to ensure we + * are relocating semantically compatible entities: + *   - any two STRUCTs/UNIONs are compatible and can be mixed; + *   - any two FWDs are compatible, if their names match (modulo flavor suffix); + *   - any two PTRs are always compatible; + *   - for ENUMs, names should be the same (ignoring flavor suffix) or at + *     least one of enums should be anonymous; + *   - for ENUMs, check sizes, names are ignored; + *   - for INT, size and signedness are ignored; + *   - any two FLOATs are always compatible; + *   - for ARRAY, dimensionality is ignored, element types are checked for + *     compatibility recursively; + *   - everything else shouldn't be ever a target of relocation. + * These rules are not set in stone and probably will be adjusted as we get + * more experience with using BPF CO-RE relocations. + */ +static int bpf_core_fields_are_compat(const struct btf *local_btf, +				      __u32 local_id, +				      const struct btf *targ_btf, +				      __u32 targ_id) +{ +	const struct btf_type *local_type, *targ_type; + +recur: +	local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id); +	targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); +	if (!local_type || !targ_type) +		return -EINVAL; + +	if (btf_is_composite(local_type) && btf_is_composite(targ_type)) +		return 1; +	if (btf_kind(local_type) != btf_kind(targ_type)) +		return 0; + +	switch (btf_kind(local_type)) { +	case BTF_KIND_PTR: +	case BTF_KIND_FLOAT: +		return 1; +	case BTF_KIND_FWD: +	case BTF_KIND_ENUM: { +		const char *local_name, *targ_name; +		size_t local_len, targ_len; + +		local_name = btf__name_by_offset(local_btf, +						 local_type->name_off); +		targ_name = btf__name_by_offset(targ_btf, targ_type->name_off); +		local_len = bpf_core_essential_name_len(local_name); +		targ_len = bpf_core_essential_name_len(targ_name); +		/* one of them is anonymous or both w/ same flavor-less names */ +		return local_len == 0 || targ_len == 0 || +		       (local_len == targ_len && +			strncmp(local_name, targ_name, local_len) == 0); +	} +	case BTF_KIND_INT: +		/* just reject deprecated bitfield-like integers; all other +		 * integers are by default compatible between each other +		 */ +		return btf_int_offset(local_type) == 0 && +		       btf_int_offset(targ_type) == 0; +	case BTF_KIND_ARRAY: +		local_id = btf_array(local_type)->type; +		targ_id = btf_array(targ_type)->type; +		goto recur; +	default: +		pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n", +			btf_kind(local_type), local_id, targ_id); +		return 0; +	} +} + +/* + * Given single high-level named field accessor in local type, find + * corresponding high-level accessor for a target type. Along the way, + * maintain low-level spec for target as well. Also keep updating target + * bit offset. + * + * Searching is performed through recursive exhaustive enumeration of all + * fields of a struct/union. If there are any anonymous (embedded) + * structs/unions, they are recursively searched as well. If field with + * desired name is found, check compatibility between local and target types, + * before returning result. + * + * 1 is returned, if field is found. + * 0 is returned if no compatible field is found. + * <0 is returned on error. 
+ */ +static int bpf_core_match_member(const struct btf *local_btf, +				 const struct bpf_core_accessor *local_acc, +				 const struct btf *targ_btf, +				 __u32 targ_id, +				 struct bpf_core_spec *spec, +				 __u32 *next_targ_id) +{ +	const struct btf_type *local_type, *targ_type; +	const struct btf_member *local_member, *m; +	const char *local_name, *targ_name; +	__u32 local_id; +	int i, n, found; + +	targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); +	if (!targ_type) +		return -EINVAL; +	if (!btf_is_composite(targ_type)) +		return 0; + +	local_id = local_acc->type_id; +	local_type = btf__type_by_id(local_btf, local_id); +	local_member = btf_members(local_type) + local_acc->idx; +	local_name = btf__name_by_offset(local_btf, local_member->name_off); + +	n = btf_vlen(targ_type); +	m = btf_members(targ_type); +	for (i = 0; i < n; i++, m++) { +		__u32 bit_offset; + +		bit_offset = btf_member_bit_offset(targ_type, i); + +		/* too deep struct/union/array nesting */ +		if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) +			return -E2BIG; + +		/* speculate this member will be the good one */ +		spec->bit_offset += bit_offset; +		spec->raw_spec[spec->raw_len++] = i; + +		targ_name = btf__name_by_offset(targ_btf, m->name_off); +		if (str_is_empty(targ_name)) { +			/* embedded struct/union, we need to go deeper */ +			found = bpf_core_match_member(local_btf, local_acc, +						      targ_btf, m->type, +						      spec, next_targ_id); +			if (found) /* either found or error */ +				return found; +		} else if (strcmp(local_name, targ_name) == 0) { +			/* matching named field */ +			struct bpf_core_accessor *targ_acc; + +			targ_acc = &spec->spec[spec->len++]; +			targ_acc->type_id = targ_id; +			targ_acc->idx = i; +			targ_acc->name = targ_name; + +			*next_targ_id = m->type; +			found = bpf_core_fields_are_compat(local_btf, +							   local_member->type, +							   targ_btf, m->type); +			if (!found) +				spec->len--; /* pop accessor */ +			return found; +		} +		/* member turned out not to be what we looked for */ +		spec->bit_offset -= bit_offset; +		spec->raw_len--; +	} + +	return 0; +} + +/* + * Try to match local spec to a target type and, if successful, produce full + * target spec (high-level, low-level + bit offset). 
+ */ +static int bpf_core_spec_match(struct bpf_core_spec *local_spec, +			       const struct btf *targ_btf, __u32 targ_id, +			       struct bpf_core_spec *targ_spec) +{ +	const struct btf_type *targ_type; +	const struct bpf_core_accessor *local_acc; +	struct bpf_core_accessor *targ_acc; +	int i, sz, matched; + +	memset(targ_spec, 0, sizeof(*targ_spec)); +	targ_spec->btf = targ_btf; +	targ_spec->root_type_id = targ_id; +	targ_spec->relo_kind = local_spec->relo_kind; + +	if (core_relo_is_type_based(local_spec->relo_kind)) { +		return bpf_core_types_are_compat(local_spec->btf, +						 local_spec->root_type_id, +						 targ_btf, targ_id); +	} + +	local_acc = &local_spec->spec[0]; +	targ_acc = &targ_spec->spec[0]; + +	if (core_relo_is_enumval_based(local_spec->relo_kind)) { +		size_t local_essent_len, targ_essent_len; +		const struct btf_enum *e; +		const char *targ_name; + +		/* has to resolve to an enum */ +		targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id); +		if (!btf_is_enum(targ_type)) +			return 0; + +		local_essent_len = bpf_core_essential_name_len(local_acc->name); + +		for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) { +			targ_name = btf__name_by_offset(targ_spec->btf, e->name_off); +			targ_essent_len = bpf_core_essential_name_len(targ_name); +			if (targ_essent_len != local_essent_len) +				continue; +			if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) { +				targ_acc->type_id = targ_id; +				targ_acc->idx = i; +				targ_acc->name = targ_name; +				targ_spec->len++; +				targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx; +				targ_spec->raw_len++; +				return 1; +			} +		} +		return 0; +	} + +	if (!core_relo_is_field_based(local_spec->relo_kind)) +		return -EINVAL; + +	for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) { +		targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, +						   &targ_id); +		if (!targ_type) +			return -EINVAL; + +		if (local_acc->name) { +			matched = bpf_core_match_member(local_spec->btf, +							local_acc, +							targ_btf, targ_id, +							targ_spec, &targ_id); +			if (matched <= 0) +				return matched; +		} else { +			/* for i=0, targ_id is already treated as array element +			 * type (because it's the original struct), for others +			 * we should find array element type first +			 */ +			if (i > 0) { +				const struct btf_array *a; +				bool flex; + +				if (!btf_is_array(targ_type)) +					return 0; + +				a = btf_array(targ_type); +				flex = is_flex_arr(targ_btf, targ_acc - 1, a); +				if (!flex && local_acc->idx >= a->nelems) +					return 0; +				if (!skip_mods_and_typedefs(targ_btf, a->type, +							    &targ_id)) +					return -EINVAL; +			} + +			/* too deep struct/union/array nesting */ +			if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN) +				return -E2BIG; + +			targ_acc->type_id = targ_id; +			targ_acc->idx = local_acc->idx; +			targ_acc->name = NULL; +			targ_spec->len++; +			targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx; +			targ_spec->raw_len++; + +			sz = btf__resolve_size(targ_btf, targ_id); +			if (sz < 0) +				return sz; +			targ_spec->bit_offset += local_acc->idx * sz * 8; +		} +	} + +	return 1; +} + +static int bpf_core_calc_field_relo(const char *prog_name, +				    const struct bpf_core_relo *relo, +				    const struct bpf_core_spec *spec, +				    __u32 *val, __u32 *field_sz, __u32 *type_id, +				    bool *validate) +{ +	const struct bpf_core_accessor *acc; +	const struct btf_type *t; +	__u32 byte_off, byte_sz, bit_off, bit_sz, 
field_type_id; +	const struct btf_member *m; +	const struct btf_type *mt; +	bool bitfield; +	__s64 sz; + +	*field_sz = 0; + +	if (relo->kind == BPF_FIELD_EXISTS) { +		*val = spec ? 1 : 0; +		return 0; +	} + +	if (!spec) +		return -EUCLEAN; /* request instruction poisoning */ + +	acc = &spec->spec[spec->len - 1]; +	t = btf__type_by_id(spec->btf, acc->type_id); + +	/* a[n] accessor needs special handling */ +	if (!acc->name) { +		if (relo->kind == BPF_FIELD_BYTE_OFFSET) { +			*val = spec->bit_offset / 8; +			/* remember field size for load/store mem size */ +			sz = btf__resolve_size(spec->btf, acc->type_id); +			if (sz < 0) +				return -EINVAL; +			*field_sz = sz; +			*type_id = acc->type_id; +		} else if (relo->kind == BPF_FIELD_BYTE_SIZE) { +			sz = btf__resolve_size(spec->btf, acc->type_id); +			if (sz < 0) +				return -EINVAL; +			*val = sz; +		} else { +			pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n", +				prog_name, relo->kind, relo->insn_off / 8); +			return -EINVAL; +		} +		if (validate) +			*validate = true; +		return 0; +	} + +	m = btf_members(t) + acc->idx; +	mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id); +	bit_off = spec->bit_offset; +	bit_sz = btf_member_bitfield_size(t, acc->idx); + +	bitfield = bit_sz > 0; +	if (bitfield) { +		byte_sz = mt->size; +		byte_off = bit_off / 8 / byte_sz * byte_sz; +		/* figure out smallest int size necessary for bitfield load */ +		while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) { +			if (byte_sz >= 8) { +				/* bitfield can't be read with 64-bit read */ +				pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n", +					prog_name, relo->kind, relo->insn_off / 8); +				return -E2BIG; +			} +			byte_sz *= 2; +			byte_off = bit_off / 8 / byte_sz * byte_sz; +		} +	} else { +		sz = btf__resolve_size(spec->btf, field_type_id); +		if (sz < 0) +			return -EINVAL; +		byte_sz = sz; +		byte_off = spec->bit_offset / 8; +		bit_sz = byte_sz * 8; +	} + +	/* for bitfields, all the relocatable aspects are ambiguous and we +	 * might disagree with compiler, so turn off validation of expected +	 * value, except for signedness +	 */ +	if (validate) +		*validate = !bitfield; + +	switch (relo->kind) { +	case BPF_FIELD_BYTE_OFFSET: +		*val = byte_off; +		if (!bitfield) { +			*field_sz = byte_sz; +			*type_id = field_type_id; +		} +		break; +	case BPF_FIELD_BYTE_SIZE: +		*val = byte_sz; +		break; +	case BPF_FIELD_SIGNED: +		/* enums will be assumed unsigned */ +		*val = btf_is_enum(mt) || +		       (btf_int_encoding(mt) & BTF_INT_SIGNED); +		if (validate) +			*validate = true; /* signedness is never ambiguous */ +		break; +	case BPF_FIELD_LSHIFT_U64: +#if __BYTE_ORDER == __LITTLE_ENDIAN +		*val = 64 - (bit_off + bit_sz - byte_off  * 8); +#else +		*val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8); +#endif +		break; +	case BPF_FIELD_RSHIFT_U64: +		*val = 64 - bit_sz; +		if (validate) +			*validate = true; /* right shift is never ambiguous */ +		break; +	case BPF_FIELD_EXISTS: +	default: +		return -EOPNOTSUPP; +	} + +	return 0; +} + +static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, +				   const struct bpf_core_spec *spec, +				   __u32 *val) +{ +	__s64 sz; + +	/* type-based relos return zero when target type is not found */ +	if (!spec) { +		*val = 0; +		return 0; +	} + +	switch (relo->kind) { +	case BPF_TYPE_ID_TARGET: +		*val = spec->root_type_id; +		break; +	case BPF_TYPE_EXISTS: +		*val = 1; +		break; +	case BPF_TYPE_SIZE: +		sz = btf__resolve_size(spec->btf, 
spec->root_type_id); +		if (sz < 0) +			return -EINVAL; +		*val = sz; +		break; +	case BPF_TYPE_ID_LOCAL: +	/* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */ +	default: +		return -EOPNOTSUPP; +	} + +	return 0; +} + +static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo, +				      const struct bpf_core_spec *spec, +				      __u32 *val) +{ +	const struct btf_type *t; +	const struct btf_enum *e; + +	switch (relo->kind) { +	case BPF_ENUMVAL_EXISTS: +		*val = spec ? 1 : 0; +		break; +	case BPF_ENUMVAL_VALUE: +		if (!spec) +			return -EUCLEAN; /* request instruction poisoning */ +		t = btf__type_by_id(spec->btf, spec->spec[0].type_id); +		e = btf_enum(t) + spec->spec[0].idx; +		*val = e->val; +		break; +	default: +		return -EOPNOTSUPP; +	} + +	return 0; +} + +struct bpf_core_relo_res +{ +	/* expected value in the instruction, unless validate == false */ +	__u32 orig_val; +	/* new value that needs to be patched up to */ +	__u32 new_val; +	/* relocation unsuccessful, poison instruction, but don't fail load */ +	bool poison; +	/* some relocations can't be validated against orig_val */ +	bool validate; +	/* for field byte offset relocations or the forms: +	 *     *(T *)(rX + <off>) = rY +	 *     rX = *(T *)(rY + <off>), +	 * we remember original and resolved field size to adjust direct +	 * memory loads of pointers and integers; this is necessary for 32-bit +	 * host kernel architectures, but also allows to automatically +	 * relocate fields that were resized from, e.g., u32 to u64, etc. +	 */ +	bool fail_memsz_adjust; +	__u32 orig_sz; +	__u32 orig_type_id; +	__u32 new_sz; +	__u32 new_type_id; +}; + +/* Calculate original and target relocation values, given local and target + * specs and relocation kind. These values are calculated for each candidate. + * If there are multiple candidates, resulting values should all be consistent + * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity. + * If instruction has to be poisoned, *poison will be set to true. + */ +static int bpf_core_calc_relo(const char *prog_name, +			      const struct bpf_core_relo *relo, +			      int relo_idx, +			      const struct bpf_core_spec *local_spec, +			      const struct bpf_core_spec *targ_spec, +			      struct bpf_core_relo_res *res) +{ +	int err = -EOPNOTSUPP; + +	res->orig_val = 0; +	res->new_val = 0; +	res->poison = false; +	res->validate = true; +	res->fail_memsz_adjust = false; +	res->orig_sz = res->new_sz = 0; +	res->orig_type_id = res->new_type_id = 0; + +	if (core_relo_is_field_based(relo->kind)) { +		err = bpf_core_calc_field_relo(prog_name, relo, local_spec, +					       &res->orig_val, &res->orig_sz, +					       &res->orig_type_id, &res->validate); +		err = err ?: bpf_core_calc_field_relo(prog_name, relo, targ_spec, +						      &res->new_val, &res->new_sz, +						      &res->new_type_id, NULL); +		if (err) +			goto done; +		/* Validate if it's safe to adjust load/store memory size. +		 * Adjustments are performed only if original and new memory +		 * sizes differ. 
+		 */ +		res->fail_memsz_adjust = false; +		if (res->orig_sz != res->new_sz) { +			const struct btf_type *orig_t, *new_t; + +			orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id); +			new_t = btf__type_by_id(targ_spec->btf, res->new_type_id); + +			/* There are two use cases in which it's safe to +			 * adjust load/store's mem size: +			 *   - reading a 32-bit kernel pointer, while on BPF +			 *   size pointers are always 64-bit; in this case +			 *   it's safe to "downsize" instruction size due to +			 *   pointer being treated as unsigned integer with +			 *   zero-extended upper 32-bits; +			 *   - reading unsigned integers, again due to +			 *   zero-extension is preserving the value correctly. +			 * +			 * In all other cases it's incorrect to attempt to +			 * load/store field because read value will be +			 * incorrect, so we poison relocated instruction. +			 */ +			if (btf_is_ptr(orig_t) && btf_is_ptr(new_t)) +				goto done; +			if (btf_is_int(orig_t) && btf_is_int(new_t) && +			    btf_int_encoding(orig_t) != BTF_INT_SIGNED && +			    btf_int_encoding(new_t) != BTF_INT_SIGNED) +				goto done; + +			/* mark as invalid mem size adjustment, but this will +			 * only be checked for LDX/STX/ST insns +			 */ +			res->fail_memsz_adjust = true; +		} +	} else if (core_relo_is_type_based(relo->kind)) { +		err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val); +		err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val); +	} else if (core_relo_is_enumval_based(relo->kind)) { +		err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val); +		err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val); +	} + +done: +	if (err == -EUCLEAN) { +		/* EUCLEAN is used to signal instruction poisoning request */ +		res->poison = true; +		err = 0; +	} else if (err == -EOPNOTSUPP) { +		/* EOPNOTSUPP means unknown/unsupported relocation */ +		pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n", +			prog_name, relo_idx, core_relo_kind_str(relo->kind), +			relo->kind, relo->insn_off / 8); +	} + +	return err; +} + +/* + * Turn instruction for which CO_RE relocation failed into invalid one with + * distinct signature. + */ +static void bpf_core_poison_insn(const char *prog_name, int relo_idx, +				 int insn_idx, struct bpf_insn *insn) +{ +	pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n", +		 prog_name, relo_idx, insn_idx); +	insn->code = BPF_JMP | BPF_CALL; +	insn->dst_reg = 0; +	insn->src_reg = 0; +	insn->off = 0; +	/* if this instruction is reachable (not a dead code), +	 * verifier will complain with the following message: +	 * invalid func unknown#195896080 +	 */ +	insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */ +} + +static int insn_bpf_size_to_bytes(struct bpf_insn *insn) +{ +	switch (BPF_SIZE(insn->code)) { +	case BPF_DW: return 8; +	case BPF_W: return 4; +	case BPF_H: return 2; +	case BPF_B: return 1; +	default: return -1; +	} +} + +static int insn_bytes_to_bpf_size(__u32 sz) +{ +	switch (sz) { +	case 8: return BPF_DW; +	case 4: return BPF_W; +	case 2: return BPF_H; +	case 1: return BPF_B; +	default: return -1; +	} +} + +/* + * Patch relocatable BPF instruction. + * + * Patched value is determined by relocation kind and target specification. + * For existence relocations target spec will be NULL if field/type is not found. + * Expected insn->imm value is determined using relocation kind and local + * spec, and is checked before patching instruction. 
If actual insn->imm value + * is wrong, bail out with error. + * + * Currently supported classes of BPF instruction are: + * 1. rX = <imm> (assignment with immediate operand); + * 2. rX += <imm> (arithmetic operations with immediate operand); + * 3. rX = <imm64> (load with 64-bit immediate value); + * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64}; + * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64}; + * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}. + */ +static int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn, +			       int insn_idx, const struct bpf_core_relo *relo, +			       int relo_idx, const struct bpf_core_relo_res *res) +{ +	__u32 orig_val, new_val; +	__u8 class; + +	class = BPF_CLASS(insn->code); + +	if (res->poison) { +poison: +		/* poison second part of ldimm64 to avoid confusing error from +		 * verifier about "unknown opcode 00" +		 */ +		if (is_ldimm64_insn(insn)) +			bpf_core_poison_insn(prog_name, relo_idx, insn_idx + 1, insn + 1); +		bpf_core_poison_insn(prog_name, relo_idx, insn_idx, insn); +		return 0; +	} + +	orig_val = res->orig_val; +	new_val = res->new_val; + +	switch (class) { +	case BPF_ALU: +	case BPF_ALU64: +		if (BPF_SRC(insn->code) != BPF_K) +			return -EINVAL; +		if (res->validate && insn->imm != orig_val) { +			pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n", +				prog_name, relo_idx, +				insn_idx, insn->imm, orig_val, new_val); +			return -EINVAL; +		} +		orig_val = insn->imm; +		insn->imm = new_val; +		pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n", +			 prog_name, relo_idx, insn_idx, +			 orig_val, new_val); +		break; +	case BPF_LDX: +	case BPF_ST: +	case BPF_STX: +		if (res->validate && insn->off != orig_val) { +			pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n", +				prog_name, relo_idx, insn_idx, insn->off, orig_val, new_val); +			return -EINVAL; +		} +		if (new_val > SHRT_MAX) { +			pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n", +				prog_name, relo_idx, insn_idx, new_val); +			return -ERANGE; +		} +		if (res->fail_memsz_adjust) { +			pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. 
" +				"Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n", +				prog_name, relo_idx, insn_idx); +			goto poison; +		} + +		orig_val = insn->off; +		insn->off = new_val; +		pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n", +			 prog_name, relo_idx, insn_idx, orig_val, new_val); + +		if (res->new_sz != res->orig_sz) { +			int insn_bytes_sz, insn_bpf_sz; + +			insn_bytes_sz = insn_bpf_size_to_bytes(insn); +			if (insn_bytes_sz != res->orig_sz) { +				pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n", +					prog_name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz); +				return -EINVAL; +			} + +			insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz); +			if (insn_bpf_sz < 0) { +				pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n", +					prog_name, relo_idx, insn_idx, res->new_sz); +				return -EINVAL; +			} + +			insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code); +			pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n", +				 prog_name, relo_idx, insn_idx, res->orig_sz, res->new_sz); +		} +		break; +	case BPF_LD: { +		__u64 imm; + +		if (!is_ldimm64_insn(insn) || +		    insn[0].src_reg != 0 || insn[0].off != 0 || +		    insn[1].code != 0 || insn[1].dst_reg != 0 || +		    insn[1].src_reg != 0 || insn[1].off != 0) { +			pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n", +				prog_name, relo_idx, insn_idx); +			return -EINVAL; +		} + +		imm = insn[0].imm + ((__u64)insn[1].imm << 32); +		if (res->validate && imm != orig_val) { +			pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n", +				prog_name, relo_idx, +				insn_idx, (unsigned long long)imm, +				orig_val, new_val); +			return -EINVAL; +		} + +		insn[0].imm = new_val; +		insn[1].imm = 0; /* currently only 32-bit values are supported */ +		pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n", +			 prog_name, relo_idx, insn_idx, +			 (unsigned long long)imm, new_val); +		break; +	} +	default: +		pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n", +			prog_name, relo_idx, insn_idx, insn->code, +			insn->src_reg, insn->dst_reg, insn->off, insn->imm); +		return -EINVAL; +	} + +	return 0; +} + +/* Output spec definition in the format: + * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>, + * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b + */ +static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) +{ +	const struct btf_type *t; +	const struct btf_enum *e; +	const char *s; +	__u32 type_id; +	int i; + +	type_id = spec->root_type_id; +	t = btf__type_by_id(spec->btf, type_id); +	s = btf__name_by_offset(spec->btf, t->name_off); + +	libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? 
"<anon>" : s); + +	if (core_relo_is_type_based(spec->relo_kind)) +		return; + +	if (core_relo_is_enumval_based(spec->relo_kind)) { +		t = skip_mods_and_typedefs(spec->btf, type_id, NULL); +		e = btf_enum(t) + spec->raw_spec[0]; +		s = btf__name_by_offset(spec->btf, e->name_off); + +		libbpf_print(level, "::%s = %u", s, e->val); +		return; +	} + +	if (core_relo_is_field_based(spec->relo_kind)) { +		for (i = 0; i < spec->len; i++) { +			if (spec->spec[i].name) +				libbpf_print(level, ".%s", spec->spec[i].name); +			else if (i > 0 || spec->spec[i].idx > 0) +				libbpf_print(level, "[%u]", spec->spec[i].idx); +		} + +		libbpf_print(level, " ("); +		for (i = 0; i < spec->raw_len; i++) +			libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]); + +		if (spec->bit_offset % 8) +			libbpf_print(level, " @ offset %u.%u)", +				     spec->bit_offset / 8, spec->bit_offset % 8); +		else +			libbpf_print(level, " @ offset %u)", spec->bit_offset / 8); +		return; +	} +} + +/* + * CO-RE relocate single instruction. + * + * The outline and important points of the algorithm: + * 1. For given local type, find corresponding candidate target types. + *    Candidate type is a type with the same "essential" name, ignoring + *    everything after last triple underscore (___). E.g., `sample`, + *    `sample___flavor_one`, `sample___flavor_another_one`, are all candidates + *    for each other. Names with triple underscore are referred to as + *    "flavors" and are useful, among other things, to allow to + *    specify/support incompatible variations of the same kernel struct, which + *    might differ between different kernel versions and/or build + *    configurations. + * + *    N.B. Struct "flavors" could be generated by bpftool's BTF-to-C + *    converter, when deduplicated BTF of a kernel still contains more than + *    one different types with the same name. In that case, ___2, ___3, etc + *    are appended starting from second name conflict. But start flavors are + *    also useful to be defined "locally", in BPF program, to extract same + *    data from incompatible changes between different kernel + *    versions/configurations. For instance, to handle field renames between + *    kernel versions, one can use two flavors of the struct name with the + *    same common name and use conditional relocations to extract that field, + *    depending on target kernel version. + * 2. For each candidate type, try to match local specification to this + *    candidate target type. Matching involves finding corresponding + *    high-level spec accessors, meaning that all named fields should match, + *    as well as all array accesses should be within the actual bounds. Also, + *    types should be compatible (see bpf_core_fields_are_compat for details). + * 3. It is supported and expected that there might be multiple flavors + *    matching the spec. As long as all the specs resolve to the same set of + *    offsets across all candidates, there is no error. If there is any + *    ambiguity, CO-RE relocation will fail. This is necessary to accomodate + *    imprefection of BTF deduplication, which can cause slight duplication of + *    the same BTF type, if some directly or indirectly referenced (by + *    pointer) type gets resolved to different actual types in different + *    object files. If such situation occurs, deduplicated BTF will end up + *    with two (or more) structurally identical types, which differ only in + *    types they refer to through pointer. 
This should be OK in most cases and + *    is not an error. + * 4. Candidate types search is performed by linearly scanning through all + *    types in target BTF. It is anticipated that this is overall more + *    efficient memory-wise and not significantly worse (if not better) + *    CPU-wise compared to prebuilding a map from all local type names to + *    a list of candidate type names. It's also sped up by caching resolved + *    list of matching candidates per each local "root" type ID, that has at + *    least one bpf_core_relo associated with it. This list is shared + *    between multiple relocations for the same type ID and is updated as some + *    of the candidates are pruned due to structural incompatibility. + */ +int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, +			     int insn_idx, +			     const struct bpf_core_relo *relo, +			     int relo_idx, +			     const struct btf *local_btf, +			     struct bpf_core_cand_list *cands) +{ +	struct bpf_core_spec local_spec, cand_spec, targ_spec = {}; +	struct bpf_core_relo_res cand_res, targ_res; +	const struct btf_type *local_type; +	const char *local_name; +	__u32 local_id; +	const char *spec_str; +	int i, j, err; + +	local_id = relo->type_id; +	local_type = btf__type_by_id(local_btf, local_id); +	if (!local_type) +		return -EINVAL; + +	local_name = btf__name_by_offset(local_btf, local_type->name_off); +	if (!local_name) +		return -EINVAL; + +	spec_str = btf__name_by_offset(local_btf, relo->access_str_off); +	if (str_is_empty(spec_str)) +		return -EINVAL; + +	err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec); +	if (err) { +		pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n", +			prog_name, relo_idx, local_id, btf_kind_str(local_type), +			str_is_empty(local_name) ? "<anon>" : local_name, +			spec_str, err); +		return -EINVAL; +	} + +	pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name, +		 relo_idx, core_relo_kind_str(relo->kind), relo->kind); +	bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec); +	libbpf_print(LIBBPF_DEBUG, "\n"); + +	/* TYPE_ID_LOCAL relo is special and doesn't need candidate search */ +	if (relo->kind == BPF_TYPE_ID_LOCAL) { +		targ_res.validate = true; +		targ_res.poison = false; +		targ_res.orig_val = local_spec.root_type_id; +		targ_res.new_val = local_spec.root_type_id; +		goto patch_insn; +	} + +	/* libbpf doesn't support candidate search for anonymous types */ +	if (str_is_empty(spec_str)) { +		pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n", +			prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind); +		return -EOPNOTSUPP; +	} + + +	for (i = 0, j = 0; i < cands->len; i++) { +		err = bpf_core_spec_match(&local_spec, cands->cands[i].btf, +					  cands->cands[i].id, &cand_spec); +		if (err < 0) { +			pr_warn("prog '%s': relo #%d: error matching candidate #%d ", +				prog_name, relo_idx, i); +			bpf_core_dump_spec(LIBBPF_WARN, &cand_spec); +			libbpf_print(LIBBPF_WARN, ": %d\n", err); +			return err; +		} + +		pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name, +			 relo_idx, err == 0 ? 
"non-matching" : "matching", i); +		bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec); +		libbpf_print(LIBBPF_DEBUG, "\n"); + +		if (err == 0) +			continue; + +		err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, &cand_spec, &cand_res); +		if (err) +			return err; + +		if (j == 0) { +			targ_res = cand_res; +			targ_spec = cand_spec; +		} else if (cand_spec.bit_offset != targ_spec.bit_offset) { +			/* if there are many field relo candidates, they +			 * should all resolve to the same bit offset +			 */ +			pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n", +				prog_name, relo_idx, cand_spec.bit_offset, +				targ_spec.bit_offset); +			return -EINVAL; +		} else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) { +			/* all candidates should result in the same relocation +			 * decision and value, otherwise it's dangerous to +			 * proceed due to ambiguity +			 */ +			pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n", +				prog_name, relo_idx, +				cand_res.poison ? "failure" : "success", cand_res.new_val, +				targ_res.poison ? "failure" : "success", targ_res.new_val); +			return -EINVAL; +		} + +		cands->cands[j++] = cands->cands[i]; +	} + +	/* +	 * For BPF_FIELD_EXISTS relo or when used BPF program has field +	 * existence checks or kernel version/config checks, it's expected +	 * that we might not find any candidates. In this case, if field +	 * wasn't found in any candidate, the list of candidates shouldn't +	 * change at all, we'll just handle relocating appropriately, +	 * depending on relo's kind. +	 */ +	if (j > 0) +		cands->len = j; + +	/* +	 * If no candidates were found, it might be both a programmer error, +	 * as well as expected case, depending whether instruction w/ +	 * relocation is guarded in some way that makes it unreachable (dead +	 * code) if relocation can't be resolved. This is handled in +	 * bpf_core_patch_insn() uniformly by replacing that instruction with +	 * BPF helper call insn (using invalid helper ID). If that instruction +	 * is indeed unreachable, then it will be ignored and eliminated by +	 * verifier. If it was an error, then verifier will complain and point +	 * to a specific instruction number in its log. +	 */ +	if (j == 0) { +		pr_debug("prog '%s': relo #%d: no matching targets found\n", +			 prog_name, relo_idx); + +		/* calculate single target relo result explicitly */ +		err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, NULL, &targ_res); +		if (err) +			return err; +	} + +patch_insn: +	/* bpf_core_patch_insn() should know how to handle missing targ_spec */ +	err = bpf_core_patch_insn(prog_name, insn, insn_idx, relo, relo_idx, &targ_res); +	if (err) { +		pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n", +			prog_name, relo_idx, relo->insn_off / 8, err); +		return -EINVAL; +	} + +	return 0; +} diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h new file mode 100644 index 000000000000..3b9f8f18346c --- /dev/null +++ b/tools/lib/bpf/relo_core.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* Copyright (c) 2019 Facebook */ + +#ifndef __RELO_CORE_H +#define __RELO_CORE_H + +/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value + * has to be adjusted by relocations. 
+ */ +enum bpf_core_relo_kind { +	BPF_FIELD_BYTE_OFFSET = 0,	/* field byte offset */ +	BPF_FIELD_BYTE_SIZE = 1,	/* field size in bytes */ +	BPF_FIELD_EXISTS = 2,		/* field existence in target kernel */ +	BPF_FIELD_SIGNED = 3,		/* field signedness (0 - unsigned, 1 - signed) */ +	BPF_FIELD_LSHIFT_U64 = 4,	/* bitfield-specific left bitshift */ +	BPF_FIELD_RSHIFT_U64 = 5,	/* bitfield-specific right bitshift */ +	BPF_TYPE_ID_LOCAL = 6,		/* type ID in local BPF object */ +	BPF_TYPE_ID_TARGET = 7,		/* type ID in target kernel */ +	BPF_TYPE_EXISTS = 8,		/* type existence in target kernel */ +	BPF_TYPE_SIZE = 9,		/* type size in bytes */ +	BPF_ENUMVAL_EXISTS = 10,	/* enum value existence in target kernel */ +	BPF_ENUMVAL_VALUE = 11,		/* enum value integer value */ +}; + +/* The minimum bpf_core_relo checked by the loader + * + * CO-RE relocation captures the following data: + * - insn_off - instruction offset (in bytes) within a BPF program that needs + *   its insn->imm field to be relocated with actual field info; + * - type_id - BTF type ID of the "root" (containing) entity of a relocatable + *   type or field; + * - access_str_off - offset into corresponding .BTF string section. String + *   interpretation depends on specific relocation kind: + *     - for field-based relocations, string encodes an accessed field using + *     a sequence of field and array indices, separated by colon (:). It's + *     conceptually very close to LLVM's getelementptr ([0]) instruction's + *     arguments for identifying offset to a field. + *     - for type-based relocations, strings is expected to be just "0"; + *     - for enum value-based relocations, string contains an index of enum + *     value within its enum type; + * + * Example to provide a better feel. + * + *   struct sample { + *       int a; + *       struct { + *           int b[10]; + *       }; + *   }; + * + *   struct sample *s = ...; + *   int x = &s->a;     // encoded as "0:0" (a is field #0) + *   int y = &s->b[5];  // encoded as "0:1:0:5" (anon struct is field #1, + *                      // b is field #0 inside anon struct, accessing elem #5) + *   int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array) + * + * type_id for all relocs in this example  will capture BTF type id of + * `struct sample`. + * + * Such relocation is emitted when using __builtin_preserve_access_index() + * Clang built-in, passing expression that captures field address, e.g.: + * + * bpf_probe_read(&dst, sizeof(dst), + *		  __builtin_preserve_access_index(&src->a.b.c)); + * + * In this case Clang will emit field relocation recording necessary data to + * be able to find offset of embedded `a.b.c` field within `src` struct. 
+ * + *   [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction + */ +struct bpf_core_relo { +	__u32   insn_off; +	__u32   type_id; +	__u32   access_str_off; +	enum bpf_core_relo_kind kind; +}; + +struct bpf_core_cand { +	const struct btf *btf; +	const struct btf_type *t; +	const char *name; +	__u32 id; +}; + +/* dynamically sized list of type IDs and its associated struct btf */ +struct bpf_core_cand_list { +	struct bpf_core_cand *cands; +	int len; +}; + +int bpf_core_apply_relo_insn(const char *prog_name, +			     struct bpf_insn *insn, int insn_idx, +			     const struct bpf_core_relo *relo, int relo_idx, +			     const struct btf *local_btf, +			     struct bpf_core_cand_list *cands); +int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, +			      const struct btf *targ_btf, __u32 targ_id); + +size_t bpf_core_essential_name_len(const char *name); +#endif | 
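The btf_custom_path option added to bpf_object_open_opts replaces vmlinux BTF only for CO-RE relocation purposes. A minimal user-space sketch of how a caller might pass it, assuming a hypothetical BPF object file name and BTF path (error handling trimmed):

#include <bpf/libbpf.h>

int open_with_custom_btf(void)
{
	/* .sz must be set so libbpf can validate the opts struct;
	 * remaining fields are zero-initialized by the designated init.
	 */
	struct bpf_object_open_opts opts = {
		.sz = sizeof(opts),
		/* hypothetical path to BTF describing the target kernel */
		.btf_custom_path = "/tmp/target-vmlinux.btf",
	};
	struct bpf_object *obj;

	/* "prog.bpf.o" is a placeholder object file name */
	obj = bpf_object__open_file("prog.bpf.o", &opts);
	if (libbpf_get_error(obj))
		return -1;

	/* CO-RE relocations during load now use the custom BTF; kernel-side
	 * features (fentry/fexit, struct_ops) still need
	 * /sys/kernel/btf/vmlinux, as the header comment notes.
	 */
	return bpf_object__load(obj);
}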
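The new *_opts attach variants (perf_event, kprobe, uprobe, tracepoint) all accept a bpf_cookie that the attached program can later read with the bpf_get_attach_cookie() helper. A sketch, under the assumption of a caller that attaches one program to several kprobes and tags each attachment with a distinct cookie (the symbol names are up to the caller):

#include <bpf/libbpf.h>

/* attach 'prog' to kernel function 'func_name', tagging the link with 'cookie' */
static struct bpf_link *attach_tagged_kprobe(struct bpf_program *prog,
					     const char *func_name,
					     __u64 cookie)
{
	struct bpf_kprobe_opts opts = {
		.sz = sizeof(opts),
		.bpf_cookie = cookie,	/* readable via bpf_get_attach_cookie() */
		.retprobe = false,	/* entry probe, not a return probe */
	};

	return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
}

On the BPF side the single handler can then switch on bpf_get_attach_cookie(ctx) to tell the attach points apart, instead of being compiled once per symbol.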
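relo_core.c poisons instructions whose relocation cannot be resolved rather than failing the whole load, precisely so that BPF programs can guard such accesses and let the verifier drop the poisoned branch as dead code. A BPF-side sketch of that pattern using the CO-RE helpers from bpf_core_read.h; the task_struct___old "flavor" and the state/__state field split are used only as an illustration of a renamed field between kernel versions:

#include "vmlinux.h"			/* assumes BTF-generated kernel types */
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>

/* hypothetical "flavor" describing an older task_struct layout */
struct task_struct___old {
	long state;
} __attribute__((preserve_access_index));

SEC("kprobe/do_exit")
int probe_exit(void *ctx)
{
	struct task_struct *t = (void *)bpf_get_current_task();
	long state;

	if (bpf_core_field_exists(t->__state)) {
		/* kernels where the field is named __state */
		state = BPF_CORE_READ(t, __state);
	} else {
		/* older layout via the ___old flavor; if this branch can't be
		 * relocated on the running kernel, its instructions are
		 * poisoned and eliminated as dead code by the verifier.
		 */
		struct task_struct___old *t_old = (void *)t;

		state = BPF_CORE_READ(t_old, state);
	}

	bpf_printk("task state %ld", state);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";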
