Diffstat (limited to 'tools/lib/bpf/libbpf.c')
| -rw-r--r-- | tools/lib/bpf/libbpf.c | 1756 | 
1 file changed, 367 insertions(+), 1389 deletions(-)
| diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 6f5e2757bb3c..88d8825fc6f6 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -193,6 +193,8 @@ enum kern_feature_id {  	FEAT_MODULE_BTF,  	/* BTF_KIND_FLOAT support */  	FEAT_BTF_FLOAT, +	/* BPF perf link support */ +	FEAT_PERF_LINK,  	__FEAT_CNT,  }; @@ -498,6 +500,10 @@ struct bpf_object {  	 * it at load time.  	 */  	struct btf *btf_vmlinux; +	/* Path to the custom BTF to be used for BPF CO-RE relocations as an +	 * override for vmlinux BTF. +	 */ +	char *btf_custom_path;  	/* vmlinux BTF override for CO-RE relocations */  	struct btf *btf_vmlinux_override;  	/* Lazily initialized kernel module BTFs */ @@ -591,11 +597,6 @@ static bool insn_is_subprog_call(const struct bpf_insn *insn)  	       insn->off == 0;  } -static bool is_ldimm64_insn(struct bpf_insn *insn) -{ -	return insn->code == (BPF_LD | BPF_IMM | BPF_DW); -} -  static bool is_call_insn(const struct bpf_insn *insn)  {  	return insn->code == (BPF_JMP | BPF_CALL); @@ -2645,8 +2646,10 @@ static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)  	struct bpf_program *prog;  	int i; -	/* CO-RE relocations need kernel BTF */ -	if (obj->btf_ext && obj->btf_ext->core_relo_info.len) +	/* CO-RE relocations need kernel BTF, only when btf_custom_path +	 * is not specified +	 */ +	if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path)  		return true;  	/* Support for typed ksyms needs kernel BTF */ @@ -2679,7 +2682,7 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)  	if (!force && !obj_needs_vmlinux_btf(obj))  		return 0; -	obj->btf_vmlinux = libbpf_find_kernel_btf(); +	obj->btf_vmlinux = btf__load_vmlinux_btf();  	err = libbpf_get_error(obj->btf_vmlinux);  	if (err) {  		pr_warn("Error loading vmlinux BTF: %d\n", err); @@ -2768,7 +2771,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)  		 */  		btf__set_fd(kern_btf, 0);  	} else { -		err = btf__load(kern_btf); +		err = btf__load_into_kernel(kern_btf);  	}  	if (sanitize) {  		if (!err) { @@ -3894,6 +3897,42 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)  	return 0;  } +static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info) +{ +	char file[PATH_MAX], buff[4096]; +	FILE *fp; +	__u32 val; +	int err; + +	snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd); +	memset(info, 0, sizeof(*info)); + +	fp = fopen(file, "r"); +	if (!fp) { +		err = -errno; +		pr_warn("failed to open %s: %d. 
No procfs support?\n", file, +			err); +		return err; +	} + +	while (fgets(buff, sizeof(buff), fp)) { +		if (sscanf(buff, "map_type:\t%u", &val) == 1) +			info->type = val; +		else if (sscanf(buff, "key_size:\t%u", &val) == 1) +			info->key_size = val; +		else if (sscanf(buff, "value_size:\t%u", &val) == 1) +			info->value_size = val; +		else if (sscanf(buff, "max_entries:\t%u", &val) == 1) +			info->max_entries = val; +		else if (sscanf(buff, "map_flags:\t%i", &val) == 1) +			info->map_flags = val; +	} + +	fclose(fp); + +	return 0; +} +  int bpf_map__reuse_fd(struct bpf_map *map, int fd)  {  	struct bpf_map_info info = {}; @@ -3902,6 +3941,8 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd)  	char *new_name;  	err = bpf_obj_get_info_by_fd(fd, &info, &len); +	if (err && errno == EINVAL) +		err = bpf_get_map_info_from_fdinfo(fd, &info);  	if (err)  		return libbpf_err(err); @@ -4298,6 +4339,37 @@ static int probe_module_btf(void)  	return !err;  } +static int probe_perf_link(void) +{ +	struct bpf_load_program_attr attr; +	struct bpf_insn insns[] = { +		BPF_MOV64_IMM(BPF_REG_0, 0), +		BPF_EXIT_INSN(), +	}; +	int prog_fd, link_fd, err; + +	memset(&attr, 0, sizeof(attr)); +	attr.prog_type = BPF_PROG_TYPE_TRACEPOINT; +	attr.insns = insns; +	attr.insns_cnt = ARRAY_SIZE(insns); +	attr.license = "GPL"; +	prog_fd = bpf_load_program_xattr(&attr, NULL, 0); +	if (prog_fd < 0) +		return -errno; + +	/* use invalid perf_event FD to get EBADF, if link is supported; +	 * otherwise EINVAL should be returned +	 */ +	link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL); +	err = -errno; /* close() can clobber errno */ + +	if (link_fd >= 0) +		close(link_fd); +	close(prog_fd); + +	return link_fd < 0 && err == -EBADF; +} +  enum kern_feature_result {  	FEAT_UNKNOWN = 0,  	FEAT_SUPPORTED = 1, @@ -4348,6 +4420,9 @@ static struct kern_feature_desc {  	[FEAT_BTF_FLOAT] = {  		"BTF_KIND_FLOAT support", probe_kern_btf_float,  	}, +	[FEAT_PERF_LINK] = { +		"BPF perf link support", probe_perf_link, +	},  };  static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) @@ -4381,12 +4456,16 @@ static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)  	struct bpf_map_info map_info = {};  	char msg[STRERR_BUFSIZE];  	__u32 map_info_len; +	int err;  	map_info_len = sizeof(map_info); -	if (bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len)) { -		pr_warn("failed to get map info for map FD %d: %s\n", -			map_fd, libbpf_strerror_r(errno, msg, sizeof(msg))); +	err = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len); +	if (err && errno == EINVAL) +		err = bpf_get_map_info_from_fdinfo(map_fd, &map_info); +	if (err) { +		pr_warn("failed to get map info for map FD %d: %s\n", map_fd, +			libbpf_strerror_r(errno, msg, sizeof(msg)));  		return false;  	} @@ -4479,6 +4558,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b  {  	struct bpf_create_map_attr create_attr;  	struct bpf_map_def *def = &map->def; +	int err = 0;  	memset(&create_attr, 0, sizeof(create_attr)); @@ -4521,8 +4601,6 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b  	if (bpf_map_type__is_map_in_map(def->type)) {  		if (map->inner_map) { -			int err; -  			err = bpf_object__create_map(obj, map->inner_map, true);  			if (err) {  				pr_warn("map '%s': failed to create inner map: %d\n", @@ -4547,8 +4625,8 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b  	if (map->fd < 0 && (create_attr.btf_key_type_id || 
 			    create_attr.btf_value_type_id)) {  		char *cp, errmsg[STRERR_BUFSIZE]; -		int err = -errno; +		err = -errno;  		cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));  		pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",  			map->name, cp, err); @@ -4560,8 +4638,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b  		map->fd = bpf_create_map_xattr(&create_attr);  	} -	if (map->fd < 0) -		return -errno; +	err = map->fd < 0 ? -errno : 0;  	if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {  		if (obj->gen_loader) @@ -4570,7 +4647,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b  		zfree(&map->inner_map);  	} -	return 0; +	return err;  }  static int init_map_slots(struct bpf_object *obj, struct bpf_map *map) @@ -4616,10 +4693,13 @@ bpf_object__create_maps(struct bpf_object *obj)  	char *cp, errmsg[STRERR_BUFSIZE];  	unsigned int i, j;  	int err; +	bool retried;  	for (i = 0; i < obj->nr_maps; i++) {  		map = &obj->maps[i]; +		retried = false; +retry:  		if (map->pin_path) {  			err = bpf_object__reuse_map(map);  			if (err) { @@ -4627,6 +4707,12 @@ bpf_object__create_maps(struct bpf_object *obj)  					map->name);  				goto err_out;  			} +			if (retried && map->fd < 0) { +				pr_warn("map '%s': cannot find pinned map\n", +					map->name); +				err = -ENOENT; +				goto err_out; +			}  		}  		if (map->fd >= 0) { @@ -4660,9 +4746,13 @@ bpf_object__create_maps(struct bpf_object *obj)  		if (map->pin_path && !map->pinned) {  			err = bpf_map__pin(map, NULL);  			if (err) { +				zclose(map->fd); +				if (!retried && err == -EEXIST) { +					retried = true; +					goto retry; +				}  				pr_warn("map '%s': failed to auto-pin at '%s': %d\n",  					map->name, map->pin_path, err); -				zclose(map->fd);  				goto err_out;  			}  		} @@ -4679,279 +4769,6 @@ err_out:  	return err;  } -#define BPF_CORE_SPEC_MAX_LEN 64 - -/* represents BPF CO-RE field or array element accessor */ -struct bpf_core_accessor { -	__u32 type_id;		/* struct/union type or array element type */ -	__u32 idx;		/* field index or array index */ -	const char *name;	/* field name or NULL for array accessor */ -}; - -struct bpf_core_spec { -	const struct btf *btf; -	/* high-level spec: named fields and array indices only */ -	struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN]; -	/* original unresolved (no skip_mods_or_typedefs) root type ID */ -	__u32 root_type_id; -	/* CO-RE relocation kind */ -	enum bpf_core_relo_kind relo_kind; -	/* high-level spec length */ -	int len; -	/* raw, low-level spec: 1-to-1 with accessor spec string */ -	int raw_spec[BPF_CORE_SPEC_MAX_LEN]; -	/* raw spec length */ -	int raw_len; -	/* field bit offset represented by spec */ -	__u32 bit_offset; -}; - -static bool str_is_empty(const char *s) -{ -	return !s || !s[0]; -} - -static bool is_flex_arr(const struct btf *btf, -			const struct bpf_core_accessor *acc, -			const struct btf_array *arr) -{ -	const struct btf_type *t; - -	/* not a flexible array, if not inside a struct or has non-zero size */ -	if (!acc->name || arr->nelems > 0) -		return false; - -	/* has to be the last member of enclosing struct */ -	t = btf__type_by_id(btf, acc->type_id); -	return acc->idx == btf_vlen(t) - 1; -} - -static const char *core_relo_kind_str(enum bpf_core_relo_kind kind) -{ -	switch (kind) { -	case BPF_FIELD_BYTE_OFFSET: return "byte_off"; -	case BPF_FIELD_BYTE_SIZE: return "byte_sz"; -	case BPF_FIELD_EXISTS: return "field_exists"; -	case BPF_FIELD_SIGNED: return 
"signed"; -	case BPF_FIELD_LSHIFT_U64: return "lshift_u64"; -	case BPF_FIELD_RSHIFT_U64: return "rshift_u64"; -	case BPF_TYPE_ID_LOCAL: return "local_type_id"; -	case BPF_TYPE_ID_TARGET: return "target_type_id"; -	case BPF_TYPE_EXISTS: return "type_exists"; -	case BPF_TYPE_SIZE: return "type_size"; -	case BPF_ENUMVAL_EXISTS: return "enumval_exists"; -	case BPF_ENUMVAL_VALUE: return "enumval_value"; -	default: return "unknown"; -	} -} - -static bool core_relo_is_field_based(enum bpf_core_relo_kind kind) -{ -	switch (kind) { -	case BPF_FIELD_BYTE_OFFSET: -	case BPF_FIELD_BYTE_SIZE: -	case BPF_FIELD_EXISTS: -	case BPF_FIELD_SIGNED: -	case BPF_FIELD_LSHIFT_U64: -	case BPF_FIELD_RSHIFT_U64: -		return true; -	default: -		return false; -	} -} - -static bool core_relo_is_type_based(enum bpf_core_relo_kind kind) -{ -	switch (kind) { -	case BPF_TYPE_ID_LOCAL: -	case BPF_TYPE_ID_TARGET: -	case BPF_TYPE_EXISTS: -	case BPF_TYPE_SIZE: -		return true; -	default: -		return false; -	} -} - -static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) -{ -	switch (kind) { -	case BPF_ENUMVAL_EXISTS: -	case BPF_ENUMVAL_VALUE: -		return true; -	default: -		return false; -	} -} - -/* - * Turn bpf_core_relo into a low- and high-level spec representation, - * validating correctness along the way, as well as calculating resulting - * field bit offset, specified by accessor string. Low-level spec captures - * every single level of nestedness, including traversing anonymous - * struct/union members. High-level one only captures semantically meaningful - * "turning points": named fields and array indicies. - * E.g., for this case: - * - *   struct sample { - *       int __unimportant; - *       struct { - *           int __1; - *           int __2; - *           int a[7]; - *       }; - *   }; - * - *   struct sample *s = ...; - * - *   int x = &s->a[3]; // access string = '0:1:2:3' - * - * Low-level spec has 1:1 mapping with each element of access string (it's - * just a parsed access string representation): [0, 1, 2, 3]. - * - * High-level spec will capture only 3 points: - *   - intial zero-index access by pointer (&s->... is the same as &s[0]...); - *   - field 'a' access (corresponds to '2' in low-level spec); - *   - array element #3 access (corresponds to '3' in low-level spec). - * - * Type-based relocations (TYPE_EXISTS/TYPE_SIZE, - * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their - * spec and raw_spec are kept empty. - * - * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access - * string to specify enumerator's value index that need to be relocated. 
- */ -static int bpf_core_parse_spec(const struct btf *btf, -			       __u32 type_id, -			       const char *spec_str, -			       enum bpf_core_relo_kind relo_kind, -			       struct bpf_core_spec *spec) -{ -	int access_idx, parsed_len, i; -	struct bpf_core_accessor *acc; -	const struct btf_type *t; -	const char *name; -	__u32 id; -	__s64 sz; - -	if (str_is_empty(spec_str) || *spec_str == ':') -		return -EINVAL; - -	memset(spec, 0, sizeof(*spec)); -	spec->btf = btf; -	spec->root_type_id = type_id; -	spec->relo_kind = relo_kind; - -	/* type-based relocations don't have a field access string */ -	if (core_relo_is_type_based(relo_kind)) { -		if (strcmp(spec_str, "0")) -			return -EINVAL; -		return 0; -	} - -	/* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */ -	while (*spec_str) { -		if (*spec_str == ':') -			++spec_str; -		if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1) -			return -EINVAL; -		if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) -			return -E2BIG; -		spec_str += parsed_len; -		spec->raw_spec[spec->raw_len++] = access_idx; -	} - -	if (spec->raw_len == 0) -		return -EINVAL; - -	t = skip_mods_and_typedefs(btf, type_id, &id); -	if (!t) -		return -EINVAL; - -	access_idx = spec->raw_spec[0]; -	acc = &spec->spec[0]; -	acc->type_id = id; -	acc->idx = access_idx; -	spec->len++; - -	if (core_relo_is_enumval_based(relo_kind)) { -		if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t)) -			return -EINVAL; - -		/* record enumerator name in a first accessor */ -		acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off); -		return 0; -	} - -	if (!core_relo_is_field_based(relo_kind)) -		return -EINVAL; - -	sz = btf__resolve_size(btf, id); -	if (sz < 0) -		return sz; -	spec->bit_offset = access_idx * sz * 8; - -	for (i = 1; i < spec->raw_len; i++) { -		t = skip_mods_and_typedefs(btf, id, &id); -		if (!t) -			return -EINVAL; - -		access_idx = spec->raw_spec[i]; -		acc = &spec->spec[spec->len]; - -		if (btf_is_composite(t)) { -			const struct btf_member *m; -			__u32 bit_offset; - -			if (access_idx >= btf_vlen(t)) -				return -EINVAL; - -			bit_offset = btf_member_bit_offset(t, access_idx); -			spec->bit_offset += bit_offset; - -			m = btf_members(t) + access_idx; -			if (m->name_off) { -				name = btf__name_by_offset(btf, m->name_off); -				if (str_is_empty(name)) -					return -EINVAL; - -				acc->type_id = id; -				acc->idx = access_idx; -				acc->name = name; -				spec->len++; -			} - -			id = m->type; -		} else if (btf_is_array(t)) { -			const struct btf_array *a = btf_array(t); -			bool flex; - -			t = skip_mods_and_typedefs(btf, a->type, &id); -			if (!t) -				return -EINVAL; - -			flex = is_flex_arr(btf, acc - 1, a); -			if (!flex && access_idx >= a->nelems) -				return -EINVAL; - -			spec->spec[spec->len].type_id = id; -			spec->spec[spec->len].idx = access_idx; -			spec->len++; - -			sz = btf__resolve_size(btf, id); -			if (sz < 0) -				return sz; -			spec->bit_offset += access_idx * sz * 8; -		} else { -			pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n", -				type_id, spec_str, i, id, btf_kind_str(t)); -			return -EINVAL; -		} -	} - -	return 0; -} -  static bool bpf_core_is_flavor_sep(const char *s)  {  	/* check X___Y name pattern, where X and Y are not underscores */ @@ -4964,7 +4781,7 @@ static bool bpf_core_is_flavor_sep(const char *s)   * before last triple underscore. Struct name part after last triple   * underscore is ignored by BPF CO-RE relocation during relocation matching.   
*/ -static size_t bpf_core_essential_name_len(const char *name) +size_t bpf_core_essential_name_len(const char *name)  {  	size_t n = strlen(name);  	int i; @@ -4976,34 +4793,20 @@ static size_t bpf_core_essential_name_len(const char *name)  	return n;  } -struct core_cand -{ -	const struct btf *btf; -	const struct btf_type *t; -	const char *name; -	__u32 id; -}; - -/* dynamically sized list of type IDs and its associated struct btf */ -struct core_cand_list { -	struct core_cand *cands; -	int len; -}; - -static void bpf_core_free_cands(struct core_cand_list *cands) +static void bpf_core_free_cands(struct bpf_core_cand_list *cands)  {  	free(cands->cands);  	free(cands);  } -static int bpf_core_add_cands(struct core_cand *local_cand, +static int bpf_core_add_cands(struct bpf_core_cand *local_cand,  			      size_t local_essent_len,  			      const struct btf *targ_btf,  			      const char *targ_btf_name,  			      int targ_start_id, -			      struct core_cand_list *cands) +			      struct bpf_core_cand_list *cands)  { -	struct core_cand *new_cands, *cand; +	struct bpf_core_cand *new_cands, *cand;  	const struct btf_type *t;  	const char *targ_name;  	size_t targ_essent_len; @@ -5139,11 +4942,11 @@ err_out:  	return 0;  } -static struct core_cand_list * +static struct bpf_core_cand_list *  bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)  { -	struct core_cand local_cand = {}; -	struct core_cand_list *cands; +	struct bpf_core_cand local_cand = {}; +	struct bpf_core_cand_list *cands;  	const struct btf *main_btf;  	size_t local_essent_len;  	int err, i; @@ -5197,165 +5000,6 @@ err_out:  	return ERR_PTR(err);  } -/* Check two types for compatibility for the purpose of field access - * relocation. const/volatile/restrict and typedefs are skipped to ensure we - * are relocating semantically compatible entities: - *   - any two STRUCTs/UNIONs are compatible and can be mixed; - *   - any two FWDs are compatible, if their names match (modulo flavor suffix); - *   - any two PTRs are always compatible; - *   - for ENUMs, names should be the same (ignoring flavor suffix) or at - *     least one of enums should be anonymous; - *   - for ENUMs, check sizes, names are ignored; - *   - for INT, size and signedness are ignored; - *   - any two FLOATs are always compatible; - *   - for ARRAY, dimensionality is ignored, element types are checked for - *     compatibility recursively; - *   - everything else shouldn't be ever a target of relocation. - * These rules are not set in stone and probably will be adjusted as we get - * more experience with using BPF CO-RE relocations. 
- */ -static int bpf_core_fields_are_compat(const struct btf *local_btf, -				      __u32 local_id, -				      const struct btf *targ_btf, -				      __u32 targ_id) -{ -	const struct btf_type *local_type, *targ_type; - -recur: -	local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id); -	targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); -	if (!local_type || !targ_type) -		return -EINVAL; - -	if (btf_is_composite(local_type) && btf_is_composite(targ_type)) -		return 1; -	if (btf_kind(local_type) != btf_kind(targ_type)) -		return 0; - -	switch (btf_kind(local_type)) { -	case BTF_KIND_PTR: -	case BTF_KIND_FLOAT: -		return 1; -	case BTF_KIND_FWD: -	case BTF_KIND_ENUM: { -		const char *local_name, *targ_name; -		size_t local_len, targ_len; - -		local_name = btf__name_by_offset(local_btf, -						 local_type->name_off); -		targ_name = btf__name_by_offset(targ_btf, targ_type->name_off); -		local_len = bpf_core_essential_name_len(local_name); -		targ_len = bpf_core_essential_name_len(targ_name); -		/* one of them is anonymous or both w/ same flavor-less names */ -		return local_len == 0 || targ_len == 0 || -		       (local_len == targ_len && -			strncmp(local_name, targ_name, local_len) == 0); -	} -	case BTF_KIND_INT: -		/* just reject deprecated bitfield-like integers; all other -		 * integers are by default compatible between each other -		 */ -		return btf_int_offset(local_type) == 0 && -		       btf_int_offset(targ_type) == 0; -	case BTF_KIND_ARRAY: -		local_id = btf_array(local_type)->type; -		targ_id = btf_array(targ_type)->type; -		goto recur; -	default: -		pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n", -			btf_kind(local_type), local_id, targ_id); -		return 0; -	} -} - -/* - * Given single high-level named field accessor in local type, find - * corresponding high-level accessor for a target type. Along the way, - * maintain low-level spec for target as well. Also keep updating target - * bit offset. - * - * Searching is performed through recursive exhaustive enumeration of all - * fields of a struct/union. If there are any anonymous (embedded) - * structs/unions, they are recursively searched as well. If field with - * desired name is found, check compatibility between local and target types, - * before returning result. - * - * 1 is returned, if field is found. - * 0 is returned if no compatible field is found. - * <0 is returned on error. 
- */ -static int bpf_core_match_member(const struct btf *local_btf, -				 const struct bpf_core_accessor *local_acc, -				 const struct btf *targ_btf, -				 __u32 targ_id, -				 struct bpf_core_spec *spec, -				 __u32 *next_targ_id) -{ -	const struct btf_type *local_type, *targ_type; -	const struct btf_member *local_member, *m; -	const char *local_name, *targ_name; -	__u32 local_id; -	int i, n, found; - -	targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); -	if (!targ_type) -		return -EINVAL; -	if (!btf_is_composite(targ_type)) -		return 0; - -	local_id = local_acc->type_id; -	local_type = btf__type_by_id(local_btf, local_id); -	local_member = btf_members(local_type) + local_acc->idx; -	local_name = btf__name_by_offset(local_btf, local_member->name_off); - -	n = btf_vlen(targ_type); -	m = btf_members(targ_type); -	for (i = 0; i < n; i++, m++) { -		__u32 bit_offset; - -		bit_offset = btf_member_bit_offset(targ_type, i); - -		/* too deep struct/union/array nesting */ -		if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) -			return -E2BIG; - -		/* speculate this member will be the good one */ -		spec->bit_offset += bit_offset; -		spec->raw_spec[spec->raw_len++] = i; - -		targ_name = btf__name_by_offset(targ_btf, m->name_off); -		if (str_is_empty(targ_name)) { -			/* embedded struct/union, we need to go deeper */ -			found = bpf_core_match_member(local_btf, local_acc, -						      targ_btf, m->type, -						      spec, next_targ_id); -			if (found) /* either found or error */ -				return found; -		} else if (strcmp(local_name, targ_name) == 0) { -			/* matching named field */ -			struct bpf_core_accessor *targ_acc; - -			targ_acc = &spec->spec[spec->len++]; -			targ_acc->type_id = targ_id; -			targ_acc->idx = i; -			targ_acc->name = targ_name; - -			*next_targ_id = m->type; -			found = bpf_core_fields_are_compat(local_btf, -							   local_member->type, -							   targ_btf, m->type); -			if (!found) -				spec->len--; /* pop accessor */ -			return found; -		} -		/* member turned out not to be what we looked for */ -		spec->bit_offset -= bit_offset; -		spec->raw_len--; -	} - -	return 0; -} -  /* Check local and target types for compatibility. This check is used for   * type-based CO-RE relocations and follow slightly different rules than   * field-based relocations. This function assumes that root types were already @@ -5375,8 +5019,8 @@ static int bpf_core_match_member(const struct btf *local_btf,   * These rules are not set in stone and probably will be adjusted as we get   * more experience with using BPF CO-RE relocations.   */ -static int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, -				     const struct btf *targ_btf, __u32 targ_id) +int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, +			      const struct btf *targ_btf, __u32 targ_id)  {  	const struct btf_type *local_type, *targ_type;  	int depth = 32; /* max recursion depth */ @@ -5450,671 +5094,6 @@ recur:  	}  } -/* - * Try to match local spec to a target type and, if successful, produce full - * target spec (high-level, low-level + bit offset). 
- */ -static int bpf_core_spec_match(struct bpf_core_spec *local_spec, -			       const struct btf *targ_btf, __u32 targ_id, -			       struct bpf_core_spec *targ_spec) -{ -	const struct btf_type *targ_type; -	const struct bpf_core_accessor *local_acc; -	struct bpf_core_accessor *targ_acc; -	int i, sz, matched; - -	memset(targ_spec, 0, sizeof(*targ_spec)); -	targ_spec->btf = targ_btf; -	targ_spec->root_type_id = targ_id; -	targ_spec->relo_kind = local_spec->relo_kind; - -	if (core_relo_is_type_based(local_spec->relo_kind)) { -		return bpf_core_types_are_compat(local_spec->btf, -						 local_spec->root_type_id, -						 targ_btf, targ_id); -	} - -	local_acc = &local_spec->spec[0]; -	targ_acc = &targ_spec->spec[0]; - -	if (core_relo_is_enumval_based(local_spec->relo_kind)) { -		size_t local_essent_len, targ_essent_len; -		const struct btf_enum *e; -		const char *targ_name; - -		/* has to resolve to an enum */ -		targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id); -		if (!btf_is_enum(targ_type)) -			return 0; - -		local_essent_len = bpf_core_essential_name_len(local_acc->name); - -		for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) { -			targ_name = btf__name_by_offset(targ_spec->btf, e->name_off); -			targ_essent_len = bpf_core_essential_name_len(targ_name); -			if (targ_essent_len != local_essent_len) -				continue; -			if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) { -				targ_acc->type_id = targ_id; -				targ_acc->idx = i; -				targ_acc->name = targ_name; -				targ_spec->len++; -				targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx; -				targ_spec->raw_len++; -				return 1; -			} -		} -		return 0; -	} - -	if (!core_relo_is_field_based(local_spec->relo_kind)) -		return -EINVAL; - -	for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) { -		targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, -						   &targ_id); -		if (!targ_type) -			return -EINVAL; - -		if (local_acc->name) { -			matched = bpf_core_match_member(local_spec->btf, -							local_acc, -							targ_btf, targ_id, -							targ_spec, &targ_id); -			if (matched <= 0) -				return matched; -		} else { -			/* for i=0, targ_id is already treated as array element -			 * type (because it's the original struct), for others -			 * we should find array element type first -			 */ -			if (i > 0) { -				const struct btf_array *a; -				bool flex; - -				if (!btf_is_array(targ_type)) -					return 0; - -				a = btf_array(targ_type); -				flex = is_flex_arr(targ_btf, targ_acc - 1, a); -				if (!flex && local_acc->idx >= a->nelems) -					return 0; -				if (!skip_mods_and_typedefs(targ_btf, a->type, -							    &targ_id)) -					return -EINVAL; -			} - -			/* too deep struct/union/array nesting */ -			if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN) -				return -E2BIG; - -			targ_acc->type_id = targ_id; -			targ_acc->idx = local_acc->idx; -			targ_acc->name = NULL; -			targ_spec->len++; -			targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx; -			targ_spec->raw_len++; - -			sz = btf__resolve_size(targ_btf, targ_id); -			if (sz < 0) -				return sz; -			targ_spec->bit_offset += local_acc->idx * sz * 8; -		} -	} - -	return 1; -} - -static int bpf_core_calc_field_relo(const struct bpf_program *prog, -				    const struct bpf_core_relo *relo, -				    const struct bpf_core_spec *spec, -				    __u32 *val, __u32 *field_sz, __u32 *type_id, -				    bool *validate) -{ -	const struct bpf_core_accessor *acc; -	const struct btf_type *t; -	__u32 byte_off, byte_sz, bit_off, 
bit_sz, field_type_id; -	const struct btf_member *m; -	const struct btf_type *mt; -	bool bitfield; -	__s64 sz; - -	*field_sz = 0; - -	if (relo->kind == BPF_FIELD_EXISTS) { -		*val = spec ? 1 : 0; -		return 0; -	} - -	if (!spec) -		return -EUCLEAN; /* request instruction poisoning */ - -	acc = &spec->spec[spec->len - 1]; -	t = btf__type_by_id(spec->btf, acc->type_id); - -	/* a[n] accessor needs special handling */ -	if (!acc->name) { -		if (relo->kind == BPF_FIELD_BYTE_OFFSET) { -			*val = spec->bit_offset / 8; -			/* remember field size for load/store mem size */ -			sz = btf__resolve_size(spec->btf, acc->type_id); -			if (sz < 0) -				return -EINVAL; -			*field_sz = sz; -			*type_id = acc->type_id; -		} else if (relo->kind == BPF_FIELD_BYTE_SIZE) { -			sz = btf__resolve_size(spec->btf, acc->type_id); -			if (sz < 0) -				return -EINVAL; -			*val = sz; -		} else { -			pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n", -				prog->name, relo->kind, relo->insn_off / 8); -			return -EINVAL; -		} -		if (validate) -			*validate = true; -		return 0; -	} - -	m = btf_members(t) + acc->idx; -	mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id); -	bit_off = spec->bit_offset; -	bit_sz = btf_member_bitfield_size(t, acc->idx); - -	bitfield = bit_sz > 0; -	if (bitfield) { -		byte_sz = mt->size; -		byte_off = bit_off / 8 / byte_sz * byte_sz; -		/* figure out smallest int size necessary for bitfield load */ -		while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) { -			if (byte_sz >= 8) { -				/* bitfield can't be read with 64-bit read */ -				pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n", -					prog->name, relo->kind, relo->insn_off / 8); -				return -E2BIG; -			} -			byte_sz *= 2; -			byte_off = bit_off / 8 / byte_sz * byte_sz; -		} -	} else { -		sz = btf__resolve_size(spec->btf, field_type_id); -		if (sz < 0) -			return -EINVAL; -		byte_sz = sz; -		byte_off = spec->bit_offset / 8; -		bit_sz = byte_sz * 8; -	} - -	/* for bitfields, all the relocatable aspects are ambiguous and we -	 * might disagree with compiler, so turn off validation of expected -	 * value, except for signedness -	 */ -	if (validate) -		*validate = !bitfield; - -	switch (relo->kind) { -	case BPF_FIELD_BYTE_OFFSET: -		*val = byte_off; -		if (!bitfield) { -			*field_sz = byte_sz; -			*type_id = field_type_id; -		} -		break; -	case BPF_FIELD_BYTE_SIZE: -		*val = byte_sz; -		break; -	case BPF_FIELD_SIGNED: -		/* enums will be assumed unsigned */ -		*val = btf_is_enum(mt) || -		       (btf_int_encoding(mt) & BTF_INT_SIGNED); -		if (validate) -			*validate = true; /* signedness is never ambiguous */ -		break; -	case BPF_FIELD_LSHIFT_U64: -#if __BYTE_ORDER == __LITTLE_ENDIAN -		*val = 64 - (bit_off + bit_sz - byte_off  * 8); -#else -		*val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8); -#endif -		break; -	case BPF_FIELD_RSHIFT_U64: -		*val = 64 - bit_sz; -		if (validate) -			*validate = true; /* right shift is never ambiguous */ -		break; -	case BPF_FIELD_EXISTS: -	default: -		return -EOPNOTSUPP; -	} - -	return 0; -} - -static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, -				   const struct bpf_core_spec *spec, -				   __u32 *val) -{ -	__s64 sz; - -	/* type-based relos return zero when target type is not found */ -	if (!spec) { -		*val = 0; -		return 0; -	} - -	switch (relo->kind) { -	case BPF_TYPE_ID_TARGET: -		*val = spec->root_type_id; -		break; -	case BPF_TYPE_EXISTS: -		*val = 1; -		break; -	case BPF_TYPE_SIZE: -		sz = btf__resolve_size(spec->btf, 
spec->root_type_id); -		if (sz < 0) -			return -EINVAL; -		*val = sz; -		break; -	case BPF_TYPE_ID_LOCAL: -	/* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */ -	default: -		return -EOPNOTSUPP; -	} - -	return 0; -} - -static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo, -				      const struct bpf_core_spec *spec, -				      __u32 *val) -{ -	const struct btf_type *t; -	const struct btf_enum *e; - -	switch (relo->kind) { -	case BPF_ENUMVAL_EXISTS: -		*val = spec ? 1 : 0; -		break; -	case BPF_ENUMVAL_VALUE: -		if (!spec) -			return -EUCLEAN; /* request instruction poisoning */ -		t = btf__type_by_id(spec->btf, spec->spec[0].type_id); -		e = btf_enum(t) + spec->spec[0].idx; -		*val = e->val; -		break; -	default: -		return -EOPNOTSUPP; -	} - -	return 0; -} - -struct bpf_core_relo_res -{ -	/* expected value in the instruction, unless validate == false */ -	__u32 orig_val; -	/* new value that needs to be patched up to */ -	__u32 new_val; -	/* relocation unsuccessful, poison instruction, but don't fail load */ -	bool poison; -	/* some relocations can't be validated against orig_val */ -	bool validate; -	/* for field byte offset relocations or the forms: -	 *     *(T *)(rX + <off>) = rY -	 *     rX = *(T *)(rY + <off>), -	 * we remember original and resolved field size to adjust direct -	 * memory loads of pointers and integers; this is necessary for 32-bit -	 * host kernel architectures, but also allows to automatically -	 * relocate fields that were resized from, e.g., u32 to u64, etc. -	 */ -	bool fail_memsz_adjust; -	__u32 orig_sz; -	__u32 orig_type_id; -	__u32 new_sz; -	__u32 new_type_id; -}; - -/* Calculate original and target relocation values, given local and target - * specs and relocation kind. These values are calculated for each candidate. - * If there are multiple candidates, resulting values should all be consistent - * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity. - * If instruction has to be poisoned, *poison will be set to true. - */ -static int bpf_core_calc_relo(const struct bpf_program *prog, -			      const struct bpf_core_relo *relo, -			      int relo_idx, -			      const struct bpf_core_spec *local_spec, -			      const struct bpf_core_spec *targ_spec, -			      struct bpf_core_relo_res *res) -{ -	int err = -EOPNOTSUPP; - -	res->orig_val = 0; -	res->new_val = 0; -	res->poison = false; -	res->validate = true; -	res->fail_memsz_adjust = false; -	res->orig_sz = res->new_sz = 0; -	res->orig_type_id = res->new_type_id = 0; - -	if (core_relo_is_field_based(relo->kind)) { -		err = bpf_core_calc_field_relo(prog, relo, local_spec, -					       &res->orig_val, &res->orig_sz, -					       &res->orig_type_id, &res->validate); -		err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec, -						      &res->new_val, &res->new_sz, -						      &res->new_type_id, NULL); -		if (err) -			goto done; -		/* Validate if it's safe to adjust load/store memory size. -		 * Adjustments are performed only if original and new memory -		 * sizes differ. 
-		 */ -		res->fail_memsz_adjust = false; -		if (res->orig_sz != res->new_sz) { -			const struct btf_type *orig_t, *new_t; - -			orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id); -			new_t = btf__type_by_id(targ_spec->btf, res->new_type_id); - -			/* There are two use cases in which it's safe to -			 * adjust load/store's mem size: -			 *   - reading a 32-bit kernel pointer, while on BPF -			 *   size pointers are always 64-bit; in this case -			 *   it's safe to "downsize" instruction size due to -			 *   pointer being treated as unsigned integer with -			 *   zero-extended upper 32-bits; -			 *   - reading unsigned integers, again due to -			 *   zero-extension is preserving the value correctly. -			 * -			 * In all other cases it's incorrect to attempt to -			 * load/store field because read value will be -			 * incorrect, so we poison relocated instruction. -			 */ -			if (btf_is_ptr(orig_t) && btf_is_ptr(new_t)) -				goto done; -			if (btf_is_int(orig_t) && btf_is_int(new_t) && -			    btf_int_encoding(orig_t) != BTF_INT_SIGNED && -			    btf_int_encoding(new_t) != BTF_INT_SIGNED) -				goto done; - -			/* mark as invalid mem size adjustment, but this will -			 * only be checked for LDX/STX/ST insns -			 */ -			res->fail_memsz_adjust = true; -		} -	} else if (core_relo_is_type_based(relo->kind)) { -		err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val); -		err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val); -	} else if (core_relo_is_enumval_based(relo->kind)) { -		err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val); -		err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val); -	} - -done: -	if (err == -EUCLEAN) { -		/* EUCLEAN is used to signal instruction poisoning request */ -		res->poison = true; -		err = 0; -	} else if (err == -EOPNOTSUPP) { -		/* EOPNOTSUPP means unknown/unsupported relocation */ -		pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n", -			prog->name, relo_idx, core_relo_kind_str(relo->kind), -			relo->kind, relo->insn_off / 8); -	} - -	return err; -} - -/* - * Turn instruction for which CO_RE relocation failed into invalid one with - * distinct signature. - */ -static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx, -				 int insn_idx, struct bpf_insn *insn) -{ -	pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n", -		 prog->name, relo_idx, insn_idx); -	insn->code = BPF_JMP | BPF_CALL; -	insn->dst_reg = 0; -	insn->src_reg = 0; -	insn->off = 0; -	/* if this instruction is reachable (not a dead code), -	 * verifier will complain with the following message: -	 * invalid func unknown#195896080 -	 */ -	insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */ -} - -static int insn_bpf_size_to_bytes(struct bpf_insn *insn) -{ -	switch (BPF_SIZE(insn->code)) { -	case BPF_DW: return 8; -	case BPF_W: return 4; -	case BPF_H: return 2; -	case BPF_B: return 1; -	default: return -1; -	} -} - -static int insn_bytes_to_bpf_size(__u32 sz) -{ -	switch (sz) { -	case 8: return BPF_DW; -	case 4: return BPF_W; -	case 2: return BPF_H; -	case 1: return BPF_B; -	default: return -1; -	} -} - -/* - * Patch relocatable BPF instruction. - * - * Patched value is determined by relocation kind and target specification. - * For existence relocations target spec will be NULL if field/type is not found. - * Expected insn->imm value is determined using relocation kind and local - * spec, and is checked before patching instruction. 
If actual insn->imm value - * is wrong, bail out with error. - * - * Currently supported classes of BPF instruction are: - * 1. rX = <imm> (assignment with immediate operand); - * 2. rX += <imm> (arithmetic operations with immediate operand); - * 3. rX = <imm64> (load with 64-bit immediate value); - * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64}; - * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64}; - * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}. - */ -static int bpf_core_patch_insn(struct bpf_program *prog, -			       const struct bpf_core_relo *relo, -			       int relo_idx, -			       const struct bpf_core_relo_res *res) -{ -	__u32 orig_val, new_val; -	struct bpf_insn *insn; -	int insn_idx; -	__u8 class; - -	if (relo->insn_off % BPF_INSN_SZ) -		return -EINVAL; -	insn_idx = relo->insn_off / BPF_INSN_SZ; -	/* adjust insn_idx from section frame of reference to the local -	 * program's frame of reference; (sub-)program code is not yet -	 * relocated, so it's enough to just subtract in-section offset -	 */ -	insn_idx = insn_idx - prog->sec_insn_off; -	insn = &prog->insns[insn_idx]; -	class = BPF_CLASS(insn->code); - -	if (res->poison) { -poison: -		/* poison second part of ldimm64 to avoid confusing error from -		 * verifier about "unknown opcode 00" -		 */ -		if (is_ldimm64_insn(insn)) -			bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1); -		bpf_core_poison_insn(prog, relo_idx, insn_idx, insn); -		return 0; -	} - -	orig_val = res->orig_val; -	new_val = res->new_val; - -	switch (class) { -	case BPF_ALU: -	case BPF_ALU64: -		if (BPF_SRC(insn->code) != BPF_K) -			return -EINVAL; -		if (res->validate && insn->imm != orig_val) { -			pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n", -				prog->name, relo_idx, -				insn_idx, insn->imm, orig_val, new_val); -			return -EINVAL; -		} -		orig_val = insn->imm; -		insn->imm = new_val; -		pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n", -			 prog->name, relo_idx, insn_idx, -			 orig_val, new_val); -		break; -	case BPF_LDX: -	case BPF_ST: -	case BPF_STX: -		if (res->validate && insn->off != orig_val) { -			pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n", -				prog->name, relo_idx, insn_idx, insn->off, orig_val, new_val); -			return -EINVAL; -		} -		if (new_val > SHRT_MAX) { -			pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n", -				prog->name, relo_idx, insn_idx, new_val); -			return -ERANGE; -		} -		if (res->fail_memsz_adjust) { -			pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. 
" -				"Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n", -				prog->name, relo_idx, insn_idx); -			goto poison; -		} - -		orig_val = insn->off; -		insn->off = new_val; -		pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n", -			 prog->name, relo_idx, insn_idx, orig_val, new_val); - -		if (res->new_sz != res->orig_sz) { -			int insn_bytes_sz, insn_bpf_sz; - -			insn_bytes_sz = insn_bpf_size_to_bytes(insn); -			if (insn_bytes_sz != res->orig_sz) { -				pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n", -					prog->name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz); -				return -EINVAL; -			} - -			insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz); -			if (insn_bpf_sz < 0) { -				pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n", -					prog->name, relo_idx, insn_idx, res->new_sz); -				return -EINVAL; -			} - -			insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code); -			pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n", -				 prog->name, relo_idx, insn_idx, res->orig_sz, res->new_sz); -		} -		break; -	case BPF_LD: { -		__u64 imm; - -		if (!is_ldimm64_insn(insn) || -		    insn[0].src_reg != 0 || insn[0].off != 0 || -		    insn_idx + 1 >= prog->insns_cnt || -		    insn[1].code != 0 || insn[1].dst_reg != 0 || -		    insn[1].src_reg != 0 || insn[1].off != 0) { -			pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n", -				prog->name, relo_idx, insn_idx); -			return -EINVAL; -		} - -		imm = insn[0].imm + ((__u64)insn[1].imm << 32); -		if (res->validate && imm != orig_val) { -			pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n", -				prog->name, relo_idx, -				insn_idx, (unsigned long long)imm, -				orig_val, new_val); -			return -EINVAL; -		} - -		insn[0].imm = new_val; -		insn[1].imm = 0; /* currently only 32-bit values are supported */ -		pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n", -			 prog->name, relo_idx, insn_idx, -			 (unsigned long long)imm, new_val); -		break; -	} -	default: -		pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n", -			prog->name, relo_idx, insn_idx, insn->code, -			insn->src_reg, insn->dst_reg, insn->off, insn->imm); -		return -EINVAL; -	} - -	return 0; -} - -/* Output spec definition in the format: - * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>, - * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b - */ -static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) -{ -	const struct btf_type *t; -	const struct btf_enum *e; -	const char *s; -	__u32 type_id; -	int i; - -	type_id = spec->root_type_id; -	t = btf__type_by_id(spec->btf, type_id); -	s = btf__name_by_offset(spec->btf, t->name_off); - -	libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? 
"<anon>" : s); - -	if (core_relo_is_type_based(spec->relo_kind)) -		return; - -	if (core_relo_is_enumval_based(spec->relo_kind)) { -		t = skip_mods_and_typedefs(spec->btf, type_id, NULL); -		e = btf_enum(t) + spec->raw_spec[0]; -		s = btf__name_by_offset(spec->btf, e->name_off); - -		libbpf_print(level, "::%s = %u", s, e->val); -		return; -	} - -	if (core_relo_is_field_based(spec->relo_kind)) { -		for (i = 0; i < spec->len; i++) { -			if (spec->spec[i].name) -				libbpf_print(level, ".%s", spec->spec[i].name); -			else if (i > 0 || spec->spec[i].idx > 0) -				libbpf_print(level, "[%u]", spec->spec[i].idx); -		} - -		libbpf_print(level, " ("); -		for (i = 0; i < spec->raw_len; i++) -			libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]); - -		if (spec->bit_offset % 8) -			libbpf_print(level, " @ offset %u.%u)", -				     spec->bit_offset / 8, spec->bit_offset % 8); -		else -			libbpf_print(level, " @ offset %u)", spec->bit_offset / 8); -		return; -	} -} -  static size_t bpf_core_hash_fn(const void *key, void *ctx)  {  	return (size_t)key; @@ -6130,73 +5109,33 @@ static void *u32_as_hash_key(__u32 x)  	return (void *)(uintptr_t)x;  } -/* - * CO-RE relocate single instruction. - * - * The outline and important points of the algorithm: - * 1. For given local type, find corresponding candidate target types. - *    Candidate type is a type with the same "essential" name, ignoring - *    everything after last triple underscore (___). E.g., `sample`, - *    `sample___flavor_one`, `sample___flavor_another_one`, are all candidates - *    for each other. Names with triple underscore are referred to as - *    "flavors" and are useful, among other things, to allow to - *    specify/support incompatible variations of the same kernel struct, which - *    might differ between different kernel versions and/or build - *    configurations. - * - *    N.B. Struct "flavors" could be generated by bpftool's BTF-to-C - *    converter, when deduplicated BTF of a kernel still contains more than - *    one different types with the same name. In that case, ___2, ___3, etc - *    are appended starting from second name conflict. But start flavors are - *    also useful to be defined "locally", in BPF program, to extract same - *    data from incompatible changes between different kernel - *    versions/configurations. For instance, to handle field renames between - *    kernel versions, one can use two flavors of the struct name with the - *    same common name and use conditional relocations to extract that field, - *    depending on target kernel version. - * 2. For each candidate type, try to match local specification to this - *    candidate target type. Matching involves finding corresponding - *    high-level spec accessors, meaning that all named fields should match, - *    as well as all array accesses should be within the actual bounds. Also, - *    types should be compatible (see bpf_core_fields_are_compat for details). - * 3. It is supported and expected that there might be multiple flavors - *    matching the spec. As long as all the specs resolve to the same set of - *    offsets across all candidates, there is no error. If there is any - *    ambiguity, CO-RE relocation will fail. This is necessary to accomodate - *    imprefection of BTF deduplication, which can cause slight duplication of - *    the same BTF type, if some directly or indirectly referenced (by - *    pointer) type gets resolved to different actual types in different - *    object files. 
If such situation occurs, deduplicated BTF will end up - *    with two (or more) structurally identical types, which differ only in - *    types they refer to through pointer. This should be OK in most cases and - *    is not an error. - * 4. Candidate types search is performed by linearly scanning through all - *    types in target BTF. It is anticipated that this is overall more - *    efficient memory-wise and not significantly worse (if not better) - *    CPU-wise compared to prebuilding a map from all local type names to - *    a list of candidate type names. It's also sped up by caching resolved - *    list of matching candidates per each local "root" type ID, that has at - *    least one bpf_core_relo associated with it. This list is shared - *    between multiple relocations for the same type ID and is updated as some - *    of the candidates are pruned due to structural incompatibility. - */  static int bpf_core_apply_relo(struct bpf_program *prog,  			       const struct bpf_core_relo *relo,  			       int relo_idx,  			       const struct btf *local_btf,  			       struct hashmap *cand_cache)  { -	struct bpf_core_spec local_spec, cand_spec, targ_spec = {};  	const void *type_key = u32_as_hash_key(relo->type_id); -	struct bpf_core_relo_res cand_res, targ_res; +	struct bpf_core_cand_list *cands = NULL; +	const char *prog_name = prog->name;  	const struct btf_type *local_type;  	const char *local_name; -	struct core_cand_list *cands = NULL; -	__u32 local_id; -	const char *spec_str; -	int i, j, err; +	__u32 local_id = relo->type_id; +	struct bpf_insn *insn; +	int insn_idx, err; + +	if (relo->insn_off % BPF_INSN_SZ) +		return -EINVAL; +	insn_idx = relo->insn_off / BPF_INSN_SZ; +	/* adjust insn_idx from section frame of reference to the local +	 * program's frame of reference; (sub-)program code is not yet +	 * relocated, so it's enough to just subtract in-section offset +	 */ +	insn_idx = insn_idx - prog->sec_insn_off; +	if (insn_idx > prog->insns_cnt) +		return -EINVAL; +	insn = &prog->insns[insn_idx]; -	local_id = relo->type_id;  	local_type = btf__type_by_id(local_btf, local_id);  	if (!local_type)  		return -EINVAL; @@ -6205,51 +5144,19 @@ static int bpf_core_apply_relo(struct bpf_program *prog,  	if (!local_name)  		return -EINVAL; -	spec_str = btf__name_by_offset(local_btf, relo->access_str_off); -	if (str_is_empty(spec_str)) -		return -EINVAL; -  	if (prog->obj->gen_loader) { -		pr_warn("// TODO core_relo: prog %td insn[%d] %s %s kind %d\n", +		pr_warn("// TODO core_relo: prog %td insn[%d] %s kind %d\n",  			prog - prog->obj->programs, relo->insn_off / 8, -			local_name, spec_str, relo->kind); +			local_name, relo->kind);  		return -ENOTSUP;  	} -	err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec); -	if (err) { -		pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n", -			prog->name, relo_idx, local_id, btf_kind_str(local_type), -			str_is_empty(local_name) ? 
"<anon>" : local_name, -			spec_str, err); -		return -EINVAL; -	} - -	pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog->name, -		 relo_idx, core_relo_kind_str(relo->kind), relo->kind); -	bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec); -	libbpf_print(LIBBPF_DEBUG, "\n"); -	/* TYPE_ID_LOCAL relo is special and doesn't need candidate search */ -	if (relo->kind == BPF_TYPE_ID_LOCAL) { -		targ_res.validate = true; -		targ_res.poison = false; -		targ_res.orig_val = local_spec.root_type_id; -		targ_res.new_val = local_spec.root_type_id; -		goto patch_insn; -	} - -	/* libbpf doesn't support candidate search for anonymous types */ -	if (str_is_empty(spec_str)) { -		pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n", -			prog->name, relo_idx, core_relo_kind_str(relo->kind), relo->kind); -		return -EOPNOTSUPP; -	} - -	if (!hashmap__find(cand_cache, type_key, (void **)&cands)) { +	if (relo->kind != BPF_TYPE_ID_LOCAL && +	    !hashmap__find(cand_cache, type_key, (void **)&cands)) {  		cands = bpf_core_find_cands(prog->obj, local_btf, local_id);  		if (IS_ERR(cands)) {  			pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n", -				prog->name, relo_idx, local_id, btf_kind_str(local_type), +				prog_name, relo_idx, local_id, btf_kind_str(local_type),  				local_name, PTR_ERR(cands));  			return PTR_ERR(cands);  		} @@ -6260,97 +5167,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog,  		}  	} -	for (i = 0, j = 0; i < cands->len; i++) { -		err = bpf_core_spec_match(&local_spec, cands->cands[i].btf, -					  cands->cands[i].id, &cand_spec); -		if (err < 0) { -			pr_warn("prog '%s': relo #%d: error matching candidate #%d ", -				prog->name, relo_idx, i); -			bpf_core_dump_spec(LIBBPF_WARN, &cand_spec); -			libbpf_print(LIBBPF_WARN, ": %d\n", err); -			return err; -		} - -		pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog->name, -			 relo_idx, err == 0 ? "non-matching" : "matching", i); -		bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec); -		libbpf_print(LIBBPF_DEBUG, "\n"); - -		if (err == 0) -			continue; - -		err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, &cand_spec, &cand_res); -		if (err) -			return err; - -		if (j == 0) { -			targ_res = cand_res; -			targ_spec = cand_spec; -		} else if (cand_spec.bit_offset != targ_spec.bit_offset) { -			/* if there are many field relo candidates, they -			 * should all resolve to the same bit offset -			 */ -			pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n", -				prog->name, relo_idx, cand_spec.bit_offset, -				targ_spec.bit_offset); -			return -EINVAL; -		} else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) { -			/* all candidates should result in the same relocation -			 * decision and value, otherwise it's dangerous to -			 * proceed due to ambiguity -			 */ -			pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n", -				prog->name, relo_idx, -				cand_res.poison ? "failure" : "success", cand_res.new_val, -				targ_res.poison ? "failure" : "success", targ_res.new_val); -			return -EINVAL; -		} - -		cands->cands[j++] = cands->cands[i]; -	} - -	/* -	 * For BPF_FIELD_EXISTS relo or when used BPF program has field -	 * existence checks or kernel version/config checks, it's expected -	 * that we might not find any candidates. 
In this case, if field -	 * wasn't found in any candidate, the list of candidates shouldn't -	 * change at all, we'll just handle relocating appropriately, -	 * depending on relo's kind. -	 */ -	if (j > 0) -		cands->len = j; - -	/* -	 * If no candidates were found, it might be both a programmer error, -	 * as well as expected case, depending whether instruction w/ -	 * relocation is guarded in some way that makes it unreachable (dead -	 * code) if relocation can't be resolved. This is handled in -	 * bpf_core_patch_insn() uniformly by replacing that instruction with -	 * BPF helper call insn (using invalid helper ID). If that instruction -	 * is indeed unreachable, then it will be ignored and eliminated by -	 * verifier. If it was an error, then verifier will complain and point -	 * to a specific instruction number in its log. -	 */ -	if (j == 0) { -		pr_debug("prog '%s': relo #%d: no matching targets found\n", -			 prog->name, relo_idx); - -		/* calculate single target relo result explicitly */ -		err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, NULL, &targ_res); -		if (err) -			return err; -	} - -patch_insn: -	/* bpf_core_patch_insn() should know how to handle missing targ_spec */ -	err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res); -	if (err) { -		pr_warn("prog '%s': relo #%d: failed to patch insn #%zu: %d\n", -			prog->name, relo_idx, relo->insn_off / BPF_INSN_SZ, err); -		return -EINVAL; -	} - -	return 0; +	return bpf_core_apply_relo_insn(prog_name, insn, insn_idx, relo, relo_idx, local_btf, cands);  }  static int @@ -6496,11 +5313,11 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)  				}  				insn[1].imm = ext->kcfg.data_off;  			} else /* EXT_KSYM */ { -				if (ext->ksym.type_id) { /* typed ksyms */ +				if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */  					insn[0].src_reg = BPF_PSEUDO_BTF_ID;  					insn[0].imm = ext->ksym.kernel_btf_id;  					insn[1].imm = ext->ksym.kernel_btf_obj_fd; -				} else { /* typeless ksyms */ +				} else { /* typeless ksyms or unresolved typed ksyms */  					insn[0].imm = (__u32)ext->ksym.addr;  					insn[1].imm = ext->ksym.addr >> 32;  				} @@ -7190,7 +6007,7 @@ static int bpf_object__collect_relos(struct bpf_object *obj)  	for (i = 0; i < obj->nr_programs; i++) {  		struct bpf_program *p = &obj->programs[i]; -		 +  		if (!p->nr_reloc)  			continue; @@ -7554,7 +6371,7 @@ static struct bpf_object *  __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,  		   const struct bpf_object_open_opts *opts)  { -	const char *obj_name, *kconfig; +	const char *obj_name, *kconfig, *btf_tmp_path;  	struct bpf_program *prog;  	struct bpf_object *obj;  	char tmp_name[64]; @@ -7585,11 +6402,26 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,  	if (IS_ERR(obj))  		return obj; +	btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL); +	if (btf_tmp_path) { +		if (strlen(btf_tmp_path) >= PATH_MAX) { +			err = -ENAMETOOLONG; +			goto out; +		} +		obj->btf_custom_path = strdup(btf_tmp_path); +		if (!obj->btf_custom_path) { +			err = -ENOMEM; +			goto out; +		} +	} +  	kconfig = OPTS_GET(opts, kconfig, NULL);  	if (kconfig) {  		obj->kconfig = strdup(kconfig); -		if (!obj->kconfig) -			return ERR_PTR(-ENOMEM); +		if (!obj->kconfig) { +			err = -ENOMEM; +			goto out; +		}  	}  	err = bpf_object__elf_init(obj); @@ -7812,11 +6644,8 @@ static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,  				break;  		}  	} -	if (id <= 0) { -		pr_warn("extern (%s 
@@ -7812,11 +6644,8 @@ static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
 				break;
 		}
 	}
-	if (id <= 0) {
-		pr_warn("extern (%s ksym) '%s': failed to find BTF ID in kernel BTF(s).\n",
-			__btf_kind_str(kind), ksym_name);
+	if (id <= 0)
 		return -ESRCH;
-	}
 
 	*res_btf = btf;
 	*res_btf_fd = btf_fd;
@@ -7833,8 +6662,13 @@ static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
 	struct btf *btf = NULL;
 
 	id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &btf_fd);
-	if (id < 0)
+	if (id == -ESRCH && ext->is_weak) {
+		return 0;
+	} else if (id < 0) {
+		pr_warn("extern (var ksym) '%s': not found in kernel BTF\n",
+			ext->name);
 		return id;
+	}
 
 	/* find local type_id */
 	local_type_id = ext->ksym.type_id;
@@ -8055,7 +6889,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
 	err = err ? : bpf_object__sanitize_maps(obj);
 	err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
 	err = err ? : bpf_object__create_maps(obj);
-	err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
+	err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : attr->target_btf_path);
 	err = err ? : bpf_object__load_progs(obj, attr->log_level);
 
 	if (obj->gen_loader) {
@@ -8450,6 +7284,11 @@ const char *bpf_map__get_pin_path(const struct bpf_map *map)
 	return map->pin_path;
 }
 
+const char *bpf_map__pin_path(const struct bpf_map *map)
+{
+	return map->pin_path;
+}
+
 bool bpf_map__is_pinned(const struct bpf_map *map)
 {
 	return map->pinned;
@@ -8702,6 +7541,7 @@ void bpf_object__close(struct bpf_object *obj)
 	for (i = 0; i < obj->nr_maps; i++)
 		bpf_map__destroy(&obj->maps[i]);
 
+	zfree(&obj->btf_custom_path);
 	zfree(&obj->kconfig);
 	zfree(&obj->externs);
 	obj->nr_extern = 0;
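
On the BPF side, the weak-ksym path above corresponds to an extern along these lines (a sketch; the variable name is only an example, and __weak is assumed to come from bpf_helpers.h). If the kernel's BTF lacks the symbol, the object now loads anyway and the extern's address reads as NULL rather than the load failing with -ESRCH:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

extern const int bpf_prog_active __ksym __weak;

SEC("raw_tp/sys_enter")
int handle(void *ctx)
{
	/* unresolved weak ksym => address is NULL at runtime */
	if (&bpf_prog_active)
		bpf_printk("ksym resolved on this kernel");
	return 0;
}

char LICENSE[] SEC("license") = "GPL";
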
@@ -9471,7 +8311,7 @@ static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
 	ret = snprintf(btf_type_name, sizeof(btf_type_name),
 		       "%s%s", prefix, name);
 	/* snprintf returns the number of characters written excluding the
-	 * the terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
+	 * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
 	 * indicates truncation.
 	 */
 	if (ret < 0 || ret >= sizeof(btf_type_name))
@@ -9495,7 +8335,7 @@ int libbpf_find_vmlinux_btf_id(const char *name,
 	struct btf *btf;
 	int err;
 
-	btf = libbpf_find_kernel_btf();
+	btf = btf__load_vmlinux_btf();
 	err = libbpf_get_error(btf);
 	if (err) {
 		pr_warn("vmlinux BTF is not found\n");
@@ -9514,8 +8354,8 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
 {
 	struct bpf_prog_info_linear *info_linear;
 	struct bpf_prog_info *info;
-	struct btf *btf = NULL;
-	int err = -EINVAL;
+	struct btf *btf;
+	int err;
 
 	info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0);
 	err = libbpf_get_error(info_linear);
@@ -9524,12 +8364,15 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
 			attach_prog_fd);
 		return err;
 	}
+
+	err = -EINVAL;
 	info = &info_linear->info;
 	if (!info->btf_id) {
 		pr_warn("The target program doesn't have BTF\n");
 		goto out;
 	}
-	if (btf__get_from_id(info->btf_id, &btf)) {
+	btf = btf__load_from_kernel_by_id(info->btf_id);
+	if (libbpf_get_error(btf)) {
 		pr_warn("Failed to get BTF of the program\n");
 		goto out;
 	}
@@ -10003,7 +8846,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
 
 struct bpf_link {
 	int (*detach)(struct bpf_link *link);
-	int (*destroy)(struct bpf_link *link);
+	void (*dealloc)(struct bpf_link *link);
 	char *pin_path;		/* NULL, if not pinned */
 	int fd;			/* hook FD, -1 if not applicable */
 	bool disconnected;
@@ -10013,7 +8856,7 @@ struct bpf_link {
 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
 {
 	int ret;
-	 
+
 	ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
 	return libbpf_err_errno(ret);
 }
@@ -10042,11 +8885,12 @@ int bpf_link__destroy(struct bpf_link *link)
 	if (!link->disconnected && link->detach)
 		err = link->detach(link);
-	if (link->destroy)
-		link->destroy(link);
 	if (link->pin_path)
 		free(link->pin_path);
-	free(link);
+	if (link->dealloc)
+		link->dealloc(link);
+	else
+		free(link);
 
 	return libbpf_err(err);
 }
@@ -10143,23 +8987,42 @@ int bpf_link__unpin(struct bpf_link *link)
 	return 0;
 }
 
-static int bpf_link__detach_perf_event(struct bpf_link *link)
+struct bpf_link_perf {
+	struct bpf_link link;
+	int perf_event_fd;
+};
+
+static int bpf_link_perf_detach(struct bpf_link *link)
 {
-	int err;
+	struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
+	int err = 0;
 
-	err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0);
-	if (err)
+	if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0)
 		err = -errno;
+
+	if (perf_link->perf_event_fd != link->fd)
+		close(perf_link->perf_event_fd);
 	close(link->fd);
+
 	return libbpf_err(err);
 }
 
-struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pfd)
+static void bpf_link_perf_dealloc(struct bpf_link *link)
+{
+	struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
+
+	free(perf_link);
+}
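
The embedding above is the usual container_of() pattern: struct bpf_link_perf nests the generic struct bpf_link as a member, and each callback recovers the outer struct from the inner pointer. libbpf carries its own equivalent of the classic macro internally; roughly:

#include <stddef.h>

/* recover the enclosing struct from a pointer to one of its members */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

Splitting detach from dealloc lets bpf_link__destroy() free the larger wrapper struct while callers continue to see only a struct bpf_link *.
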
+
+struct bpf_link *bpf_program__attach_perf_event_opts(struct bpf_program *prog, int pfd,
+						     const struct bpf_perf_event_opts *opts)
 {
 	char errmsg[STRERR_BUFSIZE];
-	struct bpf_link *link;
-	int prog_fd, err;
+	struct bpf_link_perf *link;
+	int prog_fd, link_fd = -1, err;
+
+	if (!OPTS_VALID(opts, bpf_perf_event_opts))
+		return libbpf_err_ptr(-EINVAL);
 
 	if (pfd < 0) {
 		pr_warn("prog '%s': invalid perf event FD %d\n",
@@ -10176,27 +9039,59 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pf
 	link = calloc(1, sizeof(*link));
 	if (!link)
 		return libbpf_err_ptr(-ENOMEM);
-	link->detach = &bpf_link__detach_perf_event;
-	link->fd = pfd;
+	link->link.detach = &bpf_link_perf_detach;
+	link->link.dealloc = &bpf_link_perf_dealloc;
+	link->perf_event_fd = pfd;
 
-	if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
-		err = -errno;
-		free(link);
-		pr_warn("prog '%s': failed to attach to pfd %d: %s\n",
-			prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
-		if (err == -EPROTO)
-			pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
-				prog->name, pfd);
-		return libbpf_err_ptr(err);
+	if (kernel_supports(prog->obj, FEAT_PERF_LINK)) {
+		DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts,
+			.perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0));
+
+		link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts);
+		if (link_fd < 0) {
+			err = -errno;
+			pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n",
+				prog->name, pfd,
+				err, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+			goto err_out;
+		}
+		link->link.fd = link_fd;
+	} else {
+		if (OPTS_GET(opts, bpf_cookie, 0)) {
+			pr_warn("prog '%s': user context value is not supported\n", prog->name);
+			err = -EOPNOTSUPP;
+			goto err_out;
+		}
+
+		if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
+			err = -errno;
+			pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n",
+				prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+			if (err == -EPROTO)
+				pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
+					prog->name, pfd);
+			goto err_out;
		}
+		link->link.fd = pfd;
 	}
 	if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
 		err = -errno;
-		free(link);
-		pr_warn("prog '%s': failed to enable pfd %d: %s\n",
+		pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n",
 			prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
-		return libbpf_err_ptr(err);
+		goto err_out;
 	}
-	return link;
+
+	return &link->link;
+err_out:
+	if (link_fd >= 0)
+		close(link_fd);
+	free(link);
+	return libbpf_err_ptr(err);
+}
+
+struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pfd)
+{
+	return bpf_program__attach_perf_event_opts(prog, pfd, NULL);
 }
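
Caller-side sketch of the new opts variant (the cookie value and the perf_event FD's origin are arbitrary). On kernels with FEAT_PERF_LINK the cookie travels through bpf_link_create(), and the BPF program can read it back with the bpf_get_attach_cookie() helper:

#include <bpf/libbpf.h>

static struct bpf_link *attach_with_cookie(struct bpf_program *prog, int pfd)
{
	/* arbitrary user value, surfaced to the program at run time */
	DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts,
		.bpf_cookie = 0x10adc0de);

	return bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
}
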
 
 /*
@@ -10257,13 +9152,19 @@ static int determine_uprobe_retprobe_bit(void)
 	return parse_uint_from_file(file, "config:%d\n");
 }
 
+#define PERF_UPROBE_REF_CTR_OFFSET_BITS 32
+#define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32
+
 static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
-				 uint64_t offset, int pid)
+				 uint64_t offset, int pid, size_t ref_ctr_off)
 {
 	struct perf_event_attr attr = {};
 	char errmsg[STRERR_BUFSIZE];
 	int type, pfd, err;
 
+	if (ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
+		return -EINVAL;
+
 	type = uprobe ? determine_uprobe_perf_type()
 		      : determine_kprobe_perf_type();
 	if (type < 0) {
@@ -10286,6 +9187,7 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
 	}
 	attr.size = sizeof(attr);
 	attr.type = type;
+	attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
 	attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
 	attr.config2 = offset;		 /* kprobe_addr or probe_offset */
 
@@ -10304,23 +9206,34 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
 	return pfd;
 }
 
-struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
-					    bool retprobe,
-					    const char *func_name)
+struct bpf_link *
+bpf_program__attach_kprobe_opts(struct bpf_program *prog,
+				const char *func_name,
+				const struct bpf_kprobe_opts *opts)
 {
+	DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
 	char errmsg[STRERR_BUFSIZE];
 	struct bpf_link *link;
+	unsigned long offset;
+	bool retprobe;
 	int pfd, err;
 
+	if (!OPTS_VALID(opts, bpf_kprobe_opts))
+		return libbpf_err_ptr(-EINVAL);
+
+	retprobe = OPTS_GET(opts, retprobe, false);
+	offset = OPTS_GET(opts, offset, 0);
+	pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
+
 	pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
-				    0 /* offset */, -1 /* pid */);
+				    offset, -1 /* pid */, 0 /* ref_ctr_off */);
 	if (pfd < 0) {
 		pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
 			prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
 			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
 		return libbpf_err_ptr(pfd);
 	}
-	link = bpf_program__attach_perf_event(prog, pfd);
+	link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
 	err = libbpf_get_error(link);
 	if (err) {
 		close(pfd);
@@ -10332,29 +9245,70 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
 	return link;
 }
 
+struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
+					    bool retprobe,
+					    const char *func_name)
+{
+	DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
+		.retprobe = retprobe,
+	);
+
+	return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
+}
+
 static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
 				      struct bpf_program *prog)
 {
+	DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
+	unsigned long offset = 0;
+	struct bpf_link *link;
 	const char *func_name;
-	bool retprobe;
+	char *func;
+	int n, err;
 
 	func_name = prog->sec_name + sec->len;
-	retprobe = strcmp(sec->sec, "kretprobe/") == 0;
+	opts.retprobe = strcmp(sec->sec, "kretprobe/") == 0;
+
+	n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
+	if (n < 1) {
+		err = -EINVAL;
+		pr_warn("kprobe name is invalid: %s\n", func_name);
+		return libbpf_err_ptr(err);
+	}
+	if (opts.retprobe && offset != 0) {
+		free(func);
+		err = -EINVAL;
+		pr_warn("kretprobes do not support offset specification\n");
+		return libbpf_err_ptr(err);
+	}
 
-	return bpf_program__attach_kprobe(prog, retprobe, func_name);
+	opts.offset = offset;
+	link = bpf_program__attach_kprobe_opts(prog, func, &opts);
+	free(func);
	return link;
 }
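
With the sscanf() parsing above, an attach offset can ride along in the section name; a BPF-side sketch (function name and offset are examples only):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

/* attach_kprobe() splits "func+offset" out of the section name */
SEC("kprobe/tcp_v4_connect+0x10")
int BPF_KPROBE(probe_connect)
{
	return 0;
}

char LICENSE[] SEC("license") = "GPL";

Equivalently, a caller can pass the offset programmatically through struct bpf_kprobe_opts (.offset) to bpf_program__attach_kprobe_opts().
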
 
-struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
-					    bool retprobe, pid_t pid,
-					    const char *binary_path,
-					    size_t func_offset)
+LIBBPF_API struct bpf_link *
+bpf_program__attach_uprobe_opts(struct bpf_program *prog, pid_t pid,
+				const char *binary_path, size_t func_offset,
+				const struct bpf_uprobe_opts *opts)
 {
+	DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
 	char errmsg[STRERR_BUFSIZE];
 	struct bpf_link *link;
+	size_t ref_ctr_off;
 	int pfd, err;
+	bool retprobe;
+
+	if (!OPTS_VALID(opts, bpf_uprobe_opts))
+		return libbpf_err_ptr(-EINVAL);
 
-	pfd = perf_event_open_probe(true /* uprobe */, retprobe,
-				    binary_path, func_offset, pid);
+	retprobe = OPTS_GET(opts, retprobe, false);
+	ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0);
+	pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
+
+	pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
+				    func_offset, pid, ref_ctr_off);
 	if (pfd < 0) {
 		pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
 			prog->name, retprobe ? "uretprobe" : "uprobe",
@@ -10362,7 +9316,7 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
 			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
 		return libbpf_err_ptr(pfd);
 	}
-	link = bpf_program__attach_perf_event(prog, pfd);
+	link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
 	err = libbpf_get_error(link);
 	if (err) {
 		close(pfd);
@@ -10375,6 +9329,16 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
 
 	return link;
 }
 
+struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
+					    bool retprobe, pid_t pid,
+					    const char *binary_path,
+					    size_t func_offset)
+{
+	DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe);
+
+	return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts);
+}
+
 static int determine_tracepoint_id(const char *tp_category,
 				   const char *tp_name)
 {
@@ -10425,14 +9389,21 @@ static int perf_event_open_tracepoint(const char *tp_category,
 	return pfd;
 }
 
-struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
-						const char *tp_category,
-						const char *tp_name)
+struct bpf_link *bpf_program__attach_tracepoint_opts(struct bpf_program *prog,
+						     const char *tp_category,
+						     const char *tp_name,
+						     const struct bpf_tracepoint_opts *opts)
 {
+	DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
 	char errmsg[STRERR_BUFSIZE];
 	struct bpf_link *link;
 	int pfd, err;
 
+	if (!OPTS_VALID(opts, bpf_tracepoint_opts))
+		return libbpf_err_ptr(-EINVAL);
+
+	pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
+
 	pfd = perf_event_open_tracepoint(tp_category, tp_name);
 	if (pfd < 0) {
 		pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
@@ -10440,7 +9411,7 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
 			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
 		return libbpf_err_ptr(pfd);
 	}
-	link = bpf_program__attach_perf_event(prog, pfd);
+	link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
 	err = libbpf_get_error(link);
 	if (err) {
 		close(pfd);
@@ -10452,6 +9423,13 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
 	return link;
 }
 
+struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
+						const char *tp_category,
+						const char *tp_name)
+{
+	return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL);
+}
+
 static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
 				  struct bpf_program *prog)
 {
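
A caller-side sketch of the uprobe opts variant (binary path and offsets are invented). ref_ctr_offset names a USDT-style semaphore in the target binary that the kernel bumps while the probe is armed; perf_event_open_probe() above packs it into bits 32..63 of perf_event_attr.config:

#include <bpf/libbpf.h>

static struct bpf_link *attach_usdt_like(struct bpf_program *prog, pid_t pid)
{
	DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts,
		.ref_ctr_offset = 0x1d8a,	/* semaphore offset, made up */
		.retprobe = false);

	/* 0x4a20 stands in for the probed function's file offset */
	return bpf_program__attach_uprobe_opts(prog, pid, "/usr/bin/myapp",
					       0x4a20, &opts);
}
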
