summaryrefslogtreecommitdiff
path: root/tools/perf/util/bpf-event.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-08-01 16:55:47 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-08-01 16:55:47 -0700
commitf4f346c3465949ebba80c6cc52cd8d2eeaa545fd (patch)
treec4da4e549bd6a050c00ad902486dab70847ee41d /tools/perf/util/bpf-event.c
parent0905809b38bda1fa0b206986c44d846e46f13c1d (diff)
parent6235ce77749f45cac27f630337e2fdf04e8a6c73 (diff)
Merge tag 'perf-tools-for-v6.17-2025-08-01' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools
Pull perf tools updates from Namhyung Kim: "Build-ID processing goodies: Build-IDs are content based hashes to link regions of memory to ELF files in post processing. They have been available in distros for quite a while: $ file /bin/bash /bin/bash: ELF 64-bit LSB pie executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, BuildID[sha1]=707a1c670cd72f8e55ffedfbe94ea98901b7ce3a, for GNU/Linux 3.2.0, stripped It is possible to ask the kernel to get it from mmap executable backing storage at time they are being put in place and send it as metadata at that moment to have in perf.data. Prefer that across the board to speed up 'record' time - it post processes the samples to find binaries touched by any samples and to save them with build-ID. It can skip reading build-ID in userspace if it comes from the kernel. perf record: * Make --buildid-mmap default. The kernel can generate MMAP2 events with a build-ID from ELF header. Use that by default instead of using inode and device ID to identify binaries. It also can be disabled with --no-buildid-mmap. * Use BPF for -u/--uid option to sample processes belong to a user. BPF can track user processes more accurately and the existing logic often fails to get the list of processes due to race with reading the /proc filesystem. * Generate PERF_RECORD_BPF_METADATA when it profiles BPF programs and they have variables starting with "bpf_metadata_". This will help to identify BPF objects used in the profile. This has been supported in bpftool for some time and allows the recording of metadata such as commit hashes, versions, etc, that now gets recorded in perf.data as well. * Collect list of DSOs touched in the sample callchains as well as in the sample itself. This would increase the processing time at the end of record, but can improve the data quality. perf stat: * Add a new 'drm' pseudo-PMU support like in 'hwmon'. It can collect DRM usage stats using fdinfo in /proc. On my Intel laptop, it shows like below: $ perf list drm ... drm: drm-active-stolen-system0 [Total memory active in one or more engines. Unit: drm_i915] drm-active-system0 [Total memory active in one or more engines. Unit: drm_i915] drm-engine-capacity-video [Engine capacity. Unit: drm_i915] drm-engine-copy [Utilization in ns. Unit: drm_i915] drm-engine-render [Utilization in ns. Unit: drm_i915] drm-engine-video [Utilization in ns. Unit: drm_i915] ... $ sudo perf stat -a -e drm-engine-render,drm-engine-video,drm-engine-capacity-video sleep 1 Performance counter stats for 'system wide': 48,137,316,988,873 ns drm-engine-render 34,452,696,746 ns drm-engine-video 20 capacity drm-engine-capacity-video 1.002086194 seconds time elapsed perf list * Add description for software events. The description is in JSON format and the event parser now can handle the software events like others (for example, it's case-insensitive and subject to wildcard matching). $ perf list software List of pre-defined events (to be used in -e or -M): software: alignment-faults [Number of kernel handled memory alignment faults. Unit: software] bpf-output [An event used by BPF programs to write to the perf ring buffer. Unit: software] cgroup-switches [Number of context switches to a task in a different cgroup. Unit: software] context-switches [Number of context switches [This event is an alias of cs]. Unit: software] cpu-clock [Per-CPU high-resolution timer based event. Unit: software] cpu-migrations [Number of times a process has migrated to a new CPU [This event is an alias of migrations]. Unit: software] cs [Number of context switches [This event is an alias of context-switches]. Unit: software] dummy [A placeholder event that doesn't count anything. Unit: software] emulation-faults [Number of kernel handled unimplemented instruction faults handled through emulation. Unit: software] faults [Number of page faults [This event is an alias of page-faults]. Unit: software] major-faults [Number of major page faults. Major faults require I/O to handle. Unit: software] migrations [Number of times a process has migrated to a new CPU [This event is an alias of cpu-migrations]. Unit: software] minor-faults [Number of minor page faults. Minor faults don't require I/O to handle. Unit: software] page-faults [Number of page faults [This event is an alias of faults]. Unit: software] task-clock [Per-task high-resolution timer based event. Unit: software] perf ftrace: * Add -e/--events option to perf ftrace latency to measure latency between the two events instead of a function. $ sudo perf ftrace latency -ab -e i915_request_wait_begin,i915_request_wait_end --hide-empty -- sleep 1 # DURATION | COUNT | GRAPH | 256 - 512 us | 4 | ###### | 2 - 4 ms | 2 | ### | 4 - 8 ms | 12 | ################### | 8 - 16 ms | 10 | ################ | # statistics (in usec) total time: 194915 avg time: 6961 max time: 12855 min time: 373 count: 28 * Add new function graph tracer options (--graph-opts) to display more info like arguments and return value. They will be passed to the kernel ftrace directly. $ sudo perf ftrace -G vfs_write --graph-opts retval,retaddr # tracer: function_graph # # CPU DURATION FUNCTION CALLS # | | | | | | | ... 5) | mutex_unlock() { /* <-rb_simple_write+0xda/0x150 */ 5) 0.188 us | local_clock(); /* <-lock_release+0x2ad/0x440 ret=0x3bf2a3cf90e */ 5) | rt_mutex_slowunlock() { /* <-rb_simple_write+0xda/0x150 */ 5) | _raw_spin_lock_irqsave() { /* <-rt_mutex_slowunlock+0x4f/0x200 */ 5) 0.123 us | preempt_count_add(); /* <-_raw_spin_lock_irqsave+0x23/0x90 ret=0x0 */ 5) 0.128 us | local_clock(); /* <-__lock_acquire.isra.0+0x17a/0x740 ret=0x3bf2a3cfc8b */ 5) 0.086 us | do_raw_spin_trylock(); /* <-_raw_spin_lock_irqsave+0x4a/0x90 ret=0x1 */ 5) 0.845 us | } /* _raw_spin_lock_irqsave ret=0x292 */ ... Misc: * Add perf archive --exclude-buildids <FILE> option to skip some binaries. The format of the FILE should be same as an output of perf buildid-list. * Get rid of dependency of libcrypto. It was just to get SHA-1 hash so implement it directly like in the kernel. A side effect is that it needs -fno-strict-aliasing compiler option (again, like in the kernel). * Convert all shell script tests to use bash" * tag 'perf-tools-for-v6.17-2025-08-01' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools: (179 commits) perf record: Cache build-ID of hit DSOs only perf test: Ensure lock contention using pipe mode perf python: Stop using deprecated PyUnicode_AsString() perf list: Skip ABI PMUs when printing pmu values perf list: Remove tracepoint printing code perf tp_pmu: Add event APIs perf tp_pmu: Factor existing tracepoint logic to new file perf parse-events: Remove non-json software events perf jevents: Add common software event json perf tools: Remove libtraceevent in .gitignore perf test: Fix comment ordering perf sort: Use perf_env to set arch sort keys and header perf test: Move PERF_SAMPLE_WEIGHT_STRUCT parsing to common test perf sample: Remove arch notion of sample parsing perf env: Remove global perf_env perf trace: Avoid global perf_env with evsel__env perf auxtrace: Pass perf_env from session through to mmap read perf machine: Explicitly pass in host perf_env perf bench synthesize: Avoid use of global perf_env perf top: Make perf_env locally scoped ...
Diffstat (limited to 'tools/perf/util/bpf-event.c')
-rw-r--r--tools/perf/util/bpf-event.c380
1 files changed, 379 insertions, 1 deletions
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index c81444059ad07..5b6d3e899e115 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -1,13 +1,21 @@
// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include <bpf/bpf.h>
#include <bpf/btf.h>
#include <bpf/libbpf.h>
+#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/err.h>
+#include <linux/perf_event.h>
#include <linux/string.h>
+#include <linux/zalloc.h>
#include <internal/lib.h>
+#include <perf/event.h>
#include <symbol/kallsyms.h>
#include "bpf-event.h"
#include "bpf-utils.h"
@@ -151,6 +159,362 @@ static int synthesize_bpf_prog_name(char *buf, int size,
return name_len;
}
+#ifdef HAVE_LIBBPF_STRINGS_SUPPORT
+
+#define BPF_METADATA_PREFIX "bpf_metadata_"
+#define BPF_METADATA_PREFIX_LEN (sizeof(BPF_METADATA_PREFIX) - 1)
+
+static bool name_has_bpf_metadata_prefix(const char **s)
+{
+ if (strncmp(*s, BPF_METADATA_PREFIX, BPF_METADATA_PREFIX_LEN) != 0)
+ return false;
+ *s += BPF_METADATA_PREFIX_LEN;
+ return true;
+}
+
+struct bpf_metadata_map {
+ struct btf *btf;
+ const struct btf_type *datasec;
+ void *rodata;
+ size_t rodata_size;
+ unsigned int num_vars;
+};
+
+static int bpf_metadata_read_map_data(__u32 map_id, struct bpf_metadata_map *map)
+{
+ int map_fd;
+ struct bpf_map_info map_info;
+ __u32 map_info_len;
+ int key;
+ struct btf *btf;
+ const struct btf_type *datasec;
+ struct btf_var_secinfo *vsi;
+ unsigned int vlen, vars;
+ void *rodata;
+
+ map_fd = bpf_map_get_fd_by_id(map_id);
+ if (map_fd < 0)
+ return -1;
+
+ memset(&map_info, 0, sizeof(map_info));
+ map_info_len = sizeof(map_info);
+ if (bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len) < 0)
+ goto out_close;
+
+ /* If it's not an .rodata map, don't bother. */
+ if (map_info.type != BPF_MAP_TYPE_ARRAY ||
+ map_info.key_size != sizeof(int) ||
+ map_info.max_entries != 1 ||
+ !map_info.btf_value_type_id ||
+ !strstr(map_info.name, ".rodata")) {
+ goto out_close;
+ }
+
+ btf = btf__load_from_kernel_by_id(map_info.btf_id);
+ if (!btf)
+ goto out_close;
+ datasec = btf__type_by_id(btf, map_info.btf_value_type_id);
+ if (!btf_is_datasec(datasec))
+ goto out_free_btf;
+
+ /*
+ * If there aren't any variables with the "bpf_metadata_" prefix,
+ * don't bother.
+ */
+ vlen = btf_vlen(datasec);
+ vsi = btf_var_secinfos(datasec);
+ vars = 0;
+ for (unsigned int i = 0; i < vlen; i++, vsi++) {
+ const struct btf_type *t_var = btf__type_by_id(btf, vsi->type);
+ const char *name = btf__name_by_offset(btf, t_var->name_off);
+
+ if (name_has_bpf_metadata_prefix(&name))
+ vars++;
+ }
+ if (vars == 0)
+ goto out_free_btf;
+
+ rodata = zalloc(map_info.value_size);
+ if (!rodata)
+ goto out_free_btf;
+ key = 0;
+ if (bpf_map_lookup_elem(map_fd, &key, rodata)) {
+ free(rodata);
+ goto out_free_btf;
+ }
+ close(map_fd);
+
+ map->btf = btf;
+ map->datasec = datasec;
+ map->rodata = rodata;
+ map->rodata_size = map_info.value_size;
+ map->num_vars = vars;
+ return 0;
+
+out_free_btf:
+ btf__free(btf);
+out_close:
+ close(map_fd);
+ return -1;
+}
+
+struct format_btf_ctx {
+ char *buf;
+ size_t buf_size;
+ size_t buf_idx;
+};
+
+static void format_btf_cb(void *arg, const char *fmt, va_list ap)
+{
+ int n;
+ struct format_btf_ctx *ctx = (struct format_btf_ctx *)arg;
+
+ n = vsnprintf(ctx->buf + ctx->buf_idx, ctx->buf_size - ctx->buf_idx,
+ fmt, ap);
+ ctx->buf_idx += n;
+ if (ctx->buf_idx >= ctx->buf_size)
+ ctx->buf_idx = ctx->buf_size;
+}
+
+static void format_btf_variable(struct btf *btf, char *buf, size_t buf_size,
+ const struct btf_type *t, const void *btf_data)
+{
+ struct format_btf_ctx ctx = {
+ .buf = buf,
+ .buf_idx = 0,
+ .buf_size = buf_size,
+ };
+ const struct btf_dump_type_data_opts opts = {
+ .sz = sizeof(struct btf_dump_type_data_opts),
+ .skip_names = 1,
+ .compact = 1,
+ .emit_strings = 1,
+ };
+ struct btf_dump *d;
+ size_t btf_size;
+
+ d = btf_dump__new(btf, format_btf_cb, &ctx, NULL);
+ btf_size = btf__resolve_size(btf, t->type);
+ btf_dump__dump_type_data(d, t->type, btf_data, btf_size, &opts);
+ btf_dump__free(d);
+}
+
+static void bpf_metadata_fill_event(struct bpf_metadata_map *map,
+ struct perf_record_bpf_metadata *bpf_metadata_event)
+{
+ struct btf_var_secinfo *vsi;
+ unsigned int i, vlen;
+
+ memset(bpf_metadata_event->prog_name, 0, BPF_PROG_NAME_LEN);
+ vlen = btf_vlen(map->datasec);
+ vsi = btf_var_secinfos(map->datasec);
+
+ for (i = 0; i < vlen; i++, vsi++) {
+ const struct btf_type *t_var = btf__type_by_id(map->btf,
+ vsi->type);
+ const char *name = btf__name_by_offset(map->btf,
+ t_var->name_off);
+ const __u64 nr_entries = bpf_metadata_event->nr_entries;
+ struct perf_record_bpf_metadata_entry *entry;
+
+ if (!name_has_bpf_metadata_prefix(&name))
+ continue;
+
+ if (nr_entries >= (__u64)map->num_vars)
+ break;
+
+ entry = &bpf_metadata_event->entries[nr_entries];
+ memset(entry, 0, sizeof(*entry));
+ snprintf(entry->key, BPF_METADATA_KEY_LEN, "%s", name);
+ format_btf_variable(map->btf, entry->value,
+ BPF_METADATA_VALUE_LEN, t_var,
+ map->rodata + vsi->offset);
+ bpf_metadata_event->nr_entries++;
+ }
+}
+
+static void bpf_metadata_free_map_data(struct bpf_metadata_map *map)
+{
+ btf__free(map->btf);
+ free(map->rodata);
+}
+
+static struct bpf_metadata *bpf_metadata_alloc(__u32 nr_prog_tags,
+ __u32 nr_variables)
+{
+ struct bpf_metadata *metadata;
+ size_t event_size;
+
+ metadata = zalloc(sizeof(struct bpf_metadata));
+ if (!metadata)
+ return NULL;
+
+ metadata->prog_names = zalloc(nr_prog_tags * sizeof(char *));
+ if (!metadata->prog_names) {
+ bpf_metadata_free(metadata);
+ return NULL;
+ }
+ for (__u32 prog_index = 0; prog_index < nr_prog_tags; prog_index++) {
+ metadata->prog_names[prog_index] = zalloc(BPF_PROG_NAME_LEN);
+ if (!metadata->prog_names[prog_index]) {
+ bpf_metadata_free(metadata);
+ return NULL;
+ }
+ metadata->nr_prog_names++;
+ }
+
+ event_size = sizeof(metadata->event->bpf_metadata) +
+ nr_variables * sizeof(metadata->event->bpf_metadata.entries[0]);
+ metadata->event = zalloc(event_size);
+ if (!metadata->event) {
+ bpf_metadata_free(metadata);
+ return NULL;
+ }
+ metadata->event->bpf_metadata = (struct perf_record_bpf_metadata) {
+ .header = {
+ .type = PERF_RECORD_BPF_METADATA,
+ .size = event_size,
+ },
+ .nr_entries = 0,
+ };
+
+ return metadata;
+}
+
+static struct bpf_metadata *bpf_metadata_create(struct bpf_prog_info *info)
+{
+ struct bpf_metadata *metadata;
+ const __u32 *map_ids = (__u32 *)(uintptr_t)info->map_ids;
+
+ for (__u32 map_index = 0; map_index < info->nr_map_ids; map_index++) {
+ struct bpf_metadata_map map;
+
+ if (bpf_metadata_read_map_data(map_ids[map_index], &map) != 0)
+ continue;
+
+ metadata = bpf_metadata_alloc(info->nr_prog_tags, map.num_vars);
+ if (!metadata)
+ continue;
+
+ bpf_metadata_fill_event(&map, &metadata->event->bpf_metadata);
+
+ for (__u32 index = 0; index < info->nr_prog_tags; index++) {
+ synthesize_bpf_prog_name(metadata->prog_names[index],
+ BPF_PROG_NAME_LEN, info,
+ map.btf, index);
+ }
+
+ bpf_metadata_free_map_data(&map);
+
+ return metadata;
+ }
+
+ return NULL;
+}
+
+static int synthesize_perf_record_bpf_metadata(const struct bpf_metadata *metadata,
+ const struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ const size_t event_size = metadata->event->header.size;
+ union perf_event *event;
+ int err = 0;
+
+ event = zalloc(event_size + machine->id_hdr_size);
+ if (!event)
+ return -1;
+ memcpy(event, metadata->event, event_size);
+ memset((void *)event + event->header.size, 0, machine->id_hdr_size);
+ event->header.size += machine->id_hdr_size;
+ for (__u32 index = 0; index < metadata->nr_prog_names; index++) {
+ memcpy(event->bpf_metadata.prog_name,
+ metadata->prog_names[index], BPF_PROG_NAME_LEN);
+ err = perf_tool__process_synth_event(tool, event, machine,
+ process);
+ if (err != 0)
+ break;
+ }
+
+ free(event);
+ return err;
+}
+
+void bpf_metadata_free(struct bpf_metadata *metadata)
+{
+ if (metadata == NULL)
+ return;
+ for (__u32 index = 0; index < metadata->nr_prog_names; index++)
+ free(metadata->prog_names[index]);
+ free(metadata->prog_names);
+ free(metadata->event);
+ free(metadata);
+}
+
+#else /* HAVE_LIBBPF_STRINGS_SUPPORT */
+
+static struct bpf_metadata *bpf_metadata_create(struct bpf_prog_info *info __maybe_unused)
+{
+ return NULL;
+}
+
+static int synthesize_perf_record_bpf_metadata(const struct bpf_metadata *metadata __maybe_unused,
+ const struct perf_tool *tool __maybe_unused,
+ perf_event__handler_t process __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ return 0;
+}
+
+void bpf_metadata_free(struct bpf_metadata *metadata __maybe_unused)
+{
+}
+
+#endif /* HAVE_LIBBPF_STRINGS_SUPPORT */
+
+struct bpf_metadata_final_ctx {
+ const struct perf_tool *tool;
+ perf_event__handler_t process;
+ struct machine *machine;
+};
+
+static void synthesize_final_bpf_metadata_cb(struct bpf_prog_info_node *node,
+ void *data)
+{
+ struct bpf_metadata_final_ctx *ctx = (struct bpf_metadata_final_ctx *)data;
+ struct bpf_metadata *metadata = node->metadata;
+ int err;
+
+ if (metadata == NULL)
+ return;
+ err = synthesize_perf_record_bpf_metadata(metadata, ctx->tool,
+ ctx->process, ctx->machine);
+ if (err != 0) {
+ const char *prog_name = metadata->prog_names[0];
+
+ if (prog_name != NULL)
+ pr_warning("Couldn't synthesize final BPF metadata for %s.\n", prog_name);
+ else
+ pr_warning("Couldn't synthesize final BPF metadata.\n");
+ }
+ bpf_metadata_free(metadata);
+ node->metadata = NULL;
+}
+
+void perf_event__synthesize_final_bpf_metadata(struct perf_session *session,
+ perf_event__handler_t process)
+{
+ struct perf_env *env = &session->header.env;
+ struct bpf_metadata_final_ctx ctx = {
+ .tool = session->tool,
+ .process = process,
+ .machine = &session->machines.host,
+ };
+
+ perf_env__iterate_bpf_prog_info(env, synthesize_final_bpf_metadata_cb,
+ &ctx);
+}
+
/*
* Synthesize PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT for one bpf
* program. One PERF_RECORD_BPF_EVENT is generated for the program. And
@@ -173,6 +537,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
const struct perf_tool *tool = session->tool;
struct bpf_prog_info_node *info_node;
struct perf_bpil *info_linear;
+ struct bpf_metadata *metadata;
struct bpf_prog_info *info;
struct btf *btf = NULL;
struct perf_env *env;
@@ -184,7 +549,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
* for perf-record and perf-report use header.env;
* otherwise, use global perf_env.
*/
- env = session->data ? &session->header.env : &perf_env;
+ env = perf_session__env(session);
arrays = 1UL << PERF_BPIL_JITED_KSYMS;
arrays |= 1UL << PERF_BPIL_JITED_FUNC_LENS;
@@ -193,6 +558,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
arrays |= 1UL << PERF_BPIL_JITED_INSNS;
arrays |= 1UL << PERF_BPIL_LINE_INFO;
arrays |= 1UL << PERF_BPIL_JITED_LINE_INFO;
+ arrays |= 1UL << PERF_BPIL_MAP_IDS;
info_linear = get_bpf_prog_info_linear(fd, arrays);
if (IS_ERR_OR_NULL(info_linear)) {
@@ -289,6 +655,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
}
info_node->info_linear = info_linear;
+ info_node->metadata = NULL;
if (!perf_env__insert_bpf_prog_info(env, info_node)) {
free(info_linear);
free(info_node);
@@ -301,6 +668,15 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
*/
err = perf_tool__process_synth_event(tool, event,
machine, process);
+
+ /* Synthesize PERF_RECORD_BPF_METADATA */
+ metadata = bpf_metadata_create(info);
+ if (metadata != NULL) {
+ err = synthesize_perf_record_bpf_metadata(metadata,
+ tool, process,
+ machine);
+ bpf_metadata_free(metadata);
+ }
}
out:
@@ -471,6 +847,7 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id)
arrays |= 1UL << PERF_BPIL_JITED_INSNS;
arrays |= 1UL << PERF_BPIL_LINE_INFO;
arrays |= 1UL << PERF_BPIL_JITED_LINE_INFO;
+ arrays |= 1UL << PERF_BPIL_MAP_IDS;
info_linear = get_bpf_prog_info_linear(fd, arrays);
if (IS_ERR_OR_NULL(info_linear)) {
@@ -483,6 +860,7 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id)
info_node = malloc(sizeof(struct bpf_prog_info_node));
if (info_node) {
info_node->info_linear = info_linear;
+ info_node->metadata = bpf_metadata_create(&info_linear->info);
if (!perf_env__insert_bpf_prog_info(env, info_node)) {
free(info_linear);
free(info_node);