summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHoward Chu <howardchu95@gmail.com>2025-04-30 19:28:00 -0700
committerArnaldo Carvalho de Melo <acme@redhat.com>2025-05-05 21:48:02 -0300
commit0f72027bb9fb77a2b2ea7d73e6d58bb9c0efb1e8 (patch)
tree902674f6c9149e8986687ed87ce874f6767c73a2
parent671e943452b18001fe6286d0538825089400eed8 (diff)
perf record --off-cpu: Parse off-cpu event
Parse the off-cpu event using parse_event(), as bpf-output. Call evlist__enable_evsel() on off-cpu event. This fixes the inability to collect direct off-cpu samples on a workload, as reported by Arnaldo Carvalho de Melo <acme@redhat.com>. The reason being, workload sets enable_on_exec instead of calling evlist__enable(), but off-cpu event does not attach to an executable and execve won't be called, so the fds from perf_event_open() are not enabled. no-inherit should be set to 1, here's the reason: We update the BPF perf_event map for direct off-cpu sample dumping (in following patches), it executes as follows: bpf_map_update_value() bpf_fd_array_map_update_elem() perf_event_fd_array_get_ptr() perf_event_read_local() In perf_event_read_local(), there is: int perf_event_read_local(struct perf_event *event, u64 *value, u64 *enabled, u64 *running) { ... /* * It must not be an event with inherit set, we cannot read * all child counters from atomic context. */ if (event->attr.inherit) { ret = -EOPNOTSUPP; goto out; } Which means no-inherit has to be true for updating the BPF perf_event map. Moreover, for bpf-output events, we primarily want a system-wide event instead of a per-task event. The reason is that in BPF's bpf_perf_event_output(), BPF uses the CPU index to retrieve the perf_event file descriptor it outputs to. Making a bpf-output event system-wide naturally satisfies this requirement by mapping CPU appropriately. Suggested-by: Namhyung Kim <namhyung@kernel.org> Reviewed-by: Ian Rogers <irogers@google.com> Signed-off-by: Howard Chu <howardchu95@gmail.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Tested-by: Gautam Menghani <gautam@linux.ibm.com> Tested-by: Ian Rogers <irogers@google.com> Acked-by: Namhyung Kim <namhyung@kernel.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: James Clark <james.clark@linaro.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: https://lore.kernel.org/r/20241108204137.2444151-4-howardchu95@gmail.com Link: https://lore.kernel.org/r/20250501022809.449767-3-howardchu95@gmail.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r--tools/perf/builtin-record.c7
-rw-r--r--tools/perf/util/bpf_off_cpu.c33
-rw-r--r--tools/perf/util/evsel.c4
3 files changed, 21 insertions, 23 deletions
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 6637a3acb1f1..4194ea5ac729 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -2569,6 +2569,13 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
evlist__enable(rec->evlist);
/*
+ * offcpu-time does not call execve, so enable_on_exe wouldn't work
+ * when recording a workload, do it manually
+ */
+ if (rec->off_cpu)
+ evlist__enable_evsel(rec->evlist, (char *)OFFCPU_EVENT);
+
+ /*
* Let the child rip
*/
if (forks) {
diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c
index 4269b41d1771..2101aa2b7c42 100644
--- a/tools/perf/util/bpf_off_cpu.c
+++ b/tools/perf/util/bpf_off_cpu.c
@@ -38,32 +38,21 @@ union off_cpu_data {
static int off_cpu_config(struct evlist *evlist)
{
+ char off_cpu_event[64];
struct evsel *evsel;
- struct perf_event_attr attr = {
- .type = PERF_TYPE_SOFTWARE,
- .config = PERF_COUNT_SW_BPF_OUTPUT,
- .size = sizeof(attr), /* to capture ABI version */
- };
- char *evname = strdup(OFFCPU_EVENT);
-
- if (evname == NULL)
- return -ENOMEM;
- evsel = evsel__new(&attr);
- if (!evsel) {
- free(evname);
- return -ENOMEM;
+ scnprintf(off_cpu_event, sizeof(off_cpu_event), "bpf-output/name=%s/", OFFCPU_EVENT);
+ if (parse_event(evlist, off_cpu_event)) {
+ pr_err("Failed to open off-cpu event\n");
+ return -1;
}
- evsel->core.attr.freq = 1;
- evsel->core.attr.sample_period = 1;
- /* off-cpu analysis depends on stack trace */
- evsel->core.attr.sample_type = PERF_SAMPLE_CALLCHAIN;
-
- evlist__add(evlist, evsel);
-
- free(evsel->name);
- evsel->name = evname;
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel__is_offcpu_event(evsel)) {
+ evsel->core.system_wide = true;
+ break;
+ }
+ }
return 0;
}
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 99eee3a2c762..2415e067b892 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1555,8 +1555,10 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
if (evsel__is_dummy_event(evsel))
evsel__reset_sample_bit(evsel, BRANCH_STACK);
- if (evsel__is_offcpu_event(evsel))
+ if (evsel__is_offcpu_event(evsel)) {
evsel->core.attr.sample_type &= OFFCPU_SAMPLE_TYPES;
+ attr->inherit = 0;
+ }
arch__post_evsel_config(evsel, attr);
}