Diffstat (limited to 'tools/perf/util')
53 files changed, 1659 insertions, 302 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 9a7209a99e164..a51267d88ca90 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -147,6 +147,7 @@ perf-$(CONFIG_LIBBPF) += bpf_map.o
 perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o
 perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o
 perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o
+perf-$(CONFIG_PERF_BPF_SKEL) += bpf_off_cpu.o
 perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
 perf-$(CONFIG_LIBELF) += symbol-elf.o
 perf-$(CONFIG_LIBELF) += probe-file.o
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index d2b64e3f588b2..1a80151baed96 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -1033,10 +1033,11 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
     memset(&attr, 0, sizeof(struct perf_event_attr));
     attr.size = sizeof(struct perf_event_attr);
     attr.type = PERF_TYPE_HARDWARE;
-    attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
+    attr.sample_type = evsel->core.attr.sample_type &
+               (PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR);
     attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
                 PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
-                PERF_SAMPLE_WEIGHT;
+                PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR;
     if (spe->timeless_decoding)
         attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
     else
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index df1c5bbbaa0dd..511dd3caa1bcf 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -125,7 +125,7 @@ int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
     mm->tid = mp->tid;
     mm->cpu = mp->cpu.cpu;
 
-    if (!mp->len) {
+    if (!mp->len || !mp->mmap_needed) {
         mm->base = NULL;
         return 0;
     }
@@ -168,13 +168,20 @@ void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
 }
 
 void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
-                   struct evlist *evlist, int idx,
-                   bool per_cpu)
+                   struct evlist *evlist,
+                   struct evsel *evsel, int idx)
 {
+    bool per_cpu = !perf_cpu_map__empty(evlist->core.user_requested_cpus);
+
+    mp->mmap_needed = evsel->needs_auxtrace_mmap;
+
+    if (!mp->mmap_needed)
+        return;
+
     mp->idx = idx;
 
     if (per_cpu) {
-        mp->cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, idx);
+        mp->cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx);
         if (evlist->core.threads)
             mp->tid = perf_thread_map__pid(evlist->core.threads, 0);
         else
@@ -636,6 +643,22 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
     return -EINVAL;
 }
 
+static int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx)
+{
+    bool per_cpu_mmaps = !perf_cpu_map__empty(evlist->core.user_requested_cpus);
+
+    if (per_cpu_mmaps) {
+        struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx);
+        int cpu_map_idx = perf_cpu_map__idx(evsel->core.cpus, evlist_cpu);
+
+        if (cpu_map_idx == -1)
+            return -EINVAL;
+        return perf_evsel__enable_cpu(&evsel->core, cpu_map_idx);
+    }
+
+    return perf_evsel__enable_thread(&evsel->core, idx);
+}
+
 int auxtrace_record__read_finish(struct auxtrace_record *itr, int idx)
 {
     struct evsel *evsel;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index dc38b6f572328..cd0d25c2751cc 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -344,6 +344,10 @@ struct auxtrace_mmap {
  * @idx: index of this mmap
  * @tid: tid for a per-thread mmap (also set if there is only 1 tid on a per-cpu
  *       mmap) otherwise %0
+ * @mmap_needed: set to %false for non-auxtrace events. This is needed because
+ *               auxtrace mmapping is done in the same code path as non-auxtrace
+ *               mmapping but not every evsel that needs non-auxtrace mmapping
+ *               also needs auxtrace mmapping.
  * @cpu: cpu number for a per-cpu mmap otherwise %-1
  */
 struct auxtrace_mmap_params {
@@ -353,6 +357,7 @@ struct auxtrace_mmap_params {
     int     prot;
     int     idx;
     pid_t   tid;
+    bool    mmap_needed;
     struct perf_cpu cpu;
 };
 
@@ -490,8 +495,8 @@ void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
                 unsigned int auxtrace_pages,
                 bool auxtrace_overwrite);
 void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
-                   struct evlist *evlist, int idx,
-                   bool per_cpu);
+                   struct evlist *evlist,
+                   struct evsel *evsel, int idx);
 
 typedef int (*process_auxtrace_t)(struct perf_tool *tool,
                   struct mmap *map,
@@ -863,8 +868,8 @@ void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
                 unsigned int auxtrace_pages,
                 bool auxtrace_overwrite);
 void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
-                   struct evlist *evlist, int idx,
-                   bool per_cpu);
+                   struct evlist *evlist,
+                   struct evsel *evsel, int idx);
 
 #define ITRACE_HELP ""
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index 94624733af7e2..eee64ddb766df 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -22,7 +22,8 @@
 #include "record.h"
 #include "util/synthetic-events.h"
 
-struct btf * __weak btf__load_from_kernel_by_id(__u32 id)
+#ifndef HAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
+struct btf *btf__load_from_kernel_by_id(__u32 id)
 {
     struct btf *btf;
 #pragma GCC diagnostic push
@@ -32,12 +33,14 @@ struct btf * __weak btf__load_from_kernel_by_id(__u32 id)
 
     return err ? ERR_PTR(err) : btf;
 }
-
-int __weak bpf_prog_load(enum bpf_prog_type prog_type,
-             const char *prog_name __maybe_unused,
-             const char *license,
-             const struct bpf_insn *insns, size_t insn_cnt,
-             const struct bpf_prog_load_opts *opts)
+#endif
+
+#ifndef HAVE_LIBBPF_BPF_PROG_LOAD
+int bpf_prog_load(enum bpf_prog_type prog_type,
+          const char *prog_name __maybe_unused,
+          const char *license,
+          const struct bpf_insn *insns, size_t insn_cnt,
+          const struct bpf_prog_load_opts *opts)
 {
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
@@ -45,8 +48,10 @@ int __weak bpf_prog_load(enum bpf_prog_type prog_type,
                    opts->kern_version, opts->log_buf, opts->log_size);
 #pragma GCC diagnostic pop
 }
+#endif
 
-struct bpf_program * __weak
+#ifndef HAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM
+struct bpf_program *
 bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
 {
 #pragma GCC diagnostic push
@@ -54,8 +59,10 @@ bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
     return bpf_program__next(prev, obj);
 #pragma GCC diagnostic pop
 }
+#endif
 
-struct bpf_map * __weak
+#ifndef HAVE_LIBBPF_BPF_OBJECT__NEXT_MAP
+struct bpf_map *
 bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
 {
 #pragma GCC diagnostic push
@@ -63,8 +70,10 @@ bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
     return bpf_map__next(prev, obj);
 #pragma GCC diagnostic pop
 }
+#endif
 
-const void * __weak
+#ifndef HAVE_LIBBPF_BTF__RAW_DATA
+const void *
 btf__raw_data(const struct btf *btf_ro, __u32 *size)
 {
 #pragma GCC diagnostic push
@@ -72,6 +81,7 @@ btf__raw_data(const struct btf *btf_ro, __u32 *size)
     return btf__get_raw_data(btf_ro, size);
 #pragma GCC diagnostic pop
 }
+#endif
 
 static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len)
 {
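The bpf-event.c hunks above replace perf's `__weak` libbpf fallbacks with compile-time feature checks, since weak-symbol overriding is unreliable under LTO and with newer libbpf versions. The `HAVE_LIBBPF_*` macros are expected to come from build-time probes; a minimal sketch of such a probe, in the style of the tools/build feature tests (file name and build wiring are assumptions, not part of this diff):

// Hypothetical feature test: if this compiles and links against the
// installed libbpf, the build can define
// HAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID and the fallback above is
// compiled out.
#include <bpf/libbpf.h>

int main(void)
{
    btf__load_from_kernel_by_id(20151128 /* any id; only linking matters */);
    return 0;
}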
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index b72cef1ae959c..f8ad581ea2479 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -99,16 +99,26 @@ static int bpf_perf_object__add(struct bpf_object *obj)
     return perf_obj ? 0 : -ENOMEM;
 }
 
+static int libbpf_init(void)
+{
+    if (libbpf_initialized)
+        return 0;
+
+    libbpf_set_print(libbpf_perf_print);
+    libbpf_initialized = true;
+    return 0;
+}
+
 struct bpf_object *
 bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name)
 {
     LIBBPF_OPTS(bpf_object_open_opts, opts, .object_name = name);
     struct bpf_object *obj;
+    int err;
 
-    if (!libbpf_initialized) {
-        libbpf_set_print(libbpf_perf_print);
-        libbpf_initialized = true;
-    }
+    err = libbpf_init();
+    if (err)
+        return ERR_PTR(err);
 
     obj = bpf_object__open_mem(obj_buf, obj_buf_sz, &opts);
     if (IS_ERR_OR_NULL(obj)) {
@@ -135,14 +145,13 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source)
 {
     LIBBPF_OPTS(bpf_object_open_opts, opts, .object_name = filename);
     struct bpf_object *obj;
+    int err;
 
-    if (!libbpf_initialized) {
-        libbpf_set_print(libbpf_perf_print);
-        libbpf_initialized = true;
-    }
+    err = libbpf_init();
+    if (err)
+        return ERR_PTR(err);
 
     if (source) {
-        int err;
         void *obj_buf;
         size_t obj_buf_sz;
diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c
index 3ce8d03cb7ecf..ef1c15e4aeba5 100644
--- a/tools/perf/util/bpf_counter.c
+++ b/tools/perf/util/bpf_counter.c
@@ -224,25 +224,25 @@ static int bpf_program_profiler__disable(struct evsel *evsel)
 
 static int bpf_program_profiler__read(struct evsel *evsel)
 {
-    // perf_cpu_map uses /sys/devices/system/cpu/online
-    int num_cpu = evsel__nr_cpus(evsel);
     // BPF_MAP_TYPE_PERCPU_ARRAY uses /sys/devices/system/cpu/possible
     // Sometimes possible > online, like on a Ryzen 3900X that has 24
     // threads but its possible showed 0-31 -acme
     int num_cpu_bpf = libbpf_num_possible_cpus();
     struct bpf_perf_event_value values[num_cpu_bpf];
     struct bpf_counter *counter;
+    struct perf_counts_values *counts;
     int reading_map_fd;
     __u32 key = 0;
-    int err, cpu;
+    int err, idx, bpf_cpu;
 
     if (list_empty(&evsel->bpf_counter_list))
         return -EAGAIN;
 
-    for (cpu = 0; cpu < num_cpu; cpu++) {
-        perf_counts(evsel->counts, cpu, 0)->val = 0;
-        perf_counts(evsel->counts, cpu, 0)->ena = 0;
-        perf_counts(evsel->counts, cpu, 0)->run = 0;
+    perf_cpu_map__for_each_idx(idx, evsel__cpus(evsel)) {
+        counts = perf_counts(evsel->counts, idx, 0);
+        counts->val = 0;
+        counts->ena = 0;
+        counts->run = 0;
     }
     list_for_each_entry(counter, &evsel->bpf_counter_list, list) {
         struct bpf_prog_profiler_bpf *skel = counter->skel;
@@ -256,10 +256,15 @@ static int bpf_program_profiler__read(struct evsel *evsel)
             return err;
         }
 
-        for (cpu = 0; cpu < num_cpu; cpu++) {
-            perf_counts(evsel->counts, cpu, 0)->val += values[cpu].counter;
-            perf_counts(evsel->counts, cpu, 0)->ena += values[cpu].enabled;
-            perf_counts(evsel->counts, cpu, 0)->run += values[cpu].running;
+        for (bpf_cpu = 0; bpf_cpu < num_cpu_bpf; bpf_cpu++) {
+            idx = perf_cpu_map__idx(evsel__cpus(evsel),
+                        (struct perf_cpu){.cpu = bpf_cpu});
+            if (idx == -1)
+                continue;
+            counts = perf_counts(evsel->counts, idx, 0);
+            counts->val += values[bpf_cpu].counter;
+            counts->ena += values[bpf_cpu].enabled;
+            counts->run += values[bpf_cpu].running;
         }
     }
     return 0;
@@ -307,7 +312,10 @@ static bool bperf_attr_map_compatible(int attr_map_fd)
            (map_info.value_size == sizeof(struct perf_event_attr_map_entry));
 }
 
-int __weak
+#ifndef HAVE_LIBBPF_BPF_MAP_CREATE
+LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size,
+                  int value_size, int max_entries, __u32 map_flags);
+int
 bpf_map_create(enum bpf_map_type map_type,
            const char *map_name __maybe_unused,
            __u32 key_size,
@@ -320,6 +328,7 @@ bpf_map_create(enum bpf_map_type map_type,
     return bpf_create_map(map_type, key_size, value_size, max_entries, 0);
 #pragma GCC diagnostic pop
 }
+#endif
 
 static int bperf_lock_attr_map(struct target *target)
 {
@@ -621,6 +630,7 @@ static int bperf__read(struct evsel *evsel)
     struct bperf_follower_bpf *skel = evsel->follower_skel;
     __u32 num_cpu_bpf = cpu__max_cpu().cpu;
     struct bpf_perf_event_value values[num_cpu_bpf];
+    struct perf_counts_values *counts;
     int reading_map_fd, err = 0;
     __u32 i;
     int j;
@@ -639,29 +649,32 @@
         case BPERF_FILTER_GLOBAL:
             assert(i == 0);
 
-            perf_cpu_map__for_each_cpu(entry, j, all_cpu_map) {
-                cpu = entry.cpu;
-                perf_counts(evsel->counts, cpu, 0)->val = values[cpu].counter;
-                perf_counts(evsel->counts, cpu, 0)->ena = values[cpu].enabled;
-                perf_counts(evsel->counts, cpu, 0)->run = values[cpu].running;
+            perf_cpu_map__for_each_cpu(entry, j, evsel__cpus(evsel)) {
+                counts = perf_counts(evsel->counts, j, 0);
+                counts->val = values[entry.cpu].counter;
+                counts->ena = values[entry.cpu].enabled;
+                counts->run = values[entry.cpu].running;
             }
             break;
         case BPERF_FILTER_CPU:
-            cpu = evsel->core.cpus->map[i].cpu;
-            perf_counts(evsel->counts, i, 0)->val = values[cpu].counter;
-            perf_counts(evsel->counts, i, 0)->ena = values[cpu].enabled;
-            perf_counts(evsel->counts, i, 0)->run = values[cpu].running;
+            cpu = perf_cpu_map__cpu(evsel__cpus(evsel), i).cpu;
+            assert(cpu >= 0);
+            counts = perf_counts(evsel->counts, i, 0);
+            counts->val = values[cpu].counter;
+            counts->ena = values[cpu].enabled;
+            counts->run = values[cpu].running;
             break;
         case BPERF_FILTER_PID:
         case BPERF_FILTER_TGID:
-            perf_counts(evsel->counts, 0, i)->val = 0;
-            perf_counts(evsel->counts, 0, i)->ena = 0;
-            perf_counts(evsel->counts, 0, i)->run = 0;
+            counts = perf_counts(evsel->counts, 0, i);
+            counts->val = 0;
+            counts->ena = 0;
+            counts->run = 0;
 
             for (cpu = 0; cpu < num_cpu_bpf; cpu++) {
-                perf_counts(evsel->counts, 0, i)->val += values[cpu].counter;
-                perf_counts(evsel->counts, 0, i)->ena += values[cpu].enabled;
-                perf_counts(evsel->counts, 0, i)->run += values[cpu].running;
+                counts->val += values[cpu].counter;
+                counts->ena += values[cpu].enabled;
+                counts->run += values[cpu].running;
             }
             break;
         default:
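The rewritten read paths index `evsel->counts` by cpu-map index instead of raw CPU number: libbpf sizes per-CPU value arrays by possible CPUs, while an evsel's cpu map may cover only a subset of the online CPUs. A minimal sketch of the translation both loops rely on (the helper name is illustrative):

// Sketch: map a raw CPU number (index into a libbpf per-possible-CPU
// value array) to the dense index used by perf_counts(). A return of
// -1 means the evsel does not count on that CPU, so the BPF value must
// be skipped rather than written out of bounds.
static int bpf_cpu_to_counts_idx(struct perf_cpu_map *cpus, int bpf_cpu)
{
    struct perf_cpu cpu = { .cpu = bpf_cpu };

    return perf_cpu_map__idx(cpus, cpu);
}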
diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c
index ac60c08e8e2a5..63b9db6574425 100644
--- a/tools/perf/util/bpf_counter_cgroup.c
+++ b/tools/perf/util/bpf_counter_cgroup.c
@@ -46,8 +46,8 @@ static int bperf_load_program(struct evlist *evlist)
     struct bpf_link *link;
     struct evsel *evsel;
     struct cgroup *cgrp, *leader_cgrp;
-    __u32 i, cpu;
-    __u32 nr_cpus = evlist->core.all_cpus->nr;
+    int i, j;
+    struct perf_cpu cpu;
     int total_cpus = cpu__max_cpu().cpu;
     int map_size, map_fd;
     int prog_fd, err;
@@ -93,9 +93,9 @@ static int bperf_load_program(struct evlist *evlist)
         goto out;
     }
 
-    for (i = 0; i < nr_cpus; i++) {
+    perf_cpu_map__for_each_cpu(cpu, i, evlist->core.all_cpus) {
         link = bpf_program__attach_perf_event(skel->progs.on_cgrp_switch,
-                              FD(cgrp_switch, i));
+                              FD(cgrp_switch, cpu.cpu));
         if (IS_ERR(link)) {
             pr_err("Failed to attach cgroup program\n");
             err = PTR_ERR(link);
@@ -122,10 +122,9 @@ static int bperf_load_program(struct evlist *evlist)
             }
 
             map_fd = bpf_map__fd(skel->maps.events);
-            for (cpu = 0; cpu < nr_cpus; cpu++) {
-                int fd = FD(evsel, cpu);
-                __u32 idx = evsel->core.idx * total_cpus +
-                    evlist->core.all_cpus->map[cpu].cpu;
+            perf_cpu_map__for_each_cpu(cpu, j, evlist->core.all_cpus) {
+                int fd = FD(evsel, cpu.cpu);
+                __u32 idx = evsel->core.idx * total_cpus + cpu.cpu;
 
                 err = bpf_map_update_elem(map_fd, &idx, &fd,
                               BPF_ANY);
@@ -207,14 +206,12 @@ static int bperf_cgrp__install_pe(struct evsel *evsel __maybe_unused,
  */
 static int bperf_cgrp__sync_counters(struct evlist *evlist)
 {
-    int i, cpu;
-    int nr_cpus = evlist->core.all_cpus->nr;
+    struct perf_cpu cpu;
+    int idx;
     int prog_fd = bpf_program__fd(skel->progs.trigger_read);
 
-    for (i = 0; i < nr_cpus; i++) {
-        cpu = evlist->core.all_cpus->map[i].cpu;
-        bperf_trigger_reading(prog_fd, cpu);
-    }
+    perf_cpu_map__for_each_cpu(cpu, idx, evlist->core.all_cpus)
+        bperf_trigger_reading(prog_fd, cpu.cpu);
 
     return 0;
 }
@@ -244,12 +241,10 @@ static int bperf_cgrp__disable(struct evsel *evsel)
 static int bperf_cgrp__read(struct evsel *evsel)
 {
     struct evlist *evlist = evsel->evlist;
-    int i, cpu, nr_cpus = evlist->core.all_cpus->nr;
     int total_cpus = cpu__max_cpu().cpu;
     struct perf_counts_values *counts;
     struct bpf_perf_event_value *values;
     int reading_map_fd, err = 0;
-    __u32 idx;
 
     if (evsel->core.idx)
         return 0;
@@ -263,7 +258,10 @@ static int bperf_cgrp__read(struct evsel *evsel)
     reading_map_fd = bpf_map__fd(skel->maps.cgrp_readings);
 
     evlist__for_each_entry(evlist, evsel) {
-        idx = evsel->core.idx;
+        __u32 idx = evsel->core.idx;
+        int i;
+        struct perf_cpu cpu;
+
         err = bpf_map_lookup_elem(reading_map_fd, &idx, values);
         if (err) {
             pr_err("bpf map lookup failed: idx=%u, event=%s, cgrp=%s\n",
@@ -271,13 +269,11 @@ static int bperf_cgrp__read(struct evsel *evsel)
             goto out;
         }
 
-        for (i = 0; i < nr_cpus; i++) {
-            cpu = evlist->core.all_cpus->map[i].cpu;
-
+        perf_cpu_map__for_each_cpu(cpu, i, evlist->core.all_cpus) {
             counts = perf_counts(evsel->counts, i, 0);
-            counts->val = values[cpu].counter;
-            counts->ena = values[cpu].enabled;
-            counts->run = values[cpu].running;
+            counts->val = values[cpu.cpu].counter;
+            counts->ena = values[cpu.cpu].enabled;
+            counts->run = values[cpu.cpu].running;
         }
     }
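These cgroup-counter loops move from poking at `perf_cpu_map` internals (`map[]`, `nr`) to the libperf iterator, which yields the dense index and the logical CPU together. A short usage sketch:

// Sketch: perf_cpu_map__for_each_cpu() yields both values needed above:
// 'idx' indexes dense per-evsel arrays such as perf_counts(), while
// 'cpu.cpu' is the logical CPU number used to index per-possible-CPU
// BPF value arrays.
struct perf_cpu cpu;
int idx;

perf_cpu_map__for_each_cpu(cpu, idx, evlist->core.all_cpus)
    pr_debug("map index %d -> cpu %d\n", idx, cpu.cpu);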
diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c
new file mode 100644
index 0000000000000..b73e84a022648
--- /dev/null
+++ b/tools/perf/util/bpf_off_cpu.c
@@ -0,0 +1,338 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util/bpf_counter.h"
+#include "util/debug.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/off_cpu.h"
+#include "util/perf-hooks.h"
+#include "util/record.h"
+#include "util/session.h"
+#include "util/target.h"
+#include "util/cpumap.h"
+#include "util/thread_map.h"
+#include "util/cgroup.h"
+#include <bpf/bpf.h>
+
+#include "bpf_skel/off_cpu.skel.h"
+
+#define MAX_STACKS 32
+/* we don't need actual timestamp, just want to put the samples at last */
+#define OFF_CPU_TIMESTAMP (~0ull << 32)
+
+static struct off_cpu_bpf *skel;
+
+struct off_cpu_key {
+    u32 pid;
+    u32 tgid;
+    u32 stack_id;
+    u32 state;
+    u64 cgroup_id;
+};
+
+union off_cpu_data {
+    struct perf_event_header hdr;
+    u64 array[1024 / sizeof(u64)];
+};
+
+static int off_cpu_config(struct evlist *evlist)
+{
+    struct evsel *evsel;
+    struct perf_event_attr attr = {
+        .type   = PERF_TYPE_SOFTWARE,
+        .config = PERF_COUNT_SW_BPF_OUTPUT,
+        .size   = sizeof(attr), /* to capture ABI version */
+    };
+    char *evname = strdup(OFFCPU_EVENT);
+
+    if (evname == NULL)
+        return -ENOMEM;
+
+    evsel = evsel__new(&attr);
+    if (!evsel) {
+        free(evname);
+        return -ENOMEM;
+    }
+
+    evsel->core.attr.freq = 1;
+    evsel->core.attr.sample_period = 1;
+    /* off-cpu analysis depends on stack trace */
+    evsel->core.attr.sample_type = PERF_SAMPLE_CALLCHAIN;
+
+    evlist__add(evlist, evsel);
+
+    free(evsel->name);
+    evsel->name = evname;
+
+    return 0;
+}
+
+static void off_cpu_start(void *arg)
+{
+    struct evlist *evlist = arg;
+
+    /* update task filter for the given workload */
+    if (!skel->bss->has_cpu && !skel->bss->has_task &&
+        perf_thread_map__pid(evlist->core.threads, 0) != -1) {
+        int fd;
+        u32 pid;
+        u8 val = 1;
+
+        skel->bss->has_task = 1;
+        fd = bpf_map__fd(skel->maps.task_filter);
+        pid = perf_thread_map__pid(evlist->core.threads, 0);
+        bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
+    }
+
+    skel->bss->enabled = 1;
+}
+
+static void off_cpu_finish(void *arg __maybe_unused)
+{
+    skel->bss->enabled = 0;
+    off_cpu_bpf__destroy(skel);
+}
+
+/* v5.18 kernel added prev_state arg, so it needs to check the signature */
+static void check_sched_switch_args(void)
+{
+    const struct btf *btf = bpf_object__btf(skel->obj);
+    const struct btf_type *t1, *t2, *t3;
+    u32 type_id;
+
+    type_id = btf__find_by_name_kind(btf, "bpf_trace_sched_switch",
+                     BTF_KIND_TYPEDEF);
+    if ((s32)type_id < 0)
+        return;
+
+    t1 = btf__type_by_id(btf, type_id);
+    if (t1 == NULL)
+        return;
+
+    t2 = btf__type_by_id(btf, t1->type);
+    if (t2 == NULL || !btf_is_ptr(t2))
+        return;
+
+    t3 = btf__type_by_id(btf, t2->type);
+    if (t3 && btf_is_func_proto(t3) && btf_vlen(t3) == 4) {
+        /* new format: pass prev_state as 4th arg */
+        skel->rodata->has_prev_state = true;
+    }
+}
+
+int off_cpu_prepare(struct evlist *evlist, struct target *target,
+            struct record_opts *opts)
+{
+    int err, fd, i;
+    int ncpus = 1, ntasks = 1, ncgrps = 1;
+
+    if (off_cpu_config(evlist) < 0) {
+        pr_err("Failed to config off-cpu BPF event\n");
+        return -1;
+    }
+
+    skel = off_cpu_bpf__open();
+    if (!skel) {
+        pr_err("Failed to open off-cpu BPF skeleton\n");
+        return -1;
+    }
+
+    /* don't need to set cpu filter for system-wide mode */
+    if (target->cpu_list) {
+        ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
+        bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
+    }
+
+    if (target__has_task(target)) {
+        ntasks = perf_thread_map__nr(evlist->core.threads);
+        bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+    }
+
+    if (evlist__first(evlist)->cgrp) {
+        ncgrps = evlist->core.nr_entries - 1; /* excluding a dummy */
+        bpf_map__set_max_entries(skel->maps.cgroup_filter, ncgrps);
+
+        if (!cgroup_is_v2("perf_event"))
+            skel->rodata->uses_cgroup_v1 = true;
+    }
+
+    if (opts->record_cgroup) {
+        skel->rodata->needs_cgroup = true;
+
+        if (!cgroup_is_v2("perf_event"))
+            skel->rodata->uses_cgroup_v1 = true;
+    }
+
+    set_max_rlimit();
+    check_sched_switch_args();
+
+    err = off_cpu_bpf__load(skel);
+    if (err) {
+        pr_err("Failed to load off-cpu skeleton\n");
+        goto out;
+    }
+
+    if (target->cpu_list) {
+        u32 cpu;
+        u8 val = 1;
+
+        skel->bss->has_cpu = 1;
+        fd = bpf_map__fd(skel->maps.cpu_filter);
+
+        for (i = 0; i < ncpus; i++) {
+            cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, i).cpu;
+            bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
+        }
+    }
+
+    if (target__has_task(target)) {
+        u32 pid;
+        u8 val = 1;
+
+        skel->bss->has_task = 1;
+        fd = bpf_map__fd(skel->maps.task_filter);
+
+        for (i = 0; i < ntasks; i++) {
+            pid = perf_thread_map__pid(evlist->core.threads, i);
+            bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
+        }
+    }
+
+    if (evlist__first(evlist)->cgrp) {
+        struct evsel *evsel;
+        u8 val = 1;
+
+        skel->bss->has_cgroup = 1;
+        fd = bpf_map__fd(skel->maps.cgroup_filter);
+
+        evlist__for_each_entry(evlist, evsel) {
+            struct cgroup *cgrp = evsel->cgrp;
+
+            if (cgrp == NULL)
+                continue;
+
+            if (!cgrp->id && read_cgroup_id(cgrp) < 0) {
+                pr_err("Failed to read cgroup id of %s\n",
+                       cgrp->name);
+                goto out;
+            }
+
+            bpf_map_update_elem(fd, &cgrp->id, &val, BPF_ANY);
+        }
+    }
+
+    err = off_cpu_bpf__attach(skel);
+    if (err) {
+        pr_err("Failed to attach off-cpu BPF skeleton\n");
+        goto out;
+    }
+
+    if (perf_hooks__set_hook("record_start", off_cpu_start, evlist) ||
+        perf_hooks__set_hook("record_end", off_cpu_finish, evlist)) {
+        pr_err("Failed to attach off-cpu skeleton\n");
+        goto out;
+    }
+
+    return 0;
+
+out:
+    off_cpu_bpf__destroy(skel);
+    return -1;
+}
+
+int off_cpu_write(struct perf_session *session)
+{
+    int bytes = 0, size;
+    int fd, stack;
+    u64 sample_type, val, sid = 0;
+    struct evsel *evsel;
+    struct perf_data_file *file = &session->data->file;
+    struct off_cpu_key prev, key;
+    union off_cpu_data data = {
+        .hdr = {
+            .type = PERF_RECORD_SAMPLE,
+            .misc = PERF_RECORD_MISC_USER,
+        },
+    };
+    u64 tstamp = OFF_CPU_TIMESTAMP;
+
+    skel->bss->enabled = 0;
+
+    evsel = evlist__find_evsel_by_str(session->evlist, OFFCPU_EVENT);
+    if (evsel == NULL) {
+        pr_err("%s evsel not found\n", OFFCPU_EVENT);
+        return 0;
+    }
+
+    sample_type = evsel->core.attr.sample_type;
+
+    if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) {
+        if (evsel->core.id)
+            sid = evsel->core.id[0];
+    }
+
+    fd = bpf_map__fd(skel->maps.off_cpu);
+    stack = bpf_map__fd(skel->maps.stacks);
+    memset(&prev, 0, sizeof(prev));
+
+    while (!bpf_map_get_next_key(fd, &prev, &key)) {
+        int n = 1;  /* start from perf_event_header */
+        int ip_pos = -1;
+
+        bpf_map_lookup_elem(fd, &key, &val);
+
+        if (sample_type & PERF_SAMPLE_IDENTIFIER)
+            data.array[n++] = sid;
+        if (sample_type & PERF_SAMPLE_IP) {
+            ip_pos = n;
+            data.array[n++] = 0;  /* will be updated */
+        }
+        if (sample_type & PERF_SAMPLE_TID)
+            data.array[n++] = (u64)key.pid << 32 | key.tgid;
+        if (sample_type & PERF_SAMPLE_TIME)
+            data.array[n++] = tstamp;
+        if (sample_type & PERF_SAMPLE_ID)
+            data.array[n++] = sid;
+        if (sample_type & PERF_SAMPLE_CPU)
+            data.array[n++] = 0;
+        if (sample_type & PERF_SAMPLE_PERIOD)
+            data.array[n++] = val;
+        if (sample_type & PERF_SAMPLE_CALLCHAIN) {
+            int len = 0;
+
+            /* data.array[n] is callchain->nr (updated later) */
+            data.array[n + 1] = PERF_CONTEXT_USER;
+            data.array[n + 2] = 0;
+
+            bpf_map_lookup_elem(stack, &key.stack_id, &data.array[n + 2]);
+            while (data.array[n + 2 + len])
+                len++;
+
+            /* update length of callchain */
+            data.array[n] = len + 1;
+
+            /* update sample ip with the first callchain entry */
+            if (ip_pos >= 0)
+                data.array[ip_pos] = data.array[n + 2];
+
+            /* calculate sample callchain data array length */
+            n += len + 2;
+        }
+        if (sample_type & PERF_SAMPLE_CGROUP)
+            data.array[n++] = key.cgroup_id;
+        /* TODO: handle more sample types */
+
+        size = n * sizeof(u64);
+        data.hdr.size = size;
+        bytes += size;
+
+        if (perf_data_file__write(file, &data, size) < 0) {
+            pr_err("failed to write perf data, error: %m\n");
+            return bytes;
+        }
+
+        prev = key;
+        /* increase dummy timestamp to sort later samples */
+        tstamp++;
+    }
+    return bytes;
+}
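off_cpu_write() assembles PERF_RECORD_SAMPLE records by hand, so the u64 slots must appear in exactly the order perf's parser derives from sample_type. An illustrative layout for one synthesized sample (the sample_type combination shown is an assumption for the example, not mandated by the code):

/*
 * Layout sketch for sample_type = IDENTIFIER | TID | TIME | PERIOD |
 * CALLCHAIN, as assembled in off_cpu_write():
 *
 *   array[0]    struct perf_event_header { PERF_RECORD_SAMPLE, MISC_USER, size }
 *   array[1]    sample id (sid)
 *   array[2]    (u64)key.pid << 32 | key.tgid       TID slot
 *   array[3]    OFF_CPU_TIMESTAMP + n               dummy time, keeps sort order
 *   array[4]    val                                 accumulated off-cpu time, ns
 *   array[5]    callchain nr = len + 1
 *   array[6]    PERF_CONTEXT_USER
 *   array[7..]  user stack entries copied from the 'stacks' map
 */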
"vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +/* task->flags for off-cpu analysis */ +#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ + +/* task->state for off-cpu analysis */ +#define TASK_INTERRUPTIBLE 0x0001 +#define TASK_UNINTERRUPTIBLE 0x0002 + +#define MAX_STACKS 32 +#define MAX_ENTRIES 102400 + +struct tstamp_data { + __u32 stack_id; + __u32 state; + __u64 timestamp; +}; + +struct offcpu_key { + __u32 pid; + __u32 tgid; + __u32 stack_id; + __u32 state; + __u64 cgroup_id; +}; + +struct { + __uint(type, BPF_MAP_TYPE_STACK_TRACE); + __uint(key_size, sizeof(__u32)); + __uint(value_size, MAX_STACKS * sizeof(__u64)); + __uint(max_entries, MAX_ENTRIES); +} stacks SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct tstamp_data); +} tstamp SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(struct offcpu_key)); + __uint(value_size, sizeof(__u64)); + __uint(max_entries, MAX_ENTRIES); +} off_cpu SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u8)); + __uint(max_entries, 1); +} cpu_filter SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u8)); + __uint(max_entries, 1); +} task_filter SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u64)); + __uint(value_size, sizeof(__u8)); + __uint(max_entries, 1); +} cgroup_filter SEC(".maps"); + +/* old kernel task_struct definition */ +struct task_struct___old { + long state; +} __attribute__((preserve_access_index)); + +int enabled = 0; +int has_cpu = 0; +int has_task = 0; +int has_cgroup = 0; + +const volatile bool has_prev_state = false; +const volatile bool needs_cgroup = false; +const volatile bool uses_cgroup_v1 = false; + +/* + * Old kernel used to call it task_struct->state and now it's '__state'. 
+ * Use BPF CO-RE "ignored suffix rule" to deal with it like below: + * + * https://nakryiko.com/posts/bpf-core-reference-guide/#handling-incompatible-field-and-type-changes + */ +static inline int get_task_state(struct task_struct *t) +{ + if (bpf_core_field_exists(t->__state)) + return BPF_CORE_READ(t, __state); + + /* recast pointer to capture task_struct___old type for compiler */ + struct task_struct___old *t_old = (void *)t; + + /* now use old "state" name of the field */ + return BPF_CORE_READ(t_old, state); +} + +static inline __u64 get_cgroup_id(struct task_struct *t) +{ + struct cgroup *cgrp; + + if (uses_cgroup_v1) + cgrp = BPF_CORE_READ(t, cgroups, subsys[perf_event_cgrp_id], cgroup); + else + cgrp = BPF_CORE_READ(t, cgroups, dfl_cgrp); + + return BPF_CORE_READ(cgrp, kn, id); +} + +static inline int can_record(struct task_struct *t, int state) +{ + /* kernel threads don't have user stack */ + if (t->flags & PF_KTHREAD) + return 0; + + if (state != TASK_INTERRUPTIBLE && + state != TASK_UNINTERRUPTIBLE) + return 0; + + if (has_cpu) { + __u32 cpu = bpf_get_smp_processor_id(); + __u8 *ok; + + ok = bpf_map_lookup_elem(&cpu_filter, &cpu); + if (!ok) + return 0; + } + + if (has_task) { + __u8 *ok; + __u32 pid = t->pid; + + ok = bpf_map_lookup_elem(&task_filter, &pid); + if (!ok) + return 0; + } + + if (has_cgroup) { + __u8 *ok; + __u64 cgrp_id = get_cgroup_id(t); + + ok = bpf_map_lookup_elem(&cgroup_filter, &cgrp_id); + if (!ok) + return 0; + } + + return 1; +} + +static int off_cpu_stat(u64 *ctx, struct task_struct *prev, + struct task_struct *next, int state) +{ + __u64 ts; + __u32 stack_id; + struct tstamp_data *pelem; + + ts = bpf_ktime_get_ns(); + + if (!can_record(prev, state)) + goto next; + + stack_id = bpf_get_stackid(ctx, &stacks, + BPF_F_FAST_STACK_CMP | BPF_F_USER_STACK); + + pelem = bpf_task_storage_get(&tstamp, prev, NULL, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!pelem) + goto next; + + pelem->timestamp = ts; + pelem->state = state; + pelem->stack_id = stack_id; + +next: + pelem = bpf_task_storage_get(&tstamp, next, NULL, 0); + + if (pelem && pelem->timestamp) { + struct offcpu_key key = { + .pid = next->pid, + .tgid = next->tgid, + .stack_id = pelem->stack_id, + .state = pelem->state, + .cgroup_id = needs_cgroup ? 
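off_cpu_stat() runs once per context switch and plays both roles: it stamps the task being scheduled out, and charges `now - stamp` to the task being scheduled in. A small user-space model of that accounting, useful for reasoning about it (names are illustrative):

#include <stdint.h>

/* Toy model of the two-phase accounting in off_cpu_stat(). */
struct task_model {
    uint64_t sleep_start;   /* 0 when not sleeping */
    uint64_t off_cpu_total;
};

static void model_sched_switch(struct task_model *prev,
                   struct task_model *next, uint64_t now)
{
    prev->sleep_start = now;        /* switch-out: off-cpu begins */
    if (next->sleep_start) {        /* switch-in: off-cpu ends */
        next->off_cpu_total += now - next->sleep_start;
        next->sleep_start = 0;      /* never count a slice twice */
    }
}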
diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp
index df7b18fb6b6e6..1aad7d6d34aaa 100644
--- a/tools/perf/util/c++/clang.cpp
+++ b/tools/perf/util/c++/clang.cpp
@@ -20,7 +20,11 @@
 #include "llvm/Option/Option.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/ManagedStatic.h"
+#if CLANG_VERSION_MAJOR >= 14
+#include "llvm/MC/TargetRegistry.h"
+#else
 #include "llvm/Support/TargetRegistry.h"
+#endif
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
index a5ace2bbc28d2..caabeac24c690 100644
--- a/tools/perf/util/data.c
+++ b/tools/perf/util/data.c
@@ -479,6 +479,20 @@ int perf_data__make_kcore_dir(struct perf_data *data, char *buf, size_t buf_sz)
     return mkdir(buf, S_IRWXU);
 }
 
+bool has_kcore_dir(const char *path)
+{
+    char *kcore_dir;
+    int ret;
+
+    if (asprintf(&kcore_dir, "%s/kcore_dir", path) < 0)
+        return false;
+
+    ret = access(kcore_dir, F_OK);
+
+    free(kcore_dir);
+    return !ret;
+}
+
 char *perf_data__kallsyms_name(struct perf_data *data)
 {
     char *kallsyms_name;
diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h
index c9de82af5584e..7de53d6e2d7fc 100644
--- a/tools/perf/util/data.h
+++ b/tools/perf/util/data.h
@@ -4,6 +4,7 @@
 
 #include <stdio.h>
 #include <stdbool.h>
+#include <linux/types.h>
 
 enum perf_data_mode {
     PERF_DATA_MODE_WRITE,
@@ -98,6 +99,7 @@ void perf_data__close_dir(struct perf_data *data);
 int perf_data__update_dir(struct perf_data *data);
 unsigned long perf_data__size(struct perf_data *data);
 int perf_data__make_kcore_dir(struct perf_data *data, char *buf, size_t buf_sz);
+bool has_kcore_dir(const char *path);
 char *perf_data__kallsyms_name(struct perf_data *data);
 bool is_perf_data(const char *path);
 #endif /* __PERF_DATA_H */
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 6439c888ae382..0476bb3a4188a 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -683,9 +683,12 @@ static bool check_address_range(struct intlist *addr_list, int addr_range,
 int machine__resolve(struct machine *machine, struct addr_location *al,
              struct perf_sample *sample)
 {
-    struct thread *thread = machine__findnew_thread(machine, sample->pid,
-                            sample->tid);
+    struct thread *thread;
 
+    if (symbol_conf.guest_code && !machine__is_host(machine))
+        thread = machine__findnew_guest_code(machine, sample->pid);
+    else
+        thread = machine__findnew_thread(machine, sample->pid, sample->tid);
     if (thread == NULL)
         return -1;
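has_kcore_dir() lets a tool probe an input perf.data directory for a captured kernel core image before deciding to carry it over. A sketch of a plausible call site (the copy helper shown is hypothetical):

// Hypothetical caller: preserve kcore_dir when rewriting a perf.data
// directory, but only if the input actually contains one.
static int maybe_copy_kcore(const char *input, const char *output)
{
    if (!has_kcore_dir(input))
        return 0;               /* nothing to copy */

    return copy_kcore_dir(input, output);   /* assumed helper */
}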
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 52ea004ba01e5..48af7d379d822 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -242,14 +242,20 @@ int __evlist__add_default(struct evlist *evlist, bool precise)
     return 0;
 }
 
-int evlist__add_dummy(struct evlist *evlist)
+static struct evsel *evlist__dummy_event(struct evlist *evlist)
 {
     struct perf_event_attr attr = {
         .type   = PERF_TYPE_SOFTWARE,
         .config = PERF_COUNT_SW_DUMMY,
         .size   = sizeof(attr), /* to capture ABI version */
     };
-    struct evsel *evsel = evsel__new_idx(&attr, evlist->core.nr_entries);
+
+    return evsel__new_idx(&attr, evlist->core.nr_entries);
+}
+
+int evlist__add_dummy(struct evlist *evlist)
+{
+    struct evsel *evsel = evlist__dummy_event(evlist);
 
     if (evsel == NULL)
         return -ENOMEM;
@@ -258,6 +264,51 @@ int evlist__add_dummy(struct evlist *evlist)
     return 0;
 }
 
+static void evlist__add_on_all_cpus(struct evlist *evlist, struct evsel *evsel)
+{
+    evsel->core.system_wide = true;
+
+    /*
+     * All CPUs.
+     *
+     * Note perf_event_open() does not accept CPUs that are not online, so
+     * in fact this CPU list will include only all online CPUs.
+     */
+    perf_cpu_map__put(evsel->core.own_cpus);
+    evsel->core.own_cpus = perf_cpu_map__new(NULL);
+    perf_cpu_map__put(evsel->core.cpus);
+    evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus);
+
+    /* No threads */
+    perf_thread_map__put(evsel->core.threads);
+    evsel->core.threads = perf_thread_map__new_dummy();
+
+    evlist__add(evlist, evsel);
+}
+
+struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide)
+{
+    struct evsel *evsel = evlist__dummy_event(evlist);
+
+    if (!evsel)
+        return NULL;
+
+    evsel->core.attr.exclude_kernel = 1;
+    evsel->core.attr.exclude_guest = 1;
+    evsel->core.attr.exclude_hv = 1;
+    evsel->core.attr.freq = 0;
+    evsel->core.attr.sample_period = 1;
+    evsel->no_aux_samples = true;
+    evsel->name = strdup("dummy:u");
+
+    if (system_wide)
+        evlist__add_on_all_cpus(evlist, evsel);
+    else
+        evlist__add(evlist, evsel);
+
+    return evsel;
+}
+
 static int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
 {
     struct evsel *evsel, *n;
@@ -334,14 +385,6 @@ int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name,
     return 0;
 }
 
-static int evlist__nr_threads(struct evlist *evlist, struct evsel *evsel)
-{
-    if (evsel->core.system_wide)
-        return 1;
-    else
-        return perf_thread_map__nr(evlist->core.threads);
-}
-
 struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affinity *affinity)
 {
     struct evlist_cpu_iterator itr = {
@@ -546,48 +589,6 @@ void evlist__toggle_enable(struct evlist *evlist)
     (evlist->enabled ? evlist__disable : evlist__enable)(evlist);
 }
 
-static int evlist__enable_event_cpu(struct evlist *evlist, struct evsel *evsel, int cpu)
-{
-    int thread;
-    int nr_threads = evlist__nr_threads(evlist, evsel);
-
-    if (!evsel->core.fd)
-        return -EINVAL;
-
-    for (thread = 0; thread < nr_threads; thread++) {
-        int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
-        if (err)
-            return err;
-    }
-    return 0;
-}
-
-static int evlist__enable_event_thread(struct evlist *evlist, struct evsel *evsel, int thread)
-{
-    int cpu;
-    int nr_cpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
-
-    if (!evsel->core.fd)
-        return -EINVAL;
-
-    for (cpu = 0; cpu < nr_cpus; cpu++) {
-        int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
-        if (err)
-            return err;
-    }
-    return 0;
-}
-
-int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx)
-{
-    bool per_cpu_mmaps = !perf_cpu_map__empty(evlist->core.user_requested_cpus);
-
-    if (per_cpu_mmaps)
-        return evlist__enable_event_cpu(evlist, evsel, idx);
-
-    return evlist__enable_event_thread(evlist, evsel, idx);
-}
-
 int evlist__add_pollfd(struct evlist *evlist, int fd)
 {
     return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN, fdarray_flag__default);
@@ -797,13 +798,15 @@ static struct mmap *evlist__alloc_mmap(struct evlist *evlist,
 
 static void
 perf_evlist__mmap_cb_idx(struct perf_evlist *_evlist,
+             struct perf_evsel *_evsel,
              struct perf_mmap_param *_mp,
-             int idx, bool per_cpu)
+             int idx)
 {
     struct evlist *evlist = container_of(_evlist, struct evlist, core);
     struct mmap_params *mp = container_of(_mp, struct mmap_params, core);
+    struct evsel *evsel = container_of(_evsel, struct evsel, core);
 
-    auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, idx, per_cpu);
+    auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, evsel, idx);
 }
 
 static struct perf_mmap*
@@ -1790,8 +1793,13 @@ struct evsel *evlist__reset_weak_group(struct evlist *evsel_list, struct evsel *
         if (evsel__has_leader(c2, leader)) {
             if (is_open && close)
                 perf_evsel__close(&c2->core);
-            evsel__set_leader(c2, c2);
-            c2->core.nr_members = 0;
+            /*
+             * We want to close all members of the group and reopen
+             * them. Some events, like Intel topdown, require being
+             * in a group and so keep these in the group.
+             */
+            evsel__remove_from_group(c2, leader);
+
             /*
              * Set this for all former members of the group
              * to indicate they get reopened.
@@ -1799,6 +1807,9 @@ struct evsel *evlist__reset_weak_group(struct evlist *evsel_list, struct evsel *
             c2->reset_group = true;
         }
     }
+    /* Reset the leader count if all entries were removed. */
+    if (leader->core.nr_members == 1)
+        leader->core.nr_members = 0;
     return leader;
 }
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index a21daaa5fc1b3..1bde9ccf4e7da 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -114,6 +114,11 @@ int arch_evlist__add_default_attrs(struct evlist *evlist);
 struct evsel *arch_evlist__leader(struct list_head *list);
 
 int evlist__add_dummy(struct evlist *evlist);
+struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide);
+static inline struct evsel *evlist__add_dummy_on_all_cpus(struct evlist *evlist)
+{
+    return evlist__add_aux_dummy(evlist, true);
+}
 
 int evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr,
              evsel__sb_cb_t cb, void *data);
@@ -196,8 +201,6 @@ void evlist__toggle_enable(struct evlist *evlist);
 void evlist__disable_evsel(struct evlist *evlist, char *evsel_name);
 void evlist__enable_evsel(struct evlist *evlist, char *evsel_name);
 
-int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx);
-
 void evlist__set_selected(struct evlist *evlist, struct evsel *evsel);
 
 int evlist__create_maps(struct evlist *evlist, struct target *target);
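The new aux dummy helper gives instrumentation code a cheap user-space event for collecting sideband data (mmap, comm, task records) without generating samples, optionally opened system-wide on all online CPUs. A usage sketch (the attribute choices are illustrative):

// Sketch: add a system-wide sideband tracker next to an auxtrace event.
static int add_sideband_tracker(struct evlist *evlist)
{
    struct evsel *tracker = evlist__add_dummy_on_all_cpus(evlist);

    if (!tracker)
        return -ENOMEM;

    /* route mmap/comm/task sideband to this event only */
    tracker->core.attr.mmap = 1;
    tracker->core.attr.comm = 1;
    tracker->core.attr.task = 1;
    return 0;
}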
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 2a1729e7aee46..ce499c5da8d7b 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -59,6 +59,33 @@ struct perf_missing_features perf_missing_features;
 
 static clockid_t clockid;
 
+static const char *const perf_tool_event__tool_names[PERF_TOOL_MAX] = {
+    NULL,
+    "duration_time",
+    "user_time",
+    "system_time",
+};
+
+const char *perf_tool_event__to_str(enum perf_tool_event ev)
+{
+    if (ev > PERF_TOOL_NONE && ev < PERF_TOOL_MAX)
+        return perf_tool_event__tool_names[ev];
+
+    return NULL;
+}
+
+enum perf_tool_event perf_tool_event__from_str(const char *str)
+{
+    int i;
+
+    perf_tool_event__for_each_event(i) {
+        if (!strcmp(str, perf_tool_event__tool_names[i]))
+            return i;
+    }
+    return PERF_TOOL_NONE;
+}
+
+
 static int evsel__no_extra_init(struct evsel *evsel __maybe_unused)
 {
     return 0;
@@ -269,8 +296,8 @@ struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx)
         return NULL;
     evsel__init(evsel, attr, idx);
 
-    if (evsel__is_bpf_output(evsel)) {
-        evsel->core.attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
+    if (evsel__is_bpf_output(evsel) && !attr->sample_type) {
+        evsel->core.attr.sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
                         PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
         evsel->core.attr.sample_period = 1;
     }
@@ -382,6 +409,7 @@ struct evsel *evsel__clone(struct evsel *orig)
     evsel->core.threads = perf_thread_map__get(orig->core.threads);
     evsel->core.nr_members = orig->core.nr_members;
     evsel->core.system_wide = orig->core.system_wide;
+    evsel->core.requires_cpu = orig->core.requires_cpu;
 
     if (orig->name) {
         evsel->name = strdup(orig->name);
@@ -486,7 +514,7 @@ out_err:
     return ERR_PTR(err);
 }
 
-const char *evsel__hw_names[PERF_COUNT_HW_MAX] = {
+const char *const evsel__hw_names[PERF_COUNT_HW_MAX] = {
     "cycles",
     "instructions",
     "cache-references",
@@ -571,7 +599,7 @@ static int evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
     return r + evsel__add_modifiers(evsel, bf + r, size - r);
 }
 
-const char *evsel__sw_names[PERF_COUNT_SW_MAX] = {
+const char *const evsel__sw_names[PERF_COUNT_SW_MAX] = {
     "cpu-clock",
     "task-clock",
     "page-faults",
@@ -597,6 +625,11 @@ static int evsel__sw_name(struct evsel *evsel, char *bf, size_t size)
     return r + evsel__add_modifiers(evsel, bf + r, size - r);
 }
 
+static int evsel__tool_name(enum perf_tool_event ev, char *bf, size_t size)
+{
+    return scnprintf(bf, size, "%s", perf_tool_event__to_str(ev));
+}
+
 static int __evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
 {
     int r;
@@ -622,7 +655,7 @@ static int evsel__bp_name(struct evsel *evsel, char *bf, size_t size)
     return r + evsel__add_modifiers(evsel, bf + r, size - r);
 }
 
-const char *evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES] = {
+const char *const evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES] = {
     { "L1-dcache", "l1-d",  "l1d",  "L1-data",        },
     { "L1-icache", "l1-i",  "l1i",  "L1-instruction", },
     { "LLC",       "L2",                              },
@@ -632,13 +665,13 @@ const char *evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES] = {
     { "node",                                         },
 };
 
-const char *evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES] = {
+const char *const evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES] = {
     { "load",     "loads",      "read",                                  },
     { "store",    "stores",     "write",                                 },
     { "prefetch", "prefetches", "speculative-read", "speculative-load",  },
 };
 
-const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES] = {
+const char *const evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES] = {
     { "refs",   "Reference", "ops", "access", },
     { "misses", "miss",                       },
 };
@@ -654,7 +687,7 @@ const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_AL
  *  L1I : Read and prefetch only
  *  ITLB and BPU : Read-only
  */
-static unsigned long evsel__hw_cache_stat[C(MAX)] = {
+static const unsigned long evsel__hw_cache_stat[C(MAX)] = {
     [C(L1D)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
     [C(L1I)] = (CACHE_READ | CACHE_PREFETCH),
     [C(LL)]  = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
@@ -723,12 +756,6 @@ static int evsel__raw_name(struct evsel *evsel, char *bf, size_t size)
     return ret + evsel__add_modifiers(evsel, bf + ret, size - ret);
 }
 
-static int evsel__tool_name(char *bf, size_t size)
-{
-    int ret = scnprintf(bf, size, "duration_time");
-    return ret;
-}
-
 const char *evsel__name(struct evsel *evsel)
 {
     char bf[128];
@@ -753,8 +780,8 @@ const char *evsel__name(struct evsel *evsel)
         break;
 
     case PERF_TYPE_SOFTWARE:
-        if (evsel->tool_event)
-            evsel__tool_name(bf, sizeof(bf));
+        if (evsel__is_tool(evsel))
+            evsel__tool_name(evsel->tool_event, bf, sizeof(bf));
         else
             evsel__sw_name(evsel, bf, sizeof(bf));
         break;
@@ -786,8 +813,8 @@ const char *evsel__metric_id(const struct evsel *evsel)
     if (evsel->metric_id)
         return evsel->metric_id;
 
-    if (evsel->core.attr.type == PERF_TYPE_SOFTWARE && evsel->tool_event)
-        return "duration_time";
+    if (evsel__is_tool(evsel))
+        return perf_tool_event__to_str(evsel->tool_event);
 
     return "unknown";
 }
@@ -870,7 +897,7 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o
                "specifying a subset with --user-regs may render DWARF unwinding unreliable, "
                "so the minimal registers set (IP, SP) is explicitly forced.\n");
         } else {
-            attr->sample_regs_user |= PERF_REGS_MASK;
+            attr->sample_regs_user |= arch__user_reg_mask();
         }
         attr->sample_stack_user = param->dump_size;
         attr->exclude_callchain_user = 1;
@@ -3077,3 +3104,22 @@ int evsel__source_count(const struct evsel *evsel)
     }
     return count;
 }
+
+bool __weak arch_evsel__must_be_in_group(const struct evsel *evsel __maybe_unused)
+{
+    return false;
+}
+
+/*
+ * Remove an event from a given group (leader).
+ * Some events, e.g., perf metrics Topdown events,
+ * must always be grouped. Ignore the events.
+ */
+void evsel__remove_from_group(struct evsel *evsel, struct evsel *leader)
+{
+    if (!arch_evsel__must_be_in_group(evsel) && evsel != leader) {
+        evsel__set_leader(evsel, evsel);
+        evsel->core.nr_members = 0;
+        leader->core.nr_members--;
+    }
+}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 041b42d33bf5a..73ea48e940796 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -30,8 +30,18 @@ typedef int (evsel__sb_cb_t)(union perf_event *event, void *data);
 enum perf_tool_event {
     PERF_TOOL_NONE = 0,
     PERF_TOOL_DURATION_TIME = 1,
+    PERF_TOOL_USER_TIME = 2,
+    PERF_TOOL_SYSTEM_TIME = 3,
+
+    PERF_TOOL_MAX,
 };
 
+const char *perf_tool_event__to_str(enum perf_tool_event ev);
+enum perf_tool_event perf_tool_event__from_str(const char *str);
+
+#define perf_tool_event__for_each_event(ev)     \
+    for ((ev) = PERF_TOOL_DURATION_TIME; (ev) < PERF_TOOL_MAX; ev++)
+
 /** struct evsel - event selector
  *
  * @evlist - evlist this evsel is in, if it is in one.
@@ -120,6 +130,7 @@ struct evsel {
     bool            merged_stat;
     bool            reset_group;
     bool            errored;
+    bool            needs_auxtrace_mmap;
     struct hashmap  *per_pkg_mask;
     int             err;
     struct {
@@ -253,11 +264,11 @@ static inline bool evsel__is_bpf(struct evsel *evsel)
 
 #define EVSEL__MAX_ALIASES 8
 
-extern const char *evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES];
-extern const char *evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES];
-extern const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES];
-extern const char *evsel__hw_names[PERF_COUNT_HW_MAX];
-extern const char *evsel__sw_names[PERF_COUNT_SW_MAX];
+extern const char *const evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES];
+extern const char *const evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES];
+extern const char *const evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES];
+extern const char *const evsel__hw_names[PERF_COUNT_HW_MAX];
+extern const char *const evsel__sw_names[PERF_COUNT_SW_MAX];
 extern char *evsel__bpf_counter_events;
 bool evsel__match_bpf_counter_events(const char *name);
 
@@ -265,6 +276,11 @@ int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size
 const char *evsel__name(struct evsel *evsel);
 const char *evsel__metric_id(const struct evsel *evsel);
 
+static inline bool evsel__is_tool(const struct evsel *evsel)
+{
+    return evsel->tool_event != PERF_TOOL_NONE;
+}
+
 const char *evsel__group_name(struct evsel *evsel);
 int evsel__group_desc(struct evsel *evsel, char *buf, size_t size);
 
@@ -483,6 +499,9 @@ bool evsel__has_leader(struct evsel *evsel, struct evsel *leader);
 bool evsel__is_leader(struct evsel *evsel);
 void evsel__set_leader(struct evsel *evsel, struct evsel *leader);
 int evsel__source_count(const struct evsel *evsel);
+void evsel__remove_from_group(struct evsel *evsel, struct evsel *leader);
+
+bool arch_evsel__must_be_in_group(const struct evsel *evsel);
 
 /*
  * Macro to swap the bit-field postition and size.
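arch_evsel__must_be_in_group() is a weak hook, so an architecture can veto the degrouping done by evsel__remove_from_group(). A hedged sketch of what an x86 override might look like for the Intel topdown case named in the comment above (the exact matching logic is an assumption, not part of this diff):

#include <string.h>

/*
 * Hypothetical x86 override: perf metrics (topdown) events only work
 * as a group led by the slots event, so never split them out.
 */
bool arch_evsel__must_be_in_group(const struct evsel *evsel)
{
    return evsel->name &&
           (strcasestr(evsel->name, "slots") ||
            strcasestr(evsel->name, "topdown"));
}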
diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h
index 3db3293213a9b..ae138afe6c563 100644
--- a/tools/perf/util/genelf.h
+++ b/tools/perf/util/genelf.h
@@ -38,6 +38,9 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent
 #elif defined(__s390x__)
 #define GEN_ELF_ARCH    EM_S390
 #define GEN_ELF_CLASS   ELFCLASS64
+#elif defined(__riscv) && __riscv_xlen == 64
+#define GEN_ELF_ARCH    EM_RISCV
+#define GEN_ELF_CLASS   ELFCLASS64
 #else
 #error "unsupported architecture"
 #endif
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index a27132e5a5efe..53332da100e83 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -3462,9 +3462,22 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full)
     return 0;
 }
 
+struct header_fw {
+    struct feat_writer  fw;
+    struct feat_fd      *ff;
+};
+
+static int feat_writer_cb(struct feat_writer *fw, void *buf, size_t sz)
+{
+    struct header_fw *h = container_of(fw, struct header_fw, fw);
+
+    return do_write(h->ff, buf, sz);
+}
+
 static int do_write_feat(struct feat_fd *ff, int type,
              struct perf_file_section **p,
-             struct evlist *evlist)
+             struct evlist *evlist,
+             struct feat_copier *fc)
 {
     int err;
     int ret = 0;
@@ -3478,7 +3491,23 @@ static int do_write_feat(struct feat_fd *ff, int type,
 
         (*p)->offset = lseek(ff->fd, 0, SEEK_CUR);
 
-        err = feat_ops[type].write(ff, evlist);
+        /*
+         * Hook to let perf inject copy features sections from the input
+         * file.
+         */
+        if (fc && fc->copy) {
+            struct header_fw h = {
+                .fw.write = feat_writer_cb,
+                .ff = ff,
+            };
+
+            /* ->copy() returns 0 if the feature was not copied */
+            err = fc->copy(fc, type, &h.fw);
+        } else {
+            err = 0;
+        }
+        if (!err)
+            err = feat_ops[type].write(ff, evlist);
         if (err < 0) {
             pr_debug("failed to write feature %s\n", feat_ops[type].name);
 
@@ -3494,7 +3523,8 @@ static int do_write_feat(struct feat_fd *ff, int type,
 }
 
 static int perf_header__adds_write(struct perf_header *header,
-                   struct evlist *evlist, int fd)
+                   struct evlist *evlist, int fd,
+                   struct feat_copier *fc)
 {
     int nr_sections;
     struct feat_fd ff;
@@ -3523,7 +3553,7 @@ static int perf_header__adds_write(struct perf_header *header,
     lseek(fd, sec_start + sec_size, SEEK_SET);
 
     for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) {
-        if (do_write_feat(&ff, feat, &p, evlist))
+        if (do_write_feat(&ff, feat, &p, evlist, fc))
             perf_header__clear_feat(header, feat);
     }
 
@@ -3561,9 +3591,10 @@ int perf_header__write_pipe(int fd)
     return 0;
 }
 
-int perf_session__write_header(struct perf_session *session,
-                   struct evlist *evlist,
-                   int fd, bool at_exit)
+static int perf_session__do_write_header(struct perf_session *session,
+                     struct evlist *evlist,
+                     int fd, bool at_exit,
+                     struct feat_copier *fc)
 {
     struct perf_file_header f_header;
     struct perf_file_attr   f_attr;
@@ -3615,7 +3646,7 @@ int perf_session__write_header(struct perf_session *session,
     header->feat_offset = header->data_offset + header->data_size;
 
     if (at_exit) {
-        err = perf_header__adds_write(header, evlist, fd);
+        err = perf_header__adds_write(header, evlist, fd, fc);
         if (err < 0)
             return err;
     }
@@ -3648,6 +3679,21 @@ int perf_session__write_header(struct perf_session *session,
     return 0;
 }
 
+int perf_session__write_header(struct perf_session *session,
+                   struct evlist *evlist,
+                   int fd, bool at_exit)
+{
+    return perf_session__do_write_header(session, evlist, fd, at_exit, NULL);
+}
+
+int perf_session__inject_header(struct perf_session *session,
+                struct evlist *evlist,
+                int fd,
+                struct feat_copier *fc)
+{
+    return perf_session__do_write_header(session, evlist, fd, true, fc);
+}
+
 static int perf_header__getbuffer64(struct perf_header *header,
                     int fd, void *buf, size_t size)
 {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 0eb4bc29a5a46..08563c1f1bffe 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -121,6 +121,21 @@ int perf_session__write_header(struct perf_session *session,
                    int fd, bool at_exit);
 int perf_header__write_pipe(int fd);
 
+/* feat_writer writes a feature section to output */
+struct feat_writer {
+    int (*write)(struct feat_writer *fw, void *buf, size_t sz);
+};
+
+/* feat_copier copies a feature section using feat_writer to output */
+struct feat_copier {
+    int (*copy)(struct feat_copier *fc, int feat, struct feat_writer *fw);
+};
+
+int perf_session__inject_header(struct perf_session *session,
+                struct evlist *evlist,
+                int fd,
+                struct feat_copier *fc);
+
 void perf_header__set_feat(struct perf_header *header, int feat);
 void perf_header__clear_feat(struct perf_header *header, int feat);
 bool perf_header__has_feat(const struct perf_header *header, int feat);
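A tool that wants selected feature sections copied verbatim rather than regenerated implements struct feat_copier and hands it to perf_session__inject_header(). A minimal sketch of an implementation (the saved-section bookkeeping is hypothetical):

/*
 * Hypothetical feat_copier: replay a section captured from the input
 * file. Returning 0 makes do_write_feat() fall back to
 * feat_ops[type].write() and regenerate the section instead.
 */
struct my_feat_copier {
    struct feat_copier fc;
    void *saved[HEADER_FEAT_BITS];
    size_t saved_sz[HEADER_FEAT_BITS];
};

static int my_copy(struct feat_copier *fc, int feat, struct feat_writer *fw)
{
    struct my_feat_copier *c = container_of(fc, struct my_feat_copier, fc);

    if (!c->saved[feat])
        return 0;   /* not captured: regenerate */

    return fw->write(fw, c->saved[feat], c->saved_sz[feat]);
}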
INTEL_PT_TRACESTOP: + default: + return 1; + } + + return 0; +} + +static int intel_pt_emulated_ptwrite(struct intel_pt_decoder *decoder) +{ + int n = 64 - decoder->tnt.count; + struct eptw_data data = { + .bit_countdown = n, + .payload = decoder->tnt.payload >> n, + }; + + decoder->emulated_ptwrite = false; + intel_pt_log("Emulated ptwrite detected\n"); + + intel_pt_pkt_lookahead(decoder, intel_pt_eptw_lookahead_cb, &data); + if (data.bit_countdown) + return -ECONNRESET; + + decoder->state.type = INTEL_PT_PTW; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->state.ptw_payload = data.payload; + return 0; +} + static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder) { struct intel_pt_insn intel_pt_insn; int err; while (1) { + if (decoder->emulated_ptwrite) + return intel_pt_emulated_ptwrite(decoder); err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0); - if (err == INTEL_PT_RETURN) + if (err == INTEL_PT_RETURN) { + decoder->emulated_ptwrite = intel_pt_insn.emulated_ptwrite; return 0; - if (err) + } + if (err) { + decoder->emulated_ptwrite = false; return err; + } if (intel_pt_insn.op == INTEL_PT_OP_RET) { if (!decoder->return_compression) { diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index efb2cb3ae0ca1..c773028df80e1 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -58,6 +58,7 @@ enum { INTEL_PT_ERR_LOST, INTEL_PT_ERR_UNK, INTEL_PT_ERR_NELOOP, + INTEL_PT_ERR_EPTW, INTEL_PT_ERR_MAX, }; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c index 9d5e65cec89b4..1376077183f72 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -32,6 +32,7 @@ static void intel_pt_insn_decoder(struct insn *insn, int ext; intel_pt_insn->rel = 0; + intel_pt_insn->emulated_ptwrite = false; if (insn_is_avx(insn)) { intel_pt_insn->op = INTEL_PT_OP_OTHER; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h index c2861cfdd768d..e3338b56a75f2 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h @@ -37,6 +37,7 @@ enum intel_pt_insn_branch { struct intel_pt_insn { enum intel_pt_insn_op op; enum intel_pt_insn_branch branch; + bool emulated_ptwrite; int length; int32_t rel; unsigned char buf[INTEL_PT_INSN_BUF_SZ]; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index ec43d364d0deb..62b2f375a94dc 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -192,6 +192,7 @@ struct intel_pt_queue { pid_t next_tid; struct thread *thread; struct machine *guest_machine; + struct thread *guest_thread; struct thread *unknown_guest_thread; pid_t guest_machine_pid; bool exclude_kernel; @@ -530,6 +531,7 @@ struct intel_pt_cache_entry { u64 byte_cnt; enum intel_pt_insn_op op; enum intel_pt_insn_branch branch; + bool emulated_ptwrite; int length; int32_t rel; char insn[INTEL_PT_INSN_BUF_SZ]; @@ -616,6 +618,7 @@ static int intel_pt_cache_add(struct dso *dso, struct machine *machine, e->byte_cnt = byte_cnt; e->op = intel_pt_insn->op; e->branch = intel_pt_insn->branch; + e->emulated_ptwrite = intel_pt_insn->emulated_ptwrite; e->length = intel_pt_insn->length; e->rel = intel_pt_insn->rel; memcpy(e->insn, 
intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ); @@ -688,6 +691,11 @@ static int intel_pt_get_guest(struct intel_pt_queue *ptq) ptq->guest_machine = NULL; thread__zput(ptq->unknown_guest_thread); + if (symbol_conf.guest_code) { + thread__zput(ptq->guest_thread); + ptq->guest_thread = machines__findnew_guest_code(machines, pid); + } + machine = machines__find_guest(machines, pid); if (!machine) return -1; @@ -702,6 +710,28 @@ static int intel_pt_get_guest(struct intel_pt_queue *ptq) return 0; } +static inline bool intel_pt_jmp_16(struct intel_pt_insn *intel_pt_insn) +{ + return intel_pt_insn->rel == 16 && intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL; +} + +#define PTWRITE_MAGIC "\x0f\x0bperf,ptwrite " +#define PTWRITE_MAGIC_LEN 16 + +static bool intel_pt_emulated_ptwrite(struct dso *dso, struct machine *machine, u64 offset) +{ + unsigned char buf[PTWRITE_MAGIC_LEN]; + ssize_t len; + + len = dso__data_read_offset(dso, machine, offset, buf, PTWRITE_MAGIC_LEN); + if (len == PTWRITE_MAGIC_LEN && !memcmp(buf, PTWRITE_MAGIC, PTWRITE_MAGIC_LEN)) { + intel_pt_log("Emulated ptwrite signature found\n"); + return true; + } + intel_pt_log("Emulated ptwrite signature not found\n"); + return false; +} + static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, uint64_t max_insn_cnt, @@ -729,11 +759,16 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, cpumode = intel_pt_nr_cpumode(ptq, *ip, nr); if (nr) { - if (cpumode != PERF_RECORD_MISC_GUEST_KERNEL || + if ((!symbol_conf.guest_code && cpumode != PERF_RECORD_MISC_GUEST_KERNEL) || intel_pt_get_guest(ptq)) return -EINVAL; machine = ptq->guest_machine; - thread = ptq->unknown_guest_thread; + thread = ptq->guest_thread; + if (!thread) { + if (cpumode != PERF_RECORD_MISC_GUEST_KERNEL) + return -EINVAL; + thread = ptq->unknown_guest_thread; + } } else { thread = ptq->thread; if (!thread) { @@ -764,6 +799,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, *ip += e->byte_cnt; intel_pt_insn->op = e->op; intel_pt_insn->branch = e->branch; + intel_pt_insn->emulated_ptwrite = e->emulated_ptwrite; intel_pt_insn->length = e->length; intel_pt_insn->rel = e->rel; memcpy(intel_pt_insn->buf, e->insn, @@ -795,8 +831,18 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, insn_cnt += 1; - if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH) + if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH) { + bool eptw; + u64 offs; + + if (!intel_pt_jmp_16(intel_pt_insn)) + goto out; + /* Check for emulated ptwrite */ + offs = offset + intel_pt_insn->length; + eptw = intel_pt_emulated_ptwrite(al.map->dso, machine, offs); + intel_pt_insn->emulated_ptwrite = eptw; goto out; + } if (max_insn_cnt && insn_cnt >= max_insn_cnt) goto out_no_cache; @@ -1300,6 +1346,7 @@ static void intel_pt_free_queue(void *priv) if (!ptq) return; thread__zput(ptq->thread); + thread__zput(ptq->guest_thread); thread__zput(ptq->unknown_guest_thread); intel_pt_decoder_free(ptq->decoder); zfree(&ptq->event_buf); @@ -2372,6 +2419,10 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) ptq->sample_ipc = ptq->state->flags & INTEL_PT_SAMPLE_IPC; } + /* Ensure guest code maps are set up */ + if (symbol_conf.guest_code && (state->from_nr || state->to_nr)) + intel_pt_get_guest(ptq); + /* * Do PEBS first to allow for the possibility that the PEBS timestamp * precedes the current timestamp. 
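The intel-pt changes above detect an "emulated ptwrite" by pattern: a 2-byte unconditional jump whose relative displacement is 16 skips over a 16-byte magic (the ud2 opcode 0x0f 0x0b followed by the string "perf,ptwrite "), after which the 64-bit payload is recovered from the TNT bits generated by a run of conditional branches. Below is a minimal, hypothetical emitter sketch (x86-64 GNU C inline asm) consistent with the 16-byte memcmp() in intel_pt_emulated_ptwrite() above; the function name and macros are illustrative, not part of this patch:

#include <stdint.h>

/* One payload bit: shift the MSB into CF and branch on it, so the CPU
 * records one TNT bit in the trace. */
#define EPTW_BIT "shl %%rax\n\t" "jc 1f\n\t" "1:\n\t"
#define EPTW_8  EPTW_BIT EPTW_BIT EPTW_BIT EPTW_BIT EPTW_BIT EPTW_BIT EPTW_BIT EPTW_BIT
#define EPTW_64 EPTW_8 EPTW_8 EPTW_8 EPTW_8 EPTW_8 EPTW_8 EPTW_8 EPTW_8

static void emulated_ptwrite(uint64_t payload)
{
	__asm__ volatile (
		"jmp 1f\n\t"                    /* 2-byte jmp, rel == 16, over the magic */
		".byte 0x0f, 0x0b\n\t"          /* ud2 */
		".ascii \"perf,ptwrite \"\n\t"  /* 13 bytes */
		".byte 0\n\t"                   /* pad to the 16 bytes the decoder compares */
		"1:\n\t"
		"mov %0, %%rax\n\t"
		EPTW_64                         /* 64 conditional branches -> 64 TNT bits */
		: : "r" (payload) : "rax", "cc");
}

Once the decoder sees the rel-16 jump followed by the magic, intel_pt_walk_tnt() hands off to intel_pt_emulated_ptwrite(), which collects 64 TNT bits and synthesizes an INTEL_PT_PTW state, so the payload is reported like a real ptwrite.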
diff --git a/tools/perf/util/libunwind/arm64.c b/tools/perf/util/libunwind/arm64.c index 15f60fd094249..014d821596563 100644 --- a/tools/perf/util/libunwind/arm64.c +++ b/tools/perf/util/libunwind/arm64.c @@ -24,7 +24,7 @@ #include "unwind.h" #include "libunwind-aarch64.h" #define perf_event_arm_regs perf_event_arm64_regs -#include <../../../../arch/arm64/include/uapi/asm/perf_regs.h> +#include <../../../arch/arm64/include/uapi/asm/perf_regs.h> #undef perf_event_arm_regs #include "../../arch/arm64/util/unwind-libunwind.c" diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 95391236f5f6a..0090618528089 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -84,6 +84,14 @@ static int machine__set_mmap_name(struct machine *machine) return machine->mmap_name ? 0 : -ENOMEM; } +static void thread__set_guest_comm(struct thread *thread, pid_t pid) +{ + char comm[64]; + + snprintf(comm, sizeof(comm), "[guest/%d]", pid); + thread__set_comm(thread, comm, 0); +} + int machine__init(struct machine *machine, const char *root_dir, pid_t pid) { int err = -ENOMEM; @@ -119,13 +127,11 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) if (pid != HOST_KERNEL_ID) { struct thread *thread = machine__findnew_thread(machine, -1, pid); - char comm[64]; if (thread == NULL) goto out; - snprintf(comm, sizeof(comm), "[guest/%d]", pid); - thread__set_comm(thread, comm, 0); + thread__set_guest_comm(thread, pid); thread__put(thread); } @@ -299,6 +305,8 @@ struct machine *machines__add(struct machines *machines, pid_t pid, rb_link_node(&machine->rb_node, parent, p); rb_insert_color_cached(&machine->rb_node, &machines->guests, leftmost); + machine->machines = machines; + return machine; } @@ -384,6 +392,93 @@ struct machine *machines__find_guest(struct machines *machines, pid_t pid) return machine; } +/* + * A common case for KVM test programs is that the test program acts as the + * hypervisor, creating, running and destroying the virtual machine, and + * providing the guest object code from its own object code. In this case, + * the VM is not running an OS, but only the functions loaded into it by the + * hypervisor test program, and conveniently, loaded at the same virtual + * addresses. + * + * Normally to resolve addresses, MMAP events are needed to map addresses + * back to the object code and debug symbols for that object code. + * + * Currently, there is no way to get such mapping information from guests + * but, in the scenario described above, the guest has the same mappings + * as the hypervisor, so support for that scenario can be achieved. + * + * To support that, copy the host thread's maps to the guest thread's maps. + * Note, we do not discover the guest until we encounter a guest event, + * which works well because it is not until then that we know that the host + * thread's maps have been set up. + * + * This function returns the guest thread. Apart from keeping the data + * structures sane, using a thread belonging to the guest machine, instead + * of the host thread, allows it to have its own comm (refer + * thread__set_guest_comm()). 
+ */ +static struct thread *findnew_guest_code(struct machine *machine, + struct machine *host_machine, + pid_t pid) +{ + struct thread *host_thread; + struct thread *thread; + int err; + + if (!machine) + return NULL; + + thread = machine__findnew_thread(machine, -1, pid); + if (!thread) + return NULL; + + /* Assume maps are set up if there are any */ + if (thread->maps->nr_maps) + return thread; + + host_thread = machine__find_thread(host_machine, -1, pid); + if (!host_thread) + goto out_err; + + thread__set_guest_comm(thread, pid); + + /* + * Guest code can be found in hypervisor process at the same address + * so copy host maps. + */ + err = maps__clone(thread, host_thread->maps); + thread__put(host_thread); + if (err) + goto out_err; + + return thread; + +out_err: + thread__zput(thread); + return NULL; +} + +struct thread *machines__findnew_guest_code(struct machines *machines, pid_t pid) +{ + struct machine *host_machine = machines__find(machines, HOST_KERNEL_ID); + struct machine *machine = machines__findnew(machines, pid); + + return findnew_guest_code(machine, host_machine, pid); +} + +struct thread *machine__findnew_guest_code(struct machine *machine, pid_t pid) +{ + struct machines *machines = machine->machines; + struct machine *host_machine; + + if (!machines) + return NULL; + + host_machine = machines__find(machines, HOST_KERNEL_ID); + + return findnew_guest_code(machine, host_machine, pid); +} + void machines__process_guests(struct machines *machines, machine__process_t process, void *data) { diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 0023165422aa4..5d7daf7cb7bcd 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -18,6 +18,7 @@ struct symbol; struct target; struct thread; union perf_event; +struct machines; /* Native host kernel uses -1 as pid index in machine */ #define HOST_KERNEL_ID (-1) @@ -59,6 +60,7 @@ struct machine { void *priv; u64 db_id; }; + struct machines *machines; bool trampolines_mapped; }; @@ -162,10 +164,11 @@ void machines__process_guests(struct machines *machines, struct machine *machines__add(struct machines *machines, pid_t pid, const char *root_dir); -struct machine *machines__find_host(struct machines *machines); struct machine *machines__find(struct machines *machines, pid_t pid); struct machine *machines__findnew(struct machines *machines, pid_t pid); struct machine *machines__find_guest(struct machines *machines, pid_t pid); +struct thread *machines__findnew_guest_code(struct machines *machines, pid_t pid); +struct thread *machine__findnew_guest_code(struct machine *machine, pid_t pid); void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size); void machines__set_comm_exec(struct machines *machines, bool comm_exec); diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index ed0ab838bcc5d..c3c21a9c350b2 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -314,6 +314,30 @@ static const char * const mem_hops[] = { "board", }; +static int perf_mem__op_scnprintf(char *out, size_t sz, struct mem_info *mem_info) +{ + u64 op = PERF_MEM_LOCK_NA; + int l; + + if (mem_info) + op = mem_info->data_src.mem_op; + + if (op & PERF_MEM_OP_NA) + l = scnprintf(out, sz, "N/A"); + else if (op & PERF_MEM_OP_LOAD) + l = scnprintf(out, sz, "LOAD"); + else if (op & PERF_MEM_OP_STORE) + l = scnprintf(out, sz, "STORE"); + else if (op & PERF_MEM_OP_PFETCH) + l = scnprintf(out, sz, "PFETCH"); + else if (op & PERF_MEM_OP_EXEC) + l = scnprintf(out, sz, "EXEC"); + 
else + l = scnprintf(out, sz, "No"); + + return l; +} + int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) { size_t i, l = 0; @@ -466,7 +490,10 @@ int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_in { int i = 0; - i += perf_mem__lvl_scnprintf(out, sz, mem_info); + i += scnprintf(out, sz, "|OP "); + i += perf_mem__op_scnprintf(out + i, sz - i, mem_info); + i += scnprintf(out + i, sz - i, "|LVL "); + i += perf_mem__lvl_scnprintf(out + i, sz, mem_info); i += scnprintf(out + i, sz - i, "|SNP "); i += perf_mem__snp_scnprintf(out + i, sz - i, mem_info); i += scnprintf(out + i, sz - i, "|TLB "); @@ -582,6 +609,8 @@ do { \ } if (lvl & P(LVL, MISS)) if (lvl & P(LVL, L1)) stats->st_l1miss++; + if (lvl & P(LVL, NA)) + stats->st_na++; } else { /* unparsable data_src? */ stats->noparse++; @@ -608,6 +637,7 @@ void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add) stats->st_noadrs += add->st_noadrs; stats->st_l1hit += add->st_l1hit; stats->st_l1miss += add->st_l1miss; + stats->st_na += add->st_na; stats->load += add->load; stats->ld_excl += add->ld_excl; stats->ld_shared += add->ld_shared; diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 916242f8020a8..8a8b568baeeef 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -63,6 +63,7 @@ struct c2c_stats { u32 st_noadrs; /* cacheable store with no address */ u32 st_l1hit; /* count of stores that hit L1D */ u32 st_l1miss; /* count of stores that miss L1D */ + u32 st_na; /* count of stores where the memory level is not available */ u32 load; /* count of all loads in trace */ u32 ld_excl; /* exclusive loads, rmt/lcl DRAM - snp none/miss */ u32 ld_shared; /* shared loads, rmt/lcl DRAM - snp hit */ diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index d8492e3395218..ee8fcfa115e50 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -728,22 +728,23 @@ static int metricgroup__build_event_string(struct strbuf *events, { struct hashmap_entry *cur; size_t bkt; - bool no_group = true, has_duration = false; + bool no_group = true, has_tool_events = false; + bool tool_events[PERF_TOOL_MAX] = {false}; int ret = 0; #define RETURN_IF_NON_ZERO(x) do { if (x) return x; } while (0) hashmap__for_each_entry(ctx->ids, cur, bkt) { const char *sep, *rsep, *id = cur->key; + enum perf_tool_event ev; pr_debug("found event %s\n", id); - /* - * Duration time maps to a software event and can make - * groups not count. Always use it outside a - * group. - */ - if (!strcmp(id, "duration_time")) { - has_duration = true; + + /* Always move tool events outside of the group. */ + ev = perf_tool_event__from_str(id); + if (ev != PERF_TOOL_NONE) { + has_tool_events = true; + tool_events[ev] = true; continue; } /* Separate events with commas and open the group if necessary. */ @@ -802,16 +803,25 @@ static int metricgroup__build_event_string(struct strbuf *events, RETURN_IF_NON_ZERO(ret); } } - if (has_duration) { - if (no_group) { - /* Strange case of a metric of just duration_time. 
*/ - ret = strbuf_addf(events, "duration_time"); - } else if (!has_constraint) - ret = strbuf_addf(events, "}:W,duration_time"); - else - ret = strbuf_addf(events, ",duration_time"); - } else if (!no_group && !has_constraint) + if (!no_group && !has_constraint) { ret = strbuf_addf(events, "}:W"); + RETURN_IF_NON_ZERO(ret); + } + if (has_tool_events) { + int i; + + perf_tool_event__for_each_event(i) { + if (tool_events[i]) { + if (!no_group) { + ret = strbuf_addch(events, ','); + RETURN_IF_NON_ZERO(ret); + } + no_group = false; + ret = strbuf_addstr(events, perf_tool_event__to_str(i)); + RETURN_IF_NON_ZERO(ret); + } + } + } return ret; #undef RETURN_IF_NON_ZERO @@ -1117,7 +1127,7 @@ out: /** * metric_list_cmp - list_sort comparator that sorts metrics with more events to - * the front. duration_time is excluded from the count. + * the front. tool events are excluded from the count. */ static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l, const struct list_head *r) @@ -1125,15 +1135,19 @@ static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l, const struct metric *left = container_of(l, struct metric, nd); const struct metric *right = container_of(r, struct metric, nd); struct expr_id_data *data; - int left_count, right_count; + int i, left_count, right_count; left_count = hashmap__size(left->pctx->ids); - if (!expr__get_id(left->pctx, "duration_time", &data)) - left_count--; + perf_tool_event__for_each_event(i) { + if (!expr__get_id(left->pctx, perf_tool_event__to_str(i), &data)) + left_count--; + } right_count = hashmap__size(right->pctx->ids); - if (!expr__get_id(right->pctx, "duration_time", &data)) - right_count--; + perf_tool_event__for_each_event(i) { + if (!expr__get_id(right->pctx, perf_tool_event__to_str(i), &data)) + right_count--; + } return right_count - left_count; } @@ -1270,6 +1284,30 @@ static void metricgroup__free_metrics(struct list_head *metric_list) } /** + * find_tool_events - Search for the presence of tool events in metric_list. + * @metric_list: List to take metrics from. + * @tool_events: Array of false values; an entry is set true when the + * corresponding tool event is found. + */ +static void find_tool_events(const struct list_head *metric_list, + bool tool_events[PERF_TOOL_MAX]) +{ + struct metric *m; + + list_for_each_entry(m, metric_list, nd) { + int i; + + perf_tool_event__for_each_event(i) { + struct expr_id_data *data; + + if (!tool_events[i] && + !expr__get_id(m->pctx, perf_tool_event__to_str(i), &data)) + tool_events[i] = true; + } + } +} + +/** * build_combined_expr_ctx - Make an expr_parse_ctx with all has_constraint * metric IDs, as the IDs are held in a set, * duplicates will be removed. @@ -1318,11 +1356,14 @@ err_out: * @ids: the event identifiers parsed from a metric. * @modifier: any modifiers added to the events. * @has_constraint: false if events should be placed in a weak group. + * @tool_events: entries are set true if the tool event at that index may be + * present in the overall list of metrics. * @out_evlist: the created list of events. 
*/ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu, struct expr_parse_ctx *ids, const char *modifier, - bool has_constraint, struct evlist **out_evlist) + bool has_constraint, const bool tool_events[PERF_TOOL_MAX], + struct evlist **out_evlist) { struct parse_events_error parse_error; struct evlist *parsed_evlist; @@ -1331,26 +1372,29 @@ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu, *out_evlist = NULL; if (!metric_no_merge || hashmap__size(ids->ids) == 0) { - char *tmp; + int i; /* - * We may fail to share events between metrics because - * duration_time isn't present in one metric. For example, a - * ratio of cache misses doesn't need duration_time but the same - * events may be used for a misses per second. Events without - * sharing implies multiplexing, that is best avoided, so place - * duration_time in every group. + * We may fail to share events between metrics because a tool + * event isn't present in one metric. For example, a ratio of + * cache misses doesn't need duration_time but the same events + * may be used for a misses per second. Events without sharing + * implies multiplexing, that is best avoided, so place + * all tool events in every group. * * Also, there may be no ids/events in the expression parsing * context because of constant evaluation, e.g.: * event1 if #smt_on else 0 - * Add a duration_time event to avoid a parse error on an empty - * string. + * Add a tool event to avoid a parse error on an empty string. */ - tmp = strdup("duration_time"); - if (!tmp) - return -ENOMEM; + perf_tool_event__for_each_event(i) { + if (tool_events[i]) { + char *tmp = strdup(perf_tool_event__to_str(i)); - ids__insert(ids->ids, tmp); + if (!tmp) + return -ENOMEM; + ids__insert(ids->ids, tmp); + } + } } ret = metricgroup__build_event_string(&events, ids, modifier, has_constraint); @@ -1392,6 +1436,7 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, struct evlist *combined_evlist = NULL; LIST_HEAD(metric_list); struct metric *m; + bool tool_events[PERF_TOOL_MAX] = {false}; int ret; if (metric_events_list->nr_entries == 0) @@ -1407,12 +1452,15 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, if (!metric_no_merge) { struct expr_parse_ctx *combined = NULL; + find_tool_events(&metric_list, tool_events); + ret = build_combined_expr_ctx(&metric_list, &combined); if (!ret && combined && hashmap__size(combined->ids)) { ret = parse_ids(metric_no_merge, fake_pmu, combined, /*modifier=*/NULL, /*has_constraint=*/true, + tool_events, &combined_evlist); } if (combined) @@ -1460,7 +1508,7 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, } if (!metric_evlist) { ret = parse_ids(metric_no_merge, fake_pmu, m->pctx, m->modifier, - m->has_constraint, &m->evlist); + m->has_constraint, tool_events, &m->evlist); if (ret) goto out; diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 50502b4a7ca46..a4dff881be39b 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -62,8 +62,8 @@ void __weak auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp __maybe_u void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __maybe_unused, struct evlist *evlist __maybe_unused, - int idx __maybe_unused, - bool per_cpu __maybe_unused) + struct evsel *evsel __maybe_unused, + int idx __maybe_unused) { } diff --git a/tools/perf/util/off_cpu.h b/tools/perf/util/off_cpu.h new file mode 100644 index 0000000000000..548008f74d420 --- /dev/null +++ 
b/tools/perf/util/off_cpu.h @@ -0,0 +1,29 @@ +#ifndef PERF_UTIL_OFF_CPU_H +#define PERF_UTIL_OFF_CPU_H + +struct evlist; +struct target; +struct perf_session; +struct record_opts; + +#define OFFCPU_EVENT "offcpu-time" + +#ifdef HAVE_BPF_SKEL +int off_cpu_prepare(struct evlist *evlist, struct target *target, + struct record_opts *opts); +int off_cpu_write(struct perf_session *session); +#else +static inline int off_cpu_prepare(struct evlist *evlist __maybe_unused, + struct target *target __maybe_unused, + struct record_opts *opts __maybe_unused) +{ + return -1; +} + +static inline int off_cpu_write(struct perf_session *session __maybe_unused) +{ + return -1; +} +#endif + +#endif /* PERF_UTIL_OFF_CPU_H */ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index dd84fed698a3b..7ed2357404316 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -154,6 +154,21 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { }, }; +struct event_symbol event_symbols_tool[PERF_TOOL_MAX] = { + [PERF_TOOL_DURATION_TIME] = { + .symbol = "duration_time", + .alias = "", + }, + [PERF_TOOL_USER_TIME] = { + .symbol = "user_time", + .alias = "", + }, + [PERF_TOOL_SYSTEM_TIME] = { + .symbol = "system_time", + .alias = "", + }, +}; + #define __PERF_EVENT_FIELD(config, name) \ ((config & PERF_EVENT_##name##_MASK) >> PERF_EVENT_##name##_SHIFT) @@ -350,7 +365,7 @@ __add_event(struct list_head *list, int *idx, (*idx)++; evsel->core.cpus = cpus; evsel->core.own_cpus = perf_cpu_map__get(cpus); - evsel->core.system_wide = pmu ? pmu->is_uncore : false; + evsel->core.requires_cpu = pmu ? pmu->is_uncore : false; evsel->auto_merge_stats = auto_merge_stats; if (name) @@ -402,14 +417,16 @@ static int add_event_tool(struct list_head *list, int *idx, if (!evsel) return -ENOMEM; evsel->tool_event = tool_event; - if (tool_event == PERF_TOOL_DURATION_TIME) { + if (tool_event == PERF_TOOL_DURATION_TIME + || tool_event == PERF_TOOL_USER_TIME + || tool_event == PERF_TOOL_SYSTEM_TIME) { free((char *)evsel->unit); evsel->unit = strdup("ns"); } return 0; } -static int parse_aliases(char *str, const char *names[][EVSEL__MAX_ALIASES], int size) +static int parse_aliases(char *str, const char *const names[][EVSEL__MAX_ALIASES], int size) { int i, j; int n, longest = -1; @@ -3056,21 +3073,34 @@ out_enomem: return evt_num; } -static void print_tool_event(const char *name, const char *event_glob, +static void print_tool_event(const struct event_symbol *syms, const char *event_glob, bool name_only) { - if (event_glob && !strglobmatch(name, event_glob)) + if (syms->symbol == NULL) + return; + + if (event_glob && !(strglobmatch(syms->symbol, event_glob) || + (syms->alias && strglobmatch(syms->alias, event_glob)))) return; + if (name_only) - printf("%s ", name); - else + printf("%s ", syms->symbol); + else { + char name[MAX_NAME_LEN]; + if (syms->alias && strlen(syms->alias)) + snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias); + else + strlcpy(name, syms->symbol, MAX_NAME_LEN); printf(" %-50s [%s]\n", name, "Tool event"); - + } } void print_tool_events(const char *event_glob, bool name_only) { - print_tool_event("duration_time", event_glob, name_only); + // Start at 1 because the first enum entry denotes no tool event + for (int i = 1; i < PERF_TOOL_MAX; ++i) { + print_tool_event(event_symbols_tool + i, event_glob, name_only); + } if (pager_in_use()) printf("\n"); } diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 
5b6e4b5249cff..3a9ce96c8bce1 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -353,6 +353,8 @@ alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_AL emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); } dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); } duration_time { return tool(yyscanner, PERF_TOOL_DURATION_TIME); } +user_time { return tool(yyscanner, PERF_TOOL_USER_TIME); } +system_time { return tool(yyscanner, PERF_TOOL_SYSTEM_TIME); } bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); } cgroup-switches { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); } diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c index caed0336429f2..ce80b79be1036 100644 --- a/tools/perf/util/path.c +++ b/tools/perf/util/path.c @@ -86,9 +86,21 @@ bool is_directory(const char *base_path, const struct dirent *dent) char path[PATH_MAX]; struct stat st; - sprintf(path, "%s/%s", base_path, dent->d_name); + snprintf(path, sizeof(path), "%s/%s", base_path, dent->d_name); if (stat(path, &st)) return false; return S_ISDIR(st.st_mode); } + +bool is_executable_file(const char *base_path, const struct dirent *dent) +{ + char path[PATH_MAX]; + struct stat st; + + snprintf(path, sizeof(path), "%s/%s", base_path, dent->d_name); + if (stat(path, &st)) + return false; + + return !S_ISDIR(st.st_mode) && (st.st_mode & S_IXUSR); +} diff --git a/tools/perf/util/path.h b/tools/perf/util/path.h index 083429b7efa37..d94902c222223 100644 --- a/tools/perf/util/path.h +++ b/tools/perf/util/path.h @@ -12,5 +12,6 @@ int path__join3(char *bf, size_t size, const char *path1, const char *path2, con bool is_regular_file(const char *file); bool is_directory(const char *base_path, const struct dirent *dent); +bool is_executable_file(const char *base_path, const struct dirent *dent); #endif /* _PERF_PATH_H */ diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index a982e40ee5a91..872dd3d387821 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -103,6 +103,8 @@ static const char *__perf_reg_name_arm64(int id) return "lr"; case PERF_REG_ARM64_PC: return "pc"; + case PERF_REG_ARM64_VG: + return "vg"; default: return NULL; } diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index a685d20165f78..aa5156c2bcff5 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -38,5 +38,6 @@ util/units.c util/affinity.c util/rwsem.c util/hashmap.c +util/perf_regs.c util/pmu-hybrid.c util/fncache.c diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 413f2d19c13f6..adba01b7d9dd9 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -392,6 +392,18 @@ static const char *get_dsoname(struct map *map) return dsoname; } +static unsigned long get_offset(struct symbol *sym, struct addr_location *al) +{ + unsigned long offset; + + if (al->addr < sym->end) + offset = al->addr - sym->start; + else + offset = al->addr - al->map->start - sym->start; + + return offset; +} + static PyObject *python_process_callchain(struct perf_sample *sample, struct evsel *evsel, struct addr_location *al) @@ -443,6 +455,25 @@ static PyObject *python_process_callchain(struct perf_sample *sample, 
_PyUnicode_FromStringAndSize(node->ms.sym->name, node->ms.sym->namelen)); pydict_set_item_string_decref(pyelem, "sym", pysym); + + if (node->ms.map) { + struct map *map = node->ms.map; + struct addr_location node_al; + unsigned long offset; + + node_al.addr = map->map_ip(map, node->ip); + node_al.map = map; + offset = get_offset(node->ms.sym, &node_al); + + pydict_set_item_string_decref( + pyelem, "sym_off", + PyLong_FromUnsignedLongLong(offset)); + } + if (node->srcline && strcmp(":0", node->srcline)) { + pydict_set_item_string_decref( + pyelem, "sym_srcline", + _PyUnicode_FromString(node->srcline)); + } } if (node->ms.map) { @@ -520,18 +551,6 @@ exit: return pylist; } -static unsigned long get_offset(struct symbol *sym, struct addr_location *al) -{ - unsigned long offset; - - if (al->addr < sym->end) - offset = al->addr - sym->start; - else - offset = al->addr - al->map->start - sym->start; - - return offset; -} - static int get_symoff(struct symbol *sym, struct addr_location *al, bool print_off, char *bf, int size) { @@ -736,12 +755,22 @@ static void set_regs_in_dict(PyObject *dict, } static void set_sym_in_dict(PyObject *dict, struct addr_location *al, - const char *dso_field, const char *sym_field, - const char *symoff_field) + const char *dso_field, const char *dso_bid_field, + const char *dso_map_start, const char *dso_map_end, + const char *sym_field, const char *symoff_field) { + char sbuild_id[SBUILD_ID_SIZE]; + if (al->map) { pydict_set_item_string_decref(dict, dso_field, _PyUnicode_FromString(al->map->dso->name)); + build_id__sprintf(&al->map->dso->bid, sbuild_id); + pydict_set_item_string_decref(dict, dso_bid_field, + _PyUnicode_FromString(sbuild_id)); + pydict_set_item_string_decref(dict, dso_map_start, + PyLong_FromUnsignedLong(al->map->start)); + pydict_set_item_string_decref(dict, dso_map_end, + PyLong_FromUnsignedLong(al->map->end)); } if (al->sym) { pydict_set_item_string_decref(dict, sym_field, @@ -821,7 +850,8 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, (const char *)sample->raw_data, sample->raw_size)); pydict_set_item_string_decref(dict, "comm", _PyUnicode_FromString(thread__comm_str(al->thread))); - set_sym_in_dict(dict, al, "dso", "symbol", "symoff"); + set_sym_in_dict(dict, al, "dso", "dso_bid", "dso_map_start", "dso_map_end", + "symbol", "symoff"); pydict_set_item_string_decref(dict, "callchain", callchain); @@ -837,7 +867,9 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, if (addr_al) { pydict_set_item_string_decref(dict_sample, "addr_correlates_sym", PyBool_FromLong(1)); - set_sym_in_dict(dict_sample, addr_al, "addr_dso", "addr_symbol", "addr_symoff"); + set_sym_in_dict(dict_sample, addr_al, "addr_dso", "addr_dso_bid", + "addr_dso_map_start", "addr_dso_map_end", + "addr_symbol", "addr_symoff"); } if (sample->flags) @@ -2074,7 +2106,11 @@ static int python_generate_script(struct tep_handle *pevent, const char *outfile fprintf(ofp, "\t\tfor node in common_callchain:"); fprintf(ofp, "\n\t\t\tif 'sym' in node:"); - fprintf(ofp, "\n\t\t\t\tprint(\"\\t[%%x] %%s\" %% (node['ip'], node['sym']['name']))"); + fprintf(ofp, "\n\t\t\t\tprint(\"\t[%%x] %%s%%s%%s%%s\" %% ("); + fprintf(ofp, "\n\t\t\t\t\tnode['ip'], node['sym']['name'],"); + fprintf(ofp, "\n\t\t\t\t\t\"+0x{:x}\".format(node['sym_off']) if 'sym_off' in node else \"\","); + fprintf(ofp, "\n\t\t\t\t\t\" ({})\".format(node['dso']) if 'dso' in node else \"\","); + fprintf(ofp, "\n\t\t\t\t\t\" \" + node['sym_srcline'] if 'sym_srcline' in node else \"\"))"); fprintf(ofp, 
"\n\t\t\telse:"); fprintf(ofp, "\n\t\t\t\tprint(\"\t[%%x]\" %% (node['ip']))\n\n"); fprintf(ofp, "\t\tprint()\n\n"); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 45a30040ec8d0..0aa818977d2bc 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1151,9 +1151,20 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack) struct branch_entry *entries = perf_sample__branch_entries(sample); uint64_t i; - printf("%s: nr:%" PRIu64 "\n", - !callstack ? "... branch stack" : "... branch callstack", - sample->branch_stack->nr); + if (!callstack) { + printf("%s: nr:%" PRIu64 "\n", "... branch stack", sample->branch_stack->nr); + } else { + /* the reason of adding 1 to nr is because after expanding + * branch stack it generates nr + 1 callstack records. e.g., + * B()->C() + * A()->B() + * the final callstack should be: + * C() + * B() + * A() + */ + printf("%s: nr:%" PRIu64 "\n", "... branch callstack", sample->branch_stack->nr+1); + } for (i = 0; i < sample->branch_stack->nr; i++) { struct branch_entry *e = &entries[i]; @@ -1169,8 +1180,13 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack) (unsigned)e->flags.reserved, e->flags.type ? branch_type_name(e->flags.type) : ""); } else { - printf("..... %2"PRIu64": %016" PRIx64 "\n", - i, i > 0 ? e->from : e->to); + if (i == 0) { + printf("..... %2"PRIu64": %016" PRIx64 "\n" + "..... %2"PRIu64": %016" PRIx64 "\n", + i, e->to, i+1, e->from); + } else { + printf("..... %2"PRIu64": %016" PRIx64 "\n", i+1, e->from); + } } } } @@ -1410,6 +1426,13 @@ static struct machine *machines__find_for_cpumode(struct machines *machines, else pid = sample->pid; + /* + * Guest code machine is created as needed and does not use + * DEFAULT_GUEST_KERNEL_ID. 
+ */ + if (symbol_conf.guest_code) + return machines__findnew(machines, pid); + return machines__find_guest(machines, pid); } @@ -2576,7 +2599,7 @@ int perf_session__process_events(struct perf_session *session) if (perf_data__is_pipe(session->data)) return __perf_session__process_pipe_events(session); - if (perf_data__is_dir(session->data)) + if (perf_data__is_dir(session->data) && session->data->dir.nr) return __perf_session__process_dir_events(session); return __perf_session__process_events(session); diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 138e3ab9d638d..606f09b09226f 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -610,6 +610,19 @@ static bool hybrid_uniquify(struct evsel *evsel) return perf_pmu__has_hybrid() && !is_uncore(evsel); } +static bool hybrid_merge(struct evsel *counter, struct perf_stat_config *config, + bool check) +{ + if (hybrid_uniquify(counter)) { + if (check) + return config && config->hybrid_merge; + else + return config && !config->hybrid_merge; + } + + return false; +} + static bool collect_data(struct perf_stat_config *config, struct evsel *counter, void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data, bool first), @@ -618,9 +631,9 @@ static bool collect_data(struct perf_stat_config *config, struct evsel *counter, if (counter->merged_stat) return false; cb(config, counter, data, true); - if (config->no_merge || hybrid_uniquify(counter)) + if (config->no_merge || hybrid_merge(counter, config, false)) uniquify_event_name(counter); - else if (counter->auto_merge_stats) + else if (counter->auto_merge_stats || hybrid_merge(counter, config, true)) collect_all_aliases(config, counter, cb, data); return true; } @@ -751,11 +764,11 @@ static int cmp_val(const void *a, const void *b) static struct perf_aggr_thread_value *sort_aggr_thread( struct evsel *counter, - int nthreads, int ncpus, int *ret, struct target *_target) { - int cpu, thread, i = 0; + int nthreads = perf_thread_map__nr(counter->core.threads); + int i = 0; double uval; struct perf_aggr_thread_value *buf; @@ -763,13 +776,17 @@ static struct perf_aggr_thread_value *sort_aggr_thread( if (!buf) return NULL; - for (thread = 0; thread < nthreads; thread++) { + for (int thread = 0; thread < nthreads; thread++) { + int idx; u64 ena = 0, run = 0, val = 0; - for (cpu = 0; cpu < ncpus; cpu++) { - val += perf_counts(counter->counts, cpu, thread)->val; - ena += perf_counts(counter->counts, cpu, thread)->ena; - run += perf_counts(counter->counts, cpu, thread)->run; + perf_cpu_map__for_each_idx(idx, evsel__cpus(counter)) { + struct perf_counts_values *counts = + perf_counts(counter->counts, idx, thread); + + val += counts->val; + ena += counts->ena; + run += counts->run; } uval = val * counter->scale; @@ -804,13 +821,11 @@ static void print_aggr_thread(struct perf_stat_config *config, struct evsel *counter, char *prefix) { FILE *output = config->output; - int nthreads = perf_thread_map__nr(counter->core.threads); - int ncpus = perf_cpu_map__nr(counter->core.cpus); int thread, sorted_threads; struct aggr_cpu_id id; struct perf_aggr_thread_value *buf; - buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads, _target); + buf = sort_aggr_thread(counter, &sorted_threads, _target); if (!buf) { perror("cannot sort aggr thread"); return; @@ -933,8 +948,6 @@ static void print_no_aggr_metric(struct perf_stat_config *config, struct evsel *counter; bool first = true; - if (prefix) - fputs(prefix, config->output); 
evlist__for_each_entry(evlist, counter) { u64 ena, run, val; double uval; @@ -946,6 +959,8 @@ static void print_no_aggr_metric(struct perf_stat_config *config, id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); if (first) { + if (prefix) + fputs(prefix, config->output); aggr_printout(config, counter, id, 0); first = false; } @@ -957,7 +972,8 @@ static void print_no_aggr_metric(struct perf_stat_config *config, printout(config, id, 0, counter, uval, prefix, run, ena, 1.0, &rt_stat); } - fputc('\n', config->output); + if (!first) + fputc('\n', config->output); } } diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 10af7804e482f..979c8cb918f72 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -26,6 +26,7 @@ struct runtime_stat rt_stat; struct stats walltime_nsecs_stats; +struct rusage_stats ru_stats; struct saved_value { struct rb_node rb_node; @@ -199,6 +200,7 @@ void perf_stat__reset_shadow_stats(void) { reset_stat(&rt_stat); memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); + memset(&ru_stats, 0, sizeof(ru_stats)); } void perf_stat__reset_shadow_per_stat(struct runtime_stat *st) @@ -831,10 +833,31 @@ static int prepare_metric(struct evsel **metric_events, u64 metric_total = 0; int source_count; - if (!strcmp(metric_events[i]->name, "duration_time")) { - stats = &walltime_nsecs_stats; - scale = 1e-9; + if (evsel__is_tool(metric_events[i])) { source_count = 1; + switch (metric_events[i]->tool_event) { + case PERF_TOOL_DURATION_TIME: + stats = &walltime_nsecs_stats; + scale = 1e-9; + break; + case PERF_TOOL_USER_TIME: + stats = &ru_stats.ru_utime_usec_stat; + scale = 1e-6; + break; + case PERF_TOOL_SYSTEM_TIME: + stats = &ru_stats.ru_stime_usec_stat; + scale = 1e-6; + break; + case PERF_TOOL_NONE: + pr_err("Invalid tool event 'none'"); + abort(); + case PERF_TOOL_MAX: + pr_err("Invalid tool event 'max'"); + abort(); + default: + pr_err("Unknown tool event '%s'", evsel__name(metric_events[i])); + abort(); + } } else { v = saved_value_lookup(metric_events[i], cpu_map_idx, false, STAT_NONE, 0, st, diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 817a2de264b46..37ea2d0447085 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -117,7 +117,9 @@ static void perf_stat_evsel_id_init(struct evsel *evsel) /* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */ for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) { - if (!strcmp(evsel__name(evsel), id_str[i])) { + if (!strcmp(evsel__name(evsel), id_str[i]) || + (strstr(evsel__name(evsel), id_str[i]) && evsel->pmu_name + && strstr(evsel__name(evsel), evsel->pmu_name))) { ps->id = i; break; } @@ -235,14 +237,12 @@ void evlist__reset_prev_raw_counts(struct evlist *evlist) static void evsel__copy_prev_raw_counts(struct evsel *evsel) { - int ncpus = evsel__nr_cpus(evsel); - int nthreads = perf_thread_map__nr(evsel->core.threads); + int idx, nthreads = perf_thread_map__nr(evsel->core.threads); for (int thread = 0; thread < nthreads; thread++) { - for (int cpu = 0; cpu < ncpus; cpu++) { - *perf_counts(evsel->counts, cpu, thread) = - *perf_counts(evsel->prev_raw_counts, cpu, - thread); + perf_cpu_map__for_each_idx(idx, evsel__cpus(evsel)) { + *perf_counts(evsel->counts, idx, thread) = + *perf_counts(evsel->prev_raw_counts, idx, thread); } } @@ -472,9 +472,10 @@ int perf_stat_process_counter(struct perf_stat_config *config, int perf_event__process_stat_event(struct perf_session *session, union perf_event *event) { - struct perf_counts_values count; + struct 
perf_counts_values count, *ptr; struct perf_record_stat *st = &event->stat; struct evsel *counter; + int cpu_map_idx; count.val = st->val; count.ena = st->ena; @@ -485,8 +486,18 @@ int perf_event__process_stat_event(struct perf_session *session, pr_err("Failed to resolve counter for stat event.\n"); return -EINVAL; } - - *perf_counts(counter->counts, st->cpu, st->thread) = count; + cpu_map_idx = perf_cpu_map__idx(evsel__cpus(counter), (struct perf_cpu){.cpu = st->cpu}); + if (cpu_map_idx == -1) { + pr_err("Invalid CPU %d for event %s.\n", st->cpu, evsel__name(counter)); + return -EINVAL; + } + ptr = perf_counts(counter->counts, cpu_map_idx, st->thread); + if (ptr == NULL) { + pr_err("Failed to find perf count for CPU %d thread %d on event %s.\n", + st->cpu, st->thread, evsel__name(counter)); + return -EINVAL; + } + *ptr = count; counter->supported = true; return 0; } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 335d19cc30630..b5aeb8e6d34b0 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -108,6 +108,11 @@ struct runtime_stat { struct rblist value_list; }; +struct rusage_stats { + struct stats ru_utime_usec_stat; + struct stats ru_stime_usec_stat; +}; + typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config, struct perf_cpu cpu); struct perf_stat_config { @@ -122,6 +127,7 @@ struct perf_stat_config { bool ru_display; bool big_num; bool no_merge; + bool hybrid_merge; bool walltime_run_table; bool all_kernel; bool all_user; @@ -148,6 +154,7 @@ struct perf_stat_config { const char *csv_sep; struct stats *walltime_nsecs_stats; struct rusage ru_data; + struct rusage_stats *ru_stats; struct cpu_aggr_map *aggr_map; aggr_get_id_t aggr_get_id; struct cpu_aggr_map *cpus_aggr_map; @@ -177,6 +184,20 @@ static inline void init_stats(struct stats *stats) stats->max = 0; } +static inline void init_rusage_stats(struct rusage_stats *ru_stats) { + init_stats(&ru_stats->ru_utime_usec_stat); + init_stats(&ru_stats->ru_stime_usec_stat); +} + +static inline void update_rusage_stats(struct rusage_stats *ru_stats, struct rusage* rusage) { + const u64 us_to_ns = 1000; + const u64 s_to_ns = 1000000000; + update_stats(&ru_stats->ru_utime_usec_stat, + (rusage->ru_utime.tv_usec * us_to_ns + rusage->ru_utime.tv_sec * s_to_ns)); + update_stats(&ru_stats->ru_stime_usec_stat, + (rusage->ru_stime.tv_usec * us_to_ns + rusage->ru_stime.tv_sec * s_to_ns)); +} + struct evsel; struct evlist; @@ -196,6 +217,7 @@ bool __perf_stat_evsel__is(struct evsel *evsel, enum perf_stat_evsel_id id); extern struct runtime_stat rt_stat; extern struct stats walltime_nsecs_stats; +extern struct rusage_stats ru_stats; typedef void (*print_metric_t)(struct perf_stat_config *config, void *ctx, const char *color, const char *unit, diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 31cd59a2b66e6..ecd377938eea8 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1290,7 +1290,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss, * For misannotated, zeroed, ASM function sizes. 
*/ if (nr > 0) { - symbols__fixup_end(&dso->symbols); + symbols__fixup_end(&dso->symbols, false); symbols__fixup_duplicate(&dso->symbols); if (kmap) { /* diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index dea0fc495185d..f72baf636724c 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -101,11 +101,6 @@ static int prefix_underscores_count(const char *str) return tail - str; } -void __weak arch__symbols__fixup_end(struct symbol *p, struct symbol *c) -{ - p->end = c->start; -} - const char * __weak arch__normalize_symbol_name(const char *name) { return name; } @@ -217,7 +212,8 @@ again: } } -void symbols__fixup_end(struct rb_root_cached *symbols) +/* Update zero-sized symbols using the address of the next symbol */ +void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms) { struct rb_node *nd, *prevnd = rb_first_cached(symbols); struct symbol *curr, *prev; @@ -231,8 +227,29 @@ void symbols__fixup_end(struct rb_root_cached *symbols) prev = curr; curr = rb_entry(nd, struct symbol, rb_node); - if (prev->end == prev->start || prev->end != curr->start) - arch__symbols__fixup_end(prev, curr); + /* + * On some architectures the kernel text segment starts at a + * low memory address, while modules are located at high + * memory addresses (or vice versa). The gap between the end of + * the kernel text segment and the beginning of the first module's + * text segment can be very big. Therefore do not fill this gap and + * do not assign it to the kernel dso map (kallsyms). + * + * In kallsyms, module symbols are identified by a '[' character, + * as in: + * ffffffffc1937000 T hdmi_driver_init [snd_hda_codec_hdmi] + */ + if (prev->end == prev->start) { + /* Last kernel/module symbol mapped to end of page */ + if (is_kallsyms && (!strchr(prev->name, '[') != + !strchr(curr->name, '['))) + prev->end = roundup(prev->end + 4096, 4096); + else + prev->end = curr->start; + + pr_debug4("%s sym:%s end:%#" PRIx64 "\n", + __func__, prev->name, prev->end); + } } /* Last entry */ @@ -1467,7 +1484,7 @@ int __dso__load_kallsyms(struct dso *dso, const char *filename, if (kallsyms__delta(kmap, filename, &delta)) return -1; - symbols__fixup_end(&dso->symbols); + symbols__fixup_end(&dso->symbols, true); symbols__fixup_duplicate(&dso->symbols); if (dso->kernel == DSO_SPACE__KERNEL_GUEST) @@ -1659,7 +1676,7 @@ int dso__load_bfd_symbols(struct dso *dso, const char *debugfile) #undef bfd_asymbol_section #endif - symbols__fixup_end(&dso->symbols); + symbols__fixup_end(&dso->symbols, false); symbols__fixup_duplicate(&dso->symbols); dso->adjust_symbols = 1; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index fbf866d82dccd..0b893dcc8ea68 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -203,7 +203,7 @@ void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym, bool kernel); void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym); void symbols__fixup_duplicate(struct rb_root_cached *symbols); -void symbols__fixup_end(struct rb_root_cached *symbols); +void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms); void maps__fixup_end(struct maps *maps); typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data); @@ -241,7 +241,6 @@ const char *arch__normalize_symbol_name(const char *name); #define SYMBOL_A 0 #define SYMBOL_B 1 -void arch__symbols__fixup_end(struct symbol *p, struct symbol *c); int arch__compare_symbol_names(const char *namea, const char *nameb); int 
arch__compare_symbol_names_n(const char *namea, const char *nameb, unsigned int n); diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h index a70b3ec09dace..bc3d046fbb632 100644 --- a/tools/perf/util/symbol_conf.h +++ b/tools/perf/util/symbol_conf.h @@ -43,7 +43,8 @@ struct symbol_conf { report_individual_block, inline_name, disable_add2line_warn, - buildid_mmap2; + buildid_mmap2, + guest_code; const char *vmlinux_name, *kallsyms_name, *source_prefix, diff --git a/tools/perf/util/topdown.c b/tools/perf/util/topdown.c index 1081b20f9891d..a369f84ceb6a4 100644 --- a/tools/perf/util/topdown.c +++ b/tools/perf/util/topdown.c @@ -1,18 +1,24 @@ // SPDX-License-Identifier: GPL-2.0 #include <stdio.h> #include "pmu.h" +#include "pmu-hybrid.h" #include "topdown.h" -int topdown_filter_events(const char **attr, char **str, bool use_group) +int topdown_filter_events(const char **attr, char **str, bool use_group, + const char *pmu_name) { int off = 0; int i; int len = 0; char *s; + bool is_hybrid = perf_pmu__is_hybrid(pmu_name); for (i = 0; attr[i]; i++) { - if (pmu_have_event("cpu", attr[i])) { - len += strlen(attr[i]) + 1; + if (pmu_have_event(pmu_name, attr[i])) { + if (is_hybrid) + len += strlen(attr[i]) + strlen(pmu_name) + 3; + else + len += strlen(attr[i]) + 1; attr[i - off] = attr[i]; } else off++; @@ -30,7 +36,10 @@ int topdown_filter_events(const char **attr, char **str, bool use_group) if (use_group) *s++ = '{'; for (i = 0; attr[i]; i++) { - strcpy(s, attr[i]); + if (!is_hybrid) + strcpy(s, attr[i]); + else + sprintf(s, "%s/%s/", pmu_name, attr[i]); s += strlen(s); *s++ = ','; } diff --git a/tools/perf/util/topdown.h b/tools/perf/util/topdown.h index 2f0d0b8876393..118e75281f933 100644 --- a/tools/perf/util/topdown.h +++ b/tools/perf/util/topdown.h @@ -7,6 +7,7 @@ bool arch_topdown_check_group(bool *warn); void arch_topdown_group_warn(void); bool arch_topdown_sample_read(struct evsel *leader); -int topdown_filter_events(const char **attr, char **str, bool use_group); +int topdown_filter_events(const char **attr, char **str, bool use_group, + const char *pmu_name); #endif diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index f8571a66d063f..eeb83c80f4581 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -430,6 +430,11 @@ void perf_debuginfod_setup(struct perf_debuginfod *di) setenv("DEBUGINFOD_URLS", di->urls, 1); pr_debug("DEBUGINFOD_URLS=%s\n", getenv("DEBUGINFOD_URLS")); + +#ifndef HAVE_DEBUGINFOD_SUPPORT + if (di->set) + pr_warning("WARNING: debuginfod support requested, but perf is not built with it\n"); +#endif } /* |
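For hybrid PMUs, topdown_filter_events() above now wraps each event in pmu/event/ syntax instead of using bare event names. A small standalone sketch of the resulting string construction is below; the buffer size, event names, and main() wrapper are illustrative, and the trailing-separator fixup mirrors what the function presumably does after the loop (that part is outside the hunk shown):

#include <stdio.h>

int main(void)
{
	const char *attr[] = { "topdown-retiring", "topdown-bad-spec", NULL };
	const char *pmu_name = "cpu_core";	/* hybrid PMU, hence pmu/event/ syntax */
	char buf[256], *s = buf;

	*s++ = '{';				/* use_group == true */
	for (int i = 0; attr[i]; i++) {
		s += sprintf(s, "%s/%s/", pmu_name, attr[i]);
		*s++ = ',';
	}
	s[-1] = '}';				/* replace the final ',' */
	*s = '\0';
	printf("%s\n", buf);	/* {cpu_core/topdown-retiring/,cpu_core/topdown-bad-spec/} */
	return 0;
}

On a non-hybrid system the same loop keeps the plain strcpy() path, producing the old-style {topdown-retiring,topdown-bad-spec} group string.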