From e5fcc2abc353be94548080d84de3269ef6cc2af6 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:01 +0800 Subject: perf stat: Define a structure for per-thread shadow stats Perf has a set of static variables to record the runtime shadow metrics stats. While if we want to record the runtime shadow stats for per-thread, it will be the limitation. This patch creates a structure and the next patches will use this structure to update the runtime shadow stats for per-thread. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-2-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 11 ----------- tools/perf/util/stat.h | 43 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 57ec22513971..93aac2788056 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -9,17 +9,6 @@ #include "expr.h" #include "metricgroup.h" -enum { - CTX_BIT_USER = 1 << 0, - CTX_BIT_KERNEL = 1 << 1, - CTX_BIT_HV = 1 << 2, - CTX_BIT_HOST = 1 << 3, - CTX_BIT_IDLE = 1 << 4, - CTX_BIT_MAX = 1 << 5, -}; - -#define NUM_CTX CTX_BIT_MAX - /* * AGGR_GLOBAL: Use CPU 0 * AGGR_SOCKET: Use first CPU of socket diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index eefca5c981fd..c685c41f1fb9 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -5,6 +5,7 @@ #include #include #include "xyarray.h" +#include "rblist.h" struct stats { @@ -43,6 +44,47 @@ enum aggr_mode { AGGR_UNSET, }; +enum { + CTX_BIT_USER = 1 << 0, + CTX_BIT_KERNEL = 1 << 1, + CTX_BIT_HV = 1 << 2, + CTX_BIT_HOST = 1 << 3, + CTX_BIT_IDLE = 1 << 4, + CTX_BIT_MAX = 1 << 5, +}; + +#define NUM_CTX CTX_BIT_MAX + +enum stat_type { + STAT_NONE = 0, + STAT_NSECS, + STAT_CYCLES, + 
STAT_STALLED_CYCLES_FRONT, + STAT_STALLED_CYCLES_BACK, + STAT_BRANCHES, + STAT_CACHEREFS, + STAT_L1_DCACHE, + STAT_L1_ICACHE, + STAT_LL_CACHE, + STAT_ITLB_CACHE, + STAT_DTLB_CACHE, + STAT_CYCLES_IN_TX, + STAT_TRANSACTION, + STAT_ELISION, + STAT_TOPDOWN_TOTAL_SLOTS, + STAT_TOPDOWN_SLOTS_ISSUED, + STAT_TOPDOWN_SLOTS_RETIRED, + STAT_TOPDOWN_FETCH_BUBBLES, + STAT_TOPDOWN_RECOVERY_BUBBLES, + STAT_SMI_NUM, + STAT_APERF, + STAT_MAX +}; + +struct runtime_stat { + struct rblist value_list; +}; + struct perf_stat_config { enum aggr_mode aggr_mode; bool scale; @@ -92,7 +134,6 @@ struct perf_stat_output_ctx { bool force_header; }; -struct rblist; void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out, -- cgit v1.2.3 From 49cd456af1dcb13ff3e94cb997c82968ae86722a Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:02 +0800 Subject: perf stat: Extend rbtree to support per-thread shadow stats Previously the rbtree was used to link generic metrics. This patch adds new ctx/type/stat fields to the rbtree keys, because we will use this rbtree to maintain the shadow metrics, replacing the original set of static arrays, in order to support per-thread shadow stats.
Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-3-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 93aac2788056..528be3e8d13b 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -45,7 +45,10 @@ struct stats walltime_nsecs_stats; struct saved_value { struct rb_node rb_node; struct perf_evsel *evsel; + enum stat_type type; + int ctx; int cpu; + struct runtime_stat *stat; struct stats stats; }; @@ -58,6 +61,30 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry) if (a->cpu != b->cpu) return a->cpu - b->cpu; + + /* + * Previously the rbtree was used to link generic metrics. + * The keys were evsel/cpu. Now the rbtree is extended to support + * per-thread shadow stats. For shadow stats case, the keys + * are cpu/type/ctx/stat (evsel is NULL). For generic metrics + * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL). + */ + if (a->type != b->type) + return a->type - b->type; + + if (a->ctx != b->ctx) + return a->ctx - b->ctx; + + if (a->evsel == NULL && b->evsel == NULL) { + if (a->stat == b->stat) + return 0; + + if ((char *)a->stat < (char *)b->stat) + return -1; + + return 1; + } + if (a->evsel == b->evsel) return 0; if ((char *)a->evsel < (char *)b->evsel) -- cgit v1.2.3 From 8efb2df1288bc1bcc3711a97028620717319f138 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:03 +0800 Subject: perf stat: Create the runtime_stat init/exit function It mainly initializes and releases the rblist which is defined in struct runtime_stat. For the original rblist 'runtime_saved_values', it's still kept there for keeping the patch bisectable. 
The rblist 'runtime_saved_values' will be removed in later patch at switching time. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-4-git-send-email-yao.jin@linux.intel.com [ Rename 'stat' variables to 'st' to build on centos:{5,6} and others where it shadows a global declaration ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 17 +++++++++++++++++ tools/perf/util/stat.h | 3 +++ 2 files changed, 20 insertions(+) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 528be3e8d13b..07cfbf613bdc 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -40,6 +40,7 @@ static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS]; static struct rblist runtime_saved_values; static bool have_frontend_stalled; +struct runtime_stat rt_stat; struct stats walltime_nsecs_stats; struct saved_value { @@ -134,6 +135,21 @@ static struct saved_value *saved_value_lookup(struct perf_evsel *evsel, return NULL; } +void runtime_stat__init(struct runtime_stat *st) +{ + struct rblist *rblist = &st->value_list; + + rblist__init(rblist); + rblist->node_cmp = saved_value_cmp; + rblist->node_new = saved_value_new; + rblist->node_delete = saved_value_delete; +} + +void runtime_stat__exit(struct runtime_stat *st) +{ + rblist__exit(&st->value_list); +} + void perf_stat__init_shadow_stats(void) { have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend"); @@ -141,6 +157,7 @@ void perf_stat__init_shadow_stats(void) runtime_saved_values.node_cmp = saved_value_cmp; runtime_saved_values.node_new = saved_value_new; runtime_saved_values.node_delete = saved_value_delete; + runtime_stat__init(&rt_stat); } static int evsel_context(struct perf_evsel *evsel) diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index c685c41f1fb9..f20240037377 100644 --- a/tools/perf/util/stat.h +++ 
b/tools/perf/util/stat.h @@ -117,12 +117,15 @@ bool __perf_evsel_stat__is(struct perf_evsel *evsel, void perf_stat_evsel_id_init(struct perf_evsel *evsel); +extern struct runtime_stat rt_stat; extern struct stats walltime_nsecs_stats; typedef void (*print_metric_t)(void *ctx, const char *color, const char *unit, const char *fmt, double val); typedef void (*new_line_t )(void *ctx); +void runtime_stat__init(struct runtime_stat *st); +void runtime_stat__exit(struct runtime_stat *st); void perf_stat__init_shadow_stats(void); void perf_stat__reset_shadow_stats(void); void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, -- cgit v1.2.3 From 1fcd03946b52b8a57a6692fedd4406b45baedfe6 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:04 +0800 Subject: perf stat: Update per-thread shadow stats The function perf_stat__update_shadow_stats() is called to update the shadow stats, which are kept in a set of static variables. But those static variables prevent the code from being extended to support per-thread shadow stats. This patch lets perf_stat__update_shadow_stats() update the shadow stats through an input parameter 'st', using update_runtime_stat() to update the stats. It no longer updates the static variables directly as before.
Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-5-git-send-email-yao.jin@linux.intel.com [ Rename 'stat' variables to 'st' to build on centos:{5,6} and others where it shadows a global declaration ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 3 +- tools/perf/builtin-stat.c | 3 +- tools/perf/util/stat-shadow.c | 86 +++++++++++++++++++++++++++++-------------- tools/perf/util/stat.c | 8 ++-- tools/perf/util/stat.h | 2 +- 5 files changed, 68 insertions(+), 34 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 39d8b55f0db3..81b395040298 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1548,7 +1548,8 @@ static void perf_sample__fprint_metric(struct perf_script *script, val = sample->period * evsel->scale; perf_stat__update_shadow_stats(evsel, val, - sample->cpu); + sample->cpu, + &rt_stat); evsel_script(evsel)->val = val; if (evsel_script(evsel->leader)->gnum == evsel->leader->nr_members) { for_each_group_member (ev2, evsel->leader) { diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index a027b4712e48..3f4a2c21b824 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1214,7 +1214,8 @@ static void aggr_update_shadow(void) val += perf_counts(counter->counts, cpu, 0)->val; } perf_stat__update_shadow_stats(counter, val, - first_shadow_cpu(counter, id)); + first_shadow_cpu(counter, id), + &rt_stat); } } } diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 07cfbf613bdc..4b28c40de927 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -116,19 +116,29 @@ static void saved_value_delete(struct rblist *rblist __maybe_unused, static struct saved_value *saved_value_lookup(struct perf_evsel *evsel, int cpu, - bool create) + bool create, + enum stat_type type, + int ctx, + struct 
runtime_stat *st) { + struct rblist *rblist; struct rb_node *nd; struct saved_value dm = { .cpu = cpu, .evsel = evsel, + .type = type, + .ctx = ctx, + .stat = st, }; - nd = rblist__find(&runtime_saved_values, &dm); + + rblist = &st->value_list; + + nd = rblist__find(rblist, &dm); if (nd) return container_of(nd, struct saved_value, rb_node); if (create) { - rblist__add_node(&runtime_saved_values, &dm); - nd = rblist__find(&runtime_saved_values, &dm); + rblist__add_node(rblist, &dm); + nd = rblist__find(rblist, &dm); if (nd) return container_of(nd, struct saved_value, rb_node); } @@ -217,13 +227,24 @@ void perf_stat__reset_shadow_stats(void) } } +static void update_runtime_stat(struct runtime_stat *st, + enum stat_type type, + int ctx, int cpu, u64 count) +{ + struct saved_value *v = saved_value_lookup(NULL, cpu, true, + type, ctx, st); + + if (v) + update_stats(&v->stats, count); +} + /* * Update various tracking values we maintain to print * more semantic information such as miss/hit ratios, * instruction rates, etc: */ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, - int cpu) + int cpu, struct runtime_stat *st) { int ctx = evsel_context(counter); @@ -231,50 +252,58 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) || perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK)) - update_stats(&runtime_nsecs_stats[cpu], count); + update_runtime_stat(st, STAT_NSECS, 0, cpu, count); else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) - update_stats(&runtime_cycles_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count); else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) - update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count); else if (perf_stat_evsel__is(counter, TRANSACTION_START)) - update_stats(&runtime_transaction_stats[ctx][cpu], count); + 
update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count); else if (perf_stat_evsel__is(counter, ELISION_START)) - update_stats(&runtime_elision_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_ELISION, ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) - update_stats(&runtime_topdown_total_slots[ctx][cpu], count); + update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS, + ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) - update_stats(&runtime_topdown_slots_issued[ctx][cpu], count); + update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED, + ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) - update_stats(&runtime_topdown_slots_retired[ctx][cpu], count); + update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED, + ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) - update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu], count); + update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES, + ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) - update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count); + update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES, + ctx, cpu, count); else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) - update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT, + ctx, cpu, count); else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) - update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_STALLED_CYCLES_BACK, + ctx, cpu, count); else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) - update_stats(&runtime_branches_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count); else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) - update_stats(&runtime_cacherefs_stats[ctx][cpu], count); + 
update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) - update_stats(&runtime_l1_dcache_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) - update_stats(&runtime_ll_cache_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) - update_stats(&runtime_ll_cache_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) - update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) - update_stats(&runtime_itlb_cache_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count); else if (perf_stat_evsel__is(counter, SMI_NUM)) - update_stats(&runtime_smi_num_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count); else if (perf_stat_evsel__is(counter, APERF)) - update_stats(&runtime_aperf_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_APERF, ctx, cpu, count); if (counter->collect_stat) { - struct saved_value *v = saved_value_lookup(counter, cpu, true); + struct saved_value *v = saved_value_lookup(counter, cpu, true, + STAT_NONE, 0, st); update_stats(&v->stats, count); } } @@ -694,7 +723,8 @@ static void generic_metric(const char *metric_expr, stats = &walltime_nsecs_stats; scale = 1e-9; } else { - v = saved_value_lookup(metric_events[i], cpu, false); + v = saved_value_lookup(metric_events[i], cpu, false, + STAT_NONE, 0, &rt_stat); if (!v) break; stats = &v->stats; diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 151e9efd7286..78abfd40b135 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -278,9 +278,11 @@ 
process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel perf_evsel__compute_deltas(evsel, cpu, thread, count); perf_counts_values__scale(count, config->scale, NULL); if (config->aggr_mode == AGGR_NONE) - perf_stat__update_shadow_stats(evsel, count->val, cpu); + perf_stat__update_shadow_stats(evsel, count->val, cpu, + &rt_stat); if (config->aggr_mode == AGGR_THREAD) - perf_stat__update_shadow_stats(evsel, count->val, 0); + perf_stat__update_shadow_stats(evsel, count->val, 0, + &rt_stat); break; case AGGR_GLOBAL: aggr->val += count->val; @@ -362,7 +364,7 @@ int perf_stat_process_counter(struct perf_stat_config *config, /* * Save the full runtime - to allow normalization during printout: */ - perf_stat__update_shadow_stats(counter, *count, 0); + perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat); return 0; } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index f20240037377..bb9902ad3a79 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -129,7 +129,7 @@ void runtime_stat__exit(struct runtime_stat *st); void perf_stat__init_shadow_stats(void); void perf_stat__reset_shadow_stats(void); void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, - int cpu); + int cpu, struct runtime_stat *st); struct perf_stat_output_ctx { void *ctx; print_metric_t print_metric; -- cgit v1.2.3 From e0128b30dbfb2884530251b4accdffdbf55a6b72 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:05 +0800 Subject: perf stat: Print per-thread shadow stats The function perf_stat__print_shadow_stats() is called to print the shadow stats, which are kept in a set of static variables. But those static variables prevent the code from supporting per-thread shadow stats. This patch lets perf_stat__print_shadow_stats() print the shadow stats taken from an input parameter 'st'. It no longer reads the values directly from the static variables. Instead, it now uses runtime_stat_avg() and runtime_stat_n() to get and compute the values.
Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-6-git-send-email-yao.jin@linux.intel.com [ Rename 'stat' variables to 'st' to build on centos:{5,6} and others where it shadows a global declaration ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 3 +- tools/perf/builtin-stat.c | 23 +++-- tools/perf/util/stat-shadow.c | 209 ++++++++++++++++++++++++++---------------- tools/perf/util/stat.h | 3 +- 4 files changed, 151 insertions(+), 87 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 81b395040298..fac6f053e4da 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1557,7 +1557,8 @@ static void perf_sample__fprint_metric(struct perf_script *script, evsel_script(ev2)->val, sample->cpu, &ctx, - NULL); + NULL, + &rt_stat); } evsel_script(evsel->leader)->gnum = 0; } diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 3f4a2c21b824..097a694d16f2 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1097,7 +1097,8 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) } static void printout(int id, int nr, struct perf_evsel *counter, double uval, - char *prefix, u64 run, u64 ena, double noise) + char *prefix, u64 run, u64 ena, double noise, + struct runtime_stat *st) { struct perf_stat_output_ctx out; struct outstate os = { @@ -1190,7 +1191,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, perf_stat__print_shadow_stats(counter, uval, first_shadow_cpu(counter, id), - &out, &metric_events); + &out, &metric_events, st); if (!csv_output && !metric_only) { print_noise(counter, noise); print_running(run, ena); @@ -1335,7 +1336,8 @@ static void print_aggr(char *prefix) fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(id, nr, counter, uval, prefix, run, ena, 
1.0); + printout(id, nr, counter, uval, prefix, run, ena, 1.0, + &rt_stat); if (!metric_only) fputc('\n', output); } @@ -1365,7 +1367,8 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(thread, 0, counter, uval, prefix, run, ena, 1.0); + printout(thread, 0, counter, uval, prefix, run, ena, 1.0, + &rt_stat); fputc('\n', output); } } @@ -1402,7 +1405,8 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) fprintf(output, "%s", prefix); uval = cd.avg * counter->scale; - printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, cd.avg); + printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, + cd.avg, &rt_stat); if (!metric_only) fprintf(output, "\n"); } @@ -1441,7 +1445,8 @@ static void print_counter(struct perf_evsel *counter, char *prefix) fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); + printout(cpu, 0, counter, uval, prefix, run, ena, 1.0, + &rt_stat); fputc('\n', output); } @@ -1473,7 +1478,8 @@ static void print_no_aggr_metric(char *prefix) run = perf_counts(counter->counts, cpu, 0)->run; uval = val * counter->scale; - printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); + printout(cpu, 0, counter, uval, prefix, run, ena, 1.0, + &rt_stat); } fputc('\n', stat_config.output); } @@ -1529,7 +1535,8 @@ static void print_metric_headers(const char *prefix, bool no_indent) perf_stat__print_shadow_stats(counter, 0, 0, &out, - &metric_events); + &metric_events, + &rt_stat); } fputc('\n', stat_config.output); } diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 4b28c40de927..a95c4fe991aa 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -424,15 +424,40 @@ void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list) } } +static double runtime_stat_avg(struct runtime_stat *st, + enum 
stat_type type, int ctx, int cpu) +{ + struct saved_value *v; + + v = saved_value_lookup(NULL, cpu, false, type, ctx, st); + if (!v) + return 0.0; + + return avg_stats(&v->stats); +} + +static double runtime_stat_n(struct runtime_stat *st, + enum stat_type type, int ctx, int cpu) +{ + struct saved_value *v; + + v = saved_value_lookup(NULL, cpu, false, type, ctx, st); + if (!v) + return 0.0; + + return v->stats.n; +} + static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -448,13 +473,14 @@ static void print_stalled_cycles_frontend(int cpu, static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -467,13 +493,14 @@ static void print_stalled_cycles_backend(int cpu, static void print_branch_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_branches_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -486,13 +513,15 @@ static void print_branch_misses(int cpu, static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + 
struct perf_stat_output_ctx *out, + struct runtime_stat *st) + { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -505,13 +534,15 @@ static void print_l1_dcache_misses(int cpu, static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) + { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -523,13 +554,14 @@ static void print_l1_icache_misses(int cpu, static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -541,13 +573,14 @@ static void print_dtlb_cache_misses(int cpu, static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -559,13 +592,14 @@ static void print_itlb_cache_misses(int cpu, static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) 
{ double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -623,68 +657,72 @@ static double sanitize_val(double x) return x; } -static double td_total_slots(int ctx, int cpu) +static double td_total_slots(int ctx, int cpu, struct runtime_stat *st) { - return avg_stats(&runtime_topdown_total_slots[ctx][cpu]); + return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu); } -static double td_bad_spec(int ctx, int cpu) +static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st) { double bad_spec = 0; double total_slots; double total; - total = avg_stats(&runtime_topdown_slots_issued[ctx][cpu]) - - avg_stats(&runtime_topdown_slots_retired[ctx][cpu]) + - avg_stats(&runtime_topdown_recovery_bubbles[ctx][cpu]); - total_slots = td_total_slots(ctx, cpu); + total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) - + runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) + + runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu); + + total_slots = td_total_slots(ctx, cpu, st); if (total_slots) bad_spec = total / total_slots; return sanitize_val(bad_spec); } -static double td_retiring(int ctx, int cpu) +static double td_retiring(int ctx, int cpu, struct runtime_stat *st) { double retiring = 0; - double total_slots = td_total_slots(ctx, cpu); - double ret_slots = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]); + double total_slots = td_total_slots(ctx, cpu, st); + double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, + ctx, cpu); if (total_slots) retiring = ret_slots / total_slots; return retiring; } -static double td_fe_bound(int ctx, int cpu) +static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st) { double fe_bound = 0; - double total_slots = td_total_slots(ctx, cpu); - double fetch_bub = 
avg_stats(&runtime_topdown_fetch_bubbles[ctx][cpu]); + double total_slots = td_total_slots(ctx, cpu, st); + double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES, + ctx, cpu); if (total_slots) fe_bound = fetch_bub / total_slots; return fe_bound; } -static double td_be_bound(int ctx, int cpu) +static double td_be_bound(int ctx, int cpu, struct runtime_stat *st) { - double sum = (td_fe_bound(ctx, cpu) + - td_bad_spec(ctx, cpu) + - td_retiring(ctx, cpu)); + double sum = (td_fe_bound(ctx, cpu, st) + + td_bad_spec(ctx, cpu, st) + + td_retiring(ctx, cpu, st)); if (sum == 0) return 0; return sanitize_val(1.0 - sum); } static void print_smi_cost(int cpu, struct perf_evsel *evsel, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double smi_num, aperf, cycles, cost = 0.0; int ctx = evsel_context(evsel); const char *color = NULL; - smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]); - aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]); - cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]); + smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu); + aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu); + cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); if ((cycles == 0) || (aperf == 0)) return; @@ -704,7 +742,8 @@ static void generic_metric(const char *metric_expr, const char *metric_name, double avg, int cpu, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { print_metric_t print_metric = out->print_metric; struct parse_ctx pctx; @@ -724,7 +763,7 @@ static void generic_metric(const char *metric_expr, scale = 1e-9; } else { v = saved_value_lookup(metric_events[i], cpu, false, - STAT_NONE, 0, &rt_stat); + STAT_NONE, 0, st); if (!v) break; stats = &v->stats; @@ -752,7 +791,8 @@ static void generic_metric(const char *metric_expr, void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out, - struct rblist 
*metric_events) + struct rblist *metric_events, + struct runtime_stat *st) { void *ctxp = out->ctx; print_metric_t print_metric = out->print_metric; @@ -763,7 +803,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, int num = 1; if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + if (total) { ratio = avg / total; print_metric(ctxp, NULL, "%7.2f ", @@ -771,8 +812,13 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, } else { print_metric(ctxp, NULL, NULL, "insn per cycle", 0); } - total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]); - total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu])); + + total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, + ctx, cpu); + + total = max(total, runtime_stat_avg(st, + STAT_STALLED_CYCLES_BACK, + ctx, cpu)); if (total && avg) { out->new_line(ctxp); @@ -785,8 +831,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, "stalled cycles per insn", 0); } } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { - if (runtime_branches_stats[ctx][cpu].n != 0) - print_branch_misses(cpu, evsel, avg, out); + if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0) + print_branch_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all branches", 0); } else if ( @@ -794,8 +840,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_l1_dcache_stats[ctx][cpu].n != 0) - print_l1_dcache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0) + print_l1_dcache_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0); } else if ( @@ -803,8 +850,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel 
*evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_l1_icache_stats[ctx][cpu].n != 0) - print_l1_icache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0) + print_l1_icache_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0); } else if ( @@ -812,8 +860,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_dtlb_cache_stats[ctx][cpu].n != 0) - print_dtlb_cache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0) + print_dtlb_cache_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0); } else if ( @@ -821,8 +870,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_itlb_cache_stats[ctx][cpu].n != 0) - print_itlb_cache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0) + print_itlb_cache_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0); } else if ( @@ -830,27 +880,28 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_ll_cache_stats[ctx][cpu].n != 0) - print_ll_cache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0) + print_ll_cache_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0); } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { - total = 
avg_stats(&runtime_cacherefs_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu); if (total) ratio = avg * 100 / total; - if (runtime_cacherefs_stats[ctx][cpu].n != 0) + if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0) print_metric(ctxp, NULL, "%8.3f %%", "of all cache refs", ratio); else print_metric(ctxp, NULL, NULL, "of all cache refs", 0); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { - print_stalled_cycles_frontend(cpu, evsel, avg, out); + print_stalled_cycles_frontend(cpu, evsel, avg, out, st); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { - print_stalled_cycles_backend(cpu, evsel, avg, out); + print_stalled_cycles_backend(cpu, evsel, avg, out, st); } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { - total = avg_stats(&runtime_nsecs_stats[cpu]); + total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); if (total) { ratio = avg / total; @@ -859,7 +910,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, print_metric(ctxp, NULL, NULL, "Ghz", 0); } } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + if (total) print_metric(ctxp, NULL, "%7.2f%%", "transactional cycles", @@ -868,8 +920,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, print_metric(ctxp, NULL, NULL, "transactional cycles", 0); } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); - total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu); + if (total2 < avg) total2 = avg; if (total) @@ -878,19 +931,21 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, else print_metric(ctxp, NULL, NULL, "aborted cycles", 0); } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { - total = 
avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, + ctx, cpu); if (avg) ratio = total / avg; - if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0) + if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0) print_metric(ctxp, NULL, "%8.0f", "cycles / transaction", ratio); else print_metric(ctxp, NULL, NULL, "cycles / transaction", - 0); + 0); } else if (perf_stat_evsel__is(evsel, ELISION_START)) { - total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, + ctx, cpu); if (avg) ratio = total / avg; @@ -904,28 +959,28 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, else print_metric(ctxp, NULL, NULL, "CPUs utilized", 0); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { - double fe_bound = td_fe_bound(ctx, cpu); + double fe_bound = td_fe_bound(ctx, cpu, st); if (fe_bound > 0.2) color = PERF_COLOR_RED; print_metric(ctxp, color, "%8.1f%%", "frontend bound", fe_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { - double retiring = td_retiring(ctx, cpu); + double retiring = td_retiring(ctx, cpu, st); if (retiring > 0.7) color = PERF_COLOR_GREEN; print_metric(ctxp, color, "%8.1f%%", "retiring", retiring * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { - double bad_spec = td_bad_spec(ctx, cpu); + double bad_spec = td_bad_spec(ctx, cpu, st); if (bad_spec > 0.1) color = PERF_COLOR_RED; print_metric(ctxp, color, "%8.1f%%", "bad speculation", bad_spec * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { - double be_bound = td_be_bound(ctx, cpu); + double be_bound = td_be_bound(ctx, cpu, st); const char *name = "backend bound"; static int have_recovery_bubbles = -1; @@ -938,19 +993,19 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, if (be_bound > 0.2) color = PERF_COLOR_RED; - if (td_total_slots(ctx, cpu) > 0) + if (td_total_slots(ctx, cpu, st) > 0) 
print_metric(ctxp, color, "%8.1f%%", name, be_bound * 100.); else print_metric(ctxp, NULL, NULL, name, 0); } else if (evsel->metric_expr) { generic_metric(evsel->metric_expr, evsel->metric_events, evsel->name, - evsel->metric_name, avg, cpu, out); - } else if (runtime_nsecs_stats[cpu].n != 0) { + evsel->metric_name, avg, cpu, out, st); + } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) { char unit = 'M'; char unit_buf[10]; - total = avg_stats(&runtime_nsecs_stats[cpu]); + total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); if (total) ratio = 1000.0 * avg / total; @@ -961,7 +1016,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio); } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { - print_smi_cost(cpu, evsel, out); + print_smi_cost(cpu, evsel, out, st); } else { num = 0; } @@ -974,7 +1029,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, out->new_line(ctxp); generic_metric(mexp->metric_expr, mexp->metric_events, evsel->name, mexp->metric_name, - avg, cpu, out); + avg, cpu, out, st); } } if (num == 0) diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index bb9902ad3a79..76b322a2d293 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -140,7 +140,8 @@ struct perf_stat_output_ctx { void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out, - struct rblist *metric_events); + struct rblist *metric_events, + struct runtime_stat *st); void perf_stat__collect_metric_expr(struct perf_evlist *); int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); -- cgit v1.2.3 From 6a1e2c5c267358455a13bd8d59547430370c845a Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:06 +0800 Subject: perf stat: Remove a set of shadow stats static variables In previous patches, we have reconstructed the code and let it not access the static 
variables directly. This patch removes these static variables. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-7-git-send-email-yao.jin@linux.intel.com [ Rename 'stat' variables to 'st' to build on centos:{5,6} and others where it shadows a global declaration ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 68 ++++++++++--------------------------------- tools/perf/util/stat.h | 1 + 2 files changed, 16 insertions(+), 53 deletions(-) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index a95c4fe991aa..594d14a02b67 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -16,28 +16,6 @@ * AGGR_NONE: Use matching CPU * AGGR_THREAD: Not supported? */ -static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; -static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_total_slots[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS]; -static struct stats 
runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS]; -static struct rblist runtime_saved_values; static bool have_frontend_stalled; struct runtime_stat rt_stat; @@ -163,10 +141,6 @@ void runtime_stat__exit(struct runtime_stat *st) void perf_stat__init_shadow_stats(void) { have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend"); - rblist__init(&runtime_saved_values); - runtime_saved_values.node_cmp = saved_value_cmp; - runtime_saved_values.node_new = saved_value_new; - runtime_saved_values.node_delete = saved_value_delete; runtime_stat__init(&rt_stat); } @@ -188,36 +162,13 @@ static int evsel_context(struct perf_evsel *evsel) return ctx; } -void perf_stat__reset_shadow_stats(void) +static void reset_stat(struct runtime_stat *st) { + struct rblist *rblist; struct rb_node *pos, *next; - memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats)); - memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats)); - memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats)); - memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats)); - memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats)); - memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats)); - memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats)); - memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats)); - memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats)); - memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats)); - memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats)); - memset(runtime_cycles_in_tx_stats, 0, - sizeof(runtime_cycles_in_tx_stats)); - 
memset(runtime_transaction_stats, 0, - sizeof(runtime_transaction_stats)); - memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats)); - memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); - memset(runtime_topdown_total_slots, 0, sizeof(runtime_topdown_total_slots)); - memset(runtime_topdown_slots_retired, 0, sizeof(runtime_topdown_slots_retired)); - memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued)); - memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles)); - memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles)); - memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats)); - memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats)); - - next = rb_first(&runtime_saved_values.entries); + rblist = &st->value_list; + next = rb_first(&rblist->entries); while (next) { pos = next; next = rb_next(pos); @@ -227,6 +178,17 @@ void perf_stat__reset_shadow_stats(void) } } +void perf_stat__reset_shadow_stats(void) +{ + reset_stat(&rt_stat); + memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); +} + +void perf_stat__reset_shadow_per_stat(struct runtime_stat *st) +{ + reset_stat(st); +} + static void update_runtime_stat(struct runtime_stat *st, enum stat_type type, int ctx, int cpu, u64 count) diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 76b322a2d293..cfe4fb899633 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -128,6 +128,7 @@ void runtime_stat__init(struct runtime_stat *st); void runtime_stat__exit(struct runtime_stat *st); void perf_stat__init_shadow_stats(void); void perf_stat__reset_shadow_stats(void); +void perf_stat__reset_shadow_per_stat(struct runtime_stat *st); void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, int cpu, struct runtime_stat *st); struct perf_stat_output_ctx { -- cgit v1.2.3 From 56739444d861daa050624d40c7adff32c73e9980 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 
5 Dec 2017 22:03:07 +0800 Subject: perf stat: Allocate shadow stats buffer for threads After perf_evlist__create_maps() being executed, we can get all threads from /proc. And via thread_map__nr(), we can also get the number of threads. With the number of threads, the patch allocates a buffer which will record the shadow stats for these threads. The buffer pointer is saved in stat_config. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-8-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 46 +++++++++++++++++++++++++++++++++++++++++++++- tools/perf/util/stat.h | 2 ++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 097a694d16f2..4c492ac3ac07 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -214,8 +214,13 @@ static inline void diff_timespec(struct timespec *r, struct timespec *a, static void perf_stat__reset_stats(void) { + int i; + perf_evlist__reset_stats(evsel_list); perf_stat__reset_shadow_stats(); + + for (i = 0; i < stat_config.stats_num; i++) + perf_stat__reset_shadow_per_stat(&stat_config.stats[i]); } static int create_perf_stat_counter(struct perf_evsel *evsel) @@ -2495,6 +2500,35 @@ int process_cpu_map_event(struct perf_tool *tool, return set_maps(st); } +static int runtime_stat_new(struct perf_stat_config *config, int nthreads) +{ + int i; + + config->stats = calloc(nthreads, sizeof(struct runtime_stat)); + if (!config->stats) + return -1; + + config->stats_num = nthreads; + + for (i = 0; i < nthreads; i++) + runtime_stat__init(&config->stats[i]); + + return 0; +} + +static void runtime_stat_delete(struct perf_stat_config *config) +{ + int i; + + if (!config->stats) + return; + + for (i = 0; i < config->stats_num; i++) + runtime_stat__exit(&config->stats[i]); + + free(config->stats); +} + 
static const char * const stat_report_usage[] = { "perf stat report []", NULL, @@ -2750,8 +2784,15 @@ int cmd_stat(int argc, const char **argv) * Initialize thread_map with comm names, * so we could print it out on output. */ - if (stat_config.aggr_mode == AGGR_THREAD) + if (stat_config.aggr_mode == AGGR_THREAD) { thread_map__read_comms(evsel_list->threads); + if (target.system_wide) { + if (runtime_stat_new(&stat_config, + thread_map__nr(evsel_list->threads))) { + goto out; + } + } + } if (interval && interval < 100) { if (interval < 10) { @@ -2841,5 +2882,8 @@ out: sysfs__write_int(FREEZE_ON_SMI_PATH, 0); perf_evlist__delete(evsel_list); + + runtime_stat_delete(&stat_config); + return status; } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index cfe4fb899633..2ed95dc72784 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -90,6 +90,8 @@ struct perf_stat_config { bool scale; FILE *output; unsigned int interval; + struct runtime_stat *stats; + int stats_num; }; void update_stats(struct stats *stats, u64 val); -- cgit v1.2.3 From 14e72a21c783654ca7b6c897b6d6508c1abccd7d Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:08 +0800 Subject: perf stat: Update or print per-thread stats If the stats pointer in stat_config structure is not null, it will update the per-thread stats or print the per-thread stats on this buffer. 
Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-9-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 9 +++++++-- tools/perf/util/stat.c | 11 ++++++++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 4c492ac3ac07..f4129a5fbb01 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1372,8 +1372,13 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(thread, 0, counter, uval, prefix, run, ena, 1.0, - &rt_stat); + + if (stat_config.stats) + printout(thread, 0, counter, uval, prefix, run, ena, + 1.0, &stat_config.stats[thread]); + else + printout(thread, 0, counter, uval, prefix, run, ena, + 1.0, &rt_stat); fputc('\n', output); } } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 78abfd40b135..32235657c1ac 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -280,9 +280,14 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel if (config->aggr_mode == AGGR_NONE) perf_stat__update_shadow_stats(evsel, count->val, cpu, &rt_stat); - if (config->aggr_mode == AGGR_THREAD) - perf_stat__update_shadow_stats(evsel, count->val, 0, - &rt_stat); + if (config->aggr_mode == AGGR_THREAD) { + if (config->stats) + perf_stat__update_shadow_stats(evsel, + count->val, 0, &config->stats[thread]); + else + perf_stat__update_shadow_stats(evsel, + count->val, 0, &rt_stat); + } break; case AGGR_GLOBAL: aggr->val += count->val; -- cgit v1.2.3 From 73c0ca1eee3d2c96898e05a16be49da2a6d590b2 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:09 +0800 Subject: perf thread_map: Enumerate all threads from /proc This patch calls thread_map__new_all_cpus() to enumerate all 
threads from /proc if per-thread flag is enabled. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-10-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/thread-map.c | 2 +- tools/perf/util/evlist.c | 3 ++- tools/perf/util/thread_map.c | 5 ++++- tools/perf/util/thread_map.h | 2 +- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index dbcb6a19b375..4de1939b58ba 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -105,7 +105,7 @@ int test__thread_map_remove(struct test *test __maybe_unused, int subtest __mayb TEST_ASSERT_VAL("failed to allocate map string", asprintf(&str, "%d,%d", getpid(), getppid()) >= 0); - threads = thread_map__new_str(str, NULL, 0); + threads = thread_map__new_str(str, NULL, 0, false); TEST_ASSERT_VAL("failed to allocate thread_map", threads); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 3570355bcf39..f0a5e09c4071 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1105,7 +1105,8 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) struct cpu_map *cpus; struct thread_map *threads; - threads = thread_map__new_str(target->pid, target->tid, target->uid); + threads = thread_map__new_str(target->pid, target->tid, target->uid, + target->per_thread); if (!threads) return -1; diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 2b653853eec2..3e1038f6491c 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -323,7 +323,7 @@ out_free_threads: } struct thread_map *thread_map__new_str(const char *pid, const char *tid, - uid_t uid) + uid_t uid, bool per_thread) { if (pid) return thread_map__new_by_pid_str(pid); @@ -331,6 +331,9 @@ struct thread_map 
*thread_map__new_str(const char *pid, const char *tid, if (!tid && uid != UINT_MAX) return thread_map__new_by_uid(uid); + if (per_thread) + return thread_map__new_all_cpus(); + return thread_map__new_by_tid_str(tid); } diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index 07a765fb22bb..0a806b99e73c 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -31,7 +31,7 @@ struct thread_map *thread_map__get(struct thread_map *map); void thread_map__put(struct thread_map *map); struct thread_map *thread_map__new_str(const char *pid, - const char *tid, uid_t uid); + const char *tid, uid_t uid, bool per_thread); struct thread_map *thread_map__new_by_tid_str(const char *tid_str); -- cgit v1.2.3 From 1d9f8d1b824bf69cf984c1c36e5641b51eea42bb Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:10 +0800 Subject: perf stat: Remove --per-thread pid/tid limitation Currently, if we execute 'perf stat --per-thread' without specifying pid/tid, perf will return an error. root@skl:/tmp# perf stat --per-thread The --per-thread option is only available when monitoring via -p -t options. -p, --pid stat events on existing process id -t, --tid stat events on existing thread id This patch removes this limitation. If no pid/tid is specified, it returns all threads (the threads are read from /proc). Note that it doesn't support cpu_list yet, so the cpu_list case is skipped.
Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-11-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 23 +++++++++++++++-------- tools/perf/util/target.h | 7 +++++++ 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f4129a5fbb01..ee708ba6f79a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -277,7 +277,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel) attr->enable_on_exec = 1; } - if (target__has_cpu(&target)) + if (target__has_cpu(&target) && !target__has_per_thread(&target)) return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); return perf_evsel__open_per_thread(evsel, evsel_list->threads); @@ -340,7 +340,7 @@ static int read_counter(struct perf_evsel *counter) int nthreads = thread_map__nr(evsel_list->threads); int ncpus, cpu, thread; - if (target__has_cpu(&target)) + if (target__has_cpu(&target) && !target__has_per_thread(&target)) ncpus = perf_evsel__nr_cpus(counter); else ncpus = 1; @@ -2743,12 +2743,16 @@ int cmd_stat(int argc, const char **argv) run_count = 1; } - if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { - fprintf(stderr, "The --per-thread option is only available " - "when monitoring via -p -t options.\n"); - parse_options_usage(NULL, stat_options, "p", 1); - parse_options_usage(NULL, stat_options, "t", 1); - goto out; + if ((stat_config.aggr_mode == AGGR_THREAD) && + !target__has_task(&target)) { + if (!target.system_wide || target.cpu_list) { + fprintf(stderr, "The --per-thread option is only " + "available when monitoring via -p -t -a " + "options or only --per-thread.\n"); + parse_options_usage(NULL, stat_options, "p", 1); + parse_options_usage(NULL, stat_options, "t", 1); + goto out; + } } /* @@ -2772,6 +2776,9 @@ int 
cmd_stat(int argc, const char **argv) target__validate(&target); + if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide)) + target.per_thread = true; + if (perf_evlist__create_maps(evsel_list, &target) < 0) { if (target__has_task(&target)) { pr_err("Problems finding threads of monitor\n"); diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h index 446aa7a56f25..6ef01a83b24e 100644 --- a/tools/perf/util/target.h +++ b/tools/perf/util/target.h @@ -64,6 +64,11 @@ static inline bool target__none(struct target *target) return !target__has_task(target) && !target__has_cpu(target); } +static inline bool target__has_per_thread(struct target *target) +{ + return target->system_wide && target->per_thread; +} + static inline bool target__uses_dummy_map(struct target *target) { bool use_dummy = false; @@ -73,6 +78,8 @@ static inline bool target__uses_dummy_map(struct target *target) else if (target__has_task(target) || (!target__has_cpu(target) && !target->uses_mmap)) use_dummy = true; + else if (target__has_per_thread(target)) + use_dummy = true; return use_dummy; } -- cgit v1.2.3 From 29734550c996c259ffa8d32198439d6fe4b51320 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:11 +0800 Subject: perf stat: Resort '--per-thread' result There are many threads reported if we enable '--per-thread' globally. 1. Most of the threads are not counted or have a counting value of 0. This patch removes these threads. 2. We also re-sort the displayed threads by counting value. This makes it easy for the user to see the hottest threads.
For example, the new results would be: root@skl:/tmp# perf stat --per-thread ^C Performance counter stats for 'system wide': perf-24165 4.302433 cpu-clock (msec) # 0.001 CPUs utilized vmstat-23127 1.562215 cpu-clock (msec) # 0.000 CPUs utilized irqbalance-2780 0.827851 cpu-clock (msec) # 0.000 CPUs utilized sshd-23111 0.278308 cpu-clock (msec) # 0.000 CPUs utilized thermald-2841 0.230880 cpu-clock (msec) # 0.000 CPUs utilized sshd-23058 0.207306 cpu-clock (msec) # 0.000 CPUs utilized kworker/0:2-19991 0.133983 cpu-clock (msec) # 0.000 CPUs utilized kworker/u16:1-18249 0.125636 cpu-clock (msec) # 0.000 CPUs utilized rcu_sched-8 0.085533 cpu-clock (msec) # 0.000 CPUs utilized kworker/u16:2-23146 0.077139 cpu-clock (msec) # 0.000 CPUs utilized gmain-2700 0.041789 cpu-clock (msec) # 0.000 CPUs utilized kworker/4:1-15354 0.028370 cpu-clock (msec) # 0.000 CPUs utilized kworker/6:0-17528 0.023895 cpu-clock (msec) # 0.000 CPUs utilized kworker/4:1H-1887 0.013209 cpu-clock (msec) # 0.000 CPUs utilized kworker/5:2-31362 0.011627 cpu-clock (msec) # 0.000 CPUs utilized watchdog/0-11 0.010892 cpu-clock (msec) # 0.000 CPUs utilized kworker/3:2-12870 0.010220 cpu-clock (msec) # 0.000 CPUs utilized ksoftirqd/0-7 0.008869 cpu-clock (msec) # 0.000 CPUs utilized watchdog/1-14 0.008476 cpu-clock (msec) # 0.000 CPUs utilized watchdog/7-50 0.002944 cpu-clock (msec) # 0.000 CPUs utilized watchdog/3-26 0.002893 cpu-clock (msec) # 0.000 CPUs utilized watchdog/4-32 0.002759 cpu-clock (msec) # 0.000 CPUs utilized watchdog/2-20 0.002429 cpu-clock (msec) # 0.000 CPUs utilized watchdog/6-44 0.001491 cpu-clock (msec) # 0.000 CPUs utilized watchdog/5-38 0.001477 cpu-clock (msec) # 0.000 CPUs utilized rcu_sched-8 10 context-switches # 0.117 M/sec kworker/u16:1-18249 7 context-switches # 0.056 M/sec sshd-23111 4 context-switches # 0.014 M/sec vmstat-23127 4 context-switches # 0.003 M/sec perf-24165 4 context-switches # 0.930 K/sec kworker/0:2-19991 3 context-switches # 0.022 M/sec 
kworker/u16:2-23146 3 context-switches # 0.039 M/sec kworker/4:1-15354 2 context-switches # 0.070 M/sec kworker/6:0-17528 2 context-switches # 0.084 M/sec sshd-23058 2 context-switches # 0.010 M/sec ksoftirqd/0-7 1 context-switches # 0.113 M/sec watchdog/0-11 1 context-switches # 0.092 M/sec watchdog/1-14 1 context-switches # 0.118 M/sec watchdog/2-20 1 context-switches # 0.412 M/sec watchdog/3-26 1 context-switches # 0.346 M/sec watchdog/4-32 1 context-switches # 0.362 M/sec watchdog/5-38 1 context-switches # 0.677 M/sec watchdog/6-44 1 context-switches # 0.671 M/sec watchdog/7-50 1 context-switches # 0.340 M/sec kworker/4:1H-1887 1 context-switches # 0.076 M/sec thermald-2841 1 context-switches # 0.004 M/sec gmain-2700 1 context-switches # 0.024 M/sec irqbalance-2780 1 context-switches # 0.001 M/sec kworker/3:2-12870 1 context-switches # 0.098 M/sec kworker/5:2-31362 1 context-switches # 0.086 M/sec kworker/u16:1-18249 2 cpu-migrations # 0.016 M/sec kworker/u16:2-23146 2 cpu-migrations # 0.026 M/sec rcu_sched-8 1 cpu-migrations # 0.012 M/sec sshd-23058 1 cpu-migrations # 0.005 M/sec perf-24165 8,833,385 cycles # 2.053 GHz vmstat-23127 1,702,699 cycles # 1.090 GHz irqbalance-2780 739,847 cycles # 0.894 GHz sshd-23111 269,506 cycles # 0.968 GHz thermald-2841 204,556 cycles # 0.886 GHz sshd-23058 158,780 cycles # 0.766 GHz kworker/0:2-19991 112,981 cycles # 0.843 GHz kworker/u16:1-18249 100,926 cycles # 0.803 GHz rcu_sched-8 74,024 cycles # 0.865 GHz kworker/u16:2-23146 55,984 cycles # 0.726 GHz gmain-2700 34,278 cycles # 0.820 GHz kworker/4:1-15354 20,665 cycles # 0.728 GHz kworker/6:0-17528 16,445 cycles # 0.688 GHz kworker/5:2-31362 9,492 cycles # 0.816 GHz watchdog/3-26 8,695 cycles # 3.006 GHz kworker/4:1H-1887 8,238 cycles # 0.624 GHz watchdog/4-32 7,580 cycles # 2.747 GHz kworker/3:2-12870 7,306 cycles # 0.715 GHz watchdog/2-20 7,274 cycles # 2.995 GHz watchdog/0-11 6,988 cycles # 0.642 GHz ksoftirqd/0-7 6,376 cycles # 0.719 GHz watchdog/1-14 5,340 cycles # 
0.630 GHz watchdog/5-38 4,061 cycles # 2.749 GHz watchdog/6-44 3,976 cycles # 2.667 GHz watchdog/7-50 3,418 cycles # 1.161 GHz vmstat-23127 2,511,699 instructions # 1.48 insn per cycle perf-24165 1,829,908 instructions # 0.21 insn per cycle irqbalance-2780 1,190,204 instructions # 1.61 insn per cycle thermald-2841 143,544 instructions # 0.70 insn per cycle sshd-23111 128,138 instructions # 0.48 insn per cycle sshd-23058 57,654 instructions # 0.36 insn per cycle rcu_sched-8 44,063 instructions # 0.60 insn per cycle kworker/u16:1-18249 42,551 instructions # 0.42 insn per cycle kworker/0:2-19991 25,873 instructions # 0.23 insn per cycle kworker/u16:2-23146 21,407 instructions # 0.38 insn per cycle gmain-2700 13,691 instructions # 0.40 insn per cycle kworker/4:1-15354 12,964 instructions # 0.63 insn per cycle kworker/6:0-17528 10,034 instructions # 0.61 insn per cycle kworker/5:2-31362 5,203 instructions # 0.55 insn per cycle kworker/3:2-12870 4,866 instructions # 0.67 insn per cycle kworker/4:1H-1887 3,586 instructions # 0.44 insn per cycle ksoftirqd/0-7 3,463 instructions # 0.54 insn per cycle watchdog/0-11 3,135 instructions # 0.45 insn per cycle watchdog/1-14 3,135 instructions # 0.59 insn per cycle watchdog/2-20 3,135 instructions # 0.43 insn per cycle watchdog/3-26 3,135 instructions # 0.36 insn per cycle watchdog/4-32 3,135 instructions # 0.41 insn per cycle watchdog/5-38 3,135 instructions # 0.77 insn per cycle watchdog/6-44 3,135 instructions # 0.79 insn per cycle watchdog/7-50 3,135 instructions # 0.92 insn per cycle vmstat-23127 539,181 branches # 345.139 M/sec perf-24165 375,364 branches # 87.245 M/sec irqbalance-2780 262,092 branches # 316.593 M/sec thermald-2841 31,611 branches # 136.915 M/sec sshd-23111 21,874 branches # 78.596 M/sec sshd-23058 10,682 branches # 51.528 M/sec rcu_sched-8 8,693 branches # 101.633 M/sec kworker/u16:1-18249 7,891 branches # 62.808 M/sec kworker/0:2-19991 5,761 branches # 42.998 M/sec kworker/u16:2-23146 4,099 branches # 
53.138 M/sec kworker/4:1-15354 2,755 branches # 97.110 M/sec gmain-2700 2,638 branches # 63.127 M/sec kworker/6:0-17528 2,216 branches # 92.739 M/sec kworker/5:2-31362 1,132 branches # 97.360 M/sec kworker/3:2-12870 1,081 branches # 105.773 M/sec kworker/4:1H-1887 725 branches # 54.887 M/sec ksoftirqd/0-7 707 branches # 79.716 M/sec watchdog/0-11 652 branches # 59.860 M/sec watchdog/1-14 652 branches # 76.923 M/sec watchdog/2-20 652 branches # 268.423 M/sec watchdog/3-26 652 branches # 225.372 M/sec watchdog/4-32 652 branches # 236.318 M/sec watchdog/5-38 652 branches # 441.435 M/sec watchdog/6-44 652 branches # 437.290 M/sec watchdog/7-50 652 branches # 221.467 M/sec vmstat-23127 8,960 branch-misses # 1.66% of all branches irqbalance-2780 3,047 branch-misses # 1.16% of all branches perf-24165 2,876 branch-misses # 0.77% of all branches sshd-23111 1,843 branch-misses # 8.43% of all branches thermald-2841 1,444 branch-misses # 4.57% of all branches sshd-23058 1,379 branch-misses # 12.91% of all branches kworker/u16:1-18249 982 branch-misses # 12.44% of all branches rcu_sched-8 893 branch-misses # 10.27% of all branches kworker/u16:2-23146 578 branch-misses # 14.10% of all branches kworker/0:2-19991 376 branch-misses # 6.53% of all branches gmain-2700 280 branch-misses # 10.61% of all branches kworker/6:0-17528 196 branch-misses # 8.84% of all branches kworker/4:1-15354 187 branch-misses # 6.79% of all branches kworker/5:2-31362 123 branch-misses # 10.87% of all branches watchdog/0-11 95 branch-misses # 14.57% of all branches watchdog/4-32 89 branch-misses # 13.65% of all branches kworker/3:2-12870 80 branch-misses # 7.40% of all branches watchdog/3-26 61 branch-misses # 9.36% of all branches kworker/4:1H-1887 60 branch-misses # 8.28% of all branches watchdog/2-20 52 branch-misses # 7.98% of all branches ksoftirqd/0-7 47 branch-misses # 6.65% of all branches watchdog/1-14 46 branch-misses # 7.06% of all branches watchdog/7-50 13 branch-misses # 1.99% of all branches 
watchdog/5-38 8 branch-misses # 1.23% of all branches watchdog/6-44 7 branch-misses # 1.07% of all branches 3.695150786 seconds time elapsed root@skl:/tmp# perf stat --per-thread -M IPC,CPI ^C Performance counter stats for 'system wide': vmstat-23127 2,000,783 inst_retired.any # 1.5 IPC thermald-2841 1,472,670 inst_retired.any # 1.3 IPC sshd-23111 977,374 inst_retired.any # 1.2 IPC perf-24163 483,779 inst_retired.any # 0.2 IPC gmain-2700 341,213 inst_retired.any # 0.9 IPC sshd-23058 148,891 inst_retired.any # 0.8 IPC rtkit-daemon-3288 71,210 inst_retired.any # 0.7 IPC kworker/u16:1-18249 39,562 inst_retired.any # 0.3 IPC rcu_sched-8 14,474 inst_retired.any # 0.8 IPC kworker/0:2-19991 7,659 inst_retired.any # 0.2 IPC kworker/4:1-15354 6,714 inst_retired.any # 0.8 IPC rtkit-daemon-3289 4,839 inst_retired.any # 0.3 IPC kworker/6:0-17528 3,321 inst_retired.any # 0.6 IPC kworker/5:2-31362 3,215 inst_retired.any # 0.5 IPC kworker/7:2-23145 3,173 inst_retired.any # 0.7 IPC kworker/4:1H-1887 1,719 inst_retired.any # 0.3 IPC watchdog/0-11 1,479 inst_retired.any # 0.3 IPC watchdog/1-14 1,479 inst_retired.any # 0.3 IPC watchdog/2-20 1,479 inst_retired.any # 0.4 IPC watchdog/3-26 1,479 inst_retired.any # 0.4 IPC watchdog/4-32 1,479 inst_retired.any # 0.3 IPC watchdog/5-38 1,479 inst_retired.any # 0.3 IPC watchdog/6-44 1,479 inst_retired.any # 0.7 IPC watchdog/7-50 1,479 inst_retired.any # 0.7 IPC kworker/u16:2-23146 1,408 inst_retired.any # 0.5 IPC perf-24163 2,249,872 cpu_clk_unhalted.thread vmstat-23127 1,352,455 cpu_clk_unhalted.thread thermald-2841 1,161,140 cpu_clk_unhalted.thread sshd-23111 807,827 cpu_clk_unhalted.thread gmain-2700 375,535 cpu_clk_unhalted.thread sshd-23058 194,071 cpu_clk_unhalted.thread kworker/u16:1-18249 114,306 cpu_clk_unhalted.thread rtkit-daemon-3288 103,547 cpu_clk_unhalted.thread kworker/0:2-19991 46,550 cpu_clk_unhalted.thread rcu_sched-8 18,855 cpu_clk_unhalted.thread rtkit-daemon-3289 17,549 cpu_clk_unhalted.thread kworker/4:1-15354 8,812 
cpu_clk_unhalted.thread kworker/5:2-31362 6,812 cpu_clk_unhalted.thread kworker/4:1H-1887 5,270 cpu_clk_unhalted.thread kworker/6:0-17528 5,111 cpu_clk_unhalted.thread kworker/7:2-23145 4,667 cpu_clk_unhalted.thread watchdog/0-11 4,663 cpu_clk_unhalted.thread watchdog/1-14 4,663 cpu_clk_unhalted.thread watchdog/4-32 4,626 cpu_clk_unhalted.thread watchdog/5-38 4,403 cpu_clk_unhalted.thread watchdog/3-26 3,936 cpu_clk_unhalted.thread watchdog/2-20 3,850 cpu_clk_unhalted.thread kworker/u16:2-23146 2,654 cpu_clk_unhalted.thread watchdog/6-44 2,017 cpu_clk_unhalted.thread watchdog/7-50 2,017 cpu_clk_unhalted.thread vmstat-23127 2,000,783 inst_retired.any # 0.7 CPI thermald-2841 1,472,670 inst_retired.any # 0.8 CPI sshd-23111 977,374 inst_retired.any # 0.8 CPI perf-24163 495,037 inst_retired.any # 4.7 CPI gmain-2700 341,213 inst_retired.any # 1.1 CPI sshd-23058 148,891 inst_retired.any # 1.3 CPI rtkit-daemon-3288 71,210 inst_retired.any # 1.5 CPI kworker/u16:1-18249 39,562 inst_retired.any # 2.9 CPI rcu_sched-8 14,474 inst_retired.any # 1.3 CPI kworker/0:2-19991 7,659 inst_retired.any # 6.1 CPI kworker/4:1-15354 6,714 inst_retired.any # 1.3 CPI rtkit-daemon-3289 4,839 inst_retired.any # 3.6 CPI kworker/6:0-17528 3,321 inst_retired.any # 1.5 CPI kworker/5:2-31362 3,215 inst_retired.any # 2.1 CPI kworker/7:2-23145 3,173 inst_retired.any # 1.5 CPI kworker/4:1H-1887 1,719 inst_retired.any # 3.1 CPI watchdog/0-11 1,479 inst_retired.any # 3.2 CPI watchdog/1-14 1,479 inst_retired.any # 3.2 CPI watchdog/2-20 1,479 inst_retired.any # 2.6 CPI watchdog/3-26 1,479 inst_retired.any # 2.7 CPI watchdog/4-32 1,479 inst_retired.any # 3.1 CPI watchdog/5-38 1,479 inst_retired.any # 3.0 CPI watchdog/6-44 1,479 inst_retired.any # 1.4 CPI watchdog/7-50 1,479 inst_retired.any # 1.4 CPI kworker/u16:2-23146 1,408 inst_retired.any # 1.9 CPI perf-24163 2,302,323 cycles vmstat-23127 1,352,455 cycles thermald-2841 1,161,140 cycles sshd-23111 807,827 cycles gmain-2700 375,535 cycles sshd-23058 
194,071 cycles kworker/u16:1-18249 114,306 cycles rtkit-daemon-3288 103,547 cycles kworker/0:2-19991 46,550 cycles rcu_sched-8 18,855 cycles rtkit-daemon-3289 17,549 cycles kworker/4:1-15354 8,812 cycles kworker/5:2-31362 6,812 cycles kworker/4:1H-1887 5,270 cycles kworker/6:0-17528 5,111 cycles kworker/7:2-23145 4,667 cycles watchdog/0-11 4,663 cycles watchdog/1-14 4,663 cycles watchdog/4-32 4,626 cycles watchdog/5-38 4,403 cycles watchdog/3-26 3,936 cycles watchdog/2-20 3,850 cycles kworker/u16:2-23146 2,654 cycles watchdog/6-44 2,017 cycles watchdog/7-50 2,017 cycles 2.175726600 seconds time elapsed Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-12-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 77 ++++++++++++++++++++++++++++++++++++++++------- tools/perf/util/stat.h | 9 ++++++ 2 files changed, 75 insertions(+), 11 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index ee708ba6f79a..58d501d1f5fd 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1351,13 +1351,24 @@ static void print_aggr(char *prefix) } } -static void print_aggr_thread(struct perf_evsel *counter, char *prefix) +static int cmp_val(const void *a, const void *b) { - FILE *output = stat_config.output; - int nthreads = thread_map__nr(counter->threads); - int ncpus = cpu_map__nr(counter->cpus); - int cpu, thread; + return ((struct perf_aggr_thread_value *)b)->val - + ((struct perf_aggr_thread_value *)a)->val; +} + +static struct perf_aggr_thread_value *sort_aggr_thread( + struct perf_evsel *counter, + int nthreads, int ncpus, + int *ret) +{ + int cpu, thread, i = 0; double uval; + struct perf_aggr_thread_value *buf; + + buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value)); + if (!buf) + return NULL; for (thread = 0; thread < nthreads; thread++) { u64 ena = 0, run 
= 0, val = 0; @@ -1368,19 +1379,63 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) run += perf_counts(counter->counts, cpu, thread)->run; } + uval = val * counter->scale; + + /* + * Skip value 0 when enabling --per-thread globally, + * otherwise too many 0 output. + */ + if (uval == 0.0 && target__has_per_thread(&target)) + continue; + + buf[i].counter = counter; + buf[i].id = thread; + buf[i].uval = uval; + buf[i].val = val; + buf[i].run = run; + buf[i].ena = ena; + i++; + } + + qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val); + + if (ret) + *ret = i; + + return buf; +} + +static void print_aggr_thread(struct perf_evsel *counter, char *prefix) +{ + FILE *output = stat_config.output; + int nthreads = thread_map__nr(counter->threads); + int ncpus = cpu_map__nr(counter->cpus); + int thread, sorted_threads, id; + struct perf_aggr_thread_value *buf; + + buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads); + if (!buf) { + perror("cannot sort aggr thread"); + return; + } + + for (thread = 0; thread < sorted_threads; thread++) { if (prefix) fprintf(output, "%s", prefix); - uval = val * counter->scale; - + id = buf[thread].id; if (stat_config.stats) - printout(thread, 0, counter, uval, prefix, run, ena, - 1.0, &stat_config.stats[thread]); + printout(id, 0, buf[thread].counter, buf[thread].uval, + prefix, buf[thread].run, buf[thread].ena, 1.0, + &stat_config.stats[id]); else - printout(thread, 0, counter, uval, prefix, run, ena, - 1.0, &rt_stat); + printout(id, 0, buf[thread].counter, buf[thread].uval, + prefix, buf[thread].run, buf[thread].ena, 1.0, + &rt_stat); fputc('\n', output); } + + free(buf); } struct caggr_data { diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 2ed95dc72784..dbc6f7134f61 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -111,6 +111,15 @@ static inline void init_stats(struct stats *stats) struct perf_evsel; struct perf_evlist; +struct perf_aggr_thread_value { 
+ struct perf_evsel *counter; + int id; + double uval; + u64 val; + u64 run; + u64 ena; +}; + bool __perf_evsel_stat__is(struct perf_evsel *evsel, enum perf_stat_evsel_id id); -- cgit v1.2.3 From 06c3f2aa9fc68e7f3fe3d83e7569d2a2801d9f99 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 6 Dec 2017 18:45:35 +0100 Subject: perf utils: Move is_directory() to path.h So that it can be used more widely, like in the next patch, when it will be used to fix a bug in 'perf test' handling of dirent.d_type == DT_UNKNOWN. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171206174535.25380-1-jolsa@kernel.org [ Split from a larger patch, removed needless includes in path.h ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 14 +------------- tools/perf/util/path.c | 14 ++++++++++++++ tools/perf/util/path.h | 3 +++ 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index fac6f053e4da..77e47cf39f2c 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -26,6 +26,7 @@ #include "util/string2.h" #include "util/thread-stack.h" #include "util/time-utils.h" +#include "util/path.h" #include "print_binary.h" #include #include @@ -2401,19 +2402,6 @@ out: return rc; } -/* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */ -static int is_directory(const char *base_path, const struct dirent *dent) -{ - char path[PATH_MAX]; - struct stat st; - - sprintf(path, "%s/%s", base_path, dent->d_name); - if (stat(path, &st)) - return 0; - - return S_ISDIR(st.st_mode); -} - #define for_each_lang(scripts_path, scripts_dir, lang_dirent) \ while ((lang_dirent = readdir(scripts_dir)) != NULL) \ if ((lang_dirent->d_type == DT_DIR || \ diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c index 933f5c6bffb4..ca56ba2dd3da 100644 --- a/tools/perf/util/path.c +++ b/tools/perf/util/path.c 
@@ -18,6 +18,7 @@ #include #include #include +#include #include static char bad_path[] = "/bad-path/"; @@ -77,3 +78,16 @@ bool is_regular_file(const char *file) return S_ISREG(st.st_mode); } + +/* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */ +bool is_directory(const char *base_path, const struct dirent *dent) +{ + char path[PATH_MAX]; + struct stat st; + + sprintf(path, "%s/%s", base_path, dent->d_name); + if (stat(path, &st)) + return false; + + return S_ISDIR(st.st_mode); +} diff --git a/tools/perf/util/path.h b/tools/perf/util/path.h index 14a254ada7eb..f014f905df50 100644 --- a/tools/perf/util/path.h +++ b/tools/perf/util/path.h @@ -2,9 +2,12 @@ #ifndef _PERF_PATH_H #define _PERF_PATH_H +struct dirent; + int path__join(char *bf, size_t size, const char *path1, const char *path2); int path__join3(char *bf, size_t size, const char *path1, const char *path2, const char *path3); bool is_regular_file(const char *file); +bool is_directory(const char *base_path, const struct dirent *dent); #endif /* _PERF_PATH_H */ -- cgit v1.2.3 From 378811ac303df13efbe49f3ad1795b63d334ac5d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 6 Dec 2017 18:45:35 +0100 Subject: perf test: Handle properly readdir DT_UNKNOWN Some systems can return DT_UNKNOWN in readdir's struct dirent::d_type and we must handle it properly. In this case we can directly check if the entity we found is a directory and skip it.
Reported-by: Michael Petlan Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171206174535.25380-1-jolsa@kernel.org [ Split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 766573e236e4..fafa014240cd 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -411,9 +411,9 @@ static const char *shell_test__description(char *description, size_t size, return description ? trim(description + 1) : NULL; } -#define for_each_shell_test(dir, ent) \ +#define for_each_shell_test(dir, base, ent) \ while ((ent = readdir(dir)) != NULL) \ - if (ent->d_type == DT_REG && ent->d_name[0] != '.') + if (!is_directory(base, ent)) static const char *shell_tests__dir(char *path, size_t size) { @@ -452,7 +452,7 @@ static int shell_tests__max_desc_width(void) if (!dir) return -1; - for_each_shell_test(dir, ent) { + for_each_shell_test(dir, path, ent) { char bf[256]; const char *desc = shell_test__description(bf, sizeof(bf), path, ent->d_name); @@ -504,7 +504,7 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width) if (!dir) return -1; - for_each_shell_test(dir, ent) { + for_each_shell_test(dir, st.dir, ent) { int curr = i++; char desc[256]; struct test test = { @@ -614,7 +614,7 @@ static int perf_test__list_shell(int argc, const char **argv, int i) if (!dir) return -1; - for_each_shell_test(dir, ent) { + for_each_shell_test(dir, path, ent) { int curr = i++; char bf[256]; struct test t = { -- cgit v1.2.3 From 3315d14f8eea27a845bd2e3a88341a35f4025866 Mon Sep 17 00:00:00 2001 From: Pravin Shedge Date: Wed, 6 Dec 2017 23:13:24 +0530 Subject: perf perf: Remove duplicate includes These duplicate includes have been found with scripts/checkincludes.pl but they have been removed manually to avoid 
removing false positives. Signed-off-by: Pravin Shedge Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1512582204-6493-1-git-send-email-pravin.shedge4linux@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/futex-hash.c | 1 - tools/perf/builtin-c2c.c | 3 --- tools/perf/builtin-record.c | 1 - tools/perf/builtin-stat.c | 1 - tools/perf/tests/parse-events.c | 1 - tools/perf/util/auxtrace.c | 3 --- tools/perf/util/header.c | 2 -- tools/perf/util/metricgroup.c | 2 -- tools/perf/util/scripting-engines/trace-event-python.c | 1 - 9 files changed, 15 deletions(-) diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 2defb6df7fd0..9aa3a674829b 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -27,7 +27,6 @@ #include "cpumap.h" #include -#include static unsigned int nthreads = 0; static unsigned int nsecs = 10; diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index f1da9b0833c0..c0debc3f79b6 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -27,13 +27,10 @@ #include "sort.h" #include "tool.h" #include "data.h" -#include "sort.h" #include "event.h" #include "evlist.h" #include "evsel.h" -#include #include "ui/browsers/hists.h" -#include "evlist.h" #include "thread.h" struct c2c_hists { diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 0a5749ef8b94..98da8cb8de93 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -51,7 +51,6 @@ #include #include #include -#include #include struct switch_output { diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 58d501d1f5fd..98bf9d32f222 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -63,7 +63,6 @@ #include "util/group.h" #include "util/session.h" #include "util/tool.h" -#include "util/group.h" #include "util/string2.h" #include "util/metricgroup.h" 
#include "asm/bug.h" diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index f0679613bd18..18b06444f230 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #define PERF_TP_SAMPLE_TYPE (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | \ diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index a33491416400..c76687e42344 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -31,9 +31,6 @@ #include #include #include -#include -#include -#include #include #include "../perf.h" diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 5890e08e0754..ca73aa7be708 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -15,9 +15,7 @@ #include #include #include -#include #include -#include #include "evlist.h" #include "evsel.h" diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index e48410c99b39..1ddc3d1d0147 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -20,12 +20,10 @@ #include "pmu.h" #include "expr.h" #include "rblist.h" -#include "pmu.h" #include #include #include #include "pmu-events/pmu-events.h" -#include "strbuf.h" #include "strlist.h" #include #include diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index c7187f067d31..c1848b543f27 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -43,7 +43,6 @@ #include "../db-export.h" #include "../thread-stack.h" #include "../trace-event.h" -#include "../machine.h" #include "../call-path.h" #include "thread_map.h" #include "cpumap.h" -- cgit v1.2.3 From 7af7919f0f4bde0cec1f546f924be81cfe50533d Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 7 Dec 2017 09:27:59 +0100 Subject: tools include s390: Grab a copy of 
arch/s390/include/uapi/asm/unistd.h Will be used for generating the syscall id/string translation table. Signed-off-by: Hendrik Brueckner Reviewed-by: Thomas Richter Cc: Jiri Olsa Cc: Michael Petlan Cc: linux-s390@vger.kernel.org LPU-Reference: 1512635281-20733-2-git-send-email-brueckner@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-vjfbfvgjrnqnbdluqd7leo98@git.kernel.org [ split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/s390/include/uapi/asm/unistd.h | 412 ++++++++++++++++++++++++++++++ tools/perf/check-headers.sh | 1 + 2 files changed, 413 insertions(+) create mode 100644 tools/arch/s390/include/uapi/asm/unistd.h diff --git a/tools/arch/s390/include/uapi/asm/unistd.h b/tools/arch/s390/include/uapi/asm/unistd.h new file mode 100644 index 000000000000..725120939051 --- /dev/null +++ b/tools/arch/s390/include/uapi/asm/unistd.h @@ -0,0 +1,412 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * S390 version + * + * Derived from "include/asm-i386/unistd.h" + */ + +#ifndef _UAPI_ASM_S390_UNISTD_H_ +#define _UAPI_ASM_S390_UNISTD_H_ + +/* + * This file contains the system call numbers. 
+ */ + +#define __NR_exit 1 +#define __NR_fork 2 +#define __NR_read 3 +#define __NR_write 4 +#define __NR_open 5 +#define __NR_close 6 +#define __NR_restart_syscall 7 +#define __NR_creat 8 +#define __NR_link 9 +#define __NR_unlink 10 +#define __NR_execve 11 +#define __NR_chdir 12 +#define __NR_mknod 14 +#define __NR_chmod 15 +#define __NR_lseek 19 +#define __NR_getpid 20 +#define __NR_mount 21 +#define __NR_umount 22 +#define __NR_ptrace 26 +#define __NR_alarm 27 +#define __NR_pause 29 +#define __NR_utime 30 +#define __NR_access 33 +#define __NR_nice 34 +#define __NR_sync 36 +#define __NR_kill 37 +#define __NR_rename 38 +#define __NR_mkdir 39 +#define __NR_rmdir 40 +#define __NR_dup 41 +#define __NR_pipe 42 +#define __NR_times 43 +#define __NR_brk 45 +#define __NR_signal 48 +#define __NR_acct 51 +#define __NR_umount2 52 +#define __NR_ioctl 54 +#define __NR_fcntl 55 +#define __NR_setpgid 57 +#define __NR_umask 60 +#define __NR_chroot 61 +#define __NR_ustat 62 +#define __NR_dup2 63 +#define __NR_getppid 64 +#define __NR_getpgrp 65 +#define __NR_setsid 66 +#define __NR_sigaction 67 +#define __NR_sigsuspend 72 +#define __NR_sigpending 73 +#define __NR_sethostname 74 +#define __NR_setrlimit 75 +#define __NR_getrusage 77 +#define __NR_gettimeofday 78 +#define __NR_settimeofday 79 +#define __NR_symlink 83 +#define __NR_readlink 85 +#define __NR_uselib 86 +#define __NR_swapon 87 +#define __NR_reboot 88 +#define __NR_readdir 89 +#define __NR_mmap 90 +#define __NR_munmap 91 +#define __NR_truncate 92 +#define __NR_ftruncate 93 +#define __NR_fchmod 94 +#define __NR_getpriority 96 +#define __NR_setpriority 97 +#define __NR_statfs 99 +#define __NR_fstatfs 100 +#define __NR_socketcall 102 +#define __NR_syslog 103 +#define __NR_setitimer 104 +#define __NR_getitimer 105 +#define __NR_stat 106 +#define __NR_lstat 107 +#define __NR_fstat 108 +#define __NR_lookup_dcookie 110 +#define __NR_vhangup 111 +#define __NR_idle 112 +#define __NR_wait4 114 +#define __NR_swapoff 115 +#define 
__NR_sysinfo 116 +#define __NR_ipc 117 +#define __NR_fsync 118 +#define __NR_sigreturn 119 +#define __NR_clone 120 +#define __NR_setdomainname 121 +#define __NR_uname 122 +#define __NR_adjtimex 124 +#define __NR_mprotect 125 +#define __NR_sigprocmask 126 +#define __NR_create_module 127 +#define __NR_init_module 128 +#define __NR_delete_module 129 +#define __NR_get_kernel_syms 130 +#define __NR_quotactl 131 +#define __NR_getpgid 132 +#define __NR_fchdir 133 +#define __NR_bdflush 134 +#define __NR_sysfs 135 +#define __NR_personality 136 +#define __NR_afs_syscall 137 /* Syscall for Andrew File System */ +#define __NR_getdents 141 +#define __NR_flock 143 +#define __NR_msync 144 +#define __NR_readv 145 +#define __NR_writev 146 +#define __NR_getsid 147 +#define __NR_fdatasync 148 +#define __NR__sysctl 149 +#define __NR_mlock 150 +#define __NR_munlock 151 +#define __NR_mlockall 152 +#define __NR_munlockall 153 +#define __NR_sched_setparam 154 +#define __NR_sched_getparam 155 +#define __NR_sched_setscheduler 156 +#define __NR_sched_getscheduler 157 +#define __NR_sched_yield 158 +#define __NR_sched_get_priority_max 159 +#define __NR_sched_get_priority_min 160 +#define __NR_sched_rr_get_interval 161 +#define __NR_nanosleep 162 +#define __NR_mremap 163 +#define __NR_query_module 167 +#define __NR_poll 168 +#define __NR_nfsservctl 169 +#define __NR_prctl 172 +#define __NR_rt_sigreturn 173 +#define __NR_rt_sigaction 174 +#define __NR_rt_sigprocmask 175 +#define __NR_rt_sigpending 176 +#define __NR_rt_sigtimedwait 177 +#define __NR_rt_sigqueueinfo 178 +#define __NR_rt_sigsuspend 179 +#define __NR_pread64 180 +#define __NR_pwrite64 181 +#define __NR_getcwd 183 +#define __NR_capget 184 +#define __NR_capset 185 +#define __NR_sigaltstack 186 +#define __NR_sendfile 187 +#define __NR_getpmsg 188 +#define __NR_putpmsg 189 +#define __NR_vfork 190 +#define __NR_pivot_root 217 +#define __NR_mincore 218 +#define __NR_madvise 219 +#define __NR_getdents64 220 +#define __NR_readahead 222 
+#define __NR_setxattr 224 +#define __NR_lsetxattr 225 +#define __NR_fsetxattr 226 +#define __NR_getxattr 227 +#define __NR_lgetxattr 228 +#define __NR_fgetxattr 229 +#define __NR_listxattr 230 +#define __NR_llistxattr 231 +#define __NR_flistxattr 232 +#define __NR_removexattr 233 +#define __NR_lremovexattr 234 +#define __NR_fremovexattr 235 +#define __NR_gettid 236 +#define __NR_tkill 237 +#define __NR_futex 238 +#define __NR_sched_setaffinity 239 +#define __NR_sched_getaffinity 240 +#define __NR_tgkill 241 +/* Number 242 is reserved for tux */ +#define __NR_io_setup 243 +#define __NR_io_destroy 244 +#define __NR_io_getevents 245 +#define __NR_io_submit 246 +#define __NR_io_cancel 247 +#define __NR_exit_group 248 +#define __NR_epoll_create 249 +#define __NR_epoll_ctl 250 +#define __NR_epoll_wait 251 +#define __NR_set_tid_address 252 +#define __NR_fadvise64 253 +#define __NR_timer_create 254 +#define __NR_timer_settime 255 +#define __NR_timer_gettime 256 +#define __NR_timer_getoverrun 257 +#define __NR_timer_delete 258 +#define __NR_clock_settime 259 +#define __NR_clock_gettime 260 +#define __NR_clock_getres 261 +#define __NR_clock_nanosleep 262 +/* Number 263 is reserved for vserver */ +#define __NR_statfs64 265 +#define __NR_fstatfs64 266 +#define __NR_remap_file_pages 267 +#define __NR_mbind 268 +#define __NR_get_mempolicy 269 +#define __NR_set_mempolicy 270 +#define __NR_mq_open 271 +#define __NR_mq_unlink 272 +#define __NR_mq_timedsend 273 +#define __NR_mq_timedreceive 274 +#define __NR_mq_notify 275 +#define __NR_mq_getsetattr 276 +#define __NR_kexec_load 277 +#define __NR_add_key 278 +#define __NR_request_key 279 +#define __NR_keyctl 280 +#define __NR_waitid 281 +#define __NR_ioprio_set 282 +#define __NR_ioprio_get 283 +#define __NR_inotify_init 284 +#define __NR_inotify_add_watch 285 +#define __NR_inotify_rm_watch 286 +#define __NR_migrate_pages 287 +#define __NR_openat 288 +#define __NR_mkdirat 289 +#define __NR_mknodat 290 +#define __NR_fchownat 291 
+#define __NR_futimesat 292 +#define __NR_unlinkat 294 +#define __NR_renameat 295 +#define __NR_linkat 296 +#define __NR_symlinkat 297 +#define __NR_readlinkat 298 +#define __NR_fchmodat 299 +#define __NR_faccessat 300 +#define __NR_pselect6 301 +#define __NR_ppoll 302 +#define __NR_unshare 303 +#define __NR_set_robust_list 304 +#define __NR_get_robust_list 305 +#define __NR_splice 306 +#define __NR_sync_file_range 307 +#define __NR_tee 308 +#define __NR_vmsplice 309 +#define __NR_move_pages 310 +#define __NR_getcpu 311 +#define __NR_epoll_pwait 312 +#define __NR_utimes 313 +#define __NR_fallocate 314 +#define __NR_utimensat 315 +#define __NR_signalfd 316 +#define __NR_timerfd 317 +#define __NR_eventfd 318 +#define __NR_timerfd_create 319 +#define __NR_timerfd_settime 320 +#define __NR_timerfd_gettime 321 +#define __NR_signalfd4 322 +#define __NR_eventfd2 323 +#define __NR_inotify_init1 324 +#define __NR_pipe2 325 +#define __NR_dup3 326 +#define __NR_epoll_create1 327 +#define __NR_preadv 328 +#define __NR_pwritev 329 +#define __NR_rt_tgsigqueueinfo 330 +#define __NR_perf_event_open 331 +#define __NR_fanotify_init 332 +#define __NR_fanotify_mark 333 +#define __NR_prlimit64 334 +#define __NR_name_to_handle_at 335 +#define __NR_open_by_handle_at 336 +#define __NR_clock_adjtime 337 +#define __NR_syncfs 338 +#define __NR_setns 339 +#define __NR_process_vm_readv 340 +#define __NR_process_vm_writev 341 +#define __NR_s390_runtime_instr 342 +#define __NR_kcmp 343 +#define __NR_finit_module 344 +#define __NR_sched_setattr 345 +#define __NR_sched_getattr 346 +#define __NR_renameat2 347 +#define __NR_seccomp 348 +#define __NR_getrandom 349 +#define __NR_memfd_create 350 +#define __NR_bpf 351 +#define __NR_s390_pci_mmio_write 352 +#define __NR_s390_pci_mmio_read 353 +#define __NR_execveat 354 +#define __NR_userfaultfd 355 +#define __NR_membarrier 356 +#define __NR_recvmmsg 357 +#define __NR_sendmmsg 358 +#define __NR_socket 359 +#define __NR_socketpair 360 +#define __NR_bind 
361 +#define __NR_connect 362 +#define __NR_listen 363 +#define __NR_accept4 364 +#define __NR_getsockopt 365 +#define __NR_setsockopt 366 +#define __NR_getsockname 367 +#define __NR_getpeername 368 +#define __NR_sendto 369 +#define __NR_sendmsg 370 +#define __NR_recvfrom 371 +#define __NR_recvmsg 372 +#define __NR_shutdown 373 +#define __NR_mlock2 374 +#define __NR_copy_file_range 375 +#define __NR_preadv2 376 +#define __NR_pwritev2 377 +#define __NR_s390_guarded_storage 378 +#define __NR_statx 379 +#define __NR_s390_sthyi 380 +#define NR_syscalls 381 + +/* + * There are some system calls that are not present on 64 bit, some + * have a different name although they do the same (e.g. __NR_chown32 + * is __NR_chown on 64 bit). + */ +#ifndef __s390x__ + +#define __NR_time 13 +#define __NR_lchown 16 +#define __NR_setuid 23 +#define __NR_getuid 24 +#define __NR_stime 25 +#define __NR_setgid 46 +#define __NR_getgid 47 +#define __NR_geteuid 49 +#define __NR_getegid 50 +#define __NR_setreuid 70 +#define __NR_setregid 71 +#define __NR_getrlimit 76 +#define __NR_getgroups 80 +#define __NR_setgroups 81 +#define __NR_fchown 95 +#define __NR_ioperm 101 +#define __NR_setfsuid 138 +#define __NR_setfsgid 139 +#define __NR__llseek 140 +#define __NR__newselect 142 +#define __NR_setresuid 164 +#define __NR_getresuid 165 +#define __NR_setresgid 170 +#define __NR_getresgid 171 +#define __NR_chown 182 +#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ +#define __NR_mmap2 192 +#define __NR_truncate64 193 +#define __NR_ftruncate64 194 +#define __NR_stat64 195 +#define __NR_lstat64 196 +#define __NR_fstat64 197 +#define __NR_lchown32 198 +#define __NR_getuid32 199 +#define __NR_getgid32 200 +#define __NR_geteuid32 201 +#define __NR_getegid32 202 +#define __NR_setreuid32 203 +#define __NR_setregid32 204 +#define __NR_getgroups32 205 +#define __NR_setgroups32 206 +#define __NR_fchown32 207 +#define __NR_setresuid32 208 +#define __NR_getresuid32 209 +#define __NR_setresgid32 210 
+#define __NR_getresgid32 211 +#define __NR_chown32 212 +#define __NR_setuid32 213 +#define __NR_setgid32 214 +#define __NR_setfsuid32 215 +#define __NR_setfsgid32 216 +#define __NR_fcntl64 221 +#define __NR_sendfile64 223 +#define __NR_fadvise64_64 264 +#define __NR_fstatat64 293 + +#else + +#define __NR_select 142 +#define __NR_getrlimit 191 /* SuS compliant getrlimit */ +#define __NR_lchown 198 +#define __NR_getuid 199 +#define __NR_getgid 200 +#define __NR_geteuid 201 +#define __NR_getegid 202 +#define __NR_setreuid 203 +#define __NR_setregid 204 +#define __NR_getgroups 205 +#define __NR_setgroups 206 +#define __NR_fchown 207 +#define __NR_setresuid 208 +#define __NR_getresuid 209 +#define __NR_setresgid 210 +#define __NR_getresgid 211 +#define __NR_chown 212 +#define __NR_setuid 213 +#define __NR_setgid 214 +#define __NR_setfsuid 215 +#define __NR_setfsgid 216 +#define __NR_newfstatat 293 + +#endif + +#endif /* _UAPI_ASM_S390_UNISTD_H_ */ diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index ea602cd1b43a..f81ca508700c 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -33,6 +33,7 @@ arch/s390/include/uapi/asm/kvm.h arch/s390/include/uapi/asm/kvm_perf.h arch/s390/include/uapi/asm/ptrace.h arch/s390/include/uapi/asm/sie.h +arch/s390/include/uapi/asm/unistd.h arch/arm/include/uapi/asm/kvm.h arch/arm64/include/uapi/asm/kvm.h include/asm-generic/bitops/arch_hweight.h -- cgit v1.2.3 From 164a747f1ac2380c582988d2a4d9a9af13f8e644 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 7 Dec 2017 09:27:59 +0100 Subject: perf s390: Generate system call table from asm/unistd.h This should speed up accessing new system calls introduced with the kernel rather than waiting for libaudit updates to include them. 
Committer testing: $ rm -rf /tmp/build/perf $ mkdir /tmp/build/perf $ make srctree=/home/acme/git/perf -C tools/perf/arch/s390 OUTPUT=/tmp/build/perf/ archheaders make: Entering directory '/home/acme/git/perf/tools/perf/arch/s390' /bin/sh '/home/acme/git/perf/tools/perf/arch/s390/entry/syscalls//mksyscalltbl' 'cc' /home/acme/git/perf/tools/arch/s390/include/uapi/asm/unistd.h > /tmp/build/perf/arch/s390/include/generated/asm/syscalls_64.c make: Leaving directory '/home/acme/git/perf/tools/perf/arch/s390' $ head -5 /tmp/build/perf/arch/s390/include/generated/asm/syscalls_64.c static const char *syscalltbl_s390_64[] = { [1] = "exit", [2] = "fork", [3] = "read", [4] = "write", $ tail -5 /tmp/build/perf/arch/s390/include/generated/asm/syscalls_64.c [378] = "s390_guarded_storage", [379] = "statx", [380] = "s390_sthyi", }; #define SYSCALLTBL_S390_64_MAX_ID 380 $ Now to plug this into 'perf trace' proper. Signed-off-by: Hendrik Brueckner Reviewed-by: Thomas Richter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Michael Petlan Cc: linux-s390@vger.kernel.org LPU-Reference: 1512635281-20733-2-git-send-email-brueckner@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-h5km60rdg3rqxvsys85q50l3@git.kernel.org [ split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/s390/Makefile | 21 ++++++++++++++ tools/perf/arch/s390/entry/syscalls/mksyscalltbl | 36 ++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100755 tools/perf/arch/s390/entry/syscalls/mksyscalltbl diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile index 09ba923debe8..48228de415d0 100644 --- a/tools/perf/arch/s390/Makefile +++ b/tools/perf/arch/s390/Makefile @@ -3,3 +3,24 @@ PERF_HAVE_DWARF_REGS := 1 endif HAVE_KVM_STAT_SUPPORT := 1 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 + +# +# Syscall table generation for perf +# + +out := $(OUTPUT)arch/s390/include/generated/asm +header := $(out)/syscalls_64.c +sysdef := 
$(srctree)/tools/arch/s390/include/uapi/asm/unistd.h +sysprf := $(srctree)/tools/perf/arch/s390/entry/syscalls/ +systbl := $(sysprf)/mksyscalltbl + +# Create output directory if not already present +_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') + +$(header): $(sysdef) $(systbl) + $(Q)$(SHELL) '$(systbl)' '$(CC)' $(sysdef) > $@ + +clean:: + $(call QUIET_CLEAN, s390) $(RM) $(header) + +archheaders: $(header) diff --git a/tools/perf/arch/s390/entry/syscalls/mksyscalltbl b/tools/perf/arch/s390/entry/syscalls/mksyscalltbl new file mode 100755 index 000000000000..7fa0d0abd419 --- /dev/null +++ b/tools/perf/arch/s390/entry/syscalls/mksyscalltbl @@ -0,0 +1,36 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Generate system call table for perf +# +# +# Copyright IBM Corp. 2017 +# Author(s): Hendrik Brueckner +# + +gcc=$1 +input=$2 + +if ! test -r $input; then + echo "Could not read input file" >&2 + exit 1 +fi + +create_table() +{ + local max_nr + + echo 'static const char *syscalltbl_s390_64[] = {' + while read sc nr; do + printf '\t[%d] = "%s",\n' $nr $sc + max_nr=$nr + done + echo '};' + echo "#define SYSCALLTBL_S390_64_MAX_ID $max_nr" +} + + +$gcc -m64 -E -dM -x c $input \ + |sed -ne 's/^#define __NR_//p' \ + |sort -t' ' -k2 -nu \ + |create_table -- cgit v1.2.3 From 901bb0280b60782603e999a6c1e30ddfe1c7b0fb Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 7 Dec 2017 09:27:59 +0100 Subject: perf trace: Use generated syscall table on s390 too This should speed up accessing new system calls introduced with the kernel rather than waiting for libaudit updates to include them. It also enables users to specify wildcards, for example, perf trace -e 'open*', just like was already possible on x86. 
Signed-off-by: Hendrik Brueckner Reviewed-by: Thomas Richter Cc: Jiri Olsa Cc: Michael Petlan Cc: linux-s390@vger.kernel.org LPU-Reference: 1512635281-20733-2-git-send-email-brueckner@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-htplh3nbrivi7g3cffbh4fsu@git.kernel.org [ split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 10 +++++++++- tools/perf/util/syscalltbl.c | 4 ++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 79b117a03fd7..6f73c2316740 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -22,6 +22,7 @@ include $(srctree)/tools/scripts/Makefile.arch $(call detected_var,SRCARCH) NO_PERF_REGS := 1 +NO_SYSCALL_TABLE := 1 # Additional ARCH settings for ppc ifeq ($(SRCARCH),powerpc) @@ -33,7 +34,8 @@ endif ifeq ($(SRCARCH),x86) $(call detected,CONFIG_X86) ifeq (${IS_64_BIT}, 1) - CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE -I$(OUTPUT)arch/x86/include/generated + NO_SYSCALL_TABLE := 0 + CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -I$(OUTPUT)arch/x86/include/generated ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S LIBUNWIND_LIBS = -lunwind-x86_64 -lunwind -llzma $(call detected,CONFIG_X86_64) @@ -56,12 +58,18 @@ endif ifeq ($(ARCH),s390) NO_PERF_REGS := 0 + NO_SYSCALL_TABLE := 0 + CFLAGS += -I$(OUTPUT)arch/s390/include/generated endif ifeq ($(NO_PERF_REGS),0) $(call detected,CONFIG_PERF_REGS) endif +ifneq ($(NO_SYSCALL_TABLE),1) + CFLAGS += -DHAVE_SYSCALL_TABLE +endif + # So far there's only x86 and arm libdw unwind support merged in perf. # Disable it on all other architectures in case libdw unwind # support is detected in system. 
Add supported architectures diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 6eea7cff3d4e..303bdb84ab5a 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -26,6 +26,10 @@ #include const int syscalltbl_native_max_id = SYSCALLTBL_x86_64_MAX_ID; static const char **syscalltbl_native = syscalltbl_x86_64; +#elif defined(__s390x__) +#include +const int syscalltbl_native_max_id = SYSCALLTBL_S390_64_MAX_ID; +static const char **syscalltbl_native = syscalltbl_s390_64; #endif struct syscall { -- cgit v1.2.3 From 5449f13c553e9c50690419f6114665a8beb71bea Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 11 Dec 2017 12:46:11 -0300 Subject: perf annotate: Get the cpuid from evsel->evlist->env in symbol__annotate() To reduce its function signature, since we get this from 'evsel' which is already one of its arguments. Cc: Adrian Hunter Cc: David Ahern Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-070eap7t6uicg9c3w086xy2z@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 2 +- tools/perf/ui/browsers/annotate.c | 4 +--- tools/perf/ui/gtk/annotate.c | 2 +- tools/perf/util/annotate.c | 7 ++++--- tools/perf/util/annotate.h | 2 +- tools/perf/util/evsel.c | 6 +++--- tools/perf/util/evsel.h | 2 +- 7 files changed, 12 insertions(+), 13 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 540461f5e345..c6ccda52117d 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -138,7 +138,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) return err; } - err = symbol__annotate(sym, map, evsel, 0, NULL, NULL); + err = symbol__annotate(sym, map, evsel, 0, NULL); if (err == 0) { out_assign: top->sym_filter_entry = he; diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 
03b7363a49c9..286427975112 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -1116,9 +1116,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, if (perf_evsel__is_group_event(evsel)) nr_pcnt = evsel->nr_members; - err = symbol__annotate(sym, map, evsel, - sizeof(struct browser_line), &browser.arch, - perf_evsel__env_cpuid(evsel)); + err = symbol__annotate(sym, map, evsel, sizeof(struct browser_line), &browser.arch); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index cdb5ecf91666..aeeaf15029f0 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -169,7 +169,7 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map, if (map->dso->annotate_warned) return -1; - err = symbol__annotate(sym, map, evsel, 0, NULL, NULL); + err = symbol__annotate(sym, map, evsel, 0, NULL); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index facad1e279a8..bc34b28373f4 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1622,13 +1622,14 @@ void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel) int symbol__annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, size_t privsize, - struct arch **parch, char *cpuid) + struct arch **parch) { struct annotate_args args = { .privsize = privsize, .map = map, .evsel = evsel, }; + struct perf_env *env = perf_evsel__env(evsel); const char *arch_name = NULL; struct arch *arch; int err; @@ -1648,7 +1649,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, *parch = arch; if (arch->init) { - err = arch->init(arch, cpuid); + err = arch->init(arch, env ? 
env->cpuid : NULL); if (err) { pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name); return err; @@ -1999,7 +2000,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, struct dso *dso = map->dso; struct rb_root source_line = RB_ROOT; - if (symbol__annotate(sym, map, evsel, 0, NULL, NULL) < 0) + if (symbol__annotate(sym, map, evsel, 0, NULL) < 0) return -1; symbol__calc_percent(sym, evsel); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 6d7289e88fa3..ce427445671f 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -179,7 +179,7 @@ void symbol__annotate_zero_histograms(struct symbol *sym); int symbol__annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, size_t privsize, - struct arch **parch, char *cpuid); + struct arch **parch); enum symbol_disassemble_errno { SYMBOL_ANNOTATE_ERRNO__SUCCESS = 0, diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 95853c51c0ca..541897049c6c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2842,9 +2842,9 @@ char *perf_evsel__env_arch(struct perf_evsel *evsel) return NULL; } -char *perf_evsel__env_cpuid(struct perf_evsel *evsel) +struct perf_env *perf_evsel__env(struct perf_evsel *evsel) { - if (evsel && evsel->evlist && evsel->evlist->env) - return evsel->evlist->env->cpuid; + if (evsel && evsel->evlist) + return evsel->evlist->env; return NULL; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index c3663a70c9b9..0e961ce60a9c 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -447,6 +447,6 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, attr__fprintf_f attr__fprintf, void *priv); char *perf_evsel__env_arch(struct perf_evsel *evsel); -char *perf_evsel__env_cpuid(struct perf_evsel *evsel); +struct perf_env *perf_evsel__env(struct perf_evsel *evsel); #endif /* __PERF_EVSEL_H */ -- cgit v1.2.3 From 
3285debaf5992f9729ba33e3f31eff5253d29dc4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 11 Dec 2017 12:52:17 -0300 Subject: perf annotate: Use perf_env when obtaining the arch name Paving the way to reuse these routines in other areas, like when generating errno tables. Cc: Adrian Hunter Cc: David Ahern Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-rh1qv051vb8gfdcswskrn53h@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 17 ++++++++--------- tools/perf/util/evsel.c | 7 ------- tools/perf/util/evsel.h | 1 - 3 files changed, 8 insertions(+), 17 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index bc34b28373f4..eac45ccd5c32 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1420,16 +1420,19 @@ fallback: return 0; } -static const char *annotate__norm_arch(const char *arch_name) +static const char *perf_env__arch(struct perf_env *env) { struct utsname uts; + char *arch_name; - if (!arch_name) { /* Assume we are annotating locally. 
*/ + if (!env) { /* Assume local operation */ if (uname(&uts) < 0) return NULL; arch_name = uts.machine; - } - return normalize_arch((char *)arch_name); + } else + arch_name = env->arch; + + return normalize_arch(arch_name); } static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) @@ -1630,14 +1633,10 @@ int symbol__annotate(struct symbol *sym, struct map *map, .evsel = evsel, }; struct perf_env *env = perf_evsel__env(evsel); - const char *arch_name = NULL; + const char *arch_name = perf_env__arch(env); struct arch *arch; int err; - if (evsel) - arch_name = perf_evsel__env_arch(evsel); - - arch_name = annotate__norm_arch(arch_name); if (!arch_name) return -1; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 541897049c6c..4718f0a460df 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2835,13 +2835,6 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, perf_evsel__name(evsel)); } -char *perf_evsel__env_arch(struct perf_evsel *evsel) -{ - if (evsel && evsel->evlist && evsel->evlist->env) - return evsel->evlist->env->arch; - return NULL; -} - struct perf_env *perf_evsel__env(struct perf_evsel *evsel) { if (evsel && evsel->evlist) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 0e961ce60a9c..846e41644525 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -446,7 +446,6 @@ typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *); int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, attr__fprintf_f attr__fprintf, void *priv); -char *perf_evsel__env_arch(struct perf_evsel *evsel); struct perf_env *perf_evsel__env(struct perf_evsel *evsel); #endif /* __PERF_EVSEL_H */ -- cgit v1.2.3 From 4e8fbc1c975c667c61a3073da81b338b9bf61c37 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 11 Dec 2017 14:47:49 -0300 Subject: perf env: Adopt perf_env__arch() from the annotate code And use it in the 
libunwind case, with both passing a valid perf_env to extract the arch to be normalized from and passing NULL with the same semantic as in the annotate code: to get it from uname() uts.machine. Now the code to generate per arch errno translation tables (int/string) can use it to decode perf.data files recorded in a different arch than that where 'perf trace' (or any other analysis tool) runs. Cc: Adrian Hunter Cc: David Ahern Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-p2epffgash69w38kvj3ntpc9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/common.c | 44 +++-------------------------------- tools/perf/arch/common.h | 1 - tools/perf/util/annotate.c | 16 ------------- tools/perf/util/env.c | 47 ++++++++++++++++++++++++++++++++++++++ tools/perf/util/env.h | 2 ++ tools/perf/util/unwind-libunwind.c | 4 ++-- 6 files changed, 54 insertions(+), 60 deletions(-) diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c index 8c0cfeb55f8e..c6f373508a4f 100644 --- a/tools/perf/arch/common.c +++ b/tools/perf/arch/common.c @@ -1,12 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include #include "common.h" +#include "../util/env.h" #include "../util/util.h" #include "../util/debug.h" -#include "sane_ctype.h" - const char *const arm_triplets[] = { "arm-eabi-", "arm-linux-androideabi-", @@ -120,55 +118,19 @@ static int lookup_triplets(const char *const *triplets, const char *name) return -1; } -/* - * Return architecture name in a normalized form. - * The conversion logic comes from the Makefile. 
- */ -const char *normalize_arch(char *arch) -{ - if (!strcmp(arch, "x86_64")) - return "x86"; - if (arch[0] == 'i' && arch[2] == '8' && arch[3] == '6') - return "x86"; - if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5)) - return "sparc"; - if (!strcmp(arch, "aarch64") || !strcmp(arch, "arm64")) - return "arm64"; - if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110")) - return "arm"; - if (!strncmp(arch, "s390", 4)) - return "s390"; - if (!strncmp(arch, "parisc", 6)) - return "parisc"; - if (!strncmp(arch, "powerpc", 7) || !strncmp(arch, "ppc", 3)) - return "powerpc"; - if (!strncmp(arch, "mips", 4)) - return "mips"; - if (!strncmp(arch, "sh", 2) && isdigit(arch[2])) - return "sh"; - - return arch; -} - static int perf_env__lookup_binutils_path(struct perf_env *env, const char *name, const char **path) { int idx; - const char *arch, *cross_env; - struct utsname uts; + const char *arch = perf_env__arch(env), *cross_env; const char *const *path_list; char *buf = NULL; - arch = normalize_arch(env->arch); - - if (uname(&uts) < 0) - goto out; - /* * We don't need to try to find objdump path for native system. * Just use default binutils path (e.g.: "objdump"). 
*/ - if (!strcmp(normalize_arch(uts.machine), arch)) + if (!strcmp(perf_env__arch(NULL), arch)) goto out; cross_env = getenv("CROSS_COMPILE"); diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h index a1546509ad24..2d875baa92e6 100644 --- a/tools/perf/arch/common.h +++ b/tools/perf/arch/common.h @@ -7,6 +7,5 @@ extern const char *objdump_path; int perf_env__lookup_objdump(struct perf_env *env); -const char *normalize_arch(char *arch); #endif /* ARCH_PERF_COMMON_H */ diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index eac45ccd5c32..68e687d1bf99 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -26,7 +26,6 @@ #include #include #include -#include #include "sane_ctype.h" @@ -1420,21 +1419,6 @@ fallback: return 0; } -static const char *perf_env__arch(struct perf_env *env) -{ - struct utsname uts; - char *arch_name; - - if (!env) { /* Assume local operation */ - if (uname(&uts) < 0) - return NULL; - arch_name = uts.machine; - } else - arch_name = env->arch; - - return normalize_arch(arch_name); -} - static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) { struct map *map = args->map; diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 6276b340f893..6d311868d850 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 #include "cpumap.h" #include "env.h" +#include "sane_ctype.h" #include "util.h" #include +#include struct perf_env perf_env; @@ -93,3 +95,48 @@ void cpu_cache_level__free(struct cpu_cache_level *cache) free(cache->map); free(cache->size); } + +/* + * Return architecture name in a normalized form. + * The conversion logic comes from the Makefile. 
+ */ +static const char *normalize_arch(char *arch) +{ + if (!strcmp(arch, "x86_64")) + return "x86"; + if (arch[0] == 'i' && arch[2] == '8' && arch[3] == '6') + return "x86"; + if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5)) + return "sparc"; + if (!strcmp(arch, "aarch64") || !strcmp(arch, "arm64")) + return "arm64"; + if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110")) + return "arm"; + if (!strncmp(arch, "s390", 4)) + return "s390"; + if (!strncmp(arch, "parisc", 6)) + return "parisc"; + if (!strncmp(arch, "powerpc", 7) || !strncmp(arch, "ppc", 3)) + return "powerpc"; + if (!strncmp(arch, "mips", 4)) + return "mips"; + if (!strncmp(arch, "sh", 2) && isdigit(arch[2])) + return "sh"; + + return arch; +} + +const char *perf_env__arch(struct perf_env *env) +{ + struct utsname uts; + char *arch_name; + + if (!env) { /* Assume local operation */ + if (uname(&uts) < 0) + return NULL; + arch_name = uts.machine; + } else + arch_name = env->arch; + + return normalize_arch(arch_name); +} diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 1eb35b190b34..bf970f57dce0 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -65,4 +65,6 @@ int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]); int perf_env__read_cpu_topology_map(struct perf_env *env); void cpu_cache_level__free(struct cpu_cache_level *cache); + +const char *perf_env__arch(struct perf_env *env); #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index 647a1e6b4c7b..b029a5e9ae49 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -3,7 +3,7 @@ #include "thread.h" #include "session.h" #include "debug.h" -#include "arch/common.h" +#include "env.h" struct unwind_libunwind_ops __weak *local_unwind_libunwind_ops; struct unwind_libunwind_ops __weak *x86_32_unwind_libunwind_ops; @@ -39,7 +39,7 @@ int unwind__prepare_access(struct thread *thread, struct map *map, 
if (dso_type == DSO__TYPE_UNKNOWN) return 0; - arch = normalize_arch(thread->mg->machine->env->arch); + arch = perf_env__arch(thread->mg->machine->env); if (!strcmp(arch, "x86")) { if (dso_type != DSO__TYPE_64BIT) -- cgit v1.2.3 From 9f5c6d8777a2d962b0eeacb2a16f37da6bea545b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 9 Dec 2017 01:26:46 +0900 Subject: perf probe: Add warning message if there is unexpected event name This improves the error message so that the user learns about an invalid event name before new events are written to the kprobe-events interface. E.g. ====== #./perf probe -x /lib64/libc-2.25.so malloc_get_state* Internal error: "malloc_get_state@GLIBC_2" is an invalid event name. Error: Failed to add events. ====== Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Masami Hiramatsu Acked-by: Ravi Bangoria Reviewed-by: Thomas Richter Tested-by: Arnaldo Carvalho de Melo Cc: Paul Clarke Cc: bhargavb Cc: linux-rt-users@vger.kernel.org Link: http://lkml.kernel.org/r/151275040665.24652.5188568529237584489.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index b7aaf9b2294d..262d5da86623 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2625,6 +2625,14 @@ static int get_new_event_name(char *buf, size_t len, const char *base, out: free(nbase); + + /* Final validation */ + if (ret >= 0 && !is_c_func_name(buf)) { + pr_warning("Internal error: \"%s\" is an invalid event name.\n", + buf); + ret = -EINVAL; + } + return ret; } -- cgit v1.2.3 From a3110cd9d0f77a796da545e112f9305094257798 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 11 Dec 2017 15:19:25 -0300 Subject: perf probe: Cut off the version suffix from event name Cut off the version suffix (e.g. @GLIBC_2.2.5 etc.) from automatic generated event name.
This fixes wildcard event adding like below case; ===== # perf probe -x /lib64/libc-2.25.so malloc* Internal error: "malloc_get_state@GLIBC_2" is wrong event name. Error: Failed to add events. ===== This failure was caused by a versioned suffix symbol. With this fix, perf probe automatically cuts the suffix after @ as below. ===== # ./perf probe -x /lib64/libc-2.25.so malloc* Added new events: probe_libc:malloc_printerr (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc_consolidate (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc_check (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc_hook_ini (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc_trim (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc_usable_size (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc_stats (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc_info (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:mallochook (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc_get_state (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc_set_state (on malloc* in /usr/lib64/libc-2.25.so) You can now use it in all perf tools, such as: perf record -e probe_libc:malloc_set_state -aR sleep 1 ===== Reported-by: Arnaldo Carvalho de Melo Reported-by: bhargavb Signed-off-by: Masami Hiramatsu Acked-by: Ravi Bangoria Reviewed-by: Thomas Richter Tested-by: Arnaldo Carvalho de Melo Cc: Paul Clarke Cc: linux-rt-users@vger.kernel.org Link: http://lkml.kernel.org/r/None Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 262d5da86623..7e582547ac07 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2584,8 +2584,8 @@ static int get_new_event_name(char *buf, size_t len, const char *base, if (!nbase) return -ENOMEM; - /* 
Cut off the dot suffixes (e.g. .const, .isra)*/ - p = strchr(nbase, '.'); + /* Cut off the dot suffixes (e.g. .const, .isra) and version suffixes */ + p = strpbrk(nbase, ".@"); if (p && p != nbase) *p = '\0'; -- cgit v1.2.3 From e63c625a1e417edbe513b75b347a7238e9e7fea0 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 9 Dec 2017 01:27:44 +0900 Subject: perf probe: Add __return suffix for return events Add __return suffix for function return events automatically. Without this, the user has to give the --force option and will see a number suffix on each event, like "function_1", which is not easy to recognize. Instead, this adds __return suffix to it automatically. E.g. ===== # ./perf probe -x /lib64/libc-2.25.so 'malloc*%return' Added new events: probe_libc:malloc_printerr__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc_consolidate__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc_check__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc_hook_ini__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc_trim__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc_usable_size__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc_stats__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc_info__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:mallochook__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc_get_state__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc_set_state__return (on malloc*%return in /usr/lib64/libc-2.25.so) You can now use it in all perf tools, such as: perf record -e probe_libc:malloc_set_state__return -aR sleep 1 ===== Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Masami Hiramatsu Acked-by: Ravi Bangoria Reviewed-by: Thomas Richter Tested-by: Arnaldo Carvalho de Melo
Cc: Paul Clarke Cc: bhargavb Cc: linux-rt-users@vger.kernel.org Link: http://lkml.kernel.org/r/151275046418.24652.6696011972866498489.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-probe.txt | 2 +- tools/perf/util/probe-event.c | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index d7e4869905f1..f96382692f42 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -170,7 +170,7 @@ Probe points are defined by following syntax. or, sdt_PROVIDER:SDTEVENT -'EVENT' specifies the name of new event, if omitted, it will be set the name of the probed function. You can also specify a group name by 'GROUP', if omitted, set 'probe' is used for kprobe and 'probe_' is used for uprobe. +'EVENT' specifies the name of new event, if omitted, it will be set the name of the probed function, and for return probes, a "\_\_return" suffix is automatically added to the function name. You can also specify a group name by 'GROUP', if omitted, set 'probe' is used for kprobe and 'probe_' is used for uprobe. Note that using existing group name can conflict with other events. Especially, using the group name reserved for kernel modules can hide embedded events in the modules. 'FUNC' specifies a probed function name, and it may have one of the following options; '+OFFS' is the offset from function entry address in bytes, ':RLN' is the relative-line number from function entry line, and '%return' means that it probes function return. And ';PTN' means lazy matching pattern (see LAZY MATCHING). Note that ';PTN' must be the end of the probe point definition. In addition, '@SRC' specifies a source file which has that function. 
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 7e582547ac07..a68141d360b0 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2573,7 +2573,8 @@ int show_perf_probe_events(struct strfilter *filter) } static int get_new_event_name(char *buf, size_t len, const char *base, - struct strlist *namelist, bool allow_suffix) + struct strlist *namelist, bool ret_event, + bool allow_suffix) { int i, ret; char *p, *nbase; @@ -2590,7 +2591,7 @@ static int get_new_event_name(char *buf, size_t len, const char *base, *p = '\0'; /* Try no suffix number */ - ret = e_snprintf(buf, len, "%s", nbase); + ret = e_snprintf(buf, len, "%s%s", nbase, ret_event ? "__return" : ""); if (ret < 0) { pr_debug("snprintf() failed: %d\n", ret); goto out; @@ -2689,8 +2690,8 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev, group = PERFPROBE_GROUP; /* Get an unused new event name */ - ret = get_new_event_name(buf, 64, event, - namelist, allow_suffix); + ret = get_new_event_name(buf, 64, event, namelist, + tev->point.retprobe, allow_suffix); if (ret < 0) return ret; -- cgit v1.2.3 From 4b3a2716dd785fabb9f6ac80c1d53cb29a88169d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 9 Dec 2017 01:28:12 +0900 Subject: perf probe: Find versioned symbols from map Commit d80406453ad4 ("perf symbols: Allow user probes on versioned symbols") allows user to find default versioned symbols (with "@@") in map. However, it did not enable normal versioned symbol (with "@") for perf-probe. E.g. ===== # ./perf probe -x /lib64/libc-2.25.so malloc_get_state Failed to find symbol malloc_get_state in /usr/lib64/libc-2.25.so Error: Failed to add events. ===== This solves above issue by improving perf-probe symbol search function, as below. 
===== # ./perf probe -x /lib64/libc-2.25.so malloc_get_state Added new event: probe_libc:malloc_get_state (on malloc_get_state in /usr/lib64/libc-2.25.so) You can now use it in all perf tools, such as: perf record -e probe_libc:malloc_get_state -aR sleep 1 # ./perf probe -l probe_libc:malloc_get_state (on malloc_get_state@GLIBC_2.2.5 in /usr/lib64/libc-2.25.so) ===== Signed-off-by: Masami Hiramatsu Reviewed-by: Thomas Richter Acked-by: Ravi Bangoria Tested-by: Arnaldo Carvalho de Melo Cc: Paul Clarke Cc: bhargavb Cc: linux-rt-users@vger.kernel.org Link: http://lkml.kernel.org/r/151275049269.24652.1639103455496216255.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/sym-handling.c | 8 ++++++++ tools/perf/util/probe-event.c | 20 ++++++++++++++++++-- tools/perf/util/symbol.c | 5 +++++ tools/perf/util/symbol.h | 1 + 4 files changed, 32 insertions(+), 2 deletions(-) diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index 9c4e23d8c8ce..53d83d7e6a09 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -64,6 +64,14 @@ int arch__compare_symbol_names_n(const char *namea, const char *nameb, return strncmp(namea, nameb, n); } + +const char *arch__normalize_symbol_name(const char *name) +{ + /* Skip over initial dot */ + if (name && *name == '.') + name++; + return name; +} #endif #if defined(_CALL_ELF) && _CALL_ELF == 2 diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index a68141d360b0..0d6c66d51939 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2801,16 +2801,32 @@ static int find_probe_functions(struct map *map, char *name, int found = 0; struct symbol *sym; struct rb_node *tmp; + const char *norm, *ver; + char *buf = NULL; if (map__load(map) < 0) return 0; map__for_each_symbol(map, sym, tmp) { - if (strglobmatch(sym->name, name)) { + norm = 
arch__normalize_symbol_name(sym->name); + if (!norm) + continue; + + /* We don't care about default symbol or not */ + ver = strchr(norm, '@'); + if (ver) { + buf = strndup(norm, ver - norm); + if (!buf) + return -ENOMEM; + norm = buf; + } + if (strglobmatch(norm, name)) { found++; if (syms && found < probe_conf.max_probes) syms[found - 1] = sym; } + if (buf) + zfree(&buf); } return found; @@ -2856,7 +2872,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, * same name but different addresses, this lists all the symbols. */ num_matched_functions = find_probe_functions(map, pp->function, syms); - if (num_matched_functions == 0) { + if (num_matched_functions <= 0) { pr_err("Failed to find symbol %s in %s\n", pp->function, pev->target ? : "kernel"); ret = -ENOENT; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 1b67a8639dfe..cc065d4bfafc 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -94,6 +94,11 @@ static int prefix_underscores_count(const char *str) return tail - str; } +const char * __weak arch__normalize_symbol_name(const char *name) +{ + return name; +} + int __weak arch__compare_symbol_names(const char *namea, const char *nameb) { return strcmp(namea, nameb); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index a4f0075b4e5c..0563f33c1eb3 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -349,6 +349,7 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr); void arch__sym_update(struct symbol *s, GElf_Sym *sym); #endif +const char *arch__normalize_symbol_name(const char *name); #define SYMBOL_A 0 #define SYMBOL_B 1 -- cgit v1.2.3 From 1e9f9e8af0de80e8f6a47d991df66090934be0c6 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 9 Dec 2017 01:28:41 +0900 Subject: perf string: Add {strdup,strpbrk}_esc() To support the special characters escaped by '\' in 'perf probe' event parser. 
Signed-off-by: Masami Hiramatsu Reviewed-by: Thomas Richter Acked-by: Ravi Bangoria Cc: Paul Clarke Cc: bhargavb Cc: linux-rt-users@vger.kernel.org Link: http://lkml.kernel.org/r/151275052163.24652.18205979384585484358.stgit@devbox [ Split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/string.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/string2.h | 2 ++ 2 files changed, 48 insertions(+) diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index aaa08ee8c717..d8bfd0c4d2cb 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -396,3 +396,49 @@ out_err_overflow: free(expr); return NULL; } + +/* Like strpbrk(), but not break if it is right after a backslash (escaped) */ +char *strpbrk_esc(char *str, const char *stopset) +{ + char *ptr; + + do { + ptr = strpbrk(str, stopset); + if (ptr == str || + (ptr == str + 1 && *(ptr - 1) != '\\')) + break; + str = ptr + 1; + } while (ptr && *(ptr - 1) == '\\' && *(ptr - 2) != '\\'); + + return ptr; +} + +/* Like strdup, but do not copy a single backslash */ +char *strdup_esc(const char *str) +{ + char *s, *d, *p, *ret = strdup(str); + + if (!ret) + return NULL; + + d = strchr(ret, '\\'); + if (!d) + return ret; + + s = d + 1; + do { + if (*s == '\0') { + *d = '\0'; + break; + } + p = strchr(s + 1, '\\'); + if (p) { + memmove(d, s, p - s); + d += p - s; + s = p + 1; + } else + memmove(d, s, strlen(s) + 1); + } while (p); + + return ret; +} diff --git a/tools/perf/util/string2.h b/tools/perf/util/string2.h index ee14ca5451ab..4c68a09b97e8 100644 --- a/tools/perf/util/string2.h +++ b/tools/perf/util/string2.h @@ -39,5 +39,7 @@ static inline char *asprintf_expr_not_in_ints(const char *var, size_t nints, int return asprintf_expr_inout_ints(var, false, nints, ints); } +char *strpbrk_esc(char *str, const char *stopset); +char *strdup_esc(const char *str); #endif /* PERF_STRING_H */ -- cgit v1.2.3 From c588d158124d5b60184fc612e551a19720720d68 Mon Sep 
17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 13 Dec 2017 00:05:12 +0900 Subject: perf probe: Support escaped character in parser Support the special characters escaped by '\' in parser. This allows user to specify versions directly like below. ===== # ./perf probe -x /lib64/libc-2.25.so malloc_get_state\\@GLIBC_2.2.5 Added new event: probe_libc:malloc_get_state (on malloc_get_state@GLIBC_2.2.5 in /usr/lib64/libc-2.25.so) You can now use it in all perf tools, such as: perf record -e probe_libc:malloc_get_state -aR sleep 1 ===== Or, you can use separators in source filename, e.g. ===== # ./perf probe -x /opt/test/a.out foo+bar.c:3 Semantic error :There is non-digit character in offset. Error: Command Parse Error. ===== Usually "+" in source file cause parser error, but ===== # ./perf probe -x /opt/test/a.out foo\\+bar.c:4 Added new event: probe_a:main (on @foo+bar.c:4 in /opt/test/a.out) You can now use it in all perf tools, such as: perf record -e probe_a:main -aR sleep 1 ===== escaped "\+" allows you to specify that. Signed-off-by: Masami Hiramatsu Reviewed-by: Thomas Richter Acked-by: Ravi Bangoria Cc: Paul Clarke Cc: bhargavb Cc: linux-rt-users@vger.kernel.org Link: http://lkml.kernel.org/r/151309111236.18107.5634753157435343410.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-probe.txt | 16 +++++++++ tools/perf/util/probe-event.c | 58 ++++++++++++++++++++------------- 2 files changed, 51 insertions(+), 23 deletions(-) diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index f96382692f42..b6866a05edd2 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -182,6 +182,14 @@ Note that before using the SDT event, the target binary (on which SDT events are For details of the SDT, see below. 
https://sourceware.org/gdb/onlinedocs/gdb/Static-Probe-Points.html +ESCAPED CHARACTER +----------------- + +In the probe syntax, '=', '@', '+', ':' and ';' are treated as a special character. You can use a backslash ('\') to escape the special characters. +This is useful if you need to probe on a specific versioned symbols, like @GLIBC_... suffixes, or also you need to specify a source file which includes the special characters. +Note that usually single backslash is consumed by shell, so you might need to pass double backslash (\\) or wrapping with single quotes (\'AAA\@BBB'). +See EXAMPLES how it is used. + PROBE ARGUMENT -------------- Each probe argument follows below syntax. @@ -277,6 +285,14 @@ Add a USDT probe to a target process running in a different mount namespace ./perf probe --target-ns -x /usr/lib/jvm/java-1.8.0-openjdk-1.8.0.121-0.b13.el7_3.x86_64/jre/lib/amd64/server/libjvm.so %sdt_hotspot:thread__sleep__end +Add a probe on specific versioned symbol by backslash escape + + ./perf probe -x /lib64/libc-2.25.so 'malloc_get_state\@GLIBC_2.2.5' + +Add a probe in a source file using special characters by backslash escape + + ./perf probe -x /opt/test/a.out 'foo\+bar.c:4' + SEE ALSO -------- diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 0d6c66d51939..e1dbc9821617 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1325,27 +1325,30 @@ static int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev) { char *ptr; - ptr = strchr(*arg, ':'); + ptr = strpbrk_esc(*arg, ":"); if (ptr) { *ptr = '\0'; if (!pev->sdt && !is_c_func_name(*arg)) goto ng_name; - pev->group = strdup(*arg); + pev->group = strdup_esc(*arg); if (!pev->group) return -ENOMEM; *arg = ptr + 1; } else pev->group = NULL; - if (!pev->sdt && !is_c_func_name(*arg)) { + + pev->event = strdup_esc(*arg); + if (pev->event == NULL) + return -ENOMEM; + + if (!pev->sdt && !is_c_func_name(pev->event)) { + zfree(&pev->event); 
ng_name: + zfree(&pev->group); semantic_error("%s is bad for event name -it must " "follow C symbol-naming rule.\n", *arg); return -EINVAL; } - pev->event = strdup(*arg); - if (pev->event == NULL) - return -ENOMEM; - return 0; } @@ -1373,7 +1376,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) arg++; } - ptr = strpbrk(arg, ";=@+%"); + ptr = strpbrk_esc(arg, ";=@+%"); if (pev->sdt) { if (ptr) { if (*ptr != '@') { @@ -1387,7 +1390,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) pev->target = build_id_cache__origname(tmp); free(tmp); } else - pev->target = strdup(ptr + 1); + pev->target = strdup_esc(ptr + 1); if (!pev->target) return -ENOMEM; *ptr = '\0'; @@ -1421,13 +1424,14 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) * * Otherwise, we consider arg to be a function specification. */ - if (!strpbrk(arg, "+@%") && (ptr = strpbrk(arg, ";:")) != NULL) { + if (!strpbrk_esc(arg, "+@%")) { + ptr = strpbrk_esc(arg, ";:"); /* This is a file spec if it includes a '.' 
before ; or : */ - if (memchr(arg, '.', ptr - arg)) + if (ptr && memchr(arg, '.', ptr - arg)) file_spec = true; } - ptr = strpbrk(arg, ";:+@%"); + ptr = strpbrk_esc(arg, ";:+@%"); if (ptr) { nc = *ptr; *ptr++ = '\0'; @@ -1436,7 +1440,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) if (arg[0] == '\0') tmp = NULL; else { - tmp = strdup(arg); + tmp = strdup_esc(arg); if (tmp == NULL) return -ENOMEM; } @@ -1469,12 +1473,12 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) arg = ptr; c = nc; if (c == ';') { /* Lazy pattern must be the last part */ - pp->lazy_line = strdup(arg); + pp->lazy_line = strdup(arg); /* let leave escapes */ if (pp->lazy_line == NULL) return -ENOMEM; break; } - ptr = strpbrk(arg, ";:+@%"); + ptr = strpbrk_esc(arg, ";:+@%"); if (ptr) { nc = *ptr; *ptr++ = '\0'; @@ -1501,7 +1505,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) semantic_error("SRC@SRC is not allowed.\n"); return -EINVAL; } - pp->file = strdup(arg); + pp->file = strdup_esc(arg); if (pp->file == NULL) return -ENOMEM; break; @@ -2803,23 +2807,31 @@ static int find_probe_functions(struct map *map, char *name, struct rb_node *tmp; const char *norm, *ver; char *buf = NULL; + bool cut_version = true; if (map__load(map) < 0) return 0; + /* If user gives a version, don't cut off the version from symbols */ + if (strchr(name, '@')) + cut_version = false; + map__for_each_symbol(map, sym, tmp) { norm = arch__normalize_symbol_name(sym->name); if (!norm) continue; - /* We don't care about default symbol or not */ - ver = strchr(norm, '@'); - if (ver) { - buf = strndup(norm, ver - norm); - if (!buf) - return -ENOMEM; - norm = buf; + if (cut_version) { + /* We don't care about default symbol or not */ + ver = strchr(norm, '@'); + if (ver) { + buf = strndup(norm, ver - norm); + if (!buf) + return -ENOMEM; + norm = buf; + } } + if (strglobmatch(norm, name)) { found++; if (syms && found < probe_conf.max_probes) 
-- cgit v1.2.3 From f9d8adb345d7adbb2d3431eea73beb89c8d6d612 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 29 Nov 2017 19:43:46 +0100 Subject: perf evsel: Fix swap for samples with raw data When we detect a different endianity we swap event before processing. It's tricky for samples because we have no idea what's inside. We treat it as an array of u64s, swap them and later on we swap back parts which are different. We mangle this way also the tracepoint raw data, which ends up in report showing wrong data: 1.95% comm=Q^B pid=29285 prio=16777216 target_cpu=000 1.67% comm=l^B pid=0 prio=16777216 target_cpu=000 Luckily the traceevent library handles the endianity by itself (thank you Steven!), so we can pass the RAW data directly in the other endianity. 2.51% comm=beah-rhts-task pid=1175 prio=120 target_cpu=002 2.23% comm=kworker/0:0 pid=11566 prio=120 target_cpu=000 The fix is basically to swap back the raw data if different endianity is detected. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20171129184346.3656-1-jolsa@kernel.org [ Add util/memswap.c to python-ext-sources to link missing mem_bswap_64() ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 20 +++++++++++++++++--- tools/perf/util/python-ext-sources | 1 + 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 4718f0a460df..1cf044cbae36 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -36,6 +36,7 @@ #include "debug.h" #include "trace-event.h" #include "stat.h" +#include "memswap.h" #include "util/parse-branch-options.h" #include "sane_ctype.h" @@ -2131,14 +2132,27 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, if (type & PERF_SAMPLE_RAW) { OVERFLOW_CHECK_u64(array); u.val64 = *array; - if (WARN_ONCE(swapped, - "Endianness of raw data not corrected!\n")) { - /* undo swap of u64, 
then swap on individual u32s */ + + /* + * Undo swap of u64, then swap on individual u32s, + * get the size of the raw area and undo all of the + * swap. The pevent interface handles endianity by + * itself. + */ + if (swapped) { u.val64 = bswap_64(u.val64); u.val32[0] = bswap_32(u.val32[0]); u.val32[1] = bswap_32(u.val32[1]); } data->raw_size = u.val32[0]; + + /* + * The raw data is aligned on 64bits including the + * u32 size, so it's safe to use mem_bswap_64. + */ + if (swapped) + mem_bswap_64((void *) array, data->raw_size); + array = (void *)array + sizeof(u32); OVERFLOW_CHECK(array, data->raw_size, max_size); diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index b4f2f06722a7..7aa0ea64544e 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -10,6 +10,7 @@ util/ctype.c util/evlist.c util/evsel.c util/cpumap.c +util/memswap.c util/mmap.c util/namespaces.c ../lib/bitmap.c -- cgit v1.2.3 From 69b5c953400897a978f8a7d212c53aa90ff5027d Mon Sep 17 00:00:00 2001 From: Michael Petlan Date: Tue, 12 Dec 2017 11:22:03 -0500 Subject: perf test shell: Fix check open filename arg using 'perf trace' Commit f231af789b11 ("perf test shell: Fix check open filename arg using 'perf trace' on s390x") added an exception for s390x to use openat() instead of open() in the test that intercepts a open syscall to look for the filename argument as obtained by the vfs_getname 'perf probe' it puts in place at the getname_flags kernel function. Its not just s390x that uses openat() instead of open(), so use 'perf list' to look for the syscall:sys_enter_open(at)? present in the system being tested instead of checking if the system is s390x. 
In fact Namhyung pointed out that glibc 2.26 changed this behaviour, as described in https://lwn.net/Articles/738694/, so systems where glibc is >= 2.26 will need this patch for this test to work, which already took place in some distros for architectures such as s390x, while Fedora 26 x86_64 is at glibc 2.25, i.e. still uses open(). Signed-off-by: Michael Petlan Tested-by: Arnaldo Carvalho de Melo Tested-by: Thomas Richter Link: https://lkml.kernel.org/r/ab23fe42-1080-a46b-503e-744e097f414f@linux.vnet.ibm.com Cc: Adrian Hunter Cc: David Ahern Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan LPU-Reference: 1275675985.12835754.1513095723265.JavaMail.zimbra@redhat.com Link: https://lkml.kernel.org/n/tip-j2wbz9av1rw3thr3t0g4dtuk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/trace+probe_vfs_getname.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh index 2a9ef080efd0..55ad9793d544 100755 --- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh +++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh @@ -17,10 +17,9 @@ skip_if_no_perf_probe || exit 2 file=$(mktemp /tmp/temporary_file.XXXXX) trace_open_vfs_getname() { - test "$(uname -m)" = s390x && { svc="openat"; txt="dfd: +CWD, +"; } - - perf trace -e ${svc:-open} touch $file 2>&1 | \ - egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ ${svc:-open}\(${txt}filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" + evts=$(echo $(perf list syscalls:sys_enter_open* |& egrep 'open(at)? 
' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/') + perf trace -e $evts touch $file 2>&1 | \ + egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" } -- cgit v1.2.3 From 922991c2b14219b33270c770f917e0d1bf8f5597 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 13 Dec 2017 17:43:40 -0300 Subject: Revert "perf s390: Always build with -fPIC" This one made x86 always build with -fPIC, when the intention was for s390 to be built that way, due to a rebase mistake. Reported-by: Hendrik Brueckner This reverts commit 1dc4ddf112a408e607a073d951b962b6c6e2bd6c. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 6f73c2316740..eb6bd99be0bd 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -43,7 +43,6 @@ ifeq ($(SRCARCH),x86) LIBUNWIND_LIBS = -lunwind-x86 -llzma -lunwind endif NO_PERF_REGS := 0 - CFLAGS += -fPIC endif ifeq ($(SRCARCH),arm) -- cgit v1.2.3 From a9a3f1d18a6c9ccf89728e23474645aa91e2f4f1 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Wed, 13 Dec 2017 17:46:54 -0300 Subject: perf s390: Always build with -fPIC On s390, object files must be compiled with position-independent code in order to be incrementally linked or linked to shared libraries. Therefore, add -fPIC to the CFLAGS for s390 to ensure each object file is built properly.
Reported-by: Jonathan Hermann Signed-off-by: Hendrik Brueckner Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Thomas Richter Cc: linux s390 list Link: https://lkml.kernel.org/r/20171207080951.GC4889@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index eb6bd99be0bd..f050f38d8fa3 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -58,7 +58,7 @@ endif ifeq ($(ARCH),s390) NO_PERF_REGS := 0 NO_SYSCALL_TABLE := 0 - CFLAGS += -I$(OUTPUT)arch/s390/include/generated + CFLAGS += -fPIC -I$(OUTPUT)arch/s390/include/generated endif ifeq ($(NO_PERF_REGS),0) -- cgit v1.2.3 From ca8000684ec4e66f965e1f9547a3c6cb834154ca Mon Sep 17 00:00:00 2001 From: Mengting Zhang Date: Wed, 13 Dec 2017 15:01:53 +0800 Subject: perf evsel: Enable ignore_missing_thread for pid option While monitoring a multithreaded process with the pid option, perf sometimes may return sys_perf_event_open failure with 3(No such process) if any of the process's threads die before we open the event. However, we want perf to continue monitoring the remaining threads and not exit with an error. Here, the patch enables perf_evsel::ignore_missing_thread for -p option to ignore complete failure if any of threads die before we open the event. But it may still return sys_perf_event_open failure with 22(Invalid) if we monitor several event groups. sys_perf_event_open: pid 28960 cpu 40 group_fd 118202 flags 0x8 sys_perf_event_open: pid 28961 cpu 40 group_fd 118203 flags 0x8 WARNING: Ignored open failure for pid 28962 sys_perf_event_open: pid 28962 cpu 40 group_fd [118203] flags 0x8 sys_perf_event_open failed, error -22 That is because when we ignore a missing thread, we change the thread_idx without dealing with its fds, FD(evsel, cpu, thread).
Then get_group_fd() may return a wrong group_fd for the next thread and sys_perf_event_open() returns with 22. sys_perf_event_open(){ ... if (group_fd != -1) perf_fget_light()//to get corresponding group_leader by group_fd ... if (group_leader) if (group_leader->ctx->task != ctx->task)//should on the same task goto err_context ... } This patch also fixes this bug by introducing perf_evsel__remove_fd() and update_fds to allow removing fds for the missing thread. Changes since v1: - Change group_fd__remove() into a more generic way without changing code logic - Remove redundant condition Changes since v2: - Use a proper function name and add some comment. - Multiline comment style fixes. Committer testing: Before this patch the recently added 'perf stat --per-thread' for system wide counting would race while enumerating all threads using /proc: [root@jouet ~]# perf stat --per-thread failed to parse CPUs map: No such file or directory Usage: perf stat [] [] -C, --cpu list of cpus to monitor in system-wide -a, --all-cpus system-wide collection from all CPUs [root@jouet ~]# perf stat --per-thread failed to parse CPUs map: No such file or directory Usage: perf stat [] [] -C, --cpu list of cpus to monitor in system-wide -a, --all-cpus system-wide collection from all CPUs [root@jouet ~]# When, say, the kernel was being built, so lots of shortlived threads, after this patch this doesn't happen.
Signed-off-by: Mengting Zhang Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Cheng Jian Cc: Li Bin Cc: Wang Nan Link: http://lkml.kernel.org/r/1513148513-6974-1-git-send-email-zhangmengting@huawei.com [ Remove one use 'evlist' alias variable ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 4 ++-- tools/perf/util/evsel.c | 47 +++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 98da8cb8de93..50385d89c497 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1804,8 +1804,8 @@ int cmd_record(int argc, const char **argv) goto out; } - /* Enable ignoring missing threads when -u option is defined. */ - rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX; + /* Enable ignoring missing threads when -u/-p option is defined. */ + rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid; err = -ENOMEM; if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1cf044cbae36..a4d256ea0dc4 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1599,10 +1599,46 @@ static int __open_attr__fprintf(FILE *fp, const char *name, const char *val, return fprintf(fp, " %-32s %s\n", name, val); } +static void perf_evsel__remove_fd(struct perf_evsel *pos, + int nr_cpus, int nr_threads, + int thread_idx) +{ + for (int cpu = 0; cpu < nr_cpus; cpu++) + for (int thread = thread_idx; thread < nr_threads - 1; thread++) + FD(pos, cpu, thread) = FD(pos, cpu, thread + 1); +} + +static int update_fds(struct perf_evsel *evsel, + int nr_cpus, int cpu_idx, + int nr_threads, int thread_idx) +{ + struct perf_evsel *pos; + + if (cpu_idx >= nr_cpus || thread_idx >= nr_threads) + return -EINVAL; + + evlist__for_each_entry(evsel->evlist, pos) { + nr_cpus = pos != evsel ? 
nr_cpus : cpu_idx; + + perf_evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx); + + /* + * Since fds for next evsel has not been created, + * there is no need to iterate whole event list. + */ + if (pos == evsel) + break; + } + return 0; +} + static bool ignore_missing_thread(struct perf_evsel *evsel, + int nr_cpus, int cpu, struct thread_map *threads, int thread, int err) { + pid_t ignore_pid = thread_map__pid(threads, thread); + if (!evsel->ignore_missing_thread) return false; @@ -1618,11 +1654,18 @@ static bool ignore_missing_thread(struct perf_evsel *evsel, if (threads->nr == 1) return false; + /* + * We should remove fd for missing_thread first + * because thread_map__remove() will decrease threads->nr. + */ + if (update_fds(evsel, nr_cpus, cpu, threads->nr, thread)) + return false; + if (thread_map__remove(threads, thread)) return false; pr_warning("WARNING: Ignored open failure for pid %d\n", - thread_map__pid(threads, thread)); + ignore_pid); return true; } @@ -1727,7 +1770,7 @@ retry_open: if (fd < 0) { err = -errno; - if (ignore_missing_thread(evsel, threads, thread, err)) { + if (ignore_missing_thread(evsel, cpus->nr, cpu, threads, thread, err)) { /* * We just removed 1 thread, so take a step * back on thread index and lower the upper -- cgit v1.2.3 From f1031c8d33a8c40d4cac26e58c37d9fba0e31a8a Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Thu, 14 Dec 2017 17:52:42 -0600 Subject: perf probe arm64: Fix symbol fixup issues due to ELF type On an arm64 machine running a CONFIG_RANDOMIZE_BASE=y kernel, perf kernel symbol resolution fails. Debugging saw symsrc_init calling the default elf__needs_adjust_symbols() where checks for an ET_DYN (3) ehdr.e_type failed when they should have succeeded. Fix by adopting powerpc version of the weak elf__needs_adjust_symbols() function, as done in commit d2332098331f ("perf probe ppc: Fix symbol fixup issues due to ELF type"). 
Prior to this patch, perf test 1 would fail: $ sudo oldperf test -v 1 |& head 1: vmlinux symtab matches kallsyms : test child forked, pid 33374 Looking at the vmlinux_path (8 entries long) Using /usr/lib/debug/boot/vmlinux for symbols ERR : 0xfffe0000100f1000: do_undefinstr not on kallsyms ERR : 0xfffe0000100f1320: do_sysinstr not on kallsyms ERR : 0xfffe0000100f13b0: do_debug_exception not on kallsyms ERR : 0xfffe0000100f1498: do_mem_abort not on kallsyms ERR : 0xfffe0000100f1580: do_sp_pc_abort not on kallsyms ... After applying this patch, perf test 1 now succeeds: $ sudo ./newperf test -v 1 |& head 1: vmlinux symtab matches kallsyms : test child forked, pid 33378 Looking at the vmlinux_path (8 entries long) Using /usr/lib/debug/boot/vmlinux for symbols WARN: 0xffff000008081000: diff name v: do_undefinstr k: __exception_text_start WARN: 0xffff0000080819e8: diff name v: __irqentry_text_end k: __softirqentry_text_start WARN: 0xffff000008081d08: diff name v: __entry_text_start k: __softirqentry_text_end WARN: 0xffff00000809db5c: diff name v: flush_icache_range k: __flush_cache_user_range WARN: 0xffff000008101908: diff name v: sys_ni_syscall k: sys_vm86old ... Signed-off-by: Kim Phillips Cc: Alexander Shishkin Cc: Ganapatrao Kulkarni Cc: Jiri Olsa Cc: Namhyung Kim Cc: Naveen N. 
Rao Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20171214175242.e30450f17f93ad675d968fa3@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/Build | 1 + tools/perf/arch/arm64/util/sym-handling.c | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 tools/perf/arch/arm64/util/sym-handling.c diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index b1ab72d2a42e..e04f6cdd6f32 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -1,4 +1,5 @@ libperf-y += header.o +libperf-y += sym-handling.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/arm64/util/sym-handling.c b/tools/perf/arch/arm64/util/sym-handling.c new file mode 100644 index 000000000000..0051b1ee8450 --- /dev/null +++ b/tools/perf/arch/arm64/util/sym-handling.c @@ -0,0 +1,22 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * Copyright (C) 2015 Naveen N. Rao, IBM Corporation + */ + +#include "debug.h" +#include "symbol.h" +#include "map.h" +#include "probe-event.h" +#include "probe-file.h" + +#ifdef HAVE_LIBELF_SUPPORT +bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) +{ + return ehdr.e_type == ET_EXEC || + ehdr.e_type == ET_REL || + ehdr.e_type == ET_DYN; +} +#endif -- cgit v1.2.3 From 74cd5815d9af6e6c4f3bcecfbc8e439f2fd7e6b1 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 21 Dec 2017 17:26:10 +0800 Subject: perf tool: Improve bash command line auto-complete for multiple events with comma perf has perf-completion.sh to define command line auto-completion in bash/zsh. For record/stat -e it works for single events, but isn't working when specifying multiple events with comma. 
It would be very useful if it could be fixed to make it easier by supporting multiple events, comma separated. With this patch, the result can be like this: 1. Support the events returned from 'perf list --raw-dump' root@skl:/tmp# perf stat -e cpu/cache cpu/cache-misses/ cpu/cache-references/ root@skl:/tmp# perf stat -e cpu/cache-misses/,cpu/branch- cpu/branch-instructions/ cpu/branch-misses/ root@skl:/tmp# perf stat -e cpu/cache-misses/,cpu/branch-i root@skl:/tmp# perf stat -e cpu/cache-misses/,cpu/branch-instructions/ 2. Support the events listed in /sys/bus/event_source/devices/cpu/events root@skl:/tmp# perf stat -e cycle cycle_activity.cycles_l1d_miss cycle_activity.stalls_l3_miss cycle_activity.cycles_l2_miss cycle_activity.stalls_mem_any cycle_activity.cycles_l3_miss cycle_activity.stalls_total cycle_activity.cycles_mem_any cycles-ct cycle_activity.stalls_l1d_miss cycles-t cycle_activity.stalls_l2_miss root@skl:/tmp# perf stat -e cycles- cycles-ct cycles-t root@skl:/tmp# perf stat -e cycles-t,cpu/c cpu/cache-misses/ cpu/cpu-cycles/ cpu/cycles-t/ cpu/cache-references/ cpu/cycles-ct/ root@skl:/tmp# perf stat -e cycles-t,cpu/cache- cpu/cache-misses/ cpu/cache-references/ root@skl:/tmp# perf stat -e cycles-t,cpu/cache-misses/ 3. Support the uppercase event which is with prefix "cpu/" root@skl:/tmp# perf stat -e cpu/c cpu/cache-misses/ cpu/cpu-cycles/ cpu/cycles-t/ cpu/cache-references/ cpu/cycles-ct/ root@skl:/tmp# perf stat -e cpu/cache-misses/,cpu/C cpu/CACHE-MISSES/ cpu/CPU-CYCLES/ cpu/CYCLES-T/ cpu/CACHE-REFERENCES/ cpu/CYCLES-CT/ root@skl:/tmp# perf stat -e cpu/cache-misses/,cpu/CACHE-REFERENCES/ Note that: a) This patch only supports bash. b) It doesn't support the cases like {},{} or {...,...}. 
Signed-off-by: Jin Yao Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1513848370-8098-1-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf-completion.sh | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh index 345f5d6e9ed5..d8310830a18b 100644 --- a/tools/perf/perf-completion.sh +++ b/tools/perf/perf-completion.sh @@ -162,8 +162,33 @@ __perf_main () # List possible events for -e option elif [[ $prev == @("-e"|"--event") && $prev_skip_opts == @(record|stat|top) ]]; then - evts=$($cmd list --raw-dump) - __perfcomp_colon "$evts" "$cur" + + local cur1=${COMP_WORDS[COMP_CWORD]} + local raw_evts=$($cmd list --raw-dump) + local arr s tmp result + + if [[ "$cur1" == */* && ${cur1#*/} =~ ^[A-Z] ]]; then + OLD_IFS="$IFS" + IFS=" " + arr=($raw_evts) + IFS="$OLD_IFS" + + for s in ${arr[@]} + do + if [[ "$s" == *cpu/* ]]; then + tmp=${s#*cpu/} + result=$result" ""cpu/"${tmp^^} + else + result=$result" "$s + fi + done + + evts=${result}+$(ls /sys/bus/event_source/devices/cpu/events) + else + evts=${raw_evts}+$(ls /sys/bus/event_source/devices/cpu/events) + fi + + __perfcomp_colon "$evts" "$cur1" else # List subcommands for perf commands if [[ $prev_skip_opts == @(kvm|kmem|mem|lock|sched| @@ -246,11 +271,16 @@ fi type perf &>/dev/null && _perf() { + if [[ "$COMP_WORDBREAKS" != *,* ]]; then + COMP_WORDBREAKS="${COMP_WORDBREAKS}," + export COMP_WORDBREAKS + fi + local cur words cword prev if [ $preload_get_comp_words_by_ref = "true" ]; then - _get_comp_words_by_ref -n =: cur words cword prev + _get_comp_words_by_ref -n =:, cur words cword prev else - __perf_get_comp_words_by_ref -n =: cur words cword prev + __perf_get_comp_words_by_ref -n =:, cur words cword prev fi __perf_main } && -- cgit v1.2.3 From 
34c16db0f035f3f3dc50fbed03747693c12b6a5b Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 22 Dec 2017 18:57:35 +0800 Subject: perf tools: Return all events as auto-completions after comma It's a follow-up to the previous patch "perf tool: Improve bash command line auto-complete for multiple events with comma." It fixes an issue where no events are displayed when <TAB> is typed directly after a comma. With this patch, the result is now: root@skl:/tmp# perf stat -e cpu-cycles, Display all 2389 possibilities? (y or n) alarmtimer:alarmtimer_cancel alarmtimer:alarmtimer_fired alarmtimer:alarmtimer_start alarmtimer:alarmtimer_suspend alignment-faults arith.divider_active BAClear_Cost baclears.any block:block_bio_backmerge block:block_bio_bounce block:block_bio_complete block:block_bio_frontmerge block:block_bio_queue block:block_bio_remap block:block_dirty_buffer block:block_getrq block:block_plug block:block_rq_complete block:block_rq_insert block:block_rq_issue block:block_rq_remap block:block_rq_requeue block:block_sleeprq --More-- One remaining issue is that auto-completion doesn't work well for events containing ':' (for example, clk:clk_enable), because ':' is included in COMP_WORDBREAKS by default in bash. More work is needed for this case. 
Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Jin Yao Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1513940255-16528-1-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf-completion.sh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh index d8310830a18b..90206413f4d7 100644 --- a/tools/perf/perf-completion.sh +++ b/tools/perf/perf-completion.sh @@ -183,12 +183,16 @@ __perf_main () fi done - evts=${result}+$(ls /sys/bus/event_source/devices/cpu/events) + evts=${result}" "$(ls /sys/bus/event_source/devices/cpu/events) else - evts=${raw_evts}+$(ls /sys/bus/event_source/devices/cpu/events) + evts=${raw_evts}" "$(ls /sys/bus/event_source/devices/cpu/events) fi - __perfcomp_colon "$evts" "$cur1" + if [[ "$cur1" == , ]]; then + __perfcomp_colon "$evts" "" + else + __perfcomp_colon "$evts" "$cur1" + fi else # List subcommands for perf commands if [[ $prev_skip_opts == @(kvm|kmem|mem|lock|sched| -- cgit v1.2.3 From 5d4fd9c8b83b36d34521b3af361a5726899045bf Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Sat, 23 Dec 2017 04:15:58 +0800 Subject: perf tools: Auto-complete for events with ':' It's a follow up patch for a previous patch "perf tool: Return all events as auto-completions after comma". With this patch, auto-completion can work well for events with a ':'. 
For example: root@skl:/tmp# perf stat -e block:block_ block:block_bio_backmerge block:block_rq_complete block:block_bio_bounce block:block_rq_insert block:block_bio_complete block:block_rq_issue block:block_bio_frontmerge block:block_rq_remap block:block_bio_queue block:block_rq_requeue block:block_bio_remap block:block_sleeprq block:block_dirty_buffer block:block_split block:block_getrq block:block_touch_buffer block:block_plug block:block_unplug root@skl:/tmp# perf stat -e block:block_rq_ block:block_rq_complete block:block_rq_issue block:block_rq_requeue block:block_rq_insert block:block_rq_remap root@skl:/tmp# perf stat -e block:block_rq_complete block:block_rq_complete root@skl:/tmp# perf stat -e block:block_rq_complete Signed-off-by: Jin Yao Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1513973758-19109-1-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf-completion.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh index 90206413f4d7..fdf75d45efff 100644 --- a/tools/perf/perf-completion.sh +++ b/tools/perf/perf-completion.sh @@ -280,6 +280,11 @@ _perf() export COMP_WORDBREAKS fi + if [[ "$COMP_WORDBREAKS" == *:* ]]; then + COMP_WORDBREAKS="${COMP_WORDBREAKS/:/}" + export COMP_WORDBREAKS + fi + local cur words cword prev if [ $preload_get_comp_words_by_ref = "true" ]; then _get_comp_words_by_ref -n =:, cur words cword prev -- cgit v1.2.3