From e1df4cb682ab2c3c2981c8efa4aec044e61f4e06 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)"
Date: Tue, 12 Mar 2013 10:09:42 -0400
Subject: ftrace: Fix function probe to only enable needed functions

Currently the function probe enables all functions and runs a "hash"
against every function call to see if it should call a probe. This is
extremely wasteful.

Note, a probe is something like:

  echo schedule:traceoff > /debug/tracing/set_ftrace_filter

When schedule is called, the probe will disable tracing. But currently,
it has a callback for *all* functions, and checks to see if the called
function is the probe that is needed.

The probe function was created before ftrace was rewritten to allow for
more than one "op" to be registered by the function tracer. When probes
were created, it couldn't limit the functions without also limiting
normal function calls. But now we can, so it's about time to update the
probe code.

Todo: have separate ops for different entries. That is, assign a
ftrace_ops per probe, instead of one op for all probes. But as there
are not many probes assigned, this may not be that urgent.

Signed-off-by: Steven Rostedt
---
 kernel/trace/ftrace.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 46 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index e6effd0c40a9..dab031fec85b 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2988,18 +2988,20 @@ static void ftrace_free_entry_rcu(struct rcu_head *rhp)
     kfree(entry);
 }
 
-
 int
 register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
                   void *data)
 {
     struct ftrace_func_probe *entry;
+    struct ftrace_hash **orig_hash = &trace_probe_ops.filter_hash;
+    struct ftrace_hash *hash;
     struct ftrace_page *pg;
     struct dyn_ftrace *rec;
     int type, len, not;
     unsigned long key;
     int count = 0;
     char *search;
+    int ret;
 
     type = filter_parse_regex(glob, strlen(glob), &search, &not);
     len = strlen(search);
@@ -3010,8 +3012,16 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 
     mutex_lock(&ftrace_lock);
 
-    if (unlikely(ftrace_disabled))
+    hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash);
+    if (!hash) {
+        count = -ENOMEM;
+        goto out_unlock;
+    }
+
+    if (unlikely(ftrace_disabled)) {
+        count = -ENODEV;
         goto out_unlock;
+    }
 
     do_for_each_ftrace_rec(pg, rec) {
 
@@ -3043,6 +3053,13 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
             }
         }
 
+        ret = enter_record(hash, rec, 0);
+        if (ret < 0) {
+            kfree(entry);
+            count = ret;
+            goto out_unlock;
+        }
+
         entry->ops = ops;
         entry->ip = rec->ip;
 
@@ -3050,10 +3067,16 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
         hlist_add_head_rcu(&entry->node, &ftrace_func_hash[key]);
 
     } while_for_each_ftrace_rec();
+
+    ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash);
+    if (ret < 0)
+        count = ret;
+
     __enable_ftrace_function_probe();
 
  out_unlock:
     mutex_unlock(&ftrace_lock);
+    free_ftrace_hash(hash);
 
     return count;
 }
@@ -3067,7 +3090,10 @@ static void
 __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
                   void *data, int flags)
 {
+    struct ftrace_func_entry *rec_entry;
     struct ftrace_func_probe *entry;
+    struct ftrace_hash **orig_hash = &trace_probe_ops.filter_hash;
+    struct ftrace_hash *hash;
     struct hlist_node *n, *tmp;
     char str[KSYM_SYMBOL_LEN];
     int type = MATCH_FULL;
@@ -3088,6 +3114,12 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
     }
 
     mutex_lock(&ftrace_lock);
+
+    hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash);
+    if (!hash)
+        /* Hmm, should report this somehow */
+        goto out_unlock;
+
     for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
         struct hlist_head *hhd = &ftrace_func_hash[i];
 
@@ -3108,12 +3140,24 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
             continue;
         }
 
+        rec_entry = ftrace_lookup_ip(hash, entry->ip);
+        /* It is possible more than one entry had this ip */
+        if (rec_entry)
+            free_hash_entry(hash, rec_entry);
+
         hlist_del_rcu(&entry->node);
         call_rcu_sched(&entry->rcu, ftrace_free_entry_rcu);
         }
     }
     __disable_ftrace_function_probe();
+    /*
+     * Remove after the disable is called. Otherwise, if the last
+     * probe is removed, a null hash means *all enabled*.
+     */
+    ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash);
+
  out_unlock:
     mutex_unlock(&ftrace_lock);
+    free_ftrace_hash(hash);
 }
 
 void
-- 
cgit v1.2.3
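The patch above follows a copy-update-swap discipline: the probe ops' current filter hash is duplicated with alloc_and_copy_ftrace_hash(), the matching records are added to the copy with enter_record(), and only then does ftrace_hash_move() publish the copy as the live filter before the temporary is freed. Below is a rough userspace sketch of that discipline, with a plain array standing in for the ftrace hash and no locking or RCU; every name in it is illustrative, not kernel API.

  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>

  /* Stand-in for the ftrace filter hash: a fixed-size array of addresses. */
  struct filter {
          size_t count;
          unsigned long ips[64];
  };

  /* Like alloc_and_copy_ftrace_hash(): updates are made on a private copy. */
  static struct filter *filter_copy(const struct filter *src)
  {
          struct filter *dst = malloc(sizeof(*dst));

          if (dst)
                  memcpy(dst, src, sizeof(*dst));
          return dst;
  }

  int main(void)
  {
          struct filter *cur = calloc(1, sizeof(*cur));   /* currently "live" filter */
          struct filter *tmp;

          if (!cur)
                  return 1;

          tmp = filter_copy(cur);                 /* alloc_and_copy step */
          if (!tmp)
                  return 1;

          tmp->ips[tmp->count++] = 0xabcdef;      /* "enter_record" for one function */

          free(cur);                              /* old filter goes away ...        */
          cur = tmp;                              /* ... and the updated copy is live */

          printf("filter now tracks %zu function(s)\n", cur->count);
          free(cur);
          return 0;
  }

Updating a private copy and swapping it in keeps the visible filter consistent at every point: readers see either the old set or the new set, never a half-built one. In the kernel the swap is done by ftrace_hash_move() under ftrace_lock with the appropriate synchronization; the pointer assignment here is only a single-threaded analogy.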
From e67efb93f0e9130174293ffaa5975f87b301b531 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)"
Date: Tue, 12 Mar 2013 15:07:59 -0400
Subject: ftrace: Clean up function probe methods

When a function probe is created, a "callback" method is called for
each function that the probe is attached to. On release of the probe,
the "free" method is called for each function entry.

First, "callback" is a confusing name and does not really match what it
does. Callback sounds like it will be called when the probe triggers.
But that's not the case. This is really an "init" function, so let's
rename it as such.

Secondly, both "init" and "free" do not pass enough information back to
the handlers. Pass back the ops, ip and data for each time the method
is called. We have the information, might as well use it.

Signed-off-by: Steven Rostedt
---
 include/linux/ftrace.h | 6 ++++--
 kernel/trace/ftrace.c  | 6 +++---
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index e5ca8ef50e9b..832422d706f4 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -259,8 +259,10 @@ struct ftrace_probe_ops {
     void    (*func)(unsigned long ip,
                     unsigned long parent_ip,
                     void **data);
-    int     (*callback)(unsigned long ip, void **data);
-    void    (*free)(void **data);
+    int     (*init)(struct ftrace_probe_ops *ops,
+                    unsigned long ip, void **data);
+    void    (*free)(struct ftrace_probe_ops *ops,
+                    unsigned long ip, void **data);
     int     (*print)(struct seq_file *m,
                      unsigned long ip,
                      struct ftrace_probe_ops *ops,

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index dab031fec85b..ff0ef41c6d93 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2984,7 +2984,7 @@ static void ftrace_free_entry_rcu(struct rcu_head *rhp)
         container_of(rhp, struct ftrace_func_probe, rcu);
 
     if (entry->ops->free)
-        entry->ops->free(&entry->data);
+        entry->ops->free(entry->ops, entry->ip, &entry->data);
     kfree(entry);
 }
 
@@ -3045,8 +3045,8 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
          * for each function we find. We call the callback
          * to give the caller an opportunity to do so.
          */
-        if (ops->callback) {
-            if (ops->callback(rec->ip, &entry->data) < 0) {
+        if (ops->init) {
+            if (ops->init(ops, rec->ip, &entry->data) < 0) {
                 /* caller does not like this func */
                 kfree(entry);
                 continue;
-- 
cgit v1.2.3
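With this change a probe's init and free methods see the same triple — the owning ops, the instruction pointer, and the per-entry data slot — that the probe handler itself works with. Here is a small userspace mock of that shape; the struct and function names are invented for illustration, and only the method signatures mirror the ones added to ftrace_probe_ops above.

  #include <stdio.h>
  #include <stdlib.h>

  /* Mirrors the new init/free signatures: ops, ip and data are all passed back. */
  struct probe_ops {
          void (*func)(unsigned long ip, unsigned long parent_ip, void **data);
          int  (*init)(struct probe_ops *ops, unsigned long ip, void **data);
          void (*free)(struct probe_ops *ops, unsigned long ip, void **data);
  };

  static int count_init(struct probe_ops *ops, unsigned long ip, void **data)
  {
          long *hits = calloc(1, sizeof(*hits));

          if (!hits)
                  return -1;              /* "caller does not like this func" */
          *data = hits;
          printf("init ops %p at ip 0x%lx\n", (void *)ops, ip);
          return 0;
  }

  static void count_free(struct probe_ops *ops, unsigned long ip, void **data)
  {
          printf("free ops %p at ip 0x%lx after %ld hits\n",
                 (void *)ops, ip, *(long *)*data);
          free(*data);
          *data = NULL;
  }

  static void count_func(unsigned long ip, unsigned long parent_ip, void **data)
  {
          (*(long *)*data)++;
  }

  int main(void)
  {
          struct probe_ops ops = {
                  .func = count_func, .init = count_init, .free = count_free,
          };
          void *data = NULL;

          if (ops.init(&ops, 0x1000, &data) < 0)
                  return 1;
          ops.func(0x1000, 0x2000, &data);
          ops.free(&ops, 0x1000, &data);
          return 0;
  }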
From 7818b3886545f89549185e4023743e2df91d1fa1 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)"
Date: Wed, 13 Mar 2013 12:42:58 -0400
Subject: ftrace: Use manual free after synchronize_sched() not call_rcu_sched()

The entries in the probe hash must be freed only after a
synchronize_sched() has completed following their removal from the
hash. As the entries are registered with ops that may have their own
callbacks, and these callbacks may sleep, we cannot use
call_rcu_sched() because the rcu callbacks registered with that are
called from a softirq context.

Instead of using call_rcu_sched(), manually save the entries on a
free_list and, at the end of the loop that removes the entries, do a
synchronize_sched() and then go through the free_list, freeing the
entries.

Cc: Paul McKenney
Signed-off-by: Steven Rostedt
---
 kernel/trace/ftrace.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index ff0ef41c6d93..25770824598f 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1068,7 +1068,7 @@ struct ftrace_func_probe {
     unsigned long       flags;
     unsigned long       ip;
     void                *data;
-    struct rcu_head     rcu;
+    struct list_head    free_list;
 };
 
 struct ftrace_func_entry {
@@ -2978,11 +2978,8 @@ static void __disable_ftrace_function_probe(void)
 }
 
 
-static void ftrace_free_entry_rcu(struct rcu_head *rhp)
+static void ftrace_free_entry(struct ftrace_func_probe *entry)
 {
-    struct ftrace_func_probe *entry =
-        container_of(rhp, struct ftrace_func_probe, rcu);
-
     if (entry->ops->free)
         entry->ops->free(entry->ops, entry->ip, &entry->data);
     kfree(entry);
@@ -3092,7 +3089,9 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 {
     struct ftrace_func_entry *rec_entry;
     struct ftrace_func_probe *entry;
+    struct ftrace_func_probe *p;
     struct ftrace_hash **orig_hash = &trace_probe_ops.filter_hash;
+    struct list_head free_list;
     struct ftrace_hash *hash;
     struct hlist_node *n, *tmp;
     char str[KSYM_SYMBOL_LEN];
@@ -3120,6 +3119,8 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
         /* Hmm, should report this somehow */
         goto out_unlock;
 
+    INIT_LIST_HEAD(&free_list);
+
     for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
         struct hlist_head *hhd = &ftrace_func_hash[i];
 
@@ -3146,7 +3147,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
             free_hash_entry(hash, rec_entry);
 
         hlist_del_rcu(&entry->node);
-        call_rcu_sched(&entry->rcu, ftrace_free_entry_rcu);
+        list_add(&entry->free_list, &free_list);
         }
     }
     __disable_ftrace_function_probe();
@@ -3155,6 +3156,12 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
      * probe is removed, a null hash means *all enabled*.
      */
     ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash);
+    synchronize_sched();
+    list_for_each_entry_safe(entry, p, &free_list, free_list) {
+        list_del(&entry->free_list);
+        ftrace_free_entry(entry);
+    }
+
  out_unlock:
     mutex_unlock(&ftrace_lock);
     free_ftrace_hash(hash);
-- 
cgit v1.2.3
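The shape of the fix is: unlink everything while holding the lock, collect the unlinked entries on a private list, wait for the grace period once, and only then run the (possibly sleeping) free handlers. A userspace sketch of the same shape follows, with a no-op stand-in for synchronize_sched() and a hand-rolled list; the names are illustrative, not the ftrace code.

  #include <stdio.h>
  #include <stdlib.h>

  struct entry {
          int id;
          struct entry *next;     /* doubles as the free_list link here */
  };

  /*
   * Stand-in for synchronize_sched(): in the kernel this waits until every
   * CPU has passed a quiescent state, so nothing can still be using the
   * entries that were just unlinked.
   */
  static void wait_for_grace_period(void) { }

  int main(void)
  {
          struct entry *hash = NULL, *free_list = NULL, *e, *next;
          int i;

          for (i = 0; i < 3; i++) {       /* build a tiny "hash" */
                  e = malloc(sizeof(*e));
                  if (!e)
                          return 1;
                  e->id = i;
                  e->next = hash;
                  hash = e;
          }

          /* Phase 1: unlink from the visible structure, park on free_list. */
          while (hash) {
                  e = hash;
                  hash = e->next;
                  e->next = free_list;
                  free_list = e;
          }

          /* Phase 2: one grace period covers all of them ... */
          wait_for_grace_period();

          /* ... then free, where sleeping free handlers would be safe. */
          for (e = free_list; e; e = next) {
                  next = e->next;
                  printf("freeing entry %d\n", e->id);
                  free(e);
          }
          return 0;
  }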
From 9607a869ee59594f3f7b9f3ac43a11d92bf3f960 Mon Sep 17 00:00:00 2001
From: Chen Gang
Date: Mon, 8 Apr 2013 12:06:44 +0800
Subject: kernel: tracing: Use strlcpy instead of strncpy

Use strlcpy() instead of strncpy(). Unlike strncpy(), strlcpy() always
NUL-terminates the destination, even when the source string is longer
than the destination buffer.

Link: http://lkml.kernel.org/r/51624254.30301@asianux.com

Signed-off-by: Chen Gang
Signed-off-by: Steven Rostedt
---
 kernel/trace/ftrace.c | 4 ++--
 kernel/trace/trace.c  | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 25770824598f..548a1f7ea2c1 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3496,14 +3496,14 @@ static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata;
 
 static int __init set_ftrace_notrace(char *str)
 {
-    strncpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE);
+    strlcpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE);
     return 1;
 }
 __setup("ftrace_notrace=", set_ftrace_notrace);
 
 static int __init set_ftrace_filter(char *str)
 {
-    strncpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE);
+    strlcpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE);
     return 1;
 }
 __setup("ftrace_filter=", set_ftrace_filter);

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 829b2bee24e8..07860b995752 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -125,7 +125,7 @@ static bool allocate_snapshot;
 
 static int __init set_cmdline_ftrace(char *str)
 {
-    strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
+    strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
     default_bootup_tracer = bootup_tracer_buf;
     /* We are using ftrace early, expand it */
     ring_buffer_expanded = true;
@@ -164,7 +164,7 @@ static char *trace_boot_options __initdata;
 
 static int __init set_trace_boot_options(char *str)
 {
-    strncpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
+    strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
     trace_boot_options = trace_boot_options_buf;
     return 0;
 }
-- 
cgit v1.2.3
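The difference matters when the source does not fit: strncpy() fills the destination but leaves it without a terminating NUL, while strlcpy() truncates and always terminates. A standalone illustration follows; strlcpy() is not in glibc, so a minimal local version is defined here, and it is not the kernel's implementation.

  #include <stdio.h>
  #include <string.h>

  /* Minimal local strlcpy(): copy at most size-1 bytes, always NUL-terminate. */
  static size_t my_strlcpy(char *dst, const char *src, size_t size)
  {
          size_t len = strlen(src);

          if (size) {
                  size_t n = len < size - 1 ? len : size - 1;

                  memcpy(dst, src, n);
                  dst[n] = '\0';
          }
          return len;     /* length the result would have had without truncation */
  }

  int main(void)
  {
          char a[8], b[8];
          const char *boot_arg = "function_graph_tracer"; /* longer than 8 bytes */

          strncpy(a, boot_arg, sizeof(a));        /* a[] is NOT NUL-terminated   */
          my_strlcpy(b, boot_arg, sizeof(b));     /* b[] is "functio" plus a NUL */

          printf("strlcpy result: \"%s\" (%zu bytes)\n", b, strlen(b));
          /* printing a with %s here would read past the end of the buffer */
          return 0;
  }

For the boot-parameter buffers touched by this patch, the truncated-but-terminated behavior is exactly what is wanted: a too-long command-line value becomes a shorter string instead of an unterminated buffer.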
From 39e30cd1537937d3c00ef87e865324e981434e5b Mon Sep 17 00:00:00 2001
From: Namhyung Kim
Date: Mon, 1 Apr 2013 21:46:24 +0900
Subject: tracing: Fix off-by-one on allocating stat->pages

The first page was allocated separately, so no need to start from 0.

Link: http://lkml.kernel.org/r/1364820385-32027-2-git-send-email-namhyung@kernel.org
Cc: Frederic Weisbecker
Cc: Namhyung Kim
Cc: stable@vger.kernel.org
Signed-off-by: Namhyung Kim
Signed-off-by: Steven Rostedt
---
 kernel/trace/ftrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 548a1f7ea2c1..c9f31491009f 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -676,7 +676,7 @@ int ftrace_profile_pages_init(struct ftrace_profile_stat *stat)
 
     pages = DIV_ROUND_UP(functions, PROFILES_PER_PAGE);
 
-    for (i = 0; i < pages; i++) {
+    for (i = 1; i < pages; i++) {
         pg->next = (void *)get_zeroed_page(GFP_KERNEL);
         if (!pg->next)
             goto out_free;
-- 
cgit v1.2.3
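The bug pattern is easy to reproduce outside the kernel: when the head of a chain is allocated before the loop, the loop must add pages - 1 more nodes, not pages. Below is a hedged sketch of the corrected shape in plain C; it is not the ftrace code itself, and the names are made up.

  #include <stdio.h>
  #include <stdlib.h>

  struct page {
          struct page *next;
          /* records would live here */
  };

  /*
   * Build a chain of exactly "pages" pages. The first page is allocated up
   * front, so the loop only has to add pages - 1 more (i starts at 1).
   */
  static struct page *alloc_pages_chain(int pages)
  {
          struct page *start, *pg;
          int i;

          start = calloc(1, sizeof(*start));      /* first page, outside the loop */
          if (!start)
                  return NULL;

          pg = start;
          for (i = 1; i < pages; i++) {           /* was "i = 0": one page too many */
                  pg->next = calloc(1, sizeof(*pg));
                  if (!pg->next)
                          return NULL;            /* a real caller would unwind here */
                  pg = pg->next;
          }
          return start;
  }

  int main(void)
  {
          struct page *pg = alloc_pages_chain(4);
          int n = 0;

          for (; pg; pg = pg->next)
                  n++;
          printf("allocated %d pages\n", n);      /* prints 4, not 5 */
          return 0;
  }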
From 9f50afccfdc15d95d7331acddcb0f7703df089ae Mon Sep 17 00:00:00 2001
From: Namhyung Kim
Date: Thu, 11 Apr 2013 16:01:38 +0900
Subject: tracing: Reset ftrace_graph_filter_enabled if count is zero

The ftrace_graph_count can be decreased with a "!" pattern, so the
enabled flag should be updated too.

Link: http://lkml.kernel.org/r/1365663698-2413-1-git-send-email-namhyung@kernel.org
Cc: Frederic Weisbecker
Cc: Ingo Molnar
Cc: Namhyung Kim
Cc: stable@vger.kernel.org
Signed-off-by: Namhyung Kim
Signed-off-by: Steven Rostedt
---
 kernel/trace/ftrace.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index c9f31491009f..9e3198782507 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3792,7 +3792,8 @@ out:
     if (fail)
         return -EINVAL;
 
-    ftrace_graph_filter_enabled = 1;
+    ftrace_graph_filter_enabled = !!(*idx);
+
     return 0;
 }
-- 
cgit v1.2.3

From f1943977e6648c1d42a78eda4ba4429a2bc0b786 Mon Sep 17 00:00:00 2001
From: Namhyung Kim
Date: Wed, 10 Apr 2013 09:18:11 +0900
Subject: tracing: Get rid of unneeded key calculation in ftrace_hash_move()

The computed key is not used anywhere in the function.

Link: http://lkml.kernel.org/r/1365553093-10180-1-git-send-email-namhyung@kernel.org
Signed-off-by: Namhyung Kim
Signed-off-by: Steven Rostedt
---
 kernel/trace/ftrace.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 9e3198782507..3b84fc100788 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1320,7 +1320,6 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
     struct hlist_head *hhd;
     struct ftrace_hash *old_hash;
     struct ftrace_hash *new_hash;
-    unsigned long key;
     int size = src->count;
     int bits = 0;
     int ret;
@@ -1363,10 +1362,6 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
     for (i = 0; i < size; i++) {
         hhd = &src->buckets[i];
         hlist_for_each_entry_safe(entry, tp, tn, hhd, hlist) {
-            if (bits > 0)
-                key = hash_long(entry->ip, bits);
-            else
-                key = 0;
             remove_hash_entry(src, entry);
             __add_hash_entry(new_hash, entry);
         }
-- 
cgit v1.2.3

From 20079ebe73c16b34621abd2993f3d48e2f9336b7 Mon Sep 17 00:00:00 2001
From: Namhyung Kim
Date: Wed, 10 Apr 2013 08:55:50 +0900
Subject: ftrace: Get rid of ftrace_profile_bits

The function profiler's hash size is fixed at 1024, so there is no need
to compute the number of hash bits at runtime. Add and use
FTRACE_PROFILE_HASH_BITS instead, and define the hash size macro in
terms of it.

Link: http://lkml.kernel.org/r/1365551750-4504-1-git-send-email-namhyung@kernel.org
Signed-off-by: Namhyung Kim
Signed-off-by: Steven Rostedt
---
 kernel/trace/ftrace.c | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 3b84fc100788..9b44abb2c5a0 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -486,7 +486,6 @@ struct ftrace_profile_stat {
 #define PROFILES_PER_PAGE                   \
     (PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile))
 
-static int ftrace_profile_bits __read_mostly;
 static int ftrace_profile_enabled __read_mostly;
 
 /* ftrace_profile_lock - synchronize the enable and disable of the profiler */
@@ -494,7 +493,8 @@ static DEFINE_MUTEX(ftrace_profile_lock);
 
 static DEFINE_PER_CPU(struct ftrace_profile_stat, ftrace_profile_stats);
 
-#define FTRACE_PROFILE_HASH_SIZE 1024 /* must be power of 2 */
+#define FTRACE_PROFILE_HASH_BITS 10
+#define FTRACE_PROFILE_HASH_SIZE (1 << FTRACE_PROFILE_HASH_BITS)
 
 static void *
 function_stat_next(void *v, int idx)
@@ -725,13 +725,6 @@ static int ftrace_profile_init_cpu(int cpu)
     if (!stat->hash)
         return -ENOMEM;
 
-    if (!ftrace_profile_bits) {
-        size--;
-
-        for (; size; size >>= 1)
-            ftrace_profile_bits++;
-    }
-
     /* Preallocate the function profiling pages */
     if (ftrace_profile_pages_init(stat) < 0) {
         kfree(stat->hash);
@@ -765,7 +758,7 @@ ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip)
     struct hlist_node *n;
     unsigned long key;
 
-    key = hash_long(ip, ftrace_profile_bits);
+    key = hash_long(ip, FTRACE_PROFILE_HASH_BITS);
 
     hhd = &stat->hash[key];
 
     if (hlist_empty(hhd))
@@ -784,7 +777,7 @@ static void ftrace_add_profile(struct ftrace_profile_stat *stat,
 {
     unsigned long key;
 
-    key = hash_long(rec->ip, ftrace_profile_bits);
+    key = hash_long(rec->ip, FTRACE_PROFILE_HASH_BITS);
 
     hlist_add_head_rcu(&rec->node, &stat->hash[key]);
 }
-- 
cgit v1.2.3
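Since the table size is a fixed power of two, the number of hash bits can be a compile-time constant instead of being recomputed from the size at init time. The following userspace sketch shows the relationship between the bit count, the table size, and the bucket index; hash_ip() is only a rough stand-in for the kernel's hash_long() (a multiplicative hash folded down to "bits" bits), and the constants are illustrative.

  #include <stdio.h>
  #include <stdint.h>

  #define PROFILE_HASH_BITS 10
  #define PROFILE_HASH_SIZE (1 << PROFILE_HASH_BITS)      /* 1024 buckets */

  /* Multiplicative hash folded to "bits" bits, so it always indexes the table. */
  static unsigned long hash_ip(unsigned long ip, unsigned int bits)
  {
          return (unsigned long)((ip * 0x9E3779B97F4A7C15ULL) >> (64 - bits));
  }

  int main(void)
  {
          unsigned long ips[] = { 0x81000000UL, 0x81234567UL };
          unsigned int i;

          for (i = 0; i < sizeof(ips) / sizeof(ips[0]); i++) {
                  unsigned long key = hash_ip(ips[i], PROFILE_HASH_BITS);

                  printf("ip 0x%lx -> bucket %lu of %d\n",
                         ips[i], key, PROFILE_HASH_SIZE);
          }
          return 0;
  }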