| author    | Ingo Molnar <mingo@elte.hu>                      | 2012-02-27 08:44:48 +0100 |
| committer | Ingo Molnar <mingo@elte.hu>                      | 2012-02-27 08:44:48 +0100 |
| commit    | 83b8450317a1441aef5d110cbc4102d53df9ec02 (patch) |                           |
| tree      | 20bea2fec26dc43d03e42c7231599b8cca321258 /kernel |                           |
| parent    | 4ff16c25e2cc48cbe6956e356c38a25ac063a64d (diff)  |                           |
| parent    | 5500fa51199aee770ce53718853732600543619e (diff)  |                           |
Merge branch 'tip/perf/core' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace into perf/core
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/trace/ftrace.c              | 117 |
| -rw-r--r-- | kernel/trace/trace.h               |  38 |
| -rw-r--r-- | kernel/trace/trace_entries.h       |  54 |
| -rw-r--r-- | kernel/trace/trace_event_perf.c    | 208 |
| -rw-r--r-- | kernel/trace/trace_events.c        |  12 |
| -rw-r--r-- | kernel/trace/trace_events_filter.c | 168 |
| -rw-r--r-- | kernel/trace/trace_export.c        |  64 |
| -rw-r--r-- | kernel/trace/trace_kprobe.c        |   8 |
| -rw-r--r-- | kernel/trace/trace_output.c        |  12 |
| -rw-r--r-- | kernel/trace/trace_syscalls.c      |  18 |
10 files changed, 587 insertions, 112 deletions
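
The branch merged below teaches perf to use the ftrace function tracepoint: ftrace grows a new class of "control" ops (FTRACE_OPS_FL_CONTROL) with per-CPU enable/disable state, and the ftrace_event_call reg() callback gains a void *data argument plus TRACE_REG_PERF_OPEN/CLOSE/ADD/DEL operations. As a rough sketch of the client side (not part of this commit: my_ops, my_callback and my_setup are hypothetical; register_ftrace_function(), FTRACE_OPS_FL_CONTROL and ftrace_function_local_enable() are the interfaces the diff itself uses, the latter two declared in include/linux/ftrace.h outside this kernel/-limited diffstat), a control ops would be registered roughly like this:

#include <linux/ftrace.h>

static void my_callback(unsigned long ip, unsigned long parent_ip)
{
	/* runs for every traced function while the ops is enabled */
}

static struct ftrace_ops my_ops = {
	.func	= my_callback,
	.flags	= FTRACE_OPS_FL_CONTROL,	/* per-CPU enable/disable state */
};

static int my_setup(void)
{
	int ret;

	/*
	 * Control ops start with every CPU disabled; see
	 * control_ops_alloc() in the ftrace.c hunk below.
	 */
	ret = register_ftrace_function(&my_ops);
	if (ret)
		return ret;

	/*
	 * Enable on the local CPU, as perf does on TRACE_REG_PERF_ADD.
	 * The local toggle is meant to run with preemption disabled.
	 */
	preempt_disable();
	ftrace_function_local_enable(&my_ops);
	preempt_enable();
	return 0;
}

This mirrors how perf_ftrace_function_register() and perf_ftrace_function_enable() in the trace_event_perf.c hunk wire the same interface into the perf open/add callbacks.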
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index d1499e910fe8..867bd1dd2dd0 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -62,6 +62,8 @@
 #define FTRACE_HASH_DEFAULT_BITS 10
 #define FTRACE_HASH_MAX_BITS 12
 
+#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL)
+
 /* ftrace_enabled is a method to turn ftrace on or off */
 int ftrace_enabled __read_mostly;
 static int last_ftrace_enabled;
@@ -89,12 +91,14 @@ static struct ftrace_ops ftrace_list_end __read_mostly = {
 };
 
 static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end;
+static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end;
 static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
 ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
 static ftrace_func_t __ftrace_trace_function_delay __read_mostly = ftrace_stub;
 ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
 ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
 static struct ftrace_ops global_ops;
+static struct ftrace_ops control_ops;
 
 static void
 ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip);
@@ -168,6 +172,32 @@ static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip)
 }
 #endif
 
+static void control_ops_disable_all(struct ftrace_ops *ops)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		*per_cpu_ptr(ops->disabled, cpu) = 1;
+}
+
+static int control_ops_alloc(struct ftrace_ops *ops)
+{
+	int __percpu *disabled;
+
+	disabled = alloc_percpu(int);
+	if (!disabled)
+		return -ENOMEM;
+
+	ops->disabled = disabled;
+	control_ops_disable_all(ops);
+	return 0;
+}
+
+static void control_ops_free(struct ftrace_ops *ops)
+{
+	free_percpu(ops->disabled);
+}
+
 static void update_global_ops(void)
 {
 	ftrace_func_t func;
@@ -259,6 +289,26 @@ static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
 	return 0;
 }
 
+static void add_ftrace_list_ops(struct ftrace_ops **list,
+				struct ftrace_ops *main_ops,
+				struct ftrace_ops *ops)
+{
+	int first = *list == &ftrace_list_end;
+	add_ftrace_ops(list, ops);
+	if (first)
+		add_ftrace_ops(&ftrace_ops_list, main_ops);
+}
+
+static int remove_ftrace_list_ops(struct ftrace_ops **list,
+				  struct ftrace_ops *main_ops,
+				  struct ftrace_ops *ops)
+{
+	int ret = remove_ftrace_ops(list, ops);
+	if (!ret && *list == &ftrace_list_end)
+		ret = remove_ftrace_ops(&ftrace_ops_list, main_ops);
+	return ret;
+}
+
 static int __register_ftrace_function(struct ftrace_ops *ops)
 {
 	if (ftrace_disabled)
@@ -270,15 +320,20 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
 	if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED))
 		return -EBUSY;
 
+	/* We don't support both control and global flags set. */
+	if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK)
+		return -EINVAL;
+
 	if (!core_kernel_data((unsigned long)ops))
 		ops->flags |= FTRACE_OPS_FL_DYNAMIC;
 
 	if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
-		int first = ftrace_global_list == &ftrace_list_end;
-		add_ftrace_ops(&ftrace_global_list, ops);
+		add_ftrace_list_ops(&ftrace_global_list, &global_ops, ops);
 		ops->flags |= FTRACE_OPS_FL_ENABLED;
-		if (first)
-			add_ftrace_ops(&ftrace_ops_list, &global_ops);
+	} else if (ops->flags & FTRACE_OPS_FL_CONTROL) {
+		if (control_ops_alloc(ops))
+			return -ENOMEM;
+		add_ftrace_list_ops(&ftrace_control_list, &control_ops, ops);
 	} else
 		add_ftrace_ops(&ftrace_ops_list, ops);
 
@@ -302,11 +357,23 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
 		return -EINVAL;
 
 	if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
-		ret = remove_ftrace_ops(&ftrace_global_list, ops);
-		if (!ret && ftrace_global_list == &ftrace_list_end)
-			ret = remove_ftrace_ops(&ftrace_ops_list, &global_ops);
+		ret = remove_ftrace_list_ops(&ftrace_global_list,
+					     &global_ops, ops);
 		if (!ret)
 			ops->flags &= ~FTRACE_OPS_FL_ENABLED;
+	} else if (ops->flags & FTRACE_OPS_FL_CONTROL) {
+		ret = remove_ftrace_list_ops(&ftrace_control_list,
+					     &control_ops, ops);
+		if (!ret) {
+			/*
+			 * The ftrace_ops is now removed from the list,
+			 * so there'll be no new users. We must ensure
+			 * all current users are done before we free
+			 * the control data.
+			 */
+			synchronize_sched();
+			control_ops_free(ops);
+		}
 	} else
 		ret = remove_ftrace_ops(&ftrace_ops_list, ops);
 
@@ -1119,6 +1186,12 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash)
 	call_rcu_sched(&hash->rcu, __free_ftrace_hash_rcu);
 }
 
+void ftrace_free_filter(struct ftrace_ops *ops)
+{
+	free_ftrace_hash(ops->filter_hash);
+	free_ftrace_hash(ops->notrace_hash);
+}
+
 static struct ftrace_hash *alloc_ftrace_hash(int size_bits)
 {
 	struct ftrace_hash *hash;
@@ -3874,6 +3947,36 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 static void
+ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip)
+{
+	struct ftrace_ops *op;
+
+	if (unlikely(trace_recursion_test(TRACE_CONTROL_BIT)))
+		return;
+
+	/*
+	 * Some of the ops may be dynamically allocated,
+	 * they must be freed after a synchronize_sched().
+	 */
+	preempt_disable_notrace();
+	trace_recursion_set(TRACE_CONTROL_BIT);
+	op = rcu_dereference_raw(ftrace_control_list);
+	while (op != &ftrace_list_end) {
+		if (!ftrace_function_local_disabled(op) &&
+		    ftrace_ops_test(op, ip))
+			op->func(ip, parent_ip);
+
+		op = rcu_dereference_raw(op->next);
+	};
+	trace_recursion_clear(TRACE_CONTROL_BIT);
+	preempt_enable_notrace();
+}
+
+static struct ftrace_ops control_ops = {
+	.func = ftrace_ops_control_func,
+};
+
+static void
 ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip)
 {
 	struct ftrace_ops *op;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index b93ecbadad6d..54faec790bc1 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -56,17 +56,23 @@ enum trace_type {
 #define F_STRUCT(args...)		args
 
 #undef FTRACE_ENTRY
-#define FTRACE_ENTRY(name, struct_name, id, tstruct, print)	\
-	struct struct_name {					\
-		struct trace_entry	ent;			\
-		tstruct						\
+#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter)	\
+	struct struct_name {						\
+		struct trace_entry	ent;				\
+		tstruct							\
 	}
 
 #undef TP_ARGS
 #define TP_ARGS(args...)	args
 
 #undef FTRACE_ENTRY_DUP
-#define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk)
+#define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk, filter)
+
+#undef FTRACE_ENTRY_REG
+#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print,	\
+			 filter, regfn) \
+	FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \
+		     filter)
 
 #include "trace_entries.h"
 
@@ -288,6 +294,8 @@ struct tracer {
 /* for function tracing recursion */
 #define TRACE_INTERNAL_BIT		(1<<11)
 #define TRACE_GLOBAL_BIT		(1<<12)
+#define TRACE_CONTROL_BIT		(1<<13)
+
 /*
  * Abuse of the trace_recursion.
  * As we need a way to maintain state if we are tracing the function
@@ -589,6 +597,8 @@ static inline int ftrace_trace_task(struct task_struct *task)
 static inline int ftrace_is_dead(void) { return 0; }
 #endif
 
+int ftrace_event_is_function(struct ftrace_event_call *call);
+
 /*
  * struct trace_parser - servers for reading the user input separated by spaces
  * @cont: set if the input is not complete - no final space char was found
@@ -766,9 +776,7 @@ struct filter_pred {
 	u64 			val;
 	struct regex		regex;
 	unsigned short		*ops;
-#ifdef CONFIG_FTRACE_STARTUP_TEST
 	struct ftrace_event_field *field;
-#endif
 	int 			offset;
 	int 			not;
 	int 			op;
@@ -818,12 +826,22 @@ extern const char *__start___trace_bprintk_fmt[];
 extern const char *__stop___trace_bprintk_fmt[];
 
 #undef FTRACE_ENTRY
-#define FTRACE_ENTRY(call, struct_name, id, tstruct, print)		\
+#define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter)	\
 	extern struct ftrace_event_call					\
 	__attribute__((__aligned__(4))) event_##call;
 #undef FTRACE_ENTRY_DUP
-#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print)		\
-	FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
+#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print, filter)	\
+	FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print), \
+		     filter)
 #include "trace_entries.h"
 
+#ifdef CONFIG_PERF_EVENTS
+#ifdef CONFIG_FUNCTION_TRACER
+int perf_ftrace_event_register(struct ftrace_event_call *call,
+			       enum trace_reg type, void *data);
+#else
+#define perf_ftrace_event_register NULL
+#endif /* CONFIG_FUNCTION_TRACER */
+#endif /* CONFIG_PERF_EVENTS */
+
 #endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index 93365907f219..d91eb0541b3a 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -55,7 +55,7 @@
 /*
  * Function trace entry - function address and parent function address:
  */
-FTRACE_ENTRY(function, ftrace_entry,
+FTRACE_ENTRY_REG(function, ftrace_entry,
 
 	TRACE_FN,
 
@@ -64,7 +64,11 @@ FTRACE_ENTRY(function, ftrace_entry,
 		__field(	unsigned long,	parent_ip	)
 	),
 
-	F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip)
+	F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip),
+
+	FILTER_TRACE_FN,
+
+	perf_ftrace_event_register
 );
 
 /* Function call entry */
@@ -78,7 +82,9 @@ FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry,
 		__field_desc(	int,		graph_ent,	depth		)
 	),
 
-	F_printk("--> %lx (%d)", __entry->func, __entry->depth)
+	F_printk("--> %lx (%d)", __entry->func, __entry->depth),
+
+	FILTER_OTHER
 );
 
 /* Function return entry */
@@ -98,7 +104,9 @@ FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry,
 
 	F_printk("<-- %lx (%d) (start: %llx  end: %llx) over: %d",
 		 __entry->func, __entry->depth,
 		 __entry->calltime, __entry->rettime,
-		 __entry->depth)
+		 __entry->depth),
+
+	FILTER_OTHER
 );
 
 /*
@@ -127,8 +135,9 @@ FTRACE_ENTRY(context_switch, ctx_switch_entry,
 	F_printk("%u:%u:%u  ==> %u:%u:%u [%03u]",
 		 __entry->prev_pid, __entry->prev_prio, __entry->prev_state,
 		 __entry->next_pid, __entry->next_prio, __entry->next_state,
-		 __entry->next_cpu
-		)
+		 __entry->next_cpu),
+
+	FILTER_OTHER
 );
 
 /*
@@ -146,8 +155,9 @@ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry,
 	F_printk("%u:%u:%u  ==+ %u:%u:%u [%03u]",
 		 __entry->prev_pid, __entry->prev_prio, __entry->prev_state,
 		 __entry->next_pid, __entry->next_prio, __entry->next_state,
-		 __entry->next_cpu
-		)
+		 __entry->next_cpu),
+
+	FILTER_OTHER
 );
 
 /*
@@ -169,7 +179,9 @@ FTRACE_ENTRY(kernel_stack, stack_entry,
 		 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n",
 		 __entry->caller[0], __entry->caller[1], __entry->caller[2],
 		 __entry->caller[3], __entry->caller[4], __entry->caller[5],
-		 __entry->caller[6], __entry->caller[7])
+		 __entry->caller[6], __entry->caller[7]),
+
+	FILTER_OTHER
 );
 
 FTRACE_ENTRY(user_stack, userstack_entry,
@@ -185,7 +197,9 @@ FTRACE_ENTRY(user_stack, userstack_entry,
 		 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n",
 		 __entry->caller[0], __entry->caller[1], __entry->caller[2],
 		 __entry->caller[3], __entry->caller[4], __entry->caller[5],
-		 __entry->caller[6], __entry->caller[7])
+		 __entry->caller[6], __entry->caller[7]),
+
+	FILTER_OTHER
 );
 
 /*
@@ -202,7 +216,9 @@ FTRACE_ENTRY(bprint, bprint_entry,
 	),
 
 	F_printk("%08lx fmt:%p",
-		 __entry->ip, __entry->fmt)
+		 __entry->ip, __entry->fmt),
+
+	FILTER_OTHER
 );
 
 FTRACE_ENTRY(print, print_entry,
@@ -215,7 +231,9 @@ FTRACE_ENTRY(print, print_entry,
 	),
 
 	F_printk("%08lx %s",
-		 __entry->ip, __entry->buf)
+		 __entry->ip, __entry->buf),
+
+	FILTER_OTHER
 );
 
 FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw,
@@ -234,7 +252,9 @@ FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw,
 
 	F_printk("%lx %lx %lx %d %x %x",
 		 (unsigned long)__entry->phys, __entry->value, __entry->pc,
-		 __entry->map_id, __entry->opcode, __entry->width)
+		 __entry->map_id, __entry->opcode, __entry->width),
+
+	FILTER_OTHER
 );
 
 FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
@@ -252,7 +272,9 @@ FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
 
 	F_printk("%lx %lx %lx %d %x",
 		 (unsigned long)__entry->phys, __entry->virt, __entry->len,
-		 __entry->map_id, __entry->opcode)
+		 __entry->map_id, __entry->opcode),
+
+	FILTER_OTHER
 );
 
@@ -272,6 +294,8 @@ FTRACE_ENTRY(branch, trace_branch,
 
 	F_printk("%u:%s:%s (%u)",
 		 __entry->line,
-		 __entry->func, __entry->file, __entry->correct)
+		 __entry->func, __entry->file, __entry->correct),
+
+	FILTER_OTHER
 );
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 19a359d5e6d5..fee3752ae8f6 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -24,6 +24,11 @@ static int	total_ref_count;
 static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
 				 struct perf_event *p_event)
 {
+	/* The ftrace function trace is allowed only for root. */
+	if (ftrace_event_is_function(tp_event) &&
+	    perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
 	/* No tracing, just counting, so no obvious leak */
 	if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
 		return 0;
@@ -44,23 +49,17 @@ static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
 	return 0;
 }
 
-static int perf_trace_event_init(struct ftrace_event_call *tp_event,
-				 struct perf_event *p_event)
+static int perf_trace_event_reg(struct ftrace_event_call *tp_event,
+				struct perf_event *p_event)
 {
 	struct hlist_head __percpu *list;
-	int ret;
+	int ret = -ENOMEM;
 	int cpu;
 
-	ret = perf_trace_event_perm(tp_event, p_event);
-	if (ret)
-		return ret;
-
 	p_event->tp_event = tp_event;
 	if (tp_event->perf_refcount++ > 0)
 		return 0;
 
-	ret = -ENOMEM;
-
 	list = alloc_percpu(struct hlist_head);
 	if (!list)
 		goto fail;
@@ -83,7 +82,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event,
 		}
 	}
 
-	ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
+	ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER, NULL);
 	if (ret)
 		goto fail;
 
@@ -108,6 +107,69 @@ fail:
 	return ret;
 }
 
+static void perf_trace_event_unreg(struct perf_event *p_event)
+{
+	struct ftrace_event_call *tp_event = p_event->tp_event;
+	int i;
+
+	if (--tp_event->perf_refcount > 0)
+		goto out;
+
+	tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER, NULL);
+
+	/*
+	 * Ensure our callback won't be called anymore. The buffers
+	 * will be freed after that.
+	 */
+	tracepoint_synchronize_unregister();
+
+	free_percpu(tp_event->perf_events);
+	tp_event->perf_events = NULL;
+
+	if (!--total_ref_count) {
+		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
+			free_percpu(perf_trace_buf[i]);
+			perf_trace_buf[i] = NULL;
+		}
+	}
+out:
+	module_put(tp_event->mod);
+}
+
+static int perf_trace_event_open(struct perf_event *p_event)
+{
+	struct ftrace_event_call *tp_event = p_event->tp_event;
+	return tp_event->class->reg(tp_event, TRACE_REG_PERF_OPEN, p_event);
+}
+
+static void perf_trace_event_close(struct perf_event *p_event)
+{
+	struct ftrace_event_call *tp_event = p_event->tp_event;
+	tp_event->class->reg(tp_event, TRACE_REG_PERF_CLOSE, p_event);
+}
+
+static int perf_trace_event_init(struct ftrace_event_call *tp_event,
+				 struct perf_event *p_event)
+{
+	int ret;
+
+	ret = perf_trace_event_perm(tp_event, p_event);
+	if (ret)
+		return ret;
+
+	ret = perf_trace_event_reg(tp_event, p_event);
+	if (ret)
+		return ret;
+
+	ret = perf_trace_event_open(p_event);
+	if (ret) {
+		perf_trace_event_unreg(p_event);
+		return ret;
+	}
+
+	return 0;
+}
+
 int perf_trace_init(struct perf_event *p_event)
 {
 	struct ftrace_event_call *tp_event;
@@ -130,6 +192,14 @@ int perf_trace_init(struct perf_event *p_event)
 	return ret;
 }
 
+void perf_trace_destroy(struct perf_event *p_event)
+{
+	mutex_lock(&event_mutex);
+	perf_trace_event_close(p_event);
+	perf_trace_event_unreg(p_event);
+	mutex_unlock(&event_mutex);
+}
+
 int perf_trace_add(struct perf_event *p_event, int flags)
 {
 	struct ftrace_event_call *tp_event = p_event->tp_event;
@@ -146,43 +216,14 @@ int perf_trace_add(struct perf_event *p_event, int flags)
 	list = this_cpu_ptr(pcpu_list);
 	hlist_add_head_rcu(&p_event->hlist_entry, list);
 
-	return 0;
+	return tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event);
 }
 
 void perf_trace_del(struct perf_event *p_event, int flags)
 {
-	hlist_del_rcu(&p_event->hlist_entry);
-}
-
-void perf_trace_destroy(struct perf_event *p_event)
-{
 	struct ftrace_event_call *tp_event = p_event->tp_event;
-	int i;
-
-	mutex_lock(&event_mutex);
-	if (--tp_event->perf_refcount > 0)
-		goto out;
-
-	tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
-
-	/*
-	 * Ensure our callback won't be called anymore. The buffers
-	 * will be freed after that.
-	 */
-	tracepoint_synchronize_unregister();
-
-	free_percpu(tp_event->perf_events);
-	tp_event->perf_events = NULL;
-
-	if (!--total_ref_count) {
-		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
-			free_percpu(perf_trace_buf[i]);
-			perf_trace_buf[i] = NULL;
-		}
-	}
-out:
-	module_put(tp_event->mod);
-	mutex_unlock(&event_mutex);
+	hlist_del_rcu(&p_event->hlist_entry);
+	tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event);
 }
 
 __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
@@ -214,3 +255,86 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
 	return raw_data;
 }
 EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
+
+#ifdef CONFIG_FUNCTION_TRACER
+static void
+perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip)
+{
+	struct ftrace_entry *entry;
+	struct hlist_head *head;
+	struct pt_regs regs;
+	int rctx;
+
+#define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \
+		    sizeof(u64)) - sizeof(u32))
+
+	BUILD_BUG_ON(ENTRY_SIZE > PERF_MAX_TRACE_SIZE);
+
+	perf_fetch_caller_regs(&regs);
+
+	entry = perf_trace_buf_prepare(ENTRY_SIZE, TRACE_FN, NULL, &rctx);
+	if (!entry)
+		return;
+
+	entry->ip = ip;
+	entry->parent_ip = parent_ip;
+
+	head = this_cpu_ptr(event_function.perf_events);
+	perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0,
+			      1, &regs, head);
+
+#undef ENTRY_SIZE
+}
+
+static int perf_ftrace_function_register(struct perf_event *event)
+{
+	struct ftrace_ops *ops = &event->ftrace_ops;
+
+	ops->flags |= FTRACE_OPS_FL_CONTROL;
+	ops->func = perf_ftrace_function_call;
+	return register_ftrace_function(ops);
+}
+
+static int perf_ftrace_function_unregister(struct perf_event *event)
+{
+	struct ftrace_ops *ops = &event->ftrace_ops;
+	int ret = unregister_ftrace_function(ops);
+	ftrace_free_filter(ops);
+	return ret;
+}
+
+static void perf_ftrace_function_enable(struct perf_event *event)
+{
+	ftrace_function_local_enable(&event->ftrace_ops);
+}
+
+static void perf_ftrace_function_disable(struct perf_event *event)
+{
+	ftrace_function_local_disable(&event->ftrace_ops);
+}
+
+int perf_ftrace_event_register(struct ftrace_event_call *call,
+			       enum trace_reg type, void *data)
+{
+	switch (type) {
+	case TRACE_REG_REGISTER:
+	case TRACE_REG_UNREGISTER:
+		break;
+	case TRACE_REG_PERF_REGISTER:
+	case TRACE_REG_PERF_UNREGISTER:
+		return 0;
+	case TRACE_REG_PERF_OPEN:
+		return perf_ftrace_function_register(data);
+	case TRACE_REG_PERF_CLOSE:
+		return perf_ftrace_function_unregister(data);
+	case TRACE_REG_PERF_ADD:
+		perf_ftrace_function_enable(data);
+		return 0;
+	case TRACE_REG_PERF_DEL:
+		perf_ftrace_function_disable(data);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+#endif /* CONFIG_FUNCTION_TRACER */
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index c212a7f934ec..079a93ae8a9d 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -147,7 +147,8 @@ int trace_event_raw_init(struct ftrace_event_call *call)
 }
 EXPORT_SYMBOL_GPL(trace_event_raw_init);
 
-int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type)
+int ftrace_event_reg(struct ftrace_event_call *call,
+		     enum trace_reg type, void *data)
 {
 	switch (type) {
 	case TRACE_REG_REGISTER:
@@ -170,6 +171,11 @@ int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type)
 					    call->class->perf_probe,
 					    call);
 		return 0;
+	case TRACE_REG_PERF_OPEN:
+	case TRACE_REG_PERF_CLOSE:
+	case TRACE_REG_PERF_ADD:
+	case TRACE_REG_PERF_DEL:
+		return 0;
 #endif
 	}
 	return 0;
@@ -209,7 +215,7 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call,
 				tracing_stop_cmdline_record();
 				call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD;
 			}
-			call->class->reg(call, TRACE_REG_UNREGISTER);
+			call->class->reg(call, TRACE_REG_UNREGISTER, NULL);
 		}
 		break;
 	case 1:
@@ -218,7 +224,7 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call,
 				tracing_start_cmdline_record();
 				call->flags |= TRACE_EVENT_FL_RECORDED_CMD;
 			}
-			ret = call->class->reg(call, TRACE_REG_REGISTER);
+			ret = call->class->reg(call, TRACE_REG_REGISTER, NULL);
 			if (ret) {
 				tracing_stop_cmdline_record();
 				pr_info("event trace: Could not enable event "
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 76afaee99dbc..431dba8b7542 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -81,6 +81,7 @@ enum {
 	FILT_ERR_TOO_MANY_PREDS,
 	FILT_ERR_MISSING_FIELD,
 	FILT_ERR_INVALID_FILTER,
+	FILT_ERR_IP_FIELD_ONLY,
 };
 
 static char *err_text[] = {
@@ -96,6 +97,7 @@ static char *err_text[] = {
 	"Too many terms in predicate expression",
 	"Missing field name and/or value",
 	"Meaningless filter expression",
+	"Only 'ip' field is supported for function trace",
 };
 
 struct opstack_op {
@@ -899,6 +901,11 @@ int filter_assign_type(const char *type)
 	return FILTER_OTHER;
 }
 
+static bool is_function_field(struct ftrace_event_field *field)
+{
+	return field->filter_type == FILTER_TRACE_FN;
+}
+
 static bool is_string_field(struct ftrace_event_field *field)
 {
 	return field->filter_type == FILTER_DYN_STRING ||
@@ -986,6 +993,11 @@ static int init_pred(struct filter_parse_state *ps,
 			fn = filter_pred_strloc;
 		else
 			fn = filter_pred_pchar;
+	} else if (is_function_field(field)) {
+		if (strcmp(field->name, "ip")) {
+			parse_error(ps, FILT_ERR_IP_FIELD_ONLY, 0);
+			return -EINVAL;
+		}
 	} else {
 		if (field->is_signed)
 			ret = strict_strtoll(pred->regex.pattern, 0, &val);
@@ -1333,10 +1345,7 @@ static struct filter_pred *create_pred(struct filter_parse_state *ps,
 
 	strcpy(pred.regex.pattern, operand2);
 	pred.regex.len = strlen(pred.regex.pattern);
-
-#ifdef CONFIG_FTRACE_STARTUP_TEST
 	pred.field = field;
-#endif
 	return init_pred(ps, field, &pred) ? NULL : &pred;
 }
 
@@ -1949,6 +1958,148 @@ void ftrace_profile_free_filter(struct perf_event *event)
 	__free_filter(filter);
 }
 
+struct function_filter_data {
+	struct ftrace_ops *ops;
+	int first_filter;
+	int first_notrace;
+};
+
+#ifdef CONFIG_FUNCTION_TRACER
+static char **
+ftrace_function_filter_re(char *buf, int len, int *count)
+{
+	char *str, *sep, **re;
+
+	str = kstrndup(buf, len, GFP_KERNEL);
+	if (!str)
+		return NULL;
+
+	/*
+	 * The argv_split function takes white space
+	 * as a separator, so convert ',' into spaces.
+	 */
+	while ((sep = strchr(str, ',')))
+		*sep = ' ';
+
+	re = argv_split(GFP_KERNEL, str, count);
+	kfree(str);
+	return re;
+}
+
+static int ftrace_function_set_regexp(struct ftrace_ops *ops, int filter,
+				      int reset, char *re, int len)
+{
+	int ret;
+
+	if (filter)
+		ret = ftrace_set_filter(ops, re, len, reset);
+	else
+		ret = ftrace_set_notrace(ops, re, len, reset);
+
+	return ret;
+}
+
+static int __ftrace_function_set_filter(int filter, char *buf, int len,
+					struct function_filter_data *data)
+{
+	int i, re_cnt, ret;
+	int *reset;
+	char **re;
+
+	reset = filter ? &data->first_filter : &data->first_notrace;
+
+	/*
+	 * The 'ip' field could have multiple filters set, separated
+	 * either by space or comma. We first cut the filter and apply
+	 * all pieces separatelly.
+	 */
+	re = ftrace_function_filter_re(buf, len, &re_cnt);
+	if (!re)
+		return -EINVAL;
+
+	for (i = 0; i < re_cnt; i++) {
+		ret = ftrace_function_set_regexp(data->ops, filter, *reset,
+						 re[i], strlen(re[i]));
+		if (ret)
+			break;
+
+		if (*reset)
+			*reset = 0;
+	}
+
+	argv_free(re);
+	return ret;
+}
+
+static int ftrace_function_check_pred(struct filter_pred *pred, int leaf)
+{
+	struct ftrace_event_field *field = pred->field;
+
+	if (leaf) {
+		/*
+		 * Check the leaf predicate for function trace, verify:
+		 *  - only '==' and '!=' is used
+		 *  - the 'ip' field is used
+		 */
+		if ((pred->op != OP_EQ) && (pred->op != OP_NE))
+			return -EINVAL;
+
+		if (strcmp(field->name, "ip"))
+			return -EINVAL;
+	} else {
+		/*
+		 * Check the non leaf predicate for function trace, verify:
+		 *  - only '||' is used
+		*/
+		if (pred->op != OP_OR)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ftrace_function_set_filter_cb(enum move_type move,
+					 struct filter_pred *pred,
+					 int *err, void *data)
+{
+	/* Checking the node is valid for function trace. */
+	if ((move != MOVE_DOWN) ||
+	    (pred->left != FILTER_PRED_INVALID)) {
+		*err = ftrace_function_check_pred(pred, 0);
+	} else {
+		*err = ftrace_function_check_pred(pred, 1);
+		if (*err)
+			return WALK_PRED_ABORT;
+
+		*err = __ftrace_function_set_filter(pred->op == OP_EQ,
+						    pred->regex.pattern,
+						    pred->regex.len,
+						    data);
+	}
+
+	return (*err) ? WALK_PRED_ABORT : WALK_PRED_DEFAULT;
+}
+
+static int ftrace_function_set_filter(struct perf_event *event,
+				      struct event_filter *filter)
+{
+	struct function_filter_data data = {
+		.first_filter  = 1,
+		.first_notrace = 1,
+		.ops           = &event->ftrace_ops,
+	};
+
+	return walk_pred_tree(filter->preds, filter->root,
+			      ftrace_function_set_filter_cb, &data);
+}
+#else
+static int ftrace_function_set_filter(struct perf_event *event,
+				      struct event_filter *filter)
+{
+	return -ENODEV;
+}
+#endif /* CONFIG_FUNCTION_TRACER */
+
 int ftrace_profile_set_filter(struct perf_event *event, int event_id,
 			      char *filter_str)
 {
@@ -1969,9 +2120,16 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
 		goto out_unlock;
 
 	err = create_filter(call, filter_str, false, &filter);
-	if (!err)
-		event->filter = filter;
+	if (err)
+		goto free_filter;
+
+	if (ftrace_event_is_function(call))
+		err = ftrace_function_set_filter(event, filter);
 	else
+		event->filter = filter;
+
+free_filter:
+	if (err || ftrace_event_is_function(call))
 		__free_filter(filter);
 
 out_unlock:
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index bbeec31e0ae3..7b46c9bd22ae 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -18,6 +18,16 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM	ftrace
 
+/*
+ * The FTRACE_ENTRY_REG macro allows ftrace entry to define register
+ * function and thus become accesible via perf.
+ */
+#undef FTRACE_ENTRY_REG
+#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, \
+			 filter, regfn) \
+	FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \
+		     filter)
+
 /* not needed for this file */
 #undef __field_struct
 #define __field_struct(type, item)
@@ -44,21 +54,22 @@
 #define F_printk(fmt, args...) fmt, args
 
 #undef FTRACE_ENTRY
-#define FTRACE_ENTRY(name, struct_name, id, tstruct, print)	\
-struct ____ftrace_##name {					\
-	tstruct							\
-};								\
-static void __always_unused ____ftrace_check_##name(void)	\
-{								\
-	struct ____ftrace_##name *__entry = NULL;		\
-								\
-	/* force compile-time check on F_printk() */		\
-	printk(print);						\
+#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter)	\
+struct ____ftrace_##name {						\
+	tstruct								\
+};									\
+static void __always_unused ____ftrace_check_##name(void)		\
+{									\
+	struct ____ftrace_##name *__entry = NULL;			\
+									\
+	/* force compile-time check on F_printk() */			\
+	printk(print);							\
 }
 
 #undef FTRACE_ENTRY_DUP
-#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print)	\
-	FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print))
+#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print, filter)	\
+	FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \
+		     filter)
 
 #include "trace_entries.h"
 
@@ -67,7 +78,7 @@ static void __always_unused ____ftrace_check_##name(void)	\
 	ret = trace_define_field(event_call, #type, #item,		\
 				 offsetof(typeof(field), item),		\
 				 sizeof(field.item),			\
-				 is_signed_type(type), FILTER_OTHER);	\
+				 is_signed_type(type), filter_type);	\
 	if (ret)							\
 		return ret;
 
@@ -77,7 +88,7 @@ static void __always_unused ____ftrace_check_##name(void)	\
 				 offsetof(typeof(field),		\
 					  container.item),		\
 				 sizeof(field.container.item),		\
-				 is_signed_type(type), FILTER_OTHER);	\
+				 is_signed_type(type), filter_type);	\
 	if (ret)							\
 		return ret;
 
@@ -91,7 +102,7 @@ static void __always_unused ____ftrace_check_##name(void)	\
 		ret = trace_define_field(event_call, event_storage, #item, \
 				 offsetof(typeof(field), item),		\
 				 sizeof(field.item),			\
-				 is_signed_type(type), FILTER_OTHER);	\
+				 is_signed_type(type), filter_type);	\
 		mutex_unlock(&event_storage_mutex);			\
 		if (ret)						\
 			return ret;					\
@@ -104,7 +115,7 @@ static void __always_unused ____ftrace_check_##name(void)	\
 				 offsetof(typeof(field),		\
 					  container.item),		\
 				 sizeof(field.container.item),		\
-				 is_signed_type(type), FILTER_OTHER);	\
+				 is_signed_type(type), filter_type);	\
 	if (ret)							\
 		return ret;
 
@@ -112,17 +123,18 @@ static void __always_unused ____ftrace_check_##name(void)	\
 #define __dynamic_array(type, item)					\
 	ret = trace_define_field(event_call, #type, #item,		\
 				 offsetof(typeof(field), item),		\
-				 0, is_signed_type(type), FILTER_OTHER);\
+				 0, is_signed_type(type), filter_type);\
 	if (ret)							\
 		return ret;
 
 #undef FTRACE_ENTRY
-#define FTRACE_ENTRY(name, struct_name, id, tstruct, print)		\
+#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter)	\
 int									\
 ftrace_define_fields_##name(struct ftrace_event_call *event_call)	\
 {									\
 	struct struct_name field;					\
 	int ret;							\
+	int filter_type = filter;					\
 									\
 	tstruct;							\
 									\
@@ -152,13 +164,15 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call)	\
 
 #undef F_printk
 #define F_printk(fmt, args...) #fmt ", "  __stringify(args)
-#undef FTRACE_ENTRY
-#define FTRACE_ENTRY(call, struct_name, etype, tstruct, print)		\
+#undef FTRACE_ENTRY_REG
+#define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\
+			 regfn)						\
 									\
 struct ftrace_event_class event_class_ftrace_##call = {		\
 	.system			= __stringify(TRACE_SYSTEM),		\
 	.define_fields		= ftrace_define_fields_##call,		\
 	.fields			= LIST_HEAD_INIT(event_class_ftrace_##call.fields),\
+	.reg			= regfn,				\
 };									\
 									\
 struct ftrace_event_call __used event_##call = {			\
@@ -170,4 +184,14 @@ struct ftrace_event_call __used event_##call = {			\
 struct ftrace_event_call __used						\
 __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call;
 
+#undef FTRACE_ENTRY
+#define FTRACE_ENTRY(call, struct_name, etype, tstruct, print, filter)	\
+	FTRACE_ENTRY_REG(call, struct_name, etype,			\
+			 PARAMS(tstruct), PARAMS(print), filter, NULL)
+
+int ftrace_event_is_function(struct ftrace_event_call *call)
+{
+	return call == &event_function;
+}
+
 #include "trace_entries.h"
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 00d527c945a4..580a05ec926b 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1892,7 +1892,8 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
 #endif	/* CONFIG_PERF_EVENTS */
 
 static __kprobes
-int kprobe_register(struct ftrace_event_call *event, enum trace_reg type)
+int kprobe_register(struct ftrace_event_call *event,
+		    enum trace_reg type, void *data)
 {
 	struct trace_probe *tp = (struct trace_probe *)event->data;
 
@@ -1909,6 +1910,11 @@ int kprobe_register(struct ftrace_event_call *event, enum trace_reg type)
 	case TRACE_REG_PERF_UNREGISTER:
 		disable_trace_probe(tp, TP_FLAG_PROFILE);
 		return 0;
+	case TRACE_REG_PERF_OPEN:
+	case TRACE_REG_PERF_CLOSE:
+	case TRACE_REG_PERF_ADD:
+	case TRACE_REG_PERF_DEL:
+		return 0;
 #endif
 	}
 	return 0;
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 0d6ff3555942..c5a01873567d 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -300,7 +300,7 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
 	unsigned long mask;
 	const char *str;
 	const char *ret = p->buffer + p->len;
-	int i;
+	int i, first = 1;
 
 	for (i = 0;  flag_array[i].name && flags; i++) {
@@ -310,14 +310,16 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
 		str = flag_array[i].name;
 		flags &= ~mask;
 
-		if (p->len && delim)
+		if (!first && delim)
 			trace_seq_puts(p, delim);
+		else
+			first = 0;
 		trace_seq_puts(p, str);
 	}
 
 	/* check for left over flags */
 	if (flags) {
-		if (p->len && delim)
+		if (!first && delim)
 			trace_seq_puts(p, delim);
 		trace_seq_printf(p, "0x%lx", flags);
 	}
@@ -344,7 +346,7 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
 		break;
 	}
 
-	if (!p->len)
+	if (ret == (const char *)(p->buffer + p->len))
 		trace_seq_printf(p, "0x%lx", val);
 
 	trace_seq_putc(p, 0);
@@ -370,7 +372,7 @@ ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val,
 		break;
 	}
 
-	if (!p->len)
+	if (ret == (const char *)(p->buffer + p->len))
 		trace_seq_printf(p, "0x%llx", val);
 
 	trace_seq_putc(p, 0);
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 43500153dd1e..96fc73369099 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -17,9 +17,9 @@ static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
 static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
 
 static int syscall_enter_register(struct ftrace_event_call *event,
-				 enum trace_reg type);
+				 enum trace_reg type, void *data);
 static int syscall_exit_register(struct ftrace_event_call *event,
-				 enum trace_reg type);
+				 enum trace_reg type, void *data);
 
 static int syscall_enter_define_fields(struct ftrace_event_call *call);
 static int syscall_exit_define_fields(struct ftrace_event_call *call);
@@ -649,7 +649,7 @@ void perf_sysexit_disable(struct ftrace_event_call *call)
 #endif /* CONFIG_PERF_EVENTS */
 
 static int syscall_enter_register(struct ftrace_event_call *event,
-				 enum trace_reg type)
+				 enum trace_reg type, void *data)
 {
 	switch (type) {
 	case TRACE_REG_REGISTER:
@@ -664,13 +664,18 @@ static int syscall_enter_register(struct ftrace_event_call *event,
 	case TRACE_REG_PERF_UNREGISTER:
 		perf_sysenter_disable(event);
 		return 0;
+	case TRACE_REG_PERF_OPEN:
+	case TRACE_REG_PERF_CLOSE:
+	case TRACE_REG_PERF_ADD:
+	case TRACE_REG_PERF_DEL:
+		return 0;
 #endif
 	}
 	return 0;
 }
 
 static int syscall_exit_register(struct ftrace_event_call *event,
-				 enum trace_reg type)
+				 enum trace_reg type, void *data)
 {
 	switch (type) {
 	case TRACE_REG_REGISTER:
@@ -685,6 +690,11 @@ static int syscall_exit_register(struct ftrace_event_call *event,
 	case TRACE_REG_PERF_UNREGISTER:
 		perf_sysexit_disable(event);
 		return 0;
+	case TRACE_REG_PERF_OPEN:
+	case TRACE_REG_PERF_CLOSE:
+	case TRACE_REG_PERF_ADD:
+	case TRACE_REG_PERF_DEL:
+		return 0;
 #endif
 	}
 	return 0;
