Diffstat (limited to 'kernel')
-rw-r--r--   kernel/cgroup_freezer.c              |   5
-rw-r--r--   kernel/fork.c                        |   3
-rw-r--r--   kernel/hw_breakpoint.c               | 196
-rw-r--r--   kernel/kprobes.c                     | 132
-rw-r--r--   kernel/lockdep.c                     |   4
-rw-r--r--   kernel/perf_event.c                  | 379
-rw-r--r--   kernel/ptrace.c                      |   1
-rw-r--r--   kernel/sched.c                       |  61
-rw-r--r--   kernel/trace/Kconfig                 |  11
-rw-r--r--   kernel/trace/Makefile                |   1
-rw-r--r--   kernel/trace/trace.h                 |  20
-rw-r--r--   kernel/trace/trace_entries.h         |  12
-rw-r--r--   kernel/trace/trace_events_filter.c   |   2
-rw-r--r--   kernel/trace/trace_hw_branches.c     | 312
-rw-r--r--   kernel/trace/trace_kprobe.c          | 535
-rw-r--r--   kernel/trace/trace_ksym.c            |  26
-rw-r--r--   kernel/trace/trace_selftest.c        |  57
-rw-r--r--   kernel/workqueue.c                   |   2
18 files changed, 849 insertions, 910 deletions
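
Note: among the hunks below, kernel/kprobes.c moves disable_kprobe() and enable_kprobe() out of the debugfs-only region so the helpers are defined regardless of CONFIG_DEBUG_FS. The following minimal sketch is not part of this diff; the probed symbol ("do_fork") and the module boilerplate are placeholders chosen for illustration. It shows how a module could park a registered probe with disable_kprobe() and re-arm it later with enable_kprobe() instead of unregistering and re-registering it.

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>

/* Fires on every hit while the probe is enabled. */
static int example_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
	printk(KERN_INFO "kprobe hit at %s\n", p->symbol_name);
	return 0;
}

static struct kprobe example_kp = {
	.symbol_name	= "do_fork",		/* placeholder symbol */
	.pre_handler	= example_pre_handler,
};

static int __init kprobe_toggle_init(void)
{
	int ret;

	ret = register_kprobe(&example_kp);
	if (ret)
		return ret;

	/* Keep the probe registered but stop it from firing... */
	disable_kprobe(&example_kp);

	/* ...and re-arm it when tracing should resume. */
	enable_kprobe(&example_kp);

	return 0;
}

static void __exit kprobe_toggle_exit(void)
{
	unregister_kprobe(&example_kp);
}

module_init(kprobe_toggle_init);
module_exit(kprobe_toggle_exit);
MODULE_LICENSE("GPL");

The diff as applied: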
| diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index da5e13975531..e5c0244962b0 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -205,9 +205,12 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)  	 * No lock is needed, since the task isn't on tasklist yet,  	 * so it can't be moved to another cgroup, which means the  	 * freezer won't be removed and will be valid during this -	 * function call. +	 * function call.  Nevertheless, apply RCU read-side critical +	 * section to suppress RCU lockdep false positives.  	 */ +	rcu_read_lock();  	freezer = task_freezer(task); +	rcu_read_unlock();  	/*  	 * The root cgroup is non-freezable, so we can skip the diff --git a/kernel/fork.c b/kernel/fork.c index 44b0791b0a2e..5d3592deaf71 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1111,9 +1111,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,  	p->memcg_batch.do_batch = 0;  	p->memcg_batch.memcg = NULL;  #endif - -	p->bts = NULL; -  	p->stack_start = stack_start;  	/* Perform scheduler related setup. Assign this task to a CPU. */ diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 03808ed342a6..7a56b22e0602 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -40,23 +40,29 @@  #include <linux/percpu.h>  #include <linux/sched.h>  #include <linux/init.h> +#include <linux/slab.h>  #include <linux/cpu.h>  #include <linux/smp.h>  #include <linux/hw_breakpoint.h> +  /*   * Constraints data   */  /* Number of pinned cpu breakpoints in a cpu */ -static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned); +static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned[TYPE_MAX]);  /* Number of pinned task breakpoints in a cpu */ -static DEFINE_PER_CPU(unsigned int, nr_task_bp_pinned[HBP_NUM]); +static DEFINE_PER_CPU(unsigned int *, nr_task_bp_pinned[TYPE_MAX]);  /* Number of non-pinned cpu/task breakpoints in a cpu */ -static DEFINE_PER_CPU(unsigned int, nr_bp_flexible); +static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]); + +static int nr_slots[TYPE_MAX]; + +static int constraints_initialized;  /* Gather the number of total pinned and un-pinned bp in a cpuset */  struct bp_busy_slots { @@ -67,16 +73,29 @@ struct bp_busy_slots {  /* Serialize accesses to the above constraints */  static DEFINE_MUTEX(nr_bp_mutex); +__weak int hw_breakpoint_weight(struct perf_event *bp) +{ +	return 1; +} + +static inline enum bp_type_idx find_slot_idx(struct perf_event *bp) +{ +	if (bp->attr.bp_type & HW_BREAKPOINT_RW) +		return TYPE_DATA; + +	return TYPE_INST; +} +  /*   * Report the maximum number of pinned breakpoints a task   * have in this cpu   */ -static unsigned int max_task_bp_pinned(int cpu) +static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)  {  	int i; -	unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned, cpu); +	unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu); -	for (i = HBP_NUM -1; i >= 0; i--) { +	for (i = nr_slots[type] - 1; i >= 0; i--) {  		if (tsk_pinned[i] > 0)  			return i + 1;  	} @@ -84,7 +103,7 @@ static unsigned int max_task_bp_pinned(int cpu)  	return 0;  } -static int task_bp_pinned(struct task_struct *tsk) +static int task_bp_pinned(struct task_struct *tsk, enum bp_type_idx type)  {  	struct perf_event_context *ctx = tsk->perf_event_ctxp;  	struct list_head *list; @@ -105,7 +124,8 @@ static int task_bp_pinned(struct task_struct *tsk)  	 */  	list_for_each_entry(bp, list, event_entry) {  		if (bp->attr.type == PERF_TYPE_BREAKPOINT) -			count++; +	
		if (find_slot_idx(bp) == type) +				count += hw_breakpoint_weight(bp);  	}  	raw_spin_unlock_irqrestore(&ctx->lock, flags); @@ -118,18 +138,19 @@ static int task_bp_pinned(struct task_struct *tsk)   * a given cpu (cpu > -1) or in all of them (cpu = -1).   */  static void -fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp) +fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp, +		    enum bp_type_idx type)  {  	int cpu = bp->cpu;  	struct task_struct *tsk = bp->ctx->task;  	if (cpu >= 0) { -		slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu); +		slots->pinned = per_cpu(nr_cpu_bp_pinned[type], cpu);  		if (!tsk) -			slots->pinned += max_task_bp_pinned(cpu); +			slots->pinned += max_task_bp_pinned(cpu, type);  		else -			slots->pinned += task_bp_pinned(tsk); -		slots->flexible = per_cpu(nr_bp_flexible, cpu); +			slots->pinned += task_bp_pinned(tsk, type); +		slots->flexible = per_cpu(nr_bp_flexible[type], cpu);  		return;  	} @@ -137,16 +158,16 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)  	for_each_online_cpu(cpu) {  		unsigned int nr; -		nr = per_cpu(nr_cpu_bp_pinned, cpu); +		nr = per_cpu(nr_cpu_bp_pinned[type], cpu);  		if (!tsk) -			nr += max_task_bp_pinned(cpu); +			nr += max_task_bp_pinned(cpu, type);  		else -			nr += task_bp_pinned(tsk); +			nr += task_bp_pinned(tsk, type);  		if (nr > slots->pinned)  			slots->pinned = nr; -		nr = per_cpu(nr_bp_flexible, cpu); +		nr = per_cpu(nr_bp_flexible[type], cpu);  		if (nr > slots->flexible)  			slots->flexible = nr; @@ -154,31 +175,49 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)  }  /* + * For now, continue to consider flexible as pinned, until we can + * ensure no flexible event can ever be scheduled before a pinned event + * in a same cpu. 
+ */ +static void +fetch_this_slot(struct bp_busy_slots *slots, int weight) +{ +	slots->pinned += weight; +} + +/*   * Add a pinned breakpoint for the given task in our constraint table   */ -static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable) +static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable, +				enum bp_type_idx type, int weight)  {  	unsigned int *tsk_pinned; -	int count = 0; +	int old_count = 0; +	int old_idx = 0; +	int idx = 0; -	count = task_bp_pinned(tsk); +	old_count = task_bp_pinned(tsk, type); +	old_idx = old_count - 1; +	idx = old_idx + weight; -	tsk_pinned = per_cpu(nr_task_bp_pinned, cpu); +	tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);  	if (enable) { -		tsk_pinned[count]++; -		if (count > 0) -			tsk_pinned[count-1]--; +		tsk_pinned[idx]++; +		if (old_count > 0) +			tsk_pinned[old_idx]--;  	} else { -		tsk_pinned[count]--; -		if (count > 0) -			tsk_pinned[count-1]++; +		tsk_pinned[idx]--; +		if (old_count > 0) +			tsk_pinned[old_idx]++;  	}  }  /*   * Add/remove the given breakpoint in our constraint table   */ -static void toggle_bp_slot(struct perf_event *bp, bool enable) +static void +toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, +	       int weight)  {  	int cpu = bp->cpu;  	struct task_struct *tsk = bp->ctx->task; @@ -186,20 +225,20 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)  	/* Pinned counter task profiling */  	if (tsk) {  		if (cpu >= 0) { -			toggle_bp_task_slot(tsk, cpu, enable); +			toggle_bp_task_slot(tsk, cpu, enable, type, weight);  			return;  		}  		for_each_online_cpu(cpu) -			toggle_bp_task_slot(tsk, cpu, enable); +			toggle_bp_task_slot(tsk, cpu, enable, type, weight);  		return;  	}  	/* Pinned counter cpu profiling */  	if (enable) -		per_cpu(nr_cpu_bp_pinned, bp->cpu)++; +		per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;  	else -		per_cpu(nr_cpu_bp_pinned, bp->cpu)--; +		per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;  }  /* @@ -246,14 +285,29 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)  static int __reserve_bp_slot(struct perf_event *bp)  {  	struct bp_busy_slots slots = {0}; +	enum bp_type_idx type; +	int weight; -	fetch_bp_busy_slots(&slots, bp); +	/* We couldn't initialize breakpoint constraints on boot */ +	if (!constraints_initialized) +		return -ENOMEM; + +	/* Basic checks */ +	if (bp->attr.bp_type == HW_BREAKPOINT_EMPTY || +	    bp->attr.bp_type == HW_BREAKPOINT_INVALID) +		return -EINVAL; + +	type = find_slot_idx(bp); +	weight = hw_breakpoint_weight(bp); + +	fetch_bp_busy_slots(&slots, bp, type); +	fetch_this_slot(&slots, weight);  	/* Flexible counters need to keep at least one slot */ -	if (slots.pinned + (!!slots.flexible) == HBP_NUM) +	if (slots.pinned + (!!slots.flexible) > nr_slots[type])  		return -ENOSPC; -	toggle_bp_slot(bp, true); +	toggle_bp_slot(bp, true, type, weight);  	return 0;  } @@ -273,7 +327,12 @@ int reserve_bp_slot(struct perf_event *bp)  static void __release_bp_slot(struct perf_event *bp)  { -	toggle_bp_slot(bp, false); +	enum bp_type_idx type; +	int weight; + +	type = find_slot_idx(bp); +	weight = hw_breakpoint_weight(bp); +	toggle_bp_slot(bp, false, type, weight);  }  void release_bp_slot(struct perf_event *bp) @@ -308,6 +367,28 @@ int dbg_release_bp_slot(struct perf_event *bp)  	return 0;  } +static int validate_hw_breakpoint(struct perf_event *bp) +{ +	int ret; + +	ret = arch_validate_hwbkpt_settings(bp); +	if (ret) +		return ret; + +	if (arch_check_bp_in_kernelspace(bp)) { +	
	if (bp->attr.exclude_kernel) +			return -EINVAL; +		/* +		 * Don't let unprivileged users set a breakpoint in the trap +		 * path to avoid trap recursion attacks. +		 */ +		if (!capable(CAP_SYS_ADMIN)) +			return -EPERM; +	} + +	return 0; +} +  int register_perf_hw_breakpoint(struct perf_event *bp)  {  	int ret; @@ -316,17 +397,7 @@ int register_perf_hw_breakpoint(struct perf_event *bp)  	if (ret)  		return ret; -	/* -	 * Ptrace breakpoints can be temporary perf events only -	 * meant to reserve a slot. In this case, it is created disabled and -	 * we don't want to check the params right now (as we put a null addr) -	 * But perf tools create events as disabled and we want to check -	 * the params for them. -	 * This is a quick hack that will be removed soon, once we remove -	 * the tmp breakpoints from ptrace -	 */ -	if (!bp->attr.disabled || !bp->overflow_handler) -		ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task); +	ret = validate_hw_breakpoint(bp);  	/* if arch_validate_hwbkpt_settings() fails then release bp slot */  	if (ret) @@ -373,7 +444,7 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att  	if (attr->disabled)  		goto end; -	err = arch_validate_hwbkpt_settings(bp, bp->ctx->task); +	err = validate_hw_breakpoint(bp);  	if (!err)  		perf_event_enable(bp); @@ -480,7 +551,36 @@ static struct notifier_block hw_breakpoint_exceptions_nb = {  static int __init init_hw_breakpoint(void)  { +	unsigned int **task_bp_pinned; +	int cpu, err_cpu; +	int i; + +	for (i = 0; i < TYPE_MAX; i++) +		nr_slots[i] = hw_breakpoint_slots(i); + +	for_each_possible_cpu(cpu) { +		for (i = 0; i < TYPE_MAX; i++) { +			task_bp_pinned = &per_cpu(nr_task_bp_pinned[i], cpu); +			*task_bp_pinned = kzalloc(sizeof(int) * nr_slots[i], +						  GFP_KERNEL); +			if (!*task_bp_pinned) +				goto err_alloc; +		} +	} + +	constraints_initialized = 1; +  	return register_die_notifier(&hw_breakpoint_exceptions_nb); + + err_alloc: +	for_each_possible_cpu(err_cpu) { +		if (err_cpu == cpu) +			break; +		for (i = 0; i < TYPE_MAX; i++) +			kfree(per_cpu(nr_task_bp_pinned[i], cpu)); +	} + +	return -ENOMEM;  }  core_initcall(init_hw_breakpoint); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 0ed46f3e51e9..282035f3ae96 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1588,6 +1588,72 @@ static void __kprobes kill_kprobe(struct kprobe *p)  	arch_remove_kprobe(p);  } +/* Disable one kprobe */ +int __kprobes disable_kprobe(struct kprobe *kp) +{ +	int ret = 0; +	struct kprobe *p; + +	mutex_lock(&kprobe_mutex); + +	/* Check whether specified probe is valid. */ +	p = __get_valid_kprobe(kp); +	if (unlikely(p == NULL)) { +		ret = -EINVAL; +		goto out; +	} + +	/* If the probe is already disabled (or gone), just return */ +	if (kprobe_disabled(kp)) +		goto out; + +	kp->flags |= KPROBE_FLAG_DISABLED; +	if (p != kp) +		/* When kp != p, p is always enabled. */ +		try_to_disable_aggr_kprobe(p); + +	if (!kprobes_all_disarmed && kprobe_disabled(p)) +		disarm_kprobe(p); +out: +	mutex_unlock(&kprobe_mutex); +	return ret; +} +EXPORT_SYMBOL_GPL(disable_kprobe); + +/* Enable one kprobe */ +int __kprobes enable_kprobe(struct kprobe *kp) +{ +	int ret = 0; +	struct kprobe *p; + +	mutex_lock(&kprobe_mutex); + +	/* Check whether specified probe is valid. */ +	p = __get_valid_kprobe(kp); +	if (unlikely(p == NULL)) { +		ret = -EINVAL; +		goto out; +	} + +	if (kprobe_gone(kp)) { +		/* This kprobe has gone, we couldn't enable it. 
*/ +		ret = -EINVAL; +		goto out; +	} + +	if (p != kp) +		kp->flags &= ~KPROBE_FLAG_DISABLED; + +	if (!kprobes_all_disarmed && kprobe_disabled(p)) { +		p->flags &= ~KPROBE_FLAG_DISABLED; +		arm_kprobe(p); +	} +out: +	mutex_unlock(&kprobe_mutex); +	return ret; +} +EXPORT_SYMBOL_GPL(enable_kprobe); +  void __kprobes dump_kprobe(struct kprobe *kp)  {  	printk(KERN_WARNING "Dumping kprobe:\n"); @@ -1805,72 +1871,6 @@ static const struct file_operations debugfs_kprobes_operations = {  	.release        = seq_release,  }; -/* Disable one kprobe */ -int __kprobes disable_kprobe(struct kprobe *kp) -{ -	int ret = 0; -	struct kprobe *p; - -	mutex_lock(&kprobe_mutex); - -	/* Check whether specified probe is valid. */ -	p = __get_valid_kprobe(kp); -	if (unlikely(p == NULL)) { -		ret = -EINVAL; -		goto out; -	} - -	/* If the probe is already disabled (or gone), just return */ -	if (kprobe_disabled(kp)) -		goto out; - -	kp->flags |= KPROBE_FLAG_DISABLED; -	if (p != kp) -		/* When kp != p, p is always enabled. */ -		try_to_disable_aggr_kprobe(p); - -	if (!kprobes_all_disarmed && kprobe_disabled(p)) -		disarm_kprobe(p); -out: -	mutex_unlock(&kprobe_mutex); -	return ret; -} -EXPORT_SYMBOL_GPL(disable_kprobe); - -/* Enable one kprobe */ -int __kprobes enable_kprobe(struct kprobe *kp) -{ -	int ret = 0; -	struct kprobe *p; - -	mutex_lock(&kprobe_mutex); - -	/* Check whether specified probe is valid. */ -	p = __get_valid_kprobe(kp); -	if (unlikely(p == NULL)) { -		ret = -EINVAL; -		goto out; -	} - -	if (kprobe_gone(kp)) { -		/* This kprobe has gone, we couldn't enable it. */ -		ret = -EINVAL; -		goto out; -	} - -	if (p != kp) -		kp->flags &= ~KPROBE_FLAG_DISABLED; - -	if (!kprobes_all_disarmed && kprobe_disabled(p)) { -		p->flags &= ~KPROBE_FLAG_DISABLED; -		arm_kprobe(p); -	} -out: -	mutex_unlock(&kprobe_mutex); -	return ret; -} -EXPORT_SYMBOL_GPL(enable_kprobe); -  static void __kprobes arm_all_kprobes(void)  {  	struct hlist_head *head; diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 2594e1ce41cb..e9c759f06c1d 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -3227,7 +3227,7 @@ void lock_release(struct lockdep_map *lock, int nested,  	raw_local_irq_save(flags);  	check_flags(flags);  	current->lockdep_recursion = 1; -	trace_lock_release(lock, nested, ip); +	trace_lock_release(lock, ip);  	__lock_release(lock, nested, ip);  	current->lockdep_recursion = 0;  	raw_local_irq_restore(flags); @@ -3380,7 +3380,7 @@ found_it:  		hlock->holdtime_stamp = now;  	} -	trace_lock_acquired(lock, ip, waittime); +	trace_lock_acquired(lock, ip);  	stats = get_lock_stats(hlock_class(hlock));  	if (waittime) { diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 2f3fbf84215a..a4fa381db3c2 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -16,6 +16,7 @@  #include <linux/file.h>  #include <linux/poll.h>  #include <linux/slab.h> +#include <linux/hash.h>  #include <linux/sysfs.h>  #include <linux/dcache.h>  #include <linux/percpu.h> @@ -82,14 +83,6 @@ extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event)  void __weak hw_perf_disable(void)		{ barrier(); }  void __weak hw_perf_enable(void)		{ barrier(); } -int __weak -hw_perf_group_sched_in(struct perf_event *group_leader, -	       struct perf_cpu_context *cpuctx, -	       struct perf_event_context *ctx) -{ -	return 0; -} -  void __weak perf_event_print_debug(void)	{ }  static DEFINE_PER_CPU(int, perf_disable_count); @@ -262,6 +255,18 @@ static void update_event_times(struct perf_event *event)  	event->total_time_running = 
run_end - event->tstamp_running;  } +/* + * Update total_time_enabled and total_time_running for all events in a group. + */ +static void update_group_times(struct perf_event *leader) +{ +	struct perf_event *event; + +	update_event_times(leader); +	list_for_each_entry(event, &leader->sibling_list, group_entry) +		update_event_times(event); +} +  static struct list_head *  ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)  { @@ -315,8 +320,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)  static void  list_del_event(struct perf_event *event, struct perf_event_context *ctx)  { -	struct perf_event *sibling, *tmp; -  	if (list_empty(&event->group_entry))  		return;  	ctx->nr_events--; @@ -329,7 +332,7 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)  	if (event->group_leader != event)  		event->group_leader->nr_siblings--; -	update_event_times(event); +	update_group_times(event);  	/*  	 * If event was in error state, then keep it @@ -340,6 +343,12 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)  	 */  	if (event->state > PERF_EVENT_STATE_OFF)  		event->state = PERF_EVENT_STATE_OFF; +} + +static void +perf_destroy_group(struct perf_event *event, struct perf_event_context *ctx) +{ +	struct perf_event *sibling, *tmp;  	/*  	 * If this was a group event with sibling events then @@ -505,18 +514,6 @@ retry:  }  /* - * Update total_time_enabled and total_time_running for all events in a group. - */ -static void update_group_times(struct perf_event *leader) -{ -	struct perf_event *event; - -	update_event_times(leader); -	list_for_each_entry(event, &leader->sibling_list, group_entry) -		update_event_times(event); -} - -/*   * Cross CPU call to disable a performance event   */  static void __perf_event_disable(void *info) @@ -640,15 +637,20 @@ group_sched_in(struct perf_event *group_event,  	       struct perf_cpu_context *cpuctx,  	       struct perf_event_context *ctx)  { -	struct perf_event *event, *partial_group; +	struct perf_event *event, *partial_group = NULL; +	const struct pmu *pmu = group_event->pmu; +	bool txn = false;  	int ret;  	if (group_event->state == PERF_EVENT_STATE_OFF)  		return 0; -	ret = hw_perf_group_sched_in(group_event, cpuctx, ctx); -	if (ret) -		return ret < 0 ? 
ret : 0; +	/* Check if group transaction availabe */ +	if (pmu->start_txn) +		txn = true; + +	if (txn) +		pmu->start_txn(pmu);  	if (event_sched_in(group_event, cpuctx, ctx))  		return -EAGAIN; @@ -663,9 +665,19 @@ group_sched_in(struct perf_event *group_event,  		}  	} -	return 0; +	if (!txn) +		return 0; + +	ret = pmu->commit_txn(pmu); +	if (!ret) { +		pmu->cancel_txn(pmu); +		return 0; +	}  group_error: +	if (txn) +		pmu->cancel_txn(pmu); +  	/*  	 * Groups can be scheduled in as one unit only, so undo any  	 * partial group before returning: @@ -1367,6 +1379,8 @@ void perf_event_task_sched_in(struct task_struct *task)  	if (cpuctx->task_ctx == ctx)  		return; +	perf_disable(); +  	/*  	 * We want to keep the following priority order:  	 * cpu pinned (that don't need to move), task pinned, @@ -1379,6 +1393,8 @@ void perf_event_task_sched_in(struct task_struct *task)  	ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE);  	cpuctx->task_ctx = ctx; + +	perf_enable();  }  #define MAX_INTERRUPTS (~0ULL) @@ -1856,9 +1872,30 @@ int perf_event_release_kernel(struct perf_event *event)  {  	struct perf_event_context *ctx = event->ctx; +	/* +	 * Remove from the PMU, can't get re-enabled since we got +	 * here because the last ref went. +	 */ +	perf_event_disable(event); +  	WARN_ON_ONCE(ctx->parent_ctx); -	mutex_lock(&ctx->mutex); -	perf_event_remove_from_context(event); +	/* +	 * There are two ways this annotation is useful: +	 * +	 *  1) there is a lock recursion from perf_event_exit_task +	 *     see the comment there. +	 * +	 *  2) there is a lock-inversion with mmap_sem through +	 *     perf_event_read_group(), which takes faults while +	 *     holding ctx->mutex, however this is called after +	 *     the last filedesc died, so there is no possibility +	 *     to trigger the AB-BA case. +	 */ +	mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); +	raw_spin_lock_irq(&ctx->lock); +	list_del_event(event, ctx); +	perf_destroy_group(event, ctx); +	raw_spin_unlock_irq(&ctx->lock);  	mutex_unlock(&ctx->mutex);  	mutex_lock(&event->owner->perf_event_mutex); @@ -2642,6 +2679,7 @@ static int perf_fasync(int fd, struct file *filp, int on)  }  static const struct file_operations perf_fops = { +	.llseek			= no_llseek,  	.release		= perf_release,  	.read			= perf_read,  	.poll			= perf_poll, @@ -2792,6 +2830,27 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski  /* + * We assume there is only KVM supporting the callbacks. + * Later on, we might change it to a list if there is + * another virtualization implementation supporting the callbacks. 
+ */ +struct perf_guest_info_callbacks *perf_guest_cbs; + +int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) +{ +	perf_guest_cbs = cbs; +	return 0; +} +EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks); + +int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) +{ +	perf_guest_cbs = NULL; +	return 0; +} +EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks); + +/*   * Output   */  static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail, @@ -3743,7 +3802,7 @@ void __perf_event_mmap(struct vm_area_struct *vma)  		.event_id  = {  			.header = {  				.type = PERF_RECORD_MMAP, -				.misc = 0, +				.misc = PERF_RECORD_MISC_USER,  				/* .size */  			},  			/* .pid */ @@ -3961,36 +4020,6 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,  	perf_swevent_overflow(event, 0, nmi, data, regs);  } -static int perf_swevent_is_counting(struct perf_event *event) -{ -	/* -	 * The event is active, we're good! -	 */ -	if (event->state == PERF_EVENT_STATE_ACTIVE) -		return 1; - -	/* -	 * The event is off/error, not counting. -	 */ -	if (event->state != PERF_EVENT_STATE_INACTIVE) -		return 0; - -	/* -	 * The event is inactive, if the context is active -	 * we're part of a group that didn't make it on the 'pmu', -	 * not counting. -	 */ -	if (event->ctx->is_active) -		return 0; - -	/* -	 * We're inactive and the context is too, this means the -	 * task is scheduled out, we're counting events that happen -	 * to us, like migration events. -	 */ -	return 1; -} -  static int perf_tp_event_match(struct perf_event *event,  				struct perf_sample_data *data); @@ -4014,12 +4043,6 @@ static int perf_swevent_match(struct perf_event *event,  				struct perf_sample_data *data,  				struct pt_regs *regs)  { -	if (event->cpu != -1 && event->cpu != smp_processor_id()) -		return 0; - -	if (!perf_swevent_is_counting(event)) -		return 0; -  	if (event->attr.type != type)  		return 0; @@ -4036,18 +4059,53 @@ static int perf_swevent_match(struct perf_event *event,  	return 1;  } -static void perf_swevent_ctx_event(struct perf_event_context *ctx, -				     enum perf_type_id type, -				     u32 event_id, u64 nr, int nmi, -				     struct perf_sample_data *data, -				     struct pt_regs *regs) +static inline u64 swevent_hash(u64 type, u32 event_id) +{ +	u64 val = event_id | (type << 32); + +	return hash_64(val, SWEVENT_HLIST_BITS); +} + +static struct hlist_head * +find_swevent_head(struct perf_cpu_context *ctx, u64 type, u32 event_id) +{ +	u64 hash; +	struct swevent_hlist *hlist; + +	hash = swevent_hash(type, event_id); + +	hlist = rcu_dereference(ctx->swevent_hlist); +	if (!hlist) +		return NULL; + +	return &hlist->heads[hash]; +} + +static void do_perf_sw_event(enum perf_type_id type, u32 event_id, +				    u64 nr, int nmi, +				    struct perf_sample_data *data, +				    struct pt_regs *regs)  { +	struct perf_cpu_context *cpuctx;  	struct perf_event *event; +	struct hlist_node *node; +	struct hlist_head *head; -	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { +	cpuctx = &__get_cpu_var(perf_cpu_context); + +	rcu_read_lock(); + +	head = find_swevent_head(cpuctx, type, event_id); + +	if (!head) +		goto end; + +	hlist_for_each_entry_rcu(event, node, head, hlist_entry) {  		if (perf_swevent_match(event, type, event_id, data, regs))  			perf_swevent_add(event, nr, nmi, data, regs);  	} +end: +	rcu_read_unlock();  }  int perf_swevent_get_recursion_context(void) @@ -4085,27 +4143,6 @@ void perf_swevent_put_recursion_context(int 
rctx)  }  EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context); -static void do_perf_sw_event(enum perf_type_id type, u32 event_id, -				    u64 nr, int nmi, -				    struct perf_sample_data *data, -				    struct pt_regs *regs) -{ -	struct perf_cpu_context *cpuctx; -	struct perf_event_context *ctx; - -	cpuctx = &__get_cpu_var(perf_cpu_context); -	rcu_read_lock(); -	perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, -				 nr, nmi, data, regs); -	/* -	 * doesn't really matter which of the child contexts the -	 * events ends up in. -	 */ -	ctx = rcu_dereference(current->perf_event_ctxp); -	if (ctx) -		perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs); -	rcu_read_unlock(); -}  void __perf_sw_event(u32 event_id, u64 nr, int nmi,  			    struct pt_regs *regs, u64 addr) @@ -4131,16 +4168,28 @@ static void perf_swevent_read(struct perf_event *event)  static int perf_swevent_enable(struct perf_event *event)  {  	struct hw_perf_event *hwc = &event->hw; +	struct perf_cpu_context *cpuctx; +	struct hlist_head *head; + +	cpuctx = &__get_cpu_var(perf_cpu_context);  	if (hwc->sample_period) {  		hwc->last_period = hwc->sample_period;  		perf_swevent_set_period(event);  	} + +	head = find_swevent_head(cpuctx, event->attr.type, event->attr.config); +	if (WARN_ON_ONCE(!head)) +		return -EINVAL; + +	hlist_add_head_rcu(&event->hlist_entry, head); +  	return 0;  }  static void perf_swevent_disable(struct perf_event *event)  { +	hlist_del_rcu(&event->hlist_entry);  }  static const struct pmu perf_ops_generic = { @@ -4168,15 +4217,8 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)  	perf_sample_data_init(&data, 0);  	data.period = event->hw.last_period;  	regs = get_irq_regs(); -	/* -	 * In case we exclude kernel IPs or are somehow not in interrupt -	 * context, provide the next best thing, the user IP. 
-	 */ -	if ((event->attr.exclude_kernel || !regs) && -			!event->attr.exclude_user) -		regs = task_pt_regs(current); -	if (regs) { +	if (regs && !perf_exclude_event(event, regs)) {  		if (!(event->attr.exclude_idle && current->pid == 0))  			if (perf_event_overflow(event, 0, &data, regs))  				ret = HRTIMER_NORESTART; @@ -4324,6 +4366,105 @@ static const struct pmu perf_ops_task_clock = {  	.read		= task_clock_perf_event_read,  }; +static void swevent_hlist_release_rcu(struct rcu_head *rcu_head) +{ +	struct swevent_hlist *hlist; + +	hlist = container_of(rcu_head, struct swevent_hlist, rcu_head); +	kfree(hlist); +} + +static void swevent_hlist_release(struct perf_cpu_context *cpuctx) +{ +	struct swevent_hlist *hlist; + +	if (!cpuctx->swevent_hlist) +		return; + +	hlist = cpuctx->swevent_hlist; +	rcu_assign_pointer(cpuctx->swevent_hlist, NULL); +	call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu); +} + +static void swevent_hlist_put_cpu(struct perf_event *event, int cpu) +{ +	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); + +	mutex_lock(&cpuctx->hlist_mutex); + +	if (!--cpuctx->hlist_refcount) +		swevent_hlist_release(cpuctx); + +	mutex_unlock(&cpuctx->hlist_mutex); +} + +static void swevent_hlist_put(struct perf_event *event) +{ +	int cpu; + +	if (event->cpu != -1) { +		swevent_hlist_put_cpu(event, event->cpu); +		return; +	} + +	for_each_possible_cpu(cpu) +		swevent_hlist_put_cpu(event, cpu); +} + +static int swevent_hlist_get_cpu(struct perf_event *event, int cpu) +{ +	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); +	int err = 0; + +	mutex_lock(&cpuctx->hlist_mutex); + +	if (!cpuctx->swevent_hlist && cpu_online(cpu)) { +		struct swevent_hlist *hlist; + +		hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); +		if (!hlist) { +			err = -ENOMEM; +			goto exit; +		} +		rcu_assign_pointer(cpuctx->swevent_hlist, hlist); +	} +	cpuctx->hlist_refcount++; + exit: +	mutex_unlock(&cpuctx->hlist_mutex); + +	return err; +} + +static int swevent_hlist_get(struct perf_event *event) +{ +	int err; +	int cpu, failed_cpu; + +	if (event->cpu != -1) +		return swevent_hlist_get_cpu(event, event->cpu); + +	get_online_cpus(); +	for_each_possible_cpu(cpu) { +		err = swevent_hlist_get_cpu(event, cpu); +		if (err) { +			failed_cpu = cpu; +			goto fail; +		} +	} +	put_online_cpus(); + +	return 0; + fail: +	for_each_possible_cpu(cpu) { +		if (cpu == failed_cpu) +			break; +		swevent_hlist_put_cpu(event, cpu); +	} + +	put_online_cpus(); +	return err; +} +  #ifdef CONFIG_EVENT_TRACING  void perf_tp_event(int event_id, u64 addr, u64 count, void *record, @@ -4357,10 +4498,13 @@ static int perf_tp_event_match(struct perf_event *event,  static void tp_perf_event_destroy(struct perf_event *event)  {  	perf_trace_disable(event->attr.config); +	swevent_hlist_put(event);  }  static const struct pmu *tp_perf_event_init(struct perf_event *event)  { +	int err; +  	/*  	 * Raw tracepoint data is a severe data leak, only allow root to  	 * have these. 
@@ -4374,6 +4518,11 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)  		return NULL;  	event->destroy = tp_perf_event_destroy; +	err = swevent_hlist_get(event); +	if (err) { +		perf_trace_disable(event->attr.config); +		return ERR_PTR(err); +	}  	return &perf_ops_generic;  } @@ -4474,6 +4623,7 @@ static void sw_perf_event_destroy(struct perf_event *event)  	WARN_ON(event->parent);  	atomic_dec(&perf_swevent_enabled[event_id]); +	swevent_hlist_put(event);  }  static const struct pmu *sw_perf_event_init(struct perf_event *event) @@ -4512,6 +4662,12 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event)  	case PERF_COUNT_SW_ALIGNMENT_FAULTS:  	case PERF_COUNT_SW_EMULATION_FAULTS:  		if (!event->parent) { +			int err; + +			err = swevent_hlist_get(event); +			if (err) +				return ERR_PTR(err); +  			atomic_inc(&perf_swevent_enabled[event_id]);  			event->destroy = sw_perf_event_destroy;  		} @@ -4897,7 +5053,7 @@ err_fput_free_put_context:  err_free_put_context:  	if (err < 0) -		kfree(event); +		free_event(event);  err_put_context:  	if (err < 0) @@ -5176,7 +5332,7 @@ void perf_event_exit_task(struct task_struct *child)  	 *  	 * But since its the parent context it won't be the same instance.  	 */ -	mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING); +	mutex_lock(&child_ctx->mutex);  again:  	list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups, @@ -5384,6 +5540,7 @@ static void __init perf_event_init_all_cpus(void)  	for_each_possible_cpu(cpu) {  		cpuctx = &per_cpu(perf_cpu_context, cpu); +		mutex_init(&cpuctx->hlist_mutex);  		__perf_event_init_context(&cpuctx->ctx, NULL);  	}  } @@ -5397,6 +5554,16 @@ static void __cpuinit perf_event_init_cpu(int cpu)  	spin_lock(&perf_resource_lock);  	cpuctx->max_pertask = perf_max_events - perf_reserved_percpu;  	spin_unlock(&perf_resource_lock); + +	mutex_lock(&cpuctx->hlist_mutex); +	if (cpuctx->hlist_refcount > 0) { +		struct swevent_hlist *hlist; + +		hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); +		WARN_ON_ONCE(!hlist); +		rcu_assign_pointer(cpuctx->swevent_hlist, hlist); +	} +	mutex_unlock(&cpuctx->hlist_mutex);  }  #ifdef CONFIG_HOTPLUG_CPU @@ -5416,6 +5583,10 @@ static void perf_event_exit_cpu(int cpu)  	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);  	struct perf_event_context *ctx = &cpuctx->ctx; +	mutex_lock(&cpuctx->hlist_mutex); +	swevent_hlist_release(cpuctx); +	mutex_unlock(&cpuctx->hlist_mutex); +  	mutex_lock(&ctx->mutex);  	smp_call_function_single(cpu, __perf_event_exit_cpu, NULL, 1);  	mutex_unlock(&ctx->mutex); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 42ad8ae729a0..9fb51237b18c 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -76,7 +76,6 @@ void __ptrace_unlink(struct task_struct *child)  	child->parent = child->real_parent;  	list_del_init(&child->ptrace_entry); -	arch_ptrace_untrace(child);  	if (task_is_traced(child))  		ptrace_untrace(child);  } diff --git a/kernel/sched.c b/kernel/sched.c index b531d7934083..78554dd0d1a4 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -323,6 +323,15 @@ static inline struct task_group *task_group(struct task_struct *p)  /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */  static inline void set_task_rq(struct task_struct *p, unsigned int cpu)  { +	/* +	 * Strictly speaking this rcu_read_lock() is not needed since the +	 * task_group is tied to the cgroup, which in turn can never go away +	 * as long as there are tasks attached to it. 
+	 * +	 * However since task_group() uses task_subsys_state() which is an +	 * rcu_dereference() user, this quiets CONFIG_PROVE_RCU. +	 */ +	rcu_read_lock();  #ifdef CONFIG_FAIR_GROUP_SCHED  	p->se.cfs_rq = task_group(p)->cfs_rq[cpu];  	p->se.parent = task_group(p)->se[cpu]; @@ -332,6 +341,7 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu)  	p->rt.rt_rq  = task_group(p)->rt_rq[cpu];  	p->rt.parent = task_group(p)->rt_se[cpu];  #endif +	rcu_read_unlock();  }  #else @@ -2058,49 +2068,6 @@ static bool migrate_task(struct task_struct *p, int dest_cpu)  }  /* - * wait_task_context_switch -	wait for a thread to complete at least one - *				context switch. - * - * @p must not be current. - */ -void wait_task_context_switch(struct task_struct *p) -{ -	unsigned long nvcsw, nivcsw, flags; -	int running; -	struct rq *rq; - -	nvcsw	= p->nvcsw; -	nivcsw	= p->nivcsw; -	for (;;) { -		/* -		 * The runqueue is assigned before the actual context -		 * switch. We need to take the runqueue lock. -		 * -		 * We could check initially without the lock but it is -		 * very likely that we need to take the lock in every -		 * iteration. -		 */ -		rq = task_rq_lock(p, &flags); -		running = task_running(rq, p); -		task_rq_unlock(rq, &flags); - -		if (likely(!running)) -			break; -		/* -		 * The switch count is incremented before the actual -		 * context switch. We thus wait for two switches to be -		 * sure at least one completed. -		 */ -		if ((p->nvcsw - nvcsw) > 1) -			break; -		if ((p->nivcsw - nivcsw) > 1) -			break; - -		cpu_relax(); -	} -} - -/*   * wait_task_inactive - wait for a thread to unschedule.   *   * If @match_state is nonzero, it's the @p->state value just checked and @@ -3724,7 +3691,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)  	 * the mutex owner just released it and exited.  	 */  	if (probe_kernel_address(&owner->cpu, cpu)) -		goto out; +		return 0;  #else  	cpu = owner->cpu;  #endif @@ -3734,14 +3701,14 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)  	 * the cpu field may no longer be valid.  	 */  	if (cpu >= nr_cpumask_bits) -		goto out; +		return 0;  	/*  	 * We need to validate that we can do a  	 * get_cpu() and that we have the percpu area.  	 */  	if (!cpu_online(cpu)) -		goto out; +		return 0;  	rq = cpu_rq(cpu); @@ -3760,7 +3727,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)  		cpu_relax();  	} -out: +  	return 1;  }  #endif diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 13e13d428cd3..8b1797c4545b 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -44,9 +44,6 @@ config HAVE_FTRACE_MCOUNT_RECORD  	help  	  See Documentation/trace/ftrace-design.txt -config HAVE_HW_BRANCH_TRACER -	bool -  config HAVE_SYSCALL_TRACEPOINTS  	bool  	help @@ -374,14 +371,6 @@ config STACK_TRACER  	  Say N if unsure. -config HW_BRANCH_TRACER -	depends on HAVE_HW_BRANCH_TRACER -	bool "Trace hw branches" -	select GENERIC_TRACER -	help -	  This tracer records all branches on the system in a circular -	  buffer, giving access to the last N branches for each cpu. 
-  config KMEMTRACE  	bool "Trace SLAB allocations"  	select GENERIC_TRACER diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 78edc6490038..ffb1a5b0550e 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -41,7 +41,6 @@ obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o  obj-$(CONFIG_BOOT_TRACER) += trace_boot.o  obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o  obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o -obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o  obj-$(CONFIG_KMEMTRACE) += kmemtrace.o  obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o  obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 40cd1718fb1b..2cd96399463f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -34,7 +34,6 @@ enum trace_type {  	TRACE_GRAPH_RET,  	TRACE_GRAPH_ENT,  	TRACE_USER_STACK, -	TRACE_HW_BRANCHES,  	TRACE_KMEM_ALLOC,  	TRACE_KMEM_FREE,  	TRACE_BLK, @@ -103,29 +102,17 @@ struct syscall_trace_exit {  	long			ret;  }; -struct kprobe_trace_entry { +struct kprobe_trace_entry_head {  	struct trace_entry	ent;  	unsigned long		ip; -	int			nargs; -	unsigned long		args[];  }; -#define SIZEOF_KPROBE_TRACE_ENTRY(n)			\ -	(offsetof(struct kprobe_trace_entry, args) +	\ -	(sizeof(unsigned long) * (n))) - -struct kretprobe_trace_entry { +struct kretprobe_trace_entry_head {  	struct trace_entry	ent;  	unsigned long		func;  	unsigned long		ret_ip; -	int			nargs; -	unsigned long		args[];  }; -#define SIZEOF_KRETPROBE_TRACE_ENTRY(n)			\ -	(offsetof(struct kretprobe_trace_entry, args) +	\ -	(sizeof(unsigned long) * (n))) -  /*   * trace_flag_type is an enumeration that holds different   * states when a trace occurs. These are: @@ -229,7 +216,6 @@ extern void __ftrace_bad_type(void);  			  TRACE_GRAPH_ENT);		\  		IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry,	\  			  TRACE_GRAPH_RET);		\ -		IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\  		IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry,	\  			  TRACE_KMEM_ALLOC);	\  		IF_ASSIGN(var, ent, struct kmemtrace_free_entry,	\ @@ -470,8 +456,6 @@ extern int trace_selftest_startup_sysprof(struct tracer *trace,  					       struct trace_array *tr);  extern int trace_selftest_startup_branch(struct tracer *trace,  					 struct trace_array *tr); -extern int trace_selftest_startup_hw_branches(struct tracer *trace, -					      struct trace_array *tr);  extern int trace_selftest_startup_ksym(struct tracer *trace,  					 struct trace_array *tr);  #endif /* CONFIG_FTRACE_STARTUP_TEST */ diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index c16a08f399df..dc008c1240da 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -318,18 +318,6 @@ FTRACE_ENTRY(branch, trace_branch,  		 __entry->func, __entry->file, __entry->correct)  ); -FTRACE_ENTRY(hw_branch, hw_branch_entry, - -	TRACE_HW_BRANCHES, - -	F_STRUCT( -		__field(	u64,	from	) -		__field(	u64,	to	) -	), - -	F_printk("from: %llx to: %llx", __entry->from, __entry->to) -); -  FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry,  	TRACE_KMEM_ALLOC, diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 239ea5d77d68..57bb1bb32999 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1400,7 +1400,7 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,  	}  	err = -EINVAL; -	if (!call) +	if (&call->list == &ftrace_events)  		goto out_unlock;  	err = -EEXIST; diff --git 
a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c deleted file mode 100644 index 7b97000745f5..000000000000 --- a/kernel/trace/trace_hw_branches.c +++ /dev/null @@ -1,312 +0,0 @@ -/* - * h/w branch tracer for x86 based on BTS - * - * Copyright (C) 2008-2009 Intel Corporation. - * Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009 - */ -#include <linux/kallsyms.h> -#include <linux/debugfs.h> -#include <linux/ftrace.h> -#include <linux/module.h> -#include <linux/cpu.h> -#include <linux/smp.h> -#include <linux/fs.h> - -#include <asm/ds.h> - -#include "trace_output.h" -#include "trace.h" - - -#define BTS_BUFFER_SIZE (1 << 13) - -static DEFINE_PER_CPU(struct bts_tracer *, hwb_tracer); -static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], hwb_buffer); - -#define this_tracer per_cpu(hwb_tracer, smp_processor_id()) - -static int trace_hw_branches_enabled __read_mostly; -static int trace_hw_branches_suspended __read_mostly; -static struct trace_array *hw_branch_trace __read_mostly; - - -static void bts_trace_init_cpu(int cpu) -{ -	per_cpu(hwb_tracer, cpu) = -		ds_request_bts_cpu(cpu, per_cpu(hwb_buffer, cpu), -				   BTS_BUFFER_SIZE, NULL, (size_t)-1, -				   BTS_KERNEL); - -	if (IS_ERR(per_cpu(hwb_tracer, cpu))) -		per_cpu(hwb_tracer, cpu) = NULL; -} - -static int bts_trace_init(struct trace_array *tr) -{ -	int cpu; - -	hw_branch_trace = tr; -	trace_hw_branches_enabled = 0; - -	get_online_cpus(); -	for_each_online_cpu(cpu) { -		bts_trace_init_cpu(cpu); - -		if (likely(per_cpu(hwb_tracer, cpu))) -			trace_hw_branches_enabled = 1; -	} -	trace_hw_branches_suspended = 0; -	put_online_cpus(); - -	/* If we could not enable tracing on a single cpu, we fail. */ -	return trace_hw_branches_enabled ? 0 : -EOPNOTSUPP; -} - -static void bts_trace_reset(struct trace_array *tr) -{ -	int cpu; - -	get_online_cpus(); -	for_each_online_cpu(cpu) { -		if (likely(per_cpu(hwb_tracer, cpu))) { -			ds_release_bts(per_cpu(hwb_tracer, cpu)); -			per_cpu(hwb_tracer, cpu) = NULL; -		} -	} -	trace_hw_branches_enabled = 0; -	trace_hw_branches_suspended = 0; -	put_online_cpus(); -} - -static void bts_trace_start(struct trace_array *tr) -{ -	int cpu; - -	get_online_cpus(); -	for_each_online_cpu(cpu) -		if (likely(per_cpu(hwb_tracer, cpu))) -			ds_resume_bts(per_cpu(hwb_tracer, cpu)); -	trace_hw_branches_suspended = 0; -	put_online_cpus(); -} - -static void bts_trace_stop(struct trace_array *tr) -{ -	int cpu; - -	get_online_cpus(); -	for_each_online_cpu(cpu) -		if (likely(per_cpu(hwb_tracer, cpu))) -			ds_suspend_bts(per_cpu(hwb_tracer, cpu)); -	trace_hw_branches_suspended = 1; -	put_online_cpus(); -} - -static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb, -				     unsigned long action, void *hcpu) -{ -	int cpu = (long)hcpu; - -	switch (action) { -	case CPU_ONLINE: -	case CPU_DOWN_FAILED: -		/* The notification is sent with interrupts enabled. */ -		if (trace_hw_branches_enabled) { -			bts_trace_init_cpu(cpu); - -			if (trace_hw_branches_suspended && -			    likely(per_cpu(hwb_tracer, cpu))) -				ds_suspend_bts(per_cpu(hwb_tracer, cpu)); -		} -		break; - -	case CPU_DOWN_PREPARE: -		/* The notification is sent with interrupts enabled. 
*/ -		if (likely(per_cpu(hwb_tracer, cpu))) { -			ds_release_bts(per_cpu(hwb_tracer, cpu)); -			per_cpu(hwb_tracer, cpu) = NULL; -		} -	} - -	return NOTIFY_DONE; -} - -static struct notifier_block bts_hotcpu_notifier __cpuinitdata = { -	.notifier_call = bts_hotcpu_handler -}; - -static void bts_trace_print_header(struct seq_file *m) -{ -	seq_puts(m, "# CPU#        TO  <-  FROM\n"); -} - -static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) -{ -	unsigned long symflags = TRACE_ITER_SYM_OFFSET; -	struct trace_entry *entry = iter->ent; -	struct trace_seq *seq = &iter->seq; -	struct hw_branch_entry *it; - -	trace_assign_type(it, entry); - -	if (entry->type == TRACE_HW_BRANCHES) { -		if (trace_seq_printf(seq, "%4d  ", iter->cpu) && -		    seq_print_ip_sym(seq, it->to, symflags) && -		    trace_seq_printf(seq, "\t  <-  ") && -		    seq_print_ip_sym(seq, it->from, symflags) && -		    trace_seq_printf(seq, "\n")) -			return TRACE_TYPE_HANDLED; -		return TRACE_TYPE_PARTIAL_LINE; -	} -	return TRACE_TYPE_UNHANDLED; -} - -void trace_hw_branch(u64 from, u64 to) -{ -	struct ftrace_event_call *call = &event_hw_branch; -	struct trace_array *tr = hw_branch_trace; -	struct ring_buffer_event *event; -	struct ring_buffer *buf; -	struct hw_branch_entry *entry; -	unsigned long irq1; -	int cpu; - -	if (unlikely(!tr)) -		return; - -	if (unlikely(!trace_hw_branches_enabled)) -		return; - -	local_irq_save(irq1); -	cpu = raw_smp_processor_id(); -	if (atomic_inc_return(&tr->data[cpu]->disabled) != 1) -		goto out; - -	buf = tr->buffer; -	event = trace_buffer_lock_reserve(buf, TRACE_HW_BRANCHES, -					  sizeof(*entry), 0, 0); -	if (!event) -		goto out; -	entry	= ring_buffer_event_data(event); -	tracing_generic_entry_update(&entry->ent, 0, from); -	entry->ent.type = TRACE_HW_BRANCHES; -	entry->from = from; -	entry->to   = to; -	if (!filter_check_discard(call, entry, buf, event)) -		trace_buffer_unlock_commit(buf, event, 0, 0); - - out: -	atomic_dec(&tr->data[cpu]->disabled); -	local_irq_restore(irq1); -} - -static void trace_bts_at(const struct bts_trace *trace, void *at) -{ -	struct bts_struct bts; -	int err = 0; - -	WARN_ON_ONCE(!trace->read); -	if (!trace->read) -		return; - -	err = trace->read(this_tracer, at, &bts); -	if (err < 0) -		return; - -	switch (bts.qualifier) { -	case BTS_BRANCH: -		trace_hw_branch(bts.variant.lbr.from, bts.variant.lbr.to); -		break; -	} -} - -/* - * Collect the trace on the current cpu and write it into the ftrace buffer. - * - * pre: tracing must be suspended on the current cpu - */ -static void trace_bts_cpu(void *arg) -{ -	struct trace_array *tr = (struct trace_array *)arg; -	const struct bts_trace *trace; -	unsigned char *at; - -	if (unlikely(!tr)) -		return; - -	if (unlikely(atomic_read(&tr->data[raw_smp_processor_id()]->disabled))) -		return; - -	if (unlikely(!this_tracer)) -		return; - -	trace = ds_read_bts(this_tracer); -	if (!trace) -		return; - -	for (at = trace->ds.top; (void *)at < trace->ds.end; -	     at += trace->ds.size) -		trace_bts_at(trace, at); - -	for (at = trace->ds.begin; (void *)at < trace->ds.top; -	     at += trace->ds.size) -		trace_bts_at(trace, at); -} - -static void trace_bts_prepare(struct trace_iterator *iter) -{ -	int cpu; - -	get_online_cpus(); -	for_each_online_cpu(cpu) -		if (likely(per_cpu(hwb_tracer, cpu))) -			ds_suspend_bts(per_cpu(hwb_tracer, cpu)); -	/* -	 * We need to collect the trace on the respective cpu since ftrace -	 * implicitly adds the record for the current cpu. 
-	 * Once that is more flexible, we could collect the data from any cpu. -	 */ -	on_each_cpu(trace_bts_cpu, iter->tr, 1); - -	for_each_online_cpu(cpu) -		if (likely(per_cpu(hwb_tracer, cpu))) -			ds_resume_bts(per_cpu(hwb_tracer, cpu)); -	put_online_cpus(); -} - -static void trace_bts_close(struct trace_iterator *iter) -{ -	tracing_reset_online_cpus(iter->tr); -} - -void trace_hw_branch_oops(void) -{ -	if (this_tracer) { -		ds_suspend_bts_noirq(this_tracer); -		trace_bts_cpu(hw_branch_trace); -		ds_resume_bts_noirq(this_tracer); -	} -} - -struct tracer bts_tracer __read_mostly = -{ -	.name		= "hw-branch-tracer", -	.init		= bts_trace_init, -	.reset		= bts_trace_reset, -	.print_header	= bts_trace_print_header, -	.print_line	= bts_trace_print_line, -	.start		= bts_trace_start, -	.stop		= bts_trace_stop, -	.open		= trace_bts_prepare, -	.close		= trace_bts_close, -#ifdef CONFIG_FTRACE_SELFTEST -	.selftest	= trace_selftest_startup_hw_branches, -#endif /* CONFIG_FTRACE_SELFTEST */ -}; - -__init static int init_bts_trace(void) -{ -	register_hotcpu_notifier(&bts_hotcpu_notifier); -	return register_tracer(&bts_tracer); -} -device_initcall(init_bts_trace); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 0e3ded64cdb7..9a082bba9537 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -29,6 +29,8 @@  #include <linux/ctype.h>  #include <linux/ptrace.h>  #include <linux/perf_event.h> +#include <linux/stringify.h> +#include <asm/bitsperlong.h>  #include "trace.h"  #include "trace_output.h" @@ -40,7 +42,6 @@  /* Reserved field names */  #define FIELD_STRING_IP "__probe_ip" -#define FIELD_STRING_NARGS "__probe_nargs"  #define FIELD_STRING_RETIP "__probe_ret_ip"  #define FIELD_STRING_FUNC "__probe_func" @@ -52,56 +53,102 @@ const char *reserved_field_names[] = {  	"common_tgid",  	"common_lock_depth",  	FIELD_STRING_IP, -	FIELD_STRING_NARGS,  	FIELD_STRING_RETIP,  	FIELD_STRING_FUNC,  }; -struct fetch_func { -	unsigned long (*func)(struct pt_regs *, void *); +/* Printing function type */ +typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *); +#define PRINT_TYPE_FUNC_NAME(type)	print_type_##type +#define PRINT_TYPE_FMT_NAME(type)	print_type_format_##type + +/* Printing  in basic type function template */ +#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast)			\ +static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s,	\ +						const char *name, void *data)\ +{									\ +	return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\ +}									\ +static const char PRINT_TYPE_FMT_NAME(type)[] = fmt; + +DEFINE_BASIC_PRINT_TYPE_FUNC(u8, "%x", unsigned int) +DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "%x", unsigned int) +DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "%lx", unsigned long) +DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "%llx", unsigned long long) +DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d", int) +DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int) +DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long) +DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long) + +/* Data fetch function type */ +typedef	void (*fetch_func_t)(struct pt_regs *, void *, void *); + +struct fetch_param { +	fetch_func_t	fn;  	void *data;  }; -static __kprobes unsigned long call_fetch(struct fetch_func *f, -					  struct pt_regs *regs) +static __kprobes void call_fetch(struct fetch_param *fprm, +				 struct pt_regs *regs, void *dest)  { -	return f->func(regs, f->data); +	return fprm->fn(regs, fprm->data, dest);  } -/* fetch handlers */ -static __kprobes unsigned long fetch_register(struct 
pt_regs *regs, -					      void *offset) -{ -	return regs_get_register(regs, (unsigned int)((unsigned long)offset)); +#define FETCH_FUNC_NAME(kind, type)	fetch_##kind##_##type +/* + * Define macro for basic types - we don't need to define s* types, because + * we have to care only about bitwidth at recording time. + */ +#define DEFINE_BASIC_FETCH_FUNCS(kind)  \ +DEFINE_FETCH_##kind(u8)			\ +DEFINE_FETCH_##kind(u16)		\ +DEFINE_FETCH_##kind(u32)		\ +DEFINE_FETCH_##kind(u64) + +#define CHECK_BASIC_FETCH_FUNCS(kind, fn)	\ +	((FETCH_FUNC_NAME(kind, u8) == fn) ||	\ +	 (FETCH_FUNC_NAME(kind, u16) == fn) ||	\ +	 (FETCH_FUNC_NAME(kind, u32) == fn) ||	\ +	 (FETCH_FUNC_NAME(kind, u64) == fn)) + +/* Data fetch function templates */ +#define DEFINE_FETCH_reg(type)						\ +static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs,	\ +					  void *offset, void *dest)	\ +{									\ +	*(type *)dest = (type)regs_get_register(regs,			\ +				(unsigned int)((unsigned long)offset));	\  } - -static __kprobes unsigned long fetch_stack(struct pt_regs *regs, -					   void *num) -{ -	return regs_get_kernel_stack_nth(regs, -					 (unsigned int)((unsigned long)num)); +DEFINE_BASIC_FETCH_FUNCS(reg) + +#define DEFINE_FETCH_stack(type)					\ +static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\ +					  void *offset, void *dest)	\ +{									\ +	*(type *)dest = (type)regs_get_kernel_stack_nth(regs,		\ +				(unsigned int)((unsigned long)offset));	\  } +DEFINE_BASIC_FETCH_FUNCS(stack) -static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr) -{ -	unsigned long retval; - -	if (probe_kernel_address(addr, retval)) -		return 0; -	return retval; +#define DEFINE_FETCH_retval(type)					\ +static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\ +					  void *dummy, void *dest)	\ +{									\ +	*(type *)dest = (type)regs_return_value(regs);			\  } - -static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs, -					      void *dummy) -{ -	return regs_return_value(regs); -} - -static __kprobes unsigned long fetch_stack_address(struct pt_regs *regs, -						   void *dummy) -{ -	return kernel_stack_pointer(regs); +DEFINE_BASIC_FETCH_FUNCS(retval) + +#define DEFINE_FETCH_memory(type)					\ +static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\ +					  void *addr, void *dest)	\ +{									\ +	type retval;							\ +	if (probe_kernel_address(addr, retval))				\ +		*(type *)dest = 0;					\ +	else								\ +		*(type *)dest = retval;					\  } +DEFINE_BASIC_FETCH_FUNCS(memory)  /* Memory fetching by symbol */  struct symbol_cache { @@ -145,51 +192,126 @@ static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)  	return sc;  } -static __kprobes unsigned long fetch_symbol(struct pt_regs *regs, void *data) -{ -	struct symbol_cache *sc = data; - -	if (sc->addr) -		return fetch_memory(regs, (void *)sc->addr); -	else -		return 0; +#define DEFINE_FETCH_symbol(type)					\ +static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\ +					  void *data, void *dest)	\ +{									\ +	struct symbol_cache *sc = data;					\ +	if (sc->addr)							\ +		fetch_memory_##type(regs, (void *)sc->addr, dest);	\ +	else								\ +		*(type *)dest = 0;					\  } +DEFINE_BASIC_FETCH_FUNCS(symbol) -/* Special indirect memory access interface */ -struct indirect_fetch_data { -	struct fetch_func orig; +/* Dereference memory access function */ +struct deref_fetch_param { +	struct fetch_param orig;  	long offset;  }; -static __kprobes unsigned 
long fetch_indirect(struct pt_regs *regs, void *data) -{ -	struct indirect_fetch_data *ind = data; -	unsigned long addr; - -	addr = call_fetch(&ind->orig, regs); -	if (addr) { -		addr += ind->offset; -		return fetch_memory(regs, (void *)addr); -	} else -		return 0; +#define DEFINE_FETCH_deref(type)					\ +static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\ +					    void *data, void *dest)	\ +{									\ +	struct deref_fetch_param *dprm = data;				\ +	unsigned long addr;						\ +	call_fetch(&dprm->orig, regs, &addr);				\ +	if (addr) {							\ +		addr += dprm->offset;					\ +		fetch_memory_##type(regs, (void *)addr, dest);		\ +	} else								\ +		*(type *)dest = 0;					\  } +DEFINE_BASIC_FETCH_FUNCS(deref) -static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data) +static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)  { -	if (data->orig.func == fetch_indirect) -		free_indirect_fetch_data(data->orig.data); -	else if (data->orig.func == fetch_symbol) +	if (CHECK_BASIC_FETCH_FUNCS(deref, data->orig.fn)) +		free_deref_fetch_param(data->orig.data); +	else if (CHECK_BASIC_FETCH_FUNCS(symbol, data->orig.fn))  		free_symbol_cache(data->orig.data);  	kfree(data);  } +/* Default (unsigned long) fetch type */ +#define __DEFAULT_FETCH_TYPE(t) u##t +#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t) +#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG) +#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE) + +#define ASSIGN_FETCH_FUNC(kind, type)	\ +	.kind = FETCH_FUNC_NAME(kind, type) + +#define ASSIGN_FETCH_TYPE(ptype, ftype, sign)	\ +	{.name = #ptype,			\ +	 .size = sizeof(ftype),			\ +	 .is_signed = sign,			\ +	 .print = PRINT_TYPE_FUNC_NAME(ptype),	\ +	 .fmt = PRINT_TYPE_FMT_NAME(ptype),	\ +ASSIGN_FETCH_FUNC(reg, ftype),			\ +ASSIGN_FETCH_FUNC(stack, ftype),		\ +ASSIGN_FETCH_FUNC(retval, ftype),		\ +ASSIGN_FETCH_FUNC(memory, ftype),		\ +ASSIGN_FETCH_FUNC(symbol, ftype),		\ +ASSIGN_FETCH_FUNC(deref, ftype),		\ +	} + +/* Fetch type information table */ +static const struct fetch_type { +	const char	*name;		/* Name of type */ +	size_t		size;		/* Byte size of type */ +	int		is_signed;	/* Signed flag */ +	print_type_func_t	print;	/* Print functions */ +	const char	*fmt;		/* Fromat string */ +	/* Fetch functions */ +	fetch_func_t	reg; +	fetch_func_t	stack; +	fetch_func_t	retval; +	fetch_func_t	memory; +	fetch_func_t	symbol; +	fetch_func_t	deref; +} fetch_type_table[] = { +	ASSIGN_FETCH_TYPE(u8,  u8,  0), +	ASSIGN_FETCH_TYPE(u16, u16, 0), +	ASSIGN_FETCH_TYPE(u32, u32, 0), +	ASSIGN_FETCH_TYPE(u64, u64, 0), +	ASSIGN_FETCH_TYPE(s8,  u8,  1), +	ASSIGN_FETCH_TYPE(s16, u16, 1), +	ASSIGN_FETCH_TYPE(s32, u32, 1), +	ASSIGN_FETCH_TYPE(s64, u64, 1), +}; + +static const struct fetch_type *find_fetch_type(const char *type) +{ +	int i; + +	if (!type) +		type = DEFAULT_FETCH_TYPE_STR; + +	for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++) +		if (strcmp(type, fetch_type_table[i].name) == 0) +			return &fetch_type_table[i]; +	return NULL; +} + +/* Special function : only accept unsigned long */ +static __kprobes void fetch_stack_address(struct pt_regs *regs, +					  void *dummy, void *dest) +{ +	*(unsigned long *)dest = kernel_stack_pointer(regs); +} +  /**   * Kprobe event core functions   */  struct probe_arg { -	struct fetch_func	fetch; -	const char		*name; +	struct fetch_param	fetch; +	unsigned int		offset;	/* Offset from argument entry */ +	const char		*name;	/* Name of this argument */ +	const char		*comm;	/* Command of this 
argument */ +	const struct fetch_type	*type;	/* Type of this argument */  };  /* Flags for trace_probe */ @@ -204,6 +326,7 @@ struct trace_probe {  	const char		*symbol;	/* symbol name */  	struct ftrace_event_class	class;  	struct ftrace_event_call	call; +	ssize_t			size;		/* trace entry size */  	unsigned int		nr_args;  	struct probe_arg	args[];  }; @@ -212,6 +335,7 @@ struct trace_probe {  	(offsetof(struct trace_probe, args) +	\  	(sizeof(struct probe_arg) * (n))) +  static __kprobes int probe_is_return(struct trace_probe *tp)  {  	return tp->rp.handler != NULL; @@ -222,49 +346,6 @@ static __kprobes const char *probe_symbol(struct trace_probe *tp)  	return tp->symbol ? tp->symbol : "unknown";  } -static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff) -{ -	int ret = -EINVAL; - -	if (ff->func == fetch_register) { -		const char *name; -		name = regs_query_register_name((unsigned int)((long)ff->data)); -		ret = snprintf(buf, n, "%%%s", name); -	} else if (ff->func == fetch_stack) -		ret = snprintf(buf, n, "$stack%lu", (unsigned long)ff->data); -	else if (ff->func == fetch_memory) -		ret = snprintf(buf, n, "@0x%p", ff->data); -	else if (ff->func == fetch_symbol) { -		struct symbol_cache *sc = ff->data; -		if (sc->offset) -			ret = snprintf(buf, n, "@%s%+ld", sc->symbol, -					sc->offset); -		else -			ret = snprintf(buf, n, "@%s", sc->symbol); -	} else if (ff->func == fetch_retvalue) -		ret = snprintf(buf, n, "$retval"); -	else if (ff->func == fetch_stack_address) -		ret = snprintf(buf, n, "$stack"); -	else if (ff->func == fetch_indirect) { -		struct indirect_fetch_data *id = ff->data; -		size_t l = 0; -		ret = snprintf(buf, n, "%+ld(", id->offset); -		if (ret >= n) -			goto end; -		l += ret; -		ret = probe_arg_string(buf + l, n - l, &id->orig); -		if (ret < 0) -			goto end; -		l += ret; -		ret = snprintf(buf + l, n - l, ")"); -		ret += l; -	} -end: -	if (ret >= n) -		return -ENOSPC; -	return ret; -} -  static int register_probe_event(struct trace_probe *tp);  static void unregister_probe_event(struct trace_probe *tp); @@ -348,11 +429,12 @@ error:  static void free_probe_arg(struct probe_arg *arg)  { -	if (arg->fetch.func == fetch_symbol) +	if (CHECK_BASIC_FETCH_FUNCS(deref, arg->fetch.fn)) +		free_deref_fetch_param(arg->fetch.data); +	else if (CHECK_BASIC_FETCH_FUNCS(symbol, arg->fetch.fn))  		free_symbol_cache(arg->fetch.data); -	else if (arg->fetch.func == fetch_indirect) -		free_indirect_fetch_data(arg->fetch.data);  	kfree(arg->name); +	kfree(arg->comm);  }  static void free_trace_probe(struct trace_probe *tp) @@ -458,28 +540,30 @@ static int split_symbol_offset(char *symbol, unsigned long *offset)  #define PARAM_MAX_ARGS 16  #define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long)) -static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return) +static int parse_probe_vars(char *arg, const struct fetch_type *t, +			    struct fetch_param *f, int is_return)  {  	int ret = 0;  	unsigned long param;  	if (strcmp(arg, "retval") == 0) { -		if (is_return) { -			ff->func = fetch_retvalue; -			ff->data = NULL; -		} else +		if (is_return) +			f->fn = t->retval; +		else  			ret = -EINVAL;  	} else if (strncmp(arg, "stack", 5) == 0) {  		if (arg[5] == '\0') { -			ff->func = fetch_stack_address; -			ff->data = NULL; +			if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR) == 0) +				f->fn = fetch_stack_address; +			else +				ret = -EINVAL;  		} else if (isdigit(arg[5])) {  			ret = strict_strtoul(arg + 5, 10, ¶m);  			if (ret || param > PARAM_MAX_STACK)  				ret = 
-EINVAL;  			else { -				ff->func = fetch_stack; -				ff->data = (void *)param; +				f->fn = t->stack; +				f->data = (void *)param;  			}  		} else  			ret = -EINVAL; @@ -489,7 +573,8 @@ static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return)  }  /* Recursive argument parser */ -static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return) +static int __parse_probe_arg(char *arg, const struct fetch_type *t, +			     struct fetch_param *f, int is_return)  {  	int ret = 0;  	unsigned long param; @@ -498,13 +583,13 @@ static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)  	switch (arg[0]) {  	case '$': -		ret = parse_probe_vars(arg + 1, ff, is_return); +		ret = parse_probe_vars(arg + 1, t, f, is_return);  		break;  	case '%':	/* named register */  		ret = regs_query_register_offset(arg + 1);  		if (ret >= 0) { -			ff->func = fetch_register; -			ff->data = (void *)(unsigned long)ret; +			f->fn = t->reg; +			f->data = (void *)(unsigned long)ret;  			ret = 0;  		}  		break; @@ -513,26 +598,22 @@ static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)  			ret = strict_strtoul(arg + 1, 0, ¶m);  			if (ret)  				break; -			ff->func = fetch_memory; -			ff->data = (void *)param; +			f->fn = t->memory; +			f->data = (void *)param;  		} else {  			ret = split_symbol_offset(arg + 1, &offset);  			if (ret)  				break; -			ff->data = alloc_symbol_cache(arg + 1, offset); -			if (ff->data) -				ff->func = fetch_symbol; -			else -				ret = -EINVAL; +			f->data = alloc_symbol_cache(arg + 1, offset); +			if (f->data) +				f->fn = t->symbol;  		}  		break; -	case '+':	/* indirect memory */ +	case '+':	/* deref memory */  	case '-':  		tmp = strchr(arg, '('); -		if (!tmp) { -			ret = -EINVAL; +		if (!tmp)  			break; -		}  		*tmp = '\0';  		ret = strict_strtol(arg + 1, 0, &offset);  		if (ret) @@ -542,38 +623,58 @@ static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)  		arg = tmp + 1;  		tmp = strrchr(arg, ')');  		if (tmp) { -			struct indirect_fetch_data *id; +			struct deref_fetch_param *dprm; +			const struct fetch_type *t2 = find_fetch_type(NULL);  			*tmp = '\0'; -			id = kzalloc(sizeof(struct indirect_fetch_data), -				     GFP_KERNEL); -			if (!id) +			dprm = kzalloc(sizeof(struct deref_fetch_param), +				       GFP_KERNEL); +			if (!dprm)  				return -ENOMEM; -			id->offset = offset; -			ret = __parse_probe_arg(arg, &id->orig, is_return); +			dprm->offset = offset; +			ret = __parse_probe_arg(arg, t2, &dprm->orig, +						is_return);  			if (ret) -				kfree(id); +				kfree(dprm);  			else { -				ff->func = fetch_indirect; -				ff->data = (void *)id; +				f->fn = t->deref; +				f->data = (void *)dprm;  			} -		} else -			ret = -EINVAL; +		}  		break; -	default: -		/* TODO: support custom handler */ -		ret = -EINVAL;  	} +	if (!ret && !f->fn) +		ret = -EINVAL;  	return ret;  }  /* String length checking wrapper */ -static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return) +static int parse_probe_arg(char *arg, struct trace_probe *tp, +			   struct probe_arg *parg, int is_return)  { +	const char *t; +  	if (strlen(arg) > MAX_ARGSTR_LEN) {  		pr_info("Argument is too long.: %s\n",  arg);  		return -ENOSPC;  	} -	return __parse_probe_arg(arg, ff, is_return); +	parg->comm = kstrdup(arg, GFP_KERNEL); +	if (!parg->comm) { +		pr_info("Failed to allocate memory for command '%s'.\n", arg); +		return -ENOMEM; +	} +	t = strchr(parg->comm, ':'); +	if (t) { +		arg[t - parg->comm] = '\0'; +		t++; 
+	} +	parg->type = find_fetch_type(t); +	if (!parg->type) { +		pr_info("Unsupported type: %s\n", t); +		return -EINVAL; +	} +	parg->offset = tp->size; +	tp->size += parg->type->size; +	return __parse_probe_arg(arg, parg->type, &parg->fetch, is_return);  }  /* Return 1 if name is reserved or already used by another argument */ @@ -603,15 +704,18 @@ static int create_trace_probe(int argc, char **argv)  	 *  @ADDR	: fetch memory at ADDR (ADDR should be in kernel)  	 *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)  	 *  %REG	: fetch register REG -	 * Indirect memory fetch: +	 * Dereferencing memory fetch:  	 *  +|-offs(ARG) : fetch memory at ARG +|- offs address.  	 * Alias name of args:  	 *  NAME=FETCHARG : set NAME as alias of FETCHARG. +	 * Type of args: +	 *  FETCHARG:TYPE : use TYPE instead of unsigned long.  	 */  	struct trace_probe *tp;  	int i, ret = 0;  	int is_return = 0, is_delete = 0; -	char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL; +	char *symbol = NULL, *event = NULL, *group = NULL; +	char *arg, *tmp;  	unsigned long offset = 0;  	void *addr = NULL;  	char buf[MAX_EVENT_NAME_LEN]; @@ -724,13 +828,6 @@ static int create_trace_probe(int argc, char **argv)  		else  			arg = argv[i]; -		if (conflict_field_name(argv[i], tp->args, i)) { -			pr_info("Argument%d name '%s' conflicts with " -				"another field.\n", i, argv[i]); -			ret = -EINVAL; -			goto error; -		} -  		tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);  		if (!tp->args[i].name) {  			pr_info("Failed to allocate argument%d name '%s'.\n", @@ -738,9 +835,19 @@ static int create_trace_probe(int argc, char **argv)  			ret = -ENOMEM;  			goto error;  		} +		tmp = strchr(tp->args[i].name, ':'); +		if (tmp) +			*tmp = '_';	/* convert : to _ */ + +		if (conflict_field_name(tp->args[i].name, tp->args, i)) { +			pr_info("Argument%d name '%s' conflicts with " +				"another field.\n", i, argv[i]); +			ret = -EINVAL; +			goto error; +		}  		/* Parse fetch argument */ -		ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return); +		ret = parse_probe_arg(arg, tp, &tp->args[i], is_return);  		if (ret) {  			pr_info("Parse error at argument%d. (%d)\n", i, ret);  			kfree(tp->args[i].name); @@ -795,8 +902,7 @@ static void probes_seq_stop(struct seq_file *m, void *v)  static int probes_seq_show(struct seq_file *m, void *v)  {  	struct trace_probe *tp = v; -	int i, ret; -	char buf[MAX_ARGSTR_LEN + 1]; +	int i;  	seq_printf(m, "%c", probe_is_return(tp) ? 
'r' : 'p');  	seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name); @@ -808,15 +914,10 @@ static int probes_seq_show(struct seq_file *m, void *v)  	else  		seq_printf(m, " %s", probe_symbol(tp)); -	for (i = 0; i < tp->nr_args; i++) { -		ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i].fetch); -		if (ret < 0) { -			pr_warning("Argument%d decoding error(%d).\n", i, ret); -			return ret; -		} -		seq_printf(m, " %s=%s", tp->args[i].name, buf); -	} +	for (i = 0; i < tp->nr_args; i++) +		seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm);  	seq_printf(m, "\n"); +  	return 0;  } @@ -946,9 +1047,10 @@ static const struct file_operations kprobe_profile_ops = {  static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)  {  	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); -	struct kprobe_trace_entry *entry; +	struct kprobe_trace_entry_head *entry;  	struct ring_buffer_event *event;  	struct ring_buffer *buffer; +	u8 *data;  	int size, i, pc;  	unsigned long irq_flags;  	struct ftrace_event_call *call = &tp->call; @@ -958,7 +1060,7 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)  	local_save_flags(irq_flags);  	pc = preempt_count(); -	size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); +	size = sizeof(*entry) + tp->size;  	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,  						  size, irq_flags, pc); @@ -966,10 +1068,10 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)  		return;  	entry = ring_buffer_event_data(event); -	entry->nargs = tp->nr_args;  	entry->ip = (unsigned long)kp->addr; +	data = (u8 *)&entry[1];  	for (i = 0; i < tp->nr_args; i++) -		entry->args[i] = call_fetch(&tp->args[i].fetch, regs); +		call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);  	if (!filter_current_check_discard(buffer, call, entry, event))  		trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); @@ -980,9 +1082,10 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,  					  struct pt_regs *regs)  {  	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); -	struct kretprobe_trace_entry *entry; +	struct kretprobe_trace_entry_head *entry;  	struct ring_buffer_event *event;  	struct ring_buffer *buffer; +	u8 *data;  	int size, i, pc;  	unsigned long irq_flags;  	struct ftrace_event_call *call = &tp->call; @@ -990,7 +1093,7 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,  	local_save_flags(irq_flags);  	pc = preempt_count(); -	size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); +	size = sizeof(*entry) + tp->size;  	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,  						  size, irq_flags, pc); @@ -998,11 +1101,11 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,  		return;  	entry = ring_buffer_event_data(event); -	entry->nargs = tp->nr_args;  	entry->func = (unsigned long)tp->rp.kp.addr;  	entry->ret_ip = (unsigned long)ri->ret_addr; +	data = (u8 *)&entry[1];  	for (i = 0; i < tp->nr_args; i++) -		entry->args[i] = call_fetch(&tp->args[i].fetch, regs); +		call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);  	if (!filter_current_check_discard(buffer, call, entry, event))  		trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); @@ -1013,12 +1116,13 @@ enum print_line_t  print_kprobe_event(struct trace_iterator *iter, int flags,  		   struct trace_event *event)  { -	struct kprobe_trace_entry *field; +	struct 
kprobe_trace_entry_head *field;  	struct trace_seq *s = &iter->seq;  	struct trace_probe *tp; +	u8 *data;  	int i; -	field = (struct kprobe_trace_entry *)iter->ent; +	field = (struct kprobe_trace_entry_head *)iter->ent;  	tp = container_of(event, struct trace_probe, call.event);  	if (!trace_seq_printf(s, "%s: (", tp->call.name)) @@ -1030,9 +1134,10 @@ print_kprobe_event(struct trace_iterator *iter, int flags,  	if (!trace_seq_puts(s, ")"))  		goto partial; -	for (i = 0; i < field->nargs; i++) -		if (!trace_seq_printf(s, " %s=%lx", -				      tp->args[i].name, field->args[i])) +	data = (u8 *)&field[1]; +	for (i = 0; i < tp->nr_args; i++) +		if (!tp->args[i].type->print(s, tp->args[i].name, +					     data + tp->args[i].offset))  			goto partial;  	if (!trace_seq_puts(s, "\n")) @@ -1047,12 +1152,13 @@ enum print_line_t  print_kretprobe_event(struct trace_iterator *iter, int flags,  		      struct trace_event *event)  { -	struct kretprobe_trace_entry *field; +	struct kretprobe_trace_entry_head *field;  	struct trace_seq *s = &iter->seq;  	struct trace_probe *tp; +	u8 *data;  	int i; -	field = (struct kretprobe_trace_entry *)iter->ent; +	field = (struct kretprobe_trace_entry_head *)iter->ent;  	tp = container_of(event, struct trace_probe, call.event);  	if (!trace_seq_printf(s, "%s: (", tp->call.name)) @@ -1070,9 +1176,10 @@ print_kretprobe_event(struct trace_iterator *iter, int flags,  	if (!trace_seq_puts(s, ")"))  		goto partial; -	for (i = 0; i < field->nargs; i++) -		if (!trace_seq_printf(s, " %s=%lx", -				      tp->args[i].name, field->args[i])) +	data = (u8 *)&field[1]; +	for (i = 0; i < tp->nr_args; i++) +		if (!tp->args[i].type->print(s, tp->args[i].name, +					     data + tp->args[i].offset))  			goto partial;  	if (!trace_seq_puts(s, "\n")) @@ -1126,29 +1233,43 @@ static int probe_event_raw_init(struct ftrace_event_call *event_call)  static int kprobe_event_define_fields(struct ftrace_event_call *event_call)  {  	int ret, i; -	struct kprobe_trace_entry field; +	struct kprobe_trace_entry_head field;  	struct trace_probe *tp = (struct trace_probe *)event_call->data;  	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); -	DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);  	/* Set argument names as fields */ -	for (i = 0; i < tp->nr_args; i++) -		DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0); +	for (i = 0; i < tp->nr_args; i++) { +		ret = trace_define_field(event_call, tp->args[i].type->name, +					 tp->args[i].name, +					 sizeof(field) + tp->args[i].offset, +					 tp->args[i].type->size, +					 tp->args[i].type->is_signed, +					 FILTER_OTHER); +		if (ret) +			return ret; +	}  	return 0;  }  static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)  {  	int ret, i; -	struct kretprobe_trace_entry field; +	struct kretprobe_trace_entry_head field;  	struct trace_probe *tp = (struct trace_probe *)event_call->data;  	DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);  	DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0); -	DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);  	/* Set argument names as fields */ -	for (i = 0; i < tp->nr_args; i++) -		DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0); +	for (i = 0; i < tp->nr_args; i++) { +		ret = trace_define_field(event_call, tp->args[i].type->name, +					 tp->args[i].name, +					 sizeof(field) + tp->args[i].offset, +					 tp->args[i].type->size, +					 tp->args[i].type->is_signed, +					 FILTER_OTHER); +		if (ret) +			return ret; +	}  	return 0;  } @@ -1173,8 +1294,8 @@ static int 
__set_print_fmt(struct trace_probe *tp, char *buf, int len)  	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);  	for (i = 0; i < tp->nr_args; i++) { -		pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%%lx", -				tp->args[i].name); +		pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s", +				tp->args[i].name, tp->args[i].type->fmt);  	}  	pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg); @@ -1216,12 +1337,13 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,  {  	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);  	struct ftrace_event_call *call = &tp->call; -	struct kprobe_trace_entry *entry; +	struct kprobe_trace_entry_head *entry; +	u8 *data;  	int size, __size, i;  	unsigned long irq_flags;  	int rctx; -	__size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); +	__size = sizeof(*entry) + tp->size;  	size = ALIGN(__size + sizeof(u32), sizeof(u64));  	size -= sizeof(u32);  	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, @@ -1233,10 +1355,10 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,  	if (!entry)  		return; -	entry->nargs = tp->nr_args;  	entry->ip = (unsigned long)kp->addr; +	data = (u8 *)&entry[1];  	for (i = 0; i < tp->nr_args; i++) -		entry->args[i] = call_fetch(&tp->args[i].fetch, regs); +		call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);  	perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags, regs);  } @@ -1247,12 +1369,13 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,  {  	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);  	struct ftrace_event_call *call = &tp->call; -	struct kretprobe_trace_entry *entry; +	struct kretprobe_trace_entry_head *entry; +	u8 *data;  	int size, __size, i;  	unsigned long irq_flags;  	int rctx; -	__size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); +	__size = sizeof(*entry) + tp->size;  	size = ALIGN(__size + sizeof(u32), sizeof(u64));  	size -= sizeof(u32);  	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, @@ -1264,11 +1387,11 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,  	if (!entry)  		return; -	entry->nargs = tp->nr_args;  	entry->func = (unsigned long)tp->rp.kp.addr;  	entry->ret_ip = (unsigned long)ri->ret_addr; +	data = (u8 *)&entry[1];  	for (i = 0; i < tp->nr_args; i++) -		entry->args[i] = call_fetch(&tp->args[i].fetch, regs); +		call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);  	perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1,  			       irq_flags, regs); diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index d59cd6879477..8eaf00749b65 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -34,12 +34,6 @@  #include <asm/atomic.h> -/* - * For now, let us restrict the no. of symbols traced simultaneously to number - * of available hardware breakpoint registers. - */ -#define KSYM_TRACER_MAX HBP_NUM -  #define KSYM_TRACER_OP_LEN 3 /* rw- */  struct trace_ksym { @@ -53,7 +47,6 @@ struct trace_ksym {  static struct trace_array *ksym_trace_array; -static unsigned int ksym_filter_entry_count;  static unsigned int ksym_tracing_enabled;  static HLIST_HEAD(ksym_filter_head); @@ -181,13 +174,6 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)  	struct trace_ksym *entry;  	int ret = -ENOMEM; -	if (ksym_filter_entry_count >= KSYM_TRACER_MAX) { -		printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. 
No" -		" new requests for tracing can be accepted now.\n", -			KSYM_TRACER_MAX); -		return -ENOSPC; -	} -  	entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);  	if (!entry)  		return -ENOMEM; @@ -203,13 +189,17 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)  	if (IS_ERR(entry->ksym_hbp)) {  		ret = PTR_ERR(entry->ksym_hbp); -		printk(KERN_INFO "ksym_tracer request failed. Try again" -					" later!!\n"); +		if (ret == -ENOSPC) { +			printk(KERN_ERR "ksym_tracer: Maximum limit reached." +			" No new requests for tracing can be accepted now.\n"); +		} else { +			printk(KERN_INFO "ksym_tracer request failed. Try again" +					 " later!!\n"); +		}  		goto err;  	}  	hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head); -	ksym_filter_entry_count++;  	return 0; @@ -265,7 +255,6 @@ static void __ksym_trace_reset(void)  	hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,  								ksym_hlist) {  		unregister_wide_hw_breakpoint(entry->ksym_hbp); -		ksym_filter_entry_count--;  		hlist_del_rcu(&(entry->ksym_hlist));  		synchronize_rcu();  		kfree(entry); @@ -338,7 +327,6 @@ static ssize_t ksym_trace_filter_write(struct file *file,  				goto out_unlock;  		}  		/* Error or "symbol:---" case: drop it */ -		ksym_filter_entry_count--;  		hlist_del_rcu(&(entry->ksym_hlist));  		synchronize_rcu();  		kfree(entry); diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 6a9d36ddfcf2..250e7f9bd2f0 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -17,7 +17,6 @@ static inline int trace_valid_entry(struct trace_entry *entry)  	case TRACE_BRANCH:  	case TRACE_GRAPH_ENT:  	case TRACE_GRAPH_RET: -	case TRACE_HW_BRANCHES:  	case TRACE_KSYM:  		return 1;  	} @@ -756,62 +755,6 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)  }  #endif /* CONFIG_BRANCH_TRACER */ -#ifdef CONFIG_HW_BRANCH_TRACER -int -trace_selftest_startup_hw_branches(struct tracer *trace, -				   struct trace_array *tr) -{ -	struct trace_iterator *iter; -	struct tracer tracer; -	unsigned long count; -	int ret; - -	if (!trace->open) { -		printk(KERN_CONT "missing open function..."); -		return -1; -	} - -	ret = tracer_init(trace, tr); -	if (ret) { -		warn_failed_init_tracer(trace, ret); -		return ret; -	} - -	/* -	 * The hw-branch tracer needs to collect the trace from the various -	 * cpu trace buffers - before tracing is stopped. -	 */ -	iter = kzalloc(sizeof(*iter), GFP_KERNEL); -	if (!iter) -		return -ENOMEM; - -	memcpy(&tracer, trace, sizeof(tracer)); - -	iter->trace = &tracer; -	iter->tr = tr; -	iter->pos = -1; -	mutex_init(&iter->mutex); - -	trace->open(iter); - -	mutex_destroy(&iter->mutex); -	kfree(iter); - -	tracing_stop(); - -	ret = trace_test_buffer(tr, &count); -	trace->reset(tr); -	tracing_start(); - -	if (!ret && !count) { -		printk(KERN_CONT "no entries found.."); -		ret = -1; -	} - -	return ret; -} -#endif /* CONFIG_HW_BRANCH_TRACER */ -  #ifdef CONFIG_KSYM_TRACER  static int ksym_selftest_dummy; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index dee48658805c..5bfb213984b2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -774,7 +774,7 @@ void flush_delayed_work(struct delayed_work *dwork)  {  	if (del_timer_sync(&dwork->timer)) {  		struct cpu_workqueue_struct *cwq; -		cwq = wq_per_cpu(keventd_wq, get_cpu()); +		cwq = wq_per_cpu(get_wq_data(&dwork->work)->wq, get_cpu());  		__queue_work(cwq, &dwork->work);  		put_cpu();  	} | 
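
The trace_kprobe.c hunks above replace the single unsigned-long fetch path with per-type fetch functions that are generated by token-pasting macros (DEFINE_FETCH_deref, DEFINE_BASIC_FETCH_FUNCS) and then selected through fetch_type_table / find_fetch_type(). Below is a minimal userspace sketch of that generate-and-dispatch pattern, not the kernel code itself; every name here (demo_fetch_*, demo_type_table, demo_find_type) is invented for illustration.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * One fetch routine per C type, generated by token pasting, the same
 * trick the DEFINE_FETCH_*() macros in the patch rely on.
 */
#define DEFINE_DEMO_FETCH(type)						\
static void demo_fetch_##type(const void *src, void *dest)		\
{									\
	type v;								\
	memcpy(&v, src, sizeof(v));					\
	*(type *)dest = v;						\
}

DEFINE_DEMO_FETCH(uint8_t)
DEFINE_DEMO_FETCH(uint32_t)
DEFINE_DEMO_FETCH(uint64_t)

/* Per-type descriptor, analogous in shape to struct fetch_type. */
struct demo_type {
	const char *name;
	size_t size;
	void (*fetch)(const void *src, void *dest);
};

#define DEMO_TYPE(tname, ctype)						\
	{ .name = tname, .size = sizeof(ctype), .fetch = demo_fetch_##ctype }

static const struct demo_type demo_type_table[] = {
	DEMO_TYPE("u8",  uint8_t),
	DEMO_TYPE("u32", uint32_t),
	DEMO_TYPE("u64", uint64_t),
};

static const struct demo_type *demo_find_type(const char *name)
{
	size_t i;

	for (i = 0; i < sizeof(demo_type_table) / sizeof(demo_type_table[0]); i++)
		if (strcmp(name, demo_type_table[i].name) == 0)
			return &demo_type_table[i];
	return NULL;
}

int main(void)
{
	uint64_t src = 0x1122334455667788ULL;
	uint64_t dest = 0;
	const struct demo_type *t = demo_find_type("u32");

	if (t) {
		/* Copies only t->size bytes of the source value. */
		t->fetch(&src, &dest);
		printf("%s -> %#llx\n", t->name, (unsigned long long)dest);
	}
	return 0;
}

Each generated function writes a value of its own width into a caller-supplied destination slot, which is what lets a single probe carry arguments of mixed sizes instead of forcing everything to unsigned long.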
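
The "+|-offs(ARG)" syntax (renamed from "indirect" to "deref" in this patch) is parsed into a deref_fetch_param that wraps an inner fetch and applies an offset when the probe fires. The sketch below shows only that chained-callback shape; demo_fetch_param and demo_deref_param are local stand-ins, and a constant replaces the register read, so nothing here assumes the real pt_regs accessors.

#include <stdio.h>

typedef void (*demo_fetch_fn)(void *data, void *dest);

struct demo_fetch_param {
	demo_fetch_fn fn;
	void *data;
};

struct demo_deref_param {
	struct demo_fetch_param orig;	/* inner fetch, e.g. a register */
	long offset;			/* the +|-offs part of "+offs(ARG)" */
};

static void demo_fetch_const(void *data, void *dest)
{
	*(unsigned long *)dest = (unsigned long)data;
}

static void demo_fetch_deref(void *data, void *dest)
{
	struct demo_deref_param *d = data;
	unsigned long addr;

	d->orig.fn(d->orig.data, &addr);	/* resolve the inner value */
	addr += d->offset;
	/*
	 * The kernel version would now read memory at addr; this demo
	 * just reports the computed address.
	 */
	*(unsigned long *)dest = addr;
}

int main(void)
{
	/* Models "+16(1000)": the inner fetch yields 1000, deref adds 16. */
	struct demo_deref_param d = {
		.orig = { .fn = demo_fetch_const, .data = (void *)1000UL },
		.offset = 16,
	};
	struct demo_fetch_param p = { .fn = demo_fetch_deref, .data = &d };
	unsigned long out;

	p.fn(p.data, &out);
	printf("deref target address: %lu\n", out);
	return 0;
}

Because the inner parameter is itself a fetch_param, the construction nests naturally, which is why __parse_probe_arg() can recurse into the expression between the parentheses.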
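
With the fixed args[] array gone, a trace record is now a small *_entry_head followed by a packed payload: each probe_arg remembers a byte offset (parse_probe_arg() grows tp->size by the argument's type size) and the fetch functions store straight into data + offset, where data = (u8 *)&entry[1]. The following standalone sketch reproduces only that layout arithmetic; the argument names and sizes are made up, and no ring-buffer API is involved.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Fixed header, analogous in role to kprobe_trace_entry_head. */
struct demo_head {
	unsigned long ip;
};

/* Per-argument slot: byte offset into the payload plus its width. */
struct demo_arg {
	const char *name;
	size_t offset;
	size_t size;
};

int main(void)
{
	struct demo_arg args[] = {
		{ "dfd",   0, sizeof(int32_t)  },
		{ "flags", 0, sizeof(uint64_t) },
	};
	size_t payload = 0, i;

	/* Lay the arguments out back to back, accumulating the offsets. */
	for (i = 0; i < 2; i++) {
		args[i].offset = payload;
		payload += args[i].size;
	}

	/* One record = header + payload, sized once at reserve time. */
	size_t record_size = sizeof(struct demo_head) + payload;
	unsigned char *record = calloc(1, record_size);
	if (!record)
		return 1;

	struct demo_head *head = (struct demo_head *)record;
	unsigned char *data = (unsigned char *)&head[1];

	head->ip = 0xdeadbeefUL;

	/* "Fetch" each argument straight into its slot. */
	int32_t dfd = -100;
	uint64_t flags = 0x8000;
	memcpy(data + args[0].offset, &dfd, args[0].size);
	memcpy(data + args[1].offset, &flags, args[1].size);

	printf("record is %zu bytes: header %zu + payload %zu\n",
	       record_size, sizeof(struct demo_head), payload);
	free(record);
	return 0;
}

Dropping the nargs field is what enables the rest of the patch: define_fields registers each argument with its own type name, size and signedness via trace_define_field(), and the print and format paths use the per-type format string instead of a flat %lx.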
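
The trace_ksym.c hunks drop the tracer's private KSYM_TRACER_MAX counter and instead let the breakpoint layer report slot exhaustion through an ERR_PTR(-ENOSPC) return, which the tracer then distinguishes from other failures. Below is a userspace approximation of that error-pointer convention, shown only for the control flow; MAX_ERRNO and the helpers are re-implemented here rather than taken from a kernel header, and the pointer/integer conversion is assumed to behave as it does on the usual flat address spaces.

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO	4095

static inline void *ERR_PTR(long error)
{
	return (void *)error;
}

static inline long PTR_ERR(const void *ptr)
{
	return (long)ptr;
}

static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* Pretend breakpoint allocator: out of slots means ERR_PTR(-ENOSPC). */
static void *register_demo_breakpoint(int free_slots)
{
	static int dummy;

	if (free_slots <= 0)
		return ERR_PTR(-ENOSPC);
	return &dummy;
}

int main(void)
{
	void *bp = register_demo_breakpoint(0);

	if (IS_ERR(bp)) {
		long err = PTR_ERR(bp);

		if (err == -ENOSPC)
			printf("maximum limit reached, no slots left\n");
		else
			printf("request failed (%ld), try again later\n", err);
		return 1;
	}
	return 0;
}

Centralizing the limit in the constraint code means callers such as the ksym tracer no longer need to guess how many slots exist per CPU; they only need to react to the error they are handed.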
