Diffstat (limited to 'kernel')
-rw-r--r--   kernel/events/core.c      | 117
-rw-r--r--   kernel/events/uprobes.c   |  31
-rw-r--r--   kernel/hrtimer.c          |   1
3 files changed, 80 insertions, 69 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 440eefc67397..689237a0c5e8 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -39,6 +39,7 @@
 #include <linux/hw_breakpoint.h>
 #include <linux/mm_types.h>
 #include <linux/cgroup.h>
+#include <linux/module.h>
 
 #include "internal.h"
 
@@ -1677,6 +1678,8 @@ event_sched_in(struct perf_event *event,
 	u64 tstamp = perf_event_time(event);
 	int ret = 0;
 
+	lockdep_assert_held(&ctx->lock);
+
 	if (event->state <= PERF_EVENT_STATE_OFF)
 		return 0;
 
@@ -3244,9 +3247,13 @@ static void __free_event(struct perf_event *event)
 	if (event->ctx)
 		put_ctx(event->ctx);
 
+	if (event->pmu)
+		module_put(event->pmu->module);
+
 	call_rcu(&event->rcu_head, free_event_rcu);
 }
-static void free_event(struct perf_event *event)
+
+static void _free_event(struct perf_event *event)
 {
 	irq_work_sync(&event->pending);
 
@@ -3267,42 +3274,31 @@ static void free_event(struct perf_event *event)
 	if (is_cgroup_event(event))
 		perf_detach_cgroup(event);
 
-
 	__free_event(event);
 }
 
-int perf_event_release_kernel(struct perf_event *event)
+/*
+ * Used to free events which have a known refcount of 1, such as in error paths
+ * where the event isn't exposed yet and inherited events.
+ */
+static void free_event(struct perf_event *event)
 {
-	struct perf_event_context *ctx = event->ctx;
-
-	WARN_ON_ONCE(ctx->parent_ctx);
-	/*
-	 * There are two ways this annotation is useful:
-	 *
-	 *  1) there is a lock recursion from perf_event_exit_task
-	 *     see the comment there.
-	 *
-	 *  2) there is a lock-inversion with mmap_sem through
-	 *     perf_event_read_group(), which takes faults while
-	 *     holding ctx->mutex, however this is called after
-	 *     the last filedesc died, so there is no possibility
-	 *     to trigger the AB-BA case.
-	 */
-	mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
-	perf_remove_from_context(event, true);
-	mutex_unlock(&ctx->mutex);
-
-	free_event(event);
+	if (WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1,
+				"unexpected event refcount: %ld; ptr=%p\n",
+				atomic_long_read(&event->refcount), event)) {
+		/* leak to avoid use-after-free */
+		return;
+	}
 
-	return 0;
+	_free_event(event);
 }
-EXPORT_SYMBOL_GPL(perf_event_release_kernel);
 
 /*
  * Called when the last reference to the file is gone.
  */
 static void put_event(struct perf_event *event)
 {
+	struct perf_event_context *ctx = event->ctx;
 	struct task_struct *owner;
 
 	if (!atomic_long_dec_and_test(&event->refcount))
@@ -3341,9 +3337,33 @@ static void put_event(struct perf_event *event)
 		put_task_struct(owner);
 	}
 
-	perf_event_release_kernel(event);
+	WARN_ON_ONCE(ctx->parent_ctx);
+	/*
+	 * There are two ways this annotation is useful:
+	 *
+	 *  1) there is a lock recursion from perf_event_exit_task
+	 *     see the comment there.
+	 *
+	 *  2) there is a lock-inversion with mmap_sem through
+	 *     perf_event_read_group(), which takes faults while
+	 *     holding ctx->mutex, however this is called after
+	 *     the last filedesc died, so there is no possibility
+	 *     to trigger the AB-BA case.
+	 */
+	mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
+	perf_remove_from_context(event, true);
+	mutex_unlock(&ctx->mutex);
+
+	_free_event(event);
 }
 
+int perf_event_release_kernel(struct perf_event *event)
+{
+	put_event(event);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(perf_event_release_kernel);
+
 static int perf_release(struct inode *inode, struct file *file)
 {
 	put_event(file->private_data);
@@ -6578,6 +6598,7 @@ free_pdc:
 	free_percpu(pmu->pmu_disable_count);
 	goto unlock;
 }
+EXPORT_SYMBOL_GPL(perf_pmu_register);
 
 void perf_pmu_unregister(struct pmu *pmu)
 {
@@ -6599,6 +6620,7 @@ void perf_pmu_unregister(struct pmu *pmu)
 	put_device(pmu->dev);
 	free_pmu_context(pmu);
 }
+EXPORT_SYMBOL_GPL(perf_pmu_unregister);
 
 struct pmu *perf_init_event(struct perf_event *event)
 {
@@ -6612,6 +6634,10 @@ struct pmu *perf_init_event(struct perf_event *event)
 	pmu = idr_find(&pmu_idr, event->attr.type);
 	rcu_read_unlock();
 	if (pmu) {
+		if (!try_module_get(pmu->module)) {
+			pmu = ERR_PTR(-ENODEV);
+			goto unlock;
+		}
 		event->pmu = pmu;
 		ret = pmu->event_init(event);
 		if (ret)
@@ -6620,6 +6646,10 @@ struct pmu *perf_init_event(struct perf_event *event)
 	}
 
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
+		if (!try_module_get(pmu->module)) {
+			pmu = ERR_PTR(-ENODEV);
+			goto unlock;
+		}
 		event->pmu = pmu;
 		ret = pmu->event_init(event);
 		if (!ret)
@@ -6798,6 +6828,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 err_pmu:
 	if (event->destroy)
 		event->destroy(event);
+	module_put(pmu->module);
 err_ns:
 	if (event->ns)
 		put_pid_ns(event->ns);
@@ -7067,20 +7098,26 @@ SYSCALL_DEFINE5(perf_event_open,
 		}
 	}
 
+	if (task && group_leader &&
+	    group_leader->attr.inherit != attr.inherit) {
+		err = -EINVAL;
+		goto err_task;
+	}
+
 	get_online_cpus();
 
 	event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
				 NULL, NULL);
 	if (IS_ERR(event)) {
 		err = PTR_ERR(event);
-		goto err_task;
+		goto err_cpus;
 	}
 
 	if (flags & PERF_FLAG_PID_CGROUP) {
 		err = perf_cgroup_connect(pid, event, &attr, group_leader);
 		if (err) {
 			__free_event(event);
-			goto err_task;
+			goto err_cpus;
 		}
 	}
 
@@ -7242,8 +7279,9 @@ err_context:
 	put_ctx(ctx);
 err_alloc:
 	free_event(event);
-err_task:
+err_cpus:
 	put_online_cpus();
+err_task:
 	if (task)
 		put_task_struct(task);
 err_group_fd:
@@ -7379,7 +7417,7 @@ __perf_event_exit_task(struct perf_event *child_event,
 			 struct perf_event_context *child_ctx,
 			 struct task_struct *child)
 {
-	perf_remove_from_context(child_event, !!child_event->parent);
+	perf_remove_from_context(child_event, true);
 
 	/*
 	 * It can happen that the parent exits first, and has events
@@ -7394,7 +7432,7 @@ __perf_event_exit_task(struct perf_event *child_event,
 
 static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 {
-	struct perf_event *child_event, *tmp;
+	struct perf_event *child_event;
 	struct perf_event_context *child_ctx;
 	unsigned long flags;
 
@@ -7448,24 +7486,9 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 	 */
 	mutex_lock(&child_ctx->mutex);
 
-again:
-	list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups,
-				 group_entry)
-		__perf_event_exit_task(child_event, child_ctx, child);
-
-	list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups,
-				 group_entry)
+	list_for_each_entry_rcu(child_event, &child_ctx->event_list, event_entry)
 		__perf_event_exit_task(child_event, child_ctx, child);
 
-	/*
-	 * If the last event was a group event, it will have appended all
-	 * its siblings to the list, but we obtained 'tmp' before that which
-	 * will still point to the list head terminating the iteration.
-	 */
-	if (!list_empty(&child_ctx->pinned_groups) ||
-	    !list_empty(&child_ctx->flexible_groups))
-		goto again;
-
 	mutex_unlock(&child_ctx->mutex);
 
 	put_ctx(child_ctx);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 04709b66369d..d1edc5e6fd03 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -60,8 +60,6 @@ static struct percpu_rw_semaphore dup_mmap_sem;
 
 /* Have a copy of original instruction */
 #define UPROBE_COPY_INSN	0
-/* Can skip singlestep */
-#define UPROBE_SKIP_SSTEP	1
 
 struct uprobe {
 	struct rb_node		rb_node;	/* node in the rb tree */
@@ -491,12 +489,9 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
 	uprobe->offset = offset;
 	init_rwsem(&uprobe->register_rwsem);
 	init_rwsem(&uprobe->consumer_rwsem);
-	/* For now assume that the instruction need not be single-stepped */
-	__set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
 
 	/* add to uprobes_tree, sorted on inode:offset */
 	cur_uprobe = insert_uprobe(uprobe);
-
 	/* a uprobe exists for this inode:offset combination */
 	if (cur_uprobe) {
 		kfree(uprobe);
@@ -1628,20 +1623,6 @@ bool uprobe_deny_signal(void)
 	return true;
 }
 
-/*
- * Avoid singlestepping the original instruction if the original instruction
- * is a NOP or can be emulated.
- */
-static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)
-{
-	if (test_bit(UPROBE_SKIP_SSTEP, &uprobe->flags)) {
-		if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
-			return true;
-		clear_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
-	}
-	return false;
-}
-
 static void mmf_recalc_uprobes(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
@@ -1868,13 +1849,13 @@ static void handle_swbp(struct pt_regs *regs)
 
 	handler_chain(uprobe, regs);
 
-	if (can_skip_sstep(uprobe, regs))
+	if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
 		goto out;
 
 	if (!pre_ssout(uprobe, regs, bp_vaddr))
 		return;
 
-	/* can_skip_sstep() succeeded, or restart if can't singlestep */
+	/* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */
 out:
 	put_uprobe(uprobe);
 }
@@ -1886,10 +1867,11 @@ out:
 static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
 {
 	struct uprobe *uprobe;
+	int err = 0;
 
 	uprobe = utask->active_uprobe;
 	if (utask->state == UTASK_SSTEP_ACK)
-		arch_uprobe_post_xol(&uprobe->arch, regs);
+		err = arch_uprobe_post_xol(&uprobe->arch, regs);
 	else if (utask->state == UTASK_SSTEP_TRAPPED)
 		arch_uprobe_abort_xol(&uprobe->arch, regs);
 	else
@@ -1903,6 +1885,11 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
 	spin_lock_irq(&current->sighand->siglock);
 	recalc_sigpending(); /* see uprobe_deny_signal() */
 	spin_unlock_irq(&current->sighand->siglock);
+
+	if (unlikely(err)) {
+		uprobe_warn(current, "execute the probed insn, sending SIGILL.");
+		force_sig_info(SIGILL, SEND_SIG_FORCED, current);
+	}
 }
 
 /*
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index e0501fe7140d..3ab28993f6e0 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1039,6 +1039,7 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(__hrtimer_start_range_ns);
 
 /**
  * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
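
The EXPORT_SYMBOL_GPL() additions to perf_pmu_register()/perf_pmu_unregister(), together with the try_module_get()/module_put() calls added in perf_init_event(), perf_event_alloc() and __free_event(), are what let a PMU driver live in a loadable module: if the driver sets pmu->module, the core holds a module reference for every live event. Below is a minimal, hypothetical sketch of such a modular driver; it is not part of this commit, and the "demo_pmu" name and the empty callbacks are illustrative only.

/* Hypothetical modular PMU driver sketch (names and callbacks made up). */
#include <linux/module.h>
#include <linux/perf_event.h>

static int demo_event_init(struct perf_event *event)
{
	/* Only accept events of the type assigned to this PMU at register time. */
	if (event->attr.type != event->pmu->type)
		return -ENOENT;
	return 0;
}

static void demo_start(struct perf_event *event, int flags) { }
static void demo_stop(struct perf_event *event, int flags) { }

static int demo_add(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_START)
		demo_start(event, flags);
	return 0;
}

static void demo_del(struct perf_event *event, int flags)
{
	demo_stop(event, flags);
}

static void demo_read(struct perf_event *event) { }

static struct pmu demo_pmu = {
	.module		= THIS_MODULE,	/* lets the core pin this module via try_module_get() */
	.task_ctx_nr	= perf_invalid_context,
	.event_init	= demo_event_init,
	.add		= demo_add,
	.del		= demo_del,
	.start		= demo_start,
	.stop		= demo_stop,
	.read		= demo_read,
};

static int __init demo_pmu_init(void)
{
	/* type -1: ask the core to allocate a dynamic PMU type id */
	return perf_pmu_register(&demo_pmu, "demo_pmu", -1);
}

static void __exit demo_pmu_exit(void)
{
	perf_pmu_unregister(&demo_pmu);
}

module_init(demo_pmu_init);
module_exit(demo_pmu_exit);
MODULE_LICENSE("GPL");

The __hrtimer_start_range_ns() export fits the same theme: that low-level hrtimer entry point also becomes callable from modular code.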
