diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/bpf/core.c | 5 | ||||
-rw-r--r-- | kernel/bpf/inode.c | 4 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 4 | ||||
-rw-r--r-- | kernel/events/uprobes.c | 2 | ||||
-rw-r--r-- | kernel/fork.c | 2 | ||||
-rw-r--r-- | kernel/pid.c | 2 | ||||
-rw-r--r-- | kernel/rseq.c | 10 | ||||
-rw-r--r-- | kernel/sched/deadline.c | 73 | ||||
-rw-r--r-- | kernel/sched/fair.c | 9 | ||||
-rw-r--r-- | kernel/seccomp.c | 12 | ||||
-rw-r--r-- | kernel/smp.c | 11 | ||||
-rw-r--r-- | kernel/sys.c | 22 | ||||
-rw-r--r-- | kernel/trace/bpf_trace.c | 9 | ||||
-rw-r--r-- | kernel/trace/trace_fprobe.c | 11 | ||||
-rw-r--r-- | kernel/trace/trace_kprobe.c | 11 | ||||
-rw-r--r-- | kernel/trace/trace_probe.h | 9 | ||||
-rw-r--r-- | kernel/trace/trace_uprobe.c | 12 |
17 files changed, 138 insertions, 70 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 1f51c8f20722..08bdb623f4f9 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2326,6 +2326,7 @@ static bool __bpf_prog_map_compatible(struct bpf_map *map, map->owner->type = prog_type; map->owner->jited = fp->jited; map->owner->xdp_has_frags = aux->xdp_has_frags; + map->owner->expected_attach_type = fp->expected_attach_type; map->owner->attach_func_proto = aux->attach_func_proto; for_each_cgroup_storage_type(i) { map->owner->storage_cookie[i] = @@ -2337,6 +2338,10 @@ static bool __bpf_prog_map_compatible(struct bpf_map *map, ret = map->owner->type == prog_type && map->owner->jited == fp->jited && map->owner->xdp_has_frags == aux->xdp_has_frags; + if (ret && + map->map_type == BPF_MAP_TYPE_PROG_ARRAY && + map->owner->expected_attach_type != fp->expected_attach_type) + ret = false; for_each_cgroup_storage_type(i) { if (!ret) break; diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 9aaf5124648b..746b5644d9a1 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -775,7 +775,7 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root) return 0; } -static void bpf_free_inode(struct inode *inode) +static void bpf_destroy_inode(struct inode *inode) { enum bpf_type type; @@ -790,7 +790,7 @@ const struct super_operations bpf_super_ops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, .show_options = bpf_show_options, - .free_inode = bpf_free_inode, + .destroy_inode = bpf_destroy_inode, }; enum { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1829f62a74a9..96640a80fd9c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -14545,7 +14545,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) } else { /* all other ALU ops: and, sub, xor, add, ... */ if (BPF_SRC(insn->code) == BPF_X) { - if (insn->imm != 0 || insn->off > 1 || + if (insn->imm != 0 || (insn->off != 0 && insn->off != 1) || (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) { verbose(env, "BPF_ALU uses reserved fields\n"); return -EINVAL; @@ -14555,7 +14555,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) if (err) return err; } else { - if (insn->src_reg != BPF_REG_0 || insn->off > 1 || + if (insn->src_reg != BPF_REG_0 || (insn->off != 0 && insn->off != 1) || (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) { verbose(env, "BPF_ALU uses reserved fields\n"); return -EINVAL; diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index e60f5e71e35d..c00981cc6fe5 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -114,7 +114,7 @@ struct xol_area { static void uprobe_warn(struct task_struct *t, const char *msg) { - pr_warn("uprobe: %s:%d failed to %s\n", current->comm, current->pid, msg); + pr_warn("uprobe: %s:%d failed to %s\n", t->comm, t->pid, msg); } /* diff --git a/kernel/fork.c b/kernel/fork.c index 97c9afe3efc3..e5ec098a6f61 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1807,7 +1807,7 @@ static int copy_files(unsigned long clone_flags, struct task_struct *tsk, return 0; } -static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) +static int copy_sighand(u64 clone_flags, struct task_struct *tsk) { struct sighand_struct *sig; diff --git a/kernel/pid.c b/kernel/pid.c index 2715afb77eab..b80c3bfb58d0 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -487,7 +487,7 @@ pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns) struct upid *upid; pid_t nr = 0; - if (pid && ns->level <= pid->level) { + if (pid && ns && ns->level <= pid->level) { upid = &pid->numbers[ns->level]; if (upid->ns == ns) nr = upid->nr; diff --git a/kernel/rseq.c b/kernel/rseq.c index 23894ba8250c..810005f927d7 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -255,12 +255,12 @@ static int rseq_need_restart(struct task_struct *t, u32 cs_flags) /* * Load and clear event mask atomically with respect to - * scheduler preemption. + * scheduler preemption and membarrier IPIs. */ - preempt_disable(); - event_mask = t->rseq_event_mask; - t->rseq_event_mask = 0; - preempt_enable(); + scoped_guard(RSEQ_EVENT_GUARD) { + event_mask = t->rseq_event_mask; + t->rseq_event_mask = 0; + } return !!event_mask; } diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 53e3670fbb1e..6ec66fef3f91 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -2617,6 +2617,25 @@ static int find_later_rq(struct task_struct *task) return -1; } +static struct task_struct *pick_next_pushable_dl_task(struct rq *rq) +{ + struct task_struct *p; + + if (!has_pushable_dl_tasks(rq)) + return NULL; + + p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root)); + + WARN_ON_ONCE(rq->cpu != task_cpu(p)); + WARN_ON_ONCE(task_current(rq, p)); + WARN_ON_ONCE(p->nr_cpus_allowed <= 1); + + WARN_ON_ONCE(!task_on_rq_queued(p)); + WARN_ON_ONCE(!dl_task(p)); + + return p; +} + /* Locks the rq it finds */ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq) { @@ -2644,12 +2663,37 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq) /* Retry if something changed. */ if (double_lock_balance(rq, later_rq)) { - if (unlikely(task_rq(task) != rq || + /* + * double_lock_balance had to release rq->lock, in the + * meantime, task may no longer be fit to be migrated. + * Check the following to ensure that the task is + * still suitable for migration: + * 1. It is possible the task was scheduled, + * migrate_disabled was set and then got preempted, + * so we must check the task migration disable + * flag. + * 2. The CPU picked is in the task's affinity. + * 3. For throttled task (dl_task_offline_migration), + * check the following: + * - the task is not on the rq anymore (it was + * migrated) + * - the task is not on CPU anymore + * - the task is still a dl task + * - the task is not queued on the rq anymore + * 4. For the non-throttled task (push_dl_task), the + * check to ensure that this task is still at the + * head of the pushable tasks list is enough. + */ + if (unlikely(is_migration_disabled(task) || !cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) || - task_on_cpu(rq, task) || - !dl_task(task) || - is_migration_disabled(task) || - !task_on_rq_queued(task))) { + (task->dl.dl_throttled && + (task_rq(task) != rq || + task_on_cpu(rq, task) || + !dl_task(task) || + !task_on_rq_queued(task))) || + (!task->dl.dl_throttled && + task != pick_next_pushable_dl_task(rq)))) { + double_unlock_balance(rq, later_rq); later_rq = NULL; break; @@ -2672,25 +2716,6 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq) return later_rq; } -static struct task_struct *pick_next_pushable_dl_task(struct rq *rq) -{ - struct task_struct *p; - - if (!has_pushable_dl_tasks(rq)) - return NULL; - - p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root)); - - WARN_ON_ONCE(rq->cpu != task_cpu(p)); - WARN_ON_ONCE(task_current(rq, p)); - WARN_ON_ONCE(p->nr_cpus_allowed <= 1); - - WARN_ON_ONCE(!task_on_rq_queued(p)); - WARN_ON_ONCE(!dl_task(p)); - - return p; -} - /* * See if the non running -deadline tasks on this rq * can be sent to some other CPU where they can preempt diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index af61769b1d50..b3d9826e25b0 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7187,6 +7187,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) int h_nr_delayed = 0; struct cfs_rq *cfs_rq; u64 slice = 0; + int ret = 0; if (entity_is_task(se)) { p = task_of(se); @@ -7218,7 +7219,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) /* end evaluation on encountering a throttled cfs_rq */ if (cfs_rq_throttled(cfs_rq)) - return 0; + goto out; /* Don't dequeue parent if it has other entities besides us */ if (cfs_rq->load.weight) { @@ -7261,7 +7262,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) /* end evaluation on encountering a throttled cfs_rq */ if (cfs_rq_throttled(cfs_rq)) - return 0; + goto out; } sub_nr_running(rq, h_nr_queued); @@ -7273,6 +7274,8 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) if (unlikely(!was_sched_idle && sched_idle_rq(rq))) rq->next_balance = jiffies; + ret = 1; +out: if (p && task_delayed) { SCHED_WARN_ON(!task_sleep); SCHED_WARN_ON(p->on_rq != 1); @@ -7288,7 +7291,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) __block_task(rq, p); } - return 1; + return ret; } /* diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 0cd1f8b5a102..267b00005eaf 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -1124,7 +1124,7 @@ static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd, struct seccomp_kn static bool should_sleep_killable(struct seccomp_filter *match, struct seccomp_knotif *n) { - return match->wait_killable_recv && n->state == SECCOMP_NOTIFY_SENT; + return match->wait_killable_recv && n->state >= SECCOMP_NOTIFY_SENT; } static int seccomp_do_user_notification(int this_syscall, @@ -1171,13 +1171,11 @@ static int seccomp_do_user_notification(int this_syscall, if (err != 0) { /* - * Check to see if the notifcation got picked up and - * whether we should switch to wait killable. + * Check to see whether we should switch to wait + * killable. Only return the interrupted error if not. */ - if (!wait_killable && should_sleep_killable(match, &n)) - continue; - - goto interrupted; + if (!(!wait_killable && should_sleep_killable(match, &n))) + goto interrupted; } addfd = list_first_entry_or_null(&n.addfd, diff --git a/kernel/smp.c b/kernel/smp.c index f25e20617b7e..fa6faf50fb43 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -891,16 +891,15 @@ static void smp_call_function_many_cond(const struct cpumask *mask, * @mask: The set of cpus to run on (only runs on online subset). * @func: The function to run. This must be fast and non-blocking. * @info: An arbitrary pointer to pass to the function. - * @wait: Bitmask that controls the operation. If %SCF_WAIT is set, wait - * (atomically) until function has completed on other CPUs. If - * %SCF_RUN_LOCAL is set, the function will also be run locally - * if the local CPU is set in the @cpumask. - * - * If @wait is true, then returns once @func has returned. + * @wait: If true, wait (atomically) until function has completed + * on other CPUs. * * You must not call this function with disabled interrupts or from a * hardware interrupt handler or from a bottom half handler. Preemption * must be disabled when calling this function. + * + * @func is not called on the local CPU even if @mask contains it. Consider + * using on_each_cpu_cond_mask() instead if this is not desirable. */ void smp_call_function_many(const struct cpumask *mask, smp_call_func_t func, void *info, bool wait) diff --git a/kernel/sys.c b/kernel/sys.c index 4da31f28fda8..35990f0796bc 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1698,6 +1698,7 @@ SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource, struct rlimit old, new; struct task_struct *tsk; unsigned int checkflags = 0; + bool need_tasklist; int ret; if (old_rlim) @@ -1724,8 +1725,25 @@ SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource, get_task_struct(tsk); rcu_read_unlock(); - ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL, - old_rlim ? &old : NULL); + need_tasklist = !same_thread_group(tsk, current); + if (need_tasklist) { + /* + * Ensure we can't race with group exit or de_thread(), + * so tsk->group_leader can't be freed or changed until + * read_unlock(tasklist_lock) below. + */ + read_lock(&tasklist_lock); + if (!pid_alive(tsk)) + ret = -ESRCH; + } + + if (!ret) { + ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL, + old_rlim ? &old : NULL); + } + + if (need_tasklist) + read_unlock(&tasklist_lock); if (!ret && old_rlim) { rlim_to_rlim64(&old, &old64); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 3ec7df7dbeec..4a44451efbcc 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2759,19 +2759,24 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link, struct bpf_run_ctx *old_run_ctx; int err; + /* + * graph tracer framework ensures we won't migrate, so there is no need + * to use migrate_disable for bpf_prog_run again. The check here just for + * __this_cpu_inc_return. + */ + cant_sleep(); + if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) { bpf_prog_inc_misses_counter(link->link.prog); err = 0; goto out; } - migrate_disable(); rcu_read_lock(); old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx); err = bpf_prog_run(link->link.prog, regs); bpf_reset_run_ctx(old_run_ctx); rcu_read_unlock(); - migrate_enable(); out: __this_cpu_dec(bpf_prog_active); diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index af7d6e2060d9..440dbfa6bbfd 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -343,12 +343,14 @@ static int fentry_dispatcher(struct fprobe *fp, unsigned long entry_ip, void *entry_data) { struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp); + unsigned int flags = trace_probe_load_flag(&tf->tp); int ret = 0; - if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE)) + if (flags & TP_FLAG_TRACE) fentry_trace_func(tf, entry_ip, regs); + #ifdef CONFIG_PERF_EVENTS - if (trace_probe_test_flag(&tf->tp, TP_FLAG_PROFILE)) + if (flags & TP_FLAG_PROFILE) ret = fentry_perf_func(tf, entry_ip, regs); #endif return ret; @@ -360,11 +362,12 @@ static void fexit_dispatcher(struct fprobe *fp, unsigned long entry_ip, void *entry_data) { struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp); + unsigned int flags = trace_probe_load_flag(&tf->tp); - if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE)) + if (flags & TP_FLAG_TRACE) fexit_trace_func(tf, entry_ip, ret_ip, regs, entry_data); #ifdef CONFIG_PERF_EVENTS - if (trace_probe_test_flag(&tf->tp, TP_FLAG_PROFILE)) + if (flags & TP_FLAG_PROFILE) fexit_perf_func(tf, entry_ip, ret_ip, regs, entry_data); #endif } diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 6b9c3f3f870f..b273611c5026 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1799,14 +1799,15 @@ static int kprobe_register(struct trace_event_call *event, static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) { struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp); + unsigned int flags = trace_probe_load_flag(&tk->tp); int ret = 0; raw_cpu_inc(*tk->nhit); - if (trace_probe_test_flag(&tk->tp, TP_FLAG_TRACE)) + if (flags & TP_FLAG_TRACE) kprobe_trace_func(tk, regs); #ifdef CONFIG_PERF_EVENTS - if (trace_probe_test_flag(&tk->tp, TP_FLAG_PROFILE)) + if (flags & TP_FLAG_PROFILE) ret = kprobe_perf_func(tk, regs); #endif return ret; @@ -1818,6 +1819,7 @@ kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) { struct kretprobe *rp = get_kretprobe(ri); struct trace_kprobe *tk; + unsigned int flags; /* * There is a small chance that get_kretprobe(ri) returns NULL when @@ -1830,10 +1832,11 @@ kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) tk = container_of(rp, struct trace_kprobe, rp); raw_cpu_inc(*tk->nhit); - if (trace_probe_test_flag(&tk->tp, TP_FLAG_TRACE)) + flags = trace_probe_load_flag(&tk->tp); + if (flags & TP_FLAG_TRACE) kretprobe_trace_func(tk, ri, regs); #ifdef CONFIG_PERF_EVENTS - if (trace_probe_test_flag(&tk->tp, TP_FLAG_PROFILE)) + if (flags & TP_FLAG_PROFILE) kretprobe_perf_func(tk, ri, regs); #endif return 0; /* We don't tweak kernel, so just return 0 */ diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 8a6797c2278d..4f54f7935d5d 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -269,16 +269,21 @@ struct event_file_link { struct list_head list; }; +static inline unsigned int trace_probe_load_flag(struct trace_probe *tp) +{ + return smp_load_acquire(&tp->event->flags); +} + static inline bool trace_probe_test_flag(struct trace_probe *tp, unsigned int flag) { - return !!(tp->event->flags & flag); + return !!(trace_probe_load_flag(tp) & flag); } static inline void trace_probe_set_flag(struct trace_probe *tp, unsigned int flag) { - tp->event->flags |= flag; + smp_store_release(&tp->event->flags, tp->event->flags | flag); } static inline void trace_probe_clear_flag(struct trace_probe *tp, diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 9916677acf24..f210e71bc155 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1531,6 +1531,7 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) struct trace_uprobe *tu; struct uprobe_dispatch_data udd; struct uprobe_cpu_buffer *ucb = NULL; + unsigned int flags; int ret = 0; tu = container_of(con, struct trace_uprobe, consumer); @@ -1545,11 +1546,12 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) if (WARN_ON_ONCE(!uprobe_cpu_buffer)) return 0; - if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE)) + flags = trace_probe_load_flag(&tu->tp); + if (flags & TP_FLAG_TRACE) ret |= uprobe_trace_func(tu, regs, &ucb); #ifdef CONFIG_PERF_EVENTS - if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE)) + if (flags & TP_FLAG_PROFILE) ret |= uprobe_perf_func(tu, regs, &ucb); #endif uprobe_buffer_put(ucb); @@ -1562,6 +1564,7 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con, struct trace_uprobe *tu; struct uprobe_dispatch_data udd; struct uprobe_cpu_buffer *ucb = NULL; + unsigned int flags; tu = container_of(con, struct trace_uprobe, consumer); @@ -1573,11 +1576,12 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con, if (WARN_ON_ONCE(!uprobe_cpu_buffer)) return 0; - if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE)) + flags = trace_probe_load_flag(&tu->tp); + if (flags & TP_FLAG_TRACE) uretprobe_trace_func(tu, func, regs, &ucb); #ifdef CONFIG_PERF_EVENTS - if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE)) + if (flags & TP_FLAG_PROFILE) uretprobe_perf_func(tu, func, regs, &ucb); #endif uprobe_buffer_put(ucb); |