diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/bpf/core.c | 5 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 6 | ||||
-rw-r--r-- | kernel/fork.c | 2 | ||||
-rw-r--r-- | kernel/futex/requeue.c | 6 | ||||
-rw-r--r-- | kernel/sched/ext_idle.c | 52 | ||||
-rw-r--r-- | kernel/sched/ext_idle.h | 7 | ||||
-rw-r--r-- | kernel/trace/fgraph.c | 12 | ||||
-rw-r--r-- | kernel/trace/fprobe.c | 7 | ||||
-rw-r--r-- | kernel/trace/trace_dynevent.c | 4 | ||||
-rw-r--r-- | kernel/trace/trace_osnoise.c | 3 | ||||
-rw-r--r-- | kernel/vhost_task.c | 3 |
11 files changed, 82 insertions, 25 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 829f0792d8d8..17e5cf18da1e 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -3013,7 +3013,10 @@ EXPORT_SYMBOL_GPL(bpf_event_output); /* Always built-in helper functions. */ const struct bpf_func_proto bpf_tail_call_proto = { - .func = NULL, + /* func is unused for tail_call, we set it to pass the + * get_helper_proto check + */ + .func = BPF_PTR_POISON, .gpl_only = false, .ret_type = RET_VOID, .arg1_type = ARG_PTR_TO_CTX, diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 4fd89659750b..a6338936085a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8405,6 +8405,10 @@ static int process_timer_func(struct bpf_verifier_env *env, int regno, verifier_bug(env, "Two map pointers in a timer helper"); return -EFAULT; } + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { + verbose(env, "bpf_timer cannot be used for PREEMPT_RT.\n"); + return -EOPNOTSUPP; + } meta->map_uid = reg->map_uid; meta->map_ptr = map; return 0; @@ -11206,7 +11210,7 @@ static int get_helper_proto(struct bpf_verifier_env *env, int func_id, return -EINVAL; *ptr = env->ops->get_func_proto(func_id, env->prog); - return *ptr ? 0 : -EINVAL; + return *ptr && (*ptr)->func ? 0 : -EINVAL; } static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn, diff --git a/kernel/fork.c b/kernel/fork.c index 1ee8eb11f38b..0cbc174da76a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2289,7 +2289,7 @@ __latent_entropy struct task_struct *copy_process( if (need_futex_hash_allocate_default(clone_flags)) { retval = futex_hash_allocate_default(); if (retval) - goto bad_fork_core_free; + goto bad_fork_cancel_cgroup; /* * If we fail beyond this point we don't free the allocated * futex hash map. We assume that another thread will be created diff --git a/kernel/futex/requeue.c b/kernel/futex/requeue.c index c716a66f8692..d818b4d47f1b 100644 --- a/kernel/futex/requeue.c +++ b/kernel/futex/requeue.c @@ -230,8 +230,9 @@ static inline void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, struct futex_hash_bucket *hb) { - q->key = *key; + struct task_struct *task; + q->key = *key; __futex_unqueue(q); WARN_ON(!q->rt_waiter); @@ -243,10 +244,11 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, futex_hash_get(hb); q->drop_hb_ref = true; q->lock_ptr = &hb->lock; + task = READ_ONCE(q->task); /* Signal locked state to the waiter */ futex_requeue_pi_complete(q, 1); - wake_up_state(q->task, TASK_NORMAL); + wake_up_state(task, TASK_NORMAL); } /** diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c index 001fb88a8481..edd6cdd9aadc 100644 --- a/kernel/sched/ext_idle.c +++ b/kernel/sched/ext_idle.c @@ -75,7 +75,7 @@ static int scx_cpu_node_if_enabled(int cpu) return cpu_to_node(cpu); } -bool scx_idle_test_and_clear_cpu(int cpu) +static bool scx_idle_test_and_clear_cpu(int cpu) { int node = scx_cpu_node_if_enabled(cpu); struct cpumask *idle_cpus = idle_cpumask(node)->cpu; @@ -198,7 +198,7 @@ pick_idle_cpu_from_online_nodes(const struct cpumask *cpus_allowed, int node, u6 /* * Find an idle CPU in the system, starting from @node. */ -s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, int node, u64 flags) +static s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, int node, u64 flags) { s32 cpu; @@ -794,6 +794,16 @@ static void reset_idle_masks(struct sched_ext_ops *ops) cpumask_and(idle_cpumask(node)->smt, cpu_online_mask, node_mask); } } +#else /* !CONFIG_SMP */ +static bool scx_idle_test_and_clear_cpu(int cpu) +{ + return -EBUSY; +} + +static s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, int node, u64 flags) +{ + return -EBUSY; +} #endif /* CONFIG_SMP */ void scx_idle_enable(struct sched_ext_ops *ops) @@ -860,8 +870,34 @@ static bool check_builtin_idle_enabled(void) return false; } -s32 select_cpu_from_kfunc(struct task_struct *p, s32 prev_cpu, u64 wake_flags, - const struct cpumask *allowed, u64 flags) +/* + * Determine whether @p is a migration-disabled task in the context of BPF + * code. + * + * We can't simply check whether @p->migration_disabled is set in a + * sched_ext callback, because migration is always disabled for the current + * task while running BPF code. + * + * The prolog (__bpf_prog_enter) and epilog (__bpf_prog_exit) respectively + * disable and re-enable migration. For this reason, the current task + * inside a sched_ext callback is always a migration-disabled task. + * + * Therefore, when @p->migration_disabled == 1, check whether @p is the + * current task or not: if it is, then migration was not disabled before + * entering the callback, otherwise migration was disabled. + * + * Returns true if @p is migration-disabled, false otherwise. + */ +static bool is_bpf_migration_disabled(const struct task_struct *p) +{ + if (p->migration_disabled == 1) + return p != current; + else + return p->migration_disabled; +} + +static s32 select_cpu_from_kfunc(struct task_struct *p, s32 prev_cpu, u64 wake_flags, + const struct cpumask *allowed, u64 flags) { struct rq *rq; struct rq_flags rf; @@ -903,7 +939,7 @@ s32 select_cpu_from_kfunc(struct task_struct *p, s32 prev_cpu, u64 wake_flags, * selection optimizations and simply check whether the previously * used CPU is idle and within the allowed cpumask. */ - if (p->nr_cpus_allowed == 1 || is_migration_disabled(p)) { + if (p->nr_cpus_allowed == 1 || is_bpf_migration_disabled(p)) { if (cpumask_test_cpu(prev_cpu, allowed ?: p->cpus_ptr) && scx_idle_test_and_clear_cpu(prev_cpu)) cpu = prev_cpu; @@ -1125,10 +1161,10 @@ __bpf_kfunc bool scx_bpf_test_and_clear_cpu_idle(s32 cpu) if (!check_builtin_idle_enabled()) return false; - if (kf_cpu_valid(cpu, NULL)) - return scx_idle_test_and_clear_cpu(cpu); - else + if (!kf_cpu_valid(cpu, NULL)) return false; + + return scx_idle_test_and_clear_cpu(cpu); } /** diff --git a/kernel/sched/ext_idle.h b/kernel/sched/ext_idle.h index 37be78a7502b..05e389ed72e4 100644 --- a/kernel/sched/ext_idle.h +++ b/kernel/sched/ext_idle.h @@ -15,16 +15,9 @@ struct sched_ext_ops; #ifdef CONFIG_SMP void scx_idle_update_selcpu_topology(struct sched_ext_ops *ops); void scx_idle_init_masks(void); -bool scx_idle_test_and_clear_cpu(int cpu); -s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, int node, u64 flags); #else /* !CONFIG_SMP */ static inline void scx_idle_update_selcpu_topology(struct sched_ext_ops *ops) {} static inline void scx_idle_init_masks(void) {} -static inline bool scx_idle_test_and_clear_cpu(int cpu) { return false; } -static inline s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, int node, u64 flags) -{ - return -EBUSY; -} #endif /* CONFIG_SMP */ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index db40ec5cc9d7..9fb1a8c107ec 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -815,6 +815,7 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, unsigned long frame_pointe unsigned long bitmap; unsigned long ret; int offset; + int bit; int i; ret_stack = ftrace_pop_return_trace(&trace, &ret, frame_pointer, &offset); @@ -829,6 +830,15 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, unsigned long frame_pointe if (fregs) ftrace_regs_set_instruction_pointer(fregs, ret); + bit = ftrace_test_recursion_trylock(trace.func, ret); + /* + * This can fail because ftrace_test_recursion_trylock() allows one nest + * call. If we are already in a nested call, then we don't probe this and + * just return the original return address. + */ + if (unlikely(bit < 0)) + goto out; + #ifdef CONFIG_FUNCTION_GRAPH_RETVAL trace.retval = ftrace_regs_get_return_value(fregs); #endif @@ -852,6 +862,8 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, unsigned long frame_pointe } } + ftrace_test_recursion_unlock(bit); +out: /* * The ftrace_graph_return() may still access the current * ret_stack structure, we need to make sure the update of diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index f9b3aa9afb17..342e84f8a40e 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -428,8 +428,9 @@ static int fprobe_addr_list_add(struct fprobe_addr_list *alist, unsigned long ad { unsigned long *addrs; - if (alist->index >= alist->size) - return -ENOMEM; + /* Previously we failed to expand the list. */ + if (alist->index == alist->size) + return -ENOSPC; alist->addrs[alist->index++] = addr; if (alist->index < alist->size) @@ -489,7 +490,7 @@ static int fprobe_module_callback(struct notifier_block *nb, for (i = 0; i < FPROBE_IP_TABLE_SIZE; i++) fprobe_remove_node_in_module(mod, &fprobe_ip_table[i], &alist); - if (alist.index < alist.size && alist.index > 0) + if (alist.index > 0) ftrace_set_filter_ips(&fprobe_graph_ops.ops, alist.addrs, alist.index, 1, 0); mutex_unlock(&fprobe_mutex); diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index 5d64a18cacac..d06854bd32b3 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -230,6 +230,10 @@ static int dyn_event_open(struct inode *inode, struct file *file) { int ret; + ret = security_locked_down(LOCKDOWN_TRACEFS); + if (ret) + return ret; + ret = tracing_check_open_get_tr(NULL); if (ret) return ret; diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 337bc0eb5d71..dc734867f0fc 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -2325,12 +2325,13 @@ osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count, if (count < 1) return 0; - buf = kmalloc(count, GFP_KERNEL); + buf = kmalloc(count + 1, GFP_KERNEL); if (!buf) return -ENOMEM; if (copy_from_user(buf, ubuf, count)) return -EFAULT; + buf[count] = '\0'; if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL)) return -ENOMEM; diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c index 2f844c279a3e..7f24ccc896c6 100644 --- a/kernel/vhost_task.c +++ b/kernel/vhost_task.c @@ -100,6 +100,7 @@ void vhost_task_stop(struct vhost_task *vtsk) * freeing it below. */ wait_for_completion(&vtsk->exited); + put_task_struct(vtsk->task); kfree(vtsk); } EXPORT_SYMBOL_GPL(vhost_task_stop); @@ -148,7 +149,7 @@ struct vhost_task *vhost_task_create(bool (*fn)(void *), return ERR_PTR(PTR_ERR(tsk)); } - vtsk->task = tsk; + vtsk->task = get_task_struct(tsk); return vtsk; } EXPORT_SYMBOL_GPL(vhost_task_create); |