Diffstat (limited to 'kernel')
-rw-r--r--  kernel/fork.c                  2
-rw-r--r--  kernel/futex/requeue.c         6
-rw-r--r--  kernel/sched/deadline.c       35
-rw-r--r--  kernel/sched/fair.c            7
-rw-r--r--  kernel/sched/sched.h          37
-rw-r--r--  kernel/trace/fgraph.c         12
-rw-r--r--  kernel/trace/trace_osnoise.c   3
7 files changed, 54 insertions, 48 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index c4ada32598bd..6ca8689a83b5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2295,7 +2295,7 @@ __latent_entropy struct task_struct *copy_process(
if (need_futex_hash_allocate_default(clone_flags)) {
retval = futex_hash_allocate_default();
if (retval)
- goto bad_fork_core_free;
+ goto bad_fork_cancel_cgroup;
/*
* If we fail beyond this point we don't free the allocated
* futex hash map. We assume that another thread will be created
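The fork.c fix above only changes which error label the failed futex hash allocation jumps to, so that the unwinding matches what copy_process() has actually set up at that point. As a generic sketch of why the label choice matters in a goto cleanup ladder (made-up resources and labels, not the copy_process() error path):

/* Sketch only: each failure must jump to the label that undoes exactly
 * what has been set up so far; the wrong label leaks or over-frees. */
#include <stdio.h>
#include <stdlib.h>

static int setup_everything(void)
{
	char *a, *b, *c;

	a = malloc(16);
	if (!a)
		return -1;

	b = malloc(16);
	if (!b)
		goto free_a;

	c = malloc(16);
	if (!c)
		goto free_b;	/* jumping to free_a here would leak b */

	printf("all resources acquired\n");
	free(c);
	free(b);
	free(a);
	return 0;

free_b:
	free(b);
free_a:
	free(a);
	return -1;
}

int main(void)
{
	return setup_everything() ? 1 : 0;
}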
diff --git a/kernel/futex/requeue.c b/kernel/futex/requeue.c
index c716a66f8692..d818b4d47f1b 100644
--- a/kernel/futex/requeue.c
+++ b/kernel/futex/requeue.c
@@ -230,8 +230,9 @@ static inline
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
struct futex_hash_bucket *hb)
{
- q->key = *key;
+ struct task_struct *task;
+ q->key = *key;
__futex_unqueue(q);
WARN_ON(!q->rt_waiter);
@@ -243,10 +244,11 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
futex_hash_get(hb);
q->drop_hb_ref = true;
q->lock_ptr = &hb->lock;
+ task = READ_ONCE(q->task);
/* Signal locked state to the waiter */
futex_requeue_pi_complete(q, 1);
- wake_up_state(q->task, TASK_NORMAL);
+ wake_up_state(task, TASK_NORMAL);
}
/**
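The ordering in the requeue hunk above is the point of the change: q->task is read into a local variable before futex_requeue_pi_complete() publishes the locked state, because once the waiter observes completion it may return and its futex_q must not be touched again. A minimal userspace sketch of that snapshot-then-signal pattern (all names here are hypothetical, not the kernel implementation):

/* Sketch only: hypothetical names, not kernel code. */
#include <stdatomic.h>
#include <stdio.h>

struct task { int id; };

struct waiter {
	struct task *task;	/* owned by the waiting side */
	atomic_int   done;	/* completion flag the waiter checks */
};

static void wake_task(struct task *t)
{
	printf("waking task %d\n", t->id);
}

/*
 * Snapshot the task pointer before publishing completion: once 'done' is
 * visible, the waiter may return and free its state, so 'w' must not be
 * dereferenced afterwards.
 */
static void complete_and_wake(struct waiter *w)
{
	struct task *t = w->task;	/* plays the role of READ_ONCE(q->task) */

	atomic_store_explicit(&w->done, 1, memory_order_release);
	wake_task(t);			/* uses the snapshot, not w->task */
}

int main(void)
{
	struct task tsk = { .id = 1 };
	struct waiter w = { .task = &tsk, .done = 0 };

	complete_and_wake(&w);
	return 0;
}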
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index f25301267e47..72c1f72463c7 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -875,7 +875,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
*/
if (dl_se->dl_defer && !dl_se->dl_defer_running &&
dl_time_before(rq_clock(dl_se->rq), dl_se->deadline - dl_se->runtime)) {
- if (!is_dl_boosted(dl_se) && dl_se->server_has_tasks(dl_se)) {
+ if (!is_dl_boosted(dl_se)) {
/*
* Set dl_se->dl_defer_armed and dl_throttled variables to
@@ -1152,8 +1152,6 @@ static void __push_dl_task(struct rq *rq, struct rq_flags *rf)
/* a defer timer will not be reset if the runtime consumed was < dl_server_min_res */
static const u64 dl_server_min_res = 1 * NSEC_PER_MSEC;
-static bool dl_server_stopped(struct sched_dl_entity *dl_se);
-
static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_dl_entity *dl_se)
{
struct rq *rq = rq_of_dl_se(dl_se);
@@ -1171,12 +1169,6 @@ static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_
if (!dl_se->dl_runtime)
return HRTIMER_NORESTART;
- if (!dl_se->server_has_tasks(dl_se)) {
- replenish_dl_entity(dl_se);
- dl_server_stopped(dl_se);
- return HRTIMER_NORESTART;
- }
-
if (dl_se->dl_defer_armed) {
/*
* First check if the server could consume runtime in background.
@@ -1579,10 +1571,8 @@ void dl_server_update_idle_time(struct rq *rq, struct task_struct *p)
void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
{
/* 0 runtime = fair server disabled */
- if (dl_se->dl_runtime) {
- dl_se->dl_server_idle = 0;
+ if (dl_se->dl_runtime)
update_curr_dl_se(dl_se->rq, dl_se, delta_exec);
- }
}
void dl_server_start(struct sched_dl_entity *dl_se)
@@ -1610,26 +1600,10 @@ void dl_server_stop(struct sched_dl_entity *dl_se)
dl_se->dl_server_active = 0;
}
-static bool dl_server_stopped(struct sched_dl_entity *dl_se)
-{
- if (!dl_se->dl_server_active)
- return true;
-
- if (dl_se->dl_server_idle) {
- dl_server_stop(dl_se);
- return true;
- }
-
- dl_se->dl_server_idle = 1;
- return false;
-}
-
void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
- dl_server_has_tasks_f has_tasks,
dl_server_pick_f pick_task)
{
dl_se->rq = rq;
- dl_se->server_has_tasks = has_tasks;
dl_se->server_pick_task = pick_task;
}
@@ -2394,10 +2368,7 @@ again:
if (dl_server(dl_se)) {
p = dl_se->server_pick_task(dl_se);
if (!p) {
- if (!dl_server_stopped(dl_se)) {
- dl_se->dl_yielded = 1;
- update_curr_dl_se(rq, dl_se, 0);
- }
+ dl_server_stop(dl_se);
goto again;
}
rq->dl_server = dl_se;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b173a059315c..8ce56a8d507f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8859,11 +8859,6 @@ static struct task_struct *__pick_next_task_fair(struct rq *rq, struct task_stru
return pick_next_task_fair(rq, prev, NULL);
}
-static bool fair_server_has_tasks(struct sched_dl_entity *dl_se)
-{
- return !!dl_se->rq->cfs.nr_queued;
-}
-
static struct task_struct *fair_server_pick_task(struct sched_dl_entity *dl_se)
{
return pick_task_fair(dl_se->rq);
@@ -8875,7 +8870,7 @@ void fair_server_init(struct rq *rq)
init_dl_entity(dl_se);
- dl_server_init(dl_se, rq, fair_server_has_tasks, fair_server_pick_task);
+ dl_server_init(dl_se, rq, fair_server_pick_task);
}
/*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index be9745d104f7..cf2109b67f9a 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -365,25 +365,50 @@ extern s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s6
*
* dl_se::rq -- runqueue we belong to.
*
- * dl_se::server_has_tasks() -- used on bandwidth enforcement; we 'stop' the
- * server when it runs out of tasks to run.
- *
* dl_se::server_pick() -- nested pick_next_task(); we yield the period if this
* returns NULL.
*
* dl_server_update() -- called from update_curr_common(), propagates runtime
* to the server.
*
- * dl_server_start()
- * dl_server_stop() -- start/stop the server when it has (no) tasks.
+ * dl_server_start() -- start the server when it has tasks; it will stop
+ * automatically when there are no more tasks, per
+ * dl_se::server_pick() returning NULL.
+ *
+ * dl_server_stop() -- (force) stop the server; use when updating
+ * parameters.
*
* dl_server_init() -- initializes the server.
+ *
+ * When started the dl_server will (per dl_defer) schedule a timer for its
+ * zero-laxity point -- that is, unlike regular EDF tasks which run ASAP, a
+ * server will run at the very end of its period.
+ *
+ * This is done such that any runtime from the target class can be accounted
+ * against the server -- through dl_server_update() above -- such that when it
+ * becomes time to run, it might already be out of runtime and get deferred
+ * until the next period. In this case dl_server_timer() will alternate
+ * between defer and replenish but never actually enqueue the server.
+ *
+ * Only when the target class does not manage to exhaust the server's runtime
+ * (there's actually starvation in the given period), will the dl_server get on
+ * the runqueue. Once queued it will pick tasks from the target class and run
+ * them until either its runtime is exhausted, at which point it's back to
+ * dl_server_timer, or until there are no more tasks to run, at which point
+ * the dl_server stops itself.
+ *
+ * By stopping at this point the dl_server retains bandwidth, which, if a new
+ * task wakes up imminently (starting the server again), can be used --
+ * subject to CBS wakeup rules -- without having to wait for the next period.
+ *
+ * Additionally, because of the dl_defer behaviour, starting and stopping the
+ * server is naturally throttled to once per period, which keeps high
+ * context-switch workloads from spamming the hrtimer program/cancel paths.
*/
extern void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec);
extern void dl_server_start(struct sched_dl_entity *dl_se);
extern void dl_server_stop(struct sched_dl_entity *dl_se);
extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
- dl_server_has_tasks_f has_tasks,
dl_server_pick_f pick_task);
extern void sched_init_dl_servers(void);
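The comment block in the hunk above describes the new lifetime: dl_server_start() is called when work arrives, and the server stops itself once dl_se::server_pick() returns NULL, which is why fair.c now passes only a pick callback to dl_server_init(). A rough userspace model of that pick/stop cycle (every name below is illustrative and only mirrors the shape of the behaviour, not the kernel code):

/* Userspace model of the pick/stop cycle; hypothetical names throughout. */
#include <stdbool.h>
#include <stdio.h>

struct task { const char *name; };

struct dl_server {
	bool active;
	struct task *(*pick_task)(struct dl_server *s);
};

static void dl_server_start(struct dl_server *s) { s->active = true;  }
static void dl_server_stop(struct dl_server *s)  { s->active = false; }

/* Nested pick: if the target class has nothing to run, stop the server. */
static struct task *dl_server_pick(struct dl_server *s)
{
	struct task *p = s->pick_task(s);

	if (!p)
		dl_server_stop(s);	/* replaces the old dl_server_stopped() dance */
	return p;
}

/* A fake "fair" class with a one-shot queue, standing in for cfs. */
static struct task fair_task = { .name = "fair-task" };
static bool fair_queued = true;

static struct task *fair_pick(struct dl_server *s)
{
	(void)s;
	if (!fair_queued)
		return NULL;
	fair_queued = false;
	return &fair_task;
}

int main(void)
{
	struct dl_server srv = { .pick_task = fair_pick };

	dl_server_start(&srv);
	while (srv.active) {
		struct task *p = dl_server_pick(&srv);

		if (p)
			printf("running %s\n", p->name);
	}
	printf("server stopped itself: no more tasks\n");
	return 0;
}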
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 1e3b32b1e82c..484ad7a18463 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -815,6 +815,7 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, unsigned long frame_pointe
unsigned long bitmap;
unsigned long ret;
int offset;
+ int bit;
int i;
ret_stack = ftrace_pop_return_trace(&trace, &ret, frame_pointer, &offset);
@@ -829,6 +830,15 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, unsigned long frame_pointe
if (fregs)
ftrace_regs_set_instruction_pointer(fregs, ret);
+ bit = ftrace_test_recursion_trylock(trace.func, ret);
+ /*
+ * This can fail because ftrace_test_recursion_trylock() allows one nested
+ * call. If we are already in a nested call, then we don't probe this and
+ * just return the original return address.
+ */
+ if (unlikely(bit < 0))
+ goto out;
+
#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
trace.retval = ftrace_regs_get_return_value(fregs);
#endif
@@ -852,6 +862,8 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, unsigned long frame_pointe
}
}
+ ftrace_test_recursion_unlock(bit);
+out:
/*
* The ftrace_graph_return() may still access the current
* ret_stack structure, we need to make sure the update of
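The comment added in the fgraph hunk above says the guard allows a single nested call; any deeper nesting makes the handler bail out and return the original address. A tiny userspace model of such a depth-limited trylock (hypothetical names, unrelated to the real per-task recursion bits):

/* Sketch only: a depth counter stands in for the kernel's recursion bits. */
#include <stdio.h>

static int trace_depth;	/* the kernel tracks this per task/context */

/* Allow the handler plus one nested call; beyond that, refuse. */
static int recursion_trylock(void)
{
	if (trace_depth >= 2)
		return -1;
	return trace_depth++;	/* returns the previous depth as the "bit" */
}

static void recursion_unlock(int bit)
{
	trace_depth = bit;	/* restore the previous depth */
}

static void handler(int level)
{
	int bit = recursion_trylock();

	if (bit < 0) {
		printf("level %d: already nested, bail out\n", level);
		return;
	}
	printf("level %d: probing (bit=%d)\n", level, bit);
	if (level < 3)
		handler(level + 1);	/* simulate a nested return handler */
	recursion_unlock(bit);
}

int main(void)
{
	handler(0);
	return 0;
}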
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index 337bc0eb5d71..dc734867f0fc 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -2325,12 +2325,13 @@ osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count,
if (count < 1)
return 0;
- buf = kmalloc(count, GFP_KERNEL);
+ buf = kmalloc(count + 1, GFP_KERNEL);
if (!buf)
return -ENOMEM;
if (copy_from_user(buf, ubuf, count))
return -EFAULT;
+ buf[count] = '\0';
if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL))
return -ENOMEM;
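The osnoise change above allocates one extra byte and terminates the buffer because the data copied from userspace is not NUL-terminated, while the cpumask parsing that follows (not shown in this hunk) expects a C string. A minimal userspace sketch of that allocate-plus-one-and-terminate pattern (illustrative names, not the tracer code):

/* Sketch only: memcpy() stands in for copy_from_user(). */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *copy_user_string(const char *ubuf, size_t count)
{
	char *buf = malloc(count + 1);	/* +1 for the terminator, as in the fix */

	if (!buf)
		return NULL;
	memcpy(buf, ubuf, count);
	buf[count] = '\0';		/* string parsers require the terminator */
	return buf;
}

int main(void)
{
	char raw[3] = { '0', '-', '3' };	/* not NUL-terminated, like a write(2) payload */
	char *s = copy_user_string(raw, sizeof(raw));

	if (!s)
		return 1;
	printf("parsed cpu list string: \"%s\"\n", s);
	free(s);
	return 0;
}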