summaryrefslogtreecommitdiff
path: root/kernel/sched/ext.h
diff options
context:
space:
mode:
authorDavid Vernet <dvernet@meta.com>2024-06-18 10:09:18 -1000
committerTejun Heo <tj@kernel.org>2024-06-18 10:09:18 -1000
commit8a010b81b3a50b033fc3cddc613517abda586cbe (patch)
tree318c5aa2b40d5fdbe3dec286629c181158d5587c /kernel/sched/ext.h
parent79e104400fc3b94a2d04c65d222a0726b3ae2382 (diff)
sched_ext: Implement runnable task stall watchdog
The most common and critical way that a BPF scheduler can misbehave is by failing to run runnable tasks for too long. This patch implements a watchdog. * All tasks record when they become runnable. * A watchdog work periodically scans all runnable tasks. If any task has stayed runnable for too long, the BPF scheduler is aborted. * scheduler_tick() monitors whether the watchdog itself is stuck. If so, the BPF scheduler is aborted. Because the watchdog only scans the tasks which are currently runnable and usually very infrequently, the overhead should be negligible. scx_qmap is updated so that it can be told to stall user and/or kernel tasks. A detected task stall looks like the following: sched_ext: BPF scheduler "qmap" errored, disabling sched_ext: runnable task stall (dbus-daemon[953] failed to run for 6.478s) scx_check_timeout_workfn+0x10e/0x1b0 process_one_work+0x287/0x560 worker_thread+0x234/0x420 kthread+0xe9/0x100 ret_from_fork+0x1f/0x30 A detected watchdog stall: sched_ext: BPF scheduler "qmap" errored, disabling sched_ext: runnable task stall (watchdog failed to check in for 5.001s) scheduler_tick+0x2eb/0x340 update_process_times+0x7a/0x90 tick_sched_timer+0xd8/0x130 __hrtimer_run_queues+0x178/0x3b0 hrtimer_interrupt+0xfc/0x390 __sysvec_apic_timer_interrupt+0xb7/0x2b0 sysvec_apic_timer_interrupt+0x90/0xb0 asm_sysvec_apic_timer_interrupt+0x1b/0x20 default_idle+0x14/0x20 arch_cpu_idle+0xf/0x20 default_idle_call+0x50/0x90 do_idle+0xe8/0x240 cpu_startup_entry+0x1d/0x20 kernel_init+0x0/0x190 start_kernel+0x0/0x392 start_kernel+0x324/0x392 x86_64_start_reservations+0x2a/0x2c x86_64_start_kernel+0x104/0x109 secondary_startup_64_no_verify+0xce/0xdb Note that this patch exposes scx_ops_error[_type]() in kernel/sched/ext.h to inline scx_notify_sched_tick(). v4: - While disabling, cancel_delayed_work_sync(&scx_watchdog_work) was being called before forward progress was guaranteed and thus could lead to system lockup. Relocated. - While enabling, it was comparing msecs against jiffies without conversion leading to spurious load failures on lower HZ kernels. Fixed. - runnable list management is now used by core bypass logic and moved to the patch implementing sched_ext core. v3: - bpf_scx_init_member() was incorrectly comparing ops->timeout_ms against SCX_WATCHDOG_MAX_TIMEOUT which is in jiffies without conversion leading to spurious load failures in lower HZ kernels. Fixed. v2: - Julia Lawall noticed that the watchdog code was mixing msecs and jiffies. Fix by using jiffies for everything. Signed-off-by: David Vernet <dvernet@meta.com> Reviewed-by: Tejun Heo <tj@kernel.org> Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Josh Don <joshdon@google.com> Acked-by: Hao Luo <haoluo@google.com> Acked-by: Barret Rhoden <brho@google.com> Cc: Julia Lawall <julia.lawall@inria.fr>
Diffstat (limited to 'kernel/sched/ext.h')
-rw-r--r--kernel/sched/ext.h2
1 files changed, 2 insertions, 0 deletions
diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h
index 9c5a2d928281..56fcdb0b2c05 100644
--- a/kernel/sched/ext.h
+++ b/kernel/sched/ext.h
@@ -29,6 +29,7 @@ static inline bool task_on_scx(const struct task_struct *p)
return scx_enabled() && p->sched_class == &ext_sched_class;
}
+void scx_tick(struct rq *rq);
void init_scx_entity(struct sched_ext_entity *scx);
void scx_pre_fork(struct task_struct *p);
int scx_fork(struct task_struct *p);
@@ -66,6 +67,7 @@ static inline const struct sched_class *next_active_class(const struct sched_cla
#define scx_enabled() false
#define scx_switched_all() false
+static inline void scx_tick(struct rq *rq) {}
static inline void scx_pre_fork(struct task_struct *p) {}
static inline int scx_fork(struct task_struct *p) { return 0; }
static inline void scx_post_fork(struct task_struct *p) {}