summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- kernel/sched/ext.c 17
1 files changed, 16 insertions, 1 deletions
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index bba9d805dc2b8..bd8cb37b4b2eb 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -2909,9 +2909,24 @@ static struct task_struct *pick_task_scx(struct rq *rq)
* If balance_scx() is telling us to keep running @prev, replenish slice
* if necessary and keep running @prev. Otherwise, pop the first one
* from the local DSQ.
+ *
+ * WORKAROUND:
+ *
+ * %SCX_RQ_BAL_KEEP should be set iff @prev is on SCX as it must just
+ * have gone through balance_scx(). Unfortunately, there currently is a
+ * bug where fair could say yes on balance() but no on pick_task(),
+ * which then ends up calling pick_task_scx() without preceding
+ * balance_scx().
+ *
+ * For now, ignore cases where @prev is not on SCX. This isn't great and
+ * can theoretically lead to stalls. However, for switch_all cases, this
+ * happens only while a BPF scheduler is being loaded or unloaded, and,
+ * for partial cases, fair will likely keep triggering this CPU.
+ *
+ * Once fair is fixed, restore WARN_ON_ONCE().
*/
if ((rq->scx.flags & SCX_RQ_BAL_KEEP) &&
- !WARN_ON_ONCE(prev->sched_class != &ext_sched_class)) {
+ prev->sched_class == &ext_sched_class) {
p = prev;
if (!p->scx.slice)
p->scx.slice = SCX_SLICE_DFL;