Skip to content

Commit a8ad873

Browse files
etsalhtejun
authored andcommitted
sched_ext: defer queue_balance_callback() until after ops.dispatch
The sched_ext code calls queue_balance_callback() during enqueue_task() to defer operations that drop multiple locks until we can unpin them. The call assumes that the rq lock is held until the callbacks are invoked, and the pending callbacks will not be visible to any other threads. This is enforced by a WARN_ON_ONCE() in rq_pin_lock(). However, balance_one() may actually drop the lock during a BPF dispatch call. Another thread may win the race to get the rq lock and see the pending callback. To avoid this, sched_ext must only queue the callback after the dispatch calls have completed. CPU 0 CPU 1 CPU 2 scx_balance() rq_unpin_lock() scx_balance_one() |= IN_BALANCE scx_enqueue() ops.dispatch() rq_unlock() rq_lock() queue_balance_callback() rq_unlock() [WARN] rq_pin_lock() rq_lock() &= ~IN_BALANCE rq_repin_lock() Changelog v2-> v1 (https://lore.kernel.org/sched-ext/aOgOxtHCeyRT_7jn@gpd4) - Fixed explanation in patch description (Andrea) - Fixed scx_rq mask state updates (Andrea) - Added Reviewed-by tag from Andrea Reported-by: Jakub Kicinski <kuba@kernel.org> Signed-off-by: Emil Tsalapatis (Meta) <emil@etsalapatis.com> Reviewed-by: Andrea Righi <arighi@nvidia.com> Signed-off-by: Tejun Heo <tj@kernel.org>
1 parent efeeaac commit a8ad873

File tree

2 files changed

+28
-2
lines changed

2 files changed

+28
-2
lines changed

kernel/sched/ext.c

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -780,13 +780,23 @@ static void schedule_deferred(struct rq *rq)
780780
if (rq->scx.flags & SCX_RQ_IN_WAKEUP)
781781
return;
782782

783+
/* Don't do anything if there already is a deferred operation. */
784+
if (rq->scx.flags & SCX_RQ_BAL_PENDING)
785+
return;
786+
783787
/*
784788
* If in balance, the balance callbacks will be called before rq lock is
785789
* released. Schedule one.
790+
*
791+
*
792+
* We can't directly insert the callback into the
793+
* rq's list: The call can drop its lock and make the pending balance
794+
* callback visible to unrelated code paths that call rq_pin_lock().
795+
*
796+
* Just let balance_one() know that it must do it itself.
786797
*/
787798
if (rq->scx.flags & SCX_RQ_IN_BALANCE) {
788-
queue_balance_callback(rq, &rq->scx.deferred_bal_cb,
789-
deferred_bal_cb_workfn);
799+
rq->scx.flags |= SCX_RQ_BAL_CB_PENDING;
790800
return;
791801
}
792802

@@ -2003,6 +2013,19 @@ static void flush_dispatch_buf(struct scx_sched *sch, struct rq *rq)
20032013
dspc->cursor = 0;
20042014
}
20052015

2016+
static inline void maybe_queue_balance_callback(struct rq *rq)
2017+
{
2018+
lockdep_assert_rq_held(rq);
2019+
2020+
if (!(rq->scx.flags & SCX_RQ_BAL_CB_PENDING))
2021+
return;
2022+
2023+
queue_balance_callback(rq, &rq->scx.deferred_bal_cb,
2024+
deferred_bal_cb_workfn);
2025+
2026+
rq->scx.flags &= ~SCX_RQ_BAL_CB_PENDING;
2027+
}
2028+
20062029
static int balance_one(struct rq *rq, struct task_struct *prev)
20072030
{
20082031
struct scx_sched *sch = scx_root;
@@ -2150,6 +2173,8 @@ static int balance_scx(struct rq *rq, struct task_struct *prev,
21502173
#endif
21512174
rq_repin_lock(rq, rf);
21522175

2176+
maybe_queue_balance_callback(rq);
2177+
21532178
return ret;
21542179
}
21552180

kernel/sched/sched.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -784,6 +784,7 @@ enum scx_rq_flags {
784784
SCX_RQ_BAL_KEEP = 1 << 3, /* balance decided to keep current */
785785
SCX_RQ_BYPASSING = 1 << 4,
786786
SCX_RQ_CLK_VALID = 1 << 5, /* RQ clock is fresh and valid */
787+
SCX_RQ_BAL_CB_PENDING = 1 << 6, /* must queue a cb after dispatching */
787788

788789
SCX_RQ_IN_WAKEUP = 1 << 16,
789790
SCX_RQ_IN_BALANCE = 1 << 17,

0 commit comments

Comments
 (0)