Skip to content

Commit 083fc6d

Browse files
committed
Merge tag 'sched-urgent-2025-09-26' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fixes from Ingo Molnar: "Fix two dl_server regressions: a race that can end up leaving the dl_server stuck, and a dl_server throttling bug causing lag to fair tasks" * tag 'sched-urgent-2025-09-26' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: sched/deadline: Fix dl_server behaviour sched/deadline: Fix dl_server getting stuck
2 parents 2cea0ed + a3a70ca commit 083fc6d

File tree

4 files changed

+35
-46
lines changed

4 files changed

+35
-46
lines changed

include/linux/sched.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -706,7 +706,6 @@ struct sched_dl_entity {
706706
unsigned int dl_defer : 1;
707707
unsigned int dl_defer_armed : 1;
708708
unsigned int dl_defer_running : 1;
709-
unsigned int dl_server_idle : 1;
710709

711710
/*
712711
* Bandwidth enforcement timer. Each -deadline task has its
@@ -733,7 +732,6 @@ struct sched_dl_entity {
733732
* runnable task.
734733
*/
735734
struct rq *rq;
736-
dl_server_has_tasks_f server_has_tasks;
737735
dl_server_pick_f server_pick_task;
738736

739737
#ifdef CONFIG_RT_MUTEXES

kernel/sched/deadline.c

Lines changed: 3 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -875,7 +875,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
875875
*/
876876
if (dl_se->dl_defer && !dl_se->dl_defer_running &&
877877
dl_time_before(rq_clock(dl_se->rq), dl_se->deadline - dl_se->runtime)) {
878-
if (!is_dl_boosted(dl_se) && dl_se->server_has_tasks(dl_se)) {
878+
if (!is_dl_boosted(dl_se)) {
879879

880880
/*
881881
* Set dl_se->dl_defer_armed and dl_throttled variables to
@@ -1152,8 +1152,6 @@ static void __push_dl_task(struct rq *rq, struct rq_flags *rf)
11521152
/* a defer timer will not be reset if the runtime consumed was < dl_server_min_res */
11531153
static const u64 dl_server_min_res = 1 * NSEC_PER_MSEC;
11541154

1155-
static bool dl_server_stopped(struct sched_dl_entity *dl_se);
1156-
11571155
static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_dl_entity *dl_se)
11581156
{
11591157
struct rq *rq = rq_of_dl_se(dl_se);
@@ -1171,12 +1169,6 @@ static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_
11711169
if (!dl_se->dl_runtime)
11721170
return HRTIMER_NORESTART;
11731171

1174-
if (!dl_se->server_has_tasks(dl_se)) {
1175-
replenish_dl_entity(dl_se);
1176-
dl_server_stopped(dl_se);
1177-
return HRTIMER_NORESTART;
1178-
}
1179-
11801172
if (dl_se->dl_defer_armed) {
11811173
/*
11821174
* First check if the server could consume runtime in background.
@@ -1579,10 +1571,8 @@ void dl_server_update_idle_time(struct rq *rq, struct task_struct *p)
15791571
void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
15801572
{
15811573
/* 0 runtime = fair server disabled */
1582-
if (dl_se->dl_runtime) {
1583-
dl_se->dl_server_idle = 0;
1574+
if (dl_se->dl_runtime)
15841575
update_curr_dl_se(dl_se->rq, dl_se, delta_exec);
1585-
}
15861576
}
15871577

15881578
void dl_server_start(struct sched_dl_entity *dl_se)
@@ -1610,26 +1600,10 @@ void dl_server_stop(struct sched_dl_entity *dl_se)
16101600
dl_se->dl_server_active = 0;
16111601
}
16121602

1613-
static bool dl_server_stopped(struct sched_dl_entity *dl_se)
1614-
{
1615-
if (!dl_se->dl_server_active)
1616-
return true;
1617-
1618-
if (dl_se->dl_server_idle) {
1619-
dl_server_stop(dl_se);
1620-
return true;
1621-
}
1622-
1623-
dl_se->dl_server_idle = 1;
1624-
return false;
1625-
}
1626-
16271603
void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
1628-
dl_server_has_tasks_f has_tasks,
16291604
dl_server_pick_f pick_task)
16301605
{
16311606
dl_se->rq = rq;
1632-
dl_se->server_has_tasks = has_tasks;
16331607
dl_se->server_pick_task = pick_task;
16341608
}
16351609

@@ -2394,10 +2368,7 @@ static struct task_struct *__pick_task_dl(struct rq *rq)
23942368
if (dl_server(dl_se)) {
23952369
p = dl_se->server_pick_task(dl_se);
23962370
if (!p) {
2397-
if (!dl_server_stopped(dl_se)) {
2398-
dl_se->dl_yielded = 1;
2399-
update_curr_dl_se(rq, dl_se, 0);
2400-
}
2371+
dl_server_stop(dl_se);
24012372
goto again;
24022373
}
24032374
rq->dl_server = dl_se;

kernel/sched/fair.c

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8859,11 +8859,6 @@ static struct task_struct *__pick_next_task_fair(struct rq *rq, struct task_stru
88598859
return pick_next_task_fair(rq, prev, NULL);
88608860
}
88618861

8862-
static bool fair_server_has_tasks(struct sched_dl_entity *dl_se)
8863-
{
8864-
return !!dl_se->rq->cfs.nr_queued;
8865-
}
8866-
88678862
static struct task_struct *fair_server_pick_task(struct sched_dl_entity *dl_se)
88688863
{
88698864
return pick_task_fair(dl_se->rq);
@@ -8875,7 +8870,7 @@ void fair_server_init(struct rq *rq)
88758870

88768871
init_dl_entity(dl_se);
88778872

8878-
dl_server_init(dl_se, rq, fair_server_has_tasks, fair_server_pick_task);
8873+
dl_server_init(dl_se, rq, fair_server_pick_task);
88798874
}
88808875

88818876
/*

kernel/sched/sched.h

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -365,25 +365,50 @@ extern s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s6
365365
*
366366
* dl_se::rq -- runqueue we belong to.
367367
*
368-
* dl_se::server_has_tasks() -- used on bandwidth enforcement; we 'stop' the
369-
* server when it runs out of tasks to run.
370-
*
371368
* dl_se::server_pick() -- nested pick_next_task(); we yield the period if this
372369
* returns NULL.
373370
*
374371
* dl_server_update() -- called from update_curr_common(), propagates runtime
375372
* to the server.
376373
*
377-
* dl_server_start()
378-
* dl_server_stop() -- start/stop the server when it has (no) tasks.
374+
* dl_server_start() -- start the server when it has tasks; it will stop
375+
* automatically when there are no more tasks, per
376+
* dl_se::server_pick() returning NULL.
377+
*
378+
* dl_server_stop() -- (force) stop the server; use when updating
379+
* parameters.
379380
*
380381
* dl_server_init() -- initializes the server.
382+
*
383+
* When started the dl_server will (per dl_defer) schedule a timer for its
384+
* zero-laxity point -- that is, unlike regular EDF tasks which run ASAP, a
385+
* server will run at the very end of its period.
386+
*
387+
* This is done such that any runtime from the target class can be accounted
388+
* against the server -- through dl_server_update() above -- such that when it
389+
* becomes time to run, it might already be out of runtime and get deferred
390+
* until the next period. In this case dl_server_timer() will alternate
391+
* between defer and replenish but never actually enqueue the server.
392+
*
393+
* Only when the target class does not manage to exhaust the server's runtime
394+
* (there's actually starvation in the given period), will the dl_server get on
395+
* the runqueue. Once queued it will pick tasks from the target class and run
396+
* them until either its runtime is exhausted, at which point it's back to
397+
* dl_server_timer, or until there are no more tasks to run, at which point
398+
* the dl_server stops itself.
399+
*
400+
* By stopping at this point the dl_server retains bandwidth, which, if a new
401+
* task wakes up imminently (starting the server again), can be used --
402+
* subject to CBS wakeup rules -- without having to wait for the next period.
403+
*
404+
* Additionally, because of the dl_defer behaviour the start/stop behaviour is
405+
* naturally throttled to once per period, avoiding high context switch
406+
* workloads from spamming the hrtimer program/cancel paths.
381407
*/
382408
extern void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec);
383409
extern void dl_server_start(struct sched_dl_entity *dl_se);
384410
extern void dl_server_stop(struct sched_dl_entity *dl_se);
385411
extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
386-
dl_server_has_tasks_f has_tasks,
387412
dl_server_pick_f pick_task);
388413
extern void sched_init_dl_servers(void);
389414

0 commit comments

Comments
 (0)