Skip to content

Commit 305b1a3

Browse files
Harshit Agarwal authored and gregkh committed
sched/deadline: Fix race in push_dl_task()
commit 8fd5485 upstream. When a CPU chooses to call push_dl_task and picks a task to push to another CPU's runqueue then it will call find_lock_later_rq method which would take a double lock on both CPUs' runqueues. If one of the locks isn't readily available, it may lead to dropping the current runqueue lock and reacquiring both the locks at once. During this window it is possible that the task is already migrated and is running on some other CPU. These cases are already handled. However, if the task is migrated and has already been executed and another CPU is now trying to wake it up (ttwu) such that it is queued again on the runqueue (on_rq is 1) and also if the task was run by the same CPU, then the current checks will pass even though the task was migrated out and is no longer in the pushable tasks list. Please go through the original rt change for more details on the issue. To fix this, after the lock is obtained inside the find_lock_later_rq, it ensures that the task is still at the head of pushable tasks list. Also removed some checks that are no longer needed with the addition of this new check. However, the new check of pushable tasks list only applies when find_lock_later_rq is called by push_dl_task. For the other caller i.e. dl_task_offline_migration, existing checks are used. Signed-off-by: Harshit Agarwal <harshit@nutanix.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Acked-by: Juri Lelli <juri.lelli@redhat.com> Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250408045021.3283624-1-harshit@nutanix.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent b9cc715 commit 305b1a3

File tree

1 file changed

+49
-24
lines changed

1 file changed

+49
-24
lines changed

kernel/sched/deadline.c

Lines changed: 49 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2617,6 +2617,25 @@ static int find_later_rq(struct task_struct *task)
26172617
return -1;
26182618
}
26192619

2620+
static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
2621+
{
2622+
struct task_struct *p;
2623+
2624+
if (!has_pushable_dl_tasks(rq))
2625+
return NULL;
2626+
2627+
p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root));
2628+
2629+
WARN_ON_ONCE(rq->cpu != task_cpu(p));
2630+
WARN_ON_ONCE(task_current(rq, p));
2631+
WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
2632+
2633+
WARN_ON_ONCE(!task_on_rq_queued(p));
2634+
WARN_ON_ONCE(!dl_task(p));
2635+
2636+
return p;
2637+
}
2638+
26202639
/* Locks the rq it finds */
26212640
static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
26222641
{
@@ -2644,12 +2663,37 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
26442663

26452664
/* Retry if something changed. */
26462665
if (double_lock_balance(rq, later_rq)) {
2647-
if (unlikely(task_rq(task) != rq ||
2666+
/*
2667+
* double_lock_balance had to release rq->lock, in the
2668+
* meantime, task may no longer be fit to be migrated.
2669+
* Check the following to ensure that the task is
2670+
* still suitable for migration:
2671+
* 1. It is possible the task was scheduled,
2672+
* migrate_disabled was set and then got preempted,
2673+
* so we must check the task migration disable
2674+
* flag.
2675+
* 2. The CPU picked is in the task's affinity.
2676+
* 3. For throttled task (dl_task_offline_migration),
2677+
* check the following:
2678+
* - the task is not on the rq anymore (it was
2679+
* migrated)
2680+
* - the task is not on CPU anymore
2681+
* - the task is still a dl task
2682+
* - the task is not queued on the rq anymore
2683+
* 4. For the non-throttled task (push_dl_task), the
2684+
* check to ensure that this task is still at the
2685+
* head of the pushable tasks list is enough.
2686+
*/
2687+
if (unlikely(is_migration_disabled(task) ||
26482688
!cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) ||
2649-
task_on_cpu(rq, task) ||
2650-
!dl_task(task) ||
2651-
is_migration_disabled(task) ||
2652-
!task_on_rq_queued(task))) {
2689+
(task->dl.dl_throttled &&
2690+
(task_rq(task) != rq ||
2691+
task_on_cpu(rq, task) ||
2692+
!dl_task(task) ||
2693+
!task_on_rq_queued(task))) ||
2694+
(!task->dl.dl_throttled &&
2695+
task != pick_next_pushable_dl_task(rq)))) {
2696+
26532697
double_unlock_balance(rq, later_rq);
26542698
later_rq = NULL;
26552699
break;
@@ -2672,25 +2716,6 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
26722716
return later_rq;
26732717
}
26742718

2675-
static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
2676-
{
2677-
struct task_struct *p;
2678-
2679-
if (!has_pushable_dl_tasks(rq))
2680-
return NULL;
2681-
2682-
p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root));
2683-
2684-
WARN_ON_ONCE(rq->cpu != task_cpu(p));
2685-
WARN_ON_ONCE(task_current(rq, p));
2686-
WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
2687-
2688-
WARN_ON_ONCE(!task_on_rq_queued(p));
2689-
WARN_ON_ONCE(!dl_task(p));
2690-
2691-
return p;
2692-
}
2693-
26942719
/*
26952720
* See if the non running -deadline tasks on this rq
26962721
* can be sent to some other CPU where they can preempt

0 commit comments

Comments
 (0)