
Commit 6840458

Merge: Sched: late fixes for 9.7

MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/7023
JIRA: https://issues.redhat.com/browse/RHEL-96250

A few fixes have come in upstream (as of about v6.15) that are needed in
rhel9. We'll take them now to prevent hitting these issues later.

Signed-off-by: Phil Auld <pauld@redhat.com>
Approved-by: Herton R. Krzesinski <herton@redhat.com>
Approved-by: Aristeu Rozanski <arozansk@redhat.com>
Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com>
Merged-by: Augusto Caringi <acaringi@redhat.com>
2 parents 0517fa9 + 3834336 commit 6840458

File tree

4 files changed, +57 −28 lines


include/linux/sched.h

Lines changed: 2 additions & 1 deletion
@@ -1725,8 +1725,9 @@ static inline unsigned int __task_state_index(unsigned int tsk_state,
	 * We're lying here, but rather than expose a completely new task state
	 * to userspace, we can make this appear as if the task has gone through
	 * a regular rt_mutex_lock() call.
+	 * Report frozen tasks as uninterruptible.
	 */
-	if (tsk_state & TASK_RTLOCK_WAIT)
+	if ((tsk_state & TASK_RTLOCK_WAIT) || (tsk_state & TASK_FROZEN))
		state = TASK_UNINTERRUPTIBLE;

	return fls(state);
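
The effect of the one-line change: a frozen task now reports the same state index as an uninterruptible one, so tools like ps show it as 'D' instead of exposing a new state to userspace. A minimal userspace sketch of the mapping (illustrative TASK_* values, not necessarily the kernel's exact constants; fls() approximated with a compiler builtin):

	#include <stdio.h>

	/* Illustrative values only; the real TASK_* constants vary by kernel version. */
	#define TASK_INTERRUPTIBLE	0x0001
	#define TASK_UNINTERRUPTIBLE	0x0002
	#define TASK_RTLOCK_WAIT	0x1000
	#define TASK_FROZEN		0x8000
	#define TASK_REPORT_MASK	0x007f	/* states userspace is allowed to see */

	unsigned int task_state_index(unsigned int tsk_state)
	{
		unsigned int state = tsk_state & TASK_REPORT_MASK;

		/* Frozen and rt_mutex-blocked tasks both report as uninterruptible. */
		if ((tsk_state & TASK_RTLOCK_WAIT) || (tsk_state & TASK_FROZEN))
			state = TASK_UNINTERRUPTIBLE;

		/* fls(): index of the most significant set bit, 0 when none are set. */
		return state ? 32 - (unsigned int)__builtin_clz(state) : 0;
	}

	int main(void)
	{
		printf("frozen:        %u\n", task_state_index(TASK_FROZEN));        /* 2 -> 'D' */
		printf("interruptible: %u\n", task_state_index(TASK_INTERRUPTIBLE)); /* 1 -> 'S' */
		return 0;
	}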

include/linux/sched/mm.h

Lines changed: 7 additions & 0 deletions
@@ -466,6 +466,13 @@ enum {

 static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
 {
+	/*
+	 * The atomic_read() below prevents CSE. The following should
+	 * help the compiler generate more efficient code on architectures
+	 * where sync_core_before_usermode() is a no-op.
+	 */
+	if (!IS_ENABLED(CONFIG_ARCH_HAS_SYNC_CORE_BEFORE_USERMODE))
+		return;
	if (current->mm != mm)
		return;
	if (likely(!(atomic_read(&mm->membarrier_state) &
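
The new guard relies on the kernel's IS_ENABLED() macro, which turns a Kconfig option into a compile-time constant, so on architectures without CONFIG_ARCH_HAS_SYNC_CORE_BEFORE_USERMODE the compiler can discard the whole function body, atomic_read() included. A rough standalone sketch of the idiom, with IS_ENABLED() simplified to a plain macro (the real one also handles =m and undefined options):

	#include <stdio.h>

	/* Pretend output of Kconfig; flip to 1 to keep the body. */
	#define CONFIG_ARCH_HAS_SYNC_CORE_BEFORE_USERMODE 0

	/* Simplified stand-in for the kernel's IS_ENABLED(). */
	#define IS_ENABLED(option) (option)

	static void sync_core_before_usermode(void)
	{
		puts("serializing the instruction stream");
	}

	static void membarrier_sync_core_sketch(void)
	{
		/*
		 * Constant-false when the option is off: the compiler proves
		 * the early return is always taken and eliminates the rest.
		 */
		if (!IS_ENABLED(CONFIG_ARCH_HAS_SYNC_CORE_BEFORE_USERMODE))
			return;

		sync_core_before_usermode();
	}

	int main(void)
	{
		membarrier_sync_core_sketch();
		return 0;
	}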

kernel/sched/core.c

Lines changed: 6 additions & 20 deletions
@@ -1031,9 +1031,10 @@ void wake_up_q(struct wake_q_head *head)
		struct task_struct *task;

		task = container_of(node, struct task_struct, wake_q);
-		/* Task can safely be re-inserted now: */
		node = node->next;
-		task->wake_q.next = NULL;
+		/* pairs with cmpxchg_relaxed() in __wake_q_add() */
+		WRITE_ONCE(task->wake_q.next, NULL);
+		/* Task can safely be re-inserted now. */

		/*
		 * wake_up_process() executes a full barrier, which pairs with
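
Why WRITE_ONCE() here: the consumer's NULL store races with __wake_q_add() on another CPU doing cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL), so the store must be a single, untorn access the compiler cannot split or reorder. A rough userspace analogue of the two sides in C11 atomics (a sketch of the idiom with names mirroring the kernel's wake_q; not the kernel code):

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stddef.h>

	struct wake_q_node {
		_Atomic(struct wake_q_node *) next;
	};

	#define WAKE_Q_TAIL ((struct wake_q_node *)0x1)

	/* Producer: claim the node; fails if it is already queued elsewhere. */
	bool wake_q_claim(struct wake_q_node *node)
	{
		struct wake_q_node *expected = NULL;

		/* ~ cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL) */
		return atomic_compare_exchange_strong_explicit(
			&node->next, &expected, WAKE_Q_TAIL,
			memory_order_relaxed, memory_order_relaxed);
	}

	/* Consumer: release the node so it can be claimed again. */
	void wake_q_release(struct wake_q_node *node)
	{
		/*
		 * ~ WRITE_ONCE(task->wake_q.next, NULL): a single atomic
		 * store, so a concurrent cmpxchg never observes a torn value.
		 */
		atomic_store_explicit(&node->next, NULL, memory_order_relaxed);
	}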
@@ -8810,7 +8811,7 @@ void sched_release_group(struct task_group *tg)
	spin_unlock_irqrestore(&task_group_lock, flags);
 }

-static struct task_group *sched_get_task_group(struct task_struct *tsk)
+static void sched_change_group(struct task_struct *tsk)
 {
	struct task_group *tg;

@@ -8822,13 +8823,7 @@ static struct task_group *sched_get_task_group(struct task_struct *tsk)
	tg = container_of(task_css_check(tsk, cpu_cgrp_id, true),
			  struct task_group, css);
	tg = autogroup_task_group(tsk, tg);
-
-	return tg;
-}
-
-static void sched_change_group(struct task_struct *tsk, struct task_group *group)
-{
-	tsk->sched_task_group = group;
+	tsk->sched_task_group = tg;

 #ifdef CONFIG_FAIR_GROUP_SCHED
	if (tsk->sched_class->task_change_group)
@@ -8849,20 +8844,11 @@ void sched_move_task(struct task_struct *tsk)
 {
	int queued, running, queue_flags =
		DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
-	struct task_group *group;
	struct rq *rq;

	CLASS(task_rq_lock, rq_guard)(tsk);
	rq = rq_guard.rq;

-	/*
-	 * Esp. with SCHED_AUTOGROUP enabled it is possible to get superfluous
-	 * group changes.
-	 */
-	group = sched_get_task_group(tsk);
-	if (group == tsk->sched_task_group)
-		return;
-
	update_rq_clock(rq);

	running = task_current(rq, tsk);
@@ -8873,7 +8859,7 @@ void sched_move_task(struct task_struct *tsk)
	if (running)
		put_prev_task(rq, tsk);

-	sched_change_group(tsk, group);
+	sched_change_group(tsk);

	if (queued)
		enqueue_task(rq, tsk, queue_flags);

kernel/sched/cpufreq_schedutil.c

Lines changed: 42 additions & 7 deletions
@@ -81,9 +81,23 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
	if (!cpufreq_this_cpu_can_update(sg_policy->policy))
		return false;

-	if (unlikely(sg_policy->limits_changed)) {
-		sg_policy->limits_changed = false;
-		sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
+	if (unlikely(READ_ONCE(sg_policy->limits_changed))) {
+		WRITE_ONCE(sg_policy->limits_changed, false);
+		sg_policy->need_freq_update = true;
+
+		/*
+		 * The above limits_changed update must occur before the reads
+		 * of policy limits in cpufreq_driver_resolve_freq() or a policy
+		 * limits update might be missed, so use a memory barrier to
+		 * ensure it.
+		 *
+		 * This pairs with the write memory barrier in sugov_limits().
+		 */
+		smp_mb();
+
+		return true;
+	} else if (sg_policy->need_freq_update) {
+		/* ignore_dl_rate_limit() wants a new frequency to be found. */
		return true;
	}

@@ -95,10 +109,22 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
 static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time,
				    unsigned int next_freq)
 {
-	if (sg_policy->need_freq_update)
+	if (sg_policy->need_freq_update) {
		sg_policy->need_freq_update = false;
-	else if (sg_policy->next_freq == next_freq)
+		/*
+		 * The policy limits have changed, but if the return value of
+		 * cpufreq_driver_resolve_freq() after applying the new limits
+		 * is still equal to the previously selected frequency, the
+		 * driver callback need not be invoked unless the driver
+		 * specifically wants that to happen on every update of the
+		 * policy limits.
+		 */
+		if (sg_policy->next_freq == next_freq &&
+		    !cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS))
+			return false;
+	} else if (sg_policy->next_freq == next_freq) {
		return false;
+	}

	sg_policy->next_freq = next_freq;
	sg_policy->last_freq_update_time = time;
@@ -344,7 +370,7 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
 static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu)
 {
	if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_min)
-		sg_cpu->sg_policy->limits_changed = true;
+		sg_cpu->sg_policy->need_freq_update = true;
 }

 static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu,
@@ -880,7 +906,16 @@ static void sugov_limits(struct cpufreq_policy *policy)
		mutex_unlock(&sg_policy->work_lock);
	}

-	sg_policy->limits_changed = true;
+	/*
+	 * The limits_changed update below must take place before the updates
+	 * of policy limits in cpufreq_set_policy() or a policy limits update
+	 * might be missed, so use a memory barrier to ensure it.
+	 *
+	 * This pairs with the memory barrier in sugov_should_update_freq().
+	 */
+	smp_wmb();
+
+	WRITE_ONCE(sg_policy->limits_changed, true);
 }

 struct cpufreq_governor schedutil_gov = {
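
Taken together, these hunks implement a flag-plus-data handshake: sugov_limits() raises limits_changed under a write barrier, and sugov_should_update_freq() clears the flag and issues a full barrier before the policy limits are re-read, so an update that lands between the clear and the read either becomes visible or re-raises the flag for the next pass. A rough userspace analogue of that idiom in C11 atomics (a sketch only; the kernel's exact ordering is as described in the comments above, with smp_wmb()/smp_mb() approximated here by fences):

	#include <stdatomic.h>
	#include <stdbool.h>

	_Atomic bool limits_changed;
	_Atomic unsigned int policy_max;	/* stand-in for the policy limits */

	/* Writer side, loosely following sugov_limits(). */
	void set_limits(unsigned int new_max)
	{
		atomic_store_explicit(&policy_max, new_max, memory_order_relaxed);
		/* Publish the data before raising the flag; ~ smp_wmb(). */
		atomic_thread_fence(memory_order_release);
		atomic_store_explicit(&limits_changed, true, memory_order_relaxed);
	}

	/* Reader side, loosely following sugov_should_update_freq(). */
	bool should_update_freq(unsigned int *max_out)
	{
		if (!atomic_load_explicit(&limits_changed, memory_order_relaxed))
			return false;

		atomic_store_explicit(&limits_changed, false, memory_order_relaxed);
		/*
		 * Full fence: order the flag clear before the re-read of the
		 * limits, so a concurrent update either shows up in *max_out
		 * or re-raises the flag for the next pass; ~ smp_mb().
		 */
		atomic_thread_fence(memory_order_seq_cst);
		*max_out = atomic_load_explicit(&policy_max, memory_order_relaxed);
		return true;
	}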
