Commit 3719a9f

Author: CKI KWF Bot (committed)
Merge: sched: DL server and eevdf (mostly) fixes
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-10/-/merge_requests/1274
JIRA: https://issues.redhat.com/browse/RHEL-105980

This MR is a collection of recent urgent fixes in the scheduler. Many are needed for the new dl server functionality. A few fix some eevdf edge cases. I also included a four-part series for amd cpufreq support, which was requested to help an upcoming cpufreq update.

Signed-off-by: Phil Auld <pauld@redhat.com>
Approved-by: Steve Best <sbest@redhat.com>
Approved-by: Waiman Long <longman@redhat.com>
Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com>
Merged-by: CKI GitLab Kmaint Pipeline Bot <26919896-cki-kmaint-pipeline-bot@users.noreply.gitlab.com>
2 parents 6286028 + 147d867 commit 3719a9f

15 files changed (+371, -103 lines)

MAINTAINERS

Lines changed: 1 addition & 0 deletions
@@ -20671,6 +20671,7 @@ F: include/linux/sched.h
 F:      include/linux/wait.h
 F:      include/uapi/linux/sched.h
 F:      kernel/sched/
+F:      tools/sched/

 SCHEDULER - SCHED_EXT
 R:      Tejun Heo <tj@kernel.org>

drivers/cpufreq/amd-pstate.c

Lines changed: 3 additions & 1 deletion
@@ -831,8 +831,10 @@ static void amd_pstate_update_limits(unsigned int cpu)
         if (highest_perf_changed) {
                 WRITE_ONCE(cpudata->prefcore_ranking, cur_high);

-                if (cur_high < CPPC_MAX_PERF)
+                if (cur_high < CPPC_MAX_PERF) {
                         sched_set_itmt_core_prio((int)cur_high, cpu);
+                        sched_update_asym_prefer_cpu(cpu, prev_high, cur_high);
+                }
         }
 }
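
The fix above keeps the sched group's cached preferred CPU in sync when a core's ITMT ranking changes, instead of updating only the per-CPU priority. A minimal standalone sketch of that bookkeeping idea (all demo_* names are hypothetical, not the kernel's):

#include <stddef.h>

/* Per-CPU priority table and a cached index of the preferred CPU. */
static int demo_prio[8];
static size_t demo_asym_prefer_cpu;

/* On a priority change, recompute the cached argmax rather than
 * leaving a stale preferred CPU behind. */
static void demo_update_prefer_cpu(size_t cpu, int new_prio)
{
        demo_prio[cpu] = new_prio;

        demo_asym_prefer_cpu = 0;
        for (size_t i = 1; i < 8; i++)
                if (demo_prio[i] > demo_prio[demo_asym_prefer_cpu])
                        demo_asym_prefer_cpu = i;
}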

include/linux/sched.h

Lines changed: 10 additions & 1 deletion
@@ -586,7 +586,15 @@ struct sched_entity {
         u64                             sum_exec_runtime;
         u64                             prev_sum_exec_runtime;
         u64                             vruntime;
-        s64                             vlag;
+        union {
+                /*
+                 * When !@on_rq this field is vlag.
+                 * When cfs_rq->curr == se (which implies @on_rq)
+                 * this field is vprot. See protect_slice().
+                 */
+                s64                     vlag;
+                u64                     vprot;
+        };
         u64                             slice;

         u64                             nr_migrations;
@@ -703,6 +711,7 @@ struct sched_dl_entity {
         unsigned int                    dl_defer          : 1;
         unsigned int                    dl_defer_armed    : 1;
         unsigned int                    dl_defer_running  : 1;
+        unsigned int                    dl_server_idle    : 1;

         /*
          * Bandwidth enforcement timer. Each -deadline task has its
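
The new union overlays two mutually exclusive roles on one 64-bit slot: vlag is meaningful only while the entity is off the runqueue, vprot only while it is the running entity. A minimal userspace sketch of that pattern, with hypothetical demo_* names (not the kernel's protect_slice() implementation):

#include <stdbool.h>
#include <stdint.h>

struct demo_se {
        bool on_rq;             /* queued on a runqueue? */
        bool is_curr;           /* the currently running entity? */
        uint64_t vruntime;      /* virtual runtime */
        union {
                int64_t  vlag;  /* valid only when !on_rq */
                uint64_t vprot; /* valid only while is_curr */
        };
};

/* When the entity starts running, record the virtual time up to
 * which its slice is protected from preemption. */
static void demo_set_protect(struct demo_se *se, uint64_t vslice)
{
        se->is_curr = true;
        se->vprot = se->vruntime + vslice;
}

/* True while the running entity is still inside its protected slice. */
static bool demo_protect_slice(const struct demo_se *se, uint64_t vnow)
{
        return se->is_curr && (int64_t)(se->vprot - vnow) > 0;
}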

include/linux/sched/topology.h

Lines changed: 6 additions & 0 deletions
@@ -199,6 +199,8 @@ struct sched_domain_topology_level {
 };

 extern void __init set_sched_topology(struct sched_domain_topology_level *tl);
+extern void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio);
+

 # define SD_INIT_NAME(type) .name = #type

@@ -227,6 +229,10 @@ static inline bool cpus_share_resources(int this_cpu, int that_cpu)
         return true;
 }

+static inline void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio)
+{
+}
+
 #endif /* !CONFIG_SMP */

 #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
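
The header follows the usual SMP-stub convention: a real declaration for CONFIG_SMP builds and an empty static inline for !CONFIG_SMP, so callers such as the amd-pstate hunk above need no #ifdef of their own. Condensed from the two hunks above:

#ifdef CONFIG_SMP
extern void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio);
#else
/* !SMP: single CPU, nothing to prefer; calls compile away. */
static inline void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio)
{
}
#endif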

kernel/sched/core.c

Lines changed: 8 additions & 1 deletion
@@ -3941,6 +3941,11 @@ static inline bool ttwu_queue_cond(struct task_struct *p, int cpu)
         if (task_on_scx(p))
                 return false;

+#ifdef CONFIG_SMP
+        if (p->sched_class == &stop_sched_class)
+                return false;
+#endif
+
         /*
          * Do not complicate things with the async wake_list while the CPU is
          * in hotplug state.
@@ -7698,7 +7703,7 @@ const char *preempt_model_str(void)

         if (IS_ENABLED(CONFIG_PREEMPT_DYNAMIC)) {
                 seq_buf_printf(&s, "(%s)%s",
-                               preempt_dynamic_mode > 0 ?
+                               preempt_dynamic_mode >= 0 ?
                                preempt_modes[preempt_dynamic_mode] : "undef",
                                brace ? "}" : "");
                 return seq_buf_str(&s);
@@ -8505,6 +8510,8 @@ void __init sched_init_smp(void)
         init_sched_rt_class();
         init_sched_dl_class();

+        sched_init_dl_servers();
+
         sched_smp_initialized = true;
 }
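
The preempt_model_str() change fixes an off-by-one: index 0 is a valid entry in preempt_modes[], so guarding with "> 0" wrongly reported "undef" for it; only a negative (not yet chosen) mode should. A reduced illustration (demo_* names and array contents are illustrative):

static const char *demo_modes[] = { "none", "voluntary", "full", "lazy" };

/* mode stays -1 until selected at boot; 0 is a legitimate choice. */
static const char *demo_mode_str(int mode)
{
        return mode >= 0 ? demo_modes[mode] : "undef";
}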

kernel/sched/deadline.c

Lines changed: 66 additions & 26 deletions
@@ -824,6 +824,8 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
         struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
         struct rq *rq = rq_of_dl_rq(dl_rq);

+        update_rq_clock(rq);
+
         WARN_ON(is_dl_boosted(dl_se));
         WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));

@@ -1215,6 +1217,8 @@ static void __push_dl_task(struct rq *rq, struct rq_flags *rf)
 /* a defer timer will not be reset if the runtime consumed was < dl_server_min_res */
 static const u64 dl_server_min_res = 1 * NSEC_PER_MSEC;

+static bool dl_server_stopped(struct sched_dl_entity *dl_se);
+
 static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_dl_entity *dl_se)
 {
         struct rq *rq = rq_of_dl_se(dl_se);
@@ -1234,6 +1238,7 @@ static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_dl_entity *dl_se)

         if (!dl_se->server_has_tasks(dl_se)) {
                 replenish_dl_entity(dl_se);
+                dl_server_stopped(dl_se);
                 return HRTIMER_NORESTART;
         }

@@ -1505,7 +1510,9 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec)
         if (dl_entity_is_special(dl_se))
                 return;

-        scaled_delta_exec = dl_scaled_delta_exec(rq, dl_se, delta_exec);
+        scaled_delta_exec = delta_exec;
+        if (!dl_server(dl_se))
+                scaled_delta_exec = dl_scaled_delta_exec(rq, dl_se, delta_exec);

         dl_se->runtime -= scaled_delta_exec;
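
The update_curr_dl_se() hunk stops scaling the dl server's consumed runtime by CPU frequency and capacity: only ordinary deadline entities keep the scaled accounting, presumably so the fair server's guarantee stays in wall-clock time. For context, here is what that scaling does, sketched with the kernel's SCHED_CAPACITY_SHIFT fixed point (demo_cap_scale is illustrative, not dl_scaled_delta_exec() itself):

#include <stdint.h>
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT 10         /* 1024 == full capacity/frequency */

/* Scale a runtime delta by a factor in [0, 1024]. */
static uint64_t demo_cap_scale(uint64_t delta, uint64_t cap)
{
        return (delta * cap) >> SCHED_CAPACITY_SHIFT;
}

int main(void)
{
        uint64_t delta_ns = 1000000;    /* 1 ms of wall-clock execution */

        /* At half frequency on a half-capacity CPU, a deadline task is
         * charged only a quarter of the wall-clock delta... */
        uint64_t scaled = demo_cap_scale(demo_cap_scale(delta_ns, 512), 512);
        printf("task charged:   %llu ns\n", (unsigned long long)scaled);

        /* ...whereas the dl server is now charged the full delta. */
        printf("server charged: %llu ns\n", (unsigned long long)delta_ns);
        return 0;
}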

@@ -1612,7 +1619,7 @@
  */
 void dl_server_update_idle_time(struct rq *rq, struct task_struct *p)
 {
-        s64 delta_exec, scaled_delta_exec;
+        s64 delta_exec;

         if (!rq->fair_server.dl_defer)
                 return;
@@ -1625,9 +1632,7 @@
         if (delta_exec < 0)
                 return;

-        scaled_delta_exec = dl_scaled_delta_exec(rq, &rq->fair_server, delta_exec);
-
-        rq->fair_server.runtime -= scaled_delta_exec;
+        rq->fair_server.runtime -= delta_exec;

         if (rq->fair_server.runtime < 0) {
                 rq->fair_server.dl_defer_running = 0;
@@ -1640,31 +1645,17 @@ void dl_server_update_idle_time(struct rq *rq, struct task_struct *p)
 void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
 {
         /* 0 runtime = fair server disabled */
-        if (dl_se->dl_runtime)
+        if (dl_se->dl_runtime) {
+                dl_se->dl_server_idle = 0;
                 update_curr_dl_se(dl_se->rq, dl_se, delta_exec);
+        }
 }

 void dl_server_start(struct sched_dl_entity *dl_se)
 {
         struct rq *rq = dl_se->rq;

-        /*
-         * XXX: the apply do not work fine at the init phase for the
-         * fair server because things are not yet set. We need to improve
-         * this before getting generic.
-         */
-        if (!dl_server(dl_se)) {
-                u64 runtime = 50 * NSEC_PER_MSEC;
-                u64 period = 1000 * NSEC_PER_MSEC;
-
-                dl_server_apply_params(dl_se, runtime, period, 1);
-
-                dl_se->dl_server = 1;
-                dl_se->dl_defer = 1;
-                setup_new_dl_entity(dl_se);
-        }
-
-        if (!dl_se->dl_runtime)
+        if (!dl_server(dl_se) || dl_se->dl_server_active)
                 return;

         dl_se->dl_server_active = 1;
@@ -1675,7 +1666,7 @@ void dl_server_start(struct sched_dl_entity *dl_se)

 void dl_server_stop(struct sched_dl_entity *dl_se)
 {
-        if (!dl_se->dl_runtime)
+        if (!dl_server(dl_se) || !dl_server_active(dl_se))
                 return;

         dequeue_dl_entity(dl_se, DEQUEUE_SLEEP);
@@ -1685,6 +1676,20 @@ void dl_server_stop(struct sched_dl_entity *dl_se)
         dl_se->dl_server_active = 0;
 }

+static bool dl_server_stopped(struct sched_dl_entity *dl_se)
+{
+        if (!dl_se->dl_server_active)
+                return false;
+
+        if (dl_se->dl_server_idle) {
+                dl_server_stop(dl_se);
+                return true;
+        }
+
+        dl_se->dl_server_idle = 1;
+        return false;
+}
+
 void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
                     dl_server_has_tasks_f has_tasks,
                     dl_server_pick_f pick_task)
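
Together with the dl_server_update() hunk above (which clears dl_server_idle whenever the server actually runs), dl_server_stopped() implements two-strike idle detection: the first pick that finds no fair task marks the server idle, a second consecutive one stops it. A compilable summary of just that state machine (demo_* names are hypothetical):

#include <stdbool.h>

struct demo_server {
        bool active;
        bool idle;      /* set after one pick found no work */
};

/* Called when the server ran: any activity resets the strike. */
static void demo_saw_work(struct demo_server *s)
{
        s->idle = false;
}

/* Called when a pick found nothing; returns true once the server stops. */
static bool demo_no_work(struct demo_server *s)
{
        if (!s->active)
                return false;

        if (s->idle) {          /* second strike: shut the server down */
                s->active = false;
                return true;
        }

        s->idle = true;         /* first strike: remember, keep running */
        return false;
}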
@@ -1694,6 +1699,32 @@ void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
         dl_se->server_pick_task = pick_task;
 }

+void sched_init_dl_servers(void)
+{
+        int cpu;
+        struct rq *rq;
+        struct sched_dl_entity *dl_se;
+
+        for_each_online_cpu(cpu) {
+                u64 runtime = 50 * NSEC_PER_MSEC;
+                u64 period = 1000 * NSEC_PER_MSEC;
+
+                rq = cpu_rq(cpu);
+
+                guard(rq_lock_irq)(rq);
+
+                dl_se = &rq->fair_server;
+
+                WARN_ON(dl_server(dl_se));
+
+                dl_server_apply_params(dl_se, runtime, period, 1);
+
+                dl_se->dl_server = 1;
+                dl_se->dl_defer = 1;
+                setup_new_dl_entity(dl_se);
+        }
+}
+
 void __dl_server_attach_root(struct sched_dl_entity *dl_se, struct rq *rq)
 {
         u64 new_bw = dl_se->dl_bw;
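
sched_init_dl_servers() uses guard(rq_lock_irq)(rq) from the kernel's cleanup.h machinery, which drops the runqueue lock automatically at end of scope. The same idea can be sketched in plain C with the compiler's cleanup attribute (a userspace approximation under pthreads, not the kernel implementation):

#include <pthread.h>

/* Unlock callback invoked when the guarded variable leaves scope. */
static void demo_unlock(pthread_mutex_t **m)
{
        pthread_mutex_unlock(*m);
}

/* Lock now, and arrange an automatic unlock at end of scope. */
#define DEMO_GUARD(lock)                                                \
        pthread_mutex_t *_guard __attribute__((cleanup(demo_unlock))) =\
                (pthread_mutex_lock(lock), (lock))

static pthread_mutex_t demo_lock = PTHREAD_MUTEX_INITIALIZER;
static int demo_counter;

static void demo_touch(void)
{
        DEMO_GUARD(&demo_lock);
        demo_counter++;
}       /* demo_unlock() runs here, even on early return */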
@@ -2437,7 +2468,7 @@ static struct task_struct *__pick_task_dl(struct rq *rq)
         if (dl_server(dl_se)) {
                 p = dl_se->server_pick_task(dl_se);
                 if (!p) {
-                        if (dl_server_active(dl_se)) {
+                        if (!dl_server_stopped(dl_se)) {
                                 dl_se->dl_yielded = 1;
                                 update_curr_dl_se(rq, dl_se, 0);
                         }
@@ -2978,7 +3009,14 @@ void dl_clear_root_domain(struct root_domain *rd)
         int i;

         guard(raw_spinlock_irqsave)(&rd->dl_bw.lock);
+
+        /*
+         * Reset total_bw to zero and extra_bw to max_bw so that the next
+         * loop will add dl-server contributions back properly.
+         */
         rd->dl_bw.total_bw = 0;
+        for_each_cpu(i, rd->span)
+                cpu_rq(i)->dl.extra_bw = cpu_rq(i)->dl.max_bw;

         /*
          * dl_servers are not tasks. Since dl_add_task_root_domain ignores
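
For scale: deadline bandwidth values such as total_bw, extra_bw, and max_bw are fixed-point ratios, with BW_SHIFT == 20 in the kernel's sched.h. A small worked example of that arithmetic using the default fair-server parameters from sched_init_dl_servers() above (demo_to_ratio mirrors the kernel's to_ratio(), simplified):

#include <stdint.h>
#include <stdio.h>

#define BW_SHIFT 20
#define BW_UNIT (1 << BW_SHIFT)         /* 1.0 in fixed point */

/* runtime/period as a BW_SHIFT fixed-point ratio. */
static uint64_t demo_to_ratio(uint64_t period, uint64_t runtime)
{
        return (runtime << BW_SHIFT) / period;
}

int main(void)
{
        /* Default dl server: 50 ms every 1000 ms, i.e. 5% of a CPU. */
        uint64_t bw = demo_to_ratio(1000u * 1000000u, 50u * 1000000u);

        printf("server bw = %llu / %d (~%.1f%%)\n",
               (unsigned long long)bw, BW_UNIT,
               100.0 * (double)bw / BW_UNIT);  /* 52428 / 1048576 */
        return 0;
}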
@@ -3244,6 +3282,9 @@ void sched_dl_do_global(void)
         if (global_rt_runtime() != RUNTIME_INF)
                 new_bw = to_ratio(global_rt_period(), global_rt_runtime());

+        for_each_possible_cpu(cpu)
+                init_dl_rq_bw_ratio(&cpu_rq(cpu)->dl);
+
         for_each_possible_cpu(cpu) {
                 rcu_read_lock_sched();

@@ -3259,7 +3300,6 @@ void sched_dl_do_global(void)
                 raw_spin_unlock_irqrestore(&dl_b->lock, flags);

                 rcu_read_unlock_sched();
-                init_dl_rq_bw_ratio(&cpu_rq(cpu)->dl);
         }
 }

kernel/sched/debug.c

Lines changed: 4 additions & 0 deletions
@@ -588,6 +588,10 @@ static void register_sd(struct sched_domain *sd, struct dentry *parent)
         debugfs_create_file("flags", 0444, parent, &sd->flags, &sd_flags_fops);
         debugfs_create_file("groups_flags", 0444, parent, &sd->groups->flags, &sd_flags_fops);
         debugfs_create_u32("level", 0444, parent, (u32 *)&sd->level);
+
+        if (sd->flags & SD_ASYM_PACKING)
+                debugfs_create_u32("group_asym_prefer_cpu", 0444, parent,
+                                   (u32 *)&sd->groups->asym_prefer_cpu);
 }

 void update_sched_domain_debugfs(void)
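
Assuming debugfs is mounted in the usual place, the new read-only value should then appear alongside the other per-domain files, e.g. /sys/kernel/debug/sched/domains/cpu0/domain0/group_asym_prefer_cpu, on domains with SD_ASYM_PACKING set.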
