Skip to content

Commit f79d833

Browse files
committed
Merge: sched: fixes from sched/urgent for v6.10
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/4745 JIRA: https://issues.redhat.com/browse/RHEL-48226 A small set of fixes came into v6.10 via sched/urgent. Some are trivial doc changes, but they apply cleanly and won't affect code. A couple are really needed, including the last, which is for VROOM-20591. A couple were added to make things apply cleanly. Signed-off-by: Phil Auld <pauld@redhat.com> Approved-by: Waiman Long <longman@redhat.com> Approved-by: Eder Zulian <ezulian@redhat.com> Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com> Merged-by: Lucas Zampieri <lzampier@redhat.com>
2 parents 30f7090 + 331eacd commit f79d833

File tree

15 files changed

+112
-38
lines changed

15 files changed

+112
-38
lines changed

Documentation/accounting/delay-accounting.rst

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ d) memory reclaim
1616
e) thrashing
1717
f) direct compact
1818
g) write-protect copy
19+
h) IRQ/SOFTIRQ
1920

2021
and makes these statistics available to userspace through
2122
the taskstats interface.
@@ -49,7 +50,7 @@ this structure. See
4950
for a description of the fields pertaining to delay accounting.
5051
It will generally be in the form of counters returning the cumulative
5152
delay seen for cpu, sync block I/O, swapin, memory reclaim, thrash page
52-
cache, direct compact, write-protect copy etc.
53+
cache, direct compact, write-protect copy, IRQ/SOFTIRQ etc.
5354

5455
Taking the difference of two successive readings of a given
5556
counter (say cpu_delay_total) for a task will give the delay
@@ -109,17 +110,19 @@ Get sum of delays, since system boot, for all pids with tgid 5::
109110
CPU count real total virtual total delay total delay average
110111
8 7000000 6872122 3382277 0.423ms
111112
IO count delay total delay average
112-
0 0 0ms
113+
0 0 0.000ms
113114
SWAP count delay total delay average
114-
0 0 0ms
115+
0 0 0.000ms
115116
RECLAIM count delay total delay average
116-
0 0 0ms
117+
0 0 0.000ms
117118
THRASHING count delay total delay average
118-
0 0 0ms
119+
0 0 0.000ms
119120
COMPACT count delay total delay average
120-
0 0 0ms
121-
WPCOPY count delay total delay average
122-
0 0 0ms
121+
0 0 0.000ms
122+
WPCOPY count delay total delay average
123+
0 0 0.000ms
124+
IRQ count delay total delay average
125+
0 0 0.000ms
123126

124127
Get IO accounting for pid 1, it works only with -p::
125128

Documentation/admin-guide/cgroup-v1/cpusets.rst

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -568,7 +568,7 @@ on the next tick. For some applications in special situation, waiting
568568

569569
The 'cpuset.sched_relax_domain_level' file allows you to request changing
570570
this searching range as you like. This file takes int value which
571-
indicates size of searching range in levels ideally as follows,
571+
indicates size of searching range in levels approximately as follows,
572572
otherwise initial value -1 that indicates the cpuset has no request.
573573

574574
====== ===========================================================
@@ -581,6 +581,11 @@ otherwise initial value -1 that indicates the cpuset has no request.
581581
5 search system wide [on NUMA system]
582582
====== ===========================================================
583583

584+
Not all levels can be present and values can change depending on the
585+
system architecture and kernel configuration. Check
586+
/sys/kernel/debug/sched/domains/cpu*/domain*/ for system-specific
587+
details.
588+
584589
The system default is architecture dependent. The system default
585590
can be changed using the relax_domain_level= boot parameter.
586591

include/linux/delayacct.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,13 @@ struct task_delay_info {
4949
u64 wpcopy_start;
5050
u64 wpcopy_delay; /* wait for write-protect copy */
5151

52+
u64 irq_delay; /* wait for IRQ/SOFTIRQ */
53+
5254
u32 freepages_count; /* total count of memory reclaim */
5355
u32 thrashing_count; /* total count of thrash waits */
5456
u32 compact_count; /* total count of memory compact */
5557
u32 wpcopy_count; /* total count of write-protect copy */
58+
u32 irq_count; /* total count of IRQ/SOFTIRQ */
5659
};
5760
#endif
5861

@@ -85,6 +88,7 @@ extern void __delayacct_compact_start(void);
8588
extern void __delayacct_compact_end(void);
8689
extern void __delayacct_wpcopy_start(void);
8790
extern void __delayacct_wpcopy_end(void);
91+
extern void __delayacct_irq(struct task_struct *task, u32 delta);
8892

8993
static inline void delayacct_set_flag(struct task_struct *p, int flag)
9094
{
@@ -213,6 +217,15 @@ static inline void delayacct_wpcopy_end(void)
213217
__delayacct_wpcopy_end();
214218
}
215219

220+
static inline void delayacct_irq(struct task_struct *task, u32 delta)
221+
{
222+
if (!static_branch_unlikely(&delayacct_key))
223+
return;
224+
225+
if (task->delays)
226+
__delayacct_irq(task, delta);
227+
}
228+
216229
#else
217230
static inline void delayacct_set_flag(struct task_struct *p, int flag)
218231
{}
@@ -255,6 +268,8 @@ static inline void delayacct_wpcopy_start(void)
255268
{}
256269
static inline void delayacct_wpcopy_end(void)
257270
{}
271+
static inline void delayacct_irq(struct task_struct *task, u32 delta)
272+
{}
258273

259274
#endif /* CONFIG_TASK_DELAY_ACCT */
260275

include/uapi/linux/taskstats.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
*/
3535

3636

37-
#define TASKSTATS_VERSION 13
37+
#define TASKSTATS_VERSION 14
3838
#define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN
3939
* in linux/sched.h */
4040

@@ -198,6 +198,10 @@ struct taskstats {
198198
/* v13: Delay waiting for write-protect copy */
199199
__u64 wpcopy_count;
200200
__u64 wpcopy_delay_total;
201+
202+
/* v14: Delay waiting for IRQ/SOFTIRQ */
203+
__u64 irq_count;
204+
__u64 irq_delay_total;
201205
};
202206

203207

kernel/cgroup/cpuset.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3026,7 +3026,7 @@ bool current_cpuset_is_being_rebound(void)
30263026
static int update_relax_domain_level(struct cpuset *cs, s64 val)
30273027
{
30283028
#ifdef CONFIG_SMP
3029-
if (val < -1 || val >= sched_domain_level_max)
3029+
if (val < -1 || val > sched_domain_level_max + 1)
30303030
return -EINVAL;
30313031
#endif
30323032

kernel/delayacct.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,12 +159,15 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
159159
d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp;
160160
tmp = d->wpcopy_delay_total + tsk->delays->wpcopy_delay;
161161
d->wpcopy_delay_total = (tmp < d->wpcopy_delay_total) ? 0 : tmp;
162+
tmp = d->irq_delay_total + tsk->delays->irq_delay;
163+
d->irq_delay_total = (tmp < d->irq_delay_total) ? 0 : tmp;
162164
d->blkio_count += tsk->delays->blkio_count;
163165
d->swapin_count += tsk->delays->swapin_count;
164166
d->freepages_count += tsk->delays->freepages_count;
165167
d->thrashing_count += tsk->delays->thrashing_count;
166168
d->compact_count += tsk->delays->compact_count;
167169
d->wpcopy_count += tsk->delays->wpcopy_count;
170+
d->irq_count += tsk->delays->irq_count;
168171
raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
169172

170173
return 0;
@@ -254,3 +257,14 @@ void __delayacct_wpcopy_end(void)
254257
&current->delays->wpcopy_delay,
255258
&current->delays->wpcopy_count);
256259
}
260+
261+
void __delayacct_irq(struct task_struct *task, u32 delta)
262+
{
263+
unsigned long flags;
264+
265+
raw_spin_lock_irqsave(&task->delays->lock, flags);
266+
task->delays->irq_delay += delta;
267+
task->delays->irq_count++;
268+
raw_spin_unlock_irqrestore(&task->delays->lock, flags);
269+
}
270+

kernel/sched/core.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -722,7 +722,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
722722

723723
rq->prev_irq_time += irq_delta;
724724
delta -= irq_delta;
725-
psi_account_irqtime(rq->curr, irq_delta);
725+
delayacct_irq(rq->curr, irq_delta);
726726
#endif
727727
#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
728728
if (static_key_false((&paravirt_steal_rq_enabled))) {
@@ -5671,7 +5671,7 @@ void scheduler_tick(void)
56715671
{
56725672
int cpu = smp_processor_id();
56735673
struct rq *rq = cpu_rq(cpu);
5674-
struct task_struct *curr = rq->curr;
5674+
struct task_struct *curr;
56755675
struct rq_flags rf;
56765676
unsigned long thermal_pressure;
56775677
u64 resched_latency;
@@ -5683,6 +5683,9 @@ void scheduler_tick(void)
56835683

56845684
rq_lock(rq, &rf);
56855685

5686+
curr = rq->curr;
5687+
psi_account_irqtime(rq, curr, NULL);
5688+
56865689
update_rq_clock(rq);
56875690
thermal_pressure = arch_scale_thermal_pressure(cpu_of(rq));
56885691
update_thermal_load_avg(rq_clock_thermal(rq), rq, thermal_pressure);
@@ -6733,6 +6736,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
67336736
++*switch_count;
67346737

67356738
migrate_disable_switch(rq, prev);
6739+
psi_account_irqtime(rq, prev, next);
67366740
psi_sched_switch(prev, next, !task_on_rq_queued(prev));
67376741

67386742
trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next, prev_state);
@@ -11505,7 +11509,7 @@ static ssize_t cpu_max_write(struct kernfs_open_file *of,
1150511509
{
1150611510
struct task_group *tg = css_tg(of_css(of));
1150711511
u64 period = tg_get_cfs_period(tg);
11508-
u64 burst = tg_get_cfs_burst(tg);
11512+
u64 burst = tg->cfs_bandwidth.burst;
1150911513
u64 quota;
1151011514
int ret;
1151111515

kernel/sched/deadline.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1805,8 +1805,13 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
18051805
* The replenish timer needs to be canceled. No
18061806
* problem if it fires concurrently: boosted threads
18071807
* are ignored in dl_task_timer().
1808+
*
1809+
* If the timer callback was running (hrtimer_try_to_cancel == -1),
1810+
* it will eventually call put_task_struct().
18081811
*/
1809-
hrtimer_try_to_cancel(&p->dl.dl_timer);
1812+
if (hrtimer_try_to_cancel(&p->dl.dl_timer) == 1 &&
1813+
!dl_server(&p->dl))
1814+
put_task_struct(p);
18101815
p->dl.dl_throttled = 0;
18111816
}
18121817
} else if (!dl_prio(p->normal_prio)) {

kernel/sched/debug.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,7 @@ static void register_sd(struct sched_domain *sd, struct dentry *parent)
428428

429429
debugfs_create_file("flags", 0444, parent, &sd->flags, &sd_flags_fops);
430430
debugfs_create_file("groups_flags", 0444, parent, &sd->groups->flags, &sd_flags_fops);
431+
debugfs_create_u32("level", 0444, parent, (u32 *)&sd->level);
431432
}
432433

433434
void update_sched_domain_debugfs(void)

kernel/sched/fair.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7659,8 +7659,8 @@ eenv_pd_max_util(struct energy_env *eenv, struct cpumask *pd_cpus,
76597659
* Performance domain frequency: utilization clamping
76607660
* must be considered since it affects the selection
76617661
* of the performance domain frequency.
7662-
* NOTE: in case RT tasks are running, by default the
7663-
* FREQUENCY_UTIL's utilization can be max OPP.
7662+
* NOTE: in case RT tasks are running, by default the min
7663+
* utilization can be max OPP.
76647664
*/
76657665
eff_util = effective_cpu_util(cpu, util, &min, &max);
76667666

0 commit comments

Comments
 (0)