Commit c8e2808
sched/core: Reorganize cgroup bandwidth control interface file writes
JIRA: https://issues.redhat.com/browse/RHEL-110301

commit 5bc34be
Author: Tejun Heo <tj@kernel.org>
Date:   Fri Jun 13 15:23:30 2025 -1000

    sched/core: Reorganize cgroup bandwidth control interface file writes

    - Move input parameter validation from tg_set_cfs_bandwidth() to the
      new outer function tg_set_bandwidth(). The outer function handles
      parameters in usecs, validates them and calls tg_set_cfs_bandwidth()
      which converts them into nsecs. This matches tg_bandwidth() on the
      read side.

    - max/min_cfs_* consts are now used by tg_set_bandwidth(). Relocate,
      convert into usecs and drop "cfs" from the names.

    - Reimplement cpu_cfs_{period|quota|burst}_write_*() using
      tg_bandwidth() and tg_set_bandwidth() and replace "cfs" in the names
      with "bw".

    - Update cpu_max_write() to use tg_set_bandwidth().
      cpu_period_quota_parse() is updated to drop nsec conversion
      accordingly. This aligns the behavior with cpu_period_quota_print().

    - Drop the now-unused tg_set_cfs_{period|quota|burst}().

    - While at it, for consistency, rename default_cfs_period() to
      default_bw_period_us() and make it return usecs.

    This is to prepare for adding bandwidth control support to sched_ext.
    tg_set_bandwidth() will be used as the muxing point. No functional
    changes intended.

    Signed-off-by: Tejun Heo <tj@kernel.org>
    Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
    Link: https://lore.kernel.org/r/20250614012346.2358261-5-tj@kernel.org

Signed-off-by: Phil Auld <pauld@redhat.com>
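[Editor's illustration] A minimal userspace sketch of the layering the message describes may help before reading the diff: validation happens once, in microseconds, in the outer setter (as tg_set_bandwidth() does below), and the inner CFS setter is left with nothing but unit conversion (as tg_set_cfs_bandwidth() now is). This is a compilable stand-alone illustration, not kernel code: the MAX_BW value here is a simplified stand-in (the kernel derives it from BW_SHIFT), and the printf() "apply" step replaces the real runtime-distribution work.

/* Standalone sketch of the usec-validating outer / nsec-converting inner split. */
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_USEC	1000ULL
#define USEC_PER_SEC	1000000ULL
#define USEC_PER_MSEC	1000ULL
#define RUNTIME_INF	(~0ULL)
/* Simplified stand-in; the comment "more than 203 days" implies ~2^44 usecs. */
#define MAX_BW		((1ULL << 44) - 1)

static const uint64_t max_bw_quota_period_us = 1 * USEC_PER_SEC;  /* 1s */
static const uint64_t min_bw_quota_period_us = 1 * USEC_PER_MSEC; /* 1ms */
static const uint64_t max_bw_runtime_us = MAX_BW;

/* Inner setter: pure unit conversion, mirroring the new tg_set_cfs_bandwidth(). */
static int set_cfs_bandwidth(uint64_t period_us, uint64_t quota_us, uint64_t burst_us)
{
	uint64_t period = period_us * NSEC_PER_USEC;
	uint64_t quota = quota_us == RUNTIME_INF ? RUNTIME_INF
						 : quota_us * NSEC_PER_USEC;
	uint64_t burst = burst_us * NSEC_PER_USEC;

	printf("apply: period=%llu ns, quota=%llu ns, burst=%llu ns\n",
	       (unsigned long long)period, (unsigned long long)quota,
	       (unsigned long long)burst);
	return 0;
}

/* Outer setter: every check lives here, in usecs, as in tg_set_bandwidth(). */
static int set_bandwidth(uint64_t period_us, uint64_t quota_us, uint64_t burst_us)
{
	const uint64_t max_usec = UINT64_MAX / NSEC_PER_USEC;

	/* Values should survive translation to nsec. */
	if (period_us > max_usec ||
	    (quota_us != RUNTIME_INF && quota_us > max_usec) ||
	    burst_us > max_usec)
		return -1;

	/* Some bandwidth every period; periods neither too short nor too long. */
	if (quota_us < min_bw_quota_period_us || period_us < min_bw_quota_period_us)
		return -1;
	if (period_us > max_bw_quota_period_us)
		return -1;

	/* Bound quota and quota+burst against overflow during the bandwidth shift. */
	if (quota_us != RUNTIME_INF &&
	    (quota_us > max_bw_runtime_us || burst_us > quota_us ||
	     burst_us + quota_us > max_bw_runtime_us))
		return -1;

	return set_cfs_bandwidth(period_us, quota_us, burst_us);
}

int main(void)
{
	return set_bandwidth(100000, 50000, 0);	/* 100ms period, 50ms quota */
}

Keeping the checks in the usec-domain outer function means any later caller of tg_set_bandwidth() (e.g. sched_ext, per the commit message) inherits the validation without re-implementing it.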
1 parent c5c55a7 commit c8e2808

File tree: 3 files changed, +106 -113 lines changed

kernel/sched/core.c

Lines changed: 99 additions & 106 deletions
@@ -9319,47 +9319,23 @@ static u64 cpu_shares_read_u64(struct cgroup_subsys_state *css,
 #ifdef CONFIG_CFS_BANDWIDTH
 static DEFINE_MUTEX(cfs_constraints_mutex);
 
-const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */
-static const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */
-/* More than 203 days if BW_SHIFT equals 20. */
-static const u64 max_cfs_runtime = MAX_BW * NSEC_PER_USEC;
-
 static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime);
 
-static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota,
-				u64 burst)
+static int tg_set_cfs_bandwidth(struct task_group *tg,
+				u64 period_us, u64 quota_us, u64 burst_us)
 {
 	int i, ret = 0, runtime_enabled, runtime_was_enabled;
 	struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
+	u64 period, quota, burst;
 
-	if (tg == &root_task_group)
-		return -EINVAL;
-
-	/*
-	 * Ensure we have at some amount of bandwidth every period. This is
-	 * to prevent reaching a state of large arrears when throttled via
-	 * entity_tick() resulting in prolonged exit starvation.
-	 */
-	if (quota < min_cfs_quota_period || period < min_cfs_quota_period)
-		return -EINVAL;
+	period = (u64)period_us * NSEC_PER_USEC;
 
-	/*
-	 * Likewise, bound things on the other side by preventing insane quota
-	 * periods. This also allows us to normalize in computing quota
-	 * feasibility.
-	 */
-	if (period > max_cfs_quota_period)
-		return -EINVAL;
-
-	/*
-	 * Bound quota to defend quota against overflow during bandwidth shift.
-	 */
-	if (quota != RUNTIME_INF && quota > max_cfs_runtime)
-		return -EINVAL;
+	if (quota_us == RUNTIME_INF)
+		quota = RUNTIME_INF;
+	else
+		quota = (u64)quota_us * NSEC_PER_USEC;
 
-	if (quota != RUNTIME_INF && (burst > quota ||
-				     burst + quota > max_cfs_runtime))
-		return -EINVAL;
+	burst = (u64)burst_us * NSEC_PER_USEC;
 
 	/*
 	 * Prevent race between setting of cfs_rq->runtime_enabled and
@@ -9447,50 +9423,6 @@ static u64 tg_get_cfs_burst(struct task_group *tg)
 	return burst_us;
 }
 
-static int tg_set_cfs_period(struct task_group *tg, long cfs_period_us)
-{
-	u64 quota, period, burst;
-
-	if ((u64)cfs_period_us > U64_MAX / NSEC_PER_USEC)
-		return -EINVAL;
-
-	period = (u64)cfs_period_us * NSEC_PER_USEC;
-	quota = tg->cfs_bandwidth.quota;
-	burst = tg->cfs_bandwidth.burst;
-
-	return tg_set_cfs_bandwidth(tg, period, quota, burst);
-}
-
-static int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us)
-{
-	u64 quota, period, burst;
-
-	period = ktime_to_ns(tg->cfs_bandwidth.period);
-	burst = tg->cfs_bandwidth.burst;
-	if (cfs_quota_us < 0)
-		quota = RUNTIME_INF;
-	else if ((u64)cfs_quota_us <= U64_MAX / NSEC_PER_USEC)
-		quota = (u64)cfs_quota_us * NSEC_PER_USEC;
-	else
-		return -EINVAL;
-
-	return tg_set_cfs_bandwidth(tg, period, quota, burst);
-}
-
-static int tg_set_cfs_burst(struct task_group *tg, long cfs_burst_us)
-{
-	u64 quota, period, burst;
-
-	if ((u64)cfs_burst_us > U64_MAX / NSEC_PER_USEC)
-		return -EINVAL;
-
-	burst = (u64)cfs_burst_us * NSEC_PER_USEC;
-	period = ktime_to_ns(tg->cfs_bandwidth.period);
-	quota = tg->cfs_bandwidth.quota;
-
-	return tg_set_cfs_bandwidth(tg, period, quota, burst);
-}
-
 struct cfs_schedulable_data {
 	struct task_group *tg;
 	u64 period, quota;
@@ -9624,6 +9556,11 @@ static int cpu_cfs_local_stat_show(struct seq_file *sf, void *v)
 	return 0;
 }
 
+const u64 max_bw_quota_period_us = 1 * USEC_PER_SEC; /* 1s */
+static const u64 min_bw_quota_period_us = 1 * USEC_PER_MSEC; /* 1ms */
+/* More than 203 days if BW_SHIFT equals 20. */
+static const u64 max_bw_runtime_us = MAX_BW;
+
 static void tg_bandwidth(struct task_group *tg,
 			 u64 *period_us_p, u64 *quota_us_p, u64 *burst_us_p)
 {
@@ -9644,6 +9581,50 @@ static u64 cpu_period_read_u64(struct cgroup_subsys_state *css,
 	return period_us;
 }
 
+static int tg_set_bandwidth(struct task_group *tg,
+			    u64 period_us, u64 quota_us, u64 burst_us)
+{
+	const u64 max_usec = U64_MAX / NSEC_PER_USEC;
+
+	if (tg == &root_task_group)
+		return -EINVAL;
+
+	/* Values should survive translation to nsec */
+	if (period_us > max_usec ||
+	    (quota_us != RUNTIME_INF && quota_us > max_usec) ||
+	    burst_us > max_usec)
+		return -EINVAL;
+
+	/*
+	 * Ensure we have some amount of bandwidth every period. This is to
+	 * prevent reaching a state of large arrears when throttled via
+	 * entity_tick() resulting in prolonged exit starvation.
+	 */
+	if (quota_us < min_bw_quota_period_us ||
+	    period_us < min_bw_quota_period_us)
+		return -EINVAL;
+
+	/*
+	 * Likewise, bound things on the other side by preventing insane quota
+	 * periods. This also allows us to normalize in computing quota
+	 * feasibility.
+	 */
+	if (period_us > max_bw_quota_period_us)
+		return -EINVAL;
+
+	/*
+	 * Bound quota to defend quota against overflow during bandwidth shift.
+	 */
+	if (quota_us != RUNTIME_INF && quota_us > max_bw_runtime_us)
+		return -EINVAL;
+
+	if (quota_us != RUNTIME_INF && (burst_us > quota_us ||
+					burst_us + quota_us > max_bw_runtime_us))
+		return -EINVAL;
+
+	return tg_set_cfs_bandwidth(tg, period_us, quota_us, burst_us);
+}
+
 static s64 cpu_quota_read_s64(struct cgroup_subsys_state *css,
 			      struct cftype *cft)
 {
@@ -9662,22 +9643,37 @@ static u64 cpu_burst_read_u64(struct cgroup_subsys_state *css,
 	return burst_us;
 }
 
-static int cpu_cfs_period_write_u64(struct cgroup_subsys_state *css,
-				    struct cftype *cftype, u64 cfs_period_us)
+static int cpu_period_write_u64(struct cgroup_subsys_state *css,
+				struct cftype *cftype, u64 period_us)
 {
-	return tg_set_cfs_period(css_tg(css), cfs_period_us);
+	struct task_group *tg = css_tg(css);
+	u64 quota_us, burst_us;
+
+	tg_bandwidth(tg, NULL, &quota_us, &burst_us);
+	return tg_set_bandwidth(tg, period_us, quota_us, burst_us);
 }
 
-static int cpu_cfs_quota_write_s64(struct cgroup_subsys_state *css,
-				   struct cftype *cftype, s64 cfs_quota_us)
+static int cpu_quota_write_s64(struct cgroup_subsys_state *css,
+			       struct cftype *cftype, s64 quota_us)
 {
-	return tg_set_cfs_quota(css_tg(css), cfs_quota_us);
+	struct task_group *tg = css_tg(css);
+	u64 period_us, burst_us;
+
+	if (quota_us < 0)
+		quota_us = RUNTIME_INF;
+
+	tg_bandwidth(tg, &period_us, NULL, &burst_us);
+	return tg_set_bandwidth(tg, period_us, quota_us, burst_us);
 }
 
-static int cpu_cfs_burst_write_u64(struct cgroup_subsys_state *css,
-				   struct cftype *cftype, u64 cfs_burst_us)
+static int cpu_burst_write_u64(struct cgroup_subsys_state *css,
+			       struct cftype *cftype, u64 burst_us)
 {
-	return tg_set_cfs_burst(css_tg(css), cfs_burst_us);
+	struct task_group *tg = css_tg(css);
+	u64 period_us, quota_us;
+
+	tg_bandwidth(tg, &period_us, &quota_us, NULL);
+	return tg_set_bandwidth(tg, period_us, quota_us, burst_us);
 }
 #endif /* CONFIG_CFS_BANDWIDTH */
 
@@ -9743,17 +9739,17 @@ static struct cftype cpu_legacy_files[] = {
 	{
 		.name = "cfs_period_us",
 		.read_u64 = cpu_period_read_u64,
-		.write_u64 = cpu_cfs_period_write_u64,
+		.write_u64 = cpu_period_write_u64,
 	},
 	{
 		.name = "cfs_quota_us",
 		.read_s64 = cpu_quota_read_s64,
-		.write_s64 = cpu_cfs_quota_write_s64,
+		.write_s64 = cpu_quota_write_s64,
 	},
 	{
 		.name = "cfs_burst_us",
 		.read_u64 = cpu_burst_read_u64,
-		.write_u64 = cpu_cfs_burst_write_u64,
+		.write_u64 = cpu_burst_write_u64,
 	},
 	{
 		.name = "stat",
@@ -9950,22 +9946,20 @@ static void __maybe_unused cpu_period_quota_print(struct seq_file *sf,
 }
 
 /* caller should put the current value in *@periodp before calling */
-static int __maybe_unused cpu_period_quota_parse(char *buf,
-						 u64 *periodp, u64 *quotap)
+static int __maybe_unused cpu_period_quota_parse(char *buf, u64 *period_us_p,
+						 u64 *quota_us_p)
 {
 	char tok[21];	/* U64_MAX */
 
-	if (sscanf(buf, "%20s %llu", tok, periodp) < 1)
+	if (sscanf(buf, "%20s %llu", tok, period_us_p) < 1)
 		return -EINVAL;
 
-	*periodp *= NSEC_PER_USEC;
-
-	if (sscanf(tok, "%llu", quotap))
-		*quotap *= NSEC_PER_USEC;
-	else if (!strcmp(tok, "max"))
-		*quotap = RUNTIME_INF;
-	else
-		return -EINVAL;
+	if (sscanf(tok, "%llu", quota_us_p) < 1) {
+		if (!strcmp(tok, "max"))
+			*quota_us_p = RUNTIME_INF;
+		else
+			return -EINVAL;
+	}
 
 	return 0;
 }
@@ -9985,14 +9979,13 @@ static ssize_t cpu_max_write(struct kernfs_open_file *of,
 			     char *buf, size_t nbytes, loff_t off)
 {
 	struct task_group *tg = css_tg(of_css(of));
-	u64 period = tg_get_cfs_period(tg);
-	u64 burst = tg->cfs_bandwidth.burst;
-	u64 quota;
+	u64 period_us, quota_us, burst_us;
 	int ret;
 
-	ret = cpu_period_quota_parse(buf, &period, &quota);
+	tg_bandwidth(tg, &period_us, NULL, &burst_us);
+	ret = cpu_period_quota_parse(buf, &period_us, &quota_us);
 	if (!ret)
-		ret = tg_set_cfs_bandwidth(tg, period, quota, burst);
+		ret = tg_set_bandwidth(tg, period_us, quota_us, burst_us);
 	return ret ?: nbytes;
 }
 #endif /* CONFIG_CFS_BANDWIDTH */
@@ -10029,7 +10022,7 @@ static struct cftype cpu_files[] = {
 		.name = "max.burst",
 		.flags = CFTYPE_NOT_ON_ROOT,
 		.read_u64 = cpu_burst_read_u64,
-		.write_u64 = cpu_cfs_burst_write_u64,
+		.write_u64 = cpu_burst_write_u64,
 	},
 #endif /* CONFIG_CFS_BANDWIDTH */
 #ifdef CONFIG_UCLAMP_TASK_GROUP
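[Editor's illustration] Since cpu_max_write() and cpu_period_quota_parse() now work in usecs end to end, the cgroup v2 path is exercised with the usual "$QUOTA $PERIOD" writes. A hedged usage sketch follows; the cgroup directory /sys/fs/cgroup/demo is hypothetical, quota and period are in microseconds, and the token "max" (handled by cpu_period_quota_parse() above) removes the quota.

/* Userspace sketch: set a 50ms quota per 100ms period on a cgroup v2 group. */
#include <stdio.h>

int main(void)
{
	/* Hypothetical cgroup; substitute any existing cgroup v2 directory. */
	const char *path = "/sys/fs/cgroup/demo/cpu.max";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* 50ms of CPU every 100ms; writing "max 100000" would lift the quota. */
	fprintf(f, "50000 100000\n");
	return fclose(f) != 0;
}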

kernel/sched/fair.c

Lines changed: 2 additions & 2 deletions
@@ -6411,7 +6411,7 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
 		 * to fail.
 		 */
 		new = old * 2;
-		if (new < max_cfs_quota_period) {
+		if (new < max_bw_quota_period_us * NSEC_PER_USEC) {
 			cfs_b->period = ns_to_ktime(new);
 			cfs_b->quota *= 2;
 			cfs_b->burst *= 2;
@@ -6445,7 +6445,7 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b, struct cfs_bandwidth *paren
 	raw_spin_lock_init(&cfs_b->lock);
 	cfs_b->runtime = 0;
 	cfs_b->quota = RUNTIME_INF;
-	cfs_b->period = ns_to_ktime(default_cfs_period());
+	cfs_b->period = us_to_ktime(default_bw_period_us());
 	cfs_b->burst = 0;
 	cfs_b->hierarchical_quota = parent ? parent->hierarchical_quota : RUNTIME_INF;

kernel/sched/sched.h

Lines changed: 5 additions & 5 deletions
@@ -404,15 +404,15 @@ static inline bool dl_server_active(struct sched_dl_entity *dl_se)
 extern struct list_head task_groups;
 
 #ifdef CONFIG_CFS_BANDWIDTH
-extern const u64 max_cfs_quota_period;
+extern const u64 max_bw_quota_period_us;
 
 /*
- * default period for cfs group bandwidth.
- * default: 0.1s, units: nanoseconds
+ * default period for group bandwidth.
+ * default: 0.1s, units: microseconds
  */
-static inline u64 default_cfs_period(void)
+static inline u64 default_bw_period_us(void)
 {
-	return 100000000ULL;
+	return 100000ULL;
 }
 #endif /* CONFIG_CFS_BANDWIDTH */
