Skip to content

Commit ce052ea

Browse files
committed
Merge: cgroup: Backport upstream cgroup commits up to v6.12
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/6581
JIRA: https://issues.redhat.com/browse/RHEL-80382

This MR backports upstream cgroup commits up to v6.12 with relevant fixes, if applicable.

Signed-off-by: Radostin Stoyanov <rstoyano@redhat.com>
Approved-by: Waiman Long <longman@redhat.com>
Approved-by: Rafael Aquini <raquini@redhat.com>
Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com>
Merged-by: Jan Stancek <jstancek@redhat.com>
2 parents d889941 + 1bf2f1d commit ce052ea

File tree

31 files changed

+2039
-1402
lines changed

31 files changed

+2039
-1402
lines changed

Documentation/admin-guide/cgroup-v2.rst

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -533,10 +533,12 @@ cgroup namespace on namespace creation.
533533
Because the resource control interface files in a given directory
534534
control the distribution of the parent's resources, the delegatee
535535
shouldn't be allowed to write to them. For the first method, this is
536-
achieved by not granting access to these files. For the second, the
537-
kernel rejects writes to all files other than "cgroup.procs" and
538-
"cgroup.subtree_control" on a namespace root from inside the
539-
namespace.
536+
achieved by not granting access to these files. For the second, files
537+
outside the namespace should be hidden from the delegatee by the means
538+
of at least mount namespacing, and the kernel rejects writes to all
539+
files on a namespace root from inside the cgroup namespace, except for
540+
those files listed in "/sys/kernel/cgroup/delegate" (including
541+
"cgroup.procs", "cgroup.threads", "cgroup.subtree_control", etc.).
540542

541543
The end results are equivalent for both delegation types. Once
542544
delegated, the user can build sub-hierarchy under the directory,
@@ -1708,6 +1710,8 @@ PAGE_SIZE multiple when read back.
17081710

17091711
Note that this is subtly different from setting memory.swap.max to
17101712
0, as it still allows for pages to be written to the zswap pool.
1713+
This setting has no effect if zswap is disabled, and swapping
1714+
is allowed unless memory.swap.max is set to 0.
17111715

17121716
memory.pressure
17131717
A read-only nested-keyed file.

MAINTAINERS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4928,9 +4928,12 @@ S: Maintained
49284928
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git
49294929
F: Documentation/admin-guide/cgroup-v1/cpusets.rst
49304930
F: include/linux/cpuset.h
4931+
F: kernel/cgroup/cpuset-internal.h
4932+
F: kernel/cgroup/cpuset-v1.c
49314933
F: kernel/cgroup/cpuset.c
49324934
F: tools/testing/selftests/cgroup/test_cpuset.c
49334935
F: tools/testing/selftests/cgroup/test_cpuset_prs.sh
4936+
F: tools/testing/selftests/cgroup/test_cpuset_v1_base.sh
49344937

49354938
CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG)
49364939
M: Johannes Weiner <hannes@cmpxchg.org>

include/linux/cgroup-defs.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,11 @@ struct cgroup_subsys_state {
172172
/* reference count - access via css_[try]get() and css_put() */
173173
struct percpu_ref refcnt;
174174

175-
/* siblings list anchored at the parent's ->children */
175+
/*
176+
* siblings list anchored at the parent's ->children
177+
*
178+
* linkage is protected by cgroup_mutex or RCU
179+
*/
176180
struct list_head sibling;
177181
struct list_head children;
178182

@@ -323,6 +327,7 @@ struct cgroup_base_stat {
323327
#ifdef CONFIG_SCHED_CORE
324328
u64 forceidle_sum;
325329
#endif
330+
u64 ntime;
326331
};
327332

328333
/*

include/linux/cpuset.h

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -99,21 +99,24 @@ static inline bool cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
9999
extern int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
100100
const struct task_struct *tsk2);
101101

102+
#ifdef CONFIG_CPUSETS_V1
102103
#define cpuset_memory_pressure_bump() \
103104
do { \
104105
if (cpuset_memory_pressure_enabled) \
105106
__cpuset_memory_pressure_bump(); \
106107
} while (0)
107108
extern int cpuset_memory_pressure_enabled;
108109
extern void __cpuset_memory_pressure_bump(void);
110+
#else
111+
static inline void cpuset_memory_pressure_bump(void) { }
112+
#endif
109113

110114
extern void cpuset_task_status_allowed(struct seq_file *m,
111115
struct task_struct *task);
112116
extern int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
113117
struct pid *pid, struct task_struct *tsk);
114118

115119
extern int cpuset_mem_spread_node(void);
116-
extern int cpuset_slab_spread_node(void);
117120

118121
static inline int cpuset_do_page_mem_spread(void)
119122
{
@@ -251,11 +254,6 @@ static inline int cpuset_mem_spread_node(void)
251254
return 0;
252255
}
253256

254-
static inline int cpuset_slab_spread_node(void)
255-
{
256-
return 0;
257-
}
258-
259257
static inline int cpuset_do_page_mem_spread(void)
260258
{
261259
return 0;

include/linux/mm.h

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1433,7 +1433,6 @@ void set_pte_range(struct vm_fault *vmf, struct folio *folio,
14331433
struct page *page, unsigned int nr, unsigned long addr);
14341434

14351435
vm_fault_t finish_fault(struct vm_fault *vmf);
1436-
vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf);
14371436
#endif
14381437

14391438
/*
@@ -1783,38 +1782,39 @@ static inline bool __cpupid_match_pid(pid_t task_pid, int cpupid)
17831782

17841783
#define cpupid_match_pid(task, cpupid) __cpupid_match_pid(task->pid, cpupid)
17851784
#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
1786-
static inline int page_cpupid_xchg_last(struct page *page, int cpupid)
1785+
static inline int folio_xchg_last_cpupid(struct folio *folio, int cpupid)
17871786
{
1788-
return xchg(&page->_last_cpupid, cpupid & LAST_CPUPID_MASK);
1787+
return xchg(&folio->_last_cpupid, cpupid & LAST_CPUPID_MASK);
17891788
}
17901789

1791-
static inline int page_cpupid_last(struct page *page)
1790+
static inline int folio_last_cpupid(struct folio *folio)
17921791
{
1793-
return page->_last_cpupid;
1792+
return folio->_last_cpupid;
17941793
}
17951794
static inline void page_cpupid_reset_last(struct page *page)
17961795
{
17971796
page->_last_cpupid = -1 & LAST_CPUPID_MASK;
17981797
}
17991798
#else
1800-
static inline int page_cpupid_last(struct page *page)
1799+
static inline int folio_last_cpupid(struct folio *folio)
18011800
{
1802-
return (page->flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK;
1801+
return (folio->flags >> LAST_CPUPID_PGSHIFT) & LAST_CPUPID_MASK;
18031802
}
18041803

1805-
extern int page_cpupid_xchg_last(struct page *page, int cpupid);
1804+
int folio_xchg_last_cpupid(struct folio *folio, int cpupid);
18061805

18071806
static inline void page_cpupid_reset_last(struct page *page)
18081807
{
18091808
page->flags |= LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT;
18101809
}
18111810
#endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */
18121811

1813-
static inline int xchg_page_access_time(struct page *page, int time)
1812+
static inline int folio_xchg_access_time(struct folio *folio, int time)
18141813
{
18151814
int last_time;
18161815

1817-
last_time = page_cpupid_xchg_last(page, time >> PAGE_ACCESS_TIME_BUCKETS);
1816+
last_time = folio_xchg_last_cpupid(folio,
1817+
time >> PAGE_ACCESS_TIME_BUCKETS);
18181818
return last_time << PAGE_ACCESS_TIME_BUCKETS;
18191819
}
18201820

@@ -1828,19 +1828,19 @@ static inline void vma_set_access_pid_bit(struct vm_area_struct *vma)
18281828
}
18291829
}
18301830
#else /* !CONFIG_NUMA_BALANCING */
1831-
static inline int page_cpupid_xchg_last(struct page *page, int cpupid)
1831+
static inline int folio_xchg_last_cpupid(struct folio *folio, int cpupid)
18321832
{
1833-
return page_to_nid(page); /* XXX */
1833+
return folio_nid(folio); /* XXX */
18341834
}
18351835

1836-
static inline int xchg_page_access_time(struct page *page, int time)
1836+
static inline int folio_xchg_access_time(struct folio *folio, int time)
18371837
{
18381838
return 0;
18391839
}
18401840

1841-
static inline int page_cpupid_last(struct page *page)
1841+
static inline int folio_last_cpupid(struct folio *folio)
18421842
{
1843-
return page_to_nid(page); /* XXX */
1843+
return folio_nid(folio); /* XXX */
18441844
}
18451845

18461846
static inline int cpupid_to_nid(int cpupid)

include/linux/mm_types.h

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,10 @@ struct page {
190190
not kmapped, ie. highmem) */
191191
#endif /* WANT_PAGE_VIRTUAL */
192192

193+
#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
194+
int _last_cpupid;
195+
#endif
196+
193197
#ifdef CONFIG_KMSAN
194198
/*
195199
* KMSAN metadata for this page:
@@ -201,10 +205,6 @@ struct page {
201205
struct page *kmsan_shadow;
202206
struct page *kmsan_origin;
203207
#endif
204-
205-
#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
206-
int _last_cpupid;
207-
#endif
208208
} _struct_page_alignment;
209209

210210
/*
@@ -263,6 +263,8 @@ typedef struct {
263263
* @_refcount: Do not access this member directly. Use folio_ref_count()
264264
* to find how many references there are to this folio.
265265
* @memcg_data: Memory Control Group data.
266+
* @virtual: Virtual address in the kernel direct map.
267+
* @_last_cpupid: IDs of last CPU and last process that accessed the folio.
266268
* @_entire_mapcount: Do not use directly, call folio_entire_mapcount().
267269
* @_nr_pages_mapped: Do not use directly, call folio_mapcount().
268270
* @_pincount: Do not use directly, call folio_maybe_dma_pinned().
@@ -308,6 +310,12 @@ struct folio {
308310
atomic_t _refcount;
309311
#ifdef CONFIG_MEMCG
310312
unsigned long memcg_data;
313+
#endif
314+
#if defined(WANT_PAGE_VIRTUAL)
315+
void *virtual;
316+
#endif
317+
#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
318+
int _last_cpupid;
311319
#endif
312320
/* private: the union with struct page is transitional */
313321
};
@@ -364,6 +372,12 @@ FOLIO_MATCH(_refcount, _refcount);
364372
#ifdef CONFIG_MEMCG
365373
FOLIO_MATCH(memcg_data, memcg_data);
366374
#endif
375+
#if defined(WANT_PAGE_VIRTUAL)
376+
FOLIO_MATCH(virtual, virtual);
377+
#endif
378+
#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
379+
FOLIO_MATCH(_last_cpupid, _last_cpupid);
380+
#endif
367381
#undef FOLIO_MATCH
368382
#define FOLIO_MATCH(pg, fl) \
369383
static_assert(offsetof(struct folio, fl) == \

include/linux/sched.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1244,7 +1244,6 @@ struct task_struct {
12441244
/* Sequence number to catch updates: */
12451245
seqcount_spinlock_t mems_allowed_seq;
12461246
int cpuset_mem_spread_rotor;
1247-
int cpuset_slab_spread_rotor;
12481247
#endif
12491248
#ifdef CONFIG_CGROUPS
12501249
/* Control Group info protected by css_set_lock: */

init/Kconfig

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1123,9 +1123,23 @@ config CPUSETS
11231123

11241124
Say N if unsure.
11251125

1126+
config CPUSETS_V1
1127+
bool "Legacy cgroup v1 cpusets controller"
1128+
depends on CPUSETS
1129+
default n
1130+
help
1131+
Legacy cgroup v1 cpusets controller which has been deprecated by
1132+
cgroup v2 implementation. The v1 is there for legacy applications
1133+
which haven't migrated to the new cgroup v2 interface yet. Legacy
1134+
interface includes cpuset filesystem and /proc/<pid>/cpuset. If you
1135+
do not have any such application then you are completely fine leaving
1136+
this option disabled.
1137+
1138+
Say N if unsure.
1139+
11261140
config PROC_PID_CPUSET
11271141
bool "Include legacy /proc/<pid>/cpuset file"
1128-
depends on CPUSETS
1142+
depends on CPUSETS_V1
11291143
default y
11301144

11311145
config CGROUP_DEVICE

kernel/cgroup/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,6 @@ obj-$(CONFIG_CGROUP_FREEZER) += legacy_freezer.o
55
obj-$(CONFIG_CGROUP_PIDS) += pids.o
66
obj-$(CONFIG_CGROUP_RDMA) += rdma.o
77
obj-$(CONFIG_CPUSETS) += cpuset.o
8+
obj-$(CONFIG_CPUSETS_V1) += cpuset-v1.o
89
obj-$(CONFIG_CGROUP_MISC) += misc.o
910
obj-$(CONFIG_CGROUP_DEBUG) += debug.o

kernel/cgroup/cgroup-v1.c

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,12 @@ bool cgroup1_ssid_disabled(int ssid)
4646
return cgroup_no_v1_mask & (1 << ssid);
4747
}
4848

49+
static bool cgroup1_subsys_absent(struct cgroup_subsys *ss)
50+
{
51+
/* Check also dfl_cftypes for file-less controllers, i.e. perf_event */
52+
return ss->legacy_cftypes == NULL && ss->dfl_cftypes;
53+
}
54+
4955
/**
5056
* cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
5157
* @from: attach to all cgroups of a given task
@@ -675,11 +681,14 @@ int proc_cgroupstats_show(struct seq_file *m, void *v)
675681
* cgroup_mutex contention.
676682
*/
677683

678-
for_each_subsys(ss, i)
684+
for_each_subsys(ss, i) {
685+
if (cgroup1_subsys_absent(ss))
686+
continue;
679687
seq_printf(m, "%s\t%d\t%d\t%d\n",
680688
ss->legacy_name, ss->root->hierarchy_id,
681689
atomic_read(&ss->root->nr_cgrps),
682690
cgroup_ssid_enabled(i));
691+
}
683692

684693
return 0;
685694
}
@@ -932,7 +941,8 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param)
932941
if (ret != -ENOPARAM)
933942
return ret;
934943
for_each_subsys(ss, i) {
935-
if (strcmp(param->key, ss->legacy_name))
944+
if (strcmp(param->key, ss->legacy_name) ||
945+
cgroup1_subsys_absent(ss))
936946
continue;
937947
if (!cgroup_ssid_enabled(i) || cgroup1_ssid_disabled(i))
938948
return invalfc(fc, "Disabled controller '%s'",
@@ -1024,7 +1034,8 @@ static int check_cgroupfs_options(struct fs_context *fc)
10241034
mask = ~((u16)1 << cpuset_cgrp_id);
10251035
#endif
10261036
for_each_subsys(ss, i)
1027-
if (cgroup_ssid_enabled(i) && !cgroup1_ssid_disabled(i))
1037+
if (cgroup_ssid_enabled(i) && !cgroup1_ssid_disabled(i) &&
1038+
!cgroup1_subsys_absent(ss))
10281039
enabled |= 1 << i;
10291040

10301041
ctx->subsys_mask &= enabled;

0 commit comments

Comments
 (0)