Skip to content

Commit e38a481

Browse files
perf: Disallow mis-matched inherited group reads
jira VULN-8890 cve CVE-2023-5717 commit-author Peter Zijlstra <peterz@infradead.org> commit 32671e3 upstream-diff This patch causes kABI breakage due to a change in the struct perf_event layout after adding the group_generation field. Hence, to preserve kABI compatibility, use RH_KABI_EXTEND macro to safely append the new field without affecting the existing layout. Because group consistency is non-atomic between parent (filedesc) and children (inherited) events, it is possible for PERF_FORMAT_GROUP read() to try and sum non-matching counter groups -- with non-sensical results. Add group_generation to distinguish the case where a parent group removes and adds an event and thus has the same number, but a different configuration of events as inherited groups. This became a problem when commit fa8c269 ("perf/core: Invert perf_read_group() loops") flipped the order of child_list and sibling_list. Previously it would iterate the group (sibling_list) first, and for each sibling traverse the child_list. In this order, only the group composition of the parent is relevant. By flipping the order the group composition of the child (inherited) events becomes an issue and the mis-match in group composition becomes evident. That said; even prior to this commit, while reading of a group that is not equally inherited was not broken, it still made no sense. (Ab)use ECHILD as error return to indicate issues with child process group composition. Fixes: fa8c269 ("perf/core: Invert perf_read_group() loops") Reported-by: Budimir Markovic <markovicbudimir@gmail.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lkml.kernel.org/r/20231018115654.GK33217@noisy.programming.kicks-ass.net (cherry picked from commit 32671e3) Signed-off-by: Shreeya Patel <spatel@ciq.com>
1 parent 7ded590 commit e38a481

File tree

2 files changed

+36
-6
lines changed

2 files changed

+36
-6
lines changed

include/linux/perf_event.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ struct perf_guest_info_callbacks {
5959
#include <linux/cgroup.h>
6060
#include <linux/refcount.h>
6161
#include <linux/security.h>
62+
#include <linux/rh_kabi.h>
6263
#include <asm/local.h>
6364

6465
struct perf_callchain_entry {
@@ -811,6 +812,8 @@ struct perf_event {
811812
RH_KABI_BROKEN_INSERT(void *security)
812813
#endif
813814
struct list_head sb_list;
815+
816+
RH_KABI_EXTEND(unsigned int group_generation)
814817
#endif /* CONFIG_PERF_EVENTS */
815818
};
816819

kernel/events/core.c

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2022,6 +2022,7 @@ static void perf_group_attach(struct perf_event *event)
20222022

20232023
list_add_tail(&event->sibling_list, &group_leader->sibling_list);
20242024
group_leader->nr_siblings++;
2025+
group_leader->group_generation++;
20252026

20262027
perf_event__header_size(group_leader);
20272028

@@ -2216,6 +2217,7 @@ static void perf_group_detach(struct perf_event *event)
22162217
if (leader != event) {
22172218
list_del_init(&event->sibling_list);
22182219
event->group_leader->nr_siblings--;
2220+
event->group_leader->group_generation++;
22192221
goto out;
22202222
}
22212223

@@ -5282,7 +5284,7 @@ static int __perf_read_group_add(struct perf_event *leader,
52825284
u64 read_format, u64 *values)
52835285
{
52845286
struct perf_event_context *ctx = leader->ctx;
5285-
struct perf_event *sub;
5287+
struct perf_event *sub, *parent;
52865288
unsigned long flags;
52875289
int n = 1; /* skip @nr */
52885290
int ret;
@@ -5292,6 +5294,33 @@ static int __perf_read_group_add(struct perf_event *leader,
52925294
return ret;
52935295

52945296
raw_spin_lock_irqsave(&ctx->lock, flags);
5297+
/*
5298+
* Verify the grouping between the parent and child (inherited)
5299+
* events is still in tact.
5300+
*
5301+
* Specifically:
5302+
* - leader->ctx->lock pins leader->sibling_list
5303+
* - parent->child_mutex pins parent->child_list
5304+
* - parent->ctx->mutex pins parent->sibling_list
5305+
*
5306+
* Because parent->ctx != leader->ctx (and child_list nests inside
5307+
* ctx->mutex), group destruction is not atomic between children, also
5308+
* see perf_event_release_kernel(). Additionally, parent can grow the
5309+
* group.
5310+
*
5311+
* Therefore it is possible to have parent and child groups in a
5312+
* different configuration and summing over such a beast makes no sense
5313+
* what so ever.
5314+
*
5315+
* Reject this.
5316+
*/
5317+
parent = leader->parent;
5318+
if (parent &&
5319+
(parent->group_generation != leader->group_generation ||
5320+
parent->nr_siblings != leader->nr_siblings)) {
5321+
ret = -ECHILD;
5322+
goto unlock;
5323+
}
52955324

52965325
/*
52975326
* Since we co-schedule groups, {enabled,running} times of siblings
@@ -5321,8 +5350,9 @@ static int __perf_read_group_add(struct perf_event *leader,
53215350
values[n++] = primary_event_id(sub);
53225351
}
53235352

5353+
unlock:
53245354
raw_spin_unlock_irqrestore(&ctx->lock, flags);
5325-
return 0;
5355+
return ret;
53265356
}
53275357

53285358
static int perf_read_group(struct perf_event *event,
@@ -5341,10 +5371,6 @@ static int perf_read_group(struct perf_event *event,
53415371

53425372
values[0] = 1 + leader->nr_siblings;
53435373

5344-
/*
5345-
* By locking the child_mutex of the leader we effectively
5346-
* lock the child list of all siblings.. XXX explain how.
5347-
*/
53485374
mutex_lock(&leader->child_mutex);
53495375

53505376
ret = __perf_read_group_add(leader, read_format, values);
@@ -13005,6 +13031,7 @@ static int inherit_group(struct perf_event *parent_event,
1300513031
!perf_get_aux_event(child_ctr, leader))
1300613032
return -EINVAL;
1300713033
}
13034+
leader->group_generation = parent_event->group_generation;
1300813035
return 0;
1300913036
}
1301013037

0 commit comments

Comments
 (0)