
Commit bd8ec4a

perf: Fix event leak upon exec and file release
JIRA: https://issues.redhat.com/browse/RHEL-55606
CVE: CVE-2024-43869

upstream
========
commit 3a54654
Author: Frederic Weisbecker <frederic@kernel.org>
Date:   Fri Jun 21 11:16:01 2024 +0200

description
===========
The perf pending task work is never waited upon the matching event
release. In the case of a child event, released via free_event()
directly, this can potentially result in a leaked event, such as in the
following scenario that doesn't even require a weak IRQ work
implementation to trigger:

    schedule()
       prepare_task_switch()
    =======> <NMI>
          perf_event_overflow()
             event->pending_sigtrap = ...
             irq_work_queue(&event->pending_irq)
    <======= </NMI>
          perf_event_task_sched_out()
              event_sched_out()
                  event->pending_sigtrap = 0;
                  atomic_long_inc_not_zero(&event->refcount)
                  task_work_add(&event->pending_task)
       finish_lock_switch()
    =======> <IRQ>
       perf_pending_irq()
          // do nothing, rely on pending task work
    <======= </IRQ>

    begin_new_exec()
       perf_event_exit_task()
          perf_event_exit_event()
             // if it is a child event
             free_event()
                WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1)
                // event is leaked

Similar scenarios can also happen with perf_event_remove_on_exec() or
simply against a concurrent perf_event_release().

Fix this by synchronizing against any possibly remaining pending task
work while freeing the event, just as is done with remaining pending IRQ
work. This means that the pending task callback neither needs nor should
hold a reference to the event, preventing it from ever being freed.

Fixes: 517e6a3 ("perf: Fix perf_pending_task() UaF")
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20240621091601.18227-5-frederic@kernel.org
Signed-off-by: Michael Petlan <mpetlan@redhat.com>
1 parent 4154e6e commit bd8ec4a
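
Editor's note: the sketch below is a minimal user-space analogue of the synchronization this patch introduces, not the kernel code itself. The deferred perf_pending_task() callback is modelled by a thread, event->pending_work by a plain flag, and the new pending_work_wait rcuwait by a pthread condition variable; all names in the sketch are hypothetical. It builds with cc -pthread.

/*
 * User-space analogue of the fix (hypothetical names, not perf code):
 * an object with a deferred callback must not be torn down while that
 * callback can still run.  The releaser waits until the callback has
 * cleared pending_work, the way _free_event() now waits on
 * pending_work_wait; the condition variable stands in for the rcuwait.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

struct fake_event {
	pthread_mutex_t lock;
	pthread_cond_t  work_done;     /* analogue of pending_work_wait   */
	bool            pending_work;  /* analogue of event->pending_work */
};

/* Analogue of perf_pending_task(): clear the flag, wake the releaser. */
static void *pending_task(void *arg)
{
	struct fake_event *ev = arg;

	usleep(1000);                          /* pretend to deliver a SIGTRAP */

	pthread_mutex_lock(&ev->lock);
	ev->pending_work = false;              /* event->pending_work = 0 */
	pthread_cond_signal(&ev->work_done);   /* like rcuwait_wake_up()  */
	pthread_mutex_unlock(&ev->lock);
	return NULL;
}

/* Analogue of perf_pending_task_sync(): block until the work is done. */
static void pending_task_sync(struct fake_event *ev)
{
	pthread_mutex_lock(&ev->lock);
	while (ev->pending_work)               /* like rcuwait_wait_event() */
		pthread_cond_wait(&ev->work_done, &ev->lock);
	pthread_mutex_unlock(&ev->lock);
}

int main(void)
{
	struct fake_event ev = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.work_done = PTHREAD_COND_INITIALIZER,
		.pending_work = true,
	};
	pthread_t tid;

	pthread_create(&tid, NULL, pending_task, &ev);   /* "task_work_add()" */

	/* "free_event()" path: synchronize before tearing the object down. */
	pending_task_sync(&ev);

	pthread_join(tid, NULL);   /* toy-program cleanup, not part of the analogy */
	printf("pending work finished, object can be freed safely\n");
	return 0;
}

The point is the ordering guarantee: once pending_task_sync() returns, the callback can no longer touch the event, so the callback does not need to pin the event with a reference. That is why the patch can drop the atomic_long_inc_not_zero()/put_event() pair from the task-work path.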

2 files changed, 35 insertions(+), 4 deletions(-)

include/linux/perf_event.h

Lines changed: 1 addition & 0 deletions
@@ -788,6 +788,7 @@ struct perf_event {
 	struct irq_work			pending_irq;
 	struct callback_head		pending_task;
 	unsigned int			pending_work;
+	struct rcuwait			pending_work_wait;
 
 	atomic_t			event_limit;
 

kernel/events/core.c

Lines changed: 34 additions & 4 deletions
@@ -2288,7 +2288,6 @@ event_sched_out(struct perf_event *event, struct perf_event_context *ctx)
 	if (state != PERF_EVENT_STATE_OFF &&
 	    !event->pending_work &&
 	    !task_work_add(current, &event->pending_task, TWA_RESUME)) {
-		WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount));
 		event->pending_work = 1;
 	} else {
 		local_dec(&event->ctx->nr_pending);
@@ -5184,9 +5183,35 @@ static bool exclusive_event_installable(struct perf_event *event,
 static void perf_addr_filters_splice(struct perf_event *event,
 				     struct list_head *head);
 
+static void perf_pending_task_sync(struct perf_event *event)
+{
+	struct callback_head *head = &event->pending_task;
+
+	if (!event->pending_work)
+		return;
+	/*
+	 * If the task is queued to the current task's queue, we
+	 * obviously can't wait for it to complete. Simply cancel it.
+	 */
+	if (task_work_cancel(current, head)) {
+		event->pending_work = 0;
+		local_dec(&event->ctx->nr_pending);
+		return;
+	}
+
+	/*
+	 * All accesses related to the event are within the same
+	 * non-preemptible section in perf_pending_task(). The RCU
+	 * grace period before the event is freed will make sure all
+	 * those accesses are complete by then.
+	 */
+	rcuwait_wait_event(&event->pending_work_wait, !event->pending_work, TASK_UNINTERRUPTIBLE);
+}
+
 static void _free_event(struct perf_event *event)
 {
 	irq_work_sync(&event->pending_irq);
+	perf_pending_task_sync(event);
 
 	unaccount_event(event);
 
@@ -6804,24 +6829,28 @@ static void perf_pending_task(struct callback_head *head)
 	struct perf_event *event = container_of(head, struct perf_event, pending_task);
 	int rctx;
 
+	/*
+	 * All accesses to the event must belong to the same implicit RCU read-side
+	 * critical section as the ->pending_work reset. See comment in
+	 * perf_pending_task_sync().
+	 */
+	preempt_disable_notrace();
 	/*
 	 * If we 'fail' here, that's OK, it means recursion is already disabled
 	 * and we won't recurse 'further'.
 	 */
-	preempt_disable_notrace();
 	rctx = perf_swevent_get_recursion_context();
 
 	if (event->pending_work) {
 		event->pending_work = 0;
 		perf_sigtrap(event);
 		local_dec(&event->ctx->nr_pending);
+		rcuwait_wake_up(&event->pending_work_wait);
 	}
 
 	if (rctx >= 0)
 		perf_swevent_put_recursion_context(rctx);
 	preempt_enable_notrace();
-
-	put_event(event);
 }
 
 #ifdef CONFIG_GUEST_PERF_EVENTS
@@ -11936,6 +11965,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	init_waitqueue_head(&event->waitq);
 	init_irq_work(&event->pending_irq, perf_pending_irq);
 	init_task_work(&event->pending_task, perf_pending_task);
+	rcuwait_init(&event->pending_work_wait);
 
 	mutex_init(&event->mmap_mutex);
 	raw_spin_lock_init(&event->addr_filters.lock);
