Skip to content

Commit 4a24cab

Browse files
committed
acct: perform last write from workqueue
JIRA: https://issues.redhat.com/browse/RHEL-83160 CVE: CVE-2025-21846 commit 56d5f3e Author: Christian Brauner <brauner@kernel.org> Date: Tue, 11 Feb 2025 18:15:59 +0100 acct: perform last write from workqueue In [1] it was reported that the acct(2) system call can be used to trigger NULL deref in cases where it is set to write to a file that triggers an internal lookup. This can e.g., happen when pointing acc(2) to /sys/power/resume. At the point the where the write to this file happens the calling task has already exited and called exit_fs(). A lookup will thus trigger a NULL-deref when accessing current->fs. Reorganize the code so that the the final write happens from the workqueue but with the caller's credentials. This preserves the (strange) permission model and has almost no regression risk. This api should stop to exist though. Link: https://lore.kernel.org/r/20250127091811.3183623-1-quzicheng@huawei.com [1] Link: https://lore.kernel.org/r/20250211-work-acct-v1-1-1c16aecab8b3@kernel.org Fixes: 1da177e ("Linux-2.6.12-rc2") Reported-by: Zicheng Qu <quzicheng@huawei.com> Cc: stable@vger.kernel.org Signed-off-by: Christian Brauner <brauner@kernel.org> Signed-off-by: Waiman Long <longman@redhat.com>
1 parent 0408b04 commit 4a24cab

File tree

1 file changed

+70
-50
lines changed

1 file changed

+70
-50
lines changed

kernel/acct.c

Lines changed: 70 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -84,48 +84,50 @@ struct bsd_acct_struct {
8484
atomic_long_t count;
8585
struct rcu_head rcu;
8686
struct mutex lock;
87-
int active;
87+
bool active;
88+
bool check_space;
8889
unsigned long needcheck;
8990
struct file *file;
9091
struct pid_namespace *ns;
9192
struct work_struct work;
9293
struct completion done;
94+
acct_t ac;
9395
};
9496

95-
static void do_acct_process(struct bsd_acct_struct *acct);
97+
static void fill_ac(struct bsd_acct_struct *acct);
98+
static void acct_write_process(struct bsd_acct_struct *acct);
9699

97100
/*
98101
* Check the amount of free space and suspend/resume accordingly.
99102
*/
100-
static int check_free_space(struct bsd_acct_struct *acct)
103+
static bool check_free_space(struct bsd_acct_struct *acct)
101104
{
102105
struct kstatfs sbuf;
103106

104-
if (time_is_after_jiffies(acct->needcheck))
105-
goto out;
107+
if (!acct->check_space)
108+
return acct->active;
106109

107110
/* May block */
108111
if (vfs_statfs(&acct->file->f_path, &sbuf))
109-
goto out;
112+
return acct->active;
110113

111114
if (acct->active) {
112115
u64 suspend = sbuf.f_blocks * SUSPEND;
113116
do_div(suspend, 100);
114117
if (sbuf.f_bavail <= suspend) {
115-
acct->active = 0;
118+
acct->active = false;
116119
pr_info("Process accounting paused\n");
117120
}
118121
} else {
119122
u64 resume = sbuf.f_blocks * RESUME;
120123
do_div(resume, 100);
121124
if (sbuf.f_bavail >= resume) {
122-
acct->active = 1;
125+
acct->active = true;
123126
pr_info("Process accounting resumed\n");
124127
}
125128
}
126129

127130
acct->needcheck = jiffies + ACCT_TIMEOUT*HZ;
128-
out:
129131
return acct->active;
130132
}
131133

@@ -170,7 +172,11 @@ static void acct_pin_kill(struct fs_pin *pin)
170172
{
171173
struct bsd_acct_struct *acct = to_acct(pin);
172174
mutex_lock(&acct->lock);
173-
do_acct_process(acct);
175+
/*
176+
* Fill the accounting struct with the exiting task's info
177+
* before punting to the workqueue.
178+
*/
179+
fill_ac(acct);
174180
schedule_work(&acct->work);
175181
wait_for_completion(&acct->done);
176182
cmpxchg(&acct->ns->bacct, pin, NULL);
@@ -183,6 +189,9 @@ static void close_work(struct work_struct *work)
183189
{
184190
struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work);
185191
struct file *file = acct->file;
192+
193+
/* We were fired by acct_pin_kill() which holds acct->lock. */
194+
acct_write_process(acct);
186195
if (file->f_op->flush)
187196
file->f_op->flush(file, NULL);
188197
__fput_sync(file);
@@ -409,13 +418,27 @@ static u32 encode_float(u64 value)
409418
* do_exit() or when switching to a different output file.
410419
*/
411420

412-
static void fill_ac(acct_t *ac)
421+
static void fill_ac(struct bsd_acct_struct *acct)
413422
{
414423
struct pacct_struct *pacct = &current->signal->pacct;
424+
struct file *file = acct->file;
425+
acct_t *ac = &acct->ac;
415426
u64 elapsed, run_time;
416427
time64_t btime;
417428
struct tty_struct *tty;
418429

430+
lockdep_assert_held(&acct->lock);
431+
432+
if (time_is_after_jiffies(acct->needcheck)) {
433+
acct->check_space = false;
434+
435+
/* Don't fill in @ac if nothing will be written. */
436+
if (!acct->active)
437+
return;
438+
} else {
439+
acct->check_space = true;
440+
}
441+
419442
/*
420443
* Fill the accounting struct with the needed info as recorded
421444
* by the different kernel functions.
@@ -463,64 +486,61 @@ static void fill_ac(acct_t *ac)
463486
ac->ac_majflt = encode_comp_t(pacct->ac_majflt);
464487
ac->ac_exitcode = pacct->ac_exitcode;
465488
spin_unlock_irq(&current->sighand->siglock);
466-
}
467-
/*
468-
* do_acct_process does all actual work. Caller holds the reference to file.
469-
*/
470-
static void do_acct_process(struct bsd_acct_struct *acct)
471-
{
472-
acct_t ac;
473-
unsigned long flim;
474-
const struct cred *orig_cred;
475-
struct file *file = acct->file;
476-
477-
/*
478-
* Accounting records are not subject to resource limits.
479-
*/
480-
flim = rlimit(RLIMIT_FSIZE);
481-
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
482-
/* Perform file operations on behalf of whoever enabled accounting */
483-
orig_cred = override_creds(file->f_cred);
484489

485-
/*
486-
* First check to see if there is enough free_space to continue
487-
* the process accounting system.
488-
*/
489-
if (!check_free_space(acct))
490-
goto out;
491-
492-
fill_ac(&ac);
493490
/* we really need to bite the bullet and change layout */
494-
ac.ac_uid = from_kuid_munged(file->f_cred->user_ns, orig_cred->uid);
495-
ac.ac_gid = from_kgid_munged(file->f_cred->user_ns, orig_cred->gid);
491+
ac->ac_uid = from_kuid_munged(file->f_cred->user_ns, current_uid());
492+
ac->ac_gid = from_kgid_munged(file->f_cred->user_ns, current_gid());
496493
#if ACCT_VERSION == 1 || ACCT_VERSION == 2
497494
/* backward-compatible 16 bit fields */
498-
ac.ac_uid16 = ac.ac_uid;
499-
ac.ac_gid16 = ac.ac_gid;
495+
ac->ac_uid16 = ac->ac_uid;
496+
ac->ac_gid16 = ac->ac_gid;
500497
#elif ACCT_VERSION == 3
501498
{
502499
struct pid_namespace *ns = acct->ns;
503500

504-
ac.ac_pid = task_tgid_nr_ns(current, ns);
501+
ac->ac_pid = task_tgid_nr_ns(current, ns);
505502
rcu_read_lock();
506-
ac.ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent),
507-
ns);
503+
ac->ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent), ns);
508504
rcu_read_unlock();
509505
}
510506
#endif
507+
}
508+
509+
static void acct_write_process(struct bsd_acct_struct *acct)
510+
{
511+
struct file *file = acct->file;
512+
const struct cred *cred;
513+
acct_t *ac = &acct->ac;
514+
515+
/* Perform file operations on behalf of whoever enabled accounting */
516+
cred = override_creds(file->f_cred);
517+
511518
/*
512-
* Get freeze protection. If the fs is frozen, just skip the write
513-
* as we could deadlock the system otherwise.
519+
* First check to see if there is enough free_space to continue
520+
* the process accounting system. Then get freeze protection. If
521+
* the fs is frozen, just skip the write as we could deadlock
522+
* the system otherwise.
514523
*/
515-
if (file_start_write_trylock(file)) {
524+
if (check_free_space(acct) && file_start_write_trylock(file)) {
516525
/* it's been opened O_APPEND, so position is irrelevant */
517526
loff_t pos = 0;
518-
__kernel_write(file, &ac, sizeof(acct_t), &pos);
527+
__kernel_write(file, ac, sizeof(acct_t), &pos);
519528
file_end_write(file);
520529
}
521-
out:
530+
531+
revert_creds(cred);
532+
}
533+
534+
static void do_acct_process(struct bsd_acct_struct *acct)
535+
{
536+
unsigned long flim;
537+
538+
/* Accounting records are not subject to resource limits. */
539+
flim = rlimit(RLIMIT_FSIZE);
540+
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
541+
fill_ac(acct);
542+
acct_write_process(acct);
522543
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
523-
revert_creds(orig_cred);
524544
}
525545

526546
/**

0 commit comments

Comments
 (0)