Skip to content

Commit 1902a85

Browse files
author
CKI KWF Bot
committed
Merge: Backport pidfd/pidfs commits to enable passing a pidfd to the coredump usermode helper
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-10/-/merge_requests/1420 JIRA: https://issues.redhat.com/browse/RHEL-113598 CVE: CVE-2025-38306 The goal of this MR is to backport enough upstream pidfd/pidfs-related commits to enable systemd to pass a pidfd to the coredump usermode helper in order to fix CVE-2025-4598. Signed-off-by: Waiman Long <longman@redhat.com> Approved-by: Rafael Aquini <raquini@redhat.com> Approved-by: Herton R. Krzesinski <herton@redhat.com> Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com> Merged-by: CKI GitLab Kmaint Pipeline Bot <26919896-cki-kmaint-pipeline-bot@users.noreply.gitlab.com>
2 parents 2adeb84 + 7144f79 commit 1902a85

File tree

29 files changed

+1768
-299
lines changed

29 files changed

+1768
-299
lines changed

Documentation/admin-guide/sysctl/kernel.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ core_pattern
177177
%E executable path
178178
%c maximum size of core file by resource limit RLIMIT_CORE
179179
%C CPU the task ran on
180+
%F pidfd number
180181
%<OTHER> both are dropped
181182
======== ==========================================
182183

fs/coredump.c

Lines changed: 155 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@
4343
#include <linux/timekeeping.h>
4444
#include <linux/sysctl.h>
4545
#include <linux/elf.h>
46+
#include <linux/pidfs.h>
47+
#include <uapi/linux/pidfd.h>
4648

4749
#include <linux/uaccess.h>
4850
#include <asm/mmu_context.h>
@@ -60,6 +62,12 @@ static void free_vma_snapshot(struct coredump_params *cprm);
6062
#define CORE_FILE_NOTE_SIZE_DEFAULT (4*1024*1024)
6163
/* Define a reasonable max cap */
6264
#define CORE_FILE_NOTE_SIZE_MAX (16*1024*1024)
65+
/*
66+
* File descriptor number for the pidfd for the thread-group leader of
67+
* the coredumping task installed into the usermode helper's file
68+
* descriptor table.
69+
*/
70+
#define COREDUMP_PIDFD_NUMBER 3
6371

6472
static int core_uses_pid;
6573
static unsigned int core_pipe_limit;
@@ -68,9 +76,15 @@ static char core_pattern[CORENAME_MAX_SIZE] = "core";
6876
static int core_name_size = CORENAME_MAX_SIZE;
6977
unsigned int core_file_note_size_limit = CORE_FILE_NOTE_SIZE_DEFAULT;
7078

79+
enum coredump_type_t {
80+
COREDUMP_FILE = 1,
81+
COREDUMP_PIPE = 2,
82+
};
83+
7184
struct core_name {
7285
char *corename;
7386
int used, size;
87+
enum coredump_type_t core_type;
7488
};
7589

7690
static int expand_corename(struct core_name *cn, int size)
@@ -210,18 +224,21 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
210224
{
211225
const struct cred *cred = current_cred();
212226
const char *pat_ptr = core_pattern;
213-
int ispipe = (*pat_ptr == '|');
214227
bool was_space = false;
215228
int pid_in_pattern = 0;
216229
int err = 0;
217230

218231
cn->used = 0;
219232
cn->corename = NULL;
233+
if (*pat_ptr == '|')
234+
cn->core_type = COREDUMP_PIPE;
235+
else
236+
cn->core_type = COREDUMP_FILE;
220237
if (expand_corename(cn, core_name_size))
221238
return -ENOMEM;
222239
cn->corename[0] = '\0';
223240

224-
if (ispipe) {
241+
if (cn->core_type == COREDUMP_PIPE) {
225242
int argvs = sizeof(core_pattern) / 2;
226243
(*argv) = kmalloc_array(argvs, sizeof(**argv), GFP_KERNEL);
227244
if (!(*argv))
@@ -239,7 +256,7 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
239256
* Split on spaces before doing template expansion so that
240257
* %e and %E don't get split if they have spaces in them
241258
*/
242-
if (ispipe) {
259+
if (cn->core_type == COREDUMP_PIPE) {
243260
if (isspace(*pat_ptr)) {
244261
if (cn->used != 0)
245262
was_space = true;
@@ -339,6 +356,27 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
339356
case 'C':
340357
err = cn_printf(cn, "%d", cprm->cpu);
341358
break;
359+
/* pidfd number */
360+
case 'F': {
361+
/*
362+
* Installing a pidfd only makes sense if
363+
* we actually spawn a usermode helper.
364+
*/
365+
if (cn->core_type != COREDUMP_PIPE)
366+
break;
367+
368+
/*
369+
* Note that we'll install a pidfd for the
370+
* thread-group leader. We know that task
371+
* linkage hasn't been removed yet and even if
372+
* this @current isn't the actual thread-group
373+
* leader we know that the thread-group leader
374+
* cannot be reaped until @current has exited.
375+
*/
376+
cprm->pid = task_tgid(current);
377+
err = cn_printf(cn, "%d", COREDUMP_PIDFD_NUMBER);
378+
break;
379+
}
342380
default:
343381
break;
344382
}
@@ -355,12 +393,9 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
355393
* If core_pattern does not include a %p (as is the default)
356394
* and core_uses_pid is set, then .%pid will be appended to
357395
* the filename. Do not do this for piped commands. */
358-
if (!ispipe && !pid_in_pattern && core_uses_pid) {
359-
err = cn_printf(cn, ".%d", task_tgid_vnr(current));
360-
if (err)
361-
return err;
362-
}
363-
return ispipe;
396+
if (cn->core_type == COREDUMP_FILE && !pid_in_pattern && core_uses_pid)
397+
return cn_printf(cn, ".%d", task_tgid_vnr(current));
398+
return 0;
364399
}
365400

366401
static int zap_process(struct signal_struct *signal, int exit_code)
@@ -493,7 +528,7 @@ static void wait_for_dump_helpers(struct file *file)
493528
}
494529

495530
/*
496-
* umh_pipe_setup
531+
* umh_coredump_setup
497532
* helper function to customize the process used
498533
* to collect the core in userspace. Specifically
499534
* it sets up a pipe and installs it as fd 0 (stdin)
@@ -503,22 +538,46 @@ static void wait_for_dump_helpers(struct file *file)
503538
* is a special value that we use to trap recursive
504539
* core dumps
505540
*/
506-
static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
541+
static int umh_coredump_setup(struct subprocess_info *info, struct cred *new)
507542
{
508543
struct file *files[2];
509544
struct coredump_params *cp = (struct coredump_params *)info->data;
510-
int err = create_pipe_files(files, 0);
545+
int err;
546+
547+
if (cp->pid) {
548+
struct file *pidfs_file __free(fput) = NULL;
549+
550+
pidfs_file = pidfs_alloc_file(cp->pid, 0);
551+
if (IS_ERR(pidfs_file))
552+
return PTR_ERR(pidfs_file);
553+
554+
/*
555+
* Usermode helpers are children of either
556+
* system_unbound_wq or of kthreadd. So we know that
557+
* we're starting off with a clean file descriptor
558+
* table. So we should always be able to use
559+
* COREDUMP_PIDFD_NUMBER as our file descriptor value.
560+
*/
561+
err = replace_fd(COREDUMP_PIDFD_NUMBER, pidfs_file, 0);
562+
if (err < 0)
563+
return err;
564+
}
565+
566+
err = create_pipe_files(files, 0);
511567
if (err)
512568
return err;
513569

514570
cp->file = files[1];
515571

516572
err = replace_fd(0, files[0], 0);
517573
fput(files[0]);
574+
if (err < 0)
575+
return err;
576+
518577
/* and disallow core files too */
519578
current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
520579

521-
return err;
580+
return 0;
522581
}
523582

524583
void do_coredump(const kernel_siginfo_t *siginfo)
@@ -530,7 +589,6 @@ void do_coredump(const kernel_siginfo_t *siginfo)
530589
const struct cred *old_cred;
531590
struct cred *cred;
532591
int retval = 0;
533-
int ispipe;
534592
size_t *argv = NULL;
535593
int argc = 0;
536594
/* require nonrelative corefile path and be extra careful */
@@ -579,70 +637,14 @@ void do_coredump(const kernel_siginfo_t *siginfo)
579637

580638
old_cred = override_creds(cred);
581639

582-
ispipe = format_corename(&cn, &cprm, &argv, &argc);
583-
584-
if (ispipe) {
585-
int argi;
586-
int dump_count;
587-
char **helper_argv;
588-
struct subprocess_info *sub_info;
589-
590-
if (ispipe < 0) {
591-
coredump_report_failure("format_corename failed, aborting core");
592-
goto fail_unlock;
593-
}
594-
595-
if (cprm.limit == 1) {
596-
/* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
597-
*
598-
* Normally core limits are irrelevant to pipes, since
599-
* we're not writing to the file system, but we use
600-
* cprm.limit of 1 here as a special value, this is a
601-
* consistent way to catch recursive crashes.
602-
* We can still crash if the core_pattern binary sets
603-
* RLIM_CORE = !1, but it runs as root, and can do
604-
* lots of stupid things.
605-
*
606-
* Note that we use task_tgid_vnr here to grab the pid
607-
* of the process group leader. That way we get the
608-
* right pid if a thread in a multi-threaded
609-
* core_pattern process dies.
610-
*/
611-
coredump_report_failure("RLIMIT_CORE is set to 1, aborting core");
612-
goto fail_unlock;
613-
}
614-
cprm.limit = RLIM_INFINITY;
615-
616-
dump_count = atomic_inc_return(&core_dump_count);
617-
if (core_pipe_limit && (core_pipe_limit < dump_count)) {
618-
coredump_report_failure("over core_pipe_limit, skipping core dump");
619-
goto fail_dropcount;
620-
}
621-
622-
helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv),
623-
GFP_KERNEL);
624-
if (!helper_argv) {
625-
coredump_report_failure("%s failed to allocate memory", __func__);
626-
goto fail_dropcount;
627-
}
628-
for (argi = 0; argi < argc; argi++)
629-
helper_argv[argi] = cn.corename + argv[argi];
630-
helper_argv[argi] = NULL;
631-
632-
retval = -ENOMEM;
633-
sub_info = call_usermodehelper_setup(helper_argv[0],
634-
helper_argv, NULL, GFP_KERNEL,
635-
umh_pipe_setup, NULL, &cprm);
636-
if (sub_info)
637-
retval = call_usermodehelper_exec(sub_info,
638-
UMH_WAIT_EXEC);
640+
retval = format_corename(&cn, &cprm, &argv, &argc);
641+
if (retval < 0) {
642+
coredump_report_failure("format_corename failed, aborting core");
643+
goto fail_unlock;
644+
}
639645

640-
kfree(helper_argv);
641-
if (retval) {
642-
coredump_report_failure("|%s pipe failed", cn.corename);
643-
goto close_fail;
644-
}
645-
} else {
646+
switch (cn.core_type) {
647+
case COREDUMP_FILE: {
646648
struct mnt_idmap *idmap;
647649
struct inode *inode;
648650
int open_flags = O_CREAT | O_WRONLY | O_NOFOLLOW |
@@ -736,6 +738,69 @@ void do_coredump(const kernel_siginfo_t *siginfo)
736738
if (do_truncate(idmap, cprm.file->f_path.dentry,
737739
0, 0, cprm.file))
738740
goto close_fail;
741+
break;
742+
}
743+
case COREDUMP_PIPE: {
744+
int argi;
745+
int dump_count;
746+
char **helper_argv;
747+
struct subprocess_info *sub_info;
748+
749+
if (cprm.limit == 1) {
750+
/* See umh_coredump_setup() which sets RLIMIT_CORE = 1.
751+
*
752+
* Normally core limits are irrelevant to pipes, since
753+
* we're not writing to the file system, but we use
754+
* cprm.limit of 1 here as a special value, this is a
755+
* consistent way to catch recursive crashes.
756+
* We can still crash if the core_pattern binary sets
757+
* RLIMIT_CORE != 1, but it runs as root, and can do
758+
* lots of stupid things.
759+
*
760+
* Note that we use task_tgid_vnr here to grab the pid
761+
* of the process group leader. That way we get the
762+
* right pid if a thread in a multi-threaded
763+
* core_pattern process dies.
764+
*/
765+
coredump_report_failure("RLIMIT_CORE is set to 1, aborting core");
766+
goto fail_unlock;
767+
}
768+
cprm.limit = RLIM_INFINITY;
769+
770+
dump_count = atomic_inc_return(&core_dump_count);
771+
if (core_pipe_limit && (core_pipe_limit < dump_count)) {
772+
coredump_report_failure("over core_pipe_limit, skipping core dump");
773+
goto fail_dropcount;
774+
}
775+
776+
helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv),
777+
GFP_KERNEL);
778+
if (!helper_argv) {
779+
coredump_report_failure("%s failed to allocate memory", __func__);
780+
goto fail_dropcount;
781+
}
782+
for (argi = 0; argi < argc; argi++)
783+
helper_argv[argi] = cn.corename + argv[argi];
784+
helper_argv[argi] = NULL;
785+
786+
retval = -ENOMEM;
787+
sub_info = call_usermodehelper_setup(helper_argv[0],
788+
helper_argv, NULL, GFP_KERNEL,
789+
umh_coredump_setup, NULL, &cprm);
790+
if (sub_info)
791+
retval = call_usermodehelper_exec(sub_info,
792+
UMH_WAIT_EXEC);
793+
794+
kfree(helper_argv);
795+
if (retval) {
796+
coredump_report_failure("|%s pipe failed", cn.corename);
797+
goto close_fail;
798+
}
799+
break;
800+
}
801+
default:
802+
WARN_ON_ONCE(true);
803+
goto close_fail;
739804
}
740805

741806
/* get us an unshared descriptor table; almost always a no-op */
@@ -770,13 +835,13 @@ void do_coredump(const kernel_siginfo_t *siginfo)
770835
file_end_write(cprm.file);
771836
free_vma_snapshot(&cprm);
772837
}
773-
if (ispipe && core_pipe_limit)
838+
if ((cn.core_type == COREDUMP_PIPE) && core_pipe_limit)
774839
wait_for_dump_helpers(cprm.file);
775840
close_fail:
776841
if (cprm.file)
777842
filp_close(cprm.file, NULL);
778843
fail_dropcount:
779-
if (ispipe)
844+
if (cn.core_type == COREDUMP_PIPE)
780845
atomic_dec(&core_dump_count);
781846
fail_unlock:
782847
kfree(argv);
@@ -799,10 +864,9 @@ static int __dump_emit(struct coredump_params *cprm, const void *addr, int nr)
799864
struct file *file = cprm->file;
800865
loff_t pos = file->f_pos;
801866
ssize_t n;
867+
802868
if (cprm->written + nr > cprm->limit)
803869
return 0;
804-
805-
806870
if (dump_interrupted())
807871
return 0;
808872
n = __kernel_write(file, addr, nr, &pos);
@@ -819,20 +883,21 @@ static int __dump_skip(struct coredump_params *cprm, size_t nr)
819883
{
820884
static char zeroes[PAGE_SIZE];
821885
struct file *file = cprm->file;
886+
822887
if (file->f_mode & FMODE_LSEEK) {
823-
if (dump_interrupted() ||
824-
vfs_llseek(file, nr, SEEK_CUR) < 0)
888+
if (dump_interrupted() || vfs_llseek(file, nr, SEEK_CUR) < 0)
825889
return 0;
826890
cprm->pos += nr;
827891
return 1;
828-
} else {
829-
while (nr > PAGE_SIZE) {
830-
if (!__dump_emit(cprm, zeroes, PAGE_SIZE))
831-
return 0;
832-
nr -= PAGE_SIZE;
833-
}
834-
return __dump_emit(cprm, zeroes, nr);
835892
}
893+
894+
while (nr > PAGE_SIZE) {
895+
if (!__dump_emit(cprm, zeroes, PAGE_SIZE))
896+
return 0;
897+
nr -= PAGE_SIZE;
898+
}
899+
900+
return __dump_emit(cprm, zeroes, nr);
836901
}
837902

838903
int dump_emit(struct coredump_params *cprm, const void *addr, int nr)

0 commit comments

Comments
 (0)