Skip to content

Commit 06b8760

Browse files
committed
coredump: hand a pidfd to the usermode coredump helper
JIRA: https://issues.redhat.com/browse/RHEL-107520 Conflicts: A merge conflict with the COREDUMP_PIDFD_NUMBER hunk of fs/coredump.c due to missing upstream commit 4bbf9c3 ("fs/coredump: Enable dynamic configuration of max file note size"). commit b5325b2 Author: Christian Brauner <brauner@kernel.org> Date: Mon, 14 Apr 2025 15:55:07 +0200 coredump: hand a pidfd to the usermode coredump helper Give userspace a way to instruct the kernel to install a pidfd into the usermode helper process. This makes coredump handling a lot more reliable for userspace. In parallel with this commit we already have systemd adding support for this in [1]. We create a pidfs file for the coredumping process when we process the corename pattern. When the usermode helper process is forked we then install the pidfs file as file descriptor three into the usermode helper's file descriptor table so it's available to the exec'd program. Since usermode helpers are either children of the system_unbound_wq workqueue or kthreadd we know that the file descriptor table is empty and can thus always use three as the file descriptor number. Note that we'll install a pidfd for the thread-group leader even if a subthread is calling do_coredump(). We know that task linkage hasn't been removed due to delay_group_leader() and even if this @current isn't the actual thread-group leader we know that the thread-group leader cannot be reaped until @current has exited. Link: systemd/systemd#37125 [1] Link: https://lore.kernel.org/20250414-work-coredump-v2-3-685bf231f828@kernel.org Tested-by: Luca Boccassi <luca.boccassi@gmail.com> Reviewed-by: Oleg Nesterov <oleg@redhat.com> Signed-off-by: Christian Brauner <brauner@kernel.org> Signed-off-by: Waiman Long <longman@redhat.com>
1 parent 907c49a commit 06b8760

File tree

2 files changed

+53
-4
lines changed

2 files changed

+53
-4
lines changed

fs/coredump.c

Lines changed: 52 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@
4343
#include <linux/timekeeping.h>
4444
#include <linux/sysctl.h>
4545
#include <linux/elf.h>
46+
#include <linux/pidfs.h>
47+
#include <uapi/linux/pidfd.h>
4648

4749
#include <linux/uaccess.h>
4850
#include <asm/mmu_context.h>
@@ -56,6 +58,12 @@
5658

5759
static bool dump_vma_snapshot(struct coredump_params *cprm);
5860
static void free_vma_snapshot(struct coredump_params *cprm);
61+
/*
62+
* File descriptor number for the pidfd for the thread-group leader of
63+
* the coredumping task installed into the usermode helper's file
64+
* descriptor table.
65+
*/
66+
#define COREDUMP_PIDFD_NUMBER 3
5967

6068
static int core_uses_pid;
6169
static unsigned int core_pipe_limit;
@@ -333,6 +341,27 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
333341
case 'C':
334342
err = cn_printf(cn, "%d", cprm->cpu);
335343
break;
344+
/* pidfd number */
345+
case 'F': {
346+
/*
347+
* Installing a pidfd only makes sense if
348+
* we actually spawn a usermode helper.
349+
*/
350+
if (!ispipe)
351+
break;
352+
353+
/*
354+
* Note that we'll install a pidfd for the
355+
* thread-group leader. We know that task
356+
* linkage hasn't been removed yet and even if
357+
* this @current isn't the actual thread-group
358+
* leader we know that the thread-group leader
359+
* cannot be reaped until @current has exited.
360+
*/
361+
cprm->pid = task_tgid(current);
362+
err = cn_printf(cn, "%d", COREDUMP_PIDFD_NUMBER);
363+
break;
364+
}
336365
default:
337366
break;
338367
}
@@ -487,7 +516,7 @@ static void wait_for_dump_helpers(struct file *file)
487516
}
488517

489518
/*
490-
* umh_pipe_setup
519+
* umh_coredump_setup
491520
* helper function to customize the process used
492521
* to collect the core in userspace. Specifically
493522
* it sets up a pipe and installs it as fd 0 (stdin)
@@ -497,12 +526,31 @@ static void wait_for_dump_helpers(struct file *file)
497526
* is a special value that we use to trap recursive
498527
* core dumps
499528
*/
500-
static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
529+
static int umh_coredump_setup(struct subprocess_info *info, struct cred *new)
501530
{
502531
struct file *files[2];
503532
struct coredump_params *cp = (struct coredump_params *)info->data;
504533
int err;
505534

535+
if (cp->pid) {
536+
struct file *pidfs_file __free(fput) = NULL;
537+
538+
pidfs_file = pidfs_alloc_file(cp->pid, 0);
539+
if (IS_ERR(pidfs_file))
540+
return PTR_ERR(pidfs_file);
541+
542+
/*
543+
* Usermode helpers are children of either
544+
* system_unbound_wq or of kthreadd. So we know that
545+
* we're starting off with a clean file descriptor
546+
* table. So we should always be able to use
547+
* COREDUMP_PIDFD_NUMBER as our file descriptor value.
548+
*/
549+
err = replace_fd(COREDUMP_PIDFD_NUMBER, pidfs_file, 0);
550+
if (err < 0)
551+
return err;
552+
}
553+
506554
err = create_pipe_files(files, 0);
507555
if (err)
508556
return err;
@@ -594,7 +642,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
594642
}
595643

596644
if (cprm.limit == 1) {
597-
/* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
645+
/* See umh_coredump_setup() which sets RLIMIT_CORE = 1.
598646
*
599647
* Normally core limits are irrelevant to pipes, since
600648
* we're not writing to the file system, but we use
@@ -639,7 +687,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
639687
retval = -ENOMEM;
640688
sub_info = call_usermodehelper_setup(helper_argv[0],
641689
helper_argv, NULL, GFP_KERNEL,
642-
umh_pipe_setup, NULL, &cprm);
690+
umh_coredump_setup, NULL, &cprm);
643691
if (sub_info)
644692
retval = call_usermodehelper_exec(sub_info,
645693
UMH_WAIT_EXEC);

include/linux/coredump.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ struct coredump_params {
2929
int vma_count;
3030
size_t vma_data_size;
3131
struct core_vma_metadata *vma_meta;
32+
struct pid *pid;
3233
};
3334

3435
/*

0 commit comments

Comments
 (0)