Skip to content

Commit af84d28

Browse files
author
Ian Kent
committed
kernfs: Separate kernfs_pr_cont_buf and rename_lock
JIRA: https://issues.redhat.com/browse/RHEL-52956
Upstream status: Linus

commit 1a702dc
Author: Hao Luo <haoluo@google.com>
Date: Mon May 16 12:09:51 2022 -0700

kernfs: Separate kernfs_pr_cont_buf and rename_lock.

Previously the protection of kernfs_pr_cont_buf was piggy backed by rename_lock, which means that pr_cont() needs to be protected under rename_lock. This can cause potential circular lock dependencies.

If there is an OOM, we have the following call hierarchy:

 -> cpuset_print_current_mems_allowed()
   -> pr_cont_cgroup_name()
     -> pr_cont_kernfs_name()

pr_cont_kernfs_name() will grab rename_lock and call printk. So we have the following lock dependencies:

 kernfs_rename_lock -> console_sem

Sometimes, printk does a wakeup before releasing console_sem, which has the dependence chain:

 console_sem -> p->pi_lock -> rq->lock

Now, imagine one wants to read cgroup_name under rq->lock, for example, printing cgroup_name in a tracepoint in the scheduler code. They will be holding rq->lock and take rename_lock:

 rq->lock -> kernfs_rename_lock

Now they will deadlock.

A prevention to this circular lock dependency is to separate the protection of pr_cont_buf from rename_lock. In principle, rename_lock is to protect the integrity of cgroup name when copying to buf. Once pr_cont_buf has got its content, rename_lock can be dropped. So it's safe to drop rename_lock after kernfs_name_locked (and kernfs_path_from_node_locked) and rely on a dedicated pr_cont_lock to protect pr_cont_buf.

Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Hao Luo <haoluo@google.com>
Link: https://lore.kernel.org/r/20220516190951.3144144-1-haoluo@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Ian Kent <ikent@redhat.com>
1 parent e203ef1 commit af84d28

File tree

1 file changed

+19
-12
lines changed

1 file changed

+19
-12
lines changed

fs/kernfs/dir.c

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,15 @@
1818
#include "kernfs-internal.h"
1919

2020
static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */
21-
static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by rename_lock */
21+
/*
22+
* Don't use rename_lock to piggy back on pr_cont_buf. We don't want to
23+
* call pr_cont() while holding rename_lock. Because sometimes pr_cont()
24+
* will perform wakeups when releasing console_sem. Holding rename_lock
25+
* will introduce deadlock if the scheduler reads the kernfs_name in the
26+
* wakeup path.
27+
*/
28+
static DEFINE_SPINLOCK(kernfs_pr_cont_lock);
29+
static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by pr_cont_lock */
2230
static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */
2331

2432
#define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
@@ -234,12 +242,12 @@ void pr_cont_kernfs_name(struct kernfs_node *kn)
234242
{
235243
unsigned long flags;
236244

237-
spin_lock_irqsave(&kernfs_rename_lock, flags);
245+
spin_lock_irqsave(&kernfs_pr_cont_lock, flags);
238246

239-
kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf));
247+
kernfs_name(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf));
240248
pr_cont("%s", kernfs_pr_cont_buf);
241249

242-
spin_unlock_irqrestore(&kernfs_rename_lock, flags);
250+
spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags);
243251
}
244252

245253
/**
@@ -253,10 +261,10 @@ void pr_cont_kernfs_path(struct kernfs_node *kn)
253261
unsigned long flags;
254262
int sz;
255263

256-
spin_lock_irqsave(&kernfs_rename_lock, flags);
264+
spin_lock_irqsave(&kernfs_pr_cont_lock, flags);
257265

258-
sz = kernfs_path_from_node_locked(kn, NULL, kernfs_pr_cont_buf,
259-
sizeof(kernfs_pr_cont_buf));
266+
sz = kernfs_path_from_node(kn, NULL, kernfs_pr_cont_buf,
267+
sizeof(kernfs_pr_cont_buf));
260268
if (sz < 0) {
261269
pr_cont("(error)");
262270
goto out;
@@ -270,7 +278,7 @@ void pr_cont_kernfs_path(struct kernfs_node *kn)
270278
pr_cont("%s", kernfs_pr_cont_buf);
271279

272280
out:
273-
spin_unlock_irqrestore(&kernfs_rename_lock, flags);
281+
spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags);
274282
}
275283

276284
/**
@@ -841,13 +849,12 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,
841849

842850
lockdep_assert_held_read(&kernfs_root(parent)->kernfs_rwsem);
843851

844-
/* grab kernfs_rename_lock to piggy back on kernfs_pr_cont_buf */
845-
spin_lock_irq(&kernfs_rename_lock);
852+
spin_lock_irq(&kernfs_pr_cont_lock);
846853

847854
len = strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf));
848855

849856
if (len >= sizeof(kernfs_pr_cont_buf)) {
850-
spin_unlock_irq(&kernfs_rename_lock);
857+
spin_unlock_irq(&kernfs_pr_cont_lock);
851858
return NULL;
852859
}
853860

@@ -859,7 +866,7 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,
859866
parent = kernfs_find_ns(parent, name, ns);
860867
}
861868

862-
spin_unlock_irq(&kernfs_rename_lock);
869+
spin_unlock_irq(&kernfs_pr_cont_lock);
863870

864871
return parent;
865872
}

0 commit comments

Comments
 (0)