Skip to content

Commit f5d1dcf

Browse files
committed
Merge: Additional series arising from discussion in bug 2126350, RHEL-9 pro-active back port
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/4970 JIRA: https://issues.redhat.com/browse/RHEL-52956 Upstream status: Linus Following a discussion in Bug 2126350 it was agreed we should also apply an additional series that can help with kernfs contention as we are still struggling with resolving contention problems in kernfs in RHEL-8. Now it's been requested to pro-actively apply these changes to RHEL-9. There is one missing patch from the RHEL-8 series. It's the one that takes the definition of the kernfs root structure out of public view and has kABI impact, but it has already been applied so it is not needed here. Signed-off-by: Ian Kent <ikent@redhat.com> Approved-by: Brian Foster <bfoster@redhat.com> Approved-by: Carlos Maiolino <cmaiolino@redhat.com> Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com> Merged-by: Lucas Zampieri <lzampier@redhat.com>
2 parents 996297b + 5148946 commit f5d1dcf

File tree

5 files changed

+56
-35
lines changed

5 files changed

+56
-35
lines changed

fs/kernfs/dir.c

Lines changed: 40 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,16 @@
1717

1818
#include "kernfs-internal.h"
1919

20-
static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */
21-
static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by rename_lock */
20+
static DEFINE_RWLOCK(kernfs_rename_lock); /* kn->parent and ->name */
21+
/*
22+
* Don't use rename_lock to piggy back on pr_cont_buf. We don't want to
23+
* call pr_cont() while holding rename_lock. Because sometimes pr_cont()
24+
* will perform wakeups when releasing console_sem. Holding rename_lock
25+
* will introduce deadlock if the scheduler reads the kernfs_name in the
26+
* wakeup path.
27+
*/
28+
static DEFINE_SPINLOCK(kernfs_pr_cont_lock);
29+
static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by pr_cont_lock */
2230
static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */
2331

2432
#define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
@@ -189,9 +197,9 @@ int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
189197
unsigned long flags;
190198
int ret;
191199

192-
spin_lock_irqsave(&kernfs_rename_lock, flags);
200+
read_lock_irqsave(&kernfs_rename_lock, flags);
193201
ret = kernfs_name_locked(kn, buf, buflen);
194-
spin_unlock_irqrestore(&kernfs_rename_lock, flags);
202+
read_unlock_irqrestore(&kernfs_rename_lock, flags);
195203
return ret;
196204
}
197205

@@ -217,9 +225,9 @@ int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from,
217225
unsigned long flags;
218226
int ret;
219227

220-
spin_lock_irqsave(&kernfs_rename_lock, flags);
228+
read_lock_irqsave(&kernfs_rename_lock, flags);
221229
ret = kernfs_path_from_node_locked(to, from, buf, buflen);
222-
spin_unlock_irqrestore(&kernfs_rename_lock, flags);
230+
read_unlock_irqrestore(&kernfs_rename_lock, flags);
223231
return ret;
224232
}
225233
EXPORT_SYMBOL_GPL(kernfs_path_from_node);
@@ -234,12 +242,12 @@ void pr_cont_kernfs_name(struct kernfs_node *kn)
234242
{
235243
unsigned long flags;
236244

237-
spin_lock_irqsave(&kernfs_rename_lock, flags);
245+
spin_lock_irqsave(&kernfs_pr_cont_lock, flags);
238246

239-
kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf));
247+
kernfs_name(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf));
240248
pr_cont("%s", kernfs_pr_cont_buf);
241249

242-
spin_unlock_irqrestore(&kernfs_rename_lock, flags);
250+
spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags);
243251
}
244252

245253
/**
@@ -253,10 +261,10 @@ void pr_cont_kernfs_path(struct kernfs_node *kn)
253261
unsigned long flags;
254262
int sz;
255263

256-
spin_lock_irqsave(&kernfs_rename_lock, flags);
264+
spin_lock_irqsave(&kernfs_pr_cont_lock, flags);
257265

258-
sz = kernfs_path_from_node_locked(kn, NULL, kernfs_pr_cont_buf,
259-
sizeof(kernfs_pr_cont_buf));
266+
sz = kernfs_path_from_node(kn, NULL, kernfs_pr_cont_buf,
267+
sizeof(kernfs_pr_cont_buf));
260268
if (sz < 0) {
261269
pr_cont("(error)");
262270
goto out;
@@ -270,7 +278,7 @@ void pr_cont_kernfs_path(struct kernfs_node *kn)
270278
pr_cont("%s", kernfs_pr_cont_buf);
271279

272280
out:
273-
spin_unlock_irqrestore(&kernfs_rename_lock, flags);
281+
spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags);
274282
}
275283

276284
/**
@@ -285,10 +293,10 @@ struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn)
285293
struct kernfs_node *parent;
286294
unsigned long flags;
287295

288-
spin_lock_irqsave(&kernfs_rename_lock, flags);
296+
read_lock_irqsave(&kernfs_rename_lock, flags);
289297
parent = kn->parent;
290298
kernfs_get(parent);
291-
spin_unlock_irqrestore(&kernfs_rename_lock, flags);
299+
read_unlock_irqrestore(&kernfs_rename_lock, flags);
292300

293301
return parent;
294302
}
@@ -374,9 +382,11 @@ static int kernfs_link_sibling(struct kernfs_node *kn)
374382
rb_insert_color(&kn->rb, &kn->parent->dir.children);
375383

376384
/* successfully added, account subdir number */
385+
down_write(&kernfs_root(kn)->kernfs_iattr_rwsem);
377386
if (kernfs_type(kn) == KERNFS_DIR)
378387
kn->parent->dir.subdirs++;
379388
kernfs_inc_rev(kn->parent);
389+
up_write(&kernfs_root(kn)->kernfs_iattr_rwsem);
380390

381391
return 0;
382392
}
@@ -397,9 +407,11 @@ static bool kernfs_unlink_sibling(struct kernfs_node *kn)
397407
if (RB_EMPTY_NODE(&kn->rb))
398408
return false;
399409

410+
down_write(&kernfs_root(kn)->kernfs_iattr_rwsem);
400411
if (kernfs_type(kn) == KERNFS_DIR)
401412
kn->parent->dir.subdirs--;
402413
kernfs_inc_rev(kn->parent);
414+
up_write(&kernfs_root(kn)->kernfs_iattr_rwsem);
403415

404416
rb_erase(&kn->rb, &kn->parent->dir.children);
405417
RB_CLEAR_NODE(&kn->rb);
@@ -759,12 +771,15 @@ int kernfs_add_one(struct kernfs_node *kn)
759771
goto out_unlock;
760772

761773
/* Update timestamps on the parent */
774+
down_write(&root->kernfs_iattr_rwsem);
775+
762776
ps_iattr = parent->iattr;
763777
if (ps_iattr) {
764778
ktime_get_real_ts64(&ps_iattr->ia_ctime);
765779
ps_iattr->ia_mtime = ps_iattr->ia_ctime;
766780
}
767781

782+
up_write(&root->kernfs_iattr_rwsem);
768783
up_write(&root->kernfs_rwsem);
769784

770785
/*
@@ -834,13 +849,12 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,
834849

835850
lockdep_assert_held_read(&kernfs_root(parent)->kernfs_rwsem);
836851

837-
/* grab kernfs_rename_lock to piggy back on kernfs_pr_cont_buf */
838-
spin_lock_irq(&kernfs_rename_lock);
852+
spin_lock_irq(&kernfs_pr_cont_lock);
839853

840854
len = strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf));
841855

842856
if (len >= sizeof(kernfs_pr_cont_buf)) {
843-
spin_unlock_irq(&kernfs_rename_lock);
857+
spin_unlock_irq(&kernfs_pr_cont_lock);
844858
return NULL;
845859
}
846860

@@ -852,7 +866,7 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,
852866
parent = kernfs_find_ns(parent, name, ns);
853867
}
854868

855-
spin_unlock_irq(&kernfs_rename_lock);
869+
spin_unlock_irq(&kernfs_pr_cont_lock);
856870

857871
return parent;
858872
}
@@ -927,6 +941,8 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
927941

928942
idr_init(&root->ino_idr);
929943
init_rwsem(&root->kernfs_rwsem);
944+
init_rwsem(&root->kernfs_iattr_rwsem);
945+
init_rwsem(&root->kernfs_supers_rwsem);
930946
INIT_LIST_HEAD(&root->supers);
931947

932948
/*
@@ -1445,11 +1461,14 @@ static void __kernfs_remove(struct kernfs_node *kn)
14451461
pos->parent ? pos->parent->iattr : NULL;
14461462

14471463
/* update timestamps on the parent */
1464+
down_write(&kernfs_root(kn)->kernfs_iattr_rwsem);
1465+
14481466
if (ps_iattr) {
14491467
ktime_get_real_ts64(&ps_iattr->ia_ctime);
14501468
ps_iattr->ia_mtime = ps_iattr->ia_ctime;
14511469
}
14521470

1471+
up_write(&kernfs_root(kn)->kernfs_iattr_rwsem);
14531472
kernfs_put(pos);
14541473
}
14551474

@@ -1701,7 +1720,7 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
17011720
kernfs_get(new_parent);
17021721

17031722
/* rename_lock protects ->parent and ->name accessors */
1704-
spin_lock_irq(&kernfs_rename_lock);
1723+
write_lock_irq(&kernfs_rename_lock);
17051724

17061725
old_parent = kn->parent;
17071726
kn->parent = new_parent;
@@ -1712,7 +1731,7 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
17121731
kn->name = new_name;
17131732
}
17141733

1715-
spin_unlock_irq(&kernfs_rename_lock);
1734+
write_unlock_irq(&kernfs_rename_lock);
17161735

17171736
kn->hash = kernfs_name_hash(kn->name, kn->ns);
17181737
kernfs_link_sibling(kn);

fs/kernfs/file.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -923,8 +923,8 @@ static void kernfs_notify_workfn(struct work_struct *work)
923923

924924
root = kernfs_root(kn);
925925
/* kick fsnotify */
926-
down_write(&root->kernfs_rwsem);
927926

927+
down_read(&root->kernfs_supers_rwsem);
928928
list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
929929
struct kernfs_node *parent;
930930
struct inode *p_inode = NULL;
@@ -961,7 +961,7 @@ static void kernfs_notify_workfn(struct work_struct *work)
961961
iput(inode);
962962
}
963963

964-
up_write(&root->kernfs_rwsem);
964+
up_read(&root->kernfs_supers_rwsem);
965965
kernfs_put(kn);
966966
goto repeat;
967967
}

fs/kernfs/inode.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -101,9 +101,9 @@ int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
101101
int ret;
102102
struct kernfs_root *root = kernfs_root(kn);
103103

104-
down_write(&root->kernfs_rwsem);
104+
down_write(&root->kernfs_iattr_rwsem);
105105
ret = __kernfs_setattr(kn, iattr);
106-
up_write(&root->kernfs_rwsem);
106+
up_write(&root->kernfs_iattr_rwsem);
107107
return ret;
108108
}
109109

@@ -119,7 +119,7 @@ int kernfs_iop_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
119119
return -EINVAL;
120120

121121
root = kernfs_root(kn);
122-
down_write(&root->kernfs_rwsem);
122+
down_write(&root->kernfs_iattr_rwsem);
123123
error = setattr_prepare(&init_user_ns, dentry, iattr);
124124
if (error)
125125
goto out;
@@ -132,7 +132,7 @@ int kernfs_iop_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
132132
setattr_copy(&init_user_ns, inode, iattr);
133133

134134
out:
135-
up_write(&root->kernfs_rwsem);
135+
up_write(&root->kernfs_iattr_rwsem);
136136
return error;
137137
}
138138

@@ -189,10 +189,10 @@ int kernfs_iop_getattr(struct user_namespace *mnt_userns,
189189
struct kernfs_node *kn = inode->i_private;
190190
struct kernfs_root *root = kernfs_root(kn);
191191

192-
down_read(&root->kernfs_rwsem);
192+
down_read(&root->kernfs_iattr_rwsem);
193193
kernfs_refresh_inode(kn, inode);
194194
generic_fillattr(&init_user_ns, inode, stat);
195-
up_read(&root->kernfs_rwsem);
195+
up_read(&root->kernfs_iattr_rwsem);
196196

197197
return 0;
198198
}
@@ -285,10 +285,10 @@ int kernfs_iop_permission(struct user_namespace *mnt_userns,
285285
kn = inode->i_private;
286286
root = kernfs_root(kn);
287287

288-
down_read(&root->kernfs_rwsem);
288+
down_read(&root->kernfs_iattr_rwsem);
289289
kernfs_refresh_inode(kn, inode);
290290
ret = generic_permission(&init_user_ns, inode, mask);
291-
up_read(&root->kernfs_rwsem);
291+
up_read(&root->kernfs_iattr_rwsem);
292292

293293
return ret;
294294
}

fs/kernfs/kernfs-internal.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ struct kernfs_root {
4747

4848
wait_queue_head_t deactivate_waitq;
4949
struct rw_semaphore kernfs_rwsem;
50+
RH_KABI_EXTEND(struct rw_semaphore kernfs_iattr_rwsem)
51+
RH_KABI_EXTEND(struct rw_semaphore kernfs_supers_rwsem)
5052
};
5153

5254
/* +1 to avoid triggering overflow warning when negating it */

fs/kernfs/mount.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -347,9 +347,9 @@ int kernfs_get_tree(struct fs_context *fc)
347347
}
348348
sb->s_flags |= SB_ACTIVE;
349349

350-
down_write(&root->kernfs_rwsem);
350+
down_write(&root->kernfs_supers_rwsem);
351351
list_add(&info->node, &info->root->supers);
352-
up_write(&root->kernfs_rwsem);
352+
up_write(&root->kernfs_supers_rwsem);
353353
}
354354

355355
fc->root = dget(sb->s_root);
@@ -376,9 +376,9 @@ void kernfs_kill_sb(struct super_block *sb)
376376
struct kernfs_super_info *info = kernfs_info(sb);
377377
struct kernfs_root *root = info->root;
378378

379-
down_write(&root->kernfs_rwsem);
379+
down_write(&root->kernfs_supers_rwsem);
380380
list_del(&info->node);
381-
up_write(&root->kernfs_rwsem);
381+
up_write(&root->kernfs_supers_rwsem);
382382

383383
/*
384384
* Remove the superblock from fs_supers/s_instances

0 commit comments

Comments
 (0)