Skip to content

Commit bb65ee2

Browse files
author
CKI KWF Bot
committed
Merge: ceph: fix client race condition validating r_parent before applying state
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/7303

ceph: fix client race condition validating r_parent before applying state

JIRA: https://issues.redhat.com/browse/RHEL-109212
Signed-off-by: Alex Markuze <amarkuze@redhat.com>
Approved-by: Ilya Dryomov <idryomov@redhat.com>
Approved-by: Xiubo Li <xiubli@redhat.com>
Approved-by: Venky Shankar <vshankar@redhat.com>
Approved-by: David Howells <dhowells@redhat.com>
Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com>
Merged-by: CKI GitLab Kmaint Pipeline Bot <26919896-cki-kmaint-pipeline-bot@users.noreply.gitlab.com>
2 parents 63e9a83 + 5707b2f commit bb65ee2

File tree

6 files changed

+201
-101
lines changed

6 files changed

+201
-101
lines changed

fs/ceph/debugfs.c

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,6 @@ static int mdsc_show(struct seq_file *s, void *p)
5555
struct ceph_mds_client *mdsc = fsc->mdsc;
5656
struct ceph_mds_request *req;
5757
struct rb_node *rp;
58-
int pathlen = 0;
59-
u64 pathbase;
6058
char *path;
6159

6260
mutex_lock(&mdsc->mutex);
@@ -81,8 +79,8 @@ static int mdsc_show(struct seq_file *s, void *p)
8179
if (req->r_inode) {
8280
seq_printf(s, " #%llx", ceph_ino(req->r_inode));
8381
} else if (req->r_dentry) {
84-
path = ceph_mdsc_build_path(req->r_dentry, &pathlen,
85-
&pathbase, 0);
82+
struct ceph_path_info path_info;
83+
path = ceph_mdsc_build_path(req->r_dentry, &path_info, 0);
8684
if (IS_ERR(path))
8785
path = NULL;
8886
spin_lock(&req->r_dentry->d_lock);
@@ -91,7 +89,7 @@ static int mdsc_show(struct seq_file *s, void *p)
9189
req->r_dentry,
9290
path ? path : "");
9391
spin_unlock(&req->r_dentry->d_lock);
94-
ceph_mdsc_free_path(path, pathlen);
92+
ceph_mdsc_free_path_info(&path_info);
9593
} else if (req->r_path1) {
9694
seq_printf(s, " #%llx/%s", req->r_ino1.ino,
9795
req->r_path1);
@@ -100,8 +98,8 @@ static int mdsc_show(struct seq_file *s, void *p)
10098
}
10199

102100
if (req->r_old_dentry) {
103-
path = ceph_mdsc_build_path(req->r_old_dentry, &pathlen,
104-
&pathbase, 0);
101+
struct ceph_path_info path_info;
102+
path = ceph_mdsc_build_path(req->r_old_dentry, &path_info, 0);
105103
if (IS_ERR(path))
106104
path = NULL;
107105
spin_lock(&req->r_old_dentry->d_lock);
@@ -111,7 +109,7 @@ static int mdsc_show(struct seq_file *s, void *p)
111109
req->r_old_dentry,
112110
path ? path : "");
113111
spin_unlock(&req->r_old_dentry->d_lock);
114-
ceph_mdsc_free_path(path, pathlen);
112+
ceph_mdsc_free_path_info(&path_info);
115113
} else if (req->r_path2 && req->r_op != CEPH_MDS_OP_SYMLINK) {
116114
if (req->r_ino2.ino)
117115
seq_printf(s, " #%llx/%s", req->r_ino2.ino,

fs/ceph/dir.c

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1224,10 +1224,8 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc,
12241224

12251225
/* If op failed, mark everyone involved for errors */
12261226
if (result) {
1227-
int pathlen = 0;
1228-
u64 base = 0;
1229-
char *path = ceph_mdsc_build_path(dentry, &pathlen,
1230-
&base, 0);
1227+
struct ceph_path_info path_info = {0};
1228+
char *path = ceph_mdsc_build_path(dentry, &path_info, 0);
12311229

12321230
/* mark error on parent + clear complete */
12331231
mapping_set_error(req->r_parent->i_mapping, result);
@@ -1241,8 +1239,8 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc,
12411239
mapping_set_error(req->r_old_inode->i_mapping, result);
12421240

12431241
pr_warn("async unlink failure path=(%llx)%s result=%d!\n",
1244-
base, IS_ERR(path) ? "<<bad>>" : path, result);
1245-
ceph_mdsc_free_path(path, pathlen);
1242+
path_info.vino.ino, IS_ERR(path) ? "<<bad>>" : path, result);
1243+
ceph_mdsc_free_path_info(&path_info);
12461244
}
12471245
out:
12481246
iput(req->r_old_inode);

fs/ceph/file.c

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -576,14 +576,12 @@ static void ceph_async_create_cb(struct ceph_mds_client *mdsc,
576576
mapping_set_error(req->r_parent->i_mapping, result);
577577

578578
if (result) {
579-
int pathlen = 0;
580-
u64 base = 0;
581-
char *path = ceph_mdsc_build_path(req->r_dentry, &pathlen,
582-
&base, 0);
579+
struct ceph_path_info path_info = {0};
580+
char *path = ceph_mdsc_build_path(req->r_dentry, &path_info, 0);
583581

584582
pr_warn("async create failure path=(%llx)%s result=%d!\n",
585-
base, IS_ERR(path) ? "<<bad>>" : path, result);
586-
ceph_mdsc_free_path(path, pathlen);
583+
path_info.vino.ino, IS_ERR(path) ? "<<bad>>" : path, result);
584+
ceph_mdsc_free_path_info(&path_info);
587585

588586
ceph_dir_clear_complete(req->r_parent);
589587
if (!d_unhashed(dentry))

fs/ceph/inode.c

Lines changed: 70 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,52 @@ static int ceph_set_ino_cb(struct inode *inode, void *data)
5555
return 0;
5656
}
5757

58+
/*
59+
* Check if the parent inode matches the vino from directory reply info
60+
*/
61+
static inline bool ceph_vino_matches_parent(struct inode *parent,
62+
struct ceph_vino vino)
63+
{
64+
return ceph_ino(parent) == vino.ino && ceph_snap(parent) == vino.snap;
65+
}
66+
67+
/*
68+
* Validate that the directory inode referenced by @req->r_parent matches the
69+
* inode number and snapshot id contained in the reply's directory record. If
70+
* they do not match – which can theoretically happen if the parent dentry was
71+
* moved between the time the request was issued and the reply arrived – fall
72+
* back to looking up the correct inode in the inode cache.
73+
*
74+
* Returns @parent unchanged (no extra reference is taken) when it matches or the
75+
* reply has no directory record, and NULL when @parent is NULL. Otherwise returns the
76+
* result of ceph_get_inode(); if valid, the caller must drop that extra reference.
77+
*/
78+
static struct inode *ceph_get_reply_dir(struct super_block *sb,
79+
struct inode *parent,
80+
struct ceph_mds_reply_info_parsed *rinfo)
81+
{
82+
struct ceph_vino vino;
83+
84+
if (unlikely(!rinfo->diri.in))
85+
return parent; /* nothing to compare against */
86+
87+
/* If we didn't have a cached parent inode to begin with, just bail out. */
88+
if (!parent)
89+
return NULL;
90+
91+
vino.ino = le64_to_cpu(rinfo->diri.in->ino);
92+
vino.snap = le64_to_cpu(rinfo->diri.in->snapid);
93+
94+
if (likely(ceph_vino_matches_parent(parent, vino)))
95+
return parent; /* matches – use the original reference */
96+
97+
/* Mismatch – this should be rare. Emit a WARN and obtain the correct inode. */
98+
WARN_ONCE(1, "ceph: reply dir mismatch (parent valid %llx.%llx reply %llx.%llx)\n",
99+
ceph_ino(parent), ceph_snap(parent), vino.ino, vino.snap);
100+
101+
return ceph_get_inode(sb, vino, NULL);
102+
}
103+
58104
/**
59105
* ceph_new_inode - allocate a new inode in advance of an expected create
60106
* @dir: parent directory for new inode
@@ -1489,6 +1535,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
14891535
struct inode *in = NULL;
14901536
struct ceph_vino tvino, dvino;
14911537
struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
1538+
struct inode *parent_dir = NULL;
14921539
int err = 0;
14931540

14941541
dout("fill_trace %p is_dentry %d is_target %d\n", req,
@@ -1502,10 +1549,18 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
15021549
}
15031550

15041551
if (rinfo->head->is_dentry) {
1505-
struct inode *dir = req->r_parent;
1552+
/*
1553+
* r_parent may be stale, in cases when R_PARENT_LOCKED is not set,
1554+
* so we need to get the correct inode
1555+
*/
1556+
parent_dir = ceph_get_reply_dir(sb, req->r_parent, rinfo);
1557+
if (unlikely(IS_ERR(parent_dir))) {
1558+
err = PTR_ERR(parent_dir);
1559+
goto done;
1560+
}
15061561

1507-
if (dir) {
1508-
err = ceph_fill_inode(dir, NULL, &rinfo->diri,
1562+
if (parent_dir) {
1563+
err = ceph_fill_inode(parent_dir, NULL, &rinfo->diri,
15091564
rinfo->dirfrag, session, -1,
15101565
&req->r_caps_reservation);
15111566
if (err < 0)
@@ -1514,14 +1569,14 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
15141569
WARN_ON_ONCE(1);
15151570
}
15161571

1517-
if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME &&
1572+
if (parent_dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME &&
15181573
test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) &&
15191574
!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
15201575
bool is_nokey = false;
15211576
struct qstr dname;
15221577
struct dentry *dn, *parent;
15231578
struct fscrypt_str oname = FSTR_INIT(NULL, 0);
1524-
struct ceph_fname fname = { .dir = dir,
1579+
struct ceph_fname fname = { .dir = parent_dir,
15251580
.name = rinfo->dname,
15261581
.ctext = rinfo->altname,
15271582
.name_len = rinfo->dname_len,
@@ -1530,10 +1585,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
15301585
BUG_ON(!rinfo->head->is_target);
15311586
BUG_ON(req->r_dentry);
15321587

1533-
parent = d_find_any_alias(dir);
1588+
parent = d_find_any_alias(parent_dir);
15341589
BUG_ON(!parent);
15351590

1536-
err = ceph_fname_alloc_buffer(dir, &oname);
1591+
err = ceph_fname_alloc_buffer(parent_dir, &oname);
15371592
if (err < 0) {
15381593
dput(parent);
15391594
goto done;
@@ -1542,14 +1597,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
15421597
err = ceph_fname_to_usr(&fname, NULL, &oname, &is_nokey);
15431598
if (err < 0) {
15441599
dput(parent);
1545-
ceph_fname_free_buffer(dir, &oname);
1600+
ceph_fname_free_buffer(parent_dir, &oname);
15461601
goto done;
15471602
}
15481603
dname.name = oname.name;
15491604
dname.len = oname.len;
15501605
dname.hash = full_name_hash(parent, dname.name, dname.len);
15511606
tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
15521607
tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
1608+
15531609
retry_lookup:
15541610
dn = d_lookup(parent, &dname);
15551611
dout("d_lookup on parent=%p name=%.*s got %p\n",
@@ -1561,7 +1617,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
15611617
dname.len, dname.name, dn);
15621618
if (!dn) {
15631619
dput(parent);
1564-
ceph_fname_free_buffer(dir, &oname);
1620+
ceph_fname_free_buffer(parent_dir, &oname);
15651621
err = -ENOMEM;
15661622
goto done;
15671623
}
@@ -1576,12 +1632,12 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
15761632
ceph_snap(d_inode(dn)) != tvino.snap)) {
15771633
dout(" dn %p points to wrong inode %p\n",
15781634
dn, d_inode(dn));
1579-
ceph_dir_clear_ordered(dir);
1635+
ceph_dir_clear_ordered(parent_dir);
15801636
d_delete(dn);
15811637
dput(dn);
15821638
goto retry_lookup;
15831639
}
1584-
ceph_fname_free_buffer(dir, &oname);
1640+
ceph_fname_free_buffer(parent_dir, &oname);
15851641

15861642
req->r_dentry = dn;
15871643
dput(parent);
@@ -1763,6 +1819,9 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
17631819
&dvino, ptvino);
17641820
}
17651821
done:
1822+
/* Drop extra ref from ceph_get_reply_dir() if it returned a new inode */
1823+
if (unlikely(!IS_ERR_OR_NULL(parent_dir) && parent_dir != req->r_parent))
1824+
iput(parent_dir);
17661825
dout("fill_trace done err=%d\n", err);
17671826
return err;
17681827
}

0 commit comments

Comments
 (0)