|
| 1 | +ceph: fix race condition where r_parent becomes stale before sending message |
| 2 | + |
| 3 | +jira LE-4311 |
| 4 | +Rebuild_History Non-Buildable kernel-5.14.0-570.49.1.el9_6 |
| 5 | +Rebuild_CHGLOG: - ceph: fix client race condition where r_parent becomes stale before sending message (Alex Markuze) [RHEL-114962] |
| 6 | +Rebuild_FUZZ: 95.60% |
| 7 | +commit-author Alex Markuze <amarkuze@redhat.com> |
| 8 | +commit bec324f33d1ed346394b2eee25bf6dbf3511f727 |
| 9 | +Empty-Commit: Cherry-Pick Conflicts during history rebuild. |
| 10 | +Will be included in final tarball splat. Ref for failed cherry-pick at: |
| 11 | +ciq/ciq_backports/kernel-5.14.0-570.49.1.el9_6/bec324f3.failed |
| 12 | + |
| 13 | +When the parent directory's i_rwsem is not locked, req->r_parent may become |
| 14 | +stale due to concurrent operations (e.g. rename) between dentry lookup and |
| 15 | +message creation. Validate that r_parent matches the encoded parent inode |
| 16 | +and update to the correct inode if a mismatch is detected. |
| 17 | + |
| 18 | +[ idryomov: folded a follow-up fix from Alex to drop extra reference |
| 19 | + from ceph_get_reply_dir() in ceph_fill_trace(): |
| 20 | + |
| 21 | + ceph_get_reply_dir() may return a different, referenced inode when |
| 22 | + r_parent is stale and the parent directory lock is not held. |
| 23 | + ceph_fill_trace() used that inode but failed to drop the reference |
| 24 | + when it differed from req->r_parent, leaking an inode reference. |
| 25 | + |
| 26 | + Keep the directory inode in a local variable and iput() it at |
| 27 | + function end if it does not match req->r_parent. ] |
| 28 | + |
| 29 | + Cc: stable@vger.kernel.org |
| 30 | + Signed-off-by: Alex Markuze <amarkuze@redhat.com> |
| 31 | + Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com> |
| 32 | + Signed-off-by: Ilya Dryomov <idryomov@gmail.com> |
| 33 | +(cherry picked from commit bec324f33d1ed346394b2eee25bf6dbf3511f727) |
| 34 | + Signed-off-by: Jonathan Maple <jmaple@ciq.com> |
| 35 | + |
| 36 | +# Conflicts: |
| 37 | +# fs/ceph/inode.c |
| 38 | +diff --cc fs/ceph/inode.c |
| 39 | +index 34cfeb0fba9e,f67025465de0..000000000000 |
| 40 | +--- a/fs/ceph/inode.c |
| 41 | ++++ b/fs/ceph/inode.c |
| 42 | +@@@ -1488,14 -1567,16 +1534,20 @@@ int ceph_fill_trace(struct super_block |
| 43 | + struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; |
| 44 | + struct inode *in = NULL; |
| 45 | + struct ceph_vino tvino, dvino; |
| 46 | +++<<<<<<< HEAD |
| 47 | + + struct ceph_fs_client *fsc = ceph_sb_to_client(sb); |
| 48 | +++======= |
| 49 | ++ struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); |
| 50 | ++ struct ceph_client *cl = fsc->client; |
| 51 | ++ struct inode *parent_dir = NULL; |
| 52 | +++>>>>>>> bec324f33d1e (ceph: fix race condition where r_parent becomes stale before sending message) |
| 53 | + int err = 0; |
| 54 | + |
| 55 | + - doutc(cl, "%p is_dentry %d is_target %d\n", req, |
| 56 | + - rinfo->head->is_dentry, rinfo->head->is_target); |
| 57 | + + dout("fill_trace %p is_dentry %d is_target %d\n", req, |
| 58 | + + rinfo->head->is_dentry, rinfo->head->is_target); |
| 59 | + |
| 60 | + if (!rinfo->head->is_target && !rinfo->head->is_dentry) { |
| 61 | + - doutc(cl, "reply is empty!\n"); |
| 62 | + + dout("fill_trace reply is empty!\n"); |
| 63 | + if (rinfo->head->result == 0 && req->r_parent) |
| 64 | + ceph_invalidate_dir_request(req); |
| 65 | + return 0; |
| 66 | +@@@ -1557,11 -1645,11 +1616,11 @@@ retry_lookup |
| 67 | + |
| 68 | + if (!dn) { |
| 69 | + dn = d_alloc(parent, &dname); |
| 70 | + - doutc(cl, "d_alloc %p '%.*s' = %p\n", parent, |
| 71 | + - dname.len, dname.name, dn); |
| 72 | + + dout("d_alloc %p '%.*s' = %p\n", parent, |
| 73 | + + dname.len, dname.name, dn); |
| 74 | + if (!dn) { |
| 75 | + dput(parent); |
| 76 | +- ceph_fname_free_buffer(dir, &oname); |
| 77 | ++ ceph_fname_free_buffer(parent_dir, &oname); |
| 78 | + err = -ENOMEM; |
| 79 | + goto done; |
| 80 | + } |
| 81 | +@@@ -1574,9 -1662,9 +1633,15 @@@ |
| 82 | + } else if (d_really_is_positive(dn) && |
| 83 | + (ceph_ino(d_inode(dn)) != tvino.ino || |
| 84 | + ceph_snap(d_inode(dn)) != tvino.snap)) { |
| 85 | +++<<<<<<< HEAD |
| 86 | + + dout(" dn %p points to wrong inode %p\n", |
| 87 | + + dn, d_inode(dn)); |
| 88 | + + ceph_dir_clear_ordered(dir); |
| 89 | +++======= |
| 90 | ++ doutc(cl, " dn %p points to wrong inode %p\n", |
| 91 | ++ dn, d_inode(dn)); |
| 92 | ++ ceph_dir_clear_ordered(parent_dir); |
| 93 | +++>>>>>>> bec324f33d1e (ceph: fix race condition where r_parent becomes stale before sending message) |
| 94 | + d_delete(dn); |
| 95 | + dput(dn); |
| 96 | + goto retry_lookup; |
| 97 | +@@@ -1763,7 -1848,10 +1828,14 @@@ |
| 98 | + &dvino, ptvino); |
| 99 | + } |
| 100 | + done: |
| 101 | +++<<<<<<< HEAD |
| 102 | + + dout("fill_trace done err=%d\n", err); |
| 103 | +++======= |
| 104 | ++ /* Drop extra ref from ceph_get_reply_dir() if it returned a new inode */ |
| 105 | ++ if (unlikely(!IS_ERR_OR_NULL(parent_dir) && parent_dir != req->r_parent)) |
| 106 | ++ iput(parent_dir); |
| 107 | ++ doutc(cl, "done err=%d\n", err); |
| 108 | +++>>>>>>> bec324f33d1e (ceph: fix race condition where r_parent becomes stale before sending message) |
| 109 | + return err; |
| 110 | + } |
| 111 | + |
| 112 | +* Unmerged path fs/ceph/inode.c |
0 commit comments