Skip to content

Commit 4d428dc

Browse files
MaxKellermann authored and brauner committed
netfs: fix reference leak
Commit 20d72b0 ("netfs: Fix the request's work item to not require a ref") modified netfs_alloc_request() to initialize the reference counter to 2 instead of 1. The rationale was that the request's "work" would release the second reference after completion (via netfs_{read,write}_collection_worker()). That works most of the time if all goes well. However, it leaks this additional reference if the request is released before the I/O operation has been submitted: the error code path only decrements the reference counter once and the work item will never be queued because there will never be a completion. This has caused outages of our whole server cluster today because tasks were blocked in netfs_wait_for_outstanding_io(), leading to deadlocks in Ceph (another bug that I will address soon in another patch). This was caused by a netfs_pgpriv2_begin_copy_to_cache() call which failed in fscache_begin_write_operation(). The leaked netfs_io_request was never completed, leaving `netfs_inode.io_count` with a positive value forever. All of this is super-fragile code. Finding out which code paths will lead to an eventual completion and which do not is hard to see: - Some functions like netfs_create_write_req() allocate a request, but will never submit any I/O. - netfs_unbuffered_read_iter_locked() calls netfs_unbuffered_read() and then netfs_put_request(); however, netfs_unbuffered_read() can also fail early before submitting the I/O request, therefore another netfs_put_request() call must be added there. A rule of thumb is that functions that return a `netfs_io_request` do not submit I/O, and all of their callers must be checked. For my taste, the whole netfs code needs an overhaul to make reference counting easier to understand and less fragile & obscure. But to fix this bug here and now and produce a patch that is adequate for a stable backport, I tried a minimal approach that quickly frees the request object upon early failure. 
I decided against adding a second netfs_put_request() each time because that would cause code duplication which obscures the code further. Instead, I added the function netfs_put_failed_request() which frees such a failed request synchronously under the assumption that the reference count is exactly 2 (as initially set by netfs_alloc_request() and never touched), verified by a WARN_ON_ONCE(). It then deinitializes the request object (without going through the "cleanup_work" indirection) and frees the allocation (with RCU protection to protect against concurrent access by netfs_requests_seq_start()). All code paths that fail early have been changed to call netfs_put_failed_request() instead of netfs_put_request(). Additionally, I have added a netfs_put_request() call to netfs_unbuffered_read() as explained above because the netfs_put_failed_request() approach does not work there. Fixes: 20d72b0 ("netfs: Fix the request's work item to not require a ref") Signed-off-by: Max Kellermann <max.kellermann@ionos.com> Signed-off-by: David Howells <dhowells@redhat.com> cc: Paulo Alcantara <pc@manguebit.org> cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: stable@vger.kernel.org Signed-off-by: Christian Brauner <brauner@kernel.org>
1 parent 9158c6b commit 4d428dc

File tree

8 files changed

+47
-14
lines changed

8 files changed

+47
-14
lines changed

fs/netfs/buffered_read.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,7 @@ void netfs_readahead(struct readahead_control *ractl)
369369
return netfs_put_request(rreq, netfs_rreq_trace_put_return);
370370

371371
cleanup_free:
372-
return netfs_put_request(rreq, netfs_rreq_trace_put_failed);
372+
return netfs_put_failed_request(rreq);
373373
}
374374
EXPORT_SYMBOL(netfs_readahead);
375375

@@ -472,7 +472,7 @@ static int netfs_read_gaps(struct file *file, struct folio *folio)
472472
return ret < 0 ? ret : 0;
473473

474474
discard:
475-
netfs_put_request(rreq, netfs_rreq_trace_put_discard);
475+
netfs_put_failed_request(rreq);
476476
alloc_error:
477477
folio_unlock(folio);
478478
return ret;
@@ -532,7 +532,7 @@ int netfs_read_folio(struct file *file, struct folio *folio)
532532
return ret < 0 ? ret : 0;
533533

534534
discard:
535-
netfs_put_request(rreq, netfs_rreq_trace_put_discard);
535+
netfs_put_failed_request(rreq);
536536
alloc_error:
537537
folio_unlock(folio);
538538
return ret;
@@ -699,7 +699,7 @@ int netfs_write_begin(struct netfs_inode *ctx,
699699
return 0;
700700

701701
error_put:
702-
netfs_put_request(rreq, netfs_rreq_trace_put_failed);
702+
netfs_put_failed_request(rreq);
703703
error:
704704
if (folio) {
705705
folio_unlock(folio);
@@ -754,7 +754,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
754754
return ret < 0 ? ret : 0;
755755

756756
error_put:
757-
netfs_put_request(rreq, netfs_rreq_trace_put_discard);
757+
netfs_put_failed_request(rreq);
758758
error:
759759
_leave(" = %d", ret);
760760
return ret;

fs/netfs/direct_read.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ static ssize_t netfs_unbuffered_read(struct netfs_io_request *rreq, bool sync)
131131

132132
if (rreq->len == 0) {
133133
pr_err("Zero-sized read [R=%x]\n", rreq->debug_id);
134+
netfs_put_request(rreq, netfs_rreq_trace_put_discard);
134135
return -EIO;
135136
}
136137

@@ -205,7 +206,7 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
205206
if (user_backed_iter(iter)) {
206207
ret = netfs_extract_user_iter(iter, rreq->len, &rreq->buffer.iter, 0);
207208
if (ret < 0)
208-
goto out;
209+
goto error_put;
209210
rreq->direct_bv = (struct bio_vec *)rreq->buffer.iter.bvec;
210211
rreq->direct_bv_count = ret;
211212
rreq->direct_bv_unpin = iov_iter_extract_will_pin(iter);
@@ -238,6 +239,10 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
238239
if (ret > 0)
239240
orig_count -= ret;
240241
return ret;
242+
243+
error_put:
244+
netfs_put_failed_request(rreq);
245+
return ret;
241246
}
242247
EXPORT_SYMBOL(netfs_unbuffered_read_iter_locked);
243248

fs/netfs/direct_write.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
5757
n = netfs_extract_user_iter(iter, len, &wreq->buffer.iter, 0);
5858
if (n < 0) {
5959
ret = n;
60-
goto out;
60+
goto error_put;
6161
}
6262
wreq->direct_bv = (struct bio_vec *)wreq->buffer.iter.bvec;
6363
wreq->direct_bv_count = n;
@@ -101,6 +101,10 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
101101
out:
102102
netfs_put_request(wreq, netfs_rreq_trace_put_return);
103103
return ret;
104+
105+
error_put:
106+
netfs_put_failed_request(wreq);
107+
return ret;
104108
}
105109
EXPORT_SYMBOL(netfs_unbuffered_write_iter_locked);
106110

fs/netfs/internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
8787
void netfs_get_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what);
8888
void netfs_clear_subrequests(struct netfs_io_request *rreq);
8989
void netfs_put_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what);
90+
void netfs_put_failed_request(struct netfs_io_request *rreq);
9091
struct netfs_io_subrequest *netfs_alloc_subrequest(struct netfs_io_request *rreq);
9192

9293
static inline void netfs_see_request(struct netfs_io_request *rreq,

fs/netfs/objects.c

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,10 +116,8 @@ static void netfs_free_request_rcu(struct rcu_head *rcu)
116116
netfs_stat_d(&netfs_n_rh_rreq);
117117
}
118118

119-
static void netfs_free_request(struct work_struct *work)
119+
static void netfs_deinit_request(struct netfs_io_request *rreq)
120120
{
121-
struct netfs_io_request *rreq =
122-
container_of(work, struct netfs_io_request, cleanup_work);
123121
struct netfs_inode *ictx = netfs_inode(rreq->inode);
124122
unsigned int i;
125123

@@ -149,6 +147,14 @@ static void netfs_free_request(struct work_struct *work)
149147

150148
if (atomic_dec_and_test(&ictx->io_count))
151149
wake_up_var(&ictx->io_count);
150+
}
151+
152+
static void netfs_free_request(struct work_struct *work)
153+
{
154+
struct netfs_io_request *rreq =
155+
container_of(work, struct netfs_io_request, cleanup_work);
156+
157+
netfs_deinit_request(rreq);
152158
call_rcu(&rreq->rcu, netfs_free_request_rcu);
153159
}
154160

@@ -167,6 +173,24 @@ void netfs_put_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace
167173
}
168174
}
169175

176+
/*
177+
* Free a request (synchronously) that was just allocated but has
178+
* failed before it could be submitted.
179+
*/
180+
void netfs_put_failed_request(struct netfs_io_request *rreq)
181+
{
182+
int r = refcount_read(&rreq->ref);
183+
184+
/* new requests have two references (see
185+
* netfs_alloc_request(), and this function is only allowed on
186+
* new request objects
187+
*/
188+
WARN_ON_ONCE(r != 2);
189+
190+
trace_netfs_rreq_ref(rreq->debug_id, r, netfs_rreq_trace_put_failed);
191+
netfs_free_request(&rreq->cleanup_work);
192+
}
193+
170194
/*
171195
* Allocate and partially initialise an I/O request structure.
172196
*/

fs/netfs/read_pgpriv2.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ static struct netfs_io_request *netfs_pgpriv2_begin_copy_to_cache(
118118
return creq;
119119

120120
cancel_put:
121-
netfs_put_request(creq, netfs_rreq_trace_put_return);
121+
netfs_put_failed_request(creq);
122122
cancel:
123123
rreq->copy_to_cache = ERR_PTR(-ENOBUFS);
124124
clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags);

fs/netfs/read_single.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ ssize_t netfs_read_single(struct inode *inode, struct file *file, struct iov_ite
189189
return ret;
190190

191191
cleanup_free:
192-
netfs_put_request(rreq, netfs_rreq_trace_put_failed);
192+
netfs_put_failed_request(rreq);
193193
return ret;
194194
}
195195
EXPORT_SYMBOL(netfs_read_single);

fs/netfs/write_issue.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
133133

134134
return wreq;
135135
nomem:
136-
wreq->error = -ENOMEM;
137-
netfs_put_request(wreq, netfs_rreq_trace_put_failed);
136+
netfs_put_failed_request(wreq);
138137
return ERR_PTR(-ENOMEM);
139138
}
140139

0 commit comments

Comments
 (0)