Commit f563359
Author: Ming Lei (committed)

nbd: fix partial sending
JIRA: https://issues.redhat.com/browse/RHEL-74229
Upstream Status: for-6.14/block

commit 8337b02
Author: Ming Lei <ming.lei@redhat.com>
Date: Tue Oct 29 09:19:41 2024 +0800

    nbd: fix partial sending

    The nbd driver sends the request header and payload with multiple
    calls of sock_sendmsg, so partial sends cannot be avoided. However,
    the driver returns BLK_STS_RESOURCE to the block core in this
    situation, which causes an issue: request->tag may change on the
    next run of nbd_queue_rq(), but the original tag has already been
    sent as part of the header cookie. This confuses the driver's reply
    handling, since the real request can no longer be retrieved with the
    obsolete tag.

    Fix it by retrying the send directly in the per-socket work
    function, and meanwhile return BLK_STS_OK to the block layer core.

    Cc: vincent.chen@sifive.com
    Cc: Leon Schuermann <leon@is.currently.online>
    Cc: Bart Van Assche <bvanassche@acm.org>
    Reported-by: Kevin Wolf <kwolf@redhat.com>
    Signed-off-by: Ming Lei <ming.lei@redhat.com>
    Tested-by: Kevin Wolf <kwolf@redhat.com>
    Reviewed-by: Kevin Wolf <kwolf@redhat.com>
    Link: https://lore.kernel.org/r/20241029011941.153037-1-ming.lei@redhat.com
    Signed-off-by: Jens Axboe <axboe@kernel.dk>

Signed-off-by: Ming Lei <ming.lei@redhat.com>
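For context on why a stale tag breaks reply matching: the driver packs the block-layer tag into the 64-bit handle it writes into every NBD request header, and looks the request back up by that tag when the reply comes in. A rough sketch of that packing, modeled on mainline drivers/block/nbd.c (nbd_cmd_handle(), NBD_COOKIE_BITS and cmd_cookie are taken from upstream, not from this patch):

/*
 * Sketch (not part of this patch): how the wire handle embeds the
 * block-layer tag, modeled on mainline drivers/block/nbd.c.
 */
#define NBD_COOKIE_BITS 32

static u64 nbd_cmd_handle(struct nbd_cmd *cmd)
{
	struct request *req = blk_mq_rq_from_pdu(cmd);
	u32 tag = blk_mq_unique_tag(req);	/* request tag goes out on the wire */
	u64 cookie = cmd->cmd_cookie;		/* guards against stale replies */

	return (cookie << NBD_COOKIE_BITS) | tag;
}

If a partially-sent request is requeued and re-dispatched with a new tag, the eventual reply still carries the old handle, so the lookup on the receive side fails. That is why this patch retries in a work function instead of requeueing.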
1 parent 00d19a4 commit f563359

File tree: 1 file changed, +85 −10 lines


drivers/block/nbd.c

Lines changed: 85 additions & 10 deletions
@@ -62,6 +62,7 @@ struct nbd_sock {
 	bool dead;
 	int fallback_index;
 	int cookie;
+	struct work_struct work;
 };
 
 struct recv_thread_args {
@@ -141,6 +142,9 @@ struct nbd_device {
  */
 #define NBD_CMD_INFLIGHT 2
 
+/* Just part of request header or data payload is sent successfully */
+#define NBD_CMD_PARTIAL_SEND 3
+
 struct nbd_cmd {
 	struct nbd_device *nbd;
 	struct mutex lock;
@@ -466,6 +470,12 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req)
 	if (!mutex_trylock(&cmd->lock))
 		return BLK_EH_RESET_TIMER;
 
+	/* partial send is handled in nbd_sock's work function */
+	if (test_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags)) {
+		mutex_unlock(&cmd->lock);
+		return BLK_EH_RESET_TIMER;
+	}
+
 	if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
 		mutex_unlock(&cmd->lock);
 		return BLK_EH_DONE;
@@ -614,6 +624,30 @@ static inline int was_interrupted(int result)
 	return result == -ERESTARTSYS || result == -EINTR;
 }
 
+/*
+ * We've already sent header or part of data payload, have no choice but
+ * to set pending and schedule it in work.
+ *
+ * And we have to return BLK_STS_OK to block core, otherwise this same
+ * request may be re-dispatched with different tag, but our header has
+ * been sent out with old tag, and this way does confuse reply handling.
+ */
+static void nbd_sched_pending_work(struct nbd_device *nbd,
+				   struct nbd_sock *nsock,
+				   struct nbd_cmd *cmd, int sent)
+{
+	struct request *req = blk_mq_rq_from_pdu(cmd);
+
+	/* pending work should be scheduled only once */
+	WARN_ON_ONCE(test_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags));
+
+	nsock->pending = req;
+	nsock->sent = sent;
+	set_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags);
+	refcount_inc(&nbd->config_refs);
+	schedule_work(&nsock->work);
+}
+
 /*
  * Returns BLK_STS_RESOURCE if the caller should retry after a delay.
  * Returns BLK_STS_IOERR if sending failed.
@@ -699,8 +733,8 @@ static blk_status_t nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd,
 		 * completely done.
 		 */
 		if (sent) {
-			nsock->pending = req;
-			nsock->sent = sent;
+			nbd_sched_pending_work(nbd, nsock, cmd, sent);
+			return BLK_STS_OK;
 		}
 		set_bit(NBD_CMD_REQUEUED, &cmd->flags);
 		return BLK_STS_RESOURCE;
@@ -737,14 +771,8 @@ static blk_status_t nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd,
 			result = sock_xmit(nbd, index, 1, &from, flags, &sent);
 			if (result < 0) {
 				if (was_interrupted(result)) {
-					/* We've already sent the header, we
-					 * have no choice but to set pending and
-					 * return BUSY.
-					 */
-					nsock->pending = req;
-					nsock->sent = sent;
-					set_bit(NBD_CMD_REQUEUED, &cmd->flags);
-					return BLK_STS_RESOURCE;
+					nbd_sched_pending_work(nbd, nsock, cmd, sent);
+					return BLK_STS_OK;
 				}
 				dev_err(disk_to_dev(nbd->disk),
 					"Send data failed (result %d)\n",
@@ -770,6 +798,14 @@ static blk_status_t nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd,
 	return BLK_STS_OK;
 
 requeue:
+	/*
+	 * Can't requeue in case we are dealing with a partial send:
+	 * we must run from the pending work function.
+	 */
+	if (test_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags))
+		return BLK_STS_OK;
+
 	/* retry on a different socket */
 	dev_err_ratelimited(disk_to_dev(nbd->disk),
 			    "Request send failed, requeueing\n");
@@ -778,6 +814,44 @@ static blk_status_t nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd,
 	return BLK_STS_OK;
 }
 
+/* handle partial sending */
+static void nbd_pending_cmd_work(struct work_struct *work)
+{
+	struct nbd_sock *nsock = container_of(work, struct nbd_sock, work);
+	struct request *req = nsock->pending;
+	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
+	struct nbd_device *nbd = cmd->nbd;
+	unsigned long deadline = READ_ONCE(req->deadline);
+	unsigned int wait_ms = 2;
+
+	mutex_lock(&cmd->lock);
+
+	WARN_ON_ONCE(test_bit(NBD_CMD_REQUEUED, &cmd->flags));
+	if (WARN_ON_ONCE(!test_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags)))
+		goto out;
+
+	mutex_lock(&nsock->tx_lock);
+	while (true) {
+		nbd_send_cmd(nbd, cmd, cmd->index);
+		if (!nsock->pending)
+			break;
+
+		/* don't bother timeout handler for partial sending */
+		if (READ_ONCE(jiffies) + msecs_to_jiffies(wait_ms) >= deadline) {
+			cmd->status = BLK_STS_IOERR;
+			blk_mq_complete_request(req);
+			break;
+		}
+		msleep(wait_ms);
+		wait_ms *= 2;
+	}
+	mutex_unlock(&nsock->tx_lock);
+	clear_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags);
+out:
+	mutex_unlock(&cmd->lock);
+	nbd_config_put(nbd);
+}
+
 static int nbd_read_reply(struct nbd_device *nbd, struct socket *sock,
 			  struct nbd_reply *reply)
 {
@@ -1224,6 +1298,7 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
 	nsock->pending = NULL;
 	nsock->sent = 0;
 	nsock->cookie = 0;
+	INIT_WORK(&nsock->work, nbd_pending_cmd_work);
 	socks[config->num_connections++] = nsock;
 	atomic_inc(&config->live_connections);
 	blk_mq_unfreeze_queue(nbd->disk->queue);
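The retry loop in nbd_pending_cmd_work() boils down to a deadline-bounded exponential backoff: keep resending, double the sleep between attempts, and complete the request with an error before the block-layer deadline can fire. A minimal standalone C sketch of the same pattern (send_more() and now_ms() are hypothetical stand-ins, not kernel APIs):

#include <stdbool.h>
#include <unistd.h>

/* Hypothetical stand-ins for this sketch, not kernel APIs. */
extern bool send_more(void);        /* true once the request is fully sent */
extern unsigned long now_ms(void);  /* current time in milliseconds */

/* Returns 0 on success, -1 if the deadline would pass first. */
static int send_until_deadline(unsigned long deadline_ms)
{
	unsigned int wait_ms = 2;

	while (!send_more()) {
		/* give up before the deadline so no timeout handler races us */
		if (now_ms() + wait_ms >= deadline_ms)
			return -1;	/* caller fails the request with an I/O error */
		usleep(wait_ms * 1000);	/* back off ... */
		wait_ms *= 2;		/* ... exponentially, as the patch does */
	}
	return 0;
}

Capping the loop at the request deadline means the timeout handler, which only resets the timer for NBD_CMD_PARTIAL_SEND commands, never has to deal with a half-sent request.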
