Skip to content

Commit 1a81dfc

Browse files
Trond Myklebust authored and gregkh committed
NFSv4/flexfiles: Fix handling of NFS level errors in I/O
[ Upstream commit 38074de ]

Allow the flexfiles error handling to recognise NFS level errors (as opposed to RPC level errors) and handle them separately. The main motivator is the NFSERR_PERM errors that get returned if the NFS client connects to the data server through a port number that is lower than 1024. In that case, the client should disconnect and retry a READ on a different data server, or it should retry a WRITE after reconnecting.

Reviewed-by: Tigran Mkrtchyan <tigran.mkrtchyan@desy.de>
Fixes: d67ae82 ("pnfs/flexfiles: Add the FlexFile Layout Driver")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
1 parent 5e110e8 commit 1a81dfc

File tree

1 file changed

+87
-34
lines changed

1 file changed

+87
-34
lines changed

fs/nfs/flexfilelayout/flexfilelayout.c

Lines changed: 87 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1104,6 +1104,7 @@ static void ff_layout_reset_read(struct nfs_pgio_header *hdr)
11041104
}
11051105

11061106
static int ff_layout_async_handle_error_v4(struct rpc_task *task,
1107+
u32 op_status,
11071108
struct nfs4_state *state,
11081109
struct nfs_client *clp,
11091110
struct pnfs_layout_segment *lseg,
@@ -1114,32 +1115,42 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
11141115
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
11151116
struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
11161117

1117-
switch (task->tk_status) {
1118-
case -NFS4ERR_BADSESSION:
1119-
case -NFS4ERR_BADSLOT:
1120-
case -NFS4ERR_BAD_HIGH_SLOT:
1121-
case -NFS4ERR_DEADSESSION:
1122-
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1123-
case -NFS4ERR_SEQ_FALSE_RETRY:
1124-
case -NFS4ERR_SEQ_MISORDERED:
1118+
switch (op_status) {
1119+
case NFS4_OK:
1120+
case NFS4ERR_NXIO:
1121+
break;
1122+
case NFSERR_PERM:
1123+
if (!task->tk_xprt)
1124+
break;
1125+
xprt_force_disconnect(task->tk_xprt);
1126+
goto out_retry;
1127+
case NFS4ERR_BADSESSION:
1128+
case NFS4ERR_BADSLOT:
1129+
case NFS4ERR_BAD_HIGH_SLOT:
1130+
case NFS4ERR_DEADSESSION:
1131+
case NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1132+
case NFS4ERR_SEQ_FALSE_RETRY:
1133+
case NFS4ERR_SEQ_MISORDERED:
11251134
dprintk("%s ERROR %d, Reset session. Exchangeid "
11261135
"flags 0x%x\n", __func__, task->tk_status,
11271136
clp->cl_exchange_flags);
11281137
nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
1129-
break;
1130-
case -NFS4ERR_DELAY:
1131-
case -NFS4ERR_GRACE:
1138+
goto out_retry;
1139+
case NFS4ERR_DELAY:
1140+
nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
1141+
fallthrough;
1142+
case NFS4ERR_GRACE:
11321143
rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX);
1133-
break;
1134-
case -NFS4ERR_RETRY_UNCACHED_REP:
1135-
break;
1144+
goto out_retry;
1145+
case NFS4ERR_RETRY_UNCACHED_REP:
1146+
goto out_retry;
11361147
/* Invalidate Layout errors */
1137-
case -NFS4ERR_PNFS_NO_LAYOUT:
1138-
case -ESTALE: /* mapped NFS4ERR_STALE */
1139-
case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */
1140-
case -EISDIR: /* mapped NFS4ERR_ISDIR */
1141-
case -NFS4ERR_FHEXPIRED:
1142-
case -NFS4ERR_WRONG_TYPE:
1148+
case NFS4ERR_PNFS_NO_LAYOUT:
1149+
case NFS4ERR_STALE:
1150+
case NFS4ERR_BADHANDLE:
1151+
case NFS4ERR_ISDIR:
1152+
case NFS4ERR_FHEXPIRED:
1153+
case NFS4ERR_WRONG_TYPE:
11431154
dprintk("%s Invalid layout error %d\n", __func__,
11441155
task->tk_status);
11451156
/*
@@ -1152,6 +1163,11 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
11521163
pnfs_destroy_layout(NFS_I(inode));
11531164
rpc_wake_up(&tbl->slot_tbl_waitq);
11541165
goto reset;
1166+
default:
1167+
break;
1168+
}
1169+
1170+
switch (task->tk_status) {
11551171
/* RPC connection errors */
11561172
case -ECONNREFUSED:
11571173
case -EHOSTDOWN:
@@ -1167,26 +1183,56 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
11671183
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
11681184
&devid->deviceid);
11691185
rpc_wake_up(&tbl->slot_tbl_waitq);
1170-
fallthrough;
1186+
break;
11711187
default:
1172-
if (ff_layout_avoid_mds_available_ds(lseg))
1173-
return -NFS4ERR_RESET_TO_PNFS;
1174-
reset:
1175-
dprintk("%s Retry through MDS. Error %d\n", __func__,
1176-
task->tk_status);
1177-
return -NFS4ERR_RESET_TO_MDS;
1188+
break;
11781189
}
1190+
1191+
if (ff_layout_avoid_mds_available_ds(lseg))
1192+
return -NFS4ERR_RESET_TO_PNFS;
1193+
reset:
1194+
dprintk("%s Retry through MDS. Error %d\n", __func__,
1195+
task->tk_status);
1196+
return -NFS4ERR_RESET_TO_MDS;
1197+
1198+
out_retry:
11791199
task->tk_status = 0;
11801200
return -EAGAIN;
11811201
}
11821202

11831203
/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
11841204
static int ff_layout_async_handle_error_v3(struct rpc_task *task,
1205+
u32 op_status,
1206+
struct nfs_client *clp,
11851207
struct pnfs_layout_segment *lseg,
11861208
u32 idx)
11871209
{
11881210
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
11891211

1212+
switch (op_status) {
1213+
case NFS_OK:
1214+
case NFSERR_NXIO:
1215+
break;
1216+
case NFSERR_PERM:
1217+
if (!task->tk_xprt)
1218+
break;
1219+
xprt_force_disconnect(task->tk_xprt);
1220+
goto out_retry;
1221+
case NFSERR_ACCES:
1222+
case NFSERR_BADHANDLE:
1223+
case NFSERR_FBIG:
1224+
case NFSERR_IO:
1225+
case NFSERR_NOSPC:
1226+
case NFSERR_ROFS:
1227+
case NFSERR_STALE:
1228+
goto out_reset_to_pnfs;
1229+
case NFSERR_JUKEBOX:
1230+
nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
1231+
goto out_retry;
1232+
default:
1233+
break;
1234+
}
1235+
11901236
switch (task->tk_status) {
11911237
/* File access problems. Don't mark the device as unavailable */
11921238
case -EACCES:
@@ -1205,6 +1251,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
12051251
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
12061252
&devid->deviceid);
12071253
}
1254+
out_reset_to_pnfs:
12081255
/* FIXME: Need to prevent infinite looping here. */
12091256
return -NFS4ERR_RESET_TO_PNFS;
12101257
out_retry:
@@ -1215,6 +1262,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
12151262
}
12161263

12171264
static int ff_layout_async_handle_error(struct rpc_task *task,
1265+
u32 op_status,
12181266
struct nfs4_state *state,
12191267
struct nfs_client *clp,
12201268
struct pnfs_layout_segment *lseg,
@@ -1233,10 +1281,11 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
12331281

12341282
switch (vers) {
12351283
case 3:
1236-
return ff_layout_async_handle_error_v3(task, lseg, idx);
1237-
case 4:
1238-
return ff_layout_async_handle_error_v4(task, state, clp,
1284+
return ff_layout_async_handle_error_v3(task, op_status, clp,
12391285
lseg, idx);
1286+
case 4:
1287+
return ff_layout_async_handle_error_v4(task, op_status, state,
1288+
clp, lseg, idx);
12401289
default:
12411290
/* should never happen */
12421291
WARN_ON_ONCE(1);
@@ -1289,6 +1338,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
12891338
switch (status) {
12901339
case NFS4ERR_DELAY:
12911340
case NFS4ERR_GRACE:
1341+
case NFS4ERR_PERM:
12921342
break;
12931343
case NFS4ERR_NXIO:
12941344
ff_layout_mark_ds_unreachable(lseg, idx);
@@ -1321,7 +1371,8 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
13211371
trace_ff_layout_read_error(hdr);
13221372
}
13231373

1324-
err = ff_layout_async_handle_error(task, hdr->args.context->state,
1374+
err = ff_layout_async_handle_error(task, hdr->res.op_status,
1375+
hdr->args.context->state,
13251376
hdr->ds_clp, hdr->lseg,
13261377
hdr->pgio_mirror_idx);
13271378

@@ -1491,7 +1542,8 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
14911542
trace_ff_layout_write_error(hdr);
14921543
}
14931544

1494-
err = ff_layout_async_handle_error(task, hdr->args.context->state,
1545+
err = ff_layout_async_handle_error(task, hdr->res.op_status,
1546+
hdr->args.context->state,
14951547
hdr->ds_clp, hdr->lseg,
14961548
hdr->pgio_mirror_idx);
14971549

@@ -1537,8 +1589,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
15371589
trace_ff_layout_commit_error(data);
15381590
}
15391591

1540-
err = ff_layout_async_handle_error(task, NULL, data->ds_clp,
1541-
data->lseg, data->ds_commit_index);
1592+
err = ff_layout_async_handle_error(task, data->res.op_status,
1593+
NULL, data->ds_clp, data->lseg,
1594+
data->ds_commit_index);
15421595

15431596
trace_nfs4_pnfs_commit_ds(data, err);
15441597
switch (err) {

0 commit comments

Comments
 (0)