Skip to content

Commit 426e9da

Browse files
Shahar Shitrit authored and kuba-moo committed
net/mlx5e: kTLS, Cancel RX async resync request in error flows
When device loses track of TLS records, it attempts to resync by monitoring records and requests an asynchronous resynchronization from software for this TLS connection.

The TLS module handles such device RX resync requests by logging record headers and comparing them with the record tcp_sn when provided by the device. It also increments rcd_delta to track how far the current record tcp_sn is from the tcp_sn of the original resync request. If the device later responds with a matching tcp_sn, the TLS module approves the tcp_sn for resync.

However, the device response may be delayed or never arrive, particularly due to traffic-related issues such as packet drops or reordering. In such cases, the TLS module remains unaware that resync will not complete, and continues performing unnecessary work by logging headers and incrementing rcd_delta, which can eventually exceed the threshold and trigger a WARN().

For example, this was observed when the device got out of tracking, causing mlx5e_ktls_handle_get_psv_completion() to fail and ultimately leading to the rcd_delta warning.

To address this, call tls_offload_rx_resync_async_request_cancel() to cancel the resync request and stop resync tracking in such error cases. Also, increment the tls_resync_req_skip counter to track these cancellations.

Fixes: 0419d8c ("net/mlx5e: kTLS, Add kTLS RX resync support")
Signed-off-by: Shahar Shitrit <shshitrit@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/1761508983-937977-4-git-send-email-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
1 parent c15d5c6 commit 426e9da

File tree

3 files changed

+37
-5
lines changed

3 files changed

+37
-5
lines changed

drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,6 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq,
320320
err_free:
321321
kfree(buf);
322322
err_out:
323-
priv_rx->rq_stats->tls_resync_req_skip++;
324323
return err;
325324
}
326325

@@ -339,14 +338,19 @@ static void resync_handle_work(struct work_struct *work)
339338

340339
if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) {
341340
mlx5e_ktls_priv_rx_put(priv_rx);
341+
priv_rx->rq_stats->tls_resync_req_skip++;
342+
tls_offload_rx_resync_async_request_cancel(&resync->core);
342343
return;
343344
}
344345

345346
c = resync->priv->channels.c[priv_rx->rxq];
346347
sq = &c->async_icosq;
347348

348-
if (resync_post_get_progress_params(sq, priv_rx))
349+
if (resync_post_get_progress_params(sq, priv_rx)) {
350+
priv_rx->rq_stats->tls_resync_req_skip++;
351+
tls_offload_rx_resync_async_request_cancel(&resync->core);
349352
mlx5e_ktls_priv_rx_put(priv_rx);
353+
}
350354
}
351355

352356
static void resync_init(struct mlx5e_ktls_rx_resync_ctx *resync,
@@ -425,6 +429,7 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
425429
{
426430
struct mlx5e_ktls_rx_resync_buf *buf = wi->tls_get_params.buf;
427431
struct mlx5e_ktls_offload_context_rx *priv_rx;
432+
struct tls_offload_resync_async *async_resync;
428433
struct tls_offload_context_rx *rx_ctx;
429434
u8 tracker_state, auth_state, *ctx;
430435
struct device *dev;
@@ -433,8 +438,12 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
433438
priv_rx = buf->priv_rx;
434439
dev = mlx5_core_dma_dev(sq->channel->mdev);
435440
rx_ctx = tls_offload_ctx_rx(tls_get_ctx(priv_rx->sk));
436-
if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags)))
441+
async_resync = rx_ctx->resync_async;
442+
if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) {
443+
priv_rx->rq_stats->tls_resync_req_skip++;
444+
tls_offload_rx_resync_async_request_cancel(async_resync);
437445
goto out;
446+
}
438447

439448
dma_sync_single_for_cpu(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE,
440449
DMA_FROM_DEVICE);
@@ -445,11 +454,12 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
445454
if (tracker_state != MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING ||
446455
auth_state != MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD) {
447456
priv_rx->rq_stats->tls_resync_req_skip++;
457+
tls_offload_rx_resync_async_request_cancel(async_resync);
448458
goto out;
449459
}
450460

451461
hw_seq = MLX5_GET(tls_progress_params, ctx, hw_resync_tcp_sn);
452-
tls_offload_rx_resync_async_request_end(rx_ctx->resync_async,
462+
tls_offload_rx_resync_async_request_end(async_resync,
453463
cpu_to_be32(hw_seq));
454464
priv_rx->rq_stats->tls_resync_req_end++;
455465
out:
@@ -475,8 +485,10 @@ static bool resync_queue_get_psv(struct sock *sk)
475485

476486
resync = &priv_rx->resync;
477487
mlx5e_ktls_priv_rx_get(priv_rx);
478-
if (unlikely(!queue_work(resync->priv->tls->rx_wq, &resync->work)))
488+
if (unlikely(!queue_work(resync->priv->tls->rx_wq, &resync->work))) {
479489
mlx5e_ktls_priv_rx_put(priv_rx);
490+
return false;
491+
}
480492

481493
return true;
482494
}
@@ -561,6 +573,18 @@ void mlx5e_ktls_rx_resync(struct net_device *netdev, struct sock *sk,
561573
resync_handle_seq_match(priv_rx, c);
562574
}
563575

576+
void
577+
mlx5e_ktls_rx_resync_async_request_cancel(struct mlx5e_icosq_wqe_info *wi)
578+
{
579+
struct mlx5e_ktls_offload_context_rx *priv_rx;
580+
struct mlx5e_ktls_rx_resync_buf *buf;
581+
582+
buf = wi->tls_get_params.buf;
583+
priv_rx = buf->priv_rx;
584+
priv_rx->rq_stats->tls_resync_req_skip++;
585+
tls_offload_rx_resync_async_request_cancel(&priv_rx->resync.core);
586+
}
587+
564588
/* End of resync section */
565589

566590
void mlx5e_ktls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb,

drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
2929
void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
3030
struct mlx5e_tx_wqe_info *wi,
3131
u32 *dma_fifo_cc);
32+
33+
void
34+
mlx5e_ktls_rx_resync_async_request_cancel(struct mlx5e_icosq_wqe_info *wi);
35+
3236
static inline bool
3337
mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
3438
struct mlx5e_tx_wqe_info *wi,

drivers/net/ethernet/mellanox/mlx5/core/en_rx.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1036,6 +1036,10 @@ int mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
10361036
netdev_WARN_ONCE(cq->netdev,
10371037
"Bad OP in ICOSQ CQE: 0x%x\n",
10381038
get_cqe_opcode(cqe));
1039+
#ifdef CONFIG_MLX5_EN_TLS
1040+
if (wi->wqe_type == MLX5E_ICOSQ_WQE_GET_PSV_TLS)
1041+
mlx5e_ktls_rx_resync_async_request_cancel(wi);
1042+
#endif
10391043
mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
10401044
(struct mlx5_err_cqe *)cqe);
10411045
mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);

0 commit comments

Comments
 (0)