Skip to content

Commit 0dd1be4

Browse files
committed
Merge branch 'tls-introduce-and-use-rx-async-resync-request-cancel-function'
Tariq Toukan says: ==================== tls: Introduce and use RX async resync request cancel function This series by Shahar introduces an RX async resync request cancel function in the tls module, and uses it in the mlx5e driver. For a device-offloaded TLS RX connection, the TLS module increments rcd_delta each time a new TLS record is received, tracking the distance from the original resync request. Meanwhile, the device is queried and is expected to respond, asynchronously. However, if the device response is delayed or fails (e.g. due to an unstable connection and the device getting out of tracking, hardware errors, resource exhaustion, etc.), the TLS module keeps logging and incrementing rcd_delta, which can lead to a WARN() when rcd_delta exceeds the threshold. This series improves this code area by canceling the resync request when spotting an issue with the device response. ==================== Link: https://patch.msgid.link/1761508983-937977-1-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2 parents e98cda7 + 426e9da commit 0dd1be4

File tree

5 files changed

+59
-19
lines changed

5 files changed

+59
-19
lines changed

drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,6 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq,
320320
err_free:
321321
kfree(buf);
322322
err_out:
323-
priv_rx->rq_stats->tls_resync_req_skip++;
324323
return err;
325324
}
326325

@@ -339,14 +338,19 @@ static void resync_handle_work(struct work_struct *work)
339338

340339
if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) {
341340
mlx5e_ktls_priv_rx_put(priv_rx);
341+
priv_rx->rq_stats->tls_resync_req_skip++;
342+
tls_offload_rx_resync_async_request_cancel(&resync->core);
342343
return;
343344
}
344345

345346
c = resync->priv->channels.c[priv_rx->rxq];
346347
sq = &c->async_icosq;
347348

348-
if (resync_post_get_progress_params(sq, priv_rx))
349+
if (resync_post_get_progress_params(sq, priv_rx)) {
350+
priv_rx->rq_stats->tls_resync_req_skip++;
351+
tls_offload_rx_resync_async_request_cancel(&resync->core);
349352
mlx5e_ktls_priv_rx_put(priv_rx);
353+
}
350354
}
351355

352356
static void resync_init(struct mlx5e_ktls_rx_resync_ctx *resync,
@@ -425,14 +429,21 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
425429
{
426430
struct mlx5e_ktls_rx_resync_buf *buf = wi->tls_get_params.buf;
427431
struct mlx5e_ktls_offload_context_rx *priv_rx;
432+
struct tls_offload_resync_async *async_resync;
433+
struct tls_offload_context_rx *rx_ctx;
428434
u8 tracker_state, auth_state, *ctx;
429435
struct device *dev;
430436
u32 hw_seq;
431437

432438
priv_rx = buf->priv_rx;
433439
dev = mlx5_core_dma_dev(sq->channel->mdev);
434-
if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags)))
440+
rx_ctx = tls_offload_ctx_rx(tls_get_ctx(priv_rx->sk));
441+
async_resync = rx_ctx->resync_async;
442+
if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) {
443+
priv_rx->rq_stats->tls_resync_req_skip++;
444+
tls_offload_rx_resync_async_request_cancel(async_resync);
435445
goto out;
446+
}
436447

437448
dma_sync_single_for_cpu(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE,
438449
DMA_FROM_DEVICE);
@@ -443,11 +454,13 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
443454
if (tracker_state != MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING ||
444455
auth_state != MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD) {
445456
priv_rx->rq_stats->tls_resync_req_skip++;
457+
tls_offload_rx_resync_async_request_cancel(async_resync);
446458
goto out;
447459
}
448460

449461
hw_seq = MLX5_GET(tls_progress_params, ctx, hw_resync_tcp_sn);
450-
tls_offload_rx_resync_async_request_end(priv_rx->sk, cpu_to_be32(hw_seq));
462+
tls_offload_rx_resync_async_request_end(async_resync,
463+
cpu_to_be32(hw_seq));
451464
priv_rx->rq_stats->tls_resync_req_end++;
452465
out:
453466
mlx5e_ktls_priv_rx_put(priv_rx);
@@ -472,8 +485,10 @@ static bool resync_queue_get_psv(struct sock *sk)
472485

473486
resync = &priv_rx->resync;
474487
mlx5e_ktls_priv_rx_get(priv_rx);
475-
if (unlikely(!queue_work(resync->priv->tls->rx_wq, &resync->work)))
488+
if (unlikely(!queue_work(resync->priv->tls->rx_wq, &resync->work))) {
476489
mlx5e_ktls_priv_rx_put(priv_rx);
490+
return false;
491+
}
477492

478493
return true;
479494
}
@@ -482,6 +497,7 @@ static bool resync_queue_get_psv(struct sock *sk)
482497
static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb)
483498
{
484499
struct ethhdr *eth = (struct ethhdr *)(skb->data);
500+
struct tls_offload_resync_async *resync_async;
485501
struct net_device *netdev = rq->netdev;
486502
struct net *net = dev_net(netdev);
487503
struct sock *sk = NULL;
@@ -527,7 +543,8 @@ static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb)
527543

528544
seq = th->seq;
529545
datalen = skb->len - depth;
530-
tls_offload_rx_resync_async_request_start(sk, seq, datalen);
546+
resync_async = tls_offload_ctx_rx(tls_get_ctx(sk))->resync_async;
547+
tls_offload_rx_resync_async_request_start(resync_async, seq, datalen);
531548
rq->stats->tls_resync_req_start++;
532549

533550
unref:
@@ -556,6 +573,18 @@ void mlx5e_ktls_rx_resync(struct net_device *netdev, struct sock *sk,
556573
resync_handle_seq_match(priv_rx, c);
557574
}
558575

576+
void
577+
mlx5e_ktls_rx_resync_async_request_cancel(struct mlx5e_icosq_wqe_info *wi)
578+
{
579+
struct mlx5e_ktls_offload_context_rx *priv_rx;
580+
struct mlx5e_ktls_rx_resync_buf *buf;
581+
582+
buf = wi->tls_get_params.buf;
583+
priv_rx = buf->priv_rx;
584+
priv_rx->rq_stats->tls_resync_req_skip++;
585+
tls_offload_rx_resync_async_request_cancel(&priv_rx->resync.core);
586+
}
587+
559588
/* End of resync section */
560589

561590
void mlx5e_ktls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb,

drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
2929
void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
3030
struct mlx5e_tx_wqe_info *wi,
3131
u32 *dma_fifo_cc);
32+
33+
void
34+
mlx5e_ktls_rx_resync_async_request_cancel(struct mlx5e_icosq_wqe_info *wi);
35+
3236
static inline bool
3337
mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
3438
struct mlx5e_tx_wqe_info *wi,

drivers/net/ethernet/mellanox/mlx5/core/en_rx.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1036,6 +1036,10 @@ int mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
10361036
netdev_WARN_ONCE(cq->netdev,
10371037
"Bad OP in ICOSQ CQE: 0x%x\n",
10381038
get_cqe_opcode(cqe));
1039+
#ifdef CONFIG_MLX5_EN_TLS
1040+
if (wi->wqe_type == MLX5E_ICOSQ_WQE_GET_PSV_TLS)
1041+
mlx5e_ktls_rx_resync_async_request_cancel(wi);
1042+
#endif
10391043
mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
10401044
(struct mlx5_err_cqe *)cqe);
10411045
mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);

include/net/tls.h

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -451,25 +451,26 @@ static inline void tls_offload_rx_resync_request(struct sock *sk, __be32 seq)
451451

452452
/* Log all TLS record header TCP sequences in [seq, seq+len] */
453453
static inline void
454-
tls_offload_rx_resync_async_request_start(struct sock *sk, __be32 seq, u16 len)
454+
tls_offload_rx_resync_async_request_start(struct tls_offload_resync_async *resync_async,
455+
__be32 seq, u16 len)
455456
{
456-
struct tls_context *tls_ctx = tls_get_ctx(sk);
457-
struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx);
458-
459-
atomic64_set(&rx_ctx->resync_async->req, ((u64)ntohl(seq) << 32) |
457+
atomic64_set(&resync_async->req, ((u64)ntohl(seq) << 32) |
460458
((u64)len << 16) | RESYNC_REQ | RESYNC_REQ_ASYNC);
461-
rx_ctx->resync_async->loglen = 0;
462-
rx_ctx->resync_async->rcd_delta = 0;
459+
resync_async->loglen = 0;
460+
resync_async->rcd_delta = 0;
463461
}
464462

465463
static inline void
466-
tls_offload_rx_resync_async_request_end(struct sock *sk, __be32 seq)
464+
tls_offload_rx_resync_async_request_end(struct tls_offload_resync_async *resync_async,
465+
__be32 seq)
467466
{
468-
struct tls_context *tls_ctx = tls_get_ctx(sk);
469-
struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx);
467+
atomic64_set(&resync_async->req, ((u64)ntohl(seq) << 32) | RESYNC_REQ);
468+
}
470469

471-
atomic64_set(&rx_ctx->resync_async->req,
472-
((u64)ntohl(seq) << 32) | RESYNC_REQ);
470+
static inline void
471+
tls_offload_rx_resync_async_request_cancel(struct tls_offload_resync_async *resync_async)
472+
{
473+
atomic64_set(&resync_async->req, 0);
473474
}
474475

475476
static inline void

net/tls/tls_device.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -723,8 +723,10 @@ tls_device_rx_resync_async(struct tls_offload_resync_async *resync_async,
723723
/* shouldn't get to wraparound:
724724
* too long in async stage, something bad happened
725725
*/
726-
if (WARN_ON_ONCE(resync_async->rcd_delta == USHRT_MAX))
726+
if (WARN_ON_ONCE(resync_async->rcd_delta == USHRT_MAX)) {
727+
tls_offload_rx_resync_async_request_cancel(resync_async);
727728
return false;
729+
}
728730

729731
/* asynchronous stage: log all headers seq such that
730732
* req_seq <= seq <= end_seq, and wait for real resync request

0 commit comments

Comments
 (0)