
Commit c70e368

Benjamin Poirier authored and committed
net/mlx5e: TX, Utilize WQ fragments edge for multi-packet WQEs
JIRA: https://issues.redhat.com/browse/RHEL-72227
JIRA: https://issues.redhat.com/browse/RHEL-73520
Upstream-status: v6.15-rc1

commit 9da10c2
Author: Tariq Toukan <tariqt@nvidia.com>
Date: Wed Mar 19 15:42:26 2025 +0200

net/mlx5e: TX, Utilize WQ fragments edge for multi-packet WQEs

For simplicity reasons, the driver avoids crossing work queue fragment
boundaries within the same TX WQE (Work-Queue Element). Until today, as the
number of packets in a TX MPWQE (Multi-Packet WQE) descriptor is not known
in advance, the driver pre-prepared contiguous memory for the largest
possible WQE. For this, when getting too close to the fragment edge, having
no room for the largest WQE possible, the driver was filling the fragment
remainder with NOP descriptors, aligning the next descriptor to the
beginning of the next fragment.

Generating and handling these NOPs wastes resources, like: CPU cycles,
work-queue entries fetched to the device, and PCI bandwidth.

In this patch, we replace this NOPs filling mechanism in the TX MPWQE flow.
Instead, we utilize the remaining entries of the fragment with a TX MPWQE.
If this room turns out to be too small, we simply open an additional
descriptor starting at the beginning of the next fragment.

Performance benchmark:
uperf test, single server against 3 clients.
TCP multi-stream, bidir, traffic profile "2x350B read, 1400B write".
Bottleneck is in inbound PCI bandwidth (device POV).

+---------------+------------+------------+--------+
|               | Before     | After      |        |
+---------------+------------+------------+--------+
| BW            | 117.4 Gbps | 121.1 Gbps | +3.1%  |
+---------------+------------+------------+--------+
| tx_packets    | 15 M/sec   | 15.5 M/sec | +3.3%  |
+---------------+------------+------------+--------+
| tx_nops       | 3 M/sec    | 0          | -100%  |
+---------------+------------+------------+--------+

Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/1742391746-118647-1-git-send-email-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

Signed-off-by: Benjamin Poirier <bpoirier@redhat.com>
1 parent bd02287 commit c70e368

5 files changed: +25 -9 lines changed
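For context on the NOP padding the commit message describes: the pre-existing helper mlx5e_txqsq_get_next_pi() (whose signature appears in the en/txrx.h hunk header below) is what fills the fragment remainder with NOP WQEs when the requested size does not fit contiguously. The following is a rough, abbreviated sketch of that behavior for orientation only, not the verbatim driver code; names such as mlx5e_post_nop(), mlx5_wq_cyc_get_contig_wqebbs() and sq->db.wqe_info are taken from the mlx5 driver, but the exact body may differ.

/* Abbreviated sketch (not verbatim): pad the rest of the current WQ
 * fragment with NOPs so a full-size WQE can start at the next fragment.
 */
static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size)
{
	struct mlx5_wq_cyc *wq = &sq->wq;
	u16 pi, contig_wqebbs;

	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
	if (unlikely(contig_wqebbs < size)) {
		struct mlx5e_tx_wqe_info *wi, *edge_wi;

		wi = &sq->db.wqe_info[pi];
		edge_wi = wi + contig_wqebbs;

		/* Burn the remainder of the fragment as NOP WQEBBs. */
		for (; wi < edge_wi; wi++) {
			*wi = (struct mlx5e_tx_wqe_info) { .num_wqebbs = 1 };
			mlx5e_post_nop(wq, sq->sqn, &sq->pc);
		}
		sq->stats->nop += contig_wqebbs;

		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	}

	return pi;
}

These are exactly the NOPs counted by the tx_nops row in the benchmark table above; the new mlx5e_txqsq_get_next_pi_anysize() helper in the en/txrx.h diff avoids them for the MPWQE path by returning whatever contiguous room remains instead of padding.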

drivers/net/ethernet/mellanox/mlx5/core/en.h

Lines changed: 1 addition & 0 deletions
@@ -398,6 +398,7 @@ struct mlx5e_tx_mpwqe {
 	struct mlx5e_tx_wqe *wqe;
 	u32 bytes_count;
 	u8 ds_count;
+	u8 ds_count_max;
 	u8 pkt_count;
 	u8 inline_on;
 };

drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h

Lines changed: 15 additions & 2 deletions
@@ -214,6 +214,19 @@ static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size)
 	return pi;
 }
 
+static inline u16 mlx5e_txqsq_get_next_pi_anysize(struct mlx5e_txqsq *sq,
+						  u16 *size)
+{
+	struct mlx5_wq_cyc *wq = &sq->wq;
+	u16 pi, contig_wqebbs;
+
+	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+	contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+	*size = min_t(u16, contig_wqebbs, sq->max_sq_mpw_wqebbs);
+
+	return pi;
+}
+
 void mlx5e_txqsq_wake(struct mlx5e_txqsq *sq);
 
 static inline u16 mlx5e_shampo_get_cqe_header_index(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)

@@ -358,9 +371,9 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
 
 void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq);
 
-static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs)
+static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session)
 {
-	return session->ds_count == max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS;
+	return session->ds_count == session->ds_count_max;
 }
 
 static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq)

drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c

Lines changed: 2 additions & 1 deletion
@@ -390,6 +390,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
 		.wqe = wqe,
 		.bytes_count = 0,
 		.ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT,
+		.ds_count_max = sq->max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS,
 		.pkt_count = 0,
 		.inline_on = mlx5e_xdp_get_inline_state(sq, session->inline_on),
 	};

@@ -501,7 +502,7 @@ mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptx
 
 	mlx5e_xdp_mpwqe_add_dseg(sq, p, stats);
 
-	if (unlikely(mlx5e_xdp_mpwqe_is_full(session, sq->max_sq_mpw_wqebbs)))
+	if (unlikely(mlx5e_xdp_mpwqe_is_full(session)))
 		mlx5e_xdp_mpwqe_complete(sq);
 
 	stats->xmit++;

drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h

Lines changed: 3 additions & 3 deletions
@@ -182,13 +182,13 @@ static inline bool mlx5e_xdp_get_inline_state(struct mlx5e_xdpsq *sq, bool cur)
 	return cur;
 }
 
-static inline bool mlx5e_xdp_mpwqe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs)
+static inline bool mlx5e_xdp_mpwqe_is_full(struct mlx5e_tx_mpwqe *session)
 {
 	if (session->inline_on)
 		return session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT >
-		       max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS;
+		       session->ds_count_max;
 
-	return mlx5e_tx_mpwqe_is_full(session, max_sq_mpw_wqebbs);
+	return mlx5e_tx_mpwqe_is_full(session);
 }
 
 struct mlx5e_xdp_wqe_info {

drivers/net/ethernet/mellanox/mlx5/core/en_tx.c

Lines changed: 4 additions & 3 deletions
@@ -525,16 +525,17 @@ static void mlx5e_tx_mpwqe_session_start(struct mlx5e_txqsq *sq,
 {
 	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
 	struct mlx5e_tx_wqe *wqe;
-	u16 pi;
+	u16 pi, num_wqebbs;
 
-	pi = mlx5e_txqsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs);
+	pi = mlx5e_txqsq_get_next_pi_anysize(sq, &num_wqebbs);
 	wqe = MLX5E_TX_FETCH_WQE(sq, pi);
 	net_prefetchw(wqe->data);
 
 	*session = (struct mlx5e_tx_mpwqe) {
 		.wqe = wqe,
 		.bytes_count = 0,
 		.ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT,
+		.ds_count_max = num_wqebbs * MLX5_SEND_WQEBB_NUM_DS,
 		.pkt_count = 0,
 		.inline_on = 0,
 	};

@@ -626,7 +627,7 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	mlx5e_tx_mpwqe_add_dseg(sq, &txd);
 	mlx5e_tx_skb_update_hwts_flags(skb);
 
-	if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe, sq->max_sq_mpw_wqebbs))) {
+	if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe))) {
 		/* Might stop the queue and affect the retval of __netdev_tx_sent_queue. */
 		cseg = mlx5e_tx_mpwqe_session_complete(sq);
 