Commit ae71c16

cjubran authored and gregkh committed
net/mlx5e: Reuse per-RQ XDP buffer to avoid stack zeroing overhead
[ Upstream commit b66b76a ]

CONFIG_INIT_STACK_ALL_ZERO introduces a performance cost by
zero-initializing all stack variables on function entry. The mlx5 XDP RX
path previously allocated a struct mlx5e_xdp_buff on the stack per
received CQE, resulting in measurable performance degradation under this
config.

This patch reuses a mlx5e_xdp_buff stored in the mlx5e_rq struct,
avoiding per-CQE stack allocations and repeated zeroing.

With this change, XDP_DROP and XDP_TX performance matches that of kernels
built without CONFIG_INIT_STACK_ALL_ZERO.

Performance was measured on a ConnectX-6 Dx using a single RX channel
(1 CPU at 100% usage) at ~50 Mpps. The baseline results were taken from
net-next-6.15.

Stack zeroing disabled:
- XDP_DROP:
  * baseline:                     31.47 Mpps
  * baseline + per-RQ allocation: 32.31 Mpps (+2.68%)
- XDP_TX:
  * baseline:                     12.41 Mpps
  * baseline + per-RQ allocation: 12.95 Mpps (+4.30%)

Stack zeroing enabled:
- XDP_DROP:
  * baseline:                     24.32 Mpps
  * baseline + per-RQ allocation: 32.27 Mpps (+32.7%)
- XDP_TX:
  * baseline:                     11.80 Mpps
  * baseline + per-RQ allocation: 12.24 Mpps (+3.72%)

Reported-by: Sebastiano Miano <mianosebastiano@gmail.com>
Reported-by: Samuel Dobron <sdobron@redhat.com>
Link: https://lore.kernel.org/all/CAMENy5pb8ea+piKLg5q5yRTMZacQqYWAoVLE1FE9WhQPq92E0g@mail.gmail.com/
Signed-off-by: Carolina Jubran <cjubran@nvidia.com>
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
Link: https://patch.msgid.link/1747253032-663457-1-git-send-email-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Stable-dep-of: afd5ba5 ("net/mlx5e: RX, Fix generating skb from non-linear xdp_buff for legacy RQ")
Signed-off-by: Sasha Levin <sashal@kernel.org>
1 parent: 4688adc
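For context, a minimal self-contained model of the pattern the commit message describes. This is illustrative only: the types and function names below are simplified stand-ins, not the driver's mlx5e_xdp_buff/mlx5e_fill_mxbuf API.

#include <stddef.h>

/* Simplified stand-ins for xdp_buff and mlx5e_rq. */
struct xbuf_model {
	void *data;
	void *data_end;
};

struct rq_model {
	struct xbuf_model mxbuf;	/* allocated once with the RQ */
};

/* Before: one buffer on the stack per packet. Under
 * CONFIG_INIT_STACK_ALL_ZERO the compiler zero-fills the whole struct
 * on every call -- the overhead the patch removes. */
static void handle_packet_stack(void *pkt, size_t len)
{
	struct xbuf_model mxbuf = { 0 };	/* per-call zeroing cost */

	mxbuf.data = pkt;
	mxbuf.data_end = (char *)pkt + len;
	/* ... run the XDP program against &mxbuf ... */
}

/* After: reuse the per-RQ buffer. Every field read downstream is
 * rewritten here for each packet, so nothing stale leaks between CQEs. */
static void handle_packet_rq(struct rq_model *rq, void *pkt, size_t len)
{
	struct xbuf_model *mxbuf = &rq->mxbuf;

	mxbuf->data = pkt;
	mxbuf->data_end = (char *)pkt + len;
	/* ... run the XDP program against mxbuf ... */
}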

3 files changed (+51, -43 lines)

drivers/net/ethernet/mellanox/mlx5/core/en.h

Lines changed: 7 additions & 0 deletions
@@ -512,6 +512,12 @@ struct mlx5e_xdpsq {
 	struct mlx5e_channel *channel;
 } ____cacheline_aligned_in_smp;
 
+struct mlx5e_xdp_buff {
+	struct xdp_buff xdp;
+	struct mlx5_cqe64 *cqe;
+	struct mlx5e_rq *rq;
+};
+
 struct mlx5e_ktls_resync_resp;
 
 struct mlx5e_icosq {
@@ -710,6 +716,7 @@ struct mlx5e_rq {
 	struct mlx5e_xdpsq *xdpsq;
 	DECLARE_BITMAP(flags, 8);
 	struct page_pool *page_pool;
+	struct mlx5e_xdp_buff mxbuf;
 
 	/* AF_XDP zero-copy */
 	struct xsk_buff_pool *xsk_pool;
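A note on why the struct definition moved here from en/xdp.h (the next file): mlx5e_rq now embeds the buffer by value, and C requires a complete type at that point. Presumably en.h cannot pull the definition in from en/xdp.h, since that header itself includes en.h. Condensed from the hunks above:

/* Condensed excerpt of the resulting en.h layout: the buffer type is
 * complete before mlx5e_rq embeds it by value. */
struct mlx5e_xdp_buff {		/* now visible ahead of mlx5e_rq */
	struct xdp_buff xdp;
	struct mlx5_cqe64 *cqe;
	struct mlx5e_rq *rq;
};

struct mlx5e_rq {
	/* ... */
	struct page_pool *page_pool;
	struct mlx5e_xdp_buff mxbuf;	/* embedded by value, reused per CQE */
	/* ... */
};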

drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h

Lines changed: 0 additions & 6 deletions
@@ -45,12 +45,6 @@
 	(MLX5E_XDP_INLINE_WQE_MAX_DS_CNT * MLX5_SEND_WQE_DS - \
 	 sizeof(struct mlx5_wqe_inline_seg))
 
-struct mlx5e_xdp_buff {
-	struct xdp_buff xdp;
-	struct mlx5_cqe64 *cqe;
-	struct mlx5e_rq *rq;
-};
-
 /* XDP packets can be transmitted in different ways. On completion, we need to
  * distinguish between them to clean up things in a proper way.
  */

drivers/net/ethernet/mellanox/mlx5/core/en_rx.c

Lines changed: 44 additions & 37 deletions
@@ -1691,17 +1691,17 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
 
 	prog = rcu_dereference(rq->xdp_prog);
 	if (prog) {
-		struct mlx5e_xdp_buff mxbuf;
+		struct mlx5e_xdp_buff *mxbuf = &rq->mxbuf;
 
 		net_prefetchw(va); /* xdp_frame data area */
 		mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz,
-				 cqe_bcnt, &mxbuf);
-		if (mlx5e_xdp_handle(rq, prog, &mxbuf))
+				 cqe_bcnt, mxbuf);
+		if (mlx5e_xdp_handle(rq, prog, mxbuf))
 			return NULL; /* page/packet was consumed by XDP */
 
-		rx_headroom = mxbuf.xdp.data - mxbuf.xdp.data_hard_start;
-		metasize = mxbuf.xdp.data - mxbuf.xdp.data_meta;
-		cqe_bcnt = mxbuf.xdp.data_end - mxbuf.xdp.data;
+		rx_headroom = mxbuf->xdp.data - mxbuf->xdp.data_hard_start;
+		metasize = mxbuf->xdp.data - mxbuf->xdp.data_meta;
+		cqe_bcnt = mxbuf->xdp.data_end - mxbuf->xdp.data;
 	}
 	frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
 	skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize);
@@ -1720,11 +1720,11 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
 			     struct mlx5_cqe64 *cqe, u32 cqe_bcnt)
 {
 	struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
+	struct mlx5e_xdp_buff *mxbuf = &rq->mxbuf;
 	struct mlx5e_wqe_frag_info *head_wi = wi;
 	u16 rx_headroom = rq->buff.headroom;
 	struct mlx5e_frag_page *frag_page;
 	struct skb_shared_info *sinfo;
-	struct mlx5e_xdp_buff mxbuf;
 	u32 frag_consumed_bytes;
 	struct bpf_prog *prog;
 	struct sk_buff *skb;
@@ -1744,8 +1744,8 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
 	net_prefetch(va + rx_headroom);
 
 	mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz,
-			 frag_consumed_bytes, &mxbuf);
-	sinfo = xdp_get_shared_info_from_buff(&mxbuf.xdp);
+			 frag_consumed_bytes, mxbuf);
+	sinfo = xdp_get_shared_info_from_buff(&mxbuf->xdp);
 	truesize = 0;
 
 	cqe_bcnt -= frag_consumed_bytes;
@@ -1757,8 +1757,9 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
 
 		frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt);
 
-		mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf.xdp, frag_page,
-					       wi->offset, frag_consumed_bytes);
+		mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf->xdp,
+					       frag_page, wi->offset,
+					       frag_consumed_bytes);
 		truesize += frag_info->frag_stride;
 
 		cqe_bcnt -= frag_consumed_bytes;
@@ -1767,7 +1768,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
 	}
 
 	prog = rcu_dereference(rq->xdp_prog);
-	if (prog && mlx5e_xdp_handle(rq, prog, &mxbuf)) {
+	if (prog && mlx5e_xdp_handle(rq, prog, mxbuf)) {
 		if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
 			struct mlx5e_wqe_frag_info *pwi;
 
@@ -1777,21 +1778,23 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
 		return NULL; /* page/packet was consumed by XDP */
 	}
 
-	skb = mlx5e_build_linear_skb(rq, mxbuf.xdp.data_hard_start, rq->buff.frame0_sz,
-				     mxbuf.xdp.data - mxbuf.xdp.data_hard_start,
-				     mxbuf.xdp.data_end - mxbuf.xdp.data,
-				     mxbuf.xdp.data - mxbuf.xdp.data_meta);
+	skb = mlx5e_build_linear_skb(
+		rq, mxbuf->xdp.data_hard_start, rq->buff.frame0_sz,
+		mxbuf->xdp.data - mxbuf->xdp.data_hard_start,
+		mxbuf->xdp.data_end - mxbuf->xdp.data,
+		mxbuf->xdp.data - mxbuf->xdp.data_meta);
 	if (unlikely(!skb))
 		return NULL;
 
 	skb_mark_for_recycle(skb);
 	head_wi->frag_page->frags++;
 
-	if (xdp_buff_has_frags(&mxbuf.xdp)) {
+	if (xdp_buff_has_frags(&mxbuf->xdp)) {
 		/* sinfo->nr_frags is reset by build_skb, calculate again. */
 		xdp_update_skb_shared_info(skb, wi - head_wi - 1,
 					   sinfo->xdp_frags_size, truesize,
-					   xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp));
+					   xdp_buff_is_frag_pfmemalloc(
+						   &mxbuf->xdp));
 
 		for (struct mlx5e_wqe_frag_info *pwi = head_wi + 1; pwi < wi; pwi++)
 			pwi->frag_page->frags++;
@@ -1991,10 +1994,10 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
 	struct mlx5e_frag_page *frag_page = &wi->alloc_units.frag_pages[page_idx];
 	u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt);
 	struct mlx5e_frag_page *head_page = frag_page;
+	struct mlx5e_xdp_buff *mxbuf = &rq->mxbuf;
 	u32 frag_offset = head_offset;
 	u32 byte_cnt = cqe_bcnt;
 	struct skb_shared_info *sinfo;
-	struct mlx5e_xdp_buff mxbuf;
 	unsigned int truesize = 0;
 	struct bpf_prog *prog;
 	struct sk_buff *skb;
@@ -2040,9 +2043,10 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
 		}
 	}
 
-	mlx5e_fill_mxbuf(rq, cqe, va, linear_hr, linear_frame_sz, linear_data_len, &mxbuf);
+	mlx5e_fill_mxbuf(rq, cqe, va, linear_hr, linear_frame_sz,
+			 linear_data_len, mxbuf);
 
-	sinfo = xdp_get_shared_info_from_buff(&mxbuf.xdp);
+	sinfo = xdp_get_shared_info_from_buff(&mxbuf->xdp);
 
 	while (byte_cnt) {
 		/* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */
@@ -2053,15 +2057,16 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
 		else
 			truesize += ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz));
 
-		mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf.xdp, frag_page, frag_offset,
+		mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf->xdp,
+					       frag_page, frag_offset,
 					       pg_consumed_bytes);
 		byte_cnt -= pg_consumed_bytes;
 		frag_offset = 0;
 		frag_page++;
 	}
 
 	if (prog) {
-		if (mlx5e_xdp_handle(rq, prog, &mxbuf)) {
+		if (mlx5e_xdp_handle(rq, prog, mxbuf)) {
 			if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
 				struct mlx5e_frag_page *pfp;
 
@@ -2074,10 +2079,10 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
 			return NULL; /* page/packet was consumed by XDP */
 		}
 
-		skb = mlx5e_build_linear_skb(rq, mxbuf.xdp.data_hard_start,
-					     linear_frame_sz,
-					     mxbuf.xdp.data - mxbuf.xdp.data_hard_start, 0,
-					     mxbuf.xdp.data - mxbuf.xdp.data_meta);
+		skb = mlx5e_build_linear_skb(
+			rq, mxbuf->xdp.data_hard_start, linear_frame_sz,
+			mxbuf->xdp.data - mxbuf->xdp.data_hard_start, 0,
+			mxbuf->xdp.data - mxbuf->xdp.data_meta);
 		if (unlikely(!skb)) {
 			mlx5e_page_release_fragmented(rq, &wi->linear_page);
 			return NULL;
@@ -2087,13 +2092,14 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
 		wi->linear_page.frags++;
 		mlx5e_page_release_fragmented(rq, &wi->linear_page);
 
-		if (xdp_buff_has_frags(&mxbuf.xdp)) {
+		if (xdp_buff_has_frags(&mxbuf->xdp)) {
 			struct mlx5e_frag_page *pagep;
 
 			/* sinfo->nr_frags is reset by build_skb, calculate again. */
 			xdp_update_skb_shared_info(skb, frag_page - head_page,
 						   sinfo->xdp_frags_size, truesize,
-						   xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp));
+						   xdp_buff_is_frag_pfmemalloc(
+							   &mxbuf->xdp));
 
 			pagep = head_page;
 			do
@@ -2104,12 +2110,13 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
 	} else {
 		dma_addr_t addr;
 
-		if (xdp_buff_has_frags(&mxbuf.xdp)) {
+		if (xdp_buff_has_frags(&mxbuf->xdp)) {
 			struct mlx5e_frag_page *pagep;
 
 			xdp_update_skb_shared_info(skb, sinfo->nr_frags,
 						   sinfo->xdp_frags_size, truesize,
-						   xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp));
+						   xdp_buff_is_frag_pfmemalloc(
+							   &mxbuf->xdp));
 
 			pagep = frag_page - sinfo->nr_frags;
 			do
@@ -2159,20 +2166,20 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
 
 	prog = rcu_dereference(rq->xdp_prog);
 	if (prog) {
-		struct mlx5e_xdp_buff mxbuf;
+		struct mlx5e_xdp_buff *mxbuf = &rq->mxbuf;
 
 		net_prefetchw(va); /* xdp_frame data area */
 		mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz,
-				 cqe_bcnt, &mxbuf);
-		if (mlx5e_xdp_handle(rq, prog, &mxbuf)) {
+				 cqe_bcnt, mxbuf);
+		if (mlx5e_xdp_handle(rq, prog, mxbuf)) {
 			if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
 				frag_page->frags++;
 			return NULL; /* page/packet was consumed by XDP */
 		}
 
-		rx_headroom = mxbuf.xdp.data - mxbuf.xdp.data_hard_start;
-		metasize = mxbuf.xdp.data - mxbuf.xdp.data_meta;
-		cqe_bcnt = mxbuf.xdp.data_end - mxbuf.xdp.data;
+		rx_headroom = mxbuf->xdp.data - mxbuf->xdp.data_hard_start;
+		metasize = mxbuf->xdp.data - mxbuf->xdp.data_meta;
+		cqe_bcnt = mxbuf->xdp.data_end - mxbuf->xdp.data;
 	}
 	frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
 	skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize);
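Why a single buffer per RQ suffices is not spelled out in the patch itself; the working assumption is that each RQ is serviced by one NAPI poll loop, so CQEs are handled strictly one at a time and rq->mxbuf is never live for two packets at once. A minimal sketch of that serialization, with illustrative names (next_cqe and process_cqe are stand-ins, not driver functions):

struct cqe;				/* opaque completion entry */

struct xbuf {
	struct cqe *cqe;
	void *data;
};

struct rq {
	struct xbuf mxbuf;		/* the one reusable buffer */
};

/* Illustrative stubs, not the driver's API. */
struct cqe *next_cqe(struct rq *rq);
void process_cqe(struct rq *rq, struct xbuf *b);

static int napi_poll(struct rq *rq, int budget)
{
	int done = 0;

	while (done < budget) {
		struct cqe *cqe = next_cqe(rq);

		if (!cqe)
			break;
		/* Strictly serial per RQ: the previous packet is fully
		 * handled before mxbuf is overwritten for the next one. */
		rq->mxbuf.cqe = cqe;
		process_cqe(rq, &rq->mxbuf);
		done++;
	}
	return done;
}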
