Skip to content

Commit e39cc8c

Browse files
committed
Merge: CNB97: net: Add support for per-NAPI config via netlink
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/6687
JIRA: https://issues.redhat.com/browse/RHEL-77816
Upstream Status: all mainline in net-next.git
Tested: boot-tested only
Conflicts: see individual patches
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Approved-by: Sabrina Dubroca <sdubroca@redhat.com>
Approved-by: Ivan Vecera <ivecera@redhat.com>
Approved-by: mheib <mheib@redhat.com>
Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com>
Merged-by: Augusto Caringi <acaringi@redhat.com>
2 parents 39c1614 + 3e4d79c commit e39cc8c

File tree

24 files changed

+402
-86
lines changed

24 files changed

+402
-86
lines changed

Documentation/netlink/specs/netdev.yaml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,21 @@ attribute-sets:
244244
threaded mode. If NAPI is not in threaded mode (i.e. uses normal
245245
softirq context), the attribute will be absent.
246246
type: u32
247+
-
248+
name: defer-hard-irqs
249+
doc: The number of consecutive empty polls before IRQ deferral ends
250+
and hardware IRQs are re-enabled.
251+
type: u32
252+
checks:
253+
max: s32-max
254+
-
255+
name: gro-flush-timeout
256+
doc: The timeout, in nanoseconds, of when to trigger the NAPI watchdog
257+
timer which schedules NAPI processing. Additionally, a non-zero
258+
value will also prevent GRO from flushing recent super-frames at
259+
the end of a NAPI cycle. This may add receive latency in exchange
260+
for reducing the number of frames processed by the network stack.
261+
type: uint
247262
-
248263
name: queue
249264
attributes:
@@ -593,6 +608,8 @@ operations:
593608
- ifindex
594609
- irq
595610
- pid
611+
- defer-hard-irqs
612+
- gro-flush-timeout
596613
dump:
597614
request:
598615
attributes:
@@ -619,6 +636,17 @@ operations:
619636
- rx-bytes
620637
- tx-packets
621638
- tx-bytes
639+
-
640+
name: napi-set
641+
doc: Set configurable NAPI instance settings.
642+
attribute-set: napi
643+
flags: [ admin-perm ]
644+
do:
645+
request:
646+
attributes:
647+
- id
648+
- defer-hard-irqs
649+
- gro-flush-timeout
622650

623651
mcast-groups:
624652
list:

Documentation/networking/net_cachelines/net_device.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,3 +181,6 @@ netdevice_tracker dev_registered_tracker
181181
struct_rtnl_hw_stats64* offload_xstats_l3
182182
struct_devlink_port* devlink_port
183183
struct_dpll_pin* dpll_pin
184+
struct_napi_config* napi_config
185+
unsigned_long gro_flush_timeout
186+
u32 napi_defer_hard_irqs

drivers/net/ethernet/broadcom/bnxt/bnxt.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11021,7 +11021,8 @@ static void bnxt_init_napi(struct bnxt *bp)
1102111021
cp_nr_rings--;
1102211022
for (i = 0; i < cp_nr_rings; i++) {
1102311023
bnapi = bp->bnapi[i];
11024-
netif_napi_add(bp->dev, &bnapi->napi, poll_fn);
11024+
netif_napi_add_config(bp->dev, &bnapi->napi, poll_fn,
11025+
bnapi->index);
1102511026
}
1102611027
if (BNXT_CHIP_TYPE_NITRO_A0(bp)) {
1102711028
bnapi = bp->bnapi[cp_nr_rings];

drivers/net/ethernet/mellanox/mlx4/en_cq.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,8 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
156156
break;
157157
case RX:
158158
cq->mcq.comp = mlx4_en_rx_irq;
159-
netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq);
159+
netif_napi_add_config(cq->dev, &cq->napi, mlx4_en_poll_rx_cq,
160+
cq_idx);
160161
netif_napi_set_irq(&cq->napi, irq);
161162
napi_enable(&cq->napi);
162163
netif_queue_set_napi(cq->dev, cq_idx, NETDEV_QUEUE_TYPE_RX, &cq->napi);

drivers/net/ethernet/mellanox/mlx5/core/en_main.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2704,7 +2704,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
27042704
c->aff_mask = irq_get_effective_affinity_mask(irq);
27052705
c->lag_port = mlx5e_enumerate_lag_port(mdev, ix);
27062706

2707-
netif_napi_add(netdev, &c->napi, mlx5e_napi_poll);
2707+
netif_napi_add_config(netdev, &c->napi, mlx5e_napi_poll, ix);
27082708
netif_napi_set_irq(&c->napi, irq);
27092709

27102710
err = mlx5e_open_queues(c, params, cparam);

include/linux/netdevice.h

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,15 @@ struct gro_list {
333333
*/
334334
#define GRO_HASH_BUCKETS 8
335335

336+
/*
337+
* Structure for per-NAPI config
338+
*/
339+
struct napi_config {
340+
u64 gro_flush_timeout;
341+
u32 defer_hard_irqs;
342+
unsigned int napi_id;
343+
};
344+
336345
/*
337346
* Structure for NAPI scheduling similar to tasklet but with weighting
338347
*/
@@ -364,10 +373,14 @@ struct napi_struct {
364373
unsigned int napi_id;
365374
struct hrtimer timer;
366375
struct task_struct *thread;
376+
unsigned long gro_flush_timeout;
377+
u32 defer_hard_irqs;
367378
/* control-path-only fields follow */
368379
struct list_head dev_list;
369380
struct hlist_node napi_hash_node;
370381
int irq;
382+
int index;
383+
struct napi_config *config;
371384

372385
RH_KABI_RESERVE(1)
373386
RH_KABI_RESERVE(2)
@@ -1928,9 +1941,6 @@ enum netdev_reg_state {
19281941
* allocated at register_netdev() time
19291942
* @real_num_rx_queues: Number of RX queues currently active in device
19301943
* @xdp_prog: XDP sockets filter program pointer
1931-
* @gro_flush_timeout: timeout for GRO layer in NAPI
1932-
* @napi_defer_hard_irqs: If not zero, provides a counter that would
1933-
* allow to avoid NIC hard IRQ, on busy queues.
19341944
*
19351945
* @rx_handler: handler for received packets
19361946
* @rx_handler_data: XXX: need comments on this one
@@ -2078,6 +2088,12 @@ enum netdev_reg_state {
20782088
* @dpll_pin: Pointer to the SyncE source pin of a DPLL subsystem,
20792089
* where the clock is recovered.
20802090
*
2091+
* @napi_config: An array of napi_config structures containing per-NAPI
2092+
* settings.
2093+
* @gro_flush_timeout: timeout for GRO layer in NAPI
2094+
* @napi_defer_hard_irqs: If not zero, provides a counter that would
2095+
* allow to avoid NIC hard IRQ, on busy queues.
2096+
*
20812097
* FIXME: cleanup struct net_device such that network protocol info
20822098
* moves out.
20832099
*/
@@ -2143,8 +2159,6 @@ struct net_device {
21432159
int ifindex;
21442160
unsigned int real_num_rx_queues;
21452161
struct netdev_rx_queue *_rx;
2146-
unsigned long gro_flush_timeout;
2147-
u32 napi_defer_hard_irqs;
21482162
unsigned int gro_max_size;
21492163
unsigned int gro_ipv4_max_size;
21502164
rx_handler_func_t __rcu *rx_handler;
@@ -2473,6 +2487,9 @@ struct net_device {
24732487

24742488
/** @irq_moder: dim parameters used if IS_ENABLED(CONFIG_DIMLIB). */
24752489
struct dim_irq_moder *irq_moder;
2490+
struct napi_config *napi_config;
2491+
unsigned long gro_flush_timeout;
2492+
u32 napi_defer_hard_irqs;
24762493

24772494
RH_KABI_RESERVE(1)
24782495
RH_KABI_RESERVE(2)
@@ -2758,6 +2775,22 @@ netif_napi_add_tx_weight(struct net_device *dev,
27582775
netif_napi_add_weight(dev, napi, poll, weight);
27592776
}
27602777

2778+
/**
2779+
* netif_napi_add_config - initialize a NAPI context with persistent config
2780+
* @dev: network device
2781+
* @napi: NAPI context
2782+
* @poll: polling function
2783+
* @index: the NAPI index
2784+
*/
2785+
static inline void
2786+
netif_napi_add_config(struct net_device *dev, struct napi_struct *napi,
2787+
int (*poll)(struct napi_struct *, int), int index)
2788+
{
2789+
napi->index = index;
2790+
napi->config = &dev->napi_config[index];
2791+
netif_napi_add_weight(dev, napi, poll, NAPI_POLL_WEIGHT);
2792+
}
2793+
27612794
/**
27622795
* netif_napi_add_tx() - initialize a NAPI context to be used for Tx only
27632796
* @dev: network device
@@ -3251,7 +3284,6 @@ struct net_device *netdev_get_by_index(struct net *net, int ifindex,
32513284
struct net_device *netdev_get_by_name(struct net *net, const char *name,
32523285
netdevice_tracker *tracker, gfp_t gfp);
32533286
struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
3254-
struct net_device *dev_get_by_napi_id(unsigned int napi_id);
32553287
void netdev_copy_name(struct net_device *dev, char *name);
32563288

32573289
static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,

include/net/busy_poll.h

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -156,12 +156,4 @@ static inline void sk_mark_napi_id_once(struct sock *sk,
156156
#endif
157157
}
158158

159-
static inline void sk_mark_napi_id_once_xdp(struct sock *sk,
160-
const struct xdp_buff *xdp)
161-
{
162-
#ifdef CONFIG_NET_RX_BUSY_POLL
163-
__sk_mark_napi_id_once(sk, xdp->rxq->napi_id);
164-
#endif
165-
}
166-
167159
#endif /* _LINUX_NET_BUSY_POLL_H */

include/net/page_pool/types.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,6 @@ struct page_pool {
232232
struct {
233233
struct hlist_node list;
234234
u64 detach_time;
235-
u32 napi_id;
236235
u32 id;
237236
} user;
238237
};

include/net/xdp.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,6 @@ struct xdp_rxq_info {
6363
u32 queue_index;
6464
u32 reg_state;
6565
struct xdp_mem_info mem;
66-
unsigned int napi_id;
6766
u32 frag_size;
6867
} ____cacheline_aligned; /* perf critical, avoid false-sharing */
6968

include/net/xdp_sock_drv.h

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -59,15 +59,6 @@ static inline void xsk_pool_fill_cb(struct xsk_buff_pool *pool,
5959
xp_fill_cb(pool, desc);
6060
}
6161

62-
static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool)
63-
{
64-
#ifdef CONFIG_NET_RX_BUSY_POLL
65-
return pool->heads[0].xdp.rxq->napi_id;
66-
#else
67-
return 0;
68-
#endif
69-
}
70-
7162
static inline void xsk_pool_dma_unmap(struct xsk_buff_pool *pool,
7263
unsigned long attrs)
7364
{
@@ -306,11 +297,6 @@ static inline void xsk_pool_fill_cb(struct xsk_buff_pool *pool,
306297
{
307298
}
308299

309-
static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool)
310-
{
311-
return 0;
312-
}
313-
314300
static inline void xsk_pool_dma_unmap(struct xsk_buff_pool *pool,
315301
unsigned long attrs)
316302
{

0 commit comments

Comments
 (0)