Skip to content

Commit fbc5505

Browse files
author
CKI KWF Bot
committed
Merge: net: introduce per netns packet chains
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-10/-/merge_requests/823
JIRA: https://issues.redhat.com/browse/RHEL-83393
Tested: vs issue reproducer

Per-netns packet chains prevent unrelated workloads in different netns from interfering with each other and causing an incremental slow-down on each new tap.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Approved-by: Sabrina Dubroca <sdubroca@redhat.com>
Approved-by: Antoine Tenart <atenart@redhat.com>
Approved-by: Xin Long <lxin@redhat.com>
Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com>
Merged-by: CKI GitLab Kmaint Pipeline Bot <26919896-cki-kmaint-pipeline-bot@users.noreply.gitlab.com>
2 parents 6d0f55d + 7c7bec9 commit fbc5505

File tree

7 files changed

+79
-22
lines changed

7 files changed

+79
-22
lines changed

include/linux/netdevice.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4076,7 +4076,17 @@ static __always_inline int ____dev_forward_skb(struct net_device *dev,
40764076
return 0;
40774077
}
40784078

4079-
bool dev_nit_active(struct net_device *dev);
4079+
bool dev_nit_active_rcu(const struct net_device *dev);
4080+
static inline bool dev_nit_active(const struct net_device *dev)
4081+
{
4082+
bool ret;
4083+
4084+
rcu_read_lock();
4085+
ret = dev_nit_active_rcu(dev);
4086+
rcu_read_unlock();
4087+
return ret;
4088+
}
4089+
40804090
void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
40814091

40824092
static inline void __dev_put(struct net_device *dev)

include/net/hotdata.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ struct net_hotdata {
2323
struct net_offload udpv6_offload;
2424
#endif
2525
struct list_head offload_base;
26-
struct list_head ptype_all;
2726
struct kmem_cache *skbuff_cache;
2827
struct kmem_cache *skbuff_fclone_cache;
2928
struct kmem_cache *skb_small_head_cache;

include/net/net_namespace.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,9 @@ struct net {
8383
struct llist_node defer_free_list;
8484
struct llist_node cleanup_list; /* namespaces on death row */
8585

86+
struct list_head ptype_all;
87+
struct list_head ptype_specific;
88+
8689
#ifdef CONFIG_KEYS
8790
struct key_tag *key_domain; /* Key domain of operation tag */
8891
#endif

net/core/dev.c

Lines changed: 41 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -570,10 +570,18 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
570570

571571
static inline struct list_head *ptype_head(const struct packet_type *pt)
572572
{
573-
if (pt->type == htons(ETH_P_ALL))
574-
return pt->dev ? &pt->dev->ptype_all : &net_hotdata.ptype_all;
575-
else
576-
return pt->dev ? &pt->dev->ptype_specific :
573+
if (pt->type == htons(ETH_P_ALL)) {
574+
if (!pt->af_packet_net && !pt->dev)
575+
return NULL;
576+
577+
return pt->dev ? &pt->dev->ptype_all :
578+
&pt->af_packet_net->ptype_all;
579+
}
580+
581+
if (pt->dev)
582+
return &pt->dev->ptype_specific;
583+
584+
return pt->af_packet_net ? &pt->af_packet_net->ptype_specific :
577585
&ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
578586
}
579587

@@ -594,6 +602,9 @@ void dev_add_pack(struct packet_type *pt)
594602
{
595603
struct list_head *head = ptype_head(pt);
596604

605+
if (WARN_ON_ONCE(!head))
606+
return;
607+
597608
spin_lock(&ptype_lock);
598609
list_add_rcu(&pt->list, head);
599610
spin_unlock(&ptype_lock);
@@ -618,6 +629,9 @@ void __dev_remove_pack(struct packet_type *pt)
618629
struct list_head *head = ptype_head(pt);
619630
struct packet_type *pt1;
620631

632+
if (!head)
633+
return;
634+
621635
spin_lock(&ptype_lock);
622636

623637
list_for_each_entry(pt1, head, list) {
@@ -2332,16 +2346,21 @@ static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
23322346
}
23332347

23342348
/**
2335-
* dev_nit_active - return true if any network interface taps are in use
2349+
* dev_nit_active_rcu - return true if any network interface taps are in use
2350+
*
2351+
* The caller must hold the RCU lock
23362352
*
23372353
* @dev: network device to check for the presence of taps
23382354
*/
2339-
bool dev_nit_active(struct net_device *dev)
2355+
bool dev_nit_active_rcu(const struct net_device *dev)
23402356
{
2341-
return !list_empty(&net_hotdata.ptype_all) ||
2357+
/* Callers may hold either RCU or RCU BH lock */
2358+
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
2359+
2360+
return !list_empty(&dev_net(dev)->ptype_all) ||
23422361
!list_empty(&dev->ptype_all);
23432362
}
2344-
EXPORT_SYMBOL_GPL(dev_nit_active);
2363+
EXPORT_SYMBOL_GPL(dev_nit_active_rcu);
23452364

23462365
/*
23472366
* Support routine. Sends outgoing frames to any network
@@ -2350,11 +2369,12 @@ EXPORT_SYMBOL_GPL(dev_nit_active);
23502369

23512370
void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
23522371
{
2353-
struct list_head *ptype_list = &net_hotdata.ptype_all;
23542372
struct packet_type *ptype, *pt_prev = NULL;
2373+
struct list_head *ptype_list;
23552374
struct sk_buff *skb2 = NULL;
23562375

23572376
rcu_read_lock();
2377+
ptype_list = &dev_net_rcu(dev)->ptype_all;
23582378
again:
23592379
list_for_each_entry_rcu(ptype, ptype_list, list) {
23602380
if (READ_ONCE(ptype->ignore_outgoing))
@@ -2398,7 +2418,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
23982418
pt_prev = ptype;
23992419
}
24002420

2401-
if (ptype_list == &net_hotdata.ptype_all) {
2421+
if (ptype_list != &dev->ptype_all) {
24022422
ptype_list = &dev->ptype_all;
24032423
goto again;
24042424
}
@@ -3641,7 +3661,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev,
36413661
unsigned int len;
36423662
int rc;
36433663

3644-
if (dev_nit_active(dev))
3664+
if (dev_nit_active_rcu(dev))
36453665
dev_queue_xmit_nit(skb, dev);
36463666

36473667
len = skb->len;
@@ -5578,7 +5598,8 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
55785598
if (pfmemalloc)
55795599
goto skip_taps;
55805600

5581-
list_for_each_entry_rcu(ptype, &net_hotdata.ptype_all, list) {
5601+
list_for_each_entry_rcu(ptype, &dev_net_rcu(skb->dev)->ptype_all,
5602+
list) {
55825603
if (pt_prev)
55835604
ret = deliver_skb(skb, pt_prev, orig_dev);
55845605
pt_prev = ptype;
@@ -5690,6 +5711,14 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
56905711
deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
56915712
&ptype_base[ntohs(type) &
56925713
PTYPE_HASH_MASK]);
5714+
5715+
/* orig_dev and skb->dev could belong to different netns;
5716+
* Even in such case we need to traverse only the list
5717+
* coming from skb->dev, as the ptype owner (packet socket)
5718+
* will use dev_net(skb->dev) to do namespace filtering.
5719+
*/
5720+
deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
5721+
&dev_net_rcu(skb->dev)->ptype_specific);
56935722
}
56945723

56955724
deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,

net/core/hotdata.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
struct net_hotdata net_hotdata __cacheline_aligned = {
99
.offload_base = LIST_HEAD_INIT(net_hotdata.offload_base),
10-
.ptype_all = LIST_HEAD_INIT(net_hotdata.ptype_all),
1110
.gro_normal_batch = 8,
1211

1312
.netdev_budget = 300,

net/core/net-procfs.c

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,13 @@ static void *ptype_get_idx(struct seq_file *seq, loff_t pos)
185185
}
186186
}
187187

188-
list_for_each_entry_rcu(pt, &net_hotdata.ptype_all, list) {
188+
list_for_each_entry_rcu(pt, &seq_file_net(seq)->ptype_all, list) {
189+
if (i == pos)
190+
return pt;
191+
++i;
192+
}
193+
194+
list_for_each_entry_rcu(pt, &seq_file_net(seq)->ptype_specific, list) {
189195
if (i == pos)
190196
return pt;
191197
++i;
@@ -210,6 +216,7 @@ static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
210216

211217
static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
212218
{
219+
struct net *net = seq_file_net(seq);
213220
struct net_device *dev;
214221
struct packet_type *pt;
215222
struct list_head *nxt;
@@ -232,15 +239,22 @@ static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
232239
goto found;
233240
}
234241
}
235-
236-
nxt = net_hotdata.ptype_all.next;
237-
goto ptype_all;
242+
nxt = net->ptype_all.next;
243+
goto net_ptype_all;
238244
}
239245

240-
if (pt->type == htons(ETH_P_ALL)) {
241-
ptype_all:
242-
if (nxt != &net_hotdata.ptype_all)
246+
if (pt->af_packet_net) {
247+
net_ptype_all:
248+
if (nxt != &net->ptype_all && nxt != &net->ptype_specific)
243249
goto found;
250+
251+
if (nxt == &net->ptype_all) {
252+
/* continue with ->ptype_specific if it's not empty */
253+
nxt = net->ptype_specific.next;
254+
if (nxt != &net->ptype_specific)
255+
goto found;
256+
}
257+
244258
hash = 0;
245259
nxt = ptype_base[0].next;
246260
} else

net/core/net_namespace.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,9 @@ static __net_init void preinit_net(struct net *net, struct user_namespace *user_
334334
idr_init(&net->netns_ids);
335335
spin_lock_init(&net->nsid_lock);
336336
mutex_init(&net->ipv4.ra_mutex);
337+
338+
INIT_LIST_HEAD(&net->ptype_all);
339+
INIT_LIST_HEAD(&net->ptype_specific);
337340
preinit_net_sysctl(net);
338341
}
339342

0 commit comments

Comments
 (0)