
Commit 1955673

Author and committer: Xin Long
net: sched: refine software bypass handling in tc_run
JIRA: https://issues.redhat.com/browse/RHEL-60271
JIRA: https://issues.redhat.com/browse/RHEL-61181
Upstream Status: net-next.git
Tested: compile only

commit a12c76a
Author: Xin Long <lucien.xin@gmail.com>
Date:   Wed Jan 15 09:27:54 2025 -0500

    net: sched: refine software bypass handling in tc_run

    This patch addresses issues with filter counting in block (tcf_block),
    particularly for software bypass scenarios, by introducing a more
    accurate mechanism using useswcnt.

    Previously, filtercnt and skipswcnt were introduced by:

      Commit 2081fd3 ("net: sched: cls_api: add filter counter") and
      Commit f631ef3 ("net: sched: cls_api: add skip_sw counter")

    filtercnt tracked all tp (tcf_proto) objects added to a block, and
    skipswcnt counted tp objects with the skipsw attribute set.

    The problem is that a single tp can contain multiple filters, some
    with skipsw and others without, and the current implementation fails
    in that case: when the first filter in a tp has skipsw, both skipswcnt
    and filtercnt are incremented; adding a second filter without skipsw
    to the same tp then does not modify these counters because tp->counted
    is already set.

    This results in software bypass behavior based solely on skipswcnt
    equaling filtercnt, even when the block includes filters without
    skipsw. Consequently, filters without skipsw are inadvertently
    bypassed.

    To address this, the patch introduces useswcnt in the block to
    explicitly count tp objects containing at least one filter without
    skipsw. Key changes include:

      Whenever a filter without skipsw is added, its tp is marked with
      usesw and counted in useswcnt.

      tc_run() now uses useswcnt to determine software bypass, eliminating
      reliance on filtercnt and skipswcnt.

    This refined approach prevents software bypass for blocks containing
    mixed filters, ensuring correct behavior in tc_run().

    Additionally, as atomic operations on useswcnt ensure thread safety
    and tp->lock guards access to tp->usesw and tp->counted, the broader
    lock down_write(&block->cb_lock) is no longer required in
    tc_new_tfilter(). This resolves a performance regression caused by
    the filter counting mechanism during parallel filter insertions.

    The improvement can be demonstrated using the following script:

      # cat insert_tc_rules.sh

        tc qdisc add dev ens1f0np0 ingress
        for i in $(seq 16); do
            taskset -c $i tc -b rules_$i.txt &
        done
        wait

    Each of the rules_$i.txt files above adds 100000 tc filter rules for
    the mlx5 NIC ens1f0np0.

    Without this patch:

      # time sh insert_tc_rules.sh

        real    0m50.780s
        user    0m23.556s
        sys     4m13.032s

    With this patch:

      # time sh insert_tc_rules.sh

        real    0m17.718s
        user    0m7.807s
        sys     3m45.050s

    Fixes: 047f340 ("net: sched: make skip_sw actually skip software")
    Reported-by: Shuang Li <shuali@redhat.com>
    Signed-off-by: Xin Long <lucien.xin@gmail.com>
    Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
    Reviewed-by: Asbjørn Sloth Tønnesen <ast@fiberby.net>
    Tested-by: Asbjørn Sloth Tønnesen <ast@fiberby.net>
    Signed-off-by: David S. Miller <davem@davemloft.net>

Signed-off-by: Xin Long <lxin@redhat.com>
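To make the mixed-filter case concrete, the sequence below is a sketch of how
a skip_sw filter and a plain filter end up in the same tcf_proto by sharing a
priority and protocol; the device name ens1f0np0 and the match addresses are
illustrative placeholders, not part of the patch:

  tc qdisc add dev ens1f0np0 ingress

  # First filter in the tp: hardware-only (skip_sw). With the old counters
  # this alone made filtercnt == skipswcnt and enabled the software bypass.
  tc filter add dev ens1f0np0 ingress prio 1 protocol ip flower \
          dst_ip 192.168.1.1 skip_sw action drop

  # Second filter in the same tp (same prio/protocol) without skip_sw;
  # it still needs the software datapath.
  tc filter add dev ens1f0np0 ingress prio 1 protocol ip flower \
          dst_ip 192.168.1.2 action drop

Before this change, tp->counted was already set by the first rule, so the
second rule left filtercnt and skipswcnt untouched and tc_run() kept bypassing
software classification; with this patch, the second rule marks the tp as
usesw, useswcnt becomes non-zero, and the block-wise bypass is switched off.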
1 parent 446b16a commit 1955673

8 files changed: +55 additions, -45 deletions


include/net/pkt_cls.h

Lines changed: 11 additions & 2 deletions
@@ -75,11 +75,11 @@ static inline bool tcf_block_non_null_shared(struct tcf_block *block)
 }
 
 #ifdef CONFIG_NET_CLS_ACT
-DECLARE_STATIC_KEY_FALSE(tcf_bypass_check_needed_key);
+DECLARE_STATIC_KEY_FALSE(tcf_sw_enabled_key);
 
 static inline bool tcf_block_bypass_sw(struct tcf_block *block)
 {
-        return block && block->bypass_wanted;
+        return block && !atomic_read(&block->useswcnt);
 }
 #endif
 
@@ -759,6 +759,15 @@ tc_cls_common_offload_init(struct flow_cls_common_offload *cls_common,
         cls_common->extack = extack;
 }
 
+static inline void tcf_proto_update_usesw(struct tcf_proto *tp, u32 flags)
+{
+        if (tp->usesw)
+                return;
+        if (tc_skip_sw(flags) && tc_in_hw(flags))
+                return;
+        tp->usesw = true;
+}
+
 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
 static inline struct tc_skb_ext *tc_skb_ext_alloc(struct sk_buff *skb)
 {

include/net/sch_generic.h

Lines changed: 2 additions & 3 deletions
@@ -425,6 +425,7 @@ struct tcf_proto {
         spinlock_t lock;
         bool deleting;
         bool counted;
+        bool usesw;
         refcount_t refcnt;
         struct rcu_head rcu;
         struct hlist_node destroy_ht_node;
@@ -474,9 +475,7 @@ struct tcf_block {
         struct flow_block flow_block;
         struct list_head owner_list;
         bool keep_dst;
-        bool bypass_wanted;
-        atomic_t filtercnt; /* Number of filters */
-        atomic_t skipswcnt; /* Number of skip_sw filters */
+        atomic_t useswcnt;
         atomic_t offloadcnt; /* Number of oddloaded filters */
         unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */
         unsigned int lockeddevcnt; /* Number of devs that require rtnl lock. */

net/core/dev.c

Lines changed: 9 additions & 6 deletions
@@ -2126,8 +2126,8 @@ EXPORT_SYMBOL_GPL(net_dec_egress_queue);
 #endif
 
 #ifdef CONFIG_NET_CLS_ACT
-DEFINE_STATIC_KEY_FALSE(tcf_bypass_check_needed_key);
-EXPORT_SYMBOL(tcf_bypass_check_needed_key);
+DEFINE_STATIC_KEY_FALSE(tcf_sw_enabled_key);
+EXPORT_SYMBOL(tcf_sw_enabled_key);
 #endif
 
 DEFINE_STATIC_KEY_FALSE(netstamp_needed_key);
@@ -3973,10 +3973,13 @@ static int tc_run(struct tcx_entry *entry, struct sk_buff *skb,
         if (!miniq)
                 return ret;
 
-        if (static_branch_unlikely(&tcf_bypass_check_needed_key)) {
-                if (tcf_block_bypass_sw(miniq->block))
-                        return ret;
-        }
+        /* Global bypass */
+        if (!static_branch_likely(&tcf_sw_enabled_key))
+                return ret;
+
+        /* Block-wise bypass */
+        if (tcf_block_bypass_sw(miniq->block))
+                return ret;
 
         tc_skb_cb(skb)->mru = 0;
         tc_skb_cb(skb)->post_ct = false;

net/sched/cls_api.c

Lines changed: 23 additions & 34 deletions
@@ -390,6 +390,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
         tp->protocol = protocol;
         tp->prio = prio;
         tp->chain = chain;
+        tp->usesw = !tp->ops->reoffload;
         spin_lock_init(&tp->lock);
         refcount_set(&tp->refcnt, 1);
 
@@ -410,39 +411,31 @@ static void tcf_proto_get(struct tcf_proto *tp)
         refcount_inc(&tp->refcnt);
 }
 
-static void tcf_maintain_bypass(struct tcf_block *block)
+static void tcf_proto_count_usesw(struct tcf_proto *tp, bool add)
 {
-        int filtercnt = atomic_read(&block->filtercnt);
-        int skipswcnt = atomic_read(&block->skipswcnt);
-        bool bypass_wanted = filtercnt > 0 && filtercnt == skipswcnt;
-
-        if (bypass_wanted != block->bypass_wanted) {
 #ifdef CONFIG_NET_CLS_ACT
-                if (bypass_wanted)
-                        static_branch_inc(&tcf_bypass_check_needed_key);
-                else
-                        static_branch_dec(&tcf_bypass_check_needed_key);
-#endif
-                block->bypass_wanted = bypass_wanted;
+        struct tcf_block *block = tp->chain->block;
+        bool counted = false;
+
+        if (!add) {
+                if (tp->usesw && tp->counted) {
+                        if (!atomic_dec_return(&block->useswcnt))
+                                static_branch_dec(&tcf_sw_enabled_key);
+                        tp->counted = false;
+                }
+                return;
         }
-}
-
-static void tcf_block_filter_cnt_update(struct tcf_block *block, bool *counted, bool add)
-{
-        lockdep_assert_not_held(&block->cb_lock);
 
-        down_write(&block->cb_lock);
-        if (*counted != add) {
-                if (add) {
-                        atomic_inc(&block->filtercnt);
-                        *counted = true;
-                } else {
-                        atomic_dec(&block->filtercnt);
-                        *counted = false;
-                }
+        spin_lock(&tp->lock);
+        if (tp->usesw && !tp->counted) {
+                counted = true;
+                tp->counted = true;
         }
-        tcf_maintain_bypass(block);
-        up_write(&block->cb_lock);
+        spin_unlock(&tp->lock);
+
+        if (counted && atomic_inc_return(&block->useswcnt) == 1)
+                static_branch_inc(&tcf_sw_enabled_key);
+#endif
 }
 
 static void tcf_chain_put(struct tcf_chain *chain);
@@ -451,7 +444,7 @@ static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
                               bool sig_destroy, struct netlink_ext_ack *extack)
 {
         tp->ops->destroy(tp, rtnl_held, extack);
-        tcf_block_filter_cnt_update(tp->chain->block, &tp->counted, false);
+        tcf_proto_count_usesw(tp, false);
         if (sig_destroy)
                 tcf_proto_signal_destroyed(tp->chain, tp);
         tcf_chain_put(tp->chain);
@@ -2404,7 +2397,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
                 tfilter_notify(net, skb, n, tp, block, q, parent, fh,
                                RTM_NEWTFILTER, false, rtnl_held, extack);
                 tfilter_put(tp, fh);
-                tcf_block_filter_cnt_update(block, &tp->counted, true);
+                tcf_proto_count_usesw(tp, true);
                 /* q pointer is NULL for shared blocks */
                 if (q)
                         q->flags &= ~TCQ_F_CAN_BYPASS;
@@ -3521,8 +3514,6 @@ static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
         if (*flags & TCA_CLS_FLAGS_IN_HW)
                 return;
         *flags |= TCA_CLS_FLAGS_IN_HW;
-        if (tc_skip_sw(*flags))
-                atomic_inc(&block->skipswcnt);
         atomic_inc(&block->offloadcnt);
 }
 
@@ -3531,8 +3522,6 @@ static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
         if (!(*flags & TCA_CLS_FLAGS_IN_HW))
                 return;
         *flags &= ~TCA_CLS_FLAGS_IN_HW;
-        if (tc_skip_sw(*flags))
-                atomic_dec(&block->skipswcnt);
         atomic_dec(&block->offloadcnt);
 }
 

net/sched/cls_bpf.c

Lines changed: 2 additions & 0 deletions
@@ -509,6 +509,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
         if (!tc_in_hw(prog->gen_flags))
                 prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW;
 
+        tcf_proto_update_usesw(tp, prog->gen_flags);
+
         if (oldprog) {
                 idr_replace(&head->handle_idr, prog, handle);
                 list_replace_rcu(&oldprog->link, &prog->link);

net/sched/cls_flower.c

Lines changed: 2 additions & 0 deletions
@@ -2427,6 +2427,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
         if (!tc_in_hw(fnew->flags))
                 fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
 
+        tcf_proto_update_usesw(tp, fnew->flags);
+
         spin_lock(&tp->lock);
 
         /* tp was deleted concurrently. -EAGAIN will cause caller to lookup

net/sched/cls_matchall.c

Lines changed: 2 additions & 0 deletions
@@ -228,6 +228,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
         if (!tc_in_hw(new->flags))
                 new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
 
+        tcf_proto_update_usesw(tp, new->flags);
+
         *arg = head;
         rcu_assign_pointer(tp->root, new);
         return 0;
return 0;

net/sched/cls_u32.c

Lines changed: 4 additions & 0 deletions
@@ -941,6 +941,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
                 if (!tc_in_hw(new->flags))
                         new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
 
+                tcf_proto_update_usesw(tp, new->flags);
+
                 u32_replace_knode(tp, tp_c, new);
                 tcf_unbind_filter(tp, &n->res);
                 tcf_exts_get_net(&n->exts);
@@ -1154,6 +1156,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
         if (!tc_in_hw(n->flags))
                 n->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
 
+        tcf_proto_update_usesw(tp, n->flags);
+
         ins = &ht->ht[TC_U32_HASH(handle)];
         for (pins = rtnl_dereference(*ins); pins;
              ins = &pins->next, pins = rtnl_dereference(*ins))
