Skip to content

Commit 184fa9d

Browse files
author
Paolo Abeni
committed
Merge branch 'bonding-fix-negotiation-flapping-in-802-3ad-passive-mode'
Hangbin Liu says: ==================== bonding: fix negotiation flapping in 802.3ad passive mode This patch fixes unstable LACP negotiation when bonding is configured in passive mode (`lacp_active=off`). Previously, the actor would stop sending LACPDUs after initial negotiation succeeded, leading to the partner timing out and restarting the negotiation cycle. This resulted in continuous LACP state flapping. The fix ensures the passive actor starts sending periodic LACPDUs after receiving the first LACPDU from the partner, in accordance with IEEE 802.1AX-2020 section 6.4.1. ==================== Link: https://patch.msgid.link/20250815062000.22220-1-liuhangbin@gmail.com Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2 parents c42be53 + 87951b5 commit 184fa9d

File tree

6 files changed

+159
-19
lines changed

6 files changed

+159
-19
lines changed

drivers/net/bonding/bond_3ad.c

Lines changed: 49 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -95,13 +95,13 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker);
9595
static void ad_mux_machine(struct port *port, bool *update_slave_arr);
9696
static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port);
9797
static void ad_tx_machine(struct port *port);
98-
static void ad_periodic_machine(struct port *port, struct bond_params *bond_params);
98+
static void ad_periodic_machine(struct port *port);
9999
static void ad_port_selection_logic(struct port *port, bool *update_slave_arr);
100100
static void ad_agg_selection_logic(struct aggregator *aggregator,
101101
bool *update_slave_arr);
102102
static void ad_clear_agg(struct aggregator *aggregator);
103103
static void ad_initialize_agg(struct aggregator *aggregator);
104-
static void ad_initialize_port(struct port *port, int lacp_fast);
104+
static void ad_initialize_port(struct port *port, const struct bond_params *bond_params);
105105
static void ad_enable_collecting(struct port *port);
106106
static void ad_disable_distributing(struct port *port,
107107
bool *update_slave_arr);
@@ -1307,10 +1307,16 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
13071307
* case of EXPIRED even if LINK_DOWN didn't arrive for
13081308
* the port.
13091309
*/
1310-
port->partner_oper.port_state &= ~LACP_STATE_SYNCHRONIZATION;
13111310
port->sm_vars &= ~AD_PORT_MATCHED;
1311+
/* Based on IEEE 8021AX-2014, Figure 6-18 - Receive
1312+
* machine state diagram, the statue should be
1313+
* Partner_Oper_Port_State.Synchronization = FALSE;
1314+
* Partner_Oper_Port_State.LACP_Timeout = Short Timeout;
1315+
* start current_while_timer(Short Timeout);
1316+
* Actor_Oper_Port_State.Expired = TRUE;
1317+
*/
1318+
port->partner_oper.port_state &= ~LACP_STATE_SYNCHRONIZATION;
13121319
port->partner_oper.port_state |= LACP_STATE_LACP_TIMEOUT;
1313-
port->partner_oper.port_state |= LACP_STATE_LACP_ACTIVITY;
13141320
port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(AD_SHORT_TIMEOUT));
13151321
port->actor_oper_port_state |= LACP_STATE_EXPIRED;
13161322
port->sm_vars |= AD_PORT_CHURNED;
@@ -1417,11 +1423,10 @@ static void ad_tx_machine(struct port *port)
14171423
/**
14181424
* ad_periodic_machine - handle a port's periodic state machine
14191425
* @port: the port we're looking at
1420-
* @bond_params: bond parameters we will use
14211426
*
14221427
* Turn ntt flag on priodically to perform periodic transmission of lacpdu's.
14231428
*/
1424-
static void ad_periodic_machine(struct port *port, struct bond_params *bond_params)
1429+
static void ad_periodic_machine(struct port *port)
14251430
{
14261431
periodic_states_t last_state;
14271432

@@ -1430,8 +1435,7 @@ static void ad_periodic_machine(struct port *port, struct bond_params *bond_para
14301435

14311436
/* check if port was reinitialized */
14321437
if (((port->sm_vars & AD_PORT_BEGIN) || !(port->sm_vars & AD_PORT_LACP_ENABLED) || !port->is_enabled) ||
1433-
(!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY)) ||
1434-
!bond_params->lacp_active) {
1438+
(!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY))) {
14351439
port->sm_periodic_state = AD_NO_PERIODIC;
14361440
}
14371441
/* check if state machine should change state */
@@ -1955,16 +1959,16 @@ static void ad_initialize_agg(struct aggregator *aggregator)
19551959
/**
19561960
* ad_initialize_port - initialize a given port's parameters
19571961
* @port: the port we're looking at
1958-
* @lacp_fast: boolean. whether fast periodic should be used
1962+
* @bond_params: bond parameters we will use
19591963
*/
1960-
static void ad_initialize_port(struct port *port, int lacp_fast)
1964+
static void ad_initialize_port(struct port *port, const struct bond_params *bond_params)
19611965
{
19621966
static const struct port_params tmpl = {
19631967
.system_priority = 0xffff,
19641968
.key = 1,
19651969
.port_number = 1,
19661970
.port_priority = 0xff,
1967-
.port_state = 1,
1971+
.port_state = 0,
19681972
};
19691973
static const struct lacpdu lacpdu = {
19701974
.subtype = 0x01,
@@ -1982,12 +1986,14 @@ static void ad_initialize_port(struct port *port, int lacp_fast)
19821986
port->actor_port_priority = 0xff;
19831987
port->actor_port_aggregator_identifier = 0;
19841988
port->ntt = false;
1985-
port->actor_admin_port_state = LACP_STATE_AGGREGATION |
1986-
LACP_STATE_LACP_ACTIVITY;
1987-
port->actor_oper_port_state = LACP_STATE_AGGREGATION |
1988-
LACP_STATE_LACP_ACTIVITY;
1989+
port->actor_admin_port_state = LACP_STATE_AGGREGATION;
1990+
port->actor_oper_port_state = LACP_STATE_AGGREGATION;
1991+
if (bond_params->lacp_active) {
1992+
port->actor_admin_port_state |= LACP_STATE_LACP_ACTIVITY;
1993+
port->actor_oper_port_state |= LACP_STATE_LACP_ACTIVITY;
1994+
}
19891995

1990-
if (lacp_fast)
1996+
if (bond_params->lacp_fast)
19911997
port->actor_oper_port_state |= LACP_STATE_LACP_TIMEOUT;
19921998

19931999
memcpy(&port->partner_admin, &tmpl, sizeof(tmpl));
@@ -2201,7 +2207,7 @@ void bond_3ad_bind_slave(struct slave *slave)
22012207
/* port initialization */
22022208
port = &(SLAVE_AD_INFO(slave)->port);
22032209

2204-
ad_initialize_port(port, bond->params.lacp_fast);
2210+
ad_initialize_port(port, &bond->params);
22052211

22062212
port->slave = slave;
22072213
port->actor_port_number = SLAVE_AD_INFO(slave)->id;
@@ -2513,7 +2519,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
25132519
}
25142520

25152521
ad_rx_machine(NULL, port);
2516-
ad_periodic_machine(port, &bond->params);
2522+
ad_periodic_machine(port);
25172523
ad_port_selection_logic(port, &update_slave_arr);
25182524
ad_mux_machine(port, &update_slave_arr);
25192525
ad_tx_machine(port);
@@ -2883,6 +2889,31 @@ void bond_3ad_update_lacp_rate(struct bonding *bond)
28832889
spin_unlock_bh(&bond->mode_lock);
28842890
}
28852891

2892+
/**
2893+
* bond_3ad_update_lacp_active - change the lacp active
2894+
* @bond: bonding struct
2895+
*
2896+
* Update actor_oper_port_state when lacp_active is modified.
2897+
*/
2898+
void bond_3ad_update_lacp_active(struct bonding *bond)
2899+
{
2900+
struct port *port = NULL;
2901+
struct list_head *iter;
2902+
struct slave *slave;
2903+
int lacp_active;
2904+
2905+
lacp_active = bond->params.lacp_active;
2906+
spin_lock_bh(&bond->mode_lock);
2907+
bond_for_each_slave(bond, slave, iter) {
2908+
port = &(SLAVE_AD_INFO(slave)->port);
2909+
if (lacp_active)
2910+
port->actor_oper_port_state |= LACP_STATE_LACP_ACTIVITY;
2911+
else
2912+
port->actor_oper_port_state &= ~LACP_STATE_LACP_ACTIVITY;
2913+
}
2914+
spin_unlock_bh(&bond->mode_lock);
2915+
}
2916+
28862917
size_t bond_3ad_stats_size(void)
28872918
{
28882919
return nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_LACPDU_RX */

drivers/net/bonding/bond_options.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1660,6 +1660,7 @@ static int bond_option_lacp_active_set(struct bonding *bond,
16601660
netdev_dbg(bond->dev, "Setting LACP active to %s (%llu)\n",
16611661
newval->string, newval->value);
16621662
bond->params.lacp_active = newval->value;
1663+
bond_3ad_update_lacp_active(bond);
16631664

16641665
return 0;
16651666
}

include/net/bond_3ad.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,7 @@ int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
307307
struct slave *slave);
308308
int bond_3ad_set_carrier(struct bonding *bond);
309309
void bond_3ad_update_lacp_rate(struct bonding *bond);
310+
void bond_3ad_update_lacp_active(struct bonding *bond);
310311
void bond_3ad_update_ad_actor_settings(struct bonding *bond);
311312
int bond_3ad_stats_fill(struct sk_buff *skb, struct bond_3ad_stats *stats);
312313
size_t bond_3ad_stats_size(void);

tools/testing/selftests/drivers/net/bonding/Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ TEST_PROGS := \
1010
mode-2-recovery-updelay.sh \
1111
bond_options.sh \
1212
bond-eth-type-change.sh \
13-
bond_macvlan_ipvlan.sh
13+
bond_macvlan_ipvlan.sh \
14+
bond_passive_lacp.sh
1415

1516
TEST_FILES := \
1617
lag_lib.sh \
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
#!/bin/bash
2+
# SPDX-License-Identifier: GPL-2.0
3+
#
4+
# Test if a bond interface works with lacp_active=off.
5+
6+
# shellcheck disable=SC2034
7+
REQUIRE_MZ=no
8+
NUM_NETIFS=0
9+
lib_dir=$(dirname "$0")
10+
# shellcheck disable=SC1091
11+
source "$lib_dir"/../../../net/forwarding/lib.sh
12+
13+
# shellcheck disable=SC2317
14+
check_port_state()
15+
{
16+
local netns=$1
17+
local port=$2
18+
local state=$3
19+
20+
ip -n "${netns}" -d -j link show "$port" | \
21+
jq -e ".[].linkinfo.info_slave_data.ad_actor_oper_port_state_str | index(\"${state}\") != null" > /dev/null
22+
}
23+
24+
check_pkt_count()
25+
{
26+
RET=0
27+
local ns="$1"
28+
local iface="$2"
29+
30+
# wait 65s, one per 30s
31+
slowwait_for_counter 65 2 tc_rule_handle_stats_get \
32+
"dev ${iface} egress" 101 ".packets" "-n ${ns}" &> /dev/null
33+
}
34+
35+
setup() {
36+
setup_ns c_ns s_ns
37+
38+
# shellcheck disable=SC2154
39+
ip -n "${c_ns}" link add eth0 type veth peer name eth0 netns "${s_ns}"
40+
ip -n "${c_ns}" link add eth1 type veth peer name eth1 netns "${s_ns}"
41+
42+
# Add tc filter to count the pkts
43+
tc -n "${c_ns}" qdisc add dev eth0 clsact
44+
tc -n "${c_ns}" filter add dev eth0 egress handle 101 protocol 0x8809 matchall action pass
45+
tc -n "${s_ns}" qdisc add dev eth1 clsact
46+
tc -n "${s_ns}" filter add dev eth1 egress handle 101 protocol 0x8809 matchall action pass
47+
48+
ip -n "${s_ns}" link add bond0 type bond mode 802.3ad lacp_active on lacp_rate fast
49+
ip -n "${s_ns}" link set eth0 master bond0
50+
ip -n "${s_ns}" link set eth1 master bond0
51+
52+
ip -n "${c_ns}" link add bond0 type bond mode 802.3ad lacp_active off lacp_rate fast
53+
ip -n "${c_ns}" link set eth0 master bond0
54+
ip -n "${c_ns}" link set eth1 master bond0
55+
56+
}
57+
58+
trap cleanup_all_ns EXIT
59+
setup
60+
61+
# The bond will send 2 lacpdu pkts during init time, let's wait at least 2s
62+
# after interface up
63+
ip -n "${c_ns}" link set bond0 up
64+
sleep 2
65+
66+
# 1. The passive side shouldn't send LACPDU.
67+
check_pkt_count "${c_ns}" "eth0" && RET=1
68+
log_test "802.3ad lacp_active off" "init port"
69+
70+
ip -n "${s_ns}" link set bond0 up
71+
# 2. The passive side should not have the 'active' flag.
72+
RET=0
73+
slowwait 2 check_port_state "${c_ns}" "eth0" "active" && RET=1
74+
log_test "802.3ad lacp_active off" "port state active"
75+
76+
# 3. The active side should have the 'active' flag.
77+
RET=0
78+
slowwait 2 check_port_state "${s_ns}" "eth0" "active" || RET=1
79+
log_test "802.3ad lacp_active on" "port state active"
80+
81+
# 4. Make sure the connection is not expired.
82+
RET=0
83+
slowwait 5 check_port_state "${s_ns}" "eth0" "distributing"
84+
slowwait 10 check_port_state "${s_ns}" "eth0" "expired" && RET=1
85+
log_test "bond 802.3ad lacp_active off" "port connection"
86+
87+
# After testing, disconnect one port on each side to check the state.
88+
ip -n "${s_ns}" link set eth0 nomaster
89+
ip -n "${s_ns}" link set eth0 up
90+
ip -n "${c_ns}" link set eth1 nomaster
91+
ip -n "${c_ns}" link set eth1 up
92+
# Due to Periodic Machine and Rx Machine state change, the bond will still
93+
# send lacpdu pkts in a few seconds. sleep at lease 5s to make sure
94+
# negotiation finished
95+
sleep 5
96+
97+
# 5. The active side should keep sending LACPDU.
98+
check_pkt_count "${s_ns}" "eth1" || RET=1
99+
log_test "bond 802.3ad lacp_active on" "port pkt after disconnect"
100+
101+
# 6. The passive side shouldn't send LACPDU anymore.
102+
check_pkt_count "${c_ns}" "eth0" && RET=1
103+
log_test "bond 802.3ad lacp_active off" "port pkt after disconnect"
104+
105+
exit "$EXIT_STATUS"

tools/testing/selftests/drivers/net/bonding/config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ CONFIG_MACVLAN=y
66
CONFIG_IPVLAN=y
77
CONFIG_NET_ACT_GACT=y
88
CONFIG_NET_CLS_FLOWER=y
9+
CONFIG_NET_CLS_MATCHALL=m
910
CONFIG_NET_SCH_INGRESS=y
1011
CONFIG_NLMON=y
1112
CONFIG_VETH=y

0 commit comments

Comments
 (0)