Skip to content

Commit 4b1745b

Browse files
authored
Merge pull request #1983 from stackhpc/ovn-fix-chassis-priorities-backport
OVN fix chassis priorities backport to 2024.1
2 parents 2a1283f + 0c161bc commit 4b1745b

File tree

2 files changed

+134
-40
lines changed

2 files changed

+134
-40
lines changed

etc/kayobe/ansible/ovn-fix-chassis-priorities.yml

Lines changed: 128 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -5,35 +5,38 @@
55
# metal/SR-IOV) ports.
66

77
# This playbook can be used to fix the issue by realigning the priorities of
8-
# the table entries. It does so by assigning the highest priority to the
9-
# "first" (sorted alphabetically) OVN NB DB host. This results in all gateways
10-
# being scheduled to a single host, but is less complicated than trying to
11-
# balance them (and it's also not clear to me how to map between individual
12-
# ha_chassis and gateway_chassis entries).
8+
# the table entries. It executes a small inline shell script against the
9+
# OVN northbound database to ensure that, for each router, the HA chassis
10+
# backing its internal networks is aligned with the chassis currently hosting
11+
# the router's external gateway interface.
1312

1413
# The playbook can be run as follows:
15-
# kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/ovn-fix-chassis-priorities.yml
14+
# kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/ovn-fix-chassis-priorities.yml
15+
# By default this runs in dry-run mode; pass '-e apply=yes' to perform the updates.
1616

1717
# If the 'controllers' group does not align with the group used to deploy the
1818
# OVN NB DB, this can be overridden by passing the following:
1919
# '-e ovn_nb_db_group=some_other_group'
2020

21-
- name: Find OVN DB DB Leader
21+
- name: Find OVN NB DB Leader
2222
hosts: "{{ ovn_nb_db_group | default('controllers') }}"
2323
tasks:
24-
- name: Find OVN DB Leader
24+
- name: Find OVN NB DB Leader
2525
when: kolla_enable_ovn | bool
2626
block:
2727
- name: Find the OVN NB DB leader
28-
ansible.builtin.command: docker exec ovn_nb_db ovn-nbctl get-connection
28+
ansible.builtin.command: >-
29+
docker exec ovn_nb_db
30+
ovs-appctl -t /var/run/ovn/ovnnb_db.ctl
31+
cluster/status OVN_Northbound
2932
changed_when: false
3033
failed_when: false
31-
register: ovn_check_result
34+
register: ovn_cluster_status
3235
check_mode: false
3336

3437
- name: Group hosts by leader/follower role
3538
ansible.builtin.group_by:
36-
key: ovn_nb_{{ 'leader' if ovn_check_result.rc == 0 else 'follower' }}
39+
key: "{{ 'ovn_nb_leader' if 'Role: leader' in ovn_cluster_status.stdout else 'ovn_nb_follower' }}"
3740
changed_when: false
3841

3942
- name: Assert one leader exists
@@ -43,34 +46,119 @@
4346

4447
- name: Fix OVN chassis priorities
4548
hosts: ovn_nb_leader
49+
gather_facts: false
4650
vars:
47-
ovn_nb_db_group: controllers
48-
ovn_nb_db_hosts_sorted: "{{ query('inventory_hostnames', ovn_nb_db_group) | sort | list }}"
49-
ha_chassis_max_priority: 32767
50-
gateway_chassis_max_priority: "{{ ovn_nb_db_hosts_sorted | length }}"
51+
apply_updates: "{{ apply | default(false) | bool }}"
5152
tasks:
52-
- name: Fix ha_chassis priorities
53-
ansible.builtin.command: >-
54-
docker exec ovn_nb_db
55-
bash -c '
56-
ovn-nbctl find ha_chassis chassis_name={{ item }} |
57-
awk '\''$1 == "_uuid" { print $3 }'\'' |
58-
while read uuid; do ovn-nbctl set ha_chassis $uuid priority={{ priority }}; done'
59-
loop: "{{ ovn_nb_db_hosts_sorted }}"
60-
vars:
61-
priority: "{{ ha_chassis_max_priority | int - ovn_nb_db_hosts_sorted.index(item) }}"
62-
register: ha_chassis_command
63-
changed_when: ha_chassis_command.rc == 0
64-
65-
- name: Fix gateway_chassis priorities
66-
ansible.builtin.command: >-
67-
docker exec ovn_nb_db
68-
bash -c '
69-
ovn-nbctl find gateway_chassis chassis_name={{ item }} |
70-
awk '\''$1 == "_uuid" { print $3 }'\'' |
71-
while read uuid; do ovn-nbctl set gateway_chassis $uuid priority={{ priority }}; done'
72-
loop: "{{ ovn_nb_db_hosts_sorted }}"
73-
vars:
74-
priority: "{{ gateway_chassis_max_priority | int - ovn_nb_db_hosts_sorted.index(item) }}"
75-
register: gateway_chassis_command
76-
changed_when: gateway_chassis_command.rc == 0
53+
- name: Realign HA chassis priorities with active gateways
54+
when: kolla_enable_ovn | bool
55+
ansible.builtin.shell: |
56+
docker exec -i ovn_nb_db bash -s <<'EOF'
57+
set -euo pipefail
58+
59+
MAX_PRIORITY=32767
60+
APPLY="{{ 'yes' if apply_updates else 'no' }}"
61+
62+
if [ "$APPLY" = "yes" ]; then
63+
echo "APPLY MODE: Updating OVN HA priorities"
64+
else
65+
echo "DRY-RUN MODE: Showing proposed changes only"
66+
echo "Re-run with -e apply=yes to apply changes"
67+
fi
68+
echo ""
69+
70+
# Get all external gateway ports
71+
ext_ports=$(ovn-nbctl --data=bare --no-headings --columns=name find logical_router_port 'external_ids:"neutron:is_ext_gw"="True"')
72+
73+
for ext_port in $ext_ports; do
74+
# Get router name
75+
router=$(ovn-nbctl --data=bare --no-headings get logical_router_port "$ext_port" 'external_ids:"neutron:router_name"' | tr -d '"')
76+
77+
if [ -z "$router" ]; then
78+
echo "Skipping $ext_port: no router name found"
79+
continue
80+
fi
81+
82+
# Get gateway chassis list (ordered by priority)
83+
gateway_chassis=""
84+
gateway_info=$(ovn-nbctl lrp-get-gateway-chassis "$ext_port" 2>/dev/null || true)
85+
86+
while IFS= read -r line; do
87+
# Strip prefix, allowing '-' or '_' separator
88+
chassis=$(echo "$line" | awk '{print $1}' | sed "s/^${ext_port}[-_]//")
89+
gateway_chassis="$gateway_chassis $chassis"
90+
done <<< "$gateway_info"
91+
92+
gateway_chassis=${gateway_chassis# }
93+
94+
if [ -z "$gateway_chassis" ]; then
95+
echo "Router $router: no gateway chassis configured"
96+
continue
97+
fi
98+
99+
# The first (highest-priority) chassis in the list is treated as the active gateway
100+
active_gateway=$(echo "$gateway_chassis" | awk '{print $1}')
101+
echo "Router: $router | Port: $ext_port | Active Gateway: $active_gateway"
102+
103+
# Process all internal ports on this router
104+
router_ports=$(ovn-nbctl --data=bare --no-headings --columns=name \
105+
find logical_router_port "external_ids:\"neutron:router_name\"=\"$router\"")
106+
107+
for port in $router_ports; do
108+
109+
# Skip external gateway ports
110+
# A port without the is_ext_gw key makes 'get' fail; '|| true' keeps 'set -e' from
# silently aborting the whole run, and the port is then treated as internal.
is_external=$(ovn-nbctl --data=bare --no-headings get logical_router_port "$port" 'external_ids:"neutron:is_ext_gw"' 2>/dev/null || true)
111+
[ "$is_external" = "True" ] && continue
112+
113+
# Get network name and HA chassis group
114+
# A missing network_name key must not abort the script under 'set -e'. The name is
# passed as a quoted string so that an empty value is still a valid match
# expression and falls through to the "no HA group found" branch below.
network=$(ovn-nbctl --data=bare --no-headings get logical_router_port "$port" 'external_ids:"neutron:network_name"' 2>/dev/null || true)
115+
ha_group=$(ovn-nbctl --data=bare --no-headings --columns=_uuid find ha_chassis_group "name=\"$network\"")
116+
117+
if [ -z "$ha_group" ]; then
118+
echo " Port $port: no HA group found for network '$network'"
119+
continue
120+
fi
121+
122+
echo " Port: $port | Network: $network"
123+
124+
# Update priorities for each chassis in the HA group
125+
ha_chassis_list=$(ovn-nbctl --data=bare --no-headings get ha_chassis_group "$ha_group" ha_chassis | tr -d '[],')
126+
127+
for uuid in $ha_chassis_list; do
128+
chassis_name=$(ovn-nbctl --data=bare --no-headings get ha_chassis "$uuid" chassis_name)
129+
current_priority=$(ovn-nbctl --data=bare --no-headings get ha_chassis "$uuid" priority)
130+
131+
# Calculate desired priority
132+
desired_priority=""
133+
index=0
134+
for gw in $gateway_chassis; do
135+
if [ "$chassis_name" = "$gw" ]; then
136+
desired_priority=$((MAX_PRIORITY - index))
137+
break
138+
fi
139+
index=$((index + 1))
140+
done
141+
[ -z "$desired_priority" ] && continue
142+
143+
# Apply or report change
144+
if [ "$current_priority" -ne "$desired_priority" ]; then
145+
if [ "$APPLY" = "yes" ]; then
146+
ovn-nbctl set ha_chassis "$uuid" priority=$desired_priority
147+
echo " $chassis_name: updated priority $current_priority to $desired_priority"
148+
else
149+
echo " $chassis_name: would update priority $current_priority to $desired_priority"
150+
fi
151+
else
152+
echo " $chassis_name: priority $current_priority (no change needed)"
153+
fi
154+
done
155+
done
156+
echo ""
157+
done
158+
EOF
159+
register: fix_output
160+
changed_when: apply_updates and ('updated priority' in (fix_output.stdout | default('')))
161+
162+
- name: Display results
163+
ansible.builtin.debug:
164+
# fix_output has no stdout when the shell task was skipped (check mode or OVN disabled).
msg: "{{ fix_output.stdout | default('') }}"
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
fixes:
3+
- |
4+
Updated the OVN chassis priority fix playbook to detect the northbound
5+
database leader via ``ovs-appctl cluster/status``, ensuring only the true
6+
leader runs the priority alignment. The playbook now runs in dry-run mode by default; pass ``-e apply=yes`` to apply the changes.

0 commit comments

Comments
 (0)