Skip to content

Commit 2be3fd2

Browse files
committed
rewrite ovn-fix-chassis-priorities playbook
It is now aligning the HA chassis priorities based on gateway chassis ones. Signed-off-by: Bartosz Bezak <bartosz@stackhpc.com>
1 parent dbd9f1f commit 2be3fd2

File tree

1 file changed

+121
-36
lines changed

1 file changed

+121
-36
lines changed

etc/kayobe/ansible/fixes/ovn-fix-chassis-priorities.yml

Lines changed: 121 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -5,23 +5,23 @@
55
# metal/SR-IOV) ports.
66

77
# This playbook can be used to fix the issue by realigning the priorities of
8-
# the table entries. It does so by assigning the highest priority to the
9-
# "first" (sorted alphabetically) OVN NB DB host. This results in all gateways
10-
# being scheduled to a single host, but is less complicated than trying to
11-
# balance them (and it's also not clear to me how to map between individual
12-
# ha_chassis and gateway_chassis entries).
8+
# the table entries. It executes a small inline shell script against the
9+
# OVN northbound database to ensure that, for each router, the priorities of
10+
# the HA chassis backing its internal networks follow the priority order of
11+
# the gateway chassis configured on the router's external gateway port.
1312

1413
# The playbook can be run as follows:
1514
# kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/fixes/ovn-fix-chassis-priorities.yml
15+
# By default this runs in dry-run mode; pass '-e apply=yes' to perform the updates.
1616

1717
# If the 'controllers' group does not align with the group used to deploy the
1818
# OVN NB DB, this can be overridden by passing the following:
1919
# '-e ovn_nb_db_group=some_other_group'
2020

21-
- name: Find OVN DB DB Leader
21+
- name: Find OVN NB DB Leader
2222
hosts: "{{ ovn_nb_db_group | default('controllers') }}"
2323
tasks:
24-
- name: Find OVN DB Leader
24+
- name: Find OVN NB DB Leader
2525
when: kolla_enable_ovn | bool
2626
block:
2727
- name: Find the OVN NB DB leader
@@ -43,34 +43,119 @@
4343

4444
- name: Fix OVN chassis priorities
4545
hosts: ovn_nb_leader
46+
gather_facts: false
4647
vars:
47-
ovn_nb_db_group: controllers
48-
ovn_nb_db_hosts_sorted: "{{ query('inventory_hostnames', ovn_nb_db_group) | sort | list }}"
49-
ha_chassis_max_priority: 32767
50-
gateway_chassis_max_priority: "{{ ovn_nb_db_hosts_sorted | length }}"
48+
apply_updates: "{{ apply | default(false) | bool }}"
5149
tasks:
52-
- name: Fix ha_chassis priorities
53-
ansible.builtin.command: >-
54-
docker exec ovn_nb_db
55-
bash -c '
56-
ovn-nbctl find ha_chassis chassis_name={{ item }} |
57-
awk '\''$1 == "_uuid" { print $3 }'\'' |
58-
while read uuid; do ovn-nbctl set ha_chassis $uuid priority={{ priority }}; done'
59-
loop: "{{ ovn_nb_db_hosts_sorted }}"
60-
vars:
61-
priority: "{{ ha_chassis_max_priority | int - ovn_nb_db_hosts_sorted.index(item) }}"
62-
register: ha_chassis_command
63-
changed_when: ha_chassis_command.rc == 0
64-
65-
- name: Fix gateway_chassis priorities
66-
ansible.builtin.command: >-
67-
docker exec ovn_nb_db
68-
bash -c '
69-
ovn-nbctl find gateway_chassis chassis_name={{ item }} |
70-
awk '\''$1 == "_uuid" { print $3 }'\'' |
71-
while read uuid; do ovn-nbctl set gateway_chassis $uuid priority={{ priority }}; done'
72-
loop: "{{ ovn_nb_db_hosts_sorted }}"
73-
vars:
74-
priority: "{{ gateway_chassis_max_priority | int - ovn_nb_db_hosts_sorted.index(item) }}"
75-
register: gateway_chassis_command
76-
changed_when: gateway_chassis_command.rc == 0
50+
- name: Realign HA chassis priorities with active gateways
  when: kolla_enable_ovn | bool
  # The script below is fed on stdin to bash inside the ovn_nb_db container.
  # The quoted heredoc delimiter stops the host shell from expanding anything;
  # only the Jinja expression assigned to APPLY is rendered by Ansible first.
  # NOTE: avoid the bash length expansion (dollar, open brace, hash) in this
  # script - Jinja would read the brace-hash pair as the start of a comment.
  ansible.builtin.shell: |
    docker exec -i ovn_nb_db bash -s <<'EOF'
    set -euo pipefail

    # Priority given to the first gateway chassis; each following chassis
    # gets one less.
    MAX_PRIORITY=32767
    APPLY="{{ 'yes' if apply_updates else 'no' }}"

    if [ "$APPLY" = "yes" ]; then
      echo "APPLY MODE: Updating OVN HA priorities"
    else
      echo "DRY-RUN MODE: Showing proposed changes only"
      echo "Re-run with -e apply=yes to apply changes"
    fi
    echo ""

    # Names of all router ports flagged as external gateways. The loops below
    # rely on word splitting: port names and UUIDs contain no whitespace, and
    # splitting also drops the blank lines ovn-nbctl prints between records.
    ext_ports=$(ovn-nbctl --data=bare --no-headings --columns=name find logical_router_port 'external_ids:"neutron:is_ext_gw"="True"')

    for ext_port in $ext_ports; do
      # --if-exists turns a missing key into empty output instead of a
      # failure. Without it 'set -e' aborts the whole run here and the
      # skip branch below can never be reached.
      router=$(ovn-nbctl --if-exists --data=bare --no-headings get logical_router_port "$ext_port" 'external_ids:"neutron:router_name"' | tr -d '"')

      if [ -z "$router" ]; then
        echo "Skipping $ext_port: no router name found"
        continue
      fi

      # Gateway chassis of the external port, in the order reported by
      # ovn-nbctl. A failure here is deliberately tolerated and handled as
      # "no gateway chassis configured".
      gateway_chassis=""
      gateway_info=$(ovn-nbctl lrp-get-gateway-chassis "$ext_port" 2>/dev/null || true)

      while read -r gw_name _; do
        if [ -z "$gw_name" ]; then
          continue
        fi
        # The first column is "<port>_<chassis>"; drop everything up to and
        # including the first underscore to keep the chassis name.
        gateway_chassis="$gateway_chassis ${gw_name#*_}"
      done <<< "$gateway_info"

      gateway_chassis=${gateway_chassis# }

      if [ -z "$gateway_chassis" ]; then
        echo "Router $router: no gateway chassis configured"
        continue
      fi

      # The first chassis in the list is treated as the active gateway
      active_gateway=${gateway_chassis%% *}
      echo "Router: $router | Port: $ext_port | Active Gateway: $active_gateway"

      # Process all internal ports on this router
      router_ports=$(ovn-nbctl --data=bare --no-headings --columns=name \
        find logical_router_port "external_ids:\"neutron:router_name\"=\"$router\"")

      for port in $router_ports; do
        # Skip external gateway ports. A port without the key is treated as
        # internal; genuine ovn-nbctl errors are no longer hidden.
        is_external=$(ovn-nbctl --if-exists --data=bare --no-headings get logical_router_port "$port" 'external_ids:"neutron:is_ext_gw"')
        if [ "$is_external" = "True" ]; then
          continue
        fi

        # The HA chassis group is looked up by the port's network name.
        network=$(ovn-nbctl --if-exists --data=bare --no-headings get logical_router_port "$port" 'external_ids:"neutron:network_name"')
        if [ -z "$network" ]; then
          echo "  Port $port: no network name found"
          continue
        fi

        ha_group=$(ovn-nbctl --data=bare --no-headings --columns=_uuid find ha_chassis_group name="$network")

        if [ -z "$ha_group" ]; then
          echo "  Port $port: no HA group found for network '$network'"
          continue
        fi

        echo "  Port: $port | Network: $network"

        # Update priorities for each chassis in the HA group
        ha_chassis_list=$(ovn-nbctl --data=bare --no-headings get ha_chassis_group "$ha_group" ha_chassis | tr -d '[],')

        for uuid in $ha_chassis_list; do
          chassis_name=$(ovn-nbctl --data=bare --no-headings get ha_chassis "$uuid" chassis_name)
          current_priority=$(ovn-nbctl --data=bare --no-headings get ha_chassis "$uuid" priority)

          # Desired priority is MAX_PRIORITY minus the chassis' position in
          # the gateway chassis list.
          desired_priority=""
          index=0
          for gw in $gateway_chassis; do
            if [ "$chassis_name" = "$gw" ]; then
              desired_priority=$((MAX_PRIORITY - index))
              break
            fi
            index=$((index + 1))
          done

          # HA chassis that are not gateway chassis of this router are left
          # untouched.
          if [ -z "$desired_priority" ]; then
            continue
          fi

          # Apply or report change. The "updated priority" text is matched by
          # changed_when below, so keep the two in sync.
          if [ "$current_priority" -ne "$desired_priority" ]; then
            if [ "$APPLY" = "yes" ]; then
              ovn-nbctl set ha_chassis "$uuid" priority="$desired_priority"
              echo "    $chassis_name: updated priority $current_priority to $desired_priority"
            else
              echo "    $chassis_name: would update priority $current_priority to $desired_priority"
            fi
          else
            echo "    $chassis_name: priority $current_priority (no change needed)"
          fi
        done
      done
      echo ""
    done
    EOF
  register: fix_output
  changed_when: apply_updates and ('updated priority' in (fix_output.stdout | default('')))
159+
# Print the script's report. stdout_lines gives one list entry per output
# line, so the report stays readable instead of being a single string with
# escaped newlines. The default covers a skipped shell task, which registers
# no stdout fields at all.
- name: Display results
  ansible.builtin.debug:
    msg: "{{ fix_output.stdout_lines | default([]) }}"

0 commit comments

Comments
 (0)