|
| 1 | +#!/bin/bash |
| 2 | +# |
| 3 | +# Healthcheck script for MariaDB |
| 4 | +# |
| 5 | +# Runs various tests on the MariaDB server to check its health. Pass the tests |
| 6 | +# to run as arguments. If all tests succeed, the server is considered healthy, |
| 7 | +# otherwise it's not. |
| 8 | +# |
| 9 | +# Arguments are processed in strict order. Set replication_* options before |
| 10 | +# the --replication option. This allows a different set of replication checks |
| 11 | +# on different connections. |
| 12 | +# |
| 13 | +# --su=<user> and --su-mysql are options to run the healthcheck as a |
| 14 | +# different unix user. Useful if the mysql@localhost user exists with unix |
| 15 | +# socket authentication. Using either option disregards previous options/tests, |
| 16 | +# so it should usually be the first option. |
| 17 | +# |
| 18 | +# Some tests require SQL privileges. |
| 19 | +# |
| 20 | +# TEST GRANTS REQUIRED |
| 21 | +# connect none* |
| 22 | +# innodb_buffer_pool_loaded USAGE |
| 23 | +# galera_online USAGE |
| 24 | +# replication SUPER or REPLICATION_CLIENT or REPLICA MONITOR (10.5+) |
| 25 | +# mariadbupgrade none, however unix user permissions on datadir |
| 26 | +# |
| 27 | +# The SQL user used is the default for the mysql client. This can be the unix user |
| 28 | +# if no user (or password) is set in the [mariadb-client] section of a configuration |
| 29 | +# file. --defaults-{file,extra-file,group-suffix} can be used to select a |
| 30 | +# configuration file or option group other than the default ones. |
| 31 | +# |
| 32 | +# Note * even though no permissions are needed, a denied connection attempt |
| 33 | +# will result in an access denied message in the server error log. |
| 34 | + |
| 35 | +set -eo pipefail |
| 36 | + |
# _process_sql
#
# Runs the mysql client in batch mode (-B), forwarding every argument to it.
#
# Globals read:
#   nodefaults - when non-empty, --no-defaults is passed
#   def        - associative array; non-empty 'file', 'extra_file' and
#                'group_suffix' entries become the matching --defaults-* option
#
# The options are collected in an array so that each value reaches the client
# as exactly one word, even when it contains whitespace or glob characters.
# The order (--no-defaults / --defaults-* first) is preserved.
_process_sql()
{
  local -a client_opts=()

  if [ -n "$nodefaults" ]; then
    client_opts+=(--no-defaults)
  fi
  if [ -n "${def[file]}" ]; then
    client_opts+=("--defaults-file=${def[file]}")
  fi
  if [ -n "${def[extra_file]}" ]; then
    client_opts+=("--defaults-extra-file=${def[extra_file]}")
  fi
  if [ -n "${def[group_suffix]}" ]; then
    client_opts+=("--defaults-group-suffix=${def[group_suffix]}")
  fi

  mysql "${client_opts[@]}" -B "$@"
}
| 45 | + |
| 46 | +# TESTS |
| 47 | + |
| 48 | + |
# CONNECT
#
# Tests that the server answers on TCP at localhost by running 'select 1'
# through the mysql client. Authentication is not tested: the check fails
# only when the client output contains "Can't connect"; any other outcome
# (including an authentication error) counts as success.
#
# Globals read: nodefaults, def (same meaning as for the other SQL tests).
# Returns: 0 when the server accepted the connection attempt, 1 otherwise.
connect()
{
  local -a client_opts=()
  local output

  # Each option is stored as a single array element so values containing
  # whitespace are not split into several arguments.
  if [ -n "$nodefaults" ]; then
    client_opts+=(--no-defaults)
  fi
  if [ -n "${def[file]}" ]; then
    client_opts+=("--defaults-file=${def[file]}")
  fi
  if [ -n "${def[extra_file]}" ]; then
    client_opts+=("--defaults-extra-file=${def[extra_file]}")
  fi
  if [ -n "${def[group_suffix]}" ]; then
    client_opts+=("--defaults-group-suffix=${def[group_suffix]}")
  fi

  # The client's exit status is deliberately ignored (|| true): only the
  # presence of the "Can't connect" message decides the result. Capturing
  # the output avoids toggling the shell's errexit/pipefail options.
  output=$(mysql "${client_opts[@]}" \
    -h localhost --protocol tcp -e 'select 1' 2>&1) || true

  if [[ $output == *"Can't connect"* ]]; then
    return 1
  fi
  return 0
}
| 71 | + |
# INNODB_BUFFER_POOL_LOADED
#
# Tests that the load of the innodb buffer pool has completed. This implies
# innodb_buffer_pool_load_at_startup=1 (default), or a manual
# SET innodb_buffer_pool_load_now=1.
#
# Returns: 0 when the Innodb_buffer_pool_load_status value contains
#          'load completed', 1 otherwise.
innodb_buffer_pool_loaded()
{
  local load_status
  load_status=$(_process_sql --skip-column-names -e 'select VARIABLE_VALUE from information_schema.GLOBAL_STATUS WHERE VARIABLE_NAME="Innodb_buffer_pool_load_status"')
  case "$load_status" in
    *'load completed'*)
      return 0
      ;;
  esac
  return 1
}
| 86 | + |
# GALERAONLINE
#
# Tests that the galera node is in the SYNCed state, i.e. that the
# WSREP_LOCAL_STATE status value is 4.
# 4 from https://galeracluster.com/library/documentation/node-states.html#node-state-changes
# not https://xkcd.com/221/
#
# Returns: 0 when the state is 4, 1 otherwise (including when no value is
#          returned).
galeraonline()
{
  local state
  state=$(_process_sql --skip-column-names -e 'select VARIABLE_VALUE from information_schema.GLOBAL_STATUS WHERE VARIABLE_NAME="WSREP_LOCAL_STATE"')
  # Compared as a string so that server output is never evaluated as an
  # arithmetic expression.
  if [[ $state == 4 ]]; then
    return 0
  fi
  return 1
}

# GALERA_ONLINE
#
# Equivalent spelling of the galeraonline test, so that both --galeraonline
# and --galera_online select the same check.
galera_online()
{
  galeraonline "$@"
}
| 101 | + |
# REPLICATION
#
# Tests the replication has the required set of functions:
# --replication_all -> Checks all replication sources
# --replication_name=n -> sets the multisource connection name tested
# --replication_io -> IO thread is running
# --replication_sql -> SQL thread is running
# --replication_seconds_behind_master=n -> less than or equal this seconds of delay
# --replication_sql_remaining_delay=n -> less than or equal this seconds of remaining delay
# (ref: https://mariadb.com/kb/en/delayed-replication/)
#
# Globals read: repl (associative array holding the settings above).
# Returns: 0 when every requested check passes for every row read,
#          1 when a check fails or reading the client output times out.
replication()
{
  local field value rc

  # SHOW REPLICA available 10.5+
  # https://github.com/koalaman/shellcheck/issues/2383
  # shellcheck disable=SC2016,SC2026
  _process_sql -e "show ${repl[all]:+all} slave${repl[all]:+s} ${repl[name]:+'${repl[name]}'} status\G" | \
  {
    # Row header
    rc=0
    read -t 5 -r || rc=$?
    # read timeout (status > 128); a plain EOF (status 1) is not an error
    if (( rc > 128 )); then
      return 1
    fi

    while :; do
      # The status of read is captured explicitly: after a while loop, $?
      # is the status of the last body command, not of the loop condition,
      # so a timeout could not be detected there.
      rc=0
      IFS=":" read -t 1 -r field value || rc=$?
      if (( rc != 0 )); then
        break
      fi
      # Trim leading space from the field name
      field=${field#"${field%%[![:space:]]*}"}
      # Leading space on all values by the \G format needs to be trimmed.
      value=${value:1}
      case "$field" in
        Slave_IO_Running)
          if [ -n "${repl[io]}" ] && [ "$value" = 'No' ]; then
            return 1
          fi
          ;;
        Slave_SQL_Running)
          if [ -n "${repl[sql]}" ] && [ "$value" = 'No' ]; then
            return 1
          fi
          ;;
        Seconds_Behind_Master)
          # A NULL value is the IO thread not running:
          if [ -n "${repl[seconds_behind_master]}" ] &&
            { [ "$value" = NULL ] ||
              (( "${repl[seconds_behind_master]}" < "$value" )); }; then
            return 1
          fi
          ;;
        SQL_Remaining_Delay)
          # Unlike Seconds_Behind_Master, sql_remaining_delay will hit NULL
          # once replication is caught up - https://mariadb.com/kb/en/delayed-replication/
          if [ -n "${repl[sql_remaining_delay]}" ] &&
            [ "$value" != NULL ] &&
            (( "${repl[sql_remaining_delay]}" < "$value" )); then
            return 1
          fi
          ;;
      esac
    done
    # read timeout while waiting for the next line
    if (( rc > 128 )); then
      return 1
    fi
    return 0
  }
  return $?
}
| 166 | + |
# mariadbupgrade
#
# Tests the lock on the file mysql_upgrade_info inside the data directory
# ($datadir/mysql_upgrade_info).
# https://jira.mariadb.org/browse/MDEV-27068
#
# Globals read: datadir.
# Returns: 0 when the file is not readable, otherwise the exit status of a
#          non-blocking exclusive flock on it.
mariadbupgrade()
{
  local info_file="$datadir/mysql_upgrade_info"

  # Nothing to lock: the test passes.
  [ -r "$info_file" ] || return 0

  # File descriptor 9 is opened on the file just for the lock attempt.
  flock --exclusive --nonblock -n 9 9<"$info_file"
  return $?
}
| 180 | + |
| 181 | + |
| 182 | +# MAIN |
| 183 | + |
# Without any option or test there is nothing to check: treat it as an error.
if (( $# == 0 )); then
  echo "At least one argument required" >&2
  exit 1
fi

# Global variables used by tests
#   repl       - replication_* settings, keyed by the option name
#   def        - --defaults-* settings (file, extra_file, group_suffix)
#   nodefaults - non-empty when --no-defaults is in effect
#   datadir    - data directory used by the mariadbupgrade test
declare -A repl def
nodefaults=
datadir=/var/lib/mysql
| 194 | + |
# _repl_param_check
#
# Keeps the replication settings in the global 'repl' array consistent after
# the option named by $1 (without the --replication_ prefix) has been set:
#   seconds_behind_master, sql_remaining_delay
#        -> force repl[io]=1 when it is not already set
#   all  -> clears repl[name] (the two are mutually exclusive)
#   name -> clears repl[all]
# Every adjustment is reported on stderr. Other option names are ignored.
_repl_param_check()
{
  case "$1" in
    seconds_behind_master | sql_remaining_delay)
      if [ -z "${repl[io]}" ]; then
        repl[io]=1
        echo "Forcing --replication_io=1, $1 requires IO thread to be running" >&2
      fi
      ;;
    all)
      if [ -n "${repl[name]}" ]; then
        unset 'repl[name]'
        echo "Option --replication_all incompatible with specified source --replication_name, clearing replication_name" >&2
      fi
      ;;
    name)
      if [ -n "${repl[all]}" ]; then
        unset 'repl[all]'
        echo "Option --replication_name incompatible with --replication_all, clearing replication_all" >&2
      fi
      ;;
  esac
}
| 219 | + |
# _test_exists
#
# Returns 0 when a shell function named $1 is defined, non-zero otherwise.
# The function name that 'declare -F' would print is discarded so that
# running a test does not add noise to the healthcheck's stdout; '--' keeps
# a name starting with '-' from being parsed as an option.
_test_exists() {
  declare -F -- "$1" >/dev/null
}
| 224 | + |
# Marks the end of mysql -> mariadb name changes in 10.6+
#ENDOFSUBSTITUTIONS

# _main
#
# Processes the command line strictly in order. Options update the global
# settings (repl, def, nodefaults, datadir); any other --name argument is
# the name of a test function, which is run immediately with the settings
# gathered so far.
#
#   --su=USER / --su-mysql   re-exec this script under another unix user
#                            (via gosu) with the remaining arguments
#   --replication_X[=V]      set repl[X]; without a value it is set to 1
#   --datadir=DIR            set datadir
#   --no-defaults            clear def and pass --no-defaults to the client
#   --defaults-{file,extra-file,group-suffix}[=V]
#                            set def[file|extra_file|group_suffix]
#
# Exits 1 on an unknown option, an unknown test, or a failed test.
_main()
{
  # 'test' starts empty and is cleared after every run so that a test is
  # executed exactly once, at its own position in the argument list.
  local test='' n v u

  while [ $# -gt 0 ]; do
    case "$1" in
      --su=*)
        # user name is everything after the '='
        u="${1#*=}"
        shift
        exec gosu "${u}" "${BASH_SOURCE[0]}" "$@"
        ;;
      --su-mysql)
        shift
        exec gosu mysql "${BASH_SOURCE[0]}" "$@"
        ;;
      --replication_*=*)
        # Change the n to what is between _ and = and make lower case
        n=${1#*_}
        n=${n%%=*}
        n=${n,,}
        # v is after the =
        v=${1#*=}
        repl[$n]=$v
        _repl_param_check "$n"
        ;;
      --replication_*)
        # Without =, look for a non --option next as the value,
        # otherwise treat it as an "enable", just equate to 1.
        # Clearing option is possible with "--replication_X="
        n=${1#*_}
        n=${n,,}
        # A missing next argument also means "enable"; shifting past the
        # end of the list would otherwise abort the script under set -e.
        if [ $# -lt 2 ] || [ "${2:0:2}" == '--' ]; then
          repl[$n]=1
        else
          repl[$n]=$2
          shift
        fi
        _repl_param_check "$n"
        ;;
      --datadir=*)
        datadir=${1#*=}
        ;;
      --no-defaults)
        # Empty the array but keep it associative; 'unset' would make later
        # def[...] assignments create an indexed array with colliding keys.
        def=()
        nodefaults=1
        ;;
      --defaults-file=* | --defaults-extra-file=* | --defaults-group-suffix=*)
        n=${1:11} # length --defaults-
        n=${n%%=*}
        n=${n//-/_}
        # v is after the =
        v=${1#*=}
        def[$n]=$v
        nodefaults=
        ;;
      --defaults-file | --defaults-extra-file | --defaults-group-suffix)
        n=${1:11} # length --defaults-
        n=${n//-/_}
        if [ $# -lt 2 ] || [ "${2:0:2}" == '--' ]; then
          def[$n]=""
        else
          def[$n]=$2
          shift
        fi
        nodefaults=
        ;;
      --*)
        test=${1#--}
        ;;
      *)
        echo "Unknown healthcheck option $1" >&2
        exit 1
        ;;
    esac
    if [ -n "$test" ]; then
      # Names starting with '_' are internal helpers, never tests.
      if [[ $test == _* ]] || ! _test_exists "$test" >/dev/null; then
        echo "healthcheck unknown test '$test'" >&2
        exit 1
      elif ! "$test"; then
        echo "healthcheck $test failed" >&2
        exit 1
      fi
      test=
    fi
    shift
  done
}

_main "$@"
0 commit comments