@@ -94,18 +94,18 @@ class CheckMysqlReplicationStatus < Sensu::Plugin::Check::CLI
9494 # #YELLOW
9595 proc : lambda { |s | s . to_i } # rubocop:disable Lambda
9696
# How many times the slave status is re-queried when the reported lag looks
# like an outlier. The default of 0 disables retrying. The raw command-line
# string is converted with to_i.
option :lag_outlier_retry,
       description: 'Number of retries when lag outlier is detected (0 = disable)',
       long: '--lag-outlier-retry=VALUE',
       default: 0,
       proc: ->(raw) { raw.to_i }
102+
# A Seconds_Behind_Master reading at or above this value is treated as a lag
# outlier and triggers the retry logic in #run. The raw command-line string
# is converted with to_i.
option :lag_outlier_threshold,
       description: 'Lag threshold to trigger outlier protection',
       long: '--lag-outlier-threshold=VALUE',
       default: 100_000,
       proc: ->(raw) { raw.to_i }
102108
103- option :lag_outlier_retry ,
104- long : '--lag-outlier-retry=VALUE' ,
105- description : 'Number of retries when lag outlier protection is triggered' ,
106- default : 0 ,
107- proc : lambda { |s | s . to_i } # rubocop:disable Lambda
108-
109109 option :lag_outlier_sleep ,
110110 long : '--lag-outlier-sleep=VALUE' ,
111111 description : 'Sleep between lag outlier protection retries' ,
@@ -197,6 +197,8 @@ def run
197197 db = open_connection
198198
199199 retries = config [ :lag_outlier_retry ]
200+ unknown "Invalid value for --lag-outlier-retry" if retries < 0
201+
200202 while retries >= 0
201203 row = query_slave_status ( db )
202204 ok 'show slave status was nil. This server is not a slave.' if row . nil?
@@ -207,18 +209,18 @@ def run
207209
208210 replication_delay = row [ 'Seconds_Behind_Master' ] . to_i
209211 retries -= 1
210- if replication_delay >= config [ :lag_outlier_threshold ] && retries >= 0
211- sleep config [ :lag_outlier_sleep ]
212- next
213- end
214-
215- message = "replication delayed by #{ replication_delay } "
216- # TODO (breaking change): Thresholds are exclusive which is not consistent with all other checks
217- critical message if replication_delay > config [ :crit ]
218- warning message if replication_delay > config [ :warn ]
219- ok "#{ ok_slave_message } , #{ message } "
212+
213+ break if retries < 0 || replication_delay < config [ :lag_outlier_threshold ]
214+
215+ # Outlier detected - wait and retry
216+ sleep config [ :lag_outlier_sleep ]
220217 end
221- unknown "unable to retrieve slave status"
218+
219+ message = "replication delayed by #{ replication_delay } "
220+ # TODO (breaking change): Thresholds are exclusive which is not consistent with all other checks
221+ critical message if replication_delay > config [ :crit ]
222+ warning message if replication_delay > config [ :warn ]
223+ ok "#{ ok_slave_message } , #{ message } "
222224 rescue Mysql ::Error => e
223225 errstr = "Error code: #{ e . errno } Error message: #{ e . error } "
224226 critical "#{ errstr } SQLSTATE: #{ e . sqlstate } " if e . respond_to? ( 'sqlstate' )
0 commit comments