class Xlog(Plugin):
    DEFAULT_CONFIG = {'lag_more_than_in_sec': str(60 * 5)}
-    query_wal_lsn_diff = " select pg_catalog.pg_wal_lsn_diff " \
+
+    # get amount of WAL since '0/00000000'
+    query_wal_lsn_diff = " SELECT pg_catalog.pg_wal_lsn_diff " \
        "(pg_catalog.pg_current_wal_lsn(), '0/00000000');"
-    query_xlog_lsn_diff = "select pg_catalog.pg_xlog_location_diff " \
+
+    query_xlog_lsn_diff = "SELECT pg_catalog.pg_xlog_location_diff " \
        "(pg_catalog.pg_current_xlog_location(), '0/00000000');"
+
+    # get time of replication lag
    query_agent_replication_lag = "SELECT CASE WHEN extract(epoch from now()-pg_last_xact_replay_timestamp()) " \
        "IS NULL THEN 0 ELSE extract(epoch from now()-pg_last_xact_replay_timestamp()) END"
+
+    # get diff in LSN for each replica (for version 10 and higher also write, flush and replay lags)
+
+    query_wal_lag_lsn = "SELECT application_name, " \
+        " flush_lag, replay_lag, write_lag, " \
+        " pg_wal_lsn_diff(pg_current_wal_lsn(), replay_lsn) AS total_lag " \
+        " FROM pg_stat_replication;"
+    query_xlog_lag_lsn = "SELECT application_name, " \
+        "pg_xlog_location_diff(pg_current_xlog_location(), replay_location) AS total_lag " \
+        "FROM pg_stat_replication;"
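+    # note: pg_stat_replication has one row per connected standby (walsender),
+    # so each of the two queries above returns one row per replica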
+
+    # discovery rule key for the name of each replica
+    key_lsn_replication_discovery = "pgsql.replication.discovery{0}"
+    key_total_lag = 'pgsql.replication.total_lag{0}'
+    # for PG 10 and higher
+    key_flush = 'pgsql.replication.flush_lag{0}'
+    key_replay = 'pgsql.replication.replay_lag{0}'
+    key_write = 'pgsql.replication.write_lag{0}'
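+    # '{0}' in each key template is filled in later with the bracketed item
+    # argument, e.g. self.key_wall.format("[]") in run()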
+
    key_wall = 'pgsql.wal.write{0}'
    key_count_wall = "pgsql.wal.count{0}"
    key_replication = "pgsql.replication_lag{0}"
@@ -27,12 +51,39 @@ def run(self, zbx):
                zbx.send('pgsql.replication_lag[sec]', float(lag[0][0]))
        else:
            Pooler.run_sql_type('replication_lag_master_query')
-        # xlog location
+
        if Pooler.server_version_greater('10.0'):
            result = Pooler.query(self.query_wal_lsn_diff)
+            result_lags = Pooler.query(self.query_wal_lag_lsn)
+            if result_lags:
+                lags = []
+                for info in result_lags:
+                    lags.append({'{#APPLICATION_NAME}': info[0]})
+                    zbx.send('pgsql.replication.flush_lag[{0}]'.format(
+                        info[0]), info[1])
+                    zbx.send('pgsql.replication.replay_lag[{0}]'.format(
+                        info[0]), info[2])
+                    zbx.send('pgsql.replication.write_lag[{0}]'.format(
+                        info[0]), info[3])
+                    zbx.send('pgsql.replication.total_lag[{0}]'.format(
+                        info[0]), float(info[4]), self.DELTA_SPEED)
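+                # LLD payload sent below, e.g. {"data": [{"{#APPLICATION_NAME}": "standby1"}]}
+                # ("standby1" is only an illustrative application_name)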
+                zbx.send('pgsql.replication.discovery[]', zbx.json({'data': lags}))
+                del lags
        else:
            result = Pooler.query(self.query_xlog_lsn_diff)
+            result_lags = Pooler.query(self.query_xlog_lag_lsn)
+            if result_lags:
+                lags = []
+                for info in result_lags:
+                    lags.append({'{#APPLICATION_NAME}': info[0]})
+
+                    zbx.send('pgsql.replication.total_lag[{0}]'.format(
+                        info[0]), float(info[1]), self.DELTA_SPEED)
+                zbx.send('pgsql.replication.discovery[]', zbx.json({'data': lags}))
+                del lags
+
        zbx.send(self.key_wall.format("[]"), float(result[0][0]), self.DELTA_SPEED)
+
        # count of xlog files
        if Pooler.server_version_greater('10.0'):
            result = Pooler.run_sql_type('count_wal_files')
@@ -89,6 +140,59 @@ def triggers(self, template):
            self.plugin_config('lag_more_than_in_sec')
        })

+    def discovery_rules(self, template):
+        rule = {
+            'name': 'Replication lag discovery',
+            'key': self.key_lsn_replication_discovery.format('[{0}]'.format(self.Macros[self.Type])),
+
+        }
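+        # older Zabbix releases take a plain '{#MACRO}:regex' filter string,
+        # newer ones expect structured filter conditions (built below)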
+        if Plugin.old_zabbix:
+            conditions = []
+            rule['filter'] = '{#APPLICATION_NAME}:.*'
+        else:
+            conditions = [
+                {
+                    'condition': [
+                        {'macro': '{#APPLICATION_NAME}',
+                         'value': '.*',
+                         'operator': None,
+                         'formulaid': 'A'}
+                    ]
+                }
+
+            ]
+        items = [
+            {'key': self.right_type(self.key_flush, var_discovery="{#APPLICATION_NAME},"),
+             'name': 'Time elapsed between flushing recent WAL locally and receiving notification that '
+                     'this standby server {#APPLICATION_NAME} has written and flushed it',
+             'value_type': Plugin.VALUE_TYPE.text,
+             'delay': self.plugin_config('interval')},
+            {'key': self.right_type(self.key_replay, var_discovery="{#APPLICATION_NAME},"),
+             'name': 'Time elapsed between flushing recent WAL locally and receiving notification that '
+                     'this standby server {#APPLICATION_NAME} has written, flushed and applied it',
+             'value_type': Plugin.VALUE_TYPE.text,
+             'delay': self.plugin_config('interval')},
+            {'key': self.right_type(self.key_write, var_discovery="{#APPLICATION_NAME},"),
+             'name': 'Time elapsed between flushing recent WAL locally and receiving notification that '
+                     'this standby server {#APPLICATION_NAME} has written it',
+             'value_type': Plugin.VALUE_TYPE.text,
+             'delay': self.plugin_config('interval')},
+            {'key': self.right_type(self.key_total_lag, var_discovery="{#APPLICATION_NAME},"),
+             'name': 'Delta of total lag for {#APPLICATION_NAME}',
+             'value_type': Plugin.VALUE_TYPE.numeric_float,
+             'delay': self.plugin_config('interval')}
+        ]
+        graphs = [
+            {
+                'name': 'Delta of total lag for {#APPLICATION_NAME}',
+                'items': [
+                    {'color': 'CC0000',
+                     'key': self.right_type(self.key_total_lag, var_discovery="{#APPLICATION_NAME},")},
+                ]
+            }
+        ]
+        return template.discovery_rule(rule=rule, conditions=conditions, items=items, graphs=graphs)
+
    def keys_and_queries(self, template_zabbix):
        result = []
        if LooseVersion(self.VersionPG) < LooseVersion('10'):