@@ -26,55 +26,85 @@ def __init__(self, host, port, user, password, authdb, max_lag_secs, retries=5):
def close(self):
    """Close the underlying connection.

    Returns:
        Whatever ``self.connection.close()`` returns.
    """
    return self.connection.close()
2828
def admin_command(self, admin_command):
    """Run a command against the 'admin' database, retrying on failure.

    The command is attempted up to ``self.retries`` times. An attempt fails
    when the driver call raises or when it returns a falsy result; every
    failure is logged, with a one second pause between attempts.

    Args:
        admin_command: Command passed unchanged to
            ``self.connection['admin'].command()``.

    Returns:
        The first truthy result returned by the command.

    Raises:
        Exception: If no attempt produced a truthy result (including when
            ``self.retries`` is zero or negative, so nothing was attempted).
    """
    status = None
    for attempt in range(1, self.retries + 1):
        try:
            status = self.connection['admin'].command(admin_command)
            # An empty result counts as a failed attempt; record an explicit
            # reason rather than raising a name that is not bound yet.
            failure = "command returned an empty result"
        except Exception as e:
            status = None
            failure = e
        if status:
            return status
        logging.error("Error running command '%s ': %s" % (admin_command, failure))
        # No point pausing after the last attempt; we are about to give up.
        if attempt < self.retries:
            sleep(1)
    raise Exception("Could not get output from command: '%s ' after %i retries!" % (admin_command, self.retries))
4444
def get_rs_status(self):
    """Return the result of the 'replSetGetStatus' admin command.

    Delegates to ``self.admin_command``, so its retry and error behaviour
    applies here as well.
    """
    return self.admin_command('replSetGetStatus')
47+
def get_rs_config(self):
    """Return the result of the 'replSetGetConfig' admin command.

    Delegates to ``self.admin_command``, so its retry and error behaviour
    applies here as well.
    """
    return self.admin_command('replSetGetConfig')
50+
4551 def find_desirable_secondary (self ):
4652 rs_status = self .get_rs_status ()
53+ rs_config = self .get_rs_config ()
4754 rs_name = rs_status ['set' ]
4855 quorum_count = ceil (len (rs_status ['members' ]) / 2.0 )
49- secondary = None
50- primary = None
56+
57+ primary = None
5158 for member in rs_status ['members' ]:
52- if 'health' in member and member ['health' ] > 0 :
53- logging .debug ("Found %s: %s/%s with optime %s" % (
54- member ['stateStr' ],
59+ if member ['stateStr' ] == 'PRIMARY' and member ['health' ] > 0 :
60+ primary = {
61+ 'host' : member ['name' ],
62+ 'optime' : member ['optimeDate' ]
63+ }
64+ logging .debug ("Found PRIMARY: %s/%s with optime %s" % (
5565 rs_name ,
5666 member ['name' ],
5767 str (member ['optime' ]['ts' ])
5868 ))
69+ if primary is None :
70+ logging .fatal ("Unable to locate a PRIMARY member for replset %s, giving up" % rs_name )
71+ raise Exception , "Unable to locate a PRIMARY member for replset %s, giving up" % rs_name , None
5972
60- if member ['stateStr' ] == 'PRIMARY' :
61- primary = {
62- 'host' : member ['name' ],
63- 'optime' : member ['optimeDate' ]
64- }
65- elif member ['stateStr' ] == 'SECONDARY' :
66- if secondary is None or secondary ['optime' ] < member ['optimeDate' ]:
73+ secondary = None
74+ for member in rs_status ['members' ]:
75+ if member ['stateStr' ] == 'SECONDARY' and member ['health' ] > 0 :
76+ log_data = {}
77+ score = 100
78+
79+ for member_config in rs_config ['config' ]['members' ]:
80+ if member_config ['host' ] == member ['name' ]:
81+ if 'hidden' in member_config and member_config ['hidden' ] == True :
82+ score += 20
83+ log_data ['hidden' ] = True
84+ if 'priority' in member_config :
85+ log_data ['priority' ] = member_config ['priority' ]
86+ if member_config ['priority' ] > 1 :
87+ score = score - member_config ['priority' ]
88+ break
89+
90+ rep_lag = (mktime (primary ['optime' ].timetuple ()) - mktime (member ['optimeDate' ].timetuple ()))
91+ score = score - rep_lag
92+ if rep_lag < self .max_lag_secs :
93+ if secondary is None or score > secondary ['score' ]:
6794 secondary = {
68- 'replSet' : rs_status [ 'set' ] ,
95+ 'replSet' : rs_name ,
6996 'count' : 1 if secondary is None else secondary ['count' ] + 1 ,
7097 'host' : member ['name' ],
71- 'optime' : member ['optimeDate' ]
98+ 'optime' : member ['optimeDate' ],
99+ 'score' : score
72100 }
101+ log_msg = "Found SECONDARY %s/%s" % (rs_name , member ['name' ])
102+ else :
103+ log_msg = "Found SECONDARY %s/%s with too-high replication lag! Skipping" % (rs_name , member ['name' ])
73104
74- if primary is None :
75- logging .fatal ("Unable to locate a PRIMARY member for replset %s, giving up" % rs_name )
76- raise Exception , "Unable to locate a PRIMARY member for replset %s, giving up" % rs_name , None
77-
105+ log_data ['optime' ] = member ['optime' ]['ts' ]
106+ log_data ['score' ] = score
107+ logging .debug ("%s: %s" % (log_msg , str (log_data )))
78108 if secondary is None or (secondary ['count' ] + 1 ) < quorum_count :
79109 logging .fatal ("Not enough secondaries in replset %s to take backup! Num replset members: %i, required quorum: %i" % (
80110 rs_name ,
@@ -83,12 +113,7 @@ def find_desirable_secondary(self):
83113 ))
84114 raise Exception , "Not enough secondaries in replset %s to safely take backup!" % rs_name , None
85115
86- rep_lag = (mktime (primary ['optime' ].timetuple ()) - mktime (secondary ['optime' ].timetuple ()))
87- if rep_lag > self .max_lag_secs :
88- logging .fatal ("No secondary found in replset %s within %s lag time!" % (rs_name , self .max_lag_secs ))
89- raise Exception , "No secondary found in replset %s within %s lag time!" % (rs_name , self .max_lag_secs ), None
90-
91- logging .debug ("Choosing SECONDARY %s for replica set %s" % (secondary ['host' ], rs_name ))
116+ logging .debug ("Choosing SECONDARY %s for replica set %s (score: %i)" % (secondary ['host' ], rs_name , secondary ['score' ]))
92117 return secondary
93118
94119
0 commit comments