11import logging
22
33from math import ceil
4- from time import mktime , sleep
4+ from time import mktime
55
66from DB import DB
77from ShardingHandler import ShardingHandler
88
99
class ReplsetHandler:
    """Inspect a MongoDB replica set and pick a secondary to back up from.

    The handler opens a connection through the project's ``DB`` wrapper and
    uses the ``replSetGetStatus`` output together with the replica set
    configuration to score every healthy SECONDARY member.
    """

    # Extra score (as a fraction of the base score) granted to hidden members.
    HIDDEN_WEIGHT = 0.20

    def __init__(self, host, port, user, password, authdb, max_lag_secs):
        """Store the connection settings and open the database connection.

        :param host: hostname passed to ``DB``.
        :param port: port passed to ``DB``.
        :param user: username passed to ``DB``.
        :param password: password passed to ``DB``.
        :param authdb: authentication database passed to ``DB``.
        :param max_lag_secs: replication lag (seconds) at or above which a
            secondary is skipped.
        :raises Exception: re-raises whatever ``DB`` raised while connecting.
        """
        self.host = host
        self.port = port
        self.user = user
        self.password = password
        self.authdb = authdb
        self.max_lag_secs = max_lag_secs

        try:
            self.db = DB(self.host, self.port, self.user, self.password, self.authdb)
            self.connection = self.db.connection()
        except Exception as e:
            logging.fatal("Could not get DB connection! Error: %s" % e)
            # Bare raise keeps the original traceback intact.
            raise

    def close(self):
        """Close the underlying connection and return the result of its close()."""
        return self.connection.close()

    def get_rs_status(self):
        """Return the output of the ``replSetGetStatus`` admin command.

        :raises Exception: if the command fails for any reason.
        """
        try:
            return self.db.admin_command('replSetGetStatus')
        except Exception as e:
            raise Exception("Error getting replica set status! Error: %s" % e)

    @staticmethod
    def _version_tuple(version):
        """Convert a version string or sequence into a tuple of ints.

        Non-numeric suffixes such as ``-rc0`` are ignored; an empty or
        missing version yields an empty tuple.
        """
        if not version:
            return ()
        parts = version.split('.') if hasattr(version, 'split') else version
        numbers = []
        for part in parts:
            digits = ''
            for char in str(part):
                if not char.isdigit():
                    break
                digits += char
            numbers.append(int(digits) if digits else 0)
        return tuple(numbers)

    @staticmethod
    def _optime_ts(member):
        """Return the member's optime timestamp for logging.

        When ``optime`` is a sub-document carrying a ``ts`` key that value is
        returned; otherwise the raw ``optime`` value is returned unchanged
        (presumably a bare BSON Timestamp on older servers -- verify).
        """
        optime = member['optime']
        if isinstance(optime, dict) and 'ts' in optime:
            return optime['ts']
        return optime

    def get_rs_config(self):
        """Return the replica set configuration document.

        Uses the ``replSetGetConfig`` command on servers that provide it and
        falls back to reading ``local.system.replset`` otherwise.

        :raises Exception: if the configuration cannot be fetched.
        """
        try:
            # Compare numerically: string tuples compare lexicographically,
            # and replSetGetConfig is documented as new in MongoDB 3.0.
            if self._version_tuple(self.db.server_version()) >= (3, 0):
                output = self.db.admin_command('replSetGetConfig')
                return output['config']
            return self.connection['local'].system.replset.find_one()
        except Exception as e:
            raise Exception("Error getting replica set config! Error: %s" % e)

    def find_desirable_secondary(self):
        """Choose the best-scoring healthy secondary within the lag limit.

        Every healthy SECONDARY starts from a base score of
        ``max_lag_secs * 10``. Hidden members gain ``HIDDEN_WEIGHT`` of the
        base score, a positive priority is subtracted, and the replication
        lag behind the primary is subtracted before the result is scaled so
        that the base score maps to 100.

        :returns: dict with keys ``replSet``, ``count`` (number of healthy
            secondaries within the lag limit), ``host``, ``optime`` and
            ``score``.
        :raises Exception: if no healthy PRIMARY exists, or if too few
            secondaries qualify to satisfy the quorum check.
        """
        rs_status = self.get_rs_status()
        rs_config = self.get_rs_config()
        rs_name = rs_status['set']
        quorum_count = ceil(len(rs_status['members']) / 2.0)

        primary = None
        for member in rs_status['members']:
            if member['stateStr'] == 'PRIMARY' and member['health'] > 0:
                primary = {
                    'host': member['name'],
                    'optime': member['optimeDate']
                }
                logging.debug("Found PRIMARY: %s/%s with optime %s" % (
                    rs_name,
                    member['name'],
                    str(self._optime_ts(member))
                ))
        if primary is None:
            logging.fatal("Unable to locate a PRIMARY member for replset %s, giving up" % rs_name)
            raise Exception("Unable to locate a PRIMARY member for replset %s, giving up" % rs_name)

        base_score = self.max_lag_secs * 10
        secondary = None
        eligible_count = 0
        for member in rs_status['members']:
            if member['stateStr'] != 'SECONDARY' or not member['health'] > 0:
                continue

            score = base_score
            log_data = {}
            for member_config in rs_config['members']:
                if member_config['host'] != member['name']:
                    continue
                if member_config.get('hidden'):
                    score += (score * self.HIDDEN_WEIGHT)
                    log_data['hidden'] = True
                if 'priority' in member_config:
                    log_data['priority'] = int(member_config['priority'])
                    if member_config['priority'] > 0:
                        score = score - member_config['priority']
                break

            # Plain datetime subtraction avoids the local-time/DST skew that
            # mktime() can introduce.
            rep_lag = (primary['optime'] - member['optimeDate']).total_seconds()
            # Force float math (Python 2 would truncate 100 / base_score to 0
            # for larger lag limits) and guard against a zero lag limit.
            if base_score:
                score = ceil((score - rep_lag) * 100.0 / base_score)
            else:
                score = 0

            if rep_lag < self.max_lag_secs:
                # Count every eligible secondary, not only those that
                # displace the current best candidate.
                eligible_count += 1
                if secondary is None or score > secondary['score']:
                    secondary = {
                        'replSet': rs_name,
                        'host': member['name'],
                        'optime': member['optimeDate'],
                        'score': score
                    }
                log_msg = "Found SECONDARY %s/%s" % (rs_name, member['name'])
            else:
                log_msg = "Found SECONDARY %s/%s with too-high replication lag! Skipping" % (rs_name, member['name'])

            log_data['optime'] = self._optime_ts(member)
            log_data['score'] = int(score)
            logging.debug("%s: %s" % (log_msg, str(log_data)))

        if secondary is not None:
            secondary['count'] = eligible_count

        # The "+ 1" accounts for the primary when checking against quorum.
        if secondary is None or (secondary['count'] + 1) < quorum_count:
            logging.fatal("Not enough secondaries in replset %s to take backup! Num replset members: %i, required quorum: %i" % (
                rs_name,
                len(rs_status['members']),
                quorum_count
            ))
            raise Exception("Not enough secondaries in replset %s to safely take backup!" % rs_name)

        logging.debug("Choosing SECONDARY %s for replica set %s (score: %i)" % (secondary['host'], rs_name, secondary['score']))
        return secondary
93119
94120
0 commit comments