Skip to content

Commit a75cb66

Browse files
author
Tim Vaillancourt
committed
Move to score-based secondary selection that is aware of the hidden and priority flags
1 parent 8b9ca8c commit a75cb66

File tree

1 file changed

+53
-28
lines changed

1 file changed

+53
-28
lines changed

MongoBackup/ReplsetHandler.py

Lines changed: 53 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -26,55 +26,85 @@ def __init__(self, host, port, user, password, authdb, max_lag_secs, retries=5):
2626
def close(self):
    """Close the underlying connection and return whatever its close() returns."""
    conn = self.connection
    return conn.close()
2828

def admin_command(self, admin_command):
    """Run a command against the 'admin' database, retrying on failure.

    The command is attempted up to ``self.retries`` times, pausing one
    second between attempts. Both raised errors and empty (falsy) results
    count as a failed attempt.

    Args:
        admin_command: Name of the command to run, e.g. 'replSetGetStatus'.

    Returns:
        The truthy result returned by the command.

    Raises:
        Exception: If no usable result was obtained within the allowed
            number of attempts.
    """
    tries = 0
    status = None
    while not status and tries < self.retries:
        try:
            status = self.connection['admin'].command(admin_command)
        except Exception as e:
            # Broad on purpose: any failure of the call is retried.
            logging.error("Error running command '%s': %s" % (admin_command, e))
        else:
            if not status:
                # An empty reply is a failed attempt too; log it explicitly
                # instead of raising an unbound or stale exception object.
                logging.error("Error running command '%s': %s" % (admin_command, "empty result returned"))
        if not status:
            tries += 1
            # Only wait when another attempt will actually follow.
            if tries < self.retries:
                sleep(1)
    if not status:
        raise Exception("Could not get output from command: '%s' after %i retries!" % (admin_command, self.retries))
    return status
4444

45+
def get_rs_status(self):
    """Return the result of running 'replSetGetStatus' through admin_command."""
    command_name = 'replSetGetStatus'
    return self.admin_command(command_name)
47+
48+
def get_rs_config(self):
    """Return the result of running 'replSetGetConfig' through admin_command."""
    command_name = 'replSetGetConfig'
    return self.admin_command(command_name)
50+
4551
def find_desirable_secondary(self):
    """Pick the best SECONDARY member of the replica set.

    Every healthy SECONDARY starts with a score of 100, gains 20 when its
    config marks it hidden, loses its priority when that priority is greater
    than 1, and loses one point per second of replication lag behind the
    PRIMARY. Members whose lag is not below ``self.max_lag_secs`` are
    skipped; the highest-scoring remaining member is chosen.

    Returns:
        dict with keys 'replSet', 'count' (number of healthy secondaries
        within the lag limit), 'host', 'optime' and 'score'.

    Raises:
        Exception: If no healthy PRIMARY is found, or if the eligible
            secondaries plus the primary do not reach the quorum of
            ceil(members / 2).
    """
    rs_status = self.get_rs_status()
    rs_config = self.get_rs_config()
    rs_name = rs_status['set']
    members = rs_status['members']
    quorum_count = ceil(len(members) / 2.0)

    primary = None
    for member in members:
        # .get(): a member entry without 'health' is treated as unhealthy
        # instead of aborting the whole search with a KeyError.
        if member['stateStr'] == 'PRIMARY' and member.get('health', 0) > 0:
            primary = {
                'host': member['name'],
                'optime': member['optimeDate']
            }
            logging.debug("Found PRIMARY: %s/%s with optime %s" % (
                rs_name,
                member['name'],
                str(member['optime']['ts'])
            ))
    if primary is None:
        logging.fatal("Unable to locate a PRIMARY member for replset %s, giving up" % rs_name)
        raise Exception("Unable to locate a PRIMARY member for replset %s, giving up" % rs_name)

    # Index the config once by host instead of rescanning it per member.
    config_by_host = dict((cfg['host'], cfg) for cfg in rs_config['config']['members'])

    secondary = None
    eligible_count = 0
    for member in members:
        if member['stateStr'] != 'SECONDARY' or member.get('health', 0) <= 0:
            continue

        log_data = {}
        score = 100

        member_config = config_by_host.get(member['name'])
        if member_config is not None:
            if member_config.get('hidden'):
                score += 20
                log_data['hidden'] = True
            if 'priority' in member_config:
                log_data['priority'] = member_config['priority']
                if member_config['priority'] > 1:
                    score = score - member_config['priority']

        # Subtract the datetimes directly; going through mktime() would
        # interpret them as local time and can be skewed by DST changes.
        rep_lag = (primary['optime'] - member['optimeDate']).total_seconds()
        score = score - rep_lag
        if rep_lag < self.max_lag_secs:
            # Count every eligible secondary, not only the ones that happen
            # to beat the current best, so the quorum check sees them all.
            eligible_count += 1
            if secondary is None or score > secondary['score']:
                secondary = {
                    'replSet': rs_name,
                    'count': eligible_count,
                    'host': member['name'],
                    'optime': member['optimeDate'],
                    'score': score
                }
            log_msg = "Found SECONDARY %s/%s" % (rs_name, member['name'])
        else:
            log_msg = "Found SECONDARY %s/%s with too-high replication lag! Skipping" % (rs_name, member['name'])

        log_data['optime'] = member['optime']['ts']
        log_data['score'] = score
        logging.debug("%s: %s" % (log_msg, str(log_data)))

    if secondary is not None:
        secondary['count'] = eligible_count

    # +1 accounts for the primary when comparing against the quorum.
    if secondary is None or (secondary['count'] + 1) < quorum_count:
        logging.fatal("Not enough secondaries in replset %s to take backup! Num replset members: %i, required quorum: %i" % (
            rs_name,
            len(members),
            quorum_count
        ))
        raise Exception("Not enough secondaries in replset %s to safely take backup!" % rs_name)

    logging.debug("Choosing SECONDARY %s for replica set %s (score: %i)" % (secondary['host'], rs_name, secondary['score']))
    return secondary
93118

94119

0 commit comments

Comments
 (0)