diff --git a/cluster/cluster_fail.go b/cluster/cluster_fail.go index b371e9ab2..c68b467bf 100644 --- a/cluster/cluster_fail.go +++ b/cluster/cluster_fail.go @@ -217,10 +217,10 @@ func (cluster *Cluster) MasterFailover(fail bool) bool { cluster.LogSQL(logs, err, cluster.master.URL, "MasterFailover", config.LvlInfo, "Flush Log on new Master %d", ctbinlog) } time.Sleep(2 * time.Second) - ms, logs, err := dbhelper.GetMasterStatus(cluster.master.Conn, cluster.master.DBVersion) - cluster.master.FailoverMasterLogFile = ms.File + sms, logs, err := dbhelper.GetMasterStatus(cluster.master.Conn, cluster.master.DBVersion) + cluster.master.FailoverMasterLogFile = sms.File cluster.master.FailoverMasterLogPos = "4" - crash.FailoverMasterLogFile = ms.File + crash.FailoverMasterLogFile = sms.File crash.FailoverMasterLogPos = "4" cluster.LogSQL(logs, err, cluster.master.URL, "MasterFailover", config.LvlInfo, "Backing up master pos %s %s", crash.FailoverMasterLogFile, crash.FailoverMasterLogPos) @@ -312,6 +312,7 @@ func (cluster *Cluster) MasterFailover(fail bool) bool { changemasteropt.Logpos = cluster.master.BinaryLogPos changemasteropt.Retry = strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatRetry) changemasteropt.Heartbeat = strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatTime) + changemasteropt.RetryCount = strconv.Itoa(cluster.Conf.MasterRetryCount) changemasteropt.SSL = cluster.Conf.ReplicationSSL changemasteropt.Channel = cluster.Conf.MasterConn changemasteropt.IsDelayed = cluster.oldMaster.IsDelayed @@ -418,6 +419,45 @@ func (cluster *Cluster) MasterFailover(fail bool) bool { return true } +func (cluster *Cluster) pointSlaveToMasterWithMode(sl *ServerMonitor, mode string) (string, error) { + changemasteropt := cluster.GetChangeMasterBaseOptForSlave(sl, cluster.master, sl.IsDelayed) + changemasteropt.Mode = mode + + return dbhelper.ChangeMaster(sl.Conn, changemasteropt, sl.DBVersion) +} + +func (cluster *Cluster) pointSlaveToMasterPositional(sl *ServerMonitor) (string, error) { + changemasteropt := cluster.GetChangeMasterBaseOptForSlave(sl, cluster.master, sl.IsDelayed) + + pseudoGTID, logs, err := sl.GetLastPseudoGTID() + cluster.LogSQL(logs, err, sl.URL, "MasterFailover", config.LvlErr, "Could not get pseudoGTID on slave %s, %s", sl.URL, err) + cluster.LogModulePrintf(cluster.Conf.Verbose, config.ConstLogModGeneral, config.LvlInfo, "Found pseudoGTID %s", pseudoGTID) + slFile, slPos, logs, err := sl.GetBinlogPosFromPseudoGTID(pseudoGTID) + cluster.LogSQL(logs, err, sl.URL, "MasterFailover", config.LvlErr, "Could not find pseudoGTID in slave %s, %s", sl.URL, err) + cluster.LogModulePrintf(cluster.Conf.Verbose, config.ConstLogModGeneral, config.LvlInfo, "Found Coordinates on slave %s, %s", slFile, slPos) + slSkip, logs, err := sl.GetNumberOfEventsAfterPos(slFile, slPos) + cluster.LogSQL(logs, err, sl.URL, "MasterFailover", config.LvlErr, "Could not find number of events after pseudoGTID in slave %s, %s", sl.URL, err) + cluster.LogModulePrintf(cluster.Conf.Verbose, config.ConstLogModGeneral, config.LvlInfo, "Found %d events to skip after coordinates on slave %s,%s", slSkip, slFile, slPos) + + mFile, mPos, logs, err := cluster.master.GetBinlogPosFromPseudoGTID(pseudoGTID) + cluster.LogSQL(logs, err, cluster.master.URL, "MasterFailover", config.LvlErr, "Could not find pseudoGTID in master %s, %s", cluster.master.URL, err) + cluster.LogModulePrintf(cluster.Conf.Verbose, config.ConstLogModGeneral, config.LvlInfo, "Found coordinate on master %s ,%s", mFile, mPos) + mFile, mPos, logs, err = 
cluster.master.GetBinlogPosAfterSkipNumberOfEvents(mFile, mPos, slSkip) + cluster.LogSQL(logs, err, cluster.master.URL, "MasterFailover", config.LvlErr, "Could not skip event after pseudoGTID in master %s, %s", cluster.master.URL, err) + cluster.LogModulePrintf(cluster.Conf.Verbose, config.ConstLogModGeneral, config.LvlInfo, "Found skip coordinate on master %s, %s", mFile, mPos) + + cluster.LogModulePrintf(cluster.Conf.Verbose, config.ConstLogModGeneral, config.LvlInfo, "Doing Positional switch of slave %s", sl.URL) + changemasteropt.Logfile = mFile + changemasteropt.Logpos = mPos + changemasteropt.Mode = "POSITIONAL" + + return dbhelper.ChangeMaster(sl.Conn, changemasteropt, sl.DBVersion) +} + +func (cluster *Cluster) pointSlaveToMasterMxsNoGtid(sl *ServerMonitor) (string, error) { + return dbhelper.ChangeMaster(sl.Conn, cluster.GetChangeMasterBaseOptForMxs(sl, cluster.master), sl.DBVersion) +} + func (cluster *Cluster) SwitchSlavesToMaster(fail bool) { var err error var logs string @@ -446,112 +486,26 @@ func (cluster *Cluster) SwitchSlavesToMaster(fail bool) { var changeMasterErr error - var changemasteropt dbhelper.ChangeMasterOpt - changemasteropt.Host = cluster.master.Host - changemasteropt.Port = cluster.master.Port - changemasteropt.User = cluster.GetRplUser() - changemasteropt.Password = cluster.GetRplPass() - changemasteropt.Retry = strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatRetry) - changemasteropt.Heartbeat = strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatTime) - changemasteropt.SSL = cluster.Conf.ReplicationSSL - changemasteropt.Channel = cluster.Conf.MasterConn - changemasteropt.IsDelayed = sl.IsDelayed - changemasteropt.Delay = strconv.Itoa(sl.ClusterGroup.Conf.HostsDelayedTime) - changemasteropt.PostgressDB = cluster.master.PostgressDB - // Not MariaDB and not using MySQL GTID, 2.0 stop doing any thing until pseudo GTID if sl.HasMariaDBGTID() == false && cluster.master.HasMySQLGTID() == false { - if cluster.Conf.AutorejoinSlavePositionalHeartbeat == true { - - pseudoGTID, logs, err := sl.GetLastPseudoGTID() - cluster.LogSQL(logs, err, sl.URL, "MasterFailover", config.LvlErr, "Could not get pseudoGTID on slave %s, %s", sl.URL, err) - cluster.LogModulePrintf(cluster.Conf.Verbose, config.ConstLogModGeneral, config.LvlInfo, "Found pseudoGTID %s", pseudoGTID) - slFile, slPos, logs, err := sl.GetBinlogPosFromPseudoGTID(pseudoGTID) - cluster.LogSQL(logs, err, sl.URL, "MasterFailover", config.LvlErr, "Could not find pseudoGTID in slave %s, %s", sl.URL, err) - cluster.LogModulePrintf(cluster.Conf.Verbose, config.ConstLogModGeneral, config.LvlInfo, "Found Coordinates on slave %s, %s", slFile, slPos) - slSkip, logs, err := sl.GetNumberOfEventsAfterPos(slFile, slPos) - cluster.LogSQL(logs, err, sl.URL, "MasterFailover", config.LvlErr, "Could not find number of events after pseudoGTID in slave %s, %s", sl.URL, err) - cluster.LogModulePrintf(cluster.Conf.Verbose, config.ConstLogModGeneral, config.LvlInfo, "Found %d events to skip after coordinates on slave %s,%s", slSkip, slFile, slPos) - - mFile, mPos, logs, err := cluster.master.GetBinlogPosFromPseudoGTID(pseudoGTID) - cluster.LogSQL(logs, err, cluster.master.URL, "MasterFailover", config.LvlErr, "Could not find pseudoGTID in master %s, %s", cluster.master.URL, err) - cluster.LogModulePrintf(cluster.Conf.Verbose, config.ConstLogModGeneral, config.LvlInfo, "Found coordinate on master %s ,%s", mFile, mPos) - mFile, mPos, logs, err = cluster.master.GetBinlogPosAfterSkipNumberOfEvents(mFile, mPos, slSkip) - cluster.LogSQL(logs, err, 
cluster.master.URL, "MasterFailover", config.LvlErr, "Could not skip event after pseudoGTID in master %s, %s", cluster.master.URL, err) - cluster.LogModulePrintf(cluster.Conf.Verbose, config.ConstLogModGeneral, config.LvlInfo, "Found skip coordinate on master %s, %s", mFile, mPos) - - cluster.LogModulePrintf(cluster.Conf.Verbose, config.ConstLogModGeneral, config.LvlInfo, "Doing Positional switch of slave %s", sl.URL) - changemasteropt.Logfile = mFile - changemasteropt.Logpos = mPos - changemasteropt.Mode = "POSITIONAL" - logs, changeMasterErr = dbhelper.ChangeMaster(sl.Conn, changemasteropt, sl.DBVersion) + logs, changeMasterErr = cluster.pointSlaveToMasterPositional(sl) } else { sl.SetMaintenance() } // do nothing stay connected to dead master proceed with relay fix later } else if cluster.oldMaster.DBVersion.IsMySQLOrPerconaGreater57() && cluster.master.HasMySQLGTID() == true { - logs, changeMasterErr = dbhelper.ChangeMaster(sl.Conn, dbhelper.ChangeMasterOpt{ - Host: cluster.master.Host, - Port: cluster.master.Port, - User: cluster.GetRplUser(), - Password: cluster.GetRplPass(), - Retry: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatRetry), - Heartbeat: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatTime), - Mode: "MASTER_AUTO_POSITION", - SSL: cluster.Conf.ReplicationSSL, - Channel: cluster.Conf.MasterConn, - IsDelayed: sl.IsDelayed, - Delay: strconv.Itoa(sl.ClusterGroup.Conf.HostsDelayedTime), - PostgressDB: cluster.master.PostgressDB, - }, sl.DBVersion) + logs, changeMasterErr = cluster.pointSlaveToMasterWithMode(sl, "MASTER_AUTO_POSITION") } else if cluster.Conf.MxsBinlogOn == false { - //MariaDB all cases use GTID - - logs, changeMasterErr = dbhelper.ChangeMaster(sl.Conn, dbhelper.ChangeMasterOpt{ - Host: cluster.master.Host, - Port: cluster.master.Port, - User: cluster.GetRplUser(), - Password: cluster.GetRplPass(), - Retry: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatRetry), - Heartbeat: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatTime), - Mode: "SLAVE_POS", - SSL: cluster.Conf.ReplicationSSL, - Channel: cluster.Conf.MasterConn, - IsDelayed: sl.IsDelayed, - Delay: strconv.Itoa(sl.ClusterGroup.Conf.HostsDelayedTime), - PostgressDB: cluster.master.PostgressDB, - }, sl.DBVersion) + logs, changeMasterErr = cluster.pointSlaveToMasterWithMode(sl, "SLAVE_POS") } else { // We deduct we are in maxscale binlog server , but can have support for GTID or not cluster.LogModulePrintf(cluster.Conf.Verbose, config.ConstLogModGeneral, config.LvlInfo, "Pointing relay to the new master: %s:%s", cluster.master.Host, cluster.master.Port) if sl.MxsHaveGtid { - logs, changeMasterErr = dbhelper.ChangeMaster(sl.Conn, dbhelper.ChangeMasterOpt{ - Host: cluster.master.Host, - Port: cluster.master.Port, - User: cluster.GetRplUser(), - Password: cluster.GetRplPass(), - Retry: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatRetry), - Heartbeat: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatTime), - Mode: "SLAVE_POS", - SSL: cluster.Conf.ReplicationSSL, - Channel: cluster.Conf.MasterConn, - IsDelayed: sl.IsDelayed, - Delay: strconv.Itoa(sl.ClusterGroup.Conf.HostsDelayedTime), - PostgressDB: cluster.master.PostgressDB, - }, sl.DBVersion) + logs, changeMasterErr = cluster.pointSlaveToMasterWithMode(sl, "SLAVE_POS") } else { - logs, changeMasterErr = dbhelper.ChangeMaster(sl.Conn, dbhelper.ChangeMasterOpt{ - Host: cluster.master.Host, - Port: cluster.master.Port, - User: cluster.GetRplUser(), - Password: cluster.GetRplPass(), - Retry: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatRetry), - Heartbeat: 
strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatTime), - Mode: "MXS", - SSL: cluster.Conf.ReplicationSSL, - }, sl.DBVersion) + logs, changeMasterErr = cluster.pointSlaveToMasterMxsNoGtid(sl) } } cluster.LogSQL(logs, changeMasterErr, sl.URL, "MasterFailover", config.LvlErr, "Change master failed on slave %s, %s", sl.URL, changeMasterErr) @@ -623,6 +577,7 @@ func (cluster *Cluster) FailoverExtraMultiSource(oldMaster *ServerMonitor, NewMa changemasteropt.Password = myparentrplpassword changemasteropt.Retry = strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatRetry) changemasteropt.Heartbeat = strconv.Itoa(int(rep.SlaveHeartbeatPeriod)) + changemasteropt.RetryCount = strconv.Itoa(cluster.Conf.MasterRetryCount) changemasteropt.Logfile = rep.MasterLogFile.String changemasteropt.Logpos = rep.ExecMasterLogPos.String changemasteropt.SSL = cluster.Conf.ReplicationSSL @@ -1481,36 +1436,31 @@ func (cluster *Cluster) CloseRing(oldMaster *ServerMonitor) error { hasMyGTID := parent.HasMySQLGTID() var changeMasterErr error + changeMasterOpt := dbhelper.ChangeMasterOpt{ + Host: parent.Host, + Port: parent.Port, + User: cluster.GetRplUser(), + Password: cluster.GetRplPass(), + Retry: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatRetry), + Heartbeat: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatTime), + RetryCount: strconv.Itoa(cluster.Conf.MasterRetryCount), + SSL: cluster.Conf.ReplicationSSL, + Channel: cluster.Conf.MasterConn, + PostgressDB: parent.PostgressDB, + } // Not MariaDB and not using MySQL GTID, 2.0 stop doing any thing until pseudo GTID - if parent.DBVersion.IsMySQLOrPerconaGreater57() && hasMyGTID == true { - logs, changeMasterErr = dbhelper.ChangeMaster(child.Conn, dbhelper.ChangeMasterOpt{ - Host: parent.Host, - Port: parent.Port, - User: cluster.GetRplUser(), - Password: cluster.GetRplPass(), - Retry: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatRetry), - Heartbeat: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatTime), - Mode: "", - SSL: cluster.Conf.ReplicationSSL, - Channel: cluster.Conf.MasterConn, - PostgressDB: parent.PostgressDB, - }, child.DBVersion) + if parent.DBVersion.IsMySQLOrPerconaGreater57() { + if hasMyGTID { + changeMasterOpt.Mode = "MASTER_AUTO_POSITION" + } else { + changeMasterOpt.Mode = "" + } + logs, changeMasterErr = dbhelper.ChangeMaster(child.Conn, changeMasterOpt, child.DBVersion) } else { //MariaDB all cases use GTID - - logs, changeMasterErr = dbhelper.ChangeMaster(child.Conn, dbhelper.ChangeMasterOpt{ - Host: parent.Host, - Port: parent.Port, - User: cluster.GetRplUser(), - Password: cluster.GetRplPass(), - Retry: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatRetry), - Heartbeat: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatTime), - Mode: "SLAVE_POS", - SSL: cluster.Conf.ReplicationSSL, - Channel: cluster.Conf.MasterConn, - PostgressDB: parent.PostgressDB, - }, child.DBVersion) + changeMasterOpt.Mode = "SLAVE_POS" + logs, changeMasterErr = dbhelper.ChangeMaster(child.Conn, changeMasterOpt, child.DBVersion) } cluster.LogSQL(logs, changeMasterErr, child.URL, "MasterFailover", config.LvlErr, "Could not change masteron server %s, %s", child.URL, changeMasterErr) diff --git a/cluster/cluster_get.go b/cluster/cluster_get.go index 70df7f85d..898db4593 100644 --- a/cluster/cluster_get.go +++ b/cluster/cluster_get.go @@ -1770,3 +1770,56 @@ func (cluster *Cluster) GetDeprecatedKeyMap() map[string]bool { } return keys } + +func (cluster *Cluster) GetChangeMasterBaseOptForSlave(sl *ServerMonitor, master *ServerMonitor, delayed bool) dbhelper.ChangeMasterOpt { + opt := 
dbhelper.ChangeMasterOpt{ + Host: master.Host, + Port: master.Port, + User: cluster.GetRplUser(), + Password: cluster.GetRplPass(), + Retry: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatRetry), + Heartbeat: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatTime), + RetryCount: strconv.Itoa(cluster.Conf.MasterRetryCount), + SSL: cluster.Conf.ReplicationSSL, + Channel: cluster.Conf.MasterConn, + IsDelayed: delayed, + PostgressDB: master.PostgressDB, + } + + if delayed { + opt.Delay = strconv.Itoa(cluster.Conf.HostsDelayedTime) + } + + return opt +} + +func (cluster *Cluster) GetChangeMasterBaseOptForReplGroup(sl *ServerMonitor) dbhelper.ChangeMasterOpt { + opt := dbhelper.ChangeMasterOpt{ + User: cluster.GetRplUser(), + Password: cluster.GetRplPass(), + Retry: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatRetry), + Heartbeat: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatTime), + RetryCount: strconv.Itoa(cluster.Conf.MasterRetryCount), + Mode: "GROUP_REPL", + Channel: "group_replication_recovery", + IsDelayed: sl.IsDelayed, + Delay: strconv.Itoa(cluster.Conf.HostsDelayedTime), + SSL: cluster.Conf.ReplicationSSL, + PostgressDB: sl.PostgressDB, + } + + return opt +} + +func (cluster *Cluster) GetChangeMasterBaseOptForMxs(sl *ServerMonitor, master *ServerMonitor) dbhelper.ChangeMasterOpt { + return dbhelper.ChangeMasterOpt{ + Host: master.Host, + Port: master.Port, + User: cluster.GetRplUser(), + Password: cluster.GetRplPass(), + Retry: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatRetry), + Heartbeat: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatTime), + Mode: "MXS", + SSL: cluster.Conf.ReplicationSSL, + } +} diff --git a/cluster/cluster_staging.go b/cluster/cluster_staging.go index 36313c6cd..8ff1eafec 100644 --- a/cluster/cluster_staging.go +++ b/cluster/cluster_staging.go @@ -5,7 +5,6 @@ import ( "fmt" "os" "os/exec" - "strconv" "strings" "sync" "time" @@ -467,17 +466,8 @@ func (cluster *Cluster) ReseedFromParentCluster(parent *Cluster, target *ServerM cluster.LogSQL(logs, err, target.URL, "Rejoin", config.LvlErr, "Failed stop slave on server: %s %s", target.URL, err) } - changeOpt := dbhelper.ChangeMasterOpt{ - Host: pmaster.Host, - Port: pmaster.Port, - User: parent.GetRplUser(), - Password: parent.GetRplPass(), - Retry: strconv.Itoa(parent.Conf.ForceSlaveHeartbeatRetry), - Heartbeat: strconv.Itoa(parent.Conf.ForceSlaveHeartbeatTime), - Mode: "SLAVE_POS", - SSL: parent.Conf.ReplicationSSL, - Channel: parent.Conf.MasterConn, - } + changeOpt := parent.GetChangeMasterBaseOptForSlave(target, pmaster, false) + changeOpt.Mode = "SLAVE_POS" if target.DBVersion.IsMySQLOrPercona() { if target.HasMySQLGTID() { @@ -530,28 +520,23 @@ func (cluster *Cluster) ReseedFromParentCluster(parent *Cluster, target *ServerM cluster.LogModulePrintf(cluster.Conf.Verbose, config.ConstLogModTask, config.LvlWarn, "Task only updated in runtime. Error while writing to jobs table: %s", e2.Error()) } cluster.LogModulePrintf(cluster.Conf.Verbose, config.ConstLogModTask, config.LvlErr, "Reseed logical backup %s from parent cluster failed on %s", backtype, target.URL) + return "", err + } - } else { - if e2 := target.JobsUpdateState(task, "Reseed completed", 3, 1); e2 != nil { - cluster.LogModulePrintf(cluster.Conf.Verbose, config.ConstLogModTask, config.LvlWarn, "Task only updated in runtime. 
Error while writing to jobs table: %s", e2.Error()) - } + if e2 := target.JobsUpdateState(task, "Reseed completed", 3, 1); e2 != nil { + cluster.LogModulePrintf(cluster.Conf.Verbose, config.ConstLogModTask, config.LvlWarn, "Task only updated in runtime. Error while writing to jobs table: %s", e2.Error()) + } - if target.IsMaster() { - _, err2 := target.StartSlaveChannel(parent.Conf.MasterConn) - if err2 != nil { - cluster.LogSQL(logs, err, target.URL, "Rejoin", config.LvlErr, "Failed start slave on server: %s %s", target.URL, err) - } else { - cluster.LogModulePrintf(cluster.Conf.Verbose, config.ConstLogModTask, config.LvlInfo, "Start slave on %s", target.URL) - } + if target.IsMaster() { + _, err2 := target.StartSlaveChannel(parent.Conf.MasterConn) + if err2 != nil { + cluster.LogSQL(logs, err, target.URL, "Rejoin", config.LvlErr, "Failed start slave on server: %s %s", target.URL, err) + } else { + cluster.LogModulePrintf(cluster.Conf.Verbose, config.ConstLogModTask, config.LvlInfo, "Start slave on %s", target.URL) } - - cluster.LogModulePrintf(cluster.Conf.Verbose, config.ConstLogModTask, config.LvlInfo, "Reseed logical backup %s from parent cluster completed on %s", backtype, target.URL) - } - if err != nil { - return "", err - } + cluster.LogModulePrintf(cluster.Conf.Verbose, config.ConstLogModTask, config.LvlInfo, "Reseed logical backup %s from parent cluster completed on %s", backtype, target.URL) return masterCurrentGTID, nil } diff --git a/cluster/srv.go b/cluster/srv.go index 2b81e748e..6208d9d8c 100644 --- a/cluster/srv.go +++ b/cluster/srv.go @@ -1832,20 +1832,7 @@ func (server *ServerMonitor) ChangeMasterTo(master *ServerMonitor, master_use_gi hasMyGTID := server.HasMySQLGTID() if cluster.Conf.MultiMasterGrouprep { //MySQL group replication - logs, err = dbhelper.ChangeMaster(server.Conn, dbhelper.ChangeMasterOpt{ - Host: "", - Port: "", - User: cluster.GetRplUser(), - Password: cluster.GetRplPass(), - Retry: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatRetry), - Heartbeat: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatTime), - Mode: "GROUP_REPL", - Channel: "group_replication_recovery", - IsDelayed: server.IsDelayed, - Delay: strconv.Itoa(cluster.Conf.HostsDelayedTime), - SSL: cluster.Conf.ReplicationSSL, - PostgressDB: server.PostgressDB, - }, server.DBVersion) + logs, err = dbhelper.ChangeMaster(server.Conn, cluster.GetChangeMasterBaseOptForReplGroup(server), server.DBVersion) cluster.LogModulePrintf(cluster.Conf.Verbose, config.ConstLogModGeneral, config.LvlInfo, "Group Replication bootstrapped for", server.URL) } else if cluster.Conf.ForceSlaveNoGtid == false && server.DBVersion.IsMariaDB() && server.DBVersion.Major >= 10 { //mariadb using GTID @@ -1854,59 +1841,24 @@ func (server *ServerMonitor) ChangeMasterTo(master *ServerMonitor, master_use_gi if err != nil { return err } - logs, err = dbhelper.ChangeMaster(server.Conn, dbhelper.ChangeMasterOpt{ - Host: master.Host, - Port: master.Port, - User: cluster.GetRplUser(), - Password: cluster.GetRplPass(), - Retry: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatRetry), - Heartbeat: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatTime), - Mode: master_use_gitd, - Channel: cluster.Conf.MasterConn, - IsDelayed: server.IsDelayed, - Delay: strconv.Itoa(cluster.Conf.HostsDelayedTime), - SSL: cluster.Conf.ReplicationSSL, - PostgressDB: server.PostgressDB, - }, server.DBVersion) + changemasteropt := cluster.GetChangeMasterBaseOptForSlave(server, master, server.IsDelayed) + changemasteropt.Mode = master_use_gitd + logs, err = 
dbhelper.ChangeMaster(server.Conn, changemasteropt, server.DBVersion) cluster.LogModulePrintf(cluster.Conf.Verbose, config.ConstLogModGeneral, config.LvlInfo, "Replication bootstrapped on %s with %s as master", server.URL, master.URL) } else if hasMyGTID && cluster.Conf.ForceSlaveNoGtid == false { // MySQL GTID - logs, err = dbhelper.ChangeMaster(server.Conn, dbhelper.ChangeMasterOpt{ - Host: master.Host, - Port: master.Port, - User: cluster.GetRplUser(), - Password: cluster.GetRplPass(), - Retry: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatRetry), - Heartbeat: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatTime), - Mode: "MASTER_AUTO_POSITION", - IsDelayed: server.IsDelayed, - Delay: strconv.Itoa(cluster.Conf.HostsDelayedTime), - SSL: cluster.Conf.ReplicationSSL, - Channel: cluster.Conf.MasterConn, - PostgressDB: server.PostgressDB, - }, server.DBVersion) + changemasteropt := cluster.GetChangeMasterBaseOptForSlave(server, master, server.IsDelayed) + changemasteropt.Mode = "MASTER_AUTO_POSITION" + logs, err = dbhelper.ChangeMaster(server.Conn, changemasteropt, server.DBVersion) cluster.LogModulePrintf(cluster.Conf.Verbose, config.ConstLogModGeneral, config.LvlInfo, "Replication bootstrapped on %s with MySQL GTID replication style and %s as master", server.URL, master.URL) } else { - // Old Style file pos as default - logs, err = dbhelper.ChangeMaster(server.Conn, dbhelper.ChangeMasterOpt{ - Host: master.Host, - Port: master.Port, - User: cluster.GetRplUser(), - Password: cluster.GetRplPass(), - Retry: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatRetry), - Heartbeat: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatTime), - Mode: "POSITIONAL", - Logfile: master.BinaryLogFile, - Logpos: master.BinaryLogPos, - Channel: cluster.Conf.MasterConn, - IsDelayed: server.IsDelayed, - Delay: strconv.Itoa(cluster.Conf.HostsDelayedTime), - SSL: cluster.Conf.ReplicationSSL, - PostgressDB: server.PostgressDB, - }, server.DBVersion) + changemasteropt := cluster.GetChangeMasterBaseOptForSlave(server, master, server.IsDelayed) + changemasteropt.Mode = "POSITIONAL" + changemasteropt.Logfile = master.BinaryLogFile + changemasteropt.Logpos = master.BinaryLogPos + logs, err = dbhelper.ChangeMaster(server.Conn, changemasteropt, server.DBVersion) cluster.LogModulePrintf(cluster.Conf.Verbose, config.ConstLogModGeneral, config.LvlInfo, "Replication bootstrapped on %s with old replication style and %s as master", server.URL, master.URL) - } if err != nil { cluster.LogSQL(logs, err, server.URL, "BootstrapReplication", config.LvlErr, "Replication can't be bootstrap for server %s with %s as master: %s ", server.URL, master.URL, err) diff --git a/cluster/srv_chk.go b/cluster/srv_chk.go index 508bc042d..3077156ea 100644 --- a/cluster/srv_chk.go +++ b/cluster/srv_chk.go @@ -129,6 +129,68 @@ func (server *ServerMonitor) CheckDisks() { } } +func (server *ServerMonitor) checkStoppedReplication(ss *dbhelper.SlaveStatus) string { + // log.Printf("replicationCheck %s %s", server.SQLThread, server.IOThread) + if ss.SlaveSQLRunning.String == "Yes" && ss.SlaveIORunning.String == "No" { + if server.IsRelay == false && server.IsMaxscale == false { + server.SetState(stateSlaveErr) + } else if server.IsRelay { + server.SetState(stateRelayErr) + } + return fmt.Sprintf("NOT OK, IO Stopped (%s)", ss.LastIOErrno.String) + } else if ss.SlaveSQLRunning.String == "No" && ss.SlaveIORunning.String == "Yes" { + if server.IsRelay == false && server.IsMaxscale == false { + server.SetState(stateSlaveErr) + } else if server.IsRelay { + 
server.SetState(stateRelayErr) + } + return fmt.Sprintf("NOT OK, SQL Stopped (%s)", ss.LastSQLErrno.String) + } else if ss.SlaveSQLRunning.String == "No" && ss.SlaveIORunning.String == "No" { + if server.IsRelay == false && server.IsMaxscale == false { + server.SetState(stateSlaveErr) + } else if server.IsRelay { + server.SetState(stateRelayErr) + } + return "NOT OK, ALL Stopped" + } else if ss.SlaveSQLRunning.String == "Connecting" { + if server.IsRelay == false && server.IsMaxscale == false { + server.SetState(stateSlave) + } else if server.IsRelay { + server.SetState(stateRelay) + } + return "NOT OK, IO Connecting" + } + + if server.IsRelay == false && server.IsMaxscale == false { + server.SetState(stateSlave) + } else if server.IsRelay { + server.SetState(stateRelay) + } + return "Running OK" +} + +func (server *ServerMonitor) checkLateReplication(ss *dbhelper.SlaveStatus) string { + if ss.SecondsBehindMaster.Int64 > server.ClusterGroup.Conf.FailMaxDelay && server.ClusterGroup.Conf.RplChecks == true { + if server.IsRelay == false && server.IsMaxscale == false { + server.SetState(stateSlaveLate) + } else if server.IsRelay { + server.SetState(stateRelayLate) + } + + } else { + if server.IsRelay == false && server.IsMaxscale == false { + server.SetState(stateSlave) + } else if server.IsRelay { + server.SetState(stateRelay) + } + } + + if server.ClusterGroup.Conf.DelayStatCapture { + server.DelayStat.UpdateDelayStat(ss.SecondsBehindMaster.Int64, server.ClusterGroup.Conf.DelayStatRotate) // Capture Delay Stat + } + return "Behind master" +} + // CheckReplication Check replication health and return status string func (server *ServerMonitor) CheckReplication() string { cluster := server.ClusterGroup @@ -172,67 +234,13 @@ func (server *ServerMonitor) CheckReplication() string { return "Not a slave" } if ss.SecondsBehindMaster.Valid == false { - - // log.Printf("replicationCheck %s %s", server.SQLThread, server.IOThread) - if ss.SlaveSQLRunning.String == "Yes" && ss.SlaveIORunning.String == "No" { - if server.IsRelay == false && server.IsMaxscale == false { - server.SetState(stateSlaveErr) - } else if server.IsRelay { - server.SetState(stateRelayErr) - } - return fmt.Sprintf("NOT OK, IO Stopped (%s)", ss.LastIOErrno.String) - } else if ss.SlaveSQLRunning.String == "No" && ss.SlaveIORunning.String == "Yes" { - if server.IsRelay == false && server.IsMaxscale == false { - server.SetState(stateSlaveErr) - } else if server.IsRelay { - server.SetState(stateRelayErr) - } - return fmt.Sprintf("NOT OK, SQL Stopped (%s)", ss.LastSQLErrno.String) - } else if ss.SlaveSQLRunning.String == "No" && ss.SlaveIORunning.String == "No" { - if server.IsRelay == false && server.IsMaxscale == false { - server.SetState(stateSlaveErr) - } else if server.IsRelay { - server.SetState(stateRelayErr) - } - return "NOT OK, ALL Stopped" - } else if ss.SlaveSQLRunning.String == "Connecting" { - if server.IsRelay == false && server.IsMaxscale == false { - server.SetState(stateSlave) - } else if server.IsRelay { - server.SetState(stateRelay) - } - return "NOT OK, IO Connecting" - } - - if server.IsRelay == false && server.IsMaxscale == false { - server.SetState(stateSlave) - } else if server.IsRelay { - server.SetState(stateRelay) - } - return "Running OK" + return server.checkStoppedReplication(ss) } if ss.SecondsBehindMaster.Int64 > 0 { - if ss.SecondsBehindMaster.Int64 > cluster.Conf.FailMaxDelay && cluster.Conf.RplChecks == true { - if server.IsRelay == false && server.IsMaxscale == false { - server.SetState(stateSlaveLate) 
- } else if server.IsRelay { - server.SetState(stateRelayLate) - } - - } else { - if server.IsRelay == false && server.IsMaxscale == false { - server.SetState(stateSlave) - } else if server.IsRelay { - server.SetState(stateRelay) - } - } - - if cluster.Conf.DelayStatCapture { - server.DelayStat.UpdateDelayStat(ss.SecondsBehindMaster.Int64, cluster.Conf.DelayStatRotate) // Capture Delay Stat - } - return "Behind master" + return server.checkLateReplication(ss) } + if server.IsRelay == false && server.IsMaxscale == false { server.SetState(stateSlave) } else if server.IsRelay { diff --git a/cluster/srv_job.go b/cluster/srv_job.go index a350912a1..48c2cbabb 100644 --- a/cluster/srv_job.go +++ b/cluster/srv_job.go @@ -35,7 +35,6 @@ import ( dumplingext "github.com/pingcap/dumpling/v4/export" "github.com/signal18/replication-manager/config" "github.com/signal18/replication-manager/utils/crypto" - "github.com/signal18/replication-manager/utils/dbhelper" "github.com/signal18/replication-manager/utils/misc" river "github.com/signal18/replication-manager/utils/river" "github.com/signal18/replication-manager/utils/s18log" @@ -512,17 +511,7 @@ func (server *ServerMonitor) JobReseedPhysicalBackup(backtype string) error { cluster.LogSQL(logs, err, server.URL, "Rejoin", config.LvlErr, "Failed stop slave on server: %s %s", server.URL, err) } - logs, err = dbhelper.ChangeMaster(server.Conn, dbhelper.ChangeMasterOpt{ - Host: cluster.master.Host, - Port: cluster.master.Port, - User: cluster.GetRplUser(), - Password: cluster.GetRplPass(), - Retry: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatRetry), - Heartbeat: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatTime), - Mode: "SLAVE_POS", - SSL: cluster.Conf.ReplicationSSL, - Channel: cluster.Conf.MasterConn, - }, server.DBVersion) + logs, err = cluster.pointSlaveToMasterWithMode(server, "SLAVE_POS") if err != nil { cluster.LogSQL(logs, err, server.URL, "Rejoin", config.LvlErr, "Reseed can't changing master for physical backup %s request for server: %s %s", backtype, server.URL, err) return err @@ -597,16 +586,7 @@ func (server *ServerMonitor) JobFlashbackPhysicalBackup() error { cluster.LogSQL(logs, err, server.URL, "Rejoin", config.LvlErr, "Failed stop slave on server: %s %s", server.URL, err) } - logs, err = dbhelper.ChangeMaster(server.Conn, dbhelper.ChangeMasterOpt{ - Host: cluster.master.Host, - Port: cluster.master.Port, - User: cluster.GetRplUser(), - Password: cluster.GetRplPass(), - Retry: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatRetry), - Heartbeat: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatTime), - Mode: "SLAVE_POS", - SSL: cluster.Conf.ReplicationSSL, - }, server.DBVersion) + logs, err = cluster.pointSlaveToMasterWithMode(server, "SLAVE_POS") if err != nil { cluster.LogSQL(logs, err, server.URL, "Rejoin", config.LvlErr, "Flashback can't changing master for physical backup %s request for server: %s %s", cluster.Conf.BackupPhysicalType, server.URL, err) if server.HasReseedingState(task) { @@ -734,27 +714,15 @@ func (server *ServerMonitor) JobReseedLogicalBackup(backtype string) error { cluster.LogSQL(logs, err, server.URL, "Rejoin", config.LvlErr, "Failed stop slave on server: %s %s", server.URL, err) } - changeOpt := dbhelper.ChangeMasterOpt{ - Host: cluster.master.Host, - Port: cluster.master.Port, - User: cluster.GetRplUser(), - Password: cluster.GetRplPass(), - Retry: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatRetry), - Heartbeat: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatTime), - Mode: "SLAVE_POS", - SSL: cluster.Conf.ReplicationSSL, - 
Channel: cluster.Conf.MasterConn, - } - if server.DBVersion.IsMySQLOrPercona() { if server.HasMySQLGTID() { - changeOpt.Mode = "MASTER_AUTO_POSITION" + cluster.pointSlaveToMasterWithMode(server, "MASTER_AUTO_POSITION") } else { - changeOpt.Mode = "POSITIONAL" + cluster.pointSlaveToMasterPositional(server) } + } else { + cluster.pointSlaveToMasterWithMode(server, "SLAVE_POS") } - - dbhelper.ChangeMaster(server.Conn, changeOpt, server.DBVersion) // Ignore error } server.JobsUpdateState(task, "processing", 1, 0) @@ -911,18 +879,15 @@ func (server *ServerMonitor) JobFlashbackLogicalBackup() error { cluster.LogSQL(logs, err, server.URL, "Rejoin", config.LvlErr, "Failed stop slave on server: %s %s", server.URL, err) } - // Reconfigure replication to point to master - logs, err = dbhelper.ChangeMaster(server.Conn, dbhelper.ChangeMasterOpt{ - Host: cluster.master.Host, - Port: cluster.master.Port, - User: cluster.GetRplUser(), - Password: cluster.GetRplPass(), - Retry: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatRetry), - Heartbeat: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatTime), - Mode: "SLAVE_POS", - SSL: cluster.Conf.ReplicationSSL, - Channel: cluster.Conf.MasterConn, - }, server.DBVersion) + if server.DBVersion.IsMySQLOrPerconaGreater57() { + if server.HasMySQLGTID() { + logs, err = cluster.pointSlaveToMasterWithMode(server, "MASTER_AUTO_POSITION") + } else { + logs, err = cluster.pointSlaveToMasterPositional(server) + } + } else { + logs, err = cluster.pointSlaveToMasterWithMode(server, "SLAVE_POS") + } if err != nil { cluster.LogSQL(logs, err, server.URL, "Rejoin", config.LvlErr, "flashback can't changing master for logical backup %s request for server: %s %s", cluster.Conf.BackupLogicalType, server.URL, err) return err diff --git a/cluster/srv_rejoin.go b/cluster/srv_rejoin.go index 89df71a3e..23acd84b8 100644 --- a/cluster/srv_rejoin.go +++ b/cluster/srv_rejoin.go @@ -261,18 +261,10 @@ func (server *ServerMonitor) rejoinMasterSync(crash *Crash) error { return err } } else if cluster.Conf.MxsBinlogOn { - logs, err := dbhelper.ChangeMaster(server.Conn, dbhelper.ChangeMasterOpt{ - Host: realmaster.Host, - Port: realmaster.Port, - User: cluster.GetRplUser(), - Password: cluster.GetRplPass(), - Retry: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatRetry), - Heartbeat: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatTime), - Mode: "MXS", - Logfile: crash.FailoverMasterLogFile, - Logpos: crash.FailoverMasterLogPos, - SSL: cluster.Conf.ReplicationSSL, - }, server.DBVersion) + opt := cluster.GetChangeMasterBaseOptForMxs(server, realmaster) + opt.Logfile = crash.FailoverMasterLogFile + opt.Logpos = crash.FailoverMasterLogPos + logs, err := dbhelper.ChangeMaster(server.Conn, opt, server.DBVersion) cluster.LogSQL(logs, err, server.URL, "Rejoin", config.LvlErr, "Change master positional failed in Rejoin old Master in sync to maxscale %s", err) if err != nil { return err @@ -280,22 +272,11 @@ func (server *ServerMonitor) rejoinMasterSync(crash *Crash) error { } else { // not maxscale the new master coordonate are in crash cluster.LogModulePrintf(cluster.Conf.Verbose, config.ConstLogModGeneral, "INFO", "Change master to positional in Rejoin old Master") - logs, err := dbhelper.ChangeMaster(server.Conn, dbhelper.ChangeMasterOpt{ - Host: realmaster.Host, - Port: realmaster.Port, - User: cluster.GetRplUser(), - Password: cluster.GetRplPass(), - Retry: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatRetry), - Heartbeat: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatTime), - Mode: "POSITIONAL", - Logfile: 
crash.NewMasterLogFile, - Logpos: crash.NewMasterLogPos, - SSL: cluster.Conf.ReplicationSSL, - Channel: cluster.Conf.MasterConn, - IsDelayed: server.IsDelayed, - Delay: strconv.Itoa(cluster.Conf.HostsDelayedTime), - PostgressDB: server.PostgressDB, - }, server.DBVersion) + opt := cluster.GetChangeMasterBaseOptForSlave(server, realmaster, server.IsDelayed) + opt.Mode = "POSITIONAL" + opt.Logfile = crash.NewMasterLogFile + opt.Logpos = crash.NewMasterLogPos + logs, err := dbhelper.ChangeMaster(server.Conn, opt, server.DBVersion) cluster.LogSQL(logs, err, server.URL, "Rejoin", config.LvlErr, "Change master positional failed in Rejoin old Master in sync %s", err) if err != nil { return err @@ -356,7 +337,7 @@ func (server *ServerMonitor) rejoinMasterFlashBack(crash *Crash) error { if server.MxsHaveGtid || server.IsMaxscale == false { logs, err2 = server.SetReplicationGTIDSlavePosFromServer(realmaster) } else { - logs, err2 = server.SetReplicationFromMaxsaleServer(realmaster) + logs, err2 = server.SetReplicationFromMaxscaleServer(realmaster) } cluster.LogSQL(logs, err2, server.URL, "Rejoin", config.LvlInfo, "Failed SetReplicationGTIDSlavePosFromServer on %s: %s", server.URL, err2) if err2 != nil { @@ -413,19 +394,11 @@ func (server *ServerMonitor) RejoinDirectDump() error { cluster.LogSQL(logs, err3, server.URL, "Rejoin", config.LvlInfo, "Failed SetReplicationGTIDSlavePosFromServer on %s: %s", server.URL, err3) } else { - logs, err3 := dbhelper.ChangeMaster(server.Conn, dbhelper.ChangeMasterOpt{ - Host: realmaster.Host, - Port: realmaster.Port, - User: cluster.GetRplUser(), - Password: cluster.GetRplPass(), - Retry: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatRetry), - Heartbeat: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatTime), - Mode: "MXS", - Logfile: realmaster.FailoverMasterLogFile, - Logpos: realmaster.FailoverMasterLogPos, - SSL: cluster.Conf.ReplicationSSL, - Channel: cluster.Conf.MasterConn, - }, server.DBVersion) + opt := cluster.GetChangeMasterBaseOptForMxs(server, realmaster) + opt.Logfile = realmaster.FailoverMasterLogFile + opt.Logpos = realmaster.FailoverMasterLogPos + + logs, err3 := dbhelper.ChangeMaster(server.Conn, opt, server.DBVersion) cluster.LogSQL(logs, err3, server.URL, "Rejoin", config.LvlErr, "Failed change master maxscale on %s: %s", server.URL, err3) } if err3 != nil { @@ -610,21 +583,11 @@ func (server *ServerMonitor) rejoinSlave(ss dbhelper.SlaveStatus) error { logs, err := server.StopSlave() cluster.LogSQL(logs, err, server.URL, "Rejoin", config.LvlErr, "Failed to stop slave on server %s: %s", server.URL, err) cluster.LogModulePrintf(cluster.Conf.Verbose, config.ConstLogModGeneral, "INFO", "Doing Positional switch of slave %s", server.URL) - logs, changeMasterErr := dbhelper.ChangeMaster(server.Conn, dbhelper.ChangeMasterOpt{ - Host: cluster.master.Host, - Port: cluster.master.Port, - User: cluster.GetRplUser(), - Password: cluster.GetRplPass(), - Logfile: myparentss.MasterLogFile.String, - Logpos: myparentss.ReadMasterLogPos.String, - Retry: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatRetry), - Heartbeat: strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatTime), - Channel: cluster.Conf.MasterConn, - IsDelayed: server.IsDelayed, - Delay: strconv.Itoa(cluster.Conf.HostsDelayedTime), - SSL: cluster.Conf.ReplicationSSL, - PostgressDB: server.PostgressDB, - }, server.DBVersion) + opt := cluster.GetChangeMasterBaseOptForSlave(server, cluster.master, server.IsDelayed) + opt.Mode = "POSITIONAL" + opt.Logfile = myparentss.MasterLogFile.String + opt.Logpos = 
myparentss.ReadMasterLogPos.String + logs, changeMasterErr := dbhelper.ChangeMaster(server.Conn, opt, server.DBVersion) cluster.LogSQL(logs, changeMasterErr, server.URL, "Rejoin", config.LvlErr, "Rejoin Failed doing Positional switch of slave %s: %s", server.URL, changeMasterErr) diff --git a/cluster/srv_set.go b/cluster/srv_set.go index 3fd9acbcb..4cd63dbe5 100644 --- a/cluster/srv_set.go +++ b/cluster/srv_set.go @@ -15,7 +15,6 @@ import ( "fmt" "os" "runtime" - "strconv" "strings" "time" @@ -269,75 +268,30 @@ func (server *ServerMonitor) SetReplicationGTIDSlavePosFromServer(master *Server cluster := server.ClusterGroup server.StopSlave() - changeOpt := dbhelper.ChangeMasterOpt{ - Host: master.Host, - Port: master.Port, - User: master.ClusterGroup.GetRplUser(), - Password: master.ClusterGroup.GetRplPass(), - Retry: strconv.Itoa(master.ClusterGroup.Conf.ForceSlaveHeartbeatRetry), - Heartbeat: strconv.Itoa(master.ClusterGroup.Conf.ForceSlaveHeartbeatTime), - SSL: cluster.Conf.ReplicationSSL, - Channel: cluster.Conf.MasterConn, - IsDelayed: server.IsDelayed, - Delay: strconv.Itoa(cluster.Conf.HostsDelayedTime), - PostgressDB: server.PostgressDB, - } - if server.IsMariaDB() { - changeOpt.Mode = "SLAVE_POS" - return dbhelper.ChangeMaster(server.Conn, changeOpt, server.DBVersion) + return cluster.pointSlaveToMasterWithMode(server, "SLAVE_POS") } - changeOpt.Mode = "MASTER_AUTO_POSITION" - return dbhelper.ChangeMaster(server.Conn, changeOpt, server.DBVersion) + + return cluster.pointSlaveToMasterWithMode(server, "MASTER_AUTO_POSITION") } func (server *ServerMonitor) SetReplicationGTIDCurrentPosFromServer(master *ServerMonitor) (string, error) { cluster := server.ClusterGroup - var err error - logs := "" - changeOpt := dbhelper.ChangeMasterOpt{ - SSL: cluster.Conf.ReplicationSSL, - Channel: cluster.Conf.MasterConn, - IsDelayed: server.IsDelayed, - Delay: strconv.Itoa(cluster.Conf.HostsDelayedTime), - PostgressDB: server.PostgressDB, - } + if server.DBVersion.IsMySQLOrPerconaGreater57() { // We can do MySQL 5.7 style failover cluster.LogModulePrintf(cluster.Conf.Verbose, config.ConstLogModGeneral, config.LvlInfo, "Doing MySQL GTID switch of the old master") - changeOpt.Host = cluster.master.Host - changeOpt.Port = cluster.master.Port - changeOpt.User = cluster.GetRplUser() - changeOpt.Password = cluster.GetRplPass() - changeOpt.Retry = strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatRetry) - changeOpt.Heartbeat = strconv.Itoa(cluster.Conf.ForceSlaveHeartbeatTime) - changeOpt.Mode = "MASTER_AUTO_POSITION" - logs, err = dbhelper.ChangeMaster(server.Conn, changeOpt, server.DBVersion) - } else { - changeOpt.Host = master.Host - changeOpt.Port = master.Port - changeOpt.User = master.ClusterGroup.GetRplUser() - changeOpt.Password = master.ClusterGroup.GetRplPass() - changeOpt.Retry = strconv.Itoa(master.ClusterGroup.Conf.ForceSlaveHeartbeatRetry) - changeOpt.Heartbeat = strconv.Itoa(master.ClusterGroup.Conf.ForceSlaveHeartbeatTime) - changeOpt.Mode = "CURRENT_POS" - logs, err = dbhelper.ChangeMaster(server.Conn, changeOpt, server.DBVersion) + return cluster.pointSlaveToMasterWithMode(server, "MASTER_AUTO_POSITION") } - return logs, err + + return cluster.pointSlaveToMasterWithMode(server, "CURRENT_POS") } -func (server *ServerMonitor) SetReplicationFromMaxsaleServer(master *ServerMonitor) (string, error) { - return dbhelper.ChangeMaster(server.Conn, dbhelper.ChangeMasterOpt{ - Host: master.Host, - Port: master.Port, - User: master.ClusterGroup.GetRplUser(), - Password: master.ClusterGroup.GetRplPass(), - 
Retry: strconv.Itoa(master.ClusterGroup.Conf.ForceSlaveHeartbeatRetry), - Heartbeat: strconv.Itoa(master.ClusterGroup.Conf.ForceSlaveHeartbeatTime), - Mode: "MXS", - Logfile: master.FailoverMasterLogFile, - Logpos: master.FailoverMasterLogPos, - }, server.DBVersion) +func (server *ServerMonitor) SetReplicationFromMaxscaleServer(master *ServerMonitor) (string, error) { + opt := server.ClusterGroup.GetChangeMasterBaseOptForMxs(server, master) + opt.Logfile = master.FailoverMasterLogFile + opt.Logpos = master.FailoverMasterLogPos + return dbhelper.ChangeMaster(server.Conn, opt, server.DBVersion) } func (server *ServerMonitor) SetReplicationChannel(source string) (string, error) { diff --git a/config/config.go b/config/config.go index 1dbc2c102..6635f16a3 100644 --- a/config/config.go +++ b/config/config.go @@ -197,6 +197,7 @@ type Config struct { ClusterHead string `mapstructure:"cluster-head" toml:"cluster-head" json:"clusterHead"` ReplicationMultisourceHeadClusters string `mapstructure:"replication-multisource-head-clusters" toml:"replication-multisource-head-clusters" json:"replicationMultisourceHeadClusters"` MasterConnectRetry int `mapstructure:"replication-master-connect-retry" toml:"replication-master-connect-retry" json:"replicationMasterConnectRetry"` + MasterRetryCount int `mapstructure:"replication-master-retry-count" toml:"replication-master-retry-count" json:"replicationMasterRetryCount"` RplUser string `mapstructure:"replication-credential" toml:"replication-credential" json:"replicationCredential"` ReplicationErrorScript string `mapstructure:"replication-error-script" toml:"replication-error-script" json:"replicationErrorScript"` MasterConn string `mapstructure:"replication-source-name" toml:"replication-source-name" json:"replicationSourceName"` diff --git a/config/error.go b/config/error.go index 2d8a50601..b9840537f 100644 --- a/config/error.go +++ b/config/error.go @@ -114,7 +114,7 @@ var ClusterError = map[string]string{ "ERR00094": "Proxysql %s can not set %s as OFFLINE_SOFT: %s", "ERR00095": "ProxySQL %s could not load servers to runtime: %s", "ERR00096": "Proxysql %s can not save changes to disk: %s", - "ERR00097": "Auto failover locked with false positive conditions: %s", + "ERR00097": "Auto failover locked with false positive conditions: %v", "ERR00098": "Failed to enable GTID Mode on master node. Err: %s", "ERR00099": "Failed to enable GTID Mode on slave %s. Err: %s", "ERR00100": "Cluster is in switchover. 
Switchover started at %s", diff --git a/server/api_cluster.go b/server/api_cluster.go index e0392b659..b9ed56fcc 100644 --- a/server/api_cluster.go +++ b/server/api_cluster.go @@ -3295,6 +3295,9 @@ func (repman *ReplicationManager) setClusterSetting(mycluster *cluster.Cluster, mycluster.Conf.ReplicationMultisourceHeadClusters = value case "replication-source-name": mycluster.Conf.MasterConn = value + case "replication-master-retry-count": + val, _ := strconv.Atoi(value) + mycluster.Conf.MasterRetryCount = val case "db-servers-tls-ssl-mode": mycluster.Conf.HostsTlsSslMode = value diff --git a/server/server.go b/server/server.go index 01b5cee99..141a7a3e4 100644 --- a/server/server.go +++ b/server/server.go @@ -469,6 +469,7 @@ func (repman *ReplicationManager) AddFlags(flags *pflag.FlagSet, conf *config.Co flags.StringVar(&conf.HostsDelayed, "replication-delayed-hosts", "", "Database hosts list that need delayed replication separated by commas") flags.IntVar(&conf.HostsDelayedTime, "replication-delayed-time", 3600, "Delayed replication time") flags.IntVar(&conf.MasterConnectRetry, "replication-master-connect-retry", 10, "Replication is define using this connection retry timeout") + flags.IntVar(&conf.MasterRetryCount, "replication-master-retry-count", 100000, "Number of retry to connect to master before failing. Default 100000 by MariaDB 10.6 standard") flags.StringVar(&conf.RplUser, "replication-credential", "root:mariadb", "Replication user in the [user]:[password] format") flags.BoolVar(&conf.ReplicationSSL, "replication-use-ssl", false, "Replication use SSL encryption to replicate from master") flags.BoolVar(&conf.ActivePassive, "replication-active-passive", false, "Active Passive topology") diff --git a/share/dashboard_react/src/Pages/Dashboard/components/DBServers/DBServerGrid/index.jsx b/share/dashboard_react/src/Pages/Dashboard/components/DBServers/DBServerGrid/index.jsx index a251fdb3c..aa4e7d3b1 100644 --- a/share/dashboard_react/src/Pages/Dashboard/components/DBServers/DBServerGrid/index.jsx +++ b/share/dashboard_react/src/Pages/Dashboard/components/DBServers/DBServerGrid/index.jsx @@ -259,20 +259,21 @@ function DBServerGrid({ value: getDelay(rowData) }, { - key: getSlaveGtidHeader(hasMariadbGtid, hasMysqlGtid), + key: 'Master last event', value: ( - + ) }, { - key: 'Delay', - value: getDelay(rowData) + key: 'Slave last event', + value: ( + + ) + }, + { + key: 'Master-Slave diff', + value: replication?.masterSlaveTimeDiff?.Valid ? 
replication?.masterSlaveTimeDiff?.Int64 : '' }, - { key: 'SQL error', value: replication?.lastSqlError?.String diff --git a/share/dashboard_react/src/Pages/Settings/RepConfigSettings.jsx b/share/dashboard_react/src/Pages/Settings/RepConfigSettings.jsx index 6f5bce1fa..fc09bdc56 100644 --- a/share/dashboard_react/src/Pages/Settings/RepConfigSettings.jsx +++ b/share/dashboard_react/src/Pages/Settings/RepConfigSettings.jsx @@ -6,6 +6,7 @@ import TableType2 from '../../components/TableType2' import { setSetting, switchSetting } from '../../redux/settingsSlice' import RMSwitch from '../../components/RMSwitch' import TextForm from '../../components/TextForm' +import NumberInput from '../../components/NumberInput' function RepConfigSettings({ selectedCluster, user, openConfirmModal, closeConfirmModal }) { const dispatch = useDispatch() @@ -23,6 +24,22 @@ function RepConfigSettings({ selectedCluster, user, openConfirmModal, closeConfi /> ) }, + { + key: 'Master retry count', + value: ( + dispatch(setSetting({ clusterName: selectedCluster?.name, setting: 'replication-master-retry-count', value }))} + /> + ) + }, { key: 'Enforce read only on replicas', value: ( diff --git a/share/dashboard_react/src/components/NumberInput/index.jsx b/share/dashboard_react/src/components/NumberInput/index.jsx index d2abdf8ab..75131d104 100644 --- a/share/dashboard_react/src/components/NumberInput/index.jsx +++ b/share/dashboard_react/src/components/NumberInput/index.jsx @@ -9,6 +9,7 @@ function NumberInput({ min = 2, max = 120, step = 1, + inputWidth ='75px', defaultValue, value, isDisabled, @@ -61,7 +62,7 @@ function NumberInput({ - + {showEditButton && !isDisabled ? ( diff --git a/utils/dbhelper/dbhelper.go b/utils/dbhelper/dbhelper.go index 09811d6c5..d8f441456 100644 --- a/utils/dbhelper/dbhelper.go +++ b/utils/dbhelper/dbhelper.go @@ -218,6 +218,9 @@ type SlaveStatus struct { SlaveSQLRunning sql.NullString `db:"Slave_SQL_Running" json:"slaveSqlRunning"` ExecMasterLogPos sql.NullString `db:"Exec_Master_Log_Pos" json:"execMasterLogPos"` SecondsBehindMaster sql.NullInt64 `db:"Seconds_Behind_Master" json:"secondsBehindMaster"` + MasterLastEventTime sql.NullString `db:"Master_last_event_time" json:"masterLastEventTime"` + SlaveLastEventTime sql.NullString `db:"Slave_last_event_time" json:"slaveLastEventTime"` + MasterSlaveTimeDiff sql.NullInt64 `db:"Master_Slave_Time_Diff" json:"masterSlaveTimeDiff"` LastIOErrno sql.NullString `db:"Last_IO_Errno" json:"lastIoErrno"` LastIOError sql.NullString `db:"Last_IO_Error" json:"lastIoError"` LastSQLErrno sql.NullString `db:"Last_SQL_Errno" json:"lastSqlErrno"` @@ -243,6 +246,7 @@ type SlaveStatus struct { SQLDelay sql.NullInt64 `db:"SQL_Delay" json:"sqlDelay"` SQLRemainingDelay sql.NullInt64 `db:"SQL_Remaining_Delay" json:"sqlRemainingDelay"` AutoPosition int `db:"Auto_Position" json:"autoPosition"` + MasterRetryCount sql.NullInt64 `db:"Master_Retry_Count" json:"masterRetryCount"` } func (s *SlaveStatus) ImportFromReplicaStatus(rs *ReplicaStatus) { @@ -282,6 +286,7 @@ func (s *SlaveStatus) ImportFromReplicaStatus(rs *ReplicaStatus) { s.SQLDelay = rs.SQLDelay s.SQLRemainingDelay = rs.SQLRemainingDelay s.AutoPosition = rs.AutoPosition + s.MasterRetryCount = rs.SourceRetryCount } type ReplicaStatus struct { @@ -761,6 +766,8 @@ type ChangeMasterOpt struct { Logpos string Mode string + RetryCount string // Start from MariaDB 12 and MySQL 8.4 + Channel string PostgressDB string IsDelayed bool @@ -890,6 +897,13 @@ func ChangeMaster(db *sqlx.DB, opt ChangeMasterOpt, myver 
*version.Version) (str cm += ", " + masterOrSource + "_SSL=1" //cm +=, MASTER_SSL_CA='" + opt.SSLCa + "', MASTER_SSL_CERT='" + opt.SSLCert + "', MASTER_SSL_KEY=" + opt.SSLKey + "'" } + + // Retry count supported from MariaDB 12 and MySQL 8.4 + if myver.IsMariaDBGreater12() || myver.IsMySQLOrPerconaGreater84() { + if opt.RetryCount != "" { + cm += ", " + masterOrSource + "_RETRY_COUNT=" + opt.RetryCount + } + } if myver.IsMySQLOrPercona() && opt.Channel != "" { cm += " FOR CHANNEL '" + opt.Channel + "'" } diff --git a/utils/version/version.go b/utils/version/version.go index ec14e41aa..ccc64f449 100644 --- a/utils/version/version.go +++ b/utils/version/version.go @@ -314,6 +314,10 @@ func (mv *Version) IsMariaDB() bool { return mv.Flavor == "MariaDB" } +func (mv *Version) IsMariaDBGreater12() bool { + return mv.Flavor == "MariaDB" && mv.Major >= 12 +} + func (mv *Version) IsMySQL57() bool { return mv.Flavor == "MySQL" && mv.Major == 5 && mv.Minor > 6 }
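
Note on the new replication-master-retry-count plumbing introduced above: the CLI flag (default 100000) lands in Config.MasterRetryCount, the GetChangeMasterBaseOpt* helpers copy it into ChangeMasterOpt.RetryCount, and dbhelper.ChangeMaster only appends the clause when the server version gate passes (MariaDB >= 12 or MySQL/Percona >= 8.4). The snippet below is a minimal standalone sketch of that gate, not the repository's code; miniVersion, IsMySQLGreater84 and buildRetryClause are illustrative stand-ins for utils/version.Version and the string building inside ChangeMaster.

```go
// Standalone sketch of the version gate added to dbhelper.ChangeMaster:
// the *_RETRY_COUNT clause is appended only when RetryCount is set and
// the target server is MariaDB >= 12 or MySQL/Percona >= 8.4.
package main

import "fmt"

type miniVersion struct {
	Flavor       string
	Major, Minor int
}

func (v miniVersion) IsMariaDBGreater12() bool {
	return v.Flavor == "MariaDB" && v.Major >= 12
}

func (v miniVersion) IsMySQLGreater84() bool {
	return v.Flavor == "MySQL" && (v.Major > 8 || (v.Major == 8 && v.Minor >= 4))
}

// buildRetryClause returns the fragment that would be appended to the
// CHANGE MASTER / CHANGE REPLICATION SOURCE statement, or "" when the
// option is unset or the server does not support it.
func buildRetryClause(masterOrSource, retryCount string, v miniVersion) string {
	if retryCount == "" {
		return ""
	}
	if v.IsMariaDBGreater12() || v.IsMySQLGreater84() {
		return ", " + masterOrSource + "_RETRY_COUNT=" + retryCount
	}
	return ""
}

func main() {
	// MariaDB 12 passes the gate, so the clause is emitted.
	fmt.Println(buildRetryClause("MASTER", "100000", miniVersion{Flavor: "MariaDB", Major: 12}))
	// MariaDB 10.6 falls outside the gate, so nothing is appended.
	fmt.Println(buildRetryClause("MASTER", "100000", miniVersion{Flavor: "MariaDB", Major: 10, Minor: 6}) == "")
}
```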
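
The other recurring change is replacing repeated inline dbhelper.ChangeMasterOpt literals with GetChangeMasterBaseOptForSlave / ForReplGroup / ForMxs plus thin wrappers (pointSlaveToMasterWithMode, pointSlaveToMasterPositional, pointSlaveToMasterMxsNoGtid) that override only Mode, Logfile and Logpos per call site. Below is a reduced sketch of that pattern, again with simplified stand-in types rather than the real ChangeMasterOpt.

```go
// Reduced sketch of the "base options + per-call override" pattern the
// refactor introduces; Opt and its fields are stand-ins for
// dbhelper.ChangeMasterOpt.
package main

import "fmt"

type Opt struct {
	Host, Port, User, Mode, Logfile, Logpos, RetryCount string
}

// baseOpt plays the role of GetChangeMasterBaseOptForSlave: the common
// fields (credentials, heartbeat, SSL, retry count, channel, ...) are
// set once here instead of being repeated at every call site.
func baseOpt(host, port string) Opt {
	return Opt{Host: host, Port: port, User: "repl", RetryCount: "100000"}
}

// pointWithMode mirrors pointSlaveToMasterWithMode: take the base options
// and override only the replication mode.
func pointWithMode(host, port, mode string) Opt {
	o := baseOpt(host, port)
	o.Mode = mode
	return o
}

func main() {
	fmt.Printf("%+v\n", pointWithMode("db1", "3306", "SLAVE_POS"))
	fmt.Printf("%+v\n", pointWithMode("db1", "3306", "MASTER_AUTO_POSITION"))
}
```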