Skip to content

Commit e819660

Browse files
author
lamai93
committed
If all members are dropped, create a new one.
1 parent 241e91a commit e819660

File tree

2 files changed

+42
-1
lines changed

2 files changed

+42
-1
lines changed

pkg/deployment/deployment_inspector.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,12 @@ func (d *Deployment) inspectDeployment(lastInterval util.Interval) util.Interval
121121
d.CreateEvent(k8sutil.NewErrorEvent("Member failure detection failed", err, d.apiObject))
122122
}
123123

124+
// Immediate actions
125+
if err := d.reconciler.CheckDeployment(); err != nil {
126+
hasError = true
127+
d.CreateEvent(k8sutil.NewErrorEvent("Reconciler immediate actions failed", err, d.apiObject))
128+
}
129+
124130
// Create scale/update plan
125131
if err := d.reconciler.CreatePlan(); err != nil {
126132
hasError = true

pkg/deployment/reconcile/reconciler.go

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,10 @@
2222

2323
package reconcile
2424

25-
import "github.com/rs/zerolog"
25+
import (
26+
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha"
27+
"github.com/rs/zerolog"
28+
)
2629

2730
// Reconciler is the service that takes care of bringing a deployment
2831
// in line with its (changed) specification.
@@ -38,3 +41,35 @@ func NewReconciler(log zerolog.Logger, context Context) *Reconciler {
3841
context: context,
3942
}
4043
}
44+
45+
// CheckDeployment checks for obviously broken things and fixes them immediately
46+
func (r *Reconciler) CheckDeployment() error {
47+
spec := r.context.GetSpec()
48+
status, _ := r.context.GetStatus()
49+
50+
if spec.GetMode().HasCoordinators() {
51+
52+
// Check if there are coordinators
53+
if len(status.Members.Coordinators) == 0 {
54+
// No more coordinators! Take immediate action
55+
r.log.Error().Msg("No Coordinator members! Create one member immediately")
56+
_, err := r.context.CreateMember(api.ServerGroupCoordinators, "")
57+
if err != nil {
58+
return err
59+
}
60+
} else if status.Members.Coordinators.AllFailed() {
61+
r.log.Error().Msg("All coordinators failed - reset")
62+
for _, m := range status.Members.Coordinators {
63+
if err := r.context.DeletePod(m.PodName); err != nil {
64+
r.log.Error().Err(err).Msg("Failed to delete pod")
65+
}
66+
m.Phase = api.MemberPhaseNone
67+
if err := status.Members.Update(m, api.ServerGroupCoordinators); err != nil {
68+
r.log.Error().Err(err).Msg("Failed to update member")
69+
}
70+
}
71+
}
72+
}
73+
74+
return nil
75+
}

0 commit comments

Comments
 (0)