Skip to content

Commit a956f3e

Browse files
Rollout-planner improve checks for scalingOrInPlaceUpdateInProgress
1 parent 2e7fcc6 commit a956f3e

File tree

3 files changed

+110
-24
lines changed

3 files changed

+110
-24
lines changed

internal/controllers/machinedeployment/machinedeployment_rollout_planner_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1227,6 +1227,12 @@ func withStatusAvailableReplicas(r int32) fakeMachineSetOption {
12271227
}
12281228
}
12291229

1230+
func withStatusUpToDateReplicas(r int32) fakeMachineSetOption {
1231+
return func(ms *clusterv1.MachineSet) {
1232+
ms.Status.UpToDateReplicas = ptr.To(r)
1233+
}
1234+
}
1235+
12301236
func withMSAnnotation(name, value string) fakeMachineSetOption {
12311237
return func(ms *clusterv1.MachineSet) {
12321238
if ms.Annotations == nil {

internal/controllers/machinedeployment/machinedeployment_rollout_rollingupdate.go

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,10 @@ import (
2828
ctrl "sigs.k8s.io/controller-runtime"
2929

3030
clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2"
31+
runtimehooksv1 "sigs.k8s.io/cluster-api/api/runtime/hooks/v1alpha1"
3132
"sigs.k8s.io/cluster-api/feature"
3233
"sigs.k8s.io/cluster-api/internal/controllers/machinedeployment/mdutil"
34+
"sigs.k8s.io/cluster-api/internal/hooks"
3335
"sigs.k8s.io/cluster-api/util"
3436
"sigs.k8s.io/cluster-api/util/collections"
3537
)
@@ -502,6 +504,7 @@ func (p *rolloutPlanner) reconcileInPlaceUpdateIntent(ctx context.Context) error
502504
}
503505

504506
func (p *rolloutPlanner) scalingOrInPlaceUpdateInProgress(_ context.Context) bool {
507+
// Check if the new MS or old MS are scaling.
505508
if ptr.Deref(p.newMS.Spec.Replicas, 0) != ptr.Deref(p.newMS.Status.Replicas, 0) {
506509
return true
507510
}
@@ -513,11 +516,30 @@ func (p *rolloutPlanner) scalingOrInPlaceUpdateInProgress(_ context.Context) boo
513516
return true
514517
}
515518
}
519+
520+
// Check that there are no updates in progress.
521+
// We check both that the newMS MachineSet will report that the update is completed via .status.upToDateReplicas
522+
// and the Machine controller through the annotations so this code does not depend on a specific execution order
523+
// of the MachineSet and Machine controllers.
524+
if ptr.Deref(p.newMS.Spec.Replicas, 0) != ptr.Deref(p.newMS.Status.UpToDateReplicas, 0) {
525+
return true
526+
}
516527
for _, m := range p.machines {
517-
if _, ok := m.Annotations[clusterv1.UpdateInProgressAnnotation]; ok {
528+
_, inPlaceUpdateInProgress := m.Annotations[clusterv1.UpdateInProgressAnnotation]
529+
hasUpdateMachinePending := hooks.IsPending(runtimehooksv1.UpdateMachine, m)
530+
if inPlaceUpdateInProgress || hasUpdateMachinePending {
518531
return true
519532
}
520533
}
534+
535+
// We are also checking AvailableReplicas because we want to make sure that we wait until the
536+
// Machine goes back to Available after the in-place update is completed.
537+
// If we would not wait for this, the rolloutPlaner would use maxSurge to create an additional Machine.
538+
// Note: This also means that if any Machine of the new MachineSet becomes unavailable we are blocking
539+
// further progress of the in-place update.
540+
if ptr.Deref(p.newMS.Spec.Replicas, 0) != ptr.Deref(p.newMS.Status.AvailableReplicas, 0) {
541+
return true
542+
}
521543
return false
522544
}
523545

internal/controllers/machinedeployment/machinedeployment_rollout_rollingupdate_test.go

Lines changed: 81 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -881,7 +881,7 @@ func TestReconcileInPlaceUpdateIntent(t *testing.T) {
881881
{
882882
name: "When moving replicas from oldMS to newMS, preserve newMS scale up intent if it does not use maxSurge",
883883
md: createMD("v2", 3, withRollingUpdateStrategy(1, 0)),
884-
newMS: createMS("ms2", "v2", 1),
884+
newMS: createMS("ms2", "v2", 1, withStatusUpToDateReplicas(1), withStatusAvailableReplicas(1)),
885885
oldMS: []*clusterv1.MachineSet{
886886
createMS("ms1", "v1", 1),
887887
},
@@ -909,7 +909,7 @@ func TestReconcileInPlaceUpdateIntent(t *testing.T) {
909909
{
910910
name: "When moving replicas from oldMS to newMS, preserve one usage of MaxSurge in the newMS scale up intent when required to start the rollout (maxSurge 1, maxUnavailable 0)",
911911
md: createMD("v2", 3, withRollingUpdateStrategy(1, 0)),
912-
newMS: createMS("ms2", "v2", 0),
912+
newMS: createMS("ms2", "v2", 0, withStatusUpToDateReplicas(0), withStatusAvailableReplicas(0)),
913913
oldMS: []*clusterv1.MachineSet{
914914
createMS("ms1", "v1", 3),
915915
},
@@ -939,7 +939,7 @@ func TestReconcileInPlaceUpdateIntent(t *testing.T) {
939939
{
940940
name: "When moving replicas from oldMS to newMS, preserve one usage of MaxSurge in the newMS scale up intent when required to start the rollout (maxSurge 3, maxUnavailable 0)",
941941
md: createMD("v2", 3, withRollingUpdateStrategy(3, 0)),
942-
newMS: createMS("ms2", "v2", 0),
942+
newMS: createMS("ms2", "v2", 0, withStatusUpToDateReplicas(0), withStatusAvailableReplicas(0)),
943943
oldMS: []*clusterv1.MachineSet{
944944
createMS("ms1", "v1", 3),
945945
},
@@ -967,9 +967,39 @@ func TestReconcileInPlaceUpdateIntent(t *testing.T) {
967967
},
968968
},
969969
{
970-
name: "When moving replicas from oldMS to newMS, drop usage of MaxSurge in the newMS scale up intent when there are oldMS with scale down intent (maxSurge 3, maxUnavailable 1)",
970+
name: "When moving replicas from oldMS to newMS, drop usage of MaxSurge in the newMS scale up intent when there are oldMS with scale down from a previous reconcile (maxSurge 3, maxUnavailable 1)",
971971
md: createMD("v2", 3, withRollingUpdateStrategy(3, 1)),
972972
newMS: createMS("ms2", "v2", 0),
973+
oldMS: []*clusterv1.MachineSet{
974+
createMS("ms1", "v1", 3, withStatusReplicas(4), withStatusUpToDateReplicas(4), withStatusAvailableReplicas(4)), // scale down from a previous reconcile
975+
},
976+
machines: []*clusterv1.Machine{
977+
createM("m1", "ms1", "v1"),
978+
createM("m2", "ms1", "v1"),
979+
createM("m3", "ms1", "v1"),
980+
createM("m4", "ms1", "v1"),
981+
},
982+
scaleIntents: map[string]int32{
983+
"ms2": 2, // +2 => MD expect 3, has currently 4 replicas, +2 replica it is using maxSurge 3
984+
},
985+
upToDateResults: map[string]mdutil.UpToDateResult{
986+
"ms1": {EligibleForInPlaceUpdate: true},
987+
},
988+
expectedCanUpdateCalls: map[string]bool{
989+
"ms1": true,
990+
},
991+
canUpdateAnswer: map[string]bool{
992+
"ms1": true,
993+
},
994+
expectMoveFromMS: []string{"ms1"},
995+
expectScaleIntents: map[string]int32{
996+
// "ms2": 3, +3 replica using maxSurge dropped, oldMS is scaling down
997+
},
998+
},
999+
{
1000+
name: "When moving replicas from oldMS to newMS, drop usage of MaxSurge in the newMS scale up intent when there are oldMS with scale down intent (maxSurge 3, maxUnavailable 1)",
1001+
md: createMD("v2", 3, withRollingUpdateStrategy(3, 1)),
1002+
newMS: createMS("ms2", "v2", 0, withStatusUpToDateReplicas(0), withStatusAvailableReplicas(0)),
9731003
oldMS: []*clusterv1.MachineSet{
9741004
createMS("ms1", "v1", 3),
9751005
},
@@ -998,20 +1028,22 @@ func TestReconcileInPlaceUpdateIntent(t *testing.T) {
9981028
},
9991029
},
10001030
{
1001-
name: "When moving replicas from oldMS to newMS, drop usage of MaxSurge in the newMS scale up intent when there are oldMS with scale down from a previous reconcile (maxSurge 3, maxUnavailable 1)",
1002-
md: createMD("v2", 3, withRollingUpdateStrategy(3, 1)),
1003-
newMS: createMS("ms2", "v2", 0),
1031+
name: "When moving replicas from oldMS to newMS, drop usage of MaxSurge in the newMS scale up intent when there newMS is scaling from a previous reconcile (maxSurge 3, maxUnavailable 1)",
1032+
md: createMD("v6", 6, withRollingUpdateStrategy(3, 1)),
1033+
newMS: createMS("ms2", "v2", 3, withStatusReplicas(2), withStatusUpToDateReplicas(2), withStatusAvailableReplicas(2)), // scaling from a previous reconcile
10041034
oldMS: []*clusterv1.MachineSet{
1005-
createMS("ms1", "v1", 3, withStatusReplicas(4)), // scale down from a previous reconcile
1035+
createMS("ms1", "v1", 3),
10061036
},
10071037
machines: []*clusterv1.Machine{
10081038
createM("m1", "ms1", "v1"),
10091039
createM("m2", "ms1", "v1"),
10101040
createM("m3", "ms1", "v1"),
1011-
createM("m4", "ms1", "v1"),
1041+
createM("m4", "ms2", "v2"),
1042+
createM("m5", "ms2", "v2"),
1043+
createM("m6", "ms2", "v2"),
10121044
},
10131045
scaleIntents: map[string]int32{
1014-
"ms2": 2, // +2 => MD expect 3, has currently 4 replicas, +2 replica it is using maxSurge 3
1046+
"ms2": 6, // +3 => MD expect 6, has currently 6 replicas, +3 replica it is using maxSurge 3
10151047
},
10161048
upToDateResults: map[string]mdutil.UpToDateResult{
10171049
"ms1": {EligibleForInPlaceUpdate: true},
@@ -1024,13 +1056,42 @@ func TestReconcileInPlaceUpdateIntent(t *testing.T) {
10241056
},
10251057
expectMoveFromMS: []string{"ms1"},
10261058
expectScaleIntents: map[string]int32{
1027-
// "ms2": 3, +3 replica using maxSurge dropped, oldMS is scaling down
1059+
// "ms2": 6, +3 replicas from maxSurge dropped, newMS is already scaling up
1060+
},
1061+
},
1062+
{
1063+
name: "When moving replicas from oldMS to newMS, drop usage of MaxSurge in the newMS scale up intent when there are still not UpToDateReplicas on the newMS (maxSurge 1, maxUnavailable 0)",
1064+
md: createMD("v2", 3, withRollingUpdateStrategy(1, 0)),
1065+
newMS: createMS("ms2", "v2", 2, withStatusUpToDateReplicas(1), withStatusAvailableReplicas(1)),
1066+
oldMS: []*clusterv1.MachineSet{
1067+
createMS("ms1", "v1", 1),
1068+
},
1069+
machines: []*clusterv1.Machine{
1070+
createM("m1", "ms1", "v1"),
1071+
createM("m2", "ms2", "v1"),
1072+
createM("m3", "ms2", "v1"),
1073+
},
1074+
scaleIntents: map[string]int32{
1075+
"ms2": 3, // +1 => MD expect 3, has currently 3 replicas, +1 replica it is using maxSurge
1076+
},
1077+
upToDateResults: map[string]mdutil.UpToDateResult{
1078+
"ms1": {EligibleForInPlaceUpdate: true},
1079+
},
1080+
expectedCanUpdateCalls: map[string]bool{
1081+
"ms1": true,
1082+
},
1083+
canUpdateAnswer: map[string]bool{
1084+
"ms1": true,
1085+
},
1086+
expectMoveFromMS: []string{"ms1"},
1087+
expectScaleIntents: map[string]int32{
1088+
// "ms2": 1, +1 replica using maxSurge dropped, there is a machine updating in place
10281089
},
10291090
},
10301091
{
10311092
name: "When moving replicas from oldMS to newMS, drop usage of MaxSurge in the newMS scale up intent when there machines in-place updating (maxSurge 3, maxUnavailable 0)",
10321093
md: createMD("v2", 3, withRollingUpdateStrategy(3, 0)),
1033-
newMS: createMS("ms2", "v2", 0),
1094+
newMS: createMS("ms2", "v2", 0, withStatusUpToDateReplicas(0), withStatusAvailableReplicas(0)),
10341095
oldMS: []*clusterv1.MachineSet{
10351096
createMS("ms1", "v1", 3),
10361097
},
@@ -1057,22 +1118,19 @@ func TestReconcileInPlaceUpdateIntent(t *testing.T) {
10571118
},
10581119
},
10591120
{
1060-
name: "When moving replicas from oldMS to newMS, drop usage of MaxSurge in the newMS scale up intent when there newMS is scaling from a previous reconcile (maxSurge 3, maxUnavailable 1)",
1061-
md: createMD("v6", 6, withRollingUpdateStrategy(3, 1)),
1062-
newMS: createMS("ms2", "v2", 3, withStatusReplicas(2)), // scaling from a previous reconcile
1121+
name: "When moving replicas from oldMS to newMS, drop usage of MaxSurge in the newMS scale up intent when there are still not AvailableReplicas on the newMS (maxSurge 1, maxUnavailable 0)",
1122+
md: createMD("v2", 3, withRollingUpdateStrategy(1, 0)),
1123+
newMS: createMS("ms2", "v2", 2, withStatusUpToDateReplicas(2), withStatusAvailableReplicas(1)),
10631124
oldMS: []*clusterv1.MachineSet{
1064-
createMS("ms1", "v1", 3),
1125+
createMS("ms1", "v1", 1),
10651126
},
10661127
machines: []*clusterv1.Machine{
10671128
createM("m1", "ms1", "v1"),
1068-
createM("m2", "ms1", "v1"),
1069-
createM("m3", "ms1", "v1"),
1070-
createM("m4", "ms2", "v2"),
1071-
createM("m5", "ms2", "v2"),
1072-
createM("m6", "ms2", "v2"),
1129+
createM("m2", "ms2", "v1"),
1130+
createM("m3", "ms2", "v1"),
10731131
},
10741132
scaleIntents: map[string]int32{
1075-
"ms2": 6, // +3 => MD expect 6, has currently 6 replicas, +3 replica it is using maxSurge 3
1133+
"ms2": 3, // +1 => MD expect 3, has currently 3 replicas, +1 replica it is using maxSurge
10761134
},
10771135
upToDateResults: map[string]mdutil.UpToDateResult{
10781136
"ms1": {EligibleForInPlaceUpdate: true},
@@ -1085,7 +1143,7 @@ func TestReconcileInPlaceUpdateIntent(t *testing.T) {
10851143
},
10861144
expectMoveFromMS: []string{"ms1"},
10871145
expectScaleIntents: map[string]int32{
1088-
// "ms2": 6, +3 replicas from maxSurge dropped, newMS is already scaling up
1146+
// "ms2": 1, +1 replica using maxSurge dropped, there is a machine updating in place
10891147
},
10901148
},
10911149
}

0 commit comments

Comments
 (0)