Skip to content

Commit 8e2b1ba

Browse files
✨ Add in-place to rollout planner (#12865)
* Add in-place to rollout planner * Address comments * More comments * More comments
1 parent f370aec commit 8e2b1ba

File tree

32 files changed

+4409
-123
lines changed

32 files changed

+4409
-123
lines changed

api/core/v1beta2/machine_types.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,17 @@ const (
8787

8888
// ManagedNodeAnnotationDomain is one of the CAPI managed Node annotation domains.
8989
ManagedNodeAnnotationDomain = "node.cluster.x-k8s.io"
90+
91+
// PendingAcknowledgeMoveAnnotation is an internal annotation added by the MS controller to a machine when being
92+
// moved from the oldMS to the newMS. The annotation is removed as soon as the MS controller gets the acknowledgment about the
93+
// replica being accounted from the corresponding MD.
94+
// Note: The annotation is added when reconciling the oldMS, and it is removed when reconciling the newMS.
95+
// Note: This annotation is used in pair with AcknowledgedMoveAnnotation on MachineSets.
96+
PendingAcknowledgeMoveAnnotation = "in-place-updates.internal.cluster.x-k8s.io/pending-acknowledge-move"
97+
98+
// UpdateInProgressAnnotation is an internal annotation added to machines by the controller owning the Machine when in-place update
99+
// is started, e.g. by the MachineSet controller; the annotation will be removed by the Machine controller when in-place update is completed.
100+
UpdateInProgressAnnotation = "in-place-updates.internal.cluster.x-k8s.io/update-in-progress"
90101
)
91102

92103
// Machine's Available condition and corresponding reasons.

api/core/v1beta2/machineset_types.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,29 @@ const (
3333
// MachineSetFinalizer is the finalizer used by the MachineSet controller to
3434
// ensure ordered cleanup of corresponding Machines when a Machineset is being deleted.
3535
MachineSetFinalizer = "cluster.x-k8s.io/machineset"
36+
37+
// MachineSetMoveMachinesToMachineSetAnnotation is an internal annotation added by the MD controller to the oldMS
38+
// when it should scale down by moving machines that can be updated in-place to the newMS instead of deleting them.
39+
// The annotation value is the newMS name.
40+
// Note: This annotation is used together with MachineSetReceiveMachinesFromMachineSetsAnnotation to perform a two-way check before moving a machine from oldMS to newMS:
41+
//
42+
// "oldMS must have: move to newMS" and "newMS must have: receive replicas from oldMS"
43+
MachineSetMoveMachinesToMachineSetAnnotation = "in-place-updates.internal.cluster.x-k8s.io/move-machines-to-machineset"
44+
45+
// MachineSetReceiveMachinesFromMachineSetsAnnotation is an internal annotation added by the MD controller to the newMS
46+
// when it should receive replicas from oldMSs as a first step of an in-place upgrade operation.
47+
// The annotation value is a comma separated list of oldMSs.
48+
// Note: This annotation is used together with MachineSetMoveMachinesToMachineSetAnnotation to perform a two-way check before moving a machine from oldMS to newMS:
49+
//
50+
// "oldMS must have: move to newMS" and "newMS must have: receive replicas from oldMS"
51+
MachineSetReceiveMachinesFromMachineSetsAnnotation = "in-place-updates.internal.cluster.x-k8s.io/receive-machines-from-machinesets"
52+
53+
// AcknowledgedMoveAnnotation is an internal annotation with a list of machines added by the MD controller
54+
// to a MachineSet when it acknowledges a machine pending acknowledgment after being moved from an oldMS.
55+
// The annotation value is a comma separated list of Machines already acknowledged; a machine is dropped
56+
// from this annotation as soon as pending-acknowledge-move is removed from the machine; the annotation is dropped when empty.
57+
// Note: This annotation is used in pair with PendingAcknowledgeMoveAnnotation on Machines.
58+
AcknowledgedMoveAnnotation = "in-place-updates.internal.cluster.x-k8s.io/acknowledged-move"
3659
)
3760

3861
// MachineSetSpec defines the desired state of MachineSet.

docs/book/src/reference/api/labels-and-annotations.md

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,29 @@
5858
| topology.cluster.x-k8s.io/dry-run | It is an annotation that gets set on objects by the topology controller only during a server side dry run apply operation. It is used for validating update webhooks for objects which get updated by template rotation (e.g. InfrastructureMachineTemplate). When the annotation is set and the admission request is a dry run, the webhook should deny validation due to immutability. By that the request will succeed (without any changes to the actual object because it is a dry run) and the topology controller will receive the resulting object. | Cluster API | Template rotation objects |
5959
| topology.cluster.x-k8s.io/hold-upgrade-sequence | It can be used to hold the entire MachineDeployment upgrade sequence. If the annotation is set on a MachineDeployment topology in Cluster.spec.topology.workers, the Kubernetes upgrade for this MachineDeployment topology and all subsequent ones is deferred. | Cluster API | MachineDeployments in Cluster.topology |
6060
| topology.cluster.x-k8s.io/upgrade-concurrency | It can be used to configure the maximum concurrency while upgrading MachineDeployments of a classy Cluster. It is set as a top level annotation on the Cluster object. The value should be >= 1. If unspecified the upgrade concurrency will default to 1. | Cluster API | Clusters |
61-
| topology.internal.cluster.x-k8s.io/upgrade-step | This is an annotation used by CAPI internally to track upgrade steps. Name, meaning and semantic of the annotation can change anytime and it should not be used outside of CAPI controllers. | Cluster API | Clusters |
6261
| unsafe.topology.cluster.x-k8s.io/disable-update-class-name-check | It can be used to disable the webhook check on update that disallows a pre-existing Cluster to be populated with Topology information and Class. | User | Clusters |
6362
| unsafe.topology.cluster.x-k8s.io/disable-update-version-check | It can be used to disable the webhook checks on update that disallows updating the .topology.spec.version on certain conditions. | User | Clusters |
63+
64+
65+
# Internal Annotations
66+
67+
The following annotations are used by CAPI internally.
68+
69+
<aside class="note warning">
70+
71+
<h1>Internal annotations should not be used outside CAPI controllers</h1>
72+
73+
The name, meaning, and semantics of internal annotations can change at any time.
74+
75+
Users must not change or remove internal annotations on CAPI resources, because this can lead to issues or unexpected behaviour of the system.
76+
77+
</aside>
78+
79+
| Annotation | Note | Applies to |
80+
|------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------|
81+
| in-place-updates.internal.cluster.x-k8s.io/acknowledged-move | This annotation is added by the MD controller to a MachineSet when it acknowledges a machine pending acknowledgment after being moved from an oldMS | MachineSet |
82+
| in-place-updates.internal.cluster.x-k8s.io/move-machines-to-machineset | This annotation is added by the MD controller to the oldMS when it should scale down by moving machines that can be updated in-place to the newMS instead of deleting them. | MachineSet |
83+
| in-place-updates.internal.cluster.x-k8s.io/pending-acknowledge-move | This annotation is added by the MS controller to a machine when being moved from the oldMS to the newMS | Machine |
84+
| in-place-updates.internal.cluster.x-k8s.io/receive-machines-from-machinesets | This annotation is added by the MD controller to the newMS when it should receive replicas from an oldMS | MachineSet |
85+
| in-place-updates.internal.cluster.x-k8s.io/update-in-progress | This annotation is added to machines by the controller owning the Machine when in-place update is started | Machine |
86+
| topology.internal.cluster.x-k8s.io/upgrade-step | This annotation is used by the topology controller on a Cluster to track upgrade steps. | Clusters |

internal/controllers/machinedeployment/machinedeployment_controller.go

Lines changed: 62 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ import (
4343
"sigs.k8s.io/cluster-api/controllers/external"
4444
"sigs.k8s.io/cluster-api/internal/util/ssa"
4545
"sigs.k8s.io/cluster-api/util"
46+
"sigs.k8s.io/cluster-api/util/collections"
4647
v1beta1conditions "sigs.k8s.io/cluster-api/util/conditions/deprecated/v1beta1"
4748
"sigs.k8s.io/cluster-api/util/finalizers"
4849
clog "sigs.k8s.io/cluster-api/util/log"
@@ -163,6 +164,17 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (retres ct
163164
cluster: cluster,
164165
}
165166

167+
// Get machines.
168+
selectorMap, err := metav1.LabelSelectorAsMap(&s.machineDeployment.Spec.Selector)
169+
if err != nil {
170+
return ctrl.Result{}, errors.Wrap(err, "failed to convert label selector to a map")
171+
}
172+
machineList := &clusterv1.MachineList{}
173+
if err := r.Client.List(ctx, machineList, client.InNamespace(s.machineDeployment.Namespace), client.MatchingLabels(selectorMap)); err != nil {
174+
return ctrl.Result{}, errors.Wrap(err, "failed to list Machines")
175+
}
176+
s.machines = collections.FromMachineList(machineList)
177+
166178
defer func() {
167179
if err := r.updateStatus(ctx, s); err != nil {
168180
reterr = kerrors.NewAggregate([]error{reterr, err})
@@ -195,6 +207,7 @@ type scope struct {
195207
machineDeployment *clusterv1.MachineDeployment
196208
cluster *clusterv1.Cluster
197209
machineSets []*clusterv1.MachineSet
210+
machines collections.Machines
198211
bootstrapTemplateNotFound bool
199212
bootstrapTemplateExists bool
200213
infrastructureTemplateNotFound bool
@@ -291,15 +304,15 @@ func (r *Reconciler) reconcile(ctx context.Context, s *scope) error {
291304
templateExists := s.infrastructureTemplateExists && (!md.Spec.Template.Spec.Bootstrap.ConfigRef.IsDefined() || s.bootstrapTemplateExists)
292305

293306
if ptr.Deref(md.Spec.Paused, false) {
294-
return r.sync(ctx, md, s.machineSets, templateExists)
307+
return r.sync(ctx, md, s.machineSets, s.machines, templateExists)
295308
}
296309

297310
if md.Spec.Rollout.Strategy.Type == clusterv1.RollingUpdateMachineDeploymentStrategyType {
298-
return r.rolloutRollingUpdate(ctx, md, s.machineSets, templateExists)
311+
return r.rolloutRollingUpdate(ctx, md, s.machineSets, s.machines, templateExists)
299312
}
300313

301314
if md.Spec.Rollout.Strategy.Type == clusterv1.OnDeleteMachineDeploymentStrategyType {
302-
return r.rolloutOnDelete(ctx, md, s.machineSets, templateExists)
315+
return r.rolloutOnDelete(ctx, md, s.machineSets, s.machines, templateExists)
303316
}
304317

305318
return errors.Errorf("unexpected deployment strategy type: %s", md.Spec.Rollout.Strategy.Type)
@@ -320,7 +333,6 @@ func (r *Reconciler) createOrUpdateMachineSetsAndSyncMachineDeploymentRevision(c
320333
log = log.WithValues("MachineSet", klog.KObj(ms))
321334
ctx = ctrl.LoggerInto(ctx, log)
322335

323-
originalReplicas := ptr.Deref(ms.Spec.Replicas, 0)
324336
if scaleIntent, ok := p.scaleIntents[ms.Name]; ok {
325337
ms.Spec.Replicas = &scaleIntent
326338
}
@@ -363,22 +375,30 @@ func (r *Reconciler) createOrUpdateMachineSetsAndSyncMachineDeploymentRevision(c
363375
if !ok {
364376
return errors.Errorf("failed to update MachineSet %s, original MS is missing", klog.KObj(ms))
365377
}
378+
originalReplicas := ptr.Deref(originalMS.Spec.Replicas, 0)
366379

367380
err := ssa.Patch(ctx, r.Client, machineDeploymentManagerName, ms, ssa.WithCachingProxy{Cache: r.ssaCache, Original: originalMS})
368381
if err != nil {
369382
r.recorder.Eventf(p.md, corev1.EventTypeWarning, "FailedUpdate", "Failed to update MachineSet %s: %v", klog.KObj(ms), err)
370383
return errors.Wrapf(err, "failed to update MachineSet %s", klog.KObj(ms))
371384
}
372385

386+
changes := getAnnotationChanges(originalMS, ms)
387+
373388
newReplicas := ptr.Deref(ms.Spec.Replicas, 0)
374389
if newReplicas < originalReplicas {
375-
log.Info(fmt.Sprintf("Scaled down MachineSet %s to %d replicas (-%d)", ms.Name, newReplicas, originalReplicas-newReplicas))
390+
changes = append(changes, "replicas", newReplicas)
391+
log.Info(fmt.Sprintf("Scaled down MachineSet %s to %d replicas (-%d)", ms.Name, newReplicas, originalReplicas-newReplicas), changes...)
376392
r.recorder.Eventf(p.md, corev1.EventTypeNormal, "SuccessfulScale", "Scaled down MachineSet %v: %d -> %d", ms.Name, originalReplicas, newReplicas)
377393
}
378394
if newReplicas > originalReplicas {
379-
log.Info(fmt.Sprintf("Scaled up MachineSet %s to %d replicas (+%d)", ms.Name, newReplicas, newReplicas-originalReplicas))
395+
changes = append(changes, "replicas", newReplicas)
396+
log.Info(fmt.Sprintf("Scaled up MachineSet %s to %d replicas (+%d)", ms.Name, newReplicas, newReplicas-originalReplicas), changes...)
380397
r.recorder.Eventf(p.md, corev1.EventTypeNormal, "SuccessfulScale", "Scaled up MachineSet %v: %d -> %d", ms.Name, originalReplicas, newReplicas)
381398
}
399+
if newReplicas == originalReplicas && len(changes) > 0 {
400+
log.Info(fmt.Sprintf("MachineSet %s updated", ms.Name), changes...)
401+
}
382402
}
383403

384404
// Surface the revision annotation on the MD level
@@ -392,6 +412,42 @@ func (r *Reconciler) createOrUpdateMachineSetsAndSyncMachineDeploymentRevision(c
392412
return nil
393413
}
394414

415+
func getAnnotationChanges(originalMS *clusterv1.MachineSet, ms *clusterv1.MachineSet) []any {
416+
changes := []any{}
417+
if originalMS.Annotations[clusterv1.MachineSetMoveMachinesToMachineSetAnnotation] != ms.Annotations[clusterv1.MachineSetMoveMachinesToMachineSetAnnotation] {
418+
if value, ok := ms.Annotations[clusterv1.MachineSetMoveMachinesToMachineSetAnnotation]; ok {
419+
changes = append(changes, clusterv1.MachineSetMoveMachinesToMachineSetAnnotation, value)
420+
} else {
421+
changes = append(changes, clusterv1.MachineSetMoveMachinesToMachineSetAnnotation, "(annotation removed)")
422+
}
423+
}
424+
425+
if originalMS.Annotations[clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation] != ms.Annotations[clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation] {
426+
if value, ok := ms.Annotations[clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation]; ok {
427+
changes = append(changes, clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation, value)
428+
} else {
429+
changes = append(changes, clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation, "(annotation removed)")
430+
}
431+
}
432+
433+
if originalMS.Annotations[clusterv1.AcknowledgedMoveAnnotation] != ms.Annotations[clusterv1.AcknowledgedMoveAnnotation] {
434+
if value, ok := ms.Annotations[clusterv1.AcknowledgedMoveAnnotation]; ok {
435+
changes = append(changes, clusterv1.AcknowledgedMoveAnnotation, value)
436+
} else {
437+
changes = append(changes, clusterv1.AcknowledgedMoveAnnotation, "(annotation removed)")
438+
}
439+
}
440+
441+
if originalMS.Annotations[clusterv1.DisableMachineCreateAnnotation] != ms.Annotations[clusterv1.DisableMachineCreateAnnotation] {
442+
if value, ok := ms.Annotations[clusterv1.DisableMachineCreateAnnotation]; ok {
443+
changes = append(changes, clusterv1.DisableMachineCreateAnnotation, value)
444+
} else {
445+
changes = append(changes, clusterv1.DisableMachineCreateAnnotation, "(annotation removed)")
446+
}
447+
}
448+
return changes
449+
}
450+
395451
func (r *Reconciler) reconcileDelete(ctx context.Context, s *scope) error {
396452
log := ctrl.LoggerFrom(ctx)
397453
if err := r.getAndAdoptMachineSetsForDeployment(ctx, s); err != nil {

internal/controllers/machinedeployment/machinedeployment_rollout_ondelete.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,13 @@ import (
2727

2828
clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2"
2929
"sigs.k8s.io/cluster-api/internal/controllers/machinedeployment/mdutil"
30+
"sigs.k8s.io/cluster-api/util/collections"
3031
)
3132

3233
// rolloutOnDelete reconciles machine sets controlled by a MachineDeployment that is using the OnDelete strategy.
33-
func (r *Reconciler) rolloutOnDelete(ctx context.Context, md *clusterv1.MachineDeployment, msList []*clusterv1.MachineSet, templateExists bool) error {
34+
func (r *Reconciler) rolloutOnDelete(ctx context.Context, md *clusterv1.MachineDeployment, msList []*clusterv1.MachineSet, machines collections.Machines, templateExists bool) error {
3435
planner := newRolloutPlanner()
35-
if err := planner.init(ctx, md, msList, nil, true, templateExists); err != nil {
36+
if err := planner.init(ctx, md, msList, machines.UnsortedList(), true, templateExists); err != nil {
3637
return err
3738
}
3839

0 commit comments

Comments
 (0)