Skip to content

Commit 6c59755

Browse files
authored
Merge pull request #12831 from alexander-demicev/machinecontroller
✨ Add in-place updates support for machine controller
2 parents 68b0e0c + c315bcf commit 6c59755

File tree

7 files changed

+967
-7
lines changed

7 files changed

+967
-7
lines changed

api/core/v1beta2/machine_types.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,24 @@ const (
164164
MachineNotUpToDateReason = "NotUpToDate"
165165
)
166166

167+
// Machine's Updating condition and corresponding reasons.
168+
// Note: Updating condition is set by the Machine controller during in-place updates.
169+
const (
170+
// MachineUpdatingCondition is true while an in-place update is in progress on the Machine.
171+
// The condition is owned by the Machine controller and is used to track the progress of in-place updates.
172+
// This condition is considered when computing the UpToDate condition.
173+
MachineUpdatingCondition = "Updating"
174+
175+
// MachineNotUpdatingReason surfaces when the Machine is not performing an in-place update.
176+
MachineNotUpdatingReason = "NotUpdating"
177+
178+
// MachineInPlaceUpdatingReason surfaces when the Machine is waiting for the in-place update to complete.
179+
MachineInPlaceUpdatingReason = "InPlaceUpdating"
180+
181+
// MachineInPlaceUpdateFailedReason surfaces when the in-place update has failed.
182+
MachineInPlaceUpdateFailedReason = "InPlaceUpdateFailed"
183+
)
184+
167185
// Machine's BootstrapConfigReady condition and corresponding reasons.
168186
// Note: when possible, BootstrapConfigReady condition will use reasons surfaced from the underlying bootstrap config object.
169187
const (

controllers/alias.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,10 @@ func (r *ClusterReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manag
7272

7373
// MachineReconciler reconciles a Machine object.
7474
type MachineReconciler struct {
75-
Client client.Client
76-
APIReader client.Reader
77-
ClusterCache clustercache.ClusterCache
75+
Client client.Client
76+
APIReader client.Reader
77+
ClusterCache clustercache.ClusterCache
78+
RuntimeClient runtimeclient.Client
7879

7980
// WatchFilterValue is the label value used to filter events prior to reconciliation.
8081
WatchFilterValue string
@@ -90,6 +91,7 @@ func (r *MachineReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manag
9091
Client: r.Client,
9192
APIReader: r.APIReader,
9293
ClusterCache: r.ClusterCache,
94+
RuntimeClient: r.RuntimeClient,
9395
WatchFilterValue: r.WatchFilterValue,
9496
RemoteConditionsGracePeriod: r.RemoteConditionsGracePeriod,
9597
AdditionalSyncMachineLabels: r.AdditionalSyncMachineLabels,

internal/controllers/machine/machine_controller.go

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ import (
5252
"sigs.k8s.io/cluster-api/controllers/clustercache"
5353
"sigs.k8s.io/cluster-api/controllers/external"
5454
"sigs.k8s.io/cluster-api/controllers/noderefutil"
55+
runtimeclient "sigs.k8s.io/cluster-api/exp/runtime/client"
5556
"sigs.k8s.io/cluster-api/feature"
5657
"sigs.k8s.io/cluster-api/internal/contract"
5758
"sigs.k8s.io/cluster-api/internal/controllers/machine/drain"
@@ -93,9 +94,10 @@ var (
9394

9495
// Reconciler reconciles a Machine object.
9596
type Reconciler struct {
96-
Client client.Client
97-
APIReader client.Reader
98-
ClusterCache clustercache.ClusterCache
97+
Client client.Client
98+
APIReader client.Reader
99+
ClusterCache clustercache.ClusterCache
100+
RuntimeClient runtimeclient.Client
99101

100102
// WatchFilterValue is the label value used to filter events prior to reconciliation.
101103
WatchFilterValue string
@@ -129,6 +131,9 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt
129131
// to have some buffer.
130132
return errors.New("Client, APIReader and ClusterCache must not be nil and RemoteConditionsGracePeriod must not be < 2m")
131133
}
134+
if feature.Gates.Enabled(feature.InPlaceUpdates) && r.RuntimeClient == nil {
135+
return errors.New("RuntimeClient must not be nil when InPlaceUpdates feature gate is enabled")
136+
}
132137

133138
r.predicateLog = ptr.To(ctrl.LoggerFrom(ctx).WithValues("controller", "machine"))
134139
clusterToMachines, err := util.ClusterToTypedObjectsMapper(mgr.GetClient(), &clusterv1.MachineList{}, mgr.GetScheme())
@@ -282,7 +287,12 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re
282287
}
283288

284289
// Handle normal reconciliation loop.
285-
return doReconcile(ctx, alwaysReconcile, s)
290+
reconcileNormal := append(
291+
alwaysReconcile,
292+
r.reconcileInPlaceUpdate,
293+
)
294+
295+
return doReconcile(ctx, reconcileNormal, s)
286296
}
287297

288298
func patchMachine(ctx context.Context, patchHelper *patch.Helper, machine *clusterv1.Machine, options ...patch.Option) error {
@@ -326,6 +336,7 @@ func patchMachine(ctx context.Context, patchHelper *patch.Helper, machine *clust
326336
clusterv1.MachineNodeReadyCondition,
327337
clusterv1.MachineNodeHealthyCondition,
328338
clusterv1.MachineDeletingCondition,
339+
clusterv1.MachineUpdatingCondition,
329340
}},
330341
)
331342

@@ -397,6 +408,12 @@ type scope struct {
397408

398409
// deletingMessage is the message that should be used when setting the Deleting condition.
399410
deletingMessage string
411+
412+
// updatingReason is the reason that should be used when setting the Updating condition.
413+
updatingReason string
414+
415+
// updatingMessage is the message that should be used when setting the Updating condition.
416+
updatingMessage string
400417
}
401418

402419
func (r *Reconciler) reconcileMachineOwnerAndLabels(_ context.Context, s *scope) (ctrl.Result, error) {
Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package machine
18+
19+
import (
20+
"context"
21+
"fmt"
22+
"time"
23+
24+
"github.com/pkg/errors"
25+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
26+
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
27+
"k8s.io/apimachinery/pkg/runtime"
28+
"k8s.io/klog/v2"
29+
"k8s.io/utils/ptr"
30+
ctrl "sigs.k8s.io/controller-runtime"
31+
"sigs.k8s.io/controller-runtime/pkg/client"
32+
"sigs.k8s.io/controller-runtime/pkg/client/apiutil"
33+
34+
clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2"
35+
runtimehooksv1 "sigs.k8s.io/cluster-api/api/runtime/hooks/v1alpha1"
36+
"sigs.k8s.io/cluster-api/feature"
37+
"sigs.k8s.io/cluster-api/internal/hooks"
38+
)
39+
40+
// reconcileInPlaceUpdate handles the in-place update workflow for a Machine.
41+
func (r *Reconciler) reconcileInPlaceUpdate(ctx context.Context, s *scope) (ctrl.Result, error) {
42+
if !feature.Gates.Enabled(feature.InPlaceUpdates) {
43+
return ctrl.Result{}, nil
44+
}
45+
46+
log := ctrl.LoggerFrom(ctx)
47+
48+
machineAnnotations := s.machine.GetAnnotations()
49+
_, inPlaceUpdateInProgress := machineAnnotations[clusterv1.UpdateInProgressAnnotation]
50+
hasUpdateMachinePending := hooks.IsPending(runtimehooksv1.UpdateMachine, s.machine)
51+
52+
if !inPlaceUpdateInProgress {
53+
// Clean up any orphaned pending hooks and annotations before exiting.
54+
// This can happen if the in-place update annotation was removed from Machine
55+
// but the UpdateMachine hook is still pending or annotations are still on InfraMachine/BootstrapConfig.
56+
if hasUpdateMachinePending {
57+
log.Info("In-place update annotation removed but UpdateMachine hook still pending, cleaning up orphaned hook and annotations")
58+
if err := r.completeInPlaceUpdate(ctx, s); err != nil {
59+
return ctrl.Result{}, errors.Wrap(err, "failed to clean up orphaned UpdateMachine hook and annotations")
60+
}
61+
}
62+
63+
return ctrl.Result{}, nil
64+
}
65+
66+
// If hook is not pending, we're waiting for the owner controller to mark it as pending.
67+
if !hasUpdateMachinePending {
68+
log.Info("In-place update annotation is set, waiting for UpdateMachine hook to be marked as pending")
69+
return ctrl.Result{}, nil
70+
}
71+
72+
if !ptr.Deref(s.machine.Status.Initialization.InfrastructureProvisioned, false) {
73+
log.V(5).Info("Infrastructure not yet provisioned, skipping in-place update")
74+
return ctrl.Result{}, nil
75+
}
76+
if !ptr.Deref(s.machine.Status.Initialization.BootstrapDataSecretCreated, false) {
77+
log.V(5).Info("Bootstrap data secret not yet created, skipping in-place update")
78+
return ctrl.Result{}, nil
79+
}
80+
81+
if s.infraMachine == nil {
82+
s.updatingReason = clusterv1.MachineInPlaceUpdateFailedReason
83+
s.updatingMessage = "In-place update not possible: InfraMachine not found"
84+
return ctrl.Result{}, errors.New("in-place update failed: InfraMachine not found")
85+
}
86+
87+
infraReady := r.isInfraMachineReadyForUpdate(s)
88+
bootstrapReady := r.isBootstrapConfigReadyForUpdate(s)
89+
90+
if !infraReady || !bootstrapReady {
91+
log.Info("Waiting for InfraMachine and BootstrapConfig to be marked for in-place update")
92+
return ctrl.Result{}, nil
93+
}
94+
95+
result, message, err := r.callUpdateMachineHook(ctx, s)
96+
if err != nil {
97+
s.updatingReason = clusterv1.MachineInPlaceUpdateFailedReason
98+
s.updatingMessage = "UpdateMachine hook failed: please check controller logs for errors"
99+
return ctrl.Result{}, errors.Wrap(err, "in-place update failed")
100+
}
101+
102+
if result.RequeueAfter > 0 {
103+
s.updatingReason = clusterv1.MachineInPlaceUpdatingReason
104+
if message != "" {
105+
s.updatingMessage = fmt.Sprintf("In-place update in progress: %s", message)
106+
} else {
107+
s.updatingMessage = "In-place update in progress"
108+
}
109+
return result, nil
110+
}
111+
112+
log.Info("In-place update completed successfully")
113+
if err := r.completeInPlaceUpdate(ctx, s); err != nil {
114+
return ctrl.Result{}, errors.Wrap(err, "failed to complete in-place update")
115+
}
116+
117+
return ctrl.Result{}, nil
118+
}
119+
120+
// isInfraMachineReadyForUpdate checks if the InfraMachine has the in-place update annotation.
121+
func (r *Reconciler) isInfraMachineReadyForUpdate(s *scope) bool {
122+
_, hasAnnotation := s.infraMachine.GetAnnotations()[clusterv1.UpdateInProgressAnnotation]
123+
return hasAnnotation
124+
}
125+
126+
// isBootstrapConfigReadyForUpdate checks if the BootstrapConfig has the in-place update annotation.
127+
func (r *Reconciler) isBootstrapConfigReadyForUpdate(s *scope) bool {
128+
if s.bootstrapConfig == nil {
129+
return true
130+
}
131+
_, hasAnnotation := s.bootstrapConfig.GetAnnotations()[clusterv1.UpdateInProgressAnnotation]
132+
return hasAnnotation
133+
}
134+
135+
// callUpdateMachineHook calls the UpdateMachine runtime hook for the machine.
136+
func (r *Reconciler) callUpdateMachineHook(ctx context.Context, s *scope) (ctrl.Result, string, error) {
137+
log := ctrl.LoggerFrom(ctx)
138+
139+
// Validate that exactly one extension is registered for the UpdateMachine hook.
140+
// For the current iteration, we only support a single extension to ensure safe behavior.
141+
// Support for multiple extensions will be introduced in a future iteration.
142+
extensions, err := r.RuntimeClient.GetAllExtensions(ctx, runtimehooksv1.UpdateMachine, s.machine)
143+
if err != nil {
144+
return ctrl.Result{}, "", err
145+
}
146+
147+
if len(extensions) == 0 {
148+
return ctrl.Result{}, "", errors.New("no extensions registered for UpdateMachine hook")
149+
}
150+
151+
if len(extensions) > 1 {
152+
return ctrl.Result{}, "", errors.Errorf("multiple extensions registered for UpdateMachine hook: only one extension is supported, found %d extensions: %v", len(extensions), extensions)
153+
}
154+
155+
// Note: When building request message, dropping status; Runtime extension should treat UpdateMachine
156+
// requests as desired state; it is up to them to compare with current state and perform necessary actions.
157+
request := &runtimehooksv1.UpdateMachineRequest{
158+
Desired: runtimehooksv1.UpdateMachineRequestObjects{
159+
Machine: *cleanupMachine(s.machine),
160+
InfrastructureMachine: runtime.RawExtension{Object: cleanupUnstructured(s.infraMachine)},
161+
},
162+
}
163+
164+
if s.bootstrapConfig != nil {
165+
request.Desired.BootstrapConfig = runtime.RawExtension{Object: cleanupUnstructured(s.bootstrapConfig)}
166+
}
167+
168+
response := &runtimehooksv1.UpdateMachineResponse{}
169+
170+
if err := r.RuntimeClient.CallAllExtensions(ctx, runtimehooksv1.UpdateMachine, s.machine, request, response); err != nil {
171+
return ctrl.Result{}, "", err
172+
}
173+
174+
if response.GetRetryAfterSeconds() != 0 {
175+
log.Info(fmt.Sprintf("UpdateMachine hook requested retry after %d seconds", response.GetRetryAfterSeconds()))
176+
return ctrl.Result{RequeueAfter: time.Duration(response.GetRetryAfterSeconds()) * time.Second}, response.GetMessage(), nil
177+
}
178+
179+
log.Info("UpdateMachine hook completed successfully")
180+
return ctrl.Result{}, response.GetMessage(), nil
181+
}
182+
183+
// completeInPlaceUpdate removes in-place update annotations from InfraMachine, BootstrapConfig, Machine,
184+
// and then marks the UpdateMachine hook as done (removes it from pending-hooks annotation).
185+
func (r *Reconciler) completeInPlaceUpdate(ctx context.Context, s *scope) error {
186+
log := ctrl.LoggerFrom(ctx)
187+
188+
if err := r.removeInPlaceUpdateAnnotation(ctx, s.machine); err != nil {
189+
return err
190+
}
191+
192+
if s.infraMachine == nil {
193+
log.Info("InfraMachine not found during in-place update completion, skipping annotation removal")
194+
} else {
195+
if err := r.removeInPlaceUpdateAnnotation(ctx, s.infraMachine); err != nil {
196+
return err
197+
}
198+
}
199+
200+
if s.bootstrapConfig != nil {
201+
if err := r.removeInPlaceUpdateAnnotation(ctx, s.bootstrapConfig); err != nil {
202+
return err
203+
}
204+
}
205+
206+
if err := hooks.MarkAsDone(ctx, r.Client, s.machine, runtimehooksv1.UpdateMachine); err != nil {
207+
return err
208+
}
209+
210+
log.Info("In place upgrade completed!")
211+
return nil
212+
}
213+
214+
// removeInPlaceUpdateAnnotation removes the in-place update annotation from an object and patches it immediately.
215+
func (r *Reconciler) removeInPlaceUpdateAnnotation(ctx context.Context, obj client.Object) error {
216+
annotations := obj.GetAnnotations()
217+
if _, exists := annotations[clusterv1.UpdateInProgressAnnotation]; !exists {
218+
return nil
219+
}
220+
221+
gvk, err := apiutil.GVKForObject(obj, r.Client.Scheme())
222+
if err != nil {
223+
return errors.Wrapf(err, "failed to remove %s annotation from object %s", clusterv1.UpdateInProgressAnnotation, klog.KObj(obj))
224+
}
225+
226+
orig := obj.DeepCopyObject().(client.Object)
227+
delete(annotations, clusterv1.UpdateInProgressAnnotation)
228+
obj.SetAnnotations(annotations)
229+
230+
if err := r.Client.Patch(ctx, obj, client.MergeFrom(orig)); err != nil {
231+
return errors.Wrapf(err, "failed to remove %s annotation from %s %s", clusterv1.UpdateInProgressAnnotation, gvk.Kind, klog.KObj(obj))
232+
}
233+
234+
return nil
235+
}
236+
237+
func cleanupMachine(machine *clusterv1.Machine) *clusterv1.Machine {
238+
return &clusterv1.Machine{
239+
// Set GVK because object is later marshalled with json.Marshal when the hook request is sent.
240+
TypeMeta: metav1.TypeMeta{
241+
APIVersion: clusterv1.GroupVersion.String(),
242+
Kind: "Machine",
243+
},
244+
ObjectMeta: metav1.ObjectMeta{
245+
Name: machine.Name,
246+
Namespace: machine.Namespace,
247+
Labels: machine.Labels,
248+
Annotations: machine.Annotations,
249+
},
250+
Spec: *machine.Spec.DeepCopy(),
251+
}
252+
}
253+
254+
func cleanupUnstructured(u *unstructured.Unstructured) *unstructured.Unstructured {
255+
cleanedUpU := &unstructured.Unstructured{
256+
Object: map[string]interface{}{
257+
"apiVersion": u.GetAPIVersion(),
258+
"kind": u.GetKind(),
259+
"spec": u.Object["spec"],
260+
},
261+
}
262+
cleanedUpU.SetName(u.GetName())
263+
cleanedUpU.SetNamespace(u.GetNamespace())
264+
cleanedUpU.SetLabels(u.GetLabels())
265+
cleanedUpU.SetAnnotations(u.GetAnnotations())
266+
return cleanedUpU
267+
}

0 commit comments

Comments
 (0)