Skip to content

Commit 833cdb5

Browse files
committed
Sync VSphereMachines in VMG controller
- Sync VSphereMachines during day-2 operations in VMG controller - Only wait for all intended VSphereMachines during initial Cluster creation - Use annotations in VMG for per-md-zone info Signed-off-by: Gong Zhang <gong.zhang@broadcom.com>
1 parent 8474942 commit 833cdb5

File tree

4 files changed

+648
-177
lines changed

4 files changed

+648
-177
lines changed

controllers/vmware/virtualmachinegroup_controller.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121

2222
vmoprv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha2"
2323
apitypes "k8s.io/apimachinery/pkg/types"
24+
vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1"
2425
capvcontext "sigs.k8s.io/cluster-api-provider-vsphere/pkg/context"
2526
clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2"
2627
"sigs.k8s.io/cluster-api/util/predicates"
@@ -70,11 +71,16 @@ func AddVirtualMachineGroupControllerToManager(ctx context.Context, controllerMa
7071
&clusterv1.Cluster{},
7172
handler.EnqueueRequestsFromMapFunc(reconciler.ClusterToVirtualMachineGroup),
7273
).
74+
Watches(
75+
&vmwarev1.VSphereMachine{},
76+
handler.EnqueueRequestsFromMapFunc(reconciler.VSphereMachineToVirtualMachineGroup),
77+
).
7378
WithEventFilter(predicates.ResourceHasFilterLabel(mgr.GetScheme(), predicateLog, controllerManagerCtx.WatchFilterValue))
7479

7580
return builder.Complete(reconciler)
7681
}
7782

83+
// ClusterToVirtualMachineGroup maps Cluster events to VirtualMachineGroup reconcile requests.
7884
func (r VirtualMachineGroupReconciler) ClusterToVirtualMachineGroup(ctx context.Context, a ctrlclient.Object) []reconcile.Request {
7985
cluster, ok := a.(*clusterv1.Cluster)
8086
if !ok {
@@ -89,3 +95,36 @@ func (r VirtualMachineGroupReconciler) ClusterToVirtualMachineGroup(ctx context.
8995
},
9096
}}
9197
}
98+
99+
// VsphereMachineToVirtualMachineGroup maps VSphereMachine events to VirtualMachineGroup reconcile requests.
100+
// This handler only processes VSphereMachine objects for Day-2 operations, ensuring VSphereMachine state stays
101+
// in sync with its owning VMG. If no corresponding VMG is found, this is a no-op.
102+
103+
func (r VirtualMachineGroupReconciler) VSphereMachineToVirtualMachineGroup(ctx context.Context, a ctrlclient.Object) []reconcile.Request {
104+
vSphereMachine, ok := a.(*vmwarev1.VSphereMachine)
105+
if !ok {
106+
return nil
107+
}
108+
109+
clusterName, ok := vSphereMachine.Labels[clusterv1.ClusterNameLabel]
110+
if !ok || clusterName == "" {
111+
return nil
112+
}
113+
114+
vmg := &vmoprv1.VirtualMachineGroup{}
115+
err := r.Client.Get(ctx, apitypes.NamespacedName{
116+
Namespace: vSphereMachine.Namespace,
117+
Name: clusterName,
118+
}, vmg)
119+
120+
if err != nil {
121+
return nil
122+
}
123+
124+
return []reconcile.Request{{
125+
NamespacedName: apitypes.NamespacedName{
126+
Namespace: vmg.Namespace,
127+
Name: vmg.Name,
128+
},
129+
}}
130+
}

controllers/vmware/virtualmachinegroup_reconciler.go

Lines changed: 50 additions & 175 deletions
Original file line numberDiff line numberDiff line change
@@ -69,61 +69,6 @@ func (r *VirtualMachineGroupReconciler) Reconcile(ctx context.Context, req ctrl.
6969
return reconcile.Result{}, nil
7070
}
7171

72-
vmg := &vmoprv1.VirtualMachineGroup{}
73-
key := &client.ObjectKey{
74-
Namespace: cluster.Namespace,
75-
Name: cluster.Name,
76-
}
77-
78-
if err := r.Client.Get(ctx, *key, vmg); err != nil {
79-
if !apierrors.IsNotFound(err) {
80-
log.Error(err, "failed to get VirtualMachineGroup")
81-
return ctrl.Result{}, err
82-
}
83-
vmg = &vmoprv1.VirtualMachineGroup{
84-
ObjectMeta: metav1.ObjectMeta{
85-
Name: key.Name,
86-
Namespace: key.Namespace,
87-
},
88-
}
89-
}
90-
91-
// // Proceed only if multiple zones are available.
92-
// // If there is only one zone(default), node automatic placement is unnecessary
93-
// // because all Machine Deployments will be scheduled into that single zone.
94-
// // The VSphereCluster resource discovers the underlying zones,
95-
// // which we treat as the source of truth.
96-
// vsphereClusterList := &vmwarev1.VSphereClusterList{}
97-
// labelKey := clusterv1.ClusterNameLabel
98-
// if err := r.Client.List(ctx, vsphereClusterList,
99-
// client.InNamespace(cluster.Namespace),
100-
// client.MatchingLabels(map[string]string{labelKey: cluster.Name}),
101-
// ); err != nil {
102-
// return reconcile.Result{}, fmt.Errorf("failed to list VSphereClusters in namespace %s: %w", cluster.Namespace, err)
103-
// }
104-
105-
// vsphereCluster := &vmwarev1.VSphereCluster{}
106-
// switch len(vsphereClusterList.Items) {
107-
// case 0:
108-
// return reconcile.Result{}, fmt.Errorf("no VSphereCluster found with label %s=%s in namespace %s", labelKey, cluster.Name, cluster.Namespace)
109-
// case 1:
110-
// vsphereCluster = &vsphereClusterList.Items[0]
111-
// default:
112-
// return reconcile.Result{}, fmt.Errorf("found %d VSphereClusters with label %s=%s in namespace %s; expected exactly 1", len(vsphereClusterList.Items), labelKey, cluster.Name, cluster.Namespace)
113-
// }
114-
115-
// // Fetch the VSphereCluster instance.
116-
// if vsphereCluster.Status.Ready != true {
117-
// log.Info("Waiting for VSphereCluster to be ready with failure domain discovered")
118-
// return reconcile.Result{RequeueAfter: reconciliationDelay}, nil
119-
120-
// }
121-
122-
// if len(vsphereCluster.Status.FailureDomains) <= 1 {
123-
// log.Info("Single or no zone detected; skipping node automatic placement")
124-
// return reconcile.Result{}, nil
125-
// }
126-
12772
// If ControlPlane haven't initialized, requeue it since VSphereMachines of MachineDeployment will only be created after
12873
// ControlPlane is initialized.
12974
if !conditions.IsTrue(cluster, clusterv1.ClusterControlPlaneInitializedCondition) {
@@ -132,69 +77,59 @@ func (r *VirtualMachineGroupReconciler) Reconcile(ctx context.Context, req ctrl.
13277
}
13378

13479
// Continue with the main logic.
135-
return r.createOrUpdateVMG(ctx, cluster, vmg)
80+
return r.createOrUpdateVMG(ctx, cluster)
13681

13782
}
13883

13984
// createOrUpdateVMG Create or Update VirtualMachineGroup
140-
func (r *VirtualMachineGroupReconciler) createOrUpdateVMG(ctx context.Context, cluster *clusterv1.Cluster, desiredVMG *vmoprv1.VirtualMachineGroup) (_ reconcile.Result, reterr error) {
85+
func (r *VirtualMachineGroupReconciler) createOrUpdateVMG(ctx context.Context, cluster *clusterv1.Cluster) (_ reconcile.Result, reterr error) {
14186
log := ctrl.LoggerFrom(ctx)
14287

143-
// Calculate expected Machines of all MachineDeployments.
144-
expectedMachines := getExpectedMachines(cluster)
145-
if expectedMachines == 0 {
146-
log.Info("none of MachineDeployments specifies replica and node auto replacement doesn't support this scenario")
147-
return reconcile.Result{}, nil
148-
}
149-
15088
// Calculate current Machines of all MachineDeployments.
151-
currentVSphereMachines, err := getCurrentVSphereMachines(ctx, r.Client, cluster.Namespace, cluster.Name)
89+
current, err := getCurrentVSphereMachines(ctx, r.Client, cluster.Namespace, cluster.Name)
15290
if err != nil {
15391
return reconcile.Result{}, errors.Wrapf(err, "failed to get current VSphereMachine of cluster %s/%s",
15492
cluster.Name, cluster.Namespace)
15593
}
15694

157-
// Wait until all VSphereMachines are create, this could happen during initial deployment or day-2 like cluster update.
158-
current := int32(len(currentVSphereMachines))
159-
if current < expectedMachines {
160-
// Only check timeout if VMG doesn't exist.
161-
// if desiredVMG.CreationTimestamp.IsZero() {
162-
// if _, err := r.isMDDefined(ctx, cluster); err != nil {
163-
// log.Error(err, "cluster MachineDeployments are not defined")
164-
// return reconcile.Result{}, nil
165-
// }
166-
167-
// mdList := &clusterv1.MachineDeploymentList{}
168-
// if err := r.Client.List(ctx, mdList,
169-
// client.InNamespace(cluster.Namespace),
170-
// client.MatchingLabels{clusterv1.ClusterNameLabel: cluster.Name},
171-
// ); err != nil {
172-
// return reconcile.Result{}, errors.Errorf("failed to list MachineDeployments: %w", err)
173-
// }
174-
175-
// // If no deployments exist, report error
176-
// if len(mdList.Items) == 0 {
177-
// return reconcile.Result{}, errors.Errorf("no MachineDeployments found for cluster %s/%s", cluster.Namespace, cluster.Name)
178-
// }
179-
180-
// // Check one MachineDeployment's creation timestamp
181-
// firstMD := mdList.Items[0]
182-
// if time.Since(firstMD.CreationTimestamp.Time) > 1*time.Minute {
183-
// log.Error(errors.New("timeout waiting for VSphereMachines"), "1 minute timeout after MachineDeployment creation",
184-
// "MachineDeployment", firstMD.Name, "Cluster", cluster.Namespace+"/"+cluster.Name)
185-
186-
// return reconcile.Result{}, nil
187-
// }
188-
// }
189-
190-
log.Info("current VSphereMachines do not match expected", "Expected:", expectedMachines,
191-
"Current:", current, "ClusterName", cluster.Name, "Namespace", cluster.Namespace)
192-
return reconcile.Result{RequeueAfter: reconciliationDelay}, nil
95+
desiredVMG := &vmoprv1.VirtualMachineGroup{}
96+
key := &client.ObjectKey{
97+
Namespace: cluster.Namespace,
98+
Name: cluster.Name,
99+
}
100+
101+
if err := r.Client.Get(ctx, *key, desiredVMG); err != nil {
102+
if !apierrors.IsNotFound(err) {
103+
log.Error(err, "failed to get VirtualMachineGroup")
104+
return ctrl.Result{}, err
105+
}
106+
107+
// Calculate expected Machines of all MachineDeployments.
108+
expected := getExpectedVSphereMachines(cluster)
109+
if expected == 0 {
110+
log.Info("none of MachineDeployments specifies replica and node auto replacement doesn't support this scenario")
111+
return reconcile.Result{}, nil
112+
}
113+
114+
// Wait for all intended VSphereMachines corresponding to MachineDeployment to exist only during initial Cluster creation.
115+
current := int32(len(current))
116+
if current < expected {
117+
log.Info("current VSphereMachines do not match expected", "Expected:", expected,
118+
"Current:", current, "ClusterName", cluster.Name, "Namespace", cluster.Namespace)
119+
return reconcile.Result{RequeueAfter: reconciliationDelay}, nil
120+
}
121+
122+
desiredVMG = &vmoprv1.VirtualMachineGroup{
123+
ObjectMeta: metav1.ObjectMeta{
124+
Name: key.Name,
125+
Namespace: key.Namespace,
126+
},
127+
}
193128
}
194129

195130
// Generate VM names according to the naming strategy set on the VSphereMachine.
196-
vmNames := make([]string, 0, len(currentVSphereMachines))
197-
for _, machine := range currentVSphereMachines {
131+
vmNames := make([]string, 0, len(current))
132+
for _, machine := range current {
198133
name, err := GenerateVirtualMachineName(machine.Name, machine.Spec.NamingStrategy)
199134
if err != nil {
200135
return reconcile.Result{}, err
@@ -206,7 +141,7 @@ func (r *VirtualMachineGroupReconciler) createOrUpdateVMG(ctx context.Context, c
206141
return vmNames[i] < vmNames[j]
207142
})
208143

209-
members := make([]vmoprv1.GroupMember, 0, len(currentVSphereMachines))
144+
members := make([]vmoprv1.GroupMember, 0, len(current))
210145
for _, name := range vmNames {
211146
members = append(members, vmoprv1.GroupMember{
212147
Name: name,
@@ -242,7 +177,7 @@ func (r *VirtualMachineGroupReconciler) createOrUpdateVMG(ctx context.Context, c
242177
// Add per-md-zone label for day-2 operations once placement of a VM belongs to MachineDeployment is done
243178
// Do not update per-md-zone label once set, as placement decision should not change without user explicitly
244179
// ask.
245-
placementDecisionLabels, err := GenerateVMGPlacementLabels(ctx, desiredVMG, mdNames)
180+
placementDecisionLabels, err := GenerateVMGPlacementAnnotations(ctx, desiredVMG, mdNames)
246181
if err != nil {
247182
return err
248183
}
@@ -318,9 +253,9 @@ func (r *VirtualMachineGroupReconciler) isExplicitPlacement(cluster *clusterv1.C
318253
return false, nil
319254
}
320255

321-
// getExpectedMachines returns the total number of replicas across all
256+
// getExpectedVSphereMachines returns the total number of replicas across all
322257
// MachineDeployments in the Cluster's Topology.Workers.
323-
func getExpectedMachines(cluster *clusterv1.Cluster) int32 {
258+
func getExpectedVSphereMachines(cluster *clusterv1.Cluster) int32 {
324259
if !cluster.Spec.Topology.IsDefined() {
325260
return 0
326261
}
@@ -337,66 +272,6 @@ func getExpectedMachines(cluster *clusterv1.Cluster) int32 {
337272
func getCurrentVSphereMachines(ctx context.Context, kubeClient client.Client, clusterNamespace, clusterName string) ([]vmwarev1.VSphereMachine, error) {
338273
log := ctrl.LoggerFrom(ctx)
339274

340-
// // List MachineDeployments for the cluster.
341-
// var mdList clusterv1.MachineDeploymentList
342-
// if err := kubeClient.List(ctx, &mdList,
343-
// client.InNamespace(clusterNamespace),
344-
// client.MatchingLabels{clusterv1.ClusterNameLabel: clusterName},
345-
// ); err != nil {
346-
// return nil, errors.Wrapf(err, "failed to list MachineDeployments for cluster %s/%s", clusterNamespace, clusterName)
347-
// }
348-
// validMDs := make(map[string]struct{})
349-
// for _, md := range mdList.Items {
350-
// validMDs[md.Name] = struct{}{}
351-
// }
352-
// log.V(6).Info("Identified active MachineDeployments", "count", len(validMDs))
353-
354-
// // List MachineSets and filter those owned by a valid MachineDeployment.
355-
// var msList clusterv1.MachineSetList
356-
// if err := kubeClient.List(ctx, &msList,
357-
// client.InNamespace(clusterNamespace),
358-
// client.MatchingLabels{clusterv1.ClusterNameLabel: clusterName},
359-
// ); err != nil {
360-
// return nil, errors.Wrapf(err, "failed to list MachineSets for cluster %s/%s", clusterNamespace, clusterName)
361-
// }
362-
// validMS := make(map[string]struct{})
363-
// for _, ms := range msList.Items {
364-
// for _, owner := range ms.OwnerReferences {
365-
// if owner.Kind == "MachineDeployment" && owner.APIVersion == clusterv1.GroupVersion.String() {
366-
// if _, ok := validMDs[owner.Name]; ok {
367-
// validMS[ms.Name] = struct{}{}
368-
// break
369-
// }
370-
// }
371-
// }
372-
// }
373-
// log.V(6).Info("Filtered MachineSets owned by valid MachineDeployments", "count", len(validMS))
374-
375-
// // List Machines and filter those owned by valid MachineSets (skip control plane).
376-
// var machineList clusterv1.MachineList
377-
// if err := kubeClient.List(ctx, &machineList,
378-
// client.InNamespace(clusterNamespace),
379-
// client.MatchingLabels{clusterv1.ClusterNameLabel: clusterName},
380-
// ); err != nil {
381-
// return nil, errors.Wrapf(err, "failed to list Machines for cluster %s/%s", clusterNamespace, clusterName)
382-
// }
383-
384-
// workerMachines := make(map[string]struct{})
385-
// for _, m := range machineList.Items {
386-
// if _, isControlPlane := m.Labels[clusterv1.MachineControlPlaneLabel]; isControlPlane {
387-
// continue
388-
// }
389-
// for _, owner := range m.OwnerReferences {
390-
// if owner.Kind == "MachineSet" && owner.APIVersion == clusterv1.GroupVersion.String() {
391-
// if _, ok := validMS[owner.Name]; ok {
392-
// workerMachines[m.Name] = struct{}{}
393-
// break
394-
// }
395-
// }
396-
// }
397-
// }
398-
// log.V(5).Info("Identified worker Machines linked to MachineSets", "count", len(workerMachines))
399-
400275
// List VSphereMachine objects
401276
var vsMachineList vmwarev1.VSphereMachineList
402277
if err := kubeClient.List(ctx, &vsMachineList,
@@ -418,10 +293,10 @@ func getCurrentVSphereMachines(ctx context.Context, kubeClient client.Client, cl
418293
return result, nil
419294
}
420295

421-
// GenerateVMGPlacementLabels returns labels per MachineDeployment which contain zone info for placed VMs for day-2 operations.
422-
func GenerateVMGPlacementLabels(ctx context.Context, vmg *vmoprv1.VirtualMachineGroup, machineDeployments []string) (map[string]string, error) {
296+
// GenerateVMGPlacementAnnotations returns annotations per MachineDeployment which contains zone info for placed VMs for day-2 operations.
297+
func GenerateVMGPlacementAnnotations(ctx context.Context, vmg *vmoprv1.VirtualMachineGroup, machineDeployments []string) (map[string]string, error) {
423298
log := ctrl.LoggerFrom(ctx)
424-
labels := make(map[string]string)
299+
annotations := make(map[string]string)
425300

426301
// For each member in status
427302
for _, member := range vmg.Status.Members {
@@ -436,11 +311,11 @@ func GenerateVMGPlacementLabels(ctx context.Context, vmg *vmoprv1.VirtualMachine
436311
}
437312

438313
// Check if this VM belongs to any of our target Machine Deployments
439-
// Use machine deployment name as the label key.
314+
// Use machine deployment name as the annotation key prefix.
440315
for _, md := range machineDeployments {
441316
// Check if we already found placement for this Machine Deployments
442-
if _, found := labels[md]; found {
443-
log.Info(fmt.Sprintf("Skipping Machine Deployment %s, placement already found", md))
317+
if _, found := annotations[fmt.Sprintf("zone.cluster.x-k8s.io/%s", md)]; found {
318+
log.Info(fmt.Sprintf("Skipping Machine Deployment %s, placement already found in annotations", md))
444319
continue
445320
}
446321

@@ -462,12 +337,12 @@ func GenerateVMGPlacementLabels(ctx context.Context, vmg *vmoprv1.VirtualMachine
462337
}
463338

464339
log.Info(fmt.Sprintf("VM %s in VMG %s/%s has been placed in zone %s", member.Name, vmg.Namespace, vmg.Name, zone))
465-
labels[fmt.Sprintf("zone.cluster.x-k8s.io/%s", md)] = zone
340+
annotations[fmt.Sprintf("zone.cluster.x-k8s.io/%s", md)] = zone
466341
}
467342
}
468343
}
469344

470-
return labels, nil
345+
return annotations, nil
471346
}
472347

473348
// TODO: de-dup this logic with vmopmachine.go

0 commit comments

Comments
 (0)