From fa60556a5990a2510c68e9ef0153014960472ffc Mon Sep 17 00:00:00 2001 From: Gong Zhang Date: Fri, 26 Sep 2025 14:23:02 +0800 Subject: [PATCH 01/25] Bump VMOP and add NodeAutoPlacement Feature Gate - Bump VMOP including Node AF/AAF support - Add NodeAutoPlacement Feature Gate (cherry picked from commit 700c8aee46af17b6a2eae9bd4722300104c96629) --- config/manager/manager.yaml | 2 +- feature/feature.go | 6 ++++++ go.mod | 4 ++-- go.sum | 4 ++-- test/go.mod | 5 +++-- test/go.sum | 4 ++-- 6 files changed, 16 insertions(+), 9 deletions(-) diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 401dd765e5..102217c078 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -21,7 +21,7 @@ spec: - "--diagnostics-address=${CAPI_DIAGNOSTICS_ADDRESS:=:8443}" - "--insecure-diagnostics=${CAPI_INSECURE_DIAGNOSTICS:=false}" - --v=4 - - "--feature-gates=MultiNetworks=${EXP_MULTI_NETWORKS:=false},NodeAntiAffinity=${EXP_NODE_ANTI_AFFINITY:=false},NamespaceScopedZones=${EXP_NAMESPACE_SCOPED_ZONES:=false},PriorityQueue=${EXP_PRIORITY_QUEUE:=false}" + - "--feature-gates=MultiNetworks=${EXP_MULTI_NETWORKS:=false},NodeAntiAffinity=${EXP_NODE_ANTI_AFFINITY:=false},NamespaceScopedZones=${EXP_NAMESPACE_SCOPED_ZONES:=false},NodeAutoPlacement=${EXP_NODE_AUTO_PLACEMENT:=false},PriorityQueue=${EXP_PRIORITY_QUEUE:=false}" image: controller:latest imagePullPolicy: IfNotPresent name: manager diff --git a/feature/feature.go b/feature/feature.go index a233d351c7..1799aaeb68 100644 --- a/feature/feature.go +++ b/feature/feature.go @@ -44,6 +44,11 @@ const ( // alpha: v1.11 NamespaceScopedZones featuregate.Feature = "NamespaceScopedZones" + // NodeAutoPlacement is a feature gate for the NodeAutoPlacement functionality for supervisor. + // + // alpha: v1.15 + NodeAutoPlacement featuregate.Feature = "NodeAutoPlacement" + // PriorityQueue is a feature gate that controls if the controller uses the controller-runtime PriorityQueue // instead of the default queue implementation. 
// @@ -61,6 +66,7 @@ var defaultCAPVFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{ // Every feature should be initiated here: NodeAntiAffinity: {Default: false, PreRelease: featuregate.Alpha}, NamespaceScopedZones: {Default: false, PreRelease: featuregate.Alpha}, + NodeAutoPlacement: {Default: false, PreRelease: featuregate.Alpha}, PriorityQueue: {Default: false, PreRelease: featuregate.Alpha}, MultiNetworks: {Default: false, PreRelease: featuregate.Alpha}, } diff --git a/go.mod b/go.mod index 44f52fa9ea..845cbf2a17 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,7 @@ go 1.24.0 replace sigs.k8s.io/cluster-api => sigs.k8s.io/cluster-api v1.11.0-rc.0.0.20250905091528-eb4e38c46ff6 -replace github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels => github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels v0.0.0-20240404200847-de75746a9505 +replace github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels => github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels v1.9.1-0.20250908141901-a9e1dfbc0045 // The version of vm-operator should be kept in sync with the manifests at: config/deployments/integration-tests replace github.com/vmware-tanzu/vm-operator/api => github.com/vmware-tanzu/vm-operator/api v1.8.6 @@ -13,7 +13,7 @@ require ( github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d // The version of vm-operator should be kept in sync with the manifests at: config/deployments/integration-tests - github.com/vmware-tanzu/vm-operator/api v1.8.6 + github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20250908141901-a9e1dfbc0045 github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505 github.com/vmware/govmomi v0.52.0 ) diff --git a/go.sum b/go.sum index 47a16466b0..934a82e6ac 100644 --- a/go.sum +++ b/go.sum @@ -243,8 +243,8 @@ github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d h1:c github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d/go.mod h1:JbFOh22iDsT5BowJe0GgpMI5e2/S7cWaJlv9LdURVQM= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d h1:z9lrzKVtNlujduv9BilzPxuge/LE2F0N1ms3TP4JZvw= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d/go.mod h1:Q4JzNkNMvjo7pXtlB5/R3oME4Nhah7fAObWgghVmtxk= -github.com/vmware-tanzu/vm-operator/api v1.8.6 h1:NIndORjcnSmIlQsCMIewpIwg/ocRVDh2lYjOroTVLrU= -github.com/vmware-tanzu/vm-operator/api v1.8.6/go.mod h1:HHA2SNI9B5Yqtyp5t+Gt9WTWBi/fIkM6+MukDDSf11A= +github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20250908141901-a9e1dfbc0045 h1:zME8crazIAWVJGboJpSLl+qcRYQ8yA6hPQojz28gY5M= +github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20250908141901-a9e1dfbc0045/go.mod h1:hkc/QZCSHcosWWMPS6VWWR12WenZcNE3BaTJ/8A8sNE= github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505 h1:y4wXx1FUFqqSgJ/xUOEM1DLS2Uu0KaeLADWpzpioGTU= github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505/go.mod h1:5rqRJ9zGR+KnKbkGx373WgN8xJpvAj99kHnfoDYRO5I= github.com/vmware/govmomi v0.52.0 h1:JyxQ1IQdllrY7PJbv2am9mRsv3p9xWlIQ66bv+XnyLw= diff --git a/test/go.mod b/test/go.mod index bcab8743c0..4339ef147d 100644 --- a/test/go.mod +++ b/test/go.mod @@ -8,7 +8,7 @@ replace sigs.k8s.io/cluster-api/test => sigs.k8s.io/cluster-api/test v1.11.0-rc. 
replace sigs.k8s.io/cluster-api-provider-vsphere => ../ -replace github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels => github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels v0.0.0-20240404200847-de75746a9505 +replace github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels => github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels v1.9.1-0.20250908141901-a9e1dfbc0045 // The version of vm-operator should be kept in sync with the manifests at: config/deployments/integration-testsz replace github.com/vmware-tanzu/vm-operator/api => github.com/vmware-tanzu/vm-operator/api v1.8.6 @@ -16,8 +16,9 @@ replace github.com/vmware-tanzu/vm-operator/api => github.com/vmware-tanzu/vm-op require ( github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d // The version of vm-operator should be kept in sync with the manifests at: config/deployments/integration-tests - github.com/vmware-tanzu/vm-operator/api v1.8.6 + github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20250908141901-a9e1dfbc0045 github.com/vmware/govmomi v0.52.0 + ) require ( diff --git a/test/go.sum b/test/go.sum index 8ac8dfd79b..0f616f4cb9 100644 --- a/test/go.sum +++ b/test/go.sum @@ -360,8 +360,8 @@ github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d h1:c github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d/go.mod h1:JbFOh22iDsT5BowJe0GgpMI5e2/S7cWaJlv9LdURVQM= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d h1:z9lrzKVtNlujduv9BilzPxuge/LE2F0N1ms3TP4JZvw= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d/go.mod h1:Q4JzNkNMvjo7pXtlB5/R3oME4Nhah7fAObWgghVmtxk= -github.com/vmware-tanzu/vm-operator/api v1.8.6 h1:NIndORjcnSmIlQsCMIewpIwg/ocRVDh2lYjOroTVLrU= -github.com/vmware-tanzu/vm-operator/api v1.8.6/go.mod h1:HHA2SNI9B5Yqtyp5t+Gt9WTWBi/fIkM6+MukDDSf11A= +github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20250908141901-a9e1dfbc0045 h1:zME8crazIAWVJGboJpSLl+qcRYQ8yA6hPQojz28gY5M= +github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20250908141901-a9e1dfbc0045/go.mod h1:hkc/QZCSHcosWWMPS6VWWR12WenZcNE3BaTJ/8A8sNE= github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505 h1:y4wXx1FUFqqSgJ/xUOEM1DLS2Uu0KaeLADWpzpioGTU= github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505/go.mod h1:5rqRJ9zGR+KnKbkGx373WgN8xJpvAj99kHnfoDYRO5I= github.com/vmware/govmomi v0.52.0 h1:JyxQ1IQdllrY7PJbv2am9mRsv3p9xWlIQ66bv+XnyLw= From b52056f4e508fc603cbb577cded6285fc478b8e4 Mon Sep 17 00:00:00 2001 From: Gong Zhang Date: Thu, 18 Sep 2025 17:51:23 +0800 Subject: [PATCH 02/25] Add VMG controller of node auto placement (cherry picked from commit cfeb862ab03b32f8f71000feb2840e0da063eefe) --- config/rbac/role.yaml | 7 + .../vmware/virtualmachinegroup_controller.go | 91 ++++ .../vmware/virtualmachinegroup_reconciler.go | 465 ++++++++++++++++++ controllers/vspherecluster_reconciler.go | 1 + main.go | 10 + packaging/go.sum | 4 +- pkg/services/network/netop_provider.go | 2 +- pkg/services/network/nsxt_provider.go | 2 +- pkg/services/network/nsxt_vpc_provider.go | 6 +- .../vmoperator/control_plane_endpoint.go | 2 +- test/go.mod | 1 - 11 files changed, 582 insertions(+), 9 deletions(-) create mode 100644 controllers/vmware/virtualmachinegroup_controller.go create mode 100644 controllers/vmware/virtualmachinegroup_reconciler.go diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index ff4613da71..c57a326fb9 100644 --- a/config/rbac/role.yaml +++ 
b/config/rbac/role.yaml @@ -249,6 +249,7 @@ rules: - apiGroups: - vmoperator.vmware.com resources: + - virtualmachinegroups - virtualmachineimages - virtualmachineimages/status - virtualmachines @@ -264,6 +265,12 @@ rules: - patch - update - watch +- apiGroups: + - vmoperator.vmware.com + resources: + - virtualmachinegroups/status + verbs: + - get - apiGroups: - vmware.com resources: diff --git a/controllers/vmware/virtualmachinegroup_controller.go b/controllers/vmware/virtualmachinegroup_controller.go new file mode 100644 index 0000000000..edfc5d0211 --- /dev/null +++ b/controllers/vmware/virtualmachinegroup_controller.go @@ -0,0 +1,91 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package vmware + +import ( + "context" + + vmoprv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha2" + apitypes "k8s.io/apimachinery/pkg/types" + capvcontext "sigs.k8s.io/cluster-api-provider-vsphere/pkg/context" + clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" + "sigs.k8s.io/cluster-api/util/predicates" + ctrl "sigs.k8s.io/controller-runtime" + ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" +) + +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters,verbs=get;list;watch +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters/status,verbs=get +// +kubebuilder:rbac:groups=vmoperator.vmware.com,resources=virtualmachinegroups,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=vmoperator.vmware.com,resources=virtualmachinegroups/status,verbs=get +// +kubebuilder:rbac:groups=vmware.infrastructure.cluster.x-k8s.io,resources=vsphereclusters,verbs=get;list;watch +// +kubebuilder:rbac:groups=vmware.infrastructure.cluster.x-k8s.io,resources=vspheremachines,verbs=get;list;watch +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinedeployments,verbs=get;list;watch +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines,verbs=get;list;watch + +// AddVirtualMachineGroupControllerToManager adds the VirtualMachineGroup controller to the provided +// manager. 
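+// It watches VirtualMachineGroup objects that carry the cluster-name label and also watches
+// Clusters, enqueuing the VirtualMachineGroup with the same name and namespace as the Cluster
+// so the group is reconciled whenever the Cluster changes.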
+func AddVirtualMachineGroupControllerToManager(ctx context.Context, controllerManagerCtx *capvcontext.ControllerManagerContext, mgr manager.Manager, options controller.Options) error { + predicateLog := ctrl.LoggerFrom(ctx).WithValues("controller", "virtualmachinegroup") + + reconciler := &VirtualMachineGroupReconciler{ + Client: controllerManagerCtx.Client, + Recorder: mgr.GetEventRecorderFor("virtualmachinegroup-controller"), + } + + // Predicate: only allow VMG with the cluster-name label + hasClusterNameLabel := predicate.NewPredicateFuncs(func(obj ctrlclient.Object) bool { + labels := obj.GetLabels() + if labels == nil { + return false + } + _, ok := labels[clusterv1.ClusterNameLabel] + return ok + }) + + builder := ctrl.NewControllerManagedBy(mgr). + For(&vmoprv1.VirtualMachineGroup{}). + WithOptions(options). + WithEventFilter(hasClusterNameLabel). + Watches( + &clusterv1.Cluster{}, + handler.EnqueueRequestsFromMapFunc(reconciler.ClusterToVirtualMachineGroup), + ). + WithEventFilter(predicates.ResourceHasFilterLabel(mgr.GetScheme(), predicateLog, controllerManagerCtx.WatchFilterValue)) + + return builder.Complete(reconciler) +} + +func (r VirtualMachineGroupReconciler) ClusterToVirtualMachineGroup(ctx context.Context, a ctrlclient.Object) []reconcile.Request { + cluster, ok := a.(*clusterv1.Cluster) + if !ok { + return nil + } + + // Always enqueue a request for the "would-be VMG" + return []reconcile.Request{{ + NamespacedName: apitypes.NamespacedName{ + Namespace: cluster.Namespace, + Name: cluster.Name, + }, + }} +} diff --git a/controllers/vmware/virtualmachinegroup_reconciler.go b/controllers/vmware/virtualmachinegroup_reconciler.go new file mode 100644 index 0000000000..df5490bf05 --- /dev/null +++ b/controllers/vmware/virtualmachinegroup_reconciler.go @@ -0,0 +1,465 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package vmware contains the VirtualMachineGroup Reconciler. +package vmware + +import ( + "context" + "fmt" + "strings" + "time" + + "github.com/pkg/errors" + vmoprv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha2" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/tools/record" + "k8s.io/klog/v2" + "sigs.k8s.io/cluster-api/util/conditions" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1" + clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" + ctrlutil "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" +) + +const ( + reconciliationDelay = 10 * time.Second +) + +// VirtualMachineGroupReconciler reconciles VirtualMachineGroup. 
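+// It ensures a per-cluster VirtualMachineGroup exists whose boot-order members are the cluster's
+// worker VirtualMachines, and records the per-MachineDeployment placement zones reported by
+// VM Operator as labels on the group.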
+type VirtualMachineGroupReconciler struct { + Client client.Client + Recorder record.EventRecorder +} + +func (r *VirtualMachineGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) { + log := ctrl.LoggerFrom(ctx) + + // Fetch the Cluster instance. + cluster := &clusterv1.Cluster{} + if err := r.Client.Get(ctx, req.NamespacedName, cluster); err != nil { + if apierrors.IsNotFound(err) { + return reconcile.Result{}, nil + } + return reconcile.Result{}, err + } + + log = log.WithValues("Cluster", klog.KObj(cluster)) + // If Cluster is deleted, just return as VirtualMachineGroup will be GCed and no extral process needed. + if !cluster.DeletionTimestamp.IsZero() { + return reconcile.Result{}, nil + } + + vmg := &vmoprv1.VirtualMachineGroup{} + + key := &client.ObjectKey{ + Namespace: cluster.Namespace, + Name: cluster.Name, + } + + if err := r.Client.Get(ctx, *key, vmg); err != nil { + if !apierrors.IsNotFound(err) { + log.Error(err, "failed to get VirtualMachineGroup") + return ctrl.Result{}, err + } + // Define the VM Operator VirtualMachine resource to reconcile. + vmg = &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: key.Name, + Namespace: key.Namespace, + }, + } + } + + // If as least one MachineDeployment of Cluster is specified with failureDomain, then return. + // No need to handle Cluster using explicit placement. For VC 9.1, no mixed mode of explicit and automatic placement + // during initial deployment. + if vmg.CreationTimestamp.IsZero() { + explicitPlacement, err := r.isExplicitPlacement(cluster) + if err != nil { + return reconcile.Result{}, err + } + + if explicitPlacement { + log.Info("No need to create VirtualMachineGroup for Cluster using explicit placement.") + return reconcile.Result{}, nil + } + } + + // Proceed only if multiple zones are available. + // If there is only one zone(default), node automatic placement is unnecessary + // because all Machine Deployments will be scheduled into that single zone. + // The VSphereCluster resource discovers the underlying zones, + // which we treat as the source of truth. + vsphereClusterList := &vmwarev1.VSphereClusterList{} + labelKey := clusterv1.ClusterNameLabel + if err := r.Client.List(ctx, vsphereClusterList, + client.InNamespace(cluster.Namespace), + client.MatchingLabels(map[string]string{labelKey: cluster.Name}), + ); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to list VSphereClusters in namespace %s: %w", cluster.Namespace, err) + } + + vsphereCluster := &vmwarev1.VSphereCluster{} + switch len(vsphereClusterList.Items) { + case 0: + return reconcile.Result{}, fmt.Errorf("no VSphereCluster found with label %s=%s in namespace %s", labelKey, cluster.Name, cluster.Namespace) + case 1: + vsphereCluster = &vsphereClusterList.Items[0] + default: + return reconcile.Result{}, fmt.Errorf("found %d VSphereClusters with label %s=%s in namespace %s; expected exactly 1", len(vsphereClusterList.Items), labelKey, cluster.Name, cluster.Namespace) + } + + // Fetch the VSphereCluster instance. 
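+	// Placement needs the discovered failure domains, which are only populated once the
+	// VSphereCluster reports Ready.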
+ if vsphereCluster.Status.Ready != true { + log.Info("Waiting for VSphereCluster to be ready with failure domain discovered") + return reconcile.Result{RequeueAfter: reconciliationDelay}, nil + + } + + if len(vsphereCluster.Status.FailureDomains) <= 1 { + log.Info("Single or no zone detected; skipping node automatic placement") + return reconcile.Result{}, nil + } + + // If ControlPlane haven't initialized, requeue it since VSphereMachines of MachineDeployment will only be created after + // ControlPlane is initialized. + if !conditions.IsTrue(cluster, clusterv1.ClusterControlPlaneInitializedCondition) { + log.Info("Waiting for Cluster ControlPlaneInitialized") + return reconcile.Result{RequeueAfter: reconciliationDelay}, nil + } + + // Continue with the main logic. + return r.createOrUpdateVMG(ctx, cluster, vmg) + +} + +// createOrUpdateVMG Create or Update VirtualMachineGroup +func (r *VirtualMachineGroupReconciler) createOrUpdateVMG(ctx context.Context, cluster *clusterv1.Cluster, desiredVMG *vmoprv1.VirtualMachineGroup) (_ reconcile.Result, reterr error) { + log := ctrl.LoggerFrom(ctx) + + // Calculate expected Machines of all MachineDeployments. + expectdMachines := getExpectedMachines(cluster) + if expectdMachines == 0 { + log.Info("none of MachineDeployments specifies replica and node auto replacement doesn't support this scenario") + return reconcile.Result{}, nil + } + + // Calculate current Machines of all MachineDeployments. + currentVSphereMachines, err := getCurrentVSphereMachines(ctx, r.Client, cluster.Namespace, cluster.Name) + if err != nil { + return reconcile.Result{}, errors.Wrapf(err, "failed to get current VSphereMachine of cluster %s/%s", + cluster.Name, cluster.Namespace) + } + + // Wait until all VSphereMachines are create, this could happen during initial deployment or day-2 like cluster update. + current := int32(len(currentVSphereMachines)) + if expectdMachines != current { + // Only check timeout if VMG doesn't exist. + if desiredVMG.CreationTimestamp.IsZero() { + if _, err := r.isMDDefined(ctx, cluster); err != nil { + log.Error(err, "cluster MachineDeployments are not defined") + return reconcile.Result{}, nil + } + + mdList := &clusterv1.MachineDeploymentList{} + if err := r.Client.List(ctx, mdList, + client.InNamespace(cluster.Namespace), + client.MatchingLabels{clusterv1.ClusterNameLabel: cluster.Name}, + ); err != nil { + return reconcile.Result{}, errors.Errorf("failed to list MachineDeployments: %w", err) + } + + // If no deployments exist, report error + if len(mdList.Items) == 0 { + return reconcile.Result{}, errors.Errorf("no MachineDeployments found for cluster %s/%s", cluster.Namespace, cluster.Name) + } + + // Check one MachineDeployment's creation timestamp + firstMD := mdList.Items[0] + if time.Since(firstMD.CreationTimestamp.Time) > 1*time.Minute { + log.Error(errors.New("timeout waiting for VSphereMachines"), "1 minute timeout after MachineDeployment creation", + "MachineDeployment", firstMD.Name, "Cluster", cluster.Namespace+"/"+cluster.Name) + + return reconcile.Result{}, nil + } + } + + log.Info("current VSphereMachines do not match expected", "Expected:", expectdMachines, + "Current:", current, "ClusterName", cluster.Name, "Namespace", cluster.Namespace) + return reconcile.Result{RequeueAfter: reconciliationDelay}, nil + } + + // Generate all the members of the VirtualMachineGroup. 
+ members := make([]vmoprv1.GroupMember, 0, len(currentVSphereMachines)) + for _, vm := range currentVSphereMachines { + members = append(members, vmoprv1.GroupMember{ + Name: vm.Name, + Kind: "VirtualMachine", + }) + } + + // Get all the names of MachineDeployments of the Cluster. + if !cluster.Spec.Topology.IsDefined() { + return reconcile.Result{}, errors.Errorf("Cluster Topology is not defined %s/%s", + cluster.Namespace, cluster.Name) + } + mds := cluster.Spec.Topology.Workers.MachineDeployments + mdNames := make([]string, 0, len(mds)) + for _, md := range mds { + mdNames = append(mdNames, md.Name) + } + + // Use CreateOrPatch to create or update the VirtualMachineGroup. + _, err = controllerutil.CreateOrPatch(ctx, r.Client, desiredVMG, func() error { + // Set the desired labels + if desiredVMG.Labels == nil { + desiredVMG.Labels = make(map[string]string) + // Set Cluster name label + desiredVMG.Labels[clusterv1.ClusterNameLabel] = cluster.Name + } + + // Add per-md-zone label for day-2 operations once placement of a VM belongs to MachineDeployment is done + // Do not update per-md-zone label once set, as placement decision should not change without user explicitly + // ask. + placementDecisionLabels, err := GenerateVMGPlacementLabels(ctx, desiredVMG, mdNames) + if len(placementDecisionLabels) > 0 { + for k, v := range placementDecisionLabels { + if _, exists := desiredVMG.Labels[k]; exists { + // Skip if the label already exists + continue + } + desiredVMG.Labels[k] = v + } + } + + // Compose bootOrder. + desiredVMG.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{ + { + Members: members, + }, + } + + // Make sure the Cluster owns the VM Operator VirtualMachineGroup. + if err = ctrlutil.SetControllerReference(cluster, desiredVMG, r.Client.Scheme()); err != nil { + return errors.Wrapf(err, "failed to mark %s %s/%s as owner of %s %s/%s", + cluster.GroupVersionKind(), + cluster.Namespace, + cluster.Name, + desiredVMG.GroupVersionKind(), + desiredVMG.Namespace, + desiredVMG.Name) + } + + return nil + }) + + return reconcile.Result{}, err +} + +// isMDDefined checks if there are any MachineDeployments for the given cluster +// by listing objects with the cluster.x-k8s.io/cluster-name label. +func (r *VirtualMachineGroupReconciler) isMDDefined(ctx context.Context, cluster *clusterv1.Cluster) (bool, error) { + mdList := &clusterv1.MachineDeploymentList{} + if err := r.Client.List(ctx, mdList, client.InNamespace(cluster.Namespace), client.MatchingLabels{clusterv1.ClusterNameLabel: cluster.Name}); err != nil { + return false, errors.Wrapf(err, "failed to list MachineDeployments for cluster %s/%s", + cluster.Namespace, cluster.Name) + } + + if len(mdList.Items) == 0 { + return false, errors.Errorf("no MachineDeployments found for cluster %s/%s", + cluster.Namespace, cluster.Name) + } + + return true, nil +} + +// isExplicitPlacement checks if any MachineDeployment has an explicit failure domain set. +func (r *VirtualMachineGroupReconciler) isExplicitPlacement(cluster *clusterv1.Cluster) (bool, error) { + // First, ensure MachineDeployments are defined + mdDefined, err := r.isMDDefined(context.Background(), cluster) + if !mdDefined { + return false, err + } + + // Iterate through MachineDeployments to find if an explicit failure domain is set. + mds := cluster.Spec.Topology.Workers.MachineDeployments + for _, md := range mds { + // If a failure domain is specified for any MachineDeployment, it indicates + // explicit placement is configured, so return true. 
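+		// For VC 9.1 explicit and automatic placement cannot be mixed, so one explicitly placed
+		// MachineDeployment opts the whole Cluster out of node auto placement.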
+ if md.FailureDomain != "" { + return true, nil + } + } + + return false, nil +} + +// getExpectedMachines returns the total number of replicas across all +// MachineDeployments in the Cluster's Topology.Workers. +func getExpectedMachines(cluster *clusterv1.Cluster) int32 { + if !cluster.Spec.Topology.IsDefined() { + return 0 + } + + var total int32 + for _, md := range cluster.Spec.Topology.Workers.MachineDeployments { + if md.Replicas != nil { + total += *md.Replicas + } + } + return total +} + +func getCurrentVSphereMachines(ctx context.Context, kubeClient client.Client, clusterNamespace, clusterName string) ([]vmwarev1.VSphereMachine, error) { + log := ctrl.LoggerFrom(ctx) + + // List MachineDeployments for the cluster. + var mdList clusterv1.MachineDeploymentList + if err := kubeClient.List(ctx, &mdList, + client.InNamespace(clusterNamespace), + client.MatchingLabels{clusterv1.ClusterNameLabel: clusterName}, + ); err != nil { + return nil, errors.Wrapf(err, "failed to list MachineDeployments for cluster %s/%s", clusterNamespace, clusterName) + } + validMDs := make(map[string]struct{}) + for _, md := range mdList.Items { + validMDs[md.Name] = struct{}{} + } + log.V(6).Info("Identified active MachineDeployments", "count", len(validMDs)) + + // List MachineSets and filter those owned by a valid MachineDeployment. + var msList clusterv1.MachineSetList + if err := kubeClient.List(ctx, &msList, + client.InNamespace(clusterNamespace), + client.MatchingLabels{clusterv1.ClusterNameLabel: clusterName}, + ); err != nil { + return nil, errors.Wrapf(err, "failed to list MachineSets for cluster %s/%s", clusterNamespace, clusterName) + } + validMS := make(map[string]struct{}) + for _, ms := range msList.Items { + for _, owner := range ms.OwnerReferences { + if owner.Kind == "MachineDeployment" && owner.APIVersion == clusterv1.GroupVersion.String() { + if _, ok := validMDs[owner.Name]; ok { + validMS[ms.Name] = struct{}{} + break + } + } + } + } + log.V(6).Info("Filtered MachineSets owned by valid MachineDeployments", "count", len(validMS)) + + // List Machines and filter those owned by valid MachineSets (skip control plane). + var machineList clusterv1.MachineList + if err := kubeClient.List(ctx, &machineList, + client.InNamespace(clusterNamespace), + client.MatchingLabels{clusterv1.ClusterNameLabel: clusterName}, + ); err != nil { + return nil, errors.Wrapf(err, "failed to list Machines for cluster %s/%s", clusterNamespace, clusterName) + } + + workerMachines := make(map[string]struct{}) + for _, m := range machineList.Items { + if _, isControlPlane := m.Labels[clusterv1.MachineControlPlaneLabel]; isControlPlane { + continue + } + for _, owner := range m.OwnerReferences { + if owner.Kind == "MachineSet" && owner.APIVersion == clusterv1.GroupVersion.String() { + if _, ok := validMS[owner.Name]; ok { + workerMachines[m.Name] = struct{}{} + break + } + } + } + } + log.V(5).Info("Identified worker Machines linked to MachineSets", "count", len(workerMachines)) + + // List VSphereMachines and filter those owned by valid worker Machines. 
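+	// Ownership is resolved through the Machine owner reference rather than labels, mirroring the
+	// MachineDeployment -> MachineSet -> Machine chain walked above.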
+ var vsMachineList vmwarev1.VSphereMachineList + if err := kubeClient.List(ctx, &vsMachineList, + client.InNamespace(clusterNamespace), + ); err != nil { + return nil, errors.Wrapf(err, "failed to list VSphereMachines in namespace %s", clusterNamespace) + } + + var result []vmwarev1.VSphereMachine + for _, vs := range vsMachineList.Items { + for _, owner := range vs.OwnerReferences { + if owner.Kind == "Machine" && owner.APIVersion == clusterv1.GroupVersion.String() { + if _, ok := workerMachines[owner.Name]; ok { + result = append(result, vs) + break + } + } + } + } + log.V(4).Info("Final list of VSphereMachines for VMG member generation", "count", len(result)) + + return result, nil +} + +// GenerateVMGPlacementLabels returns labels per MachineDeployment which contain zone info for placed VMs for day-2 operationss +func GenerateVMGPlacementLabels(ctx context.Context, vmg *vmoprv1.VirtualMachineGroup, machineDeployments []string) (map[string]string, error) { + log := ctrl.LoggerFrom(ctx) + labels := make(map[string]string) + + // For each member in status + for _, member := range vmg.Status.Members { + // Skip if not a VM or not placement ready, + if member.Kind != "VirtualMachine" { + return nil, errors.Errorf("VirtualMachineGroup %s/%s contains none VirtualMachine member, member.Kind %s", vmg.Namespace, vmg.Name, member.Kind) + } + + // Once member VM is placed, VirtualMachineGroupMemberConditionPlacementReady will be set to true. + if !conditions.IsTrue(&member, vmoprv1.VirtualMachineGroupMemberConditionPlacementReady) { + continue + } + + // Check if this VM belongs to any of our target Machine Deployments + // Use machine deployment name as the label key. + for _, md := range machineDeployments { + // Check if we already found placement for this Machine Deployments + if _, found := labels[md]; found { + log.Info(fmt.Sprintf("Skipping Machine Deployment %s, placement already found", md)) + continue + } + + // Check if VM belongs to a Machine Deployment by name (e.g. cluster-1-np-1-vm-xxx contains np-1) + if strings.Contains(member.Name, md) { + // Get the VM placement information by member status. + if member.Placement == nil { + return nil, errors.Errorf("VM %s in VMG %s/%s has no placement info. Placement is nil)", member.Name, vmg.Namespace, vmg.Name) + } + + // Get the VM placement information by member status. + zone := member.Placement.Zone + if zone == "" { + return nil, errors.Errorf("VM %s in VMG %s/%s has no placement info. 
Zone is empty", member.Name, vmg.Namespace, vmg.Name) + } + + log.Info(fmt.Sprintf("VM %s in VMG %s/%s has been placed in zone %s", member.Name, vmg.Namespace, vmg.Name, zone)) + labels[md] = zone + } + } + } + + return labels, nil +} diff --git a/controllers/vspherecluster_reconciler.go b/controllers/vspherecluster_reconciler.go index 18d818f3a4..cabf13db2e 100644 --- a/controllers/vspherecluster_reconciler.go +++ b/controllers/vspherecluster_reconciler.go @@ -427,6 +427,7 @@ func (r *clusterReconciler) reconcileDeploymentZones(ctx context.Context, cluste failureDomains := clusterv1beta1.FailureDomains{} for _, zone := range deploymentZoneList.Items { if zone.Spec.Server != clusterCtx.VSphereCluster.Spec.Server { + continue } diff --git a/main.go b/main.go index b92f48d25a..6d6ea9e011 100644 --- a/main.go +++ b/main.go @@ -94,6 +94,7 @@ var ( vSphereVMConcurrency int vSphereClusterIdentityConcurrency int vSphereDeploymentZoneConcurrency int + virtualMachineGroupConcurrency int skipCRDMigrationPhases []string managerOptions = capiflags.ManagerOptions{} @@ -141,6 +142,9 @@ func InitFlags(fs *pflag.FlagSet) { fs.IntVar(&vSphereDeploymentZoneConcurrency, "vspheredeploymentzone-concurrency", 10, "Number of vSphere deployment zones to process simultaneously") + fs.IntVar(&virtualMachineGroupConcurrency, "virtualmachinegroup-concurrency", 10, + "Number of virtual machine group to process simultaneously") + fs.StringVar( &managerOpts.PodName, "pod-name", @@ -482,6 +486,12 @@ func setupSupervisorControllers(ctx context.Context, controllerCtx *capvcontext. return err } + if feature.Gates.Enabled(feature.NamespaceScopedZones) && feature.Gates.Enabled(feature.NodeAutoPlacement) { + if err := vmware.AddVirtualMachineGroupControllerToManager(ctx, controllerCtx, mgr, concurrency(virtualMachineGroupConcurrency)); err != nil { + return err + } + } + return vmware.AddServiceDiscoveryControllerToManager(ctx, controllerCtx, mgr, clusterCache, concurrency(serviceDiscoveryConcurrency)) } diff --git a/packaging/go.sum b/packaging/go.sum index 14a389257b..8a4cb28435 100644 --- a/packaging/go.sum +++ b/packaging/go.sum @@ -135,8 +135,8 @@ github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d h1:c github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d/go.mod h1:JbFOh22iDsT5BowJe0GgpMI5e2/S7cWaJlv9LdURVQM= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d h1:z9lrzKVtNlujduv9BilzPxuge/LE2F0N1ms3TP4JZvw= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d/go.mod h1:Q4JzNkNMvjo7pXtlB5/R3oME4Nhah7fAObWgghVmtxk= -github.com/vmware-tanzu/vm-operator/api v1.8.6 h1:NIndORjcnSmIlQsCMIewpIwg/ocRVDh2lYjOroTVLrU= -github.com/vmware-tanzu/vm-operator/api v1.8.6/go.mod h1:HHA2SNI9B5Yqtyp5t+Gt9WTWBi/fIkM6+MukDDSf11A= +github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20250908141901-a9e1dfbc0045 h1:zME8crazIAWVJGboJpSLl+qcRYQ8yA6hPQojz28gY5M= +github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20250908141901-a9e1dfbc0045/go.mod h1:hkc/QZCSHcosWWMPS6VWWR12WenZcNE3BaTJ/8A8sNE= github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505 h1:y4wXx1FUFqqSgJ/xUOEM1DLS2Uu0KaeLADWpzpioGTU= github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505/go.mod h1:5rqRJ9zGR+KnKbkGx373WgN8xJpvAj99kHnfoDYRO5I= github.com/vmware/govmomi v0.52.0 h1:JyxQ1IQdllrY7PJbv2am9mRsv3p9xWlIQ66bv+XnyLw= diff --git a/pkg/services/network/netop_provider.go b/pkg/services/network/netop_provider.go index 
fa1c1860fa..e13de3bd4d 100644 --- a/pkg/services/network/netop_provider.go +++ b/pkg/services/network/netop_provider.go @@ -136,7 +136,7 @@ func (np *netopNetworkProvider) ConfigureVirtualMachine(ctx context.Context, clu // Set the VM primary interface vm.Spec.Network.Interfaces = append(vm.Spec.Network.Interfaces, vmoprv1.VirtualMachineNetworkInterfaceSpec{ Name: PrimaryInterfaceName, - Network: vmoprv1common.PartialObjectRef{ + Network: &vmoprv1common.PartialObjectRef{ TypeMeta: metav1.TypeMeta{ Kind: NetworkGVKNetOperator.Kind, APIVersion: NetworkGVKNetOperator.GroupVersion().String(), diff --git a/pkg/services/network/nsxt_provider.go b/pkg/services/network/nsxt_provider.go index 96a0450bb7..90885cb568 100644 --- a/pkg/services/network/nsxt_provider.go +++ b/pkg/services/network/nsxt_provider.go @@ -223,7 +223,7 @@ func (np *nsxtNetworkProvider) ConfigureVirtualMachine(_ context.Context, cluste } vm.Spec.Network.Interfaces = append(vm.Spec.Network.Interfaces, vmoprv1.VirtualMachineNetworkInterfaceSpec{ Name: fmt.Sprintf("eth%d", len(vm.Spec.Network.Interfaces)), - Network: vmoprv1common.PartialObjectRef{ + Network: &vmoprv1common.PartialObjectRef{ TypeMeta: metav1.TypeMeta{ Kind: NetworkGVKNSXT.Kind, APIVersion: NetworkGVKNSXT.GroupVersion().String(), diff --git a/pkg/services/network/nsxt_vpc_provider.go b/pkg/services/network/nsxt_vpc_provider.go index 0c3533a37c..9b2c8defa0 100644 --- a/pkg/services/network/nsxt_vpc_provider.go +++ b/pkg/services/network/nsxt_vpc_provider.go @@ -224,7 +224,7 @@ func (vp *nsxtVPCNetworkProvider) ConfigureVirtualMachine(_ context.Context, clu networkName := clusterCtx.VSphereCluster.Name vm.Spec.Network.Interfaces = append(vm.Spec.Network.Interfaces, vmoprv1.VirtualMachineNetworkInterfaceSpec{ Name: PrimaryInterfaceName, - Network: vmoprv1common.PartialObjectRef{ + Network: &vmoprv1common.PartialObjectRef{ TypeMeta: metav1.TypeMeta{ Kind: NetworkGVKNSXTVPCSubnetSet.Kind, APIVersion: NetworkGVKNSXTVPCSubnetSet.GroupVersion().String(), @@ -243,7 +243,7 @@ func (vp *nsxtVPCNetworkProvider) ConfigureVirtualMachine(_ context.Context, clu } vmInterface := vmoprv1.VirtualMachineNetworkInterfaceSpec{ Name: PrimaryInterfaceName, - Network: vmoprv1common.PartialObjectRef{ + Network: &vmoprv1common.PartialObjectRef{ TypeMeta: metav1.TypeMeta{ Kind: primary.Network.Kind, APIVersion: primary.Network.APIVersion, @@ -281,7 +281,7 @@ func setVMSecondaryInterfaces(machine *vmwarev1.VSphereMachine, vm *vmoprv1.Virt } vmInterface := vmoprv1.VirtualMachineNetworkInterfaceSpec{ Name: secondaryInterface.Name, - Network: vmoprv1common.PartialObjectRef{ + Network: &vmoprv1common.PartialObjectRef{ TypeMeta: metav1.TypeMeta{ Kind: secondaryInterface.Network.Kind, APIVersion: secondaryInterface.Network.APIVersion, diff --git a/pkg/services/vmoperator/control_plane_endpoint.go b/pkg/services/vmoperator/control_plane_endpoint.go index e0070188e3..3b500711d7 100644 --- a/pkg/services/vmoperator/control_plane_endpoint.go +++ b/pkg/services/vmoperator/control_plane_endpoint.go @@ -189,7 +189,7 @@ func newVirtualMachineService(ctx *vmware.ClusterContext) *vmoprv1.VirtualMachin Namespace: ctx.Cluster.Namespace, }, TypeMeta: metav1.TypeMeta{ - APIVersion: vmoprv1.SchemeGroupVersion.String(), + APIVersion: vmoprv1.GroupVersion.String(), Kind: "VirtualMachineService", }, } diff --git a/test/go.mod b/test/go.mod index 4339ef147d..9f55c7dc8a 100644 --- a/test/go.mod +++ b/test/go.mod @@ -18,7 +18,6 @@ require ( // The version of vm-operator should be kept in sync with the manifests at: 
config/deployments/integration-tests github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20250908141901-a9e1dfbc0045 github.com/vmware/govmomi v0.52.0 - ) require ( From 72fad61a9e1c3de1836e0c76ea129e4e63735479 Mon Sep 17 00:00:00 2001 From: Sagar Muchhal Date: Mon, 6 Oct 2025 15:06:02 -0700 Subject: [PATCH 03/25] Updates logic for VMG creation Removes the extra cases for VMG creation, such that VMG is created for: 1. Multiple zones, multiple MDs with no failureDomain 2. Multiple zones, multiple MDs with failureDomain 3. Single zone, existing cluster with no failureDomain MDs Signed-off-by: Sagar Muchhal --- .../vmware/virtualmachinegroup_reconciler.go | 299 +++++++++--------- 1 file changed, 142 insertions(+), 157 deletions(-) diff --git a/controllers/vmware/virtualmachinegroup_reconciler.go b/controllers/vmware/virtualmachinegroup_reconciler.go index df5490bf05..b00e948f60 100644 --- a/controllers/vmware/virtualmachinegroup_reconciler.go +++ b/controllers/vmware/virtualmachinegroup_reconciler.go @@ -20,6 +20,7 @@ package vmware import ( "context" "fmt" + "sort" "strings" "time" @@ -37,7 +38,6 @@ import ( vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1" clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" - ctrlutil "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" ) const ( @@ -63,13 +63,12 @@ func (r *VirtualMachineGroupReconciler) Reconcile(ctx context.Context, req ctrl. } log = log.WithValues("Cluster", klog.KObj(cluster)) - // If Cluster is deleted, just return as VirtualMachineGroup will be GCed and no extral process needed. + // If Cluster is deleted, just return as VirtualMachineGroup will be GCed and no extra processing needed. if !cluster.DeletionTimestamp.IsZero() { return reconcile.Result{}, nil } vmg := &vmoprv1.VirtualMachineGroup{} - key := &client.ObjectKey{ Namespace: cluster.Namespace, Name: cluster.Name, @@ -80,7 +79,6 @@ func (r *VirtualMachineGroupReconciler) Reconcile(ctx context.Context, req ctrl. log.Error(err, "failed to get VirtualMachineGroup") return ctrl.Result{}, err } - // Define the VM Operator VirtualMachine resource to reconcile. vmg = &vmoprv1.VirtualMachineGroup{ ObjectMeta: metav1.ObjectMeta{ Name: key.Name, @@ -89,56 +87,41 @@ func (r *VirtualMachineGroupReconciler) Reconcile(ctx context.Context, req ctrl. } } - // If as least one MachineDeployment of Cluster is specified with failureDomain, then return. - // No need to handle Cluster using explicit placement. For VC 9.1, no mixed mode of explicit and automatic placement - // during initial deployment. - if vmg.CreationTimestamp.IsZero() { - explicitPlacement, err := r.isExplicitPlacement(cluster) - if err != nil { - return reconcile.Result{}, err - } - - if explicitPlacement { - log.Info("No need to create VirtualMachineGroup for Cluster using explicit placement.") - return reconcile.Result{}, nil - } - } - - // Proceed only if multiple zones are available. - // If there is only one zone(default), node automatic placement is unnecessary - // because all Machine Deployments will be scheduled into that single zone. - // The VSphereCluster resource discovers the underlying zones, - // which we treat as the source of truth. 
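+	// NOTE: the multi-zone and explicit-placement checks are intentionally disabled for now; the
+	// group is reconciled for every topology-managed Cluster (see the commit message for the
+	// intended cases).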
- vsphereClusterList := &vmwarev1.VSphereClusterList{} - labelKey := clusterv1.ClusterNameLabel - if err := r.Client.List(ctx, vsphereClusterList, - client.InNamespace(cluster.Namespace), - client.MatchingLabels(map[string]string{labelKey: cluster.Name}), - ); err != nil { - return reconcile.Result{}, fmt.Errorf("failed to list VSphereClusters in namespace %s: %w", cluster.Namespace, err) - } - - vsphereCluster := &vmwarev1.VSphereCluster{} - switch len(vsphereClusterList.Items) { - case 0: - return reconcile.Result{}, fmt.Errorf("no VSphereCluster found with label %s=%s in namespace %s", labelKey, cluster.Name, cluster.Namespace) - case 1: - vsphereCluster = &vsphereClusterList.Items[0] - default: - return reconcile.Result{}, fmt.Errorf("found %d VSphereClusters with label %s=%s in namespace %s; expected exactly 1", len(vsphereClusterList.Items), labelKey, cluster.Name, cluster.Namespace) - } - - // Fetch the VSphereCluster instance. - if vsphereCluster.Status.Ready != true { - log.Info("Waiting for VSphereCluster to be ready with failure domain discovered") - return reconcile.Result{RequeueAfter: reconciliationDelay}, nil - - } - - if len(vsphereCluster.Status.FailureDomains) <= 1 { - log.Info("Single or no zone detected; skipping node automatic placement") - return reconcile.Result{}, nil - } + // // Proceed only if multiple zones are available. + // // If there is only one zone(default), node automatic placement is unnecessary + // // because all Machine Deployments will be scheduled into that single zone. + // // The VSphereCluster resource discovers the underlying zones, + // // which we treat as the source of truth. + // vsphereClusterList := &vmwarev1.VSphereClusterList{} + // labelKey := clusterv1.ClusterNameLabel + // if err := r.Client.List(ctx, vsphereClusterList, + // client.InNamespace(cluster.Namespace), + // client.MatchingLabels(map[string]string{labelKey: cluster.Name}), + // ); err != nil { + // return reconcile.Result{}, fmt.Errorf("failed to list VSphereClusters in namespace %s: %w", cluster.Namespace, err) + // } + + // vsphereCluster := &vmwarev1.VSphereCluster{} + // switch len(vsphereClusterList.Items) { + // case 0: + // return reconcile.Result{}, fmt.Errorf("no VSphereCluster found with label %s=%s in namespace %s", labelKey, cluster.Name, cluster.Namespace) + // case 1: + // vsphereCluster = &vsphereClusterList.Items[0] + // default: + // return reconcile.Result{}, fmt.Errorf("found %d VSphereClusters with label %s=%s in namespace %s; expected exactly 1", len(vsphereClusterList.Items), labelKey, cluster.Name, cluster.Namespace) + // } + + // // Fetch the VSphereCluster instance. + // if vsphereCluster.Status.Ready != true { + // log.Info("Waiting for VSphereCluster to be ready with failure domain discovered") + // return reconcile.Result{RequeueAfter: reconciliationDelay}, nil + + // } + + // if len(vsphereCluster.Status.FailureDomains) <= 1 { + // log.Info("Single or no zone detected; skipping node automatic placement") + // return reconcile.Result{}, nil + // } // If ControlPlane haven't initialized, requeue it since VSphereMachines of MachineDeployment will only be created after // ControlPlane is initialized. @@ -157,8 +140,8 @@ func (r *VirtualMachineGroupReconciler) createOrUpdateVMG(ctx context.Context, c log := ctrl.LoggerFrom(ctx) // Calculate expected Machines of all MachineDeployments. 
- expectdMachines := getExpectedMachines(cluster) - if expectdMachines == 0 { + expectedMachines := getExpectedMachines(cluster) + if expectedMachines == 0 { log.Info("none of MachineDeployments specifies replica and node auto replacement doesn't support this scenario") return reconcile.Result{}, nil } @@ -172,44 +155,49 @@ func (r *VirtualMachineGroupReconciler) createOrUpdateVMG(ctx context.Context, c // Wait until all VSphereMachines are create, this could happen during initial deployment or day-2 like cluster update. current := int32(len(currentVSphereMachines)) - if expectdMachines != current { + if current < expectedMachines { // Only check timeout if VMG doesn't exist. - if desiredVMG.CreationTimestamp.IsZero() { - if _, err := r.isMDDefined(ctx, cluster); err != nil { - log.Error(err, "cluster MachineDeployments are not defined") - return reconcile.Result{}, nil - } - - mdList := &clusterv1.MachineDeploymentList{} - if err := r.Client.List(ctx, mdList, - client.InNamespace(cluster.Namespace), - client.MatchingLabels{clusterv1.ClusterNameLabel: cluster.Name}, - ); err != nil { - return reconcile.Result{}, errors.Errorf("failed to list MachineDeployments: %w", err) - } - - // If no deployments exist, report error - if len(mdList.Items) == 0 { - return reconcile.Result{}, errors.Errorf("no MachineDeployments found for cluster %s/%s", cluster.Namespace, cluster.Name) - } - - // Check one MachineDeployment's creation timestamp - firstMD := mdList.Items[0] - if time.Since(firstMD.CreationTimestamp.Time) > 1*time.Minute { - log.Error(errors.New("timeout waiting for VSphereMachines"), "1 minute timeout after MachineDeployment creation", - "MachineDeployment", firstMD.Name, "Cluster", cluster.Namespace+"/"+cluster.Name) - - return reconcile.Result{}, nil - } - } - - log.Info("current VSphereMachines do not match expected", "Expected:", expectdMachines, + // if desiredVMG.CreationTimestamp.IsZero() { + // if _, err := r.isMDDefined(ctx, cluster); err != nil { + // log.Error(err, "cluster MachineDeployments are not defined") + // return reconcile.Result{}, nil + // } + + // mdList := &clusterv1.MachineDeploymentList{} + // if err := r.Client.List(ctx, mdList, + // client.InNamespace(cluster.Namespace), + // client.MatchingLabels{clusterv1.ClusterNameLabel: cluster.Name}, + // ); err != nil { + // return reconcile.Result{}, errors.Errorf("failed to list MachineDeployments: %w", err) + // } + + // // If no deployments exist, report error + // if len(mdList.Items) == 0 { + // return reconcile.Result{}, errors.Errorf("no MachineDeployments found for cluster %s/%s", cluster.Namespace, cluster.Name) + // } + + // // Check one MachineDeployment's creation timestamp + // firstMD := mdList.Items[0] + // if time.Since(firstMD.CreationTimestamp.Time) > 1*time.Minute { + // log.Error(errors.New("timeout waiting for VSphereMachines"), "1 minute timeout after MachineDeployment creation", + // "MachineDeployment", firstMD.Name, "Cluster", cluster.Namespace+"/"+cluster.Name) + + // return reconcile.Result{}, nil + // } + // } + + log.Info("current VSphereMachines do not match expected", "Expected:", expectedMachines, "Current:", current, "ClusterName", cluster.Name, "Namespace", cluster.Namespace) return reconcile.Result{RequeueAfter: reconciliationDelay}, nil } // Generate all the members of the VirtualMachineGroup. 
members := make([]vmoprv1.GroupMember, 0, len(currentVSphereMachines)) + // Sort the VSphereMachines by name for consistent ordering + sort.Slice(currentVSphereMachines, func(i, j int) bool { + return currentVSphereMachines[i].Name < currentVSphereMachines[j].Name + }) + for _, vm := range currentVSphereMachines { members = append(members, vmoprv1.GroupMember{ Name: vm.Name, @@ -259,7 +247,7 @@ func (r *VirtualMachineGroupReconciler) createOrUpdateVMG(ctx context.Context, c } // Make sure the Cluster owns the VM Operator VirtualMachineGroup. - if err = ctrlutil.SetControllerReference(cluster, desiredVMG, r.Client.Scheme()); err != nil { + if err = controllerutil.SetControllerReference(cluster, desiredVMG, r.Client.Scheme()); err != nil { return errors.Wrapf(err, "failed to mark %s %s/%s as owner of %s %s/%s", cluster.GroupVersionKind(), cluster.Namespace, @@ -332,83 +320,80 @@ func getExpectedMachines(cluster *clusterv1.Cluster) int32 { func getCurrentVSphereMachines(ctx context.Context, kubeClient client.Client, clusterNamespace, clusterName string) ([]vmwarev1.VSphereMachine, error) { log := ctrl.LoggerFrom(ctx) - // List MachineDeployments for the cluster. - var mdList clusterv1.MachineDeploymentList - if err := kubeClient.List(ctx, &mdList, - client.InNamespace(clusterNamespace), - client.MatchingLabels{clusterv1.ClusterNameLabel: clusterName}, - ); err != nil { - return nil, errors.Wrapf(err, "failed to list MachineDeployments for cluster %s/%s", clusterNamespace, clusterName) - } - validMDs := make(map[string]struct{}) - for _, md := range mdList.Items { - validMDs[md.Name] = struct{}{} - } - log.V(6).Info("Identified active MachineDeployments", "count", len(validMDs)) - - // List MachineSets and filter those owned by a valid MachineDeployment. - var msList clusterv1.MachineSetList - if err := kubeClient.List(ctx, &msList, - client.InNamespace(clusterNamespace), - client.MatchingLabels{clusterv1.ClusterNameLabel: clusterName}, - ); err != nil { - return nil, errors.Wrapf(err, "failed to list MachineSets for cluster %s/%s", clusterNamespace, clusterName) - } - validMS := make(map[string]struct{}) - for _, ms := range msList.Items { - for _, owner := range ms.OwnerReferences { - if owner.Kind == "MachineDeployment" && owner.APIVersion == clusterv1.GroupVersion.String() { - if _, ok := validMDs[owner.Name]; ok { - validMS[ms.Name] = struct{}{} - break - } - } - } - } - log.V(6).Info("Filtered MachineSets owned by valid MachineDeployments", "count", len(validMS)) - - // List Machines and filter those owned by valid MachineSets (skip control plane). - var machineList clusterv1.MachineList - if err := kubeClient.List(ctx, &machineList, - client.InNamespace(clusterNamespace), - client.MatchingLabels{clusterv1.ClusterNameLabel: clusterName}, - ); err != nil { - return nil, errors.Wrapf(err, "failed to list Machines for cluster %s/%s", clusterNamespace, clusterName) - } - - workerMachines := make(map[string]struct{}) - for _, m := range machineList.Items { - if _, isControlPlane := m.Labels[clusterv1.MachineControlPlaneLabel]; isControlPlane { - continue - } - for _, owner := range m.OwnerReferences { - if owner.Kind == "MachineSet" && owner.APIVersion == clusterv1.GroupVersion.String() { - if _, ok := validMS[owner.Name]; ok { - workerMachines[m.Name] = struct{}{} - break - } - } - } - } - log.V(5).Info("Identified worker Machines linked to MachineSets", "count", len(workerMachines)) - - // List VSphereMachines and filter those owned by valid worker Machines. 
+ // // List MachineDeployments for the cluster. + // var mdList clusterv1.MachineDeploymentList + // if err := kubeClient.List(ctx, &mdList, + // client.InNamespace(clusterNamespace), + // client.MatchingLabels{clusterv1.ClusterNameLabel: clusterName}, + // ); err != nil { + // return nil, errors.Wrapf(err, "failed to list MachineDeployments for cluster %s/%s", clusterNamespace, clusterName) + // } + // validMDs := make(map[string]struct{}) + // for _, md := range mdList.Items { + // validMDs[md.Name] = struct{}{} + // } + // log.V(6).Info("Identified active MachineDeployments", "count", len(validMDs)) + + // // List MachineSets and filter those owned by a valid MachineDeployment. + // var msList clusterv1.MachineSetList + // if err := kubeClient.List(ctx, &msList, + // client.InNamespace(clusterNamespace), + // client.MatchingLabels{clusterv1.ClusterNameLabel: clusterName}, + // ); err != nil { + // return nil, errors.Wrapf(err, "failed to list MachineSets for cluster %s/%s", clusterNamespace, clusterName) + // } + // validMS := make(map[string]struct{}) + // for _, ms := range msList.Items { + // for _, owner := range ms.OwnerReferences { + // if owner.Kind == "MachineDeployment" && owner.APIVersion == clusterv1.GroupVersion.String() { + // if _, ok := validMDs[owner.Name]; ok { + // validMS[ms.Name] = struct{}{} + // break + // } + // } + // } + // } + // log.V(6).Info("Filtered MachineSets owned by valid MachineDeployments", "count", len(validMS)) + + // // List Machines and filter those owned by valid MachineSets (skip control plane). + // var machineList clusterv1.MachineList + // if err := kubeClient.List(ctx, &machineList, + // client.InNamespace(clusterNamespace), + // client.MatchingLabels{clusterv1.ClusterNameLabel: clusterName}, + // ); err != nil { + // return nil, errors.Wrapf(err, "failed to list Machines for cluster %s/%s", clusterNamespace, clusterName) + // } + + // workerMachines := make(map[string]struct{}) + // for _, m := range machineList.Items { + // if _, isControlPlane := m.Labels[clusterv1.MachineControlPlaneLabel]; isControlPlane { + // continue + // } + // for _, owner := range m.OwnerReferences { + // if owner.Kind == "MachineSet" && owner.APIVersion == clusterv1.GroupVersion.String() { + // if _, ok := validMS[owner.Name]; ok { + // workerMachines[m.Name] = struct{}{} + // break + // } + // } + // } + // } + // log.V(5).Info("Identified worker Machines linked to MachineSets", "count", len(workerMachines)) + + // List VSphereMachine objects var vsMachineList vmwarev1.VSphereMachineList if err := kubeClient.List(ctx, &vsMachineList, client.InNamespace(clusterNamespace), + client.MatchingLabels{clusterv1.ClusterNameLabel: clusterName}, + client.HasLabels{clusterv1.MachineDeploymentNameLabel}, ); err != nil { return nil, errors.Wrapf(err, "failed to list VSphereMachines in namespace %s", clusterNamespace) } var result []vmwarev1.VSphereMachine for _, vs := range vsMachineList.Items { - for _, owner := range vs.OwnerReferences { - if owner.Kind == "Machine" && owner.APIVersion == clusterv1.GroupVersion.String() { - if _, ok := workerMachines[owner.Name]; ok { - result = append(result, vs) - break - } - } + if vs.DeletionTimestamp.IsZero() { + result = append(result, vs) } } log.V(4).Info("Final list of VSphereMachines for VMG member generation", "count", len(result)) @@ -456,7 +441,7 @@ func GenerateVMGPlacementLabels(ctx context.Context, vmg *vmoprv1.VirtualMachine } log.Info(fmt.Sprintf("VM %s in VMG %s/%s has been placed in zone %s", member.Name, 
vmg.Namespace, vmg.Name, zone)) - labels[md] = zone + labels[fmt.Sprintf("zone.cluster.x-k8s.io/%s", md)] = zone } } } From 124623a0cc91e2d72928b663d93e96cc35caceaf Mon Sep 17 00:00:00 2001 From: Sagar Muchhal Date: Mon, 29 Sep 2025 15:24:17 -0700 Subject: [PATCH 04/25] Initial impl for VSphereMachine AAF changes - Updates VMOP API dependency Misc VMG fixes - Use namingStrategy to calculate VM names - Use MachineDeployment names for VMG placement label - Includes all machinedeployments to generate node-pool -> zone mapping Fixes VMG webhook validation error - Adds cluster-name label to Af/AAF spec - re-adds zone topology key back to anti-aff spec Signed-off-by: Sagar Muchhal --- .../vmware/virtualmachinegroup_reconciler.go | 54 ++++-- go.mod | 2 +- go.sum | 4 +- pkg/services/vmoperator/constants.go | 2 - pkg/services/vmoperator/vmopmachine.go | 171 ++++++++++++++++-- 5 files changed, 205 insertions(+), 28 deletions(-) diff --git a/controllers/vmware/virtualmachinegroup_reconciler.go b/controllers/vmware/virtualmachinegroup_reconciler.go index b00e948f60..a9a417bf28 100644 --- a/controllers/vmware/virtualmachinegroup_reconciler.go +++ b/controllers/vmware/virtualmachinegroup_reconciler.go @@ -37,6 +37,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1" + infrautilv1 "sigs.k8s.io/cluster-api-provider-vsphere/pkg/util" clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" ) @@ -191,16 +192,24 @@ func (r *VirtualMachineGroupReconciler) createOrUpdateVMG(ctx context.Context, c return reconcile.Result{RequeueAfter: reconciliationDelay}, nil } - // Generate all the members of the VirtualMachineGroup. - members := make([]vmoprv1.GroupMember, 0, len(currentVSphereMachines)) - // Sort the VSphereMachines by name for consistent ordering - sort.Slice(currentVSphereMachines, func(i, j int) bool { - return currentVSphereMachines[i].Name < currentVSphereMachines[j].Name + // Generate VM names according to the naming strategy set on the VSphereMachine. 
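+	// Group members must reference the VirtualMachine names that VM Operator will actually see,
+	// which can differ from the VSphereMachine names when a naming strategy template is set.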
+ vmNames := make([]string, 0, len(currentVSphereMachines)) + for _, machine := range currentVSphereMachines { + name, err := GenerateVirtualMachineName(machine.Name, machine.Spec.NamingStrategy) + if err != nil { + return reconcile.Result{}, err + } + vmNames = append(vmNames, name) + } + // Sort the VM names alphabetically for consistent ordering + sort.Slice(vmNames, func(i, j int) bool { + return vmNames[i] < vmNames[j] }) - for _, vm := range currentVSphereMachines { + members := make([]vmoprv1.GroupMember, 0, len(currentVSphereMachines)) + for _, name := range vmNames { members = append(members, vmoprv1.GroupMember{ - Name: vm.Name, + Name: name, Kind: "VirtualMachine", }) } @@ -210,9 +219,14 @@ func (r *VirtualMachineGroupReconciler) createOrUpdateVMG(ctx context.Context, c return reconcile.Result{}, errors.Errorf("Cluster Topology is not defined %s/%s", cluster.Namespace, cluster.Name) } - mds := cluster.Spec.Topology.Workers.MachineDeployments - mdNames := make([]string, 0, len(mds)) - for _, md := range mds { + machineDeployments := &clusterv1.MachineDeploymentList{} + if err := r.Client.List(ctx, machineDeployments, + client.InNamespace(cluster.Namespace), + client.MatchingLabels{clusterv1.ClusterNameLabel: cluster.Name}); err != nil { + return reconcile.Result{}, err + } + mdNames := []string{} + for _, md := range machineDeployments.Items { mdNames = append(mdNames, md.Name) } @@ -401,7 +415,7 @@ func getCurrentVSphereMachines(ctx context.Context, kubeClient client.Client, cl return result, nil } -// GenerateVMGPlacementLabels returns labels per MachineDeployment which contain zone info for placed VMs for day-2 operationss +// GenerateVMGPlacementLabels returns labels per MachineDeployment which contain zone info for placed VMs for day-2 operations. func GenerateVMGPlacementLabels(ctx context.Context, vmg *vmoprv1.VirtualMachineGroup, machineDeployments []string) (map[string]string, error) { log := ctrl.LoggerFrom(ctx) labels := make(map[string]string) @@ -428,6 +442,7 @@ func GenerateVMGPlacementLabels(ctx context.Context, vmg *vmoprv1.VirtualMachine } // Check if VM belongs to a Machine Deployment by name (e.g. cluster-1-np-1-vm-xxx contains np-1) + // TODO: Establish membership via the machine deployment name label if strings.Contains(member.Name, md) { // Get the VM placement information by member status. if member.Placement == nil { @@ -448,3 +463,20 @@ func GenerateVMGPlacementLabels(ctx context.Context, vmg *vmoprv1.VirtualMachine return labels, nil } + +// TODO: de-dup this logic with vmopmachine.go +// GenerateVirtualMachineName generates the name of a VirtualMachine based on the naming strategy. +func GenerateVirtualMachineName(machineName string, namingStrategy *vmwarev1.VirtualMachineNamingStrategy) (string, error) { + // Per default the name of the VirtualMachine should be equal to the Machine name (this is the same as "{{ .machine.name }}") + if namingStrategy == nil || namingStrategy.Template == nil { + // Note: No need to trim to max length in this case as valid Machine names will also be valid VirtualMachine names. 
+ return machineName, nil + } + + name, err := infrautilv1.GenerateMachineNameFromTemplate(machineName, namingStrategy.Template) + if err != nil { + return "", errors.Wrap(err, "failed to generate name for VirtualMachine") + } + + return name, nil +} diff --git a/go.mod b/go.mod index 845cbf2a17..a43e7997df 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,7 @@ require ( github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d // The version of vm-operator should be kept in sync with the manifests at: config/deployments/integration-tests - github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20250908141901-a9e1dfbc0045 + github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251007154704-e2d6e85d9ec7 github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505 github.com/vmware/govmomi v0.52.0 ) diff --git a/go.sum b/go.sum index 934a82e6ac..8df98ba83c 100644 --- a/go.sum +++ b/go.sum @@ -243,8 +243,8 @@ github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d h1:c github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d/go.mod h1:JbFOh22iDsT5BowJe0GgpMI5e2/S7cWaJlv9LdURVQM= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d h1:z9lrzKVtNlujduv9BilzPxuge/LE2F0N1ms3TP4JZvw= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d/go.mod h1:Q4JzNkNMvjo7pXtlB5/R3oME4Nhah7fAObWgghVmtxk= -github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20250908141901-a9e1dfbc0045 h1:zME8crazIAWVJGboJpSLl+qcRYQ8yA6hPQojz28gY5M= -github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20250908141901-a9e1dfbc0045/go.mod h1:hkc/QZCSHcosWWMPS6VWWR12WenZcNE3BaTJ/8A8sNE= +github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251007154704-e2d6e85d9ec7 h1:VlnaiDKI1H1buwBOgL8R3HRB3EQNN96xMdz25vE5FUo= +github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251007154704-e2d6e85d9ec7/go.mod h1:nWTPpxfe4gHuuYuFcrs86+NMxfkqPk3a3IlvI8TCWak= github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505 h1:y4wXx1FUFqqSgJ/xUOEM1DLS2Uu0KaeLADWpzpioGTU= github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505/go.mod h1:5rqRJ9zGR+KnKbkGx373WgN8xJpvAj99kHnfoDYRO5I= github.com/vmware/govmomi v0.52.0 h1:JyxQ1IQdllrY7PJbv2am9mRsv3p9xWlIQ66bv+XnyLw= diff --git a/pkg/services/vmoperator/constants.go b/pkg/services/vmoperator/constants.go index 011082a06c..37ca556fc6 100644 --- a/pkg/services/vmoperator/constants.go +++ b/pkg/services/vmoperator/constants.go @@ -18,8 +18,6 @@ limitations under the License. package vmoperator const ( - kubeTopologyZoneLabelKey = "topology.kubernetes.io/zone" - // ControlPlaneVMClusterModuleGroupName is the name used for the control plane Cluster Module. ControlPlaneVMClusterModuleGroupName = "control-plane-group" // ClusterModuleNameAnnotationKey is key for the Cluster Module annotation. 
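Note: the vmopmachine.go changes below pin a worker VM's failure domain by reading a per-MachineDeployment zone key of the form "zone.cluster.x-k8s.io/<machine-deployment-name>" from the labels of the cluster's VirtualMachineGroup. The following is a minimal, self-contained sketch of that lookup convention; it is illustrative only, not part of the patch, and the names and values are hypothetical.

package main

import "fmt"

// zoneLabelKey builds the per-MachineDeployment placement key used in this
// patch series: "zone.cluster.x-k8s.io/<machine-deployment-name>".
func zoneLabelKey(machineDeployment string) string {
	return fmt.Sprintf("zone.cluster.x-k8s.io/%s", machineDeployment)
}

// zoneForNodePool returns the zone recorded for a node pool on a
// VirtualMachineGroup's labels, and whether a non-empty value was found.
func zoneForNodePool(vmgLabels map[string]string, nodePool string) (string, bool) {
	zone, ok := vmgLabels[zoneLabelKey(nodePool)]
	return zone, ok && zone != ""
}

func main() {
	// Hypothetical label set, as the VMG reconciler would write it after placement.
	labels := map[string]string{"zone.cluster.x-k8s.io/np-1": "zone-a"}
	if zone, ok := zoneForNodePool(labels, "np-1"); ok {
		fmt.Println("pin VM failure domain to", zone) // prints: pin VM failure domain to zone-a
	}
}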
diff --git a/pkg/services/vmoperator/vmopmachine.go b/pkg/services/vmoperator/vmopmachine.go index 840b166406..0595b42b13 100644 --- a/pkg/services/vmoperator/vmopmachine.go +++ b/pkg/services/vmoperator/vmopmachine.go @@ -41,6 +41,7 @@ import ( infrav1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/v1beta1" vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1" + "sigs.k8s.io/cluster-api-provider-vsphere/feature" capvcontext "sigs.k8s.io/cluster-api-provider-vsphere/pkg/context" "sigs.k8s.io/cluster-api-provider-vsphere/pkg/context/vmware" infrautilv1 "sigs.k8s.io/cluster-api-provider-vsphere/pkg/util" @@ -163,6 +164,15 @@ func (v *VmopMachineService) SyncFailureReason(_ context.Context, machineCtx cap return supervisorMachineCtx.VSphereMachine.Status.FailureReason != nil || supervisorMachineCtx.VSphereMachine.Status.FailureMessage != nil, nil } +type affinityInfo struct { + affinitySpec *vmoprv1.AffinitySpec + vmGroupName string + failureDomain *string + + // TODO: is this needed for the single zone case? + // zones []topologyv1.Zone +} + // ReconcileNormal reconciles create and update events for VM Operator VMs. func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx capvcontext.MachineContext) (bool, error) { log := ctrl.LoggerFrom(ctx) @@ -171,10 +181,6 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap return false, errors.New("received unexpected SupervisorMachineContext type") } - if supervisorMachineCtx.Machine.Spec.FailureDomain != "" { - supervisorMachineCtx.VSphereMachine.Spec.FailureDomain = ptr.To(supervisorMachineCtx.Machine.Spec.FailureDomain) - } - // If debug logging is enabled, report the number of vms in the cluster before and after the reconcile if log.V(5).Enabled() { vms, err := v.getVirtualMachinesInCluster(ctx, supervisorMachineCtx) @@ -188,6 +194,112 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap // Set the VM state. Will get reset throughout the reconcile supervisorMachineCtx.VSphereMachine.Status.VMStatus = vmwarev1.VirtualMachineStatePending + var affInfo affinityInfo + if feature.Gates.Enabled(feature.NodeAutoPlacement) && + !infrautilv1.IsControlPlaneMachine(machineCtx.GetVSphereMachine()) { + // Check for the presence of a VirtualMachineGroup with the name and namespace same as the name of the Cluster + vmOperatorVMGroup := &vmoprv1.VirtualMachineGroup{} + key := client.ObjectKey{ + Namespace: supervisorMachineCtx.Cluster.Namespace, + Name: supervisorMachineCtx.Cluster.Name, + } + err := v.Client.Get(ctx, key, vmOperatorVMGroup) + if err != nil { + if !apierrors.IsNotFound(err) { + return false, err + } + if apierrors.IsNotFound(err) { + log.V(4).Info("VirtualMachineGroup not found, requeueing") + return true, nil + } + } + + // Check if the current machine is a member of the boot order + // in the VirtualMachineGroup. 
+ if !v.checkVirtualMachineGroupMembership(vmOperatorVMGroup, supervisorMachineCtx) { + log.V(4).Info("Waiting for VirtualMachineGroup membership, requeueing") + return true, nil + } + + // Initialize the affinityInfo for the VM + affInfo = affinityInfo{ + vmGroupName: vmOperatorVMGroup.Name, + } + + // Check the presence of the node-pool label on the VirtualMachineGroup object + nodePool := supervisorMachineCtx.Machine.Labels[clusterv1.MachineDeploymentNameLabel] + if zone, ok := vmOperatorVMGroup.Labels[fmt.Sprintf("zone.cluster.x-k8s.io/%s", nodePool)]; ok && zone != "" { + affInfo.failureDomain = ptr.To(zone) + } + + // Fetch machine deployments without explicit failureDomain specified + // to use when setting the anti-affinity rules + machineDeployments := &clusterv1.MachineDeploymentList{} + if err := v.Client.List(ctx, machineDeployments, + client.InNamespace(supervisorMachineCtx.Cluster.Namespace), + client.MatchingLabels{clusterv1.ClusterNameLabel: supervisorMachineCtx.Cluster.Name}); err != nil { + return false, err + } + mdNames := []string{} + for _, machineDeployment := range machineDeployments.Items { + // Not adding node pool with explicit failureDomain specified to propose anti-affinity behavior + // among node pools with automatic placement only. + if machineDeployment.Spec.Template.Spec.FailureDomain == "" && machineDeployment.Name != nodePool { + mdNames = append(mdNames, machineDeployment.Name) + } + } + // turn to v4 log + log.V(2).Info("Gathered anti-affine MDs", "mdNames", mdNames) + + affInfo.affinitySpec = &vmoprv1.AffinitySpec{ + VMAffinity: &vmoprv1.VMAffinitySpec{ + RequiredDuringSchedulingPreferredDuringExecution: []vmoprv1.VMAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + clusterv1.MachineDeploymentNameLabel: nodePool, + clusterv1.ClusterNameLabel: supervisorMachineCtx.Cluster.Name, + }, + }, + TopologyKey: corev1.LabelTopologyZone, + }, + }, + }, + VMAntiAffinity: &vmoprv1.VMAntiAffinitySpec{ + PreferredDuringSchedulingPreferredDuringExecution: []vmoprv1.VMAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + clusterv1.MachineDeploymentNameLabel: nodePool, + clusterv1.ClusterNameLabel: supervisorMachineCtx.Cluster.Name, + }, + }, + TopologyKey: corev1.LabelHostname, + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + clusterv1.ClusterNameLabel: supervisorMachineCtx.Cluster.Name, + }, + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: clusterv1.MachineDeploymentNameLabel, + Operator: metav1.LabelSelectorOpIn, + Values: mdNames, + }, + }, + }, + TopologyKey: corev1.LabelTopologyZone, + }, + }, + }, + } + } + + if supervisorMachineCtx.Machine.Spec.FailureDomain != "" { + supervisorMachineCtx.VSphereMachine.Spec.FailureDomain = ptr.To(supervisorMachineCtx.Machine.Spec.FailureDomain) + } + // Check for the presence of an existing object vmOperatorVM := &vmoprv1.VirtualMachine{} key, err := virtualMachineObjectKey(supervisorMachineCtx.Machine.Name, supervisorMachineCtx.Machine.Namespace, supervisorMachineCtx.VSphereMachine.Spec.NamingStrategy) @@ -208,7 +320,7 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap } // Reconcile the VM Operator VirtualMachine. 
- if err := v.reconcileVMOperatorVM(ctx, supervisorMachineCtx, vmOperatorVM); err != nil { + if err := v.reconcileVMOperatorVM(ctx, supervisorMachineCtx, vmOperatorVM, &affInfo); err != nil { v1beta1conditions.MarkFalse(supervisorMachineCtx.VSphereMachine, infrav1.VMProvisionedCondition, vmwarev1.VMCreationFailedReason, clusterv1beta1.ConditionSeverityWarning, "failed to create or update VirtualMachine: %v", err) v1beta2conditions.Set(supervisorMachineCtx.VSphereMachine, metav1.Condition{ @@ -378,7 +490,7 @@ func (v *VmopMachineService) GetHostInfo(ctx context.Context, machineCtx capvcon return vmOperatorVM.Status.Host, nil } -func (v *VmopMachineService) reconcileVMOperatorVM(ctx context.Context, supervisorMachineCtx *vmware.SupervisorMachineContext, vmOperatorVM *vmoprv1.VirtualMachine) error { +func (v *VmopMachineService) reconcileVMOperatorVM(ctx context.Context, supervisorMachineCtx *vmware.SupervisorMachineContext, vmOperatorVM *vmoprv1.VirtualMachine, affinityInfo *affinityInfo) error { // All Machine resources should define the version of Kubernetes to use. if supervisorMachineCtx.Machine.Spec.Version == "" { return errors.Errorf( @@ -472,7 +584,7 @@ func (v *VmopMachineService) reconcileVMOperatorVM(ctx context.Context, supervis } // Assign the VM's labels. - vmOperatorVM.Labels = getVMLabels(supervisorMachineCtx, vmOperatorVM.Labels) + vmOperatorVM.Labels = getVMLabels(supervisorMachineCtx, vmOperatorVM.Labels, affinityInfo) addResourcePolicyAnnotations(supervisorMachineCtx, vmOperatorVM) @@ -494,6 +606,15 @@ func (v *VmopMachineService) reconcileVMOperatorVM(ctx context.Context, supervis vmOperatorVM = typedModified } + if affinityInfo != nil && affinityInfo.affinitySpec != nil { + if vmOperatorVM.Spec.Affinity == nil { + vmOperatorVM.Spec.Affinity = affinityInfo.affinitySpec + } + if vmOperatorVM.Spec.GroupName == "" { + vmOperatorVM.Spec.GroupName = affinityInfo.vmGroupName + } + } + // Make sure the VSphereMachine owns the VM Operator VirtualMachine. if err := ctrlutil.SetControllerReference(supervisorMachineCtx.VSphereMachine, vmOperatorVM, v.Client.Scheme()); err != nil { return errors.Wrapf(err, "failed to mark %s %s/%s as owner of %s %s/%s", @@ -735,7 +856,7 @@ func (v *VmopMachineService) addVolumes(ctx context.Context, supervisorMachineCt if zone := supervisorMachineCtx.VSphereMachine.Spec.FailureDomain; zonal && zone != nil { topology := []map[string]string{ - {kubeTopologyZoneLabelKey: *zone}, + {corev1.LabelTopologyZone: *zone}, } b, err := json.Marshal(topology) if err != nil { @@ -777,7 +898,7 @@ func (v *VmopMachineService) addVolumes(ctx context.Context, supervisorMachineCt } // getVMLabels returns the labels applied to a VirtualMachine. -func getVMLabels(supervisorMachineCtx *vmware.SupervisorMachineContext, vmLabels map[string]string) map[string]string { +func getVMLabels(supervisorMachineCtx *vmware.SupervisorMachineContext, vmLabels map[string]string, affinityInfo *affinityInfo) map[string]string { if vmLabels == nil { vmLabels = map[string]string{} } @@ -791,7 +912,11 @@ func getVMLabels(supervisorMachineCtx *vmware.SupervisorMachineContext, vmLabels // Get the labels that determine the VM's placement inside of a stretched // cluster. 
- topologyLabels := getTopologyLabels(supervisorMachineCtx)
+ var failureDomain *string
+ if affinityInfo != nil && affinityInfo.failureDomain != nil {
+ failureDomain = affinityInfo.failureDomain
+ }
+ topologyLabels := getTopologyLabels(supervisorMachineCtx, failureDomain)
for k, v := range topologyLabels {
vmLabels[k] = v
}
@@ -800,6 +925,9 @@ func getVMLabels(supervisorMachineCtx *vmware.SupervisorMachineContext, vmLabels
// resources associated with the target cluster.
vmLabels[clusterv1.ClusterNameLabel] = supervisorMachineCtx.GetClusterContext().Cluster.Name
+ // Ensure the VM has the machine deployment name label
+ vmLabels[clusterv1.MachineDeploymentNameLabel] = supervisorMachineCtx.Machine.Labels[clusterv1.MachineDeploymentNameLabel]
+
return vmLabels
}
@@ -809,10 +937,16 @@ func getVMLabels(supervisorMachineCtx *vmware.SupervisorMachineContext, vmLabels
// // and thus the code is optimized as such. However, in the future
// this function may return a more diverse topology.
-func getTopologyLabels(supervisorMachineCtx *vmware.SupervisorMachineContext) map[string]string {
+func getTopologyLabels(supervisorMachineCtx *vmware.SupervisorMachineContext, failureDomain *string) map[string]string {
+ // TODO: Make it so that we always set the zone label; this might require querying the zones present (when unset)
if fd := supervisorMachineCtx.VSphereMachine.Spec.FailureDomain; fd != nil && *fd != "" {
return map[string]string{
- kubeTopologyZoneLabelKey: *fd,
+ corev1.LabelTopologyZone: *fd,
+ }
+ }
+ if failureDomain != nil && *failureDomain != "" {
+ return map[string]string{
+ corev1.LabelTopologyZone: *failureDomain,
}
}
return nil
@@ -823,3 +957,16 @@ func getTopologyLabels(supervisorMachineCtx *vmware.SupervisorMachineContext) ma
func getMachineDeploymentNameForCluster(cluster *clusterv1.Cluster) string {
return fmt.Sprintf("%s-workers-0", cluster.Name)
}
+
+// checkVirtualMachineGroupMembership checks if the machine is in the first boot order group
+// and returns true if a match is found.
+func (v *VmopMachineService) checkVirtualMachineGroupMembership(vmOperatorVMGroup *vmoprv1.VirtualMachineGroup, supervisorMachineCtx *vmware.SupervisorMachineContext) bool {
+ if len(vmOperatorVMGroup.Spec.BootOrder) > 0 {
+ for _, member := range vmOperatorVMGroup.Spec.BootOrder[0].Members {
+ if member.Name == supervisorMachineCtx.Machine.Name {
+ return true
+ }
+ }
+ }
+ return false
+}

From 9f261d5f284b6f668d1e0f68280c6da70e7222e5 Mon Sep 17 00:00:00 2001
From: Sagar Muchhal
Date: Fri, 10 Oct 2025 13:28:02 -0700
Subject: [PATCH 05/25] Removes cluster-name label from affinity rules

Signed-off-by: Sagar Muchhal
---
 pkg/services/vmoperator/vmopmachine.go | 28 +-
 pkg/services/vmoperator/vmopmachine_test.go | 300 ++++++++++++++++++++
 2 files changed, 309 insertions(+), 19 deletions(-)

diff --git a/pkg/services/vmoperator/vmopmachine.go b/pkg/services/vmoperator/vmopmachine.go
index 0595b42b13..34586f7a9a 100644
--- a/pkg/services/vmoperator/vmopmachine.go
+++ b/pkg/services/vmoperator/vmopmachine.go
@@ -20,6 +20,7 @@ import (
"context"
"encoding/json"
"fmt"
+ "sort"
"github.com/pkg/errors"
vmoprv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha2"
@@ -164,13 +165,11 @@ func (v *VmopMachineService) SyncFailureReason(_ context.Context, machineCtx cap
return supervisorMachineCtx.VSphereMachine.Status.FailureReason != nil || supervisorMachineCtx.VSphereMachine.Status.FailureMessage != nil, nil
}
+// affinityInfo is an internal struct used to store VM affinity information.
type affinityInfo struct { affinitySpec *vmoprv1.AffinitySpec vmGroupName string failureDomain *string - - // TODO: is this needed for the single zone case? - // zones []topologyv1.Zone } // ReconcileNormal reconciles create and update events for VM Operator VMs. @@ -197,7 +196,6 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap var affInfo affinityInfo if feature.Gates.Enabled(feature.NodeAutoPlacement) && !infrautilv1.IsControlPlaneMachine(machineCtx.GetVSphereMachine()) { - // Check for the presence of a VirtualMachineGroup with the name and namespace same as the name of the Cluster vmOperatorVMGroup := &vmoprv1.VirtualMachineGroup{} key := client.ObjectKey{ Namespace: supervisorMachineCtx.Cluster.Namespace, @@ -214,26 +212,24 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap } } - // Check if the current machine is a member of the boot order - // in the VirtualMachineGroup. + // Proceed only if the machine is a member of the VirtualMachineGroup. if !v.checkVirtualMachineGroupMembership(vmOperatorVMGroup, supervisorMachineCtx) { log.V(4).Info("Waiting for VirtualMachineGroup membership, requeueing") return true, nil } - // Initialize the affinityInfo for the VM affInfo = affinityInfo{ vmGroupName: vmOperatorVMGroup.Name, } - // Check the presence of the node-pool label on the VirtualMachineGroup object + // Reuse the label from the node pool -> zone mapping. nodePool := supervisorMachineCtx.Machine.Labels[clusterv1.MachineDeploymentNameLabel] if zone, ok := vmOperatorVMGroup.Labels[fmt.Sprintf("zone.cluster.x-k8s.io/%s", nodePool)]; ok && zone != "" { affInfo.failureDomain = ptr.To(zone) } // Fetch machine deployments without explicit failureDomain specified - // to use when setting the anti-affinity rules + // to use when setting the anti-affinity rules. machineDeployments := &clusterv1.MachineDeploymentList{} if err := v.Client.List(ctx, machineDeployments, client.InNamespace(supervisorMachineCtx.Cluster.Namespace), @@ -242,14 +238,11 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap } mdNames := []string{} for _, machineDeployment := range machineDeployments.Items { - // Not adding node pool with explicit failureDomain specified to propose anti-affinity behavior - // among node pools with automatic placement only. 
if machineDeployment.Spec.Template.Spec.FailureDomain == "" && machineDeployment.Name != nodePool { mdNames = append(mdNames, machineDeployment.Name) } } - // turn to v4 log - log.V(2).Info("Gathered anti-affine MDs", "mdNames", mdNames) + sort.Strings(mdNames) affInfo.affinitySpec = &vmoprv1.AffinitySpec{ VMAffinity: &vmoprv1.VMAffinitySpec{ @@ -258,7 +251,6 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap LabelSelector: &metav1.LabelSelector{ MatchLabels: map[string]string{ clusterv1.MachineDeploymentNameLabel: nodePool, - clusterv1.ClusterNameLabel: supervisorMachineCtx.Cluster.Name, }, }, TopologyKey: corev1.LabelTopologyZone, @@ -271,16 +263,12 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap LabelSelector: &metav1.LabelSelector{ MatchLabels: map[string]string{ clusterv1.MachineDeploymentNameLabel: nodePool, - clusterv1.ClusterNameLabel: supervisorMachineCtx.Cluster.Name, }, }, TopologyKey: corev1.LabelHostname, }, { LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - clusterv1.ClusterNameLabel: supervisorMachineCtx.Cluster.Name, - }, MatchExpressions: []metav1.LabelSelectorRequirement{ { Key: clusterv1.MachineDeploymentNameLabel, @@ -926,7 +914,9 @@ func getVMLabels(supervisorMachineCtx *vmware.SupervisorMachineContext, vmLabels vmLabels[clusterv1.ClusterNameLabel] = supervisorMachineCtx.GetClusterContext().Cluster.Name // Ensure the VM has the machine deployment name label - vmLabels[clusterv1.MachineDeploymentNameLabel] = supervisorMachineCtx.Machine.Labels[clusterv1.MachineDeploymentNameLabel] + if !infrautilv1.IsControlPlaneMachine(supervisorMachineCtx.Machine) { + vmLabels[clusterv1.MachineDeploymentNameLabel] = supervisorMachineCtx.Machine.Labels[clusterv1.MachineDeploymentNameLabel] + } return vmLabels } diff --git a/pkg/services/vmoperator/vmopmachine_test.go b/pkg/services/vmoperator/vmopmachine_test.go index aa91556341..0a30b560b8 100644 --- a/pkg/services/vmoperator/vmopmachine_test.go +++ b/pkg/services/vmoperator/vmopmachine_test.go @@ -18,6 +18,7 @@ package vmoperator import ( "context" + "fmt" "testing" "time" @@ -32,6 +33,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + featuregatetesting "k8s.io/component-base/featuregate/testing" "k8s.io/utils/ptr" clusterv1beta1 "sigs.k8s.io/cluster-api/api/core/v1beta1" clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" @@ -40,6 +42,7 @@ import ( infrav1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/v1beta1" vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1" + "sigs.k8s.io/cluster-api-provider-vsphere/feature" "sigs.k8s.io/cluster-api-provider-vsphere/pkg/context/fake" "sigs.k8s.io/cluster-api-provider-vsphere/pkg/context/vmware" "sigs.k8s.io/cluster-api-provider-vsphere/pkg/services/network" @@ -65,6 +68,33 @@ func updateReconciledVMStatus(ctx context.Context, vmService VmopMachineService, Expect(err).ShouldNot(HaveOccurred()) } +// verifyVMAffinityRules is a helper method to assert the VM affinity rules. 
+func verifyVMAffinityRules(vmopVM *vmoprv1.VirtualMachine, machineDeploymentName, clusterName string) { + Expect(vmopVM.Spec.Affinity.VMAffinity).ShouldNot(BeNil()) + Expect(vmopVM.Spec.Affinity.VMAffinity.RequiredDuringSchedulingPreferredDuringExecution).To(HaveLen(1)) + + vmAffinityTerm := vmopVM.Spec.Affinity.VMAffinity.RequiredDuringSchedulingPreferredDuringExecution[0] + Expect(vmAffinityTerm.LabelSelector.MatchLabels).To(HaveKeyWithValue(clusterv1.MachineDeploymentNameLabel, machineDeploymentName)) + Expect(vmAffinityTerm.TopologyKey).To(Equal(corev1.LabelTopologyZone)) +} + +// verifyVMAntiAffinityRules is a helper method to assert the VM anti-affinity rules. +func verifyVMAntiAffinityRules(vmopVM *vmoprv1.VirtualMachine, machineDeploymentName, clusterName string) { + Expect(vmopVM.Spec.Affinity.VMAntiAffinity).ShouldNot(BeNil()) + Expect(vmopVM.Spec.Affinity.VMAntiAffinity.PreferredDuringSchedulingPreferredDuringExecution).To(HaveLen(2)) + + // First anti-affinity term - same machine deployment, different hosts + antiAffinityTerm1 := vmopVM.Spec.Affinity.VMAntiAffinity.PreferredDuringSchedulingPreferredDuringExecution[0] + Expect(antiAffinityTerm1.LabelSelector.MatchLabels).To(HaveKeyWithValue(clusterv1.MachineDeploymentNameLabel, machineDeploymentName)) + Expect(antiAffinityTerm1.TopologyKey).To(Equal(corev1.LabelHostname)) + + // Second anti-affinity term - different machine deployments + antiAffinityTerm2 := vmopVM.Spec.Affinity.VMAntiAffinity.PreferredDuringSchedulingPreferredDuringExecution[1] + Expect(antiAffinityTerm2.LabelSelector.MatchExpressions).To(HaveLen(1)) + Expect(antiAffinityTerm2.LabelSelector.MatchExpressions[0].Key).To(Equal(clusterv1.MachineDeploymentNameLabel)) + Expect(antiAffinityTerm2.LabelSelector.MatchExpressions[0].Operator).To(Equal(metav1.LabelSelectorOpIn)) +} + const ( machineName = "test-machine" clusterName = "test-cluster" @@ -655,6 +685,276 @@ var _ = Describe("VirtualMachine tests", func() { Expect(vmopVM.Spec.Volumes[i]).To(BeEquivalentTo(vmVolume)) } }) + + Context("With auto placement feature gate enabled", func() { + BeforeEach(func() { + t := GinkgoT() + featuregatetesting.SetFeatureGateDuringTest(t, feature.Gates, feature.NodeAutoPlacement, true) + }) + + // control plane machine is the machine with the control plane label set + Specify("Reconcile valid control plane Machine", func() { + // Control plane machines should not have auto placement logic applied + expectReconcileError = false + expectVMOpVM = true + expectedImageName = imageName + expectedRequeue = true + + // Provide valid bootstrap data + By("bootstrap data is created") + secretName := machine.GetName() + "-data" + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + Namespace: machine.GetNamespace(), + }, + Data: map[string][]byte{ + "value": []byte(bootstrapData), + }, + } + Expect(vmService.Client.Create(ctx, secret)).To(Succeed()) + + machine.Spec.Bootstrap.DataSecretName = &secretName + expectedConditions = append(expectedConditions, clusterv1beta1.Condition{ + Type: infrav1.VMProvisionedCondition, + Status: corev1.ConditionFalse, + Reason: vmwarev1.VMProvisionStartedReason, + Message: "", + }) + + By("VirtualMachine is created") + requeue, err = vmService.ReconcileNormal(ctx, supervisorMachineContext) + verifyOutput(supervisorMachineContext) + + By("Verify that control plane machine does not have affinity spec set") + vmopVM = getReconciledVM(ctx, vmService, supervisorMachineContext) + Expect(vmopVM).ShouldNot(BeNil()) + 
Expect(vmopVM.Spec.Affinity).To(BeNil()) + + By("Verify that control plane machine has correct labels") + Expect(vmopVM.Labels[nodeSelectorKey]).To(Equal(roleControlPlane)) + + By("Verify that machine-deployment label is not set for control plane") + Expect(vmopVM.Labels).ToNot(HaveKey(clusterv1.MachineDeploymentNameLabel)) + }) + + Context("For worker machine", func() { + var ( + machineDeploymentName string + vmGroup *vmoprv1.VirtualMachineGroup + ) + + BeforeEach(func() { + // Create a worker machine (no control plane label) + machineDeploymentName = "test-md" + workerMachineName := "test-worker-machine" + machine = util.CreateMachine(workerMachineName, clusterName, k8sVersion, false) + machine.Labels[clusterv1.MachineDeploymentNameLabel] = machineDeploymentName + + vsphereMachine = util.CreateVSphereMachine(workerMachineName, clusterName, className, imageName, storageClass, false) + + clusterContext, controllerManagerContext := util.CreateClusterContext(cluster, vsphereCluster) + supervisorMachineContext = util.CreateMachineContext(clusterContext, machine, vsphereMachine) + supervisorMachineContext.ControllerManagerContext = controllerManagerContext + + // Create a VirtualMachineGroup for the cluster + vmGroup = &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: corev1.NamespaceDefault, + }, + Spec: vmoprv1.VirtualMachineGroupSpec{ + BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ + { + Members: []vmoprv1.GroupMember{ + { + Name: workerMachineName, + Kind: "VirtualMachine", + }, + }, + }, + }, + }, + } + Expect(vmService.Client.Create(ctx, vmGroup)).To(Succeed()) + + // Create a MachineDeployment for the worker + machineDeployment := &clusterv1.MachineDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: machineDeploymentName, + Namespace: corev1.NamespaceDefault, + Labels: map[string]string{ + clusterv1.ClusterNameLabel: clusterName, + }, + }, + Spec: clusterv1.MachineDeploymentSpec{ + Template: clusterv1.MachineTemplateSpec{ + Spec: clusterv1.MachineSpec{ + // No failure domain set + }, + }, + }, + } + Expect(vmService.Client.Create(ctx, machineDeployment)).To(Succeed()) + }) + + Specify("Reconcile valid Machine with no failure domain set", func() { + expectReconcileError = false + expectVMOpVM = true + expectedImageName = imageName + expectedRequeue = true + + // Provide valid bootstrap data + By("bootstrap data is created") + secretName := machine.GetName() + "-data" + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + Namespace: machine.GetNamespace(), + }, + Data: map[string][]byte{ + "value": []byte(bootstrapData), + }, + } + Expect(vmService.Client.Create(ctx, secret)).To(Succeed()) + + machine.Spec.Bootstrap.DataSecretName = &secretName + + By("VirtualMachine is created") + requeue, err = vmService.ReconcileNormal(ctx, supervisorMachineContext) + Expect(err).ShouldNot(HaveOccurred()) + Expect(requeue).Should(BeTrue()) + + By("Verify that worker machine has affinity spec set") + vmopVM = getReconciledVM(ctx, vmService, supervisorMachineContext) + Expect(vmopVM).ShouldNot(BeNil()) + Expect(vmopVM.Spec.Affinity).ShouldNot(BeNil()) + + By("Verify VM affinity rules are set correctly") + verifyVMAffinityRules(vmopVM, machineDeploymentName, clusterName) + + By("Verify VM anti-affinity rules are set correctly") + verifyVMAntiAffinityRules(vmopVM, machineDeploymentName, clusterName) + + By("Verify that worker machine has machine deploymet label set") + 
Expect(vmopVM.Labels[clusterv1.MachineDeploymentNameLabel]).To(Equal(machineDeploymentName)) + + By("Verify that GroupName is set from VirtualMachineGroup") + Expect(vmopVM.Spec.GroupName).To(Equal(clusterName)) + }) + + Specify("Reconcile machine with failure domain set", func() { + expectReconcileError = false + expectVMOpVM = true + expectedImageName = imageName + expectedRequeue = true + + failureDomainName := "zone-1" + machineDeploymentName := "test-md-with-fd" + workerMachineName := "test-worker-machine-with-fd" + fdClusterName := "test-cluster-fd" + + // Create a separate cluster for this test to avoid VirtualMachineGroup conflicts + fdCluster := util.CreateCluster(fdClusterName) + fdVSphereCluster := util.CreateVSphereCluster(fdClusterName) + fdVSphereCluster.Status.ResourcePolicyName = resourcePolicyName + + // Create a worker machine with failure domain + machine = util.CreateMachine(workerMachineName, fdClusterName, k8sVersion, false) + machine.Labels[clusterv1.MachineDeploymentNameLabel] = machineDeploymentName + machine.Spec.FailureDomain = failureDomainName + + vsphereMachine = util.CreateVSphereMachine(workerMachineName, fdClusterName, className, imageName, storageClass, false) + + fdClusterContext, fdControllerManagerContext := util.CreateClusterContext(fdCluster, fdVSphereCluster) + supervisorMachineContext = util.CreateMachineContext(fdClusterContext, machine, vsphereMachine) + supervisorMachineContext.ControllerManagerContext = fdControllerManagerContext + + // Create a VirtualMachineGroup for the cluster with zone label + vmGroup := &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: fdClusterName, + Namespace: corev1.NamespaceDefault, + Labels: map[string]string{ + fmt.Sprintf("zone.cluster.x-k8s.io/%s", machineDeploymentName): failureDomainName, + }, + }, + Spec: vmoprv1.VirtualMachineGroupSpec{ + BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ + { + Members: []vmoprv1.GroupMember{ + { + Name: workerMachineName, + Kind: "VirtualMachine", + }, + }, + }, + }, + }, + } + Expect(vmService.Client.Create(ctx, vmGroup)).To(Succeed()) + + // Create a MachineDeployment for the worker with no explicit failure domain + machineDeployment := &clusterv1.MachineDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: machineDeploymentName, + Namespace: corev1.NamespaceDefault, + Labels: map[string]string{ + clusterv1.ClusterNameLabel: fdClusterName, + }, + }, + Spec: clusterv1.MachineDeploymentSpec{ + Template: clusterv1.MachineTemplateSpec{ + Spec: clusterv1.MachineSpec{ + // No failure domain set on template + }, + }, + }, + } + Expect(vmService.Client.Create(ctx, machineDeployment)).To(Succeed()) + + // Provide valid bootstrap data + By("bootstrap data is created") + secretName := machine.GetName() + "-data" + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + Namespace: machine.GetNamespace(), + }, + Data: map[string][]byte{ + "value": []byte(bootstrapData), + }, + } + Expect(vmService.Client.Create(ctx, secret)).To(Succeed()) + + machine.Spec.Bootstrap.DataSecretName = &secretName + + By("VirtualMachine is created with auto placement and failure domain") + requeue, err = vmService.ReconcileNormal(ctx, supervisorMachineContext) + Expect(err).ShouldNot(HaveOccurred()) + Expect(requeue).Should(BeTrue()) + + By("Verify that worker machine has affinity spec set") + vmopVM = getReconciledVM(ctx, vmService, supervisorMachineContext) + Expect(vmopVM).ShouldNot(BeNil()) + Expect(vmopVM.Spec.Affinity).ShouldNot(BeNil()) + + By("Verify 
VM affinity rules are set correctly") + verifyVMAffinityRules(vmopVM, machineDeploymentName, fdClusterName) + + By("Verify VM anti-affinity rules are set correctly") + verifyVMAntiAffinityRules(vmopVM, machineDeploymentName, fdClusterName) + + By("Verify that worker machine has correct labels including topology") + Expect(vmopVM.Labels[clusterv1.MachineDeploymentNameLabel]).To(Equal(machineDeploymentName)) + Expect(vmopVM.Labels[corev1.LabelTopologyZone]).To(Equal(failureDomainName)) + + By("Verify that GroupName is set from VirtualMachineGroup") + Expect(vmopVM.Spec.GroupName).To(Equal(fdClusterName)) + }) + }) + + }) }) Context("Delete tests", func() { From 22d7db061d1aaaee8ea825b83e75d52dfbed7a0c Mon Sep 17 00:00:00 2001 From: Sagar Muchhal Date: Mon, 13 Oct 2025 11:08:14 -0700 Subject: [PATCH 06/25] Selectively add node-pool AAF constraint Signed-off-by: Sagar Muchhal --- pkg/services/vmoperator/vmopmachine.go | 25 +-- pkg/services/vmoperator/vmopmachine_test.go | 165 ++++++++++++++------ 2 files changed, 133 insertions(+), 57 deletions(-) diff --git a/pkg/services/vmoperator/vmopmachine.go b/pkg/services/vmoperator/vmopmachine.go index 34586f7a9a..18e4aae3e7 100644 --- a/pkg/services/vmoperator/vmopmachine.go +++ b/pkg/services/vmoperator/vmopmachine.go @@ -267,20 +267,25 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap }, TopologyKey: corev1.LabelHostname, }, - { - LabelSelector: &metav1.LabelSelector{ - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: clusterv1.MachineDeploymentNameLabel, - Operator: metav1.LabelSelectorOpIn, - Values: mdNames, - }, + }, + }, + } + if len(mdNames) > 0 { + affInfo.affinitySpec.VMAntiAffinity.PreferredDuringSchedulingPreferredDuringExecution = append( + affInfo.affinitySpec.VMAntiAffinity.PreferredDuringSchedulingPreferredDuringExecution, + vmoprv1.VMAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: clusterv1.MachineDeploymentNameLabel, + Operator: metav1.LabelSelectorOpIn, + Values: mdNames, }, }, - TopologyKey: corev1.LabelTopologyZone, }, + TopologyKey: corev1.LabelTopologyZone, }, - }, + ) } } diff --git a/pkg/services/vmoperator/vmopmachine_test.go b/pkg/services/vmoperator/vmopmachine_test.go index 0a30b560b8..8d4ca34510 100644 --- a/pkg/services/vmoperator/vmopmachine_test.go +++ b/pkg/services/vmoperator/vmopmachine_test.go @@ -19,6 +19,7 @@ package vmoperator import ( "context" "fmt" + "slices" "testing" "time" @@ -68,8 +69,7 @@ func updateReconciledVMStatus(ctx context.Context, vmService VmopMachineService, Expect(err).ShouldNot(HaveOccurred()) } -// verifyVMAffinityRules is a helper method to assert the VM affinity rules. -func verifyVMAffinityRules(vmopVM *vmoprv1.VirtualMachine, machineDeploymentName, clusterName string) { +func verifyVMAffinityRules(vmopVM *vmoprv1.VirtualMachine, machineDeploymentName string) { Expect(vmopVM.Spec.Affinity.VMAffinity).ShouldNot(BeNil()) Expect(vmopVM.Spec.Affinity.VMAffinity.RequiredDuringSchedulingPreferredDuringExecution).To(HaveLen(1)) @@ -78,21 +78,38 @@ func verifyVMAffinityRules(vmopVM *vmoprv1.VirtualMachine, machineDeploymentName Expect(vmAffinityTerm.TopologyKey).To(Equal(corev1.LabelTopologyZone)) } -// verifyVMAntiAffinityRules is a helper method to assert the VM anti-affinity rules. 
-func verifyVMAntiAffinityRules(vmopVM *vmoprv1.VirtualMachine, machineDeploymentName, clusterName string) { +func verifyVMAntiAffinityRules(vmopVM *vmoprv1.VirtualMachine, machineDeploymentName string, extraMDs ...string) { Expect(vmopVM.Spec.Affinity.VMAntiAffinity).ShouldNot(BeNil()) - Expect(vmopVM.Spec.Affinity.VMAntiAffinity.PreferredDuringSchedulingPreferredDuringExecution).To(HaveLen(2)) - // First anti-affinity term - same machine deployment, different hosts - antiAffinityTerm1 := vmopVM.Spec.Affinity.VMAntiAffinity.PreferredDuringSchedulingPreferredDuringExecution[0] + expectedNumAntiAffinityTerms := 1 + if len(extraMDs) > 0 { + expectedNumAntiAffinityTerms = 2 + } + + antiAffinityTerms := vmopVM.Spec.Affinity.VMAntiAffinity.PreferredDuringSchedulingPreferredDuringExecution + Expect(antiAffinityTerms).To(HaveLen(expectedNumAntiAffinityTerms)) + + // First anti-affinity constraint - same machine deployment, different hosts + antiAffinityTerm1 := antiAffinityTerms[0] Expect(antiAffinityTerm1.LabelSelector.MatchLabels).To(HaveKeyWithValue(clusterv1.MachineDeploymentNameLabel, machineDeploymentName)) Expect(antiAffinityTerm1.TopologyKey).To(Equal(corev1.LabelHostname)) // Second anti-affinity term - different machine deployments - antiAffinityTerm2 := vmopVM.Spec.Affinity.VMAntiAffinity.PreferredDuringSchedulingPreferredDuringExecution[1] - Expect(antiAffinityTerm2.LabelSelector.MatchExpressions).To(HaveLen(1)) - Expect(antiAffinityTerm2.LabelSelector.MatchExpressions[0].Key).To(Equal(clusterv1.MachineDeploymentNameLabel)) - Expect(antiAffinityTerm2.LabelSelector.MatchExpressions[0].Operator).To(Equal(metav1.LabelSelectorOpIn)) + if len(extraMDs) > 0 { + isSortedAlphabetically := func(actual []string) (bool, error) { + return slices.IsSorted(actual), nil + } + antiAffinityTerm2 := antiAffinityTerms[1] + Expect(antiAffinityTerm2.LabelSelector.MatchExpressions).To(HaveLen(1)) + Expect(antiAffinityTerm2.LabelSelector.MatchExpressions[0].Key).To(Equal(clusterv1.MachineDeploymentNameLabel)) + Expect(antiAffinityTerm2.LabelSelector.MatchExpressions[0].Operator).To(Equal(metav1.LabelSelectorOpIn)) + + Expect(antiAffinityTerm2.LabelSelector.MatchExpressions[0].Values).To(HaveLen(len(extraMDs))) + Expect(antiAffinityTerm2.LabelSelector.MatchExpressions[0].Values).To( + WithTransform(isSortedAlphabetically, BeTrue()), + "Expected extra machine deployments to be sorted alphabetically", + ) + } } const ( @@ -111,6 +128,32 @@ const ( clusterNameLabel = clusterv1.ClusterNameLabel ) +func createMachineDeployment(name, namespace, clusterName, failureDomain string) *clusterv1.MachineDeployment { + md := &clusterv1.MachineDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Labels: map[string]string{ + clusterv1.ClusterNameLabel: clusterName, + }, + }, + Spec: clusterv1.MachineDeploymentSpec{ + Template: clusterv1.MachineTemplateSpec{ + Spec: clusterv1.MachineSpec{ + // FailureDomain will be set conditionally below + }, + }, + }, + } + + // Only set failure domain if it's provided and not empty + if failureDomain != "" { + md.Spec.Template.Spec.FailureDomain = failureDomain + } + + return md +} + var _ = Describe("VirtualMachine tests", func() { var ( @@ -779,22 +822,7 @@ var _ = Describe("VirtualMachine tests", func() { Expect(vmService.Client.Create(ctx, vmGroup)).To(Succeed()) // Create a MachineDeployment for the worker - machineDeployment := &clusterv1.MachineDeployment{ - ObjectMeta: metav1.ObjectMeta{ - Name: machineDeploymentName, - Namespace: 
corev1.NamespaceDefault, - Labels: map[string]string{ - clusterv1.ClusterNameLabel: clusterName, - }, - }, - Spec: clusterv1.MachineDeploymentSpec{ - Template: clusterv1.MachineTemplateSpec{ - Spec: clusterv1.MachineSpec{ - // No failure domain set - }, - }, - }, - } + machineDeployment := createMachineDeployment(machineDeploymentName, corev1.NamespaceDefault, clusterName, "") Expect(vmService.Client.Create(ctx, machineDeployment)).To(Succeed()) }) @@ -831,10 +859,10 @@ var _ = Describe("VirtualMachine tests", func() { Expect(vmopVM.Spec.Affinity).ShouldNot(BeNil()) By("Verify VM affinity rules are set correctly") - verifyVMAffinityRules(vmopVM, machineDeploymentName, clusterName) + verifyVMAffinityRules(vmopVM, machineDeploymentName) By("Verify VM anti-affinity rules are set correctly") - verifyVMAntiAffinityRules(vmopVM, machineDeploymentName, clusterName) + verifyVMAntiAffinityRules(vmopVM, machineDeploymentName) By("Verify that worker machine has machine deploymet label set") Expect(vmopVM.Labels[clusterv1.MachineDeploymentNameLabel]).To(Equal(machineDeploymentName)) @@ -895,22 +923,7 @@ var _ = Describe("VirtualMachine tests", func() { Expect(vmService.Client.Create(ctx, vmGroup)).To(Succeed()) // Create a MachineDeployment for the worker with no explicit failure domain - machineDeployment := &clusterv1.MachineDeployment{ - ObjectMeta: metav1.ObjectMeta{ - Name: machineDeploymentName, - Namespace: corev1.NamespaceDefault, - Labels: map[string]string{ - clusterv1.ClusterNameLabel: fdClusterName, - }, - }, - Spec: clusterv1.MachineDeploymentSpec{ - Template: clusterv1.MachineTemplateSpec{ - Spec: clusterv1.MachineSpec{ - // No failure domain set on template - }, - }, - }, - } + machineDeployment := createMachineDeployment(machineDeploymentName, corev1.NamespaceDefault, fdClusterName, "") Expect(vmService.Client.Create(ctx, machineDeployment)).To(Succeed()) // Provide valid bootstrap data @@ -940,10 +953,10 @@ var _ = Describe("VirtualMachine tests", func() { Expect(vmopVM.Spec.Affinity).ShouldNot(BeNil()) By("Verify VM affinity rules are set correctly") - verifyVMAffinityRules(vmopVM, machineDeploymentName, fdClusterName) + verifyVMAffinityRules(vmopVM, machineDeploymentName) By("Verify VM anti-affinity rules are set correctly") - verifyVMAntiAffinityRules(vmopVM, machineDeploymentName, fdClusterName) + verifyVMAntiAffinityRules(vmopVM, machineDeploymentName) By("Verify that worker machine has correct labels including topology") Expect(vmopVM.Labels[clusterv1.MachineDeploymentNameLabel]).To(Equal(machineDeploymentName)) @@ -952,6 +965,64 @@ var _ = Describe("VirtualMachine tests", func() { By("Verify that GroupName is set from VirtualMachineGroup") Expect(vmopVM.Spec.GroupName).To(Equal(fdClusterName)) }) + + Context("For multiple machine deployments", func() { + const ( + otherMdName1 = "other-md-1" + otherMdName2 = "other-md-2" + ) + + BeforeEach(func() { + otherMd1 := createMachineDeployment(otherMdName1, corev1.NamespaceDefault, clusterName, "") + Expect(vmService.Client.Create(ctx, otherMd1)).To(Succeed()) + + otherMd2 := createMachineDeployment(otherMdName2, corev1.NamespaceDefault, clusterName, "") + Expect(vmService.Client.Create(ctx, otherMd2)).To(Succeed()) + + // Create a MachineDeployment with failure domain + otherMdWithFd := createMachineDeployment("other-md-with-fd", corev1.NamespaceDefault, clusterName, "zone-1") + Expect(vmService.Client.Create(ctx, otherMdWithFd)).To(Succeed()) + }) + + Specify("Reconcile valid machine with additional anti-affinity term added", func() 
{
+ expectReconcileError = false
+ expectVMOpVM = true
+ expectedImageName = imageName
+ expectedRequeue = true
+
+ // Provide valid bootstrap data
+ By("bootstrap data is created")
+ secretName := machine.GetName() + "-data"
+ secret := &corev1.Secret{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: secretName,
+ Namespace: machine.GetNamespace(),
+ },
+ Data: map[string][]byte{
+ "value": []byte(bootstrapData),
+ },
+ }
+ Expect(vmService.Client.Create(ctx, secret)).To(Succeed())
+
+ machine.Spec.Bootstrap.DataSecretName = &secretName
+
+ By("VirtualMachine is created")
+ requeue, err = vmService.ReconcileNormal(ctx, supervisorMachineContext)
+ Expect(err).ShouldNot(HaveOccurred())
+ Expect(requeue).Should(BeTrue())
+
+ By("Verify that worker machine has affinity spec set")
+ vmopVM = getReconciledVM(ctx, vmService, supervisorMachineContext)
+ Expect(vmopVM).ShouldNot(BeNil())
+ Expect(vmopVM.Spec.Affinity).ShouldNot(BeNil())
+
+ By("Verify VM affinity rules are set correctly")
+ verifyVMAffinityRules(vmopVM, machineDeploymentName)
+
+ By("Verify VM anti-affinity rules are set correctly")
+ verifyVMAntiAffinityRules(vmopVM, machineDeploymentName, otherMdName1, otherMdName2)
+ })
+ })
})
})

From 84749426953dff5274e0b84327b4f662013d6eab Mon Sep 17 00:00:00 2001
From: Gong Zhang
Date: Tue, 14 Oct 2025 02:16:19 +0800
Subject: [PATCH 07/25] Refine VMG controller when generating per-MD zone labels (#71)

* Refine VMG controller when generating per-MD zone labels

- Skip legacy already-placed VMs which do not have placement info
- Skip VMs which do not have zone info

* Apply suggestions from code review

---------

Co-authored-by: Sagar Muchhal
---
 controllers/vmware/virtualmachinegroup_reconciler.go | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/controllers/vmware/virtualmachinegroup_reconciler.go b/controllers/vmware/virtualmachinegroup_reconciler.go
index a9a417bf28..aa65ba62a4 100644
--- a/controllers/vmware/virtualmachinegroup_reconciler.go
+++ b/controllers/vmware/virtualmachinegroup_reconciler.go
@@ -243,6 +243,9 @@ func (r *VirtualMachineGroupReconciler) createOrUpdateVMG(ctx context.Context, c
// Do not update per-md-zone label once set, as placement decision should not change without user explicitly
// ask.
placementDecisionLabels, err := GenerateVMGPlacementLabels(ctx, desiredVMG, mdNames)
+ if err != nil {
+ return err
+ }
if len(placementDecisionLabels) > 0 {
for k, v := range placementDecisionLabels {
if _, exists := desiredVMG.Labels[k]; exists {
@@ -445,14 +448,17 @@ func GenerateVMGPlacementLabels(ctx context.Context, vmg *vmoprv1.VirtualMachine
// TODO: Establish membership via the machine deployment name label
if strings.Contains(member.Name, md) {
// Get the VM placement information by member status.
+ // Legacy VMs that were already placed may not have Placement info set; skip them.
if member.Placement == nil {
- return nil, errors.Errorf("VM %s in VMG %s/%s has no placement info. Placement is nil)", member.Name, vmg.Namespace, vmg.Name)
+ log.V(4).Info("VM in VMG has no placement info. Placement is nil", "VM", member.Name, "VMG", vmg.Name, "Namespace", vmg.Namespace)
+ continue
}
- // Get the VM placement information by member status.
+ // Skip to next member if Zone is empty.
zone := member.Placement.Zone
if zone == "" {
- return nil, errors.Errorf("VM %s in VMG %s/%s has no placement info. Zone is empty", member.Name, vmg.Namespace, vmg.Name)
+ log.V(4).Info("VM in VMG has no placement info. 
Zone is empty", "VM", member.Name, "VMG", vmg.Name, "Namespace", vmg.Namespace) + continue } log.Info(fmt.Sprintf("VM %s in VMG %s/%s has been placed in zone %s", member.Name, vmg.Namespace, vmg.Name, zone)) From 833cdb527a439b3fa2c70acb63fda6ca3e3f00c2 Mon Sep 17 00:00:00 2001 From: Gong Zhang Date: Wed, 22 Oct 2025 16:34:51 +0800 Subject: [PATCH 08/25] Sync VSphereMachines in VMG controller - Sync VSphereMachines during day-2 operations in VMG controller - Only wait for all intended VSphereMachines during initial Cluster creation - Use annotations in VMG for per-md-zone info Signed-off-by: Gong Zhang --- .../vmware/virtualmachinegroup_controller.go | 39 ++ .../vmware/virtualmachinegroup_reconciler.go | 225 ++----- .../virtualmachinegroup_reconciler_test.go | 556 ++++++++++++++++++ pkg/services/vmoperator/vmopmachine.go | 5 +- 4 files changed, 648 insertions(+), 177 deletions(-) create mode 100644 controllers/vmware/virtualmachinegroup_reconciler_test.go diff --git a/controllers/vmware/virtualmachinegroup_controller.go b/controllers/vmware/virtualmachinegroup_controller.go index edfc5d0211..94606f541d 100644 --- a/controllers/vmware/virtualmachinegroup_controller.go +++ b/controllers/vmware/virtualmachinegroup_controller.go @@ -21,6 +21,7 @@ import ( vmoprv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha2" apitypes "k8s.io/apimachinery/pkg/types" + vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1" capvcontext "sigs.k8s.io/cluster-api-provider-vsphere/pkg/context" clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" "sigs.k8s.io/cluster-api/util/predicates" @@ -70,11 +71,16 @@ func AddVirtualMachineGroupControllerToManager(ctx context.Context, controllerMa &clusterv1.Cluster{}, handler.EnqueueRequestsFromMapFunc(reconciler.ClusterToVirtualMachineGroup), ). + Watches( + &vmwarev1.VSphereMachine{}, + handler.EnqueueRequestsFromMapFunc(reconciler.VSphereMachineToVirtualMachineGroup), + ). WithEventFilter(predicates.ResourceHasFilterLabel(mgr.GetScheme(), predicateLog, controllerManagerCtx.WatchFilterValue)) return builder.Complete(reconciler) } +// ClusterToVirtualMachineGroup maps Cluster events to VirtualMachineGroup reconcile requests. func (r VirtualMachineGroupReconciler) ClusterToVirtualMachineGroup(ctx context.Context, a ctrlclient.Object) []reconcile.Request { cluster, ok := a.(*clusterv1.Cluster) if !ok { @@ -89,3 +95,36 @@ func (r VirtualMachineGroupReconciler) ClusterToVirtualMachineGroup(ctx context. }, }} } + +// VsphereMachineToVirtualMachineGroup maps VSphereMachine events to VirtualMachineGroup reconcile requests. +// This handler only processes VSphereMachine objects for Day-2 operations, ensuring VSphereMachine state stays +// in sync with its owning VMG. If no corresponding VMG is found, this is a no-op. 
+ +func (r VirtualMachineGroupReconciler) VSphereMachineToVirtualMachineGroup(ctx context.Context, a ctrlclient.Object) []reconcile.Request { + vSphereMachine, ok := a.(*vmwarev1.VSphereMachine) + if !ok { + return nil + } + + clusterName, ok := vSphereMachine.Labels[clusterv1.ClusterNameLabel] + if !ok || clusterName == "" { + return nil + } + + vmg := &vmoprv1.VirtualMachineGroup{} + err := r.Client.Get(ctx, apitypes.NamespacedName{ + Namespace: vSphereMachine.Namespace, + Name: clusterName, + }, vmg) + + if err != nil { + return nil + } + + return []reconcile.Request{{ + NamespacedName: apitypes.NamespacedName{ + Namespace: vmg.Namespace, + Name: vmg.Name, + }, + }} +} diff --git a/controllers/vmware/virtualmachinegroup_reconciler.go b/controllers/vmware/virtualmachinegroup_reconciler.go index aa65ba62a4..9f2c811449 100644 --- a/controllers/vmware/virtualmachinegroup_reconciler.go +++ b/controllers/vmware/virtualmachinegroup_reconciler.go @@ -69,61 +69,6 @@ func (r *VirtualMachineGroupReconciler) Reconcile(ctx context.Context, req ctrl. return reconcile.Result{}, nil } - vmg := &vmoprv1.VirtualMachineGroup{} - key := &client.ObjectKey{ - Namespace: cluster.Namespace, - Name: cluster.Name, - } - - if err := r.Client.Get(ctx, *key, vmg); err != nil { - if !apierrors.IsNotFound(err) { - log.Error(err, "failed to get VirtualMachineGroup") - return ctrl.Result{}, err - } - vmg = &vmoprv1.VirtualMachineGroup{ - ObjectMeta: metav1.ObjectMeta{ - Name: key.Name, - Namespace: key.Namespace, - }, - } - } - - // // Proceed only if multiple zones are available. - // // If there is only one zone(default), node automatic placement is unnecessary - // // because all Machine Deployments will be scheduled into that single zone. - // // The VSphereCluster resource discovers the underlying zones, - // // which we treat as the source of truth. - // vsphereClusterList := &vmwarev1.VSphereClusterList{} - // labelKey := clusterv1.ClusterNameLabel - // if err := r.Client.List(ctx, vsphereClusterList, - // client.InNamespace(cluster.Namespace), - // client.MatchingLabels(map[string]string{labelKey: cluster.Name}), - // ); err != nil { - // return reconcile.Result{}, fmt.Errorf("failed to list VSphereClusters in namespace %s: %w", cluster.Namespace, err) - // } - - // vsphereCluster := &vmwarev1.VSphereCluster{} - // switch len(vsphereClusterList.Items) { - // case 0: - // return reconcile.Result{}, fmt.Errorf("no VSphereCluster found with label %s=%s in namespace %s", labelKey, cluster.Name, cluster.Namespace) - // case 1: - // vsphereCluster = &vsphereClusterList.Items[0] - // default: - // return reconcile.Result{}, fmt.Errorf("found %d VSphereClusters with label %s=%s in namespace %s; expected exactly 1", len(vsphereClusterList.Items), labelKey, cluster.Name, cluster.Namespace) - // } - - // // Fetch the VSphereCluster instance. - // if vsphereCluster.Status.Ready != true { - // log.Info("Waiting for VSphereCluster to be ready with failure domain discovered") - // return reconcile.Result{RequeueAfter: reconciliationDelay}, nil - - // } - - // if len(vsphereCluster.Status.FailureDomains) <= 1 { - // log.Info("Single or no zone detected; skipping node automatic placement") - // return reconcile.Result{}, nil - // } - // If ControlPlane haven't initialized, requeue it since VSphereMachines of MachineDeployment will only be created after // ControlPlane is initialized. 
if !conditions.IsTrue(cluster, clusterv1.ClusterControlPlaneInitializedCondition) { @@ -132,69 +77,59 @@ func (r *VirtualMachineGroupReconciler) Reconcile(ctx context.Context, req ctrl. } // Continue with the main logic. - return r.createOrUpdateVMG(ctx, cluster, vmg) + return r.createOrUpdateVMG(ctx, cluster) } // createOrUpdateVMG Create or Update VirtualMachineGroup -func (r *VirtualMachineGroupReconciler) createOrUpdateVMG(ctx context.Context, cluster *clusterv1.Cluster, desiredVMG *vmoprv1.VirtualMachineGroup) (_ reconcile.Result, reterr error) { +func (r *VirtualMachineGroupReconciler) createOrUpdateVMG(ctx context.Context, cluster *clusterv1.Cluster) (_ reconcile.Result, reterr error) { log := ctrl.LoggerFrom(ctx) - // Calculate expected Machines of all MachineDeployments. - expectedMachines := getExpectedMachines(cluster) - if expectedMachines == 0 { - log.Info("none of MachineDeployments specifies replica and node auto replacement doesn't support this scenario") - return reconcile.Result{}, nil - } - // Calculate current Machines of all MachineDeployments. - currentVSphereMachines, err := getCurrentVSphereMachines(ctx, r.Client, cluster.Namespace, cluster.Name) + current, err := getCurrentVSphereMachines(ctx, r.Client, cluster.Namespace, cluster.Name) if err != nil { return reconcile.Result{}, errors.Wrapf(err, "failed to get current VSphereMachine of cluster %s/%s", cluster.Name, cluster.Namespace) } - // Wait until all VSphereMachines are create, this could happen during initial deployment or day-2 like cluster update. - current := int32(len(currentVSphereMachines)) - if current < expectedMachines { - // Only check timeout if VMG doesn't exist. - // if desiredVMG.CreationTimestamp.IsZero() { - // if _, err := r.isMDDefined(ctx, cluster); err != nil { - // log.Error(err, "cluster MachineDeployments are not defined") - // return reconcile.Result{}, nil - // } - - // mdList := &clusterv1.MachineDeploymentList{} - // if err := r.Client.List(ctx, mdList, - // client.InNamespace(cluster.Namespace), - // client.MatchingLabels{clusterv1.ClusterNameLabel: cluster.Name}, - // ); err != nil { - // return reconcile.Result{}, errors.Errorf("failed to list MachineDeployments: %w", err) - // } - - // // If no deployments exist, report error - // if len(mdList.Items) == 0 { - // return reconcile.Result{}, errors.Errorf("no MachineDeployments found for cluster %s/%s", cluster.Namespace, cluster.Name) - // } - - // // Check one MachineDeployment's creation timestamp - // firstMD := mdList.Items[0] - // if time.Since(firstMD.CreationTimestamp.Time) > 1*time.Minute { - // log.Error(errors.New("timeout waiting for VSphereMachines"), "1 minute timeout after MachineDeployment creation", - // "MachineDeployment", firstMD.Name, "Cluster", cluster.Namespace+"/"+cluster.Name) - - // return reconcile.Result{}, nil - // } - // } - - log.Info("current VSphereMachines do not match expected", "Expected:", expectedMachines, - "Current:", current, "ClusterName", cluster.Name, "Namespace", cluster.Namespace) - return reconcile.Result{RequeueAfter: reconciliationDelay}, nil + desiredVMG := &vmoprv1.VirtualMachineGroup{} + key := &client.ObjectKey{ + Namespace: cluster.Namespace, + Name: cluster.Name, + } + + if err := r.Client.Get(ctx, *key, desiredVMG); err != nil { + if !apierrors.IsNotFound(err) { + log.Error(err, "failed to get VirtualMachineGroup") + return ctrl.Result{}, err + } + + // Calculate expected Machines of all MachineDeployments. 
+ expected := getExpectedVSphereMachines(cluster) + if expected == 0 { + log.Info("none of MachineDeployments specifies replica and node auto replacement doesn't support this scenario") + return reconcile.Result{}, nil + } + + // Wait for all intended VSphereMachines corresponding to MachineDeployment to exist only during initial Cluster creation. + current := int32(len(current)) + if current < expected { + log.Info("current VSphereMachines do not match expected", "Expected:", expected, + "Current:", current, "ClusterName", cluster.Name, "Namespace", cluster.Namespace) + return reconcile.Result{RequeueAfter: reconciliationDelay}, nil + } + + desiredVMG = &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: key.Name, + Namespace: key.Namespace, + }, + } } // Generate VM names according to the naming strategy set on the VSphereMachine. - vmNames := make([]string, 0, len(currentVSphereMachines)) - for _, machine := range currentVSphereMachines { + vmNames := make([]string, 0, len(current)) + for _, machine := range current { name, err := GenerateVirtualMachineName(machine.Name, machine.Spec.NamingStrategy) if err != nil { return reconcile.Result{}, err @@ -206,7 +141,7 @@ func (r *VirtualMachineGroupReconciler) createOrUpdateVMG(ctx context.Context, c return vmNames[i] < vmNames[j] }) - members := make([]vmoprv1.GroupMember, 0, len(currentVSphereMachines)) + members := make([]vmoprv1.GroupMember, 0, len(current)) for _, name := range vmNames { members = append(members, vmoprv1.GroupMember{ Name: name, @@ -242,7 +177,7 @@ func (r *VirtualMachineGroupReconciler) createOrUpdateVMG(ctx context.Context, c // Add per-md-zone label for day-2 operations once placement of a VM belongs to MachineDeployment is done // Do not update per-md-zone label once set, as placement decision should not change without user explicitly // ask. - placementDecisionLabels, err := GenerateVMGPlacementLabels(ctx, desiredVMG, mdNames) + placementDecisionLabels, err := GenerateVMGPlacementAnnotations(ctx, desiredVMG, mdNames) if err != nil { return err } @@ -318,9 +253,9 @@ func (r *VirtualMachineGroupReconciler) isExplicitPlacement(cluster *clusterv1.C return false, nil } -// getExpectedMachines returns the total number of replicas across all +// getExpectedVSphereMachines returns the total number of replicas across all // MachineDeployments in the Cluster's Topology.Workers. -func getExpectedMachines(cluster *clusterv1.Cluster) int32 { +func getExpectedVSphereMachines(cluster *clusterv1.Cluster) int32 { if !cluster.Spec.Topology.IsDefined() { return 0 } @@ -337,66 +272,6 @@ func getExpectedMachines(cluster *clusterv1.Cluster) int32 { func getCurrentVSphereMachines(ctx context.Context, kubeClient client.Client, clusterNamespace, clusterName string) ([]vmwarev1.VSphereMachine, error) { log := ctrl.LoggerFrom(ctx) - // // List MachineDeployments for the cluster. - // var mdList clusterv1.MachineDeploymentList - // if err := kubeClient.List(ctx, &mdList, - // client.InNamespace(clusterNamespace), - // client.MatchingLabels{clusterv1.ClusterNameLabel: clusterName}, - // ); err != nil { - // return nil, errors.Wrapf(err, "failed to list MachineDeployments for cluster %s/%s", clusterNamespace, clusterName) - // } - // validMDs := make(map[string]struct{}) - // for _, md := range mdList.Items { - // validMDs[md.Name] = struct{}{} - // } - // log.V(6).Info("Identified active MachineDeployments", "count", len(validMDs)) - - // // List MachineSets and filter those owned by a valid MachineDeployment. 
- // var msList clusterv1.MachineSetList - // if err := kubeClient.List(ctx, &msList, - // client.InNamespace(clusterNamespace), - // client.MatchingLabels{clusterv1.ClusterNameLabel: clusterName}, - // ); err != nil { - // return nil, errors.Wrapf(err, "failed to list MachineSets for cluster %s/%s", clusterNamespace, clusterName) - // } - // validMS := make(map[string]struct{}) - // for _, ms := range msList.Items { - // for _, owner := range ms.OwnerReferences { - // if owner.Kind == "MachineDeployment" && owner.APIVersion == clusterv1.GroupVersion.String() { - // if _, ok := validMDs[owner.Name]; ok { - // validMS[ms.Name] = struct{}{} - // break - // } - // } - // } - // } - // log.V(6).Info("Filtered MachineSets owned by valid MachineDeployments", "count", len(validMS)) - - // // List Machines and filter those owned by valid MachineSets (skip control plane). - // var machineList clusterv1.MachineList - // if err := kubeClient.List(ctx, &machineList, - // client.InNamespace(clusterNamespace), - // client.MatchingLabels{clusterv1.ClusterNameLabel: clusterName}, - // ); err != nil { - // return nil, errors.Wrapf(err, "failed to list Machines for cluster %s/%s", clusterNamespace, clusterName) - // } - - // workerMachines := make(map[string]struct{}) - // for _, m := range machineList.Items { - // if _, isControlPlane := m.Labels[clusterv1.MachineControlPlaneLabel]; isControlPlane { - // continue - // } - // for _, owner := range m.OwnerReferences { - // if owner.Kind == "MachineSet" && owner.APIVersion == clusterv1.GroupVersion.String() { - // if _, ok := validMS[owner.Name]; ok { - // workerMachines[m.Name] = struct{}{} - // break - // } - // } - // } - // } - // log.V(5).Info("Identified worker Machines linked to MachineSets", "count", len(workerMachines)) - // List VSphereMachine objects var vsMachineList vmwarev1.VSphereMachineList if err := kubeClient.List(ctx, &vsMachineList, @@ -418,10 +293,10 @@ func getCurrentVSphereMachines(ctx context.Context, kubeClient client.Client, cl return result, nil } -// GenerateVMGPlacementLabels returns labels per MachineDeployment which contain zone info for placed VMs for day-2 operations. -func GenerateVMGPlacementLabels(ctx context.Context, vmg *vmoprv1.VirtualMachineGroup, machineDeployments []string) (map[string]string, error) { +// GenerateVMGPlacementAnnotations returns annotations per MachineDeployment which contains zone info for placed VMs for day-2 operations. +func GenerateVMGPlacementAnnotations(ctx context.Context, vmg *vmoprv1.VirtualMachineGroup, machineDeployments []string) (map[string]string, error) { log := ctrl.LoggerFrom(ctx) - labels := make(map[string]string) + annotations := make(map[string]string) // For each member in status for _, member := range vmg.Status.Members { @@ -436,11 +311,11 @@ func GenerateVMGPlacementLabels(ctx context.Context, vmg *vmoprv1.VirtualMachine } // Check if this VM belongs to any of our target Machine Deployments - // Use machine deployment name as the label key. + // Use machine deployment name as the annotation key prefix. 
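+		// For illustration only (MachineDeployment and zone names here are the ones used in the
+		// unit tests, e.g. "md-worker-a"/"md-worker-b" placed in "zone-a"/"zone-b"), the map
+		// returned by this function would look like:
+		//
+		//   map[string]string{
+		//       "zone.cluster.x-k8s.io/md-worker-a": "zone-a",
+		//       "zone.cluster.x-k8s.io/md-worker-b": "zone-b",
+		//   }
+		//
+		// vmopmachine.go later looks up the entry for a Machine's MachineDeployment in these
+		// annotations to set the failure domain of new VMs created during day-2 operations.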
for _, md := range machineDeployments { // Check if we already found placement for this Machine Deployments - if _, found := labels[md]; found { - log.Info(fmt.Sprintf("Skipping Machine Deployment %s, placement already found", md)) + if _, found := annotations[fmt.Sprintf("zone.cluster.x-k8s.io/%s", md)]; found { + log.Info(fmt.Sprintf("Skipping Machine Deployment %s, placement already found in annotations", md)) continue } @@ -462,12 +337,12 @@ func GenerateVMGPlacementLabels(ctx context.Context, vmg *vmoprv1.VirtualMachine } log.Info(fmt.Sprintf("VM %s in VMG %s/%s has been placed in zone %s", member.Name, vmg.Namespace, vmg.Name, zone)) - labels[fmt.Sprintf("zone.cluster.x-k8s.io/%s", md)] = zone + annotations[fmt.Sprintf("zone.cluster.x-k8s.io/%s", md)] = zone } } } - return labels, nil + return annotations, nil } // TODO: de-dup this logic with vmopmachine.go diff --git a/controllers/vmware/virtualmachinegroup_reconciler_test.go b/controllers/vmware/virtualmachinegroup_reconciler_test.go new file mode 100644 index 0000000000..9e1e0aff72 --- /dev/null +++ b/controllers/vmware/virtualmachinegroup_reconciler_test.go @@ -0,0 +1,556 @@ +package vmware + +import ( + "context" + "fmt" + "testing" + "time" + + vmoprv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha2" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/record" + infrav1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/v1beta1" + vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1" + topologyv1 "sigs.k8s.io/cluster-api-provider-vsphere/internal/apis/topology/v1alpha1" + clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" + "sigs.k8s.io/cluster-api/util/deprecated/v1beta1/conditions" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/reconcile" +) + +var s = runtime.NewScheme() + +func init() { + // Register all necessary API types for the fake client + _ = vmoprv1.AddToScheme(s) + _ = infrav1.AddToScheme(s) + _ = vmwarev1.AddToScheme(s) + _ = topologyv1.AddToScheme(s) + _ = clusterv1.AddToScheme(s) + _ = corev1.AddToScheme(s) +} + +const ( + clusterName = "test-cluster" + clusterNamespace = "test-ns" + mdName1 = "md-worker-a" + mdName2 = "md-worker-b" + zoneA = "zone-a" + zoneB = "zone-b" +) + +// Helper function to create a basic Cluster object +func newCluster(name, namespace string, initialized bool, topology bool) *clusterv1.Cluster { + cluster := &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Labels: map[string]string{clusterv1.ClusterNameLabel: name}, + }, + } + if initialized { + conditions.MarkTrue(cluster, clusterv1.ClusterControlPlaneInitializedCondition) + } else { + conditions.MarkFalse(cluster, clusterv1.ClusterControlPlaneInitializedCondition, "Waiting", clusterv1.ConditionSeverityInfo, "") + } + + if topology { + cluster.Spec.Topology = &clusterv1.Topology{} + cluster.Spec.Topology.Workers = &clusterv1.Workers{ + MachineDeployments: clusterv1.MachineDeploymentTopology{}, + } + } + return cluster +} + +// Helper function to create a MachineDeploymentTopology for the Cluster spec +func newMDTopology(name string, replicas int32, fd string) clusterv1.MachineDeploymentTopology { + return clusterv1.MachineDeploymentTopology{ + Class: "test-class", + Name: name, + 
FailureDomain: &fd, // Pointer to FailureDomain string + Replicas: &replicas, + } +} + +// Helper function to create a VSphereCluster +func newVSphereCluster(name, namespace string, ready bool, zones ...string) *vmwarev1.VSphereCluster { + vsc := &vmwarev1.VSphereCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Labels: map[string]string{clusterv1.ClusterNameLabel: name}, + }, + } + if ready { + conditions.MarkTrue(vsc, vmwarev1.VSphereClusterReadyCondition) + } + + for _, zone := range zones { + vsc.Status.FailureDomains = append(vsc.Status.FailureDomains, vmwarev1.FailureDomainStatus{Name: zone}) + } + return vsc +} + +// Helper function to create a CAPI Machine (worker or control plane) +func newMachine(name, mdName string, isControlPlane bool) *clusterv1.Machine { + m := &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: clusterNamespace, + Labels: map[string]string{ + clusterv1.ClusterNameLabel: clusterName, + clusterv1.MachineDeploymentNameLabel: mdName, + }, + }, + } + if isControlPlane { + m.Labels[clusterv1.MachineControlPlaneLabel] = "true" + } + return m +} + +// Helper function to create a VSphereMachine (owned by a CAPI Machine) +func newVSphereMachine(name, ownerMachineName string) *infrav1.VSphereMachine { + return &infrav1.VSphereMachine{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: clusterNamespace, + OwnerReferences: metav1.OwnerReference{ + { + APIVersion: clusterv1.GroupVersion.String(), + Kind: "Machine", + Name: ownerMachineName, + }, + }, + }, + } +} + +// Helper function to create a VMG member status with placement info +func newVMGMemberStatus(name, kind string, ready bool, zone string) vmoprv1.GroupMember { + member := vmoprv1.GroupMember{ + Name: name, + Kind: kind, + } + + if ready { + conditions.MarkTrue(&member, vmoprv1.VirtualMachineGroupMemberConditionPlacementReady) + member.Placement = &vmoprv1.Placement{ + Zone: zone, + } + } + return member +} + +// Helper function to create a mock MachineDeployment +func newMachineDeployment(name string) *clusterv1.MachineDeployment { + return &clusterv1.MachineDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: clusterNamespace, + Labels: map[string]string{clusterv1.ClusterNameLabel: clusterName}, + }, + } +} + +func TestHelperFunctions(t *testing.T) { + g := NewWithT(t) + // Create cluster topology with mixed placement + cluster := newCluster(clusterName, clusterNamespace, true, true) + cluster.Spec.Topology.Workers.MachineDeployments = clusterv1.MachineDeploymentTopology{ + newMDTopology(mdName1, 3, ""), // Automatic placement + newMDTopology(mdName2, 5, zoneB), // Explicit placement + } + g.Expect(cluster.Spec.Topology.IsDefined()).To(BeTrue()) + + // Test isExplicitPlacement + explicit, err := isExplicitPlacement(cluster) + g.Expect(err).NotTo(HaveOccurred()) + // Should be true because MD2 has a FailureDomain specified + g.Expect(explicit).To(BeTrue(), "isExplicitPlacement should be true when any MD has FD set") + + // Test getExpectedMachines + expected := getExpectedMachines(cluster) + // Expected total replicas: 3 + 5 = 8 + g.Expect(expected).To(BeEquivalentTo(8), "Expected machines count should be 8") + + // Test getExpectedMachines with nil replicas + clusterNoReplicas := newCluster(clusterName, clusterNamespace, true, true) + clusterNoReplicas.Spec.Topology.Workers.MachineDeployments = clusterv1.MachineDeploymentTopology{ + {Name: "md-1", Class: "c1"}, + } + expectedZero := getExpectedMachines(clusterNoReplicas) + 
g.Expect(expectedZero).To(BeEquivalentTo(0), "Expected machines count should be 0 for nil replicas") +} + +func TestIsExplicitPlacement(t *testing.T) { + g := NewWithT(t) + + // Setup cluster for test cases + baseCluster := newCluster(clusterName, clusterNamespace, true, true) + + tests := struct { + name string + mds clusterv1.MachineDeploymentTopology + want bool + }{ + { + name: "All MDs use automatic placement (empty FD)", + mds: clusterv1.MachineDeploymentTopology{ + newMDTopology(mdName1, 3, ""), + newMDTopology(mdName2, 2, ""), + }, + want: false, + }, + { + name: "One MD uses explicit placement (non-empty FD)", + mds: clusterv1.MachineDeploymentTopology{ + newMDTopology(mdName1, 3, ""), + newMDTopology(mdName2, 2, zoneA), + }, + want: true, + }, + { + name: "All MDs use explicit placement", + mds: clusterv1.MachineDeploymentTopology{ + newMDTopology(mdName1, 3, zoneA), + newMDTopology(mdName2, 2, zoneB), + }, + want: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + cluster := baseCluster.DeepCopy() + cluster.Spec.Topology.Workers.MachineDeployments = tt.mds + got, err := isExplicitPlacement(cluster) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(got).To(Equal(tt.want)) + }) + } +} + +func TestGetCurrentVSphereMachines(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + + // Define object names for workers and CPs + cpMachineName := "cp-machine-0" + workerMachineA1 := "worker-a-1" + workerMachineB1 := "worker-b-1" + // Define a machine that belongs to a non-existent MD + strayMachine := "stray-machine" + + tests := struct { + name string + objectsclient.Object + want int + }{ + { + name: "Success: Correctly filters worker VSphereMachines", + objects: client.Object{ + // Active MDs + newMachineDeployment(mdName1), + newMachineDeployment(mdName2), + // CAPI Machines + newMachine(cpMachineName, "", true), // Control Plane (should be skipped) + newMachine(workerMachineA1, mdName1, false), + newMachine(workerMachineB1, mdName2, false), + newMachine(strayMachine, "non-existent-md", false), // Stray worker (should be skipped) + // VSphereMachines (Infrastructure objects) + newVSphereMachine("vsm-cp-0", cpMachineName), // Should be skipped + newVSphereMachine("vsm-a-1", workerMachineA1), + newVSphereMachine("vsm-b-1", workerMachineB1), + newVSphereMachine("vsm-stray", strayMachine), // Should be skipped + }, + want: 2, // Only vsm-a-1 and vsm-b-1 + }, + { + name: "No VSphereMachines found", + objects: client.Object{ + newMachineDeployment(mdName1), + newMachine(workerMachineA1, mdName1, false), + }, + want: 0, + }, + { + name: "No objects exist", + objects: client.Object{}, + want: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + fakeClient := fake.NewClientBuilder().WithScheme(s).WithObjects(tt.objects...).Build() + reconciler := &VirtualMachineGroupReconciler{Client: fakeClient} + + got, err := getCurrentVSphereMachines(ctx, reconciler.Client, clusterNamespace, clusterName) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(len(got)).To(Equal(tt.want)) + }) + } +} + +func TestGenerateVMGPlacementLabels(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + + tests := struct { + name string + vmg *vmoprv1.VirtualMachineGroup + nodepoolsstring + want map[string]string + wantErr bool + }{ + { + name: "Success: VMG with one placed VM per nodepool", + vmg: &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: clusterNamespace}, + Status: 
vmoprv1.VirtualMachineGroupStatus{ + Members: vmoprv1.GroupMember{ + // Placed member for MD1 + newVMGMemberStatus(fmt.Sprintf("%s-%s-vm-1", clusterName, mdName1), "VirtualMachine", true, zoneA), + // Placed member for MD2 + newVMGMemberStatus(fmt.Sprintf("%s-%s-vm-1", clusterName, mdName2), "VirtualMachine", true, zoneB), + // Unplaced member for MD1 (should be ignored) + newVMGMemberStatus(fmt.Sprintf("%s-%s-vm-2", clusterName, mdName1), "VirtualMachine", false, ""), + }, + }, + }, + nodepools: string{mdName1, mdName2}, + want: map[string]string{ + fmt.Sprintf("%s/%s", VMGPlacementLabelPrefix, mdName1): zoneA, + fmt.Sprintf("%s/%s", VMGPlacementLabelPrefix, mdName2): zoneB, + }, + wantErr: false, + }, + { + name: "Success: Multiple placed VMs for the same nodepool (should take first)", + vmg: &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: clusterNamespace}, + Status: vmoprv1.VirtualMachineGroupStatus{ + Members: vmoprv1.GroupMember{ + // First placed VM (Zone B) + newVMGMemberStatus(fmt.Sprintf("%s-%s-vm-1", clusterName, mdName1), "VirtualMachine", true, zoneB), + // Second placed VM (Zone A) - should be skipped + newVMGMemberStatus(fmt.Sprintf("%s-%s-vm-2", clusterName, mdName1), "VirtualMachine", true, zoneA), + }, + }, + }, + nodepools: string{mdName1}, + want: map[string]string{ + fmt.Sprintf("%s/%s", VMGPlacementLabelPrefix, mdName1): zoneB, + }, + wantErr: false, + }, + { + name: "Error: PlacementReady true but Placement is nil", + vmg: &vmoprv1.VirtualMachineGroup{ + Status: vmoprv1.VirtualMachineGroupStatus{ + Members: vmoprv1.GroupMember{ + { + Name: "vm-1", + Kind: "VirtualMachine", + // Condition marked true, but Placement field is nil + Conditions: metav1.Condition{{Type: vmoprv1.VirtualMachineGroupMemberConditionPlacementReady, Status: metav1.ConditionTrue}}, + }, + }, + }, + }, + nodepools: string{mdName1}, + want: nil, + wantErr: true, // Expect an error about nil placement info + }, + { + name: "Skip: No members are PlacementReady", + vmg: &vmoprv1.VirtualMachineGroup{ + Status: vmoprv1.VirtualMachineGroupStatus{ + Members: vmoprv1.GroupMember{ + newVMGMemberStatus(fmt.Sprintf("%s-%s-vm-1", clusterName, mdName1), "VirtualMachine", false, ""), + }, + }, + }, + nodepools: string{mdName1}, + want: map[string]string{}, + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := GenerateVMGPlacementLabels(ctx, tt.vmg, tt.nodepools) + if tt.wantErr { + g.Expect(err).To(HaveOccurred()) + } else { + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(got).To(Equal(tt.want)) + } + }) + } +} + +func TestVMGReconcile(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + + // Define all required mock objects for a successful run + cluster := newCluster(clusterName, clusterNamespace, true, true) + cluster.Spec.Topology.Workers.MachineDeployments = clusterv1.MachineDeploymentTopology{ + newMDTopology(mdName1, 1, ""), + } + vsc := newVSphereCluster(clusterName, clusterNamespace, true, zoneA, zoneB) // Two zones + md := newMachineDeployment(mdName1) + machine := newMachine("worker-1", mdName1, false) + vsMachine := newVSphereMachine("vsm-1", "worker-1") + + tests := struct { + name string + initialObjects client.Object + expectedResult reconcile.Result + checkVMGExists bool + checkVMGMembers string // Expected member names + checkVMGReplicas int32 // Expected VMG replicas (for sanity check) + }{ + { + name: "Exit: Cluster not found (GC)", + initialObjects: client.Object{}, + 
expectedResult: reconcile.Result{}, + checkVMGExists: false, + checkVMGReplicas: 0, + }, + { + name: "Exit: Cluster marked for deletion", + initialObjects: client.Object{ + func() client.Object { + c := cluster.DeepCopy() + c.DeletionTimestamp = &metav1.Time{Time: time.Now()} + return c + }(), + }, + expectedResult: reconcile.Result{}, + checkVMGExists: false, + checkVMGReplicas: 0, + }, + { + name: "Exit: Explicit placement used", + initialObjects: client.Object{ + func() client.Object { + c := cluster.DeepCopy() + c.Spec.Topology.Workers.MachineDeployments.FailureDomain = stringPtr(zoneA) + return c + }(), + }, + expectedResult: reconcile.Result{}, + checkVMGExists: false, + checkVMGReplicas: 0, + }, + { + name: "Requeue: VSphereCluster not ready", + initialObjects: client.Object{ + cluster.DeepCopy(), + newVSphereCluster(clusterName, clusterNamespace, false, zoneA, zoneB), // Not Ready + }, + expectedResult: reconcile.Result{RequeueAfter: reconciliationDelay}, + checkVMGExists: false, + checkVMGReplicas: 0, + }, + { + name: "Exit: Single zone detected", + initialObjects: client.Object{ + cluster.DeepCopy(), + newVSphereCluster(clusterName, clusterNamespace, true, zoneA), // Only one zone + }, + expectedResult: reconcile.Result{}, + checkVMGExists: false, + checkVMGReplicas: 0, + }, + { + name: "Requeue: ControlPlane not initialized", + initialObjects: client.Object{ + newCluster(clusterName, clusterNamespace, false, true), // Not Initialized + vsc.DeepCopy(), + }, + expectedResult: reconcile.Result{RequeueAfter: reconciliationDelay}, + checkVMGExists: false, + checkVMGReplicas: 0, + }, + { + name: "Requeue: Machines not fully created (0/1)", + initialObjects: client.Object{ + cluster.DeepCopy(), + vsc.DeepCopy(), + md.DeepCopy(), + // No Machine or VSphereMachine objects created yet + }, + expectedResult: reconcile.Result{RequeueAfter: reconciliationDelay}, + checkVMGExists: false, + checkVMGReplicas: 0, + }, + { + name: "Success: VMG created with correct members (1/1)", + initialObjects: client.Object{ + cluster.DeepCopy(), + vsc.DeepCopy(), + md.DeepCopy(), + machine.DeepCopy(), + vsMachine.DeepCopy(), + }, + expectedResult: reconcile.Result{}, + checkVMGExists: true, + checkVMGMembers: string{"vsm-1"}, + checkVMGReplicas: 1, // Replica count derived from VMG spec + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + fakeClient := fake.NewClientBuilder().WithScheme(s).WithObjects(tt.initialObjects...).Build() + reconciler := &VirtualMachineGroupReconciler{ + Client: fakeClient, + Recorder: record.NewFakeRecorder(1), + } + req := ctrl.Request{NamespacedName: types.NamespacedName{Name: clusterName, Namespace: clusterNamespace}} + + result, err := reconciler.Reconcile(ctx, req) + + g.Expect(err).NotTo(HaveOccurred(), "Reconcile should not return an error") + g.Expect(result).To(Equal(tt.expectedResult)) + + vmg := &vmoprv1.VirtualMachineGroup{} + vmgKey := types.NamespacedName{Name: clusterName, Namespace: clusterNamespace} + err = fakeClient.Get(ctx, vmgKey, vmg) + + if tt.checkVMGExists { + g.Expect(err).NotTo(HaveOccurred(), "VMG should exist") + // Check owner reference + g.Expect(vmg.OwnerReferences).To(HaveLen(1)) + g.Expect(vmg.OwnerReferences.Name).To(Equal(clusterName)) + g.Expect(vmg.OwnerReferences.Kind).To(Equal("Cluster")) + g.Expect(vmg.OwnerReferences.Controller).To(PointTo(BeTrue())) + + // Check members + g.Expect(vmg.Spec.BootOrder).To(HaveLen(1)) + members := vmg.Spec.BootOrder.Members + g.Expect(members).To(HaveLen(len(tt.checkVMGMembers))) + 
if len(members) > 0 { + g.Expect(members.Name).To(Equal(tt.checkVMGMembers)) + g.Expect(members.Kind).To(Equal("VirtualMachine")) + } + } else { + g.Expect(apierrors.IsNotFound(err)).To(BeTrue(), "VMG should not exist") + } + }) + } +} + +// stringPtr converts a string to a *string +func stringPtr(s string) *string { return &s } diff --git a/pkg/services/vmoperator/vmopmachine.go b/pkg/services/vmoperator/vmopmachine.go index 18e4aae3e7..45dcdcaf49 100644 --- a/pkg/services/vmoperator/vmopmachine.go +++ b/pkg/services/vmoperator/vmopmachine.go @@ -222,9 +222,10 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap vmGroupName: vmOperatorVMGroup.Name, } - // Reuse the label from the node pool -> zone mapping. + // Set the zone label using the annotation of the machine deployment:zone mapping from VMG. + // This is for new VMs created during day-2 operations in VC 9.1. nodePool := supervisorMachineCtx.Machine.Labels[clusterv1.MachineDeploymentNameLabel] - if zone, ok := vmOperatorVMGroup.Labels[fmt.Sprintf("zone.cluster.x-k8s.io/%s", nodePool)]; ok && zone != "" { + if zone, ok := vmOperatorVMGroup.Annotations[fmt.Sprintf("zone.cluster.x-k8s.io/%s", nodePool)]; ok && zone != "" { affInfo.failureDomain = ptr.To(zone) } From 36c355038f01ac3c7b5748486a893b4307c0310d Mon Sep 17 00:00:00 2001 From: Gong Zhang Date: Thu, 23 Oct 2025 14:42:57 +0800 Subject: [PATCH 09/25] Add VMG reconciler unit test and bump VMOP - Add VMG recociler unit test - Bump VMOP due to API change - Filter out VSphereMachine event except create/delete events Signed-off-by: Gong Zhang --- controllers/vmware/controllers_suite_test.go | 2 + .../vmware/virtualmachinegroup_controller.go | 12 +- .../virtualmachinegroup_reconciler_test.go | 625 +++++++----------- go.mod | 6 +- go.sum | 4 +- internal/test/helpers/envtest.go | 2 + test/go.mod | 4 +- 7 files changed, 268 insertions(+), 387 deletions(-) diff --git a/controllers/vmware/controllers_suite_test.go b/controllers/vmware/controllers_suite_test.go index 87d99112e0..128ee2086d 100644 --- a/controllers/vmware/controllers_suite_test.go +++ b/controllers/vmware/controllers_suite_test.go @@ -26,6 +26,7 @@ import ( . "github.com/onsi/ginkgo/v2" "github.com/onsi/ginkgo/v2/types" . 
"github.com/onsi/gomega" + vmoprv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha2" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" utilruntime "k8s.io/apimachinery/pkg/util/runtime" @@ -71,6 +72,7 @@ func setup(ctx context.Context) (*helpers.TestEnvironment, clustercache.ClusterC utilruntime.Must(infrav1.AddToScheme(scheme.Scheme)) utilruntime.Must(clusterv1.AddToScheme(scheme.Scheme)) utilruntime.Must(vmwarev1.AddToScheme(scheme.Scheme)) + utilruntime.Must(vmoprv1.AddToScheme(scheme.Scheme)) testEnv := helpers.NewTestEnvironment(ctx) diff --git a/controllers/vmware/virtualmachinegroup_controller.go b/controllers/vmware/virtualmachinegroup_controller.go index 94606f541d..e9102587e5 100644 --- a/controllers/vmware/virtualmachinegroup_controller.go +++ b/controllers/vmware/virtualmachinegroup_controller.go @@ -26,8 +26,10 @@ import ( clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" "sigs.k8s.io/cluster-api/util/predicates" ctrl "sigs.k8s.io/controller-runtime" + ctrlbldr "sigs.k8s.io/controller-runtime/pkg/builder" ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/predicate" @@ -38,7 +40,6 @@ import ( // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters/status,verbs=get // +kubebuilder:rbac:groups=vmoperator.vmware.com,resources=virtualmachinegroups,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=vmoperator.vmware.com,resources=virtualmachinegroups/status,verbs=get -// +kubebuilder:rbac:groups=vmware.infrastructure.cluster.x-k8s.io,resources=vsphereclusters,verbs=get;list;watch // +kubebuilder:rbac:groups=vmware.infrastructure.cluster.x-k8s.io,resources=vspheremachines,verbs=get;list;watch // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinedeployments,verbs=get;list;watch // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines,verbs=get;list;watch @@ -53,7 +54,7 @@ func AddVirtualMachineGroupControllerToManager(ctx context.Context, controllerMa Recorder: mgr.GetEventRecorderFor("virtualmachinegroup-controller"), } - // Predicate: only allow VMG with the cluster-name label + // Predicate: only allow VMG with the cluster-name label. Ensures the controller only works on VMG objects created by CAPV. hasClusterNameLabel := predicate.NewPredicateFuncs(func(obj ctrlclient.Object) bool { labels := obj.GetLabels() if labels == nil { @@ -74,6 +75,13 @@ func AddVirtualMachineGroupControllerToManager(ctx context.Context, controllerMa Watches( &vmwarev1.VSphereMachine{}, handler.EnqueueRequestsFromMapFunc(reconciler.VSphereMachineToVirtualMachineGroup), + ctrlbldr.WithPredicates( + predicate.Funcs{ + UpdateFunc: func(e event.UpdateEvent) bool { return false }, + CreateFunc: func(event.CreateEvent) bool { return true }, + DeleteFunc: func(event.DeleteEvent) bool { return true }, + GenericFunc: func(event.GenericEvent) bool { return false }, + }), ). 
WithEventFilter(predicates.ResourceHasFilterLabel(mgr.GetScheme(), predicateLog, controllerManagerCtx.WatchFilterValue)) diff --git a/controllers/vmware/virtualmachinegroup_reconciler_test.go b/controllers/vmware/virtualmachinegroup_reconciler_test.go index 9e1e0aff72..4be9d868ab 100644 --- a/controllers/vmware/virtualmachinegroup_reconciler_test.go +++ b/controllers/vmware/virtualmachinegroup_reconciler_test.go @@ -3,21 +3,20 @@ package vmware import ( "context" "fmt" + "sort" "testing" "time" + . "github.com/onsi/gomega" vmoprv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha2" - corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" - infrav1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/v1beta1" vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1" - topologyv1 "sigs.k8s.io/cluster-api-provider-vsphere/internal/apis/topology/v1alpha1" clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" - "sigs.k8s.io/cluster-api/util/deprecated/v1beta1/conditions" + "sigs.k8s.io/cluster-api/util/conditions" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" @@ -26,16 +25,6 @@ import ( var s = runtime.NewScheme() -func init() { - // Register all necessary API types for the fake client - _ = vmoprv1.AddToScheme(s) - _ = infrav1.AddToScheme(s) - _ = vmwarev1.AddToScheme(s) - _ = topologyv1.AddToScheme(s) - _ = clusterv1.AddToScheme(s) - _ = corev1.AddToScheme(s) -} - const ( clusterName = "test-cluster" clusterNamespace = "test-ns" @@ -45,195 +34,38 @@ const ( zoneB = "zone-b" ) -// Helper function to create a basic Cluster object -func newCluster(name, namespace string, initialized bool, topology bool) *clusterv1.Cluster { - cluster := &clusterv1.Cluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: namespace, - Labels: map[string]string{clusterv1.ClusterNameLabel: name}, - }, - } - if initialized { - conditions.MarkTrue(cluster, clusterv1.ClusterControlPlaneInitializedCondition) - } else { - conditions.MarkFalse(cluster, clusterv1.ClusterControlPlaneInitializedCondition, "Waiting", clusterv1.ConditionSeverityInfo, "") - } - - if topology { - cluster.Spec.Topology = &clusterv1.Topology{} - cluster.Spec.Topology.Workers = &clusterv1.Workers{ - MachineDeployments: clusterv1.MachineDeploymentTopology{}, - } - } - return cluster -} - -// Helper function to create a MachineDeploymentTopology for the Cluster spec -func newMDTopology(name string, replicas int32, fd string) clusterv1.MachineDeploymentTopology { - return clusterv1.MachineDeploymentTopology{ - Class: "test-class", - Name: name, - FailureDomain: &fd, // Pointer to FailureDomain string - Replicas: &replicas, - } -} - -// Helper function to create a VSphereCluster -func newVSphereCluster(name, namespace string, ready bool, zones ...string) *vmwarev1.VSphereCluster { - vsc := &vmwarev1.VSphereCluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: namespace, - Labels: map[string]string{clusterv1.ClusterNameLabel: name}, - }, - } - if ready { - conditions.MarkTrue(vsc, vmwarev1.VSphereClusterReadyCondition) - } - - for _, zone := range zones { - vsc.Status.FailureDomains = append(vsc.Status.FailureDomains, vmwarev1.FailureDomainStatus{Name: zone}) - } - return vsc -} - -// Helper function to create a CAPI Machine (worker or control plane) -func newMachine(name, 
mdName string, isControlPlane bool) *clusterv1.Machine { - m := &clusterv1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: clusterNamespace, - Labels: map[string]string{ - clusterv1.ClusterNameLabel: clusterName, - clusterv1.MachineDeploymentNameLabel: mdName, - }, - }, - } - if isControlPlane { - m.Labels[clusterv1.MachineControlPlaneLabel] = "true" - } - return m -} - -// Helper function to create a VSphereMachine (owned by a CAPI Machine) -func newVSphereMachine(name, ownerMachineName string) *infrav1.VSphereMachine { - return &infrav1.VSphereMachine{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: clusterNamespace, - OwnerReferences: metav1.OwnerReference{ - { - APIVersion: clusterv1.GroupVersion.String(), - Kind: "Machine", - Name: ownerMachineName, - }, - }, - }, - } -} - -// Helper function to create a VMG member status with placement info -func newVMGMemberStatus(name, kind string, ready bool, zone string) vmoprv1.GroupMember { - member := vmoprv1.GroupMember{ - Name: name, - Kind: kind, - } - - if ready { - conditions.MarkTrue(&member, vmoprv1.VirtualMachineGroupMemberConditionPlacementReady) - member.Placement = &vmoprv1.Placement{ - Zone: zone, - } - } - return member -} - -// Helper function to create a mock MachineDeployment -func newMachineDeployment(name string) *clusterv1.MachineDeployment { - return &clusterv1.MachineDeployment{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: clusterNamespace, - Labels: map[string]string{clusterv1.ClusterNameLabel: clusterName}, - }, - } -} - -func TestHelperFunctions(t *testing.T) { - g := NewWithT(t) - // Create cluster topology with mixed placement - cluster := newCluster(clusterName, clusterNamespace, true, true) - cluster.Spec.Topology.Workers.MachineDeployments = clusterv1.MachineDeploymentTopology{ - newMDTopology(mdName1, 3, ""), // Automatic placement - newMDTopology(mdName2, 5, zoneB), // Explicit placement - } - g.Expect(cluster.Spec.Topology.IsDefined()).To(BeTrue()) - - // Test isExplicitPlacement - explicit, err := isExplicitPlacement(cluster) - g.Expect(err).NotTo(HaveOccurred()) - // Should be true because MD2 has a FailureDomain specified - g.Expect(explicit).To(BeTrue(), "isExplicitPlacement should be true when any MD has FD set") - - // Test getExpectedMachines - expected := getExpectedMachines(cluster) - // Expected total replicas: 3 + 5 = 8 - g.Expect(expected).To(BeEquivalentTo(8), "Expected machines count should be 8") - - // Test getExpectedMachines with nil replicas - clusterNoReplicas := newCluster(clusterName, clusterNamespace, true, true) - clusterNoReplicas.Spec.Topology.Workers.MachineDeployments = clusterv1.MachineDeploymentTopology{ - {Name: "md-1", Class: "c1"}, - } - expectedZero := getExpectedMachines(clusterNoReplicas) - g.Expect(expectedZero).To(BeEquivalentTo(0), "Expected machines count should be 0 for nil replicas") -} - -func TestIsExplicitPlacement(t *testing.T) { +func TestGetExpectedVSphereMachines(t *testing.T) { g := NewWithT(t) - // Setup cluster for test cases - baseCluster := newCluster(clusterName, clusterNamespace, true, true) - - tests := struct { - name string - mds clusterv1.MachineDeploymentTopology - want bool + tests := []struct { + name string + cluster *clusterv1.Cluster + expected int32 }{ { - name: "All MDs use automatic placement (empty FD)", - mds: clusterv1.MachineDeploymentTopology{ - newMDTopology(mdName1, 3, ""), - newMDTopology(mdName2, 2, ""), - }, - want: false, + name: "Defined topology with replicas", + cluster: 
newCluster(clusterName, clusterNamespace, true, 3, 2), + expected: 5, }, { - name: "One MD uses explicit placement (non-empty FD)", - mds: clusterv1.MachineDeploymentTopology{ - newMDTopology(mdName1, 3, ""), - newMDTopology(mdName2, 2, zoneA), - }, - want: true, + name: "Defined topology with zero replicas", + cluster: newCluster(clusterName, clusterNamespace, true, 0, 0), + expected: 0, }, { - name: "All MDs use explicit placement", - mds: clusterv1.MachineDeploymentTopology{ - newMDTopology(mdName1, 3, zoneA), - newMDTopology(mdName2, 2, zoneB), - }, - want: true, + name: "Undefined topology", + cluster: func() *clusterv1.Cluster { + c := newCluster(clusterName, clusterNamespace, true, 1, 1) + c.Spec.Topology = clusterv1.Topology{} + return c + }(), + expected: 0, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - cluster := baseCluster.DeepCopy() - cluster.Spec.Topology.Workers.MachineDeployments = tt.mds - got, err := isExplicitPlacement(cluster) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(got).To(Equal(tt.want)) + g.Expect(getExpectedVSphereMachines(tt.cluster)).To(Equal(tt.expected)) }) } } @@ -242,48 +74,31 @@ func TestGetCurrentVSphereMachines(t *testing.T) { g := NewWithT(t) ctx := context.Background() - // Define object names for workers and CPs - cpMachineName := "cp-machine-0" - workerMachineA1 := "worker-a-1" - workerMachineB1 := "worker-b-1" - // Define a machine that belongs to a non-existent MD - strayMachine := "stray-machine" - - tests := struct { - name string - objectsclient.Object - want int + // VM names are based on CAPI Machine names, not VSphereMachine names, but we use VSM objects here. + vsm1 := newVSphereMachine("vsm-1", mdName1, false, nil) + vsm2 := newVSphereMachine("vsm-2", mdName2, false, nil) + vsmDeleting := newVSphereMachine("vsm-3", mdName1, true, nil) // Deleting + vsmControlPlane := newVSphereMachine("vsm-cp", "cp-md", false, nil) + vsmControlPlane.Labels[clusterv1.MachineControlPlaneLabel] = "true" // Should be filtered by label in production, but here filtered implicitly by only listing MD-labelled objects + + tests := []struct { + name string + objects []client.Object + want int }{ { - name: "Success: Correctly filters worker VSphereMachines", - objects: client.Object{ - // Active MDs - newMachineDeployment(mdName1), - newMachineDeployment(mdName2), - // CAPI Machines - newMachine(cpMachineName, "", true), // Control Plane (should be skipped) - newMachine(workerMachineA1, mdName1, false), - newMachine(workerMachineB1, mdName2, false), - newMachine(strayMachine, "non-existent-md", false), // Stray worker (should be skipped) - // VSphereMachines (Infrastructure objects) - newVSphereMachine("vsm-cp-0", cpMachineName), // Should be skipped - newVSphereMachine("vsm-a-1", workerMachineA1), - newVSphereMachine("vsm-b-1", workerMachineB1), - newVSphereMachine("vsm-stray", strayMachine), // Should be skipped + name: "Success: Filtered non-deleting worker VSphereMachines", + objects: []client.Object{ + vsm1, + vsm2, + vsmDeleting, + vsmControlPlane, }, - want: 2, // Only vsm-a-1 and vsm-b-1 + want: 2, // Should exclude vsm-3 (deleting) and vsm-cp (no MD label used in the actual listing logic) }, { - name: "No VSphereMachines found", - objects: client.Object{ - newMachineDeployment(mdName1), - newMachine(workerMachineA1, mdName1, false), - }, - want: 0, - }, - { - name: "No objects exist", - objects: client.Object{}, + name: "No VSphereMachines found", + objects: []client.Object{}, want: 0, }, } @@ -291,223 +106,207 @@ func 
TestGetCurrentVSphereMachines(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { fakeClient := fake.NewClientBuilder().WithScheme(s).WithObjects(tt.objects...).Build() - reconciler := &VirtualMachineGroupReconciler{Client: fakeClient} - - got, err := getCurrentVSphereMachines(ctx, reconciler.Client, clusterNamespace, clusterName) + got, err := getCurrentVSphereMachines(ctx, fakeClient, clusterNamespace, clusterName) g.Expect(err).NotTo(HaveOccurred()) g.Expect(len(got)).To(Equal(tt.want)) + + // Check that the correct machines are present (e.g., vsm1 and vsm2) + if tt.want > 0 { + names := make([]string, len(got)) + for i, vsm := range got { + names[i] = vsm.Name + } + sort.Strings(names) + g.Expect(names).To(Equal([]string{"vsm-1", "vsm-2"})) + } }) } } -func TestGenerateVMGPlacementLabels(t *testing.T) { +func TestGenerateVMGPlacementAnnotations(t *testing.T) { g := NewWithT(t) - ctx := context.Background() - tests := struct { - name string - vmg *vmoprv1.VirtualMachineGroup - nodepoolsstring - want map[string]string - wantErr bool + // Define object names for members + vmName1 := fmt.Sprintf("%s-%s-vm-1", clusterName, mdName1) + vmName2 := fmt.Sprintf("%s-%s-vm-1", clusterName, mdName2) + vmNameUnplaced := fmt.Sprintf("%s-%s-vm-2", clusterName, mdName1) + vmNameWrongKind := "not-a-vm" + + tests := []struct { + name string + vmg *vmoprv1.VirtualMachineGroup + machineDeployments []string + wantAnnotations map[string]string + wantErr bool }{ { - name: "Success: VMG with one placed VM per nodepool", + name: "Success: Two placed VMs for two MDs", vmg: &vmoprv1.VirtualMachineGroup{ - ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: clusterNamespace}, Status: vmoprv1.VirtualMachineGroupStatus{ - Members: vmoprv1.GroupMember{ - // Placed member for MD1 - newVMGMemberStatus(fmt.Sprintf("%s-%s-vm-1", clusterName, mdName1), "VirtualMachine", true, zoneA), - // Placed member for MD2 - newVMGMemberStatus(fmt.Sprintf("%s-%s-vm-1", clusterName, mdName2), "VirtualMachine", true, zoneB), - // Unplaced member for MD1 (should be ignored) - newVMGMemberStatus(fmt.Sprintf("%s-%s-vm-2", clusterName, mdName1), "VirtualMachine", false, ""), + Members: []vmoprv1.VirtualMachineGroupMemberStatus{ + // Placed member for MD1 in Zone A + newVMGMemberStatus(vmName1, "VirtualMachine", true, zoneA), + // Placed member for MD2 in Zone B + newVMGMemberStatus(vmName2, "VirtualMachine", true, zoneB), }, }, }, - nodepools: string{mdName1, mdName2}, - want: map[string]string{ - fmt.Sprintf("%s/%s", VMGPlacementLabelPrefix, mdName1): zoneA, - fmt.Sprintf("%s/%s", VMGPlacementLabelPrefix, mdName2): zoneB, + machineDeployments: []string{mdName1, mdName2}, + wantAnnotations: map[string]string{ + fmt.Sprintf("zone.cluster.x-k8s.io/%s", mdName1): zoneA, + fmt.Sprintf("zone.cluster.x-k8s.io/%s", mdName2): zoneB, }, wantErr: false, }, { - name: "Success: Multiple placed VMs for the same nodepool (should take first)", + name: "Skip: Unplaced VM (PlacementReady false)", vmg: &vmoprv1.VirtualMachineGroup{ - ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: clusterNamespace}, Status: vmoprv1.VirtualMachineGroupStatus{ - Members: vmoprv1.GroupMember{ - // First placed VM (Zone B) - newVMGMemberStatus(fmt.Sprintf("%s-%s-vm-1", clusterName, mdName1), "VirtualMachine", true, zoneB), - // Second placed VM (Zone A) - should be skipped - newVMGMemberStatus(fmt.Sprintf("%s-%s-vm-2", clusterName, mdName1), "VirtualMachine", true, zoneA), + Members: []vmoprv1.VirtualMachineGroupMemberStatus{ + 
newVMGMemberStatus(vmName1, "VirtualMachine", false, ""), }, }, }, - nodepools: string{mdName1}, - want: map[string]string{ - fmt.Sprintf("%s/%s", VMGPlacementLabelPrefix, mdName1): zoneB, + machineDeployments: []string{mdName1}, + wantAnnotations: map[string]string{}, + wantErr: false, + }, + { + name: "Skip: PlacementReady but missing Zone info", + vmg: &vmoprv1.VirtualMachineGroup{ + Status: vmoprv1.VirtualMachineGroupStatus{ + Members: []vmoprv1.VirtualMachineGroupMemberStatus{ + newVMGMemberStatus(vmName1, "VirtualMachine", true, ""), + }, + }, }, - wantErr: false, + machineDeployments: []string{mdName1}, + wantAnnotations: map[string]string{}, + wantErr: false, }, { - name: "Error: PlacementReady true but Placement is nil", + name: "Skip: Placement already found for MD", vmg: &vmoprv1.VirtualMachineGroup{ Status: vmoprv1.VirtualMachineGroupStatus{ - Members: vmoprv1.GroupMember{ - { - Name: "vm-1", - Kind: "VirtualMachine", - // Condition marked true, but Placement field is nil - Conditions: metav1.Condition{{Type: vmoprv1.VirtualMachineGroupMemberConditionPlacementReady, Status: metav1.ConditionTrue}}, - }, + Members: []vmoprv1.VirtualMachineGroupMemberStatus{ + // First VM sets the placement + newVMGMemberStatus(vmName1, "VirtualMachine", true, zoneA), + // Second VM is ignored (logic skips finding placement twice) + newVMGMemberStatus(vmNameUnplaced, "VirtualMachine", true, zoneB), }, }, }, - nodepools: string{mdName1}, - want: nil, - wantErr: true, // Expect an error about nil placement info + machineDeployments: []string{mdName1}, + wantAnnotations: map[string]string{ + fmt.Sprintf("zone.cluster.x-k8s.io/%s", mdName1): zoneA, + }, + wantErr: false, }, { - name: "Skip: No members are PlacementReady", + name: "Error: Member Kind is not VirtualMachine", vmg: &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: clusterNamespace}, Status: vmoprv1.VirtualMachineGroupStatus{ - Members: vmoprv1.GroupMember{ - newVMGMemberStatus(fmt.Sprintf("%s-%s-vm-1", clusterName, mdName1), "VirtualMachine", false, ""), + Members: []vmoprv1.VirtualMachineGroupMemberStatus{ + newVMGMemberStatus(vmNameWrongKind, "Pod", true, zoneA), }, }, }, - nodepools: string{mdName1}, - want: map[string]string{}, - wantErr: false, + machineDeployments: []string{mdName1}, + wantAnnotations: nil, + wantErr: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, err := GenerateVMGPlacementLabels(ctx, tt.vmg, tt.nodepools) + // Mock client is needed for the logging in the function, but not used for API calls + ctx := ctrl.LoggerInto(context.Background(), ctrl.LoggerFrom(context.Background())) + + got, err := GenerateVMGPlacementAnnotations(ctx, tt.vmg, tt.machineDeployments) + if tt.wantErr { g.Expect(err).To(HaveOccurred()) } else { g.Expect(err).NotTo(HaveOccurred()) - g.Expect(got).To(Equal(tt.want)) + g.Expect(got).To(Equal(tt.wantAnnotations)) } }) } } -func TestVMGReconcile(t *testing.T) { +func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { g := NewWithT(t) ctx := context.Background() - // Define all required mock objects for a successful run - cluster := newCluster(clusterName, clusterNamespace, true, true) - cluster.Spec.Topology.Workers.MachineDeployments = clusterv1.MachineDeploymentTopology{ - newMDTopology(mdName1, 1, ""), - } - vsc := newVSphereCluster(clusterName, clusterNamespace, true, zoneA, zoneB) // Two zones - md := newMachineDeployment(mdName1) - machine := newMachine("worker-1", mdName1, false) - vsMachine := 
newVSphereMachine("vsm-1", "worker-1") - - tests := struct { - name string - initialObjects client.Object - expectedResult reconcile.Result - checkVMGExists bool - checkVMGMembers string // Expected member names - checkVMGReplicas int32 // Expected VMG replicas (for sanity check) + // Initial objects for the successful VMG creation path (Expected: 1, Current: 1) + cluster := newCluster(clusterName, clusterNamespace, true, 1, 0) // Expect 1 machine + vsm1 := newVSphereMachine("vsm-1", mdName1, false, nil) + md1 := newMachineDeployment(mdName1) + + tests := []struct { + name string + initialObjects []client.Object + expectedResult reconcile.Result + checkVMGExists bool }{ { - name: "Exit: Cluster not found (GC)", - initialObjects: client.Object{}, - expectedResult: reconcile.Result{}, - checkVMGExists: false, - checkVMGReplicas: 0, + name: "Exit: Cluster Not Found", + initialObjects: []client.Object{}, + expectedResult: reconcile.Result{}, + checkVMGExists: false, }, { - name: "Exit: Cluster marked for deletion", - initialObjects: client.Object{ + name: "Exit: Cluster Deletion Timestamp Set", + initialObjects: []client.Object{ func() client.Object { c := cluster.DeepCopy() c.DeletionTimestamp = &metav1.Time{Time: time.Now()} return c }(), }, - expectedResult: reconcile.Result{}, - checkVMGExists: false, - checkVMGReplicas: 0, - }, - { - name: "Exit: Explicit placement used", - initialObjects: client.Object{ - func() client.Object { - c := cluster.DeepCopy() - c.Spec.Topology.Workers.MachineDeployments.FailureDomain = stringPtr(zoneA) - return c - }(), - }, - expectedResult: reconcile.Result{}, - checkVMGExists: false, - checkVMGReplicas: 0, + expectedResult: reconcile.Result{}, + checkVMGExists: false, }, { - name: "Requeue: VSphereCluster not ready", - initialObjects: client.Object{ - cluster.DeepCopy(), - newVSphereCluster(clusterName, clusterNamespace, false, zoneA, zoneB), // Not Ready + name: "Requeue: ControlPlane Not Initialized", + initialObjects: []client.Object{ + newCluster(clusterName, clusterNamespace, false, 1, 0), // Not Initialized }, - expectedResult: reconcile.Result{RequeueAfter: reconciliationDelay}, - checkVMGExists: false, - checkVMGReplicas: 0, + expectedResult: reconcile.Result{RequeueAfter: reconciliationDelay}, + checkVMGExists: false, }, { - name: "Exit: Single zone detected", - initialObjects: client.Object{ + name: "Requeue: VMG Not Found, Machines Missing (0/1)", + initialObjects: []client.Object{ cluster.DeepCopy(), - newVSphereCluster(clusterName, clusterNamespace, true, zoneA), // Only one zone + md1.DeepCopy(), }, - expectedResult: reconcile.Result{}, - checkVMGExists: false, - checkVMGReplicas: 0, + expectedResult: reconcile.Result{RequeueAfter: reconciliationDelay}, + checkVMGExists: false, }, { - name: "Requeue: ControlPlane not initialized", - initialObjects: client.Object{ - newCluster(clusterName, clusterNamespace, false, true), // Not Initialized - vsc.DeepCopy(), - }, - expectedResult: reconcile.Result{RequeueAfter: reconciliationDelay}, - checkVMGExists: false, - checkVMGReplicas: 0, - }, - { - name: "Requeue: Machines not fully created (0/1)", - initialObjects: client.Object{ + name: "Success: VMG Created (1/1)", + initialObjects: []client.Object{ cluster.DeepCopy(), - vsc.DeepCopy(), - md.DeepCopy(), - // No Machine or VSphereMachine objects created yet + md1.DeepCopy(), + vsm1.DeepCopy(), }, - expectedResult: reconcile.Result{RequeueAfter: reconciliationDelay}, - checkVMGExists: false, - checkVMGReplicas: 0, + expectedResult: reconcile.Result{}, 
+ checkVMGExists: true, }, { - name: "Success: VMG created with correct members (1/1)", - initialObjects: client.Object{ + name: "Success: VMG Updated (Already Exists)", + initialObjects: []client.Object{ cluster.DeepCopy(), - vsc.DeepCopy(), - md.DeepCopy(), - machine.DeepCopy(), - vsMachine.DeepCopy(), + md1.DeepCopy(), + vsm1.DeepCopy(), + &vmoprv1.VirtualMachineGroup{ // Pre-existing VMG + ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: clusterNamespace}, + }, }, - expectedResult: reconcile.Result{}, - checkVMGExists: true, - checkVMGMembers: string{"vsm-1"}, - checkVMGReplicas: 1, // Replica count derived from VMG spec + expectedResult: reconcile.Result{}, + checkVMGExists: true, }, } @@ -531,26 +330,96 @@ func TestVMGReconcile(t *testing.T) { if tt.checkVMGExists { g.Expect(err).NotTo(HaveOccurred(), "VMG should exist") - // Check owner reference - g.Expect(vmg.OwnerReferences).To(HaveLen(1)) - g.Expect(vmg.OwnerReferences.Name).To(Equal(clusterName)) - g.Expect(vmg.OwnerReferences.Kind).To(Equal("Cluster")) - g.Expect(vmg.OwnerReferences.Controller).To(PointTo(BeTrue())) - - // Check members + // Check that the core fields were set by the MutateFn + g.Expect(vmg.Labels).To(HaveKeyWithValue(clusterv1.ClusterNameLabel, clusterName)) g.Expect(vmg.Spec.BootOrder).To(HaveLen(1)) - members := vmg.Spec.BootOrder.Members - g.Expect(members).To(HaveLen(len(tt.checkVMGMembers))) - if len(members) > 0 { - g.Expect(members.Name).To(Equal(tt.checkVMGMembers)) - g.Expect(members.Kind).To(Equal("VirtualMachine")) - } + g.Expect(vmg.Spec.BootOrder[0].Members).To(HaveLen(int(getExpectedVSphereMachines(cluster)))) + + // VMG members should match the VSphereMachine (name: vsm-1) + g.Expect(vmg.Spec.BootOrder[0].Members[0].Name).To(ContainElement("vsm-1")) } else { - g.Expect(apierrors.IsNotFound(err)).To(BeTrue(), "VMG should not exist") + g.Expect(apierrors.IsNotFound(err)).To(BeTrue(), "VMG should not exist or NotFound should be handled gracefully") } }) } } -// stringPtr converts a string to a *string +// Helper function to create a *string func stringPtr(s string) *string { return &s } + +// Helper function to create a basic Cluster object +func newCluster(name, namespace string, initialized bool, replicasMD1, replicasMD2 int32) *clusterv1.Cluster { + cluster := &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Labels: map[string]string{clusterv1.ClusterNameLabel: name}, + }, + Spec: clusterv1.ClusterSpec{ + Topology: clusterv1.Topology{ + Workers: clusterv1.WorkersTopology{ + MachineDeployments: []clusterv1.MachineDeploymentTopology{ + {Name: mdName1, Replicas: &replicasMD1}, + {Name: mdName2, Replicas: &replicasMD2}, + }, + }, + }, + }, + } + if initialized { + conditions.Set(cluster, metav1.Condition{ + Type: clusterv1.ClusterControlPlaneInitializedCondition, + Status: metav1.ConditionTrue, + }) + } + return cluster +} + +// Helper function to create a VSphereMachine (worker, owned by a CAPI Machine) +func newVSphereMachine(name, mdName string, deleted bool, namingStrategy *vmwarev1.VirtualMachineNamingStrategy) *vmwarev1.VSphereMachine { + vsm := &vmwarev1.VSphereMachine{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: clusterNamespace, + Labels: map[string]string{ + clusterv1.ClusterNameLabel: clusterName, + clusterv1.MachineDeploymentNameLabel: mdName, + }, + }, + Spec: vmwarev1.VSphereMachineSpec{ + NamingStrategy: namingStrategy, + }, + } + if deleted { + vsm.DeletionTimestamp = &metav1.Time{Time: time.Now()} + } + return 
vsm +} + +// Helper function to create a VMG member status with placement info +func newVMGMemberStatus(name, kind string, isPlacementReady bool, zone string) vmoprv1.VirtualMachineGroupMemberStatus { + memberStatus := vmoprv1.VirtualMachineGroupMemberStatus{ + Name: name, + Kind: kind, + } + + if isPlacementReady { + conditions.Set(&memberStatus, metav1.Condition{ + Type: vmoprv1.VirtualMachineGroupMemberConditionPlacementReady, + Status: metav1.ConditionTrue, + }) + memberStatus.Placement = &vmoprv1.VirtualMachinePlacementStatus{Zone: zone} + } + return memberStatus +} + +// Helper function to create a MachineDeployment (for listing MD names) +func newMachineDeployment(name string) *clusterv1.MachineDeployment { + return &clusterv1.MachineDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: clusterNamespace, + Labels: map[string]string{clusterv1.ClusterNameLabel: clusterName}, + }, + } +} diff --git a/go.mod b/go.mod index a43e7997df..df97e28e79 100644 --- a/go.mod +++ b/go.mod @@ -4,16 +4,16 @@ go 1.24.0 replace sigs.k8s.io/cluster-api => sigs.k8s.io/cluster-api v1.11.0-rc.0.0.20250905091528-eb4e38c46ff6 -replace github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels => github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels v1.9.1-0.20250908141901-a9e1dfbc0045 +replace github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels => github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels v1.9.1-0.20251003150112-9b458d311c4c // The version of vm-operator should be kept in sync with the manifests at: config/deployments/integration-tests -replace github.com/vmware-tanzu/vm-operator/api => github.com/vmware-tanzu/vm-operator/api v1.8.6 +replace github.com/vmware-tanzu/vm-operator/api => github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c require ( github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d // The version of vm-operator should be kept in sync with the manifests at: config/deployments/integration-tests - github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251007154704-e2d6e85d9ec7 + github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505 github.com/vmware/govmomi v0.52.0 ) diff --git a/go.sum b/go.sum index 8df98ba83c..34bb470a23 100644 --- a/go.sum +++ b/go.sum @@ -243,8 +243,8 @@ github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d h1:c github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d/go.mod h1:JbFOh22iDsT5BowJe0GgpMI5e2/S7cWaJlv9LdURVQM= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d h1:z9lrzKVtNlujduv9BilzPxuge/LE2F0N1ms3TP4JZvw= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d/go.mod h1:Q4JzNkNMvjo7pXtlB5/R3oME4Nhah7fAObWgghVmtxk= -github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251007154704-e2d6e85d9ec7 h1:VlnaiDKI1H1buwBOgL8R3HRB3EQNN96xMdz25vE5FUo= -github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251007154704-e2d6e85d9ec7/go.mod h1:nWTPpxfe4gHuuYuFcrs86+NMxfkqPk3a3IlvI8TCWak= +github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c h1:XISTT0dw/XwMlyyiOPHPsXCxfI1Ro2Zuozi6eIacXGo= +github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c/go.mod h1:nWTPpxfe4gHuuYuFcrs86+NMxfkqPk3a3IlvI8TCWak= github.com/vmware-tanzu/vm-operator/external/ncp 
v0.0.0-20240404200847-de75746a9505 h1:y4wXx1FUFqqSgJ/xUOEM1DLS2Uu0KaeLADWpzpioGTU= github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505/go.mod h1:5rqRJ9zGR+KnKbkGx373WgN8xJpvAj99kHnfoDYRO5I= github.com/vmware/govmomi v0.52.0 h1:JyxQ1IQdllrY7PJbv2am9mRsv3p9xWlIQ66bv+XnyLw= diff --git a/internal/test/helpers/envtest.go b/internal/test/helpers/envtest.go index 41341b70cb..0acbcd68eb 100644 --- a/internal/test/helpers/envtest.go +++ b/internal/test/helpers/envtest.go @@ -29,6 +29,7 @@ import ( "github.com/onsi/ginkgo/v2" "github.com/pkg/errors" + vmoprv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha2" "github.com/vmware/govmomi/simulator" "golang.org/x/tools/go/packages" admissionv1 "k8s.io/api/admissionregistration/v1" @@ -89,6 +90,7 @@ func init() { utilruntime.Must(admissionv1.AddToScheme(scheme)) utilruntime.Must(clusterv1.AddToScheme(scheme)) utilruntime.Must(infrav1.AddToScheme(scheme)) + utilruntime.Must(vmoprv1.AddToScheme(scheme)) // Get the root of the current file to use in CRD paths. _, filename, _, ok := goruntime.Caller(0) diff --git a/test/go.mod b/test/go.mod index 9f55c7dc8a..69d0a9c7d9 100644 --- a/test/go.mod +++ b/test/go.mod @@ -8,7 +8,7 @@ replace sigs.k8s.io/cluster-api/test => sigs.k8s.io/cluster-api/test v1.11.0-rc. replace sigs.k8s.io/cluster-api-provider-vsphere => ../ -replace github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels => github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels v1.9.1-0.20250908141901-a9e1dfbc0045 +replace github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels => github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels v1.9.1-0.20251003150112-9b458d311c4c // The version of vm-operator should be kept in sync with the manifests at: config/deployments/integration-testsz replace github.com/vmware-tanzu/vm-operator/api => github.com/vmware-tanzu/vm-operator/api v1.8.6 @@ -16,7 +16,7 @@ replace github.com/vmware-tanzu/vm-operator/api => github.com/vmware-tanzu/vm-op require ( github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d // The version of vm-operator should be kept in sync with the manifests at: config/deployments/integration-tests - github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20250908141901-a9e1dfbc0045 + github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c github.com/vmware/govmomi v0.52.0 ) From 189f6d24d8ba18c9d2827e3e8586a52cef7300da Mon Sep 17 00:00:00 2001 From: Gong Zhang Date: Fri, 24 Oct 2025 16:44:19 +0800 Subject: [PATCH 10/25] Surface error when VM is waiting for VMG Signed-off-by: Gong Zhang --- apis/v1beta1/vspheremachine_types.go | 4 + .../vmware/virtualmachinegroup_controller.go | 11 ++- .../vmware/virtualmachinegroup_reconciler.go | 82 ++++++++----------- .../virtualmachinegroup_reconciler_test.go | 34 ++++---- pkg/services/vmoperator/vmopmachine.go | 20 +++-- pkg/services/vmoperator/vmopmachine_test.go | 8 +- 6 files changed, 77 insertions(+), 82 deletions(-) diff --git a/apis/v1beta1/vspheremachine_types.go b/apis/v1beta1/vspheremachine_types.go index cc6d31d1aa..cbdd5ff7ac 100644 --- a/apis/v1beta1/vspheremachine_types.go +++ b/apis/v1beta1/vspheremachine_types.go @@ -81,6 +81,10 @@ const ( // Note: This reason is used only in supervisor mode. 
VSphereMachineVirtualMachinePoweringOnV1Beta2Reason = "PoweringOn" + // VSphereMachineVirtualMachineWaitingForVirtualMachineGroupV1Beta2Reason surfaces that the VirtualMachine + // is waiting for its corresponding VirtualMachineGroup to be created and to include this VM as a member. + VSphereMachineVirtualMachineWaitingForVirtualMachineGroupV1Beta2Reason = "WaitingForVirtualMachineGroup" + // VSphereMachineVirtualMachineWaitingForNetworkAddressV1Beta2Reason surfaces when the VirtualMachine that is controlled // by the VSphereMachine waiting for the machine network settings to be reported after machine being powered on. VSphereMachineVirtualMachineWaitingForNetworkAddressV1Beta2Reason = "WaitingForNetworkAddress" diff --git a/controllers/vmware/virtualmachinegroup_controller.go b/controllers/vmware/virtualmachinegroup_controller.go index e9102587e5..984414f30c 100644 --- a/controllers/vmware/virtualmachinegroup_controller.go +++ b/controllers/vmware/virtualmachinegroup_controller.go @@ -89,7 +89,7 @@ func AddVirtualMachineGroupControllerToManager(ctx context.Context, controllerMa } // ClusterToVirtualMachineGroup maps Cluster events to VirtualMachineGroup reconcile requests. -func (r VirtualMachineGroupReconciler) ClusterToVirtualMachineGroup(ctx context.Context, a ctrlclient.Object) []reconcile.Request { +func (r *VirtualMachineGroupReconciler) ClusterToVirtualMachineGroup(ctx context.Context, a ctrlclient.Object) []reconcile.Request { cluster, ok := a.(*clusterv1.Cluster) if !ok { return nil @@ -104,11 +104,10 @@ func (r VirtualMachineGroupReconciler) ClusterToVirtualMachineGroup(ctx context. }} } -// VsphereMachineToVirtualMachineGroup maps VSphereMachine events to VirtualMachineGroup reconcile requests. -// This handler only processes VSphereMachine objects for Day-2 operations, ensuring VSphereMachine state stays -// in sync with its owning VMG. If no corresponding VMG is found, this is a no-op. - -func (r VirtualMachineGroupReconciler) VSphereMachineToVirtualMachineGroup(ctx context.Context, a ctrlclient.Object) []reconcile.Request { +// VSphereMachineToVirtualMachineGroup maps VSphereMachine events to VirtualMachineGroup reconcile requests. +// This handler only processes VSphereMachine objects for Day-2 operations when VMG could be found, ensuring +// VMG member list in sync with VSphereMachines. If no corresponding VMG is found, this is a no-op. 
+func (r *VirtualMachineGroupReconciler) VSphereMachineToVirtualMachineGroup(ctx context.Context, a ctrlclient.Object) []reconcile.Request { vSphereMachine, ok := a.(*vmwarev1.VSphereMachine) if !ok { return nil diff --git a/controllers/vmware/virtualmachinegroup_reconciler.go b/controllers/vmware/virtualmachinegroup_reconciler.go index 9f2c811449..ce607fb92c 100644 --- a/controllers/vmware/virtualmachinegroup_reconciler.go +++ b/controllers/vmware/virtualmachinegroup_reconciler.go @@ -51,6 +51,14 @@ type VirtualMachineGroupReconciler struct { Recorder record.EventRecorder } +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters,verbs=get;list;watch +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters/status,verbs=get +// +kubebuilder:rbac:groups=vmoperator.vmware.com,resources=virtualmachinegroups,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=vmoperator.vmware.com,resources=virtualmachinegroups/status,verbs=get +// +kubebuilder:rbac:groups=vmware.infrastructure.cluster.x-k8s.io,resources=vspheremachines,verbs=get;list;watch +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinedeployments,verbs=get;list;watch +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines,verbs=get;list;watch + func (r *VirtualMachineGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) { log := ctrl.LoggerFrom(ctx) @@ -105,7 +113,12 @@ func (r *VirtualMachineGroupReconciler) createOrUpdateVMG(ctx context.Context, c } // Calculate expected Machines of all MachineDeployments. - expected := getExpectedVSphereMachines(cluster) + expected, err := getExpectedVSphereMachines(ctx, r.Client, cluster) + if err != nil { + log.Error(err, "failed to get expected Machines of all MachineDeployment") + return ctrl.Result{}, err + } + if expected == 0 { log.Info("none of MachineDeployments specifies replica and node auto replacement doesn't support this scenario") return reconcile.Result{}, nil @@ -215,60 +228,31 @@ func (r *VirtualMachineGroupReconciler) createOrUpdateVMG(ctx context.Context, c return reconcile.Result{}, err } -// isMDDefined checks if there are any MachineDeployments for the given cluster -// by listing objects with the cluster.x-k8s.io/cluster-name label. -func (r *VirtualMachineGroupReconciler) isMDDefined(ctx context.Context, cluster *clusterv1.Cluster) (bool, error) { - mdList := &clusterv1.MachineDeploymentList{} - if err := r.Client.List(ctx, mdList, client.InNamespace(cluster.Namespace), client.MatchingLabels{clusterv1.ClusterNameLabel: cluster.Name}); err != nil { - return false, errors.Wrapf(err, "failed to list MachineDeployments for cluster %s/%s", - cluster.Namespace, cluster.Name) - } - - if len(mdList.Items) == 0 { - return false, errors.Errorf("no MachineDeployments found for cluster %s/%s", - cluster.Namespace, cluster.Name) - } - - return true, nil -} - -// isExplicitPlacement checks if any MachineDeployment has an explicit failure domain set. -func (r *VirtualMachineGroupReconciler) isExplicitPlacement(cluster *clusterv1.Cluster) (bool, error) { - // First, ensure MachineDeployments are defined - mdDefined, err := r.isMDDefined(context.Background(), cluster) - if !mdDefined { - return false, err - } - - // Iterate through MachineDeployments to find if an explicit failure domain is set. 
- mds := cluster.Spec.Topology.Workers.MachineDeployments - for _, md := range mds { - // If a failure domain is specified for any MachineDeployment, it indicates - // explicit placement is configured, so return true. - if md.FailureDomain != "" { - return true, nil - } - } - - return false, nil -} - // getExpectedVSphereMachines returns the total number of replicas across all -// MachineDeployments in the Cluster's Topology.Workers. -func getExpectedVSphereMachines(cluster *clusterv1.Cluster) int32 { - if !cluster.Spec.Topology.IsDefined() { - return 0 +// MachineDeployments belonging to the Cluster +func getExpectedVSphereMachines(ctx context.Context, kubeClient client.Client, cluster *clusterv1.Cluster) (int32, error) { + var mdList clusterv1.MachineDeploymentList + if err := kubeClient.List( + ctx, + &mdList, + client.InNamespace(cluster.Namespace), + client.MatchingLabels{clusterv1.ClusterNameLabel: cluster.Name}, + ); err != nil { + return 0, errors.Wrap(err, "failed to list MachineDeployments") } var total int32 - for _, md := range cluster.Spec.Topology.Workers.MachineDeployments { - if md.Replicas != nil { - total += *md.Replicas + for _, md := range mdList.Items { + if md.Spec.Replicas != nil { + total += *md.Spec.Replicas } } - return total + + return total, nil } +// getCurrentVSphereMachines returns the list of VSphereMachines belonging to the Cluster’s MachineDeployments. +// VSphereMachines marked for removal are excluded from the result. func getCurrentVSphereMachines(ctx context.Context, kubeClient client.Client, clusterNamespace, clusterName string) ([]vmwarev1.VSphereMachine, error) { log := ctrl.LoggerFrom(ctx) @@ -321,7 +305,7 @@ func GenerateVMGPlacementAnnotations(ctx context.Context, vmg *vmoprv1.VirtualMa // Check if VM belongs to a Machine Deployment by name (e.g. cluster-1-np-1-vm-xxx contains np-1) // TODO: Establish membership via the machine deployment name label - if strings.Contains(member.Name, md) { + if strings.Contains(member.Name, "-"+md+"-") { // Get the VM placement information by member status. // VMs that have undergone placement do not have Placement info set, skip. if member.Placement == nil { @@ -345,7 +329,7 @@ func GenerateVMGPlacementAnnotations(ctx context.Context, vmg *vmoprv1.VirtualMa return annotations, nil } -// TODO: de-dup this logic with vmopmachine.go +// Duplicated this logic from pkg/services/vmoperator/vmopmachine.go // GenerateVirtualMachineName generates the name of a VirtualMachine based on the naming strategy. 
func GenerateVirtualMachineName(machineName string, namingStrategy *vmwarev1.VirtualMachineNamingStrategy) (string, error) { // Per default the name of the VirtualMachine should be equal to the Machine name (this is the same as "{{ .machine.name }}") diff --git a/controllers/vmware/virtualmachinegroup_reconciler_test.go b/controllers/vmware/virtualmachinegroup_reconciler_test.go index 4be9d868ab..2a36700e8c 100644 --- a/controllers/vmware/virtualmachinegroup_reconciler_test.go +++ b/controllers/vmware/virtualmachinegroup_reconciler_test.go @@ -36,6 +36,7 @@ const ( func TestGetExpectedVSphereMachines(t *testing.T) { g := NewWithT(t) + ctx := context.Background() tests := []struct { name string @@ -64,8 +65,9 @@ func TestGetExpectedVSphereMachines(t *testing.T) { } for _, tt := range tests { + fakeClient := fake.NewClientBuilder().WithScheme(s).WithObjects(tt.cluster).Build() t.Run(tt.name, func(t *testing.T) { - g.Expect(getExpectedVSphereMachines(tt.cluster)).To(Equal(tt.expected)) + g.Expect(getExpectedVSphereMachines(ctx, fakeClient, tt.cluster)).To(Equal(tt.expected)) }) } } @@ -74,12 +76,12 @@ func TestGetCurrentVSphereMachines(t *testing.T) { g := NewWithT(t) ctx := context.Background() - // VM names are based on CAPI Machine names, not VSphereMachine names, but we use VSM objects here. + // VM names are based on CAPI Machine names, not VSphereMachine names, but we use VSphereMachine here. vsm1 := newVSphereMachine("vsm-1", mdName1, false, nil) vsm2 := newVSphereMachine("vsm-2", mdName2, false, nil) vsmDeleting := newVSphereMachine("vsm-3", mdName1, true, nil) // Deleting vsmControlPlane := newVSphereMachine("vsm-cp", "cp-md", false, nil) - vsmControlPlane.Labels[clusterv1.MachineControlPlaneLabel] = "true" // Should be filtered by label in production, but here filtered implicitly by only listing MD-labelled objects + vsmControlPlane.Labels[clusterv1.MachineControlPlaneLabel] = "true" tests := []struct { name string @@ -94,7 +96,7 @@ func TestGetCurrentVSphereMachines(t *testing.T) { vsmDeleting, vsmControlPlane, }, - want: 2, // Should exclude vsm-3 (deleting) and vsm-cp (no MD label used in the actual listing logic) + want: 2, // Should exclude vsm-3 (deleting) and vsm-cp (control plane VSphereMachine) }, { name: "No VSphereMachines found", @@ -110,7 +112,7 @@ func TestGetCurrentVSphereMachines(t *testing.T) { g.Expect(err).NotTo(HaveOccurred()) g.Expect(len(got)).To(Equal(tt.want)) - // Check that the correct machines are present (e.g., vsm1 and vsm2) + // Check that the correct Machines are present if tt.want > 0 { names := make([]string, len(got)) for i, vsm := range got { @@ -191,7 +193,7 @@ func TestGenerateVMGPlacementAnnotations(t *testing.T) { Members: []vmoprv1.VirtualMachineGroupMemberStatus{ // First VM sets the placement newVMGMemberStatus(vmName1, "VirtualMachine", true, zoneA), - // Second VM is ignored (logic skips finding placement twice) + // Second VM is ignored newVMGMemberStatus(vmNameUnplaced, "VirtualMachine", true, zoneB), }, }, @@ -208,7 +210,7 @@ func TestGenerateVMGPlacementAnnotations(t *testing.T) { ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: clusterNamespace}, Status: vmoprv1.VirtualMachineGroupStatus{ Members: []vmoprv1.VirtualMachineGroupMemberStatus{ - newVMGMemberStatus(vmNameWrongKind, "Pod", true, zoneA), + newVMGMemberStatus(vmNameWrongKind, "VirtualMachineGroup", true, zoneA), }, }, }, @@ -220,7 +222,6 @@ func TestGenerateVMGPlacementAnnotations(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - // 
Mock client is needed for the logging in the function, but not used for API calls ctx := ctrl.LoggerInto(context.Background(), ctrl.LoggerFrom(context.Background())) got, err := GenerateVMGPlacementAnnotations(ctx, tt.vmg, tt.machineDeployments) @@ -240,7 +241,7 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { ctx := context.Background() // Initial objects for the successful VMG creation path (Expected: 1, Current: 1) - cluster := newCluster(clusterName, clusterNamespace, true, 1, 0) // Expect 1 machine + cluster := newCluster(clusterName, clusterNamespace, true, 1, 0) vsm1 := newVSphereMachine("vsm-1", mdName1, false, nil) md1 := newMachineDeployment(mdName1) @@ -271,13 +272,13 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { { name: "Requeue: ControlPlane Not Initialized", initialObjects: []client.Object{ - newCluster(clusterName, clusterNamespace, false, 1, 0), // Not Initialized + newCluster(clusterName, clusterNamespace, false, 1, 0), }, expectedResult: reconcile.Result{RequeueAfter: reconciliationDelay}, checkVMGExists: false, }, { - name: "Requeue: VMG Not Found, Machines Missing (0/1)", + name: "Requeue: VMG Not Found", initialObjects: []client.Object{ cluster.DeepCopy(), md1.DeepCopy(), @@ -286,7 +287,7 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { checkVMGExists: false, }, { - name: "Success: VMG Created (1/1)", + name: "Success: VMG Created", initialObjects: []client.Object{ cluster.DeepCopy(), md1.DeepCopy(), @@ -301,7 +302,7 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { cluster.DeepCopy(), md1.DeepCopy(), vsm1.DeepCopy(), - &vmoprv1.VirtualMachineGroup{ // Pre-existing VMG + &vmoprv1.VirtualMachineGroup{ ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: clusterNamespace}, }, }, @@ -333,7 +334,9 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { // Check that the core fields were set by the MutateFn g.Expect(vmg.Labels).To(HaveKeyWithValue(clusterv1.ClusterNameLabel, clusterName)) g.Expect(vmg.Spec.BootOrder).To(HaveLen(1)) - g.Expect(vmg.Spec.BootOrder[0].Members).To(HaveLen(int(getExpectedVSphereMachines(cluster)))) + expected, err := getExpectedVSphereMachines(ctx, fakeClient, tt.initialObjects[0].(*clusterv1.Cluster)) + g.Expect(err).NotTo(HaveOccurred(), "Should get expected Machines") + g.Expect(vmg.Spec.BootOrder[0].Members).To(HaveLen(int(expected))) // VMG members should match the VSphereMachine (name: vsm-1) g.Expect(vmg.Spec.BootOrder[0].Members[0].Name).To(ContainElement("vsm-1")) @@ -344,9 +347,6 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { } } -// Helper function to create a *string -func stringPtr(s string) *string { return &s } - // Helper function to create a basic Cluster object func newCluster(name, namespace string, initialized bool, replicasMD1, replicasMD2 int32) *clusterv1.Cluster { cluster := &clusterv1.Cluster{ diff --git a/pkg/services/vmoperator/vmopmachine.go b/pkg/services/vmoperator/vmopmachine.go index 45dcdcaf49..a60292c2d0 100644 --- a/pkg/services/vmoperator/vmopmachine.go +++ b/pkg/services/vmoperator/vmopmachine.go @@ -207,14 +207,19 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap return false, err } if apierrors.IsNotFound(err) { - log.V(4).Info("VirtualMachineGroup not found, requeueing") + log.V(4).Info("VirtualMachineGroup not found, requeueing", "Name", key.Name, "Namespace", key.Namespace) return true, nil } } // Proceed only if the machine is a 
member of the VirtualMachineGroup. if !v.checkVirtualMachineGroupMembership(vmOperatorVMGroup, supervisorMachineCtx) { - log.V(4).Info("Waiting for VirtualMachineGroup membership, requeueing") + v1beta2conditions.Set(supervisorMachineCtx.VSphereMachine, metav1.Condition{ + Type: infrav1.VSphereMachineVirtualMachineProvisionedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: infrav1.VSphereMachineVirtualMachineWaitingForVirtualMachineGroupV1Beta2Reason, + }) + log.V(4).Info("Waiting for VirtualMachineGroup membership, requeueing", "VM Name", supervisorMachineCtx.Machine.Name) return true, nil } @@ -222,7 +227,7 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap vmGroupName: vmOperatorVMGroup.Name, } - // Set the zone label using the annotation of the machine deployment:zone mapping from VMG. + // Set the zone label using the annotation of the per-md zone mapping from VMG. // This is for new VMs created during day-2 operations in VC 9.1. nodePool := supervisorMachineCtx.Machine.Labels[clusterv1.MachineDeploymentNameLabel] if zone, ok := vmOperatorVMGroup.Annotations[fmt.Sprintf("zone.cluster.x-k8s.io/%s", nodePool)]; ok && zone != "" { @@ -842,10 +847,13 @@ func (v *VmopMachineService) addVolumes(ctx context.Context, supervisorMachineCt }, } + // Before VC 9.1: // The CSI zone annotation must be set when using a zonal storage class, // which is required when the cluster has multiple (3) zones. // Single zone clusters (legacy/default) do not support zonal storage and must not // have the zone annotation set. + // Since VC 9.1: With Node Auto Placement enabled, failureDomain is optional and CAPV no longer + // sets PVC annotations. PVC placement now follows the StorageClass behavior (Immediate or WaitForFirstConsumer). zonal := len(supervisorMachineCtx.VSphereCluster.Status.FailureDomains) > 1 if zone := supervisorMachineCtx.VSphereMachine.Spec.FailureDomain; zonal && zone != nil { @@ -904,8 +912,7 @@ func getVMLabels(supervisorMachineCtx *vmware.SupervisorMachineContext, vmLabels vmLabels[k] = v } - // Get the labels that determine the VM's placement inside of a stretched - // cluster. + // Get the labels that determine the VM's placement var failureDomain *string if affinityInfo != nil && affinityInfo.failureDomain != nil { failureDomain = affinityInfo.failureDomain @@ -934,12 +941,13 @@ func getVMLabels(supervisorMachineCtx *vmware.SupervisorMachineContext, vmLabels // and thus the code is optimized as such. However, in the future // this function may return a more diverse topology. func getTopologyLabels(supervisorMachineCtx *vmware.SupervisorMachineContext, failureDomain *string) map[string]string { - // TODO: Make it so that we always set the zone label, might require enquiring the zones present (when unset) + // This is for explicit placement. if fd := supervisorMachineCtx.VSphereMachine.Spec.FailureDomain; fd != nil && *fd != "" { return map[string]string{ corev1.LabelTopologyZone: *fd, } } + // This is for automatic placement. 
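	// e.g. (illustrative, hypothetical zone name): with automatic placement the failureDomain
	// passed in here is expected to carry the zone recorded in the VirtualMachineGroup's
	// per-MachineDeployment annotation, so a VM placed into "zone-a" gets the label
	//   topology.kubernetes.io/zone: zone-a
	// while an explicit Spec.FailureDomain on the VSphereMachine is handled by the branch above.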
if failureDomain != nil && *failureDomain != "" { return map[string]string{ corev1.LabelTopologyZone: *failureDomain, diff --git a/pkg/services/vmoperator/vmopmachine_test.go b/pkg/services/vmoperator/vmopmachine_test.go index 8d4ca34510..c36c9616ee 100644 --- a/pkg/services/vmoperator/vmopmachine_test.go +++ b/pkg/services/vmoperator/vmopmachine_test.go @@ -729,7 +729,7 @@ var _ = Describe("VirtualMachine tests", func() { } }) - Context("With auto placement feature gate enabled", func() { + Context("With node auto placement feature gate enabled", func() { BeforeEach(func() { t := GinkgoT() featuregatetesting.SetFeatureGateDuringTest(t, feature.Gates, feature.NodeAutoPlacement, true) @@ -864,7 +864,7 @@ var _ = Describe("VirtualMachine tests", func() { By("Verify VM anti-affinity rules are set correctly") verifyVMAntiAffinityRules(vmopVM, machineDeploymentName) - By("Verify that worker machine has machine deploymet label set") + By("Verify that worker machine has machine deployment label set") Expect(vmopVM.Labels[clusterv1.MachineDeploymentNameLabel]).To(Equal(machineDeploymentName)) By("Verify that GroupName is set from VirtualMachineGroup") @@ -898,12 +898,12 @@ var _ = Describe("VirtualMachine tests", func() { supervisorMachineContext = util.CreateMachineContext(fdClusterContext, machine, vsphereMachine) supervisorMachineContext.ControllerManagerContext = fdControllerManagerContext - // Create a VirtualMachineGroup for the cluster with zone label + // Create a VirtualMachineGroup for the cluster with per-md zone annotation vmGroup := &vmoprv1.VirtualMachineGroup{ ObjectMeta: metav1.ObjectMeta{ Name: fdClusterName, Namespace: corev1.NamespaceDefault, - Labels: map[string]string{ + Annotations: map[string]string{ fmt.Sprintf("zone.cluster.x-k8s.io/%s", machineDeploymentName): failureDomainName, }, }, From 8cc8e8a8ab52f54cf507a1054169d287d9a996b5 Mon Sep 17 00:00:00 2001 From: Gong Zhang Date: Tue, 28 Oct 2025 11:45:54 +0800 Subject: [PATCH 11/25] Update dependency in packaging&test Signed-off-by: Gong Zhang --- packaging/go.sum | 4 ++-- test/go.sum | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packaging/go.sum b/packaging/go.sum index 8a4cb28435..0659c3663f 100644 --- a/packaging/go.sum +++ b/packaging/go.sum @@ -135,8 +135,8 @@ github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d h1:c github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d/go.mod h1:JbFOh22iDsT5BowJe0GgpMI5e2/S7cWaJlv9LdURVQM= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d h1:z9lrzKVtNlujduv9BilzPxuge/LE2F0N1ms3TP4JZvw= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d/go.mod h1:Q4JzNkNMvjo7pXtlB5/R3oME4Nhah7fAObWgghVmtxk= -github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20250908141901-a9e1dfbc0045 h1:zME8crazIAWVJGboJpSLl+qcRYQ8yA6hPQojz28gY5M= -github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20250908141901-a9e1dfbc0045/go.mod h1:hkc/QZCSHcosWWMPS6VWWR12WenZcNE3BaTJ/8A8sNE= +github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c h1:XISTT0dw/XwMlyyiOPHPsXCxfI1Ro2Zuozi6eIacXGo= +github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c/go.mod h1:nWTPpxfe4gHuuYuFcrs86+NMxfkqPk3a3IlvI8TCWak= github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505 h1:y4wXx1FUFqqSgJ/xUOEM1DLS2Uu0KaeLADWpzpioGTU= github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505/go.mod 
h1:5rqRJ9zGR+KnKbkGx373WgN8xJpvAj99kHnfoDYRO5I= github.com/vmware/govmomi v0.52.0 h1:JyxQ1IQdllrY7PJbv2am9mRsv3p9xWlIQ66bv+XnyLw= diff --git a/test/go.sum b/test/go.sum index 0f616f4cb9..8ac8dfd79b 100644 --- a/test/go.sum +++ b/test/go.sum @@ -360,8 +360,8 @@ github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d h1:c github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d/go.mod h1:JbFOh22iDsT5BowJe0GgpMI5e2/S7cWaJlv9LdURVQM= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d h1:z9lrzKVtNlujduv9BilzPxuge/LE2F0N1ms3TP4JZvw= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d/go.mod h1:Q4JzNkNMvjo7pXtlB5/R3oME4Nhah7fAObWgghVmtxk= -github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20250908141901-a9e1dfbc0045 h1:zME8crazIAWVJGboJpSLl+qcRYQ8yA6hPQojz28gY5M= -github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20250908141901-a9e1dfbc0045/go.mod h1:hkc/QZCSHcosWWMPS6VWWR12WenZcNE3BaTJ/8A8sNE= +github.com/vmware-tanzu/vm-operator/api v1.8.6 h1:NIndORjcnSmIlQsCMIewpIwg/ocRVDh2lYjOroTVLrU= +github.com/vmware-tanzu/vm-operator/api v1.8.6/go.mod h1:HHA2SNI9B5Yqtyp5t+Gt9WTWBi/fIkM6+MukDDSf11A= github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505 h1:y4wXx1FUFqqSgJ/xUOEM1DLS2Uu0KaeLADWpzpioGTU= github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505/go.mod h1:5rqRJ9zGR+KnKbkGx373WgN8xJpvAj99kHnfoDYRO5I= github.com/vmware/govmomi v0.52.0 h1:JyxQ1IQdllrY7PJbv2am9mRsv3p9xWlIQ66bv+XnyLw= From 2a21acc5cbe8e4863990775ac51e2c9f7c1b06f2 Mon Sep 17 00:00:00 2001 From: Gong Zhang Date: Tue, 28 Oct 2025 15:51:55 +0800 Subject: [PATCH 12/25] Fix UT errors Signed-off-by: Gong Zhang --- .../vmware/virtualmachinegroup_controller.go | 9 +- .../vmware/virtualmachinegroup_reconciler.go | 11 +- .../virtualmachinegroup_reconciler_test.go | 200 ++++++++++++------ controllers/vspherecluster_reconciler.go | 1 - test/framework/vmoperator/vmoperator.go | 2 +- test/go.mod | 4 +- test/go.sum | 4 +- 7 files changed, 151 insertions(+), 80 deletions(-) diff --git a/controllers/vmware/virtualmachinegroup_controller.go b/controllers/vmware/virtualmachinegroup_controller.go index 984414f30c..d0c60aee01 100644 --- a/controllers/vmware/virtualmachinegroup_controller.go +++ b/controllers/vmware/virtualmachinegroup_controller.go @@ -21,8 +21,6 @@ import ( vmoprv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha2" apitypes "k8s.io/apimachinery/pkg/types" - vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1" - capvcontext "sigs.k8s.io/cluster-api-provider-vsphere/pkg/context" clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" "sigs.k8s.io/cluster-api/util/predicates" ctrl "sigs.k8s.io/controller-runtime" @@ -34,6 +32,9 @@ import ( "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/predicate" "sigs.k8s.io/controller-runtime/pkg/reconcile" + + vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1" + capvcontext "sigs.k8s.io/cluster-api-provider-vsphere/pkg/context" ) // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters,verbs=get;list;watch @@ -77,7 +78,7 @@ func AddVirtualMachineGroupControllerToManager(ctx context.Context, controllerMa handler.EnqueueRequestsFromMapFunc(reconciler.VSphereMachineToVirtualMachineGroup), ctrlbldr.WithPredicates( predicate.Funcs{ - UpdateFunc: func(e event.UpdateEvent) bool { return false }, + UpdateFunc: func(event.UpdateEvent) bool { return false }, CreateFunc: 
func(event.CreateEvent) bool { return true }, DeleteFunc: func(event.DeleteEvent) bool { return true }, GenericFunc: func(event.GenericEvent) bool { return false }, @@ -89,7 +90,7 @@ func AddVirtualMachineGroupControllerToManager(ctx context.Context, controllerMa } // ClusterToVirtualMachineGroup maps Cluster events to VirtualMachineGroup reconcile requests. -func (r *VirtualMachineGroupReconciler) ClusterToVirtualMachineGroup(ctx context.Context, a ctrlclient.Object) []reconcile.Request { +func (r *VirtualMachineGroupReconciler) ClusterToVirtualMachineGroup(_ context.Context, a ctrlclient.Object) []reconcile.Request { cluster, ok := a.(*clusterv1.Cluster) if !ok { return nil diff --git a/controllers/vmware/virtualmachinegroup_reconciler.go b/controllers/vmware/virtualmachinegroup_reconciler.go index ce607fb92c..0be615857b 100644 --- a/controllers/vmware/virtualmachinegroup_reconciler.go +++ b/controllers/vmware/virtualmachinegroup_reconciler.go @@ -30,6 +30,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/tools/record" "k8s.io/klog/v2" + clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" "sigs.k8s.io/cluster-api/util/conditions" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -38,7 +39,6 @@ import ( vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1" infrautilv1 "sigs.k8s.io/cluster-api-provider-vsphere/pkg/util" - clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" ) const ( @@ -72,6 +72,7 @@ func (r *VirtualMachineGroupReconciler) Reconcile(ctx context.Context, req ctrl. } log = log.WithValues("Cluster", klog.KObj(cluster)) + // If Cluster is deleted, just return as VirtualMachineGroup will be GCed and no extra processing needed. if !cluster.DeletionTimestamp.IsZero() { return reconcile.Result{}, nil @@ -86,10 +87,9 @@ func (r *VirtualMachineGroupReconciler) Reconcile(ctx context.Context, req ctrl. // Continue with the main logic. return r.createOrUpdateVMG(ctx, cluster) - } -// createOrUpdateVMG Create or Update VirtualMachineGroup +// createOrUpdateVMG Create or Update VirtualMachineGroup. func (r *VirtualMachineGroupReconciler) createOrUpdateVMG(ctx context.Context, cluster *clusterv1.Cluster) (_ reconcile.Result, reterr error) { log := ctrl.LoggerFrom(ctx) @@ -228,8 +228,7 @@ func (r *VirtualMachineGroupReconciler) createOrUpdateVMG(ctx context.Context, c return reconcile.Result{}, err } -// getExpectedVSphereMachines returns the total number of replicas across all -// MachineDeployments belonging to the Cluster +// MachineDeployments belonging to the Cluster. func getExpectedVSphereMachines(ctx context.Context, kubeClient client.Client, cluster *clusterv1.Cluster) (int32, error) { var mdList clusterv1.MachineDeploymentList if err := kubeClient.List( @@ -329,8 +328,8 @@ func GenerateVMGPlacementAnnotations(ctx context.Context, vmg *vmoprv1.VirtualMa return annotations, nil } -// Duplicated this logic from pkg/services/vmoperator/vmopmachine.go // GenerateVirtualMachineName generates the name of a VirtualMachine based on the naming strategy. +// Duplicated this logic from pkg/services/vmoperator/vmopmachine.go. 
func GenerateVirtualMachineName(machineName string, namingStrategy *vmwarev1.VirtualMachineNamingStrategy) (string, error) { // Per default the name of the VirtualMachine should be equal to the Machine name (this is the same as "{{ .machine.name }}") if namingStrategy == nil || namingStrategy.Template == nil { diff --git a/controllers/vmware/virtualmachinegroup_reconciler_test.go b/controllers/vmware/virtualmachinegroup_reconciler_test.go index 2a36700e8c..efbd36a022 100644 --- a/controllers/vmware/virtualmachinegroup_reconciler_test.go +++ b/controllers/vmware/virtualmachinegroup_reconciler_test.go @@ -1,3 +1,19 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + package vmware import ( @@ -9,24 +25,24 @@ import ( . "github.com/onsi/gomega" vmoprv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha2" - apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" - vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1" + "k8s.io/utils/ptr" clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" "sigs.k8s.io/cluster-api/util/conditions" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/reconcile" -) -var s = runtime.NewScheme() + vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1" +) const ( clusterName = "test-cluster" + otherClusterName = "other-cluster" clusterNamespace = "test-ns" mdName1 = "md-worker-a" mdName2 = "md-worker-b" @@ -38,36 +54,62 @@ func TestGetExpectedVSphereMachines(t *testing.T) { g := NewWithT(t) ctx := context.Background() + targetCluster := newTestCluster(clusterName, clusterNamespace) + + mdA := newMachineDeployment("md-a", clusterName, clusterNamespace, ptr.To(int32(3))) + mdB := newMachineDeployment("md-b", clusterName, clusterNamespace, ptr.To(int32(5))) + mdCNil := newMachineDeployment("md-c-nil", clusterName, clusterNamespace, nil) + mdDZero := newMachineDeployment("md-d-zero", clusterName, clusterNamespace, ptr.To(int32(0))) + // Create an MD for a different cluster (should be filtered) + mdOtherCluster := newMachineDeployment("md-other", otherClusterName, clusterNamespace, ptr.To(int32(5))) + tests := []struct { - name string - cluster *clusterv1.Cluster - expected int32 + name string + initialObjects []client.Object + expectedTotal int32 + wantErr bool }{ { - name: "Defined topology with replicas", - cluster: newCluster(clusterName, clusterNamespace, true, 3, 2), - expected: 5, + name: "Sum of two MDs", + initialObjects: []client.Object{mdA, mdB}, + expectedTotal: 8, + wantErr: false, + }, + { + name: "Should succeed when MDs include nil and zero replicas", + initialObjects: []client.Object{mdA, mdB, mdCNil, mdDZero}, + expectedTotal: 8, + wantErr: false, }, { - name: "Defined topology with zero replicas", - cluster: newCluster(clusterName, clusterNamespace, 
true, 0, 0), - expected: 0, + name: "Should filters out MDs from other clusters", + initialObjects: []client.Object{mdA, mdB, mdOtherCluster}, + expectedTotal: 8, + wantErr: false, }, { - name: "Undefined topology", - cluster: func() *clusterv1.Cluster { - c := newCluster(clusterName, clusterNamespace, true, 1, 1) - c.Spec.Topology = clusterv1.Topology{} - return c - }(), - expected: 0, + name: "Should succeed when no MachineDeployments found", + initialObjects: []client.Object{}, + expectedTotal: 0, + wantErr: false, }, } for _, tt := range tests { - fakeClient := fake.NewClientBuilder().WithScheme(s).WithObjects(tt.cluster).Build() - t.Run(tt.name, func(t *testing.T) { - g.Expect(getExpectedVSphereMachines(ctx, fakeClient, tt.cluster)).To(Equal(tt.expected)) + // Looks odd, but need to reinitialize test variable + tt := tt + t.Run(tt.name, func(_ *testing.T) { + scheme := runtime.NewScheme() + g.Expect(clusterv1.AddToScheme(scheme)).To(Succeed()) + + fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(tt.initialObjects...).Build() + total, err := getExpectedVSphereMachines(ctx, fakeClient, targetCluster) + if tt.wantErr { + g.Expect(err).To(HaveOccurred()) + } else { + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(total).To(Equal(tt.expectedTotal)) + } }) } } @@ -76,12 +118,14 @@ func TestGetCurrentVSphereMachines(t *testing.T) { g := NewWithT(t) ctx := context.Background() - // VM names are based on CAPI Machine names, not VSphereMachine names, but we use VSphereMachine here. - vsm1 := newVSphereMachine("vsm-1", mdName1, false, nil) - vsm2 := newVSphereMachine("vsm-2", mdName2, false, nil) - vsmDeleting := newVSphereMachine("vsm-3", mdName1, true, nil) // Deleting - vsmControlPlane := newVSphereMachine("vsm-cp", "cp-md", false, nil) - vsmControlPlane.Labels[clusterv1.MachineControlPlaneLabel] = "true" + scheme := runtime.NewScheme() + g.Expect(vmwarev1.AddToScheme(scheme)).To(Succeed()) + + // VSphereMachine names are based on CAPI Machine names, but we use fake name here. 
+ vsm1 := newVSphereMachine("vsm-1", mdName1, false, false, nil) + vsm2 := newVSphereMachine("vsm-2", mdName2, false, false, nil) + vsmDeleting := newVSphereMachine("vsm-3", mdName1, false, true, nil) // Deleting + vsmControlPlane := newVSphereMachine("vsm-cp", "not-md", true, false, nil) tests := []struct { name string @@ -89,28 +133,30 @@ func TestGetCurrentVSphereMachines(t *testing.T) { want int }{ { - name: "Success: Filtered non-deleting worker VSphereMachines", + name: "Should filtered out deleting VSphereMachines", objects: []client.Object{ vsm1, vsm2, vsmDeleting, vsmControlPlane, }, - want: 2, // Should exclude vsm-3 (deleting) and vsm-cp (control plane VSphereMachine) + want: 2, }, { - name: "No VSphereMachines found", + name: "Want no Error if no VSphereMachines found", objects: []client.Object{}, want: 0, }, } for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - fakeClient := fake.NewClientBuilder().WithScheme(s).WithObjects(tt.objects...).Build() + // Looks odd, but need to reinitialize test variable + tt := tt + t.Run(tt.name, func(_ *testing.T) { + fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(tt.objects...).Build() got, err := getCurrentVSphereMachines(ctx, fakeClient, clusterNamespace, clusterName) g.Expect(err).NotTo(HaveOccurred()) - g.Expect(len(got)).To(Equal(tt.want)) + g.Expect(got).To(HaveLen(tt.want)) // Check that the correct Machines are present if tt.want > 0 { @@ -142,7 +188,7 @@ func TestGenerateVMGPlacementAnnotations(t *testing.T) { wantErr bool }{ { - name: "Success: Two placed VMs for two MDs", + name: "Should get placement annotation when two placed VMs for two MDs", vmg: &vmoprv1.VirtualMachineGroup{ Status: vmoprv1.VirtualMachineGroupStatus{ Members: []vmoprv1.VirtualMachineGroupMemberStatus{ @@ -161,7 +207,7 @@ func TestGenerateVMGPlacementAnnotations(t *testing.T) { wantErr: false, }, { - name: "Skip: Unplaced VM (PlacementReady false)", + name: "No placement annotation when VM PlacementReady is false)", vmg: &vmoprv1.VirtualMachineGroup{ Status: vmoprv1.VirtualMachineGroupStatus{ Members: []vmoprv1.VirtualMachineGroupMemberStatus{ @@ -174,7 +220,7 @@ func TestGenerateVMGPlacementAnnotations(t *testing.T) { wantErr: false, }, { - name: "Skip: PlacementReady but missing Zone info", + name: "No placement annotation when PlacementReady but missing Zone info", vmg: &vmoprv1.VirtualMachineGroup{ Status: vmoprv1.VirtualMachineGroupStatus{ Members: []vmoprv1.VirtualMachineGroupMemberStatus{ @@ -187,7 +233,7 @@ func TestGenerateVMGPlacementAnnotations(t *testing.T) { wantErr: false, }, { - name: "Skip: Placement already found for MD", + name: "Should keep placement annotation when first placement decision is found", vmg: &vmoprv1.VirtualMachineGroup{ Status: vmoprv1.VirtualMachineGroupStatus{ Members: []vmoprv1.VirtualMachineGroupMemberStatus{ @@ -205,7 +251,7 @@ func TestGenerateVMGPlacementAnnotations(t *testing.T) { wantErr: false, }, { - name: "Error: Member Kind is not VirtualMachine", + name: "Should return Error if Member Kind is not VirtualMachine", vmg: &vmoprv1.VirtualMachineGroup{ ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: clusterNamespace}, Status: vmoprv1.VirtualMachineGroupStatus{ @@ -221,7 +267,9 @@ func TestGenerateVMGPlacementAnnotations(t *testing.T) { } for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { + // Looks odd, but need to reinitialize test variable + tt := tt + t.Run(tt.name, func(_ *testing.T) { ctx := ctrl.LoggerInto(context.Background(), 
ctrl.LoggerFrom(context.Background())) got, err := GenerateVMGPlacementAnnotations(ctx, tt.vmg, tt.machineDeployments) @@ -240,10 +288,15 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { g := NewWithT(t) ctx := context.Background() + scheme := runtime.NewScheme() + g.Expect(clusterv1.AddToScheme(scheme)).To(Succeed()) + g.Expect(vmwarev1.AddToScheme(scheme)).To(Succeed()) + g.Expect(vmoprv1.AddToScheme(scheme)).To(Succeed()) + // Initial objects for the successful VMG creation path (Expected: 1, Current: 1) cluster := newCluster(clusterName, clusterNamespace, true, 1, 0) - vsm1 := newVSphereMachine("vsm-1", mdName1, false, nil) - md1 := newMachineDeployment(mdName1) + vsm1 := newVSphereMachine("vsm-1", mdName1, false, false, nil) + md1 := newMachineDeployment(mdName1, clusterName, clusterNamespace, ptr.To(int32(1))) tests := []struct { name string @@ -252,16 +305,17 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { checkVMGExists bool }{ { - name: "Exit: Cluster Not Found", + name: "Should Exit if Cluster Not Found", initialObjects: []client.Object{}, expectedResult: reconcile.Result{}, checkVMGExists: false, }, { - name: "Exit: Cluster Deletion Timestamp Set", + name: "Should Exit if Cluster Deletion Timestamp Set", initialObjects: []client.Object{ func() client.Object { c := cluster.DeepCopy() + c.Finalizers = []string{"test.finalizer.cluster"} c.DeletionTimestamp = &metav1.Time{Time: time.Now()} return c }(), @@ -270,7 +324,7 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { checkVMGExists: false, }, { - name: "Requeue: ControlPlane Not Initialized", + name: "Should Requeue if ControlPlane Not Initialized", initialObjects: []client.Object{ newCluster(clusterName, clusterNamespace, false, 1, 0), }, @@ -278,7 +332,7 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { checkVMGExists: false, }, { - name: "Requeue: VMG Not Found", + name: "Should Requeue if VMG Not Found", initialObjects: []client.Object{ cluster.DeepCopy(), md1.DeepCopy(), @@ -287,7 +341,7 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { checkVMGExists: false, }, { - name: "Success: VMG Created", + name: "Should Succeed if VMG is created", initialObjects: []client.Object{ cluster.DeepCopy(), md1.DeepCopy(), @@ -297,7 +351,7 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { checkVMGExists: true, }, { - name: "Success: VMG Updated (Already Exists)", + name: "Should Succeed if VMG is already existed", initialObjects: []client.Object{ cluster.DeepCopy(), md1.DeepCopy(), @@ -312,8 +366,10 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { } for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - fakeClient := fake.NewClientBuilder().WithScheme(s).WithObjects(tt.initialObjects...).Build() + // Looks odd, but need to reinitialize test variable + tt := tt + t.Run(tt.name, func(_ *testing.T) { + fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(tt.initialObjects...).Build() reconciler := &VirtualMachineGroupReconciler{ Client: fakeClient, Recorder: record.NewFakeRecorder(1), @@ -338,16 +394,14 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { g.Expect(err).NotTo(HaveOccurred(), "Should get expected Machines") g.Expect(vmg.Spec.BootOrder[0].Members).To(HaveLen(int(expected))) - // VMG members should match the VSphereMachine (name: vsm-1) - g.Expect(vmg.Spec.BootOrder[0].Members[0].Name).To(ContainElement("vsm-1")) - } else { - 
g.Expect(apierrors.IsNotFound(err)).To(BeTrue(), "VMG should not exist or NotFound should be handled gracefully") + // VMG members should match the VSphereMachine name + g.Expect(vmg.Spec.BootOrder[0].Members[0].Name).To(Equal("vsm-1")) } }) } } -// Helper function to create a basic Cluster object +// Helper function to create a basic Cluster object. func newCluster(name, namespace string, initialized bool, replicasMD1, replicasMD2 int32) *clusterv1.Cluster { cluster := &clusterv1.Cluster{ ObjectMeta: metav1.ObjectMeta{ @@ -375,28 +429,33 @@ func newCluster(name, namespace string, initialized bool, replicasMD1, replicasM return cluster } -// Helper function to create a VSphereMachine (worker, owned by a CAPI Machine) -func newVSphereMachine(name, mdName string, deleted bool, namingStrategy *vmwarev1.VirtualMachineNamingStrategy) *vmwarev1.VSphereMachine { +// Helper function to create a VSphereMachine (worker, owned by a CAPI Machine). +func newVSphereMachine(name, mdName string, isCP, deleted bool, namingStrategy *vmwarev1.VirtualMachineNamingStrategy) *vmwarev1.VSphereMachine { vsm := &vmwarev1.VSphereMachine{ ObjectMeta: metav1.ObjectMeta{ Name: name, Namespace: clusterNamespace, Labels: map[string]string{ - clusterv1.ClusterNameLabel: clusterName, - clusterv1.MachineDeploymentNameLabel: mdName, + clusterv1.ClusterNameLabel: clusterName, }, }, Spec: vmwarev1.VSphereMachineSpec{ NamingStrategy: namingStrategy, }, } + if !isCP { + vsm.Labels[clusterv1.MachineDeploymentNameLabel] = mdName + } else { + vsm.Labels[clusterv1.MachineControlPlaneLabel] = "true" + } if deleted { + vsm.Finalizers = []string{"test.finalizer.0"} vsm.DeletionTimestamp = &metav1.Time{Time: time.Now()} } return vsm } -// Helper function to create a VMG member status with placement info +// Helper function to create a VMG member status with placement info. func newVMGMemberStatus(name, kind string, isPlacementReady bool, zone string) vmoprv1.VirtualMachineGroupMemberStatus { memberStatus := vmoprv1.VirtualMachineGroupMemberStatus{ Name: name, @@ -413,13 +472,26 @@ func newVMGMemberStatus(name, kind string, isPlacementReady bool, zone string) v return memberStatus } -// Helper function to create a MachineDeployment (for listing MD names) -func newMachineDeployment(name string) *clusterv1.MachineDeployment { +// Helper function to create a MachineDeployment object. +func newMachineDeployment(name, clusterName, clusterNS string, replicas *int32) *clusterv1.MachineDeployment { return &clusterv1.MachineDeployment{ ObjectMeta: metav1.ObjectMeta{ Name: name, - Namespace: clusterNamespace, + Namespace: clusterNS, Labels: map[string]string{clusterv1.ClusterNameLabel: clusterName}, }, + Spec: clusterv1.MachineDeploymentSpec{ + Replicas: replicas, + }, + } +} + +// Helper function to create a basic Cluster object used as input. 
+func newTestCluster(name, namespace string) *clusterv1.Cluster { + return &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, } } diff --git a/controllers/vspherecluster_reconciler.go b/controllers/vspherecluster_reconciler.go index cabf13db2e..18d818f3a4 100644 --- a/controllers/vspherecluster_reconciler.go +++ b/controllers/vspherecluster_reconciler.go @@ -427,7 +427,6 @@ func (r *clusterReconciler) reconcileDeploymentZones(ctx context.Context, cluste failureDomains := clusterv1beta1.FailureDomains{} for _, zone := range deploymentZoneList.Items { if zone.Spec.Server != clusterCtx.VSphereCluster.Spec.Server { - continue } diff --git a/test/framework/vmoperator/vmoperator.go b/test/framework/vmoperator/vmoperator.go index c80ec76545..2c1e367b01 100644 --- a/test/framework/vmoperator/vmoperator.go +++ b/test/framework/vmoperator/vmoperator.go @@ -534,7 +534,7 @@ func ReconcileDependencies(ctx context.Context, c client.Client, dependenciesCon Namespace: config.Namespace, }, Spec: vmoprv1.VirtualMachineImageSpec{ - ProviderRef: vmoprv1common.LocalObjectRef{ + ProviderRef: &vmoprv1common.LocalObjectRef{ Kind: "ContentLibraryItem", }, }, diff --git a/test/go.mod b/test/go.mod index 69d0a9c7d9..db1b6ea8b6 100644 --- a/test/go.mod +++ b/test/go.mod @@ -10,8 +10,8 @@ replace sigs.k8s.io/cluster-api-provider-vsphere => ../ replace github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels => github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels v1.9.1-0.20251003150112-9b458d311c4c -// The version of vm-operator should be kept in sync with the manifests at: config/deployments/integration-testsz -replace github.com/vmware-tanzu/vm-operator/api => github.com/vmware-tanzu/vm-operator/api v1.8.6 +// The version of vm-operator should be kept in sync with the manifests at: config/deployments/integration-tests +replace github.com/vmware-tanzu/vm-operator/api => github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c require ( github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d diff --git a/test/go.sum b/test/go.sum index 8ac8dfd79b..e5e682ab61 100644 --- a/test/go.sum +++ b/test/go.sum @@ -360,8 +360,8 @@ github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d h1:c github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d/go.mod h1:JbFOh22iDsT5BowJe0GgpMI5e2/S7cWaJlv9LdURVQM= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d h1:z9lrzKVtNlujduv9BilzPxuge/LE2F0N1ms3TP4JZvw= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d/go.mod h1:Q4JzNkNMvjo7pXtlB5/R3oME4Nhah7fAObWgghVmtxk= -github.com/vmware-tanzu/vm-operator/api v1.8.6 h1:NIndORjcnSmIlQsCMIewpIwg/ocRVDh2lYjOroTVLrU= -github.com/vmware-tanzu/vm-operator/api v1.8.6/go.mod h1:HHA2SNI9B5Yqtyp5t+Gt9WTWBi/fIkM6+MukDDSf11A= +github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c h1:XISTT0dw/XwMlyyiOPHPsXCxfI1Ro2Zuozi6eIacXGo= +github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c/go.mod h1:nWTPpxfe4gHuuYuFcrs86+NMxfkqPk3a3IlvI8TCWak= github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505 h1:y4wXx1FUFqqSgJ/xUOEM1DLS2Uu0KaeLADWpzpioGTU= github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505/go.mod h1:5rqRJ9zGR+KnKbkGx373WgN8xJpvAj99kHnfoDYRO5I= github.com/vmware/govmomi v0.52.0 h1:JyxQ1IQdllrY7PJbv2am9mRsv3p9xWlIQ66bv+XnyLw= From 
947dffa603727273be5a7f021909e0631cb9dcc4 Mon Sep 17 00:00:00 2001 From: Gong Zhang Date: Fri, 7 Nov 2025 16:32:54 +0800 Subject: [PATCH 13/25] Fix annotations Signed-off-by: Gong Zhang --- .../vmware/virtualmachinegroup_reconciler.go | 16 ++++++++++------ .../virtualmachinegroup_reconciler_test.go | 16 ++++++++-------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/controllers/vmware/virtualmachinegroup_reconciler.go b/controllers/vmware/virtualmachinegroup_reconciler.go index 0be615857b..3bde6135fd 100644 --- a/controllers/vmware/virtualmachinegroup_reconciler.go +++ b/controllers/vmware/virtualmachinegroup_reconciler.go @@ -187,20 +187,24 @@ func (r *VirtualMachineGroupReconciler) createOrUpdateVMG(ctx context.Context, c desiredVMG.Labels[clusterv1.ClusterNameLabel] = cluster.Name } + if desiredVMG.Annotations == nil { + desiredVMG.Annotations = make(map[string]string) + } + // Add per-md-zone label for day-2 operations once placement of a VM belongs to MachineDeployment is done // Do not update per-md-zone label once set, as placement decision should not change without user explicitly // ask. - placementDecisionLabels, err := GenerateVMGPlacementAnnotations(ctx, desiredVMG, mdNames) + placementDecisionAnnotations, err := GenerateVMGPlacementAnnotations(ctx, desiredVMG, mdNames) if err != nil { return err } - if len(placementDecisionLabels) > 0 { - for k, v := range placementDecisionLabels { - if _, exists := desiredVMG.Labels[k]; exists { + if len(placementDecisionAnnotations) > 0 { + for k, v := range placementDecisionAnnotations { + if _, exists := desiredVMG.Annotations[k]; exists { // Skip if the label already exists continue } - desiredVMG.Labels[k] = v + desiredVMG.Annotations[k] = v } } @@ -304,7 +308,7 @@ func GenerateVMGPlacementAnnotations(ctx context.Context, vmg *vmoprv1.VirtualMa // Check if VM belongs to a Machine Deployment by name (e.g. cluster-1-np-1-vm-xxx contains np-1) // TODO: Establish membership via the machine deployment name label - if strings.Contains(member.Name, "-"+md+"-") { + if strings.Contains(member.Name, md+"-") { // Get the VM placement information by member status. // VMs that have undergone placement do not have Placement info set, skip. 
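			// That is, members whose placement has not completed yet carry no Placement status and
			// are skipped; only placed members contribute an annotation of the form
			// "zone.cluster.x-k8s.io/<machine-deployment-name>": "zone-a" (zone name illustrative).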
if member.Placement == nil { diff --git a/controllers/vmware/virtualmachinegroup_reconciler_test.go b/controllers/vmware/virtualmachinegroup_reconciler_test.go index efbd36a022..3702f7653a 100644 --- a/controllers/vmware/virtualmachinegroup_reconciler_test.go +++ b/controllers/vmware/virtualmachinegroup_reconciler_test.go @@ -199,10 +199,10 @@ func TestGenerateVMGPlacementAnnotations(t *testing.T) { }, }, }, - machineDeployments: []string{mdName1, mdName2}, + machineDeployments: []string{clusterName + mdName1, clusterName + mdName2}, wantAnnotations: map[string]string{ - fmt.Sprintf("zone.cluster.x-k8s.io/%s", mdName1): zoneA, - fmt.Sprintf("zone.cluster.x-k8s.io/%s", mdName2): zoneB, + fmt.Sprintf("zone.cluster.x-k8s.io/%s", clusterName+mdName1): zoneA, + fmt.Sprintf("zone.cluster.x-k8s.io/%s", clusterName+mdName2): zoneB, }, wantErr: false, }, @@ -215,7 +215,7 @@ func TestGenerateVMGPlacementAnnotations(t *testing.T) { }, }, }, - machineDeployments: []string{mdName1}, + machineDeployments: []string{clusterName + mdName1}, wantAnnotations: map[string]string{}, wantErr: false, }, @@ -228,7 +228,7 @@ func TestGenerateVMGPlacementAnnotations(t *testing.T) { }, }, }, - machineDeployments: []string{mdName1}, + machineDeployments: []string{clusterName + mdName1}, wantAnnotations: map[string]string{}, wantErr: false, }, @@ -244,9 +244,9 @@ func TestGenerateVMGPlacementAnnotations(t *testing.T) { }, }, }, - machineDeployments: []string{mdName1}, + machineDeployments: []string{clusterName + mdName1}, wantAnnotations: map[string]string{ - fmt.Sprintf("zone.cluster.x-k8s.io/%s", mdName1): zoneA, + fmt.Sprintf("zone.cluster.x-k8s.io/%s", clusterName+mdName1): zoneA, }, wantErr: false, }, @@ -260,7 +260,7 @@ func TestGenerateVMGPlacementAnnotations(t *testing.T) { }, }, }, - machineDeployments: []string{mdName1}, + machineDeployments: []string{clusterName + mdName1}, wantAnnotations: nil, wantErr: true, }, From 5847119b5e9e1bb8f37f3a37ee8c88978731df95 Mon Sep 17 00:00:00 2001 From: Gong Zhang Date: Mon, 10 Nov 2025 10:51:20 +0800 Subject: [PATCH 14/25] Fix vmg UT errors Signed-off-by: Gong Zhang --- .../virtualmachinegroup_reconciler_test.go | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/controllers/vmware/virtualmachinegroup_reconciler_test.go b/controllers/vmware/virtualmachinegroup_reconciler_test.go index 3702f7653a..d4a3a80ad6 100644 --- a/controllers/vmware/virtualmachinegroup_reconciler_test.go +++ b/controllers/vmware/virtualmachinegroup_reconciler_test.go @@ -176,8 +176,8 @@ func TestGenerateVMGPlacementAnnotations(t *testing.T) { // Define object names for members vmName1 := fmt.Sprintf("%s-%s-vm-1", clusterName, mdName1) - vmName2 := fmt.Sprintf("%s-%s-vm-1", clusterName, mdName2) - vmNameUnplaced := fmt.Sprintf("%s-%s-vm-2", clusterName, mdName1) + vmName2 := fmt.Sprintf("%s-%s-vm-2", clusterName, mdName2) + vmNameUnplaced := fmt.Sprintf("%s-%s-vm-unplaced", clusterName, mdName1) vmNameWrongKind := "not-a-vm" tests := []struct { @@ -199,10 +199,10 @@ func TestGenerateVMGPlacementAnnotations(t *testing.T) { }, }, }, - machineDeployments: []string{clusterName + mdName1, clusterName + mdName2}, + machineDeployments: []string{clusterName + "-" + mdName1, clusterName + "-" + mdName2}, wantAnnotations: map[string]string{ - fmt.Sprintf("zone.cluster.x-k8s.io/%s", clusterName+mdName1): zoneA, - fmt.Sprintf("zone.cluster.x-k8s.io/%s", clusterName+mdName2): zoneB, + fmt.Sprintf("zone.cluster.x-k8s.io/%s", clusterName+"-"+mdName1): zoneA, + 
fmt.Sprintf("zone.cluster.x-k8s.io/%s", clusterName+"-"+mdName2): zoneB, }, wantErr: false, }, @@ -211,11 +211,11 @@ func TestGenerateVMGPlacementAnnotations(t *testing.T) { vmg: &vmoprv1.VirtualMachineGroup{ Status: vmoprv1.VirtualMachineGroupStatus{ Members: []vmoprv1.VirtualMachineGroupMemberStatus{ - newVMGMemberStatus(vmName1, "VirtualMachine", false, ""), + newVMGMemberStatus(vmNameUnplaced, "VirtualMachine", false, ""), }, }, }, - machineDeployments: []string{clusterName + mdName1}, + machineDeployments: []string{clusterName + "-" + mdName1}, wantAnnotations: map[string]string{}, wantErr: false, }, @@ -228,7 +228,7 @@ func TestGenerateVMGPlacementAnnotations(t *testing.T) { }, }, }, - machineDeployments: []string{clusterName + mdName1}, + machineDeployments: []string{clusterName + "-" + mdName1}, wantAnnotations: map[string]string{}, wantErr: false, }, @@ -240,13 +240,13 @@ func TestGenerateVMGPlacementAnnotations(t *testing.T) { // First VM sets the placement newVMGMemberStatus(vmName1, "VirtualMachine", true, zoneA), // Second VM is ignored - newVMGMemberStatus(vmNameUnplaced, "VirtualMachine", true, zoneB), + newVMGMemberStatus(vmName1, "VirtualMachine", true, zoneB), }, }, }, - machineDeployments: []string{clusterName + mdName1}, + machineDeployments: []string{clusterName + "-" + mdName1}, wantAnnotations: map[string]string{ - fmt.Sprintf("zone.cluster.x-k8s.io/%s", clusterName+mdName1): zoneA, + fmt.Sprintf("zone.cluster.x-k8s.io/%s", clusterName+"-"+mdName1): zoneA, }, wantErr: false, }, @@ -260,7 +260,7 @@ func TestGenerateVMGPlacementAnnotations(t *testing.T) { }, }, }, - machineDeployments: []string{clusterName + mdName1}, + machineDeployments: []string{clusterName + "-" + mdName1}, wantAnnotations: nil, wantErr: true, }, From e3078c9e8edd9a74f76e059640a220f529c40629 Mon Sep 17 00:00:00 2001 From: Gong Zhang Date: Wed, 12 Nov 2025 16:11:54 +0800 Subject: [PATCH 15/25] Refine VMG controller - Update to watch Cluster as primary - Decouple functions - Update to accurate match when checking if VM is a member of VMG - Update UT - Refine godoc - Miscellaneous Signed-off-by: Gong Zhang --- .../vmware/virtualmachinegroup_controller.go | 49 +--- .../vmware/virtualmachinegroup_reconciler.go | 228 +++++++++------ .../virtualmachinegroup_reconciler_test.go | 260 +++++++++++------- pkg/services/vmoperator/vmopmachine.go | 23 +- pkg/services/vmoperator/vmopmachine_test.go | 43 +++ 5 files changed, 371 insertions(+), 232 deletions(-) diff --git a/controllers/vmware/virtualmachinegroup_controller.go b/controllers/vmware/virtualmachinegroup_controller.go index d0c60aee01..d3bf325ec5 100644 --- a/controllers/vmware/virtualmachinegroup_controller.go +++ b/controllers/vmware/virtualmachinegroup_controller.go @@ -45,8 +45,7 @@ import ( // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinedeployments,verbs=get;list;watch // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines,verbs=get;list;watch -// AddVirtualMachineGroupControllerToManager adds the VirtualMachineGroup controller to the provided -// manager. +// AddVirtualMachineGroupControllerToManager adds the VirtualMachineGroup controller to the provided manager. 
func AddVirtualMachineGroupControllerToManager(ctx context.Context, controllerManagerCtx *capvcontext.ControllerManagerContext, mgr manager.Manager, options controller.Options) error { predicateLog := ctrl.LoggerFrom(ctx).WithValues("controller", "virtualmachinegroup") @@ -55,27 +54,18 @@ func AddVirtualMachineGroupControllerToManager(ctx context.Context, controllerMa Recorder: mgr.GetEventRecorderFor("virtualmachinegroup-controller"), } - // Predicate: only allow VMG with the cluster-name label. Ensures the controller only works on VMG objects created by CAPV. - hasClusterNameLabel := predicate.NewPredicateFuncs(func(obj ctrlclient.Object) bool { - labels := obj.GetLabels() - if labels == nil { - return false - } - _, ok := labels[clusterv1.ClusterNameLabel] - return ok - }) - builder := ctrl.NewControllerManagedBy(mgr). - For(&vmoprv1.VirtualMachineGroup{}). + For(&clusterv1.Cluster{}). WithOptions(options). - WithEventFilter(hasClusterNameLabel). + // Set the controller's name explicitly to virtualmachinegroup. + Named("virtualmachinegroup"). Watches( - &clusterv1.Cluster{}, - handler.EnqueueRequestsFromMapFunc(reconciler.ClusterToVirtualMachineGroup), + &vmoprv1.VirtualMachineGroup{}, + handler.EnqueueRequestForOwner(mgr.GetScheme(), reconciler.Client.RESTMapper(), &clusterv1.Cluster{}), ). Watches( &vmwarev1.VSphereMachine{}, - handler.EnqueueRequestsFromMapFunc(reconciler.VSphereMachineToVirtualMachineGroup), + handler.EnqueueRequestsFromMapFunc(reconciler.VSphereMachineToCluster), ctrlbldr.WithPredicates( predicate.Funcs{ UpdateFunc: func(event.UpdateEvent) bool { return false }, @@ -89,26 +79,10 @@ func AddVirtualMachineGroupControllerToManager(ctx context.Context, controllerMa return builder.Complete(reconciler) } -// ClusterToVirtualMachineGroup maps Cluster events to VirtualMachineGroup reconcile requests. -func (r *VirtualMachineGroupReconciler) ClusterToVirtualMachineGroup(_ context.Context, a ctrlclient.Object) []reconcile.Request { - cluster, ok := a.(*clusterv1.Cluster) - if !ok { - return nil - } - - // Always enqueue a request for the "would-be VMG" - return []reconcile.Request{{ - NamespacedName: apitypes.NamespacedName{ - Namespace: cluster.Namespace, - Name: cluster.Name, - }, - }} -} - -// VSphereMachineToVirtualMachineGroup maps VSphereMachine events to VirtualMachineGroup reconcile requests. +// VSphereMachineToCluster maps VSphereMachine events to Cluster reconcile requests. // This handler only processes VSphereMachine objects for Day-2 operations when VMG could be found, ensuring // VMG member list in sync with VSphereMachines. If no corresponding VMG is found, this is a no-op. 
-func (r *VirtualMachineGroupReconciler) VSphereMachineToVirtualMachineGroup(ctx context.Context, a ctrlclient.Object) []reconcile.Request { +func (r *VirtualMachineGroupReconciler) VSphereMachineToCluster(ctx context.Context, a ctrlclient.Object) []reconcile.Request { vSphereMachine, ok := a.(*vmwarev1.VSphereMachine) if !ok { return nil @@ -120,10 +94,7 @@ func (r *VirtualMachineGroupReconciler) VSphereMachineToVirtualMachineGroup(ctx } vmg := &vmoprv1.VirtualMachineGroup{} - err := r.Client.Get(ctx, apitypes.NamespacedName{ - Namespace: vSphereMachine.Namespace, - Name: clusterName, - }, vmg) + err := r.Client.Get(ctx, apitypes.NamespacedName{Namespace: vSphereMachine.Namespace, Name: clusterName}, vmg) if err != nil { return nil diff --git a/controllers/vmware/virtualmachinegroup_reconciler.go b/controllers/vmware/virtualmachinegroup_reconciler.go index 3bde6135fd..5503cc181b 100644 --- a/controllers/vmware/virtualmachinegroup_reconciler.go +++ b/controllers/vmware/virtualmachinegroup_reconciler.go @@ -21,13 +21,13 @@ import ( "context" "fmt" "sort" - "strings" "time" "github.com/pkg/errors" vmoprv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha2" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" "k8s.io/klog/v2" clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" @@ -42,7 +42,10 @@ import ( ) const ( + // reconciliationDelay is the delay time for requeueAfter. reconciliationDelay = 10 * time.Second + // ZoneAnnotationPrefix is the prefix used for placement decision annotations which will be set on VirtualMachineGroup. + ZoneAnnotationPrefix = "zone.cluster.x-k8s.io" ) // VirtualMachineGroupReconciler reconciles VirtualMachineGroup. @@ -59,7 +62,22 @@ type VirtualMachineGroupReconciler struct { // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinedeployments,verbs=get;list;watch // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines,verbs=get;list;watch -func (r *VirtualMachineGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) { +// This controller is introduced by CAPV to coordinate the creation and maintenance of +// the VirtualMachineGroup (VMG) object with respect to the worker VSphereMachines in the Cluster. +// +// - Batch Coordination: Gating the initial creation of the VMG until all expected worker +// VSphereMachines are present. This ensures the complete VM member list is sent to the VM +// Service in a single batch operation due to a limitation of underlying service. +// +// - Placement Persistence: Persisting the MachineDeployment-to-Zone mapping (placement decision) as a +// metadata annotation on the VMG object. This decision is crucial for guiding newer VMs created +// during Day-2 operations such as scaling, upgrades, and remediations, ensuring consistency. This is also due to +// a known limitation of underlying services. +// +// - Membership Maintenance: Dynamically updating the VMG's member list to reflect the current +// state of VMs belonging to MachineDeployments (handling scale-up/down events). + +func (r *VirtualMachineGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { log := ctrl.LoggerFrom(ctx) // Fetch the Cluster instance. @@ -86,11 +104,11 @@ func (r *VirtualMachineGroupReconciler) Reconcile(ctx context.Context, req ctrl. } // Continue with the main logic. 
- return r.createOrUpdateVMG(ctx, cluster) + return r.createOrUpdateVirtualMachineGroup(ctx, cluster) } -// createOrUpdateVMG Create or Update VirtualMachineGroup. -func (r *VirtualMachineGroupReconciler) createOrUpdateVMG(ctx context.Context, cluster *clusterv1.Cluster) (_ reconcile.Result, reterr error) { +// createOrUpdateVirtualMachineGroup Create or Update VirtualMachineGroup. +func (r *VirtualMachineGroupReconciler) createOrUpdateVirtualMachineGroup(ctx context.Context, cluster *clusterv1.Cluster) (reconcile.Result, error) { log := ctrl.LoggerFrom(ctx) // Calculate current Machines of all MachineDeployments. @@ -109,19 +127,22 @@ func (r *VirtualMachineGroupReconciler) createOrUpdateVMG(ctx context.Context, c if err := r.Client.Get(ctx, *key, desiredVMG); err != nil { if !apierrors.IsNotFound(err) { log.Error(err, "failed to get VirtualMachineGroup") - return ctrl.Result{}, err + return reconcile.Result{}, err } // Calculate expected Machines of all MachineDeployments. - expected, err := getExpectedVSphereMachines(ctx, r.Client, cluster) + // CAPV retrieves placement decisions from the VirtualMachineGroup to guide + // day-2 VM placement. At least one VM is required for each MachineDeployment. + expected, err := getExpectedVSphereMachineCount(ctx, r.Client, cluster) if err != nil { log.Error(err, "failed to get expected Machines of all MachineDeployment") - return ctrl.Result{}, err + return reconcile.Result{}, err } if expected == 0 { - log.Info("none of MachineDeployments specifies replica and node auto replacement doesn't support this scenario") - return reconcile.Result{}, nil + errMsg := fmt.Sprintf("Found 0 desired VSphereMachine for Cluster %s/%s", cluster.Name, cluster.Namespace) + log.Error(nil, errMsg) + return reconcile.Result{}, errors.New(errMsg) } // Wait for all intended VSphereMachines corresponding to MachineDeployment to exist only during initial Cluster creation. @@ -163,10 +184,6 @@ func (r *VirtualMachineGroupReconciler) createOrUpdateVMG(ctx context.Context, c } // Get all the names of MachineDeployments of the Cluster. - if !cluster.Spec.Topology.IsDefined() { - return reconcile.Result{}, errors.Errorf("Cluster Topology is not defined %s/%s", - cluster.Namespace, cluster.Name) - } machineDeployments := &clusterv1.MachineDeploymentList{} if err := r.Client.List(ctx, machineDeployments, client.InNamespace(cluster.Namespace), @@ -180,60 +197,56 @@ func (r *VirtualMachineGroupReconciler) createOrUpdateVMG(ctx context.Context, c // Use CreateOrPatch to create or update the VirtualMachineGroup. _, err = controllerutil.CreateOrPatch(ctx, r.Client, desiredVMG, func() error { - // Set the desired labels - if desiredVMG.Labels == nil { - desiredVMG.Labels = make(map[string]string) - // Set Cluster name label - desiredVMG.Labels[clusterv1.ClusterNameLabel] = cluster.Name - } + return r.reconcileVirtualMachineState(ctx, desiredVMG, cluster, members, mdNames) + }) - if desiredVMG.Annotations == nil { - desiredVMG.Annotations = make(map[string]string) - } + return reconcile.Result{}, err +} - // Add per-md-zone label for day-2 operations once placement of a VM belongs to MachineDeployment is done - // Do not update per-md-zone label once set, as placement decision should not change without user explicitly - // ask. 
- placementDecisionAnnotations, err := GenerateVMGPlacementAnnotations(ctx, desiredVMG, mdNames) - if err != nil { - return err - } - if len(placementDecisionAnnotations) > 0 { - for k, v := range placementDecisionAnnotations { - if _, exists := desiredVMG.Annotations[k]; exists { - // Skip if the label already exists - continue - } +// reconcileVirtualMachineState mutates the desiredVMG object to reflect the necessary spec and metadata changes. +func (r *VirtualMachineGroupReconciler) reconcileVirtualMachineState(ctx context.Context, desiredVMG *vmoprv1.VirtualMachineGroup, cluster *clusterv1.Cluster, members []vmoprv1.GroupMember, mdNames []string) error { + // Set the desired labels + if desiredVMG.Labels == nil { + desiredVMG.Labels = make(map[string]string) + desiredVMG.Labels[clusterv1.ClusterNameLabel] = cluster.Name + } + + if desiredVMG.Annotations == nil { + desiredVMG.Annotations = make(map[string]string) + } + + // Add per-md-zone label for day-2 operations once placement of a VM belongs to MachineDeployment is done. + // Do not update per-md-zone label once set, as placement decision should not change without user explicitly + // set failureDomain. + placementAnnotations, err := GenerateVirtualMachineGroupAnnotations(ctx, r.Client, desiredVMG, mdNames) + if err != nil { + return err + } + if len(placementAnnotations) > 0 { + for k, v := range placementAnnotations { + if _, exists := desiredVMG.Annotations[k]; !exists { desiredVMG.Annotations[k] = v } } + } - // Compose bootOrder. - desiredVMG.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{ - { - Members: members, - }, - } - - // Make sure the Cluster owns the VM Operator VirtualMachineGroup. - if err = controllerutil.SetControllerReference(cluster, desiredVMG, r.Client.Scheme()); err != nil { - return errors.Wrapf(err, "failed to mark %s %s/%s as owner of %s %s/%s", - cluster.GroupVersionKind(), - cluster.Namespace, - cluster.Name, - desiredVMG.GroupVersionKind(), - desiredVMG.Namespace, - desiredVMG.Name) - } + // Set the BootOrder spec as the + desiredVMG.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{ + { + Members: members, + }, + } - return nil - }) + // Set the owner reference + if err := controllerutil.SetControllerReference(cluster, desiredVMG, r.Client.Scheme()); err != nil { + return errors.Wrapf(err, "failed to mark %s as owner of %s", klog.KObj(cluster), klog.KObj(desiredVMG)) + } - return reconcile.Result{}, err + return nil } -// MachineDeployments belonging to the Cluster. -func getExpectedVSphereMachines(ctx context.Context, kubeClient client.Client, cluster *clusterv1.Cluster) (int32, error) { +// getExpectedVSphereMachineCount get expected total count of Machines belonging to the Cluster. +func getExpectedVSphereMachineCount(ctx context.Context, kubeClient client.Client, cluster *clusterv1.Cluster) (int32, error) { var mdList clusterv1.MachineDeploymentList if err := kubeClient.List( ctx, @@ -280,52 +293,89 @@ func getCurrentVSphereMachines(ctx context.Context, kubeClient client.Client, cl return result, nil } -// GenerateVMGPlacementAnnotations returns annotations per MachineDeployment which contains zone info for placed VMs for day-2 operations. 
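As a concrete illustration of the data these placement helpers produce and consume, the annotation map for a cluster with two MachineDeployments placed in different zones would look roughly like this (the MachineDeployment and zone names are made up):

	// Hypothetical output of the placement-annotation helper for two MachineDeployments.
	placement := map[string]string{
		"zone.cluster.x-k8s.io/md-0": "zone-a",
		"zone.cluster.x-k8s.io/md-1": "zone-b",
	}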
-func GenerateVMGPlacementAnnotations(ctx context.Context, vmg *vmoprv1.VirtualMachineGroup, machineDeployments []string) (map[string]string, error) { +// GenerateVirtualMachineGroupAnnotations checks the VMG status for placed members, verifies their ownership +// by fetching the corresponding VSphereMachine, and extracts the zone information to persist it +// as an annotation on the VMG object for Day-2 operations. +// +// The function attempts to find at least one successfully placed VM (VirtualMachineGroupMemberConditionPlacementReady==True) +// for each MachineDeployment and records its zone. Once a zone is recorded for an MD, subsequent VMs +// belonging to that same MD are skipped. +func GenerateVirtualMachineGroupAnnotations(ctx context.Context, kubeClient client.Client, vmg *vmoprv1.VirtualMachineGroup, machineDeployments []string) (map[string]string, error) { log := ctrl.LoggerFrom(ctx) - annotations := make(map[string]string) + log.V(4).Info(fmt.Sprintf("Generating annotations for VirtualMachineGroup %s/%s", vmg.Name, vmg.Namespace)) - // For each member in status + annotations := vmg.Annotations + if annotations == nil { + annotations = make(map[string]string) + } + + // Iterate through the VMG's members in Status. for _, member := range vmg.Status.Members { - // Skip if not a VM or not placement ready, + ns := vmg.Namespace + // Only VirtualMachines contribute to placement decisions. if member.Kind != "VirtualMachine" { - return nil, errors.Errorf("VirtualMachineGroup %s/%s contains none VirtualMachine member, member.Kind %s", vmg.Namespace, vmg.Name, member.Kind) + log.Info(fmt.Sprintf("Member %s of %s/%s is not VirtualMachine type, skipping it", member.Name, vmg.Name, vmg.Namespace)) + continue } - // Once member VM is placed, VirtualMachineGroupMemberConditionPlacementReady will be set to true. + // Skip it if member's VirtualMachineGroupMemberConditionPlacementReady is still not true. if !conditions.IsTrue(&member, vmoprv1.VirtualMachineGroupMemberConditionPlacementReady) { + log.Info(fmt.Sprintf("Member %s of %s/%s is not PlacementReady, skipping it", member.Name, vmg.Name, vmg.Namespace)) + continue + } + + // Get VSphereMachine which share the same Name of the member Name and get the MachineDeployment Name it belonged to. + vsmKey := types.NamespacedName{ + Name: member.Name, + Namespace: vmg.Namespace, + } + vsm := &vmwarev1.VSphereMachine{} + if err := kubeClient.Get(ctx, vsmKey, vsm); err != nil { + if apierrors.IsNotFound(err) { + log.Info(fmt.Sprintf("VSphereMachine %s/%s by member Name %s is not found, skipping it", member.Name, ns, member.Name)) + continue + } + log.Error(err, "failed to get VSphereMachine %s/%s", member.Name, ns) + return nil, err + } + + mdNameFromLabel, found := vsm.Labels[clusterv1.MachineDeploymentNameLabel] + if !found { + log.Info(fmt.Sprintf("Failed to get MachineDeployment label from VSphereMachine %s/%s, skipping it", member.Name, ns)) + continue + } + + // If we already found placement for this MachineDeployment, continue and move to next member. + if v, found := annotations[fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdNameFromLabel)]; found { + log.V(4).Info(fmt.Sprintf("Skipping MachineDeployment %s/%s, placement annotation %s already found", mdNameFromLabel, vsm.Namespace, v)) continue } - // Check if this VM belongs to any of our target Machine Deployments - // Use machine deployment name as the annotation key prefix. + // Check if this VM belongs to any of our target MachineDeployments. 
+ // Annotation format is "zone.cluster.x-k8s.io/{machine-deployment-name}". for _, md := range machineDeployments { - // Check if we already found placement for this Machine Deployments - if _, found := annotations[fmt.Sprintf("zone.cluster.x-k8s.io/%s", md)]; found { - log.Info(fmt.Sprintf("Skipping Machine Deployment %s, placement already found in annotations", md)) + if mdNameFromLabel != md { continue } - // Check if VM belongs to a Machine Deployment by name (e.g. cluster-1-np-1-vm-xxx contains np-1) - // TODO: Establish membership via the machine deployment name label - if strings.Contains(member.Name, md+"-") { - // Get the VM placement information by member status. - // VMs that have undergone placement do not have Placement info set, skip. - if member.Placement == nil { - log.V(4).Info("VM in VMG has no placement info. Placement is nil", "VM", member.Name, "VMG", vmg.Name, "Namespace", vmg.Namespace) - continue - } - - // Skip to next member if Zone is empty. - zone := member.Placement.Zone - if zone == "" { - log.V(4).Info("VM in VMG has no placement info. Zone is empty", "VM", member.Name, "VMG", vmg.Name, "Namespace", vmg.Namespace) - continue - } - - log.Info(fmt.Sprintf("VM %s in VMG %s/%s has been placed in zone %s", member.Name, vmg.Namespace, vmg.Name, zone)) - annotations[fmt.Sprintf("zone.cluster.x-k8s.io/%s", md)] = zone + // Get the VM placement information by member status. + // VMs that have undergone placement do not have Placement info set, skip. + if member.Placement == nil { + log.V(4).Info(fmt.Sprintf("VM %s in VMG %s/%s has no placement info. Placement is nil", member.Name, vmg.Name, ns)) + continue } + + // Skip to next member if Zone is empty. + zone := member.Placement.Zone + if zone == "" { + log.V(4).Info(fmt.Sprintf("VM %s in VMG %s/%s has no placement info. Zone is empty", member.Name, "VMG", ns)) + continue + } + + log.Info(fmt.Sprintf("VM %s in VMG %s/%s has been placed in zone %s", member.Name, ns, vmg.Name, zone)) + annotations[fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, md)] = zone + // Break from the inner loop as placement for this MachineDeployment is found. 
+ break } } diff --git a/controllers/vmware/virtualmachinegroup_reconciler_test.go b/controllers/vmware/virtualmachinegroup_reconciler_test.go index d4a3a80ad6..52c25501df 100644 --- a/controllers/vmware/virtualmachinegroup_reconciler_test.go +++ b/controllers/vmware/virtualmachinegroup_reconciler_test.go @@ -50,18 +50,21 @@ const ( zoneB = "zone-b" ) -func TestGetExpectedVSphereMachines(t *testing.T) { +func TestGetExpectedVSphereMachineCount(t *testing.T) { g := NewWithT(t) ctx := context.Background() + scheme := runtime.NewScheme() + g.Expect(clusterv1.AddToScheme(scheme)).To(Succeed()) + targetCluster := newTestCluster(clusterName, clusterNamespace) - mdA := newMachineDeployment("md-a", clusterName, clusterNamespace, ptr.To(int32(3))) - mdB := newMachineDeployment("md-b", clusterName, clusterNamespace, ptr.To(int32(5))) - mdCNil := newMachineDeployment("md-c-nil", clusterName, clusterNamespace, nil) - mdDZero := newMachineDeployment("md-d-zero", clusterName, clusterNamespace, ptr.To(int32(0))) + mdA := newMachineDeployment("md-a", clusterName, clusterNamespace, true, ptr.To(int32(3))) + mdB := newMachineDeployment("md-b", clusterName, clusterNamespace, true, ptr.To(int32(5))) + mdCNil := newMachineDeployment("md-c-nil", clusterName, clusterNamespace, false, nil) + mdDZero := newMachineDeployment("md-d-zero", clusterName, clusterNamespace, true, ptr.To(int32(0))) // Create an MD for a different cluster (should be filtered) - mdOtherCluster := newMachineDeployment("md-other", otherClusterName, clusterNamespace, ptr.To(int32(5))) + mdOtherCluster := newMachineDeployment("md-other", otherClusterName, clusterNamespace, true, ptr.To(int32(5))) tests := []struct { name string @@ -76,7 +79,7 @@ func TestGetExpectedVSphereMachines(t *testing.T) { wantErr: false, }, { - name: "Should succeed when MDs include nil and zero replicas", + name: "Should get count when MDs include nil and zero replicas", initialObjects: []client.Object{mdA, mdB, mdCNil, mdDZero}, expectedTotal: 8, wantErr: false, @@ -98,12 +101,10 @@ func TestGetExpectedVSphereMachines(t *testing.T) { for _, tt := range tests { // Looks odd, but need to reinitialize test variable tt := tt - t.Run(tt.name, func(_ *testing.T) { - scheme := runtime.NewScheme() - g.Expect(clusterv1.AddToScheme(scheme)).To(Succeed()) - + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(tt.initialObjects...).Build() - total, err := getExpectedVSphereMachines(ctx, fakeClient, targetCluster) + total, err := getExpectedVSphereMachineCount(ctx, fakeClient, targetCluster) if tt.wantErr { g.Expect(err).To(HaveOccurred()) } else { @@ -122,8 +123,10 @@ func TestGetCurrentVSphereMachines(t *testing.T) { g.Expect(vmwarev1.AddToScheme(scheme)).To(Succeed()) // VSphereMachine names are based on CAPI Machine names, but we use fake name here. 
- vsm1 := newVSphereMachine("vsm-1", mdName1, false, false, nil) - vsm2 := newVSphereMachine("vsm-2", mdName2, false, false, nil) + vsmName1 := fmt.Sprintf("%s-%s", mdName1, "vsm-1") + vsmName2 := fmt.Sprintf("%s-%s", mdName2, "vsm-2") + vsm1 := newVSphereMachine(vsmName1, mdName1, false, false, nil) + vsm2 := newVSphereMachine(vsmName2, mdName2, false, false, nil) vsmDeleting := newVSphereMachine("vsm-3", mdName1, false, true, nil) // Deleting vsmControlPlane := newVSphereMachine("vsm-cp", "not-md", true, false, nil) @@ -152,7 +155,8 @@ func TestGetCurrentVSphereMachines(t *testing.T) { for _, tt := range tests { // Looks odd, but need to reinitialize test variable tt := tt - t.Run(tt.name, func(_ *testing.T) { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(tt.objects...).Build() got, err := getCurrentVSphereMachines(ctx, fakeClient, clusterNamespace, clusterName) g.Expect(err).NotTo(HaveOccurred()) @@ -165,120 +169,174 @@ func TestGetCurrentVSphereMachines(t *testing.T) { names[i] = vsm.Name } sort.Strings(names) - g.Expect(names).To(Equal([]string{"vsm-1", "vsm-2"})) + g.Expect(names).To(Equal([]string{vsmName1, vsmName2})) } }) } } - -func TestGenerateVMGPlacementAnnotations(t *testing.T) { +func TestGenerateVirtualMachineGroupAnnotations(t *testing.T) { g := NewWithT(t) + ctx := context.Background() + + scheme := runtime.NewScheme() + g.Expect(vmwarev1.AddToScheme(scheme)).To(Succeed()) + + baseVMG := &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: clusterNamespace, + Annotations: make(map[string]string), + }, + } - // Define object names for members - vmName1 := fmt.Sprintf("%s-%s-vm-1", clusterName, mdName1) - vmName2 := fmt.Sprintf("%s-%s-vm-2", clusterName, mdName2) - vmNameUnplaced := fmt.Sprintf("%s-%s-vm-unplaced", clusterName, mdName1) - vmNameWrongKind := "not-a-vm" + // VSphereMachines corresponding to the VMG members + vsmName1 := fmt.Sprintf("%s-%s", mdName1, "vsm-1") + vsmName2 := fmt.Sprintf("%s-%s", mdName2, "vsm-2") + vsm1 := newVSphereMachine(vsmName1, mdName1, false, false, nil) + vsm2 := newVSphereMachine(vsmName2, mdName2, false, false, nil) + vsmMissingLabel := newVSphereMachine("vsm-nolabel", mdName2, false, false, nil) + vsmMissingLabel.Labels = nil // Explicitly remove labels for test case tests := []struct { - name string - vmg *vmoprv1.VirtualMachineGroup - machineDeployments []string - wantAnnotations map[string]string - wantErr bool + name string + vmg *vmoprv1.VirtualMachineGroup + machineDeployments []string + initialClientObjects []client.Object + expectedAnnotations map[string]string + wantErr bool }{ { - name: "Should get placement annotation when two placed VMs for two MDs", - vmg: &vmoprv1.VirtualMachineGroup{ - Status: vmoprv1.VirtualMachineGroupStatus{ + name: "Placement found for two distinct MDs", + vmg: func() *vmoprv1.VirtualMachineGroup { + v := baseVMG.DeepCopy() + v.Status = vmoprv1.VirtualMachineGroupStatus{ Members: []vmoprv1.VirtualMachineGroupMemberStatus{ - // Placed member for MD1 in Zone A - newVMGMemberStatus(vmName1, "VirtualMachine", true, zoneA), - // Placed member for MD2 in Zone B - newVMGMemberStatus(vmName2, "VirtualMachine", true, zoneB), + newVMGMemberStatus(vsmName1, "VirtualMachine", true, true, zoneA), + newVMGMemberStatus(vsmName2, "VirtualMachine", true, true, zoneB), }, - }, + } + return v + }(), + machineDeployments: []string{mdName1, mdName2}, + initialClientObjects: []client.Object{vsm1, 
vsm2}, + expectedAnnotations: map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA, + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName2): zoneB, }, - machineDeployments: []string{clusterName + "-" + mdName1, clusterName + "-" + mdName2}, - wantAnnotations: map[string]string{ - fmt.Sprintf("zone.cluster.x-k8s.io/%s", clusterName+"-"+mdName1): zoneA, - fmt.Sprintf("zone.cluster.x-k8s.io/%s", clusterName+"-"+mdName2): zoneB, + wantErr: false, + }, + { + name: "Skip as placement already exists in VMG Annotations", + vmg: func() *vmoprv1.VirtualMachineGroup { + v := baseVMG.DeepCopy() + v.Annotations = map[string]string{fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA} + v.Status.Members = []vmoprv1.VirtualMachineGroupMemberStatus{ + newVMGMemberStatus(vsmName1, "VirtualMachine", true, true, zoneB), + } + return v + }(), + machineDeployments: []string{mdName1}, + initialClientObjects: []client.Object{vsm1}, + // Should retain existing zone-a + expectedAnnotations: map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA, }, wantErr: false, }, { - name: "No placement annotation when VM PlacementReady is false)", - vmg: &vmoprv1.VirtualMachineGroup{ - Status: vmoprv1.VirtualMachineGroupStatus{ + name: "Skip if Member Kind is not VirtualMachine", + vmg: func() *vmoprv1.VirtualMachineGroup { + v := baseVMG.DeepCopy() + v.Status = vmoprv1.VirtualMachineGroupStatus{ Members: []vmoprv1.VirtualMachineGroupMemberStatus{ - newVMGMemberStatus(vmNameUnplaced, "VirtualMachine", false, ""), + newVMGMemberStatus("VMG-1", "VirtualMachineGroup", true, true, "zone-x"), }, - }, - }, - machineDeployments: []string{clusterName + "-" + mdName1}, - wantAnnotations: map[string]string{}, - wantErr: false, + } + return v + }(), + machineDeployments: []string{}, + initialClientObjects: []client.Object{}, + expectedAnnotations: map[string]string{}, + wantErr: false, }, { - name: "No placement annotation when PlacementReady but missing Zone info", - vmg: &vmoprv1.VirtualMachineGroup{ - Status: vmoprv1.VirtualMachineGroupStatus{ + name: "Skip if VSphereMachine Missing MachineDeployment Label", + vmg: func() *vmoprv1.VirtualMachineGroup { + v := baseVMG.DeepCopy() + v.Status = vmoprv1.VirtualMachineGroupStatus{ Members: []vmoprv1.VirtualMachineGroupMemberStatus{ - newVMGMemberStatus(vmName1, "VirtualMachine", true, ""), + newVMGMemberStatus("vsm-nolabel", "VirtualMachine", true, true, zoneA), }, - }, - }, - machineDeployments: []string{clusterName + "-" + mdName1}, - wantAnnotations: map[string]string{}, - wantErr: false, + } + return v + }(), + machineDeployments: []string{mdName1}, + initialClientObjects: []client.Object{vsmMissingLabel}, + expectedAnnotations: map[string]string{}, + wantErr: false, }, { - name: "Should keep placement annotation when first placement decision is found", - vmg: &vmoprv1.VirtualMachineGroup{ - Status: vmoprv1.VirtualMachineGroupStatus{ + name: "Skip if VSphereMachine is Not Found in API", + vmg: func() *vmoprv1.VirtualMachineGroup { + v := baseVMG.DeepCopy() + v.Status = vmoprv1.VirtualMachineGroupStatus{ Members: []vmoprv1.VirtualMachineGroupMemberStatus{ - // First VM sets the placement - newVMGMemberStatus(vmName1, "VirtualMachine", true, zoneA), - // Second VM is ignored - newVMGMemberStatus(vmName1, "VirtualMachine", true, zoneB), + newVMGMemberStatus("non-existent-vm", "VirtualMachine", true, true, zoneA), }, - }, - }, - machineDeployments: []string{clusterName + "-" + mdName1}, - wantAnnotations: map[string]string{ - 
fmt.Sprintf("zone.cluster.x-k8s.io/%s", clusterName+"-"+mdName1): zoneA, - }, - wantErr: false, + } + return v + }(), + machineDeployments: []string{mdName1}, + initialClientObjects: []client.Object{vsm1}, + expectedAnnotations: map[string]string{}, + wantErr: false, }, { - name: "Should return Error if Member Kind is not VirtualMachine", - vmg: &vmoprv1.VirtualMachineGroup{ - ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: clusterNamespace}, - Status: vmoprv1.VirtualMachineGroupStatus{ + name: "Skip if placement is nil", + vmg: func() *vmoprv1.VirtualMachineGroup { + v := baseVMG.DeepCopy() + v.Status = vmoprv1.VirtualMachineGroupStatus{ Members: []vmoprv1.VirtualMachineGroupMemberStatus{ - newVMGMemberStatus(vmNameWrongKind, "VirtualMachineGroup", true, zoneA), + newVMGMemberStatus(vsmName1, "VirtualMachine", true, false, zoneA), }, - }, - }, - machineDeployments: []string{clusterName + "-" + mdName1}, - wantAnnotations: nil, - wantErr: true, + } + return v + }(), + machineDeployments: []string{mdName1}, + initialClientObjects: []client.Object{vsm1}, + expectedAnnotations: map[string]string{}, + wantErr: false, + }, + { + name: "Skip if Zone is empty string", + vmg: func() *vmoprv1.VirtualMachineGroup { + v := baseVMG.DeepCopy() + v.Status = vmoprv1.VirtualMachineGroupStatus{ + Members: []vmoprv1.VirtualMachineGroupMemberStatus{ + newVMGMemberStatus(vsmName1, "VirtualMachine", true, true, ""), + }, + } + return v + }(), + machineDeployments: []string{mdName1}, + initialClientObjects: []client.Object{vsm1}, + expectedAnnotations: map[string]string{}, + wantErr: false, }, } for _, tt := range tests { // Looks odd, but need to reinitialize test variable tt := tt - t.Run(tt.name, func(_ *testing.T) { - ctx := ctrl.LoggerInto(context.Background(), ctrl.LoggerFrom(context.Background())) - - got, err := GenerateVMGPlacementAnnotations(ctx, tt.vmg, tt.machineDeployments) - + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(tt.initialClientObjects...).Build() + annotations, err := GenerateVirtualMachineGroupAnnotations(ctx, fakeClient, tt.vmg, tt.machineDeployments) if tt.wantErr { g.Expect(err).To(HaveOccurred()) } else { g.Expect(err).NotTo(HaveOccurred()) - g.Expect(got).To(Equal(tt.wantAnnotations)) + g.Expect(annotations).To(Equal(tt.expectedAnnotations)) } }) } @@ -296,7 +354,7 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { // Initial objects for the successful VMG creation path (Expected: 1, Current: 1) cluster := newCluster(clusterName, clusterNamespace, true, 1, 0) vsm1 := newVSphereMachine("vsm-1", mdName1, false, false, nil) - md1 := newMachineDeployment(mdName1, clusterName, clusterNamespace, ptr.To(int32(1))) + md1 := newMachineDeployment(mdName1, clusterName, clusterNamespace, true, ptr.To(int32(1))) tests := []struct { name string @@ -368,7 +426,8 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { for _, tt := range tests { // Looks odd, but need to reinitialize test variable tt := tt - t.Run(tt.name, func(_ *testing.T) { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(tt.initialObjects...).Build() reconciler := &VirtualMachineGroupReconciler{ Client: fakeClient, @@ -390,7 +449,7 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { // Check that the core fields were set by the MutateFn g.Expect(vmg.Labels).To(HaveKeyWithValue(clusterv1.ClusterNameLabel, 
clusterName)) g.Expect(vmg.Spec.BootOrder).To(HaveLen(1)) - expected, err := getExpectedVSphereMachines(ctx, fakeClient, tt.initialObjects[0].(*clusterv1.Cluster)) + expected, err := getExpectedVSphereMachineCount(ctx, fakeClient, tt.initialObjects[0].(*clusterv1.Cluster)) g.Expect(err).NotTo(HaveOccurred(), "Should get expected Machines") g.Expect(vmg.Spec.BootOrder[0].Members).To(HaveLen(int(expected))) @@ -456,7 +515,7 @@ func newVSphereMachine(name, mdName string, isCP, deleted bool, namingStrategy * } // Helper function to create a VMG member status with placement info. -func newVMGMemberStatus(name, kind string, isPlacementReady bool, zone string) vmoprv1.VirtualMachineGroupMemberStatus { +func newVMGMemberStatus(name, kind string, isPlacementReady, placement bool, zone string) vmoprv1.VirtualMachineGroupMemberStatus { memberStatus := vmoprv1.VirtualMachineGroupMemberStatus{ Name: name, Kind: kind, @@ -467,23 +526,32 @@ func newVMGMemberStatus(name, kind string, isPlacementReady bool, zone string) v Type: vmoprv1.VirtualMachineGroupMemberConditionPlacementReady, Status: metav1.ConditionTrue, }) + } + + if placement { memberStatus.Placement = &vmoprv1.VirtualMachinePlacementStatus{Zone: zone} } + return memberStatus } // Helper function to create a MachineDeployment object. -func newMachineDeployment(name, clusterName, clusterNS string, replicas *int32) *clusterv1.MachineDeployment { - return &clusterv1.MachineDeployment{ +func newMachineDeployment(name, clusterName, clusterNS string, isReplicaSet bool, replicas *int32) *clusterv1.MachineDeployment { + md := &clusterv1.MachineDeployment{ ObjectMeta: metav1.ObjectMeta{ Name: name, Namespace: clusterNS, Labels: map[string]string{clusterv1.ClusterNameLabel: clusterName}, }, - Spec: clusterv1.MachineDeploymentSpec{ + } + + if isReplicaSet { + md.Spec = clusterv1.MachineDeploymentSpec{ Replicas: replicas, - }, + } } + + return md } // Helper function to create a basic Cluster object used as input. diff --git a/pkg/services/vmoperator/vmopmachine.go b/pkg/services/vmoperator/vmopmachine.go index a60292c2d0..4c7ad8234c 100644 --- a/pkg/services/vmoperator/vmopmachine.go +++ b/pkg/services/vmoperator/vmopmachine.go @@ -213,7 +213,11 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap } // Proceed only if the machine is a member of the VirtualMachineGroup. - if !v.checkVirtualMachineGroupMembership(vmOperatorVMGroup, supervisorMachineCtx) { + isMember, err := v.checkVirtualMachineGroupMembership(vmOperatorVMGroup, supervisorMachineCtx) + if err != nil { + return true, errors.Wrapf(err, "%s", fmt.Sprintf("failed to check if VirtualMachine %s is a member of VirtualMachineGroup %s/%s", supervisorMachineCtx.VSphereMachine.Name, vmOperatorVMGroup.Name, vmOperatorVMGroup.Namespace)) + } + if !isMember { v1beta2conditions.Set(supervisorMachineCtx.VSphereMachine, metav1.Condition{ Type: infrav1.VSphereMachineVirtualMachineProvisionedV1Beta2Condition, Status: metav1.ConditionFalse, @@ -847,13 +851,12 @@ func (v *VmopMachineService) addVolumes(ctx context.Context, supervisorMachineCt }, } - // Before VC 9.1: // The CSI zone annotation must be set when using a zonal storage class, // which is required when the cluster has multiple (3) zones. // Single zone clusters (legacy/default) do not support zonal storage and must not // have the zone annotation set. - // Since VC 9.1: With Node Auto Placement enabled, failureDomain is optional and CAPV no longer - // sets PVC annotations. 
PVC placement now follows the StorageClass behavior (Immediate or WaitForFirstConsumer).
+	// However, with Node Auto Placement enabled, failureDomain is optional and CAPV no longer
+	// sets PVC annotations. PVC placement now follows the StorageClass behavior (Immediate or WaitForFirstConsumer).
 	zonal := len(supervisorMachineCtx.VSphereCluster.Status.FailureDomains) > 1
 
 	if zone := supervisorMachineCtx.VSphereMachine.Spec.FailureDomain; zonal && zone != nil {
@@ -964,13 +967,17 @@ func getMachineDeploymentNameForCluster(cluster *clusterv1.Cluster) string {
 
 // checkVirtualMachineGroupMembership checks if the machine is in the first boot order group
 // and performs logic if a match is found.
-func (v *VmopMachineService) checkVirtualMachineGroupMembership(vmOperatorVMGroup *vmoprv1.VirtualMachineGroup, supervisorMachineCtx *vmware.SupervisorMachineContext) bool {
+func (v *VmopMachineService) checkVirtualMachineGroupMembership(vmOperatorVMGroup *vmoprv1.VirtualMachineGroup, supervisorMachineCtx *vmware.SupervisorMachineContext) (bool, error) {
 	if len(vmOperatorVMGroup.Spec.BootOrder) > 0 {
 		for _, member := range vmOperatorVMGroup.Spec.BootOrder[0].Members {
-			if member.Name == supervisorMachineCtx.Machine.Name {
-				return true
+			virtualMachineName, err := GenerateVirtualMachineName(supervisorMachineCtx.Machine.Name, supervisorMachineCtx.VSphereMachine.Spec.NamingStrategy)
+			if err != nil {
+				return false, err
+			}
+			if member.Name == virtualMachineName {
+				return true, nil
 			}
 		}
 	}
-	return false
+	return false, nil
 }
diff --git a/pkg/services/vmoperator/vmopmachine_test.go b/pkg/services/vmoperator/vmopmachine_test.go
index c36c9616ee..aaa9e9b437 100644
--- a/pkg/services/vmoperator/vmopmachine_test.go
+++ b/pkg/services/vmoperator/vmopmachine_test.go
@@ -826,6 +826,49 @@ var _ = Describe("VirtualMachine tests", func() {
 		Expect(vmService.Client.Create(ctx, machineDeployment)).To(Succeed())
 	})
 
+	Specify("Requeue valid Machine but not a member of the VirtualMachineGroup yet", func() {
+		machineDeploymentNotMemberName := "test-md-not-member"
+		workerMachineNotMember := "test-worker-machine-not-member"
+		machineNotMember := util.CreateMachine(workerMachineNotMember, clusterName, k8sVersion, false)
+		machineNotMember.Labels[clusterv1.MachineDeploymentNameLabel] = machineDeploymentNotMemberName
+
+		vsphereMachineNotMember := util.CreateVSphereMachine(workerMachineNotMember, clusterName, className, imageName, storageClass, false)
+
+		clusterContext, controllerManagerContext := util.CreateClusterContext(cluster, vsphereCluster)
+		supervisorMachineContext = util.CreateMachineContext(clusterContext, machineNotMember, vsphereMachineNotMember)
+		supervisorMachineContext.ControllerManagerContext = controllerManagerContext
+
+		// Create a MachineDeployment for the worker
+		machineDeploymentNotMember := createMachineDeployment(machineDeploymentNotMemberName, corev1.NamespaceDefault, clusterName, "")
+		Expect(vmService.Client.Create(ctx, machineDeploymentNotMember)).To(Succeed())
+
+		expectReconcileError = false
+		expectVMOpVM = false
+		expectedImageName = imageName
+		expectedRequeue = true
+
+		// Provide valid bootstrap data
+		By("bootstrap data is created")
+		secretName := machineNotMember.GetName() + "-data"
+		secret := &corev1.Secret{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      secretName,
+				Namespace: machineNotMember.GetNamespace(),
+			},
+			Data: map[string][]byte{
+				"value": []byte(bootstrapData),
+			},
+		}
+		Expect(vmService.Client.Create(ctx, secret)).To(Succeed())
+
+
machineNotMember.Spec.Bootstrap.DataSecretName = &secretName + + By("VirtualMachine is not created") + requeue, err = vmService.ReconcileNormal(ctx, supervisorMachineContext) + Expect(err).ShouldNot(HaveOccurred()) + Expect(requeue).Should(BeTrue()) + }) + Specify("Reconcile valid Machine with no failure domain set", func() { expectReconcileError = false expectVMOpVM = true From dc441f6eee677336b6caffc2db97b32ded31009a Mon Sep 17 00:00:00 2001 From: Gong Zhang Date: Fri, 14 Nov 2025 18:26:22 +0800 Subject: [PATCH 16/25] Address review comments - Refine VMG controller watch - Handle race conditions in VMG controller by gating member update - Refine data struct for VM Affinity config - Refine UT - Refine naming, logging, godoc - Miscellaneous Signed-off-by: Gong Zhang --- config/rbac/role.yaml | 7 +- .../vmware/virtualmachinegroup_controller.go | 38 +- .../vmware/virtualmachinegroup_reconciler.go | 394 +++++++++++------ .../virtualmachinegroup_reconciler_test.go | 410 +++++++++++++++++- pkg/services/vmoperator/vmopmachine.go | 92 ++-- pkg/services/vmoperator/vmopmachine_test.go | 2 +- 6 files changed, 734 insertions(+), 209 deletions(-) diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index c57a326fb9..d3963fb5bf 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -250,6 +250,7 @@ rules: - vmoperator.vmware.com resources: - virtualmachinegroups + - virtualmachinegroups/status - virtualmachineimages - virtualmachineimages/status - virtualmachines @@ -265,12 +266,6 @@ rules: - patch - update - watch -- apiGroups: - - vmoperator.vmware.com - resources: - - virtualmachinegroups/status - verbs: - - get - apiGroups: - vmware.com resources: diff --git a/controllers/vmware/virtualmachinegroup_controller.go b/controllers/vmware/virtualmachinegroup_controller.go index d3bf325ec5..22767f12f3 100644 --- a/controllers/vmware/virtualmachinegroup_controller.go +++ b/controllers/vmware/virtualmachinegroup_controller.go @@ -37,14 +37,6 @@ import ( capvcontext "sigs.k8s.io/cluster-api-provider-vsphere/pkg/context" ) -// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters,verbs=get;list;watch -// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters/status,verbs=get -// +kubebuilder:rbac:groups=vmoperator.vmware.com,resources=virtualmachinegroups,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=vmoperator.vmware.com,resources=virtualmachinegroups/status,verbs=get -// +kubebuilder:rbac:groups=vmware.infrastructure.cluster.x-k8s.io,resources=vspheremachines,verbs=get;list;watch -// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinedeployments,verbs=get;list;watch -// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines,verbs=get;list;watch - // AddVirtualMachineGroupControllerToManager adds the VirtualMachineGroup controller to the provided manager. func AddVirtualMachineGroupControllerToManager(ctx context.Context, controllerManagerCtx *capvcontext.ControllerManagerContext, mgr manager.Manager, options controller.Options) error { predicateLog := ctrl.LoggerFrom(ctx).WithValues("controller", "virtualmachinegroup") @@ -62,15 +54,24 @@ func AddVirtualMachineGroupControllerToManager(ctx context.Context, controllerMa Watches( &vmoprv1.VirtualMachineGroup{}, handler.EnqueueRequestForOwner(mgr.GetScheme(), reconciler.Client.RESTMapper(), &clusterv1.Cluster{}), + ctrlbldr.WithPredicates(predicates.ResourceIsChanged(mgr.GetScheme(), predicateLog)), ). 
Watches( &vmwarev1.VSphereMachine{}, handler.EnqueueRequestsFromMapFunc(reconciler.VSphereMachineToCluster), ctrlbldr.WithPredicates( predicate.Funcs{ - UpdateFunc: func(event.UpdateEvent) bool { return false }, - CreateFunc: func(event.CreateEvent) bool { return true }, - DeleteFunc: func(event.DeleteEvent) bool { return true }, + UpdateFunc: func(event.UpdateEvent) bool { return false }, + CreateFunc: func(e event.CreateEvent) bool { + // Only handle VSphereMachine which belongs to a MachineDeployment + _, found := e.Object.GetLabels()[clusterv1.MachineDeploymentNameLabel] + return found + }, + DeleteFunc: func(e event.DeleteEvent) bool { + // Only handle VSphereMachine which belongs to a MachineDeployment + _, found := e.Object.GetLabels()[clusterv1.MachineDeploymentNameLabel] + return found + }, GenericFunc: func(event.GenericEvent) bool { return false }, }), ). @@ -80,9 +81,7 @@ func AddVirtualMachineGroupControllerToManager(ctx context.Context, controllerMa } // VSphereMachineToCluster maps VSphereMachine events to Cluster reconcile requests. -// This handler only processes VSphereMachine objects for Day-2 operations when VMG could be found, ensuring -// VMG member list in sync with VSphereMachines. If no corresponding VMG is found, this is a no-op. -func (r *VirtualMachineGroupReconciler) VSphereMachineToCluster(ctx context.Context, a ctrlclient.Object) []reconcile.Request { +func (r *VirtualMachineGroupReconciler) VSphereMachineToCluster(_ context.Context, a ctrlclient.Object) []reconcile.Request { vSphereMachine, ok := a.(*vmwarev1.VSphereMachine) if !ok { return nil @@ -93,17 +92,10 @@ func (r *VirtualMachineGroupReconciler) VSphereMachineToCluster(ctx context.Cont return nil } - vmg := &vmoprv1.VirtualMachineGroup{} - err := r.Client.Get(ctx, apitypes.NamespacedName{Namespace: vSphereMachine.Namespace, Name: clusterName}, vmg) - - if err != nil { - return nil - } - return []reconcile.Request{{ NamespacedName: apitypes.NamespacedName{ - Namespace: vmg.Namespace, - Name: vmg.Name, + Namespace: vSphereMachine.Namespace, + Name: clusterName, }, }} } diff --git a/controllers/vmware/virtualmachinegroup_reconciler.go b/controllers/vmware/virtualmachinegroup_reconciler.go index 5503cc181b..3e02cc5f21 100644 --- a/controllers/vmware/virtualmachinegroup_reconciler.go +++ b/controllers/vmware/virtualmachinegroup_reconciler.go @@ -20,14 +20,15 @@ package vmware import ( "context" "fmt" - "sort" - "time" + "strings" "github.com/pkg/errors" vmoprv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha2" + "golang.org/x/exp/slices" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/tools/record" "k8s.io/klog/v2" clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" @@ -35,6 +36,7 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/reconcile" vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1" @@ -42,10 +44,8 @@ import ( ) const ( - // reconciliationDelay is the delay time for requeueAfter. - reconciliationDelay = 10 * time.Second // ZoneAnnotationPrefix is the prefix used for placement decision annotations which will be set on VirtualMachineGroup. 
- ZoneAnnotationPrefix = "zone.cluster.x-k8s.io" + ZoneAnnotationPrefix = "zone.vmware.infrastructure.cluster.x-k8s.io" ) // VirtualMachineGroupReconciler reconciles VirtualMachineGroup. @@ -57,22 +57,24 @@ type VirtualMachineGroupReconciler struct { // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters,verbs=get;list;watch // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters/status,verbs=get // +kubebuilder:rbac:groups=vmoperator.vmware.com,resources=virtualmachinegroups,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=vmoperator.vmware.com,resources=virtualmachinegroups/status,verbs=get +// +kubebuilder:rbac:groups=vmoperator.vmware.com,resources=virtualmachinegroups/status,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=vmware.infrastructure.cluster.x-k8s.io,resources=vspheremachines,verbs=get;list;watch // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinedeployments,verbs=get;list;watch // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines,verbs=get;list;watch -// This controller is introduced by CAPV to coordinate the creation and maintenance of +// This controller is introduced to coordinate the creation and maintenance of // the VirtualMachineGroup (VMG) object with respect to the worker VSphereMachines in the Cluster. // -// - Batch Coordination: Gating the initial creation of the VMG until all expected worker -// VSphereMachines are present. This ensures the complete VM member list is sent to the VM -// Service in a single batch operation due to a limitation of underlying service. +// - Batch Coordination: Gating the initial creation of the VMG until for the first time all the +// MachineDeployment replicas will have a corresponding VSphereMachine. +// Once this condition is met, the VirtualMachineGroup is created considering +// the initial set of machines for the initial placement decision. +// When the VirtualMachineGroup reports the placement decision, then finally +// creation of VirtualMachines is unblocked. // // - Placement Persistence: Persisting the MachineDeployment-to-Zone mapping (placement decision) as a -// metadata annotation on the VMG object. This decision is crucial for guiding newer VMs created -// during Day-2 operations such as scaling, upgrades, and remediations, ensuring consistency. This is also due to -// a known limitation of underlying services. +// metadata annotation on the VMG object. The same decision must be respected also for placement +// of machines created after initial placement. // // - Membership Maintenance: Dynamically updating the VMG's member list to reflect the current // state of VMs belonging to MachineDeployments (handling scale-up/down events). @@ -89,21 +91,22 @@ func (r *VirtualMachineGroupReconciler) Reconcile(ctx context.Context, req ctrl. return reconcile.Result{}, err } - log = log.WithValues("Cluster", klog.KObj(cluster)) + // Note: VirtualMachineGroup is going to have same name and namespace of the cluster. + // Using cluster here, because VirtualMachineGroup is created only after initial placement completes. + log = log.WithValues("VirtualMachineGroup", klog.KObj(cluster)) + ctx = ctrl.LoggerInto(ctx, log) // If Cluster is deleted, just return as VirtualMachineGroup will be GCed and no extra processing needed. 
if !cluster.DeletionTimestamp.IsZero() { return reconcile.Result{}, nil } - // If ControlPlane haven't initialized, requeue it since VSphereMachines of MachineDeployment will only be created after - // ControlPlane is initialized. + // If ControlPlane haven't initialized, requeue it since CAPV will only start to reconcile VSphereMachines of + // MachineDeployment after ControlPlane is initialized. if !conditions.IsTrue(cluster, clusterv1.ClusterControlPlaneInitializedCondition) { - log.Info("Waiting for Cluster ControlPlaneInitialized") - return reconcile.Result{RequeueAfter: reconciliationDelay}, nil + return reconcile.Result{}, nil } - // Continue with the main logic. return r.createOrUpdateVirtualMachineGroup(ctx, cluster) } @@ -111,49 +114,49 @@ func (r *VirtualMachineGroupReconciler) Reconcile(ctx context.Context, req ctrl. func (r *VirtualMachineGroupReconciler) createOrUpdateVirtualMachineGroup(ctx context.Context, cluster *clusterv1.Cluster) (reconcile.Result, error) { log := ctrl.LoggerFrom(ctx) - // Calculate current Machines of all MachineDeployments. - current, err := getCurrentVSphereMachines(ctx, r.Client, cluster.Namespace, cluster.Name) + // Get current VSphereMachines of all MachineDeployments. + currentVSphereMachines, err := getCurrentVSphereMachines(ctx, r.Client, cluster.Namespace, cluster.Name) if err != nil { - return reconcile.Result{}, errors.Wrapf(err, "failed to get current VSphereMachine of cluster %s/%s", - cluster.Name, cluster.Namespace) + return reconcile.Result{}, err } - desiredVMG := &vmoprv1.VirtualMachineGroup{} + vmg := &vmoprv1.VirtualMachineGroup{} key := &client.ObjectKey{ Namespace: cluster.Namespace, Name: cluster.Name, } - if err := r.Client.Get(ctx, *key, desiredVMG); err != nil { + if err := r.Client.Get(ctx, *key, vmg); err != nil { if !apierrors.IsNotFound(err) { - log.Error(err, "failed to get VirtualMachineGroup") - return reconcile.Result{}, err + return reconcile.Result{}, errors.Wrapf(err, "failed to get VirtualMachineGroup %s", klog.KObj(vmg)) } - // Calculate expected Machines of all MachineDeployments. - // CAPV retrieves placement decisions from the VirtualMachineGroup to guide - // day-2 VM placement. At least one VM is required for each MachineDeployment. - expected, err := getExpectedVSphereMachineCount(ctx, r.Client, cluster) + // If the VirtualMachineGroup does not exist yet, + // calculate expected VSphereMachine count of all MachineDeployments. + expectedVSphereMachineCount, err := getExpectedVSphereMachineCount(ctx, r.Client, cluster) if err != nil { - log.Error(err, "failed to get expected Machines of all MachineDeployment") - return reconcile.Result{}, err + return reconcile.Result{}, errors.Wrapf(err, "failed to get expected Machines of all MachineDeployment, Cluster %s", klog.KObj(cluster)) } - if expected == 0 { - errMsg := fmt.Sprintf("Found 0 desired VSphereMachine for Cluster %s/%s", cluster.Name, cluster.Namespace) - log.Error(nil, errMsg) - return reconcile.Result{}, errors.New(errMsg) + // Since CAPV retrieves placement decisions from the VirtualMachineGroup to guide + // day-2 worker VM placement. At least one VM is expected for each MachineDeployment. + // If no worker of MachineDeployment is defined,the controller + // interprets this as an intentional configuration, just logs the observation and no-op. 
+		if expectedVSphereMachineCount == 0 {
+			log.Info("Found 0 desired VSphereMachine of MachineDeployment, stop reconcile")
+			return reconcile.Result{}, nil
 		}
 
 		// Wait for all intended VSphereMachines corresponding to MachineDeployment to exist only during initial Cluster creation.
-		current := int32(len(current))
-		if current < expected {
-			log.Info("current VSphereMachines do not match expected", "Expected:", expected,
-				"Current:", current, "ClusterName", cluster.Name, "Namespace", cluster.Namespace)
-			return reconcile.Result{RequeueAfter: reconciliationDelay}, nil
+		// For day-2 operations the VirtualMachineGroup already exists, so this wait only applies to initial creation.
+		currentVSphereMachineCount := int32(len(currentVSphereMachines))
+		if currentVSphereMachineCount != expectedVSphereMachineCount {
+			log.Info("Waiting for expected VSphereMachines required for the initial placement call", "Expected:", expectedVSphereMachineCount,
+				"Current:", currentVSphereMachineCount, "Cluster", klog.KObj(cluster))
+			return reconcile.Result{}, nil
 		}
 
-		desiredVMG = &vmoprv1.VirtualMachineGroup{
+		vmg = &vmoprv1.VirtualMachineGroup{
 			ObjectMeta: metav1.ObjectMeta{
 				Name:      key.Name,
 				Namespace: key.Namespace,
@@ -162,8 +165,8 @@ func (r *VirtualMachineGroupReconciler) createOrUpdateVirtualMachineGroup(ctx co
 	}
 
 	// Generate VM names according to the naming strategy set on the VSphereMachine.
-	vmNames := make([]string, 0, len(current))
-	for _, machine := range current {
+	vmNames := make([]string, 0, len(currentVSphereMachines))
+	for _, machine := range currentVSphereMachines {
 		name, err := GenerateVirtualMachineName(machine.Name, machine.Spec.NamingStrategy)
 		if err != nil {
 			return reconcile.Result{}, err
@@ -171,11 +174,9 @@ func (r *VirtualMachineGroupReconciler) createOrUpdateVirtualMachineGroup(ctx co
 		vmNames = append(vmNames, name)
 	}
 	// Sort the VM names alphabetically for consistent ordering
-	sort.Slice(vmNames, func(i, j int) bool {
-		return vmNames[i] < vmNames[j]
-	})
+	slices.Sort(vmNames)
 
-	members := make([]vmoprv1.GroupMember, 0, len(current))
+	members := make([]vmoprv1.GroupMember, 0, len(currentVSphereMachines))
 	for _, name := range vmNames {
 		members = append(members, vmoprv1.GroupMember{
 			Name: name,
@@ -192,59 +193,197 @@ func (r *VirtualMachineGroupReconciler) createOrUpdateVirtualMachineGroup(ctx co
 	}
 	mdNames := []string{}
 	for _, md := range machineDeployments.Items {
-		mdNames = append(mdNames, md.Name)
+		// Skip MachineDeployment marked for removal.
+		if md.DeletionTimestamp.IsZero() {
+			mdNames = append(mdNames, md.Name)
+		}
 	}
 
 	// Use CreateOrPatch to create or update the VirtualMachineGroup.
-	_, err = controllerutil.CreateOrPatch(ctx, r.Client, desiredVMG, func() error {
-		return r.reconcileVirtualMachineState(ctx, desiredVMG, cluster, members, mdNames)
+	_, err = controllerutil.CreateOrPatch(ctx, r.Client, vmg, func() error {
+		return r.reconcileVirtualMachineGroup(ctx, vmg, cluster, members, mdNames)
 	})
 
 	return reconcile.Result{}, err
 }
 
-// reconcileVirtualMachineState mutates the desiredVMG object to reflect the necessary spec and metadata changes.
+// reconcileVirtualMachineGroup mutates the VirtualMachineGroup object to reflect the necessary spec and metadata changes.
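To make the shape of the object this mutate function maintains concrete, here is a sketch of the VirtualMachineGroup that results for a cluster with one MachineDeployment and two workers; the names, namespace, zone, and member list are illustrative, and only fields managed by this controller are shown:

	// Hypothetical desired state produced by the mutate function (sketch, not the patch's code).
	example := &vmoprv1.VirtualMachineGroup{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "cluster-a",
			Namespace: "ns-1",
			Labels:    map[string]string{clusterv1.ClusterNameLabel: "cluster-a"},
			Annotations: map[string]string{
				"zone.vmware.infrastructure.cluster.x-k8s.io/md-0": "zone-a",
			},
		},
		Spec: vmoprv1.VirtualMachineGroupSpec{
			BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{
				{Members: []vmoprv1.GroupMember{{Name: "cluster-a-md-0-vm-1"}, {Name: "cluster-a-md-0-vm-2"}}},
			},
		},
	}
	_ = example // the Cluster is additionally set as controller owner via SetControllerReference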
+func (r *VirtualMachineGroupReconciler) reconcileVirtualMachineGroup(ctx context.Context, vmg *vmoprv1.VirtualMachineGroup, cluster *clusterv1.Cluster, members []vmoprv1.GroupMember, mdNames []string) error { // Set the desired labels - if desiredVMG.Labels == nil { - desiredVMG.Labels = make(map[string]string) - desiredVMG.Labels[clusterv1.ClusterNameLabel] = cluster.Name + if vmg.Labels == nil { + vmg.Labels = make(map[string]string) } + // Always ensure cluster name label is set + vmg.Labels[clusterv1.ClusterNameLabel] = cluster.Name - if desiredVMG.Annotations == nil { - desiredVMG.Annotations = make(map[string]string) + if vmg.Annotations == nil { + vmg.Annotations = make(map[string]string) } // Add per-md-zone label for day-2 operations once placement of a VM belongs to MachineDeployment is done. // Do not update per-md-zone label once set, as placement decision should not change without user explicitly // set failureDomain. - placementAnnotations, err := GenerateVirtualMachineGroupAnnotations(ctx, r.Client, desiredVMG, mdNames) - if err != nil { + if err := generateVirtualMachineGroupAnnotations(ctx, r.Client, vmg, mdNames); err != nil { return err } - if len(placementAnnotations) > 0 { - for k, v := range placementAnnotations { - if _, exists := desiredVMG.Annotations[k]; !exists { - desiredVMG.Annotations[k] = v - } - } + + // Member Update: + // The VirtualMachineGroup's BootOrder.Members list, is only allowed to be set or added + // during two phases to maintain control over VM placement: + // + // 1. Initial Creation: When the VirtualMachineGroup object does not yet exist. + // 2. Post-Placement: After the VirtualMachineGroup exists AND is marked Ready which means all members are placed successfully, + // and critically, all MachineDeployments have a corresponding zone placement annotation recorded on the VMG. + // + // For member removal, this is always allowed since it doesn't impact ongoing placement or rely on the placement annotation. + // + // This prevents member updates that could lead to new VMs being created + // without necessary zone labels, resulting in undesired placement, such as VM within a MachineDeployment but are + // placed to different Zones. + + isMemberUpdateAllowed, err := isMemberUpdateAllowed(ctx, r.Client, members, vmg) + if err != nil { + return err } - // Set the BootOrder spec as the - desiredVMG.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{ - { - Members: members, - }, + if isMemberUpdateAllowed { + vmg.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{ + { + Members: members, + }, + } } // Set the owner reference - if err := controllerutil.SetControllerReference(cluster, desiredVMG, r.Client.Scheme()); err != nil { - return errors.Wrapf(err, "failed to mark %s as owner of %s", klog.KObj(cluster), klog.KObj(desiredVMG)) + if err := controllerutil.SetControllerReference(cluster, vmg, r.Client.Scheme()); err != nil { + return errors.Wrapf(err, "failed to mark Cluster %s as owner of VirtualMachineGroup %s", klog.KObj(cluster), klog.KObj(vmg)) } return nil } +// isMemberUpdateAllowed determines if the BootOrder.Members field can be safely updated on the VirtualMachineGroup. +// It allows updates only during initial creation or after all member placement are completed successfully. 
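The gating rules implemented below can be summarized as follows (a reading aid for the function body, not an alternative implementation):

	// - VirtualMachineGroup not found                         -> allow (initial creation path).
	// - Fewer target members than current members             -> allow (removal never affects placement).
	// - New member whose owning Machine sets spec.failureDomain -> allow (placement is skipped for it).
	// - New member without failureDomain                      -> allow only once the owning MachineDeployment
	//   already has its zone annotation recorded on the VirtualMachineGroup; otherwise block and retry.
	// - Missing VSphereMachine, owner reference, or Machine   -> block temporarily until the objects exist.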
+func isMemberUpdateAllowed(ctx context.Context, kubeClient client.Client, targetMember []vmoprv1.GroupMember, vmg *vmoprv1.VirtualMachineGroup) (bool, error) { + logger := log.FromContext(ctx) + key := client.ObjectKey{ + Namespace: vmg.Namespace, + Name: vmg.Name, + } + + // Retrieve the current VirtualMachineGroup state + currentVMG := &vmoprv1.VirtualMachineGroup{} + if err := kubeClient.Get(ctx, key, currentVMG); err != nil { + if apierrors.IsNotFound(err) { + // If VirtualMachineGroup is not found, allow member update as it should be in initial creation phase. + logger.V(5).Info("VirtualMachineGroup not found, allowing member update for initial creation.") + return true, nil + } + return false, errors.Wrapf(err, "failed to get VirtualMachineGroup %s/%s", vmg.Namespace, vmg.Name) + } + // Copy retrieved data back to the input pointer for consistency + *vmg = *currentVMG + + // Get current member names from VirtualMachineGroup Spec.BootOrder + currentMemberNames := make(map[string]struct{}) + if len(vmg.Spec.BootOrder) > 0 { + for _, m := range vmg.Spec.BootOrder[0].Members { + currentMemberNames[m.Name] = struct{}{} + } + } + + // 1. If removing members, allow immediately since it doesn't impact placement or placement annotation set. + if len(targetMember) < len(currentMemberNames) { + logger.V(5).Info("Scaling down detected (fewer target members), allowing member update.") + return true, nil + } + + // 2. If adding members, continue following checks. + var newMembers []vmoprv1.GroupMember + for _, m := range targetMember { + if _, exists := currentMemberNames[m.Name]; !exists { + newMembers = append(newMembers, m) + } + } + + // 3. Check newly added members for Machine.Spec.FailureDomain via VSphereMachine.If a member belongs to a Machine + // which has failureDomain specified, allow it since it will skip the placement + // process. If not, continue to check if the belonging MachineDeployment has got placement annotation. + for _, newMember := range newMembers { + vsphereMachineKey := types.NamespacedName{ + Namespace: vmg.Namespace, + Name: newMember.Name, // Member Name is the VSphereMachine Name. 
+ } + vsphereMachine := &vmwarev1.VSphereMachine{} + if err := kubeClient.Get(ctx, vsphereMachineKey, vsphereMachine); err != nil { + if apierrors.IsNotFound(err) { + logger.V(5).Info("VSphereMachine for new member not found, temporarily blocking update.", "VSphereMachineName", newMember.Name) + return false, nil + } + return false, errors.Wrapf(err, "failed to get VSphereMachine %s", klog.KRef(newMember.Name, vmg.Namespace)) + } + + var machineOwnerName string + for _, owner := range vsphereMachine.OwnerReferences { + if owner.Kind == "Machine" { + machineOwnerName = owner.Name + break + } + } + + if machineOwnerName == "" { + // VSphereMachine found but owner Machine reference is missing + logger.V(5).Info("VSphereMachine found but owner Machine reference is missing, temporarily blocking update.", "VSphereMachineName", newMember.Name) + return false, nil + } + + machineKey := types.NamespacedName{ + Namespace: vmg.Namespace, + Name: machineOwnerName, + } + machine := &clusterv1.Machine{} + + if err := kubeClient.Get(ctx, machineKey, machine); err != nil { + if apierrors.IsNotFound(err) { + logger.V(5).Info("CAPI Machine not found via owner reference, temporarily blocking update.", "Machine", klog.KRef(machineOwnerName, vmg.Namespace)) + return false, nil + } + return false, errors.Wrapf(err, "failed to get CAPI Machine %s", klog.KRef(machineOwnerName, vmg.Namespace)) + } + + // If FailureDomain is set on CAPI Machine, placement process will be skipped. Allow update. + fd := machine.Spec.FailureDomain + if fd != "" { + logger.V(5).Info("New member's Machine has FailureDomain specified. Allowing VMG update for this member.") + continue + } + + // If FailureDomain is NOT set. Requires placement or placement Annotation. Fall through to full VMG Annotation check. + logger.V(5).Info("New member's CAPI Machine lacks FailureDomain. Falling through to full VMG Ready and Annotation check.", "MachineName", machineOwnerName) + + // If no Placement Annotations, skip member update and wait for it. + annotations := vmg.GetAnnotations() + if len(annotations) == 0 { + return false, nil + } + + mdLabelName := vsphereMachine.Labels[clusterv1.MachineDeploymentNameLabel] + + annotationKey := fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdLabelName) + + if _, found := annotations[annotationKey]; !found { + logger.V(5).Info("Required placement annotation is missing.", + "Member", newMember, "Annotation", annotationKey) + return false, nil + } + + logger.V(5).Info("New member requires placement annotation and it is present. Allowing this member.", "Member", newMember) + } + + logger.V(5).Info("Either no new members, or all newly added members existed or have satisfied placement requirements, allowing update.") + return true, nil +} + // getExpectedVSphereMachineCount get expected total count of Machines belonging to the Cluster. 
func getExpectedVSphereMachineCount(ctx context.Context, kubeClient client.Client, cluster *clusterv1.Cluster) (int32, error) { var mdList clusterv1.MachineDeploymentList @@ -259,7 +398,8 @@ func getExpectedVSphereMachineCount(ctx context.Context, kubeClient client.Clien var total int32 for _, md := range mdList.Items { - if md.Spec.Replicas != nil { + // Skip MachineDeployment marked for removal + if md.DeletionTimestamp.IsZero() && md.Spec.Replicas != nil { total += *md.Spec.Replicas } } @@ -270,8 +410,6 @@ func getExpectedVSphereMachineCount(ctx context.Context, kubeClient client.Clien // getCurrentVSphereMachines returns the list of VSphereMachines belonging to the Cluster’s MachineDeployments. // VSphereMachines marked for removal are excluded from the result. func getCurrentVSphereMachines(ctx context.Context, kubeClient client.Client, clusterNamespace, clusterName string) ([]vmwarev1.VSphereMachine, error) { - log := ctrl.LoggerFrom(ctx) - // List VSphereMachine objects var vsMachineList vmwarev1.VSphereMachineList if err := kubeClient.List(ctx, &vsMachineList, @@ -279,7 +417,7 @@ func getCurrentVSphereMachines(ctx context.Context, kubeClient client.Client, cl client.MatchingLabels{clusterv1.ClusterNameLabel: clusterName}, client.HasLabels{clusterv1.MachineDeploymentNameLabel}, ); err != nil { - return nil, errors.Wrapf(err, "failed to list VSphereMachines in namespace %s", clusterNamespace) + return nil, errors.Wrapf(err, "failed to list VSphereMachines of Cluster %s", klog.KRef(clusterNamespace, clusterName)) } var result []vmwarev1.VSphereMachine @@ -288,39 +426,58 @@ func getCurrentVSphereMachines(ctx context.Context, kubeClient client.Client, cl result = append(result, vs) } } - log.V(4).Info("Final list of VSphereMachines for VMG member generation", "count", len(result)) - return result, nil } -// GenerateVirtualMachineGroupAnnotations checks the VMG status for placed members, verifies their ownership +// generateVirtualMachineGroupAnnotations checks the VMG status for placed members, verifies their ownership // by fetching the corresponding VSphereMachine, and extracts the zone information to persist it -// as an annotation on the VMG object for Day-2 operations. +// as an annotation on the VMG object for Day-2 operations. It will also clean up +// any existing placement annotations that correspond to MachineDeployments that no longer exist. // // The function attempts to find at least one successfully placed VM (VirtualMachineGroupMemberConditionPlacementReady==True) // for each MachineDeployment and records its zone. Once a zone is recorded for an MD, subsequent VMs // belonging to that same MD are skipped. 
-func GenerateVirtualMachineGroupAnnotations(ctx context.Context, kubeClient client.Client, vmg *vmoprv1.VirtualMachineGroup, machineDeployments []string) (map[string]string, error) { +func generateVirtualMachineGroupAnnotations(ctx context.Context, kubeClient client.Client, vmg *vmoprv1.VirtualMachineGroup, machineDeployments []string) error { log := ctrl.LoggerFrom(ctx) - log.V(4).Info(fmt.Sprintf("Generating annotations for VirtualMachineGroup %s/%s", vmg.Name, vmg.Namespace)) + log.V(5).Info(fmt.Sprintf("Generating annotations for VirtualMachineGroup %s/%s", vmg.Name, vmg.Namespace)) + if vmg.Annotations == nil { + vmg.Annotations = make(map[string]string) + } annotations := vmg.Annotations - if annotations == nil { - annotations = make(map[string]string) + + // If a MachineDeployment has been deleted, its corresponding placement annotation + // on the VirtualMachineGroup should also be removed to avoid configuration drift. + activeMDs := make(map[string]bool) + for _, md := range machineDeployments { + activeMDs[md] = true } + // Iterate over existing VirtualMachineGroup annotations and delete those that are stale. + for key := range annotations { + if !strings.HasPrefix(key, ZoneAnnotationPrefix+"/") { + // Skip non-placement annotations + continue + } + + mdName := strings.TrimPrefix(key, ZoneAnnotationPrefix+"/") + + // If the MD name is NOT in the list of currently active MDs, delete the annotation. + if found := activeMDs[mdName]; !found { + log.Info(fmt.Sprintf("Cleaning up stale placement annotation for none-existed MachineDeployment %s", mdName)) + delete(annotations, key) + } + } + + // Pre-computation: Convert the list of valid MachineDeployment names into a set. + mdNames := sets.New(machineDeployments...) + // Iterate through the VMG's members in Status. for _, member := range vmg.Status.Members { ns := vmg.Namespace - // Only VirtualMachines contribute to placement decisions. - if member.Kind != "VirtualMachine" { - log.Info(fmt.Sprintf("Member %s of %s/%s is not VirtualMachine type, skipping it", member.Name, vmg.Name, vmg.Namespace)) - continue - } // Skip it if member's VirtualMachineGroupMemberConditionPlacementReady is still not true. if !conditions.IsTrue(&member, vmoprv1.VirtualMachineGroupMemberConditionPlacementReady) { - log.Info(fmt.Sprintf("Member %s of %s/%s is not PlacementReady, skipping it", member.Name, vmg.Name, vmg.Namespace)) continue } @@ -335,51 +492,46 @@ func GenerateVirtualMachineGroupAnnotations(ctx context.Context, kubeClient clie log.Info(fmt.Sprintf("VSphereMachine %s/%s by member Name %s is not found, skipping it", member.Name, ns, member.Name)) continue } - log.Error(err, "failed to get VSphereMachine %s/%s", member.Name, ns) - return nil, err + return errors.Wrapf(err, "failed to get VSphereMachine %s/%s", member.Name, ns) } - mdNameFromLabel, found := vsm.Labels[clusterv1.MachineDeploymentNameLabel] + mdName, found := vsm.Labels[clusterv1.MachineDeploymentNameLabel] if !found { log.Info(fmt.Sprintf("Failed to get MachineDeployment label from VSphereMachine %s/%s, skipping it", member.Name, ns)) continue } // If we already found placement for this MachineDeployment, continue and move to next member. 
- if v, found := annotations[fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdNameFromLabel)]; found { - log.V(4).Info(fmt.Sprintf("Skipping MachineDeployment %s/%s, placement annotation %s already found", mdNameFromLabel, vsm.Namespace, v)) + if _, found := annotations[fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName)]; found { continue } // Check if this VM belongs to any of our target MachineDeployments. - // Annotation format is "zone.cluster.x-k8s.io/{machine-deployment-name}". - for _, md := range machineDeployments { - if mdNameFromLabel != md { - continue - } - - // Get the VM placement information by member status. - // VMs that have undergone placement do not have Placement info set, skip. - if member.Placement == nil { - log.V(4).Info(fmt.Sprintf("VM %s in VMG %s/%s has no placement info. Placement is nil", member.Name, vmg.Name, ns)) - continue - } + if !mdNames.Has(mdName) { + log.V(5).Info("Skipping member as its MachineDeployment name is not in the known list.", + "VMName", member.Name, "MDName", mdName) + continue + } - // Skip to next member if Zone is empty. - zone := member.Placement.Zone - if zone == "" { - log.V(4).Info(fmt.Sprintf("VM %s in VMG %s/%s has no placement info. Zone is empty", member.Name, "VMG", ns)) - continue - } + // Get the VM placement information by member status. + // VMs that have undergone placement do not have Placement info set, skip. + if member.Placement == nil { + log.V(5).Info(fmt.Sprintf("VM %s in VMG %s/%s has no placement info. Placement is nil", member.Name, vmg.Name, ns)) + continue + } - log.Info(fmt.Sprintf("VM %s in VMG %s/%s has been placed in zone %s", member.Name, ns, vmg.Name, zone)) - annotations[fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, md)] = zone - // Break from the inner loop as placement for this MachineDeployment is found. - break + // Skip to next member if Zone is empty. + zone := member.Placement.Zone + if zone == "" { + log.V(5).Info(fmt.Sprintf("VM %s in VMG %s/%s has no placement info. Zone is empty", member.Name, "VMG", ns)) + continue } + + log.V(5).Info(fmt.Sprintf("VM %s in VMG %s/%s has been placed in zone %s", member.Name, ns, vmg.Name, zone)) + annotations[fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName)] = zone } - return annotations, nil + return nil } // GenerateVirtualMachineName generates the name of a VirtualMachine based on the naming strategy. 
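For reference on the annotation contract recorded above: generateVirtualMachineGroupAnnotations stores one annotation per MachineDeployment on the VirtualMachineGroup, keyed as ZoneAnnotationPrefix plus the MachineDeployment name, and vmopmachine.go later reads that key back to pin day-2 VMs of the same MachineDeployment to the recorded zone. The following is a minimal illustrative sketch of that lookup and is not part of this patch; the helper name and sample values are hypothetical, while the prefix mirrors the ZoneAnnotationPrefix constant introduced in vmopmachine.go.

package main

import "fmt"

// zoneAnnotationPrefix mirrors the ZoneAnnotationPrefix constant added in vmopmachine.go.
const zoneAnnotationPrefix = "zone.vmware.infrastructure.cluster.x-k8s.io"

// zoneForMachineDeployment returns the zone recorded for a MachineDeployment on the
// VirtualMachineGroup annotations, and false when no non-empty placement is recorded yet.
func zoneForMachineDeployment(vmgAnnotations map[string]string, mdName string) (string, bool) {
	zone, ok := vmgAnnotations[fmt.Sprintf("%s/%s", zoneAnnotationPrefix, mdName)]
	return zone, ok && zone != ""
}

func main() {
	// Hypothetical annotations as generateVirtualMachineGroupAnnotations would leave them.
	annotations := map[string]string{
		zoneAnnotationPrefix + "/md-worker-a": "zone-a",
	}
	if zone, ok := zoneForMachineDeployment(annotations, "md-worker-a"); ok {
		fmt.Println("md-worker-a is pinned to", zone) // prints: md-worker-a is pinned to zone-a
	}
}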
@@ -393,7 +545,7 @@ func GenerateVirtualMachineName(machineName string, namingStrategy *vmwarev1.Vir name, err := infrautilv1.GenerateMachineNameFromTemplate(machineName, namingStrategy.Template) if err != nil { - return "", errors.Wrap(err, "failed to generate name for VirtualMachine") + return "", errors.Wrapf(err, "failed to generate name for VirtualMachine %s", machineName) } return name, nil diff --git a/controllers/vmware/virtualmachinegroup_reconciler_test.go b/controllers/vmware/virtualmachinegroup_reconciler_test.go index 52c25501df..d8bed93b50 100644 --- a/controllers/vmware/virtualmachinegroup_reconciler_test.go +++ b/controllers/vmware/virtualmachinegroup_reconciler_test.go @@ -46,10 +46,332 @@ const ( clusterNamespace = "test-ns" mdName1 = "md-worker-a" mdName2 = "md-worker-b" + mdNameStale = "md-stale-c" zoneA = "zone-a" zoneB = "zone-b" + vmgName = "test-vmg" + vmgNamespace = "test-vmg-ns" + memberName1 = "vm-01" + memberName2 = "vm-02" + memberKind = "VirtualMachine" + failureDomainA = "zone-1" ) +func TestIsMemberUpdateAllowed(t *testing.T) { + ctx := context.Background() + + baseVMG := &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{Name: vmgName, Namespace: vmgNamespace}, + Status: vmoprv1.VirtualMachineGroupStatus{}, + Spec: vmoprv1.VirtualMachineGroupSpec{}, + } + + member := func(name string) vmoprv1.GroupMember { return vmoprv1.GroupMember{Name: name} } + + // CAPI Machine helpers + makeCAPIMachine := func(name, namespace string, fd *string) *clusterv1.Machine { + m := &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace}, + } + if fd != nil { + m.Spec = clusterv1.MachineSpec{FailureDomain: *fd} + } + return m + } + makeUnplacedCAPIMachine := func(name, namespace string) *clusterv1.Machine { + return makeCAPIMachine(name, namespace, nil) + } + + // VSphereMachine helpers + makeVSphereMachineOwned := func(vmName, vmgNamespace, ownerMachineName, mdName string) *vmwarev1.VSphereMachine { + return &vmwarev1.VSphereMachine{ + ObjectMeta: metav1.ObjectMeta{ + Name: vmName, + Namespace: vmgNamespace, + OwnerReferences: []metav1.OwnerReference{ + { + Kind: "Machine", + Name: ownerMachineName, + UID: types.UID(ownerMachineName + "-uid"), + }, + }, + Labels: map[string]string{fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName): "zone-1"}, + }, + } + } + makeVSphereMachineNoOwner := func(vmName, ns string) *vmwarev1.VSphereMachine { + return &vmwarev1.VSphereMachine{ + ObjectMeta: metav1.ObjectMeta{ + Name: vmName, + Namespace: ns, + OwnerReferences: []metav1.OwnerReference{}, + }, + } + } + + tests := []struct { + name string + targetMember []vmoprv1.GroupMember + vmgInput *vmoprv1.VirtualMachineGroup + mdNames []string + existingObjects []runtime.Object + wantAllowed bool + wantErr bool + }{ + { + name: "Allow member update if VirtualMachineGroup not existed", + targetMember: []vmoprv1.GroupMember{member(memberName1)}, + vmgInput: &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{Name: vmgName, Namespace: vmgNamespace}, + }, + mdNames: []string{mdName1}, + existingObjects: nil, + wantAllowed: true, + wantErr: false, + }, + { + name: "Allow member update if it is removing", + targetMember: []vmoprv1.GroupMember{}, + vmgInput: func() *vmoprv1.VirtualMachineGroup { + v := baseVMG.DeepCopy() + v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{ + {Members: []vmoprv1.GroupMember{ + { + Name: memberName1, + Kind: memberKind, + }}}} + return v + }(), + mdNames: []string{mdName1}, + existingObjects: func() 
[]runtime.Object { + v := baseVMG.DeepCopy() + v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{ + {Members: []vmoprv1.GroupMember{ + { + Name: memberName1, + Kind: memberKind, + }}}} + return []runtime.Object{v} + }(), + wantAllowed: true, + wantErr: false, + }, + { + name: "Allow member update when VMG Ready and All Annotations Present", + targetMember: []vmoprv1.GroupMember{member(memberName1)}, + vmgInput: baseVMG.DeepCopy(), + mdNames: []string{mdName1}, + existingObjects: func() []runtime.Object { + v := baseVMG.DeepCopy() + conditions.Set(v, metav1.Condition{ + Type: vmoprv1.ReadyConditionType, + Status: metav1.ConditionTrue}) + v.Annotations = map[string]string{fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA} + + v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ + { + Name: memberName1, + Kind: memberKind, + }, + }}} + + return []runtime.Object{v} + }(), + wantAllowed: true, + wantErr: false, + }, + { + name: "Skip member update if new member VSphereMachine Not Found", + targetMember: []vmoprv1.GroupMember{member(memberName1), member(memberName2)}, // vm-02 is new + vmgInput: func() *vmoprv1.VirtualMachineGroup { + v := baseVMG.DeepCopy() + v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ + { + Name: memberName1, + Kind: memberKind, + }, + }}} + return v + }(), + mdNames: []string{mdName1}, + existingObjects: func() []runtime.Object { + v := baseVMG.DeepCopy() + v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ + { + Name: memberName1, + Kind: memberKind, + }, + }}} + // vm-02 VSphereMachine is missing + return []runtime.Object{v, makeVSphereMachineOwned(memberName1, vmgNamespace, "m-01", mdName1), makeCAPIMachine("m-01", vmgNamespace, ptr.To(failureDomainA))} + }(), + wantAllowed: false, + wantErr: false, + }, + { + name: "Skip member update if VSphereMachine found but CAPI Machine missing", + targetMember: []vmoprv1.GroupMember{member(memberName1), member(memberName2)}, // vm-02 is new + vmgInput: func() *vmoprv1.VirtualMachineGroup { + v := baseVMG.DeepCopy() + v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ + { + Name: memberName1, + Kind: memberKind, + }, + }}} + return v + }(), + mdNames: []string{mdName1}, + existingObjects: func() []runtime.Object { + v := baseVMG.DeepCopy() + v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ + { + Name: memberName1, + Kind: memberKind, + }, + }}} + // vm-02 VSphereMachine exists but has no owner ref + return []runtime.Object{v, makeVSphereMachineOwned(memberName1, vmgNamespace, "m-01", mdName1), makeCAPIMachine("m-01", vmgNamespace, ptr.To(failureDomainA)), makeVSphereMachineNoOwner(memberName2, vmgNamespace)} + }(), + wantAllowed: false, + wantErr: false, + }, + { + name: "Allow member update if all new members have Machine FailureDomain specified", + targetMember: []vmoprv1.GroupMember{member(memberName1), member(memberName2)}, // vm-02 is new + vmgInput: func() *vmoprv1.VirtualMachineGroup { + v := baseVMG.DeepCopy() + v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ + { + Name: memberName1, + Kind: memberKind, + }, + }}} + return v + }(), + mdNames: []string{mdName1}, + existingObjects: func() []runtime.Object { + v := baseVMG.DeepCopy() + v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ + 
{ + Name: memberName1, + Kind: memberKind, + }, + }}} + // m-02 (owner of vm-02) has FailureDomain set -> Allowed + return []runtime.Object{ + v, + makeVSphereMachineOwned(memberName1, vmgNamespace, "m-01", mdName1), makeCAPIMachine("m-01", vmgNamespace, nil), + makeVSphereMachineOwned(memberName2, vmgNamespace, "m-02", mdName2), makeCAPIMachine("m-02", vmgNamespace, ptr.To(failureDomainA)), + } + }(), + wantAllowed: true, // Allowed because new members don't require VMO placement + wantErr: false, + }, + { + name: "Allow member update if no new member", + targetMember: []vmoprv1.GroupMember{member(memberName1)}, // No new members + vmgInput: baseVMG.DeepCopy(), + mdNames: []string{mdName1}, // Expects mdName1 annotation + existingObjects: func() []runtime.Object { + v := baseVMG.DeepCopy() + // Annotation for mdName1 is missing + v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ + { + Name: memberName1, + Kind: memberKind, + }, + }}} + return []runtime.Object{v} + }(), + wantAllowed: true, + wantErr: false, + }, + { + name: "Skip member update if new member Machine requires placement annotation", + targetMember: []vmoprv1.GroupMember{member(memberName1), member(memberName2)}, // vm-02 is new and requires placement + vmgInput: func() *vmoprv1.VirtualMachineGroup { + v := baseVMG.DeepCopy() + v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ + { + Name: memberName1, + Kind: memberKind, + }, + }}} + return v + }(), + mdNames: []string{mdName1}, + existingObjects: func() []runtime.Object { + v := baseVMG.DeepCopy() + v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ + { + Name: memberName1, + Kind: memberKind, + }, + }}} + // m-02 lacks FailureDomain and new Member requires placement annotation + return []runtime.Object{ + v, + makeVSphereMachineOwned(memberName1, vmgNamespace, "m-01", mdName1), makeCAPIMachine("m-01", vmgNamespace, ptr.To(failureDomainA)), + makeVSphereMachineOwned(memberName2, vmgNamespace, "m-02", mdName2), makeUnplacedCAPIMachine("m-02", vmgNamespace), + } + }(), + wantAllowed: false, + wantErr: false, + }, + { + name: "Allow new member Machine since required placement annotation exists", + targetMember: []vmoprv1.GroupMember{member(memberName1), member(memberName2)}, // vm-02 is new and requires placement + vmgInput: func() *vmoprv1.VirtualMachineGroup { + v := baseVMG.DeepCopy() + v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ + { + Name: memberName1, + Kind: memberKind, + }, + }}} + return v + }(), + mdNames: []string{mdName1}, + existingObjects: func() []runtime.Object { + v := baseVMG.DeepCopy() + v.Annotations = map[string]string{fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA} + v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ + { + Name: memberName1, + Kind: memberKind, + }, + }}} + return []runtime.Object{ + v, + makeVSphereMachineOwned(memberName1, vmgNamespace, "m-01", mdName1), makeCAPIMachine("m-01", vmgNamespace, ptr.To(failureDomainA)), + makeVSphereMachineOwned(memberName2, vmgNamespace, "m-02", mdName2), makeUnplacedCAPIMachine("m-02", vmgNamespace), + } + }(), + wantAllowed: true, + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + kubeClient := fake.NewClientBuilder().WithRuntimeObjects(tt.existingObjects...).Build() + + vmgInput := tt.vmgInput.DeepCopy() + + 
gotAllowed, err := isMemberUpdateAllowed(ctx, kubeClient, tt.targetMember, vmgInput) + + if (err != nil) != tt.wantErr { + t.Fatalf("isMemberUpdateAllowed() error = %v, wantErr %v", err, tt.wantErr) + } + + if gotAllowed != tt.wantAllowed { + t.Errorf("isMemberUpdateAllowed() gotAllowed = %t, wantAllowed %t", gotAllowed, tt.wantAllowed) + } + }) + } +} + func TestGetExpectedVSphereMachineCount(t *testing.T) { g := NewWithT(t) ctx := context.Background() @@ -244,44 +566,44 @@ func TestGenerateVirtualMachineGroupAnnotations(t *testing.T) { wantErr: false, }, { - name: "Skip if Member Kind is not VirtualMachine", + name: "Skip if VSphereMachine Missing MachineDeployment Label", vmg: func() *vmoprv1.VirtualMachineGroup { v := baseVMG.DeepCopy() v.Status = vmoprv1.VirtualMachineGroupStatus{ Members: []vmoprv1.VirtualMachineGroupMemberStatus{ - newVMGMemberStatus("VMG-1", "VirtualMachineGroup", true, true, "zone-x"), + newVMGMemberStatus("vsm-nolabel", "VirtualMachine", true, true, zoneA), }, } return v }(), - machineDeployments: []string{}, - initialClientObjects: []client.Object{}, + machineDeployments: []string{mdName1}, + initialClientObjects: []client.Object{vsmMissingLabel}, expectedAnnotations: map[string]string{}, wantErr: false, }, { - name: "Skip if VSphereMachine Missing MachineDeployment Label", + name: "Skip if VSphereMachine is Not Found in API", vmg: func() *vmoprv1.VirtualMachineGroup { v := baseVMG.DeepCopy() v.Status = vmoprv1.VirtualMachineGroupStatus{ Members: []vmoprv1.VirtualMachineGroupMemberStatus{ - newVMGMemberStatus("vsm-nolabel", "VirtualMachine", true, true, zoneA), + newVMGMemberStatus("non-existent-vm", "VirtualMachine", true, true, zoneA), }, } return v }(), machineDeployments: []string{mdName1}, - initialClientObjects: []client.Object{vsmMissingLabel}, + initialClientObjects: []client.Object{vsm1}, expectedAnnotations: map[string]string{}, wantErr: false, }, { - name: "Skip if VSphereMachine is Not Found in API", + name: "Skip if placement is nil", vmg: func() *vmoprv1.VirtualMachineGroup { v := baseVMG.DeepCopy() v.Status = vmoprv1.VirtualMachineGroupStatus{ Members: []vmoprv1.VirtualMachineGroupMemberStatus{ - newVMGMemberStatus("non-existent-vm", "VirtualMachine", true, true, zoneA), + newVMGMemberStatus(vsmName1, "VirtualMachine", true, false, zoneA), }, } return v @@ -292,12 +614,12 @@ func TestGenerateVirtualMachineGroupAnnotations(t *testing.T) { wantErr: false, }, { - name: "Skip if placement is nil", + name: "Skip if Zone is empty string", vmg: func() *vmoprv1.VirtualMachineGroup { v := baseVMG.DeepCopy() v.Status = vmoprv1.VirtualMachineGroupStatus{ Members: []vmoprv1.VirtualMachineGroupMemberStatus{ - newVMGMemberStatus(vsmName1, "VirtualMachine", true, false, zoneA), + newVMGMemberStatus(vsmName1, "VirtualMachine", true, true, ""), }, } return v @@ -308,20 +630,48 @@ func TestGenerateVirtualMachineGroupAnnotations(t *testing.T) { wantErr: false, }, { - name: "Skip if Zone is empty string", + name: "Deletes stale annotation for none-existed MD", + vmg: func() *vmoprv1.VirtualMachineGroup { + v := baseVMG.DeepCopy() + // This MD (mdNameStale) is NOT in the machineDeployments list below. 
+ v.SetAnnotations(map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdNameStale): zoneA, + "other/annotation": "keep-me", + }) + v.Status = vmoprv1.VirtualMachineGroupStatus{ + Members: []vmoprv1.VirtualMachineGroupMemberStatus{}, + } + return v + }(), + machineDeployments: []string{mdName1}, + initialClientObjects: []client.Object{}, + expectedAnnotations: map[string]string{ + "other/annotation": "keep-me", + }, + wantErr: false, + }, + { + name: "Cleans stale and adds new annotations", vmg: func() *vmoprv1.VirtualMachineGroup { v := baseVMG.DeepCopy() + // Stale annotation to be deleted + v.SetAnnotations(map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdNameStale): zoneB, + }) v.Status = vmoprv1.VirtualMachineGroupStatus{ Members: []vmoprv1.VirtualMachineGroupMemberStatus{ - newVMGMemberStatus(vsmName1, "VirtualMachine", true, true, ""), + newVMGMemberStatus(vsmName1, "VirtualMachine", true, true, zoneA), }, } return v }(), machineDeployments: []string{mdName1}, initialClientObjects: []client.Object{vsm1}, - expectedAnnotations: map[string]string{}, - wantErr: false, + expectedAnnotations: map[string]string{ + // Stale annotation for mdNameStale should be gone + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA, + }, + wantErr: false, }, } @@ -331,12 +681,12 @@ func TestGenerateVirtualMachineGroupAnnotations(t *testing.T) { t.Run(tt.name, func(t *testing.T) { g := NewWithT(t) fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(tt.initialClientObjects...).Build() - annotations, err := GenerateVirtualMachineGroupAnnotations(ctx, fakeClient, tt.vmg, tt.machineDeployments) + err := generateVirtualMachineGroupAnnotations(ctx, fakeClient, tt.vmg, tt.machineDeployments) if tt.wantErr { g.Expect(err).To(HaveOccurred()) } else { g.Expect(err).NotTo(HaveOccurred()) - g.Expect(annotations).To(Equal(tt.expectedAnnotations)) + g.Expect(tt.vmg.Annotations).To(Equal(tt.expectedAnnotations)) } }) } @@ -386,7 +736,7 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { initialObjects: []client.Object{ newCluster(clusterName, clusterNamespace, false, 1, 0), }, - expectedResult: reconcile.Result{RequeueAfter: reconciliationDelay}, + expectedResult: reconcile.Result{}, checkVMGExists: false, }, { @@ -395,11 +745,11 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { cluster.DeepCopy(), md1.DeepCopy(), }, - expectedResult: reconcile.Result{RequeueAfter: reconciliationDelay}, + expectedResult: reconcile.Result{}, checkVMGExists: false, }, { - name: "Should Succeed if VMG is created", + name: "Should Succeed to create VMG", initialObjects: []client.Object{ cluster.DeepCopy(), md1.DeepCopy(), @@ -415,11 +765,25 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { md1.DeepCopy(), vsm1.DeepCopy(), &vmoprv1.VirtualMachineGroup{ - ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: clusterNamespace}, + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: clusterNamespace, + Labels: map[string]string{clusterv1.ClusterNameLabel: cluster.Name}, + }, + Spec: vmoprv1.VirtualMachineGroupSpec{ + BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ + { + Members: []vmoprv1.GroupMember{ + { + Name: vsm1.Name, + Kind: "VSphereMachine", + }, + }, + }, + }, + }, }, }, - expectedResult: reconcile.Result{}, - checkVMGExists: true, }, } diff --git a/pkg/services/vmoperator/vmopmachine.go b/pkg/services/vmoperator/vmopmachine.go index 4c7ad8234c..98b1a2e308 100644 --- 
a/pkg/services/vmoperator/vmopmachine.go +++ b/pkg/services/vmoperator/vmopmachine.go @@ -31,6 +31,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" apitypes "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/klog/v2" "k8s.io/utils/ptr" clusterv1beta1 "sigs.k8s.io/cluster-api/api/core/v1beta1" clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" @@ -48,6 +49,11 @@ import ( infrautilv1 "sigs.k8s.io/cluster-api-provider-vsphere/pkg/util" ) +const ( + // ZoneAnnotationPrefix is the prefix used for placement decision annotations which will be set on VirtualMachineGroup. + ZoneAnnotationPrefix = "zone.vmware.infrastructure.cluster.x-k8s.io" +) + // VmopMachineService reconciles VM Operator VM. type VmopMachineService struct { Client client.Client @@ -165,11 +171,11 @@ func (v *VmopMachineService) SyncFailureReason(_ context.Context, machineCtx cap return supervisorMachineCtx.VSphereMachine.Status.FailureReason != nil || supervisorMachineCtx.VSphereMachine.Status.FailureMessage != nil, nil } -// affinityInfo is an internal to store VM affinity information. +// affinityInfo is an internal struct used to store information about VM affinity. type affinityInfo struct { - affinitySpec *vmoprv1.AffinitySpec + affinitySpec vmoprv1.AffinitySpec vmGroupName string - failureDomain *string + failureDomain string } // ReconcileNormal reconciles create and update events for VM Operator VMs. @@ -193,7 +199,7 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap // Set the VM state. Will get reset throughout the reconcile supervisorMachineCtx.VSphereMachine.Status.VMStatus = vmwarev1.VirtualMachineStatePending - var affInfo affinityInfo + var affInfo *affinityInfo if feature.Gates.Enabled(feature.NodeAutoPlacement) && !infrautilv1.IsControlPlaneMachine(machineCtx.GetVSphereMachine()) { vmOperatorVMGroup := &vmoprv1.VirtualMachineGroup{} @@ -206,16 +212,20 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap if !apierrors.IsNotFound(err) { return false, err } - if apierrors.IsNotFound(err) { - log.V(4).Info("VirtualMachineGroup not found, requeueing", "Name", key.Name, "Namespace", key.Namespace) - return true, nil - } + + v1beta2conditions.Set(supervisorMachineCtx.VSphereMachine, metav1.Condition{ + Type: infrav1.VSphereMachineVirtualMachineProvisionedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: infrav1.VSphereMachineVirtualMachineWaitingForVirtualMachineGroupV1Beta2Reason, + }) + log.V(4).Info(fmt.Sprintf("Waiting for VirtualMachineGroup %s, requeueing", key.Name), "VirtualMachineGroup", klog.KRef(key.Namespace, key.Name)) + return true, nil } // Proceed only if the machine is a member of the VirtualMachineGroup. 
isMember, err := v.checkVirtualMachineGroupMembership(vmOperatorVMGroup, supervisorMachineCtx) if err != nil { - return true, errors.Wrapf(err, "%s", fmt.Sprintf("failed to check if VirtualMachine %s is a member of VirtualMachineGroup %s/%s", supervisorMachineCtx.VSphereMachine.Name, vmOperatorVMGroup.Name, vmOperatorVMGroup.Namespace)) + return true, errors.Wrapf(err, "%s", fmt.Sprintf("failed to check if VirtualMachine %s is a member of VirtualMachineGroup %s", supervisorMachineCtx.VSphereMachine.Name, klog.KObj(vmOperatorVMGroup))) } if !isMember { v1beta2conditions.Set(supervisorMachineCtx.VSphereMachine, metav1.Condition{ @@ -223,19 +233,19 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap Status: metav1.ConditionFalse, Reason: infrav1.VSphereMachineVirtualMachineWaitingForVirtualMachineGroupV1Beta2Reason, }) - log.V(4).Info("Waiting for VirtualMachineGroup membership, requeueing", "VM Name", supervisorMachineCtx.Machine.Name) + log.V(4).Info(fmt.Sprintf("Waiting for VirtualMachineGroup %s membership, requeueing", key.Name), "VirtualMachineGroup", klog.KRef(key.Namespace, key.Name)) return true, nil } - affInfo = affinityInfo{ + affInfo = &affinityInfo{ vmGroupName: vmOperatorVMGroup.Name, } - // Set the zone label using the annotation of the per-md zone mapping from VMG. - // This is for new VMs created during day-2 operations in VC 9.1. - nodePool := supervisorMachineCtx.Machine.Labels[clusterv1.MachineDeploymentNameLabel] - if zone, ok := vmOperatorVMGroup.Annotations[fmt.Sprintf("zone.cluster.x-k8s.io/%s", nodePool)]; ok && zone != "" { - affInfo.failureDomain = ptr.To(zone) + // Set the zone label using the annotation of the per-md zone mapping from VirtualMachineGroup. + // This is for new VMs created during day-2 operations when Node Auto Placement is enabled. 
+ mdName := supervisorMachineCtx.Machine.Labels[clusterv1.MachineDeploymentNameLabel] + if fd, ok := vmOperatorVMGroup.Annotations[fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName)]; ok && fd != "" { + affInfo.failureDomain = fd } // Fetch machine deployments without explicit failureDomain specified @@ -246,21 +256,21 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap client.MatchingLabels{clusterv1.ClusterNameLabel: supervisorMachineCtx.Cluster.Name}); err != nil { return false, err } - mdNames := []string{} + othermMDNames := []string{} for _, machineDeployment := range machineDeployments.Items { - if machineDeployment.Spec.Template.Spec.FailureDomain == "" && machineDeployment.Name != nodePool { - mdNames = append(mdNames, machineDeployment.Name) + if machineDeployment.Spec.Template.Spec.FailureDomain == "" && machineDeployment.Name != mdName { + othermMDNames = append(othermMDNames, machineDeployment.Name) } } - sort.Strings(mdNames) + sort.Strings(othermMDNames) - affInfo.affinitySpec = &vmoprv1.AffinitySpec{ + affInfo.affinitySpec = vmoprv1.AffinitySpec{ VMAffinity: &vmoprv1.VMAffinitySpec{ RequiredDuringSchedulingPreferredDuringExecution: []vmoprv1.VMAffinityTerm{ { LabelSelector: &metav1.LabelSelector{ MatchLabels: map[string]string{ - clusterv1.MachineDeploymentNameLabel: nodePool, + clusterv1.MachineDeploymentNameLabel: mdName, }, }, TopologyKey: corev1.LabelTopologyZone, @@ -272,7 +282,7 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap { LabelSelector: &metav1.LabelSelector{ MatchLabels: map[string]string{ - clusterv1.MachineDeploymentNameLabel: nodePool, + clusterv1.MachineDeploymentNameLabel: mdName, }, }, TopologyKey: corev1.LabelHostname, @@ -280,7 +290,7 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap }, }, } - if len(mdNames) > 0 { + if len(othermMDNames) > 0 { affInfo.affinitySpec.VMAntiAffinity.PreferredDuringSchedulingPreferredDuringExecution = append( affInfo.affinitySpec.VMAntiAffinity.PreferredDuringSchedulingPreferredDuringExecution, vmoprv1.VMAffinityTerm{ @@ -289,7 +299,7 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap { Key: clusterv1.MachineDeploymentNameLabel, Operator: metav1.LabelSelectorOpIn, - Values: mdNames, + Values: othermMDNames, }, }, }, @@ -323,7 +333,7 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap } // Reconcile the VM Operator VirtualMachine. - if err := v.reconcileVMOperatorVM(ctx, supervisorMachineCtx, vmOperatorVM, &affInfo); err != nil { + if err := v.reconcileVMOperatorVM(ctx, supervisorMachineCtx, vmOperatorVM, affInfo); err != nil { v1beta1conditions.MarkFalse(supervisorMachineCtx.VSphereMachine, infrav1.VMProvisionedCondition, vmwarev1.VMCreationFailedReason, clusterv1beta1.ConditionSeverityWarning, "failed to create or update VirtualMachine: %v", err) v1beta2conditions.Set(supervisorMachineCtx.VSphereMachine, metav1.Condition{ @@ -609,9 +619,17 @@ func (v *VmopMachineService) reconcileVMOperatorVM(ctx context.Context, supervis vmOperatorVM = typedModified } - if affinityInfo != nil && affinityInfo.affinitySpec != nil { + // Set VM Affinity rules and GroupName. + // The Affinity rules set in Spec.Affinity primarily take effect only during the + // initial placement. + // These rules DO NOT impact new VMs created after initial placement, such as scaling up, + // because placement relies on information derived from + // VirtualMachineGroup annotations. 
This ensures all the VMs + // for a MachineDeployment are placed in the same failureDomain. + // Note: no matter of the different placement behaviour, we are setting affinity rules on all machines for consistency. + if affinityInfo != nil { if vmOperatorVM.Spec.Affinity == nil { - vmOperatorVM.Spec.Affinity = affinityInfo.affinitySpec + vmOperatorVM.Spec.Affinity = &affinityInfo.affinitySpec } if vmOperatorVM.Spec.GroupName == "" { vmOperatorVM.Spec.GroupName = affinityInfo.vmGroupName @@ -855,8 +873,10 @@ func (v *VmopMachineService) addVolumes(ctx context.Context, supervisorMachineCt // which is required when the cluster has multiple (3) zones. // Single zone clusters (legacy/default) do not support zonal storage and must not // have the zone annotation set. + // // However, with Node Auto Placement enabled, failureDomain is optional and CAPV no longer - // sets PVC annotations. PVC placement now follows the StorageClass behavior (Immediate or WaitForFirstConsumer).Í + // sets PVC annotations when creating worker VMs. PVC placement now follows the StorageClass behavior (Immediate or WaitForFirstConsumer). + // Control Plane VMs will still have failureDomain set, and we will set PVC annotation. zonal := len(supervisorMachineCtx.VSphereCluster.Status.FailureDomains) > 1 if zone := supervisorMachineCtx.VSphereMachine.Spec.FailureDomain; zonal && zone != nil { @@ -915,9 +935,11 @@ func getVMLabels(supervisorMachineCtx *vmware.SupervisorMachineContext, vmLabels vmLabels[k] = v } - // Get the labels that determine the VM's placement - var failureDomain *string - if affinityInfo != nil && affinityInfo.failureDomain != nil { + // Set the labels that determine the VM's placement. + // Note: if the failureDomain is not set, auto placement will happen according to affinity rules on VM during initial Cluster creation. + // For VM created during day-2 operation like scaling up, we should expect the failureDomain to be always set. + var failureDomain string + if affinityInfo != nil && affinityInfo.failureDomain != "" { failureDomain = affinityInfo.failureDomain } topologyLabels := getTopologyLabels(supervisorMachineCtx, failureDomain) @@ -943,7 +965,7 @@ func getVMLabels(supervisorMachineCtx *vmware.SupervisorMachineContext, vmLabels // // and thus the code is optimized as such. However, in the future // this function may return a more diverse topology. -func getTopologyLabels(supervisorMachineCtx *vmware.SupervisorMachineContext, failureDomain *string) map[string]string { +func getTopologyLabels(supervisorMachineCtx *vmware.SupervisorMachineContext, failureDomain string) map[string]string { // This is for explicit placement. if fd := supervisorMachineCtx.VSphereMachine.Spec.FailureDomain; fd != nil && *fd != "" { return map[string]string{ @@ -951,9 +973,9 @@ func getTopologyLabels(supervisorMachineCtx *vmware.SupervisorMachineContext, fa } } // This is for automatic placement. 
- if failureDomain != nil && *failureDomain != "" { + if failureDomain != "" { return map[string]string{ - corev1.LabelTopologyZone: *failureDomain, + corev1.LabelTopologyZone: failureDomain, } } return nil diff --git a/pkg/services/vmoperator/vmopmachine_test.go b/pkg/services/vmoperator/vmopmachine_test.go index aaa9e9b437..f2f80789ad 100644 --- a/pkg/services/vmoperator/vmopmachine_test.go +++ b/pkg/services/vmoperator/vmopmachine_test.go @@ -947,7 +947,7 @@ var _ = Describe("VirtualMachine tests", func() { Name: fdClusterName, Namespace: corev1.NamespaceDefault, Annotations: map[string]string{ - fmt.Sprintf("zone.cluster.x-k8s.io/%s", machineDeploymentName): failureDomainName, + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, machineDeploymentName): failureDomainName, }, }, Spec: vmoprv1.VirtualMachineGroupSpec{ From 9cc5762e4f692b77944d6e9f9c40a3a17ae6c851 Mon Sep 17 00:00:00 2001 From: Gong Zhang Date: Sun, 23 Nov 2025 19:35:04 +0800 Subject: [PATCH 17/25] Fix UT failure Signed-off-by: Gong Zhang --- .../vmware/virtualmachinegroup_reconciler.go | 5 +- .../virtualmachinegroup_reconciler_test.go | 134 +++++++----------- 2 files changed, 52 insertions(+), 87 deletions(-) diff --git a/controllers/vmware/virtualmachinegroup_reconciler.go b/controllers/vmware/virtualmachinegroup_reconciler.go index 3e02cc5f21..dbe43b6172 100644 --- a/controllers/vmware/virtualmachinegroup_reconciler.go +++ b/controllers/vmware/virtualmachinegroup_reconciler.go @@ -359,7 +359,7 @@ func isMemberUpdateAllowed(ctx context.Context, kubeClient client.Client, target } // If FailureDomain is NOT set. Requires placement or placement Annotation. Fall through to full VMG Annotation check. - logger.V(5).Info("New member's CAPI Machine lacks FailureDomain. Falling through to full VMG Ready and Annotation check.", "MachineName", machineOwnerName) + logger.V(5).Info("New member's CAPI Machine lacks FailureDomain. Falling through to VMG Annotation check.", "MachineName", machineOwnerName) // If no Placement Annotations, skip member update and wait for it. 
annotations := vmg.GetAnnotations() @@ -368,6 +368,9 @@ func isMemberUpdateAllowed(ctx context.Context, kubeClient client.Client, target } mdLabelName := vsphereMachine.Labels[clusterv1.MachineDeploymentNameLabel] + if mdLabelName == "" { + return false, errors.Wrapf(nil, "VSphereMachine doesn't have MachineDeployment name label %s", klog.KObj(vsphereMachine)) + } annotationKey := fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdLabelName) diff --git a/controllers/vmware/virtualmachinegroup_reconciler_test.go b/controllers/vmware/virtualmachinegroup_reconciler_test.go index d8bed93b50..da6f64fc54 100644 --- a/controllers/vmware/virtualmachinegroup_reconciler_test.go +++ b/controllers/vmware/virtualmachinegroup_reconciler_test.go @@ -41,20 +41,22 @@ import ( ) const ( - clusterName = "test-cluster" - otherClusterName = "other-cluster" - clusterNamespace = "test-ns" - mdName1 = "md-worker-a" - mdName2 = "md-worker-b" - mdNameStale = "md-stale-c" - zoneA = "zone-a" - zoneB = "zone-b" - vmgName = "test-vmg" - vmgNamespace = "test-vmg-ns" - memberName1 = "vm-01" - memberName2 = "vm-02" - memberKind = "VirtualMachine" - failureDomainA = "zone-1" + clusterName = "test-cluster" + otherClusterName = "other-cluster" + clusterNamespace = "test-ns" + mdName1 = "md-worker-a" + mdName2 = "md-worker-b" + mdNameStale = "md-stale-c" + zoneA = "zone-a" + zoneB = "zone-b" + vmgName = "test-vmg" + vmgNamespace = "test-vmg-ns" + memberName1 = "vm-01" + memberName2 = "vm-02" + ownerMachineName1 = "m-01" + ownerMachineName2 = "m-02" + memberKind = "VirtualMachine" + failureDomainA = "zone-a" ) func TestIsMemberUpdateAllowed(t *testing.T) { @@ -95,7 +97,7 @@ func TestIsMemberUpdateAllowed(t *testing.T) { UID: types.UID(ownerMachineName + "-uid"), }, }, - Labels: map[string]string{fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName): "zone-1"}, + Labels: map[string]string{clusterv1.MachineDeploymentNameLabel: mdName}, }, } } @@ -183,17 +185,8 @@ func TestIsMemberUpdateAllowed(t *testing.T) { { name: "Skip member update if new member VSphereMachine Not Found", targetMember: []vmoprv1.GroupMember{member(memberName1), member(memberName2)}, // vm-02 is new - vmgInput: func() *vmoprv1.VirtualMachineGroup { - v := baseVMG.DeepCopy() - v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ - { - Name: memberName1, - Kind: memberKind, - }, - }}} - return v - }(), - mdNames: []string{mdName1}, + vmgInput: baseVMG.DeepCopy(), + mdNames: []string{mdName1}, existingObjects: func() []runtime.Object { v := baseVMG.DeepCopy() v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ @@ -203,7 +196,7 @@ func TestIsMemberUpdateAllowed(t *testing.T) { }, }}} // vm-02 VSphereMachine is missing - return []runtime.Object{v, makeVSphereMachineOwned(memberName1, vmgNamespace, "m-01", mdName1), makeCAPIMachine("m-01", vmgNamespace, ptr.To(failureDomainA))} + return []runtime.Object{v, makeVSphereMachineOwned(memberName1, vmgNamespace, ownerMachineName1, mdName1), makeCAPIMachine(ownerMachineName1, vmgNamespace, ptr.To(failureDomainA))} }(), wantAllowed: false, wantErr: false, @@ -211,17 +204,8 @@ func TestIsMemberUpdateAllowed(t *testing.T) { { name: "Skip member update if VSphereMachine found but CAPI Machine missing", targetMember: []vmoprv1.GroupMember{member(memberName1), member(memberName2)}, // vm-02 is new - vmgInput: func() *vmoprv1.VirtualMachineGroup { - v := baseVMG.DeepCopy() - v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: 
[]vmoprv1.GroupMember{ - { - Name: memberName1, - Kind: memberKind, - }, - }}} - return v - }(), - mdNames: []string{mdName1}, + vmgInput: baseVMG.DeepCopy(), + mdNames: []string{mdName1}, existingObjects: func() []runtime.Object { v := baseVMG.DeepCopy() v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ @@ -231,7 +215,7 @@ func TestIsMemberUpdateAllowed(t *testing.T) { }, }}} // vm-02 VSphereMachine exists but has no owner ref - return []runtime.Object{v, makeVSphereMachineOwned(memberName1, vmgNamespace, "m-01", mdName1), makeCAPIMachine("m-01", vmgNamespace, ptr.To(failureDomainA)), makeVSphereMachineNoOwner(memberName2, vmgNamespace)} + return []runtime.Object{v, makeVSphereMachineOwned(memberName1, vmgNamespace, "ownerMachineName1", mdName1), makeCAPIMachine("ownerMachineName1", vmgNamespace, ptr.To(failureDomainA)), makeVSphereMachineNoOwner(memberName2, vmgNamespace)} }(), wantAllowed: false, wantErr: false, @@ -239,17 +223,8 @@ func TestIsMemberUpdateAllowed(t *testing.T) { { name: "Allow member update if all new members have Machine FailureDomain specified", targetMember: []vmoprv1.GroupMember{member(memberName1), member(memberName2)}, // vm-02 is new - vmgInput: func() *vmoprv1.VirtualMachineGroup { - v := baseVMG.DeepCopy() - v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ - { - Name: memberName1, - Kind: memberKind, - }, - }}} - return v - }(), - mdNames: []string{mdName1}, + vmgInput: baseVMG.DeepCopy(), + mdNames: []string{mdName1}, existingObjects: func() []runtime.Object { v := baseVMG.DeepCopy() v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ @@ -258,11 +233,11 @@ func TestIsMemberUpdateAllowed(t *testing.T) { Kind: memberKind, }, }}} - // m-02 (owner of vm-02) has FailureDomain set -> Allowed + // m-02 (owner of vownerMachineName2) has FailureDomain set return []runtime.Object{ v, - makeVSphereMachineOwned(memberName1, vmgNamespace, "m-01", mdName1), makeCAPIMachine("m-01", vmgNamespace, nil), - makeVSphereMachineOwned(memberName2, vmgNamespace, "m-02", mdName2), makeCAPIMachine("m-02", vmgNamespace, ptr.To(failureDomainA)), + makeVSphereMachineOwned(memberName1, vmgNamespace, "ownerMachineName1", mdName1), makeCAPIMachine("ownerMachineName1", vmgNamespace, nil), + makeVSphereMachineOwned(memberName2, vmgNamespace, "ownerMachineName2", mdName2), makeCAPIMachine("ownerMachineName2", vmgNamespace, ptr.To(failureDomainA)), } }(), wantAllowed: true, // Allowed because new members don't require VMO placement @@ -290,17 +265,8 @@ func TestIsMemberUpdateAllowed(t *testing.T) { { name: "Skip member update if new member Machine requires placement annotation", targetMember: []vmoprv1.GroupMember{member(memberName1), member(memberName2)}, // vm-02 is new and requires placement - vmgInput: func() *vmoprv1.VirtualMachineGroup { - v := baseVMG.DeepCopy() - v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ - { - Name: memberName1, - Kind: memberKind, - }, - }}} - return v - }(), - mdNames: []string{mdName1}, + vmgInput: baseVMG.DeepCopy(), + mdNames: []string{mdName1}, existingObjects: func() []runtime.Object { v := baseVMG.DeepCopy() v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ @@ -312,8 +278,8 @@ func TestIsMemberUpdateAllowed(t *testing.T) { // m-02 lacks FailureDomain and new Member requires placement annotation return []runtime.Object{ v, - 
makeVSphereMachineOwned(memberName1, vmgNamespace, "m-01", mdName1), makeCAPIMachine("m-01", vmgNamespace, ptr.To(failureDomainA)), - makeVSphereMachineOwned(memberName2, vmgNamespace, "m-02", mdName2), makeUnplacedCAPIMachine("m-02", vmgNamespace), + makeVSphereMachineOwned(memberName1, vmgNamespace, "ownerMachineName1", mdName1), makeCAPIMachine("ownerMachineName1", vmgNamespace, ptr.To(failureDomainA)), + makeVSphereMachineOwned(memberName2, vmgNamespace, "ownerMachineName2", mdName2), makeUnplacedCAPIMachine("ownerMachineName2", vmgNamespace), } }(), wantAllowed: false, @@ -322,20 +288,14 @@ func TestIsMemberUpdateAllowed(t *testing.T) { { name: "Allow new member Machine since required placement annotation exists", targetMember: []vmoprv1.GroupMember{member(memberName1), member(memberName2)}, // vm-02 is new and requires placement - vmgInput: func() *vmoprv1.VirtualMachineGroup { - v := baseVMG.DeepCopy() - v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ - { - Name: memberName1, - Kind: memberKind, - }, - }}} - return v - }(), - mdNames: []string{mdName1}, + vmgInput: baseVMG.DeepCopy(), + mdNames: []string{mdName1}, existingObjects: func() []runtime.Object { v := baseVMG.DeepCopy() - v.Annotations = map[string]string{fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA} + v.Annotations = map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA, + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName2): zoneB, + } v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ { Name: memberName1, @@ -344,8 +304,8 @@ func TestIsMemberUpdateAllowed(t *testing.T) { }}} return []runtime.Object{ v, - makeVSphereMachineOwned(memberName1, vmgNamespace, "m-01", mdName1), makeCAPIMachine("m-01", vmgNamespace, ptr.To(failureDomainA)), - makeVSphereMachineOwned(memberName2, vmgNamespace, "m-02", mdName2), makeUnplacedCAPIMachine("m-02", vmgNamespace), + makeVSphereMachineOwned(memberName1, vmgNamespace, "ownerMachineName1", mdName1), makeCAPIMachine("ownerMachineName1", vmgNamespace, nil), + makeVSphereMachineOwned(memberName2, vmgNamespace, "ownerMachineName2", mdName2), makeUnplacedCAPIMachine("ownerMachineName2", vmgNamespace), } }(), wantAllowed: true, @@ -354,19 +314,21 @@ func TestIsMemberUpdateAllowed(t *testing.T) { } for _, tt := range tests { + // Looks odd, but need to reinitialize test variable + tt := tt t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) kubeClient := fake.NewClientBuilder().WithRuntimeObjects(tt.existingObjects...).Build() vmgInput := tt.vmgInput.DeepCopy() gotAllowed, err := isMemberUpdateAllowed(ctx, kubeClient, tt.targetMember, vmgInput) - if (err != nil) != tt.wantErr { - t.Fatalf("isMemberUpdateAllowed() error = %v, wantErr %v", err, tt.wantErr) - } - - if gotAllowed != tt.wantAllowed { - t.Errorf("isMemberUpdateAllowed() gotAllowed = %t, wantAllowed %t", gotAllowed, tt.wantAllowed) + if tt.wantErr { + g.Expect(err).To(HaveOccurred()) + } else { + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(gotAllowed).To(Equal(true)) } }) } From b98116890dcda9a163de90cd896a31110cbadeea Mon Sep 17 00:00:00 2001 From: Gong Zhang Date: Sun, 23 Nov 2025 21:12:30 +0800 Subject: [PATCH 18/25] Refine godoc Signed-off-by: Gong Zhang --- .../vmware/virtualmachinegroup_reconciler.go | 21 +----- .../virtualmachinegroup_reconciler_test.go | 23 ++----- pkg/services/vmoperator/vmopmachine.go | 64 +++++++++++-------- 3 files changed, 45 insertions(+), 63 deletions(-) diff --git 
a/controllers/vmware/virtualmachinegroup_reconciler.go b/controllers/vmware/virtualmachinegroup_reconciler.go index dbe43b6172..e35488d034 100644 --- a/controllers/vmware/virtualmachinegroup_reconciler.go +++ b/controllers/vmware/virtualmachinegroup_reconciler.go @@ -40,7 +40,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1" - infrautilv1 "sigs.k8s.io/cluster-api-provider-vsphere/pkg/util" + "sigs.k8s.io/cluster-api-provider-vsphere/pkg/services/vmoperator" ) const ( @@ -167,7 +167,7 @@ func (r *VirtualMachineGroupReconciler) createOrUpdateVirtualMachineGroup(ctx co // Generate VM names according to the naming strategy set on the VSphereMachine. vmNames := make([]string, 0, len(currentVSphereMachines)) for _, machine := range currentVSphereMachines { - name, err := GenerateVirtualMachineName(machine.Name, machine.Spec.NamingStrategy) + name, err := vmoperator.GenerateVirtualMachineName(machine.Name, machine.Spec.NamingStrategy) if err != nil { return reconcile.Result{}, err } @@ -536,20 +536,3 @@ func generateVirtualMachineGroupAnnotations(ctx context.Context, kubeClient clie return nil } - -// GenerateVirtualMachineName generates the name of a VirtualMachine based on the naming strategy. -// Duplicated this logic from pkg/services/vmoperator/vmopmachine.go. -func GenerateVirtualMachineName(machineName string, namingStrategy *vmwarev1.VirtualMachineNamingStrategy) (string, error) { - // Per default the name of the VirtualMachine should be equal to the Machine name (this is the same as "{{ .machine.name }}") - if namingStrategy == nil || namingStrategy.Template == nil { - // Note: No need to trim to max length in this case as valid Machine names will also be valid VirtualMachine names. 
- return machineName, nil - } - - name, err := infrautilv1.GenerateMachineNameFromTemplate(machineName, namingStrategy.Template) - if err != nil { - return "", errors.Wrapf(err, "failed to generate name for VirtualMachine %s", machineName) - } - - return name, nil -} diff --git a/controllers/vmware/virtualmachinegroup_reconciler_test.go b/controllers/vmware/virtualmachinegroup_reconciler_test.go index da6f64fc54..a7a887f862 100644 --- a/controllers/vmware/virtualmachinegroup_reconciler_test.go +++ b/controllers/vmware/virtualmachinegroup_reconciler_test.go @@ -121,11 +121,9 @@ func TestIsMemberUpdateAllowed(t *testing.T) { wantErr bool }{ { - name: "Allow member update if VirtualMachineGroup not existed", - targetMember: []vmoprv1.GroupMember{member(memberName1)}, - vmgInput: &vmoprv1.VirtualMachineGroup{ - ObjectMeta: metav1.ObjectMeta{Name: vmgName, Namespace: vmgNamespace}, - }, + name: "Allow member update if VirtualMachineGroup not existed", + targetMember: []vmoprv1.GroupMember{member(memberName1)}, + vmgInput: baseVMG.DeepCopy(), mdNames: []string{mdName1}, existingObjects: nil, wantAllowed: true, @@ -134,17 +132,8 @@ func TestIsMemberUpdateAllowed(t *testing.T) { { name: "Allow member update if it is removing", targetMember: []vmoprv1.GroupMember{}, - vmgInput: func() *vmoprv1.VirtualMachineGroup { - v := baseVMG.DeepCopy() - v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{ - {Members: []vmoprv1.GroupMember{ - { - Name: memberName1, - Kind: memberKind, - }}}} - return v - }(), - mdNames: []string{mdName1}, + vmgInput: baseVMG.DeepCopy(), + mdNames: []string{mdName1}, existingObjects: func() []runtime.Object { v := baseVMG.DeepCopy() v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{ @@ -328,7 +317,7 @@ func TestIsMemberUpdateAllowed(t *testing.T) { g.Expect(err).To(HaveOccurred()) } else { g.Expect(err).NotTo(HaveOccurred()) - g.Expect(gotAllowed).To(Equal(true)) + g.Expect(gotAllowed).To(Equal(tt.wantAllowed)) } }) } diff --git a/pkg/services/vmoperator/vmopmachine.go b/pkg/services/vmoperator/vmopmachine.go index 98b1a2e308..460ad50fdf 100644 --- a/pkg/services/vmoperator/vmopmachine.go +++ b/pkg/services/vmoperator/vmopmachine.go @@ -199,15 +199,31 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap // Set the VM state. Will get reset throughout the reconcile supervisorMachineCtx.VSphereMachine.Status.VMStatus = vmwarev1.VirtualMachineStatePending + // Get the VirtualMachine object Key + vmOperatorVM := &vmoprv1.VirtualMachine{} + vmKey, err := virtualMachineObjectKey(supervisorMachineCtx.Machine.Name, supervisorMachineCtx.Machine.Namespace, supervisorMachineCtx.VSphereMachine.Spec.NamingStrategy) + if err != nil { + return false, err + } + + // When creating a new cluster and the user doesn't provide info about placement of VMs in a specific failure domain, + // CAPV will define affinity rules to ensure proper placement of the machine. + // + // - All the machines belonging to the same MachineDeployment should be placed in the same failure domain - required. + // - All the machines belonging to the same MachineDeployment should be spread across esxi hosts in the same failure domain - best-efforts. + // - Different MachineDeployments and corresponding VMs should be spread across failure domains - best-efforts. + // + // Note: Control plane VM placement doesn't follow the above rules, and the assumption + // is that failureDomain is always set for control plane VMs. 
var affInfo *affinityInfo if feature.Gates.Enabled(feature.NodeAutoPlacement) && !infrautilv1.IsControlPlaneMachine(machineCtx.GetVSphereMachine()) { - vmOperatorVMGroup := &vmoprv1.VirtualMachineGroup{} + vmGroup := &vmoprv1.VirtualMachineGroup{} key := client.ObjectKey{ Namespace: supervisorMachineCtx.Cluster.Namespace, Name: supervisorMachineCtx.Cluster.Name, } - err := v.Client.Get(ctx, key, vmOperatorVMGroup) + err := v.Client.Get(ctx, key, vmGroup) if err != nil { if !apierrors.IsNotFound(err) { return false, err @@ -222,11 +238,8 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap return true, nil } - // Proceed only if the machine is a member of the VirtualMachineGroup. - isMember, err := v.checkVirtualMachineGroupMembership(vmOperatorVMGroup, supervisorMachineCtx) - if err != nil { - return true, errors.Wrapf(err, "%s", fmt.Sprintf("failed to check if VirtualMachine %s is a member of VirtualMachineGroup %s", supervisorMachineCtx.VSphereMachine.Name, klog.KObj(vmOperatorVMGroup))) - } + // Proceed only if the VSphereMachine is a member of the VirtualMachineGroup. + isMember := v.checkVirtualMachineGroupMembership(vmGroup, vmKey.Name) if !isMember { v1beta2conditions.Set(supervisorMachineCtx.VSphereMachine, metav1.Condition{ Type: infrav1.VSphereMachineVirtualMachineProvisionedV1Beta2Condition, @@ -238,18 +251,19 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap } affInfo = &affinityInfo{ - vmGroupName: vmOperatorVMGroup.Name, + vmGroupName: vmGroup.Name, } // Set the zone label using the annotation of the per-md zone mapping from VirtualMachineGroup. // This is for new VMs created during day-2 operations when Node Auto Placement is enabled. mdName := supervisorMachineCtx.Machine.Labels[clusterv1.MachineDeploymentNameLabel] - if fd, ok := vmOperatorVMGroup.Annotations[fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName)]; ok && fd != "" { + if fd, ok := vmGroup.Annotations[fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName)]; ok && fd != "" { affInfo.failureDomain = fd } - // Fetch machine deployments without explicit failureDomain specified - // to use when setting the anti-affinity rules. + // VM in a MachineDeployment ideally should be placed in a different failure domain than VMs + // in other MachineDeployments. + // In order to do so, collect names of all the MachineDeployments except the one the VM belongs to. machineDeployments := &clusterv1.MachineDeploymentList{} if err := v.Client.List(ctx, machineDeployments, client.InNamespace(supervisorMachineCtx.Cluster.Namespace), @@ -266,6 +280,7 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap affInfo.affinitySpec = vmoprv1.AffinitySpec{ VMAffinity: &vmoprv1.VMAffinitySpec{ + // All the machines belonging to the same MachineDeployment should be placed in the same failure domain - required. RequiredDuringSchedulingPreferredDuringExecution: []vmoprv1.VMAffinityTerm{ { LabelSelector: &metav1.LabelSelector{ @@ -278,6 +293,7 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap }, }, VMAntiAffinity: &vmoprv1.VMAntiAffinitySpec{ + // All the machines belonging to the same MachineDeployment should be spread across esxi hosts in the same failure domain - best-efforts. 
 				PreferredDuringSchedulingPreferredDuringExecution: []vmoprv1.VMAffinityTerm{
 					{
 						LabelSelector: &metav1.LabelSelector{
@@ -291,6 +307,7 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap
 					},
 				},
 			}
 		if len(othermMDNames) > 0 {
+			// Different MachineDeployments and corresponding VMs should be spread across failure domains - best-efforts.
 			affInfo.affinitySpec.VMAntiAffinity.PreferredDuringSchedulingPreferredDuringExecution = append(
 				affInfo.affinitySpec.VMAntiAffinity.PreferredDuringSchedulingPreferredDuringExecution,
 				vmoprv1.VMAffinityTerm{
@@ -309,25 +326,22 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap
 		}
 	}

+	// If the failureDomain is explicitly defined for a machine, forward this info to the VM.
+	// Note: for consistency, affinity rules will be set on all the VMs, no matter if they are explicitly assigned to a failureDomain or not.
 	if supervisorMachineCtx.Machine.Spec.FailureDomain != "" {
 		supervisorMachineCtx.VSphereMachine.Spec.FailureDomain = ptr.To(supervisorMachineCtx.Machine.Spec.FailureDomain)
 	}

 	// Check for the presence of an existing object
-	vmOperatorVM := &vmoprv1.VirtualMachine{}
-	key, err := virtualMachineObjectKey(supervisorMachineCtx.Machine.Name, supervisorMachineCtx.Machine.Namespace, supervisorMachineCtx.VSphereMachine.Spec.NamingStrategy)
-	if err != nil {
-		return false, err
-	}
-	if err := v.Client.Get(ctx, *key, vmOperatorVM); err != nil {
+	if err := v.Client.Get(ctx, *vmKey, vmOperatorVM); err != nil {
 		if !apierrors.IsNotFound(err) {
 			return false, err
 		}
 		// Define the VM Operator VirtualMachine resource to reconcile.
 		vmOperatorVM = &vmoprv1.VirtualMachine{
 			ObjectMeta: metav1.ObjectMeta{
-				Name:      key.Name,
-				Namespace: key.Namespace,
+				Name:      vmKey.Name,
+				Namespace: vmKey.Namespace,
 			},
 		}
 	}
@@ -988,18 +1002,14 @@ func getMachineDeploymentNameForCluster(cluster *clusterv1.Cluster) string {
 }

 // checkVirtualMachineGroupMembership checks if the machine is in the first boot order group
-// and performs logic if a match is found.
-func (v *VmopMachineService) checkVirtualMachineGroupMembership(vmOperatorVMGroup *vmoprv1.VirtualMachineGroup, supervisorMachineCtx *vmware.SupervisorMachineContext) (bool, error) {
+// and returns true if a match is found; the first boot order group contains all the worker VMs.
+func (v *VmopMachineService) checkVirtualMachineGroupMembership(vmOperatorVMGroup *vmoprv1.VirtualMachineGroup, virtualMachineName string) bool { if len(vmOperatorVMGroup.Spec.BootOrder) > 0 { for _, member := range vmOperatorVMGroup.Spec.BootOrder[0].Members { - virtualMachineName, err := GenerateVirtualMachineName(supervisorMachineCtx.Machine.Name, supervisorMachineCtx.VSphereMachine.Spec.NamingStrategy) - if err != nil { - return false, err - } if member.Name == virtualMachineName { - return true, nil + return true } } } - return false, nil + return false } From 2ac47d434b861dae15712de344f5a95f9db9c80a Mon Sep 17 00:00:00 2001 From: Gong Zhang Date: Mon, 24 Nov 2025 14:19:04 +0800 Subject: [PATCH 19/25] Refine VMG member update to avoid race condition Signed-off-by: Gong Zhang --- .../vmware/virtualmachinegroup_reconciler.go | 168 +++++++++--------- .../virtualmachinegroup_reconciler_test.go | 162 +++++++++-------- 2 files changed, 170 insertions(+), 160 deletions(-) diff --git a/controllers/vmware/virtualmachinegroup_reconciler.go b/controllers/vmware/virtualmachinegroup_reconciler.go index e35488d034..52f0cf2d02 100644 --- a/controllers/vmware/virtualmachinegroup_reconciler.go +++ b/controllers/vmware/virtualmachinegroup_reconciler.go @@ -184,31 +184,37 @@ func (r *VirtualMachineGroupReconciler) createOrUpdateVirtualMachineGroup(ctx co }) } - // Get all the names of MachineDeployments of the Cluster. - machineDeployments := &clusterv1.MachineDeploymentList{} - if err := r.Client.List(ctx, machineDeployments, - client.InNamespace(cluster.Namespace), - client.MatchingLabels{clusterv1.ClusterNameLabel: cluster.Name}); err != nil { + // The core purpose of isCreateOrPatchAllowed is to prevent the VirtualMachineGroup from being updated with new members + // that require placement, unless the VirtualMachineGroup + // has successfully completed its initial placement and added the required + // placement annotations. This stabilizes placement decisions before allowing new VMs + // to be added under the group. + //s + // The CreateOrPatch is allowed if: + // 1. The VirtualMachineGroup is being initially created. + // 2. The update is a scale-down operation. + // 3. The VirtualMachineGroup is placement Ready. + // 4. The new member's underlying CAPI Machine has a FailureDomain set (will skip placement process). + // 5. The new member requires placement annotation AND the VirtualMachineGroup has the corresponding + // placement annotation for the member's MachineDeployment. + // + // This prevents member updates that could lead to new VMs being created + // without necessary zone labels, resulting in undesired placement. + err = isCreateOrPatchAllowed(ctx, r.Client, members, vmg) + if err != nil { return reconcile.Result{}, err } - mdNames := []string{} - for _, md := range machineDeployments.Items { - // Skip MachineDeployment marked for removal. - if !md.DeletionTimestamp.IsZero() { - mdNames = append(mdNames, md.Name) - } - } // Use CreateOrPatch to create or update the VirtualMachineGroup. _, err = controllerutil.CreateOrPatch(ctx, r.Client, vmg, func() error { - return r.reconcileVirtualMachineGroup(ctx, vmg, cluster, members, mdNames) + return r.reconcileVirtualMachineGroup(ctx, vmg, cluster, members) }) return reconcile.Result{}, err } // reconcileVirtualMachineGroup mutates the VirtualMachineGroup object to reflect the necessary spec and metadata changes. 
-func (r *VirtualMachineGroupReconciler) reconcileVirtualMachineGroup(ctx context.Context, vmg *vmoprv1.VirtualMachineGroup, cluster *clusterv1.Cluster, members []vmoprv1.GroupMember, mdNames []string) error {
+func (r *VirtualMachineGroupReconciler) reconcileVirtualMachineGroup(ctx context.Context, vmg *vmoprv1.VirtualMachineGroup, cluster *clusterv1.Cluster, members []vmoprv1.GroupMember) error {
 	// Set the desired labels
 	if vmg.Labels == nil {
 		vmg.Labels = make(map[string]string)
@@ -220,6 +226,21 @@ func (r *VirtualMachineGroupReconciler) reconcileVirtualMachineGroup(ctx context
 		vmg.Annotations = make(map[string]string)
 	}

+	// Get all the names of MachineDeployments of the Cluster.
+	machineDeployments := &clusterv1.MachineDeploymentList{}
+	if err := r.Client.List(ctx, machineDeployments,
+		client.InNamespace(cluster.Namespace),
+		client.MatchingLabels{clusterv1.ClusterNameLabel: cluster.Name}); err != nil {
+		return err
+	}
+	mdNames := []string{}
+	for _, md := range machineDeployments.Items {
+		// Skip MachineDeployments marked for removal.
+		if md.DeletionTimestamp.IsZero() {
+			mdNames = append(mdNames, md.Name)
+		}
+	}
+
 	// Add per-md-zone label for day-2 operations once placement of a VM belongs to MachineDeployment is done.
 	// Do not update per-md-zone label once set, as placement decision should not change without user explicitly
 	// set failureDomain.
@@ -227,31 +248,10 @@ func (r *VirtualMachineGroupReconciler) reconcileVirtualMachineGroup(ctx context
 		return err
 	}

-	// Member Update:
-	// The VirtualMachineGroup's BootOrder.Members list, is only allowed to be set or added
-	// during two phases to maintain control over VM placement:
-	//
-	// 1. Initial Creation: When the VirtualMachineGroup object does not yet exist.
-	// 2. Post-Placement: After the VirtualMachineGroup exists AND is marked Ready which means all members are placed successfully,
-	// and critically, all MachineDeployments have a corresponding zone placement annotation recorded on the VMG.
-	//
-	// For member removal, this is always allowed since it doesn't impact ongoing placement or rely on the placement annotation.
-	//
-	// This prevents member updates that could lead to new VMs being created
-	// without necessary zone labels, resulting in undesired placement, such as VM within a MachineDeployment but are
-	// placed to different Zones.
-
-	isMemberUpdateAllowed, err := isMemberUpdateAllowed(ctx, r.Client, members, vmg)
-	if err != nil {
-		return err
-	}
-
-	if isMemberUpdateAllowed {
-		vmg.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{
-			{
-				Members: members,
-			},
-		}
+	vmg.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{
+		{
+			Members: members,
+		},
 	}

 	// Set the owner reference
@@ -262,9 +262,8 @@ func (r *VirtualMachineGroupReconciler) reconcileVirtualMachineGroup(ctx context
 	return nil
 }

-// isMemberUpdateAllowed determines if the BootOrder.Members field can be safely updated on the VirtualMachineGroup.
-// It allows updates only during initial creation or after all member placement are completed successfully.
-func isMemberUpdateAllowed(ctx context.Context, kubeClient client.Client, targetMember []vmoprv1.GroupMember, vmg *vmoprv1.VirtualMachineGroup) (bool, error) {
+// isCreateOrPatchAllowed checks whether the VirtualMachineGroup can be created or patched, by checking if the BootOrder.Members update is allowed.
+func isCreateOrPatchAllowed(ctx context.Context, kubeClient client.Client, targetMember []vmoprv1.GroupMember, vmg *vmoprv1.VirtualMachineGroup) error { logger := log.FromContext(ctx) key := client.ObjectKey{ Namespace: vmg.Namespace, @@ -275,16 +274,16 @@ func isMemberUpdateAllowed(ctx context.Context, kubeClient client.Client, target currentVMG := &vmoprv1.VirtualMachineGroup{} if err := kubeClient.Get(ctx, key, currentVMG); err != nil { if apierrors.IsNotFound(err) { - // If VirtualMachineGroup is not found, allow member update as it should be in initial creation phase. - logger.V(5).Info("VirtualMachineGroup not found, allowing member update for initial creation.") - return true, nil + // 1. If VirtualMachineGroup is not found, allow CreateOrPatch as it should be in initial creation phase. + logger.V(6).Info("VirtualMachineGroup not created yet, allowing create") + return nil } - return false, errors.Wrapf(err, "failed to get VirtualMachineGroup %s/%s", vmg.Namespace, vmg.Name) + return errors.Wrapf(err, "failed to get VirtualMachineGroup %s/%s, blocking patch", vmg.Namespace, vmg.Name) } - // Copy retrieved data back to the input pointer for consistency + // Copy retrieved data back to the input pointer for consistency. *vmg = *currentVMG - // Get current member names from VirtualMachineGroup Spec.BootOrder + // Get current member names from VirtualMachineGroup Spec.BootOrder. currentMemberNames := make(map[string]struct{}) if len(vmg.Spec.BootOrder) > 0 { for _, m := range vmg.Spec.BootOrder[0].Members { @@ -292,13 +291,12 @@ func isMemberUpdateAllowed(ctx context.Context, kubeClient client.Client, target } } - // 1. If removing members, allow immediately since it doesn't impact placement or placement annotation set. + // 2. If removing members, allow immediately since it doesn't impact placement or placement annotation set. if len(targetMember) < len(currentMemberNames) { - logger.V(5).Info("Scaling down detected (fewer target members), allowing member update.") - return true, nil + logger.V(6).Info("Scaling down detected (fewer target members), allowing patch.") + return nil } - // 2. If adding members, continue following checks. var newMembers []vmoprv1.GroupMember for _, m := range targetMember { if _, exists := currentMemberNames[m.Name]; !exists { @@ -306,7 +304,18 @@ func isMemberUpdateAllowed(ctx context.Context, kubeClient client.Client, target } } - // 3. Check newly added members for Machine.Spec.FailureDomain via VSphereMachine.If a member belongs to a Machine + // If no new member added, allow patch. + if len(newMembers) == 0 { + logger.V(6).Info("No new member detected, allowing patch.") + return nil + } + + // 3. If initial placement is still in progress, block adding new member. + if !conditions.IsTrue(vmg, vmoprv1.ReadyConditionType) { + return fmt.Errorf("waiting for VirtualMachineGroup %s to get condition %s to true, temporarily blocking patch", klog.KObj(vmg), vmoprv1.ReadyConditionType) + } + + // 4. Check newly added members for Machine.Spec.FailureDomain via VSphereMachine.If a member belongs to a Machine // which has failureDomain specified, allow it since it will skip the placement // process. If not, continue to check if the belonging MachineDeployment has got placement annotation. 
for _, newMember := range newMembers { @@ -317,10 +326,9 @@ func isMemberUpdateAllowed(ctx context.Context, kubeClient client.Client, target vsphereMachine := &vmwarev1.VSphereMachine{} if err := kubeClient.Get(ctx, vsphereMachineKey, vsphereMachine); err != nil { if apierrors.IsNotFound(err) { - logger.V(5).Info("VSphereMachine for new member not found, temporarily blocking update.", "VSphereMachineName", newMember.Name) - return false, nil + return errors.Wrapf(err, "VSphereMachine for new member %s not found, temporarily blocking patch", newMember.Name) } - return false, errors.Wrapf(err, "failed to get VSphereMachine %s", klog.KRef(newMember.Name, vmg.Namespace)) + return errors.Wrapf(err, "failed to get VSphereMachine %s", klog.KRef(newMember.Name, vmg.Namespace)) } var machineOwnerName string @@ -333,8 +341,7 @@ func isMemberUpdateAllowed(ctx context.Context, kubeClient client.Client, target if machineOwnerName == "" { // VSphereMachine found but owner Machine reference is missing - logger.V(5).Info("VSphereMachine found but owner Machine reference is missing, temporarily blocking update.", "VSphereMachineName", newMember.Name) - return false, nil + return fmt.Errorf("VSphereMachine %s found but owner Machine reference is missing, temporarily blocking patch", newMember.Name) } machineKey := types.NamespacedName{ @@ -345,46 +352,38 @@ func isMemberUpdateAllowed(ctx context.Context, kubeClient client.Client, target if err := kubeClient.Get(ctx, machineKey, machine); err != nil { if apierrors.IsNotFound(err) { - logger.V(5).Info("CAPI Machine not found via owner reference, temporarily blocking update.", "Machine", klog.KRef(machineOwnerName, vmg.Namespace)) - return false, nil + return errors.Wrapf(err, "Machine %s not found via owner reference, temporarily blocking patch", klog.KRef(machineOwnerName, vmg.Namespace)) } - return false, errors.Wrapf(err, "failed to get CAPI Machine %s", klog.KRef(machineOwnerName, vmg.Namespace)) + return errors.Wrapf(err, "failed to get CAPI Machine %s", klog.KRef(machineOwnerName, vmg.Namespace)) } - // If FailureDomain is set on CAPI Machine, placement process will be skipped. Allow update. + // If FailureDomain is set on CAPI Machine, placement process will be skipped. Allow update for this member. fd := machine.Spec.FailureDomain if fd != "" { - logger.V(5).Info("New member's Machine has FailureDomain specified. Allowing VMG update for this member.") + logger.V(6).Info("New member's Machine has FailureDomain specified. Allowing patch", "Member", newMember.Name) continue } - // If FailureDomain is NOT set. Requires placement or placement Annotation. Fall through to full VMG Annotation check. - logger.V(5).Info("New member's CAPI Machine lacks FailureDomain. Falling through to VMG Annotation check.", "MachineName", machineOwnerName) - - // If no Placement Annotations, skip member update and wait for it. + // 5. If FailureDomain is NOT set. Requires placement or placement Annotation. Fall through to Annotation check. + // If no Placement Annotations, block member update and wait for it. 
annotations := vmg.GetAnnotations() if len(annotations) == 0 { - return false, nil + return fmt.Errorf("waiting for placement annotation to add VMG member %s, temporarily blocking patch", newMember.Name) } mdLabelName := vsphereMachine.Labels[clusterv1.MachineDeploymentNameLabel] if mdLabelName == "" { - return false, errors.Wrapf(nil, "VSphereMachine doesn't have MachineDeployment name label %s", klog.KObj(vsphereMachine)) + return fmt.Errorf("VSphereMachine doesn't have MachineDeployment name label %s, blocking patch", klog.KObj(vsphereMachine)) } annotationKey := fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdLabelName) - if _, found := annotations[annotationKey]; !found { - logger.V(5).Info("Required placement annotation is missing.", - "Member", newMember, "Annotation", annotationKey) - return false, nil + return fmt.Errorf("waiting for placement annotation %s to add VMG member %s, temporarily blocking patch", annotationKey, newMember.Name) } - - logger.V(5).Info("New member requires placement annotation and it is present. Allowing this member.", "Member", newMember) } - logger.V(5).Info("Either no new members, or all newly added members existed or have satisfied placement requirements, allowing update.") - return true, nil + logger.V(6).Info("All newly added members either existed or have satisfied placement requirements, allowing patch") + return nil } // getExpectedVSphereMachineCount get expected total count of Machines belonging to the Cluster. @@ -438,7 +437,7 @@ func getCurrentVSphereMachines(ctx context.Context, kubeClient client.Client, cl // any existing placement annotations that correspond to MachineDeployments that no longer exist. // // The function attempts to find at least one successfully placed VM (VirtualMachineGroupMemberConditionPlacementReady==True) -// for each MachineDeployment and records its zone. Once a zone is recorded for an MD, subsequent VMs +// for each MachineDeployment and records its zone. Once a Zone is recorded for an MD, subsequent VMs // belonging to that same MD are skipped. func generateVirtualMachineGroupAnnotations(ctx context.Context, kubeClient client.Client, vmg *vmoprv1.VirtualMachineGroup, machineDeployments []string) error { log := ctrl.LoggerFrom(ctx) @@ -451,10 +450,7 @@ func generateVirtualMachineGroupAnnotations(ctx context.Context, kubeClient clie // If a MachineDeployment has been deleted, its corresponding placement annotation // on the VirtualMachineGroup should also be removed to avoid configuration drift. - activeMDs := make(map[string]bool) - for _, md := range machineDeployments { - activeMDs[md] = true - } + activeMDs := sets.New(machineDeployments...) // Iterate over existing VirtualMachineGroup annotations and delete those that are stale. for key := range annotations { @@ -466,15 +462,12 @@ func generateVirtualMachineGroupAnnotations(ctx context.Context, kubeClient clie mdName := strings.TrimPrefix(key, ZoneAnnotationPrefix+"/") // If the MD name is NOT in the list of currently active MDs, delete the annotation. - if found := activeMDs[mdName]; !found { + if !activeMDs.Has(mdName) { log.Info(fmt.Sprintf("Cleaning up stale placement annotation for none-existed MachineDeployment %s", mdName)) delete(annotations, key) } } - // Pre-computation: Convert the list of valid MachineDeployment names into a set. - mdNames := sets.New(machineDeployments...) - // Iterate through the VMG's members in Status. 
for _, member := range vmg.Status.Members { ns := vmg.Namespace @@ -510,7 +503,7 @@ func generateVirtualMachineGroupAnnotations(ctx context.Context, kubeClient clie } // Check if this VM belongs to any of our target MachineDeployments. - if !mdNames.Has(mdName) { + if !activeMDs.Has(mdName) { log.V(5).Info("Skipping member as its MachineDeployment name is not in the known list.", "VMName", member.Name, "MDName", mdName) continue @@ -518,6 +511,7 @@ func generateVirtualMachineGroupAnnotations(ctx context.Context, kubeClient clie // Get the VM placement information by member status. // VMs that have undergone placement do not have Placement info set, skip. + // VMs of Machine with failureDomain specified do not have Placement info set, skip. if member.Placement == nil { log.V(5).Info(fmt.Sprintf("VM %s in VMG %s/%s has no placement info. Placement is nil", member.Name, vmg.Name, ns)) continue diff --git a/controllers/vmware/virtualmachinegroup_reconciler_test.go b/controllers/vmware/virtualmachinegroup_reconciler_test.go index a7a887f862..8d5839cc7c 100644 --- a/controllers/vmware/virtualmachinegroup_reconciler_test.go +++ b/controllers/vmware/virtualmachinegroup_reconciler_test.go @@ -59,7 +59,7 @@ const ( failureDomainA = "zone-a" ) -func TestIsMemberUpdateAllowed(t *testing.T) { +func TestIsCreateOrPatchAllowed(t *testing.T) { ctx := context.Background() baseVMG := &vmoprv1.VirtualMachineGroup{ @@ -115,25 +115,21 @@ func TestIsMemberUpdateAllowed(t *testing.T) { name string targetMember []vmoprv1.GroupMember vmgInput *vmoprv1.VirtualMachineGroup - mdNames []string existingObjects []runtime.Object wantAllowed bool wantErr bool }{ { - name: "Allow member update if VirtualMachineGroup not existed", + name: "Allow Create if VirtualMachineGroup not existed", targetMember: []vmoprv1.GroupMember{member(memberName1)}, vmgInput: baseVMG.DeepCopy(), - mdNames: []string{mdName1}, existingObjects: nil, - wantAllowed: true, wantErr: false, }, { - name: "Allow member update if it is removing", + name: "Allow Patch if it is removing members", targetMember: []vmoprv1.GroupMember{}, vmgInput: baseVMG.DeepCopy(), - mdNames: []string{mdName1}, existingObjects: func() []runtime.Object { v := baseVMG.DeepCopy() v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{ @@ -144,14 +140,45 @@ func TestIsMemberUpdateAllowed(t *testing.T) { }}}} return []runtime.Object{v} }(), - wantAllowed: true, - wantErr: false, + wantErr: false, + }, + { + name: "Allow Patch if no new member", + targetMember: []vmoprv1.GroupMember{member(memberName1)}, // No new members + vmgInput: baseVMG.DeepCopy(), + existingObjects: func() []runtime.Object { + v := baseVMG.DeepCopy() + // Annotation for mdName1 is missing + v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ + { + Name: memberName1, + Kind: memberKind, + }, + }}} + return []runtime.Object{v} + }(), + wantErr: false, + }, + { + name: "Block Patch if VirtualMachineGroup is not Placement Ready", + targetMember: []vmoprv1.GroupMember{member(memberName1), member(memberName2)}, + vmgInput: baseVMG.DeepCopy(), + existingObjects: func() []runtime.Object { + v := baseVMG.DeepCopy() + v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{ + {Members: []vmoprv1.GroupMember{ + { + Name: memberName1, + Kind: memberKind, + }}}} + return []runtime.Object{v} + }(), + wantErr: true, }, { - name: "Allow member update when VMG Ready and All Annotations Present", + name: "Allow Patch when VMG Ready and All Annotations Present", 
targetMember: []vmoprv1.GroupMember{member(memberName1)}, vmgInput: baseVMG.DeepCopy(), - mdNames: []string{mdName1}, existingObjects: func() []runtime.Object { v := baseVMG.DeepCopy() conditions.Set(v, metav1.Condition{ @@ -168,16 +195,17 @@ func TestIsMemberUpdateAllowed(t *testing.T) { return []runtime.Object{v} }(), - wantAllowed: true, - wantErr: false, + wantErr: false, }, { - name: "Skip member update if new member VSphereMachine Not Found", + name: "Block Patch if new member VSphereMachine Not Found", targetMember: []vmoprv1.GroupMember{member(memberName1), member(memberName2)}, // vm-02 is new vmgInput: baseVMG.DeepCopy(), - mdNames: []string{mdName1}, existingObjects: func() []runtime.Object { v := baseVMG.DeepCopy() + conditions.Set(v, metav1.Condition{ + Type: vmoprv1.ReadyConditionType, + Status: metav1.ConditionTrue}) v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ { Name: memberName1, @@ -187,16 +215,17 @@ func TestIsMemberUpdateAllowed(t *testing.T) { // vm-02 VSphereMachine is missing return []runtime.Object{v, makeVSphereMachineOwned(memberName1, vmgNamespace, ownerMachineName1, mdName1), makeCAPIMachine(ownerMachineName1, vmgNamespace, ptr.To(failureDomainA))} }(), - wantAllowed: false, - wantErr: false, + wantErr: true, }, { - name: "Skip member update if VSphereMachine found but CAPI Machine missing", + name: "Block Patch if VSphereMachine found but CAPI Machine missing", targetMember: []vmoprv1.GroupMember{member(memberName1), member(memberName2)}, // vm-02 is new vmgInput: baseVMG.DeepCopy(), - mdNames: []string{mdName1}, existingObjects: func() []runtime.Object { v := baseVMG.DeepCopy() + conditions.Set(v, metav1.Condition{ + Type: vmoprv1.ReadyConditionType, + Status: metav1.ConditionTrue}) v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ { Name: memberName1, @@ -206,58 +235,45 @@ func TestIsMemberUpdateAllowed(t *testing.T) { // vm-02 VSphereMachine exists but has no owner ref return []runtime.Object{v, makeVSphereMachineOwned(memberName1, vmgNamespace, "ownerMachineName1", mdName1), makeCAPIMachine("ownerMachineName1", vmgNamespace, ptr.To(failureDomainA)), makeVSphereMachineNoOwner(memberName2, vmgNamespace)} }(), - wantAllowed: false, - wantErr: false, + wantErr: true, }, { - name: "Allow member update if all new members have Machine FailureDomain specified", + name: "Allow Patch if all new members have Machine FailureDomain specified", targetMember: []vmoprv1.GroupMember{member(memberName1), member(memberName2)}, // vm-02 is new vmgInput: baseVMG.DeepCopy(), - mdNames: []string{mdName1}, existingObjects: func() []runtime.Object { v := baseVMG.DeepCopy() + conditions.Set(v, metav1.Condition{ + Type: vmoprv1.ReadyConditionType, + Status: metav1.ConditionTrue}) v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ { Name: memberName1, Kind: memberKind, }, }}} - // m-02 (owner of vownerMachineName2) has FailureDomain set + // m-02 (owner of ownerMachineName2) has FailureDomain set return []runtime.Object{ v, makeVSphereMachineOwned(memberName1, vmgNamespace, "ownerMachineName1", mdName1), makeCAPIMachine("ownerMachineName1", vmgNamespace, nil), makeVSphereMachineOwned(memberName2, vmgNamespace, "ownerMachineName2", mdName2), makeCAPIMachine("ownerMachineName2", vmgNamespace, ptr.To(failureDomainA)), } }(), - wantAllowed: true, // Allowed because new members don't require VMO placement - wantErr: false, - }, - { - name: "Allow 
member update if no new member", - targetMember: []vmoprv1.GroupMember{member(memberName1)}, // No new members - vmgInput: baseVMG.DeepCopy(), - mdNames: []string{mdName1}, // Expects mdName1 annotation - existingObjects: func() []runtime.Object { - v := baseVMG.DeepCopy() - // Annotation for mdName1 is missing - v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ - { - Name: memberName1, - Kind: memberKind, - }, - }}} - return []runtime.Object{v} - }(), - wantAllowed: true, - wantErr: false, + // Allowed because new members don't require placement + wantErr: false, }, { - name: "Skip member update if new member Machine requires placement annotation", + name: "Block Patch if placement annotation is missing", targetMember: []vmoprv1.GroupMember{member(memberName1), member(memberName2)}, // vm-02 is new and requires placement vmgInput: baseVMG.DeepCopy(), - mdNames: []string{mdName1}, existingObjects: func() []runtime.Object { v := baseVMG.DeepCopy() + conditions.Set(v, metav1.Condition{ + Type: vmoprv1.ReadyConditionType, + Status: metav1.ConditionTrue}) + v.Annotations = map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA, + } v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ { Name: memberName1, @@ -271,16 +287,17 @@ func TestIsMemberUpdateAllowed(t *testing.T) { makeVSphereMachineOwned(memberName2, vmgNamespace, "ownerMachineName2", mdName2), makeUnplacedCAPIMachine("ownerMachineName2", vmgNamespace), } }(), - wantAllowed: false, - wantErr: false, + wantErr: true, }, { - name: "Allow new member Machine since required placement annotation exists", + name: "Allow Patch Machine since required placement annotation exists", targetMember: []vmoprv1.GroupMember{member(memberName1), member(memberName2)}, // vm-02 is new and requires placement vmgInput: baseVMG.DeepCopy(), - mdNames: []string{mdName1}, existingObjects: func() []runtime.Object { v := baseVMG.DeepCopy() + conditions.Set(v, metav1.Condition{ + Type: vmoprv1.ReadyConditionType, + Status: metav1.ConditionTrue}) v.Annotations = map[string]string{ fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA, fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName2): zoneB, @@ -297,8 +314,7 @@ func TestIsMemberUpdateAllowed(t *testing.T) { makeVSphereMachineOwned(memberName2, vmgNamespace, "ownerMachineName2", mdName2), makeUnplacedCAPIMachine("ownerMachineName2", vmgNamespace), } }(), - wantAllowed: true, - wantErr: false, + wantErr: false, }, } @@ -311,13 +327,12 @@ func TestIsMemberUpdateAllowed(t *testing.T) { vmgInput := tt.vmgInput.DeepCopy() - gotAllowed, err := isMemberUpdateAllowed(ctx, kubeClient, tt.targetMember, vmgInput) + err := isCreateOrPatchAllowed(ctx, kubeClient, tt.targetMember, vmgInput) if tt.wantErr { g.Expect(err).To(HaveOccurred()) } else { g.Expect(err).NotTo(HaveOccurred()) - g.Expect(gotAllowed).To(Equal(tt.wantAllowed)) } }) } @@ -715,25 +730,26 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { cluster.DeepCopy(), md1.DeepCopy(), vsm1.DeepCopy(), - &vmoprv1.VirtualMachineGroup{ - ObjectMeta: metav1.ObjectMeta{ - Name: clusterName, - Namespace: clusterNamespace, - Labels: map[string]string{clusterv1.ClusterNameLabel: cluster.Name}, - }, - Spec: vmoprv1.VirtualMachineGroupSpec{ - BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ + func() client.Object { + v := vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: clusterNamespace, + 
Labels: map[string]string{clusterv1.ClusterNameLabel: cluster.Name}, + }} + v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{ + {Members: []vmoprv1.GroupMember{ { - Members: []vmoprv1.GroupMember{ - { - Name: vsm1.Name, - Kind: "VSphereMachine", - }, - }, - }, - }, - }, - }, + Name: vsm1.Name, + Kind: memberKind, + }}}} + + conditions.Set(&v, metav1.Condition{ + Type: vmoprv1.ReadyConditionType, + Status: metav1.ConditionTrue, + }) + return v.DeepCopyObject().(client.Object) + }(), }, }, } From c41abeb04f3a0da6e41c89106f4d0ee0fca5ef31 Mon Sep 17 00:00:00 2001 From: Gong Zhang Date: Wed, 26 Nov 2025 15:30:16 +0800 Subject: [PATCH 20/25] Update vm-operator package to v1.9.1-0.20251003150112-9b458d311c4c Since Encryption Class requires API in this newer version, bump vm-operator package Signed-off-by: Gong Zhang --- go.mod | 6 +++--- go.sum | 4 ++-- packaging/go.sum | 4 ++-- test/go.mod | 6 +++--- test/go.sum | 4 ++-- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/go.mod b/go.mod index df97e28e79..1e30e9c7ca 100644 --- a/go.mod +++ b/go.mod @@ -4,16 +4,16 @@ go 1.24.0 replace sigs.k8s.io/cluster-api => sigs.k8s.io/cluster-api v1.11.0-rc.0.0.20250905091528-eb4e38c46ff6 -replace github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels => github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels v1.9.1-0.20251003150112-9b458d311c4c +replace github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels => github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels v1.9.1-0.20251029150609-93918c59a719 // The version of vm-operator should be kept in sync with the manifests at: config/deployments/integration-tests -replace github.com/vmware-tanzu/vm-operator/api => github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c +replace github.com/vmware-tanzu/vm-operator/api => github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251029150609-93918c59a719 require ( github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d // The version of vm-operator should be kept in sync with the manifests at: config/deployments/integration-tests - github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c + github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251029150609-93918c59a719 github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505 github.com/vmware/govmomi v0.52.0 ) diff --git a/go.sum b/go.sum index 34bb470a23..38eb0f2114 100644 --- a/go.sum +++ b/go.sum @@ -243,8 +243,8 @@ github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d h1:c github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d/go.mod h1:JbFOh22iDsT5BowJe0GgpMI5e2/S7cWaJlv9LdURVQM= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d h1:z9lrzKVtNlujduv9BilzPxuge/LE2F0N1ms3TP4JZvw= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d/go.mod h1:Q4JzNkNMvjo7pXtlB5/R3oME4Nhah7fAObWgghVmtxk= -github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c h1:XISTT0dw/XwMlyyiOPHPsXCxfI1Ro2Zuozi6eIacXGo= -github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c/go.mod h1:nWTPpxfe4gHuuYuFcrs86+NMxfkqPk3a3IlvI8TCWak= +github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251029150609-93918c59a719 h1:nb/5ytRj7E/5eo9UzLfaR29JytMtbGpqMVs3hjaRwZ0= +github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251029150609-93918c59a719/go.mod 
h1:nWTPpxfe4gHuuYuFcrs86+NMxfkqPk3a3IlvI8TCWak= github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505 h1:y4wXx1FUFqqSgJ/xUOEM1DLS2Uu0KaeLADWpzpioGTU= github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505/go.mod h1:5rqRJ9zGR+KnKbkGx373WgN8xJpvAj99kHnfoDYRO5I= github.com/vmware/govmomi v0.52.0 h1:JyxQ1IQdllrY7PJbv2am9mRsv3p9xWlIQ66bv+XnyLw= diff --git a/packaging/go.sum b/packaging/go.sum index 0659c3663f..449ae296a6 100644 --- a/packaging/go.sum +++ b/packaging/go.sum @@ -135,8 +135,8 @@ github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d h1:c github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d/go.mod h1:JbFOh22iDsT5BowJe0GgpMI5e2/S7cWaJlv9LdURVQM= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d h1:z9lrzKVtNlujduv9BilzPxuge/LE2F0N1ms3TP4JZvw= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d/go.mod h1:Q4JzNkNMvjo7pXtlB5/R3oME4Nhah7fAObWgghVmtxk= -github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c h1:XISTT0dw/XwMlyyiOPHPsXCxfI1Ro2Zuozi6eIacXGo= -github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c/go.mod h1:nWTPpxfe4gHuuYuFcrs86+NMxfkqPk3a3IlvI8TCWak= +github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251029150609-93918c59a719 h1:nb/5ytRj7E/5eo9UzLfaR29JytMtbGpqMVs3hjaRwZ0= +github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251029150609-93918c59a719/go.mod h1:nWTPpxfe4gHuuYuFcrs86+NMxfkqPk3a3IlvI8TCWak= github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505 h1:y4wXx1FUFqqSgJ/xUOEM1DLS2Uu0KaeLADWpzpioGTU= github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505/go.mod h1:5rqRJ9zGR+KnKbkGx373WgN8xJpvAj99kHnfoDYRO5I= github.com/vmware/govmomi v0.52.0 h1:JyxQ1IQdllrY7PJbv2am9mRsv3p9xWlIQ66bv+XnyLw= diff --git a/test/go.mod b/test/go.mod index db1b6ea8b6..f83674bcbd 100644 --- a/test/go.mod +++ b/test/go.mod @@ -8,15 +8,15 @@ replace sigs.k8s.io/cluster-api/test => sigs.k8s.io/cluster-api/test v1.11.0-rc. 
replace sigs.k8s.io/cluster-api-provider-vsphere => ../ -replace github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels => github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels v1.9.1-0.20251003150112-9b458d311c4c +replace github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels => github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels v1.9.1-0.20251029150609-93918c59a719 // The version of vm-operator should be kept in sync with the manifests at: config/deployments/integration-tests -replace github.com/vmware-tanzu/vm-operator/api => github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c +replace github.com/vmware-tanzu/vm-operator/api => github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251029150609-93918c59a719 require ( github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d // The version of vm-operator should be kept in sync with the manifests at: config/deployments/integration-tests - github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c + github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251029150609-93918c59a719 github.com/vmware/govmomi v0.52.0 ) diff --git a/test/go.sum b/test/go.sum index e5e682ab61..bc5369f066 100644 --- a/test/go.sum +++ b/test/go.sum @@ -360,8 +360,8 @@ github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d h1:c github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d/go.mod h1:JbFOh22iDsT5BowJe0GgpMI5e2/S7cWaJlv9LdURVQM= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d h1:z9lrzKVtNlujduv9BilzPxuge/LE2F0N1ms3TP4JZvw= github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d/go.mod h1:Q4JzNkNMvjo7pXtlB5/R3oME4Nhah7fAObWgghVmtxk= -github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c h1:XISTT0dw/XwMlyyiOPHPsXCxfI1Ro2Zuozi6eIacXGo= -github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251003150112-9b458d311c4c/go.mod h1:nWTPpxfe4gHuuYuFcrs86+NMxfkqPk3a3IlvI8TCWak= +github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251029150609-93918c59a719 h1:nb/5ytRj7E/5eo9UzLfaR29JytMtbGpqMVs3hjaRwZ0= +github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20251029150609-93918c59a719/go.mod h1:nWTPpxfe4gHuuYuFcrs86+NMxfkqPk3a3IlvI8TCWak= github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505 h1:y4wXx1FUFqqSgJ/xUOEM1DLS2Uu0KaeLADWpzpioGTU= github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505/go.mod h1:5rqRJ9zGR+KnKbkGx373WgN8xJpvAj99kHnfoDYRO5I= github.com/vmware/govmomi v0.52.0 h1:JyxQ1IQdllrY7PJbv2am9mRsv3p9xWlIQ66bv+XnyLw= From 516af8466e18091de587112523c29e37c3545bab Mon Sep 17 00:00:00 2001 From: Gong Zhang Date: Thu, 27 Nov 2025 11:46:30 +0800 Subject: [PATCH 21/25] Added reconcile flow cases Signed-off-by: Gong Zhang --- .../vmware/virtualmachinegroup_reconciler.go | 16 +- .../virtualmachinegroup_reconciler_test.go | 499 +++++++++++++----- 2 files changed, 384 insertions(+), 131 deletions(-) diff --git a/controllers/vmware/virtualmachinegroup_reconciler.go b/controllers/vmware/virtualmachinegroup_reconciler.go index 52f0cf2d02..f8f97ec414 100644 --- a/controllers/vmware/virtualmachinegroup_reconciler.go +++ b/controllers/vmware/virtualmachinegroup_reconciler.go @@ -189,14 +189,16 @@ func (r *VirtualMachineGroupReconciler) createOrUpdateVirtualMachineGroup(ctx co // has successfully completed its initial placement and added the required // placement annotations. 
This stabilizes placement decisions before allowing new VMs
 	// to be added under the group.
-	//s
+	//
 	// The CreateOrPatch is allowed if:
 	// 1. The VirtualMachineGroup is being initially created.
-	// 2. The update is a scale-down operation.
-	// 3. The VirtualMachineGroup is placement Ready.
-	// 4. The new member's underlying CAPI Machine has a FailureDomain set (will skip placement process).
-	// 5. The new member requires placement annotation AND the VirtualMachineGroup has the corresponding
-	//    placement annotation for the member's MachineDeployment.
+	// 2. The update does not add new members:
+	//    1) it is a scale-down operation, or
+	//    2) there is no member change.
+	// 3. The VirtualMachineGroup is placement Ready and, for each new member, one of the following holds:
+	//    1) The new member's underlying CAPI Machine has a FailureDomain set (the placement process is skipped).
+	//    2) The new member requires a placement annotation AND the VirtualMachineGroup has the corresponding
+	//       placement annotation for the member's MachineDeployment.
 	//
 	// This prevents member updates that could lead to new VMs being created
 	// without necessary zone labels, resulting in undesired placement.
 	err = isCreateOrPatchAllowed(ctx, r.Client, members, vmg)
@@ -468,7 +470,7 @@ func generateVirtualMachineGroupAnnotations(ctx context.Context, kubeClient clie
 		}
 	}

-	// Iterate through the VMG's members in Status.
+	// Iterate through the VirtualMachineGroup's members in Status.
 	for _, member := range vmg.Status.Members {
 		ns := vmg.Namespace
diff --git a/controllers/vmware/virtualmachinegroup_reconciler_test.go b/controllers/vmware/virtualmachinegroup_reconciler_test.go
index 8d5839cc7c..a52ba083c2 100644
--- a/controllers/vmware/virtualmachinegroup_reconciler_test.go
+++ b/controllers/vmware/virtualmachinegroup_reconciler_test.go
@@ -20,11 +20,13 @@ import (
 	"context"
 	"fmt"
 	"sort"
+	"strings"
 	"testing"
 	"time"

 	. 
"github.com/onsi/gomega" vmoprv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha2" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" @@ -80,7 +82,7 @@ func TestIsCreateOrPatchAllowed(t *testing.T) { } return m } - makeUnplacedCAPIMachine := func(name, namespace string) *clusterv1.Machine { + makeCAPIMachineNoFailureDomain := func(name, namespace string) *clusterv1.Machine { return makeCAPIMachine(name, namespace, nil) } @@ -116,8 +118,8 @@ func TestIsCreateOrPatchAllowed(t *testing.T) { targetMember []vmoprv1.GroupMember vmgInput *vmoprv1.VirtualMachineGroup existingObjects []runtime.Object - wantAllowed bool wantErr bool + expectedErrMsg string }{ { name: "Allow Create if VirtualMachineGroup not existed", @@ -125,6 +127,7 @@ func TestIsCreateOrPatchAllowed(t *testing.T) { vmgInput: baseVMG.DeepCopy(), existingObjects: nil, wantErr: false, + expectedErrMsg: "", }, { name: "Allow Patch if it is removing members", @@ -140,7 +143,8 @@ func TestIsCreateOrPatchAllowed(t *testing.T) { }}}} return []runtime.Object{v} }(), - wantErr: false, + wantErr: false, + expectedErrMsg: "", }, { name: "Allow Patch if no new member", @@ -157,10 +161,11 @@ func TestIsCreateOrPatchAllowed(t *testing.T) { }}} return []runtime.Object{v} }(), - wantErr: false, + wantErr: false, + expectedErrMsg: "", }, { - name: "Block Patch if VirtualMachineGroup is not Placement Ready", + name: "Block Patch to add new member if VirtualMachineGroup is not Placement Ready", targetMember: []vmoprv1.GroupMember{member(memberName1), member(memberName2)}, vmgInput: baseVMG.DeepCopy(), existingObjects: func() []runtime.Object { @@ -173,29 +178,8 @@ func TestIsCreateOrPatchAllowed(t *testing.T) { }}}} return []runtime.Object{v} }(), - wantErr: true, - }, - { - name: "Allow Patch when VMG Ready and All Annotations Present", - targetMember: []vmoprv1.GroupMember{member(memberName1)}, - vmgInput: baseVMG.DeepCopy(), - existingObjects: func() []runtime.Object { - v := baseVMG.DeepCopy() - conditions.Set(v, metav1.Condition{ - Type: vmoprv1.ReadyConditionType, - Status: metav1.ConditionTrue}) - v.Annotations = map[string]string{fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA} - - v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ - { - Name: memberName1, - Kind: memberKind, - }, - }}} - - return []runtime.Object{v} - }(), - wantErr: false, + wantErr: true, + expectedErrMsg: fmt.Sprintf("waiting for VirtualMachineGroup %s to get condition Ready to true, temporarily blocking patch", vmgName), }, { name: "Block Patch if new member VSphereMachine Not Found", @@ -215,10 +199,11 @@ func TestIsCreateOrPatchAllowed(t *testing.T) { // vm-02 VSphereMachine is missing return []runtime.Object{v, makeVSphereMachineOwned(memberName1, vmgNamespace, ownerMachineName1, mdName1), makeCAPIMachine(ownerMachineName1, vmgNamespace, ptr.To(failureDomainA))} }(), - wantErr: true, + wantErr: true, + expectedErrMsg: fmt.Sprintf("VSphereMachine for new member %s not found, temporarily blocking patch", memberName2), }, { - name: "Block Patch if VSphereMachine found but CAPI Machine missing", + name: "Block Patch if VSphereMachine found but owner CAPI Machine missing", targetMember: []vmoprv1.GroupMember{member(memberName1), member(memberName2)}, // vm-02 is new vmgInput: baseVMG.DeepCopy(), existingObjects: func() []runtime.Object { @@ -235,7 +220,8 @@ func TestIsCreateOrPatchAllowed(t *testing.T) { // 
vm-02 VSphereMachine exists but has no owner ref return []runtime.Object{v, makeVSphereMachineOwned(memberName1, vmgNamespace, "ownerMachineName1", mdName1), makeCAPIMachine("ownerMachineName1", vmgNamespace, ptr.To(failureDomainA)), makeVSphereMachineNoOwner(memberName2, vmgNamespace)} }(), - wantErr: true, + wantErr: true, + expectedErrMsg: fmt.Sprintf("VSphereMachine %s found but owner Machine reference is missing, temporarily blocking patch", memberName2), }, { name: "Allow Patch if all new members have Machine FailureDomain specified", @@ -260,7 +246,8 @@ func TestIsCreateOrPatchAllowed(t *testing.T) { } }(), // Allowed because new members don't require placement - wantErr: false, + wantErr: false, + expectedErrMsg: "", }, { name: "Block Patch if placement annotation is missing", @@ -280,14 +267,15 @@ func TestIsCreateOrPatchAllowed(t *testing.T) { Kind: memberKind, }, }}} - // m-02 lacks FailureDomain and new Member requires placement annotation + // m-02 lacks FailureDomain and new Member vm-02 requires placement annotation but not exists return []runtime.Object{ v, makeVSphereMachineOwned(memberName1, vmgNamespace, "ownerMachineName1", mdName1), makeCAPIMachine("ownerMachineName1", vmgNamespace, ptr.To(failureDomainA)), - makeVSphereMachineOwned(memberName2, vmgNamespace, "ownerMachineName2", mdName2), makeUnplacedCAPIMachine("ownerMachineName2", vmgNamespace), + makeVSphereMachineOwned(memberName2, vmgNamespace, "ownerMachineName2", mdName2), makeCAPIMachineNoFailureDomain("ownerMachineName2", vmgNamespace), } }(), - wantErr: true, + wantErr: true, + expectedErrMsg: fmt.Sprintf("waiting for placement annotation to add VMG member %s, temporarily blocking patch", memberName2), }, { name: "Allow Patch Machine since required placement annotation exists", @@ -311,10 +299,11 @@ func TestIsCreateOrPatchAllowed(t *testing.T) { return []runtime.Object{ v, makeVSphereMachineOwned(memberName1, vmgNamespace, "ownerMachineName1", mdName1), makeCAPIMachine("ownerMachineName1", vmgNamespace, nil), - makeVSphereMachineOwned(memberName2, vmgNamespace, "ownerMachineName2", mdName2), makeUnplacedCAPIMachine("ownerMachineName2", vmgNamespace), + makeVSphereMachineOwned(memberName2, vmgNamespace, "ownerMachineName2", mdName2), makeCAPIMachineNoFailureDomain("ownerMachineName2", vmgNamespace), } }(), - wantErr: false, + wantErr: false, + expectedErrMsg: "", }, } @@ -331,6 +320,7 @@ func TestIsCreateOrPatchAllowed(t *testing.T) { if tt.wantErr { g.Expect(err).To(HaveOccurred()) + g.Expect(err.Error()).To(ContainSubstring(tt.expectedErrMsg)) } else { g.Expect(err).NotTo(HaveOccurred()) } @@ -480,8 +470,10 @@ func TestGenerateVirtualMachineGroupAnnotations(t *testing.T) { // VSphereMachines corresponding to the VMG members vsmName1 := fmt.Sprintf("%s-%s", mdName1, "vsm-1") vsmName2 := fmt.Sprintf("%s-%s", mdName2, "vsm-2") + vsmNameSameMD := fmt.Sprintf("%s-%s", mdName1, "vsm-same-md") vsm1 := newVSphereMachine(vsmName1, mdName1, false, false, nil) vsm2 := newVSphereMachine(vsmName2, mdName2, false, false, nil) + vsmSameMD := newVSphereMachine(vsmNameSameMD, mdName1, false, false, nil) vsmMissingLabel := newVSphereMachine("vsm-nolabel", mdName2, false, false, nil) vsmMissingLabel.Labels = nil // Explicitly remove labels for test case @@ -494,40 +486,23 @@ func TestGenerateVirtualMachineGroupAnnotations(t *testing.T) { wantErr bool }{ { - name: "Placement found for two distinct MDs", + name: "Deletes stale annotation for none-existed MD", vmg: func() *vmoprv1.VirtualMachineGroup { v := baseVMG.DeepCopy() + 
// This MD (mdNameStale) is NOT in the machineDeployments list below. + v.SetAnnotations(map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdNameStale): zoneA, + "other/annotation": "keep-me", + }) v.Status = vmoprv1.VirtualMachineGroupStatus{ - Members: []vmoprv1.VirtualMachineGroupMemberStatus{ - newVMGMemberStatus(vsmName1, "VirtualMachine", true, true, zoneA), - newVMGMemberStatus(vsmName2, "VirtualMachine", true, true, zoneB), - }, - } - return v - }(), - machineDeployments: []string{mdName1, mdName2}, - initialClientObjects: []client.Object{vsm1, vsm2}, - expectedAnnotations: map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA, - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName2): zoneB, - }, - wantErr: false, - }, - { - name: "Skip as placement already exists in VMG Annotations", - vmg: func() *vmoprv1.VirtualMachineGroup { - v := baseVMG.DeepCopy() - v.Annotations = map[string]string{fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA} - v.Status.Members = []vmoprv1.VirtualMachineGroupMemberStatus{ - newVMGMemberStatus(vsmName1, "VirtualMachine", true, true, zoneB), + Members: []vmoprv1.VirtualMachineGroupMemberStatus{}, } return v }(), machineDeployments: []string{mdName1}, - initialClientObjects: []client.Object{vsm1}, - // Should retain existing zone-a + initialClientObjects: []client.Object{}, expectedAnnotations: map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA, + "other/annotation": "keep-me", }, wantErr: false, }, @@ -563,6 +538,24 @@ func TestGenerateVirtualMachineGroupAnnotations(t *testing.T) { expectedAnnotations: map[string]string{}, wantErr: false, }, + { + name: "Skip as placement already exists in VMG Annotations", + vmg: func() *vmoprv1.VirtualMachineGroup { + v := baseVMG.DeepCopy() + v.Annotations = map[string]string{fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA} + v.Status.Members = []vmoprv1.VirtualMachineGroupMemberStatus{ + newVMGMemberStatus(vsmName1, "VirtualMachine", true, true, zoneB), + } + return v + }(), + machineDeployments: []string{mdName1}, + initialClientObjects: []client.Object{vsm1}, + // Should retain existing zone-a + expectedAnnotations: map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA, + }, + wantErr: false, + }, { name: "Skip if placement is nil", vmg: func() *vmoprv1.VirtualMachineGroup { @@ -596,45 +589,82 @@ func TestGenerateVirtualMachineGroupAnnotations(t *testing.T) { wantErr: false, }, { - name: "Deletes stale annotation for none-existed MD", + name: "Cleans stale and adds new annotations", vmg: func() *vmoprv1.VirtualMachineGroup { v := baseVMG.DeepCopy() - // This MD (mdNameStale) is NOT in the machineDeployments list below. 
+ // Stale annotation to be deleted v.SetAnnotations(map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdNameStale): zoneA, - "other/annotation": "keep-me", + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdNameStale): zoneB, }) v.Status = vmoprv1.VirtualMachineGroupStatus{ - Members: []vmoprv1.VirtualMachineGroupMemberStatus{}, + Members: []vmoprv1.VirtualMachineGroupMemberStatus{ + newVMGMemberStatus(vsmName1, "VirtualMachine", true, true, zoneA), + }, } return v }(), machineDeployments: []string{mdName1}, - initialClientObjects: []client.Object{}, + initialClientObjects: []client.Object{vsm1}, expectedAnnotations: map[string]string{ - "other/annotation": "keep-me", + // Stale annotation for mdNameStale should be gone + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA, }, wantErr: false, }, { - name: "Cleans stale and adds new annotations", + name: "Placement found for two distinct MDs", + vmg: func() *vmoprv1.VirtualMachineGroup { + v := baseVMG.DeepCopy() + v.Status = vmoprv1.VirtualMachineGroupStatus{ + Members: []vmoprv1.VirtualMachineGroupMemberStatus{ + newVMGMemberStatus(vsmName1, "VirtualMachine", true, true, zoneA), + newVMGMemberStatus(vsmName2, "VirtualMachine", true, true, zoneB), + }, + } + return v + }(), + machineDeployments: []string{mdName1, mdName2}, + initialClientObjects: []client.Object{vsm1, vsm2}, + expectedAnnotations: map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA, + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName2): zoneB, + }, + wantErr: false, + }, + { + name: "Placement found for MD1 but not MD2 since PlacementReady is not true", + vmg: func() *vmoprv1.VirtualMachineGroup { + v := baseVMG.DeepCopy() + v.Status = vmoprv1.VirtualMachineGroupStatus{ + Members: []vmoprv1.VirtualMachineGroupMemberStatus{ + newVMGMemberStatus(vsmName1, "VirtualMachine", true, true, zoneA), + newVMGMemberStatus(vsmName2, "VirtualMachine", false, false, ""), + }, + } + return v + }(), + machineDeployments: []string{mdName1, mdName2}, + initialClientObjects: []client.Object{vsm1, vsm2}, + expectedAnnotations: map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA, + }, + wantErr: false, + }, + { + name: "Keep the original annotation if VMs for the same MD placed to new zone", vmg: func() *vmoprv1.VirtualMachineGroup { v := baseVMG.DeepCopy() - // Stale annotation to be deleted - v.SetAnnotations(map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdNameStale): zoneB, - }) v.Status = vmoprv1.VirtualMachineGroupStatus{ Members: []vmoprv1.VirtualMachineGroupMemberStatus{ newVMGMemberStatus(vsmName1, "VirtualMachine", true, true, zoneA), + newVMGMemberStatus(vsmNameSameMD, "VirtualMachine", true, true, zoneB), }, } return v }(), machineDeployments: []string{mdName1}, - initialClientObjects: []client.Object{vsm1}, + initialClientObjects: []client.Object{vsm1, vsmSameMD}, expectedAnnotations: map[string]string{ - // Stale annotation for mdNameStale should be gone fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA, }, wantErr: false, @@ -667,22 +697,47 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { g.Expect(vmwarev1.AddToScheme(scheme)).To(Succeed()) g.Expect(vmoprv1.AddToScheme(scheme)).To(Succeed()) - // Initial objects for the successful VMG creation path (Expected: 1, Current: 1) - cluster := newCluster(clusterName, clusterNamespace, true, 1, 0) + // Initial objects for the successful VMG creation path + cluster := newCluster(clusterName, clusterNamespace, true, 
1, 1) vsm1 := newVSphereMachine("vsm-1", mdName1, false, false, nil) + vsm2 := newVSphereMachine("vsm-2", mdName2, false, false, nil) + // VSM 3 is in deletion (will be filtered out) + vsm3 := newVSphereMachine("vsm-3", mdName1, false, true, nil) md1 := newMachineDeployment(mdName1, clusterName, clusterNamespace, true, ptr.To(int32(1))) + md2 := newMachineDeployment(mdName2, clusterName, clusterNamespace, true, ptr.To(int32(1))) + machine1 := newMachine("machine-vsm-1", mdName1, "") + machine2 := newMachine("machine-vsm-2", mdName2, "") + + // VMG Ready state for Day-2 checks + readyVMGMembers := []vmoprv1.GroupMember{ + {Name: vsm1.Name, Kind: memberKind}, + {Name: vsm2.Name, Kind: memberKind}, + } + + // VMG Ready but haven't added placement annotation + vmgReady := newVMG(clusterName, clusterNamespace, readyVMGMembers, true, nil) + + // VMG Ready and have placement annotation for Day-2 checks + vmgPlaced := newVMG(clusterName, clusterNamespace, readyVMGMembers, true, map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA, + }) tests := []struct { - name string - initialObjects []client.Object - expectedResult reconcile.Result - checkVMGExists bool + name string + initialObjects []client.Object + expectedResult reconcile.Result + expectVMGExists bool + expectedMembersCount int + expectedAnnotations map[string]string + expectedErrorMsg string }{ + // VMG Create { - name: "Should Exit if Cluster Not Found", - initialObjects: []client.Object{}, - expectedResult: reconcile.Result{}, - checkVMGExists: false, + name: "Should Exit if Cluster Not Found", + initialObjects: []client.Object{}, + expectedResult: reconcile.Result{}, + expectVMGExists: false, + expectedMembersCount: 0, }, { name: "Should Exit if Cluster Deletion Timestamp Set", @@ -694,64 +749,185 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { return c }(), }, - expectedResult: reconcile.Result{}, - checkVMGExists: false, + expectedResult: reconcile.Result{}, + expectVMGExists: false, }, { name: "Should Requeue if ControlPlane Not Initialized", initialObjects: []client.Object{ newCluster(clusterName, clusterNamespace, false, 1, 0), }, - expectedResult: reconcile.Result{}, - checkVMGExists: false, + expectedResult: reconcile.Result{}, + expectVMGExists: false, + }, + { + name: "Should Requeue if VMG Not Found and Machines not ready", + initialObjects: []client.Object{cluster.DeepCopy(), md1.DeepCopy(), md2.DeepCopy()}, + expectedResult: reconcile.Result{}, + expectVMGExists: false, + expectedMembersCount: 0, }, { - name: "Should Requeue if VMG Not Found", + name: "Should Succeed to create VMG", initialObjects: []client.Object{ cluster.DeepCopy(), md1.DeepCopy(), + vsm1.DeepCopy(), + md2.DeepCopy(), + vsm1.DeepCopy(), }, - expectedResult: reconcile.Result{}, - checkVMGExists: false, + expectedResult: reconcile.Result{}, + expectVMGExists: true, + expectedMembersCount: 2, }, + // VMG Update: Member Scale Down { - name: "Should Succeed to create VMG", + name: "Should Succeed to update VMG if removing member even placement is not ready", + initialObjects: []client.Object{ + cluster.DeepCopy(), + newMachineDeployment(mdName1, clusterName, clusterNamespace, true, ptr.To(int32(1))), + // VSM3 is in deletion + vsm1.DeepCopy(), + vsm2.DeepCopy(), + vsm3.DeepCopy(), + // Existing VMG has vsm-1, vsm-2 and vsm-3, simulating scale-down state + newVMG(clusterName, clusterNamespace, []vmoprv1.GroupMember{ + {Name: "vsm-1", Kind: memberKind}, + {Name: "vsm-2", Kind: memberKind}, + {Name: "vsm-3", Kind: 
memberKind}, + }, false, nil), + }, + expectedResult: reconcile.Result{}, + expectVMGExists: true, + expectedMembersCount: 2, + }, + // VMG Placement Annotation + { + name: "Should add Placement annotation after Placement ready", initialObjects: []client.Object{ cluster.DeepCopy(), md1.DeepCopy(), vsm1.DeepCopy(), + machine1.DeepCopy(), + md2.DeepCopy(), + vsm2.DeepCopy(), + machine2.DeepCopy(), + vmgReady.DeepCopy(), + }, + expectedResult: reconcile.Result{}, + expectVMGExists: true, + expectedMembersCount: 2, + expectedAnnotations: map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA, + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName2): zoneB, }, - expectedResult: reconcile.Result{}, - checkVMGExists: true, }, { - name: "Should Succeed if VMG is already existed", + name: "Should cleanup stale VMG annotation for deleted MD", initialObjects: []client.Object{ cluster.DeepCopy(), + // MD1,MD2 is active md1.DeepCopy(), vsm1.DeepCopy(), - func() client.Object { - v := vmoprv1.VirtualMachineGroup{ - ObjectMeta: metav1.ObjectMeta{ - Name: clusterName, - Namespace: clusterNamespace, - Labels: map[string]string{clusterv1.ClusterNameLabel: cluster.Name}, - }} - v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{ - {Members: []vmoprv1.GroupMember{ - { - Name: vsm1.Name, - Kind: memberKind, - }}}} - - conditions.Set(&v, metav1.Condition{ - Type: vmoprv1.ReadyConditionType, - Status: metav1.ConditionTrue, - }) - return v.DeepCopyObject().(client.Object) - }(), + machine1.DeepCopy(), + md2.DeepCopy(), + vsm2.DeepCopy(), + machine2.DeepCopy(), + // VMG has annotations and a stale one for md-old + newVMG(clusterName, clusterNamespace, readyVMGMembers, true, map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA, + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName2): zoneB, + fmt.Sprintf("%s/md-old", ZoneAnnotationPrefix): "zone-c", + }), + }, + expectedResult: reconcile.Result{}, + expectVMGExists: true, + expectedMembersCount: 1, + expectedAnnotations: map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA, + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName2): zoneB, }, }, + { + name: "Should block adding member if VMG not Ready (waiting for initial placement)", + initialObjects: []client.Object{ + cluster.DeepCopy(), + // MD1 spec is 2 (scale-up target) + newMachineDeployment(mdName1, clusterName, clusterNamespace, true, ptr.To(int32(2))), + // Only 1 VSM currently exists (vsm-1) for MD1 + vsm1.DeepCopy(), + machine1.DeepCopy(), + vsm2.DeepCopy(), + machine2.DeepCopy(), + newVSphereMachine("vsm-new", mdName1, false, false, nil), + // VMG exists but is NOT Ready (simulating placement in progress) + newVMG(clusterName, clusterNamespace, readyVMGMembers, false, nil), + }, + expectedResult: reconcile.Result{}, + expectVMGExists: true, + // Expect an error because isCreateOrPatchAllowed blocks + expectedErrorMsg: "waiting for VirtualMachineGroup", + expectedMembersCount: 2, + }, + { + name: "Should block adding member if VMG Ready but MD annotation is missing", + initialObjects: []client.Object{ + cluster.DeepCopy(), + newMachineDeployment(mdName1, clusterName, clusterNamespace, true, ptr.To(int32(2))), + // Only vsm-1 currently exists for MD1 + vsm1.DeepCopy(), + machine1.DeepCopy(), + vsm2.DeepCopy(), + machine2.DeepCopy(), + // vsm-new is the new member requiring placement + newVSphereMachine("vsm-new", mdName1, false, false, nil), + newMachine("machine-vsm-new", mdName1, ""), + // VMG is Ready, but has no 
placement annotations + vmgReady.DeepCopy(), + }, + expectedResult: reconcile.Result{}, + expectVMGExists: true, + // Expected error from isCreateOrPatchAllowed: waiting for placement annotation + expectedErrorMsg: fmt.Sprintf("waiting for placement annotation %s/%s", ZoneAnnotationPrefix, mdName1), + expectedMembersCount: 2, + }, + { + name: "Should succeed adding member when VMG Ready AND placement annotation exists", + initialObjects: []client.Object{ + cluster.DeepCopy(), + newMachineDeployment(mdName1, clusterName, clusterNamespace, true, ptr.To(int32(2))), + vsm1.DeepCopy(), + machine1.DeepCopy(), + vsm2.DeepCopy(), + machine2.DeepCopy(), + newVSphereMachine("vsm-new", mdName1, false, false, nil), + newMachine("machine-vsm-new", mdName1, ""), + // VMG is Placed (Ready + Annotation) + vmgPlaced.DeepCopy(), + }, + expectedResult: reconcile.Result{}, + expectVMGExists: true, + expectedMembersCount: 2, + }, + { + name: "Should succeed adding member if new member has FailureDomain set", + initialObjects: []client.Object{ + cluster.DeepCopy(), + newMachineDeployment("md-new", clusterName, clusterNamespace, true, ptr.To(int32(2))), + vsm1.DeepCopy(), + machine1.DeepCopy(), + vsm2.DeepCopy(), + machine2.DeepCopy(), + newVSphereMachine("vsm-new", "md-new", false, false, nil), + // New machine has a FailureDomain set, which bypasses the VMG placement annotation check + newMachine("machine-vsm-new", "md-new", "zone-new"), + // VMG is Ready, but has no placement annotation for new machine deployment (this should be bypassed) + vmgReady.DeepCopy(), + }, + expectedResult: reconcile.Result{}, + expectVMGExists: true, + expectedMembersCount: 2, // Scale-up should succeed due to FailureDomain bypass + }, } for _, tt := range tests { @@ -768,6 +944,12 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { result, err := reconciler.Reconcile(ctx, req) + if tt.expectedErrorMsg != "" { + g.Expect(err).To(HaveOccurred()) + g.Expect(err.Error()).To(ContainSubstring(tt.expectedErrorMsg)) + return + } + g.Expect(err).NotTo(HaveOccurred(), "Reconcile should not return an error") g.Expect(result).To(Equal(tt.expectedResult)) @@ -775,17 +957,21 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { vmgKey := types.NamespacedName{Name: clusterName, Namespace: clusterNamespace} err = fakeClient.Get(ctx, vmgKey, vmg) - if tt.checkVMGExists { + if tt.expectVMGExists { g.Expect(err).NotTo(HaveOccurred(), "VMG should exist") // Check that the core fields were set by the MutateFn g.Expect(vmg.Labels).To(HaveKeyWithValue(clusterv1.ClusterNameLabel, clusterName)) - g.Expect(vmg.Spec.BootOrder).To(HaveLen(1)) - expected, err := getExpectedVSphereMachineCount(ctx, fakeClient, tt.initialObjects[0].(*clusterv1.Cluster)) - g.Expect(err).NotTo(HaveOccurred(), "Should get expected Machines") - g.Expect(vmg.Spec.BootOrder[0].Members).To(HaveLen(int(expected))) - + // Check member count + g.Expect(vmg.Spec.BootOrder).To(HaveLen(tt.expectedMembersCount), "VMG members count mismatch") + // Check annotations + if tt.expectedAnnotations != nil { + g.Expect(vmg.Annotations).To(Equal(tt.expectedAnnotations)) + } // VMG members should match the VSphereMachine name g.Expect(vmg.Spec.BootOrder[0].Members[0].Name).To(Equal("vsm-1")) + } else { + // Check VMG does not exist if expected + g.Expect(apierrors.IsNotFound(err)).To(BeTrue()) } }) } @@ -842,6 +1028,14 @@ func newVSphereMachine(name, mdName string, isCP, deleted bool, namingStrategy * vsm.Finalizers = []string{"test.finalizer.0"} 
vsm.DeletionTimestamp = &metav1.Time{Time: time.Now()} } + + vsm.OwnerReferences = []metav1.OwnerReference{ + { + Kind: "Machine", + Name: fmt.Sprintf("machine-%s", name), + }, + } + return vsm } @@ -894,3 +1088,60 @@ func newTestCluster(name, namespace string) *clusterv1.Cluster { }, } } + +// Helper to create a new CAPI Machine. +func newMachine(name, mdName, fd string) *clusterv1.Machine { + m := &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: clusterNamespace, + Labels: map[string]string{ + clusterv1.ClusterNameLabel: clusterName, + clusterv1.MachineDeploymentNameLabel: mdName, + }, + }, + Spec: clusterv1.MachineSpec{ + FailureDomain: fd, + }, + } + // Machine owner reference for VSphereMachine + m.OwnerReferences = []metav1.OwnerReference{ + { + APIVersion: vmwarev1.GroupVersion.String(), + Kind: "VSphereMachine", + Name: strings.TrimPrefix(name, "machine-"), // VSphereMachine Name matches VM Name logic + }, + } + return m +} + +// Helper to create a new VMG with a list of members and conditions. +func newVMG(name, ns string, members []vmoprv1.GroupMember, ready bool, annotations map[string]string) *vmoprv1.VirtualMachineGroup { + v := &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: ns, + Labels: map[string]string{clusterv1.ClusterNameLabel: name}, + Annotations: annotations, + Finalizers: []string{"vmg.test.finalizer"}, + }, + Spec: vmoprv1.VirtualMachineGroupSpec{ + BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ + {Members: members}, + }, + }, + } + if ready { + conditions.Set(v, metav1.Condition{ + Type: vmoprv1.ReadyConditionType, + Status: metav1.ConditionTrue, + }) + v.Status = vmoprv1.VirtualMachineGroupStatus{ + Members: []vmoprv1.VirtualMachineGroupMemberStatus{ + newVMGMemberStatus("vsm-1", "VirtualMachine", true, true, zoneA), + newVMGMemberStatus("vsm-2", "VirtualMachine", true, true, zoneB), + }, + } + } + return v +} From ac0ec4ba4ebce3c3ce2e3cf6f3d5bfd5d1c77b81 Mon Sep 17 00:00:00 2001 From: fabriziopandini Date: Wed, 26 Nov 2025 01:43:09 +0100 Subject: [PATCH 22/25] POC AAF --- .../vmware/virtualmachinegroup_reconciler.go | 595 ++++---- .../virtualmachinegroup_reconciler_test.go | 1216 +++++++++++++++-- pkg/services/vmoperator/vmopmachine.go | 5 + 3 files changed, 1359 insertions(+), 457 deletions(-) diff --git a/controllers/vmware/virtualmachinegroup_reconciler.go b/controllers/vmware/virtualmachinegroup_reconciler.go index f8f97ec414..0977044011 100644 --- a/controllers/vmware/virtualmachinegroup_reconciler.go +++ b/controllers/vmware/virtualmachinegroup_reconciler.go @@ -20,23 +20,22 @@ package vmware import ( "context" "fmt" + "maps" + "slices" "strings" "github.com/pkg/errors" vmoprv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha2" - "golang.org/x/exp/slices" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/tools/record" "k8s.io/klog/v2" + "k8s.io/utils/ptr" clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" "sigs.k8s.io/cluster-api/util/conditions" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" - "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/reconcile" vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1" @@ -107,428 +106,330 @@ func (r *VirtualMachineGroupReconciler) Reconcile(ctx context.Context, 
req ctrl. return reconcile.Result{}, nil } - return r.createOrUpdateVirtualMachineGroup(ctx, cluster) + return r.reconcileNormal(ctx, cluster) } // createOrUpdateVirtualMachineGroup Create or Update VirtualMachineGroup. -func (r *VirtualMachineGroupReconciler) createOrUpdateVirtualMachineGroup(ctx context.Context, cluster *clusterv1.Cluster) (reconcile.Result, error) { - log := ctrl.LoggerFrom(ctx) - - // Get current VSphereMachines of all MachineDeployments. - currentVSphereMachines, err := getCurrentVSphereMachines(ctx, r.Client, cluster.Namespace, cluster.Name) +func (r *VirtualMachineGroupReconciler) reconcileNormal(ctx context.Context, cluster *clusterv1.Cluster) (reconcile.Result, error) { + // Get all the data required for computing the desired VMG. + currentVMG, err := r.getVirtualMachineGroup(ctx, cluster) if err != nil { return reconcile.Result{}, err } - - vmg := &vmoprv1.VirtualMachineGroup{} - key := &client.ObjectKey{ - Namespace: cluster.Namespace, - Name: cluster.Name, + vSphereMachines, err := r.getVSphereMachines(ctx, cluster) + if err != nil { + return reconcile.Result{}, err + } + machineDeployments, err := r.getMachineDeployments(ctx, cluster) + if err != nil { + return reconcile.Result{}, err } - if err := r.Client.Get(ctx, *key, vmg); err != nil { - if !apierrors.IsNotFound(err) { - return reconcile.Result{}, errors.Wrapf(err, "failed to get VirtualMachineGroup %s", klog.KObj(vmg)) - } - - // If the VirtualMachineGroup does not exist yet, - // calculate expected VSphereMachine count of all MachineDeployments. - expectedVSphereMachineCount, err := getExpectedVSphereMachineCount(ctx, r.Client, cluster) - if err != nil { - return reconcile.Result{}, errors.Wrapf(err, "failed to get expected Machines of all MachineDeployment, Cluster %s", klog.KObj(cluster)) - } - - // Since CAPV retrieves placement decisions from the VirtualMachineGroup to guide - // day-2 worker VM placement. At least one VM is expected for each MachineDeployment. - // If no worker of MachineDeployment is defined,the controller - // interprets this as an intentional configuration, just logs the observation and no-op. - if expectedVSphereMachineCount == 0 { - log.Info("Found 0 desired VSphereMachine of MachineDeployment, stop reconcile") - return reconcile.Result{}, nil - } - - // Wait for all intended VSphereMachines corresponding to MachineDeployment to exist only during initial Cluster creation. - // For day-2, VirtualMachineGroup exists and should not run into here wait for VSphereMachines. - currentVSphereMachineCount := int32(len(currentVSphereMachines)) - if currentVSphereMachineCount != expectedVSphereMachineCount { - log.Info("Waiting for expected VSphereMachines required for the initial placement call", "Expected:", expectedVSphereMachineCount, - "Current:", currentVSphereMachineCount, "Cluster", klog.KObj(cluster)) + // Before initial placement VirtualMachineGroup does not exist yet. + if currentVMG == nil { + // VirtualMachineGroup creation starts the initial placement process that should take care + // of spreading VSphereMachines across failure domains in an ideal way / according to user intent. + // The initial placement can be performed only when all the VSphereMachines to be considered for the + // placement decision exists; if this condition is not met, return (watches will trigger new + // reconcile whenever new VSphereMachines are created). 
+		// Note: In case there are no MachineDeployments, or all the MachineDeployments have zero replicas,
+		// no placement decision is required, and thus no VirtualMachineGroup will be created.
+		if !shouldCreateVirtualMachineGroup(ctx, machineDeployments, vSphereMachines) {
 			return reconcile.Result{}, nil
 		}
 
-		vmg = &vmoprv1.VirtualMachineGroup{
-			ObjectMeta: metav1.ObjectMeta{
-				Name:      key.Name,
-				Namespace: key.Namespace,
-			},
-		}
-	}
-
-	// Generate VM names according to the naming strategy set on the VSphereMachine.
-	vmNames := make([]string, 0, len(currentVSphereMachines))
-	for _, machine := range currentVSphereMachines {
-		name, err := vmoperator.GenerateVirtualMachineName(machine.Name, machine.Spec.NamingStrategy)
+		// Computes the new VirtualMachineGroup including all the VSphereMachines to be considered
+		// for the initial placement decision.
+		newVMG, err := computeVirtualMachineGroup(ctx, cluster, machineDeployments, vSphereMachines, nil)
 		if err != nil {
 			return reconcile.Result{}, err
 		}
-		vmNames = append(vmNames, name)
-	}
-	// Sort the VM names alphabetically for consistent ordering
-	slices.Sort(vmNames)
-
-	members := make([]vmoprv1.GroupMember, 0, len(currentVSphereMachines))
-	for _, name := range vmNames {
-		members = append(members, vmoprv1.GroupMember{
-			Name: name,
-			Kind: "VirtualMachine",
-		})
+
+		// FIXME: Log. Details?
+		if err := r.Client.Create(ctx, newVMG); err != nil {
+			return reconcile.Result{}, errors.Wrapf(err, "failed to create new VMG")
+		}
+		return reconcile.Result{}, nil
 	}
 
-	// The core purpose of isCreateOrPatchAllowed is to prevent the VirtualMachineGroup from being updated with new members
-	// that require placement, unless the VirtualMachineGroup
-	// has successfully completed its initial placement and added the required
-	// placement annotations. This stabilizes placement decisions before allowing new VMs
-	// to be added under the group.
+	// If the VirtualMachineGroup exists, either the placement decision is being performed, or
+	// the placement decision has already been completed. In both cases, the VirtualMachineGroup
+	// must be kept up to date with the changes that happen to MachineDeployments and vSphereMachines.
 	//
-	// The CreateOrPatch is allowed if:
-	// 1. The VirtualMachineGroup is being initially created.
-	// 2. The update won't add new member:
-	//    1) scale-down operation
-	//    2) no member change.
-	// 3. When the VirtualMachineGroup is placement Ready, continue to check following.
-	//    1) The new member's underlying CAPI Machine has a FailureDomain set (will skip placement process).
-	//    2) The new member requires placement annotation AND the VirtualMachineGroup has the corresponding
-	//       placement annotation for the member's MachineDeployment.
+	// However, while the initial placement decision is being performed, the addition of new
+	// vSphereMachines to the VirtualMachineGroup must be deferred to prevent race conditions.
 	//
-	// This prevents member updates that could lead to new VMs being created
-	// without necessary zone labels, resulting in undesired placement.
-	err = isCreateOrPatchAllowed(ctx, r.Client, members, vmg)
+	// After initial placement, new vSphereMachines will be added to the VirtualMachineGroup for the
+	// sake of consistency, but those machines will be placed in the same failureDomain
+	// already used for the other vSphereMachines in the same MachineDeployment (new vSphereMachines
+	// will align to the initial placement decision). 
+ + // Computes the updated VirtualMachineGroup including reflecting changes in the cluster. + updatedVMG, err := computeVirtualMachineGroup(ctx, cluster, machineDeployments, vSphereMachines, currentVMG) if err != nil { return reconcile.Result{}, err } - // Use CreateOrPatch to create or update the VirtualMachineGroup. - _, err = controllerutil.CreateOrPatch(ctx, r.Client, vmg, func() error { - return r.reconcileVirtualMachineGroup(ctx, vmg, cluster, members) - }) - - return reconcile.Result{}, err + // FIXME: Log. Diff? Details? + if err := r.Client.Patch(ctx, updatedVMG, client.MergeFromWithOptions(currentVMG, client.MergeFromWithOptimisticLock{})); err != nil { + return reconcile.Result{}, errors.Wrapf(err, "failed to patch VMG") + } + return reconcile.Result{}, nil } -// reconcileVirtualMachineGroup mutates the VirtualMachineGroup object to reflect the necessary spec and metadata changes. -func (r *VirtualMachineGroupReconciler) reconcileVirtualMachineGroup(ctx context.Context, vmg *vmoprv1.VirtualMachineGroup, cluster *clusterv1.Cluster, members []vmoprv1.GroupMember) error { - // Set the desired labels - if vmg.Labels == nil { - vmg.Labels = make(map[string]string) +// computeVirtualMachineGroup gets the desired VirtualMachineGroup. +func computeVirtualMachineGroup(ctx context.Context, cluster *clusterv1.Cluster, mds []clusterv1.MachineDeployment, vSphereMachines []vmwarev1.VSphereMachine, existingVMG *vmoprv1.VirtualMachineGroup) (*vmoprv1.VirtualMachineGroup, error) { + // Create an empty VirtualMachineGroup + vmg := &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: cluster.Name, + Namespace: cluster.Namespace, + Annotations: map[string]string{}, + }, } - // Always ensure cluster name label is set - vmg.Labels[clusterv1.ClusterNameLabel] = cluster.Name - if vmg.Annotations == nil { + // If there is an VirtualMachineGroup, clone it into the desired VirtualMachineGroup + // and clean up all the info that must be re-computed during this reconcile. + if existingVMG != nil { + vmg = existingVMG.DeepCopy() vmg.Annotations = make(map[string]string) - } - - // Get all the names of MachineDeployments of the Cluster. - machineDeployments := &clusterv1.MachineDeploymentList{} - if err := r.Client.List(ctx, machineDeployments, - client.InNamespace(cluster.Namespace), - client.MatchingLabels{clusterv1.ClusterNameLabel: cluster.Name}); err != nil { - return err - } - mdNames := []string{} - for _, md := range machineDeployments.Items { - // Skip MachineDeployment marked for removal. - if !md.DeletionTimestamp.IsZero() { - mdNames = append(mdNames, md.Name) + for key, value := range existingVMG.Annotations { + if !strings.HasPrefix(key, ZoneAnnotationPrefix+"/") { + vmg.Annotations[key] = value + } } } + vmg.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{}} - // Add per-md-zone label for day-2 operations once placement of a VM belongs to MachineDeployment is done. - // Do not update per-md-zone label once set, as placement decision should not change without user explicitly - // set failureDomain. - if err := generateVirtualMachineGroupAnnotations(ctx, r.Client, vmg, mdNames); err != nil { - return err - } + // Compute the info required to compute the VirtualMachineGroup. - vmg.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{ - { - Members: members, - }, + // Get the mapping between the virtualMachine name that will be generated by a vSphereMachine + // and the MachineDeployment that controls the vSphereMachine. 
+ virtualMachineNameToMachineDeployment, err := getVirtualMachineNameToMachineDeploymentMapping(ctx, vSphereMachines) + if err != nil { + return nil, err } - // Set the owner reference - if err := controllerutil.SetControllerReference(cluster, vmg, r.Client.Scheme()); err != nil { - return errors.Wrapf(err, "failed to mark Cluster %s as owner of VirtualMachineGroup %s", klog.KObj(cluster), klog.KObj(vmg)) - } + // Sort virtualMachine names to ensure VirtualMachineGroup is generated in a consistent way across reconcile. + sortedVirtualMachineNames := slices.Sorted(maps.Keys(virtualMachineNameToMachineDeployment)) - return nil -} + // Get the mapping between the MachineDeployment and failure domain, which is one of: + // - the failureDomain explicitly assigned by the user to a MachineDeployment (by setting spec.template.spec.failureDomain). + // - the failureDomain selected by the placement decision for a MachineDeployment + // Note: if a MachineDeployment is not included in this mapping, the MachineDeployment is still pending a placement decision. + machineDeploymentToFailureDomain := getMachineDeploymentToFailureDomainMapping(ctx, mds, existingVMG, virtualMachineNameToMachineDeployment) -// isCreateOrPatchAllowed checks if a VirtualMachineGroup is allowd to create or patch by check if BootOrder.Members update is allowed. -func isCreateOrPatchAllowed(ctx context.Context, kubeClient client.Client, targetMember []vmoprv1.GroupMember, vmg *vmoprv1.VirtualMachineGroup) error { - logger := log.FromContext(ctx) - key := client.ObjectKey{ - Namespace: vmg.Namespace, - Name: vmg.Name, + // Set the annotations on the VirtualMachineGroup surfacing the failure domain selected during the + // placement decision for each MachineDeployment. + // Note: when a MachineDeployment will be deleted, the corresponding annotation will be removed (not added anymore by this func). + for md, failureDomain := range machineDeploymentToFailureDomain { + vmg.Annotations[fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, md)] = failureDomain } - // Retrieve the current VirtualMachineGroup state - currentVMG := &vmoprv1.VirtualMachineGroup{} - if err := kubeClient.Get(ctx, key, currentVMG); err != nil { - if apierrors.IsNotFound(err) { - // 1. If VirtualMachineGroup is not found, allow CreateOrPatch as it should be in initial creation phase. - logger.V(6).Info("VirtualMachineGroup not created yet, allowing create") - return nil - } - return errors.Wrapf(err, "failed to get VirtualMachineGroup %s/%s, blocking patch", vmg.Namespace, vmg.Name) - } - // Copy retrieved data back to the input pointer for consistency. - *vmg = *currentVMG - - // Get current member names from VirtualMachineGroup Spec.BootOrder. - currentMemberNames := make(map[string]struct{}) - if len(vmg.Spec.BootOrder) > 0 { - for _, m := range vmg.Spec.BootOrder[0].Members { - currentMemberNames[m.Name] = struct{}{} + // Compute the list of Members for the VirtualMachineGroup. + + // If the VirtualMachineGroup is being created, ensure that all the existing VirtualMachines are + // included in the VirtualMachineGroup for the initial placement decision. + if existingVMG == nil { + for _, virtualMachineName := range sortedVirtualMachineNames { + vmg.Spec.BootOrder[0].Members = append(vmg.Spec.BootOrder[0].Members, vmoprv1.GroupMember{ + Name: virtualMachineName, + Kind: "VirtualMachine", + }) } + return vmg, nil } - // 2. If removing members, allow immediately since it doesn't impact placement or placement annotation set. 
- if len(targetMember) < len(currentMemberNames) { - logger.V(6).Info("Scaling down detected (fewer target members), allowing patch.") - return nil - } + // If the VirtualMachineGroup exists, keep this list of VirtualMachine up to date. + // Note: while the initial placement decision is being performed, the addition of new + // VirtualMachine to the VirtualMachineGroup must be deferred to prevent race conditions. + // + // After initial placement, new VirtualMachine will be added to the VirtualMachineGroup for + // sake of consistency, but those machines will be placed in the same failureDomain + // already used for the other VirtualMachine in the same MachineDeployment (new VirtualMachine + // will align to the initial placement decision). - var newMembers []vmoprv1.GroupMember - for _, m := range targetMember { - if _, exists := currentMemberNames[m.Name]; !exists { - newMembers = append(newMembers, m) + existingVirtualMachineNames := sets.New[string]() + if len(existingVMG.Spec.BootOrder) > 0 { + for _, member := range existingVMG.Spec.BootOrder[0].Members { + existingVirtualMachineNames.Insert(member.Name) } } - // If no new member added, allow patch. - if len(newMembers) == 0 { - logger.V(6).Info("No new member detected, allowing patch.") - return nil - } + for _, virtualMachineName := range sortedVirtualMachineNames { + // If a VirtualMachine is already part of the VirtualMachineGroup, keep it in the VirtualMachineGroup + // Note: when a VirtualMachine will be deleted, the corresponding member will be removed (not added anymore by this func) + if existingVirtualMachineNames.Has(virtualMachineName) { + vmg.Spec.BootOrder[0].Members = append(vmg.Spec.BootOrder[0].Members, vmoprv1.GroupMember{ + Name: virtualMachineName, + Kind: "VirtualMachine", + }) + continue + } - // 3. If initial placement is still in progress, block adding new member. - if !conditions.IsTrue(vmg, vmoprv1.ReadyConditionType) { - return fmt.Errorf("waiting for VirtualMachineGroup %s to get condition %s to true, temporarily blocking patch", klog.KObj(vmg), vmoprv1.ReadyConditionType) + // If a VirtualMachine is not yet in the VirtualMachineGroup, it should be added only if + // the VirtualMachine is controlled by a MachineDeployment for which the placement decision is already + // completed. + // Note: If the placement decision for the MachineDeployment controlling a VirtualMachine is still pending, + // this logic defers adding the VirtualMachine in the VirtualMachineGroup to prevent race conditions. + md := virtualMachineNameToMachineDeployment[virtualMachineName] + if _, isPlaced := machineDeploymentToFailureDomain[md]; isPlaced { + vmg.Spec.BootOrder[0].Members = append(vmg.Spec.BootOrder[0].Members, vmoprv1.GroupMember{ + Name: virtualMachineName, + Kind: "VirtualMachine", + }) + } } - // 4. Check newly added members for Machine.Spec.FailureDomain via VSphereMachine.If a member belongs to a Machine - // which has failureDomain specified, allow it since it will skip the placement - // process. If not, continue to check if the belonging MachineDeployment has got placement annotation. - for _, newMember := range newMembers { - vsphereMachineKey := types.NamespacedName{ - Namespace: vmg.Namespace, - Name: newMember.Name, // Member Name is the VSphereMachine Name. 
- } - vsphereMachine := &vmwarev1.VSphereMachine{} - if err := kubeClient.Get(ctx, vsphereMachineKey, vsphereMachine); err != nil { - if apierrors.IsNotFound(err) { - return errors.Wrapf(err, "VSphereMachine for new member %s not found, temporarily blocking patch", newMember.Name) - } - return errors.Wrapf(err, "failed to get VSphereMachine %s", klog.KRef(newMember.Name, vmg.Namespace)) + return vmg, nil +} + +// getMachineDeploymentToFailureDomainMapping returns the mapping between MachineDeployment and failure domain. +// The mapping is computed according to following rules: +// - If the MachineDeployment is explicitly assigned to a failure domain by setting spec.template.spec.failureDomain, +// use this value for the mapping. +// - If the annotations on the VirtualMachineGroup already has the failure domain selected during the +// initial placement decision for a MachineDeployment, use it. +// - If annotations on the VirtualMachineGroup are not yet set, try to get the failure domain selected +// during the initial placement decision from VirtualMachineGroup status (placement decision just completed). +// - If none of the above rules are satisfied, the MachineDeployment is still pending a placement decision. +// +// Note: In case the failure domain is explicitly assigned by setting spec.template.spec.failureDomain, the mapping always +// report the latest value for this field (even if there might still be Machines yet to be rolled out to the new failure domain). +func getMachineDeploymentToFailureDomainMapping(_ context.Context, mds []clusterv1.MachineDeployment, existingVMG *vmoprv1.VirtualMachineGroup, virtualMachineNameToMachineDeployment map[string]string) map[string]string { + machineDeploymentToFailureDomainMapping := map[string]string{} + for _, md := range mds { + if !md.DeletionTimestamp.IsZero() { + continue } - var machineOwnerName string - for _, owner := range vsphereMachine.OwnerReferences { - if owner.Kind == "Machine" { - machineOwnerName = owner.Name - break - } + // If the MachineDeployment is explicitly assigned to a failure domain by setting spec.template.spec.failureDomain, use this value for the mapping. + if md.Spec.Template.Spec.FailureDomain != "" { + machineDeploymentToFailureDomainMapping[md.Name] = md.Spec.Template.Spec.FailureDomain + continue } - if machineOwnerName == "" { - // VSphereMachine found but owner Machine reference is missing - return fmt.Errorf("VSphereMachine %s found but owner Machine reference is missing, temporarily blocking patch", newMember.Name) + // If the MachineDeployment is not explicitly assigned to a failure domain (spec.template.spec.failureDomain is empty), + // and VirtualMachineGroup does not exist yet, the MachineDeployment is still pending a placement decision. + if existingVMG == nil { + continue } - machineKey := types.NamespacedName{ - Namespace: vmg.Namespace, - Name: machineOwnerName, + // If the VirtualMachineGroup exist, check if the placement decision for the MachineDeployment + // has been already surfaced into the VirtualMachineGroup annotations. 
+		if failureDomain := existingVMG.Annotations[fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, md.Name)]; failureDomain != "" {
+			machineDeploymentToFailureDomainMapping[md.Name] = failureDomain
+			continue
 		}
-		machine := &clusterv1.Machine{}
-		if err := kubeClient.Get(ctx, machineKey, machine); err != nil {
-			if apierrors.IsNotFound(err) {
-				return errors.Wrapf(err, "Machine %s not found via owner reference, temporarily blocking patch", klog.KRef(machineOwnerName, vmg.Namespace))
+		// If the placement decision for the MachineDeployment is not yet surfaced in the annotations, try to get the failure domain selected
+		// during the initial placement decision from VirtualMachineGroup status (placement decision just completed).
+		// Note: this info will surface in VirtualMachineGroup annotations at the end of the current reconcile.
+		for _, member := range existingVMG.Status.Members {
+			// Ignore members controlled by other MachineDeployments
+			if memberMD := virtualMachineNameToMachineDeployment[member.Name]; memberMD != md.Name {
+				continue
+			}
+
+			// Consider only VirtualMachineGroup members for which the placement decision has been completed.
+			// Note: given that all the VirtualMachines in a MachineDeployment must be placed in the
+			// same failure domain / zone, the mapping can be inferred as soon as one member is placed.
+			if !conditions.IsTrue(&member, vmoprv1.VirtualMachineGroupMemberConditionPlacementReady) {
+				continue
+			}
+			if member.Placement != nil && member.Placement.Zone != "" {
+				// FIXME: log
+				machineDeploymentToFailureDomainMapping[md.Name] = member.Placement.Zone
+				break
 			}
-			return errors.Wrapf(err, "failed to get CAPI Machine %s", klog.KRef(machineOwnerName, vmg.Namespace))
 		}
+	}
+	return machineDeploymentToFailureDomainMapping
+}
 
-		// If FailureDomain is set on CAPI Machine, placement process will be skipped. Allow update for this member.
-		fd := machine.Spec.FailureDomain
-		if fd != "" {
-			logger.V(6).Info("New member's Machine has FailureDomain specified. Allowing patch", "Member", newMember.Name)
+// getVirtualMachineNameToMachineDeploymentMapping returns the mapping between VirtualMachine name and corresponding MachineDeployment.
+// The mapping is inferred from vSphereMachines; please note:
+// - The name of the VirtualMachine generated by a vSphereMachine can be computed in a deterministic way (it is not required to wait for the VirtualMachine to exist)
+// - The name of the MachineDeployment corresponding to a vSphereMachine can be derived from the label that is propagated by CAPI.
+func getVirtualMachineNameToMachineDeploymentMapping(_ context.Context, vSphereMachines []vmwarev1.VSphereMachine) (map[string]string, error) {
+	virtualMachineNameToMachineDeployment := map[string]string{}
+	for _, vsphereMachine := range vSphereMachines {
+		if !vsphereMachine.DeletionTimestamp.IsZero() {
 			continue
 		}
-		// 5. If FailureDomain is NOT set. Requires placement or placement Annotation. Fall through to Annotation check.
-		// If no Placement Annotations, block member update and wait for it. 
-		annotations := vmg.GetAnnotations()
-		if len(annotations) == 0 {
-			return fmt.Errorf("waiting for placement annotation to add VMG member %s, temporarily blocking patch", newMember.Name)
+		virtualMachineName, err := vmoperator.GenerateVirtualMachineName(vsphereMachine.Name, vsphereMachine.Spec.NamingStrategy)
+		if err != nil {
+			return nil, err
 		}
-
-		mdLabelName := vsphereMachine.Labels[clusterv1.MachineDeploymentNameLabel]
-		if mdLabelName == "" {
-			return fmt.Errorf("VSphereMachine doesn't have MachineDeployment name label %s, blocking patch", klog.KObj(vsphereMachine))
+		if md := vsphereMachine.Labels[clusterv1.MachineDeploymentNameLabel]; md != "" {
+			virtualMachineNameToMachineDeployment[virtualMachineName] = md
 		}
+	}
+	return virtualMachineNameToMachineDeployment, nil
+}
 
-		annotationKey := fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdLabelName)
-		if _, found := annotations[annotationKey]; !found {
-			return fmt.Errorf("waiting for placement annotation %s to add VMG member %s, temporarily blocking patch", annotationKey, newMember.Name)
+// shouldCreateVirtualMachineGroup should return true when the conditions to create a VirtualMachineGroup are met.
+func shouldCreateVirtualMachineGroup(ctx context.Context, mds []clusterv1.MachineDeployment, vSphereMachines []vmwarev1.VSphereMachine) bool {
+	log := ctrl.LoggerFrom(ctx)
+
+	// Gets the total number of worker machines that should exist in the cluster at a given time.
+	// Note: Deleting MachineDeployments are ignored.
+	var expectedVSphereMachineCount int32
+	for _, md := range mds {
+		if md.DeletionTimestamp.IsZero() {
+			expectedVSphereMachineCount += ptr.Deref(md.Spec.Replicas, 0)
 		}
 	}
 
-	logger.V(6).Info("All newly added members either existed or have satisfied placement requirements, allowing patch")
-	return nil
-}
+	// In case there are no MachineDeployments or all the MachineDeployments have zero replicas, there is
+	// no need to create a VirtualMachineGroup.
+	if expectedVSphereMachineCount == 0 {
+		return false
+	}
 
-// getExpectedVSphereMachineCount get expected total count of Machines belonging to the Cluster.
-func getExpectedVSphereMachineCount(ctx context.Context, kubeClient client.Client, cluster *clusterv1.Cluster) (int32, error) {
-	var mdList clusterv1.MachineDeploymentList
-	if err := kubeClient.List(
-		ctx,
-		&mdList,
-		client.InNamespace(cluster.Namespace),
-		client.MatchingLabels{clusterv1.ClusterNameLabel: cluster.Name},
-	); err != nil {
-		return 0, errors.Wrap(err, "failed to list MachineDeployments")
+	// If the number of worker VSphereMachines matches the number of expected replicas in the MachineDeployments,
+	// then all the VSphereMachines required for the initial placement decision exist, and it is possible to create
+	// the VirtualMachineGroup. 
+ // FIXME: we should probably include in the count only machines for MD included above (otherwise machines from deleting MS might lead to false positives / negatives + currentVSphereMachineCount := int32(len(vSphereMachines)) + if currentVSphereMachineCount != expectedVSphereMachineCount { + log.Info(fmt.Sprintf("Waiting for VSphereMachines required for the initial placement (expected %d, current %d)", expectedVSphereMachineCount, currentVSphereMachineCount)) + return false } + return true +} - var total int32 - for _, md := range mdList.Items { - // Skip MachineDeployment marked for removal - if md.DeletionTimestamp.IsZero() && md.Spec.Replicas != nil { - total += *md.Spec.Replicas +func (r *VirtualMachineGroupReconciler) getVirtualMachineGroup(ctx context.Context, cluster *clusterv1.Cluster) (*vmoprv1.VirtualMachineGroup, error) { + vmg := &vmoprv1.VirtualMachineGroup{} + if err := r.Client.Get(ctx, client.ObjectKeyFromObject(cluster), vmg); err != nil { + if !apierrors.IsNotFound(err) { + return nil, errors.Wrapf(err, "failed to get VirtualMachineGroup %s", klog.KObj(vmg)) } + return nil, nil } - - return total, nil + return vmg, nil } -// getCurrentVSphereMachines returns the list of VSphereMachines belonging to the Cluster’s MachineDeployments. -// VSphereMachines marked for removal are excluded from the result. -func getCurrentVSphereMachines(ctx context.Context, kubeClient client.Client, clusterNamespace, clusterName string) ([]vmwarev1.VSphereMachine, error) { - // List VSphereMachine objects +func (r *VirtualMachineGroupReconciler) getVSphereMachines(ctx context.Context, cluster *clusterv1.Cluster) ([]vmwarev1.VSphereMachine, error) { var vsMachineList vmwarev1.VSphereMachineList - if err := kubeClient.List(ctx, &vsMachineList, - client.InNamespace(clusterNamespace), - client.MatchingLabels{clusterv1.ClusterNameLabel: clusterName}, + if err := r.Client.List(ctx, &vsMachineList, + client.InNamespace(cluster.Namespace), + client.MatchingLabels{clusterv1.ClusterNameLabel: cluster.Name}, client.HasLabels{clusterv1.MachineDeploymentNameLabel}, ); err != nil { - return nil, errors.Wrapf(err, "failed to list VSphereMachines of Cluster %s", klog.KRef(clusterNamespace, clusterName)) + return nil, errors.Wrap(err, "failed to get VSphereMachines") } - - var result []vmwarev1.VSphereMachine - for _, vs := range vsMachineList.Items { - if vs.DeletionTimestamp.IsZero() { - result = append(result, vs) - } - } - return result, nil + return vsMachineList.Items, nil } -// generateVirtualMachineGroupAnnotations checks the VMG status for placed members, verifies their ownership -// by fetching the corresponding VSphereMachine, and extracts the zone information to persist it -// as an annotation on the VMG object for Day-2 operations. It will also clean up -// any existing placement annotations that correspond to MachineDeployments that no longer exist. -// -// The function attempts to find at least one successfully placed VM (VirtualMachineGroupMemberConditionPlacementReady==True) -// for each MachineDeployment and records its zone. Once a Zone is recorded for an MD, subsequent VMs -// belonging to that same MD are skipped. 
-func generateVirtualMachineGroupAnnotations(ctx context.Context, kubeClient client.Client, vmg *vmoprv1.VirtualMachineGroup, machineDeployments []string) error { - log := ctrl.LoggerFrom(ctx) - log.V(5).Info(fmt.Sprintf("Generating annotations for VirtualMachineGroup %s/%s", vmg.Name, vmg.Namespace)) - - if vmg.Annotations == nil { - vmg.Annotations = make(map[string]string) - } - annotations := vmg.Annotations - - // If a MachineDeployment has been deleted, its corresponding placement annotation - // on the VirtualMachineGroup should also be removed to avoid configuration drift. - activeMDs := sets.New(machineDeployments...) - - // Iterate over existing VirtualMachineGroup annotations and delete those that are stale. - for key := range annotations { - if !strings.HasPrefix(key, ZoneAnnotationPrefix+"/") { - // Skip non-placement annotations - continue - } - - mdName := strings.TrimPrefix(key, ZoneAnnotationPrefix+"/") - - // If the MD name is NOT in the list of currently active MDs, delete the annotation. - if !activeMDs.Has(mdName) { - log.Info(fmt.Sprintf("Cleaning up stale placement annotation for none-existed MachineDeployment %s", mdName)) - delete(annotations, key) - } - } - - // Iterate through the VirtualMachineGroup's members in Status. - for _, member := range vmg.Status.Members { - ns := vmg.Namespace - - // Skip it if member's VirtualMachineGroupMemberConditionPlacementReady is still not true. - if !conditions.IsTrue(&member, vmoprv1.VirtualMachineGroupMemberConditionPlacementReady) { - continue - } - - // Get VSphereMachine which share the same Name of the member Name and get the MachineDeployment Name it belonged to. - vsmKey := types.NamespacedName{ - Name: member.Name, - Namespace: vmg.Namespace, - } - vsm := &vmwarev1.VSphereMachine{} - if err := kubeClient.Get(ctx, vsmKey, vsm); err != nil { - if apierrors.IsNotFound(err) { - log.Info(fmt.Sprintf("VSphereMachine %s/%s by member Name %s is not found, skipping it", member.Name, ns, member.Name)) - continue - } - return errors.Wrapf(err, "failed to get VSphereMachine %s/%s", member.Name, ns) - } - - mdName, found := vsm.Labels[clusterv1.MachineDeploymentNameLabel] - if !found { - log.Info(fmt.Sprintf("Failed to get MachineDeployment label from VSphereMachine %s/%s, skipping it", member.Name, ns)) - continue - } - - // If we already found placement for this MachineDeployment, continue and move to next member. - if _, found := annotations[fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName)]; found { - continue - } - - // Check if this VM belongs to any of our target MachineDeployments. - if !activeMDs.Has(mdName) { - log.V(5).Info("Skipping member as its MachineDeployment name is not in the known list.", - "VMName", member.Name, "MDName", mdName) - continue - } - - // Get the VM placement information by member status. - // VMs that have undergone placement do not have Placement info set, skip. - // VMs of Machine with failureDomain specified do not have Placement info set, skip. - if member.Placement == nil { - log.V(5).Info(fmt.Sprintf("VM %s in VMG %s/%s has no placement info. Placement is nil", member.Name, vmg.Name, ns)) - continue - } - - // Skip to next member if Zone is empty. - zone := member.Placement.Zone - if zone == "" { - log.V(5).Info(fmt.Sprintf("VM %s in VMG %s/%s has no placement info. 
Zone is empty", member.Name, "VMG", ns)) - continue - } - - log.V(5).Info(fmt.Sprintf("VM %s in VMG %s/%s has been placed in zone %s", member.Name, ns, vmg.Name, zone)) - annotations[fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName)] = zone +func (r *VirtualMachineGroupReconciler) getMachineDeployments(ctx context.Context, cluster *clusterv1.Cluster) ([]clusterv1.MachineDeployment, error) { + machineDeployments := &clusterv1.MachineDeploymentList{} + if err := r.Client.List(ctx, machineDeployments, + client.InNamespace(cluster.Namespace), + client.MatchingLabels{clusterv1.ClusterNameLabel: cluster.Name}, + ); err != nil { + return nil, errors.Wrap(err, "failed to list MachineDeployments") } - - return nil + return machineDeployments.Items, nil } diff --git a/controllers/vmware/virtualmachinegroup_reconciler_test.go b/controllers/vmware/virtualmachinegroup_reconciler_test.go index a52ba083c2..db8fa67d68 100644 --- a/controllers/vmware/virtualmachinegroup_reconciler_test.go +++ b/controllers/vmware/virtualmachinegroup_reconciler_test.go @@ -17,58 +17,84 @@ limitations under the License. package vmware import ( - "context" "fmt" - "sort" - "strings" + "testing" - "time" . "github.com/onsi/gomega" vmoprv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha2" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" - "k8s.io/client-go/tools/record" "k8s.io/utils/ptr" clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" - "sigs.k8s.io/cluster-api/util/conditions" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" - "sigs.k8s.io/controller-runtime/pkg/reconcile" vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1" ) -const ( - clusterName = "test-cluster" - otherClusterName = "other-cluster" - clusterNamespace = "test-ns" - mdName1 = "md-worker-a" - mdName2 = "md-worker-b" - mdNameStale = "md-stale-c" - zoneA = "zone-a" - zoneB = "zone-b" - vmgName = "test-vmg" - vmgNamespace = "test-vmg-ns" - memberName1 = "vm-01" - memberName2 = "vm-02" - ownerMachineName1 = "m-01" - ownerMachineName2 = "m-02" - memberKind = "VirtualMachine" - failureDomainA = "zone-a" -) - -func TestIsCreateOrPatchAllowed(t *testing.T) { - ctx := context.Background() - - baseVMG := &vmoprv1.VirtualMachineGroup{ - ObjectMeta: metav1.ObjectMeta{Name: vmgName, Namespace: vmgNamespace}, - Status: vmoprv1.VirtualMachineGroupStatus{}, - Spec: vmoprv1.VirtualMachineGroupSpec{}, +func Test_shouldCreateVirtualMachineGroup(t *testing.T) { + tests := []struct { + name string + mds []clusterv1.MachineDeployment + vSphereMachines []vmwarev1.VSphereMachine + want bool + }{ + { + name: "Should create a VMG if all the expected VSphereMachines exists", + mds: []clusterv1.MachineDeployment{ + *createMD("md1", "test-cluster", "", 2), + *createMD("md2", "test-cluster", "", 1), + *createMD("md3", "test-cluster", "zone1", 1), + }, + vSphereMachines: []vmwarev1.VSphereMachine{ + *createVSphereMachine("m1", "test-cluster", "md1", ""), + *createVSphereMachine("m2", "test-cluster", "md1", ""), + *createVSphereMachine("m3", "test-cluster", "md2", ""), + *createVSphereMachine("m4", "test-cluster", "md3", "zone1"), + }, + want: true, // tot replicas = 4, 4 VSphereMachine exist + }, + { + name: "Should create a VMG if all the expected VSphereMachines exists, deleting MD should be ignored", + mds: []clusterv1.MachineDeployment{ + *createMD("md1", 
"test-cluster", "", 2), + *createMD("md2", "test-cluster", "", 1, func(md *clusterv1.MachineDeployment) { + md.DeletionTimestamp = ptr.To(metav1.Now()) + }), // Should not be included in the count + *createMD("md3", "test-cluster", "zone1", 1), + }, + vSphereMachines: []vmwarev1.VSphereMachine{ + *createVSphereMachine("m1", "test-cluster", "md1", ""), + *createVSphereMachine("m2", "test-cluster", "md1", ""), + *createVSphereMachine("m4", "test-cluster", "md3", "zone1"), + }, + want: true, // tot replicas = 3 (one md is deleting, so not included in the total), 3 VSphereMachine exist + }, + { + name: "Should not create a VMG if some of the expected VSphereMachines does not exist", + mds: []clusterv1.MachineDeployment{ + *createMD("md1", "test-cluster", "", 2), + *createMD("md2", "test-cluster", "", 1), + *createMD("md3", "test-cluster", "zone1", 1), + }, + vSphereMachines: []vmwarev1.VSphereMachine{ + *createVSphereMachine("m1", "test-cluster", "md1", ""), + *createVSphereMachine("m3", "test-cluster", "md2", ""), + *createVSphereMachine("m4", "test-cluster", "md3", "zone1"), + }, + want: false, // tot replicas = 4, 3 VSphereMachine exist + }, + { + name: "Should not create a VMG there are no expected VSphereMachines", + mds: []clusterv1.MachineDeployment{}, // No Machine deployments + vSphereMachines: []vmwarev1.VSphereMachine{}, // No VSphereMachine + want: false, + }, } +<<<<<<< HEAD member := func(name string) vmoprv1.GroupMember { return vmoprv1.GroupMember{Name: name} } @@ -84,60 +110,595 @@ func TestIsCreateOrPatchAllowed(t *testing.T) { } makeCAPIMachineNoFailureDomain := func(name, namespace string) *clusterv1.Machine { return makeCAPIMachine(name, namespace, nil) +======= + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + got := shouldCreateVirtualMachineGroup(ctx, tt.mds, tt.vSphereMachines) + g.Expect(got).To(Equal(tt.want)) + }) +>>>>>>> 9409e432 (POC AAF) } +} - // VSphereMachine helpers - makeVSphereMachineOwned := func(vmName, vmgNamespace, ownerMachineName, mdName string) *vmwarev1.VSphereMachine { - return &vmwarev1.VSphereMachine{ - ObjectMeta: metav1.ObjectMeta{ - Name: vmName, - Namespace: vmgNamespace, - OwnerReferences: []metav1.OwnerReference{ - { - Kind: "Machine", - Name: ownerMachineName, - UID: types.UID(ownerMachineName + "-uid"), +func Test_getVirtualMachineNameToMachineDeploymentMapping(t *testing.T) { + tests := []struct { + name string + vSphereMachines []vmwarev1.VSphereMachine + want map[string]string + }{ + { + name: "mapping from VirtualMachineName to MachineDeployment is inferred from vSphereMachines", + vSphereMachines: []vmwarev1.VSphereMachine{ + *createVSphereMachine("m1", "test-cluster", "md1", ""), + *createVSphereMachine("m2", "test-cluster", "md1", ""), + *createVSphereMachine("m3", "test-cluster", "md2", ""), + *createVSphereMachine("m4", "test-cluster", "md3", "zone1"), + }, + want: map[string]string{ + // Note VirtualMachineName is equal to the VSphereMachine because when using the default + "m1": "md1", + "m2": "md1", + "m3": "md2", + "m4": "md3", + }, + }, + { + name: "mapping from VirtualMachineName to MachineDeployment is inferred from vSphereMachines", + vSphereMachines: []vmwarev1.VSphereMachine{ + *createVSphereMachine("m1", "test-cluster", "md1", ""), + *createVSphereMachine("m2", "test-cluster", "md1", ""), + *createVSphereMachine("m3", "test-cluster", "md2", ""), + *createVSphereMachine("m4", "test-cluster", "md3", "zone1"), + }, + want: map[string]string{ + // Note VirtualMachineName is equal to 
the VSphereMachine name because when using the default naming strategy + "m1": "md1", + "m2": "md1", + "m3": "md2", + "m4": "md3", + }, + }, + { + name: "mapping from VirtualMachineName to MachineDeployment is inferred from vSphereMachines (custom naming strategy)", + vSphereMachines: []vmwarev1.VSphereMachine{ + *createVSphereMachine("m1", "test-cluster", "md1", "", withCustomNamingStrategy()), + *createVSphereMachine("m2", "test-cluster", "md1", "", withCustomNamingStrategy(), func(m *vmwarev1.VSphereMachine) { + m.DeletionTimestamp = ptr.To(metav1.Now()) + }), // Should not be included in the mapping + *createVSphereMachine("m3", "test-cluster", "md2", "", withCustomNamingStrategy()), + *createVSphereMachine("m4", "test-cluster", "md3", "zone1"), + }, + want: map[string]string{ + "m1-vm": "md1", + // "m2-vm" not be included in the count + "m3-vm": "md2", + "m4": "md3", + }, + }, + { + name: "deleting vSphereMachines are not included in the mapping", + vSphereMachines: []vmwarev1.VSphereMachine{ + *createVSphereMachine("m1", "test-cluster", "md1", "", withCustomNamingStrategy()), + *createVSphereMachine("m2", "test-cluster", "md1", "", withCustomNamingStrategy(), func(m *vmwarev1.VSphereMachine) { + m.DeletionTimestamp = ptr.To(metav1.Now()) + }), // Should not be included in the mapping + *createVSphereMachine("m3", "test-cluster", "md2", "", withCustomNamingStrategy()), + *createVSphereMachine("m4", "test-cluster", "md3", "zone1"), + }, + want: map[string]string{ + "m1-vm": "md1", + // "m2-vm" not be included in the count + "m3-vm": "md2", + "m4": "md3", + }, + }, + { + name: "vSphereMachines without the MachineDeploymentNameLabel are not included in the mapping", + vSphereMachines: []vmwarev1.VSphereMachine{ + *createVSphereMachine("m1", "test-cluster", "md1", "", withCustomNamingStrategy()), + *createVSphereMachine("m2", "test-cluster", "md1", "", withCustomNamingStrategy(), func(m *vmwarev1.VSphereMachine) { + delete(m.Labels, clusterv1.MachineDeploymentNameLabel) + }), // Should not be included in the mapping + *createVSphereMachine("m3", "test-cluster", "md2", "", withCustomNamingStrategy()), + *createVSphereMachine("m4", "test-cluster", "md3", "zone1"), + }, + want: map[string]string{ + "m1-vm": "md1", + // "m2-vm" not be included in the count + "m3-vm": "md2", + "m4": "md3", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + got, err := getVirtualMachineNameToMachineDeploymentMapping(ctx, tt.vSphereMachines) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(got).To(Equal(tt.want)) + }) + } +} + +func Test_getMachineDeploymentToFailureDomainMapping(t *testing.T) { + tests := []struct { + name string + mds []clusterv1.MachineDeployment + existingVMG *vmoprv1.VirtualMachineGroup + virtualMachineNameToMachineDeployment map[string]string + want map[string]string + }{ + { + name: "MachineDeployment mapping should use spec.failure domain", + mds: []clusterv1.MachineDeployment{ + *createMD("md1", "test-cluster", "zone1", 1), // failure domain explicitly set + }, + existingVMG: nil, + virtualMachineNameToMachineDeployment: nil, + want: map[string]string{ + "md1": "zone1", + }, + }, + { + name: "MachineDeployment mapping should use spec.failure domain (latest value must be used)", + mds: []clusterv1.MachineDeployment{ + *createMD("md1", "test-cluster", "zone2", 1), // failure domain explicitly set + }, + existingVMG: &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + fmt.Sprintf("%s/%s", 
ZoneAnnotationPrefix, "md1"): "zone1", // Previously md1 was assigned to zone1 }, }, - Labels: map[string]string{clusterv1.MachineDeploymentNameLabel: mdName}, }, - } + virtualMachineNameToMachineDeployment: nil, + want: map[string]string{ + "md1": "zone2", // latest spec.failure must be used + }, + }, + { + name: "MachineDeployment mapping should use placement decision from VirtualMachineGroup annotations", + mds: []clusterv1.MachineDeployment{ + *createMD("md1", "test-cluster", "", 1), // failure domain not explicitly set + }, + existingVMG: &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md1"): "zone1", // Placement decision for md1 already reported into annotation + }, + }, + Status: vmoprv1.VirtualMachineGroupStatus{ + Members: []vmoprv1.VirtualMachineGroupMemberStatus{ + { + Name: "m1-vm", + Placement: &vmoprv1.VirtualMachinePlacementStatus{ + Zone: "zone2", // Note: this should never happen (different placement decision than what is in the annotation), but using this value to validate that the mapping used is the one from the annotation. + }, + }, + }, + }, + }, + virtualMachineNameToMachineDeployment: map[string]string{ + "m1-vm": "md1", + }, + want: map[string]string{ + "md1": "zone1", + }, + }, + { + name: "MachineDeployment mapping should use placement decision from VirtualMachineGroup status", + mds: []clusterv1.MachineDeployment{ + *createMD("md1", "test-cluster", "", 1), // failure domain not explicitly set + }, + existingVMG: &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + // Placement decision for md1 not yet reported into annotation + }, + }, + Status: vmoprv1.VirtualMachineGroupStatus{ + Members: []vmoprv1.VirtualMachineGroupMemberStatus{ + { + Name: "m1-vm", + Placement: &vmoprv1.VirtualMachinePlacementStatus{ + Zone: "zone1", + }, + Conditions: []metav1.Condition{ + { + Type: vmoprv1.VirtualMachineGroupMemberConditionPlacementReady, + Status: metav1.ConditionTrue, + }, + }, + }, + }, + }, + }, + virtualMachineNameToMachineDeployment: map[string]string{ + "m1-vm": "md1", + }, + want: map[string]string{ + "md1": "zone1", + }, + }, + { + name: "MachineDeployment not yet placed (VirtualMachineGroup not yet created)", + mds: []clusterv1.MachineDeployment{ + *createMD("md1", "test-cluster", "", 1), // failure domain not explicitly set + }, + existingVMG: nil, + virtualMachineNameToMachineDeployment: map[string]string{ + "m1-vm": "md1", + }, + want: map[string]string{ + // "md1" not yet placed + }, + }, + { + name: "MachineDeployment not yet placed (VirtualMachineGroup status not yet reporting placement for MachineDeployment's VirtualMachines)", + mds: []clusterv1.MachineDeployment{ + *createMD("md1", "test-cluster", "", 1), // failure domain not explicitly set + }, + existingVMG: &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + // Placement decision for md1 not yet reported into annotation + }, + }, + // Status empty + }, + virtualMachineNameToMachineDeployment: nil, + want: map[string]string{}, // "md1" not yet placed + }, + { + name: "MachineDeployment not yet placed (VirtualMachineGroup status not yet reporting placement completed for MachineDeployment's VirtualMachines)", + mds: []clusterv1.MachineDeployment{ + *createMD("md1", "test-cluster", "", 1), // failure domain not explicitly set + }, + existingVMG: &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + 
Annotations: map[string]string{ + // Placement decision for md1 not yet reported into annotation + }, + }, + Status: vmoprv1.VirtualMachineGroupStatus{ + Members: []vmoprv1.VirtualMachineGroupMemberStatus{ + { + Name: "m1-vm", + Conditions: []metav1.Condition{ + { + Type: vmoprv1.VirtualMachineGroupMemberConditionPlacementReady, + Status: metav1.ConditionFalse, // placement not completed yet + }, + }, + }, + }, + }, + }, + virtualMachineNameToMachineDeployment: map[string]string{ + "m1-vm": "md1", + }, + want: map[string]string{ + // "md1" not yet placed + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + got := getMachineDeploymentToFailureDomainMapping(ctx, tt.mds, tt.existingVMG, tt.virtualMachineNameToMachineDeployment) + g.Expect(got).To(Equal(tt.want)) + }) } - makeVSphereMachineNoOwner := func(vmName, ns string) *vmwarev1.VSphereMachine { - return &vmwarev1.VSphereMachine{ - ObjectMeta: metav1.ObjectMeta{ - Name: vmName, - Namespace: ns, - OwnerReferences: []metav1.OwnerReference{}, +} + +func TestVirtualMachineGroupReconciler_computeVirtualMachineGroup(t *testing.T) { + cluster := &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: metav1.NamespaceDefault, + Name: "test-cluster", + }, + } + tests := []struct { + name string + mds []clusterv1.MachineDeployment + vSphereMachines []vmwarev1.VSphereMachine + existingVMG *vmoprv1.VirtualMachineGroup + want *vmoprv1.VirtualMachineGroup + }{ + // Compute new VirtualMachineGroup (start initial placement) + { + name: "compute new VirtualMachineGroup", + mds: []clusterv1.MachineDeployment{ + *createMD("md1", cluster.Name, "", 2), + *createMD("md2", cluster.Name, "", 1), + *createMD("md3", cluster.Name, "zone1", 1), }, - } + vSphereMachines: []vmwarev1.VSphereMachine{ + *createVSphereMachine("m1", cluster.Name, "md1", "", withCustomNamingStrategy()), + *createVSphereMachine("m2", cluster.Name, "md1", "", withCustomNamingStrategy()), + *createVSphereMachine("m3", cluster.Name, "md2", "", withCustomNamingStrategy()), + *createVSphereMachine("m4", cluster.Name, "md3", "zone1"), + }, + existingVMG: nil, + want: &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: cluster.Namespace, + Name: cluster.Name, + Annotations: map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md3"): "zone1", // failureDomain for md3 is explicitly set by the user + }, + }, + Spec: vmoprv1.VirtualMachineGroupSpec{ + BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ + { + Members: []vmoprv1.GroupMember{ + {Name: "m1-vm", Kind: "VirtualMachine"}, + {Name: "m2-vm", Kind: "VirtualMachine"}, + {Name: "m3-vm", Kind: "VirtualMachine"}, + {Name: "m4", Kind: "VirtualMachine"}, + }, + }, + }, + }, + }, + }, + + // Compute updated VirtualMachineGroup (during initial placement) + { + name: "compute updated VirtualMachineGroup during initial placement", + mds: []clusterv1.MachineDeployment{ + *createMD("md1", cluster.Name, "", 2), + *createMD("md3", cluster.Name, "zone1", 2), + *createMD("md4", cluster.Name, "zone2", 1), + }, + vSphereMachines: []vmwarev1.VSphereMachine{ + *createVSphereMachine("m1", cluster.Name, "md1", "", withCustomNamingStrategy()), + *createVSphereMachine("m5", cluster.Name, "md1", "", withCustomNamingStrategy()), + *createVSphereMachine("m4", cluster.Name, "md3", "zone1"), + *createVSphereMachine("m6", cluster.Name, "md3", "zone1"), + *createVSphereMachine("m7", cluster.Name, "md4", "zone2"), + }, + existingVMG: &vmoprv1.VirtualMachineGroup{ + ObjectMeta: 
metav1.ObjectMeta{ + Namespace: cluster.Namespace, + Name: cluster.Name, + UID: types.UID("uid"), + Annotations: map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md3"): "zone1", // failureDomain for md3 is explicitly set by the user + }, + }, + Spec: vmoprv1.VirtualMachineGroupSpec{ + BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ + { + Members: []vmoprv1.GroupMember{ + {Name: "m1-vm", Kind: "VirtualMachine"}, + {Name: "m2-vm", Kind: "VirtualMachine"}, // Deleted after VMG creation + {Name: "m3-vm", Kind: "VirtualMachine"}, // Deleted after VMG creation (the entire md2 was deleted). + {Name: "m4", Kind: "VirtualMachine"}, + // m5-vm (md1), m6 (md3), m7 (md4) created after VMG creation. + }, + }, + }, + }, + // Not setting status for sake of simplicity (also we are simulating when placing decision is not yet completed) + }, + want: &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: cluster.Namespace, + Name: cluster.Name, + UID: types.UID("uid"), + Annotations: map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md3"): "zone1", // failureDomain for md3 is explicitly set by the user + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md4"): "zone2", // failureDomain for md4 is explicitly set by the user + }, + }, + Spec: vmoprv1.VirtualMachineGroupSpec{ + BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ + { + Members: []vmoprv1.GroupMember{ + {Name: "m1-vm", Kind: "VirtualMachine"}, // existing before, still existing + // "m2-vm" was deleted + // "m3-vm" was deleted + {Name: "m4", Kind: "VirtualMachine"}, // existing before, still existing + // "m5-vm" was added, but it should not be added yet because md1 is not yet placed + {Name: "m6", Kind: "VirtualMachine"}, // added, failureDomain for md3 is explicitly set by the user + {Name: "m7", Kind: "VirtualMachine"}, // added, failureDomain for md4 is explicitly set by the user + }, + }, + }, + }, + }, + }, + + // Compute updated VirtualMachineGroup (after initial placement) + { + name: "compute updated VirtualMachineGroup after initial placement", + mds: []clusterv1.MachineDeployment{ + *createMD("md1", cluster.Name, "", 2), + *createMD("md3", cluster.Name, "zone1", 2), + *createMD("md4", cluster.Name, "zone2", 1), + }, + vSphereMachines: []vmwarev1.VSphereMachine{ + *createVSphereMachine("m1", cluster.Name, "md1", "", withCustomNamingStrategy()), + *createVSphereMachine("m5", cluster.Name, "md1", "", withCustomNamingStrategy()), + *createVSphereMachine("m4", cluster.Name, "md3", "zone1"), + *createVSphereMachine("m6", cluster.Name, "md3", "zone1"), + *createVSphereMachine("m7", cluster.Name, "md4", "zone2"), + }, + existingVMG: &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: cluster.Namespace, + Name: cluster.Name, + UID: types.UID("uid"), + Annotations: map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md1"): "zone4", // failureDomain for md1 set by initial placement + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md2"): "zone5", // failureDomain for md2 set by initial placement + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md3"): "zone1", // failureDomain for md3 is explicitly set by the user + }, + }, + Spec: vmoprv1.VirtualMachineGroupSpec{ + BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ + { + Members: []vmoprv1.GroupMember{ + {Name: "m1-vm", Kind: "VirtualMachine"}, + {Name: "m2-vm", Kind: "VirtualMachine"}, // Deleted after VMG creation + {Name: "m3-vm", Kind: "VirtualMachine"}, // Deleted after VMG creation (the entire md2 
was deleted). + {Name: "m4", Kind: "VirtualMachine"}, + // m5-vm (md1), m6 (md3), m7 (md4) created after VMG creation. + }, + }, + }, + }, + // Not setting status for sake of simplicity (in a real VMG, after the placement decision status should have members) + }, + want: &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: cluster.Namespace, + Name: cluster.Name, + UID: types.UID("uid"), + Annotations: map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md1"): "zone4", // failureDomain for md1 set by initial placement + // annotation for md2 deleted, md2 does not exist anymore + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md3"): "zone1", // failureDomain for md3 is explicitly set by the user + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md4"): "zone2", // failureDomain for md4 is explicitly set by the user + }, + }, + Spec: vmoprv1.VirtualMachineGroupSpec{ + BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ + { + Members: []vmoprv1.GroupMember{ + {Name: "m1-vm", Kind: "VirtualMachine"}, // existing before, still existing + // "m2-vm" was deleted + // "m3-vm" was deleted + {Name: "m4", Kind: "VirtualMachine"}, // existing before, still existing + {Name: "m5-vm", Kind: "VirtualMachine"}, // added, failureDomain for md1 set by initial placement + {Name: "m6", Kind: "VirtualMachine"}, // added, failureDomain for md3 is explicitly set by the user + {Name: "m7", Kind: "VirtualMachine"}, // added, failureDomain for md4 is explicitly set by the user + }, + }, + }, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + got, err := computeVirtualMachineGroup(ctx, cluster, tt.mds, tt.vSphereMachines, tt.existingVMG) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(got).To(BeComparableTo(tt.want)) + }) + } +} + +func TestVirtualMachineGroupReconciler_ReconcileSequence(t *testing.T) { + clusterNotYetInitialized := &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: metav1.NamespaceDefault, + Name: "test-cluster", + }, + } + clusterInitialized := clusterNotYetInitialized.DeepCopy() + clusterInitialized.Status.Conditions = []metav1.Condition{ + { + Type: clusterv1.ClusterControlPlaneInitializedCondition, + Status: metav1.ConditionTrue, + }, } tests := []struct { name string +<<<<<<< HEAD targetMember []vmoprv1.GroupMember vmgInput *vmoprv1.VirtualMachineGroup existingObjects []runtime.Object wantErr bool expectedErrMsg string +======= + cluster *clusterv1.Cluster + mds []clusterv1.MachineDeployment + vSphereMachines []vmwarev1.VSphereMachine + existingVMG *vmoprv1.VirtualMachineGroup + wantResult ctrl.Result + wantVMG *vmoprv1.VirtualMachineGroup +>>>>>>> 9409e432 (POC AAF) }{ + // Before initial placement { +<<<<<<< HEAD name: "Allow Create if VirtualMachineGroup not existed", targetMember: []vmoprv1.GroupMember{member(memberName1)}, vmgInput: baseVMG.DeepCopy(), existingObjects: nil, wantErr: false, expectedErrMsg: "", +======= + name: "VirtualMachineGroup should not be created when the cluster is not yet initialized", + cluster: clusterNotYetInitialized, + mds: nil, + vSphereMachines: nil, + existingVMG: nil, + wantResult: ctrl.Result{}, + wantVMG: nil, +>>>>>>> 9409e432 (POC AAF) }, { - name: "Allow Patch if it is removing members", - targetMember: []vmoprv1.GroupMember{}, - vmgInput: baseVMG.DeepCopy(), - existingObjects: func() []runtime.Object { - v := baseVMG.DeepCopy() - v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{ - {Members: []vmoprv1.GroupMember{ + name: 
"VirtualMachineGroup should not be created when waiting for vSphereMachines to exist", + cluster: clusterNotYetInitialized, + mds: []clusterv1.MachineDeployment{ + *createMD("md1", clusterNotYetInitialized.Name, "", 1), + *createMD("md2", clusterNotYetInitialized.Name, "zone1", 1), + }, + vSphereMachines: []vmwarev1.VSphereMachine{ + *createVSphereMachine("m1", clusterNotYetInitialized.Name, "md1", "", withCustomNamingStrategy()), + }, + existingVMG: nil, + wantResult: ctrl.Result{}, + wantVMG: nil, + }, + { + name: "VirtualMachineGroup should not be created when waiting for vSphereMachines to exist (adapt to changes)", + cluster: clusterInitialized, + mds: []clusterv1.MachineDeployment{ + *createMD("md1", clusterInitialized.Name, "", 2), // Scaled up one additional machine is still missing + *createMD("md2", clusterInitialized.Name, "zone1", 1), + }, + vSphereMachines: []vmwarev1.VSphereMachine{ + *createVSphereMachine("m1", clusterInitialized.Name, "md1", "", withCustomNamingStrategy()), + *createVSphereMachine("m3", clusterInitialized.Name, "md2", "zone1"), + }, + existingVMG: nil, + wantResult: ctrl.Result{}, + wantVMG: nil, + }, + { + name: "VirtualMachineGroup should be created when all the vSphereMachines exist", + cluster: clusterInitialized, + mds: []clusterv1.MachineDeployment{ + *createMD("md1", clusterInitialized.Name, "", 2), + *createMD("md2", clusterInitialized.Name, "zone1", 1), + }, + vSphereMachines: []vmwarev1.VSphereMachine{ + *createVSphereMachine("m1", clusterInitialized.Name, "md1", "", withCustomNamingStrategy()), + *createVSphereMachine("m2", clusterInitialized.Name, "md1", "", withCustomNamingStrategy()), + *createVSphereMachine("m3", clusterInitialized.Name, "md2", "zone1"), + }, + existingVMG: nil, + wantResult: ctrl.Result{}, + wantVMG: &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: clusterInitialized.Namespace, + Name: clusterInitialized.Name, + UID: types.UID("uid"), + Annotations: map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user + }, + }, + Spec: vmoprv1.VirtualMachineGroupSpec{ + BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ { +<<<<<<< HEAD Name: memberName1, Kind: memberKind, }}}} @@ -163,8 +724,22 @@ func TestIsCreateOrPatchAllowed(t *testing.T) { }(), wantErr: false, expectedErrMsg: "", +======= + Members: []vmoprv1.GroupMember{ + {Name: "m1-vm", Kind: "VirtualMachine"}, + {Name: "m2-vm", Kind: "VirtualMachine"}, + {Name: "m3", Kind: "VirtualMachine"}, + }, + }, + }, + }, + }, +>>>>>>> 9409e432 (POC AAF) }, + + // During initial placement { +<<<<<<< HEAD name: "Block Patch to add new member if VirtualMachineGroup is not Placement Ready", targetMember: []vmoprv1.GroupMember{member(memberName1), member(memberName2)}, vmgInput: baseVMG.DeepCopy(), @@ -813,6 +1388,61 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { vsm2.DeepCopy(), machine2.DeepCopy(), vmgReady.DeepCopy(), +======= + name: "No op if nothing changes during initial placement", + cluster: clusterInitialized, + mds: []clusterv1.MachineDeployment{ + *createMD("md1", clusterInitialized.Name, "", 2), + *createMD("md2", clusterInitialized.Name, "zone1", 1), + }, + vSphereMachines: []vmwarev1.VSphereMachine{ + *createVSphereMachine("m1", clusterInitialized.Name, "md1", "", withCustomNamingStrategy()), + *createVSphereMachine("m2", clusterInitialized.Name, "md1", "", withCustomNamingStrategy()), + *createVSphereMachine("m3", clusterInitialized.Name, 
"md2", "zone1"), + }, + existingVMG: &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: clusterInitialized.Namespace, + Name: clusterInitialized.Name, + UID: types.UID("uid"), + Annotations: map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user + }, + }, + Spec: vmoprv1.VirtualMachineGroupSpec{ + BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ + { + Members: []vmoprv1.GroupMember{ + {Name: "m1-vm", Kind: "VirtualMachine"}, + {Name: "m2-vm", Kind: "VirtualMachine"}, + {Name: "m3", Kind: "VirtualMachine"}, + }, + }, + }, + }, + }, + wantResult: ctrl.Result{}, + wantVMG: &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: clusterInitialized.Namespace, + Name: clusterInitialized.Name, + UID: types.UID("uid"), + Annotations: map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user + }, + }, + Spec: vmoprv1.VirtualMachineGroupSpec{ + BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ + { + Members: []vmoprv1.GroupMember{ + {Name: "m1-vm", Kind: "VirtualMachine"}, + {Name: "m2-vm", Kind: "VirtualMachine"}, + {Name: "m3", Kind: "VirtualMachine"}, + }, + }, + }, + }, +>>>>>>> 9409e432 (POC AAF) }, expectedResult: reconcile.Result{}, expectVMGExists: true, @@ -928,6 +1558,7 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { expectVMGExists: true, expectedMembersCount: 2, // Scale-up should succeed due to FailureDomain bypass }, +<<<<<<< HEAD } for _, tt := range tests { @@ -984,27 +1615,387 @@ func newCluster(name, namespace string, initialized bool, replicasMD1, replicasM Name: name, Namespace: namespace, Labels: map[string]string{clusterv1.ClusterNameLabel: name}, +======= + { + name: "Only new VSphereMachines with an explicit placement are added during initial placement", + cluster: clusterInitialized, + mds: []clusterv1.MachineDeployment{ + *createMD("md1", clusterInitialized.Name, "", 3), // scaled up + *createMD("md2", clusterInitialized.Name, "zone1", 2), // scaled up + *createMD("md3", clusterInitialized.Name, "", 1), // new + *createMD("md4", clusterInitialized.Name, "zone2", 1), // new + }, + vSphereMachines: []vmwarev1.VSphereMachine{ + *createVSphereMachine("m1", clusterInitialized.Name, "md1", "", withCustomNamingStrategy()), + *createVSphereMachine("m2", clusterInitialized.Name, "md1", "", withCustomNamingStrategy()), + *createVSphereMachine("m4", clusterInitialized.Name, "md1", "", withCustomNamingStrategy()), // new + *createVSphereMachine("m3", clusterInitialized.Name, "md2", "zone1"), + *createVSphereMachine("m5", clusterInitialized.Name, "md2", "zone1"), // new + *createVSphereMachine("m6", clusterInitialized.Name, "md3", "", withCustomNamingStrategy()), // new + *createVSphereMachine("m7", clusterInitialized.Name, "md4", "zone3"), // new + }, + existingVMG: &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: clusterInitialized.Namespace, + Name: clusterInitialized.Name, + UID: types.UID("uid"), + Annotations: map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user + }, + }, + Spec: vmoprv1.VirtualMachineGroupSpec{ + BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ + { + Members: []vmoprv1.GroupMember{ + {Name: "m1-vm", Kind: "VirtualMachine"}, + {Name: "m2-vm", Kind: "VirtualMachine"}, + {Name: "m3", Kind: "VirtualMachine"}, + }, + 
}, + }, + }, + }, + wantResult: ctrl.Result{}, + wantVMG: &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: clusterInitialized.Namespace, + Name: clusterInitialized.Name, + UID: types.UID("uid"), + Annotations: map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md4"): "zone2", // failureDomain for md4 is explicitly set by the user + }, + }, + Spec: vmoprv1.VirtualMachineGroupSpec{ + BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ + { + Members: []vmoprv1.GroupMember{ + {Name: "m1-vm", Kind: "VirtualMachine"}, + {Name: "m2-vm", Kind: "VirtualMachine"}, + // "m4-vm" not added, placement decision for md1 not yet completed + {Name: "m3", Kind: "VirtualMachine"}, + {Name: "m5", Kind: "VirtualMachine"}, // added, failureDomain for md2 is explicitly set by the user + // "m6-vm" not added, placement decision for md3 not yet completed + {Name: "m7", Kind: "VirtualMachine"}, // added, failureDomain for md4 is explicitly set by the user + }, + }, + }, + }, + }, +>>>>>>> 9409e432 (POC AAF) + }, + { + name: "VSphereMachines are removed during initial placement", + cluster: clusterInitialized, + mds: []clusterv1.MachineDeployment{ + *createMD("md1", clusterInitialized.Name, "", 3), // scaled down + *createMD("md2", clusterInitialized.Name, "zone1", 2), // scaled down + // md3 deleted + // md4 deleted + }, + vSphereMachines: []vmwarev1.VSphereMachine{ + *createVSphereMachine("m1", clusterInitialized.Name, "md1", "", withCustomNamingStrategy()), + *createVSphereMachine("m2", clusterInitialized.Name, "md1", "", withCustomNamingStrategy()), + // m4 deleted + *createVSphereMachine("m3", clusterInitialized.Name, "md2", "zone1"), + // m5 deleted + // m6 deleted + // m7 deleted + }, + existingVMG: &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: clusterInitialized.Namespace, + Name: clusterInitialized.Name, + UID: types.UID("uid"), + Annotations: map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md4"): "zone2", // failureDomain for md4 is explicitly set by the user + }, + }, + Spec: vmoprv1.VirtualMachineGroupSpec{ + BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ + { + Members: []vmoprv1.GroupMember{ + {Name: "m1-vm", Kind: "VirtualMachine"}, + {Name: "m2-vm", Kind: "VirtualMachine"}, + // "m4-vm" not added, placement decision for md1 not yet completed + {Name: "m3", Kind: "VirtualMachine"}, + {Name: "m5", Kind: "VirtualMachine"}, // added, failureDomain for md2 is explicitly set by the user + // "m6-vm" not added, placement decision for md3 not yet completed + {Name: "m7", Kind: "VirtualMachine"}, // added, failureDomain for md4 is explicitly set by the user + }, + }, + }, + }, + }, + wantResult: ctrl.Result{}, + wantVMG: &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: clusterInitialized.Namespace, + Name: clusterInitialized.Name, + UID: types.UID("uid"), + Annotations: map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user + // md4 deleted + }, + }, + Spec: vmoprv1.VirtualMachineGroupSpec{ + BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ + { + Members: []vmoprv1.GroupMember{ + {Name: "m1-vm", Kind: "VirtualMachine"}, + {Name: "m2-vm", Kind: "VirtualMachine"}, + // 
"m4-vm" deleted (it was never added) + {Name: "m3", Kind: "VirtualMachine"}, + // "m5" deleted + // "m6" deleted + // "m7" deleted + }, + }, + }, + }, + }, + }, + + // After initial placement + { + name: "No op if nothing changes after initial placement", + cluster: clusterInitialized, + mds: []clusterv1.MachineDeployment{ + *createMD("md1", clusterInitialized.Name, "", 2), + *createMD("md2", clusterInitialized.Name, "zone1", 1), + }, + vSphereMachines: []vmwarev1.VSphereMachine{ + *createVSphereMachine("m1", clusterInitialized.Name, "md1", "", withCustomNamingStrategy()), + *createVSphereMachine("m2", clusterInitialized.Name, "md1", "", withCustomNamingStrategy()), + *createVSphereMachine("m3", clusterInitialized.Name, "md2", "zone1"), + }, + existingVMG: &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: clusterInitialized.Namespace, + Name: clusterInitialized.Name, + UID: types.UID("uid"), + Annotations: map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md1"): "zone4", // failureDomain for md1 set by initial placement + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user + }, + }, + Spec: vmoprv1.VirtualMachineGroupSpec{ + BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ + { + Members: []vmoprv1.GroupMember{ + {Name: "m1-vm", Kind: "VirtualMachine"}, + {Name: "m2-vm", Kind: "VirtualMachine"}, + {Name: "m3", Kind: "VirtualMachine"}, + }, + }, + }, + }, + // Not setting status for sake of simplicity (in a real VMG, after the placement decision status should have members) + }, + wantResult: ctrl.Result{}, + wantVMG: &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: clusterInitialized.Namespace, + Name: clusterInitialized.Name, + UID: types.UID("uid"), + Annotations: map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md1"): "zone4", // failureDomain for md1 set by initial placement + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user + }, + }, + Spec: vmoprv1.VirtualMachineGroupSpec{ + BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ + { + Members: []vmoprv1.GroupMember{ + {Name: "m1-vm", Kind: "VirtualMachine"}, + {Name: "m2-vm", Kind: "VirtualMachine"}, + {Name: "m3", Kind: "VirtualMachine"}, + }, + }, + }, + }, + }, + }, + { + name: "New VSphereMachines are added after initial placement", + cluster: clusterInitialized, + mds: []clusterv1.MachineDeployment{ + *createMD("md1", clusterInitialized.Name, "", 3), // scaled up + *createMD("md2", clusterInitialized.Name, "zone1", 2), // scaled up + *createMD("md3", clusterInitialized.Name, "zone2", 1), // new + // Adding a new MD without explicit placement is not supported at this stage + }, + vSphereMachines: []vmwarev1.VSphereMachine{ + *createVSphereMachine("m1", clusterInitialized.Name, "md1", "", withCustomNamingStrategy()), + *createVSphereMachine("m2", clusterInitialized.Name, "md1", "", withCustomNamingStrategy()), + *createVSphereMachine("m4", clusterInitialized.Name, "md1", "", withCustomNamingStrategy()), // new + *createVSphereMachine("m3", clusterInitialized.Name, "md2", "zone1"), + *createVSphereMachine("m5", clusterInitialized.Name, "md2", "zone1"), // new + *createVSphereMachine("m6", clusterInitialized.Name, "md3", "zone2"), // new + }, + existingVMG: &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: clusterInitialized.Namespace, + Name: clusterInitialized.Name, + UID: types.UID("uid"), 
+ Annotations: map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md1"): "zone4", // failureDomain for md1 set by initial placement + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user + }, + }, + Spec: vmoprv1.VirtualMachineGroupSpec{ + BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ + { + Members: []vmoprv1.GroupMember{ + {Name: "m1-vm", Kind: "VirtualMachine"}, + {Name: "m2-vm", Kind: "VirtualMachine"}, + {Name: "m3", Kind: "VirtualMachine"}, + }, + }, + }, + }, + // Not setting status for sake of simplicity (in a real VMG, after the placement decision status should have members) + }, + wantResult: ctrl.Result{}, + wantVMG: &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: clusterInitialized.Namespace, + Name: clusterInitialized.Name, + UID: types.UID("uid"), + Annotations: map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md1"): "zone4", // failureDomain for md1 set by initial placement + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md3"): "zone2", // failureDomain for md3 is explicitly set by the user + }, + }, + Spec: vmoprv1.VirtualMachineGroupSpec{ + BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ + { + Members: []vmoprv1.GroupMember{ + {Name: "m1-vm", Kind: "VirtualMachine"}, + {Name: "m2-vm", Kind: "VirtualMachine"}, + {Name: "m3", Kind: "VirtualMachine"}, + {Name: "m4-vm", Kind: "VirtualMachine"}, // added, failureDomain for md1 set by initial placement + {Name: "m5", Kind: "VirtualMachine"}, // added, failureDomain for md2 is explicitly set by the user + {Name: "m6", Kind: "VirtualMachine"}, // added, failureDomain for md3 is explicitly set by the user + }, + }, + }, + }, + }, }, - Spec: clusterv1.ClusterSpec{ - Topology: clusterv1.Topology{ - Workers: clusterv1.WorkersTopology{ - MachineDeployments: []clusterv1.MachineDeploymentTopology{ - {Name: mdName1, Replicas: &replicasMD1}, - {Name: mdName2, Replicas: &replicasMD2}, + { + name: "VSphereMachines are removed during initial placement", + cluster: clusterInitialized, + mds: []clusterv1.MachineDeployment{ + *createMD("md1", clusterInitialized.Name, "", 3), // scaled down + *createMD("md2", clusterInitialized.Name, "zone1", 2), // scaled down + // md3 deleted + }, + vSphereMachines: []vmwarev1.VSphereMachine{ + *createVSphereMachine("m1", clusterInitialized.Name, "md1", "", withCustomNamingStrategy()), + *createVSphereMachine("m2", clusterInitialized.Name, "md1", "", withCustomNamingStrategy()), + // m4 deleted + *createVSphereMachine("m3", clusterInitialized.Name, "md2", "zone1"), + // m5 deleted + // m5 deleted + }, + existingVMG: &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: clusterInitialized.Namespace, + Name: clusterInitialized.Name, + UID: types.UID("uid"), + Annotations: map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md1"): "zone4", // failureDomain for md1 set by initial placement + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md3"): "zone2", // failureDomain for md3 is explicitly set by the user + }, + }, + Spec: vmoprv1.VirtualMachineGroupSpec{ + BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ + { + Members: []vmoprv1.GroupMember{ + {Name: "m1-vm", Kind: "VirtualMachine"}, + {Name: "m2-vm", Kind: 
"VirtualMachine"}, + {Name: "m3", Kind: "VirtualMachine"}, + {Name: "m4-vm", Kind: "VirtualMachine"}, // added, failureDomain for md1 set by initial placement + {Name: "m5", Kind: "VirtualMachine"}, // added, failureDomain for md2 is explicitly set by the user + {Name: "m6", Kind: "VirtualMachine"}, // added, failureDomain for md3 is explicitly set by the user + }, + }, + }, + }, + // Not setting status for sake of simplicity (in a real VMG, after the placement decision status should have members) + }, + wantResult: ctrl.Result{}, + wantVMG: &vmoprv1.VirtualMachineGroup{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: clusterInitialized.Namespace, + Name: clusterInitialized.Name, + UID: types.UID("uid"), + Annotations: map[string]string{ + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md1"): "zone4", // failureDomain for md1 set by initial placement + fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user + // md3 deleted + }, + }, + Spec: vmoprv1.VirtualMachineGroupSpec{ + BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ + { + Members: []vmoprv1.GroupMember{ + {Name: "m1-vm", Kind: "VirtualMachine"}, + {Name: "m2-vm", Kind: "VirtualMachine"}, + {Name: "m3", Kind: "VirtualMachine"}, + // m4-vm deleted + // m5 deleted + // m6 deleted + }, + }, }, }, }, }, } - if initialized { - conditions.Set(cluster, metav1.Condition{ - Type: clusterv1.ClusterControlPlaneInitializedCondition, - Status: metav1.ConditionTrue, + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + objects := []client.Object{tt.cluster} + if tt.existingVMG != nil { + objects = append(objects, tt.existingVMG) + } + for _, md := range tt.mds { + objects = append(objects, &md) + } + for _, vSphereMachine := range tt.vSphereMachines { + objects = append(objects, &vSphereMachine) + } + + c := fake.NewClientBuilder().WithObjects(objects...).Build() + r := &VirtualMachineGroupReconciler{ + Client: c, + } + got, err := r.Reconcile(ctx, ctrl.Request{NamespacedName: types.NamespacedName{Namespace: tt.cluster.Namespace, Name: tt.cluster.Name}}) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(got).To(Equal(tt.wantResult)) + + vmg := &vmoprv1.VirtualMachineGroup{} + err = r.Client.Get(ctx, client.ObjectKeyFromObject(tt.cluster), vmg) + + if tt.wantVMG == nil { + g.Expect(apierrors.IsNotFound(err)).To(BeTrue()) + return + } + + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(vmg.Annotations).To(Equal(tt.wantVMG.Annotations)) + g.Expect(vmg.Spec.BootOrder).To(Equal(tt.wantVMG.Spec.BootOrder)) }) } - return cluster } +<<<<<<< HEAD // Helper function to create a VSphereMachine (worker, owned by a CAPI Machine). func newVSphereMachine(name, mdName string, isCP, deleted bool, namingStrategy *vmwarev1.VirtualMachineNamingStrategy) *vmwarev1.VSphereMachine { vsm := &vmwarev1.VSphereMachine{ @@ -1038,57 +2029,41 @@ func newVSphereMachine(name, mdName string, isCP, deleted bool, namingStrategy * return vsm } +======= +type machineDeploymentOption func(md *clusterv1.MachineDeployment) +>>>>>>> 9409e432 (POC AAF) -// Helper function to create a VMG member status with placement info. 
-func newVMGMemberStatus(name, kind string, isPlacementReady, placement bool, zone string) vmoprv1.VirtualMachineGroupMemberStatus { - memberStatus := vmoprv1.VirtualMachineGroupMemberStatus{ - Name: name, - Kind: kind, - } - - if isPlacementReady { - conditions.Set(&memberStatus, metav1.Condition{ - Type: vmoprv1.VirtualMachineGroupMemberConditionPlacementReady, - Status: metav1.ConditionTrue, - }) - } - - if placement { - memberStatus.Placement = &vmoprv1.VirtualMachinePlacementStatus{Zone: zone} - } - - return memberStatus -} - -// Helper function to create a MachineDeployment object. -func newMachineDeployment(name, clusterName, clusterNS string, isReplicaSet bool, replicas *int32) *clusterv1.MachineDeployment { +func createMD(name, cluster, failureDomain string, replicas int32, options ...machineDeploymentOption) *clusterv1.MachineDeployment { md := &clusterv1.MachineDeployment{ ObjectMeta: metav1.ObjectMeta{ + Namespace: metav1.NamespaceDefault, Name: name, - Namespace: clusterNS, - Labels: map[string]string{clusterv1.ClusterNameLabel: clusterName}, + Labels: map[string]string{ + clusterv1.ClusterNameLabel: cluster, + }, + }, + Spec: clusterv1.MachineDeploymentSpec{ + Template: clusterv1.MachineTemplateSpec{Spec: clusterv1.MachineSpec{FailureDomain: failureDomain}}, + Replicas: &replicas, }, } - - if isReplicaSet { - md.Spec = clusterv1.MachineDeploymentSpec{ - Replicas: replicas, - } + for _, opt := range options { + opt(md) } - return md } -// Helper function to create a basic Cluster object used as input. -func newTestCluster(name, namespace string) *clusterv1.Cluster { - return &clusterv1.Cluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: namespace, - }, +type vSphereMachineOption func(m *vmwarev1.VSphereMachine) + +func withCustomNamingStrategy() func(m *vmwarev1.VSphereMachine) { + return func(m *vmwarev1.VSphereMachine) { + m.Spec.NamingStrategy = &vmwarev1.VirtualMachineNamingStrategy{ + Template: ptr.To[string]("{{ .machine.name }}-vm"), + } } } +<<<<<<< HEAD // Helper to create a new CAPI Machine. func newMachine(name, mdName, fd string) *clusterv1.Machine { m := &clusterv1.Machine{ @@ -1145,3 +2120,24 @@ func newVMG(name, ns string, members []vmoprv1.GroupMember, ready bool, annotati } return v } +======= +func createVSphereMachine(name, cluster, md, failureDomain string, options ...vSphereMachineOption) *vmwarev1.VSphereMachine { + m := &vmwarev1.VSphereMachine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: metav1.NamespaceDefault, + Name: name, + Labels: map[string]string{ + clusterv1.ClusterNameLabel: cluster, + clusterv1.MachineDeploymentNameLabel: md, + }, + }, + Spec: vmwarev1.VSphereMachineSpec{ + FailureDomain: &failureDomain, + }, + } + for _, opt := range options { + opt(m) + } + return m +} +>>>>>>> 9409e432 (POC AAF) diff --git a/pkg/services/vmoperator/vmopmachine.go b/pkg/services/vmoperator/vmopmachine.go index 460ad50fdf..ad4836aca3 100644 --- a/pkg/services/vmoperator/vmopmachine.go +++ b/pkg/services/vmoperator/vmopmachine.go @@ -949,6 +949,11 @@ func getVMLabels(supervisorMachineCtx *vmware.SupervisorMachineContext, vmLabels vmLabels[k] = v } + // FIXME: + // if the zone label is set, it should be immutable + // failure domain from machine should be used if set to set the zone label + // if failure domain from machine is not set, use the failure domain from VMG annotation + // Set the labels that determine the VM's placement. 
// Note: if the failureDomain is not set, auto placement will happen according to affinity rules on VM during initial Cluster creation. // For VM created during day-2 operation like scaling up, we should expect the failureDomain to be always set. From 822f8f0e64f67d18a3f9fe846ca07f59eeecc97b Mon Sep 17 00:00:00 2001 From: Stefan Bueringer Date: Wed, 26 Nov 2025 10:43:04 +0100 Subject: [PATCH 23/25] Review comments / fixes --- .../vmware/virtualmachinegroup_reconciler.go | 65 +++++++++---------- pkg/services/vmoperator/vmopmachine.go | 2 + 2 files changed, 34 insertions(+), 33 deletions(-) diff --git a/controllers/vmware/virtualmachinegroup_reconciler.go b/controllers/vmware/virtualmachinegroup_reconciler.go index 0977044011..b56ea958e5 100644 --- a/controllers/vmware/virtualmachinegroup_reconciler.go +++ b/controllers/vmware/virtualmachinegroup_reconciler.go @@ -42,11 +42,6 @@ import ( "sigs.k8s.io/cluster-api-provider-vsphere/pkg/services/vmoperator" ) -const ( - // ZoneAnnotationPrefix is the prefix used for placement decision annotations which will be set on VirtualMachineGroup. - ZoneAnnotationPrefix = "zone.vmware.infrastructure.cluster.x-k8s.io" -) - // VirtualMachineGroupReconciler reconciles VirtualMachineGroup. type VirtualMachineGroupReconciler struct { Client client.Client @@ -91,7 +86,7 @@ func (r *VirtualMachineGroupReconciler) Reconcile(ctx context.Context, req ctrl. } // Note: VirtualMachineGroup is going to have same name and namespace of the cluster. - // Using cluster here, because VirtualMachineGroup is created only after initial placement completes. + // Using cluster here, because VirtualMachineGroup is created only once we are ready. log = log.WithValues("VirtualMachineGroup", klog.KObj(cluster)) ctx = ctrl.LoggerInto(ctx, log) @@ -100,7 +95,7 @@ func (r *VirtualMachineGroupReconciler) Reconcile(ctx context.Context, req ctrl. return reconcile.Result{}, nil } - // If ControlPlane haven't initialized, requeue it since CAPV will only start to reconcile VSphereMachines of + // If ControlPlane haven't initialized, return since CAPV will only start to reconcile VSphereMachines of // MachineDeployment after ControlPlane is initialized. if !conditions.IsTrue(cluster, clusterv1.ClusterControlPlaneInitializedCondition) { return reconcile.Result{}, nil @@ -109,7 +104,6 @@ func (r *VirtualMachineGroupReconciler) Reconcile(ctx context.Context, req ctrl. return r.reconcileNormal(ctx, cluster) } -// createOrUpdateVirtualMachineGroup Create or Update VirtualMachineGroup. func (r *VirtualMachineGroupReconciler) reconcileNormal(ctx context.Context, cluster *clusterv1.Cluster) (reconcile.Result, error) { // Get all the data required for computing the desired VMG. currentVMG, err := r.getVirtualMachineGroup(ctx, cluster) @@ -130,8 +124,8 @@ func (r *VirtualMachineGroupReconciler) reconcileNormal(ctx context.Context, clu // VirtualMachineGroup creation starts the initial placement process that should take care // of spreading VSphereMachines across failure domains in an ideal way / according to user intent. // The initial placement can be performed only when all the VSphereMachines to be considered for the - // placement decision exists; if this condition is not met, return (watches will trigger new - // reconcile whenever new VSphereMachines are created). + // placement decision exist. If this condition is not met, return (watches will trigger new + // reconciles whenever new VSphereMachines are created). 
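// For example, with md1 (2 replicas), md2 (1 replica, deleting) and md3 (1 replica, zone1),
// the expected count is 3 because the deleting MachineDeployment is excluded; the
// VirtualMachineGroup is created only once 3 matching worker VSphereMachines exist.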
// Note: In case there are no MachineDeployments, or all the MachineDeployments have zero replicas, // no placement decision is required, and thus no VirtualMachineGroup will be created. if !shouldCreateVirtualMachineGroup(ctx, machineDeployments, vSphereMachines) { @@ -145,19 +139,19 @@ func (r *VirtualMachineGroupReconciler) reconcileNormal(ctx context.Context, clu return reconcile.Result{}, err } - // FIXME: Log. Details? + // FIXME: Log. Details? (add k/v pair for first 50 VM names + ... if necessary) if err := r.Client.Create(ctx, newVMG); err != nil { return reconcile.Result{}, errors.Wrapf(err, "failed to create new VMG") } return reconcile.Result{}, nil } - // If the VirtualMachineGroup exist, either the placement decision is being performed, or - // the placement decision has been already completed. In both cases, the VirtualMachineGroup - // must be keep up to date with the changes that happens to MachineDeployments and vSphereMachines. + // If the VirtualMachineGroup exists, either the initial placement is in progress or + // the initial placement has been already completed. In both cases, the VirtualMachineGroup + // must be kept up to date with the changes that happen to MachineDeployments and vSphereMachines. // - // However, while the initial placement decision is being performed, the addition of new - // vSphereMachines to the VirtualMachineGroup must be deferred to prevent race conditions. + // However, while the initial placement is in progress, the addition of new + // VSphereMachines to the VirtualMachineGroup must be deferred to prevent race conditions. // // After initial placement, new vSphereMachines will be added to the VirtualMachineGroup for // sake of consistency, but those machines will be placed in the same failureDomain @@ -170,7 +164,7 @@ func (r *VirtualMachineGroupReconciler) reconcileNormal(ctx context.Context, clu return reconcile.Result{}, err } - // FIXME: Log. Diff? Details? + // FIXME: Log. Diff? Details? delta VM names if err := r.Client.Patch(ctx, updatedVMG, client.MergeFromWithOptions(currentVMG, client.MergeFromWithOptimisticLock{})); err != nil { return reconcile.Result{}, errors.Wrapf(err, "failed to patch VMG") } @@ -185,7 +179,7 @@ func computeVirtualMachineGroup(ctx context.Context, cluster *clusterv1.Cluster, Name: cluster.Name, Namespace: cluster.Namespace, Annotations: map[string]string{}, - }, + }, // FIXME: looks like we lost the ownerRef and the ClusterNameLabel } // If there is an VirtualMachineGroup, clone it into the desired VirtualMachineGroup @@ -194,7 +188,7 @@ func computeVirtualMachineGroup(ctx context.Context, cluster *clusterv1.Cluster, vmg = existingVMG.DeepCopy() vmg.Annotations = make(map[string]string) for key, value := range existingVMG.Annotations { - if !strings.HasPrefix(key, ZoneAnnotationPrefix+"/") { + if !strings.HasPrefix(key, vmoperator.ZoneAnnotationPrefix+"/") { vmg.Annotations[key] = value } } @@ -203,14 +197,14 @@ func computeVirtualMachineGroup(ctx context.Context, cluster *clusterv1.Cluster, // Compute the info required to compute the VirtualMachineGroup. - // Get the mapping between the virtualMachine name that will be generated by a vSphereMachine + // Get the mapping between the VirtualMachine name that will be generated from a VSphereMachine // and the MachineDeployment that controls the vSphereMachine. 
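// For example, with the default naming strategy the VirtualMachine name equals the
// VSphereMachine name (m1 -> m1), while a custom template like "{{ .machine.name }}-vm"
// yields names like m1-vm; in both cases the name is known before the VirtualMachine exists.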
virtualMachineNameToMachineDeployment, err := getVirtualMachineNameToMachineDeploymentMapping(ctx, vSphereMachines) if err != nil { return nil, err } - // Sort virtualMachine names to ensure VirtualMachineGroup is generated in a consistent way across reconcile. + // Sort VirtualMachine names to ensure VirtualMachineGroup is generated in a consistent way across reconciles. sortedVirtualMachineNames := slices.Sorted(maps.Keys(virtualMachineNameToMachineDeployment)) // Get the mapping between the MachineDeployment and failure domain, which is one of: @@ -223,7 +217,7 @@ func computeVirtualMachineGroup(ctx context.Context, cluster *clusterv1.Cluster, // placement decision for each MachineDeployment. // Note: when a MachineDeployment will be deleted, the corresponding annotation will be removed (not added anymore by this func). for md, failureDomain := range machineDeploymentToFailureDomain { - vmg.Annotations[fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, md)] = failureDomain + vmg.Annotations[fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, md)] = failureDomain } // Compute the list of Members for the VirtualMachineGroup. @@ -240,13 +234,13 @@ func computeVirtualMachineGroup(ctx context.Context, cluster *clusterv1.Cluster, return vmg, nil } - // If the VirtualMachineGroup exists, keep this list of VirtualMachine up to date. - // Note: while the initial placement decision is being performed, the addition of new - // VirtualMachine to the VirtualMachineGroup must be deferred to prevent race conditions. + // If the VirtualMachineGroup exists, keep the list of VirtualMachines up to date. + // Note: while the initial placement is in progress, the addition of new + // VirtualMachines to the VirtualMachineGroup must be deferred to prevent race conditions. // // After initial placement, new VirtualMachine will be added to the VirtualMachineGroup for - // sake of consistency, but those machines will be placed in the same failureDomain - // already used for the other VirtualMachine in the same MachineDeployment (new VirtualMachine + // sake of consistency, but those Machines will be placed in the same failureDomain + // already used for the other VirtualMachines in the same MachineDeployment (new VirtualMachine // will align to the initial placement decision). existingVirtualMachineNames := sets.New[string]() @@ -317,16 +311,17 @@ func getMachineDeploymentToFailureDomainMapping(_ context.Context, mds []cluster // If the VirtualMachineGroup exist, check if the placement decision for the MachineDeployment // has been already surfaced into the VirtualMachineGroup annotations. - if failureDomain := existingVMG.Annotations[fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, md.Name)]; failureDomain != "" { + if failureDomain := existingVMG.Annotations[fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, md.Name)]; failureDomain != "" { machineDeploymentToFailureDomainMapping[md.Name] = failureDomain continue } - // If the placement decision for the MachineDeployment, try to get the failure domain selected - // during the initial placement decision from VirtualMachineGroup status (placement decision just completed). + // If the placement decision for the MachineDeployment is not yet surfaced in the annotation, try to get + // the failure domain selected during the initial placement decision from VirtualMachineGroup status + // (placement decision just completed). // Note: this info will surface in VirtualMachineGroup annotations at the end of the current reconcile. 
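// For example, once member "m1-vm" (controlled by md1) reports the PlacementReady condition
// as True with Placement.Zone "zone1", the mapping records md1 -> zone1, and the corresponding
// per-MachineDeployment zone annotation for md1 is written on the VirtualMachineGroup at the
// end of this reconcile.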
for _, member := range existingVMG.Status.Members { - // Ignore members controller by other MachineDeployments + // Ignore members controlled by other MachineDeployments if memberMD := virtualMachineNameToMachineDeployment[member.Name]; memberMD != md.Name { continue } @@ -348,8 +343,8 @@ func getMachineDeploymentToFailureDomainMapping(_ context.Context, mds []cluster } // getVirtualMachineNameToMachineDeploymentMapping returns the mapping between VirtualMachine name and corresponding MachineDeployment. -// The mapping is inferred from vSphereMachines; please note: -// - The name of the VirtualMachine generated by a vSphereMachines can be computed in a deterministic way (it is not required to wait for the VirtualMachine to exist) +// The mapping is inferred from vSphereMachines. Please note: +// - The name of the VirtualMachine generated by a VSphereMachine can be computed in a deterministic way (it is not required to wait for the VirtualMachine to exist) // - The name of the MachineDeployment corresponding to a vSphereMachine can be derived from the annotation that is propagated by CAPI. func getVirtualMachineNameToMachineDeploymentMapping(_ context.Context, vSphereMachines []vmwarev1.VSphereMachine) (map[string]string, error) { virtualMachineNameToMachineDeployment := map[string]string{} @@ -388,6 +383,10 @@ func shouldCreateVirtualMachineGroup(ctx context.Context, mds []clusterv1.Machin return false } + // filter down VSphereMachines to the non-deleting MDs + + // => if any of these VSphereMachines deleting => return false + // If the number of workers VSphereMachines matches the number of expected replicas in the MachineDeployments, // then all the VSphereMachines required for the initial placement decision do exist, then it is possible to create // the VirtualMachineGroup. 
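The FIXME in getVMLabels above describes the intended precedence for the VM's zone label without pinning down the implementation. A minimal sketch of that lookup, assuming a hypothetical helper name (resolveZoneLabel) and assuming ZoneAnnotationPrefix is the prefix constant referenced as vmoperator.ZoneAnnotationPrefix elsewhere in this patch:

// resolveZoneLabel is a hypothetical helper sketching the precedence described by the FIXME in
// getVMLabels: an already-set zone label wins, then the Machine's failureDomain, then the
// placement decision recorded on the VirtualMachineGroup.
func resolveZoneLabel(existingZoneLabel, machineFailureDomain, machineDeploymentName string, vmg *vmoprv1.VirtualMachineGroup) string {
	// Once the zone label has been set on the VM, treat it as immutable.
	if existingZoneLabel != "" {
		return existingZoneLabel
	}
	// Prefer the failure domain set on the Machine; for VMs created by day-2 operations
	// (e.g. scale up) this is expected to always be set.
	if machineFailureDomain != "" {
		return machineFailureDomain
	}
	// Otherwise fall back to the placement decision surfaced by the VirtualMachineGroup
	// reconciler as the per-MachineDeployment zone annotation.
	if vmg != nil {
		if zone := vmg.Annotations[fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, machineDeploymentName)]; zone != "" {
			return zone
		}
	}
	// Nothing known yet (initial placement still in progress): leave the label unset so auto
	// placement can happen according to the affinity rules on the VM.
	return ""
}

VMs created by day-2 operations are expected to resolve through the first two branches; the empty return only covers VMs created while the initial placement is still in progress.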
diff --git a/pkg/services/vmoperator/vmopmachine.go b/pkg/services/vmoperator/vmopmachine.go index ad4836aca3..66f42090ae 100644 --- a/pkg/services/vmoperator/vmopmachine.go +++ b/pkg/services/vmoperator/vmopmachine.go @@ -233,6 +233,7 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap Type: infrav1.VSphereMachineVirtualMachineProvisionedV1Beta2Condition, Status: metav1.ConditionFalse, Reason: infrav1.VSphereMachineVirtualMachineWaitingForVirtualMachineGroupV1Beta2Reason, + // FIXME: we should provide more details about this case in the message (vs other cases where we set this reason) }) log.V(4).Info(fmt.Sprintf("Waiting for VirtualMachineGroup %s, requeueing", key.Name), "VirtualMachineGroup", klog.KRef(key.Namespace, key.Name)) return true, nil @@ -245,6 +246,7 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap Type: infrav1.VSphereMachineVirtualMachineProvisionedV1Beta2Condition, Status: metav1.ConditionFalse, Reason: infrav1.VSphereMachineVirtualMachineWaitingForVirtualMachineGroupV1Beta2Reason, + // FIXME: we should provide more details about this case in the message (vs other cases where we set this reason) }) log.V(4).Info(fmt.Sprintf("Waiting for VirtualMachineGroup %s membership, requeueing", key.Name), "VirtualMachineGroup", klog.KRef(key.Namespace, key.Name)) return true, nil From db8095d7a88d7c2e0644e852c583dbcc5df22c41 Mon Sep 17 00:00:00 2001 From: fabriziopandini Date: Wed, 26 Nov 2025 14:21:19 +0100 Subject: [PATCH 24/25] Final cleanup --- .../vmware/virtualmachinegroup_reconciler.go | 90 ++++++++--- .../virtualmachinegroup_reconciler_test.go | 152 ++++++++++++++---- pkg/services/vmoperator/vmopmachine.go | 21 +-- 3 files changed, 200 insertions(+), 63 deletions(-) diff --git a/controllers/vmware/virtualmachinegroup_reconciler.go b/controllers/vmware/virtualmachinegroup_reconciler.go index b56ea958e5..619ddbc4ad 100644 --- a/controllers/vmware/virtualmachinegroup_reconciler.go +++ b/controllers/vmware/virtualmachinegroup_reconciler.go @@ -22,6 +22,7 @@ import ( "fmt" "maps" "slices" + "sort" "strings" "github.com/pkg/errors" @@ -33,6 +34,7 @@ import ( "k8s.io/klog/v2" "k8s.io/utils/ptr" clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" + "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/conditions" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -105,6 +107,8 @@ func (r *VirtualMachineGroupReconciler) Reconcile(ctx context.Context, req ctrl. } func (r *VirtualMachineGroupReconciler) reconcileNormal(ctx context.Context, cluster *clusterv1.Cluster) (reconcile.Result, error) { + log := ctrl.LoggerFrom(ctx) + // Get all the data required for computing the desired VMG. currentVMG, err := r.getVirtualMachineGroup(ctx, cluster) if err != nil { @@ -139,7 +143,7 @@ func (r *VirtualMachineGroupReconciler) reconcileNormal(ctx context.Context, clu return reconcile.Result{}, err } - // FIXME: Log. Details? (add k/v pair for first 50 VM names + ... if necessary) + log.Info("Creating VirtualMachineGroup", "members", nameList(memberNames(newVMG))) if err := r.Client.Create(ctx, newVMG); err != nil { return reconcile.Result{}, errors.Wrapf(err, "failed to create new VMG") } @@ -164,7 +168,14 @@ func (r *VirtualMachineGroupReconciler) reconcileNormal(ctx context.Context, clu return reconcile.Result{}, err } - // FIXME: Log. Diff? Details? delta VM names + existingVirtualMachineNames := sets.New[string](memberNames(currentVMG)...) 
+ updatedVirtualMachineNames := sets.New[string](memberNames(updatedVMG)...) + + addedVirtualMachineNames := updatedVirtualMachineNames.Difference(existingVirtualMachineNames) + deletedVirtualMachineNames := existingVirtualMachineNames.Difference(updatedVirtualMachineNames) + if addedVirtualMachineNames.Len() > 0 || deletedVirtualMachineNames.Len() > 0 { + log.Info("Updating VirtualMachineGroup", "addedMembers", nameList(addedVirtualMachineNames.UnsortedList()), "deletedMembers", nameList(deletedVirtualMachineNames.UnsortedList())) + } if err := r.Client.Patch(ctx, updatedVMG, client.MergeFromWithOptions(currentVMG, client.MergeFromWithOptimisticLock{})); err != nil { return reconcile.Result{}, errors.Wrapf(err, "failed to patch VMG") } @@ -179,7 +190,7 @@ func computeVirtualMachineGroup(ctx context.Context, cluster *clusterv1.Cluster, Name: cluster.Name, Namespace: cluster.Namespace, Annotations: map[string]string{}, - }, // FIXME: looks like we lost the ownerRef and the ClusterNameLabel + }, } // If there is an VirtualMachineGroup, clone it into the desired VirtualMachineGroup @@ -195,6 +206,19 @@ func computeVirtualMachineGroup(ctx context.Context, cluster *clusterv1.Cluster, } vmg.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{}} + // Add cluster label and ownerReference to the cluster. + if vmg.Labels == nil { + vmg.Labels = map[string]string{} + } + vmg.Labels[clusterv1.ClusterNameLabel] = cluster.Name + vmg.OwnerReferences = util.EnsureOwnerRef(vmg.OwnerReferences, metav1.OwnerReference{ + APIVersion: clusterv1.GroupVersion.String(), + Kind: "Cluster", + Name: cluster.Name, + UID: cluster.UID, + Controller: ptr.To(true), + }) + // Compute the info required to compute the VirtualMachineGroup. // Get the mapping between the VirtualMachine name that will be generated from a VSphereMachine @@ -242,13 +266,7 @@ func computeVirtualMachineGroup(ctx context.Context, cluster *clusterv1.Cluster, // sake of consistency, but those Machines will be placed in the same failureDomain // already used for the other VirtualMachines in the same MachineDeployment (new VirtualMachine // will align to the initial placement decision). - - existingVirtualMachineNames := sets.New[string]() - if len(existingVMG.Spec.BootOrder) > 0 { - for _, member := range existingVMG.Spec.BootOrder[0].Members { - existingVirtualMachineNames.Insert(member.Name) - } - } + existingVirtualMachineNames := sets.New[string](memberNames(existingVMG)...) for _, virtualMachineName := range sortedVirtualMachineNames { // If a VirtualMachine is already part of the VirtualMachineGroup, keep it in the VirtualMachineGroup @@ -290,7 +308,9 @@ func computeVirtualMachineGroup(ctx context.Context, cluster *clusterv1.Cluster, // // Note: In case the failure domain is explicitly assigned by setting spec.template.spec.failureDomain, the mapping always // report the latest value for this field (even if there might still be Machines yet to be rolled out to the new failure domain). 
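// For example, if md1 now sets spec.template.spec.failureDomain to "zone2" while the
// VirtualMachineGroup annotation still records "zone1" from an earlier placement decision,
// the mapping returns md1 -> "zone2".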
-func getMachineDeploymentToFailureDomainMapping(_ context.Context, mds []clusterv1.MachineDeployment, existingVMG *vmoprv1.VirtualMachineGroup, virtualMachineNameToMachineDeployment map[string]string) map[string]string { +func getMachineDeploymentToFailureDomainMapping(ctx context.Context, mds []clusterv1.MachineDeployment, existingVMG *vmoprv1.VirtualMachineGroup, virtualMachineNameToMachineDeployment map[string]string) map[string]string { + log := ctrl.LoggerFrom(ctx) + machineDeploymentToFailureDomainMapping := map[string]string{} for _, md := range mds { if !md.DeletionTimestamp.IsZero() { @@ -333,7 +353,7 @@ func getMachineDeploymentToFailureDomainMapping(_ context.Context, mds []cluster continue } if member.Placement != nil && member.Placement.Zone != "" { - // FIXME: log + log.Info(fmt.Sprintf("MachineDeployment %s has been placed to failure domanin %s", md.Name, member.Placement.Zone), "MachineDeployment", klog.KObj(&md)) machineDeploymentToFailureDomainMapping[md.Name] = member.Placement.Zone break } @@ -371,10 +391,13 @@ func shouldCreateVirtualMachineGroup(ctx context.Context, mds []clusterv1.Machin // Gets the total number or worker machines that should exist in the cluster at a given time. // Note. Deleting MachineDeployment are ignored. var expectedVSphereMachineCount int32 + mdNames := sets.Set[string]{} for _, md := range mds { - if md.DeletionTimestamp.IsZero() { - expectedVSphereMachineCount += ptr.Deref(md.Spec.Replicas, 0) + if !md.DeletionTimestamp.IsZero() { + continue } + expectedVSphereMachineCount += ptr.Deref(md.Spec.Replicas, 0) + mdNames.Insert(md.Name) } // In case there are no MachineDeployments or all the MachineDeployments have zero replicas, there is @@ -383,15 +406,26 @@ func shouldCreateVirtualMachineGroup(ctx context.Context, mds []clusterv1.Machin return false } - // filter down VSphereMachines to the non-deleting MDs + // Filter down VSphereMachines to the ones belonging to the MachineDeployment considered above. + // Note: if at least one of those VSphereMachines is deleting, wait for the deletion to complete. + currentVSphereMachineCount := int32(0) + for _, vSphereMachine := range vSphereMachines { + md := vSphereMachine.Labels[clusterv1.MachineDeploymentNameLabel] + if !mdNames.Has(md) { + continue + } + + if !vSphereMachine.DeletionTimestamp.IsZero() { + log.Info("Waiting for VSphereMachines required for the initial placement to be deleted") + return false + } - // => if any of these VSphereMachines deleting => return false + currentVSphereMachineCount++ + } // If the number of workers VSphereMachines matches the number of expected replicas in the MachineDeployments, // then all the VSphereMachines required for the initial placement decision do exist, then it is possible to create // the VirtualMachineGroup. 
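The hunk above gates VirtualMachineGroup creation on all worker VSphereMachines needed for the initial placement existing. For example, md1 with 3 replicas plus md2 with 2 replicas means 5 expected machines; if only 4 matching, non-deleting VSphereMachines exist, the reconciler keeps waiting. The following is a simplified, standalone sketch of that gate with trimmed-down stand-in types, not the real CAPI/CAPV types or the reconciler code itself.

// placement_gate_sketch.go
//
// Simplified sketch of the "gate" used before creating the VirtualMachineGroup:
// create it only once every worker VSphereMachine needed for the initial
// placement decision exists and none of them is being deleted.
package main

import "fmt"

type machineDeployment struct {
	name     string
	replicas int32
	deleting bool
}

type vSphereMachine struct {
	machineDeployment string
	deleting          bool
}

// readyForPlacement counts the expected replicas of non-deleting MachineDeployments
// and compares them against the existing, non-deleting VSphereMachines that belong
// to those MachineDeployments.
func readyForPlacement(mds []machineDeployment, machines []vSphereMachine) bool {
	expected := int32(0)
	active := map[string]bool{}
	for _, md := range mds {
		if md.deleting {
			continue
		}
		expected += md.replicas
		active[md.name] = true
	}
	if expected == 0 {
		return false
	}

	current := int32(0)
	for _, m := range machines {
		if !active[m.machineDeployment] {
			continue
		}
		if m.deleting {
			// A machine that still counts toward placement is going away; wait.
			return false
		}
		current++
	}
	return current == expected
}

func main() {
	mds := []machineDeployment{{name: "md1", replicas: 3}, {name: "md2", replicas: 2}}
	machines := []vSphereMachine{
		{machineDeployment: "md1"}, {machineDeployment: "md1"}, {machineDeployment: "md1"},
		{machineDeployment: "md2"},
	}
	// Only 4 of the 5 expected worker machines exist, so keep waiting.
	fmt.Println(readyForPlacement(mds, machines)) // false
}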
- // FIXME: we should probably include in the count only machines for MD included above (otherwise machines from deleting MS might lead to false positives / negatives - currentVSphereMachineCount := int32(len(vSphereMachines)) if currentVSphereMachineCount != expectedVSphereMachineCount { log.Info(fmt.Sprintf("Waiting for VSphereMachines required for the initial placement (expected %d, current %d)", expectedVSphereMachineCount, currentVSphereMachineCount)) return false @@ -432,3 +466,23 @@ func (r *VirtualMachineGroupReconciler) getMachineDeployments(ctx context.Contex } return machineDeployments.Items, nil } + +func memberNames(vmg *vmoprv1.VirtualMachineGroup) []string { + names := []string{} + if len(vmg.Spec.BootOrder) > 0 { + for _, member := range vmg.Spec.BootOrder[0].Members { + names = append(names, member.Name) + } + } + return names +} + +func nameList(names []string) string { + sort.Strings(names) + switch { + case len(names) <= 20: + return strings.Join(names, ", ") + default: + return fmt.Sprintf("%s, ... (%d more)", strings.Join(names[:20], ", "), len(names)-20) + } +} diff --git a/controllers/vmware/virtualmachinegroup_reconciler_test.go b/controllers/vmware/virtualmachinegroup_reconciler_test.go index db8fa67d68..71ca6fa674 100644 --- a/controllers/vmware/virtualmachinegroup_reconciler_test.go +++ b/controllers/vmware/virtualmachinegroup_reconciler_test.go @@ -33,6 +33,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client/fake" vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1" + "sigs.k8s.io/cluster-api-provider-vsphere/pkg/services/vmoperator" ) func Test_shouldCreateVirtualMachineGroup(t *testing.T) { @@ -248,7 +249,7 @@ func Test_getMachineDeploymentToFailureDomainMapping(t *testing.T) { existingVMG: &vmoprv1.VirtualMachineGroup{ ObjectMeta: metav1.ObjectMeta{ Annotations: map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md1"): "zone1", // Previously md1 was assigned to zone1 + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md1"): "zone1", // Previously md1 was assigned to zone1 }, }, }, @@ -265,7 +266,7 @@ func Test_getMachineDeploymentToFailureDomainMapping(t *testing.T) { existingVMG: &vmoprv1.VirtualMachineGroup{ ObjectMeta: metav1.ObjectMeta{ Annotations: map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md1"): "zone1", // Placement decision for md1 already reported into annotation + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md1"): "zone1", // Placement decision for md1 already reported into annotation }, }, Status: vmoprv1.VirtualMachineGroupStatus{ @@ -425,8 +426,20 @@ func TestVirtualMachineGroupReconciler_computeVirtualMachineGroup(t *testing.T) ObjectMeta: metav1.ObjectMeta{ Namespace: cluster.Namespace, Name: cluster.Name, + Labels: map[string]string{ + clusterv1.ClusterNameLabel: cluster.Name, + }, Annotations: map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md3"): "zone1", // failureDomain for md3 is explicitly set by the user + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md3"): "zone1", // failureDomain for md3 is explicitly set by the user + }, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: clusterv1.GroupVersion.String(), + Kind: "Cluster", + Name: cluster.Name, + UID: cluster.UID, + Controller: ptr.To(true), + }, }, }, Spec: vmoprv1.VirtualMachineGroupSpec{ @@ -464,8 +477,20 @@ func TestVirtualMachineGroupReconciler_computeVirtualMachineGroup(t *testing.T) Namespace: cluster.Namespace, Name: cluster.Name, UID: types.UID("uid"), + 
Labels: map[string]string{ + clusterv1.ClusterNameLabel: cluster.Name, + }, Annotations: map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md3"): "zone1", // failureDomain for md3 is explicitly set by the user + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md3"): "zone1", // failureDomain for md3 is explicitly set by the user + }, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: clusterv1.GroupVersion.String(), + Kind: "Cluster", + Name: cluster.Name, + UID: cluster.UID, + Controller: ptr.To(true), + }, }, }, Spec: vmoprv1.VirtualMachineGroupSpec{ @@ -488,9 +513,21 @@ func TestVirtualMachineGroupReconciler_computeVirtualMachineGroup(t *testing.T) Namespace: cluster.Namespace, Name: cluster.Name, UID: types.UID("uid"), + Labels: map[string]string{ + clusterv1.ClusterNameLabel: cluster.Name, + }, Annotations: map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md3"): "zone1", // failureDomain for md3 is explicitly set by the user - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md4"): "zone2", // failureDomain for md4 is explicitly set by the user + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md3"): "zone1", // failureDomain for md3 is explicitly set by the user + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md4"): "zone2", // failureDomain for md4 is explicitly set by the user + }, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: clusterv1.GroupVersion.String(), + Kind: "Cluster", + Name: cluster.Name, + UID: cluster.UID, + Controller: ptr.To(true), + }, }, }, Spec: vmoprv1.VirtualMachineGroupSpec{ @@ -531,10 +568,22 @@ func TestVirtualMachineGroupReconciler_computeVirtualMachineGroup(t *testing.T) Namespace: cluster.Namespace, Name: cluster.Name, UID: types.UID("uid"), + Labels: map[string]string{ + clusterv1.ClusterNameLabel: cluster.Name, + }, Annotations: map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md1"): "zone4", // failureDomain for md1 set by initial placement - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md2"): "zone5", // failureDomain for md2 set by initial placement - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md3"): "zone1", // failureDomain for md3 is explicitly set by the user + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md1"): "zone4", // failureDomain for md1 set by initial placement + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md2"): "zone5", // failureDomain for md2 set by initial placement + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md3"): "zone1", // failureDomain for md3 is explicitly set by the user + }, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: clusterv1.GroupVersion.String(), + Kind: "Cluster", + Name: cluster.Name, + UID: cluster.UID, + Controller: ptr.To(true), + }, }, }, Spec: vmoprv1.VirtualMachineGroupSpec{ @@ -557,11 +606,23 @@ func TestVirtualMachineGroupReconciler_computeVirtualMachineGroup(t *testing.T) Namespace: cluster.Namespace, Name: cluster.Name, UID: types.UID("uid"), + Labels: map[string]string{ + clusterv1.ClusterNameLabel: cluster.Name, + }, Annotations: map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md1"): "zone4", // failureDomain for md1 set by initial placement + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md1"): "zone4", // failureDomain for md1 set by initial placement // annotation for md2 deleted, md2 does not exist anymore - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md3"): "zone1", // failureDomain for md3 is explicitly set by the user - 
fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md4"): "zone2", // failureDomain for md4 is explicitly set by the user + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md3"): "zone1", // failureDomain for md3 is explicitly set by the user + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md4"): "zone2", // failureDomain for md4 is explicitly set by the user + }, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: clusterv1.GroupVersion.String(), + Kind: "Cluster", + Name: cluster.Name, + UID: cluster.UID, + Controller: ptr.To(true), + }, }, }, Spec: vmoprv1.VirtualMachineGroupSpec{ @@ -692,8 +753,9 @@ func TestVirtualMachineGroupReconciler_ReconcileSequence(t *testing.T) { Name: clusterInitialized.Name, UID: types.UID("uid"), Annotations: map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user }, + // Not setting labels and ownerReferences for sake of simplicity }, Spec: vmoprv1.VirtualMachineGroupSpec{ BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ @@ -1406,8 +1468,9 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { Name: clusterInitialized.Name, UID: types.UID("uid"), Annotations: map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user }, + // Not setting labels and ownerReferences for sake of simplicity }, Spec: vmoprv1.VirtualMachineGroupSpec{ BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ @@ -1428,8 +1491,9 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { Name: clusterInitialized.Name, UID: types.UID("uid"), Annotations: map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user }, + // Not setting labels and ownerReferences for sake of simplicity }, Spec: vmoprv1.VirtualMachineGroupSpec{ BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ @@ -1640,8 +1704,9 @@ func newCluster(name, namespace string, initialized bool, replicasMD1, replicasM Name: clusterInitialized.Name, UID: types.UID("uid"), Annotations: map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user }, + // Not setting labels and ownerReferences for sake of simplicity }, Spec: vmoprv1.VirtualMachineGroupSpec{ BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ @@ -1662,9 +1727,10 @@ func newCluster(name, namespace string, initialized bool, replicasMD1, replicasM Name: clusterInitialized.Name, UID: types.UID("uid"), Annotations: map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md4"): "zone2", // failureDomain for md4 is explicitly set by the user + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user + 
fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md4"): "zone2", // failureDomain for md4 is explicitly set by the user }, + // Not setting labels and ownerReferences for sake of simplicity }, Spec: vmoprv1.VirtualMachineGroupSpec{ BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ @@ -1708,9 +1774,10 @@ func newCluster(name, namespace string, initialized bool, replicasMD1, replicasM Name: clusterInitialized.Name, UID: types.UID("uid"), Annotations: map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md4"): "zone2", // failureDomain for md4 is explicitly set by the user + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md4"): "zone2", // failureDomain for md4 is explicitly set by the user }, + // Not setting labels and ownerReferences for sake of simplicity }, Spec: vmoprv1.VirtualMachineGroupSpec{ BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ @@ -1735,9 +1802,10 @@ func newCluster(name, namespace string, initialized bool, replicasMD1, replicasM Name: clusterInitialized.Name, UID: types.UID("uid"), Annotations: map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user // md4 deleted }, + // Not setting labels and ownerReferences for sake of simplicity }, Spec: vmoprv1.VirtualMachineGroupSpec{ BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ @@ -1776,9 +1844,10 @@ func newCluster(name, namespace string, initialized bool, replicasMD1, replicasM Name: clusterInitialized.Name, UID: types.UID("uid"), Annotations: map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md1"): "zone4", // failureDomain for md1 set by initial placement - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md1"): "zone4", // failureDomain for md1 set by initial placement + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user }, + // Not setting labels and ownerReferences for sake of simplicity }, Spec: vmoprv1.VirtualMachineGroupSpec{ BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ @@ -1800,9 +1869,10 @@ func newCluster(name, namespace string, initialized bool, replicasMD1, replicasM Name: clusterInitialized.Name, UID: types.UID("uid"), Annotations: map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md1"): "zone4", // failureDomain for md1 set by initial placement - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md1"): "zone4", // failureDomain for md1 set by initial placement + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user }, + // Not setting labels and ownerReferences for sake of simplicity }, Spec: vmoprv1.VirtualMachineGroupSpec{ BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ @@ -1840,9 +1910,10 @@ func newCluster(name, namespace string, initialized bool, replicasMD1, replicasM Name: 
clusterInitialized.Name, UID: types.UID("uid"), Annotations: map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md1"): "zone4", // failureDomain for md1 set by initial placement - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md1"): "zone4", // failureDomain for md1 set by initial placement + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user }, + // Not setting labels and ownerReferences for sake of simplicity }, Spec: vmoprv1.VirtualMachineGroupSpec{ BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ @@ -1864,10 +1935,11 @@ func newCluster(name, namespace string, initialized bool, replicasMD1, replicasM Name: clusterInitialized.Name, UID: types.UID("uid"), Annotations: map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md1"): "zone4", // failureDomain for md1 set by initial placement - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md3"): "zone2", // failureDomain for md3 is explicitly set by the user + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md1"): "zone4", // failureDomain for md1 set by initial placement + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md3"): "zone2", // failureDomain for md3 is explicitly set by the user }, + // Not setting labels and ownerReferences for sake of simplicity }, Spec: vmoprv1.VirtualMachineGroupSpec{ BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ @@ -1907,10 +1979,11 @@ func newCluster(name, namespace string, initialized bool, replicasMD1, replicasM Name: clusterInitialized.Name, UID: types.UID("uid"), Annotations: map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md1"): "zone4", // failureDomain for md1 set by initial placement - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md3"): "zone2", // failureDomain for md3 is explicitly set by the user + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md1"): "zone4", // failureDomain for md1 set by initial placement + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md3"): "zone2", // failureDomain for md3 is explicitly set by the user }, + // Not setting labels and ownerReferences for sake of simplicity }, Spec: vmoprv1.VirtualMachineGroupSpec{ BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ @@ -1935,10 +2008,11 @@ func newCluster(name, namespace string, initialized bool, replicasMD1, replicasM Name: clusterInitialized.Name, UID: types.UID("uid"), Annotations: map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md1"): "zone4", // failureDomain for md1 set by initial placement - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set by the user + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md1"): "zone4", // failureDomain for md1 set by initial placement + fmt.Sprintf("%s/%s", vmoperator.ZoneAnnotationPrefix, "md2"): "zone1", // failureDomain for md2 is explicitly set 
by the user // md3 deleted }, + // Not setting labels and ownerReferences for sake of simplicity }, Spec: vmoprv1.VirtualMachineGroupSpec{ BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ @@ -1989,6 +2063,14 @@ func newCluster(name, namespace string, initialized bool, replicasMD1, replicasM } g.Expect(err).ToNot(HaveOccurred()) + g.Expect(vmg.Labels).To(HaveKeyWithValue(clusterv1.ClusterNameLabel, tt.cluster.Name)) + g.Expect(vmg.OwnerReferences).To(ContainElement(metav1.OwnerReference{ + APIVersion: clusterv1.GroupVersion.String(), + Kind: "Cluster", + Name: tt.cluster.Name, + UID: tt.cluster.UID, + Controller: ptr.To(true), + })) g.Expect(vmg.Annotations).To(Equal(tt.wantVMG.Annotations)) g.Expect(vmg.Spec.BootOrder).To(Equal(tt.wantVMG.Spec.BootOrder)) }) diff --git a/pkg/services/vmoperator/vmopmachine.go b/pkg/services/vmoperator/vmopmachine.go index 66f42090ae..972cce8947 100644 --- a/pkg/services/vmoperator/vmopmachine.go +++ b/pkg/services/vmoperator/vmopmachine.go @@ -224,22 +224,26 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap Name: supervisorMachineCtx.Cluster.Name, } err := v.Client.Get(ctx, key, vmGroup) + + // The VirtualMachineGroup controller is going to create the vmg only when all the machines required for the placement + // decision exist. If the vmg does not exist yet, requeue. if err != nil { if !apierrors.IsNotFound(err) { return false, err } v1beta2conditions.Set(supervisorMachineCtx.VSphereMachine, metav1.Condition{ - Type: infrav1.VSphereMachineVirtualMachineProvisionedV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: infrav1.VSphereMachineVirtualMachineWaitingForVirtualMachineGroupV1Beta2Reason, - // FIXME: we should provide more details about this case in the message (vs other cases where we set this reason) + Type: infrav1.VSphereMachineVirtualMachineProvisionedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: infrav1.VSphereMachineVirtualMachineWaitingForVirtualMachineGroupV1Beta2Reason, + Message: "Waiting for all the VSphereMachine's VMGroup required for the placement decision to exist", }) log.V(4).Info(fmt.Sprintf("Waiting for VirtualMachineGroup %s, requeueing", key.Name), "VirtualMachineGroup", klog.KRef(key.Namespace, key.Name)) return true, nil } - // Proceed only if the VSphereMachine is a member of the VirtualMachineGroup. + // The VirtualMachineGroup controller is going to add a VM in the vmg only when the creation of this + // VM does not impact the placement decision. If the VM is not yet included in the member list, requeue. isMember := v.checkVirtualMachineGroupMembership(vmGroup, vmKey.Name) if !isMember { v1beta2conditions.Set(supervisorMachineCtx.VSphereMachine, metav1.Condition{ @@ -951,14 +955,11 @@ func getVMLabels(supervisorMachineCtx *vmware.SupervisorMachineContext, vmLabels vmLabels[k] = v } - // FIXME: - // if the zone label is set, it should be immutable - // failure domain from machine should be used if set to set the zone label - // if failure domain from machine is not set, use the failure domain from VMG annotation - // Set the labels that determine the VM's placement. // Note: if the failureDomain is not set, auto placement will happen according to affinity rules on VM during initial Cluster creation. // For VM created during day-2 operation like scaling up, we should expect the failureDomain to be always set. 
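The removed FIXME and the note that follows describe the intended rule for the VM's zone label: prefer the Machine's failureDomain when set, otherwise fall back to the zone recorded on the VirtualMachineGroup, and never change the label once it has been set. The sketch below illustrates only that rule; the label key and helper name are illustrative stand-ins, not the keys actually used by CAPV or vm-operator.

// zone_label_sketch.go
//
// Hedged sketch of the zone-label rule: decided once, immutable afterwards,
// even if the VirtualMachineGroup annotation it was derived from changes later.
package main

import "fmt"

const zoneLabelKey = "topology.kubernetes.io/zone" // stand-in label key

// applyZoneLabel sets the zone label only if it is not already set.
// Preference order for the new value: the Machine's failureDomain if set,
// otherwise the zone recorded on the VirtualMachineGroup for this MachineDeployment.
func applyZoneLabel(vmLabels map[string]string, machineFailureDomain, vmgZone string) {
	if _, alreadySet := vmLabels[zoneLabelKey]; alreadySet {
		return // immutable once set
	}
	switch {
	case machineFailureDomain != "":
		vmLabels[zoneLabelKey] = machineFailureDomain
	case vmgZone != "":
		vmLabels[zoneLabelKey] = vmgZone
	}
}

func main() {
	labels := map[string]string{}
	applyZoneLabel(labels, "", "zone1") // initial placement decided by the VMG
	applyZoneLabel(labels, "zone2", "") // a later, different value is ignored
	fmt.Println(labels[zoneLabelKey])   // zone1
}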
+ // Note: It is important that the value zone label is set on a vm must never change once it is set, + // because the zone in the VirtualMachineGroup might change in case this info is derived from spec.template.spec.failureDomain. var failureDomain string if affinityInfo != nil && affinityInfo.failureDomain != "" { failureDomain = affinityInfo.failureDomain From 08b8304129ac539d28efb9b4bc1bb0db472192ae Mon Sep 17 00:00:00 2001 From: Gong Zhang Date: Thu, 27 Nov 2025 13:38:38 +0800 Subject: [PATCH 25/25] Finalize VMG controller and UT Signed-off-by: Gong Zhang --- .../virtualmachinegroup_reconciler_test.go | 1018 +---------------- pkg/services/vmoperator/vmopmachine.go | 12 +- 2 files changed, 16 insertions(+), 1014 deletions(-) diff --git a/controllers/vmware/virtualmachinegroup_reconciler_test.go b/controllers/vmware/virtualmachinegroup_reconciler_test.go index 71ca6fa674..bc3be37f6c 100644 --- a/controllers/vmware/virtualmachinegroup_reconciler_test.go +++ b/controllers/vmware/virtualmachinegroup_reconciler_test.go @@ -18,7 +18,6 @@ package vmware import ( "fmt" - "testing" . "github.com/onsi/gomega" @@ -95,30 +94,12 @@ func Test_shouldCreateVirtualMachineGroup(t *testing.T) { want: false, }, } -<<<<<<< HEAD - - member := func(name string) vmoprv1.GroupMember { return vmoprv1.GroupMember{Name: name} } - - // CAPI Machine helpers - makeCAPIMachine := func(name, namespace string, fd *string) *clusterv1.Machine { - m := &clusterv1.Machine{ - ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace}, - } - if fd != nil { - m.Spec = clusterv1.MachineSpec{FailureDomain: *fd} - } - return m - } - makeCAPIMachineNoFailureDomain := func(name, namespace string) *clusterv1.Machine { - return makeCAPIMachine(name, namespace, nil) -======= for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { g := NewWithT(t) got := shouldCreateVirtualMachineGroup(ctx, tt.mds, tt.vSphereMachines) g.Expect(got).To(Equal(tt.want)) }) ->>>>>>> 9409e432 (POC AAF) } } @@ -128,22 +109,6 @@ func Test_getVirtualMachineNameToMachineDeploymentMapping(t *testing.T) { vSphereMachines []vmwarev1.VSphereMachine want map[string]string }{ - { - name: "mapping from VirtualMachineName to MachineDeployment is inferred from vSphereMachines", - vSphereMachines: []vmwarev1.VSphereMachine{ - *createVSphereMachine("m1", "test-cluster", "md1", ""), - *createVSphereMachine("m2", "test-cluster", "md1", ""), - *createVSphereMachine("m3", "test-cluster", "md2", ""), - *createVSphereMachine("m4", "test-cluster", "md3", "zone1"), - }, - want: map[string]string{ - // Note VirtualMachineName is equal to the VSphereMachine because when using the default - "m1": "md1", - "m2": "md1", - "m3": "md2", - "m4": "md3", - }, - }, { name: "mapping from VirtualMachineName to MachineDeployment is inferred from vSphereMachines", vSphereMachines: []vmwarev1.VSphereMachine{ @@ -231,7 +196,7 @@ func Test_getMachineDeploymentToFailureDomainMapping(t *testing.T) { want map[string]string }{ { - name: "MachineDeployment mapping should use spec.failure domain", + name: "MachineDeployment mapping should use spec.FailureDomain", mds: []clusterv1.MachineDeployment{ *createMD("md1", "test-cluster", "zone1", 1), // failure domain explicitly set }, @@ -242,7 +207,7 @@ func Test_getMachineDeploymentToFailureDomainMapping(t *testing.T) { }, }, { - name: "MachineDeployment mapping should use spec.failure domain (latest value must be used)", + name: "MachineDeployment mapping should use spec.FailureDomain (latest value must be used)", mds: 
[]clusterv1.MachineDeployment{ *createMD("md1", "test-cluster", "zone2", 1), // failure domain explicitly set }, @@ -670,31 +635,15 @@ func TestVirtualMachineGroupReconciler_ReconcileSequence(t *testing.T) { tests := []struct { name string -<<<<<<< HEAD - targetMember []vmoprv1.GroupMember - vmgInput *vmoprv1.VirtualMachineGroup - existingObjects []runtime.Object - wantErr bool - expectedErrMsg string -======= cluster *clusterv1.Cluster mds []clusterv1.MachineDeployment vSphereMachines []vmwarev1.VSphereMachine existingVMG *vmoprv1.VirtualMachineGroup wantResult ctrl.Result wantVMG *vmoprv1.VirtualMachineGroup ->>>>>>> 9409e432 (POC AAF) }{ // Before initial placement { -<<<<<<< HEAD - name: "Allow Create if VirtualMachineGroup not existed", - targetMember: []vmoprv1.GroupMember{member(memberName1)}, - vmgInput: baseVMG.DeepCopy(), - existingObjects: nil, - wantErr: false, - expectedErrMsg: "", -======= name: "VirtualMachineGroup should not be created when the cluster is not yet initialized", cluster: clusterNotYetInitialized, mds: nil, @@ -702,17 +651,16 @@ func TestVirtualMachineGroupReconciler_ReconcileSequence(t *testing.T) { existingVMG: nil, wantResult: ctrl.Result{}, wantVMG: nil, ->>>>>>> 9409e432 (POC AAF) }, { name: "VirtualMachineGroup should not be created when waiting for vSphereMachines to exist", - cluster: clusterNotYetInitialized, + cluster: clusterInitialized, mds: []clusterv1.MachineDeployment{ - *createMD("md1", clusterNotYetInitialized.Name, "", 1), - *createMD("md2", clusterNotYetInitialized.Name, "zone1", 1), + *createMD("md1", clusterInitialized.Name, "", 1), + *createMD("md2", clusterInitialized.Name, "zone1", 1), }, vSphereMachines: []vmwarev1.VSphereMachine{ - *createVSphereMachine("m1", clusterNotYetInitialized.Name, "md1", "", withCustomNamingStrategy()), + *createVSphereMachine("m1", clusterInitialized.Name, "md1", "", withCustomNamingStrategy()), }, existingVMG: nil, wantResult: ctrl.Result{}, @@ -760,33 +708,6 @@ func TestVirtualMachineGroupReconciler_ReconcileSequence(t *testing.T) { Spec: vmoprv1.VirtualMachineGroupSpec{ BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{ { -<<<<<<< HEAD - Name: memberName1, - Kind: memberKind, - }}}} - return []runtime.Object{v} - }(), - wantErr: false, - expectedErrMsg: "", - }, - { - name: "Allow Patch if no new member", - targetMember: []vmoprv1.GroupMember{member(memberName1)}, // No new members - vmgInput: baseVMG.DeepCopy(), - existingObjects: func() []runtime.Object { - v := baseVMG.DeepCopy() - // Annotation for mdName1 is missing - v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ - { - Name: memberName1, - Kind: memberKind, - }, - }}} - return []runtime.Object{v} - }(), - wantErr: false, - expectedErrMsg: "", -======= Members: []vmoprv1.GroupMember{ {Name: "m1-vm", Kind: "VirtualMachine"}, {Name: "m2-vm", Kind: "VirtualMachine"}, @@ -796,661 +717,10 @@ func TestVirtualMachineGroupReconciler_ReconcileSequence(t *testing.T) { }, }, }, ->>>>>>> 9409e432 (POC AAF) }, // During initial placement { -<<<<<<< HEAD - name: "Block Patch to add new member if VirtualMachineGroup is not Placement Ready", - targetMember: []vmoprv1.GroupMember{member(memberName1), member(memberName2)}, - vmgInput: baseVMG.DeepCopy(), - existingObjects: func() []runtime.Object { - v := baseVMG.DeepCopy() - v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{ - {Members: []vmoprv1.GroupMember{ - { - Name: memberName1, - Kind: memberKind, - }}}} - return []runtime.Object{v} - }(), - 
wantErr: true, - expectedErrMsg: fmt.Sprintf("waiting for VirtualMachineGroup %s to get condition Ready to true, temporarily blocking patch", vmgName), - }, - { - name: "Block Patch if new member VSphereMachine Not Found", - targetMember: []vmoprv1.GroupMember{member(memberName1), member(memberName2)}, // vm-02 is new - vmgInput: baseVMG.DeepCopy(), - existingObjects: func() []runtime.Object { - v := baseVMG.DeepCopy() - conditions.Set(v, metav1.Condition{ - Type: vmoprv1.ReadyConditionType, - Status: metav1.ConditionTrue}) - v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ - { - Name: memberName1, - Kind: memberKind, - }, - }}} - // vm-02 VSphereMachine is missing - return []runtime.Object{v, makeVSphereMachineOwned(memberName1, vmgNamespace, ownerMachineName1, mdName1), makeCAPIMachine(ownerMachineName1, vmgNamespace, ptr.To(failureDomainA))} - }(), - wantErr: true, - expectedErrMsg: fmt.Sprintf("VSphereMachine for new member %s not found, temporarily blocking patch", memberName2), - }, - { - name: "Block Patch if VSphereMachine found but owner CAPI Machine missing", - targetMember: []vmoprv1.GroupMember{member(memberName1), member(memberName2)}, // vm-02 is new - vmgInput: baseVMG.DeepCopy(), - existingObjects: func() []runtime.Object { - v := baseVMG.DeepCopy() - conditions.Set(v, metav1.Condition{ - Type: vmoprv1.ReadyConditionType, - Status: metav1.ConditionTrue}) - v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ - { - Name: memberName1, - Kind: memberKind, - }, - }}} - // vm-02 VSphereMachine exists but has no owner ref - return []runtime.Object{v, makeVSphereMachineOwned(memberName1, vmgNamespace, "ownerMachineName1", mdName1), makeCAPIMachine("ownerMachineName1", vmgNamespace, ptr.To(failureDomainA)), makeVSphereMachineNoOwner(memberName2, vmgNamespace)} - }(), - wantErr: true, - expectedErrMsg: fmt.Sprintf("VSphereMachine %s found but owner Machine reference is missing, temporarily blocking patch", memberName2), - }, - { - name: "Allow Patch if all new members have Machine FailureDomain specified", - targetMember: []vmoprv1.GroupMember{member(memberName1), member(memberName2)}, // vm-02 is new - vmgInput: baseVMG.DeepCopy(), - existingObjects: func() []runtime.Object { - v := baseVMG.DeepCopy() - conditions.Set(v, metav1.Condition{ - Type: vmoprv1.ReadyConditionType, - Status: metav1.ConditionTrue}) - v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ - { - Name: memberName1, - Kind: memberKind, - }, - }}} - // m-02 (owner of ownerMachineName2) has FailureDomain set - return []runtime.Object{ - v, - makeVSphereMachineOwned(memberName1, vmgNamespace, "ownerMachineName1", mdName1), makeCAPIMachine("ownerMachineName1", vmgNamespace, nil), - makeVSphereMachineOwned(memberName2, vmgNamespace, "ownerMachineName2", mdName2), makeCAPIMachine("ownerMachineName2", vmgNamespace, ptr.To(failureDomainA)), - } - }(), - // Allowed because new members don't require placement - wantErr: false, - expectedErrMsg: "", - }, - { - name: "Block Patch if placement annotation is missing", - targetMember: []vmoprv1.GroupMember{member(memberName1), member(memberName2)}, // vm-02 is new and requires placement - vmgInput: baseVMG.DeepCopy(), - existingObjects: func() []runtime.Object { - v := baseVMG.DeepCopy() - conditions.Set(v, metav1.Condition{ - Type: vmoprv1.ReadyConditionType, - Status: metav1.ConditionTrue}) - v.Annotations = map[string]string{ - 
fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA, - } - v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ - { - Name: memberName1, - Kind: memberKind, - }, - }}} - // m-02 lacks FailureDomain and new Member vm-02 requires placement annotation but not exists - return []runtime.Object{ - v, - makeVSphereMachineOwned(memberName1, vmgNamespace, "ownerMachineName1", mdName1), makeCAPIMachine("ownerMachineName1", vmgNamespace, ptr.To(failureDomainA)), - makeVSphereMachineOwned(memberName2, vmgNamespace, "ownerMachineName2", mdName2), makeCAPIMachineNoFailureDomain("ownerMachineName2", vmgNamespace), - } - }(), - wantErr: true, - expectedErrMsg: fmt.Sprintf("waiting for placement annotation to add VMG member %s, temporarily blocking patch", memberName2), - }, - { - name: "Allow Patch Machine since required placement annotation exists", - targetMember: []vmoprv1.GroupMember{member(memberName1), member(memberName2)}, // vm-02 is new and requires placement - vmgInput: baseVMG.DeepCopy(), - existingObjects: func() []runtime.Object { - v := baseVMG.DeepCopy() - conditions.Set(v, metav1.Condition{ - Type: vmoprv1.ReadyConditionType, - Status: metav1.ConditionTrue}) - v.Annotations = map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA, - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName2): zoneB, - } - v.Spec.BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup{{Members: []vmoprv1.GroupMember{ - { - Name: memberName1, - Kind: memberKind, - }, - }}} - return []runtime.Object{ - v, - makeVSphereMachineOwned(memberName1, vmgNamespace, "ownerMachineName1", mdName1), makeCAPIMachine("ownerMachineName1", vmgNamespace, nil), - makeVSphereMachineOwned(memberName2, vmgNamespace, "ownerMachineName2", mdName2), makeCAPIMachineNoFailureDomain("ownerMachineName2", vmgNamespace), - } - }(), - wantErr: false, - expectedErrMsg: "", - }, - } - - for _, tt := range tests { - // Looks odd, but need to reinitialize test variable - tt := tt - t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) - kubeClient := fake.NewClientBuilder().WithRuntimeObjects(tt.existingObjects...).Build() - - vmgInput := tt.vmgInput.DeepCopy() - - err := isCreateOrPatchAllowed(ctx, kubeClient, tt.targetMember, vmgInput) - - if tt.wantErr { - g.Expect(err).To(HaveOccurred()) - g.Expect(err.Error()).To(ContainSubstring(tt.expectedErrMsg)) - } else { - g.Expect(err).NotTo(HaveOccurred()) - } - }) - } -} - -func TestGetExpectedVSphereMachineCount(t *testing.T) { - g := NewWithT(t) - ctx := context.Background() - - scheme := runtime.NewScheme() - g.Expect(clusterv1.AddToScheme(scheme)).To(Succeed()) - - targetCluster := newTestCluster(clusterName, clusterNamespace) - - mdA := newMachineDeployment("md-a", clusterName, clusterNamespace, true, ptr.To(int32(3))) - mdB := newMachineDeployment("md-b", clusterName, clusterNamespace, true, ptr.To(int32(5))) - mdCNil := newMachineDeployment("md-c-nil", clusterName, clusterNamespace, false, nil) - mdDZero := newMachineDeployment("md-d-zero", clusterName, clusterNamespace, true, ptr.To(int32(0))) - // Create an MD for a different cluster (should be filtered) - mdOtherCluster := newMachineDeployment("md-other", otherClusterName, clusterNamespace, true, ptr.To(int32(5))) - - tests := []struct { - name string - initialObjects []client.Object - expectedTotal int32 - wantErr bool - }{ - { - name: "Sum of two MDs", - initialObjects: []client.Object{mdA, mdB}, - expectedTotal: 8, - wantErr: false, - }, - { - name: "Should get count 
when MDs include nil and zero replicas", - initialObjects: []client.Object{mdA, mdB, mdCNil, mdDZero}, - expectedTotal: 8, - wantErr: false, - }, - { - name: "Should filters out MDs from other clusters", - initialObjects: []client.Object{mdA, mdB, mdOtherCluster}, - expectedTotal: 8, - wantErr: false, - }, - { - name: "Should succeed when no MachineDeployments found", - initialObjects: []client.Object{}, - expectedTotal: 0, - wantErr: false, - }, - } - - for _, tt := range tests { - // Looks odd, but need to reinitialize test variable - tt := tt - t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) - fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(tt.initialObjects...).Build() - total, err := getExpectedVSphereMachineCount(ctx, fakeClient, targetCluster) - if tt.wantErr { - g.Expect(err).To(HaveOccurred()) - } else { - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(total).To(Equal(tt.expectedTotal)) - } - }) - } -} - -func TestGetCurrentVSphereMachines(t *testing.T) { - g := NewWithT(t) - ctx := context.Background() - - scheme := runtime.NewScheme() - g.Expect(vmwarev1.AddToScheme(scheme)).To(Succeed()) - - // VSphereMachine names are based on CAPI Machine names, but we use fake name here. - vsmName1 := fmt.Sprintf("%s-%s", mdName1, "vsm-1") - vsmName2 := fmt.Sprintf("%s-%s", mdName2, "vsm-2") - vsm1 := newVSphereMachine(vsmName1, mdName1, false, false, nil) - vsm2 := newVSphereMachine(vsmName2, mdName2, false, false, nil) - vsmDeleting := newVSphereMachine("vsm-3", mdName1, false, true, nil) // Deleting - vsmControlPlane := newVSphereMachine("vsm-cp", "not-md", true, false, nil) - - tests := []struct { - name string - objects []client.Object - want int - }{ - { - name: "Should filtered out deleting VSphereMachines", - objects: []client.Object{ - vsm1, - vsm2, - vsmDeleting, - vsmControlPlane, - }, - want: 2, - }, - { - name: "Want no Error if no VSphereMachines found", - objects: []client.Object{}, - want: 0, - }, - } - - for _, tt := range tests { - // Looks odd, but need to reinitialize test variable - tt := tt - t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) - fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(tt.objects...).Build() - got, err := getCurrentVSphereMachines(ctx, fakeClient, clusterNamespace, clusterName) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(got).To(HaveLen(tt.want)) - - // Check that the correct Machines are present - if tt.want > 0 { - names := make([]string, len(got)) - for i, vsm := range got { - names[i] = vsm.Name - } - sort.Strings(names) - g.Expect(names).To(Equal([]string{vsmName1, vsmName2})) - } - }) - } -} -func TestGenerateVirtualMachineGroupAnnotations(t *testing.T) { - g := NewWithT(t) - ctx := context.Background() - - scheme := runtime.NewScheme() - g.Expect(vmwarev1.AddToScheme(scheme)).To(Succeed()) - - baseVMG := &vmoprv1.VirtualMachineGroup{ - ObjectMeta: metav1.ObjectMeta{ - Name: clusterName, - Namespace: clusterNamespace, - Annotations: make(map[string]string), - }, - } - - // VSphereMachines corresponding to the VMG members - vsmName1 := fmt.Sprintf("%s-%s", mdName1, "vsm-1") - vsmName2 := fmt.Sprintf("%s-%s", mdName2, "vsm-2") - vsmNameSameMD := fmt.Sprintf("%s-%s", mdName1, "vsm-same-md") - vsm1 := newVSphereMachine(vsmName1, mdName1, false, false, nil) - vsm2 := newVSphereMachine(vsmName2, mdName2, false, false, nil) - vsmSameMD := newVSphereMachine(vsmNameSameMD, mdName1, false, false, nil) - vsmMissingLabel := newVSphereMachine("vsm-nolabel", mdName2, false, false, nil) - 
vsmMissingLabel.Labels = nil // Explicitly remove labels for test case - - tests := []struct { - name string - vmg *vmoprv1.VirtualMachineGroup - machineDeployments []string - initialClientObjects []client.Object - expectedAnnotations map[string]string - wantErr bool - }{ - { - name: "Deletes stale annotation for none-existed MD", - vmg: func() *vmoprv1.VirtualMachineGroup { - v := baseVMG.DeepCopy() - // This MD (mdNameStale) is NOT in the machineDeployments list below. - v.SetAnnotations(map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdNameStale): zoneA, - "other/annotation": "keep-me", - }) - v.Status = vmoprv1.VirtualMachineGroupStatus{ - Members: []vmoprv1.VirtualMachineGroupMemberStatus{}, - } - return v - }(), - machineDeployments: []string{mdName1}, - initialClientObjects: []client.Object{}, - expectedAnnotations: map[string]string{ - "other/annotation": "keep-me", - }, - wantErr: false, - }, - { - name: "Skip if VSphereMachine Missing MachineDeployment Label", - vmg: func() *vmoprv1.VirtualMachineGroup { - v := baseVMG.DeepCopy() - v.Status = vmoprv1.VirtualMachineGroupStatus{ - Members: []vmoprv1.VirtualMachineGroupMemberStatus{ - newVMGMemberStatus("vsm-nolabel", "VirtualMachine", true, true, zoneA), - }, - } - return v - }(), - machineDeployments: []string{mdName1}, - initialClientObjects: []client.Object{vsmMissingLabel}, - expectedAnnotations: map[string]string{}, - wantErr: false, - }, - { - name: "Skip if VSphereMachine is Not Found in API", - vmg: func() *vmoprv1.VirtualMachineGroup { - v := baseVMG.DeepCopy() - v.Status = vmoprv1.VirtualMachineGroupStatus{ - Members: []vmoprv1.VirtualMachineGroupMemberStatus{ - newVMGMemberStatus("non-existent-vm", "VirtualMachine", true, true, zoneA), - }, - } - return v - }(), - machineDeployments: []string{mdName1}, - initialClientObjects: []client.Object{vsm1}, - expectedAnnotations: map[string]string{}, - wantErr: false, - }, - { - name: "Skip as placement already exists in VMG Annotations", - vmg: func() *vmoprv1.VirtualMachineGroup { - v := baseVMG.DeepCopy() - v.Annotations = map[string]string{fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA} - v.Status.Members = []vmoprv1.VirtualMachineGroupMemberStatus{ - newVMGMemberStatus(vsmName1, "VirtualMachine", true, true, zoneB), - } - return v - }(), - machineDeployments: []string{mdName1}, - initialClientObjects: []client.Object{vsm1}, - // Should retain existing zone-a - expectedAnnotations: map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA, - }, - wantErr: false, - }, - { - name: "Skip if placement is nil", - vmg: func() *vmoprv1.VirtualMachineGroup { - v := baseVMG.DeepCopy() - v.Status = vmoprv1.VirtualMachineGroupStatus{ - Members: []vmoprv1.VirtualMachineGroupMemberStatus{ - newVMGMemberStatus(vsmName1, "VirtualMachine", true, false, zoneA), - }, - } - return v - }(), - machineDeployments: []string{mdName1}, - initialClientObjects: []client.Object{vsm1}, - expectedAnnotations: map[string]string{}, - wantErr: false, - }, - { - name: "Skip if Zone is empty string", - vmg: func() *vmoprv1.VirtualMachineGroup { - v := baseVMG.DeepCopy() - v.Status = vmoprv1.VirtualMachineGroupStatus{ - Members: []vmoprv1.VirtualMachineGroupMemberStatus{ - newVMGMemberStatus(vsmName1, "VirtualMachine", true, true, ""), - }, - } - return v - }(), - machineDeployments: []string{mdName1}, - initialClientObjects: []client.Object{vsm1}, - expectedAnnotations: map[string]string{}, - wantErr: false, - }, - { - name: "Cleans stale and adds new 
annotations", - vmg: func() *vmoprv1.VirtualMachineGroup { - v := baseVMG.DeepCopy() - // Stale annotation to be deleted - v.SetAnnotations(map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdNameStale): zoneB, - }) - v.Status = vmoprv1.VirtualMachineGroupStatus{ - Members: []vmoprv1.VirtualMachineGroupMemberStatus{ - newVMGMemberStatus(vsmName1, "VirtualMachine", true, true, zoneA), - }, - } - return v - }(), - machineDeployments: []string{mdName1}, - initialClientObjects: []client.Object{vsm1}, - expectedAnnotations: map[string]string{ - // Stale annotation for mdNameStale should be gone - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA, - }, - wantErr: false, - }, - { - name: "Placement found for two distinct MDs", - vmg: func() *vmoprv1.VirtualMachineGroup { - v := baseVMG.DeepCopy() - v.Status = vmoprv1.VirtualMachineGroupStatus{ - Members: []vmoprv1.VirtualMachineGroupMemberStatus{ - newVMGMemberStatus(vsmName1, "VirtualMachine", true, true, zoneA), - newVMGMemberStatus(vsmName2, "VirtualMachine", true, true, zoneB), - }, - } - return v - }(), - machineDeployments: []string{mdName1, mdName2}, - initialClientObjects: []client.Object{vsm1, vsm2}, - expectedAnnotations: map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA, - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName2): zoneB, - }, - wantErr: false, - }, - { - name: "Placement found for MD1 but not MD2 since PlacementReady is not true", - vmg: func() *vmoprv1.VirtualMachineGroup { - v := baseVMG.DeepCopy() - v.Status = vmoprv1.VirtualMachineGroupStatus{ - Members: []vmoprv1.VirtualMachineGroupMemberStatus{ - newVMGMemberStatus(vsmName1, "VirtualMachine", true, true, zoneA), - newVMGMemberStatus(vsmName2, "VirtualMachine", false, false, ""), - }, - } - return v - }(), - machineDeployments: []string{mdName1, mdName2}, - initialClientObjects: []client.Object{vsm1, vsm2}, - expectedAnnotations: map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA, - }, - wantErr: false, - }, - { - name: "Keep the original annotation if VMs for the same MD placed to new zone", - vmg: func() *vmoprv1.VirtualMachineGroup { - v := baseVMG.DeepCopy() - v.Status = vmoprv1.VirtualMachineGroupStatus{ - Members: []vmoprv1.VirtualMachineGroupMemberStatus{ - newVMGMemberStatus(vsmName1, "VirtualMachine", true, true, zoneA), - newVMGMemberStatus(vsmNameSameMD, "VirtualMachine", true, true, zoneB), - }, - } - return v - }(), - machineDeployments: []string{mdName1}, - initialClientObjects: []client.Object{vsm1, vsmSameMD}, - expectedAnnotations: map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA, - }, - wantErr: false, - }, - } - - for _, tt := range tests { - // Looks odd, but need to reinitialize test variable - tt := tt - t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) - fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(tt.initialClientObjects...).Build() - err := generateVirtualMachineGroupAnnotations(ctx, fakeClient, tt.vmg, tt.machineDeployments) - if tt.wantErr { - g.Expect(err).To(HaveOccurred()) - } else { - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(tt.vmg.Annotations).To(Equal(tt.expectedAnnotations)) - } - }) - } -} - -func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { - g := NewWithT(t) - ctx := context.Background() - - scheme := runtime.NewScheme() - g.Expect(clusterv1.AddToScheme(scheme)).To(Succeed()) - g.Expect(vmwarev1.AddToScheme(scheme)).To(Succeed()) - 
g.Expect(vmoprv1.AddToScheme(scheme)).To(Succeed()) - - // Initial objects for the successful VMG creation path - cluster := newCluster(clusterName, clusterNamespace, true, 1, 1) - vsm1 := newVSphereMachine("vsm-1", mdName1, false, false, nil) - vsm2 := newVSphereMachine("vsm-2", mdName2, false, false, nil) - // VSM 3 is in deletion (will be filtered out) - vsm3 := newVSphereMachine("vsm-3", mdName1, false, true, nil) - md1 := newMachineDeployment(mdName1, clusterName, clusterNamespace, true, ptr.To(int32(1))) - md2 := newMachineDeployment(mdName2, clusterName, clusterNamespace, true, ptr.To(int32(1))) - machine1 := newMachine("machine-vsm-1", mdName1, "") - machine2 := newMachine("machine-vsm-2", mdName2, "") - - // VMG Ready state for Day-2 checks - readyVMGMembers := []vmoprv1.GroupMember{ - {Name: vsm1.Name, Kind: memberKind}, - {Name: vsm2.Name, Kind: memberKind}, - } - - // VMG Ready but haven't added placement annotation - vmgReady := newVMG(clusterName, clusterNamespace, readyVMGMembers, true, nil) - - // VMG Ready and have placement annotation for Day-2 checks - vmgPlaced := newVMG(clusterName, clusterNamespace, readyVMGMembers, true, map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA, - }) - - tests := []struct { - name string - initialObjects []client.Object - expectedResult reconcile.Result - expectVMGExists bool - expectedMembersCount int - expectedAnnotations map[string]string - expectedErrorMsg string - }{ - // VMG Create - { - name: "Should Exit if Cluster Not Found", - initialObjects: []client.Object{}, - expectedResult: reconcile.Result{}, - expectVMGExists: false, - expectedMembersCount: 0, - }, - { - name: "Should Exit if Cluster Deletion Timestamp Set", - initialObjects: []client.Object{ - func() client.Object { - c := cluster.DeepCopy() - c.Finalizers = []string{"test.finalizer.cluster"} - c.DeletionTimestamp = &metav1.Time{Time: time.Now()} - return c - }(), - }, - expectedResult: reconcile.Result{}, - expectVMGExists: false, - }, - { - name: "Should Requeue if ControlPlane Not Initialized", - initialObjects: []client.Object{ - newCluster(clusterName, clusterNamespace, false, 1, 0), - }, - expectedResult: reconcile.Result{}, - expectVMGExists: false, - }, - { - name: "Should Requeue if VMG Not Found and Machines not ready", - initialObjects: []client.Object{cluster.DeepCopy(), md1.DeepCopy(), md2.DeepCopy()}, - expectedResult: reconcile.Result{}, - expectVMGExists: false, - expectedMembersCount: 0, - }, - { - name: "Should Succeed to create VMG", - initialObjects: []client.Object{ - cluster.DeepCopy(), - md1.DeepCopy(), - vsm1.DeepCopy(), - md2.DeepCopy(), - vsm1.DeepCopy(), - }, - expectedResult: reconcile.Result{}, - expectVMGExists: true, - expectedMembersCount: 2, - }, - // VMG Update: Member Scale Down - { - name: "Should Succeed to update VMG if removing member even placement is not ready", - initialObjects: []client.Object{ - cluster.DeepCopy(), - newMachineDeployment(mdName1, clusterName, clusterNamespace, true, ptr.To(int32(1))), - // VSM3 is in deletion - vsm1.DeepCopy(), - vsm2.DeepCopy(), - vsm3.DeepCopy(), - // Existing VMG has vsm-1, vsm-2 and vsm-3, simulating scale-down state - newVMG(clusterName, clusterNamespace, []vmoprv1.GroupMember{ - {Name: "vsm-1", Kind: memberKind}, - {Name: "vsm-2", Kind: memberKind}, - {Name: "vsm-3", Kind: memberKind}, - }, false, nil), - }, - expectedResult: reconcile.Result{}, - expectVMGExists: true, - expectedMembersCount: 2, - }, - // VMG Placement Annotation - { - name: "Should add 
Placement annotation after Placement ready", - initialObjects: []client.Object{ - cluster.DeepCopy(), - md1.DeepCopy(), - vsm1.DeepCopy(), - machine1.DeepCopy(), - md2.DeepCopy(), - vsm2.DeepCopy(), - machine2.DeepCopy(), - vmgReady.DeepCopy(), -======= name: "No op if nothing changes during initial placement", cluster: clusterInitialized, mds: []clusterv1.MachineDeployment{ @@ -1506,180 +776,8 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) { }, }, }, ->>>>>>> 9409e432 (POC AAF) - }, - expectedResult: reconcile.Result{}, - expectVMGExists: true, - expectedMembersCount: 2, - expectedAnnotations: map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA, - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName2): zoneB, - }, - }, - { - name: "Should cleanup stale VMG annotation for deleted MD", - initialObjects: []client.Object{ - cluster.DeepCopy(), - // MD1,MD2 is active - md1.DeepCopy(), - vsm1.DeepCopy(), - machine1.DeepCopy(), - md2.DeepCopy(), - vsm2.DeepCopy(), - machine2.DeepCopy(), - // VMG has annotations and a stale one for md-old - newVMG(clusterName, clusterNamespace, readyVMGMembers, true, map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA, - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName2): zoneB, - fmt.Sprintf("%s/md-old", ZoneAnnotationPrefix): "zone-c", - }), - }, - expectedResult: reconcile.Result{}, - expectVMGExists: true, - expectedMembersCount: 1, - expectedAnnotations: map[string]string{ - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName1): zoneA, - fmt.Sprintf("%s/%s", ZoneAnnotationPrefix, mdName2): zoneB, }, }, - { - name: "Should block adding member if VMG not Ready (waiting for initial placement)", - initialObjects: []client.Object{ - cluster.DeepCopy(), - // MD1 spec is 2 (scale-up target) - newMachineDeployment(mdName1, clusterName, clusterNamespace, true, ptr.To(int32(2))), - // Only 1 VSM currently exists (vsm-1) for MD1 - vsm1.DeepCopy(), - machine1.DeepCopy(), - vsm2.DeepCopy(), - machine2.DeepCopy(), - newVSphereMachine("vsm-new", mdName1, false, false, nil), - // VMG exists but is NOT Ready (simulating placement in progress) - newVMG(clusterName, clusterNamespace, readyVMGMembers, false, nil), - }, - expectedResult: reconcile.Result{}, - expectVMGExists: true, - // Expect an error because isCreateOrPatchAllowed blocks - expectedErrorMsg: "waiting for VirtualMachineGroup", - expectedMembersCount: 2, - }, - { - name: "Should block adding member if VMG Ready but MD annotation is missing", - initialObjects: []client.Object{ - cluster.DeepCopy(), - newMachineDeployment(mdName1, clusterName, clusterNamespace, true, ptr.To(int32(2))), - // Only vsm-1 currently exists for MD1 - vsm1.DeepCopy(), - machine1.DeepCopy(), - vsm2.DeepCopy(), - machine2.DeepCopy(), - // vsm-new is the new member requiring placement - newVSphereMachine("vsm-new", mdName1, false, false, nil), - newMachine("machine-vsm-new", mdName1, ""), - // VMG is Ready, but has no placement annotations - vmgReady.DeepCopy(), - }, - expectedResult: reconcile.Result{}, - expectVMGExists: true, - // Expected error from isCreateOrPatchAllowed: waiting for placement annotation - expectedErrorMsg: fmt.Sprintf("waiting for placement annotation %s/%s", ZoneAnnotationPrefix, mdName1), - expectedMembersCount: 2, - }, - { - name: "Should succeed adding member when VMG Ready AND placement annotation exists", - initialObjects: []client.Object{ - cluster.DeepCopy(), - newMachineDeployment(mdName1, clusterName, clusterNamespace, true, 
ptr.To(int32(2))), - vsm1.DeepCopy(), - machine1.DeepCopy(), - vsm2.DeepCopy(), - machine2.DeepCopy(), - newVSphereMachine("vsm-new", mdName1, false, false, nil), - newMachine("machine-vsm-new", mdName1, ""), - // VMG is Placed (Ready + Annotation) - vmgPlaced.DeepCopy(), - }, - expectedResult: reconcile.Result{}, - expectVMGExists: true, - expectedMembersCount: 2, - }, - { - name: "Should succeed adding member if new member has FailureDomain set", - initialObjects: []client.Object{ - cluster.DeepCopy(), - newMachineDeployment("md-new", clusterName, clusterNamespace, true, ptr.To(int32(2))), - vsm1.DeepCopy(), - machine1.DeepCopy(), - vsm2.DeepCopy(), - machine2.DeepCopy(), - newVSphereMachine("vsm-new", "md-new", false, false, nil), - // New machine has a FailureDomain set, which bypasses the VMG placement annotation check - newMachine("machine-vsm-new", "md-new", "zone-new"), - // VMG is Ready, but has no placement annotation for new machine deployment (this should be bypassed) - vmgReady.DeepCopy(), - }, - expectedResult: reconcile.Result{}, - expectVMGExists: true, - expectedMembersCount: 2, // Scale-up should succeed due to FailureDomain bypass - }, -<<<<<<< HEAD - } - - for _, tt := range tests { - // Looks odd, but need to reinitialize test variable - tt := tt - t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) - fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(tt.initialObjects...).Build() - reconciler := &VirtualMachineGroupReconciler{ - Client: fakeClient, - Recorder: record.NewFakeRecorder(1), - } - req := ctrl.Request{NamespacedName: types.NamespacedName{Name: clusterName, Namespace: clusterNamespace}} - - result, err := reconciler.Reconcile(ctx, req) - - if tt.expectedErrorMsg != "" { - g.Expect(err).To(HaveOccurred()) - g.Expect(err.Error()).To(ContainSubstring(tt.expectedErrorMsg)) - return - } - - g.Expect(err).NotTo(HaveOccurred(), "Reconcile should not return an error") - g.Expect(result).To(Equal(tt.expectedResult)) - - vmg := &vmoprv1.VirtualMachineGroup{} - vmgKey := types.NamespacedName{Name: clusterName, Namespace: clusterNamespace} - err = fakeClient.Get(ctx, vmgKey, vmg) - - if tt.expectVMGExists { - g.Expect(err).NotTo(HaveOccurred(), "VMG should exist") - // Check that the core fields were set by the MutateFn - g.Expect(vmg.Labels).To(HaveKeyWithValue(clusterv1.ClusterNameLabel, clusterName)) - // Check member count - g.Expect(vmg.Spec.BootOrder).To(HaveLen(tt.expectedMembersCount), "VMG members count mismatch") - // Check annotations - if tt.expectedAnnotations != nil { - g.Expect(vmg.Annotations).To(Equal(tt.expectedAnnotations)) - } - // VMG members should match the VSphereMachine name - g.Expect(vmg.Spec.BootOrder[0].Members[0].Name).To(Equal("vsm-1")) - } else { - // Check VMG does not exist if expected - g.Expect(apierrors.IsNotFound(err)).To(BeTrue()) - } - }) - } -} - -// Helper function to create a basic Cluster object. 
-func newCluster(name, namespace string, initialized bool, replicasMD1, replicasMD2 int32) *clusterv1.Cluster {
-	cluster := &clusterv1.Cluster{
-		ObjectMeta: metav1.ObjectMeta{
-			Name:      name,
-			Namespace: namespace,
-			Labels:    map[string]string{clusterv1.ClusterNameLabel: name},
-=======
 		{
 			name:    "Only new VSphereMachines with an explicit placement are added during initial placement",
 			cluster: clusterInitialized,
 			mds: []clusterv1.MachineDeployment{
@@ -1748,7 +846,6 @@ func newCluster(name, namespace string, initialized bool, replicasMD1, replicasM
 				},
 			},
 		},
->>>>>>> 9409e432 (POC AAF)
 		},
 		{
 			name:    "VSphereMachines are removed during initial placement",
 			cluster: clusterInitialized,
 			mds: []clusterv1.MachineDeployment{
@@ -1958,7 +1055,7 @@ func newCluster(name, namespace string, initialized bool, replicasMD1, replicasM
 			},
 		},
 		{
-			name:    "VSphereMachines are removed during initial placement",
+			name:    "VSphereMachines are removed after initial placement",
 			cluster: clusterInitialized,
 			mds: []clusterv1.MachineDeployment{
 				*createMD("md1", clusterInitialized.Name, "", 3), // scaled down
@@ -1992,9 +1089,9 @@ func newCluster(name, namespace string, initialized bool, replicasMD1, replicasM
 					{Name: "m1-vm", Kind: "VirtualMachine"},
 					{Name: "m2-vm", Kind: "VirtualMachine"},
 					{Name: "m3", Kind: "VirtualMachine"},
-					{Name: "m4-vm", Kind: "VirtualMachine"}, // added, failureDomain for md1 set by initial placement
-					{Name: "m5", Kind: "VirtualMachine"},    // added, failureDomain for md2 is explicitly set by the user
-					{Name: "m6", Kind: "VirtualMachine"},    // added, failureDomain for md3 is explicitly set by the user
+					{Name: "m4-vm", Kind: "VirtualMachine"},
+					{Name: "m5", Kind: "VirtualMachine"},
+					{Name: "m6", Kind: "VirtualMachine"},
 				},
 			},
@@ -2077,43 +1174,7 @@ func newCluster(name, namespace string, initialized bool, replicasMD1, replicasM
 	}
 }
 
-<<<<<<< HEAD
-// Helper function to create a VSphereMachine (worker, owned by a CAPI Machine).
-func newVSphereMachine(name, mdName string, isCP, deleted bool, namingStrategy *vmwarev1.VirtualMachineNamingStrategy) *vmwarev1.VSphereMachine {
-	vsm := &vmwarev1.VSphereMachine{
-		ObjectMeta: metav1.ObjectMeta{
-			Name:      name,
-			Namespace: clusterNamespace,
-			Labels: map[string]string{
-				clusterv1.ClusterNameLabel: clusterName,
-			},
-		},
-		Spec: vmwarev1.VSphereMachineSpec{
-			NamingStrategy: namingStrategy,
-		},
-	}
-	if !isCP {
-		vsm.Labels[clusterv1.MachineDeploymentNameLabel] = mdName
-	} else {
-		vsm.Labels[clusterv1.MachineControlPlaneLabel] = "true"
-	}
-	if deleted {
-		vsm.Finalizers = []string{"test.finalizer.0"}
-		vsm.DeletionTimestamp = &metav1.Time{Time: time.Now()}
-	}
-
-	vsm.OwnerReferences = []metav1.OwnerReference{
-		{
-			Kind: "Machine",
-			Name: fmt.Sprintf("machine-%s", name),
-		},
-	}
-
-	return vsm
-}
-=======
 type machineDeploymentOption func(md *clusterv1.MachineDeployment)
->>>>>>> 9409e432 (POC AAF)
 
 func createMD(name, cluster, failureDomain string, replicas int32, options ...machineDeploymentOption) *clusterv1.MachineDeployment {
 	md := &clusterv1.MachineDeployment{
@@ -2145,64 +1206,6 @@ func withCustomNamingStrategy() func(m *vmwarev1.VSphereMachine) {
 	}
 }
 
-<<<<<<< HEAD
-// Helper to create a new CAPI Machine.
-func newMachine(name, mdName, fd string) *clusterv1.Machine {
-	m := &clusterv1.Machine{
-		ObjectMeta: metav1.ObjectMeta{
-			Name:      name,
-			Namespace: clusterNamespace,
-			Labels: map[string]string{
-				clusterv1.ClusterNameLabel:           clusterName,
-				clusterv1.MachineDeploymentNameLabel: mdName,
-			},
-		},
-		Spec: clusterv1.MachineSpec{
-			FailureDomain: fd,
-		},
-	}
-	// Machine owner reference for VSphereMachine
-	m.OwnerReferences = []metav1.OwnerReference{
-		{
-			APIVersion: vmwarev1.GroupVersion.String(),
-			Kind:       "VSphereMachine",
-			Name:       strings.TrimPrefix(name, "machine-"), // VSphereMachine Name matches VM Name logic
-		},
-	}
-	return m
-}
-
-// Helper to create a new VMG with a list of members and conditions.
-func newVMG(name, ns string, members []vmoprv1.GroupMember, ready bool, annotations map[string]string) *vmoprv1.VirtualMachineGroup {
-	v := &vmoprv1.VirtualMachineGroup{
-		ObjectMeta: metav1.ObjectMeta{
-			Name:        name,
-			Namespace:   ns,
-			Labels:      map[string]string{clusterv1.ClusterNameLabel: name},
-			Annotations: annotations,
-			Finalizers:  []string{"vmg.test.finalizer"},
-		},
-		Spec: vmoprv1.VirtualMachineGroupSpec{
-			BootOrder: []vmoprv1.VirtualMachineGroupBootOrderGroup{
-				{Members: members},
-			},
-		},
-	}
-	if ready {
-		conditions.Set(v, metav1.Condition{
-			Type:   vmoprv1.ReadyConditionType,
-			Status: metav1.ConditionTrue,
-		})
-		v.Status = vmoprv1.VirtualMachineGroupStatus{
-			Members: []vmoprv1.VirtualMachineGroupMemberStatus{
-				newVMGMemberStatus("vsm-1", "VirtualMachine", true, true, zoneA),
-				newVMGMemberStatus("vsm-2", "VirtualMachine", true, true, zoneB),
-			},
-		}
-	}
-	return v
-}
-=======
 func createVSphereMachine(name, cluster, md, failureDomain string, options ...vSphereMachineOption) *vmwarev1.VSphereMachine {
 	m := &vmwarev1.VSphereMachine{
 		ObjectMeta: metav1.ObjectMeta{
@@ -2222,4 +1225,3 @@ func createVSphereMachine(name, cluster, md, failureDomain string, options ...vS
 	}
 	return m
 }
->>>>>>> 9409e432 (POC AAF)
diff --git a/pkg/services/vmoperator/vmopmachine.go b/pkg/services/vmoperator/vmopmachine.go
index 972cce8947..4a7ba23188 100644
--- a/pkg/services/vmoperator/vmopmachine.go
+++ b/pkg/services/vmoperator/vmopmachine.go
@@ -225,7 +225,7 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap
 	}
 
 	err := v.Client.Get(ctx, key, vmGroup)
-	// The VirtualMachineGroup controller is going to create the vmg only when all the machines required for the placement
+	// The VirtualMachineGroup controller is going to create the vmg only when all the VSphereMachines required for the placement
 	// decision exist. If the vmg does not exist yet, requeue.
 	if err != nil {
 		if !apierrors.IsNotFound(err) {
@@ -236,7 +236,7 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap
 				Type:   infrav1.VSphereMachineVirtualMachineProvisionedV1Beta2Condition,
 				Status: metav1.ConditionFalse,
 				Reason: infrav1.VSphereMachineVirtualMachineWaitingForVirtualMachineGroupV1Beta2Reason,
-				Message: "Waiting for all the VSphereMachine's VMGroup required for the placement decision to exist",
+				Message: fmt.Sprintf("Waiting for VSphereMachine's VirtualMachineGroup %s to exist", key),
 			})
 			log.V(4).Info(fmt.Sprintf("Waiting for VirtualMachineGroup %s, requeueing", key.Name), "VirtualMachineGroup", klog.KRef(key.Namespace, key.Name))
 			return true, nil
@@ -247,10 +247,10 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap
 	isMember := v.checkVirtualMachineGroupMembership(vmGroup, vmKey.Name)
 	if !isMember {
 		v1beta2conditions.Set(supervisorMachineCtx.VSphereMachine, metav1.Condition{
-			Type:   infrav1.VSphereMachineVirtualMachineProvisionedV1Beta2Condition,
-			Status: metav1.ConditionFalse,
-			Reason: infrav1.VSphereMachineVirtualMachineWaitingForVirtualMachineGroupV1Beta2Reason,
-			// FIXME: we should provide more details about this case in the message (vs other cases where we set this reason)
+			Type:    infrav1.VSphereMachineVirtualMachineProvisionedV1Beta2Condition,
+			Status:  metav1.ConditionFalse,
+			Reason:  infrav1.VSphereMachineVirtualMachineWaitingForVirtualMachineGroupV1Beta2Reason,
+			Message: fmt.Sprintf("Waiting for VirtualMachineGroup %s membership", klog.KRef(key.Namespace, key.Name)),
 		})
 		log.V(4).Info(fmt.Sprintf("Waiting for VirtualMachineGroup %s membership, requeueing", key.Name), "VirtualMachineGroup", klog.KRef(key.Namespace, key.Name))
 		return true, nil