@@ -184,31 +184,37 @@ func (r *VirtualMachineGroupReconciler) createOrUpdateVirtualMachineGroup(ctx co
184184 })
185185 }
186186
187- // Get all the names of MachineDeployments of the Cluster.
188- machineDeployments := & clusterv1.MachineDeploymentList {}
189- if err := r .Client .List (ctx , machineDeployments ,
190- client .InNamespace (cluster .Namespace ),
191- client.MatchingLabels {clusterv1 .ClusterNameLabel : cluster .Name }); err != nil {
187+ // The core purpose of isCreateOrPatchAllowed is to prevent the VirtualMachineGroup from being updated with new members
188+ // that require placement, unless the VirtualMachineGroup
189+ // has successfully completed its initial placement and added the required
190+ // placement annotations. This stabilizes placement decisions before allowing new VMs
191+ // to be added under the group.
192+ //s
193+ // The CreateOrPatch is allowed if:
194+ // 1. The VirtualMachineGroup is being initially created.
195+ // 2. The update is a scale-down operation.
196+ // 3. The VirtualMachineGroup is placement Ready.
197+ // 4. The new member's underlying CAPI Machine has a FailureDomain set (will skip placement process).
198+ // 5. The new member requires placement annotation AND the VirtualMachineGroup has the corresponding
199+ // placement annotation for the member's MachineDeployment.
200+ //
201+ // This prevents member updates that could lead to new VMs being created
202+ // without necessary zone labels, resulting in undesired placement.
203+ err = isCreateOrPatchAllowed (ctx , r .Client , members , vmg )
204+ if err != nil {
192205 return reconcile.Result {}, err
193206 }
194- mdNames := []string {}
195- for _ , md := range machineDeployments .Items {
196- // Skip MachineDeployment marked for removal.
197- if ! md .DeletionTimestamp .IsZero () {
198- mdNames = append (mdNames , md .Name )
199- }
200- }
201207
202208 // Use CreateOrPatch to create or update the VirtualMachineGroup.
203209 _ , err = controllerutil .CreateOrPatch (ctx , r .Client , vmg , func () error {
204- return r .reconcileVirtualMachineGroup (ctx , vmg , cluster , members , mdNames )
210+ return r .reconcileVirtualMachineGroup (ctx , vmg , cluster , members )
205211 })
206212
207213 return reconcile.Result {}, err
208214}
209215
210216// reconcileVirtualMachineGroup mutates the VirtualMachineGroup object to reflect the necessary spec and metadata changes.
211- func (r * VirtualMachineGroupReconciler ) reconcileVirtualMachineGroup (ctx context.Context , vmg * vmoprv1.VirtualMachineGroup , cluster * clusterv1.Cluster , members []vmoprv1.GroupMember , mdNames [] string ) error {
217+ func (r * VirtualMachineGroupReconciler ) reconcileVirtualMachineGroup (ctx context.Context , vmg * vmoprv1.VirtualMachineGroup , cluster * clusterv1.Cluster , members []vmoprv1.GroupMember ) error {
212218 // Set the desired labels
213219 if vmg .Labels == nil {
214220 vmg .Labels = make (map [string ]string )
@@ -220,38 +226,32 @@ func (r *VirtualMachineGroupReconciler) reconcileVirtualMachineGroup(ctx context
220226 vmg .Annotations = make (map [string ]string )
221227 }
222228
229+ // Get all the names of MachineDeployments of the Cluster.
230+ machineDeployments := & clusterv1.MachineDeploymentList {}
231+ if err := r .Client .List (ctx , machineDeployments ,
232+ client .InNamespace (cluster .Namespace ),
233+ client.MatchingLabels {clusterv1 .ClusterNameLabel : cluster .Name }); err != nil {
234+ return err
235+ }
236+ mdNames := []string {}
237+ for _ , md := range machineDeployments .Items {
238+ // Skip MachineDeployment marked for removal.
239+ if ! md .DeletionTimestamp .IsZero () {
240+ mdNames = append (mdNames , md .Name )
241+ }
242+ }
243+
223244 // Add per-md-zone label for day-2 operations once placement of a VM belongs to MachineDeployment is done.
224245 // Do not update per-md-zone label once set, as placement decision should not change without user explicitly
225246 // set failureDomain.
226247 if err := generateVirtualMachineGroupAnnotations (ctx , r .Client , vmg , mdNames ); err != nil {
227248 return err
228249 }
229250
230- // Member Update:
231- // The VirtualMachineGroup's BootOrder.Members list, is only allowed to be set or added
232- // during two phases to maintain control over VM placement:
233- //
234- // 1. Initial Creation: When the VirtualMachineGroup object does not yet exist.
235- // 2. Post-Placement: After the VirtualMachineGroup exists AND is marked Ready which means all members are placed successfully,
236- // and critically, all MachineDeployments have a corresponding zone placement annotation recorded on the VMG.
237- //
238- // For member removal, this is always allowed since it doesn't impact ongoing placement or rely on the placement annotation.
239- //
240- // This prevents member updates that could lead to new VMs being created
241- // without necessary zone labels, resulting in undesired placement, such as VM within a MachineDeployment but are
242- // placed to different Zones.
243-
244- isMemberUpdateAllowed , err := isMemberUpdateAllowed (ctx , r .Client , members , vmg )
245- if err != nil {
246- return err
247- }
248-
249- if isMemberUpdateAllowed {
250- vmg .Spec .BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup {
251- {
252- Members : members ,
253- },
254- }
251+ vmg .Spec .BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup {
252+ {
253+ Members : members ,
254+ },
255255 }
256256
257257 // Set the owner reference
@@ -262,9 +262,8 @@ func (r *VirtualMachineGroupReconciler) reconcileVirtualMachineGroup(ctx context
262262 return nil
263263}
264264
265- // isMemberUpdateAllowed determines if the BootOrder.Members field can be safely updated on the VirtualMachineGroup.
266- // It allows updates only during initial creation or after all member placement are completed successfully.
267- func isMemberUpdateAllowed (ctx context.Context , kubeClient client.Client , targetMember []vmoprv1.GroupMember , vmg * vmoprv1.VirtualMachineGroup ) (bool , error ) {
265+ // isCreateOrPatchAllowed checks if a VirtualMachineGroup is allowd to create or patch by check if BootOrder.Members update is allowed.
266+ func isCreateOrPatchAllowed (ctx context.Context , kubeClient client.Client , targetMember []vmoprv1.GroupMember , vmg * vmoprv1.VirtualMachineGroup ) error {
268267 logger := log .FromContext (ctx )
269268 key := client.ObjectKey {
270269 Namespace : vmg .Namespace ,
@@ -275,38 +274,48 @@ func isMemberUpdateAllowed(ctx context.Context, kubeClient client.Client, target
275274 currentVMG := & vmoprv1.VirtualMachineGroup {}
276275 if err := kubeClient .Get (ctx , key , currentVMG ); err != nil {
277276 if apierrors .IsNotFound (err ) {
278- // If VirtualMachineGroup is not found, allow member update as it should be in initial creation phase.
279- logger .V (5 ).Info ("VirtualMachineGroup not found , allowing member update for initial creation. " )
280- return true , nil
277+ // 1. If VirtualMachineGroup is not found, allow CreateOrPatch as it should be in initial creation phase.
278+ logger .V (6 ).Info ("VirtualMachineGroup not created yet , allowing create " )
279+ return nil
281280 }
282- return false , errors .Wrapf (err , "failed to get VirtualMachineGroup %s/%s" , vmg .Namespace , vmg .Name )
281+ return errors .Wrapf (err , "failed to get VirtualMachineGroup %s/%s, blocking patch " , vmg .Namespace , vmg .Name )
283282 }
284- // Copy retrieved data back to the input pointer for consistency
283+ // Copy retrieved data back to the input pointer for consistency.
285284 * vmg = * currentVMG
286285
287- // Get current member names from VirtualMachineGroup Spec.BootOrder
286+ // Get current member names from VirtualMachineGroup Spec.BootOrder.
288287 currentMemberNames := make (map [string ]struct {})
289288 if len (vmg .Spec .BootOrder ) > 0 {
290289 for _ , m := range vmg .Spec .BootOrder [0 ].Members {
291290 currentMemberNames [m .Name ] = struct {}{}
292291 }
293292 }
294293
295- // 1 . If removing members, allow immediately since it doesn't impact placement or placement annotation set.
294+ // 2 . If removing members, allow immediately since it doesn't impact placement or placement annotation set.
296295 if len (targetMember ) < len (currentMemberNames ) {
297- logger .V (5 ).Info ("Scaling down detected (fewer target members), allowing member update ." )
298- return true , nil
296+ logger .V (6 ).Info ("Scaling down detected (fewer target members), allowing patch ." )
297+ return nil
299298 }
300299
301- // 2. If adding members, continue following checks.
302300 var newMembers []vmoprv1.GroupMember
303301 for _ , m := range targetMember {
304302 if _ , exists := currentMemberNames [m .Name ]; ! exists {
305303 newMembers = append (newMembers , m )
306304 }
307305 }
308306
309- // 3. Check newly added members for Machine.Spec.FailureDomain via VSphereMachine.If a member belongs to a Machine
307+ // If no new member added, allow patch.
308+ if len (newMembers ) == 0 {
309+ logger .V (6 ).Info ("No new member detected, allowing patch." )
310+ return nil
311+ }
312+
313+ // 3. If initial placement is still in progress, block adding new member.
314+ if ! conditions .IsTrue (vmg , vmoprv1 .ReadyConditionType ) {
315+ return fmt .Errorf ("waiting for VirtualMachineGroup %s to get condition %s to true, temporarily blocking patch" , klog .KObj (vmg ), vmoprv1 .ReadyConditionType )
316+ }
317+
318+ // 4. Check newly added members for Machine.Spec.FailureDomain via VSphereMachine.If a member belongs to a Machine
310319 // which has failureDomain specified, allow it since it will skip the placement
311320 // process. If not, continue to check if the belonging MachineDeployment has got placement annotation.
312321 for _ , newMember := range newMembers {
@@ -317,10 +326,9 @@ func isMemberUpdateAllowed(ctx context.Context, kubeClient client.Client, target
317326 vsphereMachine := & vmwarev1.VSphereMachine {}
318327 if err := kubeClient .Get (ctx , vsphereMachineKey , vsphereMachine ); err != nil {
319328 if apierrors .IsNotFound (err ) {
320- logger .V (5 ).Info ("VSphereMachine for new member not found, temporarily blocking update." , "VSphereMachineName" , newMember .Name )
321- return false , nil
329+ return errors .Wrapf (err , "VSphereMachine for new member %s not found, temporarily blocking patch" , newMember .Name )
322330 }
323- return false , errors .Wrapf (err , "failed to get VSphereMachine %s" , klog .KRef (newMember .Name , vmg .Namespace ))
331+ return errors .Wrapf (err , "failed to get VSphereMachine %s" , klog .KRef (newMember .Name , vmg .Namespace ))
324332 }
325333
326334 var machineOwnerName string
@@ -333,8 +341,7 @@ func isMemberUpdateAllowed(ctx context.Context, kubeClient client.Client, target
333341
334342 if machineOwnerName == "" {
335343 // VSphereMachine found but owner Machine reference is missing
336- logger .V (5 ).Info ("VSphereMachine found but owner Machine reference is missing, temporarily blocking update." , "VSphereMachineName" , newMember .Name )
337- return false , nil
344+ return fmt .Errorf ("VSphereMachine %s found but owner Machine reference is missing, temporarily blocking patch" , newMember .Name )
338345 }
339346
340347 machineKey := types.NamespacedName {
@@ -345,46 +352,38 @@ func isMemberUpdateAllowed(ctx context.Context, kubeClient client.Client, target
345352
346353 if err := kubeClient .Get (ctx , machineKey , machine ); err != nil {
347354 if apierrors .IsNotFound (err ) {
348- logger .V (5 ).Info ("CAPI Machine not found via owner reference, temporarily blocking update." , "Machine" , klog .KRef (machineOwnerName , vmg .Namespace ))
349- return false , nil
355+ return errors .Wrapf (err , "Machine %s not found via owner reference, temporarily blocking patch" , klog .KRef (machineOwnerName , vmg .Namespace ))
350356 }
351- return false , errors .Wrapf (err , "failed to get CAPI Machine %s" , klog .KRef (machineOwnerName , vmg .Namespace ))
357+ return errors .Wrapf (err , "failed to get CAPI Machine %s" , klog .KRef (machineOwnerName , vmg .Namespace ))
352358 }
353359
354- // If FailureDomain is set on CAPI Machine, placement process will be skipped. Allow update.
360+ // If FailureDomain is set on CAPI Machine, placement process will be skipped. Allow update for this member .
355361 fd := machine .Spec .FailureDomain
356362 if fd != "" {
357- logger .V (5 ).Info ("New member's Machine has FailureDomain specified. Allowing VMG update for this member." )
363+ logger .V (6 ).Info ("New member's Machine has FailureDomain specified. Allowing patch" , "Member" , newMember . Name )
358364 continue
359365 }
360366
361- // If FailureDomain is NOT set. Requires placement or placement Annotation. Fall through to full VMG Annotation check.
362- logger .V (5 ).Info ("New member's CAPI Machine lacks FailureDomain. Falling through to VMG Annotation check." , "MachineName" , machineOwnerName )
363-
364- // If no Placement Annotations, skip member update and wait for it.
367+ // 5. If FailureDomain is NOT set. Requires placement or placement Annotation. Fall through to Annotation check.
368+ // If no Placement Annotations, block member update and wait for it.
365369 annotations := vmg .GetAnnotations ()
366370 if len (annotations ) == 0 {
367- return false , nil
371+ return fmt . Errorf ( "waiting for placement annotation to add VMG member %s, temporarily blocking patch" , newMember . Name )
368372 }
369373
370374 mdLabelName := vsphereMachine .Labels [clusterv1 .MachineDeploymentNameLabel ]
371375 if mdLabelName == "" {
372- return false , errors . Wrapf ( nil , "VSphereMachine doesn't have MachineDeployment name label %s" , klog .KObj (vsphereMachine ))
376+ return fmt . Errorf ( "VSphereMachine doesn't have MachineDeployment name label %s, blocking patch " , klog .KObj (vsphereMachine ))
373377 }
374378
375379 annotationKey := fmt .Sprintf ("%s/%s" , ZoneAnnotationPrefix , mdLabelName )
376-
377380 if _ , found := annotations [annotationKey ]; ! found {
378- logger .V (5 ).Info ("Required placement annotation is missing." ,
379- "Member" , newMember , "Annotation" , annotationKey )
380- return false , nil
381+ return fmt .Errorf ("waiting for placement annotation %s to add VMG member %s, temporarily blocking patch" , annotationKey , newMember .Name )
381382 }
382-
383- logger .V (5 ).Info ("New member requires placement annotation and it is present. Allowing this member." , "Member" , newMember )
384383 }
385384
386- logger .V (5 ).Info ("Either no new members, or all newly added members existed or have satisfied placement requirements, allowing update. " )
387- return true , nil
385+ logger .V (6 ).Info ("All newly added members either existed or have satisfied placement requirements, allowing patch " )
386+ return nil
388387}
389388
390389// getExpectedVSphereMachineCount get expected total count of Machines belonging to the Cluster.
@@ -438,7 +437,7 @@ func getCurrentVSphereMachines(ctx context.Context, kubeClient client.Client, cl
438437// any existing placement annotations that correspond to MachineDeployments that no longer exist.
439438//
440439// The function attempts to find at least one successfully placed VM (VirtualMachineGroupMemberConditionPlacementReady==True)
441- // for each MachineDeployment and records its zone. Once a zone is recorded for an MD, subsequent VMs
440+ // for each MachineDeployment and records its zone. Once a Zone is recorded for an MD, subsequent VMs
442441// belonging to that same MD are skipped.
443442func generateVirtualMachineGroupAnnotations (ctx context.Context , kubeClient client.Client , vmg * vmoprv1.VirtualMachineGroup , machineDeployments []string ) error {
444443 log := ctrl .LoggerFrom (ctx )
@@ -451,10 +450,7 @@ func generateVirtualMachineGroupAnnotations(ctx context.Context, kubeClient clie
451450
452451 // If a MachineDeployment has been deleted, its corresponding placement annotation
453452 // on the VirtualMachineGroup should also be removed to avoid configuration drift.
454- activeMDs := make (map [string ]bool )
455- for _ , md := range machineDeployments {
456- activeMDs [md ] = true
457- }
453+ activeMDs := sets .New (machineDeployments ... )
458454
459455 // Iterate over existing VirtualMachineGroup annotations and delete those that are stale.
460456 for key := range annotations {
@@ -466,15 +462,12 @@ func generateVirtualMachineGroupAnnotations(ctx context.Context, kubeClient clie
466462 mdName := strings .TrimPrefix (key , ZoneAnnotationPrefix + "/" )
467463
468464 // If the MD name is NOT in the list of currently active MDs, delete the annotation.
469- if found := activeMDs [ mdName ]; ! found {
465+ if activeMDs . Has ( mdName ) {
470466 log .Info (fmt .Sprintf ("Cleaning up stale placement annotation for none-existed MachineDeployment %s" , mdName ))
471467 delete (annotations , key )
472468 }
473469 }
474470
475- // Pre-computation: Convert the list of valid MachineDeployment names into a set.
476- mdNames := sets .New (machineDeployments ... )
477-
478471 // Iterate through the VMG's members in Status.
479472 for _ , member := range vmg .Status .Members {
480473 ns := vmg .Namespace
@@ -510,14 +503,15 @@ func generateVirtualMachineGroupAnnotations(ctx context.Context, kubeClient clie
510503 }
511504
512505 // Check if this VM belongs to any of our target MachineDeployments.
513- if ! mdNames .Has (mdName ) {
506+ if ! activeMDs .Has (mdName ) {
514507 log .V (5 ).Info ("Skipping member as its MachineDeployment name is not in the known list." ,
515508 "VMName" , member .Name , "MDName" , mdName )
516509 continue
517510 }
518511
519512 // Get the VM placement information by member status.
520513 // VMs that have undergone placement do not have Placement info set, skip.
514+ // VMs of Machine with failureDomain specified do not have Placement info set, skip.
521515 if member .Placement == nil {
522516 log .V (5 ).Info (fmt .Sprintf ("VM %s in VMG %s/%s has no placement info. Placement is nil" , member .Name , vmg .Name , ns ))
523517 continue
0 commit comments