@@ -199,6 +199,27 @@ func (r *VirtualMachineGroupReconciler) createOrUpdateVirtualMachineGroup(ctx co
199199 }
200200 }
201201
202+ // The core purpose of isCreateOrPatchAllowed is to prevent the VirtualMachineGroup from being updated with new members
203+ // that require placement, unless the VirtualMachineGroup
204+ // has successfully completed its initial placement and added the required
205+ // placement annotations. This stabilizes placement decisions before allowing new VMs
206+ // to be added under the group.
207+ //s
208+ // The CreateOrPatch is allowed if:
209+ // 1. The VirtualMachineGroup is being initially created.
210+ // 2. The update is a scale-down operation.
211+ // 3. The VirtualMachineGroup is placement Ready.
212+ // 4. The new member's underlying CAPI Machine has a FailureDomain set (will skip placement process).
213+ // 5. The new member requires placement annotation AND the VirtualMachineGroup has the corresponding
214+ // placement annotation for the member's MachineDeployment.
215+ //
216+ // This prevents member updates that could lead to new VMs being created
217+ // without necessary zone labels, resulting in undesired placement.
218+ err = isCreateOrPatchAllowed (ctx , r .Client , members , vmg )
219+ if err != nil {
220+ return reconcile.Result {}, err
221+ }
222+
202223 // Use CreateOrPatch to create or update the VirtualMachineGroup.
203224 _ , err = controllerutil .CreateOrPatch (ctx , r .Client , vmg , func () error {
204225 return r .reconcileVirtualMachineGroup (ctx , vmg , cluster , members , mdNames )
@@ -227,31 +248,10 @@ func (r *VirtualMachineGroupReconciler) reconcileVirtualMachineGroup(ctx context
227248 return err
228249 }
229250
230- // Member Update:
231- // The VirtualMachineGroup's BootOrder.Members list, is only allowed to be set or added
232- // during two phases to maintain control over VM placement:
233- //
234- // 1. Initial Creation: When the VirtualMachineGroup object does not yet exist.
235- // 2. Post-Placement: After the VirtualMachineGroup exists AND is marked Ready which means all members are placed successfully,
236- // and critically, all MachineDeployments have a corresponding zone placement annotation recorded on the VMG.
237- //
238- // For member removal, this is always allowed since it doesn't impact ongoing placement or rely on the placement annotation.
239- //
240- // This prevents member updates that could lead to new VMs being created
241- // without necessary zone labels, resulting in undesired placement, such as VM within a MachineDeployment but are
242- // placed to different Zones.
243-
244- isMemberUpdateAllowed , err := isMemberUpdateAllowed (ctx , r .Client , members , vmg )
245- if err != nil {
246- return err
247- }
248-
249- if isMemberUpdateAllowed {
250- vmg .Spec .BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup {
251- {
252- Members : members ,
253- },
254- }
251+ vmg .Spec .BootOrder = []vmoprv1.VirtualMachineGroupBootOrderGroup {
252+ {
253+ Members : members ,
254+ },
255255 }
256256
257257 // Set the owner reference
@@ -262,9 +262,8 @@ func (r *VirtualMachineGroupReconciler) reconcileVirtualMachineGroup(ctx context
262262 return nil
263263}
264264
265- // isMemberUpdateAllowed determines if the BootOrder.Members field can be safely updated on the VirtualMachineGroup.
266- // It allows updates only during initial creation or after all member placement are completed successfully.
267- func isMemberUpdateAllowed (ctx context.Context , kubeClient client.Client , targetMember []vmoprv1.GroupMember , vmg * vmoprv1.VirtualMachineGroup ) (bool , error ) {
265+ // isCreateOrPatchAllowed checks if a VirtualMachineGroup is allowd to create or patch by check if BootOrder.Members update is allowed.
266+ func isCreateOrPatchAllowed (ctx context.Context , kubeClient client.Client , targetMember []vmoprv1.GroupMember , vmg * vmoprv1.VirtualMachineGroup ) error {
268267 logger := log .FromContext (ctx )
269268 key := client.ObjectKey {
270269 Namespace : vmg .Namespace ,
@@ -275,38 +274,48 @@ func isMemberUpdateAllowed(ctx context.Context, kubeClient client.Client, target
275274 currentVMG := & vmoprv1.VirtualMachineGroup {}
276275 if err := kubeClient .Get (ctx , key , currentVMG ); err != nil {
277276 if apierrors .IsNotFound (err ) {
278- // If VirtualMachineGroup is not found, allow member update as it should be in initial creation phase.
279- logger .V (5 ).Info ("VirtualMachineGroup not found , allowing member update for initial creation. " )
280- return true , nil
277+ // 1. If VirtualMachineGroup is not found, allow CreateOrPatch as it should be in initial creation phase.
278+ logger .V (6 ).Info ("VirtualMachineGroup not created yet , allowing create " )
279+ return nil
281280 }
282- return false , errors .Wrapf (err , "failed to get VirtualMachineGroup %s/%s" , vmg .Namespace , vmg .Name )
281+ return errors .Wrapf (err , "failed to get VirtualMachineGroup %s/%s, blocking patch " , vmg .Namespace , vmg .Name )
283282 }
284- // Copy retrieved data back to the input pointer for consistency
283+ // Copy retrieved data back to the input pointer for consistency.
285284 * vmg = * currentVMG
286285
287- // Get current member names from VirtualMachineGroup Spec.BootOrder
286+ // Get current member names from VirtualMachineGroup Spec.BootOrder.
288287 currentMemberNames := make (map [string ]struct {})
289288 if len (vmg .Spec .BootOrder ) > 0 {
290289 for _ , m := range vmg .Spec .BootOrder [0 ].Members {
291290 currentMemberNames [m .Name ] = struct {}{}
292291 }
293292 }
294293
295- // 1 . If removing members, allow immediately since it doesn't impact placement or placement annotation set.
294+ // 2 . If removing members, allow immediately since it doesn't impact placement or placement annotation set.
296295 if len (targetMember ) < len (currentMemberNames ) {
297- logger .V (5 ).Info ("Scaling down detected (fewer target members), allowing member update ." )
298- return true , nil
296+ logger .V (6 ).Info ("Scaling down detected (fewer target members), allowing patch ." )
297+ return nil
299298 }
300299
301- // 2. If adding members, continue following checks.
302300 var newMembers []vmoprv1.GroupMember
303301 for _ , m := range targetMember {
304302 if _ , exists := currentMemberNames [m .Name ]; ! exists {
305303 newMembers = append (newMembers , m )
306304 }
307305 }
308306
309- // 3. Check newly added members for Machine.Spec.FailureDomain via VSphereMachine.If a member belongs to a Machine
307+ // If no new member added, allow patch.
308+ if len (newMembers ) == 0 {
309+ logger .V (6 ).Info ("No new member detected, allowing patch." )
310+ return nil
311+ }
312+
313+ // 3. If initial placement is still in progress, block adding new member.
314+ if ! conditions .IsTrue (vmg , vmoprv1 .ReadyConditionType ) {
315+ return fmt .Errorf ("waiting for VirtualMachineGroup %s to get condition %s to true, temporarily blocking patch" , klog .KObj (vmg ), vmoprv1 .ReadyConditionType )
316+ }
317+
318+ // 4. Check newly added members for Machine.Spec.FailureDomain via VSphereMachine.If a member belongs to a Machine
310319 // which has failureDomain specified, allow it since it will skip the placement
311320 // process. If not, continue to check if the belonging MachineDeployment has got placement annotation.
312321 for _ , newMember := range newMembers {
@@ -317,10 +326,9 @@ func isMemberUpdateAllowed(ctx context.Context, kubeClient client.Client, target
317326 vsphereMachine := & vmwarev1.VSphereMachine {}
318327 if err := kubeClient .Get (ctx , vsphereMachineKey , vsphereMachine ); err != nil {
319328 if apierrors .IsNotFound (err ) {
320- logger .V (5 ).Info ("VSphereMachine for new member not found, temporarily blocking update." , "VSphereMachineName" , newMember .Name )
321- return false , nil
329+ return errors .Wrapf (err , "VSphereMachine for new member %s not found, temporarily blocking patch" , newMember .Name )
322330 }
323- return false , errors .Wrapf (err , "failed to get VSphereMachine %s" , klog .KRef (newMember .Name , vmg .Namespace ))
331+ return errors .Wrapf (err , "failed to get VSphereMachine %s" , klog .KRef (newMember .Name , vmg .Namespace ))
324332 }
325333
326334 var machineOwnerName string
@@ -333,8 +341,7 @@ func isMemberUpdateAllowed(ctx context.Context, kubeClient client.Client, target
333341
334342 if machineOwnerName == "" {
335343 // VSphereMachine found but owner Machine reference is missing
336- logger .V (5 ).Info ("VSphereMachine found but owner Machine reference is missing, temporarily blocking update." , "VSphereMachineName" , newMember .Name )
337- return false , nil
344+ return fmt .Errorf ("VSphereMachine %s found but owner Machine reference is missing, temporarily blocking patch" , newMember .Name )
338345 }
339346
340347 machineKey := types.NamespacedName {
@@ -345,46 +352,38 @@ func isMemberUpdateAllowed(ctx context.Context, kubeClient client.Client, target
345352
346353 if err := kubeClient .Get (ctx , machineKey , machine ); err != nil {
347354 if apierrors .IsNotFound (err ) {
348- logger .V (5 ).Info ("CAPI Machine not found via owner reference, temporarily blocking update." , "Machine" , klog .KRef (machineOwnerName , vmg .Namespace ))
349- return false , nil
355+ return errors .Wrapf (err , "Machine %s not found via owner reference, temporarily blocking patch" , klog .KRef (machineOwnerName , vmg .Namespace ))
350356 }
351- return false , errors .Wrapf (err , "failed to get CAPI Machine %s" , klog .KRef (machineOwnerName , vmg .Namespace ))
357+ return errors .Wrapf (err , "failed to get CAPI Machine %s" , klog .KRef (machineOwnerName , vmg .Namespace ))
352358 }
353359
354- // If FailureDomain is set on CAPI Machine, placement process will be skipped. Allow update.
360+ // If FailureDomain is set on CAPI Machine, placement process will be skipped. Allow update for this member .
355361 fd := machine .Spec .FailureDomain
356362 if fd != "" {
357- logger .V (5 ).Info ("New member's Machine has FailureDomain specified. Allowing VMG update for this member." )
363+ logger .V (6 ).Info ("New member's Machine has FailureDomain specified. Allowing patch" , "Member" , newMember . Name )
358364 continue
359365 }
360366
361- // If FailureDomain is NOT set. Requires placement or placement Annotation. Fall through to full VMG Annotation check.
362- logger .V (5 ).Info ("New member's CAPI Machine lacks FailureDomain. Falling through to VMG Annotation check." , "MachineName" , machineOwnerName )
363-
364- // If no Placement Annotations, skip member update and wait for it.
367+ // 5. If FailureDomain is NOT set. Requires placement or placement Annotation. Fall through to Annotation check.
368+ // If no Placement Annotations, block member update and wait for it.
365369 annotations := vmg .GetAnnotations ()
366370 if len (annotations ) == 0 {
367- return false , nil
371+ return fmt . Errorf ( "waiting for placement annotation to add VMG member %s, temporarily blocking patch" , newMember . Name )
368372 }
369373
370374 mdLabelName := vsphereMachine .Labels [clusterv1 .MachineDeploymentNameLabel ]
371375 if mdLabelName == "" {
372- return false , errors . Wrapf ( nil , "VSphereMachine doesn't have MachineDeployment name label %s" , klog .KObj (vsphereMachine ))
376+ return fmt . Errorf ( "VSphereMachine doesn't have MachineDeployment name label %s, blocking patch " , klog .KObj (vsphereMachine ))
373377 }
374378
375379 annotationKey := fmt .Sprintf ("%s/%s" , ZoneAnnotationPrefix , mdLabelName )
376-
377380 if _ , found := annotations [annotationKey ]; ! found {
378- logger .V (5 ).Info ("Required placement annotation is missing." ,
379- "Member" , newMember , "Annotation" , annotationKey )
380- return false , nil
381+ return fmt .Errorf ("waiting for placement annotation %s to add VMG member %s, temporarily blocking patch" , annotationKey , newMember .Name )
381382 }
382-
383- logger .V (5 ).Info ("New member requires placement annotation and it is present. Allowing this member." , "Member" , newMember )
384383 }
385384
386- logger .V (5 ).Info ("Either no new members, or all newly added members existed or have satisfied placement requirements, allowing update. " )
387- return true , nil
385+ logger .V (6 ).Info ("All newly added members either existed or have satisfied placement requirements, allowing patch " )
386+ return nil
388387}
389388
390389// getExpectedVSphereMachineCount get expected total count of Machines belonging to the Cluster.
0 commit comments