@@ -79,8 +79,6 @@ type Cluster struct {
7979var _ webhook.CustomDefaulter = & Cluster {}
8080var _ webhook.CustomValidator = & Cluster {}
8181
82- var errClusterClassNotReconciled = errors .New ("ClusterClass is not successfully reconciled" )
83-
8482// Default satisfies the defaulting webhook interface.
8583func (webhook * Cluster ) Default (ctx context.Context , obj runtime.Object ) error {
8684 // We gather all defaulting errors and return them together.
@@ -109,14 +107,15 @@ func (webhook *Cluster) Default(ctx context.Context, obj runtime.Object) error {
109107 return apierrors .NewInvalid (clusterv1 .GroupVersion .WithKind ("Cluster" ).GroupKind (), cluster .Name , allErrs )
110108 }
111109
112- clusterClass , err := webhook .pollClusterClassForCluster (ctx , cluster )
110+ clusterClass , clusterClassNotReconciled , clusterClassNotFound , err := webhook .pollClusterClassForCluster (ctx , cluster )
113111 if err != nil {
114- // If the ClusterClass can't be found or is not up to date ignore the error.
115- if apierrors .IsNotFound (err ) || errors .Is (err , errClusterClassNotReconciled ) {
116- return nil
117- }
118112 return apierrors .NewInternalError (errors .Wrapf (err , "Cluster %s can't be defaulted. ClusterClass %s can not be retrieved" , cluster .Name , cluster .GetClassKey ().Name ))
119113 }
114+ if clusterClassNotReconciled || clusterClassNotFound {
115+ // If the ClusterClass can't be found or is not reconciled, return as we shouldn't
116+ // default and validate variables in that case.
117+ return nil
118+ }
120119
121120 // Validate cluster class variables transitions that may be enforced by CEL validation rules on variables.
122121 // If no request found in context, then this has not come via a webhook request, so skip validation of old cluster.
@@ -292,6 +291,8 @@ func (webhook *Cluster) validateTopology(ctx context.Context, oldCluster, newClu
292291 // metadata in topology should be valid
293292 allErrs = append (allErrs , validateTopologyMetadata (newCluster .Spec .Topology , fldPath )... )
294293
294+ allErrs = append (allErrs , validateTopologyRollout (newCluster .Spec .Topology , fldPath )... )
295+
295296 // upgrade concurrency should be a numeric value.
296297 if concurrency , ok := newCluster .Annotations [clusterv1 .ClusterTopologyUpgradeConcurrencyAnnotation ]; ok {
297298 concurrencyAnnotationField := field .NewPath ("metadata" , "annotations" , clusterv1 .ClusterTopologyUpgradeConcurrencyAnnotation )
@@ -312,39 +313,36 @@ func (webhook *Cluster) validateTopology(ctx context.Context, oldCluster, newClu
312313 }
313314
314315 // Get the ClusterClass referenced in the Cluster.
315- clusterClass , warnings , clusterClassPollErr := webhook .validateClusterClassExistsAndIsReconciled (ctx , newCluster )
316- // If the error is anything other than "NotFound" or "NotReconciled" return all errors.
317- if clusterClassPollErr != nil && (! apierrors .IsNotFound (clusterClassPollErr ) && ! errors .Is (clusterClassPollErr , errClusterClassNotReconciled )) {
318- allErrs = append (
319- allErrs , field .InternalError (
320- fldPath .Child ("class" ),
321- clusterClassPollErr ))
322- return allWarnings , allErrs
316+ // Note: If the ClusterClass is not found, a warning and no err is returned and the ClusterClass is nil.
317+ // Note: If the ClusterClass is not reconciled, a warning and no err is returned and the ClusterClass is returned.
318+ clusterClass , warnings , err := webhook .validateClusterClassExistsAndIsReconciled (ctx , newCluster )
319+ if err != nil {
320+ return allWarnings , append (allErrs , field .InternalError (fldPath .Child ("class" ), err ))
323321 }
324-
325- // Add the warnings if no error was returned.
326322 allWarnings = append (allWarnings , warnings ... )
327323
328- // If there's no error validate the Cluster based on the ClusterClass.
329- if clusterClassPollErr = = nil {
324+ // If we could get the ClusterClass validate the Cluster based on the ClusterClass.
325+ if clusterClass ! = nil {
330326 allErrs = append (allErrs , ValidateClusterForClusterClass (newCluster , clusterClass )... )
331327 }
332328
333329 // Validate the Cluster and associated ClusterClass' autoscaler annotations.
330+ // Note: ClusterClass validation is only run if clusterClass is not nil.
334331 allErrs = append (allErrs , validateAutoscalerAnnotationsForCluster (newCluster , clusterClass )... )
335332
336333 if oldCluster != nil { // On update
337- // The ClusterClass must exist to proceed with update validation. Return an error if the ClusterClass was
338- // not found.
339- if apierrors .IsNotFound (clusterClassPollErr ) {
334+ // The ClusterClass must exist to proceed with update validation as in that case the ClusterClass
335+ // should always be there (vs. during create it could be racy if Cluster and ClusterClass are created
336+ // at the same time). Return an error if the ClusterClass was not found.
337+ if clusterClass == nil {
340338 allErrs = append (
341339 allErrs , field .InternalError (
342340 fldPath .Child ("class" ),
343- clusterClassPollErr ))
341+ errors . Errorf ( "ClusterClass %s not found" , newCluster . GetClassKey ()) ))
344342 return allWarnings , allErrs
345343 }
346344
347- // Topology or Class can not be added on update unless ClusterTopologyUnsafeUpdateClassNameAnnotation is set.
345+ // Topology or Class can not be added or update unless ClusterTopologyUnsafeUpdateClassNameAnnotation is set.
348346 if ! oldCluster .Spec .Topology .IsDefined () || oldCluster .GetClassKey ().Name == "" {
349347 if _ , ok := newCluster .Annotations [clusterv1 .ClusterTopologyUnsafeUpdateClassNameAnnotation ]; ok {
350348 return allWarnings , allErrs
@@ -402,29 +400,21 @@ func (webhook *Cluster) validateTopology(ctx context.Context, oldCluster, newClu
402400
403401 // If the ClusterClass referenced in the Topology has changed compatibility checks are needed.
404402 if oldCluster .GetClassKey () != newCluster .GetClassKey () {
405- if clusterClassPollErr != nil {
406- allErrs = append (
407- allErrs , field .Forbidden (
408- fldPath .Child ("class" ),
409- fmt .Sprintf ("cannot rebase to ClusterClass %q: %s" ,
410- newCluster .GetClassKey (), clusterClassPollErr .Error ())))
411- // Return early with errors if the new ClusterClass can't be retrieved.
412- return allWarnings , allErrs
413- }
414-
415403 // Check to see if the ClusterClass referenced in the old version of the Cluster exists.
416- oldClusterClass , err := webhook .pollClusterClassForCluster (ctx , oldCluster )
417- if err != nil {
404+ // Return early with errors if the old ClusterClass can't be retrieved.
405+ oldClusterClass := & clusterv1.ClusterClass {}
406+ if err := webhook .Client .Get (ctx , oldCluster .GetClassKey (), oldClusterClass ); err != nil {
418407 allErrs = append (
419408 allErrs , field .Forbidden (
420409 fldPath .Child ("class" ),
421410 fmt .Sprintf ("valid ClusterClass with name %q could not be retrieved, change from class %[1]q to class %q cannot be validated. Error: %s" ,
422411 oldCluster .GetClassKey (), newCluster .GetClassKey (), err .Error ())))
423-
424- // Return early with errors if the old ClusterClass can't be retrieved.
425412 return allWarnings , allErrs
426413 }
427414
415+ // Note: We don't care if the old ClusterClass is reconciled as the validation below doesn't need it
416+ // and we want to allow to rebase away from a broken ClusterClass.
417+
428418 // Check if the new and old ClusterClasses are compatible with one another.
429419 allErrs = append (allErrs , check .ClusterClassesAreCompatible (oldClusterClass , clusterClass )... )
430420 }
@@ -639,11 +629,11 @@ func validateTopologyMachinePoolVersions(ctx context.Context, ctrlClient client.
639629 return nil
640630}
641631
642- func validateClusterRollout ( cluster * clusterv1.Cluster ) field.ErrorList {
632+ func validateTopologyRollout ( topology clusterv1.Topology , fldPath * field. Path ) field.ErrorList {
643633 var allErrs field.ErrorList
644634
645- for _ , md := range cluster . Spec . Topology .Workers .MachineDeployments {
646- fldPath := field . NewPath ( "spec" , "topology" , "workers" , "machineDeployments" ).Key (md .Name ).Child ("rollout" )
635+ for _ , md := range topology .Workers .MachineDeployments {
636+ fldPath := fldPath . Child ( "workers" , "machineDeployments" ).Key (md .Name ).Child ("rollout" )
647637 allErrs = append (allErrs , validateRolloutStrategy (fldPath .Child ("strategy" ), md .Rollout .Strategy .RollingUpdate .MaxUnavailable , md .Rollout .Strategy .RollingUpdate .MaxSurge )... )
648638 }
649639
@@ -917,8 +907,6 @@ func ValidateClusterForClusterClass(cluster *clusterv1.Cluster, clusterClass *cl
917907
918908 allErrs = append (allErrs , check .MachinePoolTopologiesAreValidAndDefinedInClusterClass (cluster , clusterClass )... )
919909
920- allErrs = append (allErrs , validateClusterRollout (cluster )... )
921-
922910 // Validate the MachineHealthChecks defined in the cluster topology.
923911 allErrs = append (allErrs , validateMachineHealthChecks (cluster , clusterClass )... )
924912 return allErrs
@@ -928,74 +916,78 @@ func ValidateClusterForClusterClass(cluster *clusterv1.Cluster, clusterClass *cl
928916// In any other case it will return an error.
929917func (webhook * Cluster ) validateClusterClassExistsAndIsReconciled (ctx context.Context , newCluster * clusterv1.Cluster ) (* clusterv1.ClusterClass , admission.Warnings , error ) {
930918 var allWarnings admission.Warnings
931- clusterClass , clusterClassPollErr := webhook .pollClusterClassForCluster (ctx , newCluster )
932- if clusterClassPollErr != nil {
933- // Add a warning if the Class does not exist or if it has not been successfully reconciled.
934- switch {
935- case apierrors .IsNotFound (clusterClassPollErr ):
936- allWarnings = append (allWarnings ,
937- fmt .Sprintf (
938- "Cluster refers to ClusterClass %s, but this ClusterClass does not exist. " +
939- "Cluster topology has not been fully validated. " +
940- "The ClusterClass must be created to reconcile the Cluster" , newCluster .GetClassKey ()),
941- )
942- case errors .Is (clusterClassPollErr , errClusterClassNotReconciled ):
943- allWarnings = append (allWarnings ,
944- fmt .Sprintf (
945- "Cluster refers to ClusterClass %s, but this ClusterClass hasn't been successfully reconciled. " +
946- "Cluster topology has not been fully validated. " +
947- "Please take a look at the ClusterClass status" , newCluster .GetClassKey ()),
948- )
949- // If there's any other error return a generic warning with the error message.
950- default :
951- allWarnings = append (allWarnings ,
952- fmt .Sprintf (
953- "Cluster refers to ClusterClass %s, but this ClusterClass could not be retrieved. " +
954- "Cluster topology has not been fully validated: %s" , newCluster .GetClassKey (), clusterClassPollErr .Error ()),
955- )
956- }
919+ clusterClass , clusterClassNotReconciled , clusterClassNotFound , err := webhook .pollClusterClassForCluster (ctx , newCluster )
920+ // Add a warning if the Class does not exist or if it has not been successfully reconciled.
921+ switch {
922+ case err != nil :
923+ allWarnings = append (allWarnings ,
924+ fmt .Sprintf (
925+ "Cluster refers to ClusterClass %s, but this ClusterClass could not be retrieved. " +
926+ "Cluster topology has not been fully validated: %s" , newCluster .GetClassKey (), err .Error ()),
927+ )
928+ case clusterClassNotFound :
929+ allWarnings = append (allWarnings ,
930+ fmt .Sprintf (
931+ "Cluster refers to ClusterClass %s, but this ClusterClass does not exist. " +
932+ "Cluster topology has not been fully validated. " +
933+ "The ClusterClass must be created to reconcile the Cluster" , newCluster .GetClassKey ()),
934+ )
935+ case clusterClassNotReconciled :
936+ allWarnings = append (allWarnings ,
937+ fmt .Sprintf (
938+ "Cluster refers to ClusterClass %s, but this ClusterClass hasn't been successfully reconciled. " +
939+ "Cluster topology has not been fully validated. " +
940+ "Please take a look at the ClusterClass status" , newCluster .GetClassKey ()),
941+ )
957942 }
958- return clusterClass , allWarnings , clusterClassPollErr
943+ return clusterClass , allWarnings , err
959944}
960945
961946// pollClusterClassForCluster will retry getting the ClusterClass referenced in the Cluster for two seconds.
962- func (webhook * Cluster ) pollClusterClassForCluster (ctx context.Context , cluster * clusterv1.Cluster ) (* clusterv1.ClusterClass , error ) {
947+ func (webhook * Cluster ) pollClusterClassForCluster (ctx context.Context , cluster * clusterv1.Cluster ) (_ * clusterv1.ClusterClass , clusterClassNotReconciled , clusterClassNotFound bool , _ error ) {
948+ var errClusterClassNotReconciled = errors .New ("ClusterClass is not successfully reconciled" )
949+
963950 clusterClass := & clusterv1.ClusterClass {}
964951 var clusterClassPollErr error
965952 _ = wait .PollUntilContextTimeout (ctx , 200 * time .Millisecond , 2 * time .Second , true , func (ctx context.Context ) (bool , error ) {
966953 if clusterClassPollErr = webhook .Client .Get (ctx , cluster .GetClassKey (), clusterClass ); clusterClassPollErr != nil {
967954 return false , nil //nolint:nilerr
968955 }
969956
970- if clusterClassPollErr = clusterClassIsReconciled (clusterClass ); clusterClassPollErr != nil {
971- return false , nil //nolint:nilerr
957+ if ! clusterClassIsReconciled (clusterClass ) {
958+ clusterClassPollErr = errClusterClassNotReconciled
959+ return false , nil
972960 }
961+
973962 clusterClassPollErr = nil
974963 return true , nil
975964 })
976965 if clusterClassPollErr != nil {
966+ if apierrors .IsNotFound (clusterClassPollErr ) {
967+ return nil , false , true , nil
968+ }
977969 if errors .Is (clusterClassPollErr , errClusterClassNotReconciled ) {
978970 // Return ClusterClass if we were able to get it and it's just not reconciled.
979- return clusterClass , clusterClassPollErr
971+ return clusterClass , true , false , nil
980972 }
981- return nil , clusterClassPollErr
973+ return nil , false , false , clusterClassPollErr
982974 }
983- return clusterClass , nil
975+ return clusterClass , false , false , nil
984976}
985977
986978// clusterClassIsReconciled returns errClusterClassNotReconciled if the ClusterClass has not successfully reconciled or if the
987979// ClusterClass variables have not been successfully reconciled.
988- func clusterClassIsReconciled (clusterClass * clusterv1.ClusterClass ) error {
980+ func clusterClassIsReconciled (clusterClass * clusterv1.ClusterClass ) bool {
989981 // If the clusterClass metadata generation does not match the status observed generation, the ClusterClass has not been successfully reconciled.
990982 if clusterClass .Generation != clusterClass .Status .ObservedGeneration {
991- return errClusterClassNotReconciled
983+ return false
992984 }
993985 // If the clusterClass does not have ClusterClassVariablesReconciled==True, the ClusterClass has not been successfully reconciled.
994986 if ! conditions .Has (clusterClass , clusterv1 .ClusterClassVariablesReadyCondition ) ||
995987 conditions .IsFalse (clusterClass , clusterv1 .ClusterClassVariablesReadyCondition ) {
996- return errClusterClassNotReconciled
988+ return false
997989 }
998- return nil
990+ return true
999991}
1000992
1001993func validateTopologyMetadata (topology clusterv1.Topology , fldPath * field.Path ) field.ErrorList {
0 commit comments