Skip to content

Commit eaa7539

Browse files
authored
Merge pull request #12895 from sbueringer/pr-simplify-cluster-webhook
🌱 Simplify Cluster webhook
2 parents 4e01957 + dbf8257 commit eaa7539

File tree

2 files changed

+74
-82
lines changed

2 files changed

+74
-82
lines changed

internal/webhooks/cluster.go

Lines changed: 72 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,6 @@ type Cluster struct {
7979
var _ webhook.CustomDefaulter = &Cluster{}
8080
var _ webhook.CustomValidator = &Cluster{}
8181

82-
var errClusterClassNotReconciled = errors.New("ClusterClass is not successfully reconciled")
83-
8482
// Default satisfies the defaulting webhook interface.
8583
func (webhook *Cluster) Default(ctx context.Context, obj runtime.Object) error {
8684
// We gather all defaulting errors and return them together.
@@ -109,14 +107,15 @@ func (webhook *Cluster) Default(ctx context.Context, obj runtime.Object) error {
109107
return apierrors.NewInvalid(clusterv1.GroupVersion.WithKind("Cluster").GroupKind(), cluster.Name, allErrs)
110108
}
111109

112-
clusterClass, err := webhook.pollClusterClassForCluster(ctx, cluster)
110+
clusterClass, clusterClassNotReconciled, clusterClassNotFound, err := webhook.pollClusterClassForCluster(ctx, cluster)
113111
if err != nil {
114-
// If the ClusterClass can't be found or is not up to date ignore the error.
115-
if apierrors.IsNotFound(err) || errors.Is(err, errClusterClassNotReconciled) {
116-
return nil
117-
}
118112
return apierrors.NewInternalError(errors.Wrapf(err, "Cluster %s can't be defaulted. ClusterClass %s can not be retrieved", cluster.Name, cluster.GetClassKey().Name))
119113
}
114+
if clusterClassNotReconciled || clusterClassNotFound {
115+
// If the ClusterClass can't be found or is not reconciled, return as we shouldn't
116+
// default and validate variables in that case.
117+
return nil
118+
}
120119

121120
// Validate cluster class variables transitions that may be enforced by CEL validation rules on variables.
122121
// If no request found in context, then this has not come via a webhook request, so skip validation of old cluster.
@@ -292,6 +291,8 @@ func (webhook *Cluster) validateTopology(ctx context.Context, oldCluster, newClu
292291
// metadata in topology should be valid
293292
allErrs = append(allErrs, validateTopologyMetadata(newCluster.Spec.Topology, fldPath)...)
294293

294+
allErrs = append(allErrs, validateTopologyRollout(newCluster.Spec.Topology, fldPath)...)
295+
295296
// upgrade concurrency should be a numeric value.
296297
if concurrency, ok := newCluster.Annotations[clusterv1.ClusterTopologyUpgradeConcurrencyAnnotation]; ok {
297298
concurrencyAnnotationField := field.NewPath("metadata", "annotations", clusterv1.ClusterTopologyUpgradeConcurrencyAnnotation)
@@ -312,39 +313,36 @@ func (webhook *Cluster) validateTopology(ctx context.Context, oldCluster, newClu
312313
}
313314

314315
// Get the ClusterClass referenced in the Cluster.
315-
clusterClass, warnings, clusterClassPollErr := webhook.validateClusterClassExistsAndIsReconciled(ctx, newCluster)
316-
// If the error is anything other than "NotFound" or "NotReconciled" return all errors.
317-
if clusterClassPollErr != nil && (!apierrors.IsNotFound(clusterClassPollErr) && !errors.Is(clusterClassPollErr, errClusterClassNotReconciled)) {
318-
allErrs = append(
319-
allErrs, field.InternalError(
320-
fldPath.Child("class"),
321-
clusterClassPollErr))
322-
return allWarnings, allErrs
316+
// Note: If the ClusterClass is not found, a warning and no err is returned and the ClusterClass is nil.
317+
// Note: If the ClusterClass is not reconciled, a warning and no err is returned and the ClusterClass is returned.
318+
clusterClass, warnings, err := webhook.validateClusterClassExistsAndIsReconciled(ctx, newCluster)
319+
if err != nil {
320+
return allWarnings, append(allErrs, field.InternalError(fldPath.Child("class"), err))
323321
}
324-
325-
// Add the warnings if no error was returned.
326322
allWarnings = append(allWarnings, warnings...)
327323

328-
// If there's no error validate the Cluster based on the ClusterClass.
329-
if clusterClassPollErr == nil {
324+
// If we could get the ClusterClass validate the Cluster based on the ClusterClass.
325+
if clusterClass != nil {
330326
allErrs = append(allErrs, ValidateClusterForClusterClass(newCluster, clusterClass)...)
331327
}
332328

333329
// Validate the Cluster and associated ClusterClass' autoscaler annotations.
330+
// Note: ClusterClass validation is only run if clusterClass is not nil.
334331
allErrs = append(allErrs, validateAutoscalerAnnotationsForCluster(newCluster, clusterClass)...)
335332

336333
if oldCluster != nil { // On update
337-
// The ClusterClass must exist to proceed with update validation. Return an error if the ClusterClass was
338-
// not found.
339-
if apierrors.IsNotFound(clusterClassPollErr) {
334+
// The ClusterClass must exist to proceed with update validation as in that case the ClusterClass
335+
// should always be there (vs. during create it could be racy if Cluster and ClusterClass are created
336+
// at the same time). Return an error if the ClusterClass was not found.
337+
if clusterClass == nil {
340338
allErrs = append(
341339
allErrs, field.InternalError(
342340
fldPath.Child("class"),
343-
clusterClassPollErr))
341+
errors.Errorf("ClusterClass %s not found", newCluster.GetClassKey())))
344342
return allWarnings, allErrs
345343
}
346344

347-
// Topology or Class can not be added on update unless ClusterTopologyUnsafeUpdateClassNameAnnotation is set.
345+
// Topology or Class can not be added or update unless ClusterTopologyUnsafeUpdateClassNameAnnotation is set.
348346
if !oldCluster.Spec.Topology.IsDefined() || oldCluster.GetClassKey().Name == "" {
349347
if _, ok := newCluster.Annotations[clusterv1.ClusterTopologyUnsafeUpdateClassNameAnnotation]; ok {
350348
return allWarnings, allErrs
@@ -402,29 +400,21 @@ func (webhook *Cluster) validateTopology(ctx context.Context, oldCluster, newClu
402400

403401
// If the ClusterClass referenced in the Topology has changed compatibility checks are needed.
404402
if oldCluster.GetClassKey() != newCluster.GetClassKey() {
405-
if clusterClassPollErr != nil {
406-
allErrs = append(
407-
allErrs, field.Forbidden(
408-
fldPath.Child("class"),
409-
fmt.Sprintf("cannot rebase to ClusterClass %q: %s",
410-
newCluster.GetClassKey(), clusterClassPollErr.Error())))
411-
// Return early with errors if the new ClusterClass can't be retrieved.
412-
return allWarnings, allErrs
413-
}
414-
415403
// Check to see if the ClusterClass referenced in the old version of the Cluster exists.
416-
oldClusterClass, err := webhook.pollClusterClassForCluster(ctx, oldCluster)
417-
if err != nil {
404+
// Return early with errors if the old ClusterClass can't be retrieved.
405+
oldClusterClass := &clusterv1.ClusterClass{}
406+
if err := webhook.Client.Get(ctx, oldCluster.GetClassKey(), oldClusterClass); err != nil {
418407
allErrs = append(
419408
allErrs, field.Forbidden(
420409
fldPath.Child("class"),
421410
fmt.Sprintf("valid ClusterClass with name %q could not be retrieved, change from class %[1]q to class %q cannot be validated. Error: %s",
422411
oldCluster.GetClassKey(), newCluster.GetClassKey(), err.Error())))
423-
424-
// Return early with errors if the old ClusterClass can't be retrieved.
425412
return allWarnings, allErrs
426413
}
427414

415+
// Note: We don't care if the old ClusterClass is reconciled as the validation below doesn't need it
416+
// and we want to allow to rebase away from a broken ClusterClass.
417+
428418
// Check if the new and old ClusterClasses are compatible with one another.
429419
allErrs = append(allErrs, check.ClusterClassesAreCompatible(oldClusterClass, clusterClass)...)
430420
}
@@ -639,11 +629,11 @@ func validateTopologyMachinePoolVersions(ctx context.Context, ctrlClient client.
639629
return nil
640630
}
641631

642-
func validateClusterRollout(cluster *clusterv1.Cluster) field.ErrorList {
632+
func validateTopologyRollout(topology clusterv1.Topology, fldPath *field.Path) field.ErrorList {
643633
var allErrs field.ErrorList
644634

645-
for _, md := range cluster.Spec.Topology.Workers.MachineDeployments {
646-
fldPath := field.NewPath("spec", "topology", "workers", "machineDeployments").Key(md.Name).Child("rollout")
635+
for _, md := range topology.Workers.MachineDeployments {
636+
fldPath := fldPath.Child("workers", "machineDeployments").Key(md.Name).Child("rollout")
647637
allErrs = append(allErrs, validateRolloutStrategy(fldPath.Child("strategy"), md.Rollout.Strategy.RollingUpdate.MaxUnavailable, md.Rollout.Strategy.RollingUpdate.MaxSurge)...)
648638
}
649639

@@ -917,8 +907,6 @@ func ValidateClusterForClusterClass(cluster *clusterv1.Cluster, clusterClass *cl
917907

918908
allErrs = append(allErrs, check.MachinePoolTopologiesAreValidAndDefinedInClusterClass(cluster, clusterClass)...)
919909

920-
allErrs = append(allErrs, validateClusterRollout(cluster)...)
921-
922910
// Validate the MachineHealthChecks defined in the cluster topology.
923911
allErrs = append(allErrs, validateMachineHealthChecks(cluster, clusterClass)...)
924912
return allErrs
@@ -928,74 +916,78 @@ func ValidateClusterForClusterClass(cluster *clusterv1.Cluster, clusterClass *cl
928916
// In any other case it will return an error.
929917
func (webhook *Cluster) validateClusterClassExistsAndIsReconciled(ctx context.Context, newCluster *clusterv1.Cluster) (*clusterv1.ClusterClass, admission.Warnings, error) {
930918
var allWarnings admission.Warnings
931-
clusterClass, clusterClassPollErr := webhook.pollClusterClassForCluster(ctx, newCluster)
932-
if clusterClassPollErr != nil {
933-
// Add a warning if the Class does not exist or if it has not been successfully reconciled.
934-
switch {
935-
case apierrors.IsNotFound(clusterClassPollErr):
936-
allWarnings = append(allWarnings,
937-
fmt.Sprintf(
938-
"Cluster refers to ClusterClass %s, but this ClusterClass does not exist. "+
939-
"Cluster topology has not been fully validated. "+
940-
"The ClusterClass must be created to reconcile the Cluster", newCluster.GetClassKey()),
941-
)
942-
case errors.Is(clusterClassPollErr, errClusterClassNotReconciled):
943-
allWarnings = append(allWarnings,
944-
fmt.Sprintf(
945-
"Cluster refers to ClusterClass %s, but this ClusterClass hasn't been successfully reconciled. "+
946-
"Cluster topology has not been fully validated. "+
947-
"Please take a look at the ClusterClass status", newCluster.GetClassKey()),
948-
)
949-
// If there's any other error return a generic warning with the error message.
950-
default:
951-
allWarnings = append(allWarnings,
952-
fmt.Sprintf(
953-
"Cluster refers to ClusterClass %s, but this ClusterClass could not be retrieved. "+
954-
"Cluster topology has not been fully validated: %s", newCluster.GetClassKey(), clusterClassPollErr.Error()),
955-
)
956-
}
919+
clusterClass, clusterClassNotReconciled, clusterClassNotFound, err := webhook.pollClusterClassForCluster(ctx, newCluster)
920+
// Add a warning if the Class does not exist or if it has not been successfully reconciled.
921+
switch {
922+
case err != nil:
923+
allWarnings = append(allWarnings,
924+
fmt.Sprintf(
925+
"Cluster refers to ClusterClass %s, but this ClusterClass could not be retrieved. "+
926+
"Cluster topology has not been fully validated: %s", newCluster.GetClassKey(), err.Error()),
927+
)
928+
case clusterClassNotFound:
929+
allWarnings = append(allWarnings,
930+
fmt.Sprintf(
931+
"Cluster refers to ClusterClass %s, but this ClusterClass does not exist. "+
932+
"Cluster topology has not been fully validated. "+
933+
"The ClusterClass must be created to reconcile the Cluster", newCluster.GetClassKey()),
934+
)
935+
case clusterClassNotReconciled:
936+
allWarnings = append(allWarnings,
937+
fmt.Sprintf(
938+
"Cluster refers to ClusterClass %s, but this ClusterClass hasn't been successfully reconciled. "+
939+
"Cluster topology has not been fully validated. "+
940+
"Please take a look at the ClusterClass status", newCluster.GetClassKey()),
941+
)
957942
}
958-
return clusterClass, allWarnings, clusterClassPollErr
943+
return clusterClass, allWarnings, err
959944
}
960945

961946
// pollClusterClassForCluster will retry getting the ClusterClass referenced in the Cluster for two seconds.
962-
func (webhook *Cluster) pollClusterClassForCluster(ctx context.Context, cluster *clusterv1.Cluster) (*clusterv1.ClusterClass, error) {
947+
func (webhook *Cluster) pollClusterClassForCluster(ctx context.Context, cluster *clusterv1.Cluster) (_ *clusterv1.ClusterClass, clusterClassNotReconciled, clusterClassNotFound bool, _ error) {
948+
var errClusterClassNotReconciled = errors.New("ClusterClass is not successfully reconciled")
949+
963950
clusterClass := &clusterv1.ClusterClass{}
964951
var clusterClassPollErr error
965952
_ = wait.PollUntilContextTimeout(ctx, 200*time.Millisecond, 2*time.Second, true, func(ctx context.Context) (bool, error) {
966953
if clusterClassPollErr = webhook.Client.Get(ctx, cluster.GetClassKey(), clusterClass); clusterClassPollErr != nil {
967954
return false, nil //nolint:nilerr
968955
}
969956

970-
if clusterClassPollErr = clusterClassIsReconciled(clusterClass); clusterClassPollErr != nil {
971-
return false, nil //nolint:nilerr
957+
if !clusterClassIsReconciled(clusterClass) {
958+
clusterClassPollErr = errClusterClassNotReconciled
959+
return false, nil
972960
}
961+
973962
clusterClassPollErr = nil
974963
return true, nil
975964
})
976965
if clusterClassPollErr != nil {
966+
if apierrors.IsNotFound(clusterClassPollErr) {
967+
return nil, false, true, nil
968+
}
977969
if errors.Is(clusterClassPollErr, errClusterClassNotReconciled) {
978970
// Return ClusterClass if we were able to get it and it's just not reconciled.
979-
return clusterClass, clusterClassPollErr
971+
return clusterClass, true, false, nil
980972
}
981-
return nil, clusterClassPollErr
973+
return nil, false, false, clusterClassPollErr
982974
}
983-
return clusterClass, nil
975+
return clusterClass, false, false, nil
984976
}
985977

986978
// clusterClassIsReconciled returns errClusterClassNotReconciled if the ClusterClass has not successfully reconciled or if the
987979
// ClusterClass variables have not been successfully reconciled.
988-
func clusterClassIsReconciled(clusterClass *clusterv1.ClusterClass) error {
980+
func clusterClassIsReconciled(clusterClass *clusterv1.ClusterClass) bool {
989981
// If the clusterClass metadata generation does not match the status observed generation, the ClusterClass has not been successfully reconciled.
990982
if clusterClass.Generation != clusterClass.Status.ObservedGeneration {
991-
return errClusterClassNotReconciled
983+
return false
992984
}
993985
// If the clusterClass does not have ClusterClassVariablesReconciled==True, the ClusterClass has not been successfully reconciled.
994986
if !conditions.Has(clusterClass, clusterv1.ClusterClassVariablesReadyCondition) ||
995987
conditions.IsFalse(clusterClass, clusterv1.ClusterClassVariablesReadyCondition) {
996-
return errClusterClassNotReconciled
988+
return false
997989
}
998-
return nil
990+
return true
999991
}
1000992

1001993
func validateTopologyMetadata(topology clusterv1.Topology, fldPath *field.Path) field.ErrorList {

internal/webhooks/cluster_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3450,11 +3450,11 @@ func TestClusterClassPollingErrors(t *testing.T) {
34503450
wantErr: false,
34513451
},
34523452
{
3453-
name: "Fail on update if oldCluster ClusterClass generation does not match observedGeneration",
3453+
name: "Pass on update if oldCluster ClusterClass generation does not match observedGeneration",
34543454
cluster: builder.Cluster(metav1.NamespaceDefault, "cluster1").WithTopology(secondTopology).Build(),
34553455
oldCluster: builder.Cluster(metav1.NamespaceDefault, "cluster1").WithTopology(topology).Build(),
34563456
clusterClasses: []*clusterv1.ClusterClass{ccGenerationMismatch, secondFullyReconciled},
3457-
wantErr: true,
3457+
wantErr: false,
34583458
},
34593459
{
34603460
name: "Fail on update if old Cluster ClusterClass is not found",

0 commit comments

Comments
 (0)