Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 72 additions & 80 deletions internal/webhooks/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,6 @@ type Cluster struct {
var _ webhook.CustomDefaulter = &Cluster{}
var _ webhook.CustomValidator = &Cluster{}

var errClusterClassNotReconciled = errors.New("ClusterClass is not successfully reconciled")

// Default satisfies the defaulting webhook interface.
func (webhook *Cluster) Default(ctx context.Context, obj runtime.Object) error {
// We gather all defaulting errors and return them together.
Expand Down Expand Up @@ -109,14 +107,15 @@ func (webhook *Cluster) Default(ctx context.Context, obj runtime.Object) error {
return apierrors.NewInvalid(clusterv1.GroupVersion.WithKind("Cluster").GroupKind(), cluster.Name, allErrs)
}

clusterClass, err := webhook.pollClusterClassForCluster(ctx, cluster)
clusterClass, clusterClassNotReconciled, clusterClassNotFound, err := webhook.pollClusterClassForCluster(ctx, cluster)
if err != nil {
// If the ClusterClass can't be found or is not up to date ignore the error.
if apierrors.IsNotFound(err) || errors.Is(err, errClusterClassNotReconciled) {
return nil
}
return apierrors.NewInternalError(errors.Wrapf(err, "Cluster %s can't be defaulted. ClusterClass %s can not be retrieved", cluster.Name, cluster.GetClassKey().Name))
}
if clusterClassNotReconciled || clusterClassNotFound {
// If the ClusterClass can't be found or is not reconciled, return as we shouldn't
// default and validate variables in that case.
return nil
}

// Validate cluster class variables transitions that may be enforced by CEL validation rules on variables.
// If no request found in context, then this has not come via a webhook request, so skip validation of old cluster.
Expand Down Expand Up @@ -292,6 +291,8 @@ func (webhook *Cluster) validateTopology(ctx context.Context, oldCluster, newClu
// metadata in topology should be valid
allErrs = append(allErrs, validateTopologyMetadata(newCluster.Spec.Topology, fldPath)...)

allErrs = append(allErrs, validateTopologyRollout(newCluster.Spec.Topology, fldPath)...)

// upgrade concurrency should be a numeric value.
if concurrency, ok := newCluster.Annotations[clusterv1.ClusterTopologyUpgradeConcurrencyAnnotation]; ok {
concurrencyAnnotationField := field.NewPath("metadata", "annotations", clusterv1.ClusterTopologyUpgradeConcurrencyAnnotation)
Expand All @@ -312,39 +313,36 @@ func (webhook *Cluster) validateTopology(ctx context.Context, oldCluster, newClu
}

// Get the ClusterClass referenced in the Cluster.
clusterClass, warnings, clusterClassPollErr := webhook.validateClusterClassExistsAndIsReconciled(ctx, newCluster)
// If the error is anything other than "NotFound" or "NotReconciled" return all errors.
if clusterClassPollErr != nil && (!apierrors.IsNotFound(clusterClassPollErr) && !errors.Is(clusterClassPollErr, errClusterClassNotReconciled)) {
allErrs = append(
allErrs, field.InternalError(
fldPath.Child("class"),
clusterClassPollErr))
return allWarnings, allErrs
// Note: If the ClusterClass is not found, a warning and no err is returned and the ClusterClass is nil.
// Note: If the ClusterClass is not reconciled, a warning and no err is returned and the ClusterClass is returned.
clusterClass, warnings, err := webhook.validateClusterClassExistsAndIsReconciled(ctx, newCluster)
if err != nil {
return allWarnings, append(allErrs, field.InternalError(fldPath.Child("class"), err))
}

// Add the warnings if no error was returned.
allWarnings = append(allWarnings, warnings...)

// If there's no error validate the Cluster based on the ClusterClass.
if clusterClassPollErr == nil {
// If we could get the ClusterClass validate the Cluster based on the ClusterClass.
if clusterClass != nil {
allErrs = append(allErrs, ValidateClusterForClusterClass(newCluster, clusterClass)...)
}

// Validate the Cluster and associated ClusterClass' autoscaler annotations.
// Note: ClusterClass validation is only run if clusterClass is not nil.
allErrs = append(allErrs, validateAutoscalerAnnotationsForCluster(newCluster, clusterClass)...)

if oldCluster != nil { // On update
// The ClusterClass must exist to proceed with update validation. Return an error if the ClusterClass was
// not found.
if apierrors.IsNotFound(clusterClassPollErr) {
// The ClusterClass must exist to proceed with update validation as in that case the ClusterClass
// should always be there (vs. during create it could be racy if Cluster and ClusterClass are created
// at the same time). Return an error if the ClusterClass was not found.
if clusterClass == nil {
allErrs = append(
allErrs, field.InternalError(
fldPath.Child("class"),
clusterClassPollErr))
errors.Errorf("ClusterClass %s not found", newCluster.GetClassKey())))
return allWarnings, allErrs
}

// Topology or Class can not be added on update unless ClusterTopologyUnsafeUpdateClassNameAnnotation is set.
// Topology or Class can not be added or update unless ClusterTopologyUnsafeUpdateClassNameAnnotation is set.
if !oldCluster.Spec.Topology.IsDefined() || oldCluster.GetClassKey().Name == "" {
if _, ok := newCluster.Annotations[clusterv1.ClusterTopologyUnsafeUpdateClassNameAnnotation]; ok {
return allWarnings, allErrs
Expand Down Expand Up @@ -402,29 +400,21 @@ func (webhook *Cluster) validateTopology(ctx context.Context, oldCluster, newClu

// If the ClusterClass referenced in the Topology has changed compatibility checks are needed.
if oldCluster.GetClassKey() != newCluster.GetClassKey() {
if clusterClassPollErr != nil {
allErrs = append(
allErrs, field.Forbidden(
fldPath.Child("class"),
fmt.Sprintf("cannot rebase to ClusterClass %q: %s",
newCluster.GetClassKey(), clusterClassPollErr.Error())))
// Return early with errors if the new ClusterClass can't be retrieved.
return allWarnings, allErrs
}

// Check to see if the ClusterClass referenced in the old version of the Cluster exists.
oldClusterClass, err := webhook.pollClusterClassForCluster(ctx, oldCluster)
if err != nil {
// Return early with errors if the old ClusterClass can't be retrieved.
oldClusterClass := &clusterv1.ClusterClass{}
if err := webhook.Client.Get(ctx, oldCluster.GetClassKey(), oldClusterClass); err != nil {
allErrs = append(
allErrs, field.Forbidden(
fldPath.Child("class"),
fmt.Sprintf("valid ClusterClass with name %q could not be retrieved, change from class %[1]q to class %q cannot be validated. Error: %s",
oldCluster.GetClassKey(), newCluster.GetClassKey(), err.Error())))

// Return early with errors if the old ClusterClass can't be retrieved.
return allWarnings, allErrs
}

// Note: We don't care if the old ClusterClass is reconciled as the validation below doesn't need it
// and we want to allow to rebase away from a broken ClusterClass.

// Check if the new and old ClusterClasses are compatible with one another.
allErrs = append(allErrs, check.ClusterClassesAreCompatible(oldClusterClass, clusterClass)...)
}
Expand Down Expand Up @@ -639,11 +629,11 @@ func validateTopologyMachinePoolVersions(ctx context.Context, ctrlClient client.
return nil
}

func validateClusterRollout(cluster *clusterv1.Cluster) field.ErrorList {
func validateTopologyRollout(topology clusterv1.Topology, fldPath *field.Path) field.ErrorList {
var allErrs field.ErrorList

for _, md := range cluster.Spec.Topology.Workers.MachineDeployments {
fldPath := field.NewPath("spec", "topology", "workers", "machineDeployments").Key(md.Name).Child("rollout")
for _, md := range topology.Workers.MachineDeployments {
fldPath := fldPath.Child("workers", "machineDeployments").Key(md.Name).Child("rollout")
allErrs = append(allErrs, validateRolloutStrategy(fldPath.Child("strategy"), md.Rollout.Strategy.RollingUpdate.MaxUnavailable, md.Rollout.Strategy.RollingUpdate.MaxSurge)...)
}

Expand Down Expand Up @@ -917,8 +907,6 @@ func ValidateClusterForClusterClass(cluster *clusterv1.Cluster, clusterClass *cl

allErrs = append(allErrs, check.MachinePoolTopologiesAreValidAndDefinedInClusterClass(cluster, clusterClass)...)

allErrs = append(allErrs, validateClusterRollout(cluster)...)

// Validate the MachineHealthChecks defined in the cluster topology.
allErrs = append(allErrs, validateMachineHealthChecks(cluster, clusterClass)...)
return allErrs
Expand All @@ -928,74 +916,78 @@ func ValidateClusterForClusterClass(cluster *clusterv1.Cluster, clusterClass *cl
// In any other case it will return an error.
func (webhook *Cluster) validateClusterClassExistsAndIsReconciled(ctx context.Context, newCluster *clusterv1.Cluster) (*clusterv1.ClusterClass, admission.Warnings, error) {
var allWarnings admission.Warnings
clusterClass, clusterClassPollErr := webhook.pollClusterClassForCluster(ctx, newCluster)
if clusterClassPollErr != nil {
// Add a warning if the Class does not exist or if it has not been successfully reconciled.
switch {
case apierrors.IsNotFound(clusterClassPollErr):
allWarnings = append(allWarnings,
fmt.Sprintf(
"Cluster refers to ClusterClass %s, but this ClusterClass does not exist. "+
"Cluster topology has not been fully validated. "+
"The ClusterClass must be created to reconcile the Cluster", newCluster.GetClassKey()),
)
case errors.Is(clusterClassPollErr, errClusterClassNotReconciled):
allWarnings = append(allWarnings,
fmt.Sprintf(
"Cluster refers to ClusterClass %s, but this ClusterClass hasn't been successfully reconciled. "+
"Cluster topology has not been fully validated. "+
"Please take a look at the ClusterClass status", newCluster.GetClassKey()),
)
// If there's any other error return a generic warning with the error message.
default:
allWarnings = append(allWarnings,
fmt.Sprintf(
"Cluster refers to ClusterClass %s, but this ClusterClass could not be retrieved. "+
"Cluster topology has not been fully validated: %s", newCluster.GetClassKey(), clusterClassPollErr.Error()),
)
}
clusterClass, clusterClassNotReconciled, clusterClassNotFound, err := webhook.pollClusterClassForCluster(ctx, newCluster)
// Add a warning if the Class does not exist or if it has not been successfully reconciled.
switch {
case err != nil:
allWarnings = append(allWarnings,
fmt.Sprintf(
"Cluster refers to ClusterClass %s, but this ClusterClass could not be retrieved. "+
"Cluster topology has not been fully validated: %s", newCluster.GetClassKey(), err.Error()),
)
case clusterClassNotFound:
allWarnings = append(allWarnings,
fmt.Sprintf(
"Cluster refers to ClusterClass %s, but this ClusterClass does not exist. "+
"Cluster topology has not been fully validated. "+
"The ClusterClass must be created to reconcile the Cluster", newCluster.GetClassKey()),
)
case clusterClassNotReconciled:
allWarnings = append(allWarnings,
fmt.Sprintf(
"Cluster refers to ClusterClass %s, but this ClusterClass hasn't been successfully reconciled. "+
"Cluster topology has not been fully validated. "+
"Please take a look at the ClusterClass status", newCluster.GetClassKey()),
)
}
return clusterClass, allWarnings, clusterClassPollErr
return clusterClass, allWarnings, err
}

// pollClusterClassForCluster will retry getting the ClusterClass referenced in the Cluster for two seconds.
func (webhook *Cluster) pollClusterClassForCluster(ctx context.Context, cluster *clusterv1.Cluster) (*clusterv1.ClusterClass, error) {
func (webhook *Cluster) pollClusterClassForCluster(ctx context.Context, cluster *clusterv1.Cluster) (_ *clusterv1.ClusterClass, clusterClassNotReconciled, clusterClassNotFound bool, _ error) {
var errClusterClassNotReconciled = errors.New("ClusterClass is not successfully reconciled")

clusterClass := &clusterv1.ClusterClass{}
var clusterClassPollErr error
_ = wait.PollUntilContextTimeout(ctx, 200*time.Millisecond, 2*time.Second, true, func(ctx context.Context) (bool, error) {
if clusterClassPollErr = webhook.Client.Get(ctx, cluster.GetClassKey(), clusterClass); clusterClassPollErr != nil {
return false, nil //nolint:nilerr
}

if clusterClassPollErr = clusterClassIsReconciled(clusterClass); clusterClassPollErr != nil {
return false, nil //nolint:nilerr
if !clusterClassIsReconciled(clusterClass) {
clusterClassPollErr = errClusterClassNotReconciled
return false, nil
}

clusterClassPollErr = nil
return true, nil
})
if clusterClassPollErr != nil {
if apierrors.IsNotFound(clusterClassPollErr) {
return nil, false, true, nil
}
if errors.Is(clusterClassPollErr, errClusterClassNotReconciled) {
// Return ClusterClass if we were able to get it and it's just not reconciled.
return clusterClass, clusterClassPollErr
return clusterClass, true, false, nil
}
return nil, clusterClassPollErr
return nil, false, false, clusterClassPollErr
}
return clusterClass, nil
return clusterClass, false, false, nil
}

// clusterClassIsReconciled returns errClusterClassNotReconciled if the ClusterClass has not successfully reconciled or if the
// ClusterClass variables have not been successfully reconciled.
func clusterClassIsReconciled(clusterClass *clusterv1.ClusterClass) error {
func clusterClassIsReconciled(clusterClass *clusterv1.ClusterClass) bool {
// If the clusterClass metadata generation does not match the status observed generation, the ClusterClass has not been successfully reconciled.
if clusterClass.Generation != clusterClass.Status.ObservedGeneration {
return errClusterClassNotReconciled
return false
}
// If the clusterClass does not have ClusterClassVariablesReconciled==True, the ClusterClass has not been successfully reconciled.
if !conditions.Has(clusterClass, clusterv1.ClusterClassVariablesReadyCondition) ||
conditions.IsFalse(clusterClass, clusterv1.ClusterClassVariablesReadyCondition) {
return errClusterClassNotReconciled
return false
}
return nil
return true
}

func validateTopologyMetadata(topology clusterv1.Topology, fldPath *field.Path) field.ErrorList {
Expand Down
4 changes: 2 additions & 2 deletions internal/webhooks/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3450,11 +3450,11 @@ func TestClusterClassPollingErrors(t *testing.T) {
wantErr: false,
},
{
name: "Fail on update if oldCluster ClusterClass generation does not match observedGeneration",
name: "Pass on update if oldCluster ClusterClass generation does not match observedGeneration",
cluster: builder.Cluster(metav1.NamespaceDefault, "cluster1").WithTopology(secondTopology).Build(),
oldCluster: builder.Cluster(metav1.NamespaceDefault, "cluster1").WithTopology(topology).Build(),
clusterClasses: []*clusterv1.ClusterClass{ccGenerationMismatch, secondFullyReconciled},
wantErr: true,
wantErr: false,
},
{
name: "Fail on update if old Cluster ClusterClass is not found",
Expand Down